/*
 * GTT virtualization
 *
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *    Xiao Zheng <xiao.zheng@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"
#include "i915_pvinfo.h"
#include "trace.h"

static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;

/*
 * validate a gm address and related range size,
 * translate it to host gm address
 */
bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{
	if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size
			&& !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) {
		gvt_err("vgpu%d: invalid range gmadr 0x%llx size 0x%x\n",
				vgpu->id, addr, size);
		return false;
	}
	return true;
}

/* translate a guest gmadr to host gmadr */
int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
{
	if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr),
			"invalid guest gmadr %llx\n", g_addr))
		return -EACCES;

	if (vgpu_gmadr_is_aperture(vgpu, g_addr))
		*h_addr = vgpu_aperture_gmadr_base(vgpu)
			+ (g_addr - vgpu_aperture_offset(vgpu));
	else
		*h_addr = vgpu_hidden_gmadr_base(vgpu)
			+ (g_addr - vgpu_hidden_offset(vgpu));
	return 0;
}

/* translate a host gmadr to guest gmadr */
int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
{
	if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr),
			"invalid host gmadr %llx\n", h_addr))
		return -EACCES;

	if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
		*g_addr = vgpu_aperture_gmadr_base(vgpu)
			+ (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
	else
		*g_addr = vgpu_hidden_gmadr_base(vgpu)
			+ (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
	return 0;
}

int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
		unsigned long *h_index)
{
	u64 h_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << GTT_PAGE_SHIFT,
			&h_addr);
	if (ret)
		return ret;

	*h_index = h_addr >> GTT_PAGE_SHIFT;
	return 0;
}

int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
		unsigned long *g_index)
{
	u64 g_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << GTT_PAGE_SHIFT,
			&g_addr);
	if (ret)
		return ret;

	*g_index = g_addr >> GTT_PAGE_SHIFT;
	return 0;
}

#define gtt_type_is_entry(type) \
	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_type_is_pt(type) \
	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)

#define gtt_type_is_pte_pt(type) \
	(type == GTT_TYPE_PPGTT_PTE_PT)

#define gtt_type_is_root_pointer(type) \
	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_init_entry(e, t, p, v) do { \
	(e)->type = t; \
	(e)->pdev = p; \
	memcpy(&(e)->val64, &v, sizeof(v)); \
} while (0)

/*
 * Mappings between GTT_TYPE* enumerations.
 * For a given type, the table below provides:
 * - the type of the next-level page table
 * - the type of an entry inside this level of page table
 * - the type of the entry when its PSE bit is set
 *
 * If the given type doesn't carry that kind of information, e.g. asking for
 * the PSE type of an L4 root entry, or for the next-level page table type of
 * a PTE page table (an L4 root entry has no PSE bit, and a PTE page table has
 * no next level), GTT_TYPE_INVALID is returned. This is useful when
 * traversing a page table.
 */

struct gtt_type_table_entry {
	int entry_type;
	int next_pt_type;
	int pse_entry_type;
};

#define GTT_TYPE_TABLE_ENTRY(type, e_type, npt_type, pse_type) \
	[type] = { \
		.entry_type = e_type, \
		.next_pt_type = npt_type, \
		.pse_entry_type = pse_type, \
	}

static struct gtt_type_table_entry gtt_type_table[] = {
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
			GTT_TYPE_GGTT_PTE,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
};
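
/*
 * Worked example (informational only, derived from the table above):
 * the helpers below look up this table while walking a 4-level PPGTT,
 * for instance:
 *
 *   get_entry_type(GTT_TYPE_PPGTT_PML4_PT)        -> GTT_TYPE_PPGTT_PML4_ENTRY
 *   get_next_pt_type(GTT_TYPE_PPGTT_PML4_ENTRY)   -> GTT_TYPE_PPGTT_PDP_PT
 *   get_next_pt_type(GTT_TYPE_PPGTT_PDP_ENTRY)    -> GTT_TYPE_PPGTT_PDE_PT
 *   get_pse_type(GTT_TYPE_PPGTT_PDE_ENTRY)        -> GTT_TYPE_PPGTT_PTE_2M_ENTRY
 *   get_next_pt_type(GTT_TYPE_PPGTT_PTE_4K_ENTRY) -> GTT_TYPE_INVALID
 */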

static inline int get_next_pt_type(int type)
{
	return gtt_type_table[type].next_pt_type;
}

static inline int get_entry_type(int type)
{
	return gtt_type_table[type].entry_type;
}

static inline int get_pse_type(int type)
{
	return gtt_type_table[type].pse_entry_type;
}

static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
	u64 pte;

#ifdef readq
	pte = readq(addr);
#else
	pte = ioread32(addr);
	pte |= (u64)ioread32(addr + 4) << 32;
#endif
	return pte;
}

static void write_pte64(struct drm_i915_private *dev_priv,
		unsigned long index, u64 pte)
{
	void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;

#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static inline struct intel_gvt_gtt_entry *gtt_get_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return e;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		WARN_ON(ret);
	} else if (!pt) {
		e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
	} else {
		e->val64 = *((u64 *)pt + index);
	}
	return e;
}

static inline struct intel_gvt_gtt_entry *gtt_set_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return e;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		WARN_ON(ret);
	} else if (!pt) {
		write_pte64(vgpu->gvt->dev_priv, index, e->val64);
	} else {
		*((u64 *)pt + index) = e->val64;
	}
	return e;
}

#define GTT_HAW 46

#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30 + 1)) - 1) << 30)
#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21 + 1)) - 1) << 21)
#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12 + 1)) - 1) << 12)

static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
{
	unsigned long pfn;

	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
		pfn = (e->val64 & ADDR_1G_MASK) >> 12;
	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
		pfn = (e->val64 & ADDR_2M_MASK) >> 12;
	else
		pfn = (e->val64 & ADDR_4K_MASK) >> 12;
	return pfn;
}

static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
{
	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
		e->val64 &= ~ADDR_1G_MASK;
		pfn &= (ADDR_1G_MASK >> 12);
	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
		e->val64 &= ~ADDR_2M_MASK;
		pfn &= (ADDR_2M_MASK >> 12);
	} else {
		e->val64 &= ~ADDR_4K_MASK;
		pfn &= (ADDR_4K_MASK >> 12);
	}

	e->val64 |= (pfn << 12);
}

static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
{
	/* Entry doesn't have PSE bit. */
	if (get_pse_type(e->type) == GTT_TYPE_INVALID)
		return false;

	e->type = get_entry_type(e->type);
	if (!(e->val64 & (1 << 7)))
		return false;

	e->type = get_pse_type(e->type);
	return true;
}

static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
{
	/*
	 * i915 writes PDP root pointer registers without present bit,
	 * it also works, so we need to treat root pointer entry
	 * specifically.
	 */
	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		return (e->val64 != 0);
	else
		return (e->val64 & (1 << 0));
}

static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~(1 << 0);
}

/*
 * Per-platform GMA routines.
 */
static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
{
	unsigned long x = (gma >> GTT_PAGE_SHIFT);

	trace_gma_index(__func__, gma, x);
	return x;
}

#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
{ \
	unsigned long x = (exp); \
	trace_gma_index(__func__, gma, x); \
	return x; \
}

DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));

static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
	.get_entry = gtt_get_entry64,
	.set_entry = gtt_set_entry64,
	.clear_present = gtt_entry_clear_present,
	.test_present = gen8_gtt_test_present,
	.test_pse = gen8_gtt_test_pse,
	.get_pfn = gen8_gtt_get_pfn,
	.set_pfn = gen8_gtt_set_pfn,
};

static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
	.gma_to_pte_index = gen8_gma_to_pte_index,
	.gma_to_pde_index = gen8_gma_to_pde_index,
	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
	.gma_to_pml4_index = gen8_gma_to_pml4_index,
};

static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p,
		struct intel_gvt_gtt_entry *m)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	unsigned long gfn, mfn;

	*m = *p;

	if (!ops->test_present(p))
		return 0;

	gfn = ops->get_pfn(p);

	mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn);
	if (mfn == INTEL_GVT_INVALID_ADDR) {
		gvt_err("fail to translate gfn: 0x%lx\n", gfn);
		return -ENXIO;
	}

	ops->set_pfn(m, mfn);
	return 0;
}
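
/*
 * Naming note (informational): "p2m" above follows the usual virtualization
 * convention of translating a guest page frame number (gfn) into a machine
 * frame number (mfn) through the hypervisor, so the resulting shadow entry
 * keeps the guest entry's attribute bits but points at a host machine page.
 */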

/*
 * MM helpers.
 */
struct intel_gvt_gtt_entry *intel_vgpu_mm_get_entry(struct intel_vgpu_mm *mm,
		void *page_table, struct intel_gvt_gtt_entry *e,
		unsigned long index)
{
	struct intel_gvt *gvt = mm->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	e->type = mm->page_table_entry_type;

	ops->get_entry(page_table, e, index, false, 0, mm->vgpu);
	ops->test_pse(e);
	return e;
}

struct intel_gvt_gtt_entry *intel_vgpu_mm_set_entry(struct intel_vgpu_mm *mm,
		void *page_table, struct intel_gvt_gtt_entry *e,
		unsigned long index)
{
	struct intel_gvt *gvt = mm->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	return ops->set_entry(page_table, e, index, false, 0, mm->vgpu);
}

/*
 * PPGTT shadow page table helpers.
 */
static inline struct intel_gvt_gtt_entry *ppgtt_spt_get_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	e->type = get_entry_type(type);

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return e;

	ops->get_entry(page_table, e, index, guest,
			spt->guest_page.gfn << GTT_PAGE_SHIFT,
			spt->vgpu);
	ops->test_pse(e);
	return e;
}

static inline struct intel_gvt_gtt_entry *ppgtt_spt_set_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return e;

	return ops->set_entry(page_table, e, index, guest,
			spt->guest_page.gfn << GTT_PAGE_SHIFT,
			spt->vgpu);
}

#define ppgtt_get_guest_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, NULL, \
		spt->guest_page_type, e, index, true)

#define ppgtt_set_guest_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, NULL, \
		spt->guest_page_type, e, index, true)

#define ppgtt_get_shadow_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

#define ppgtt_set_shadow_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

/**
 * intel_vgpu_init_guest_page - init a guest page data structure
 * @vgpu: a vGPU
 * @p: a guest page data structure
 * @gfn: guest memory page frame number
 * @handler: the function to be called when the target guest memory page
 *	has been modified
 * @data: the private data attached to the guest page
 *
 * This function is called when the user wants to track a guest memory page.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_init_guest_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *p,
		unsigned long gfn,
		int (*handler)(void *, u64, void *, int),
		void *data)
{
	INIT_HLIST_NODE(&p->node);

	p->writeprotection = false;
	p->gfn = gfn;
	p->handler = handler;
	p->data = data;
	p->oos_page = NULL;
	p->write_cnt = 0;

	hash_add(vgpu->gtt.guest_page_hash_table, &p->node, p->gfn);
	return 0;
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page);

/**
 * intel_vgpu_clean_guest_page - release the resources owned by a guest page
 * data structure
 * @vgpu: a vGPU
 * @p: a tracked guest page
 *
 * This function is called when the user tries to stop tracking a guest memory
 * page.
 */
void intel_vgpu_clean_guest_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *p)
{
	if (!hlist_unhashed(&p->node))
		hash_del(&p->node);

	if (p->oos_page)
		detach_oos_page(vgpu, p->oos_page);

	if (p->writeprotection)
		intel_gvt_hypervisor_unset_wp_page(vgpu, p);
}

/**
 * intel_vgpu_find_guest_page - find a guest page data structure by GFN.
 * @vgpu: a vGPU
 * @gfn: guest memory page frame number
 *
 * This function is called when emulation logic wants to know if a trapped GFN
 * is a tracked guest page.
 *
 * Returns:
 * Pointer to guest page data structure, NULL if failed.
 */
struct intel_vgpu_guest_page *intel_vgpu_find_guest_page(
		struct intel_vgpu *vgpu, unsigned long gfn)
{
	struct intel_vgpu_guest_page *p;

	hash_for_each_possible(vgpu->gtt.guest_page_hash_table,
		p, node, gfn) {
		if (p->gfn == gfn)
			return p;
	}
	return NULL;
}

static inline int init_shadow_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_shadow_page *p, int type)
{
	p->vaddr = page_address(p->page);
	p->type = type;

	INIT_HLIST_NODE(&p->node);

	p->mfn = intel_gvt_hypervisor_virt_to_mfn(p->vaddr);
	if (p->mfn == INTEL_GVT_INVALID_ADDR)
		return -EFAULT;

	hash_add(vgpu->gtt.shadow_page_hash_table, &p->node, p->mfn);
	return 0;
}

static inline void clean_shadow_page(struct intel_vgpu_shadow_page *p)
{
	if (!hlist_unhashed(&p->node))
		hash_del(&p->node);
}

static inline struct intel_vgpu_shadow_page *find_shadow_page(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	struct intel_vgpu_shadow_page *p;

	hash_for_each_possible(vgpu->gtt.shadow_page_hash_table,
		p, node, mfn) {
		if (p->mfn == mfn)
			return p;
	}
	return NULL;
}

#define guest_page_to_ppgtt_spt(ptr) \
	container_of(ptr, struct intel_vgpu_ppgtt_spt, guest_page)

#define shadow_page_to_ppgtt_spt(ptr) \
	container_of(ptr, struct intel_vgpu_ppgtt_spt, shadow_page)
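
/*
 * Note on the container_of() macros above (informational): each
 * struct intel_vgpu_ppgtt_spt bundles a guest_page, which tracks the guest
 * page table page through write protection, with a shadow_page, which holds
 * the host-side copy whose mfn is what actually gets programmed into the
 * higher-level shadow entries.
 */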

static void *alloc_spt(gfp_t gfp_mask)
{
	struct intel_vgpu_ppgtt_spt *spt;

	spt = kzalloc(sizeof(*spt), gfp_mask);
	if (!spt)
		return NULL;

	spt->shadow_page.page = alloc_page(gfp_mask);
	if (!spt->shadow_page.page) {
		kfree(spt);
		return NULL;
	}
	return spt;
}

static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	__free_page(spt->shadow_page.page);
	kfree(spt);
}

static void ppgtt_free_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	trace_spt_free(spt->vgpu->id, spt, spt->shadow_page.type);

	clean_shadow_page(&spt->shadow_page);
	intel_vgpu_clean_guest_page(spt->vgpu, &spt->guest_page);
	list_del_init(&spt->post_shadow_list);

	free_spt(spt);
}

static void ppgtt_free_all_shadow_page(struct intel_vgpu *vgpu)
{
	struct hlist_node *n;
	struct intel_vgpu_shadow_page *sp;
	int i;

	hash_for_each_safe(vgpu->gtt.shadow_page_hash_table, i, n, sp, node)
		ppgtt_free_shadow_page(shadow_page_to_ppgtt_spt(sp));
}

static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
		u64 pa, void *p_data, int bytes);

static int ppgtt_write_protection_handler(void *gp, u64 pa,
		void *p_data, int bytes)
{
	struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	if (!gpt->writeprotection)
		return -EINVAL;

	ret = ppgtt_handle_guest_write_page_table_bytes(gp,
		pa, p_data, bytes);
	if (ret)
		return ret;
	return ret;
}

static int reclaim_one_mm(struct intel_gvt *gvt);

static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page(
		struct intel_vgpu *vgpu, int type, unsigned long gfn)
{
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	int ret;

retry:
	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
	if (!spt) {
		if (reclaim_one_mm(vgpu->gvt))
			goto retry;

		gvt_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
	}

	spt->vgpu = vgpu;
	spt->guest_page_type = type;
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);

	/*
	 * TODO: the guest page type may differ from the shadow page type
	 * once PSE pages are supported in the future.
	 */
	ret = init_shadow_page(vgpu, &spt->shadow_page, type);
	if (ret) {
		gvt_err("fail to initialize shadow page for spt\n");
		goto err;
	}

	ret = intel_vgpu_init_guest_page(vgpu, &spt->guest_page,
			gfn, ppgtt_write_protection_handler, NULL);
	if (ret) {
		gvt_err("fail to initialize guest page for spt\n");
		goto err;
	}

	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
	return spt;
err:
	ppgtt_free_shadow_page(spt);
	return ERR_PTR(ret);
}

static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	struct intel_vgpu_shadow_page *p = find_shadow_page(vgpu, mfn);

	if (p)
		return shadow_page_to_ppgtt_spt(p);

	gvt_err("vgpu%d: fail to find ppgtt shadow page: 0x%lx\n",
			vgpu->id, mfn);
	return NULL;
}

#define pt_entry_size_shift(spt) \
	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)

#define pt_entries(spt) \
	(GTT_PAGE_SIZE >> pt_entry_size_shift(spt))

#define for_each_present_guest_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (spt->vgpu->gvt->gtt.pte_ops->test_present( \
			ppgtt_get_guest_entry(spt, e, i)))

#define for_each_present_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); i++) \
		if (spt->vgpu->gvt->gtt.pte_ops->test_present( \
			ppgtt_get_shadow_entry(spt, e, i)))

static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));

	atomic_inc(&spt->refcount);
}

static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt);

static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *e)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	intel_gvt_gtt_type_t cur_pt_type;

	if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type))))
		return -EINVAL;

	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		cur_pt_type = get_next_pt_type(e->type) + 1;
		if (ops->get_pfn(e) ==
			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
			return 0;
	}
	s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
	if (!s) {
		gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n",
				vgpu->id, ops->get_pfn(e));
		return -ENXIO;
	}
	return ppgtt_invalidate_shadow_page(s);
}

static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt_gtt_entry e;
	unsigned long index;
	int ret;
	int v = atomic_read(&spt->refcount);

	trace_spt_change(spt->vgpu->id, "die", spt,
			spt->guest_page.gfn, spt->shadow_page.type);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));

	if (atomic_dec_return(&spt->refcount) > 0)
		return 0;

	if (gtt_type_is_pte_pt(spt->shadow_page.type))
		goto release;

	for_each_present_shadow_entry(spt, &e, index) {
		if (!gtt_type_is_pt(get_next_pt_type(e.type))) {
			gvt_err("GVT doesn't support pse bit for now\n");
			return -EINVAL;
		}
		ret = ppgtt_invalidate_shadow_page_by_shadow_entry(
				spt->vgpu, &e);
		if (ret)
			goto fail;
	}
release:
	trace_spt_change(spt->vgpu->id, "release", spt,
			spt->guest_page.gfn, spt->shadow_page.type);
	ppgtt_free_shadow_page(spt);
	return 0;
fail:
	gvt_err("vgpu%d: fail: shadow page %p shadow entry 0x%llx type %d\n",
			spt->vgpu->id, spt, e.val64, e.type);
	return ret;
}

static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt);

static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s = NULL;
	struct intel_vgpu_guest_page *g;
	int ret;

	if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(we->type)))) {
		ret = -EINVAL;
		goto fail;
	}

	g = intel_vgpu_find_guest_page(vgpu, ops->get_pfn(we));
	if (g) {
		s = guest_page_to_ppgtt_spt(g);
		ppgtt_get_shadow_page(s);
	} else {
		int type = get_next_pt_type(we->type);

		s = ppgtt_alloc_shadow_page(vgpu, type, ops->get_pfn(we));
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}

		ret = intel_gvt_hypervisor_set_wp_page(vgpu, &s->guest_page);
		if (ret)
			goto fail;

		ret = ppgtt_populate_shadow_page(s);
		if (ret)
			goto fail;

		trace_spt_change(vgpu->id, "new", s, s->guest_page.gfn,
			s->shadow_page.type);
	}
	return s;
fail:
	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
			vgpu->id, s, we->val64, we->type);
	return ERR_PTR(ret);
}

static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;

	se->type = ge->type;
	se->val64 = ge->val64;

	ops->set_pfn(se, s->shadow_page.mfn);
}

static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_vgpu_ppgtt_spt *s;
	struct intel_gvt_gtt_entry se, ge;
	unsigned long i;
	int ret;

	trace_spt_change(spt->vgpu->id, "born", spt,
		spt->guest_page.gfn, spt->shadow_page.type);

	if (gtt_type_is_pte_pt(spt->shadow_page.type)) {
		for_each_present_guest_entry(spt, &ge, i) {
			ret = gtt_entry_p2m(vgpu, &ge, &se);
			if (ret)
				goto fail;
			ppgtt_set_shadow_entry(spt, &se, i);
		}
		return 0;
	}

	for_each_present_guest_entry(spt, &ge, i) {
		if (!gtt_type_is_pt(get_next_pt_type(ge.type))) {
			gvt_err("GVT doesn't support pse bit now\n");
			ret = -EINVAL;
			goto fail;
		}

		s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &se, i);
		ppgtt_generate_shadow_entry(&se, s, &ge);
		ppgtt_set_shadow_entry(spt, &se, i);
	}
	return 0;
fail:
	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
			vgpu->id, spt, ge.val64, ge.type);
	return ret;
}

static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
		unsigned long index)
{
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry e;
	int ret;

	ppgtt_get_shadow_entry(spt, &e, index);

	trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, e.val64,
			index);

	if (!ops->test_present(&e))
		return 0;

	if (ops->get_pfn(&e) == vgpu->gtt.scratch_pt[sp->type].page_mfn)
		return 0;

	if (gtt_type_is_pt(get_next_pt_type(e.type))) {
		struct intel_vgpu_ppgtt_spt *s =
			ppgtt_find_shadow_page(vgpu, ops->get_pfn(&e));
		if (!s) {
			gvt_err("fail to find guest page\n");
			ret = -ENXIO;
			goto fail;
		}
		ret = ppgtt_invalidate_shadow_page(s);
		if (ret)
			goto fail;
	}
	ops->set_pfn(&e, vgpu->gtt.scratch_pt[sp->type].page_mfn);
	ppgtt_set_shadow_entry(spt, &e, index);
	return 0;
fail:
	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n",
			vgpu->id, spt, e.val64, e.type);
	return ret;
}

static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry m;
	struct intel_vgpu_ppgtt_spt *s;
	int ret;

	trace_gpt_change(spt->vgpu->id, "add", spt, sp->type,
		we->val64, index);

	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
		s = ppgtt_populate_shadow_page_by_guest_entry(vgpu, we);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &m, index);
		ppgtt_generate_shadow_entry(&m, s, we);
		ppgtt_set_shadow_entry(spt, &m, index);
	} else {
		ret = gtt_entry_p2m(vgpu, we, &m);
		if (ret)
			goto fail;
		ppgtt_set_shadow_entry(spt, &m, index);
	}
	return 0;
fail:
	gvt_err("vgpu%d: fail: spt %p guest entry 0x%llx type %d\n", vgpu->id,
		spt, we->val64, we->type);
	return ret;
}
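
/*
 * Out-of-sync (OOS) handling, summarized here for readability (this is a
 * description of the code below, gated by enable_out_of_sync, not new
 * behavior): a write-protected guest PTE page that keeps getting written may
 * be taken out of sync. attach_oos_page() snapshots the guest page into
 * oos_page->mem; while the page is still protected its writes are counted
 * and mirrored into that cached copy; ppgtt_set_guest_page_oos() then drops
 * the write protection so further guest writes are no longer trapped.
 * Before a workload is submitted, intel_vgpu_sync_oos_pages() re-arms the
 * write protection and sync_oos_page() re-shadows only the entries that
 * actually changed.
 */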

static int sync_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt =
		guest_page_to_ppgtt_spt(oos_page->guest_page);
	struct intel_gvt_gtt_entry old, new, m;
	int index;
	int ret;

	trace_oos_change(vgpu->id, "sync", oos_page->id,
			oos_page->guest_page, spt->guest_page_type);

	old.type = new.type = get_entry_type(spt->guest_page_type);
	old.val64 = new.val64 = 0;

	for (index = 0; index < (GTT_PAGE_SIZE >> info->gtt_entry_size_shift);
		index++) {
		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
		ops->get_entry(NULL, &new, index, true,
			oos_page->guest_page->gfn << PAGE_SHIFT, vgpu);

		if (old.val64 == new.val64
			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
			continue;

		trace_oos_sync(vgpu->id, oos_page->id,
				oos_page->guest_page, spt->guest_page_type,
				new.val64, index);

		ret = gtt_entry_p2m(vgpu, &new, &m);
		if (ret)
			return ret;

		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
		ppgtt_set_shadow_entry(spt, &m, index);
	}

	oos_page->guest_page->write_cnt = 0;
	list_del_init(&spt->post_shadow_list);
	return 0;
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_ppgtt_spt *spt =
		guest_page_to_ppgtt_spt(oos_page->guest_page);

	trace_oos_change(vgpu->id, "detach", oos_page->id,
			oos_page->guest_page, spt->guest_page_type);

	oos_page->guest_page->write_cnt = 0;
	oos_page->guest_page->oos_page = NULL;
	oos_page->guest_page = NULL;

	list_del_init(&oos_page->vm_list);
	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);

	return 0;
}

static int attach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page,
		struct intel_vgpu_guest_page *gpt)
{
	struct intel_gvt *gvt = vgpu->gvt;
	int ret;

	ret = intel_gvt_hypervisor_read_gpa(vgpu, gpt->gfn << GTT_PAGE_SHIFT,
		oos_page->mem, GTT_PAGE_SIZE);
	if (ret)
		return ret;

	oos_page->guest_page = gpt;
	gpt->oos_page = oos_page;

	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);

	trace_oos_change(vgpu->id, "attach", gpt->oos_page->id,
			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);
	return 0;
}

static int ppgtt_set_guest_page_sync(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *gpt)
{
	int ret;

	ret = intel_gvt_hypervisor_set_wp_page(vgpu, gpt);
	if (ret)
		return ret;

	trace_oos_change(vgpu->id, "set page sync", gpt->oos_page->id,
			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);

	list_del_init(&gpt->oos_page->vm_list);
	return sync_oos_page(vgpu, gpt->oos_page);
}

static int ppgtt_allocate_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *gpt)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page = gpt->oos_page;
	int ret;

	WARN(oos_page, "shadow PPGTT page already has an oos page\n");

	if (list_empty(&gtt->oos_page_free_list_head)) {
		oos_page = container_of(gtt->oos_page_use_list_head.next,
			struct intel_vgpu_oos_page, list);
		ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page);
		if (ret)
			return ret;
		ret = detach_oos_page(vgpu, oos_page);
		if (ret)
			return ret;
	} else
		oos_page = container_of(gtt->oos_page_free_list_head.next,
			struct intel_vgpu_oos_page, list);
	return attach_oos_page(vgpu, oos_page, gpt);
}

static int ppgtt_set_guest_page_oos(struct intel_vgpu *vgpu,
		struct intel_vgpu_guest_page *gpt)
{
	struct intel_vgpu_oos_page *oos_page = gpt->oos_page;

	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
		return -EINVAL;

	trace_oos_change(vgpu->id, "set page out of sync", gpt->oos_page->id,
			gpt, guest_page_to_ppgtt_spt(gpt)->guest_page_type);

	list_add_tail(&oos_page->vm_list, &vgpu->gtt.oos_page_list_head);
	return intel_gvt_hypervisor_unset_wp_page(vgpu, gpt);
}

/**
 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to host,
 * to sync all the out-of-sync shadow pages for the vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;
	int ret;

	if (!enable_out_of_sync)
		return 0;

	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
		oos_page = container_of(pos,
				struct intel_vgpu_oos_page, vm_list);
		ret = ppgtt_set_guest_page_sync(vgpu, oos_page->guest_page);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * The heart of the PPGTT shadow page table code.
 */
static int ppgtt_handle_guest_write_page_table(
		struct intel_vgpu_guest_page *gpt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;

	int ret;
	int new_present;

	new_present = ops->test_present(we);

	ret = ppgtt_handle_guest_entry_removal(gpt, index);
	if (ret)
		goto fail;

	if (new_present) {
		ret = ppgtt_handle_guest_entry_add(gpt, we, index);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d.\n",
			vgpu->id, spt, we->val64, we->type);
	return ret;
}

static inline bool can_do_out_of_sync(struct intel_vgpu_guest_page *gpt)
{
	return enable_out_of_sync
		&& gtt_type_is_pte_pt(
			guest_page_to_ppgtt_spt(gpt)->guest_page_type)
		&& gpt->write_cnt >= 2;
}

static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
		unsigned long index)
{
	set_bit(index, spt->post_shadow_bitmap);
	if (!list_empty(&spt->post_shadow_list))
		return;

	list_add_tail(&spt->post_shadow_list,
			&spt->vgpu->gtt.post_shadow_list_head);
}
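
/*
 * Note on "post shadow" (a summary of the mechanism implemented by
 * ppgtt_set_post_shadow() above and intel_vgpu_flush_post_shadow() below,
 * not new behavior): when the guest writes a page table entry in pieces
 * smaller than a full GTT entry, the write is not shadowed immediately;
 * the index is latched in post_shadow_bitmap and the spt is queued on
 * post_shadow_list_head. intel_vgpu_flush_post_shadow() replays those
 * indexes with the final entry values before the workload is submitted.
 */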

/**
 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to host,
 * to flush all the post shadows for a vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge;
	unsigned long index;
	int ret;

	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
				post_shadow_list);

		for_each_set_bit(index, spt->post_shadow_bitmap,
				GTT_ENTRY_NUM_IN_ONE_PAGE) {
			ppgtt_get_guest_entry(spt, &ge, index);

			ret = ppgtt_handle_guest_write_page_table(
					&spt->guest_page, &ge, index);
			if (ret)
				return ret;
			clear_bit(index, spt->post_shadow_bitmap);
		}
		list_del_init(&spt->post_shadow_list);
	}
	return 0;
}

static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
		u64 pa, void *p_data, int bytes)
{
	struct intel_vgpu_guest_page *gpt = (struct intel_vgpu_guest_page *)gp;
	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt_gtt_entry we;
	unsigned long index;
	int ret;

	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;

	ppgtt_get_guest_entry(spt, &we, index);

	ops->test_pse(&we);

	if (bytes == info->gtt_entry_size) {
		ret = ppgtt_handle_guest_write_page_table(gpt, &we, index);
		if (ret)
			return ret;
	} else {
		if (!test_bit(index, spt->post_shadow_bitmap)) {
			ret = ppgtt_handle_guest_entry_removal(gpt, index);
			if (ret)
				return ret;
		}

		ppgtt_set_post_shadow(spt, index);
	}

	if (!enable_out_of_sync)
		return 0;

	gpt->write_cnt++;

	if (gpt->oos_page)
		ops->set_entry(gpt->oos_page->mem, &we, index,
				false, 0, vgpu);

	if (can_do_out_of_sync(gpt)) {
		if (!gpt->oos_page)
			ppgtt_allocate_oos_page(vgpu, gpt);

		ret = ppgtt_set_guest_page_oos(vgpu, gpt);
		if (ret < 0)
			return ret;
	}
	return 0;
}

/*
 * mm page table allocation policy for bdw+
 *  - for ggtt, only virtual page table will be allocated.
 *  - for ppgtt, dedicated virtual/shadow page table will be allocated.
 */
static int gen8_mm_alloc_page_table(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_device_info *info = &gvt->device_info;
	void *mem;

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		mm->page_table_entry_cnt = 4;
		mm->page_table_entry_size = mm->page_table_entry_cnt *
			info->gtt_entry_size;
		mem = kzalloc(mm->has_shadow_page_table ?
				mm->page_table_entry_size * 2
				: mm->page_table_entry_size,
				GFP_ATOMIC);
		if (!mem)
			return -ENOMEM;
		mm->virtual_page_table = mem;
		if (!mm->has_shadow_page_table)
			return 0;
		mm->shadow_page_table = mem + mm->page_table_entry_size;
	} else if (mm->type == INTEL_GVT_MM_GGTT) {
		mm->page_table_entry_cnt =
			(gvt_ggtt_gm_sz(gvt) >> GTT_PAGE_SHIFT);
		mm->page_table_entry_size = mm->page_table_entry_cnt *
			info->gtt_entry_size;
		mem = vzalloc(mm->page_table_entry_size);
		if (!mem)
			return -ENOMEM;
		mm->virtual_page_table = mem;
	}
	return 0;
}

static void gen8_mm_free_page_table(struct intel_vgpu_mm *mm)
{
	if (mm->type == INTEL_GVT_MM_PPGTT) {
		kfree(mm->virtual_page_table);
	} else if (mm->type == INTEL_GVT_MM_GGTT) {
		if (mm->virtual_page_table)
			vfree(mm->virtual_page_table);
	}
	mm->virtual_page_table = mm->shadow_page_table = NULL;
}

static void invalidate_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_gvt_gtt_entry se;
	int i;

	if (WARN_ON(!mm->has_shadow_page_table || !mm->shadowed))
		return;

	for (i = 0; i < mm->page_table_entry_cnt; i++) {
		ppgtt_get_shadow_root_entry(mm, &se, i);
		if (!ops->test_present(&se))
			continue;
		ppgtt_invalidate_shadow_page_by_shadow_entry(
				vgpu, &se);
		se.val64 = 0;
		ppgtt_set_shadow_root_entry(mm, &se, i);

		trace_gpt_change(vgpu->id, "destroy root pointer",
				NULL, se.type, se.val64, i);
	}
	mm->shadowed = false;
}

/**
 * intel_vgpu_destroy_mm - destroy a mm object
 * @mm_ref: a kref object
 *
 * This function is used to destroy a mm object for a vGPU.
 *
 */
void intel_vgpu_destroy_mm(struct kref *mm_ref)
{
	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;

	if (!mm->initialized)
		goto out;

	list_del(&mm->list);
	list_del(&mm->lru_list);

	if (mm->has_shadow_page_table)
		invalidate_mm(mm);

	gtt->mm_free_page_table(mm);
out:
	kfree(mm);
}

static int shadow_mm(struct intel_vgpu_mm *mm)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge, se;
	int i;
	int ret;

	if (WARN_ON(!mm->has_shadow_page_table || mm->shadowed))
		return 0;

	mm->shadowed = true;

	for (i = 0; i < mm->page_table_entry_cnt; i++) {
		ppgtt_get_guest_root_entry(mm, &ge, i);
		if (!ops->test_present(&ge))
			continue;

		trace_gpt_change(vgpu->id, __func__, NULL,
				ge.type, ge.val64, i);

		spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
		if (IS_ERR(spt)) {
			gvt_err("fail to populate guest root pointer\n");
			ret = PTR_ERR(spt);
			goto fail;
		}
		ppgtt_generate_shadow_entry(&se, spt, &ge);
		ppgtt_set_shadow_root_entry(mm, &se, i);

		trace_gpt_change(vgpu->id, "populate root pointer",
				NULL, se.type, se.val64, i);
	}
	return 0;
fail:
	invalidate_mm(mm);
	return ret;
}

/**
 * intel_vgpu_create_mm - create a mm object for a vGPU
 * @vgpu: a vGPU
 * @mm_type: mm object type, should be PPGTT or GGTT
 * @virtual_page_table: page table root pointers. Could be NULL if user wants
 *	to populate shadow later.
 * @page_table_level: describe the page table level of the mm object
 * @pde_base_index: pde root pointer base in GGTT MMIO.
 *
 * This function is used to create a mm object for a vGPU.
 *
 * Returns:
 * Pointer to the created mm object on success, ERR_PTR-encoded error code
 * if failed.
 */
struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu,
		int mm_type, void *virtual_page_table, int page_table_level,
		u32 pde_base_index)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_mm *mm;
	int ret;

	mm = kzalloc(sizeof(*mm), GFP_ATOMIC);
	if (!mm) {
		ret = -ENOMEM;
		goto fail;
	}

	mm->type = mm_type;

	if (page_table_level == 1)
		mm->page_table_entry_type = GTT_TYPE_GGTT_PTE;
	else if (page_table_level == 3)
		mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
	else if (page_table_level == 4)
		mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
	else {
		WARN_ON(1);
		ret = -EINVAL;
		goto fail;
	}

	mm->page_table_level = page_table_level;
	mm->pde_base_index = pde_base_index;

	mm->vgpu = vgpu;
	mm->has_shadow_page_table = !!(mm_type == INTEL_GVT_MM_PPGTT);

	kref_init(&mm->ref);
	atomic_set(&mm->pincount, 0);
	INIT_LIST_HEAD(&mm->list);
	INIT_LIST_HEAD(&mm->lru_list);
	list_add_tail(&mm->list, &vgpu->gtt.mm_list_head);

	ret = gtt->mm_alloc_page_table(mm);
	if (ret) {
		gvt_err("fail to allocate page table for mm\n");
		goto fail;
	}

	mm->initialized = true;

	if (virtual_page_table)
		memcpy(mm->virtual_page_table, virtual_page_table,
				mm->page_table_entry_size);

	if (mm->has_shadow_page_table) {
		ret = shadow_mm(mm);
		if (ret)
			goto fail;
		list_add_tail(&mm->lru_list, &gvt->gtt.mm_lru_list_head);
	}
	return mm;
fail:
	gvt_err("fail to create mm\n");
	if (mm)
		intel_gvt_mm_unreference(mm);
	return ERR_PTR(ret);
}
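
/*
 * Usage sketch (informational): the per-vGPU GGTT mm is created at vGPU init
 * time as intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT, NULL, 1, 0), while
 * PPGTT mm objects are created with page_table_level 3 or 4 when a guest
 * announces a new page table through the g2v notification path (see
 * intel_vgpu_g2v_create_ppgtt_mm() below).
 */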

/**
 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when the user doesn't want to use a vGPU mm object
 * any more.
 */
void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
{
	if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT))
		return;

	atomic_dec(&mm->pincount);
}

/**
 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when the user wants to use a vGPU mm object. If this
 * mm object hasn't been shadowed yet, the shadow will be populated at this
 * time.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
{
	int ret;

	if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT))
		return 0;

	atomic_inc(&mm->pincount);

	if (!mm->shadowed) {
		ret = shadow_mm(mm);
		if (ret)
			return ret;
	}

	list_del_init(&mm->lru_list);
	list_add_tail(&mm->lru_list, &mm->vgpu->gvt->gtt.mm_lru_list_head);
	return 0;
}

static int reclaim_one_mm(struct intel_gvt *gvt)
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos, *n;

	list_for_each_safe(pos, n, &gvt->gtt.mm_lru_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, lru_list);

		if (mm->type != INTEL_GVT_MM_PPGTT)
			continue;
		if (atomic_read(&mm->pincount))
			continue;

		list_del_init(&mm->lru_list);
		invalidate_mm(mm);
		return 1;
	}
	return 0;
}

/*
 * GMA translation APIs.
 */
static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;

	if (WARN_ON(!mm->has_shadow_page_table))
		return -EINVAL;

	s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
	if (!s)
		return -ENXIO;

	if (!guest)
		ppgtt_get_shadow_entry(s, e, index);
	else
		ppgtt_get_guest_entry(s, e, index);
	return 0;
}

/**
 * intel_vgpu_gma_to_gpa - translate a gma to GPA
 * @mm: mm object. could be a PPGTT or GGTT mm object
 * @gma: graphics memory address in this mm object
 *
 * This function is used to translate a graphics memory address in a specific
 * graphics memory space to a guest physical address.
 *
 * Returns:
 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
 */
unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
	struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
	unsigned long gma_index[4];
	struct intel_gvt_gtt_entry e;
	int i, index;
	int ret;

	if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT)
		return INTEL_GVT_INVALID_ADDR;

	if (mm->type == INTEL_GVT_MM_GGTT) {
		if (!vgpu_gmadr_is_valid(vgpu, gma))
			goto err;

		ggtt_get_guest_entry(mm, &e,
			gma_ops->gma_to_ggtt_pte_index(gma));
		gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT)
			+ (gma & ~GTT_PAGE_MASK);

		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
		return gpa;
	}

	switch (mm->page_table_level) {
	case 4:
		ppgtt_get_shadow_root_entry(mm, &e, 0);
		gma_index[0] = gma_ops->gma_to_pml4_index(gma);
		gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
		gma_index[2] = gma_ops->gma_to_pde_index(gma);
		gma_index[3] = gma_ops->gma_to_pte_index(gma);
		index = 4;
		break;
	case 3:
		ppgtt_get_shadow_root_entry(mm, &e,
				gma_ops->gma_to_l3_pdp_index(gma));
		gma_index[0] = gma_ops->gma_to_pde_index(gma);
		gma_index[1] = gma_ops->gma_to_pte_index(gma);
		index = 2;
		break;
	case 2:
		ppgtt_get_shadow_root_entry(mm, &e,
				gma_ops->gma_to_pde_index(gma));
		gma_index[0] = gma_ops->gma_to_pte_index(gma);
		index = 1;
		break;
	default:
		WARN_ON(1);
		goto err;
	}

	/* walk into the shadow page table and get gpa from guest entry */
	for (i = 0; i < index; i++) {
		ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
			(i == index - 1));
		if (ret)
			goto err;
	}

	gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT)
		+ (gma & ~GTT_PAGE_MASK);

	trace_gma_translate(vgpu->id, "ppgtt", 0,
			mm->page_table_level, gma, gpa);
	return gpa;
err:
	gvt_err("invalid mm type: %d gma %lx\n", mm->type, gma);
	return INTEL_GVT_INVALID_ADDR;
}
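
/*
 * Worked example (informational, derived from the gen8 gma_ops defined
 * earlier in this file): for a 4-level PPGTT, a graphics memory address
 * decomposes as
 *
 *   pml4 index   = gma >> 39 & 0x1ff
 *   l4 pdp index = gma >> 30 & 0x1ff
 *   pde index    = gma >> 21 & 0x1ff
 *   pte index    = gma >> 12 & 0x1ff
 *   page offset  = gma & 0xfff
 *
 * so gma 0x80001234 walks pml4[0], pdp[2], pde[0], pte[1] and keeps the
 * low 12 bits (0x234) as the offset within the 4K page.
 */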

static int emulate_gtt_mmio_read(struct intel_vgpu *vgpu,
	unsigned int off, void *p_data, unsigned int bytes)
{
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	unsigned long index = off >> info->gtt_entry_size_shift;
	struct intel_gvt_gtt_entry e;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	ggtt_get_guest_entry(ggtt_mm, &e, index);
	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
			bytes);
	return 0;
}

/**
 * intel_vgpu_emulate_gtt_mmio_read - emulate GTT MMIO register read
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data will be returned to guest
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register read
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_gtt_mmio_read(vgpu, off, p_data, bytes);
	return ret;
}

static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_device_info *info = &gvt->device_info;
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
	unsigned long gma;
	struct intel_gvt_gtt_entry e, m;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	gma = g_gtt_index << GTT_PAGE_SHIFT;

	/* the VM may configure the whole GM space when ballooning is used */
	if (WARN_ONCE(!vgpu_gmadr_is_valid(vgpu, gma),
			"vgpu%d: found oob ggtt write, offset %x\n",
			vgpu->id, off)) {
		return 0;
	}

	ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);

	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
			bytes);

	if (ops->test_present(&e)) {
		ret = gtt_entry_p2m(vgpu, &e, &m);
		if (ret) {
			gvt_err("vgpu%d: fail to translate guest gtt entry\n",
					vgpu->id);
			return ret;
		}
	} else {
		m = e;
		m.val64 = 0;
	}

	ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index);
	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
	return 0;
}

/**
 * intel_vgpu_emulate_gtt_mmio_write - emulate GTT MMIO register write
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data from guest write
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register write
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_gtt_mmio_write(vgpu, off, p_data, bytes);
	return ret;
}

static int alloc_scratch_pages(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t type)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int page_entry_num = GTT_PAGE_SIZE >>
				vgpu->gvt->device_info.gtt_entry_size_shift;
	struct page *scratch_pt;
	unsigned long mfn;
	int i;
	void *p;

	if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
		return -EINVAL;

	scratch_pt = alloc_page(GFP_KERNEL | GFP_ATOMIC | __GFP_ZERO);
	if (!scratch_pt) {
		gvt_err("fail to allocate scratch page\n");
		return -ENOMEM;
	}

	p = kmap_atomic(scratch_pt);
	mfn = intel_gvt_hypervisor_virt_to_mfn(p);
	if (mfn == INTEL_GVT_INVALID_ADDR) {
		gvt_err("fail to translate vaddr:0x%llx\n", (u64)p);
		kunmap_atomic(p);
		__free_page(scratch_pt);
		return -EFAULT;
	}
	gtt->scratch_pt[type].page_mfn = mfn;
	gtt->scratch_pt[type].page = scratch_pt;
	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
			vgpu->id, type, mfn);

	/*
	 * Build the tree by filling each scratch page table with entries
	 * that point to the next-level scratch page table (or, at the lowest
	 * level, to the scratch page). scratch_pt[type] is the scratch page
	 * table used by a page table of 'type', e.g.
	 * scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
	 * GTT_TYPE_PPGTT_PDE_PT level page table, which means this
	 * scratch_pt itself is a GTT_TYPE_PPGTT_PTE_PT filled with the
	 * scratch page mfn.
	 */
	if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) {
		struct intel_gvt_gtt_entry se;

		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
		se.type = get_entry_type(type - 1);
		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);

		/*
		 * Set the entry parameters (present/writable/cache type)
		 * the same as i915's scratch page tree.

static int alloc_scratch_pages(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t type)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int page_entry_num = GTT_PAGE_SIZE >>
				vgpu->gvt->device_info.gtt_entry_size_shift;
	struct page *scratch_pt;
	unsigned long mfn;
	int i;
	void *p;

	if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
		return -EINVAL;

	scratch_pt = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!scratch_pt) {
		gvt_err("fail to allocate scratch page\n");
		return -ENOMEM;
	}

	p = kmap_atomic(scratch_pt);
	mfn = intel_gvt_hypervisor_virt_to_mfn(p);
	if (mfn == INTEL_GVT_INVALID_ADDR) {
		gvt_err("fail to translate vaddr:0x%llx\n", (u64)p);
		kunmap_atomic(p);
		__free_page(scratch_pt);
		return -EFAULT;
	}
	gtt->scratch_pt[type].page_mfn = mfn;
	gtt->scratch_pt[type].page = scratch_pt;
	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
			vgpu->id, type, mfn);

	/* Build the tree by filling the scratch page table with entries
	 * pointing to the next-level scratch page table or scratch page.
	 * scratch_pt[type] holds the scratch pt/scratch page used by page
	 * tables of type 'type'.
	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
	 * GTT_TYPE_PPGTT_PDE_PT level page table, which means this scratch_pt
	 * itself is of type GTT_TYPE_PPGTT_PTE_PT and is filled with scratch
	 * page mfns.
	 */
	if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) {
		struct intel_gvt_gtt_entry se;

		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
		se.type = get_entry_type(type - 1);
		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);

		/* The entry parameters (present/writable/cache type, etc.)
		 * are set to match i915's scratch page tree.
		 */
		se.val64 |= _PAGE_PRESENT | _PAGE_RW;
		if (type == GTT_TYPE_PPGTT_PDE_PT)
			se.val64 |= PPAT_CACHED_INDEX;

		for (i = 0; i < page_entry_num; i++)
			ops->set_entry(p, &se, i, false, 0, vgpu);
	}

	kunmap_atomic(p);

	return 0;
}

static int release_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		if (vgpu->gtt.scratch_pt[i].page != NULL) {
			__free_page(vgpu->gtt.scratch_pt[i].page);
			vgpu->gtt.scratch_pt[i].page = NULL;
			vgpu->gtt.scratch_pt[i].page_mfn = 0;
		}
	}

	return 0;
}

static int create_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i, ret;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		ret = alloc_scratch_pages(vgpu, i);
		if (ret)
			goto err;
	}

	return 0;

err:
	release_scratch_page_tree(vgpu);
	return ret;
}

/**
 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to initialize per-vGPU graphics memory virtualization
 * components.
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	struct intel_vgpu_mm *ggtt_mm;

	hash_init(gtt->guest_page_hash_table);
	hash_init(gtt->shadow_page_hash_table);

	INIT_LIST_HEAD(&gtt->mm_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_list_head);
	INIT_LIST_HEAD(&gtt->post_shadow_list_head);

	ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT,
			NULL, 1, 0);
	if (IS_ERR(ggtt_mm)) {
		gvt_err("fail to create mm for ggtt.\n");
		return PTR_ERR(ggtt_mm);
	}

	gtt->ggtt_mm = ggtt_mm;

	return create_scratch_page_tree(vgpu);
}
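
/*
 * Illustrative sketch (not used by the driver): walk the scratch page table
 * chain that create_scratch_page_tree() builds during intel_vgpu_init_gtt().
 * Every level's scratch PT is fully populated with entries pointing one
 * level down, so an unmapped guest address ultimately resolves to the
 * scratch page.  The helper name is hypothetical.
 */
static void __maybe_unused example_dump_scratch_chain(struct intel_vgpu *vgpu)
{
	int i;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++)
		gvt_dbg_mm("vgpu%d: scratch type %d -> mfn 0x%lx\n",
			   vgpu->id, i, vgpu->gtt.scratch_pt[i].page_mfn);
}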

/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
 */
void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	ppgtt_free_all_shadow_page(vgpu);
	release_scratch_page_tree(vgpu);

	list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, list);
		vgpu->gvt->gtt.mm_free_page_table(mm);
		list_del(&mm->list);
		list_del(&mm->lru_list);
		kfree(mm);
	}
}

static void clean_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;

	WARN(!list_empty(&gtt->oos_page_use_list_head),
		"someone is still using oos page\n");

	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
		list_del(&oos_page->list);
		kfree(oos_page);
	}
}

static int setup_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page;
	int i;
	int ret;

	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);

	for (i = 0; i < preallocated_oos_pages; i++) {
		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
		if (!oos_page) {
			gvt_err("fail to pre-allocate oos page\n");
			ret = -ENOMEM;
			goto fail;
		}

		INIT_LIST_HEAD(&oos_page->list);
		INIT_LIST_HEAD(&oos_page->vm_list);
		oos_page->id = i;
		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
	}

	gvt_dbg_mm("%d oos pages preallocated\n", i);

	return 0;
fail:
	clean_spt_oos(gvt);
	return ret;
}

/**
 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
 * @vgpu: a vGPU
 * @page_table_level: PPGTT page table level
 * @root_entry: PPGTT page table root pointers
 *
 * This function is used to find a PPGTT mm object from the mm object pool.
 *
 * Returns:
 * pointer to mm object on success, NULL if failed.
 */
struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
		int page_table_level, void *root_entry)
{
	struct list_head *pos;
	struct intel_vgpu_mm *mm;
	u64 *src, *dst;

	list_for_each(pos, &vgpu->gtt.mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, list);
		if (mm->type != INTEL_GVT_MM_PPGTT)
			continue;

		if (mm->page_table_level != page_table_level)
			continue;

		src = root_entry;
		dst = mm->virtual_page_table;

		if (page_table_level == 3) {
			if (src[0] == dst[0]
					&& src[1] == dst[1]
					&& src[2] == dst[2]
					&& src[3] == dst[3])
				return mm;
		} else {
			if (src[0] == dst[0])
				return mm;
		}
	}
	return NULL;
}
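
/*
 * Illustrative sketch (hypothetical helper): check whether a guest 4-level
 * PPGTT, identified by its single PML4 root pointer, already has a shadow
 * mm object.  This is the same lookup intel_vgpu_g2v_create_ppgtt_mm()
 * performs below before creating a new mm.
 */
static bool __maybe_unused example_ppgtt_is_shadowed(struct intel_vgpu *vgpu,
		u64 pml4_root)
{
	u64 root_entry[4] = { pml4_root, 0, 0, 0 };

	/* for a 4-level page table only root_entry[0] is compared */
	return intel_vgpu_find_ppgtt_mm(vgpu, 4, root_entry) != NULL;
}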

/**
 * intel_vgpu_g2v_create_ppgtt_mm - create a PPGTT mm object from
 * g2v notification
 * @vgpu: a vGPU
 * @page_table_level: PPGTT page table level
 *
 * This function is used to create a PPGTT mm object from a guest to GVT-g
 * notification.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu,
		int page_table_level)
{
	u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0]));
	struct intel_vgpu_mm *mm;

	if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
		return -EINVAL;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
	if (mm) {
		intel_gvt_mm_reference(mm);
	} else {
		mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_PPGTT,
				pdp, page_table_level, 0);
		if (IS_ERR(mm)) {
			gvt_err("fail to create mm\n");
			return PTR_ERR(mm);
		}
	}
	return 0;
}

/**
 * intel_vgpu_g2v_destroy_ppgtt_mm - destroy a PPGTT mm object from
 * g2v notification
 * @vgpu: a vGPU
 * @page_table_level: PPGTT page table level
 *
 * This function is used to destroy a PPGTT mm object from a guest to GVT-g
 * notification.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu,
		int page_table_level)
{
	u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0]));
	struct intel_vgpu_mm *mm;

	if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
		return -EINVAL;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
	if (!mm) {
		gvt_err("fail to find ppgtt instance.\n");
		return -EINVAL;
	}
	intel_gvt_mm_unreference(mm);
	return 0;
}

/**
 * intel_gvt_init_gtt - initialize mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the initialization stage, to initialize
 * the mm components of a GVT device.
 *
 * Returns:
 * zero on success, negative error code if failed.
 */
int intel_gvt_init_gtt(struct intel_gvt *gvt)
{
	int ret;

	gvt_dbg_core("init gtt\n");

	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
		gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
		gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
		gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table;
		gvt->gtt.mm_free_page_table = gen8_mm_free_page_table;
	} else {
		return -ENODEV;
	}

	if (enable_out_of_sync) {
		ret = setup_spt_oos(gvt);
		if (ret) {
			gvt_err("fail to initialize SPT oos\n");
			return ret;
		}
	}
	INIT_LIST_HEAD(&gvt->gtt.mm_lru_list_head);
	return 0;
}

/**
 * intel_gvt_clean_gtt - clean up mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the driver unloading stage, to clean up the
 * mm components of a GVT device.
 */
void intel_gvt_clean_gtt(struct intel_gvt *gvt)
{
	if (enable_out_of_sync)
		clean_spt_oos(gvt);
}
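
/*
 * Illustrative sketch (hypothetical caller, not part of the driver): the
 * device-level GTT components are set up once when the GVT device is created
 * and torn down on unload; unsupported platforms make intel_gvt_init_gtt()
 * fail with -ENODEV.
 */
static int __maybe_unused example_gvt_gtt_lifecycle(struct intel_gvt *gvt)
{
	int ret = intel_gvt_init_gtt(gvt);

	if (ret) {
		gvt_err("fail to init gtt mm components: %d\n", ret);
		return ret;
	}
	/* ... device runs, vGPUs are created and destroyed ... */
	intel_gvt_clean_gtt(gvt);
	return 0;
}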