1 /* 2 * GTT virtualization 3 * 4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 * 25 * Authors: 26 * Zhi Wang <zhi.a.wang@intel.com> 27 * Zhenyu Wang <zhenyuw@linux.intel.com> 28 * Xiao Zheng <xiao.zheng@intel.com> 29 * 30 * Contributors: 31 * Min He <min.he@intel.com> 32 * Bing Niu <bing.niu@intel.com> 33 * 34 */ 35 36 #include "i915_drv.h" 37 #include "gvt.h" 38 #include "i915_pvinfo.h" 39 #include "trace.h" 40 41 #if defined(VERBOSE_DEBUG) 42 #define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args) 43 #else 44 #define gvt_vdbg_mm(fmt, args...) 45 #endif 46 47 static bool enable_out_of_sync = false; 48 static int preallocated_oos_pages = 8192; 49 50 /* 51 * validate a gm address and related range size, 52 * translate it to host gm address 53 */ 54 bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size) 55 { 56 if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size 57 && !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) { 58 gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n", 59 addr, size); 60 return false; 61 } 62 return true; 63 } 64 65 /* translate a guest gmadr to host gmadr */ 66 int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr) 67 { 68 if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr), 69 "invalid guest gmadr %llx\n", g_addr)) 70 return -EACCES; 71 72 if (vgpu_gmadr_is_aperture(vgpu, g_addr)) 73 *h_addr = vgpu_aperture_gmadr_base(vgpu) 74 + (g_addr - vgpu_aperture_offset(vgpu)); 75 else 76 *h_addr = vgpu_hidden_gmadr_base(vgpu) 77 + (g_addr - vgpu_hidden_offset(vgpu)); 78 return 0; 79 } 80 81 /* translate a host gmadr to guest gmadr */ 82 int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr) 83 { 84 if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr), 85 "invalid host gmadr %llx\n", h_addr)) 86 return -EACCES; 87 88 if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr)) 89 *g_addr = vgpu_aperture_gmadr_base(vgpu) 90 + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt)); 91 else 92 *g_addr = vgpu_hidden_gmadr_base(vgpu) 93 + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt)); 94 return 0; 95 } 96 97 int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index, 98 unsigned long *h_index) 99 { 100 u64 h_addr; 101 int ret; 102 103 ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT, 104 &h_addr); 105 if (ret) 106 return ret; 107 108 *h_index = h_addr >> I915_GTT_PAGE_SHIFT; 109 return 0; 110 } 111 112 int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, 113 unsigned long *g_index) 114 { 115 u64 g_addr; 116 int ret; 117 118 ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT, 119 &g_addr); 120 if (ret) 121 return ret; 122 123 *g_index = g_addr >> I915_GTT_PAGE_SHIFT; 124 return 0; 125 } 126 127 #define gtt_type_is_entry(type) \ 128 (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \ 129 && type != GTT_TYPE_PPGTT_PTE_ENTRY \ 130 && type != GTT_TYPE_PPGTT_ROOT_ENTRY) 131 132 #define gtt_type_is_pt(type) \ 133 (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) 134 135 #define gtt_type_is_pte_pt(type) \ 136 (type == GTT_TYPE_PPGTT_PTE_PT) 137 138 #define gtt_type_is_root_pointer(type) \ 139 (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY) 140 141 #define gtt_init_entry(e, t, p, v) do { \ 142 (e)->type = t; \ 143 (e)->pdev = p; \ 144 memcpy(&(e)->val64, &v, sizeof(v)); \ 145 } while (0) 146 147 /* 148 * Mappings between GTT_TYPE* enumerations. 149 * Following information can be found according to the given type: 150 * - type of next level page table 151 * - type of entry inside this level page table 152 * - type of entry with PSE set 153 * 154 * If the given type doesn't have such a kind of information, 155 * e.g. give a l4 root entry type, then request to get its PSE type, 156 * give a PTE page table type, then request to get its next level page 157 * table type, as we know l4 root entry doesn't have a PSE bit, 158 * and a PTE page table doesn't have a next level page table type, 159 * GTT_TYPE_INVALID will be returned. This is useful when traversing a 160 * page table. 161 */ 162 163 struct gtt_type_table_entry { 164 int entry_type; 165 int pt_type; 166 int next_pt_type; 167 int pse_entry_type; 168 }; 169 170 #define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \ 171 [type] = { \ 172 .entry_type = e_type, \ 173 .pt_type = cpt_type, \ 174 .next_pt_type = npt_type, \ 175 .pse_entry_type = pse_type, \ 176 } 177 178 static struct gtt_type_table_entry gtt_type_table[] = { 179 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY, 180 GTT_TYPE_PPGTT_ROOT_L4_ENTRY, 181 GTT_TYPE_INVALID, 182 GTT_TYPE_PPGTT_PML4_PT, 183 GTT_TYPE_INVALID), 184 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT, 185 GTT_TYPE_PPGTT_PML4_ENTRY, 186 GTT_TYPE_PPGTT_PML4_PT, 187 GTT_TYPE_PPGTT_PDP_PT, 188 GTT_TYPE_INVALID), 189 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY, 190 GTT_TYPE_PPGTT_PML4_ENTRY, 191 GTT_TYPE_PPGTT_PML4_PT, 192 GTT_TYPE_PPGTT_PDP_PT, 193 GTT_TYPE_INVALID), 194 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT, 195 GTT_TYPE_PPGTT_PDP_ENTRY, 196 GTT_TYPE_PPGTT_PDP_PT, 197 GTT_TYPE_PPGTT_PDE_PT, 198 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 199 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY, 200 GTT_TYPE_PPGTT_ROOT_L3_ENTRY, 201 GTT_TYPE_INVALID, 202 GTT_TYPE_PPGTT_PDE_PT, 203 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 204 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY, 205 GTT_TYPE_PPGTT_PDP_ENTRY, 206 GTT_TYPE_PPGTT_PDP_PT, 207 GTT_TYPE_PPGTT_PDE_PT, 208 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 209 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT, 210 GTT_TYPE_PPGTT_PDE_ENTRY, 211 GTT_TYPE_PPGTT_PDE_PT, 212 GTT_TYPE_PPGTT_PTE_PT, 213 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 214 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY, 215 GTT_TYPE_PPGTT_PDE_ENTRY, 216 GTT_TYPE_PPGTT_PDE_PT, 217 GTT_TYPE_PPGTT_PTE_PT, 218 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 219 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT, 220 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 221 GTT_TYPE_PPGTT_PTE_PT, 222 GTT_TYPE_INVALID, 223 GTT_TYPE_INVALID), 224 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY, 225 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 226 GTT_TYPE_PPGTT_PTE_PT, 227 GTT_TYPE_INVALID, 228 GTT_TYPE_INVALID), 229 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY, 230 GTT_TYPE_PPGTT_PDE_ENTRY, 231 GTT_TYPE_PPGTT_PDE_PT, 232 GTT_TYPE_INVALID, 233 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 234 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY, 235 GTT_TYPE_PPGTT_PDP_ENTRY, 236 GTT_TYPE_PPGTT_PDP_PT, 237 GTT_TYPE_INVALID, 238 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 239 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE, 240 GTT_TYPE_GGTT_PTE, 241 GTT_TYPE_INVALID, 242 GTT_TYPE_INVALID, 243 GTT_TYPE_INVALID), 244 }; 245 246 static inline int get_next_pt_type(int type) 247 { 248 return gtt_type_table[type].next_pt_type; 249 } 250 251 static inline int get_pt_type(int type) 252 { 253 return gtt_type_table[type].pt_type; 254 } 255 256 static inline int get_entry_type(int type) 257 { 258 return gtt_type_table[type].entry_type; 259 } 260 261 static inline int get_pse_type(int type) 262 { 263 return gtt_type_table[type].pse_entry_type; 264 } 265 266 static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index) 267 { 268 void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index; 269 270 return readq(addr); 271 } 272 273 static void ggtt_invalidate(struct drm_i915_private *dev_priv) 274 { 275 mmio_hw_access_pre(dev_priv); 276 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 277 mmio_hw_access_post(dev_priv); 278 } 279 280 static void write_pte64(struct drm_i915_private *dev_priv, 281 unsigned long index, u64 pte) 282 { 283 void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index; 284 285 writeq(pte, addr); 286 } 287 288 static inline int gtt_get_entry64(void *pt, 289 struct intel_gvt_gtt_entry *e, 290 unsigned long index, bool hypervisor_access, unsigned long gpa, 291 struct intel_vgpu *vgpu) 292 { 293 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 294 int ret; 295 296 if (WARN_ON(info->gtt_entry_size != 8)) 297 return -EINVAL; 298 299 if (hypervisor_access) { 300 ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa + 301 (index << info->gtt_entry_size_shift), 302 &e->val64, 8); 303 if (WARN_ON(ret)) 304 return ret; 305 } else if (!pt) { 306 e->val64 = read_pte64(vgpu->gvt->dev_priv, index); 307 } else { 308 e->val64 = *((u64 *)pt + index); 309 } 310 return 0; 311 } 312 313 static inline int gtt_set_entry64(void *pt, 314 struct intel_gvt_gtt_entry *e, 315 unsigned long index, bool hypervisor_access, unsigned long gpa, 316 struct intel_vgpu *vgpu) 317 { 318 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 319 int ret; 320 321 if (WARN_ON(info->gtt_entry_size != 8)) 322 return -EINVAL; 323 324 if (hypervisor_access) { 325 ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa + 326 (index << info->gtt_entry_size_shift), 327 &e->val64, 8); 328 if (WARN_ON(ret)) 329 return ret; 330 } else if (!pt) { 331 write_pte64(vgpu->gvt->dev_priv, index, e->val64); 332 } else { 333 *((u64 *)pt + index) = e->val64; 334 } 335 return 0; 336 } 337 338 #define GTT_HAW 46 339 340 #define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30) 341 #define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21) 342 #define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12) 343 344 static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) 345 { 346 unsigned long pfn; 347 348 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) 349 pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT; 350 else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) 351 pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT; 352 else 353 pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT; 354 return pfn; 355 } 356 357 static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) 358 { 359 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { 360 e->val64 &= ~ADDR_1G_MASK; 361 pfn &= (ADDR_1G_MASK >> PAGE_SHIFT); 362 } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) { 363 e->val64 &= ~ADDR_2M_MASK; 364 pfn &= (ADDR_2M_MASK >> PAGE_SHIFT); 365 } else { 366 e->val64 &= ~ADDR_4K_MASK; 367 pfn &= (ADDR_4K_MASK >> PAGE_SHIFT); 368 } 369 370 e->val64 |= (pfn << PAGE_SHIFT); 371 } 372 373 static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) 374 { 375 /* Entry doesn't have PSE bit. */ 376 if (get_pse_type(e->type) == GTT_TYPE_INVALID) 377 return false; 378 379 e->type = get_entry_type(e->type); 380 if (!(e->val64 & _PAGE_PSE)) 381 return false; 382 383 e->type = get_pse_type(e->type); 384 return true; 385 } 386 387 static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) 388 { 389 /* 390 * i915 writes PDP root pointer registers without present bit, 391 * it also works, so we need to treat root pointer entry 392 * specifically. 393 */ 394 if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY 395 || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) 396 return (e->val64 != 0); 397 else 398 return (e->val64 & _PAGE_PRESENT); 399 } 400 401 static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) 402 { 403 e->val64 &= ~_PAGE_PRESENT; 404 } 405 406 static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) 407 { 408 e->val64 |= _PAGE_PRESENT; 409 } 410 411 /* 412 * Per-platform GMA routines. 413 */ 414 static unsigned long gma_to_ggtt_pte_index(unsigned long gma) 415 { 416 unsigned long x = (gma >> I915_GTT_PAGE_SHIFT); 417 418 trace_gma_index(__func__, gma, x); 419 return x; 420 } 421 422 #define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \ 423 static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \ 424 { \ 425 unsigned long x = (exp); \ 426 trace_gma_index(__func__, gma, x); \ 427 return x; \ 428 } 429 430 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff)); 431 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff)); 432 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3)); 433 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff)); 434 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff)); 435 436 static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = { 437 .get_entry = gtt_get_entry64, 438 .set_entry = gtt_set_entry64, 439 .clear_present = gtt_entry_clear_present, 440 .set_present = gtt_entry_set_present, 441 .test_present = gen8_gtt_test_present, 442 .test_pse = gen8_gtt_test_pse, 443 .get_pfn = gen8_gtt_get_pfn, 444 .set_pfn = gen8_gtt_set_pfn, 445 }; 446 447 static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = { 448 .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index, 449 .gma_to_pte_index = gen8_gma_to_pte_index, 450 .gma_to_pde_index = gen8_gma_to_pde_index, 451 .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index, 452 .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index, 453 .gma_to_pml4_index = gen8_gma_to_pml4_index, 454 }; 455 456 /* 457 * MM helpers. 458 */ 459 static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm, 460 struct intel_gvt_gtt_entry *entry, unsigned long index, 461 bool guest) 462 { 463 struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 464 465 GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT); 466 467 entry->type = mm->ppgtt_mm.root_entry_type; 468 pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps : 469 mm->ppgtt_mm.shadow_pdps, 470 entry, index, false, 0, mm->vgpu); 471 472 pte_ops->test_pse(entry); 473 } 474 475 static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm, 476 struct intel_gvt_gtt_entry *entry, unsigned long index) 477 { 478 _ppgtt_get_root_entry(mm, entry, index, true); 479 } 480 481 static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm, 482 struct intel_gvt_gtt_entry *entry, unsigned long index) 483 { 484 _ppgtt_get_root_entry(mm, entry, index, false); 485 } 486 487 static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm, 488 struct intel_gvt_gtt_entry *entry, unsigned long index, 489 bool guest) 490 { 491 struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 492 493 pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps : 494 mm->ppgtt_mm.shadow_pdps, 495 entry, index, false, 0, mm->vgpu); 496 } 497 498 static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm, 499 struct intel_gvt_gtt_entry *entry, unsigned long index) 500 { 501 _ppgtt_set_root_entry(mm, entry, index, true); 502 } 503 504 static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm, 505 struct intel_gvt_gtt_entry *entry, unsigned long index) 506 { 507 _ppgtt_set_root_entry(mm, entry, index, false); 508 } 509 510 static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm, 511 struct intel_gvt_gtt_entry *entry, unsigned long index) 512 { 513 struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 514 515 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 516 517 entry->type = GTT_TYPE_GGTT_PTE; 518 pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index, 519 false, 0, mm->vgpu); 520 } 521 522 static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm, 523 struct intel_gvt_gtt_entry *entry, unsigned long index) 524 { 525 struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 526 527 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 528 529 pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index, 530 false, 0, mm->vgpu); 531 } 532 533 static void ggtt_set_host_entry(struct intel_vgpu_mm *mm, 534 struct intel_gvt_gtt_entry *entry, unsigned long index) 535 { 536 struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 537 538 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 539 540 pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu); 541 } 542 543 /* 544 * PPGTT shadow page table helpers. 545 */ 546 static inline int ppgtt_spt_get_entry( 547 struct intel_vgpu_ppgtt_spt *spt, 548 void *page_table, int type, 549 struct intel_gvt_gtt_entry *e, unsigned long index, 550 bool guest) 551 { 552 struct intel_gvt *gvt = spt->vgpu->gvt; 553 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 554 int ret; 555 556 e->type = get_entry_type(type); 557 558 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) 559 return -EINVAL; 560 561 ret = ops->get_entry(page_table, e, index, guest, 562 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 563 spt->vgpu); 564 if (ret) 565 return ret; 566 567 ops->test_pse(e); 568 569 gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", 570 type, e->type, index, e->val64); 571 return 0; 572 } 573 574 static inline int ppgtt_spt_set_entry( 575 struct intel_vgpu_ppgtt_spt *spt, 576 void *page_table, int type, 577 struct intel_gvt_gtt_entry *e, unsigned long index, 578 bool guest) 579 { 580 struct intel_gvt *gvt = spt->vgpu->gvt; 581 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 582 583 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) 584 return -EINVAL; 585 586 gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", 587 type, e->type, index, e->val64); 588 589 return ops->set_entry(page_table, e, index, guest, 590 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 591 spt->vgpu); 592 } 593 594 #define ppgtt_get_guest_entry(spt, e, index) \ 595 ppgtt_spt_get_entry(spt, NULL, \ 596 spt->guest_page.type, e, index, true) 597 598 #define ppgtt_set_guest_entry(spt, e, index) \ 599 ppgtt_spt_set_entry(spt, NULL, \ 600 spt->guest_page.type, e, index, true) 601 602 #define ppgtt_get_shadow_entry(spt, e, index) \ 603 ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \ 604 spt->shadow_page.type, e, index, false) 605 606 #define ppgtt_set_shadow_entry(spt, e, index) \ 607 ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \ 608 spt->shadow_page.type, e, index, false) 609 610 static void *alloc_spt(gfp_t gfp_mask) 611 { 612 struct intel_vgpu_ppgtt_spt *spt; 613 614 spt = kzalloc(sizeof(*spt), gfp_mask); 615 if (!spt) 616 return NULL; 617 618 spt->shadow_page.page = alloc_page(gfp_mask); 619 if (!spt->shadow_page.page) { 620 kfree(spt); 621 return NULL; 622 } 623 return spt; 624 } 625 626 static void free_spt(struct intel_vgpu_ppgtt_spt *spt) 627 { 628 __free_page(spt->shadow_page.page); 629 kfree(spt); 630 } 631 632 static int detach_oos_page(struct intel_vgpu *vgpu, 633 struct intel_vgpu_oos_page *oos_page); 634 635 static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) 636 { 637 struct device *kdev = &spt->vgpu->gvt->dev_priv->drm.pdev->dev; 638 639 trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type); 640 641 dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096, 642 PCI_DMA_BIDIRECTIONAL); 643 644 radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn); 645 646 if (spt->guest_page.oos_page) 647 detach_oos_page(spt->vgpu, spt->guest_page.oos_page); 648 649 intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn); 650 651 list_del_init(&spt->post_shadow_list); 652 free_spt(spt); 653 } 654 655 static void ppgtt_free_all_spt(struct intel_vgpu *vgpu) 656 { 657 struct intel_vgpu_ppgtt_spt *spt; 658 struct radix_tree_iter iter; 659 void **slot; 660 661 radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) { 662 spt = radix_tree_deref_slot(slot); 663 ppgtt_free_spt(spt); 664 } 665 } 666 667 static int ppgtt_handle_guest_write_page_table_bytes( 668 struct intel_vgpu_ppgtt_spt *spt, 669 u64 pa, void *p_data, int bytes); 670 671 static int ppgtt_write_protection_handler( 672 struct intel_vgpu_page_track *page_track, 673 u64 gpa, void *data, int bytes) 674 { 675 struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data; 676 677 int ret; 678 679 if (bytes != 4 && bytes != 8) 680 return -EINVAL; 681 682 ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes); 683 if (ret) 684 return ret; 685 return ret; 686 } 687 688 /* Find a spt by guest gfn. */ 689 static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn( 690 struct intel_vgpu *vgpu, unsigned long gfn) 691 { 692 struct intel_vgpu_page_track *track; 693 694 track = intel_vgpu_find_page_track(vgpu, gfn); 695 if (track && track->handler == ppgtt_write_protection_handler) 696 return track->priv_data; 697 698 return NULL; 699 } 700 701 /* Find the spt by shadow page mfn. */ 702 static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( 703 struct intel_vgpu *vgpu, unsigned long mfn) 704 { 705 return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn); 706 } 707 708 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); 709 710 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( 711 struct intel_vgpu *vgpu, int type, unsigned long gfn) 712 { 713 struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; 714 struct intel_vgpu_ppgtt_spt *spt = NULL; 715 dma_addr_t daddr; 716 int ret; 717 718 retry: 719 spt = alloc_spt(GFP_KERNEL | __GFP_ZERO); 720 if (!spt) { 721 if (reclaim_one_ppgtt_mm(vgpu->gvt)) 722 goto retry; 723 724 gvt_vgpu_err("fail to allocate ppgtt shadow page\n"); 725 return ERR_PTR(-ENOMEM); 726 } 727 728 spt->vgpu = vgpu; 729 atomic_set(&spt->refcount, 1); 730 INIT_LIST_HEAD(&spt->post_shadow_list); 731 732 /* 733 * Init shadow_page. 734 */ 735 spt->shadow_page.type = type; 736 daddr = dma_map_page(kdev, spt->shadow_page.page, 737 0, 4096, PCI_DMA_BIDIRECTIONAL); 738 if (dma_mapping_error(kdev, daddr)) { 739 gvt_vgpu_err("fail to map dma addr\n"); 740 ret = -EINVAL; 741 goto err_free_spt; 742 } 743 spt->shadow_page.vaddr = page_address(spt->shadow_page.page); 744 spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT; 745 746 /* 747 * Init guest_page. 748 */ 749 spt->guest_page.type = type; 750 spt->guest_page.gfn = gfn; 751 752 ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn, 753 ppgtt_write_protection_handler, spt); 754 if (ret) 755 goto err_unmap_dma; 756 757 ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt); 758 if (ret) 759 goto err_unreg_page_track; 760 761 trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); 762 return spt; 763 764 err_unreg_page_track: 765 intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn); 766 err_unmap_dma: 767 dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 768 err_free_spt: 769 free_spt(spt); 770 return ERR_PTR(ret); 771 } 772 773 #define pt_entry_size_shift(spt) \ 774 ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) 775 776 #define pt_entries(spt) \ 777 (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) 778 779 #define for_each_present_guest_entry(spt, e, i) \ 780 for (i = 0; i < pt_entries(spt); i++) \ 781 if (!ppgtt_get_guest_entry(spt, e, i) && \ 782 spt->vgpu->gvt->gtt.pte_ops->test_present(e)) 783 784 #define for_each_present_shadow_entry(spt, e, i) \ 785 for (i = 0; i < pt_entries(spt); i++) \ 786 if (!ppgtt_get_shadow_entry(spt, e, i) && \ 787 spt->vgpu->gvt->gtt.pte_ops->test_present(e)) 788 789 static void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) 790 { 791 int v = atomic_read(&spt->refcount); 792 793 trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1)); 794 795 atomic_inc(&spt->refcount); 796 } 797 798 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt); 799 800 static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, 801 struct intel_gvt_gtt_entry *e) 802 { 803 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 804 struct intel_vgpu_ppgtt_spt *s; 805 intel_gvt_gtt_type_t cur_pt_type; 806 807 GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type))); 808 809 if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY 810 && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { 811 cur_pt_type = get_next_pt_type(e->type) + 1; 812 if (ops->get_pfn(e) == 813 vgpu->gtt.scratch_pt[cur_pt_type].page_mfn) 814 return 0; 815 } 816 s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); 817 if (!s) { 818 gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n", 819 ops->get_pfn(e)); 820 return -ENXIO; 821 } 822 return ppgtt_invalidate_spt(s); 823 } 824 825 static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt, 826 struct intel_gvt_gtt_entry *entry) 827 { 828 struct intel_vgpu *vgpu = spt->vgpu; 829 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 830 unsigned long pfn; 831 int type; 832 833 pfn = ops->get_pfn(entry); 834 type = spt->shadow_page.type; 835 836 if (pfn == vgpu->gtt.scratch_pt[type].page_mfn) 837 return; 838 839 intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); 840 } 841 842 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) 843 { 844 struct intel_vgpu *vgpu = spt->vgpu; 845 struct intel_gvt_gtt_entry e; 846 unsigned long index; 847 int ret; 848 int v = atomic_read(&spt->refcount); 849 850 trace_spt_change(spt->vgpu->id, "die", spt, 851 spt->guest_page.gfn, spt->shadow_page.type); 852 853 trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); 854 855 if (atomic_dec_return(&spt->refcount) > 0) 856 return 0; 857 858 for_each_present_shadow_entry(spt, &e, index) { 859 switch (e.type) { 860 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 861 gvt_vdbg_mm("invalidate 4K entry\n"); 862 ppgtt_invalidate_pte(spt, &e); 863 break; 864 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 865 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 866 WARN(1, "GVT doesn't support 2M/1GB page\n"); 867 continue; 868 case GTT_TYPE_PPGTT_PML4_ENTRY: 869 case GTT_TYPE_PPGTT_PDP_ENTRY: 870 case GTT_TYPE_PPGTT_PDE_ENTRY: 871 gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n"); 872 ret = ppgtt_invalidate_spt_by_shadow_entry( 873 spt->vgpu, &e); 874 if (ret) 875 goto fail; 876 break; 877 default: 878 GEM_BUG_ON(1); 879 } 880 } 881 882 trace_spt_change(spt->vgpu->id, "release", spt, 883 spt->guest_page.gfn, spt->shadow_page.type); 884 ppgtt_free_spt(spt); 885 return 0; 886 fail: 887 gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n", 888 spt, e.val64, e.type); 889 return ret; 890 } 891 892 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt); 893 894 static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( 895 struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we) 896 { 897 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 898 struct intel_vgpu_ppgtt_spt *spt = NULL; 899 int ret; 900 901 GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type))); 902 903 spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we)); 904 if (spt) 905 ppgtt_get_spt(spt); 906 else { 907 int type = get_next_pt_type(we->type); 908 909 spt = ppgtt_alloc_spt(vgpu, type, ops->get_pfn(we)); 910 if (IS_ERR(spt)) { 911 ret = PTR_ERR(spt); 912 goto fail; 913 } 914 915 ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn); 916 if (ret) 917 goto fail; 918 919 ret = ppgtt_populate_spt(spt); 920 if (ret) 921 goto fail; 922 923 trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn, 924 spt->shadow_page.type); 925 } 926 return spt; 927 fail: 928 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 929 spt, we->val64, we->type); 930 return ERR_PTR(ret); 931 } 932 933 static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se, 934 struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge) 935 { 936 struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops; 937 938 se->type = ge->type; 939 se->val64 = ge->val64; 940 941 ops->set_pfn(se, s->shadow_page.mfn); 942 } 943 944 static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, 945 struct intel_vgpu_ppgtt_spt *spt, unsigned long index, 946 struct intel_gvt_gtt_entry *ge) 947 { 948 struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 949 struct intel_gvt_gtt_entry se = *ge; 950 unsigned long gfn; 951 dma_addr_t dma_addr; 952 int ret; 953 954 if (!pte_ops->test_present(ge)) 955 return 0; 956 957 gfn = pte_ops->get_pfn(ge); 958 959 switch (ge->type) { 960 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 961 gvt_vdbg_mm("shadow 4K gtt entry\n"); 962 break; 963 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 964 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 965 gvt_vgpu_err("GVT doesn't support 2M/1GB entry\n"); 966 return -EINVAL; 967 default: 968 GEM_BUG_ON(1); 969 }; 970 971 /* direct shadow */ 972 ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr); 973 if (ret) 974 return -ENXIO; 975 976 pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT); 977 ppgtt_set_shadow_entry(spt, &se, index); 978 return 0; 979 } 980 981 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt) 982 { 983 struct intel_vgpu *vgpu = spt->vgpu; 984 struct intel_gvt *gvt = vgpu->gvt; 985 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 986 struct intel_vgpu_ppgtt_spt *s; 987 struct intel_gvt_gtt_entry se, ge; 988 unsigned long gfn, i; 989 int ret; 990 991 trace_spt_change(spt->vgpu->id, "born", spt, 992 spt->guest_page.gfn, spt->shadow_page.type); 993 994 for_each_present_guest_entry(spt, &ge, i) { 995 if (gtt_type_is_pt(get_next_pt_type(ge.type))) { 996 s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); 997 if (IS_ERR(s)) { 998 ret = PTR_ERR(s); 999 goto fail; 1000 } 1001 ppgtt_get_shadow_entry(spt, &se, i); 1002 ppgtt_generate_shadow_entry(&se, s, &ge); 1003 ppgtt_set_shadow_entry(spt, &se, i); 1004 } else { 1005 gfn = ops->get_pfn(&ge); 1006 if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) { 1007 ops->set_pfn(&se, gvt->gtt.scratch_mfn); 1008 ppgtt_set_shadow_entry(spt, &se, i); 1009 continue; 1010 } 1011 1012 ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge); 1013 if (ret) 1014 goto fail; 1015 } 1016 } 1017 return 0; 1018 fail: 1019 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1020 spt, ge.val64, ge.type); 1021 return ret; 1022 } 1023 1024 static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, 1025 struct intel_gvt_gtt_entry *se, unsigned long index) 1026 { 1027 struct intel_vgpu *vgpu = spt->vgpu; 1028 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1029 int ret; 1030 1031 trace_spt_guest_change(spt->vgpu->id, "remove", spt, 1032 spt->shadow_page.type, se->val64, index); 1033 1034 gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n", 1035 se->type, index, se->val64); 1036 1037 if (!ops->test_present(se)) 1038 return 0; 1039 1040 if (ops->get_pfn(se) == 1041 vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn) 1042 return 0; 1043 1044 if (gtt_type_is_pt(get_next_pt_type(se->type))) { 1045 struct intel_vgpu_ppgtt_spt *s = 1046 intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se)); 1047 if (!s) { 1048 gvt_vgpu_err("fail to find guest page\n"); 1049 ret = -ENXIO; 1050 goto fail; 1051 } 1052 ret = ppgtt_invalidate_spt(s); 1053 if (ret) 1054 goto fail; 1055 } else 1056 ppgtt_invalidate_pte(spt, se); 1057 1058 return 0; 1059 fail: 1060 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1061 spt, se->val64, se->type); 1062 return ret; 1063 } 1064 1065 static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt, 1066 struct intel_gvt_gtt_entry *we, unsigned long index) 1067 { 1068 struct intel_vgpu *vgpu = spt->vgpu; 1069 struct intel_gvt_gtt_entry m; 1070 struct intel_vgpu_ppgtt_spt *s; 1071 int ret; 1072 1073 trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type, 1074 we->val64, index); 1075 1076 gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n", 1077 we->type, index, we->val64); 1078 1079 if (gtt_type_is_pt(get_next_pt_type(we->type))) { 1080 s = ppgtt_populate_spt_by_guest_entry(vgpu, we); 1081 if (IS_ERR(s)) { 1082 ret = PTR_ERR(s); 1083 goto fail; 1084 } 1085 ppgtt_get_shadow_entry(spt, &m, index); 1086 ppgtt_generate_shadow_entry(&m, s, we); 1087 ppgtt_set_shadow_entry(spt, &m, index); 1088 } else { 1089 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we); 1090 if (ret) 1091 goto fail; 1092 } 1093 return 0; 1094 fail: 1095 gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n", 1096 spt, we->val64, we->type); 1097 return ret; 1098 } 1099 1100 static int sync_oos_page(struct intel_vgpu *vgpu, 1101 struct intel_vgpu_oos_page *oos_page) 1102 { 1103 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1104 struct intel_gvt *gvt = vgpu->gvt; 1105 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 1106 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; 1107 struct intel_gvt_gtt_entry old, new; 1108 int index; 1109 int ret; 1110 1111 trace_oos_change(vgpu->id, "sync", oos_page->id, 1112 spt, spt->guest_page.type); 1113 1114 old.type = new.type = get_entry_type(spt->guest_page.type); 1115 old.val64 = new.val64 = 0; 1116 1117 for (index = 0; index < (I915_GTT_PAGE_SIZE >> 1118 info->gtt_entry_size_shift); index++) { 1119 ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu); 1120 ops->get_entry(NULL, &new, index, true, 1121 spt->guest_page.gfn << PAGE_SHIFT, vgpu); 1122 1123 if (old.val64 == new.val64 1124 && !test_and_clear_bit(index, spt->post_shadow_bitmap)) 1125 continue; 1126 1127 trace_oos_sync(vgpu->id, oos_page->id, 1128 spt, spt->guest_page.type, 1129 new.val64, index); 1130 1131 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new); 1132 if (ret) 1133 return ret; 1134 1135 ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu); 1136 } 1137 1138 spt->guest_page.write_cnt = 0; 1139 list_del_init(&spt->post_shadow_list); 1140 return 0; 1141 } 1142 1143 static int detach_oos_page(struct intel_vgpu *vgpu, 1144 struct intel_vgpu_oos_page *oos_page) 1145 { 1146 struct intel_gvt *gvt = vgpu->gvt; 1147 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; 1148 1149 trace_oos_change(vgpu->id, "detach", oos_page->id, 1150 spt, spt->guest_page.type); 1151 1152 spt->guest_page.write_cnt = 0; 1153 spt->guest_page.oos_page = NULL; 1154 oos_page->spt = NULL; 1155 1156 list_del_init(&oos_page->vm_list); 1157 list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head); 1158 1159 return 0; 1160 } 1161 1162 static int attach_oos_page(struct intel_vgpu_oos_page *oos_page, 1163 struct intel_vgpu_ppgtt_spt *spt) 1164 { 1165 struct intel_gvt *gvt = spt->vgpu->gvt; 1166 int ret; 1167 1168 ret = intel_gvt_hypervisor_read_gpa(spt->vgpu, 1169 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 1170 oos_page->mem, I915_GTT_PAGE_SIZE); 1171 if (ret) 1172 return ret; 1173 1174 oos_page->spt = spt; 1175 spt->guest_page.oos_page = oos_page; 1176 1177 list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head); 1178 1179 trace_oos_change(spt->vgpu->id, "attach", oos_page->id, 1180 spt, spt->guest_page.type); 1181 return 0; 1182 } 1183 1184 static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt) 1185 { 1186 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1187 int ret; 1188 1189 ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn); 1190 if (ret) 1191 return ret; 1192 1193 trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id, 1194 spt, spt->guest_page.type); 1195 1196 list_del_init(&oos_page->vm_list); 1197 return sync_oos_page(spt->vgpu, oos_page); 1198 } 1199 1200 static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt) 1201 { 1202 struct intel_gvt *gvt = spt->vgpu->gvt; 1203 struct intel_gvt_gtt *gtt = &gvt->gtt; 1204 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1205 int ret; 1206 1207 WARN(oos_page, "shadow PPGTT page has already has a oos page\n"); 1208 1209 if (list_empty(>t->oos_page_free_list_head)) { 1210 oos_page = container_of(gtt->oos_page_use_list_head.next, 1211 struct intel_vgpu_oos_page, list); 1212 ret = ppgtt_set_guest_page_sync(oos_page->spt); 1213 if (ret) 1214 return ret; 1215 ret = detach_oos_page(spt->vgpu, oos_page); 1216 if (ret) 1217 return ret; 1218 } else 1219 oos_page = container_of(gtt->oos_page_free_list_head.next, 1220 struct intel_vgpu_oos_page, list); 1221 return attach_oos_page(oos_page, spt); 1222 } 1223 1224 static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt) 1225 { 1226 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page; 1227 1228 if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n")) 1229 return -EINVAL; 1230 1231 trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id, 1232 spt, spt->guest_page.type); 1233 1234 list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head); 1235 return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn); 1236 } 1237 1238 /** 1239 * intel_vgpu_sync_oos_pages - sync all the out-of-synced shadow for vGPU 1240 * @vgpu: a vGPU 1241 * 1242 * This function is called before submitting a guest workload to host, 1243 * to sync all the out-of-synced shadow for vGPU 1244 * 1245 * Returns: 1246 * Zero on success, negative error code if failed. 1247 */ 1248 int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu) 1249 { 1250 struct list_head *pos, *n; 1251 struct intel_vgpu_oos_page *oos_page; 1252 int ret; 1253 1254 if (!enable_out_of_sync) 1255 return 0; 1256 1257 list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) { 1258 oos_page = container_of(pos, 1259 struct intel_vgpu_oos_page, vm_list); 1260 ret = ppgtt_set_guest_page_sync(oos_page->spt); 1261 if (ret) 1262 return ret; 1263 } 1264 return 0; 1265 } 1266 1267 /* 1268 * The heart of PPGTT shadow page table. 1269 */ 1270 static int ppgtt_handle_guest_write_page_table( 1271 struct intel_vgpu_ppgtt_spt *spt, 1272 struct intel_gvt_gtt_entry *we, unsigned long index) 1273 { 1274 struct intel_vgpu *vgpu = spt->vgpu; 1275 int type = spt->shadow_page.type; 1276 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1277 struct intel_gvt_gtt_entry old_se; 1278 int new_present; 1279 int ret; 1280 1281 new_present = ops->test_present(we); 1282 1283 /* 1284 * Adding the new entry first and then removing the old one, that can 1285 * guarantee the ppgtt table is validated during the window between 1286 * adding and removal. 1287 */ 1288 ppgtt_get_shadow_entry(spt, &old_se, index); 1289 1290 if (new_present) { 1291 ret = ppgtt_handle_guest_entry_add(spt, we, index); 1292 if (ret) 1293 goto fail; 1294 } 1295 1296 ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index); 1297 if (ret) 1298 goto fail; 1299 1300 if (!new_present) { 1301 ops->set_pfn(&old_se, vgpu->gtt.scratch_pt[type].page_mfn); 1302 ppgtt_set_shadow_entry(spt, &old_se, index); 1303 } 1304 1305 return 0; 1306 fail: 1307 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n", 1308 spt, we->val64, we->type); 1309 return ret; 1310 } 1311 1312 1313 1314 static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt) 1315 { 1316 return enable_out_of_sync 1317 && gtt_type_is_pte_pt(spt->guest_page.type) 1318 && spt->guest_page.write_cnt >= 2; 1319 } 1320 1321 static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt, 1322 unsigned long index) 1323 { 1324 set_bit(index, spt->post_shadow_bitmap); 1325 if (!list_empty(&spt->post_shadow_list)) 1326 return; 1327 1328 list_add_tail(&spt->post_shadow_list, 1329 &spt->vgpu->gtt.post_shadow_list_head); 1330 } 1331 1332 /** 1333 * intel_vgpu_flush_post_shadow - flush the post shadow transactions 1334 * @vgpu: a vGPU 1335 * 1336 * This function is called before submitting a guest workload to host, 1337 * to flush all the post shadows for a vGPU. 1338 * 1339 * Returns: 1340 * Zero on success, negative error code if failed. 1341 */ 1342 int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) 1343 { 1344 struct list_head *pos, *n; 1345 struct intel_vgpu_ppgtt_spt *spt; 1346 struct intel_gvt_gtt_entry ge; 1347 unsigned long index; 1348 int ret; 1349 1350 list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) { 1351 spt = container_of(pos, struct intel_vgpu_ppgtt_spt, 1352 post_shadow_list); 1353 1354 for_each_set_bit(index, spt->post_shadow_bitmap, 1355 GTT_ENTRY_NUM_IN_ONE_PAGE) { 1356 ppgtt_get_guest_entry(spt, &ge, index); 1357 1358 ret = ppgtt_handle_guest_write_page_table(spt, 1359 &ge, index); 1360 if (ret) 1361 return ret; 1362 clear_bit(index, spt->post_shadow_bitmap); 1363 } 1364 list_del_init(&spt->post_shadow_list); 1365 } 1366 return 0; 1367 } 1368 1369 static int ppgtt_handle_guest_write_page_table_bytes( 1370 struct intel_vgpu_ppgtt_spt *spt, 1371 u64 pa, void *p_data, int bytes) 1372 { 1373 struct intel_vgpu *vgpu = spt->vgpu; 1374 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1375 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1376 struct intel_gvt_gtt_entry we, se; 1377 unsigned long index; 1378 int ret; 1379 1380 index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift; 1381 1382 ppgtt_get_guest_entry(spt, &we, index); 1383 1384 ops->test_pse(&we); 1385 1386 if (bytes == info->gtt_entry_size) { 1387 ret = ppgtt_handle_guest_write_page_table(spt, &we, index); 1388 if (ret) 1389 return ret; 1390 } else { 1391 if (!test_bit(index, spt->post_shadow_bitmap)) { 1392 int type = spt->shadow_page.type; 1393 1394 ppgtt_get_shadow_entry(spt, &se, index); 1395 ret = ppgtt_handle_guest_entry_removal(spt, &se, index); 1396 if (ret) 1397 return ret; 1398 ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); 1399 ppgtt_set_shadow_entry(spt, &se, index); 1400 } 1401 ppgtt_set_post_shadow(spt, index); 1402 } 1403 1404 if (!enable_out_of_sync) 1405 return 0; 1406 1407 spt->guest_page.write_cnt++; 1408 1409 if (spt->guest_page.oos_page) 1410 ops->set_entry(spt->guest_page.oos_page->mem, &we, index, 1411 false, 0, vgpu); 1412 1413 if (can_do_out_of_sync(spt)) { 1414 if (!spt->guest_page.oos_page) 1415 ppgtt_allocate_oos_page(spt); 1416 1417 ret = ppgtt_set_guest_page_oos(spt); 1418 if (ret < 0) 1419 return ret; 1420 } 1421 return 0; 1422 } 1423 1424 static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) 1425 { 1426 struct intel_vgpu *vgpu = mm->vgpu; 1427 struct intel_gvt *gvt = vgpu->gvt; 1428 struct intel_gvt_gtt *gtt = &gvt->gtt; 1429 struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; 1430 struct intel_gvt_gtt_entry se; 1431 int index; 1432 1433 if (!mm->ppgtt_mm.shadowed) 1434 return; 1435 1436 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) { 1437 ppgtt_get_shadow_root_entry(mm, &se, index); 1438 1439 if (!ops->test_present(&se)) 1440 continue; 1441 1442 ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se); 1443 se.val64 = 0; 1444 ppgtt_set_shadow_root_entry(mm, &se, index); 1445 1446 trace_spt_guest_change(vgpu->id, "destroy root pointer", 1447 NULL, se.type, se.val64, index); 1448 } 1449 1450 mm->ppgtt_mm.shadowed = false; 1451 } 1452 1453 1454 static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) 1455 { 1456 struct intel_vgpu *vgpu = mm->vgpu; 1457 struct intel_gvt *gvt = vgpu->gvt; 1458 struct intel_gvt_gtt *gtt = &gvt->gtt; 1459 struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; 1460 struct intel_vgpu_ppgtt_spt *spt; 1461 struct intel_gvt_gtt_entry ge, se; 1462 int index, ret; 1463 1464 if (mm->ppgtt_mm.shadowed) 1465 return 0; 1466 1467 mm->ppgtt_mm.shadowed = true; 1468 1469 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) { 1470 ppgtt_get_guest_root_entry(mm, &ge, index); 1471 1472 if (!ops->test_present(&ge)) 1473 continue; 1474 1475 trace_spt_guest_change(vgpu->id, __func__, NULL, 1476 ge.type, ge.val64, index); 1477 1478 spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); 1479 if (IS_ERR(spt)) { 1480 gvt_vgpu_err("fail to populate guest root pointer\n"); 1481 ret = PTR_ERR(spt); 1482 goto fail; 1483 } 1484 ppgtt_generate_shadow_entry(&se, spt, &ge); 1485 ppgtt_set_shadow_root_entry(mm, &se, index); 1486 1487 trace_spt_guest_change(vgpu->id, "populate root pointer", 1488 NULL, se.type, se.val64, index); 1489 } 1490 1491 return 0; 1492 fail: 1493 invalidate_ppgtt_mm(mm); 1494 return ret; 1495 } 1496 1497 static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu) 1498 { 1499 struct intel_vgpu_mm *mm; 1500 1501 mm = kzalloc(sizeof(*mm), GFP_KERNEL); 1502 if (!mm) 1503 return NULL; 1504 1505 mm->vgpu = vgpu; 1506 kref_init(&mm->ref); 1507 atomic_set(&mm->pincount, 0); 1508 1509 return mm; 1510 } 1511 1512 static void vgpu_free_mm(struct intel_vgpu_mm *mm) 1513 { 1514 kfree(mm); 1515 } 1516 1517 /** 1518 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU 1519 * @vgpu: a vGPU 1520 * @root_entry_type: ppgtt root entry type 1521 * @pdps: guest pdps. 1522 * 1523 * This function is used to create a ppgtt mm object for a vGPU. 1524 * 1525 * Returns: 1526 * Zero on success, negative error code in pointer if failed. 1527 */ 1528 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, 1529 intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) 1530 { 1531 struct intel_gvt *gvt = vgpu->gvt; 1532 struct intel_vgpu_mm *mm; 1533 int ret; 1534 1535 mm = vgpu_alloc_mm(vgpu); 1536 if (!mm) 1537 return ERR_PTR(-ENOMEM); 1538 1539 mm->type = INTEL_GVT_MM_PPGTT; 1540 1541 GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY && 1542 root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY); 1543 mm->ppgtt_mm.root_entry_type = root_entry_type; 1544 1545 INIT_LIST_HEAD(&mm->ppgtt_mm.list); 1546 INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list); 1547 1548 if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) 1549 mm->ppgtt_mm.guest_pdps[0] = pdps[0]; 1550 else 1551 memcpy(mm->ppgtt_mm.guest_pdps, pdps, 1552 sizeof(mm->ppgtt_mm.guest_pdps)); 1553 1554 ret = shadow_ppgtt_mm(mm); 1555 if (ret) { 1556 gvt_vgpu_err("failed to shadow ppgtt mm\n"); 1557 vgpu_free_mm(mm); 1558 return ERR_PTR(ret); 1559 } 1560 1561 list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head); 1562 list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head); 1563 return mm; 1564 } 1565 1566 static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu) 1567 { 1568 struct intel_vgpu_mm *mm; 1569 unsigned long nr_entries; 1570 1571 mm = vgpu_alloc_mm(vgpu); 1572 if (!mm) 1573 return ERR_PTR(-ENOMEM); 1574 1575 mm->type = INTEL_GVT_MM_GGTT; 1576 1577 nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT; 1578 mm->ggtt_mm.virtual_ggtt = vzalloc(nr_entries * 1579 vgpu->gvt->device_info.gtt_entry_size); 1580 if (!mm->ggtt_mm.virtual_ggtt) { 1581 vgpu_free_mm(mm); 1582 return ERR_PTR(-ENOMEM); 1583 } 1584 1585 return mm; 1586 } 1587 1588 /** 1589 * _intel_vgpu_mm_release - destroy a mm object 1590 * @mm_ref: a kref object 1591 * 1592 * This function is used to destroy a mm object for vGPU 1593 * 1594 */ 1595 void _intel_vgpu_mm_release(struct kref *mm_ref) 1596 { 1597 struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref); 1598 1599 if (GEM_WARN_ON(atomic_read(&mm->pincount))) 1600 gvt_err("vgpu mm pin count bug detected\n"); 1601 1602 if (mm->type == INTEL_GVT_MM_PPGTT) { 1603 list_del(&mm->ppgtt_mm.list); 1604 list_del(&mm->ppgtt_mm.lru_list); 1605 invalidate_ppgtt_mm(mm); 1606 } else { 1607 vfree(mm->ggtt_mm.virtual_ggtt); 1608 } 1609 1610 vgpu_free_mm(mm); 1611 } 1612 1613 /** 1614 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object 1615 * @mm: a vGPU mm object 1616 * 1617 * This function is called when user doesn't want to use a vGPU mm object 1618 */ 1619 void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm) 1620 { 1621 atomic_dec(&mm->pincount); 1622 } 1623 1624 /** 1625 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object 1626 * @vgpu: a vGPU 1627 * 1628 * This function is called when user wants to use a vGPU mm object. If this 1629 * mm object hasn't been shadowed yet, the shadow will be populated at this 1630 * time. 1631 * 1632 * Returns: 1633 * Zero on success, negative error code if failed. 1634 */ 1635 int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm) 1636 { 1637 int ret; 1638 1639 atomic_inc(&mm->pincount); 1640 1641 if (mm->type == INTEL_GVT_MM_PPGTT) { 1642 ret = shadow_ppgtt_mm(mm); 1643 if (ret) 1644 return ret; 1645 1646 list_move_tail(&mm->ppgtt_mm.lru_list, 1647 &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head); 1648 1649 } 1650 1651 return 0; 1652 } 1653 1654 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt) 1655 { 1656 struct intel_vgpu_mm *mm; 1657 struct list_head *pos, *n; 1658 1659 list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) { 1660 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list); 1661 1662 if (atomic_read(&mm->pincount)) 1663 continue; 1664 1665 list_del_init(&mm->ppgtt_mm.lru_list); 1666 invalidate_ppgtt_mm(mm); 1667 return 1; 1668 } 1669 return 0; 1670 } 1671 1672 /* 1673 * GMA translation APIs. 1674 */ 1675 static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm, 1676 struct intel_gvt_gtt_entry *e, unsigned long index, bool guest) 1677 { 1678 struct intel_vgpu *vgpu = mm->vgpu; 1679 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1680 struct intel_vgpu_ppgtt_spt *s; 1681 1682 s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); 1683 if (!s) 1684 return -ENXIO; 1685 1686 if (!guest) 1687 ppgtt_get_shadow_entry(s, e, index); 1688 else 1689 ppgtt_get_guest_entry(s, e, index); 1690 return 0; 1691 } 1692 1693 /** 1694 * intel_vgpu_gma_to_gpa - translate a gma to GPA 1695 * @mm: mm object. could be a PPGTT or GGTT mm object 1696 * @gma: graphics memory address in this mm object 1697 * 1698 * This function is used to translate a graphics memory address in specific 1699 * graphics memory space to guest physical address. 1700 * 1701 * Returns: 1702 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed. 1703 */ 1704 unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) 1705 { 1706 struct intel_vgpu *vgpu = mm->vgpu; 1707 struct intel_gvt *gvt = vgpu->gvt; 1708 struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops; 1709 struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops; 1710 unsigned long gpa = INTEL_GVT_INVALID_ADDR; 1711 unsigned long gma_index[4]; 1712 struct intel_gvt_gtt_entry e; 1713 int i, levels = 0; 1714 int ret; 1715 1716 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT && 1717 mm->type != INTEL_GVT_MM_PPGTT); 1718 1719 if (mm->type == INTEL_GVT_MM_GGTT) { 1720 if (!vgpu_gmadr_is_valid(vgpu, gma)) 1721 goto err; 1722 1723 ggtt_get_guest_entry(mm, &e, 1724 gma_ops->gma_to_ggtt_pte_index(gma)); 1725 1726 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) 1727 + (gma & ~I915_GTT_PAGE_MASK); 1728 1729 trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa); 1730 } else { 1731 switch (mm->ppgtt_mm.root_entry_type) { 1732 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: 1733 ppgtt_get_shadow_root_entry(mm, &e, 0); 1734 1735 gma_index[0] = gma_ops->gma_to_pml4_index(gma); 1736 gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma); 1737 gma_index[2] = gma_ops->gma_to_pde_index(gma); 1738 gma_index[3] = gma_ops->gma_to_pte_index(gma); 1739 levels = 4; 1740 break; 1741 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: 1742 ppgtt_get_shadow_root_entry(mm, &e, 1743 gma_ops->gma_to_l3_pdp_index(gma)); 1744 1745 gma_index[0] = gma_ops->gma_to_pde_index(gma); 1746 gma_index[1] = gma_ops->gma_to_pte_index(gma); 1747 levels = 2; 1748 break; 1749 default: 1750 GEM_BUG_ON(1); 1751 } 1752 1753 /* walk the shadow page table and get gpa from guest entry */ 1754 for (i = 0; i < levels; i++) { 1755 ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i], 1756 (i == levels - 1)); 1757 if (ret) 1758 goto err; 1759 1760 if (!pte_ops->test_present(&e)) { 1761 gvt_dbg_core("GMA 0x%lx is not present\n", gma); 1762 goto err; 1763 } 1764 } 1765 1766 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) + 1767 (gma & ~I915_GTT_PAGE_MASK); 1768 trace_gma_translate(vgpu->id, "ppgtt", 0, 1769 mm->ppgtt_mm.root_entry_type, gma, gpa); 1770 } 1771 1772 return gpa; 1773 err: 1774 gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma); 1775 return INTEL_GVT_INVALID_ADDR; 1776 } 1777 1778 static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, 1779 unsigned int off, void *p_data, unsigned int bytes) 1780 { 1781 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; 1782 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1783 unsigned long index = off >> info->gtt_entry_size_shift; 1784 struct intel_gvt_gtt_entry e; 1785 1786 if (bytes != 4 && bytes != 8) 1787 return -EINVAL; 1788 1789 ggtt_get_guest_entry(ggtt_mm, &e, index); 1790 memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)), 1791 bytes); 1792 return 0; 1793 } 1794 1795 /** 1796 * intel_vgpu_emulate_gtt_mmio_read - emulate GTT MMIO register read 1797 * @vgpu: a vGPU 1798 * @off: register offset 1799 * @p_data: data will be returned to guest 1800 * @bytes: data length 1801 * 1802 * This function is used to emulate the GTT MMIO register read 1803 * 1804 * Returns: 1805 * Zero on success, error code if failed. 1806 */ 1807 int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, 1808 void *p_data, unsigned int bytes) 1809 { 1810 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1811 int ret; 1812 1813 if (bytes != 4 && bytes != 8) 1814 return -EINVAL; 1815 1816 off -= info->gtt_start_offset; 1817 ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes); 1818 return ret; 1819 } 1820 1821 static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, 1822 void *p_data, unsigned int bytes) 1823 { 1824 struct intel_gvt *gvt = vgpu->gvt; 1825 const struct intel_gvt_device_info *info = &gvt->device_info; 1826 struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm; 1827 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 1828 unsigned long g_gtt_index = off >> info->gtt_entry_size_shift; 1829 unsigned long gma, gfn; 1830 struct intel_gvt_gtt_entry e, m; 1831 dma_addr_t dma_addr; 1832 int ret; 1833 1834 if (bytes != 4 && bytes != 8) 1835 return -EINVAL; 1836 1837 gma = g_gtt_index << I915_GTT_PAGE_SHIFT; 1838 1839 /* the VM may configure the whole GM space when ballooning is used */ 1840 if (!vgpu_gmadr_is_valid(vgpu, gma)) 1841 return 0; 1842 1843 ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index); 1844 1845 memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, 1846 bytes); 1847 m = e; 1848 1849 if (ops->test_present(&e)) { 1850 gfn = ops->get_pfn(&e); 1851 1852 /* one PTE update may be issued in multiple writes and the 1853 * first write may not construct a valid gfn 1854 */ 1855 if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) { 1856 ops->set_pfn(&m, gvt->gtt.scratch_mfn); 1857 goto out; 1858 } 1859 1860 ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, 1861 &dma_addr); 1862 if (ret) { 1863 gvt_vgpu_err("fail to populate guest ggtt entry\n"); 1864 /* guest driver may read/write the entry when partial 1865 * update the entry in this situation p2m will fail 1866 * settting the shadow entry to point to a scratch page 1867 */ 1868 ops->set_pfn(&m, gvt->gtt.scratch_mfn); 1869 } else 1870 ops->set_pfn(&m, dma_addr >> PAGE_SHIFT); 1871 } else 1872 ops->set_pfn(&m, gvt->gtt.scratch_mfn); 1873 1874 out: 1875 ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index); 1876 ggtt_invalidate(gvt->dev_priv); 1877 ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index); 1878 return 0; 1879 } 1880 1881 /* 1882 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write 1883 * @vgpu: a vGPU 1884 * @off: register offset 1885 * @p_data: data from guest write 1886 * @bytes: data length 1887 * 1888 * This function is used to emulate the GTT MMIO register write 1889 * 1890 * Returns: 1891 * Zero on success, error code if failed. 1892 */ 1893 int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, 1894 unsigned int off, void *p_data, unsigned int bytes) 1895 { 1896 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1897 int ret; 1898 1899 if (bytes != 4 && bytes != 8) 1900 return -EINVAL; 1901 1902 off -= info->gtt_start_offset; 1903 ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes); 1904 return ret; 1905 } 1906 1907 static int alloc_scratch_pages(struct intel_vgpu *vgpu, 1908 intel_gvt_gtt_type_t type) 1909 { 1910 struct intel_vgpu_gtt *gtt = &vgpu->gtt; 1911 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1912 int page_entry_num = I915_GTT_PAGE_SIZE >> 1913 vgpu->gvt->device_info.gtt_entry_size_shift; 1914 void *scratch_pt; 1915 int i; 1916 struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; 1917 dma_addr_t daddr; 1918 1919 if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX)) 1920 return -EINVAL; 1921 1922 scratch_pt = (void *)get_zeroed_page(GFP_KERNEL); 1923 if (!scratch_pt) { 1924 gvt_vgpu_err("fail to allocate scratch page\n"); 1925 return -ENOMEM; 1926 } 1927 1928 daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 1929 4096, PCI_DMA_BIDIRECTIONAL); 1930 if (dma_mapping_error(dev, daddr)) { 1931 gvt_vgpu_err("fail to dmamap scratch_pt\n"); 1932 __free_page(virt_to_page(scratch_pt)); 1933 return -ENOMEM; 1934 } 1935 gtt->scratch_pt[type].page_mfn = 1936 (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); 1937 gtt->scratch_pt[type].page = virt_to_page(scratch_pt); 1938 gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n", 1939 vgpu->id, type, gtt->scratch_pt[type].page_mfn); 1940 1941 /* Build the tree by full filled the scratch pt with the entries which 1942 * point to the next level scratch pt or scratch page. The 1943 * scratch_pt[type] indicate the scratch pt/scratch page used by the 1944 * 'type' pt. 1945 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by 1946 * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self 1947 * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn. 1948 */ 1949 if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) { 1950 struct intel_gvt_gtt_entry se; 1951 1952 memset(&se, 0, sizeof(struct intel_gvt_gtt_entry)); 1953 se.type = get_entry_type(type - 1); 1954 ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn); 1955 1956 /* The entry parameters like present/writeable/cache type 1957 * set to the same as i915's scratch page tree. 1958 */ 1959 se.val64 |= _PAGE_PRESENT | _PAGE_RW; 1960 if (type == GTT_TYPE_PPGTT_PDE_PT) 1961 se.val64 |= PPAT_CACHED; 1962 1963 for (i = 0; i < page_entry_num; i++) 1964 ops->set_entry(scratch_pt, &se, i, false, 0, vgpu); 1965 } 1966 1967 return 0; 1968 } 1969 1970 static int release_scratch_page_tree(struct intel_vgpu *vgpu) 1971 { 1972 int i; 1973 struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; 1974 dma_addr_t daddr; 1975 1976 for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { 1977 if (vgpu->gtt.scratch_pt[i].page != NULL) { 1978 daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn << 1979 I915_GTT_PAGE_SHIFT); 1980 dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); 1981 __free_page(vgpu->gtt.scratch_pt[i].page); 1982 vgpu->gtt.scratch_pt[i].page = NULL; 1983 vgpu->gtt.scratch_pt[i].page_mfn = 0; 1984 } 1985 } 1986 1987 return 0; 1988 } 1989 1990 static int create_scratch_page_tree(struct intel_vgpu *vgpu) 1991 { 1992 int i, ret; 1993 1994 for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { 1995 ret = alloc_scratch_pages(vgpu, i); 1996 if (ret) 1997 goto err; 1998 } 1999 2000 return 0; 2001 2002 err: 2003 release_scratch_page_tree(vgpu); 2004 return ret; 2005 } 2006 2007 /** 2008 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virulization 2009 * @vgpu: a vGPU 2010 * 2011 * This function is used to initialize per-vGPU graphics memory virtualization 2012 * components. 2013 * 2014 * Returns: 2015 * Zero on success, error code if failed. 2016 */ 2017 int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) 2018 { 2019 struct intel_vgpu_gtt *gtt = &vgpu->gtt; 2020 2021 INIT_RADIX_TREE(>t->spt_tree, GFP_KERNEL); 2022 2023 INIT_LIST_HEAD(>t->ppgtt_mm_list_head); 2024 INIT_LIST_HEAD(>t->oos_page_list_head); 2025 INIT_LIST_HEAD(>t->post_shadow_list_head); 2026 2027 gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu); 2028 if (IS_ERR(gtt->ggtt_mm)) { 2029 gvt_vgpu_err("fail to create mm for ggtt.\n"); 2030 return PTR_ERR(gtt->ggtt_mm); 2031 } 2032 2033 intel_vgpu_reset_ggtt(vgpu); 2034 2035 return create_scratch_page_tree(vgpu); 2036 } 2037 2038 static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu) 2039 { 2040 struct list_head *pos, *n; 2041 struct intel_vgpu_mm *mm; 2042 2043 list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { 2044 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); 2045 intel_vgpu_destroy_mm(mm); 2046 } 2047 2048 if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head))) 2049 gvt_err("vgpu ppgtt mm is not fully destoried\n"); 2050 2051 if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) { 2052 gvt_err("Why we still has spt not freed?\n"); 2053 ppgtt_free_all_spt(vgpu); 2054 } 2055 } 2056 2057 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu) 2058 { 2059 intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm); 2060 vgpu->gtt.ggtt_mm = NULL; 2061 } 2062 2063 /** 2064 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virulization 2065 * @vgpu: a vGPU 2066 * 2067 * This function is used to clean up per-vGPU graphics memory virtualization 2068 * components. 2069 * 2070 * Returns: 2071 * Zero on success, error code if failed. 2072 */ 2073 void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) 2074 { 2075 intel_vgpu_destroy_all_ppgtt_mm(vgpu); 2076 intel_vgpu_destroy_ggtt_mm(vgpu); 2077 release_scratch_page_tree(vgpu); 2078 } 2079 2080 static void clean_spt_oos(struct intel_gvt *gvt) 2081 { 2082 struct intel_gvt_gtt *gtt = &gvt->gtt; 2083 struct list_head *pos, *n; 2084 struct intel_vgpu_oos_page *oos_page; 2085 2086 WARN(!list_empty(>t->oos_page_use_list_head), 2087 "someone is still using oos page\n"); 2088 2089 list_for_each_safe(pos, n, >t->oos_page_free_list_head) { 2090 oos_page = container_of(pos, struct intel_vgpu_oos_page, list); 2091 list_del(&oos_page->list); 2092 kfree(oos_page); 2093 } 2094 } 2095 2096 static int setup_spt_oos(struct intel_gvt *gvt) 2097 { 2098 struct intel_gvt_gtt *gtt = &gvt->gtt; 2099 struct intel_vgpu_oos_page *oos_page; 2100 int i; 2101 int ret; 2102 2103 INIT_LIST_HEAD(>t->oos_page_free_list_head); 2104 INIT_LIST_HEAD(>t->oos_page_use_list_head); 2105 2106 for (i = 0; i < preallocated_oos_pages; i++) { 2107 oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL); 2108 if (!oos_page) { 2109 ret = -ENOMEM; 2110 goto fail; 2111 } 2112 2113 INIT_LIST_HEAD(&oos_page->list); 2114 INIT_LIST_HEAD(&oos_page->vm_list); 2115 oos_page->id = i; 2116 list_add_tail(&oos_page->list, >t->oos_page_free_list_head); 2117 } 2118 2119 gvt_dbg_mm("%d oos pages preallocated\n", i); 2120 2121 return 0; 2122 fail: 2123 clean_spt_oos(gvt); 2124 return ret; 2125 } 2126 2127 /** 2128 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object 2129 * @vgpu: a vGPU 2130 * @page_table_level: PPGTT page table level 2131 * @root_entry: PPGTT page table root pointers 2132 * 2133 * This function is used to find a PPGTT mm object from mm object pool 2134 * 2135 * Returns: 2136 * pointer to mm object on success, NULL if failed. 2137 */ 2138 struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, 2139 u64 pdps[]) 2140 { 2141 struct intel_vgpu_mm *mm; 2142 struct list_head *pos; 2143 2144 list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) { 2145 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); 2146 2147 switch (mm->ppgtt_mm.root_entry_type) { 2148 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY: 2149 if (pdps[0] == mm->ppgtt_mm.guest_pdps[0]) 2150 return mm; 2151 break; 2152 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY: 2153 if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps, 2154 sizeof(mm->ppgtt_mm.guest_pdps))) 2155 return mm; 2156 break; 2157 default: 2158 GEM_BUG_ON(1); 2159 } 2160 } 2161 return NULL; 2162 } 2163 2164 /** 2165 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object. 2166 * @vgpu: a vGPU 2167 * @root_entry_type: ppgtt root entry type 2168 * @pdps: guest pdps 2169 * 2170 * This function is used to find or create a PPGTT mm object from a guest. 2171 * 2172 * Returns: 2173 * Zero on success, negative error code if failed. 2174 */ 2175 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, 2176 intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) 2177 { 2178 struct intel_vgpu_mm *mm; 2179 2180 mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); 2181 if (mm) { 2182 intel_vgpu_mm_get(mm); 2183 } else { 2184 mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps); 2185 if (IS_ERR(mm)) 2186 gvt_vgpu_err("fail to create mm\n"); 2187 } 2188 return mm; 2189 } 2190 2191 /** 2192 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object. 2193 * @vgpu: a vGPU 2194 * @pdps: guest pdps 2195 * 2196 * This function is used to find a PPGTT mm object from a guest and destroy it. 2197 * 2198 * Returns: 2199 * Zero on success, negative error code if failed. 2200 */ 2201 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]) 2202 { 2203 struct intel_vgpu_mm *mm; 2204 2205 mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); 2206 if (!mm) { 2207 gvt_vgpu_err("fail to find ppgtt instance.\n"); 2208 return -EINVAL; 2209 } 2210 intel_vgpu_mm_put(mm); 2211 return 0; 2212 } 2213 2214 /** 2215 * intel_gvt_init_gtt - initialize mm components of a GVT device 2216 * @gvt: GVT device 2217 * 2218 * This function is called at the initialization stage, to initialize 2219 * the mm components of a GVT device. 2220 * 2221 * Returns: 2222 * zero on success, negative error code if failed. 2223 */ 2224 int intel_gvt_init_gtt(struct intel_gvt *gvt) 2225 { 2226 int ret; 2227 void *page; 2228 struct device *dev = &gvt->dev_priv->drm.pdev->dev; 2229 dma_addr_t daddr; 2230 2231 gvt_dbg_core("init gtt\n"); 2232 2233 if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) 2234 || IS_KABYLAKE(gvt->dev_priv)) { 2235 gvt->gtt.pte_ops = &gen8_gtt_pte_ops; 2236 gvt->gtt.gma_ops = &gen8_gtt_gma_ops; 2237 } else { 2238 return -ENODEV; 2239 } 2240 2241 page = (void *)get_zeroed_page(GFP_KERNEL); 2242 if (!page) { 2243 gvt_err("fail to allocate scratch ggtt page\n"); 2244 return -ENOMEM; 2245 } 2246 2247 daddr = dma_map_page(dev, virt_to_page(page), 0, 2248 4096, PCI_DMA_BIDIRECTIONAL); 2249 if (dma_mapping_error(dev, daddr)) { 2250 gvt_err("fail to dmamap scratch ggtt page\n"); 2251 __free_page(virt_to_page(page)); 2252 return -ENOMEM; 2253 } 2254 2255 gvt->gtt.scratch_page = virt_to_page(page); 2256 gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT); 2257 2258 if (enable_out_of_sync) { 2259 ret = setup_spt_oos(gvt); 2260 if (ret) { 2261 gvt_err("fail to initialize SPT oos\n"); 2262 dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); 2263 __free_page(gvt->gtt.scratch_page); 2264 return ret; 2265 } 2266 } 2267 INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head); 2268 return 0; 2269 } 2270 2271 /** 2272 * intel_gvt_clean_gtt - clean up mm components of a GVT device 2273 * @gvt: GVT device 2274 * 2275 * This function is called at the driver unloading stage, to clean up the 2276 * the mm components of a GVT device. 2277 * 2278 */ 2279 void intel_gvt_clean_gtt(struct intel_gvt *gvt) 2280 { 2281 struct device *dev = &gvt->dev_priv->drm.pdev->dev; 2282 dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn << 2283 I915_GTT_PAGE_SHIFT); 2284 2285 dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL); 2286 2287 __free_page(gvt->gtt.scratch_page); 2288 2289 if (enable_out_of_sync) 2290 clean_spt_oos(gvt); 2291 } 2292 2293 /** 2294 * intel_vgpu_reset_ggtt - reset the GGTT entry 2295 * @vgpu: a vGPU 2296 * 2297 * This function is called at the vGPU create stage 2298 * to reset all the GGTT entries. 2299 * 2300 */ 2301 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) 2302 { 2303 struct intel_gvt *gvt = vgpu->gvt; 2304 struct drm_i915_private *dev_priv = gvt->dev_priv; 2305 struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 2306 struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; 2307 u32 index; 2308 u32 num_entries; 2309 2310 pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn); 2311 pte_ops->set_present(&entry); 2312 2313 index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; 2314 num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; 2315 while (num_entries--) 2316 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2317 2318 index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; 2319 num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; 2320 while (num_entries--) 2321 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2322 2323 ggtt_invalidate(dev_priv); 2324 } 2325 2326 /** 2327 * intel_vgpu_reset_gtt - reset the all GTT related status 2328 * @vgpu: a vGPU 2329 * 2330 * This function is called from vfio core to reset reset all 2331 * GTT related status, including GGTT, PPGTT, scratch page. 2332 * 2333 */ 2334 void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) 2335 { 2336 /* Shadow pages are only created when there is no page 2337 * table tracking data, so remove page tracking data after 2338 * removing the shadow pages. 2339 */ 2340 intel_vgpu_destroy_all_ppgtt_mm(vgpu); 2341 intel_vgpu_reset_ggtt(vgpu); 2342 } 2343