1 /* 2 * GTT virtualization 3 * 4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 * SOFTWARE. 24 * 25 * Authors: 26 * Zhi Wang <zhi.a.wang@intel.com> 27 * Zhenyu Wang <zhenyuw@linux.intel.com> 28 * Xiao Zheng <xiao.zheng@intel.com> 29 * 30 * Contributors: 31 * Min He <min.he@intel.com> 32 * Bing Niu <bing.niu@intel.com> 33 * 34 */ 35 36 #include "i915_drv.h" 37 #include "gvt.h" 38 #include "i915_pvinfo.h" 39 #include "trace.h" 40 41 #if defined(VERBOSE_DEBUG) 42 #define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args) 43 #else 44 #define gvt_vdbg_mm(fmt, args...) 45 #endif 46 47 static bool enable_out_of_sync = false; 48 static int preallocated_oos_pages = 8192; 49 50 /* 51 * validate a gm address and related range size, 52 * translate it to host gm address 53 */ 54 bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size) 55 { 56 if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size 57 && !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) { 58 gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n", 59 addr, size); 60 return false; 61 } 62 return true; 63 } 64 65 /* translate a guest gmadr to host gmadr */ 66 int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr) 67 { 68 if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr), 69 "invalid guest gmadr %llx\n", g_addr)) 70 return -EACCES; 71 72 if (vgpu_gmadr_is_aperture(vgpu, g_addr)) 73 *h_addr = vgpu_aperture_gmadr_base(vgpu) 74 + (g_addr - vgpu_aperture_offset(vgpu)); 75 else 76 *h_addr = vgpu_hidden_gmadr_base(vgpu) 77 + (g_addr - vgpu_hidden_offset(vgpu)); 78 return 0; 79 } 80 81 /* translate a host gmadr to guest gmadr */ 82 int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr) 83 { 84 if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr), 85 "invalid host gmadr %llx\n", h_addr)) 86 return -EACCES; 87 88 if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr)) 89 *g_addr = vgpu_aperture_gmadr_base(vgpu) 90 + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt)); 91 else 92 *g_addr = vgpu_hidden_gmadr_base(vgpu) 93 + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt)); 94 return 0; 95 } 96 97 int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index, 98 unsigned long *h_index) 99 { 100 u64 h_addr; 101 int ret; 102 103 ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT, 104 &h_addr); 105 if (ret) 106 return ret; 107 108 *h_index = h_addr >> I915_GTT_PAGE_SHIFT; 
109 return 0; 110 } 111 112 int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, 113 unsigned long *g_index) 114 { 115 u64 g_addr; 116 int ret; 117 118 ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT, 119 &g_addr); 120 if (ret) 121 return ret; 122 123 *g_index = g_addr >> I915_GTT_PAGE_SHIFT; 124 return 0; 125 } 126 127 #define gtt_type_is_entry(type) \ 128 (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \ 129 && type != GTT_TYPE_PPGTT_PTE_ENTRY \ 130 && type != GTT_TYPE_PPGTT_ROOT_ENTRY) 131 132 #define gtt_type_is_pt(type) \ 133 (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) 134 135 #define gtt_type_is_pte_pt(type) \ 136 (type == GTT_TYPE_PPGTT_PTE_PT) 137 138 #define gtt_type_is_root_pointer(type) \ 139 (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY) 140 141 #define gtt_init_entry(e, t, p, v) do { \ 142 (e)->type = t; \ 143 (e)->pdev = p; \ 144 memcpy(&(e)->val64, &v, sizeof(v)); \ 145 } while (0) 146 147 /* 148 * Mappings between GTT_TYPE* enumerations. 149 * Following information can be found according to the given type: 150 * - type of next level page table 151 * - type of entry inside this level page table 152 * - type of entry with PSE set 153 * 154 * If the given type doesn't have such a kind of information, 155 * e.g. give a l4 root entry type, then request to get its PSE type, 156 * give a PTE page table type, then request to get its next level page 157 * table type, as we know l4 root entry doesn't have a PSE bit, 158 * and a PTE page table doesn't have a next level page table type, 159 * GTT_TYPE_INVALID will be returned. This is useful when traversing a 160 * page table. 161 */ 162 163 struct gtt_type_table_entry { 164 int entry_type; 165 int pt_type; 166 int next_pt_type; 167 int pse_entry_type; 168 }; 169 170 #define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \ 171 [type] = { \ 172 .entry_type = e_type, \ 173 .pt_type = cpt_type, \ 174 .next_pt_type = npt_type, \ 175 .pse_entry_type = pse_type, \ 176 } 177 178 static struct gtt_type_table_entry gtt_type_table[] = { 179 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY, 180 GTT_TYPE_PPGTT_ROOT_L4_ENTRY, 181 GTT_TYPE_INVALID, 182 GTT_TYPE_PPGTT_PML4_PT, 183 GTT_TYPE_INVALID), 184 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT, 185 GTT_TYPE_PPGTT_PML4_ENTRY, 186 GTT_TYPE_PPGTT_PML4_PT, 187 GTT_TYPE_PPGTT_PDP_PT, 188 GTT_TYPE_INVALID), 189 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY, 190 GTT_TYPE_PPGTT_PML4_ENTRY, 191 GTT_TYPE_PPGTT_PML4_PT, 192 GTT_TYPE_PPGTT_PDP_PT, 193 GTT_TYPE_INVALID), 194 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT, 195 GTT_TYPE_PPGTT_PDP_ENTRY, 196 GTT_TYPE_PPGTT_PDP_PT, 197 GTT_TYPE_PPGTT_PDE_PT, 198 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 199 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY, 200 GTT_TYPE_PPGTT_ROOT_L3_ENTRY, 201 GTT_TYPE_INVALID, 202 GTT_TYPE_PPGTT_PDE_PT, 203 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 204 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY, 205 GTT_TYPE_PPGTT_PDP_ENTRY, 206 GTT_TYPE_PPGTT_PDP_PT, 207 GTT_TYPE_PPGTT_PDE_PT, 208 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 209 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT, 210 GTT_TYPE_PPGTT_PDE_ENTRY, 211 GTT_TYPE_PPGTT_PDE_PT, 212 GTT_TYPE_PPGTT_PTE_PT, 213 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 214 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY, 215 GTT_TYPE_PPGTT_PDE_ENTRY, 216 GTT_TYPE_PPGTT_PDE_PT, 217 GTT_TYPE_PPGTT_PTE_PT, 218 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 219 /* We take IPS bit as 'PSE' for PTE level. 
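	 * With IPS set in the parent PDE, 4K PTEs are interpreted as 64K
	 * entries, which is why the PTE-level rows below list
	 * GTT_TYPE_PPGTT_PTE_64K_ENTRY as their pse_entry_type.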
*/ 220 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT, 221 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 222 GTT_TYPE_PPGTT_PTE_PT, 223 GTT_TYPE_INVALID, 224 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 225 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY, 226 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 227 GTT_TYPE_PPGTT_PTE_PT, 228 GTT_TYPE_INVALID, 229 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 230 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY, 231 GTT_TYPE_PPGTT_PTE_4K_ENTRY, 232 GTT_TYPE_PPGTT_PTE_PT, 233 GTT_TYPE_INVALID, 234 GTT_TYPE_PPGTT_PTE_64K_ENTRY), 235 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY, 236 GTT_TYPE_PPGTT_PDE_ENTRY, 237 GTT_TYPE_PPGTT_PDE_PT, 238 GTT_TYPE_INVALID, 239 GTT_TYPE_PPGTT_PTE_2M_ENTRY), 240 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY, 241 GTT_TYPE_PPGTT_PDP_ENTRY, 242 GTT_TYPE_PPGTT_PDP_PT, 243 GTT_TYPE_INVALID, 244 GTT_TYPE_PPGTT_PTE_1G_ENTRY), 245 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE, 246 GTT_TYPE_GGTT_PTE, 247 GTT_TYPE_INVALID, 248 GTT_TYPE_INVALID, 249 GTT_TYPE_INVALID), 250 }; 251 252 static inline int get_next_pt_type(int type) 253 { 254 return gtt_type_table[type].next_pt_type; 255 } 256 257 static inline int get_pt_type(int type) 258 { 259 return gtt_type_table[type].pt_type; 260 } 261 262 static inline int get_entry_type(int type) 263 { 264 return gtt_type_table[type].entry_type; 265 } 266 267 static inline int get_pse_type(int type) 268 { 269 return gtt_type_table[type].pse_entry_type; 270 } 271 272 static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index) 273 { 274 void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index; 275 276 return readq(addr); 277 } 278 279 static void ggtt_invalidate(struct drm_i915_private *dev_priv) 280 { 281 mmio_hw_access_pre(dev_priv); 282 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 283 mmio_hw_access_post(dev_priv); 284 } 285 286 static void write_pte64(struct drm_i915_private *dev_priv, 287 unsigned long index, u64 pte) 288 { 289 void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index; 290 291 writeq(pte, addr); 292 } 293 294 static inline int gtt_get_entry64(void *pt, 295 struct intel_gvt_gtt_entry *e, 296 unsigned long index, bool hypervisor_access, unsigned long gpa, 297 struct intel_vgpu *vgpu) 298 { 299 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 300 int ret; 301 302 if (WARN_ON(info->gtt_entry_size != 8)) 303 return -EINVAL; 304 305 if (hypervisor_access) { 306 ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa + 307 (index << info->gtt_entry_size_shift), 308 &e->val64, 8); 309 if (WARN_ON(ret)) 310 return ret; 311 } else if (!pt) { 312 e->val64 = read_pte64(vgpu->gvt->dev_priv, index); 313 } else { 314 e->val64 = *((u64 *)pt + index); 315 } 316 return 0; 317 } 318 319 static inline int gtt_set_entry64(void *pt, 320 struct intel_gvt_gtt_entry *e, 321 unsigned long index, bool hypervisor_access, unsigned long gpa, 322 struct intel_vgpu *vgpu) 323 { 324 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 325 int ret; 326 327 if (WARN_ON(info->gtt_entry_size != 8)) 328 return -EINVAL; 329 330 if (hypervisor_access) { 331 ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa + 332 (index << info->gtt_entry_size_shift), 333 &e->val64, 8); 334 if (WARN_ON(ret)) 335 return ret; 336 } else if (!pt) { 337 write_pte64(vgpu->gvt->dev_priv, index, e->val64); 338 } else { 339 *((u64 *)pt + index) = e->val64; 340 } 341 return 0; 342 } 343 344 #define GTT_HAW 46 345 346 #define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30) 347 #define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21) 348 #define 
ADDR_64K_MASK GENMASK_ULL(GTT_HAW - 1, 16) 349 #define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12) 350 351 #define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52) 352 #define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */ 353 354 #define GTT_64K_PTE_STRIDE 16 355 356 static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) 357 { 358 unsigned long pfn; 359 360 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) 361 pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT; 362 else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) 363 pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT; 364 else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) 365 pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT; 366 else 367 pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT; 368 return pfn; 369 } 370 371 static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn) 372 { 373 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) { 374 e->val64 &= ~ADDR_1G_MASK; 375 pfn &= (ADDR_1G_MASK >> PAGE_SHIFT); 376 } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) { 377 e->val64 &= ~ADDR_2M_MASK; 378 pfn &= (ADDR_2M_MASK >> PAGE_SHIFT); 379 } else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) { 380 e->val64 &= ~ADDR_64K_MASK; 381 pfn &= (ADDR_64K_MASK >> PAGE_SHIFT); 382 } else { 383 e->val64 &= ~ADDR_4K_MASK; 384 pfn &= (ADDR_4K_MASK >> PAGE_SHIFT); 385 } 386 387 e->val64 |= (pfn << PAGE_SHIFT); 388 } 389 390 static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e) 391 { 392 return !!(e->val64 & _PAGE_PSE); 393 } 394 395 static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e) 396 { 397 if (gen8_gtt_test_pse(e)) { 398 switch (e->type) { 399 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 400 e->val64 &= ~_PAGE_PSE; 401 e->type = GTT_TYPE_PPGTT_PDE_ENTRY; 402 break; 403 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 404 e->type = GTT_TYPE_PPGTT_PDP_ENTRY; 405 e->val64 &= ~_PAGE_PSE; 406 break; 407 default: 408 WARN_ON(1); 409 } 410 } 411 } 412 413 static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e) 414 { 415 if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) 416 return false; 417 418 return !!(e->val64 & GEN8_PDE_IPS_64K); 419 } 420 421 static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e) 422 { 423 if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY)) 424 return; 425 426 e->val64 &= ~GEN8_PDE_IPS_64K; 427 } 428 429 static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e) 430 { 431 /* 432 * i915 writes PDP root pointer registers without present bit, 433 * it also works, so we need to treat root pointer entry 434 * specifically. 435 */ 436 if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY 437 || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) 438 return (e->val64 != 0); 439 else 440 return (e->val64 & _PAGE_PRESENT); 441 } 442 443 static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e) 444 { 445 e->val64 &= ~_PAGE_PRESENT; 446 } 447 448 static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e) 449 { 450 e->val64 |= _PAGE_PRESENT; 451 } 452 453 static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e) 454 { 455 return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED); 456 } 457 458 static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e) 459 { 460 e->val64 |= GTT_SPTE_FLAG_64K_SPLITED; 461 } 462 463 static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e) 464 { 465 e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED; 466 } 467 468 /* 469 * Per-platform GMA routines. 
470 */ 471 static unsigned long gma_to_ggtt_pte_index(unsigned long gma) 472 { 473 unsigned long x = (gma >> I915_GTT_PAGE_SHIFT); 474 475 trace_gma_index(__func__, gma, x); 476 return x; 477 } 478 479 #define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \ 480 static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \ 481 { \ 482 unsigned long x = (exp); \ 483 trace_gma_index(__func__, gma, x); \ 484 return x; \ 485 } 486 487 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff)); 488 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff)); 489 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3)); 490 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff)); 491 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff)); 492 493 static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = { 494 .get_entry = gtt_get_entry64, 495 .set_entry = gtt_set_entry64, 496 .clear_present = gtt_entry_clear_present, 497 .set_present = gtt_entry_set_present, 498 .test_present = gen8_gtt_test_present, 499 .test_pse = gen8_gtt_test_pse, 500 .clear_pse = gen8_gtt_clear_pse, 501 .clear_ips = gen8_gtt_clear_ips, 502 .test_ips = gen8_gtt_test_ips, 503 .clear_64k_splited = gen8_gtt_clear_64k_splited, 504 .set_64k_splited = gen8_gtt_set_64k_splited, 505 .test_64k_splited = gen8_gtt_test_64k_splited, 506 .get_pfn = gen8_gtt_get_pfn, 507 .set_pfn = gen8_gtt_set_pfn, 508 }; 509 510 static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = { 511 .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index, 512 .gma_to_pte_index = gen8_gma_to_pte_index, 513 .gma_to_pde_index = gen8_gma_to_pde_index, 514 .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index, 515 .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index, 516 .gma_to_pml4_index = gen8_gma_to_pml4_index, 517 }; 518 519 /* Update entry type per pse and ips bit. */ 520 static void update_entry_type_for_real(struct intel_gvt_gtt_pte_ops *pte_ops, 521 struct intel_gvt_gtt_entry *entry, bool ips) 522 { 523 switch (entry->type) { 524 case GTT_TYPE_PPGTT_PDE_ENTRY: 525 case GTT_TYPE_PPGTT_PDP_ENTRY: 526 if (pte_ops->test_pse(entry)) 527 entry->type = get_pse_type(entry->type); 528 break; 529 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 530 if (ips) 531 entry->type = get_pse_type(entry->type); 532 break; 533 default: 534 GEM_BUG_ON(!gtt_type_is_entry(entry->type)); 535 } 536 537 GEM_BUG_ON(entry->type == GTT_TYPE_INVALID); 538 } 539 540 /* 541 * MM helpers. 542 */ 543 static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm, 544 struct intel_gvt_gtt_entry *entry, unsigned long index, 545 bool guest) 546 { 547 struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 548 549 GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT); 550 551 entry->type = mm->ppgtt_mm.root_entry_type; 552 pte_ops->get_entry(guest ? 
mm->ppgtt_mm.guest_pdps : 553 mm->ppgtt_mm.shadow_pdps, 554 entry, index, false, 0, mm->vgpu); 555 update_entry_type_for_real(pte_ops, entry, false); 556 } 557 558 static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm, 559 struct intel_gvt_gtt_entry *entry, unsigned long index) 560 { 561 _ppgtt_get_root_entry(mm, entry, index, true); 562 } 563 564 static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm, 565 struct intel_gvt_gtt_entry *entry, unsigned long index) 566 { 567 _ppgtt_get_root_entry(mm, entry, index, false); 568 } 569 570 static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm, 571 struct intel_gvt_gtt_entry *entry, unsigned long index, 572 bool guest) 573 { 574 struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 575 576 pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps : 577 mm->ppgtt_mm.shadow_pdps, 578 entry, index, false, 0, mm->vgpu); 579 } 580 581 static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm, 582 struct intel_gvt_gtt_entry *entry, unsigned long index) 583 { 584 _ppgtt_set_root_entry(mm, entry, index, true); 585 } 586 587 static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm, 588 struct intel_gvt_gtt_entry *entry, unsigned long index) 589 { 590 _ppgtt_set_root_entry(mm, entry, index, false); 591 } 592 593 static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm, 594 struct intel_gvt_gtt_entry *entry, unsigned long index) 595 { 596 struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 597 598 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 599 600 entry->type = GTT_TYPE_GGTT_PTE; 601 pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index, 602 false, 0, mm->vgpu); 603 } 604 605 static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm, 606 struct intel_gvt_gtt_entry *entry, unsigned long index) 607 { 608 struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 609 610 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 611 612 pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index, 613 false, 0, mm->vgpu); 614 } 615 616 static void ggtt_get_host_entry(struct intel_vgpu_mm *mm, 617 struct intel_gvt_gtt_entry *entry, unsigned long index) 618 { 619 struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 620 621 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 622 623 pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu); 624 } 625 626 static void ggtt_set_host_entry(struct intel_vgpu_mm *mm, 627 struct intel_gvt_gtt_entry *entry, unsigned long index) 628 { 629 struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; 630 631 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); 632 633 pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu); 634 } 635 636 /* 637 * PPGTT shadow page table helpers. 638 */ 639 static inline int ppgtt_spt_get_entry( 640 struct intel_vgpu_ppgtt_spt *spt, 641 void *page_table, int type, 642 struct intel_gvt_gtt_entry *e, unsigned long index, 643 bool guest) 644 { 645 struct intel_gvt *gvt = spt->vgpu->gvt; 646 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 647 int ret; 648 649 e->type = get_entry_type(type); 650 651 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) 652 return -EINVAL; 653 654 ret = ops->get_entry(page_table, e, index, guest, 655 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 656 spt->vgpu); 657 if (ret) 658 return ret; 659 660 update_entry_type_for_real(ops, e, guest ? 
661 spt->guest_page.pde_ips : false); 662 663 gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", 664 type, e->type, index, e->val64); 665 return 0; 666 } 667 668 static inline int ppgtt_spt_set_entry( 669 struct intel_vgpu_ppgtt_spt *spt, 670 void *page_table, int type, 671 struct intel_gvt_gtt_entry *e, unsigned long index, 672 bool guest) 673 { 674 struct intel_gvt *gvt = spt->vgpu->gvt; 675 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 676 677 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n")) 678 return -EINVAL; 679 680 gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n", 681 type, e->type, index, e->val64); 682 683 return ops->set_entry(page_table, e, index, guest, 684 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT, 685 spt->vgpu); 686 } 687 688 #define ppgtt_get_guest_entry(spt, e, index) \ 689 ppgtt_spt_get_entry(spt, NULL, \ 690 spt->guest_page.type, e, index, true) 691 692 #define ppgtt_set_guest_entry(spt, e, index) \ 693 ppgtt_spt_set_entry(spt, NULL, \ 694 spt->guest_page.type, e, index, true) 695 696 #define ppgtt_get_shadow_entry(spt, e, index) \ 697 ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \ 698 spt->shadow_page.type, e, index, false) 699 700 #define ppgtt_set_shadow_entry(spt, e, index) \ 701 ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \ 702 spt->shadow_page.type, e, index, false) 703 704 static void *alloc_spt(gfp_t gfp_mask) 705 { 706 struct intel_vgpu_ppgtt_spt *spt; 707 708 spt = kzalloc(sizeof(*spt), gfp_mask); 709 if (!spt) 710 return NULL; 711 712 spt->shadow_page.page = alloc_page(gfp_mask); 713 if (!spt->shadow_page.page) { 714 kfree(spt); 715 return NULL; 716 } 717 return spt; 718 } 719 720 static void free_spt(struct intel_vgpu_ppgtt_spt *spt) 721 { 722 __free_page(spt->shadow_page.page); 723 kfree(spt); 724 } 725 726 static int detach_oos_page(struct intel_vgpu *vgpu, 727 struct intel_vgpu_oos_page *oos_page); 728 729 static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt) 730 { 731 struct device *kdev = &spt->vgpu->gvt->dev_priv->drm.pdev->dev; 732 733 trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type); 734 735 dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096, 736 PCI_DMA_BIDIRECTIONAL); 737 738 radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn); 739 740 if (spt->guest_page.gfn) { 741 if (spt->guest_page.oos_page) 742 detach_oos_page(spt->vgpu, spt->guest_page.oos_page); 743 744 intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn); 745 } 746 747 list_del_init(&spt->post_shadow_list); 748 free_spt(spt); 749 } 750 751 static void ppgtt_free_all_spt(struct intel_vgpu *vgpu) 752 { 753 struct intel_vgpu_ppgtt_spt *spt; 754 struct radix_tree_iter iter; 755 void **slot; 756 757 radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) { 758 spt = radix_tree_deref_slot(slot); 759 ppgtt_free_spt(spt); 760 } 761 } 762 763 static int ppgtt_handle_guest_write_page_table_bytes( 764 struct intel_vgpu_ppgtt_spt *spt, 765 u64 pa, void *p_data, int bytes); 766 767 static int ppgtt_write_protection_handler( 768 struct intel_vgpu_page_track *page_track, 769 u64 gpa, void *data, int bytes) 770 { 771 struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data; 772 773 int ret; 774 775 if (bytes != 4 && bytes != 8) 776 return -EINVAL; 777 778 ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes); 779 if (ret) 780 return ret; 781 return ret; 782 } 783 784 /* Find a spt by guest gfn. 
*/ 785 static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn( 786 struct intel_vgpu *vgpu, unsigned long gfn) 787 { 788 struct intel_vgpu_page_track *track; 789 790 track = intel_vgpu_find_page_track(vgpu, gfn); 791 if (track && track->handler == ppgtt_write_protection_handler) 792 return track->priv_data; 793 794 return NULL; 795 } 796 797 /* Find the spt by shadow page mfn. */ 798 static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn( 799 struct intel_vgpu *vgpu, unsigned long mfn) 800 { 801 return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn); 802 } 803 804 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); 805 806 /* Allocate shadow page table without guest page. */ 807 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( 808 struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) 809 { 810 struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; 811 struct intel_vgpu_ppgtt_spt *spt = NULL; 812 dma_addr_t daddr; 813 int ret; 814 815 retry: 816 spt = alloc_spt(GFP_KERNEL | __GFP_ZERO); 817 if (!spt) { 818 if (reclaim_one_ppgtt_mm(vgpu->gvt)) 819 goto retry; 820 821 gvt_vgpu_err("fail to allocate ppgtt shadow page\n"); 822 return ERR_PTR(-ENOMEM); 823 } 824 825 spt->vgpu = vgpu; 826 atomic_set(&spt->refcount, 1); 827 INIT_LIST_HEAD(&spt->post_shadow_list); 828 829 /* 830 * Init shadow_page. 831 */ 832 spt->shadow_page.type = type; 833 daddr = dma_map_page(kdev, spt->shadow_page.page, 834 0, 4096, PCI_DMA_BIDIRECTIONAL); 835 if (dma_mapping_error(kdev, daddr)) { 836 gvt_vgpu_err("fail to map dma addr\n"); 837 ret = -EINVAL; 838 goto err_free_spt; 839 } 840 spt->shadow_page.vaddr = page_address(spt->shadow_page.page); 841 spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT; 842 843 ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt); 844 if (ret) 845 goto err_unmap_dma; 846 847 return spt; 848 849 err_unmap_dma: 850 dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 851 err_free_spt: 852 free_spt(spt); 853 return ERR_PTR(ret); 854 } 855 856 /* Allocate shadow page table associated with specific gfn. */ 857 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn( 858 struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type, 859 unsigned long gfn, bool guest_pde_ips) 860 { 861 struct intel_vgpu_ppgtt_spt *spt; 862 int ret; 863 864 spt = ppgtt_alloc_spt(vgpu, type); 865 if (IS_ERR(spt)) 866 return spt; 867 868 /* 869 * Init guest_page. 870 */ 871 ret = intel_vgpu_register_page_track(vgpu, gfn, 872 ppgtt_write_protection_handler, spt); 873 if (ret) { 874 ppgtt_free_spt(spt); 875 return ERR_PTR(ret); 876 } 877 878 spt->guest_page.type = type; 879 spt->guest_page.gfn = gfn; 880 spt->guest_page.pde_ips = guest_pde_ips; 881 882 trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn); 883 884 return spt; 885 } 886 887 #define pt_entry_size_shift(spt) \ 888 ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift) 889 890 #define pt_entries(spt) \ 891 (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt)) 892 893 #define for_each_present_guest_entry(spt, e, i) \ 894 for (i = 0; i < pt_entries(spt); \ 895 i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \ 896 if (!ppgtt_get_guest_entry(spt, e, i) && \ 897 spt->vgpu->gvt->gtt.pte_ops->test_present(e)) 898 899 #define for_each_present_shadow_entry(spt, e, i) \ 900 for (i = 0; i < pt_entries(spt); \ 901 i += spt->shadow_page.pde_ips ? 
GTT_64K_PTE_STRIDE : 1) \ 902 if (!ppgtt_get_shadow_entry(spt, e, i) && \ 903 spt->vgpu->gvt->gtt.pte_ops->test_present(e)) 904 905 #define for_each_shadow_entry(spt, e, i) \ 906 for (i = 0; i < pt_entries(spt); \ 907 i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \ 908 if (!ppgtt_get_shadow_entry(spt, e, i)) 909 910 static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt) 911 { 912 int v = atomic_read(&spt->refcount); 913 914 trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1)); 915 atomic_inc(&spt->refcount); 916 } 917 918 static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt) 919 { 920 int v = atomic_read(&spt->refcount); 921 922 trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1)); 923 return atomic_dec_return(&spt->refcount); 924 } 925 926 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt); 927 928 static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, 929 struct intel_gvt_gtt_entry *e) 930 { 931 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 932 struct intel_vgpu_ppgtt_spt *s; 933 intel_gvt_gtt_type_t cur_pt_type; 934 935 GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type))); 936 937 if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY 938 && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { 939 cur_pt_type = get_next_pt_type(e->type) + 1; 940 if (ops->get_pfn(e) == 941 vgpu->gtt.scratch_pt[cur_pt_type].page_mfn) 942 return 0; 943 } 944 s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e)); 945 if (!s) { 946 gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n", 947 ops->get_pfn(e)); 948 return -ENXIO; 949 } 950 return ppgtt_invalidate_spt(s); 951 } 952 953 static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt, 954 struct intel_gvt_gtt_entry *entry) 955 { 956 struct intel_vgpu *vgpu = spt->vgpu; 957 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 958 unsigned long pfn; 959 int type; 960 961 pfn = ops->get_pfn(entry); 962 type = spt->shadow_page.type; 963 964 /* Uninitialized spte or unshadowed spte. */ 965 if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn) 966 return; 967 968 intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT); 969 } 970 971 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt) 972 { 973 struct intel_vgpu *vgpu = spt->vgpu; 974 struct intel_gvt_gtt_entry e; 975 unsigned long index; 976 int ret; 977 978 trace_spt_change(spt->vgpu->id, "die", spt, 979 spt->guest_page.gfn, spt->shadow_page.type); 980 981 if (ppgtt_put_spt(spt) > 0) 982 return 0; 983 984 for_each_present_shadow_entry(spt, &e, index) { 985 switch (e.type) { 986 case GTT_TYPE_PPGTT_PTE_4K_ENTRY: 987 gvt_vdbg_mm("invalidate 4K entry\n"); 988 ppgtt_invalidate_pte(spt, &e); 989 break; 990 case GTT_TYPE_PPGTT_PTE_64K_ENTRY: 991 /* We don't setup 64K shadow entry so far. 
		 */
			WARN(1, "suspicious 64K gtt entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
			gvt_vdbg_mm("invalidate 2M entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			WARN(1, "GVT doesn't support 1GB page\n");
			continue;
		case GTT_TYPE_PPGTT_PML4_ENTRY:
		case GTT_TYPE_PPGTT_PDP_ENTRY:
		case GTT_TYPE_PPGTT_PDE_ENTRY:
			gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
			ret = ppgtt_invalidate_spt_by_shadow_entry(
					spt->vgpu, &e);
			if (ret)
				goto fail;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}

	trace_spt_change(spt->vgpu->id, "release", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
	ppgtt_free_spt(spt);
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
			spt, e.val64, e.type);
	return ret;
}

static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;

	if (INTEL_GEN(dev_priv) == 9 || INTEL_GEN(dev_priv) == 10) {
		u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
			GAMW_ECO_ENABLE_64K_IPS_FIELD;

		return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
	} else if (INTEL_GEN(dev_priv) >= 11) {
		/* 64K paging only controlled by IPS bit in PTE now. */
		return true;
	} else
		return false;
}

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);

static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	bool ips = false;
	int ret;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));

	if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
		ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);

	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
	if (spt) {
		ppgtt_get_spt(spt);

		if (ips != spt->guest_page.pde_ips) {
			spt->guest_page.pde_ips = ips;

			gvt_dbg_mm("reshadow PDE since ips changed\n");
			clear_page(spt->shadow_page.vaddr);
			ret = ppgtt_populate_spt(spt);
			if (ret) {
				ppgtt_put_spt(spt);
				goto err;
			}
		}
	} else {
		int type = get_next_pt_type(we->type);

		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
		if (IS_ERR(spt)) {
			ret = PTR_ERR(spt);
			goto err;
		}

		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
		if (ret)
			goto err_free_spt;

		ret = ppgtt_populate_spt(spt);
		if (ret)
			goto err_free_spt;

		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
				 spt->shadow_page.type);
	}
	return spt;

err_free_spt:
	ppgtt_free_spt(spt);
err:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
		     spt, we->val64, we->type);
	return ERR_PTR(ret);
}
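/*
 * Illustrative sketch only: shadowing one guest PDE typically follows the
 * sequence below (guest_pde, parent_spt and idx are placeholder names, not
 * symbols defined in this file):
 *
 *	spt = ppgtt_populate_spt_by_guest_entry(vgpu, &guest_pde);
 *	if (!IS_ERR(spt)) {
 *		ppgtt_generate_shadow_entry(&shadow_pde, spt, &guest_pde);
 *		ppgtt_set_shadow_entry(parent_spt, &shadow_pde, idx);
 *	}
 *
 * This mirrors what ppgtt_handle_guest_entry_add() does further below.
 */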
static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;

	se->type = ge->type;
	se->val64 = ge->val64;

	/* We always split 64KB pages, so clear IPS in the shadow PDE. */
	if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
		ops->clear_ips(se);

	ops->set_pfn(se, s->shadow_page.mfn);
}

/**
 * Check whether a guest 2MB page can be shadowed as a huge page
 * @vgpu: target vgpu
 * @entry: target pfn's gtt entry
 *
 * Return 1 if 2MB huge gtt shadowing is possible, 0 if the conditions are
 * not met, negative error code on failure.
 */
static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
	struct intel_gvt_gtt_entry *entry)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;

	if (!HAS_PAGE_SIZES(vgpu->gvt->dev_priv, I915_GTT_PAGE_SIZE_2M))
		return 0;

	pfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, ops->get_pfn(entry));
	if (pfn == INTEL_GVT_INVALID_ADDR)
		return -EINVAL;

	return PageTransHuge(pfn_to_page(pfn));
}
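/*
 * When direct 2MB shadowing is not possible (the backing page is not a
 * transparent huge page, or the platform lacks 2M GTT page support), the
 * 2MB guest entry is split into a full page table of 4K shadow PTEs by
 * split_2MB_gtt_entry() below; the guest still sees a single 2MB PDE while
 * the shadow side maps 4K pages.
 */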
static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *se)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *sub_spt;
	struct intel_gvt_gtt_entry sub_se;
	unsigned long start_gfn;
	dma_addr_t dma_addr;
	unsigned long sub_index;
	int ret;

	gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);

	start_gfn = ops->get_pfn(se);

	sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
	if (IS_ERR(sub_spt))
		return PTR_ERR(sub_spt);

	for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
				start_gfn + sub_index, PAGE_SIZE, &dma_addr);
		if (ret) {
			ppgtt_invalidate_spt(spt);
			return ret;
		}
		sub_se.val64 = se->val64;

		/* Copy the PAT field from the PDE. */
		sub_se.val64 &= ~_PAGE_PAT;
		sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;

		ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
		ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
	}

	/* Clear the dirty field. */
	se->val64 &= ~_PAGE_DIRTY;

	ops->clear_pse(se);
	ops->clear_ips(se);
	ops->set_pfn(se, sub_spt->shadow_page.mfn);
	ppgtt_set_shadow_entry(spt, se, index);
	return 0;
}

static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *se)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry entry = *se;
	unsigned long start_gfn;
	dma_addr_t dma_addr;
	int i, ret;

	gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);

	GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);

	start_gfn = ops->get_pfn(se);

	entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
	ops->set_64k_splited(&entry);

	for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
					start_gfn + i, PAGE_SIZE, &dma_addr);
		if (ret)
			return ret;

		ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
		ppgtt_set_shadow_entry(spt, &entry, index + i);
	}
	return 0;
}

static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry se = *ge;
	unsigned long gfn, page_size = PAGE_SIZE;
	dma_addr_t dma_addr;
	int ret;

	if (!pte_ops->test_present(ge))
		return 0;

	gfn = pte_ops->get_pfn(ge);

	switch (ge->type) {
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		gvt_vdbg_mm("shadow 4K gtt entry\n");
		break;
	case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
		gvt_vdbg_mm("shadow 64K gtt entry\n");
		/*
		 * The layout of 64K pages is special: the page size is
		 * controlled by the upper PDE. To keep it simple, we always
		 * split a 64K page into smaller 4K pages in the shadow PT.
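		 * Only every 16th guest PTE (PTE#0, PTE#16, ...) is used for
		 * 64K pages, so one 64K entry expands into GTT_64K_PTE_STRIDE
		 * (16) real 4K shadow PTEs.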
1242 */ 1243 return split_64KB_gtt_entry(vgpu, spt, index, &se); 1244 case GTT_TYPE_PPGTT_PTE_2M_ENTRY: 1245 gvt_vdbg_mm("shadow 2M gtt entry\n"); 1246 ret = is_2MB_gtt_possible(vgpu, ge); 1247 if (ret == 0) 1248 return split_2MB_gtt_entry(vgpu, spt, index, &se); 1249 else if (ret < 0) 1250 return ret; 1251 page_size = I915_GTT_PAGE_SIZE_2M; 1252 break; 1253 case GTT_TYPE_PPGTT_PTE_1G_ENTRY: 1254 gvt_vgpu_err("GVT doesn't support 1GB entry\n"); 1255 return -EINVAL; 1256 default: 1257 GEM_BUG_ON(1); 1258 }; 1259 1260 /* direct shadow */ 1261 ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, page_size, 1262 &dma_addr); 1263 if (ret) 1264 return -ENXIO; 1265 1266 pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT); 1267 ppgtt_set_shadow_entry(spt, &se, index); 1268 return 0; 1269 } 1270 1271 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt) 1272 { 1273 struct intel_vgpu *vgpu = spt->vgpu; 1274 struct intel_gvt *gvt = vgpu->gvt; 1275 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 1276 struct intel_vgpu_ppgtt_spt *s; 1277 struct intel_gvt_gtt_entry se, ge; 1278 unsigned long gfn, i; 1279 int ret; 1280 1281 trace_spt_change(spt->vgpu->id, "born", spt, 1282 spt->guest_page.gfn, spt->shadow_page.type); 1283 1284 for_each_present_guest_entry(spt, &ge, i) { 1285 if (gtt_type_is_pt(get_next_pt_type(ge.type))) { 1286 s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); 1287 if (IS_ERR(s)) { 1288 ret = PTR_ERR(s); 1289 goto fail; 1290 } 1291 ppgtt_get_shadow_entry(spt, &se, i); 1292 ppgtt_generate_shadow_entry(&se, s, &ge); 1293 ppgtt_set_shadow_entry(spt, &se, i); 1294 } else { 1295 gfn = ops->get_pfn(&ge); 1296 if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) { 1297 ops->set_pfn(&se, gvt->gtt.scratch_mfn); 1298 ppgtt_set_shadow_entry(spt, &se, i); 1299 continue; 1300 } 1301 1302 ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge); 1303 if (ret) 1304 goto fail; 1305 } 1306 } 1307 return 0; 1308 fail: 1309 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1310 spt, ge.val64, ge.type); 1311 return ret; 1312 } 1313 1314 static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt, 1315 struct intel_gvt_gtt_entry *se, unsigned long index) 1316 { 1317 struct intel_vgpu *vgpu = spt->vgpu; 1318 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1319 int ret; 1320 1321 trace_spt_guest_change(spt->vgpu->id, "remove", spt, 1322 spt->shadow_page.type, se->val64, index); 1323 1324 gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n", 1325 se->type, index, se->val64); 1326 1327 if (!ops->test_present(se)) 1328 return 0; 1329 1330 if (ops->get_pfn(se) == 1331 vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn) 1332 return 0; 1333 1334 if (gtt_type_is_pt(get_next_pt_type(se->type))) { 1335 struct intel_vgpu_ppgtt_spt *s = 1336 intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se)); 1337 if (!s) { 1338 gvt_vgpu_err("fail to find guest page\n"); 1339 ret = -ENXIO; 1340 goto fail; 1341 } 1342 ret = ppgtt_invalidate_spt(s); 1343 if (ret) 1344 goto fail; 1345 } else { 1346 /* We don't setup 64K shadow entry so far. 
*/ 1347 WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY, 1348 "suspicious 64K entry\n"); 1349 ppgtt_invalidate_pte(spt, se); 1350 } 1351 1352 return 0; 1353 fail: 1354 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", 1355 spt, se->val64, se->type); 1356 return ret; 1357 } 1358 1359 static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt, 1360 struct intel_gvt_gtt_entry *we, unsigned long index) 1361 { 1362 struct intel_vgpu *vgpu = spt->vgpu; 1363 struct intel_gvt_gtt_entry m; 1364 struct intel_vgpu_ppgtt_spt *s; 1365 int ret; 1366 1367 trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type, 1368 we->val64, index); 1369 1370 gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n", 1371 we->type, index, we->val64); 1372 1373 if (gtt_type_is_pt(get_next_pt_type(we->type))) { 1374 s = ppgtt_populate_spt_by_guest_entry(vgpu, we); 1375 if (IS_ERR(s)) { 1376 ret = PTR_ERR(s); 1377 goto fail; 1378 } 1379 ppgtt_get_shadow_entry(spt, &m, index); 1380 ppgtt_generate_shadow_entry(&m, s, we); 1381 ppgtt_set_shadow_entry(spt, &m, index); 1382 } else { 1383 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we); 1384 if (ret) 1385 goto fail; 1386 } 1387 return 0; 1388 fail: 1389 gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n", 1390 spt, we->val64, we->type); 1391 return ret; 1392 } 1393 1394 static int sync_oos_page(struct intel_vgpu *vgpu, 1395 struct intel_vgpu_oos_page *oos_page) 1396 { 1397 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1398 struct intel_gvt *gvt = vgpu->gvt; 1399 struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops; 1400 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; 1401 struct intel_gvt_gtt_entry old, new; 1402 int index; 1403 int ret; 1404 1405 trace_oos_change(vgpu->id, "sync", oos_page->id, 1406 spt, spt->guest_page.type); 1407 1408 old.type = new.type = get_entry_type(spt->guest_page.type); 1409 old.val64 = new.val64 = 0; 1410 1411 for (index = 0; index < (I915_GTT_PAGE_SIZE >> 1412 info->gtt_entry_size_shift); index++) { 1413 ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu); 1414 ops->get_entry(NULL, &new, index, true, 1415 spt->guest_page.gfn << PAGE_SHIFT, vgpu); 1416 1417 if (old.val64 == new.val64 1418 && !test_and_clear_bit(index, spt->post_shadow_bitmap)) 1419 continue; 1420 1421 trace_oos_sync(vgpu->id, oos_page->id, 1422 spt, spt->guest_page.type, 1423 new.val64, index); 1424 1425 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new); 1426 if (ret) 1427 return ret; 1428 1429 ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu); 1430 } 1431 1432 spt->guest_page.write_cnt = 0; 1433 list_del_init(&spt->post_shadow_list); 1434 return 0; 1435 } 1436 1437 static int detach_oos_page(struct intel_vgpu *vgpu, 1438 struct intel_vgpu_oos_page *oos_page) 1439 { 1440 struct intel_gvt *gvt = vgpu->gvt; 1441 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt; 1442 1443 trace_oos_change(vgpu->id, "detach", oos_page->id, 1444 spt, spt->guest_page.type); 1445 1446 spt->guest_page.write_cnt = 0; 1447 spt->guest_page.oos_page = NULL; 1448 oos_page->spt = NULL; 1449 1450 list_del_init(&oos_page->vm_list); 1451 list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head); 1452 1453 return 0; 1454 } 1455 1456 static int attach_oos_page(struct intel_vgpu_oos_page *oos_page, 1457 struct intel_vgpu_ppgtt_spt *spt) 1458 { 1459 struct intel_gvt *gvt = spt->vgpu->gvt; 1460 int ret; 1461 1462 ret = intel_gvt_hypervisor_read_gpa(spt->vgpu, 1463 spt->guest_page.gfn << 
I915_GTT_PAGE_SHIFT,
			oos_page->mem, I915_GTT_PAGE_SIZE);
	if (ret)
		return ret;

	oos_page->spt = spt;
	spt->guest_page.oos_page = oos_page;

	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);

	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
			 spt, spt->guest_page.type);
	return 0;
}

static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
	if (ret)
		return ret;

	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_del_init(&oos_page->vm_list);
	return sync_oos_page(spt->vgpu, oos_page);
}

static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	WARN(oos_page, "shadow PPGTT page already has an oos page\n");

	if (list_empty(&gtt->oos_page_free_list_head)) {
		oos_page = container_of(gtt->oos_page_use_list_head.next,
			struct intel_vgpu_oos_page, list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
		ret = detach_oos_page(spt->vgpu, oos_page);
		if (ret)
			return ret;
	} else
		oos_page = container_of(gtt->oos_page_free_list_head.next,
			struct intel_vgpu_oos_page, list);
	return attach_oos_page(oos_page, spt);
}

static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;

	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
		return -EINVAL;

	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
}

/**
 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages of a vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to sync all the out-of-sync shadow pages of the vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;
	int ret;

	if (!enable_out_of_sync)
		return 0;

	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
		oos_page = container_of(pos,
				struct intel_vgpu_oos_page, vm_list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * The heart of PPGTT shadow page table.
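 *
 * Rough flow of the out-of-sync (oos) optimization, for orientation only:
 * a write-protected PTE page table that sees repeated guest writes is
 * switched to oos mode (write protection dropped, guest writes mirrored
 * into oos_page->mem), and is brought back in sync by
 * intel_vgpu_sync_oos_pages() right before a workload is submitted.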
 */
static int ppgtt_handle_guest_write_page_table(
		struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	int type = spt->shadow_page.type;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry old_se;
	int new_present;
	int i, ret;

	new_present = ops->test_present(we);

	/*
	 * Add the new entry first and then remove the old one, so that the
	 * ppgtt table stays valid during the window between the addition and
	 * the removal.
	 */
	ppgtt_get_shadow_entry(spt, &old_se, index);

	if (new_present) {
		ret = ppgtt_handle_guest_entry_add(spt, we, index);
		if (ret)
			goto fail;
	}

	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
	if (ret)
		goto fail;

	if (!new_present) {
		/* For split 64KB entries, we need to clear them all. */
		if (ops->test_64k_splited(&old_se) &&
		    !(index % GTT_64K_PTE_STRIDE)) {
			gvt_vdbg_mm("remove split 64K shadow entries\n");
			for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
				ops->clear_64k_splited(&old_se);
				ops->set_pfn(&old_se,
					vgpu->gtt.scratch_pt[type].page_mfn);
				ppgtt_set_shadow_entry(spt, &old_se, index + i);
			}
		} else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
			   old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
			ops->clear_pse(&old_se);
			ops->set_pfn(&old_se,
				     vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &old_se, index);
		} else {
			ops->set_pfn(&old_se,
				     vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &old_se, index);
		}
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
			spt, we->val64, we->type);
	return ret;
}

static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	return enable_out_of_sync
		&& gtt_type_is_pte_pt(spt->guest_page.type)
		&& spt->guest_page.write_cnt >= 2;
}

static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
		unsigned long index)
{
	set_bit(index, spt->post_shadow_bitmap);
	if (!list_empty(&spt->post_shadow_list))
		return;

	list_add_tail(&spt->post_shadow_list,
		      &spt->vgpu->gtt.post_shadow_list_head);
}

/**
 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to flush all the post shadows for a vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
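 *
 * Each bit set in a spt's post_shadow_bitmap marks a guest PTE that was
 * written with a partial (smaller than entry-sized) access; those entries
 * are re-shadowed here in a single pass.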
1654 */ 1655 int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) 1656 { 1657 struct list_head *pos, *n; 1658 struct intel_vgpu_ppgtt_spt *spt; 1659 struct intel_gvt_gtt_entry ge; 1660 unsigned long index; 1661 int ret; 1662 1663 list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) { 1664 spt = container_of(pos, struct intel_vgpu_ppgtt_spt, 1665 post_shadow_list); 1666 1667 for_each_set_bit(index, spt->post_shadow_bitmap, 1668 GTT_ENTRY_NUM_IN_ONE_PAGE) { 1669 ppgtt_get_guest_entry(spt, &ge, index); 1670 1671 ret = ppgtt_handle_guest_write_page_table(spt, 1672 &ge, index); 1673 if (ret) 1674 return ret; 1675 clear_bit(index, spt->post_shadow_bitmap); 1676 } 1677 list_del_init(&spt->post_shadow_list); 1678 } 1679 return 0; 1680 } 1681 1682 static int ppgtt_handle_guest_write_page_table_bytes( 1683 struct intel_vgpu_ppgtt_spt *spt, 1684 u64 pa, void *p_data, int bytes) 1685 { 1686 struct intel_vgpu *vgpu = spt->vgpu; 1687 struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; 1688 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; 1689 struct intel_gvt_gtt_entry we, se; 1690 unsigned long index; 1691 int ret; 1692 1693 index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift; 1694 1695 ppgtt_get_guest_entry(spt, &we, index); 1696 1697 /* 1698 * For page table which has 64K gtt entry, only PTE#0, PTE#16, 1699 * PTE#32, ... PTE#496 are used. Unused PTEs update should be 1700 * ignored. 1701 */ 1702 if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY && 1703 (index % GTT_64K_PTE_STRIDE)) { 1704 gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n", 1705 index); 1706 return 0; 1707 } 1708 1709 if (bytes == info->gtt_entry_size) { 1710 ret = ppgtt_handle_guest_write_page_table(spt, &we, index); 1711 if (ret) 1712 return ret; 1713 } else { 1714 if (!test_bit(index, spt->post_shadow_bitmap)) { 1715 int type = spt->shadow_page.type; 1716 1717 ppgtt_get_shadow_entry(spt, &se, index); 1718 ret = ppgtt_handle_guest_entry_removal(spt, &se, index); 1719 if (ret) 1720 return ret; 1721 ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); 1722 ppgtt_set_shadow_entry(spt, &se, index); 1723 } 1724 ppgtt_set_post_shadow(spt, index); 1725 } 1726 1727 if (!enable_out_of_sync) 1728 return 0; 1729 1730 spt->guest_page.write_cnt++; 1731 1732 if (spt->guest_page.oos_page) 1733 ops->set_entry(spt->guest_page.oos_page->mem, &we, index, 1734 false, 0, vgpu); 1735 1736 if (can_do_out_of_sync(spt)) { 1737 if (!spt->guest_page.oos_page) 1738 ppgtt_allocate_oos_page(spt); 1739 1740 ret = ppgtt_set_guest_page_oos(spt); 1741 if (ret < 0) 1742 return ret; 1743 } 1744 return 0; 1745 } 1746 1747 static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm) 1748 { 1749 struct intel_vgpu *vgpu = mm->vgpu; 1750 struct intel_gvt *gvt = vgpu->gvt; 1751 struct intel_gvt_gtt *gtt = &gvt->gtt; 1752 struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; 1753 struct intel_gvt_gtt_entry se; 1754 int index; 1755 1756 if (!mm->ppgtt_mm.shadowed) 1757 return; 1758 1759 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) { 1760 ppgtt_get_shadow_root_entry(mm, &se, index); 1761 1762 if (!ops->test_present(&se)) 1763 continue; 1764 1765 ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se); 1766 se.val64 = 0; 1767 ppgtt_set_shadow_root_entry(mm, &se, index); 1768 1769 trace_spt_guest_change(vgpu->id, "destroy root pointer", 1770 NULL, se.type, se.val64, index); 1771 } 1772 1773 mm->ppgtt_mm.shadowed = false; 1774 } 1775 1776 1777 static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm) 1778 { 1779 struct 
intel_vgpu *vgpu = mm->vgpu; 1780 struct intel_gvt *gvt = vgpu->gvt; 1781 struct intel_gvt_gtt *gtt = &gvt->gtt; 1782 struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops; 1783 struct intel_vgpu_ppgtt_spt *spt; 1784 struct intel_gvt_gtt_entry ge, se; 1785 int index, ret; 1786 1787 if (mm->ppgtt_mm.shadowed) 1788 return 0; 1789 1790 mm->ppgtt_mm.shadowed = true; 1791 1792 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) { 1793 ppgtt_get_guest_root_entry(mm, &ge, index); 1794 1795 if (!ops->test_present(&ge)) 1796 continue; 1797 1798 trace_spt_guest_change(vgpu->id, __func__, NULL, 1799 ge.type, ge.val64, index); 1800 1801 spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge); 1802 if (IS_ERR(spt)) { 1803 gvt_vgpu_err("fail to populate guest root pointer\n"); 1804 ret = PTR_ERR(spt); 1805 goto fail; 1806 } 1807 ppgtt_generate_shadow_entry(&se, spt, &ge); 1808 ppgtt_set_shadow_root_entry(mm, &se, index); 1809 1810 trace_spt_guest_change(vgpu->id, "populate root pointer", 1811 NULL, se.type, se.val64, index); 1812 } 1813 1814 return 0; 1815 fail: 1816 invalidate_ppgtt_mm(mm); 1817 return ret; 1818 } 1819 1820 static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu) 1821 { 1822 struct intel_vgpu_mm *mm; 1823 1824 mm = kzalloc(sizeof(*mm), GFP_KERNEL); 1825 if (!mm) 1826 return NULL; 1827 1828 mm->vgpu = vgpu; 1829 kref_init(&mm->ref); 1830 atomic_set(&mm->pincount, 0); 1831 1832 return mm; 1833 } 1834 1835 static void vgpu_free_mm(struct intel_vgpu_mm *mm) 1836 { 1837 kfree(mm); 1838 } 1839 1840 /** 1841 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU 1842 * @vgpu: a vGPU 1843 * @root_entry_type: ppgtt root entry type 1844 * @pdps: guest pdps. 1845 * 1846 * This function is used to create a ppgtt mm object for a vGPU. 1847 * 1848 * Returns: 1849 * Zero on success, negative error code in pointer if failed. 
1850 */ 1851 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, 1852 intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) 1853 { 1854 struct intel_gvt *gvt = vgpu->gvt; 1855 struct intel_vgpu_mm *mm; 1856 int ret; 1857 1858 mm = vgpu_alloc_mm(vgpu); 1859 if (!mm) 1860 return ERR_PTR(-ENOMEM); 1861 1862 mm->type = INTEL_GVT_MM_PPGTT; 1863 1864 GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY && 1865 root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY); 1866 mm->ppgtt_mm.root_entry_type = root_entry_type; 1867 1868 INIT_LIST_HEAD(&mm->ppgtt_mm.list); 1869 INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list); 1870 1871 if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) 1872 mm->ppgtt_mm.guest_pdps[0] = pdps[0]; 1873 else 1874 memcpy(mm->ppgtt_mm.guest_pdps, pdps, 1875 sizeof(mm->ppgtt_mm.guest_pdps)); 1876 1877 ret = shadow_ppgtt_mm(mm); 1878 if (ret) { 1879 gvt_vgpu_err("failed to shadow ppgtt mm\n"); 1880 vgpu_free_mm(mm); 1881 return ERR_PTR(ret); 1882 } 1883 1884 list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head); 1885 1886 mutex_lock(&gvt->gtt.ppgtt_mm_lock); 1887 list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head); 1888 mutex_unlock(&gvt->gtt.ppgtt_mm_lock); 1889 1890 return mm; 1891 } 1892 1893 static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu) 1894 { 1895 struct intel_vgpu_mm *mm; 1896 unsigned long nr_entries; 1897 1898 mm = vgpu_alloc_mm(vgpu); 1899 if (!mm) 1900 return ERR_PTR(-ENOMEM); 1901 1902 mm->type = INTEL_GVT_MM_GGTT; 1903 1904 nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT; 1905 mm->ggtt_mm.virtual_ggtt = 1906 vzalloc(array_size(nr_entries, 1907 vgpu->gvt->device_info.gtt_entry_size)); 1908 if (!mm->ggtt_mm.virtual_ggtt) { 1909 vgpu_free_mm(mm); 1910 return ERR_PTR(-ENOMEM); 1911 } 1912 1913 return mm; 1914 } 1915 1916 /** 1917 * _intel_vgpu_mm_release - destroy a mm object 1918 * @mm_ref: a kref object 1919 * 1920 * This function is used to destroy a mm object for vGPU 1921 * 1922 */ 1923 void _intel_vgpu_mm_release(struct kref *mm_ref) 1924 { 1925 struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref); 1926 1927 if (GEM_WARN_ON(atomic_read(&mm->pincount))) 1928 gvt_err("vgpu mm pin count bug detected\n"); 1929 1930 if (mm->type == INTEL_GVT_MM_PPGTT) { 1931 list_del(&mm->ppgtt_mm.list); 1932 list_del(&mm->ppgtt_mm.lru_list); 1933 invalidate_ppgtt_mm(mm); 1934 } else { 1935 vfree(mm->ggtt_mm.virtual_ggtt); 1936 } 1937 1938 vgpu_free_mm(mm); 1939 } 1940 1941 /** 1942 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object 1943 * @mm: a vGPU mm object 1944 * 1945 * This function is called when user doesn't want to use a vGPU mm object 1946 */ 1947 void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm) 1948 { 1949 atomic_dec(&mm->pincount); 1950 } 1951 1952 /** 1953 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object 1954 * @mm: target vgpu mm 1955 * 1956 * This function is called when user wants to use a vGPU mm object. If this 1957 * mm object hasn't been shadowed yet, the shadow will be populated at this 1958 * time. 1959 * 1960 * Returns: 1961 * Zero on success, negative error code if failed. 
static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_mm *mm;
	unsigned long nr_entries;

	mm = vgpu_alloc_mm(vgpu);
	if (!mm)
		return ERR_PTR(-ENOMEM);

	mm->type = INTEL_GVT_MM_GGTT;

	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
	mm->ggtt_mm.virtual_ggtt =
		vzalloc(array_size(nr_entries,
				   vgpu->gvt->device_info.gtt_entry_size));
	if (!mm->ggtt_mm.virtual_ggtt) {
		vgpu_free_mm(mm);
		return ERR_PTR(-ENOMEM);
	}

	return mm;
}

/**
 * _intel_vgpu_mm_release - destroy a mm object
 * @mm_ref: a kref object
 *
 * This function is used to destroy a mm object for vGPU
 *
 */
void _intel_vgpu_mm_release(struct kref *mm_ref)
{
	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);

	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
		gvt_err("vgpu mm pin count bug detected\n");

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		list_del(&mm->ppgtt_mm.list);
		list_del(&mm->ppgtt_mm.lru_list);
		invalidate_ppgtt_mm(mm);
	} else {
		vfree(mm->ggtt_mm.virtual_ggtt);
	}

	vgpu_free_mm(mm);
}

/**
 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
 * @mm: a vGPU mm object
 *
 * This function is called when a user is done using a vGPU mm object.
 */
void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
{
	atomic_dec(&mm->pincount);
}

/**
 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
 * @mm: target vgpu mm
 *
 * This function is called when a user wants to use a vGPU mm object. If this
 * mm object hasn't been shadowed yet, the shadow will be populated at this
 * time.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
{
	int ret;

	atomic_inc(&mm->pincount);

	if (mm->type == INTEL_GVT_MM_PPGTT) {
		ret = shadow_ppgtt_mm(mm);
		if (ret)
			return ret;

		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
		list_move_tail(&mm->ppgtt_mm.lru_list,
			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
	}

	return 0;
}
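/*
 * Illustrative sketch (not part of the upstream driver): pinning keeps a
 * shadowed mm populated across a period of use, e.g. while a workload that
 * references it is in flight, and protects it from reclaim_one_ppgtt_mm()
 * below. The function name example_use_mm_pinned() is an assumption for
 * illustration only.
 */
#if 0
static int example_use_mm_pinned(struct intel_vgpu_mm *mm)
{
	int ret;

	/* Pin; for a PPGTT mm this also (re)populates the shadow tables. */
	ret = intel_vgpu_pin_mm(mm);
	if (ret)
		return ret;

	/* ... submit and run work that relies on the shadow mm ... */

	/* Balance the pin once the work has completed. */
	intel_vgpu_unpin_mm(mm);
	return 0;
}
#endif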
static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos, *n;

	mutex_lock(&gvt->gtt.ppgtt_mm_lock);

	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);

		if (atomic_read(&mm->pincount))
			continue;

		list_del_init(&mm->ppgtt_mm.lru_list);
		mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
		invalidate_ppgtt_mm(mm);
		return 1;
	}
	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
	return 0;
}

/*
 * GMA translation APIs.
 */
static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;

	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s)
		return -ENXIO;

	if (!guest)
		ppgtt_get_shadow_entry(s, e, index);
	else
		ppgtt_get_guest_entry(s, e, index);
	return 0;
}

/**
 * intel_vgpu_gma_to_gpa - translate a gma to GPA
 * @mm: mm object. could be a PPGTT or GGTT mm object
 * @gma: graphics memory address in this mm object
 *
 * This function is used to translate a graphics memory address in a specific
 * graphics memory space to a guest physical address.
 *
 * Returns:
 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
 */
unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
	struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
	unsigned long gma_index[4];
	struct intel_gvt_gtt_entry e;
	int i, levels = 0;
	int ret;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
		   mm->type != INTEL_GVT_MM_PPGTT);

	if (mm->type == INTEL_GVT_MM_GGTT) {
		if (!vgpu_gmadr_is_valid(vgpu, gma))
			goto err;

		ggtt_get_guest_entry(mm, &e,
				     gma_ops->gma_to_ggtt_pte_index(gma));

		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
			+ (gma & ~I915_GTT_PAGE_MASK);

		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
	} else {
		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			ppgtt_get_shadow_root_entry(mm, &e, 0);

			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
			gma_index[2] = gma_ops->gma_to_pde_index(gma);
			gma_index[3] = gma_ops->gma_to_pte_index(gma);
			levels = 4;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			ppgtt_get_shadow_root_entry(mm, &e,
					gma_ops->gma_to_l3_pdp_index(gma));

			gma_index[0] = gma_ops->gma_to_pde_index(gma);
			gma_index[1] = gma_ops->gma_to_pte_index(gma);
			levels = 2;
			break;
		default:
			GEM_BUG_ON(1);
		}

		/* walk the shadow page table and get gpa from guest entry */
		for (i = 0; i < levels; i++) {
			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
				(i == levels - 1));
			if (ret)
				goto err;

			if (!pte_ops->test_present(&e)) {
				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
				goto err;
			}
		}

		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
			(gma & ~I915_GTT_PAGE_MASK);
		trace_gma_translate(vgpu->id, "ppgtt", 0,
				    mm->ppgtt_mm.root_entry_type, gma, gpa);
	}

	return gpa;
err:
	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
	return INTEL_GVT_INVALID_ADDR;
}
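/*
 * Illustrative sketch (not part of the upstream driver): translating a guest
 * graphics memory address through a mm object and then reading the backing
 * guest memory. The helper name example_read_guest_gm() and the use of
 * intel_gvt_hypervisor_read_gpa() as the follow-up step are assumptions for
 * illustration only.
 */
#if 0
static int example_read_guest_gm(struct intel_vgpu *vgpu,
		struct intel_vgpu_mm *mm, unsigned long gma,
		void *buf, unsigned long len)
{
	unsigned long gpa;

	/* Walk the (shadow) page tables to turn the GMA into a GPA. */
	gpa = intel_vgpu_gma_to_gpa(mm, gma);
	if (gpa == INTEL_GVT_INVALID_ADDR)
		return -EFAULT;

	/* Copy the guest data that the GMA refers to. */
	return intel_gvt_hypervisor_read_gpa(vgpu, gpa, buf, len);
}
#endif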
static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
	unsigned int off, void *p_data, unsigned int bytes)
{
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	unsigned long index = off >> info->gtt_entry_size_shift;
	struct intel_gvt_gtt_entry e;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	ggtt_get_guest_entry(ggtt_mm, &e, index);
	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
	       bytes);
	return 0;
}

/**
 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data will be returned to guest
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register read
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
	return ret;
}
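/*
 * Worked example (illustrative, not part of the upstream driver): with an
 * 8-byte GGTT entry (gtt_entry_size_shift == 3), a 4-byte MMIO access at
 * GTT-relative offset 0x1004 touches the upper half of PTE index
 * 0x1004 >> 3 = 0x200, at byte offset 0x1004 & 7 = 4 inside that entry.
 * The helper name example_ggtt_index_of() is an assumption for illustration
 * only.
 */
#if 0
static unsigned long example_ggtt_index_of(struct intel_vgpu *vgpu,
		unsigned int off)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;

	/* Strip the GTT aperture base, then divide by the entry size. */
	return (off - info->gtt_start_offset) >> info->gtt_entry_size_shift;
}
#endif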
static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *entry)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;

	pfn = pte_ops->get_pfn(entry);
	if (pfn != vgpu->gvt->gtt.scratch_mfn)
		intel_gvt_hypervisor_dma_unmap_guest_page(vgpu,
						pfn << PAGE_SHIFT);
}

static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_device_info *info = &gvt->device_info;
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
	unsigned long gma, gfn;
	/* start from zeroed entries so partial writes never leak stack data */
	struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
	struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
	dma_addr_t dma_addr;
	int ret;
	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
	bool partial_update = false;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;

	/* the VM may configure the whole GM space when ballooning is used */
	if (!vgpu_gmadr_is_valid(vgpu, gma))
		return 0;

	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
	       bytes);

	/* If the ggtt entry size is 8 bytes and the entry is split into two
	 * 4-byte writes, save the first 4 bytes in a list and update only the
	 * virtual PTE. The shadow PTE is updated when the second 4 bytes
	 * arrive.
	 */
	if (bytes < info->gtt_entry_size) {
		bool found = false;

		list_for_each_entry_safe(pos, n,
				&ggtt_mm->ggtt_mm.partial_pte_list, list) {
			if (g_gtt_index == pos->offset >>
					info->gtt_entry_size_shift) {
				if (off != pos->offset) {
					/* the second partial part */
					int last_off = pos->offset &
						(info->gtt_entry_size - 1);

					memcpy((void *)&e.val64 + last_off,
						(void *)&pos->data + last_off,
						bytes);

					list_del(&pos->list);
					kfree(pos);
					found = true;
					break;
				}

				/* update of the first partial part */
				pos->data = e.val64;
				ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
				return 0;
			}
		}

		if (!found) {
			/* the first partial part */
			partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
			if (!partial_pte)
				return -ENOMEM;
			partial_pte->offset = off;
			partial_pte->data = e.val64;
			list_add_tail(&partial_pte->list,
				      &ggtt_mm->ggtt_mm.partial_pte_list);
			partial_update = true;
		}
	}

	if (!partial_update && (ops->test_present(&e))) {
		gfn = ops->get_pfn(&e);
		m = e;

		/* one PTE update may be issued in multiple writes and the
		 * first write may not construct a valid gfn
		 */
		if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
			goto out;
		}

		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
							      PAGE_SIZE,
							      &dma_addr);
		if (ret) {
			gvt_vgpu_err("fail to populate guest ggtt entry\n");
			/* The guest driver may read/write the entry while it
			 * is only partially updated; the p2m mapping can fail
			 * in that case, so point the shadow entry to a
			 * scratch page instead.
			 */
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		} else
			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
	} else {
		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		ops->clear_present(&m);
	}

out:
	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);

	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
	ggtt_invalidate_pte(vgpu, &e);

	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
	ggtt_invalidate(gvt->dev_priv);
	return 0;
}
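/*
 * Worked example (illustrative, not part of the upstream driver): how two
 * 4-byte guest writes to one 8-byte GGTT PTE are combined. The values and
 * the helper name example_combine_partial_pte() are assumptions for
 * illustration only.
 */
#if 0
static u64 example_combine_partial_pte(void)
{
	u32 lo = 0x00234001;	/* first 4-byte write: low dword, PRESENT set */
	u32 hi = 0x00000001;	/* second 4-byte write: high dword */
	u64 pte = 0;

	/* The driver caches the first half in partial_pte_list and merges it
	 * when the second half arrives; only then is the shadow PTE updated.
	 */
	memcpy((void *)&pte + 0, &lo, sizeof(lo));
	memcpy((void *)&pte + 4, &hi, sizeof(hi));

	return pte;	/* 0x0000000100234001 on little-endian hosts */
}
#endif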
/**
 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data from guest write
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register write
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
		unsigned int off, void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
	return ret;
}
static int alloc_scratch_pages(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t type)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int page_entry_num = I915_GTT_PAGE_SIZE >>
				vgpu->gvt->device_info.gtt_entry_size_shift;
	void *scratch_pt;
	int i;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
		return -EINVAL;

	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
	if (!scratch_pt) {
		gvt_vgpu_err("fail to allocate scratch page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
			     4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_vgpu_err("fail to dmamap scratch_pt\n");
		__free_page(virt_to_page(scratch_pt));
		return -ENOMEM;
	}
	gtt->scratch_pt[type].page_mfn =
		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
		   vgpu->id, type, gtt->scratch_pt[type].page_mfn);

	/* Build the scratch tree by filling the scratch page table with
	 * entries that point to the next level scratch page table (or, at the
	 * lowest level, to the scratch page). scratch_pt[type] is the scratch
	 * page table used by a page table of the given 'type'.
	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
	 * GTT_TYPE_PPGTT_PDE_PT level page table, which means this scratch_pt
	 * itself is a GTT_TYPE_PPGTT_PTE_PT and is filled with the scratch
	 * page mfn.
	 */
	if (type > GTT_TYPE_PPGTT_PTE_PT) {
		struct intel_gvt_gtt_entry se;

		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
		se.type = get_entry_type(type - 1);
		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);

		/* The entry parameters (present/writeable/cache type) are set
		 * to the same values as i915's scratch page tree.
		 */
		se.val64 |= _PAGE_PRESENT | _PAGE_RW;
		if (type == GTT_TYPE_PPGTT_PDE_PT)
			se.val64 |= PPAT_CACHED;

		for (i = 0; i < page_entry_num; i++)
			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
	}

	return 0;
}

static int release_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		if (vgpu->gtt.scratch_pt[i].page != NULL) {
			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
						I915_GTT_PAGE_SHIFT);
			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
			__free_page(vgpu->gtt.scratch_pt[i].page);
			vgpu->gtt.scratch_pt[i].page = NULL;
			vgpu->gtt.scratch_pt[i].page_mfn = 0;
		}
	}

	return 0;
}

static int create_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i, ret;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		ret = alloc_scratch_pages(vgpu, i);
		if (ret)
			goto err;
	}

	return 0;

err:
	release_scratch_page_tree(vgpu);
	return ret;
}
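/*
 * Illustrative sketch (not part of the upstream driver): the per-vGPU
 * scratch tree built above forms a chain (PML4_PT -> PDP_PT -> PDE_PT ->
 * PTE_PT), so a not-present guest entry at any level can be shadowed by a
 * scratch page table of the matching level. The helper name
 * example_dump_scratch_chain() is an assumption for illustration only.
 */
#if 0
static void example_dump_scratch_chain(struct intel_vgpu *vgpu)
{
	int type;

	/* Each scratch page table is filled with entries pointing at the
	 * next lower scratch page table, except the PTE-level one.
	 */
	for (type = GTT_TYPE_PPGTT_PTE_PT; type < GTT_TYPE_MAX; type++)
		gvt_dbg_mm("scratch_pt[%d] mfn=0x%lx\n", type,
			   vgpu->gtt.scratch_pt[type].page_mfn);
}
#endif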
/**
 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to initialize per-vGPU graphics memory virtualization
 * components.
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;

	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);

	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_list_head);
	INIT_LIST_HEAD(&gtt->post_shadow_list_head);

	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
	if (IS_ERR(gtt->ggtt_mm)) {
		gvt_vgpu_err("fail to create mm for ggtt.\n");
		return PTR_ERR(gtt->ggtt_mm);
	}

	intel_vgpu_reset_ggtt(vgpu, false);

	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);

	return create_scratch_page_tree(vgpu);
}

static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
		intel_vgpu_destroy_mm(mm);
	}

	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
		gvt_err("vgpu ppgtt mm is not fully destroyed\n");

	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
		gvt_err("there are still unfreed spt objects\n");
		ppgtt_free_all_spt(vgpu);
	}
}

static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
{
	struct intel_gvt_partial_pte *pos, *next;

	list_for_each_entry_safe(pos, next,
				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
				 list) {
		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
			   pos->offset, pos->data);
		kfree(pos);
	}
	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
	vgpu->gtt.ggtt_mm = NULL;
}

/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
 */
void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
{
	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
	intel_vgpu_destroy_ggtt_mm(vgpu);
	release_scratch_page_tree(vgpu);
}

static void clean_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;

	WARN(!list_empty(&gtt->oos_page_use_list_head),
	     "someone is still using oos page\n");

	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
		list_del(&oos_page->list);
		kfree(oos_page);
	}
}

static int setup_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page;
	int i;
	int ret;

	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);

	for (i = 0; i < preallocated_oos_pages; i++) {
		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
		if (!oos_page) {
			ret = -ENOMEM;
			goto fail;
		}

		INIT_LIST_HEAD(&oos_page->list);
		INIT_LIST_HEAD(&oos_page->vm_list);
		oos_page->id = i;
		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
	}

	gvt_dbg_mm("%d oos pages preallocated\n", i);

	return 0;
fail:
	clean_spt_oos(gvt);
	return ret;
}

/**
 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
 * @vgpu: a vGPU
 * @pdps: pdp root array
 *
 * This function is used to find a PPGTT mm object from the mm object pool.
 *
 * Returns:
 * pointer to mm object on success, NULL if failed.
 */
struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
		u64 pdps[])
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos;

	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);

		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
				return mm;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
				    sizeof(mm->ppgtt_mm.guest_pdps)))
				return mm;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}
	return NULL;
}

/**
 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
 * @vgpu: a vGPU
 * @root_entry_type: ppgtt root entry type
 * @pdps: guest pdps
 *
 * This function is used to find or create a PPGTT mm object from a guest.
 *
 * Returns:
 * pointer to mm object on success, ERR_PTR-encoded error code if failed.
 */
struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (mm) {
		intel_vgpu_mm_get(mm);
	} else {
		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
		if (IS_ERR(mm))
			gvt_vgpu_err("fail to create mm\n");
	}
	return mm;
}

/**
 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
 * @vgpu: a vGPU
 * @pdps: guest pdps
 *
 * This function is used to find a PPGTT mm object from a guest and destroy it.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (!mm) {
		gvt_vgpu_err("fail to find ppgtt instance.\n");
		return -EINVAL;
	}
	intel_vgpu_mm_put(mm);
	return 0;
}
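/*
 * Illustrative sketch (not part of the upstream driver): how a guest
 * notification carrying PPGTT root pointers might be serviced with the
 * get/put helpers above. The function names example_handle_g2v_create() and
 * example_handle_g2v_destroy(), and the idea that the pdps arrive via a
 * guest notification, are assumptions for illustration only.
 */
#if 0
static int example_handle_g2v_create(struct intel_vgpu *vgpu, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	/* Reuse an existing mm for these pdps, or create and shadow one. */
	mm = intel_vgpu_get_ppgtt_mm(vgpu, GTT_TYPE_PPGTT_ROOT_L4_ENTRY, pdps);
	if (IS_ERR(mm))
		return PTR_ERR(mm);
	return 0;
}

static int example_handle_g2v_destroy(struct intel_vgpu *vgpu, u64 pdps[])
{
	/* Drop the reference taken at creation time. */
	return intel_vgpu_put_ppgtt_mm(vgpu, pdps);
}
#endif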
/**
 * intel_gvt_init_gtt - initialize mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the initialization stage, to initialize
 * the mm components of a GVT device.
 *
 * Returns:
 * zero on success, negative error code if failed.
 */
int intel_gvt_init_gtt(struct intel_gvt *gvt)
{
	int ret;
	void *page;
	struct device *dev = &gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	gvt_dbg_core("init gtt\n");

	gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
	gvt->gtt.gma_ops = &gen8_gtt_gma_ops;

	page = (void *)get_zeroed_page(GFP_KERNEL);
	if (!page) {
		gvt_err("fail to allocate scratch ggtt page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(page), 0,
			     4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_err("fail to dmamap scratch ggtt page\n");
		__free_page(virt_to_page(page));
		return -ENOMEM;
	}

	gvt->gtt.scratch_page = virt_to_page(page);
	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);

	if (enable_out_of_sync) {
		ret = setup_spt_oos(gvt);
		if (ret) {
			gvt_err("fail to initialize SPT oos\n");
			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
			__free_page(gvt->gtt.scratch_page);
			return ret;
		}
	}
	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
	mutex_init(&gvt->gtt.ppgtt_mm_lock);
	return 0;
}

/**
 * intel_gvt_clean_gtt - clean up mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the driver unloading stage, to clean up
 * the mm components of a GVT device.
 *
 */
void intel_gvt_clean_gtt(struct intel_gvt *gvt)
{
	struct device *dev = &gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
					I915_GTT_PAGE_SHIFT);

	dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);

	__free_page(gvt->gtt.scratch_page);

	if (enable_out_of_sync)
		clean_spt_oos(gvt);
}

/**
 * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
 * @vgpu: a vGPU
 *
 * This function is called to invalidate all PPGTT instances of a vGPU.
 *
 */
void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
		if (mm->type == INTEL_GVT_MM_PPGTT) {
			mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
			list_del_init(&mm->ppgtt_mm.lru_list);
			mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
			if (mm->ppgtt_mm.shadowed)
				invalidate_ppgtt_mm(mm);
		}
	}
}
2728 * 2729 */ 2730 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old) 2731 { 2732 struct intel_gvt *gvt = vgpu->gvt; 2733 struct drm_i915_private *dev_priv = gvt->dev_priv; 2734 struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; 2735 struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; 2736 struct intel_gvt_gtt_entry old_entry; 2737 u32 index; 2738 u32 num_entries; 2739 2740 pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn); 2741 pte_ops->set_present(&entry); 2742 2743 index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; 2744 num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; 2745 while (num_entries--) { 2746 if (invalidate_old) { 2747 ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); 2748 ggtt_invalidate_pte(vgpu, &old_entry); 2749 } 2750 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2751 } 2752 2753 index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; 2754 num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; 2755 while (num_entries--) { 2756 if (invalidate_old) { 2757 ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); 2758 ggtt_invalidate_pte(vgpu, &old_entry); 2759 } 2760 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); 2761 } 2762 2763 ggtt_invalidate(dev_priv); 2764 } 2765 2766 /** 2767 * intel_vgpu_reset_gtt - reset the all GTT related status 2768 * @vgpu: a vGPU 2769 * 2770 * This function is called from vfio core to reset reset all 2771 * GTT related status, including GGTT, PPGTT, scratch page. 2772 * 2773 */ 2774 void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) 2775 { 2776 /* Shadow pages are only created when there is no page 2777 * table tracking data, so remove page tracking data after 2778 * removing the shadow pages. 2779 */ 2780 intel_vgpu_destroy_all_ppgtt_mm(vgpu); 2781 intel_vgpu_reset_ggtt(vgpu, true); 2782 } 2783