1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2008-2015 Intel Corporation 4 */ 5 6 #include <linux/highmem.h> 7 8 #include "i915_drv.h" 9 #include "i915_reg.h" 10 #include "i915_scatterlist.h" 11 #include "i915_pvinfo.h" 12 #include "i915_vgpu.h" 13 #include "intel_gt_regs.h" 14 #include "intel_mchbar_regs.h" 15 16 /** 17 * DOC: fence register handling 18 * 19 * Important to avoid confusions: "fences" in the i915 driver are not execution 20 * fences used to track command completion but hardware detiler objects which 21 * wrap a given range of the global GTT. Each platform has only a fairly limited 22 * set of these objects. 23 * 24 * Fences are used to detile GTT memory mappings. They're also connected to the 25 * hardware frontbuffer render tracking and hence interact with frontbuffer 26 * compression. Furthermore on older platforms fences are required for tiled 27 * objects used by the display engine. They can also be used by the render 28 * engine - they're required for blitter commands and are optional for render 29 * commands. But on gen4+ both display (with the exception of fbc) and rendering 30 * have their own tiling state bits and don't need fences. 31 * 32 * Also note that fences only support X and Y tiling and hence can't be used for 33 * the fancier new tiling formats like W, Ys and Yf. 34 * 35 * Finally note that because fences are such a restricted resource they're 36 * dynamically associated with objects. Furthermore fence state is committed to 37 * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must 38 * explicitly call i915_gem_object_get_fence() to synchronize fencing status 39 * for cpu access. Also note that some code wants an unfenced view, for those 40 * cases the fence can be removed forcefully with i915_gem_object_put_fence(). 41 * 42 * Internally these functions will synchronize with userspace access by removing 43 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. 44 */ 45 46 #define pipelined 0 47 48 static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence) 49 { 50 return fence->ggtt->vm.i915; 51 } 52 53 static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence) 54 { 55 return fence->ggtt->vm.gt->uncore; 56 } 57 58 static void i965_write_fence_reg(struct i915_fence_reg *fence) 59 { 60 i915_reg_t fence_reg_lo, fence_reg_hi; 61 int fence_pitch_shift; 62 u64 val; 63 64 if (GRAPHICS_VER(fence_to_i915(fence)) >= 6) { 65 fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); 66 fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); 67 fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; 68 69 } else { 70 fence_reg_lo = FENCE_REG_965_LO(fence->id); 71 fence_reg_hi = FENCE_REG_965_HI(fence->id); 72 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 73 } 74 75 val = 0; 76 if (fence->tiling) { 77 unsigned int stride = fence->stride; 78 79 GEM_BUG_ON(!IS_ALIGNED(stride, 128)); 80 81 val = fence->start + fence->size - I965_FENCE_PAGE; 82 val <<= 32; 83 val |= fence->start; 84 val |= (u64)((stride / 128) - 1) << fence_pitch_shift; 85 if (fence->tiling == I915_TILING_Y) 86 val |= BIT(I965_FENCE_TILING_Y_SHIFT); 87 val |= I965_FENCE_REG_VALID; 88 } 89 90 if (!pipelined) { 91 struct intel_uncore *uncore = fence_to_uncore(fence); 92 93 /* 94 * To w/a incoherency with non-atomic 64-bit register updates, 95 * we split the 64-bit update into two 32-bit writes. In order 96 * for a partial fence not to be evaluated between writes, we 97 * precede the update with write to turn off the fence register, 98 * and only enable the fence as the last step. 99 * 100 * For extra levels of paranoia, we make sure each step lands 101 * before applying the next step. 102 */ 103 intel_uncore_write_fw(uncore, fence_reg_lo, 0); 104 intel_uncore_posting_read_fw(uncore, fence_reg_lo); 105 106 intel_uncore_write_fw(uncore, fence_reg_hi, upper_32_bits(val)); 107 intel_uncore_write_fw(uncore, fence_reg_lo, lower_32_bits(val)); 108 intel_uncore_posting_read_fw(uncore, fence_reg_lo); 109 } 110 } 111 112 static void i915_write_fence_reg(struct i915_fence_reg *fence) 113 { 114 u32 val; 115 116 val = 0; 117 if (fence->tiling) { 118 unsigned int stride = fence->stride; 119 unsigned int tiling = fence->tiling; 120 bool is_y_tiled = tiling == I915_TILING_Y; 121 122 if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence))) 123 stride /= 128; 124 else 125 stride /= 512; 126 GEM_BUG_ON(!is_power_of_2(stride)); 127 128 val = fence->start; 129 if (is_y_tiled) 130 val |= BIT(I830_FENCE_TILING_Y_SHIFT); 131 val |= I915_FENCE_SIZE_BITS(fence->size); 132 val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT; 133 134 val |= I830_FENCE_REG_VALID; 135 } 136 137 if (!pipelined) { 138 struct intel_uncore *uncore = fence_to_uncore(fence); 139 i915_reg_t reg = FENCE_REG(fence->id); 140 141 intel_uncore_write_fw(uncore, reg, val); 142 intel_uncore_posting_read_fw(uncore, reg); 143 } 144 } 145 146 static void i830_write_fence_reg(struct i915_fence_reg *fence) 147 { 148 u32 val; 149 150 val = 0; 151 if (fence->tiling) { 152 unsigned int stride = fence->stride; 153 154 val = fence->start; 155 if (fence->tiling == I915_TILING_Y) 156 val |= BIT(I830_FENCE_TILING_Y_SHIFT); 157 val |= I830_FENCE_SIZE_BITS(fence->size); 158 val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT; 159 val |= I830_FENCE_REG_VALID; 160 } 161 162 if (!pipelined) { 163 struct intel_uncore *uncore = fence_to_uncore(fence); 164 i915_reg_t reg = FENCE_REG(fence->id); 165 166 intel_uncore_write_fw(uncore, reg, val); 167 intel_uncore_posting_read_fw(uncore, reg); 168 } 169 } 170 171 static void fence_write(struct i915_fence_reg *fence) 172 { 173 struct drm_i915_private *i915 = fence_to_i915(fence); 174 175 /* 176 * Previous access through the fence register is marshalled by 177 * the mb() inside the fault handlers (i915_gem_release_mmaps) 178 * and explicitly managed for internal users. 179 */ 180 181 if (GRAPHICS_VER(i915) == 2) 182 i830_write_fence_reg(fence); 183 else if (GRAPHICS_VER(i915) == 3) 184 i915_write_fence_reg(fence); 185 else 186 i965_write_fence_reg(fence); 187 188 /* 189 * Access through the fenced region afterwards is 190 * ordered by the posting reads whilst writing the registers. 191 */ 192 } 193 194 static bool gpu_uses_fence_registers(struct i915_fence_reg *fence) 195 { 196 return GRAPHICS_VER(fence_to_i915(fence)) < 4; 197 } 198 199 static int fence_update(struct i915_fence_reg *fence, 200 struct i915_vma *vma) 201 { 202 struct i915_ggtt *ggtt = fence->ggtt; 203 struct intel_uncore *uncore = fence_to_uncore(fence); 204 intel_wakeref_t wakeref; 205 struct i915_vma *old; 206 int ret; 207 208 fence->tiling = 0; 209 if (vma) { 210 GEM_BUG_ON(!i915_gem_object_get_stride(vma->obj) || 211 !i915_gem_object_get_tiling(vma->obj)); 212 213 if (!i915_vma_is_map_and_fenceable(vma)) 214 return -EINVAL; 215 216 if (gpu_uses_fence_registers(fence)) { 217 /* implicit 'unfenced' GPU blits */ 218 ret = i915_vma_sync(vma); 219 if (ret) 220 return ret; 221 } 222 223 GEM_BUG_ON(vma->fence_size > i915_vma_size(vma)); 224 fence->start = i915_ggtt_offset(vma); 225 fence->size = vma->fence_size; 226 fence->stride = i915_gem_object_get_stride(vma->obj); 227 fence->tiling = i915_gem_object_get_tiling(vma->obj); 228 } 229 WRITE_ONCE(fence->dirty, false); 230 231 old = xchg(&fence->vma, NULL); 232 if (old) { 233 /* XXX Ideally we would move the waiting to outside the mutex */ 234 ret = i915_active_wait(&fence->active); 235 if (ret) { 236 fence->vma = old; 237 return ret; 238 } 239 240 i915_vma_flush_writes(old); 241 242 /* 243 * Ensure that all userspace CPU access is completed before 244 * stealing the fence. 245 */ 246 if (old != vma) { 247 GEM_BUG_ON(old->fence != fence); 248 i915_vma_revoke_mmap(old); 249 old->fence = NULL; 250 } 251 252 list_move(&fence->link, &ggtt->fence_list); 253 } 254 255 /* 256 * We only need to update the register itself if the device is awake. 257 * If the device is currently powered down, we will defer the write 258 * to the runtime resume, see intel_ggtt_restore_fences(). 259 * 260 * This only works for removing the fence register, on acquisition 261 * the caller must hold the rpm wakeref. The fence register must 262 * be cleared before we can use any other fences to ensure that 263 * the new fences do not overlap the elided clears, confusing HW. 264 */ 265 wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm); 266 if (!wakeref) { 267 GEM_BUG_ON(vma); 268 return 0; 269 } 270 271 WRITE_ONCE(fence->vma, vma); 272 fence_write(fence); 273 274 if (vma) { 275 vma->fence = fence; 276 list_move_tail(&fence->link, &ggtt->fence_list); 277 } 278 279 intel_runtime_pm_put(uncore->rpm, wakeref); 280 return 0; 281 } 282 283 /** 284 * i915_vma_revoke_fence - force-remove fence for a VMA 285 * @vma: vma to map linearly (not through a fence reg) 286 * 287 * This function force-removes any fence from the given object, which is useful 288 * if the kernel wants to do untiled GTT access. 289 */ 290 void i915_vma_revoke_fence(struct i915_vma *vma) 291 { 292 struct i915_fence_reg *fence = vma->fence; 293 intel_wakeref_t wakeref; 294 295 lockdep_assert_held(&vma->vm->mutex); 296 if (!fence) 297 return; 298 299 GEM_BUG_ON(fence->vma != vma); 300 GEM_BUG_ON(!i915_active_is_idle(&fence->active)); 301 GEM_BUG_ON(atomic_read(&fence->pin_count)); 302 303 fence->tiling = 0; 304 WRITE_ONCE(fence->vma, NULL); 305 vma->fence = NULL; 306 307 /* 308 * Skip the write to HW if and only if the device is currently 309 * suspended. 310 * 311 * If the driver does not currently hold a wakeref (if_in_use == 0), 312 * the device may currently be runtime suspended, or it may be woken 313 * up before the suspend takes place. If the device is not suspended 314 * (powered down) and we skip clearing the fence register, the HW is 315 * left in an undefined state where we may end up with multiple 316 * registers overlapping. 317 */ 318 with_intel_runtime_pm_if_active(fence_to_uncore(fence)->rpm, wakeref) 319 fence_write(fence); 320 } 321 322 static bool fence_is_active(const struct i915_fence_reg *fence) 323 { 324 return fence->vma && i915_vma_is_active(fence->vma); 325 } 326 327 static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) 328 { 329 struct i915_fence_reg *active = NULL; 330 struct i915_fence_reg *fence, *fn; 331 332 list_for_each_entry_safe(fence, fn, &ggtt->fence_list, link) { 333 GEM_BUG_ON(fence->vma && fence->vma->fence != fence); 334 335 if (fence == active) /* now seen this fence twice */ 336 active = ERR_PTR(-EAGAIN); 337 338 /* Prefer idle fences so we do not have to wait on the GPU */ 339 if (active != ERR_PTR(-EAGAIN) && fence_is_active(fence)) { 340 if (!active) 341 active = fence; 342 343 list_move_tail(&fence->link, &ggtt->fence_list); 344 continue; 345 } 346 347 if (atomic_read(&fence->pin_count)) 348 continue; 349 350 return fence; 351 } 352 353 /* Wait for completion of pending flips which consume fences */ 354 if (intel_has_pending_fb_unpin(ggtt->vm.i915)) 355 return ERR_PTR(-EAGAIN); 356 357 return ERR_PTR(-ENOBUFS); 358 } 359 360 int __i915_vma_pin_fence(struct i915_vma *vma) 361 { 362 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); 363 struct i915_fence_reg *fence; 364 struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; 365 int err; 366 367 lockdep_assert_held(&vma->vm->mutex); 368 369 /* Just update our place in the LRU if our fence is getting reused. */ 370 if (vma->fence) { 371 fence = vma->fence; 372 GEM_BUG_ON(fence->vma != vma); 373 atomic_inc(&fence->pin_count); 374 if (!fence->dirty) { 375 list_move_tail(&fence->link, &ggtt->fence_list); 376 return 0; 377 } 378 } else if (set) { 379 fence = fence_find(ggtt); 380 if (IS_ERR(fence)) 381 return PTR_ERR(fence); 382 383 GEM_BUG_ON(atomic_read(&fence->pin_count)); 384 atomic_inc(&fence->pin_count); 385 } else { 386 return 0; 387 } 388 389 err = fence_update(fence, set); 390 if (err) 391 goto out_unpin; 392 393 GEM_BUG_ON(fence->vma != set); 394 GEM_BUG_ON(vma->fence != (set ? fence : NULL)); 395 396 if (set) 397 return 0; 398 399 out_unpin: 400 atomic_dec(&fence->pin_count); 401 return err; 402 } 403 404 /** 405 * i915_vma_pin_fence - set up fencing for a vma 406 * @vma: vma to map through a fence reg 407 * 408 * When mapping objects through the GTT, userspace wants to be able to write 409 * to them without having to worry about swizzling if the object is tiled. 410 * This function walks the fence regs looking for a free one for @obj, 411 * stealing one if it can't find any. 412 * 413 * It then sets up the reg based on the object's properties: address, pitch 414 * and tiling format. 415 * 416 * For an untiled surface, this removes any existing fence. 417 * 418 * Returns: 419 * 420 * 0 on success, negative error code on failure. 421 */ 422 int i915_vma_pin_fence(struct i915_vma *vma) 423 { 424 int err; 425 426 if (!vma->fence && !i915_gem_object_is_tiled(vma->obj)) 427 return 0; 428 429 /* 430 * Note that we revoke fences on runtime suspend. Therefore the user 431 * must keep the device awake whilst using the fence. 432 */ 433 assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm); 434 GEM_BUG_ON(!i915_vma_is_ggtt(vma)); 435 436 err = mutex_lock_interruptible(&vma->vm->mutex); 437 if (err) 438 return err; 439 440 err = __i915_vma_pin_fence(vma); 441 mutex_unlock(&vma->vm->mutex); 442 443 return err; 444 } 445 446 /** 447 * i915_reserve_fence - Reserve a fence for vGPU 448 * @ggtt: Global GTT 449 * 450 * This function walks the fence regs looking for a free one and remove 451 * it from the fence_list. It is used to reserve fence for vGPU to use. 452 */ 453 struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt) 454 { 455 struct i915_fence_reg *fence; 456 int count; 457 int ret; 458 459 lockdep_assert_held(&ggtt->vm.mutex); 460 461 /* Keep at least one fence available for the display engine. */ 462 count = 0; 463 list_for_each_entry(fence, &ggtt->fence_list, link) 464 count += !atomic_read(&fence->pin_count); 465 if (count <= 1) 466 return ERR_PTR(-ENOSPC); 467 468 fence = fence_find(ggtt); 469 if (IS_ERR(fence)) 470 return fence; 471 472 if (fence->vma) { 473 /* Force-remove fence from VMA */ 474 ret = fence_update(fence, NULL); 475 if (ret) 476 return ERR_PTR(ret); 477 } 478 479 list_del(&fence->link); 480 481 return fence; 482 } 483 484 /** 485 * i915_unreserve_fence - Reclaim a reserved fence 486 * @fence: the fence reg 487 * 488 * This function add a reserved fence register from vGPU to the fence_list. 489 */ 490 void i915_unreserve_fence(struct i915_fence_reg *fence) 491 { 492 struct i915_ggtt *ggtt = fence->ggtt; 493 494 lockdep_assert_held(&ggtt->vm.mutex); 495 496 list_add(&fence->link, &ggtt->fence_list); 497 } 498 499 /** 500 * intel_ggtt_restore_fences - restore fence state 501 * @ggtt: Global GTT 502 * 503 * Restore the hw fence state to match the software tracking again, to be called 504 * after a gpu reset and on resume. Note that on runtime suspend we only cancel 505 * the fences, to be reacquired by the user later. 506 */ 507 void intel_ggtt_restore_fences(struct i915_ggtt *ggtt) 508 { 509 int i; 510 511 for (i = 0; i < ggtt->num_fences; i++) 512 fence_write(&ggtt->fence_regs[i]); 513 } 514 515 /** 516 * DOC: tiling swizzling details 517 * 518 * The idea behind tiling is to increase cache hit rates by rearranging 519 * pixel data so that a group of pixel accesses are in the same cacheline. 520 * Performance improvement from doing this on the back/depth buffer are on 521 * the order of 30%. 522 * 523 * Intel architectures make this somewhat more complicated, though, by 524 * adjustments made to addressing of data when the memory is in interleaved 525 * mode (matched pairs of DIMMS) to improve memory bandwidth. 526 * For interleaved memory, the CPU sends every sequential 64 bytes 527 * to an alternate memory channel so it can get the bandwidth from both. 528 * 529 * The GPU also rearranges its accesses for increased bandwidth to interleaved 530 * memory, and it matches what the CPU does for non-tiled. However, when tiled 531 * it does it a little differently, since one walks addresses not just in the 532 * X direction but also Y. So, along with alternating channels when bit 533 * 6 of the address flips, it also alternates when other bits flip -- Bits 9 534 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) 535 * are common to both the 915 and 965-class hardware. 536 * 537 * The CPU also sometimes XORs in higher bits as well, to improve 538 * bandwidth doing strided access like we do so frequently in graphics. This 539 * is called "Channel XOR Randomization" in the MCH documentation. The result 540 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address 541 * decode. 542 * 543 * All of this bit 6 XORing has an effect on our memory management, 544 * as we need to make sure that the 3d driver can correctly address object 545 * contents. 546 * 547 * If we don't have interleaved memory, all tiling is safe and no swizzling is 548 * required. 549 * 550 * When bit 17 is XORed in, we simply refuse to tile at all. Bit 551 * 17 is not just a page offset, so as we page an object out and back in, 552 * individual pages in it will have different bit 17 addresses, resulting in 553 * each 64 bytes being swapped with its neighbor! 554 * 555 * Otherwise, if interleaved, we have to tell the 3d driver what the address 556 * swizzling it needs to do is, since it's writing with the CPU to the pages 557 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the 558 * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling 559 * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order 560 * to match what the GPU expects. 561 */ 562 563 /** 564 * detect_bit_6_swizzle - detect bit 6 swizzling pattern 565 * @ggtt: Global GGTT 566 * 567 * Detects bit 6 swizzling of address lookup between IGD access and CPU 568 * access through main memory. 569 */ 570 static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) 571 { 572 struct intel_uncore *uncore = ggtt->vm.gt->uncore; 573 struct drm_i915_private *i915 = ggtt->vm.i915; 574 u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; 575 u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; 576 577 if (GRAPHICS_VER(i915) >= 8 || IS_VALLEYVIEW(i915)) { 578 /* 579 * On BDW+, swizzling is not used. We leave the CPU memory 580 * controller in charge of optimizing memory accesses without 581 * the extra address manipulation GPU side. 582 * 583 * VLV and CHV don't have GPU swizzling. 584 */ 585 swizzle_x = I915_BIT_6_SWIZZLE_NONE; 586 swizzle_y = I915_BIT_6_SWIZZLE_NONE; 587 } else if (GRAPHICS_VER(i915) >= 6) { 588 if (i915->preserve_bios_swizzle) { 589 if (intel_uncore_read(uncore, DISP_ARB_CTL) & 590 DISP_TILE_SURFACE_SWIZZLING) { 591 swizzle_x = I915_BIT_6_SWIZZLE_9_10; 592 swizzle_y = I915_BIT_6_SWIZZLE_9; 593 } else { 594 swizzle_x = I915_BIT_6_SWIZZLE_NONE; 595 swizzle_y = I915_BIT_6_SWIZZLE_NONE; 596 } 597 } else { 598 u32 dimm_c0, dimm_c1; 599 600 dimm_c0 = intel_uncore_read(uncore, MAD_DIMM_C0); 601 dimm_c1 = intel_uncore_read(uncore, MAD_DIMM_C1); 602 dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; 603 dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; 604 /* 605 * Enable swizzling when the channels are populated 606 * with identically sized dimms. We don't need to check 607 * the 3rd channel because no cpu with gpu attached 608 * ships in that configuration. Also, swizzling only 609 * makes sense for 2 channels anyway. 610 */ 611 if (dimm_c0 == dimm_c1) { 612 swizzle_x = I915_BIT_6_SWIZZLE_9_10; 613 swizzle_y = I915_BIT_6_SWIZZLE_9; 614 } else { 615 swizzle_x = I915_BIT_6_SWIZZLE_NONE; 616 swizzle_y = I915_BIT_6_SWIZZLE_NONE; 617 } 618 } 619 } else if (GRAPHICS_VER(i915) == 5) { 620 /* 621 * On Ironlake whatever DRAM config, GPU always do 622 * same swizzling setup. 623 */ 624 swizzle_x = I915_BIT_6_SWIZZLE_9_10; 625 swizzle_y = I915_BIT_6_SWIZZLE_9; 626 } else if (GRAPHICS_VER(i915) == 2) { 627 /* 628 * As far as we know, the 865 doesn't have these bit 6 629 * swizzling issues. 630 */ 631 swizzle_x = I915_BIT_6_SWIZZLE_NONE; 632 swizzle_y = I915_BIT_6_SWIZZLE_NONE; 633 } else if (IS_G45(i915) || IS_I965G(i915) || IS_G33(i915)) { 634 /* 635 * The 965, G33, and newer, have a very flexible memory 636 * configuration. It will enable dual-channel mode 637 * (interleaving) on as much memory as it can, and the GPU 638 * will additionally sometimes enable different bit 6 639 * swizzling for tiled objects from the CPU. 640 * 641 * Here's what I found on the G965: 642 * slot fill memory size swizzling 643 * 0A 0B 1A 1B 1-ch 2-ch 644 * 512 0 0 0 512 0 O 645 * 512 0 512 0 16 1008 X 646 * 512 0 0 512 16 1008 X 647 * 0 512 0 512 16 1008 X 648 * 1024 1024 1024 0 2048 1024 O 649 * 650 * We could probably detect this based on either the DRB 651 * matching, which was the case for the swizzling required in 652 * the table above, or from the 1-ch value being less than 653 * the minimum size of a rank. 654 * 655 * Reports indicate that the swizzling actually 656 * varies depending upon page placement inside the 657 * channels, i.e. we see swizzled pages where the 658 * banks of memory are paired and unswizzled on the 659 * uneven portion, so leave that as unknown. 660 */ 661 if (intel_uncore_read16(uncore, C0DRB3_BW) == 662 intel_uncore_read16(uncore, C1DRB3_BW)) { 663 swizzle_x = I915_BIT_6_SWIZZLE_9_10; 664 swizzle_y = I915_BIT_6_SWIZZLE_9; 665 } 666 } else { 667 u32 dcc = intel_uncore_read(uncore, DCC); 668 669 /* 670 * On 9xx chipsets, channel interleave by the CPU is 671 * determined by DCC. For single-channel, neither the CPU 672 * nor the GPU do swizzling. For dual channel interleaved, 673 * the GPU's interleave is bit 9 and 10 for X tiled, and bit 674 * 9 for Y tiled. The CPU's interleave is independent, and 675 * can be based on either bit 11 (haven't seen this yet) or 676 * bit 17 (common). 677 */ 678 switch (dcc & DCC_ADDRESSING_MODE_MASK) { 679 case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: 680 case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: 681 swizzle_x = I915_BIT_6_SWIZZLE_NONE; 682 swizzle_y = I915_BIT_6_SWIZZLE_NONE; 683 break; 684 case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: 685 if (dcc & DCC_CHANNEL_XOR_DISABLE) { 686 /* 687 * This is the base swizzling by the GPU for 688 * tiled buffers. 689 */ 690 swizzle_x = I915_BIT_6_SWIZZLE_9_10; 691 swizzle_y = I915_BIT_6_SWIZZLE_9; 692 } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) { 693 /* Bit 11 swizzling by the CPU in addition. */ 694 swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; 695 swizzle_y = I915_BIT_6_SWIZZLE_9_11; 696 } else { 697 /* Bit 17 swizzling by the CPU in addition. */ 698 swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; 699 swizzle_y = I915_BIT_6_SWIZZLE_9_17; 700 } 701 break; 702 } 703 704 /* check for L-shaped memory aka modified enhanced addressing */ 705 if (GRAPHICS_VER(i915) == 4 && 706 !(intel_uncore_read(uncore, DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) { 707 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; 708 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; 709 } 710 711 if (dcc == 0xffffffff) { 712 drm_err(&i915->drm, "Couldn't read from MCHBAR. " 713 "Disabling tiling.\n"); 714 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; 715 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; 716 } 717 } 718 719 if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN || 720 swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) { 721 /* 722 * Userspace likes to explode if it sees unknown swizzling, 723 * so lie. We will finish the lie when reporting through 724 * the get-tiling-ioctl by reporting the physical swizzle 725 * mode as unknown instead. 726 * 727 * As we don't strictly know what the swizzling is, it may be 728 * bit17 dependent, and so we need to also prevent the pages 729 * from being moved. 730 */ 731 i915->gem_quirks |= GEM_QUIRK_PIN_SWIZZLED_PAGES; 732 swizzle_x = I915_BIT_6_SWIZZLE_NONE; 733 swizzle_y = I915_BIT_6_SWIZZLE_NONE; 734 } 735 736 to_gt(i915)->ggtt->bit_6_swizzle_x = swizzle_x; 737 to_gt(i915)->ggtt->bit_6_swizzle_y = swizzle_y; 738 } 739 740 /* 741 * Swap every 64 bytes of this page around, to account for it having a new 742 * bit 17 of its physical address and therefore being interpreted differently 743 * by the GPU. 744 */ 745 static void swizzle_page(struct page *page) 746 { 747 char temp[64]; 748 char *vaddr; 749 int i; 750 751 vaddr = kmap(page); 752 753 for (i = 0; i < PAGE_SIZE; i += 128) { 754 memcpy(temp, &vaddr[i], 64); 755 memcpy(&vaddr[i], &vaddr[i + 64], 64); 756 memcpy(&vaddr[i + 64], temp, 64); 757 } 758 759 kunmap(page); 760 } 761 762 /** 763 * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling 764 * @obj: i915 GEM buffer object 765 * @pages: the scattergather list of physical pages 766 * 767 * This function fixes up the swizzling in case any page frame number for this 768 * object has changed in bit 17 since that state has been saved with 769 * i915_gem_object_save_bit_17_swizzle(). 770 * 771 * This is called when pinning backing storage again, since the kernel is free 772 * to move unpinned backing storage around (either by directly moving pages or 773 * by swapping them out and back in again). 774 */ 775 void 776 i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, 777 struct sg_table *pages) 778 { 779 struct sgt_iter sgt_iter; 780 struct page *page; 781 int i; 782 783 if (obj->bit_17 == NULL) 784 return; 785 786 i = 0; 787 for_each_sgt_page(page, sgt_iter, pages) { 788 char new_bit_17 = page_to_phys(page) >> 17; 789 790 if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { 791 swizzle_page(page); 792 set_page_dirty(page); 793 } 794 795 i++; 796 } 797 } 798 799 /** 800 * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling 801 * @obj: i915 GEM buffer object 802 * @pages: the scattergather list of physical pages 803 * 804 * This function saves the bit 17 of each page frame number so that swizzling 805 * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must 806 * be called before the backing storage can be unpinned. 807 */ 808 void 809 i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, 810 struct sg_table *pages) 811 { 812 const unsigned int page_count = obj->base.size >> PAGE_SHIFT; 813 struct sgt_iter sgt_iter; 814 struct page *page; 815 int i; 816 817 if (obj->bit_17 == NULL) { 818 obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL); 819 if (obj->bit_17 == NULL) { 820 drm_err(&to_i915(obj->base.dev)->drm, 821 "Failed to allocate memory for bit 17 record\n"); 822 return; 823 } 824 } 825 826 i = 0; 827 828 for_each_sgt_page(page, sgt_iter, pages) { 829 if (page_to_phys(page) & (1 << 17)) 830 __set_bit(i, obj->bit_17); 831 else 832 __clear_bit(i, obj->bit_17); 833 i++; 834 } 835 } 836 837 void intel_ggtt_init_fences(struct i915_ggtt *ggtt) 838 { 839 struct drm_i915_private *i915 = ggtt->vm.i915; 840 struct intel_uncore *uncore = ggtt->vm.gt->uncore; 841 int num_fences; 842 int i; 843 844 INIT_LIST_HEAD(&ggtt->fence_list); 845 INIT_LIST_HEAD(&ggtt->userfault_list); 846 847 detect_bit_6_swizzle(ggtt); 848 849 if (!i915_ggtt_has_aperture(ggtt)) 850 num_fences = 0; 851 else if (GRAPHICS_VER(i915) >= 7 && 852 !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))) 853 num_fences = 32; 854 else if (GRAPHICS_VER(i915) >= 4 || 855 IS_I945G(i915) || IS_I945GM(i915) || 856 IS_G33(i915) || IS_PINEVIEW(i915)) 857 num_fences = 16; 858 else 859 num_fences = 8; 860 861 if (intel_vgpu_active(i915)) 862 num_fences = intel_uncore_read(uncore, 863 vgtif_reg(avail_rs.fence_num)); 864 ggtt->fence_regs = kcalloc(num_fences, 865 sizeof(*ggtt->fence_regs), 866 GFP_KERNEL); 867 if (!ggtt->fence_regs) 868 num_fences = 0; 869 870 /* Initialize fence registers to zero */ 871 for (i = 0; i < num_fences; i++) { 872 struct i915_fence_reg *fence = &ggtt->fence_regs[i]; 873 874 i915_active_init(&fence->active, NULL, NULL, 0); 875 fence->ggtt = ggtt; 876 fence->id = i; 877 list_add_tail(&fence->link, &ggtt->fence_list); 878 } 879 ggtt->num_fences = num_fences; 880 881 intel_ggtt_restore_fences(ggtt); 882 } 883 884 void intel_ggtt_fini_fences(struct i915_ggtt *ggtt) 885 { 886 int i; 887 888 for (i = 0; i < ggtt->num_fences; i++) { 889 struct i915_fence_reg *fence = &ggtt->fence_regs[i]; 890 891 i915_active_fini(&fence->active); 892 } 893 894 kfree(ggtt->fence_regs); 895 } 896 897 void intel_gt_init_swizzling(struct intel_gt *gt) 898 { 899 struct drm_i915_private *i915 = gt->i915; 900 struct intel_uncore *uncore = gt->uncore; 901 902 if (GRAPHICS_VER(i915) < 5 || 903 to_gt(i915)->ggtt->bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 904 return; 905 906 intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING); 907 908 if (GRAPHICS_VER(i915) == 5) 909 return; 910 911 intel_uncore_rmw(uncore, TILECTL, 0, TILECTL_SWZCTL); 912 913 if (GRAPHICS_VER(i915) == 6) 914 intel_uncore_write(uncore, 915 ARB_MODE, 916 _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 917 else if (GRAPHICS_VER(i915) == 7) 918 intel_uncore_write(uncore, 919 ARB_MODE, 920 _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 921 else if (GRAPHICS_VER(i915) == 8) 922 intel_uncore_write(uncore, 923 GAMTARBMODE, 924 _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 925 else 926 MISSING_CASE(GRAPHICS_VER(i915)); 927 } 928