// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"

int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
	unsigned int num_store_dw = 12;
	u32 cmd, *cs;

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

	cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	while (num_store_dw--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = 0;
		*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
	}
	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;
	int i;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
			cmd |= MI_INVALIDATE_ISP;
	}

	i = 2;
	if (mode & EMIT_INVALIDATE)
		i += 20;

	cs = intel_ring_begin(rq, i);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;

	/*
	 * A random delay to let the CS invalidate take effect? Without this
	 * delay, the GPU relocation path fails as the CS does not see
	 * the updated contents. Just as important, if we apply the flushes
	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
	 * write and before the invalidate on the next batch), the relocations
	 * still fail. This implies that it is the delay following invalidation
	 * that is required to reset the caches, as opposed to a delay to
	 * ensure the memory is written.
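	 *
	 * The sequence below therefore brackets a burst of MI_FLUSH commands
	 * with PIPE_CONTROL QW writes to the default scratch slot, giving the
	 * invalidation time to settle before later commands execute.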
	 */
	if (mode & EMIT_INVALIDATE) {
		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;

		for (i = 0; i < 12; i++)
			*cs++ = MI_FLUSH;

		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;
	}

	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

/*
 * Emit the breadcrumb: 'flush' seqno writes to the scratch slot, then 'post'
 * seqno writes to the status page, followed by a user interrupt.
 */
static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
				   int flush, int post)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH;

	while (flush--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = rq->fence.seqno;
	}

	while (post--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
int i830_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs, cs_offset =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		cs = intel_ring_begin(rq, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Blit the batch (which now has all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

int gen3_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs;

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 length,
		       unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_NON_SECURE_I965;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

void gen2_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
	ENGINE_POSTING_READ16(engine, RING_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}

void gen3_irq_enable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen3_irq_disable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

void gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}
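
/*
 * Usage sketch (illustrative only, not taken verbatim from the upstream
 * setup code): these helpers are not called directly but installed as
 * engine vfuncs by the legacy ring submission setup, along the lines of:
 *
 *	engine->emit_flush = gen2_emit_flush;
 *	engine->emit_bb_start = gen3_emit_bb_start;
 *	engine->emit_fini_breadcrumb = gen3_emit_breadcrumb;
 *	engine->irq_enable = gen3_irq_enable;
 *	engine->irq_disable = gen3_irq_disable;
 *
 * The exact assignments depend on the platform (gen2 vs gen3 vs gen4/5) and
 * on the engine class (e.g. gen4_emit_flush_rcs vs gen4_emit_flush_vcs).
 */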