// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "gen7_renderclear.h"
#include "i915_drv.h"
#include "intel_gpu_commands.h"

#define MAX_URB_ENTRIES 64
#define STATE_SIZE (4 * 1024)
#define GT3_INLINE_DATA_DELAYS 0x1E00
#define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS))

struct cb_kernel {
	const void *data;
	u32 size;
};

#define CB_KERNEL(name) { .data = (name), .size = sizeof(name) }

#include "ivb_clear_kernel.c"
static const struct cb_kernel cb_kernel_ivb = CB_KERNEL(ivb_clear_kernel);

#include "hsw_clear_kernel.c"
static const struct cb_kernel cb_kernel_hsw = CB_KERNEL(hsw_clear_kernel);

struct batch_chunk {
	struct i915_vma *vma;
	u32 offset;
	u32 *start;
	u32 *end;
	u32 max_items;
};

struct batch_vals {
	u32 max_primitives;
	u32 max_urb_entries;
	u32 cmd_size;
	u32 state_size;
	u32 state_start;
	u32 batch_size;
	u32 surface_height;
	u32 surface_width;
	u32 scratch_size;
	u32 max_size;
};

static void
batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv)
{
	if (IS_HASWELL(i915)) {
		bv->max_primitives = 280;
		bv->max_urb_entries = MAX_URB_ENTRIES;
		bv->surface_height = 16 * 16;
		bv->surface_width = 32 * 2 * 16;
	} else {
		bv->max_primitives = 128;
		bv->max_urb_entries = MAX_URB_ENTRIES / 2;
		bv->surface_height = 16 * 8;
		bv->surface_width = 32 * 16;
	}
	bv->cmd_size = bv->max_primitives * 4096;
	bv->state_size = STATE_SIZE;
	bv->state_start = bv->cmd_size;
	bv->batch_size = bv->cmd_size + bv->state_size;
	bv->scratch_size = bv->surface_height * bv->surface_width;
	bv->max_size = bv->batch_size + bv->scratch_size;
}

static void batch_init(struct batch_chunk *bc,
		       struct i915_vma *vma,
		       u32 *start, u32 offset, u32 max_bytes)
{
	bc->vma = vma;
	bc->offset = offset;
	bc->start = start + bc->offset / sizeof(*bc->start);
	bc->end = bc->start;
	bc->max_items = max_bytes / sizeof(*bc->start);
}

static u32 batch_offset(const struct batch_chunk *bc, u32 *cs)
{
	return (cs - bc->start) * sizeof(*bc->start) + bc->offset;
}

static u32 batch_addr(const struct batch_chunk *bc)
{
	return bc->vma->node.start;
}

static void batch_add(struct batch_chunk *bc, const u32 d)
{
	GEM_BUG_ON((bc->end - bc->start) >= bc->max_items);
	*bc->end++ = d;
}

static u32 *batch_alloc_items(struct batch_chunk *bc, u32 align, u32 items)
{
	u32 *map;

	if (align) {
		u32 *end = PTR_ALIGN(bc->end, align);

		memset32(bc->end, 0, end - bc->end);
		bc->end = end;
	}

	map = bc->end;
	bc->end += items;

	return map;
}

static u32 *batch_alloc_bytes(struct batch_chunk *bc, u32 align, u32 bytes)
{
	GEM_BUG_ON(!IS_ALIGNED(bytes, sizeof(*bc->start)));
	return batch_alloc_items(bc, align, bytes / sizeof(*bc->start));
}

static u32
gen7_fill_surface_state(struct batch_chunk *state,
			const u32 dst_offset,
			const struct batch_vals *bv)
{
	u32 surface_h = bv->surface_height;
	u32 surface_w = bv->surface_width;
	u32 *cs = batch_alloc_items(state, 32, 8);
	u32 offset = batch_offset(state, cs);

#define SURFACE_2D 1
#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define RENDER_CACHE_READ_WRITE 1

	*cs++ = SURFACE_2D << 29 |
		(SURFACEFORMAT_B8G8R8A8_UNORM << 18) |
		(RENDER_CACHE_READ_WRITE << 8);

	*cs++ = batch_addr(state) + dst_offset;

	*cs++ = ((surface_h / 4 - 1) << 16) | (surface_w / 4 - 1);
	*cs++ = surface_w;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
#define SHADER_CHANNELS(r, g, b, a) \
	(((r) << 25) | ((g) << 22) | ((b) << 19) | ((a) << 16))
	*cs++ = SHADER_CHANNELS(4, 5, 6, 7);
	batch_advance(state, cs);

	return offset;
}

static u32
gen7_fill_binding_table(struct batch_chunk *state,
			const struct batch_vals *bv)
{
	u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv);
	u32 *cs = batch_alloc_items(state, 32, 8);
	u32 offset = batch_offset(state, cs);

	*cs++ = surface_start - state->offset;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	batch_advance(state, cs);

	return offset;
}

static u32
gen7_fill_kernel_data(struct batch_chunk *state,
		      const u32 *data,
		      const u32 size)
{
	return batch_offset(state,
			    memcpy(batch_alloc_bytes(state, 64, size),
				   data, size));
}

static u32
gen7_fill_interface_descriptor(struct batch_chunk *state,
			       const struct batch_vals *bv,
			       const struct cb_kernel *kernel,
			       unsigned int count)
{
	u32 kernel_offset =
		gen7_fill_kernel_data(state, kernel->data, kernel->size);
	u32 binding_table = gen7_fill_binding_table(state, bv);
	u32 *cs = batch_alloc_items(state, 32, 8 * count);
	u32 offset = batch_offset(state, cs);

	*cs++ = kernel_offset;
	*cs++ = (1 << 7) | (1 << 13);
	*cs++ = 0;
	*cs++ = (binding_table - state->offset) | 1;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	/* 1 - 63 dummy idds */
	memset32(cs, 0x00, (count - 1) * 8);
	batch_advance(state, cs + (count - 1) * 8);

	return offset;
}

static void
gen7_emit_state_base_address(struct batch_chunk *batch,
			     u32 surface_state_base)
{
	u32 *cs = batch_alloc_items(batch, 0, 12);

	*cs++ = STATE_BASE_ADDRESS | (12 - 2);
	/* general */
	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
	/* surface */
	*cs++ = batch_addr(batch) | surface_state_base | BASE_ADDRESS_MODIFY;
	/* dynamic */
	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
	/* indirect */
	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
	/* instruction */
	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;

	/* general/dynamic/indirect/instruction access bounds */
	*cs++ = 0;
	*cs++ = BASE_ADDRESS_MODIFY;
	*cs++ = 0;
	*cs++ = BASE_ADDRESS_MODIFY;
	*cs++ = 0;
	*cs++ = 0;
	batch_advance(batch, cs);
}

static void
gen7_emit_vfe_state(struct batch_chunk *batch,
		    const struct batch_vals *bv,
		    u32 urb_size, u32 curbe_size,
		    u32 mode)
{
	u32 urb_entries = bv->max_urb_entries;
	u32 threads = bv->max_primitives - 1;
	u32 *cs = batch_alloc_items(batch, 32, 8);

	*cs++ = MEDIA_VFE_STATE | (8 - 2);

	/* scratch buffer */
	*cs++ = 0;

	/* number of threads & urb entries for GPGPU vs Media Mode */
	*cs++ = threads << 16 | urb_entries << 8 | mode << 2;

	*cs++ = 0;

	/* urb entry size & curbe size in 256-bit units */
	*cs++ = urb_size << 16 | curbe_size;

	/* scoreboard */
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	batch_advance(batch, cs);
}

static void
gen7_emit_interface_descriptor_load(struct batch_chunk *batch,
				    const u32 interface_descriptor,
				    unsigned int count)
{
	u32 *cs = batch_alloc_items(batch, 8, 4);

	*cs++ = MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2);
	*cs++ = 0;
	*cs++ = count * 8 * sizeof(*cs);

	/*
	 * interface descriptor address - it is relative to the dynamic state
	 * base address
	 */
	*cs++ = interface_descriptor;
	batch_advance(batch, cs);
}

static void
gen7_emit_media_object(struct batch_chunk *batch,
		       unsigned int media_object_index)
{
	unsigned int x_offset = (media_object_index % 16) * 64;
	unsigned int y_offset = (media_object_index / 16) * 16;
	unsigned int inline_data_size;
	unsigned int media_batch_size;
	unsigned int i;
	u32 *cs;

	inline_data_size = 112 * 8;
	media_batch_size = inline_data_size + 6;

	cs = batch_alloc_items(batch, 8, media_batch_size);

	*cs++ = MEDIA_OBJECT | (media_batch_size - 2);

	/* interface descriptor offset */
	*cs++ = 0;

	/* without indirect data */
	*cs++ = 0;
	*cs++ = 0;

	/* scoreboard */
	*cs++ = 0;
	*cs++ = 0;

	/* inline */
	*cs++ = (y_offset << 16) | (x_offset);
	*cs++ = 0;
	*cs++ = GT3_INLINE_DATA_DELAYS;
	for (i = 3; i < inline_data_size; i++)
		*cs++ = 0;

	batch_advance(batch, cs);
}

static void gen7_emit_pipeline_flush(struct batch_chunk *batch)
{
	u32 *cs = batch_alloc_items(batch, 0, 5);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE |
		PIPE_CONTROL_GLOBAL_GTT_IVB;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	batch_advance(batch, cs);
}

static void emit_batch(struct i915_vma * const vma,
		       u32 *start,
		       const struct batch_vals *bv)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	unsigned int desc_count = 64;
	const u32 urb_size = 112;
	struct batch_chunk cmds, state;
	u32 interface_descriptor;
	unsigned int i;

	batch_init(&cmds, vma, start, 0, bv->cmd_size);
	batch_init(&state, vma, start, bv->state_start, bv->state_size);

	interface_descriptor =
		gen7_fill_interface_descriptor(&state, bv,
					       IS_HASWELL(i915) ?
					       &cb_kernel_hsw :
					       &cb_kernel_ivb,
					       desc_count);
	gen7_emit_pipeline_flush(&cmds);
	batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
	batch_add(&cmds, MI_NOOP);
	gen7_emit_state_base_address(&cmds, interface_descriptor);
	gen7_emit_pipeline_flush(&cmds);

	gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0);

	gen7_emit_interface_descriptor_load(&cmds,
					    interface_descriptor,
					    desc_count);

	for (i = 0; i < bv->max_primitives; i++)
		gen7_emit_media_object(&cmds, i);

	batch_add(&cmds, MI_BATCH_BUFFER_END);
}

int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine,
			    struct i915_vma * const vma)
{
	struct batch_vals bv;
	u32 *batch;

	batch_get_defaults(engine->i915, &bv);

	/* With no vma supplied, just report the required buffer size */
	if (!vma)
		return bv.max_size;

	GEM_BUG_ON(vma->obj->base.size < bv.max_size);

	batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	emit_batch(vma, memset(batch, 0, bv.max_size), &bv);

	i915_gem_object_flush_map(vma->obj);
	i915_gem_object_unpin_map(vma->obj);

	return 0;
}