/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008,2010 Intel Corporation
 */

#include <linux/intel-iommu.h>
#include <linux/dma-resv.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>

#include <drm/drm_syncobj.h>

#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_ioctls.h"
#include "gt/intel_context.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"

struct eb_vma {
	struct i915_vma *vma;
	unsigned int flags;

	/** This vma's place in the execbuf reservation list */
	struct drm_i915_gem_exec_object2 *exec;
	struct list_head bind_link;
	struct list_head reloc_link;

	struct hlist_node node;
	u32 handle;
};

enum {
	FORCE_CPU_RELOC = 1,
	FORCE_GTT_RELOC,
	FORCE_GPU_RELOC,
#define DBG_FORCE_RELOC 0 /* choose one of the above! */
};

/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
#define __EXEC_OBJECT_HAS_PIN		BIT(30)
#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
#define __EXEC_OBJECT_USERPTR_INIT	BIT(28)
#define __EXEC_OBJECT_NEEDS_MAP		BIT(27)
#define __EXEC_OBJECT_NEEDS_BIAS	BIT(26)
#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 26) /* all of the above + */
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)

#define __EXEC_HAS_RELOC	BIT(31)
#define __EXEC_ENGINE_PINNED	BIT(30)
#define __EXEC_USERPTR_USED	BIT(29)
#define __EXEC_INTERNAL_FLAGS	(~0u << 29)
#define UPDATE			PIN_OFFSET_FIXED

#define BATCH_OFFSET_BIAS (256*1024)

#define __I915_EXEC_ILLEGAL_FLAGS \
	(__I915_EXEC_UNKNOWN_FLAGS | \
	 I915_EXEC_CONSTANTS_MASK | \
	 I915_EXEC_RESOURCE_STREAMER)

/* Catch emission of unexpected errors for CI! */
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#undef EINVAL
#define EINVAL ({ \
	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
	22; \
})
#endif

/**
 * DOC: User command execution
 *
 * Userspace submits commands to be executed on the GPU as an instruction
 * stream within a GEM object we call a batchbuffer. These instructions may
 * refer to other GEM objects containing auxiliary state such as kernels,
 * samplers, render targets and even secondary batchbuffers. Userspace does
 * not know where in the GPU memory these objects reside and so before the
 * batchbuffer is passed to the GPU for execution, those addresses in the
 * batchbuffer and auxiliary objects are updated. This is known as relocation,
 * or patching. To try and avoid having to relocate each object on the next
 * execution, userspace is told the location of those objects in this pass,
 * but this remains just a hint as the kernel may choose a new location for
 * any object in the future.
 *
 * At the level of talking to the hardware, submitting a batchbuffer for the
 * GPU to execute is to add content to a buffer from which the HW
 * command streamer is reading.
 *
 * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
 *    Execlists, this command is not placed on the same buffer as the
 *    remaining items.
 *
 * 2. Add a command to invalidate caches to the buffer.
 *
 * 3. Add a batchbuffer start command to the buffer; the start command is
 *    essentially a token together with the GPU address of the batchbuffer
 *    to be executed.
 *
 * 4. Add a pipeline flush to the buffer.
 *
 * 5. Add a memory write command to the buffer to record when the GPU
 *    is done executing the batchbuffer. The memory write writes the
 *    global sequence number of the request, ``i915_request::global_seqno``;
 *    the i915 driver uses the current value in the register to determine
 *    if the GPU has completed the batchbuffer.
 *
 * 6. Add a user interrupt command to the buffer. This command instructs
 *    the GPU to issue an interrupt when the command, pipeline flush and
 *    memory write are completed.
 *
 * 7. Inform the hardware of the additional commands added to the buffer
 *    (by updating the tail pointer).
 *
 * Processing an execbuf ioctl is conceptually split up into a few phases.
 *
 * 1. Validation - Ensure all the pointers, handles and flags are valid.
 * 2. Reservation - Assign GPU address space for every object
 * 3. Relocation - Update any addresses to point to the final locations
 * 4. Serialisation - Order the request with respect to its dependencies
 * 5. Construction - Construct a request to execute the batchbuffer
 * 6. Submission (at some point in the future execution)
 *
 * Reserving resources for the execbuf is the most complicated phase. We
 * neither want to have to migrate the object in the address space, nor do
 * we want to have to update any relocations pointing to this object. Ideally,
 * we want to leave the object where it is and for all the existing relocations
 * to match. If the object is given a new address, or if userspace thinks the
 * object is elsewhere, we have to parse all the relocation entries and update
 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
 * all the target addresses in all of its objects match the value in the
 * relocation entries and that they all match the presumed offsets given by the
 * list of execbuffer objects. Using this knowledge, we know that if we haven't
 * moved any buffers, all the relocation entries are valid and we can skip
 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
 * hang.) The requirements for using I915_EXEC_NO_RELOC are:
 *
 *      The addresses written in the objects must match the corresponding
 *      reloc.presumed_offset which in turn must match the corresponding
 *      execobject.offset.
 *
 *      Any render targets written to in the batch must be flagged with
 *      EXEC_OBJECT_WRITE.
 *
 *      To avoid stalling, execobject.offset should match the current
 *      address of that object within the active context.
 *
 * The reservation is done in multiple phases. First we try and keep any
 * object already bound in its current location - so long as it meets the
 * constraints imposed by the new execbuffer. Any object left unbound after the
 * first pass is then fitted into any available idle space. If an object does
 * not fit, all objects are removed from the reservation and the process rerun
 * after sorting the objects into a priority order (more difficult to fit
 * objects are tried first). Failing that, the entire VM is cleared and we try
 * to fit the execbuf one last time before concluding that it simply will not
 * fit.
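 *
 * Purely as an illustration (a sketch, not copied from the uAPI
 * documentation; the handles and presumed offsets are placeholders), a
 * userspace submission relying on I915_EXEC_NO_RELOC looks roughly like::
 *
 *	struct drm_i915_gem_exec_object2 obj[2] = {};
 *	struct drm_i915_gem_execbuffer2 execbuf = {};
 *
 *	obj[0].handle = target_handle;
 *	obj[0].offset = presumed_target_offset; // matches reloc.presumed_offset
 *	obj[0].flags = EXEC_OBJECT_WRITE;       // render target written by the batch
 *	obj[1].handle = batch_handle;           // batch is last without I915_EXEC_BATCH_FIRST
 *	obj[1].offset = presumed_batch_offset;
 *
 *	execbuf.buffers_ptr = (uintptr_t)obj;
 *	execbuf.buffer_count = 2;
 *	execbuf.flags = I915_EXEC_NO_RELOC | I915_EXEC_HANDLE_LUT;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);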
 *
 * A small complication to all of this is that we allow userspace not only to
 * specify an alignment and a size for the object in the address space, but
 * we also allow userspace to specify the exact offset. These objects are
 * simpler to place (the location is known a priori); all we have to do is
 * make sure the space is available.
 *
 * Once all the objects are in place, patching up the buried pointers to point
 * to the final locations is a fairly simple job of walking over the relocation
 * entry arrays, looking up the right address and rewriting the value into
 * the object. Simple! ... The relocation entries are stored in user memory
 * and so to access them we have to copy them into a local buffer. That copy
 * has to avoid taking any pagefaults as they may lead back to a GEM object
 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
 * the relocation into multiple passes. First we try to do everything within an
 * atomic context (avoid the pagefaults) which requires that we never wait. If
 * we detect that we may wait, or if we need to fault, then we have to fallback
 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
 * bells yet?) Dropping the mutex means that we lose all the state we have
 * built up so far for the execbuf and we must reset any global data. However,
 * we do leave the objects pinned in their final locations - which is a
 * potential issue for concurrent execbufs. Once we have left the mutex, we can
 * allocate and copy all the relocation entries into a large array at our
 * leisure, reacquire the mutex, reclaim all the objects and other state and
 * then proceed to update any incorrect addresses with the objects.
 *
 * As we process the relocation entries, we maintain a record of whether the
 * object is being written to. Using I915_EXEC_NO_RELOC, we expect userspace
 * to provide this information instead. We also check whether we can skip the
 * relocation by comparing the expected value inside the relocation entry with
 * the target's final address. If they differ, we have to map the current
 * object and rewrite the 4 or 8 byte pointer within.
 *
 * Serialising an execbuf is quite simple according to the rules of the GEM
 * ABI. Execution within each context is ordered by the order of submission.
 * Writes to any GEM object are in order of submission and are exclusive. Reads
 * from a GEM object are unordered with respect to other reads, but ordered by
 * writes. A write submitted after a read cannot occur before the read, and
 * similarly any read submitted after a write cannot occur before the write.
 * Writes are ordered between engines such that only one write occurs at any
 * time (completing any reads beforehand) - using semaphores where available
 * and CPU serialisation otherwise. Other GEM accesses obey the same rules, any
 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
 * reads before starting, and any read (either using set-domain or pread) must
 * flush all GPU writes before starting. (Note we only employ a barrier before,
 * we currently rely on userspace not concurrently starting a new execution
 * whilst reading or writing to an object. This may be an advantage or not
 * depending on how much you trust userspace not to shoot themselves in the
 * foot.) Serialisation may just result in the request being inserted into
 * a DAG awaiting its turn, but the simplest is to wait on the CPU until
 * all dependencies are resolved.
 *
 * After all of that, it is just a matter of closing the request and handing it
 * to the hardware (well, leaving it in a queue to be executed). However, we
 * also offer the ability for batchbuffers to be run with elevated privileges
 * so that they can access otherwise hidden registers. (Used to adjust L3 cache
 * etc.) Before any batch is given extra privileges we first must check that it
 * contains no nefarious instructions, we check that each instruction is from
 * our whitelist and all registers are also from an allowed list. We first
 * copy the user's batchbuffer to a shadow (so that the user doesn't have
 * access to it, either by the CPU or GPU as we scan it) and then parse each
 * instruction. If everything is ok, we set a flag telling the hardware to run
 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
 */

struct eb_fence {
	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
	struct dma_fence *dma_fence;
	u64 value;
	struct dma_fence_chain *chain_fence;
};

struct i915_execbuffer {
	struct drm_i915_private *i915; /** i915 backpointer */
	struct drm_file *file; /** per-file lookup tables and limits */
	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
	struct eb_vma *vma;

	struct intel_engine_cs *engine; /** engine to queue the request to */
	struct intel_context *context; /* logical state for the request */
	struct i915_gem_context *gem_context; /** caller's context */

	struct i915_request *request; /** our request to build */
	struct eb_vma *batch; /** identity of the batch obj/vma */
	struct i915_vma *trampoline; /** trampoline used for chaining */

	/** actual size of execobj[] as we may extend it for the cmdparser */
	unsigned int buffer_count;

	/** list of vma not yet bound during reservation phase */
	struct list_head unbound;

	/** list of vma that have execobj.relocation_count */
	struct list_head relocs;

	struct i915_gem_ww_ctx ww;

	/**
	 * Track the most recently used object for relocations, as we
	 * frequently have to perform multiple relocations within the same
	 * obj/page
	 */
	struct reloc_cache {
		struct drm_mm_node node; /** temporary GTT binding */
		unsigned long vaddr; /** Current kmap address */
		unsigned long page; /** Currently mapped page index */
		unsigned int graphics_ver; /** Cached value of GRAPHICS_VER */
		bool use_64bit_reloc : 1;
		bool has_llc : 1;
		bool has_fence : 1;
		bool needs_unfenced : 1;

		struct i915_request *rq;
		u32 *rq_cmd;
		unsigned int rq_size;
		struct intel_gt_buffer_pool_node *pool;
	} reloc_cache;

	struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
	struct intel_context *reloc_context;

	u64 invalid_flags; /** Set of execobj.flags that are invalid */
	u32 context_flags; /** Set of execobj.flags to insert from the ctx */

	u64 batch_len; /** Length of batch within object */
	u32 batch_start_offset; /** Location within object of batch */
	u32 batch_flags; /** Flags composed for emit_bb_start() */
	struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */

	/**
	 * Indicate either the size of the hashtable used to resolve
	 * relocation handles, or if negative that we are using a direct
	 * index into the execobj[].
	 */
	int lut_size;
	struct hlist_head *buckets; /** ht for relocation handles */

	struct eb_fence *fences;
	unsigned long num_fences;
};

static int eb_parse(struct i915_execbuffer *eb);
static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
					  bool throttle);
static void eb_unpin_engine(struct i915_execbuffer *eb);

static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
	return intel_engine_requires_cmd_parser(eb->engine) ||
		(intel_engine_using_cmd_parser(eb->engine) &&
		 eb->args->batch_len);
}

static int eb_create(struct i915_execbuffer *eb)
{
	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
		unsigned int size = 1 + ilog2(eb->buffer_count);

		/*
		 * Without a 1:1 association between relocation handles and
		 * the execobject[] index, we instead create a hashtable.
		 * We size it dynamically based on available memory, starting
		 * first with a 1:1 associative hash and scaling back until
		 * the allocation succeeds.
		 *
		 * Later on we use a positive lut_size to indicate we are
		 * using this hashtable, and a negative value to indicate a
		 * direct lookup.
		 */
		do {
			gfp_t flags;

			/* While we can still reduce the allocation size, don't
			 * raise a warning and allow the allocation to fail.
			 * On the last pass though, we want to try as hard
			 * as possible to perform the allocation and warn
			 * if it fails.
			 */
			flags = GFP_KERNEL;
			if (size > 1)
				flags |= __GFP_NORETRY | __GFP_NOWARN;

			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
					      flags);
			if (eb->buckets)
				break;
		} while (--size);

		if (unlikely(!size))
			return -ENOMEM;

		eb->lut_size = size;
	} else {
		eb->lut_size = -eb->buffer_count;
	}

	return 0;
}

static bool
eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
		 const struct i915_vma *vma,
		 unsigned int flags)
{
	if (vma->node.size < entry->pad_to_size)
		return true;

	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
		return true;

	if (flags & EXEC_OBJECT_PINNED &&
	    vma->node.start != entry->offset)
		return true;

	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
	    vma->node.start < BATCH_OFFSET_BIAS)
		return true;

	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
	    (vma->node.start + vma->node.size + 4095) >> 32)
		return true;

	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
	    !i915_vma_is_map_and_fenceable(vma))
		return true;

	return false;
}

static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
			unsigned int exec_flags)
{
	u64 pin_flags = 0;

	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
		pin_flags |= PIN_GLOBAL;

	/*
	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
	 * limit address to the first 4GBs for unflagged objects.
	 */
	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
		pin_flags |= PIN_ZONE_4G;

	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
		pin_flags |= PIN_MAPPABLE;

	if (exec_flags & EXEC_OBJECT_PINNED)
		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
	else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;

	return pin_flags;
}

static inline int
eb_pin_vma(struct i915_execbuffer *eb,
	   const struct drm_i915_gem_exec_object2 *entry,
	   struct eb_vma *ev)
{
	struct i915_vma *vma = ev->vma;
	u64 pin_flags;
	int err;

	if (vma->node.size)
		pin_flags = vma->node.start;
	else
		pin_flags = entry->offset & PIN_OFFSET_MASK;

	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
		pin_flags |= PIN_GLOBAL;

	/* Attempt to reuse the current location if available */
	err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags);
	if (err == -EDEADLK)
		return err;

	if (unlikely(err)) {
		if (entry->flags & EXEC_OBJECT_PINNED)
			return err;

		/* Failing that pick any _free_ space if suitable */
		err = i915_vma_pin_ww(vma, &eb->ww,
				      entry->pad_to_size,
				      entry->alignment,
				      eb_pin_flags(entry, ev->flags) |
				      PIN_USER | PIN_NOEVICT);
		if (unlikely(err))
			return err;
	}

	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
		err = i915_vma_pin_fence(vma);
		if (unlikely(err)) {
			i915_vma_unpin(vma);
			return err;
		}

		if (vma->fence)
			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
	}

	ev->flags |= __EXEC_OBJECT_HAS_PIN;
	if (eb_vma_misplaced(entry, vma, ev->flags))
		return -EBADSLT;

	return 0;
}

static inline void
eb_unreserve_vma(struct eb_vma *ev)
{
	if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
		return;

	if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
		__i915_vma_unpin_fence(ev->vma);

	__i915_vma_unpin(ev->vma);
	ev->flags &= ~__EXEC_OBJECT_RESERVED;
}

static int
eb_validate_vma(struct i915_execbuffer *eb,
		struct drm_i915_gem_exec_object2 *entry,
		struct i915_vma *vma)
{
	/* Relocations are disallowed for all platforms after TGL-LP. This
	 * also covers all platforms with local memory.
	 */
	if (entry->relocation_count &&
	    GRAPHICS_VER(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915))
		return -EINVAL;

	if (unlikely(entry->flags & eb->invalid_flags))
		return -EINVAL;

	if (unlikely(entry->alignment &&
		     !is_power_of_2_u64(entry->alignment)))
		return -EINVAL;

	/*
	 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
	 * any non-page-aligned or non-canonical addresses.
	 */
	if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
		     entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
		return -EINVAL;

	/* pad_to_size was once a reserved field, so sanitize it */
	if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
		if (unlikely(offset_in_page(entry->pad_to_size)))
			return -EINVAL;
	} else {
		entry->pad_to_size = 0;
	}
	/*
	 * From drm_mm perspective address space is continuous,
	 * so from this point we're always using non-canonical
	 * form internally.
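	 *
	 * (Illustrative example, not taken from the code below:
	 * gen8_canonical_addr() sign-extends bit 47, so a pinned offset such
	 * as 0x0000_8000_0000_0000 is supplied by userspace in canonical form
	 * as 0xffff_8000_0000_0000; gen8_noncanonical_addr() masks it back to
	 * the former before drm_mm ever sees it.)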
	 */
	entry->offset = gen8_noncanonical_addr(entry->offset);

	if (!eb->reloc_cache.has_fence) {
		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
	} else {
		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
		     eb->reloc_cache.needs_unfenced) &&
		    i915_gem_object_is_tiled(vma->obj))
			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
	}

	if (!(entry->flags & EXEC_OBJECT_PINNED))
		entry->flags |= eb->context_flags;

	return 0;
}

static void
eb_add_vma(struct i915_execbuffer *eb,
	   unsigned int i, unsigned batch_idx,
	   struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
	struct eb_vma *ev = &eb->vma[i];

	ev->vma = vma;
	ev->exec = entry;
	ev->flags = entry->flags;

	if (eb->lut_size > 0) {
		ev->handle = entry->handle;
		hlist_add_head(&ev->node,
			       &eb->buckets[hash_32(entry->handle,
						    eb->lut_size)]);
	}

	if (entry->relocation_count)
		list_add_tail(&ev->reloc_link, &eb->relocs);

	/*
	 * SNA is doing fancy tricks with compressing batch buffers, which leads
	 * to negative relocation deltas. Usually that works out ok since the
	 * relocate address is still positive, except when the batch is placed
	 * very low in the GTT. Ensure this doesn't happen.
	 *
	 * Note that actual hangs have only been observed on gen7, but for
	 * paranoia do it everywhere.
	 */
	if (i == batch_idx) {
		if (entry->relocation_count &&
		    !(ev->flags & EXEC_OBJECT_PINNED))
			ev->flags |= __EXEC_OBJECT_NEEDS_BIAS;
		if (eb->reloc_cache.has_fence)
			ev->flags |= EXEC_OBJECT_NEEDS_FENCE;

		eb->batch = ev;
	}
}

static inline int use_cpu_reloc(const struct reloc_cache *cache,
				const struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_has_struct_page(obj))
		return false;

	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
		return true;

	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
		return false;

	return (cache->has_llc ||
		obj->cache_dirty ||
		obj->cache_level != I915_CACHE_NONE);
}

static int eb_reserve_vma(struct i915_execbuffer *eb,
			  struct eb_vma *ev,
			  u64 pin_flags)
{
	struct drm_i915_gem_exec_object2 *entry = ev->exec;
	struct i915_vma *vma = ev->vma;
	int err;

	if (drm_mm_node_allocated(&vma->node) &&
	    eb_vma_misplaced(entry, vma, ev->flags)) {
		err = i915_vma_unbind(vma);
		if (err)
			return err;
	}

	err = i915_vma_pin_ww(vma, &eb->ww,
			      entry->pad_to_size, entry->alignment,
			      eb_pin_flags(entry, ev->flags) | pin_flags);
	if (err)
		return err;

	if (entry->offset != vma->node.start) {
		entry->offset = vma->node.start | UPDATE;
		eb->args->flags |= __EXEC_HAS_RELOC;
	}

	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
		err = i915_vma_pin_fence(vma);
		if (unlikely(err)) {
			i915_vma_unpin(vma);
			return err;
		}

		if (vma->fence)
			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
	}

	ev->flags |= __EXEC_OBJECT_HAS_PIN;
	GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));

	return 0;
}

static int eb_reserve(struct i915_execbuffer *eb)
{
	const unsigned int count = eb->buffer_count;
	unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
	struct list_head last;
	struct eb_vma *ev;
	unsigned int i, pass;
	int err = 0;

	/*
	 * Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	pass = 0;
	do {
		list_for_each_entry(ev, &eb->unbound, bind_link) {
			err = eb_reserve_vma(eb, ev, pin_flags);
			if (err)
				break;
		}
		if (err != -ENOSPC)
			return err;

		/* Resort *all* the objects into priority order */
		INIT_LIST_HEAD(&eb->unbound);
		INIT_LIST_HEAD(&last);
		for (i = 0; i < count; i++) {
			unsigned int flags;

			ev = &eb->vma[i];
			flags = ev->flags;
			if (flags & EXEC_OBJECT_PINNED &&
			    flags & __EXEC_OBJECT_HAS_PIN)
				continue;

			eb_unreserve_vma(ev);

			if (flags & EXEC_OBJECT_PINNED)
				/* Pinned must have their slot */
				list_add(&ev->bind_link, &eb->unbound);
			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
				/* Map requires the lowest 256MiB (aperture) */
				list_add_tail(&ev->bind_link, &eb->unbound);
			else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
				/* Prioritise 4GiB region for restricted bo */
				list_add(&ev->bind_link, &last);
			else
				list_add_tail(&ev->bind_link, &last);
		}
		list_splice_tail(&last, &eb->unbound);

		switch (pass++) {
		case 0:
			break;

		case 1:
			/* Too fragmented, unbind everything and retry */
			mutex_lock(&eb->context->vm->mutex);
			err = i915_gem_evict_vm(eb->context->vm);
			mutex_unlock(&eb->context->vm->mutex);
			if (err)
				return err;
			break;

		default:
			return -ENOSPC;
		}

		pin_flags = PIN_USER;
	} while (1);
}

static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
{
	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
		return 0;
	else
		return eb->buffer_count - 1;
}

static int eb_select_context(struct i915_execbuffer *eb)
{
	struct i915_gem_context *ctx;

	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
	if (unlikely(!ctx))
		return -ENOENT;

	eb->gem_context = ctx;
	if (rcu_access_pointer(ctx->vm))
		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;

	eb->context_flags = 0;
	if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
		eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;

	return 0;
}

static int __eb_add_lut(struct i915_execbuffer *eb,
			u32 handle, struct i915_vma *vma)
{
	struct i915_gem_context *ctx = eb->gem_context;
	struct i915_lut_handle *lut;
	int err;

	lut = i915_lut_handle_alloc();
	if (unlikely(!lut))
		return -ENOMEM;

	i915_vma_get(vma);
	if (!atomic_fetch_inc(&vma->open_count))
		i915_vma_reopen(vma);
	lut->handle = handle;
	lut->ctx = ctx;

	/* Check that the context hasn't been closed in the meantime */
	err = -EINTR;
	if (!mutex_lock_interruptible(&ctx->lut_mutex)) {
		struct i915_address_space *vm = rcu_access_pointer(ctx->vm);

		if (unlikely(vm && vma->vm != vm))
			err = -EAGAIN; /* user racing with ctx set-vm */
		else if (likely(!i915_gem_context_is_closed(ctx)))
			err = radix_tree_insert(&ctx->handles_vma, handle, vma);
		else
			err = -ENOENT;
		if (err == 0) { /* And nor has this handle */
			struct drm_i915_gem_object *obj = vma->obj;

			spin_lock(&obj->lut_lock);
			if (idr_find(&eb->file->object_idr, handle) == obj) {
				list_add(&lut->obj_link, &obj->lut_list);
			} else {
				radix_tree_delete(&ctx->handles_vma, handle);
				err = -ENOENT;
			}
			spin_unlock(&obj->lut_lock);
		}
		mutex_unlock(&ctx->lut_mutex);
	}
	if (unlikely(err))
		goto err;

	return 0;

err:
	i915_vma_close(vma);
	i915_vma_put(vma);
	i915_lut_handle_free(lut);
	return err;
}

static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
{
	struct i915_address_space *vm = eb->context->vm;

	do {
		struct drm_i915_gem_object *obj;
		struct i915_vma *vma;
		int err;

		rcu_read_lock();
		vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
		if (likely(vma && vma->vm == vm))
			vma = i915_vma_tryget(vma);
		rcu_read_unlock();
		if (likely(vma))
			return vma;

		obj = i915_gem_object_lookup(eb->file, handle);
		if (unlikely(!obj))
			return ERR_PTR(-ENOENT);

		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			i915_gem_object_put(obj);
			return vma;
		}

		err = __eb_add_lut(eb, handle, vma);
		if (likely(!err))
			return vma;

		i915_gem_object_put(obj);
		if (err != -EEXIST)
			return ERR_PTR(err);
	} while (1);
}

static int eb_lookup_vmas(struct i915_execbuffer *eb)
{
	struct drm_i915_private *i915 = eb->i915;
	unsigned int batch = eb_batch_index(eb);
	unsigned int i;
	int err = 0;

	INIT_LIST_HEAD(&eb->relocs);

	for (i = 0; i < eb->buffer_count; i++) {
		struct i915_vma *vma;

		vma = eb_lookup_vma(eb, eb->exec[i].handle);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto err;
		}

		err = eb_validate_vma(eb, &eb->exec[i], vma);
		if (unlikely(err)) {
			i915_vma_put(vma);
			goto err;
		}

		eb_add_vma(eb, i, batch, vma);

		if (i915_gem_object_is_userptr(vma->obj)) {
			err = i915_gem_object_userptr_submit_init(vma->obj);
			if (err) {
				if (i + 1 < eb->buffer_count) {
					/*
					 * Execbuffer code expects last vma entry to be NULL,
					 * since we already initialized this entry,
					 * set the next value to NULL or we mess up
					 * cleanup handling.
					 */
					eb->vma[i + 1].vma = NULL;
				}

				return err;
			}

			eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
			eb->args->flags |= __EXEC_USERPTR_USED;
		}
	}

	if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
		drm_dbg(&i915->drm,
			"Attempting to use self-modifying batch buffer\n");
		return -EINVAL;
	}

	if (range_overflows_t(u64,
			      eb->batch_start_offset, eb->batch_len,
			      eb->batch->vma->size)) {
		drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
		return -EINVAL;
	}

	if (eb->batch_len == 0)
		eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
	if (unlikely(eb->batch_len == 0)) { /* impossible! */
		drm_dbg(&i915->drm, "Invalid batch length\n");
		return -EINVAL;
	}

	return 0;

err:
	eb->vma[i].vma = NULL;
	return err;
}

static int eb_validate_vmas(struct i915_execbuffer *eb)
{
	unsigned int i;
	int err;

	INIT_LIST_HEAD(&eb->unbound);

	for (i = 0; i < eb->buffer_count; i++) {
		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
		struct eb_vma *ev = &eb->vma[i];
		struct i915_vma *vma = ev->vma;

		err = i915_gem_object_lock(vma->obj, &eb->ww);
		if (err)
			return err;

		err = eb_pin_vma(eb, entry, ev);
		if (err == -EDEADLK)
			return err;

		if (!err) {
			if (entry->offset != vma->node.start) {
				entry->offset = vma->node.start | UPDATE;
				eb->args->flags |= __EXEC_HAS_RELOC;
			}
		} else {
			eb_unreserve_vma(ev);

			list_add_tail(&ev->bind_link, &eb->unbound);
			if (drm_mm_node_allocated(&vma->node)) {
				err = i915_vma_unbind(vma);
				if (err)
					return err;
			}
		}

		if (!(ev->flags & EXEC_OBJECT_WRITE)) {
			err = dma_resv_reserve_shared(vma->resv, 1);
			if (err)
				return err;
		}

		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
			   eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
	}

	if (!list_empty(&eb->unbound))
		return eb_reserve(eb);

	return 0;
}

static struct eb_vma *
eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
{
	if (eb->lut_size < 0) {
		if (handle >= -eb->lut_size)
			return NULL;
		return &eb->vma[handle];
	} else {
		struct hlist_head *head;
		struct eb_vma *ev;

		head = &eb->buckets[hash_32(handle, eb->lut_size)];
		hlist_for_each_entry(ev, head, node) {
			if (ev->handle == handle)
				return ev;
		}
		return NULL;
	}
}

static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr)
{
	const unsigned int count = eb->buffer_count;
	unsigned int i;

	for (i = 0; i < count; i++) {
		struct eb_vma *ev = &eb->vma[i];
		struct i915_vma *vma = ev->vma;

		if (!vma)
			break;

		eb_unreserve_vma(ev);

		if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) {
			ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT;
			i915_gem_object_userptr_submit_fini(vma->obj);
		}

		if (final)
			i915_vma_put(vma);
	}

	eb_unpin_engine(eb);
}

static void eb_destroy(const struct i915_execbuffer *eb)
{
	GEM_BUG_ON(eb->reloc_cache.rq);

	if (eb->lut_size > 0)
		kfree(eb->buckets);
}

static inline u64
relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
		  const struct i915_vma *target)
{
	return gen8_canonical_addr((int)reloc->delta + target->node.start);
}

static void reloc_cache_clear(struct reloc_cache *cache)
{
	cache->rq = NULL;
	cache->rq_cmd = NULL;
	cache->pool = NULL;
	cache->rq_size = 0;
}

static void reloc_cache_init(struct reloc_cache *cache,
			     struct drm_i915_private *i915)
{
	cache->page = -1;
	cache->vaddr = 0;
	/* Must be a variable in the struct to allow GCC to unroll. */
	cache->graphics_ver = GRAPHICS_VER(i915);
	cache->has_llc = HAS_LLC(i915);
	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
	cache->has_fence = cache->graphics_ver < 4;
	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
	cache->node.flags = 0;
	reloc_cache_clear(cache);
}

static inline void *unmask_page(unsigned long p)
{
	return (void *)(uintptr_t)(p & PAGE_MASK);
}

static inline unsigned int unmask_flags(unsigned long p)
{
	return p & ~PAGE_MASK;
}

#define KMAP 0x4 /* after CLFLUSH_FLAGS */

static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
{
	struct drm_i915_private *i915 =
		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
	return &i915->ggtt;
}

static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache)
{
	if (!cache->pool)
		return;

	/*
	 * This is a bit nasty, normally we keep objects locked until the end
	 * of execbuffer, but we already submit this, and have to unlock before
	 * dropping the reference. Fortunately we can only hold 1 pool node at
	 * a time, so this should be harmless.
	 */
	i915_gem_ww_unlock_single(cache->pool->obj);
	intel_gt_buffer_pool_put(cache->pool);
	cache->pool = NULL;
}

static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache)
{
	struct drm_i915_gem_object *obj = cache->rq->batch->obj;

	GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(cache->rq->engine->gt);

	i915_request_add(cache->rq);
	reloc_cache_put_pool(eb, cache);
	reloc_cache_clear(cache);

	eb->reloc_pool = NULL;
}

static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
{
	void *vaddr;

	if (cache->rq)
		reloc_gpu_flush(eb, cache);

	if (!cache->vaddr)
		return;

	vaddr = unmask_page(cache->vaddr);
	if (cache->vaddr & KMAP) {
		struct drm_i915_gem_object *obj =
			(struct drm_i915_gem_object *)cache->node.mm;
		if (cache->vaddr & CLFLUSH_AFTER)
			mb();

		kunmap_atomic(vaddr);
		i915_gem_object_finish_access(obj);
	} else {
		struct i915_ggtt *ggtt = cache_to_ggtt(cache);

		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
		io_mapping_unmap_atomic((void __iomem *)vaddr);

		if (drm_mm_node_allocated(&cache->node)) {
			ggtt->vm.clear_range(&ggtt->vm,
					     cache->node.start,
					     cache->node.size);
			mutex_lock(&ggtt->vm.mutex);
			drm_mm_remove_node(&cache->node);
			mutex_unlock(&ggtt->vm.mutex);
		} else {
			i915_vma_unpin((struct i915_vma *)cache->node.mm);
		}
	}

	cache->vaddr = 0;
	cache->page = -1;
}

static void *reloc_kmap(struct drm_i915_gem_object *obj,
			struct reloc_cache *cache,
			unsigned long pageno)
{
	void *vaddr;
	struct page *page;

	if (cache->vaddr) {
		kunmap_atomic(unmask_page(cache->vaddr));
	} else {
		unsigned int flushes;
		int err;

		err = i915_gem_object_prepare_write(obj, &flushes);
		if (err)
			return ERR_PTR(err);

		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);

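		/*
		 * The low bits of cache->vaddr hold the flush hints returned
		 * by i915_gem_object_prepare_write() together with the KMAP
		 * marker; the page-aligned kmap address is OR'ed in below,
		 * and unmask_flags()/unmask_page() split the two halves
		 * apart again.
		 */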
		cache->vaddr = flushes | KMAP;
		cache->node.mm = (void *)obj;
		if (flushes)
			mb();
	}

	page = i915_gem_object_get_page(obj, pageno);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	vaddr = kmap_atomic(page);
	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
	cache->page = pageno;

	return vaddr;
}

static void *reloc_iomap(struct drm_i915_gem_object *obj,
			 struct i915_execbuffer *eb,
			 unsigned long page)
{
	struct reloc_cache *cache = &eb->reloc_cache;
	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
	unsigned long offset;
	void *vaddr;

	if (cache->vaddr) {
		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
	} else {
		struct i915_vma *vma;
		int err;

		if (i915_gem_object_is_tiled(obj))
			return ERR_PTR(-EINVAL);

		if (use_cpu_reloc(cache, obj))
			return NULL;

		err = i915_gem_object_set_to_gtt_domain(obj, true);
		if (err)
			return ERR_PTR(err);

		vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
						  PIN_MAPPABLE |
						  PIN_NONBLOCK /* NOWARN */ |
						  PIN_NOEVICT);
		if (vma == ERR_PTR(-EDEADLK))
			return vma;

		if (IS_ERR(vma)) {
			memset(&cache->node, 0, sizeof(cache->node));
			mutex_lock(&ggtt->vm.mutex);
			err = drm_mm_insert_node_in_range
				(&ggtt->vm.mm, &cache->node,
				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
				 0, ggtt->mappable_end,
				 DRM_MM_INSERT_LOW);
			mutex_unlock(&ggtt->vm.mutex);
			if (err) /* no inactive aperture space, use cpu reloc */
				return NULL;
		} else {
			cache->node.start = vma->node.start;
			cache->node.mm = (void *)vma;
		}
	}

	offset = cache->node.start;
	if (drm_mm_node_allocated(&cache->node)) {
		ggtt->vm.insert_page(&ggtt->vm,
				     i915_gem_object_get_dma_address(obj, page),
				     offset, I915_CACHE_NONE, 0);
	} else {
		offset += page << PAGE_SHIFT;
	}

	vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
							 offset);
	cache->page = page;
	cache->vaddr = (unsigned long)vaddr;

	return vaddr;
}

static void *reloc_vaddr(struct drm_i915_gem_object *obj,
			 struct i915_execbuffer *eb,
			 unsigned long page)
{
	struct reloc_cache *cache = &eb->reloc_cache;
	void *vaddr;

	if (cache->page == page) {
		vaddr = unmask_page(cache->vaddr);
	} else {
		vaddr = NULL;
		if ((cache->vaddr & KMAP) == 0)
			vaddr = reloc_iomap(obj, eb, page);
		if (!vaddr)
			vaddr = reloc_kmap(obj, cache, page);
	}

	return vaddr;
}

static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
{
	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
		if (flushes & CLFLUSH_BEFORE) {
			clflushopt(addr);
			mb();
		}

		*addr = value;

		/*
		 * Writes to the same cacheline are serialised by the CPU
		 * (including clflush). On the write path, we only require
		 * that it hits memory in an orderly fashion and place
		 * mb barriers at the start and end of the relocation phase
		 * to ensure ordering of clflush w.r.t. the system.
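		 *
		 * (The flushes value passed in here is the set of CLFLUSH_*
		 * hints that i915_gem_object_prepare_write() reported for the
		 * object and that reloc_kmap() stashed in the low bits of
		 * reloc_cache.vaddr; for the iomap path those bits are zero
		 * and the plain store below is used.)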
1294 */ 1295 if (flushes & CLFLUSH_AFTER) 1296 clflushopt(addr); 1297 } else 1298 *addr = value; 1299 } 1300 1301 static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) 1302 { 1303 struct drm_i915_gem_object *obj = vma->obj; 1304 int err; 1305 1306 assert_vma_held(vma); 1307 1308 if (obj->cache_dirty & ~obj->cache_coherent) 1309 i915_gem_clflush_object(obj, 0); 1310 obj->write_domain = 0; 1311 1312 err = i915_request_await_object(rq, vma->obj, true); 1313 if (err == 0) 1314 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 1315 1316 return err; 1317 } 1318 1319 static int __reloc_gpu_alloc(struct i915_execbuffer *eb, 1320 struct intel_engine_cs *engine, 1321 struct i915_vma *vma, 1322 unsigned int len) 1323 { 1324 struct reloc_cache *cache = &eb->reloc_cache; 1325 struct intel_gt_buffer_pool_node *pool = eb->reloc_pool; 1326 struct i915_request *rq; 1327 struct i915_vma *batch; 1328 u32 *cmd; 1329 int err; 1330 1331 if (!pool) { 1332 pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE, 1333 cache->has_llc ? 1334 I915_MAP_WB : 1335 I915_MAP_WC); 1336 if (IS_ERR(pool)) 1337 return PTR_ERR(pool); 1338 } 1339 eb->reloc_pool = NULL; 1340 1341 err = i915_gem_object_lock(pool->obj, &eb->ww); 1342 if (err) 1343 goto err_pool; 1344 1345 cmd = i915_gem_object_pin_map(pool->obj, pool->type); 1346 if (IS_ERR(cmd)) { 1347 err = PTR_ERR(cmd); 1348 goto err_pool; 1349 } 1350 intel_gt_buffer_pool_mark_used(pool); 1351 1352 memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); 1353 1354 batch = i915_vma_instance(pool->obj, vma->vm, NULL); 1355 if (IS_ERR(batch)) { 1356 err = PTR_ERR(batch); 1357 goto err_unmap; 1358 } 1359 1360 err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK); 1361 if (err) 1362 goto err_unmap; 1363 1364 if (engine == eb->context->engine) { 1365 rq = i915_request_create(eb->context); 1366 } else { 1367 struct intel_context *ce = eb->reloc_context; 1368 1369 if (!ce) { 1370 ce = intel_context_create(engine); 1371 if (IS_ERR(ce)) { 1372 err = PTR_ERR(ce); 1373 goto err_unpin; 1374 } 1375 1376 i915_vm_put(ce->vm); 1377 ce->vm = i915_vm_get(eb->context->vm); 1378 eb->reloc_context = ce; 1379 } 1380 1381 err = intel_context_pin_ww(ce, &eb->ww); 1382 if (err) 1383 goto err_unpin; 1384 1385 rq = i915_request_create(ce); 1386 intel_context_unpin(ce); 1387 } 1388 if (IS_ERR(rq)) { 1389 err = PTR_ERR(rq); 1390 goto err_unpin; 1391 } 1392 1393 err = intel_gt_buffer_pool_mark_active(pool, rq); 1394 if (err) 1395 goto err_request; 1396 1397 err = reloc_move_to_gpu(rq, vma); 1398 if (err) 1399 goto err_request; 1400 1401 err = eb->engine->emit_bb_start(rq, 1402 batch->node.start, PAGE_SIZE, 1403 cache->graphics_ver > 5 ? 
0 : I915_DISPATCH_SECURE); 1404 if (err) 1405 goto skip_request; 1406 1407 assert_vma_held(batch); 1408 err = i915_request_await_object(rq, batch->obj, false); 1409 if (err == 0) 1410 err = i915_vma_move_to_active(batch, rq, 0); 1411 if (err) 1412 goto skip_request; 1413 1414 rq->batch = batch; 1415 i915_vma_unpin(batch); 1416 1417 cache->rq = rq; 1418 cache->rq_cmd = cmd; 1419 cache->rq_size = 0; 1420 cache->pool = pool; 1421 1422 /* Return with batch mapping (cmd) still pinned */ 1423 return 0; 1424 1425 skip_request: 1426 i915_request_set_error_once(rq, err); 1427 err_request: 1428 i915_request_add(rq); 1429 err_unpin: 1430 i915_vma_unpin(batch); 1431 err_unmap: 1432 i915_gem_object_unpin_map(pool->obj); 1433 err_pool: 1434 eb->reloc_pool = pool; 1435 return err; 1436 } 1437 1438 static bool reloc_can_use_engine(const struct intel_engine_cs *engine) 1439 { 1440 return engine->class != VIDEO_DECODE_CLASS || GRAPHICS_VER(engine->i915) != 6; 1441 } 1442 1443 static u32 *reloc_gpu(struct i915_execbuffer *eb, 1444 struct i915_vma *vma, 1445 unsigned int len) 1446 { 1447 struct reloc_cache *cache = &eb->reloc_cache; 1448 u32 *cmd; 1449 1450 if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) 1451 reloc_gpu_flush(eb, cache); 1452 1453 if (unlikely(!cache->rq)) { 1454 int err; 1455 struct intel_engine_cs *engine = eb->engine; 1456 1457 /* If we need to copy for the cmdparser, we will stall anyway */ 1458 if (eb_use_cmdparser(eb)) 1459 return ERR_PTR(-EWOULDBLOCK); 1460 1461 if (!reloc_can_use_engine(engine)) { 1462 engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; 1463 if (!engine) 1464 return ERR_PTR(-ENODEV); 1465 } 1466 1467 err = __reloc_gpu_alloc(eb, engine, vma, len); 1468 if (unlikely(err)) 1469 return ERR_PTR(err); 1470 } 1471 1472 cmd = cache->rq_cmd + cache->rq_size; 1473 cache->rq_size += len; 1474 1475 return cmd; 1476 } 1477 1478 static inline bool use_reloc_gpu(struct i915_vma *vma) 1479 { 1480 if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) 1481 return true; 1482 1483 if (DBG_FORCE_RELOC) 1484 return false; 1485 1486 return !dma_resv_test_signaled(vma->resv, true); 1487 } 1488 1489 static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) 1490 { 1491 struct page *page; 1492 unsigned long addr; 1493 1494 GEM_BUG_ON(vma->pages != vma->obj->mm.pages); 1495 1496 page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT); 1497 addr = PFN_PHYS(page_to_pfn(page)); 1498 GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */ 1499 1500 return addr + offset_in_page(offset); 1501 } 1502 1503 static int __reloc_entry_gpu(struct i915_execbuffer *eb, 1504 struct i915_vma *vma, 1505 u64 offset, 1506 u64 target_addr) 1507 { 1508 const unsigned int ver = eb->reloc_cache.graphics_ver; 1509 unsigned int len; 1510 u32 *batch; 1511 u64 addr; 1512 1513 if (ver >= 8) 1514 len = offset & 7 ? 
8 : 5; 1515 else if (ver >= 4) 1516 len = 4; 1517 else 1518 len = 3; 1519 1520 batch = reloc_gpu(eb, vma, len); 1521 if (batch == ERR_PTR(-EDEADLK)) 1522 return -EDEADLK; 1523 else if (IS_ERR(batch)) 1524 return false; 1525 1526 addr = gen8_canonical_addr(vma->node.start + offset); 1527 if (ver >= 8) { 1528 if (offset & 7) { 1529 *batch++ = MI_STORE_DWORD_IMM_GEN4; 1530 *batch++ = lower_32_bits(addr); 1531 *batch++ = upper_32_bits(addr); 1532 *batch++ = lower_32_bits(target_addr); 1533 1534 addr = gen8_canonical_addr(addr + 4); 1535 1536 *batch++ = MI_STORE_DWORD_IMM_GEN4; 1537 *batch++ = lower_32_bits(addr); 1538 *batch++ = upper_32_bits(addr); 1539 *batch++ = upper_32_bits(target_addr); 1540 } else { 1541 *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; 1542 *batch++ = lower_32_bits(addr); 1543 *batch++ = upper_32_bits(addr); 1544 *batch++ = lower_32_bits(target_addr); 1545 *batch++ = upper_32_bits(target_addr); 1546 } 1547 } else if (ver >= 6) { 1548 *batch++ = MI_STORE_DWORD_IMM_GEN4; 1549 *batch++ = 0; 1550 *batch++ = addr; 1551 *batch++ = target_addr; 1552 } else if (IS_I965G(eb->i915)) { 1553 *batch++ = MI_STORE_DWORD_IMM_GEN4; 1554 *batch++ = 0; 1555 *batch++ = vma_phys_addr(vma, offset); 1556 *batch++ = target_addr; 1557 } else if (ver >= 4) { 1558 *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1559 *batch++ = 0; 1560 *batch++ = addr; 1561 *batch++ = target_addr; 1562 } else if (ver >= 3 && 1563 !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) { 1564 *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; 1565 *batch++ = addr; 1566 *batch++ = target_addr; 1567 } else { 1568 *batch++ = MI_STORE_DWORD_IMM; 1569 *batch++ = vma_phys_addr(vma, offset); 1570 *batch++ = target_addr; 1571 } 1572 1573 return true; 1574 } 1575 1576 static int reloc_entry_gpu(struct i915_execbuffer *eb, 1577 struct i915_vma *vma, 1578 u64 offset, 1579 u64 target_addr) 1580 { 1581 if (eb->reloc_cache.vaddr) 1582 return false; 1583 1584 if (!use_reloc_gpu(vma)) 1585 return false; 1586 1587 return __reloc_entry_gpu(eb, vma, offset, target_addr); 1588 } 1589 1590 static u64 1591 relocate_entry(struct i915_vma *vma, 1592 const struct drm_i915_gem_relocation_entry *reloc, 1593 struct i915_execbuffer *eb, 1594 const struct i915_vma *target) 1595 { 1596 u64 target_addr = relocation_target(reloc, target); 1597 u64 offset = reloc->offset; 1598 int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr); 1599 1600 if (reloc_gpu < 0) 1601 return reloc_gpu; 1602 1603 if (!reloc_gpu) { 1604 bool wide = eb->reloc_cache.use_64bit_reloc; 1605 void *vaddr; 1606 1607 repeat: 1608 vaddr = reloc_vaddr(vma->obj, eb, 1609 offset >> PAGE_SHIFT); 1610 if (IS_ERR(vaddr)) 1611 return PTR_ERR(vaddr); 1612 1613 GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); 1614 clflush_write32(vaddr + offset_in_page(offset), 1615 lower_32_bits(target_addr), 1616 eb->reloc_cache.vaddr); 1617 1618 if (wide) { 1619 offset += sizeof(u32); 1620 target_addr >>= 32; 1621 wide = false; 1622 goto repeat; 1623 } 1624 } 1625 1626 return target->node.start | UPDATE; 1627 } 1628 1629 static u64 1630 eb_relocate_entry(struct i915_execbuffer *eb, 1631 struct eb_vma *ev, 1632 const struct drm_i915_gem_relocation_entry *reloc) 1633 { 1634 struct drm_i915_private *i915 = eb->i915; 1635 struct eb_vma *target; 1636 int err; 1637 1638 /* we've already hold a reference to all valid objects */ 1639 target = eb_get_vma(eb, reloc->target_handle); 1640 if (unlikely(!target)) 1641 return -ENOENT; 1642 1643 /* Validate that the target is in a valid r/w GPU domain */ 1644 if 
(unlikely(reloc->write_domain & (reloc->write_domain - 1))) { 1645 drm_dbg(&i915->drm, "reloc with multiple write domains: " 1646 "target %d offset %d " 1647 "read %08x write %08x", 1648 reloc->target_handle, 1649 (int) reloc->offset, 1650 reloc->read_domains, 1651 reloc->write_domain); 1652 return -EINVAL; 1653 } 1654 if (unlikely((reloc->write_domain | reloc->read_domains) 1655 & ~I915_GEM_GPU_DOMAINS)) { 1656 drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: " 1657 "target %d offset %d " 1658 "read %08x write %08x", 1659 reloc->target_handle, 1660 (int) reloc->offset, 1661 reloc->read_domains, 1662 reloc->write_domain); 1663 return -EINVAL; 1664 } 1665 1666 if (reloc->write_domain) { 1667 target->flags |= EXEC_OBJECT_WRITE; 1668 1669 /* 1670 * Sandybridge PPGTT errata: We need a global gtt mapping 1671 * for MI and pipe_control writes because the gpu doesn't 1672 * properly redirect them through the ppgtt for non_secure 1673 * batchbuffers. 1674 */ 1675 if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && 1676 GRAPHICS_VER(eb->i915) == 6) { 1677 err = i915_vma_bind(target->vma, 1678 target->vma->obj->cache_level, 1679 PIN_GLOBAL, NULL); 1680 if (err) 1681 return err; 1682 } 1683 } 1684 1685 /* 1686 * If the relocation already has the right value in it, no 1687 * more work needs to be done. 1688 */ 1689 if (!DBG_FORCE_RELOC && 1690 gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset) 1691 return 0; 1692 1693 /* Check that the relocation address is valid... */ 1694 if (unlikely(reloc->offset > 1695 ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { 1696 drm_dbg(&i915->drm, "Relocation beyond object bounds: " 1697 "target %d offset %d size %d.\n", 1698 reloc->target_handle, 1699 (int)reloc->offset, 1700 (int)ev->vma->size); 1701 return -EINVAL; 1702 } 1703 if (unlikely(reloc->offset & 3)) { 1704 drm_dbg(&i915->drm, "Relocation not 4-byte aligned: " 1705 "target %d offset %d.\n", 1706 reloc->target_handle, 1707 (int)reloc->offset); 1708 return -EINVAL; 1709 } 1710 1711 /* 1712 * If we write into the object, we need to force the synchronisation 1713 * barrier, either with an asynchronous clflush or if we executed the 1714 * patching using the GPU (though that should be serialised by the 1715 * timeline). To be completely sure, and since we are required to 1716 * do relocations we are already stalling, disable the user's opt 1717 * out of our synchronisation. 1718 */ 1719 ev->flags &= ~EXEC_OBJECT_ASYNC; 1720 1721 /* and update the user's relocation entry */ 1722 return relocate_entry(ev->vma, reloc, eb, target->vma); 1723 } 1724 1725 static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) 1726 { 1727 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) 1728 struct drm_i915_gem_relocation_entry stack[N_RELOC(512)]; 1729 const struct drm_i915_gem_exec_object2 *entry = ev->exec; 1730 struct drm_i915_gem_relocation_entry __user *urelocs = 1731 u64_to_user_ptr(entry->relocs_ptr); 1732 unsigned long remain = entry->relocation_count; 1733 1734 if (unlikely(remain > N_RELOC(ULONG_MAX))) 1735 return -EINVAL; 1736 1737 /* 1738 * We must check that the entire relocation array is safe 1739 * to read. However, if the array is not writable the user loses 1740 * the updated relocation values. 
1741 */ 1742 if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs)))) 1743 return -EFAULT; 1744 1745 do { 1746 struct drm_i915_gem_relocation_entry *r = stack; 1747 unsigned int count = 1748 min_t(unsigned long, remain, ARRAY_SIZE(stack)); 1749 unsigned int copied; 1750 1751 /* 1752 * This is the fast path and we cannot handle a pagefault 1753 * whilst holding the struct mutex lest the user pass in the 1754 * relocations contained within a mmaped bo. For in such a case 1755 * we, the page fault handler would call i915_gem_fault() and 1756 * we would try to acquire the struct mutex again. Obviously 1757 * this is bad and so lockdep complains vehemently. 1758 */ 1759 pagefault_disable(); 1760 copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); 1761 pagefault_enable(); 1762 if (unlikely(copied)) { 1763 remain = -EFAULT; 1764 goto out; 1765 } 1766 1767 remain -= count; 1768 do { 1769 u64 offset = eb_relocate_entry(eb, ev, r); 1770 1771 if (likely(offset == 0)) { 1772 } else if ((s64)offset < 0) { 1773 remain = (int)offset; 1774 goto out; 1775 } else { 1776 /* 1777 * Note that reporting an error now 1778 * leaves everything in an inconsistent 1779 * state as we have *already* changed 1780 * the relocation value inside the 1781 * object. As we have not changed the 1782 * reloc.presumed_offset or will not 1783 * change the execobject.offset, on the 1784 * call we may not rewrite the value 1785 * inside the object, leaving it 1786 * dangling and causing a GPU hang. Unless 1787 * userspace dynamically rebuilds the 1788 * relocations on each execbuf rather than 1789 * presume a static tree. 1790 * 1791 * We did previously check if the relocations 1792 * were writable (access_ok), an error now 1793 * would be a strange race with mprotect, 1794 * having already demonstrated that we 1795 * can read from this userspace address. 
1796 */ 1797 offset = gen8_canonical_addr(offset & ~UPDATE); 1798 __put_user(offset, 1799 &urelocs[r - stack].presumed_offset); 1800 } 1801 } while (r++, --count); 1802 urelocs += ARRAY_SIZE(stack); 1803 } while (remain); 1804 out: 1805 reloc_cache_reset(&eb->reloc_cache, eb); 1806 return remain; 1807 } 1808 1809 static int 1810 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev) 1811 { 1812 const struct drm_i915_gem_exec_object2 *entry = ev->exec; 1813 struct drm_i915_gem_relocation_entry *relocs = 1814 u64_to_ptr(typeof(*relocs), entry->relocs_ptr); 1815 unsigned int i; 1816 int err; 1817 1818 for (i = 0; i < entry->relocation_count; i++) { 1819 u64 offset = eb_relocate_entry(eb, ev, &relocs[i]); 1820 1821 if ((s64)offset < 0) { 1822 err = (int)offset; 1823 goto err; 1824 } 1825 } 1826 err = 0; 1827 err: 1828 reloc_cache_reset(&eb->reloc_cache, eb); 1829 return err; 1830 } 1831 1832 static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) 1833 { 1834 const char __user *addr, *end; 1835 unsigned long size; 1836 char __maybe_unused c; 1837 1838 size = entry->relocation_count; 1839 if (size == 0) 1840 return 0; 1841 1842 if (size > N_RELOC(ULONG_MAX)) 1843 return -EINVAL; 1844 1845 addr = u64_to_user_ptr(entry->relocs_ptr); 1846 size *= sizeof(struct drm_i915_gem_relocation_entry); 1847 if (!access_ok(addr, size)) 1848 return -EFAULT; 1849 1850 end = addr + size; 1851 for (; addr < end; addr += PAGE_SIZE) { 1852 int err = __get_user(c, addr); 1853 if (err) 1854 return err; 1855 } 1856 return __get_user(c, end - 1); 1857 } 1858 1859 static int eb_copy_relocations(const struct i915_execbuffer *eb) 1860 { 1861 struct drm_i915_gem_relocation_entry *relocs; 1862 const unsigned int count = eb->buffer_count; 1863 unsigned int i; 1864 int err; 1865 1866 for (i = 0; i < count; i++) { 1867 const unsigned int nreloc = eb->exec[i].relocation_count; 1868 struct drm_i915_gem_relocation_entry __user *urelocs; 1869 unsigned long size; 1870 unsigned long copied; 1871 1872 if (nreloc == 0) 1873 continue; 1874 1875 err = check_relocations(&eb->exec[i]); 1876 if (err) 1877 goto err; 1878 1879 urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); 1880 size = nreloc * sizeof(*relocs); 1881 1882 relocs = kvmalloc_array(size, 1, GFP_KERNEL); 1883 if (!relocs) { 1884 err = -ENOMEM; 1885 goto err; 1886 } 1887 1888 /* copy_from_user is limited to < 4GiB */ 1889 copied = 0; 1890 do { 1891 unsigned int len = 1892 min_t(u64, BIT_ULL(31), size - copied); 1893 1894 if (__copy_from_user((char *)relocs + copied, 1895 (char __user *)urelocs + copied, 1896 len)) 1897 goto end; 1898 1899 copied += len; 1900 } while (copied < size); 1901 1902 /* 1903 * As we do not update the known relocation offsets after 1904 * relocating (due to the complexities in lock handling), 1905 * we need to mark them as invalid now so that we force the 1906 * relocation processing next time. Just in case the target 1907 * object is evicted and then rebound into its old 1908 * presumed_offset before the next execbuffer - if that 1909 * happened we would make the mistake of assuming that the 1910 * relocations were valid. 
		 */
		if (!user_access_begin(urelocs, size))
			goto end;

		for (copied = 0; copied < nreloc; copied++)
			unsafe_put_user(-1,
					&urelocs[copied].presumed_offset,
					end_user);
		user_access_end();

		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
	}

	return 0;

end_user:
	user_access_end();
end:
	kvfree(relocs);
	err = -EFAULT;
err:
	while (i--) {
		relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
		if (eb->exec[i].relocation_count)
			kvfree(relocs);
	}
	return err;
}

static int eb_prefault_relocations(const struct i915_execbuffer *eb)
{
	const unsigned int count = eb->buffer_count;
	unsigned int i;

	for (i = 0; i < count; i++) {
		int err;

		err = check_relocations(&eb->exec[i]);
		if (err)
			return err;
	}

	return 0;
}

static int eb_reinit_userptr(struct i915_execbuffer *eb)
{
	const unsigned int count = eb->buffer_count;
	unsigned int i;
	int ret;

	if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
		return 0;

	for (i = 0; i < count; i++) {
		struct eb_vma *ev = &eb->vma[i];

		if (!i915_gem_object_is_userptr(ev->vma->obj))
			continue;

		ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
		if (ret)
			return ret;

		ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
	}

	return 0;
}

static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
					   struct i915_request *rq)
{
	bool have_copy = false;
	struct eb_vma *ev;
	int err = 0;

repeat:
	if (signal_pending(current)) {
		err = -ERESTARTSYS;
		goto out;
	}

	/* We may process another execbuffer during the unlock... */
	eb_release_vmas(eb, false, true);
	i915_gem_ww_ctx_fini(&eb->ww);

	if (rq) {
		/* nonblocking is always false */
		if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0) {
			i915_request_put(rq);
			rq = NULL;

			err = -EINTR;
			goto err_relock;
		}

		i915_request_put(rq);
		rq = NULL;
	}

	/*
	 * We take 3 passes through the slowpath.
	 *
	 * 1 - we try to just prefault all the user relocation entries and
	 * then attempt to reuse the atomic pagefault disabled fast path again.
	 *
	 * 2 - we copy the user entries to a local buffer here, with
	 * pagefaults enabled, and allow ourselves to wait upon any rendering
	 * before performing the relocations.
	 *
	 * 3 - we already have a local copy of the relocation entries, but
	 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
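	 *
	 * The pass below is selected from (err, have_copy):
	 *
	 *    !err              - prefault and retry the atomic fast path
	 *    err && !have_copy - copy the user relocations into kernel memory
	 *    err && have_copy  - cond_resched() and retry with the local copy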
	 */
	if (!err) {
		err = eb_prefault_relocations(eb);
	} else if (!have_copy) {
		err = eb_copy_relocations(eb);
		have_copy = err == 0;
	} else {
		cond_resched();
		err = 0;
	}

	if (!err)
		err = eb_reinit_userptr(eb);

err_relock:
	i915_gem_ww_ctx_init(&eb->ww, true);
	if (err)
		goto out;

	/* reacquire the objects */
repeat_validate:
	rq = eb_pin_engine(eb, false);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		rq = NULL;
		goto err;
	}

	/* We didn't throttle, should be NULL */
	GEM_WARN_ON(rq);

	err = eb_validate_vmas(eb);
	if (err)
		goto err;

	GEM_BUG_ON(!eb->batch);

	list_for_each_entry(ev, &eb->relocs, reloc_link) {
		if (!have_copy) {
			pagefault_disable();
			err = eb_relocate_vma(eb, ev);
			pagefault_enable();
			if (err)
				break;
		} else {
			err = eb_relocate_vma_slow(eb, ev);
			if (err)
				break;
		}
	}

	if (err == -EDEADLK)
		goto err;

	if (err && !have_copy)
		goto repeat;

	if (err)
		goto err;

	/* as last step, parse the command buffer */
	err = eb_parse(eb);
	if (err)
		goto err;

	/*
	 * Leave the user relocations as they are, this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	if (err == -EDEADLK) {
		eb_release_vmas(eb, false, false);
		err = i915_gem_ww_ctx_backoff(&eb->ww);
		if (!err)
			goto repeat_validate;
	}

	if (err == -EAGAIN)
		goto repeat;

out:
	if (have_copy) {
		const unsigned int count = eb->buffer_count;
		unsigned int i;

		for (i = 0; i < count; i++) {
			const struct drm_i915_gem_exec_object2 *entry =
				&eb->exec[i];
			struct drm_i915_gem_relocation_entry *relocs;

			if (!entry->relocation_count)
				continue;

			relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
			kvfree(relocs);
		}
	}

	if (rq)
		i915_request_put(rq);

	return err;
}

static int eb_relocate_parse(struct i915_execbuffer *eb)
{
	int err;
	struct i915_request *rq = NULL;
	bool throttle = true;

retry:
	rq = eb_pin_engine(eb, throttle);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		rq = NULL;
		if (err != -EDEADLK)
			return err;

		goto err;
	}

	if (rq) {
		bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;

		/* Need to drop all locks now for throttling, take slowpath */
		err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, 0);
		if (err == -ETIME) {
			if (nonblock) {
				err = -EWOULDBLOCK;
				i915_request_put(rq);
				goto err;
			}
			goto slow;
		}
		i915_request_put(rq);
		rq = NULL;
	}

	/* only throttle once, even if we didn't need to throttle */
	throttle = false;

	err = eb_validate_vmas(eb);
	if (err == -EAGAIN)
		goto slow;
	else if (err)
		goto err;

	/*
	 * The objects are in their final locations, apply the relocations.
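	 * A -EDEADLK from the ww context here is handed to the backoff path
	 * below and the whole validate/relocate sequence is retried; any
	 * other relocation failure drops us into the pagefault-enabled slow
	 * path instead.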
*/ 2176 if (eb->args->flags & __EXEC_HAS_RELOC) { 2177 struct eb_vma *ev; 2178 2179 list_for_each_entry(ev, &eb->relocs, reloc_link) { 2180 err = eb_relocate_vma(eb, ev); 2181 if (err) 2182 break; 2183 } 2184 2185 if (err == -EDEADLK) 2186 goto err; 2187 else if (err) 2188 goto slow; 2189 } 2190 2191 if (!err) 2192 err = eb_parse(eb); 2193 2194 err: 2195 if (err == -EDEADLK) { 2196 eb_release_vmas(eb, false, false); 2197 err = i915_gem_ww_ctx_backoff(&eb->ww); 2198 if (!err) 2199 goto retry; 2200 } 2201 2202 return err; 2203 2204 slow: 2205 err = eb_relocate_parse_slow(eb, rq); 2206 if (err) 2207 /* 2208 * If the user expects the execobject.offset and 2209 * reloc.presumed_offset to be an exact match, 2210 * as for using NO_RELOC, then we cannot update 2211 * the execobject.offset until we have completed 2212 * relocation. 2213 */ 2214 eb->args->flags &= ~__EXEC_HAS_RELOC; 2215 2216 return err; 2217 } 2218 2219 static int eb_move_to_gpu(struct i915_execbuffer *eb) 2220 { 2221 const unsigned int count = eb->buffer_count; 2222 unsigned int i = count; 2223 int err = 0; 2224 2225 while (i--) { 2226 struct eb_vma *ev = &eb->vma[i]; 2227 struct i915_vma *vma = ev->vma; 2228 unsigned int flags = ev->flags; 2229 struct drm_i915_gem_object *obj = vma->obj; 2230 2231 assert_vma_held(vma); 2232 2233 if (flags & EXEC_OBJECT_CAPTURE) { 2234 struct i915_capture_list *capture; 2235 2236 capture = kmalloc(sizeof(*capture), GFP_KERNEL); 2237 if (capture) { 2238 capture->next = eb->request->capture_list; 2239 capture->vma = vma; 2240 eb->request->capture_list = capture; 2241 } 2242 } 2243 2244 /* 2245 * If the GPU is not _reading_ through the CPU cache, we need 2246 * to make sure that any writes (both previous GPU writes from 2247 * before a change in snooping levels and normal CPU writes) 2248 * caught in that cache are flushed to main memory. 2249 * 2250 * We want to say 2251 * obj->cache_dirty && 2252 * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) 2253 * but gcc's optimiser doesn't handle that as well and emits 2254 * two jumps instead of one. Maybe one day... 2255 */ 2256 if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) { 2257 if (i915_gem_clflush_object(obj, 0)) 2258 flags &= ~EXEC_OBJECT_ASYNC; 2259 } 2260 2261 if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) { 2262 err = i915_request_await_object 2263 (eb->request, obj, flags & EXEC_OBJECT_WRITE); 2264 } 2265 2266 if (err == 0) 2267 err = i915_vma_move_to_active(vma, eb->request, 2268 flags | __EXEC_OBJECT_NO_RESERVE); 2269 } 2270 2271 #ifdef CONFIG_MMU_NOTIFIER 2272 if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) { 2273 spin_lock(&eb->i915->mm.notifier_lock); 2274 2275 /* 2276 * count is always at least 1, otherwise __EXEC_USERPTR_USED 2277 * could not have been set 2278 */ 2279 for (i = 0; i < count; i++) { 2280 struct eb_vma *ev = &eb->vma[i]; 2281 struct drm_i915_gem_object *obj = ev->vma->obj; 2282 2283 if (!i915_gem_object_is_userptr(obj)) 2284 continue; 2285 2286 err = i915_gem_object_userptr_submit_done(obj); 2287 if (err) 2288 break; 2289 } 2290 2291 spin_unlock(&eb->i915->mm.notifier_lock); 2292 } 2293 #endif 2294 2295 if (unlikely(err)) 2296 goto err_skip; 2297 2298 /* Unconditionally flush any chipset caches (for streaming writes). 
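	 * This complements the per-object clflushes above: those deal with
	 * dirty CPU cachelines, while intel_gt_chipset_flush() pushes out
	 * writes buffered outside the CPU caches before the GPU starts
	 * reading the batch.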
*/ 2299 intel_gt_chipset_flush(eb->engine->gt); 2300 return 0; 2301 2302 err_skip: 2303 i915_request_set_error_once(eb->request, err); 2304 return err; 2305 } 2306 2307 static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) 2308 { 2309 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) 2310 return -EINVAL; 2311 2312 /* Kernel clipping was a DRI1 misfeature */ 2313 if (!(exec->flags & (I915_EXEC_FENCE_ARRAY | 2314 I915_EXEC_USE_EXTENSIONS))) { 2315 if (exec->num_cliprects || exec->cliprects_ptr) 2316 return -EINVAL; 2317 } 2318 2319 if (exec->DR4 == 0xffffffff) { 2320 DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); 2321 exec->DR4 = 0; 2322 } 2323 if (exec->DR1 || exec->DR4) 2324 return -EINVAL; 2325 2326 if ((exec->batch_start_offset | exec->batch_len) & 0x7) 2327 return -EINVAL; 2328 2329 return 0; 2330 } 2331 2332 static int i915_reset_gen7_sol_offsets(struct i915_request *rq) 2333 { 2334 u32 *cs; 2335 int i; 2336 2337 if (GRAPHICS_VER(rq->engine->i915) != 7 || rq->engine->id != RCS0) { 2338 drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n"); 2339 return -EINVAL; 2340 } 2341 2342 cs = intel_ring_begin(rq, 4 * 2 + 2); 2343 if (IS_ERR(cs)) 2344 return PTR_ERR(cs); 2345 2346 *cs++ = MI_LOAD_REGISTER_IMM(4); 2347 for (i = 0; i < 4; i++) { 2348 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); 2349 *cs++ = 0; 2350 } 2351 *cs++ = MI_NOOP; 2352 intel_ring_advance(rq, cs); 2353 2354 return 0; 2355 } 2356 2357 static struct i915_vma * 2358 shadow_batch_pin(struct i915_execbuffer *eb, 2359 struct drm_i915_gem_object *obj, 2360 struct i915_address_space *vm, 2361 unsigned int flags) 2362 { 2363 struct i915_vma *vma; 2364 int err; 2365 2366 vma = i915_vma_instance(obj, vm, NULL); 2367 if (IS_ERR(vma)) 2368 return vma; 2369 2370 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags); 2371 if (err) 2372 return ERR_PTR(err); 2373 2374 return vma; 2375 } 2376 2377 static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) 2378 { 2379 /* 2380 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure 2381 * batch" bit. Hence we need to pin secure batches into the global gtt. 2382 * hsw should have this fixed, but bdw mucks it up again. 
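	 *
	 * When I915_DISPATCH_SECURE is set we therefore pin the batch into
	 * the GGTT below; the returned binding replaces eb->batch so that
	 * emit_bb_start() is handed a GGTT address. Otherwise we return NULL
	 * and the ppGTT binding from reservation is used as-is.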
*/ 2383 if (eb->batch_flags & I915_DISPATCH_SECURE) 2384 return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0); 2385 2386 return NULL; 2387 } 2388 2389 static int eb_parse(struct i915_execbuffer *eb) 2390 { 2391 struct drm_i915_private *i915 = eb->i915; 2392 struct intel_gt_buffer_pool_node *pool = eb->batch_pool; 2393 struct i915_vma *shadow, *trampoline, *batch; 2394 unsigned long len; 2395 int err; 2396 2397 if (!eb_use_cmdparser(eb)) { 2398 batch = eb_dispatch_secure(eb, eb->batch->vma); 2399 if (IS_ERR(batch)) 2400 return PTR_ERR(batch); 2401 2402 goto secure_batch; 2403 } 2404 2405 len = eb->batch_len; 2406 if (!CMDPARSER_USES_GGTT(eb->i915)) { 2407 /* 2408 * ppGTT backed shadow buffers must be mapped RO, to prevent 2409 * post-scan tampering 2410 */ 2411 if (!eb->context->vm->has_read_only) { 2412 drm_dbg(&i915->drm, 2413 "Cannot prevent post-scan tampering without RO capable vm\n"); 2414 return -EINVAL; 2415 } 2416 } else { 2417 len += I915_CMD_PARSER_TRAMPOLINE_SIZE; 2418 } 2419 if (unlikely(len < eb->batch_len)) /* last paranoid check of overflow */ 2420 return -EINVAL; 2421 2422 if (!pool) { 2423 pool = intel_gt_get_buffer_pool(eb->engine->gt, len, 2424 I915_MAP_WB); 2425 if (IS_ERR(pool)) 2426 return PTR_ERR(pool); 2427 eb->batch_pool = pool; 2428 } 2429 2430 err = i915_gem_object_lock(pool->obj, &eb->ww); 2431 if (err) 2432 goto err; 2433 2434 shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER); 2435 if (IS_ERR(shadow)) { 2436 err = PTR_ERR(shadow); 2437 goto err; 2438 } 2439 intel_gt_buffer_pool_mark_used(pool); 2440 i915_gem_object_set_readonly(shadow->obj); 2441 shadow->private = pool; 2442 2443 trampoline = NULL; 2444 if (CMDPARSER_USES_GGTT(eb->i915)) { 2445 trampoline = shadow; 2446 2447 shadow = shadow_batch_pin(eb, pool->obj, 2448 &eb->engine->gt->ggtt->vm, 2449 PIN_GLOBAL); 2450 if (IS_ERR(shadow)) { 2451 err = PTR_ERR(shadow); 2452 shadow = trampoline; 2453 goto err_shadow; 2454 } 2455 shadow->private = pool; 2456 2457 eb->batch_flags |= I915_DISPATCH_SECURE; 2458 } 2459 2460 batch = eb_dispatch_secure(eb, shadow); 2461 if (IS_ERR(batch)) { 2462 err = PTR_ERR(batch); 2463 goto err_trampoline; 2464 } 2465 2466 err = dma_resv_reserve_shared(shadow->resv, 1); 2467 if (err) 2468 goto err_trampoline; 2469 2470 err = intel_engine_cmd_parser(eb->engine, 2471 eb->batch->vma, 2472 eb->batch_start_offset, 2473 eb->batch_len, 2474 shadow, trampoline); 2475 if (err) 2476 goto err_unpin_batch; 2477 2478 eb->batch = &eb->vma[eb->buffer_count++]; 2479 eb->batch->vma = i915_vma_get(shadow); 2480 eb->batch->flags = __EXEC_OBJECT_HAS_PIN; 2481 2482 eb->trampoline = trampoline; 2483 eb->batch_start_offset = 0; 2484 2485 secure_batch: 2486 if (batch) { 2487 eb->batch = &eb->vma[eb->buffer_count++]; 2488 eb->batch->flags = __EXEC_OBJECT_HAS_PIN; 2489 eb->batch->vma = i915_vma_get(batch); 2490 } 2491 return 0; 2492 2493 err_unpin_batch: 2494 if (batch) 2495 i915_vma_unpin(batch); 2496 err_trampoline: 2497 if (trampoline) 2498 i915_vma_unpin(trampoline); 2499 err_shadow: 2500 i915_vma_unpin(shadow); 2501 err: 2502 return err; 2503 } 2504 2505 static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch) 2506 { 2507 int err; 2508 2509 if (intel_context_nopreempt(eb->context)) 2510 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags); 2511 2512 err = eb_move_to_gpu(eb); 2513 if (err) 2514 return err; 2515 2516 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { 2517 err = i915_reset_gen7_sol_offsets(eb->request); 2518 if (err) 2519 return 
err; 2520 } 2521 2522 /* 2523 * After we completed waiting for other engines (using HW semaphores) 2524 * then we can signal that this request/batch is ready to run. This 2525 * allows us to determine if the batch is still waiting on the GPU 2526 * or actually running by checking the breadcrumb. 2527 */ 2528 if (eb->engine->emit_init_breadcrumb) { 2529 err = eb->engine->emit_init_breadcrumb(eb->request); 2530 if (err) 2531 return err; 2532 } 2533 2534 err = eb->engine->emit_bb_start(eb->request, 2535 batch->node.start + 2536 eb->batch_start_offset, 2537 eb->batch_len, 2538 eb->batch_flags); 2539 if (err) 2540 return err; 2541 2542 if (eb->trampoline) { 2543 GEM_BUG_ON(eb->batch_start_offset); 2544 err = eb->engine->emit_bb_start(eb->request, 2545 eb->trampoline->node.start + 2546 eb->batch_len, 2547 0, 0); 2548 if (err) 2549 return err; 2550 } 2551 2552 return 0; 2553 } 2554 2555 static int num_vcs_engines(const struct drm_i915_private *i915) 2556 { 2557 return hweight_long(VDBOX_MASK(&i915->gt)); 2558 } 2559 2560 /* 2561 * Find one BSD ring to dispatch the corresponding BSD command. 2562 * The engine index is returned. 2563 */ 2564 static unsigned int 2565 gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, 2566 struct drm_file *file) 2567 { 2568 struct drm_i915_file_private *file_priv = file->driver_priv; 2569 2570 /* Check whether the file_priv has already selected one ring. */ 2571 if ((int)file_priv->bsd_engine < 0) 2572 file_priv->bsd_engine = 2573 get_random_int() % num_vcs_engines(dev_priv); 2574 2575 return file_priv->bsd_engine; 2576 } 2577 2578 static const enum intel_engine_id user_ring_map[] = { 2579 [I915_EXEC_DEFAULT] = RCS0, 2580 [I915_EXEC_RENDER] = RCS0, 2581 [I915_EXEC_BLT] = BCS0, 2582 [I915_EXEC_BSD] = VCS0, 2583 [I915_EXEC_VEBOX] = VECS0 2584 }; 2585 2586 static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce) 2587 { 2588 struct intel_ring *ring = ce->ring; 2589 struct intel_timeline *tl = ce->timeline; 2590 struct i915_request *rq; 2591 2592 /* 2593 * Completely unscientific finger-in-the-air estimates for suitable 2594 * maximum user request size (to avoid blocking) and then backoff. 2595 */ 2596 if (intel_ring_update_space(ring) >= PAGE_SIZE) 2597 return NULL; 2598 2599 /* 2600 * Find a request that after waiting upon, there will be at least half 2601 * the ring available. The hysteresis allows us to compete for the 2602 * shared ring and should mean that we sleep less often prior to 2603 * claiming our resources, but not so long that the ring completely 2604 * drains before we can submit our next request. 2605 */ 2606 list_for_each_entry(rq, &tl->requests, link) { 2607 if (rq->ring != ring) 2608 continue; 2609 2610 if (__intel_ring_space(rq->postfix, 2611 ring->emit, ring->size) > ring->size / 2) 2612 break; 2613 } 2614 if (&rq->link == &tl->requests) 2615 return NULL; /* weird, we will check again later for real */ 2616 2617 return i915_request_get(rq); 2618 } 2619 2620 static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throttle) 2621 { 2622 struct intel_context *ce = eb->context; 2623 struct intel_timeline *tl; 2624 struct i915_request *rq = NULL; 2625 int err; 2626 2627 GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED); 2628 2629 if (unlikely(intel_context_is_banned(ce))) 2630 return ERR_PTR(-EIO); 2631 2632 /* 2633 * Pinning the contexts may generate requests in order to acquire 2634 * GGTT space, so do this first before we reserve a seqno for 2635 * ourselves. 
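	 *
	 * The pin is performed under eb->ww, so a -EDEADLK raised while
	 * acquiring GGTT space unwinds through the same ww backoff path as
	 * the object locks.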
2636 */ 2637 err = intel_context_pin_ww(ce, &eb->ww); 2638 if (err) 2639 return ERR_PTR(err); 2640 2641 /* 2642 * Take a local wakeref for preparing to dispatch the execbuf as 2643 * we expect to access the hardware fairly frequently in the 2644 * process, and require the engine to be kept awake between accesses. 2645 * Upon dispatch, we acquire another prolonged wakeref that we hold 2646 * until the timeline is idle, which in turn releases the wakeref 2647 * taken on the engine, and the parent device. 2648 */ 2649 tl = intel_context_timeline_lock(ce); 2650 if (IS_ERR(tl)) { 2651 intel_context_unpin(ce); 2652 return ERR_CAST(tl); 2653 } 2654 2655 intel_context_enter(ce); 2656 if (throttle) 2657 rq = eb_throttle(eb, ce); 2658 intel_context_timeline_unlock(tl); 2659 2660 eb->args->flags |= __EXEC_ENGINE_PINNED; 2661 return rq; 2662 } 2663 2664 static void eb_unpin_engine(struct i915_execbuffer *eb) 2665 { 2666 struct intel_context *ce = eb->context; 2667 struct intel_timeline *tl = ce->timeline; 2668 2669 if (!(eb->args->flags & __EXEC_ENGINE_PINNED)) 2670 return; 2671 2672 eb->args->flags &= ~__EXEC_ENGINE_PINNED; 2673 2674 mutex_lock(&tl->mutex); 2675 intel_context_exit(ce); 2676 mutex_unlock(&tl->mutex); 2677 2678 intel_context_unpin(ce); 2679 } 2680 2681 static unsigned int 2682 eb_select_legacy_ring(struct i915_execbuffer *eb) 2683 { 2684 struct drm_i915_private *i915 = eb->i915; 2685 struct drm_i915_gem_execbuffer2 *args = eb->args; 2686 unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; 2687 2688 if (user_ring_id != I915_EXEC_BSD && 2689 (args->flags & I915_EXEC_BSD_MASK)) { 2690 drm_dbg(&i915->drm, 2691 "execbuf with non bsd ring but with invalid " 2692 "bsd dispatch flags: %d\n", (int)(args->flags)); 2693 return -1; 2694 } 2695 2696 if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) { 2697 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; 2698 2699 if (bsd_idx == I915_EXEC_BSD_DEFAULT) { 2700 bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file); 2701 } else if (bsd_idx >= I915_EXEC_BSD_RING1 && 2702 bsd_idx <= I915_EXEC_BSD_RING2) { 2703 bsd_idx >>= I915_EXEC_BSD_SHIFT; 2704 bsd_idx--; 2705 } else { 2706 drm_dbg(&i915->drm, 2707 "execbuf with unknown bsd ring: %u\n", 2708 bsd_idx); 2709 return -1; 2710 } 2711 2712 return _VCS(bsd_idx); 2713 } 2714 2715 if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { 2716 drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n", 2717 user_ring_id); 2718 return -1; 2719 } 2720 2721 return user_ring_map[user_ring_id]; 2722 } 2723 2724 static int 2725 eb_select_engine(struct i915_execbuffer *eb) 2726 { 2727 struct intel_context *ce; 2728 unsigned int idx; 2729 int err; 2730 2731 if (i915_gem_context_user_engines(eb->gem_context)) 2732 idx = eb->args->flags & I915_EXEC_RING_MASK; 2733 else 2734 idx = eb_select_legacy_ring(eb); 2735 2736 ce = i915_gem_context_get_engine(eb->gem_context, idx); 2737 if (IS_ERR(ce)) 2738 return PTR_ERR(ce); 2739 2740 intel_gt_pm_get(ce->engine->gt); 2741 2742 if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { 2743 err = intel_context_alloc_state(ce); 2744 if (err) 2745 goto err; 2746 } 2747 2748 /* 2749 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report 2750 * EIO if the GPU is already wedged. 2751 */ 2752 err = intel_gt_terminally_wedged(ce->engine->gt); 2753 if (err) 2754 goto err; 2755 2756 eb->context = ce; 2757 eb->engine = ce->engine; 2758 2759 /* 2760 * Make sure engine pool stays alive even if we call intel_context_put 2761 * during ww handling. 
The pool is destroyed when last pm reference 2762 * is dropped, which breaks our -EDEADLK handling. 2763 */ 2764 return err; 2765 2766 err: 2767 intel_gt_pm_put(ce->engine->gt); 2768 intel_context_put(ce); 2769 return err; 2770 } 2771 2772 static void 2773 eb_put_engine(struct i915_execbuffer *eb) 2774 { 2775 intel_gt_pm_put(eb->engine->gt); 2776 intel_context_put(eb->context); 2777 } 2778 2779 static void 2780 __free_fence_array(struct eb_fence *fences, unsigned int n) 2781 { 2782 while (n--) { 2783 drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); 2784 dma_fence_put(fences[n].dma_fence); 2785 kfree(fences[n].chain_fence); 2786 } 2787 kvfree(fences); 2788 } 2789 2790 static int 2791 add_timeline_fence_array(struct i915_execbuffer *eb, 2792 const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences) 2793 { 2794 struct drm_i915_gem_exec_fence __user *user_fences; 2795 u64 __user *user_values; 2796 struct eb_fence *f; 2797 u64 nfences; 2798 int err = 0; 2799 2800 nfences = timeline_fences->fence_count; 2801 if (!nfences) 2802 return 0; 2803 2804 /* Check multiplication overflow for access_ok() and kvmalloc_array() */ 2805 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); 2806 if (nfences > min_t(unsigned long, 2807 ULONG_MAX / sizeof(*user_fences), 2808 SIZE_MAX / sizeof(*f)) - eb->num_fences) 2809 return -EINVAL; 2810 2811 user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); 2812 if (!access_ok(user_fences, nfences * sizeof(*user_fences))) 2813 return -EFAULT; 2814 2815 user_values = u64_to_user_ptr(timeline_fences->values_ptr); 2816 if (!access_ok(user_values, nfences * sizeof(*user_values))) 2817 return -EFAULT; 2818 2819 f = krealloc(eb->fences, 2820 (eb->num_fences + nfences) * sizeof(*f), 2821 __GFP_NOWARN | GFP_KERNEL); 2822 if (!f) 2823 return -ENOMEM; 2824 2825 eb->fences = f; 2826 f += eb->num_fences; 2827 2828 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & 2829 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); 2830 2831 while (nfences--) { 2832 struct drm_i915_gem_exec_fence user_fence; 2833 struct drm_syncobj *syncobj; 2834 struct dma_fence *fence = NULL; 2835 u64 point; 2836 2837 if (__copy_from_user(&user_fence, 2838 user_fences++, 2839 sizeof(user_fence))) 2840 return -EFAULT; 2841 2842 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) 2843 return -EINVAL; 2844 2845 if (__get_user(point, user_values++)) 2846 return -EFAULT; 2847 2848 syncobj = drm_syncobj_find(eb->file, user_fence.handle); 2849 if (!syncobj) { 2850 DRM_DEBUG("Invalid syncobj handle provided\n"); 2851 return -ENOENT; 2852 } 2853 2854 fence = drm_syncobj_fence_get(syncobj); 2855 2856 if (!fence && user_fence.flags && 2857 !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2858 DRM_DEBUG("Syncobj handle has no fence\n"); 2859 drm_syncobj_put(syncobj); 2860 return -EINVAL; 2861 } 2862 2863 if (fence) 2864 err = dma_fence_chain_find_seqno(&fence, point); 2865 2866 if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2867 DRM_DEBUG("Syncobj handle missing requested point %llu\n", point); 2868 dma_fence_put(fence); 2869 drm_syncobj_put(syncobj); 2870 return err; 2871 } 2872 2873 /* 2874 * A point might have been signaled already and 2875 * garbage collected from the timeline. In this case 2876 * just ignore the point and carry on. 2877 */ 2878 if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) { 2879 drm_syncobj_put(syncobj); 2880 continue; 2881 } 2882 2883 /* 2884 * For timeline syncobjs we need to preallocate chains for 2885 * later signaling. 
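		 * The dma_fence_chain node is allocated here so that
		 * signal_fence_array() can attach the new point with
		 * drm_syncobj_add_point() without having to allocate (and
		 * potentially fail) after the request has been committed.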
2886 */ 2887 if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { 2888 /* 2889 * Waiting and signaling the same point (when point != 2890 * 0) would break the timeline. 2891 */ 2892 if (user_fence.flags & I915_EXEC_FENCE_WAIT) { 2893 DRM_DEBUG("Trying to wait & signal the same timeline point.\n"); 2894 dma_fence_put(fence); 2895 drm_syncobj_put(syncobj); 2896 return -EINVAL; 2897 } 2898 2899 f->chain_fence = 2900 kmalloc(sizeof(*f->chain_fence), 2901 GFP_KERNEL); 2902 if (!f->chain_fence) { 2903 drm_syncobj_put(syncobj); 2904 dma_fence_put(fence); 2905 return -ENOMEM; 2906 } 2907 } else { 2908 f->chain_fence = NULL; 2909 } 2910 2911 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); 2912 f->dma_fence = fence; 2913 f->value = point; 2914 f++; 2915 eb->num_fences++; 2916 } 2917 2918 return 0; 2919 } 2920 2921 static int add_fence_array(struct i915_execbuffer *eb) 2922 { 2923 struct drm_i915_gem_execbuffer2 *args = eb->args; 2924 struct drm_i915_gem_exec_fence __user *user; 2925 unsigned long num_fences = args->num_cliprects; 2926 struct eb_fence *f; 2927 2928 if (!(args->flags & I915_EXEC_FENCE_ARRAY)) 2929 return 0; 2930 2931 if (!num_fences) 2932 return 0; 2933 2934 /* Check multiplication overflow for access_ok() and kvmalloc_array() */ 2935 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); 2936 if (num_fences > min_t(unsigned long, 2937 ULONG_MAX / sizeof(*user), 2938 SIZE_MAX / sizeof(*f) - eb->num_fences)) 2939 return -EINVAL; 2940 2941 user = u64_to_user_ptr(args->cliprects_ptr); 2942 if (!access_ok(user, num_fences * sizeof(*user))) 2943 return -EFAULT; 2944 2945 f = krealloc(eb->fences, 2946 (eb->num_fences + num_fences) * sizeof(*f), 2947 __GFP_NOWARN | GFP_KERNEL); 2948 if (!f) 2949 return -ENOMEM; 2950 2951 eb->fences = f; 2952 f += eb->num_fences; 2953 while (num_fences--) { 2954 struct drm_i915_gem_exec_fence user_fence; 2955 struct drm_syncobj *syncobj; 2956 struct dma_fence *fence = NULL; 2957 2958 if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) 2959 return -EFAULT; 2960 2961 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) 2962 return -EINVAL; 2963 2964 syncobj = drm_syncobj_find(eb->file, user_fence.handle); 2965 if (!syncobj) { 2966 DRM_DEBUG("Invalid syncobj handle provided\n"); 2967 return -ENOENT; 2968 } 2969 2970 if (user_fence.flags & I915_EXEC_FENCE_WAIT) { 2971 fence = drm_syncobj_fence_get(syncobj); 2972 if (!fence) { 2973 DRM_DEBUG("Syncobj handle has no fence\n"); 2974 drm_syncobj_put(syncobj); 2975 return -EINVAL; 2976 } 2977 } 2978 2979 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & 2980 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); 2981 2982 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); 2983 f->dma_fence = fence; 2984 f->value = 0; 2985 f->chain_fence = NULL; 2986 f++; 2987 eb->num_fences++; 2988 } 2989 2990 return 0; 2991 } 2992 2993 static void put_fence_array(struct eb_fence *fences, int num_fences) 2994 { 2995 if (fences) 2996 __free_fence_array(fences, num_fences); 2997 } 2998 2999 static int 3000 await_fence_array(struct i915_execbuffer *eb) 3001 { 3002 unsigned int n; 3003 int err; 3004 3005 for (n = 0; n < eb->num_fences; n++) { 3006 struct drm_syncobj *syncobj; 3007 unsigned int flags; 3008 3009 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); 3010 3011 if (!eb->fences[n].dma_fence) 3012 continue; 3013 3014 err = i915_request_await_dma_fence(eb->request, 3015 eb->fences[n].dma_fence); 3016 if (err < 0) 3017 return err; 3018 } 3019 3020 return 0; 3021 } 3022 3023 static void signal_fence_array(const 
struct i915_execbuffer *eb) 3024 { 3025 struct dma_fence * const fence = &eb->request->fence; 3026 unsigned int n; 3027 3028 for (n = 0; n < eb->num_fences; n++) { 3029 struct drm_syncobj *syncobj; 3030 unsigned int flags; 3031 3032 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); 3033 if (!(flags & I915_EXEC_FENCE_SIGNAL)) 3034 continue; 3035 3036 if (eb->fences[n].chain_fence) { 3037 drm_syncobj_add_point(syncobj, 3038 eb->fences[n].chain_fence, 3039 fence, 3040 eb->fences[n].value); 3041 /* 3042 * The chain's ownership is transferred to the 3043 * timeline. 3044 */ 3045 eb->fences[n].chain_fence = NULL; 3046 } else { 3047 drm_syncobj_replace_fence(syncobj, fence); 3048 } 3049 } 3050 } 3051 3052 static int 3053 parse_timeline_fences(struct i915_user_extension __user *ext, void *data) 3054 { 3055 struct i915_execbuffer *eb = data; 3056 struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; 3057 3058 if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences))) 3059 return -EFAULT; 3060 3061 return add_timeline_fence_array(eb, &timeline_fences); 3062 } 3063 3064 static void retire_requests(struct intel_timeline *tl, struct i915_request *end) 3065 { 3066 struct i915_request *rq, *rn; 3067 3068 list_for_each_entry_safe(rq, rn, &tl->requests, link) 3069 if (rq == end || !i915_request_retire(rq)) 3070 break; 3071 } 3072 3073 static int eb_request_add(struct i915_execbuffer *eb, int err) 3074 { 3075 struct i915_request *rq = eb->request; 3076 struct intel_timeline * const tl = i915_request_timeline(rq); 3077 struct i915_sched_attr attr = {}; 3078 struct i915_request *prev; 3079 3080 lockdep_assert_held(&tl->mutex); 3081 lockdep_unpin_lock(&tl->mutex, rq->cookie); 3082 3083 trace_i915_request_add(rq); 3084 3085 prev = __i915_request_commit(rq); 3086 3087 /* Check that the context wasn't destroyed before submission */ 3088 if (likely(!intel_context_is_closed(eb->context))) { 3089 attr = eb->gem_context->sched; 3090 } else { 3091 /* Serialise with context_close via the add_to_timeline */ 3092 i915_request_set_error_once(rq, -ENOENT); 3093 __i915_request_skip(rq); 3094 err = -ENOENT; /* override any transient errors */ 3095 } 3096 3097 __i915_request_queue(rq, &attr); 3098 3099 /* Try to clean up the client's timeline after submitting the request */ 3100 if (prev) 3101 retire_requests(tl, prev); 3102 3103 mutex_unlock(&tl->mutex); 3104 3105 return err; 3106 } 3107 3108 static const i915_user_extension_fn execbuf_extensions[] = { 3109 [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, 3110 }; 3111 3112 static int 3113 parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args, 3114 struct i915_execbuffer *eb) 3115 { 3116 if (!(args->flags & I915_EXEC_USE_EXTENSIONS)) 3117 return 0; 3118 3119 /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot 3120 * have another flag also using it at the same time. 
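	 *
	 * As an illustrative sketch (the variable names are hypothetical and
	 * not taken from any particular userspace driver), chaining in a
	 * timeline-fences extension might look like this, with handles_ptr
	 * pointing at an array of struct drm_i915_gem_exec_fence and
	 * values_ptr at the matching u64 timeline points:
	 *
	 *	struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
	 *		.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
	 *		.fence_count = nfences,
	 *		.handles_ptr = (uintptr_t)fences,
	 *		.values_ptr = (uintptr_t)points,
	 *	};
	 *
	 *	execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
	 *	execbuf.cliprects_ptr = (uintptr_t)&ext;
	 *	execbuf.num_cliprects = 0;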
3121 */ 3122 if (eb->args->flags & I915_EXEC_FENCE_ARRAY) 3123 return -EINVAL; 3124 3125 if (args->num_cliprects != 0) 3126 return -EINVAL; 3127 3128 return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr), 3129 execbuf_extensions, 3130 ARRAY_SIZE(execbuf_extensions), 3131 eb); 3132 } 3133 3134 static int 3135 i915_gem_do_execbuffer(struct drm_device *dev, 3136 struct drm_file *file, 3137 struct drm_i915_gem_execbuffer2 *args, 3138 struct drm_i915_gem_exec_object2 *exec) 3139 { 3140 struct drm_i915_private *i915 = to_i915(dev); 3141 struct i915_execbuffer eb; 3142 struct dma_fence *in_fence = NULL; 3143 struct sync_file *out_fence = NULL; 3144 struct i915_vma *batch; 3145 int out_fence_fd = -1; 3146 int err; 3147 3148 BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); 3149 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & 3150 ~__EXEC_OBJECT_UNKNOWN_FLAGS); 3151 3152 eb.i915 = i915; 3153 eb.file = file; 3154 eb.args = args; 3155 if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) 3156 args->flags |= __EXEC_HAS_RELOC; 3157 3158 eb.exec = exec; 3159 eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); 3160 eb.vma[0].vma = NULL; 3161 eb.reloc_pool = eb.batch_pool = NULL; 3162 eb.reloc_context = NULL; 3163 3164 eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; 3165 reloc_cache_init(&eb.reloc_cache, eb.i915); 3166 3167 eb.buffer_count = args->buffer_count; 3168 eb.batch_start_offset = args->batch_start_offset; 3169 eb.batch_len = args->batch_len; 3170 eb.trampoline = NULL; 3171 3172 eb.fences = NULL; 3173 eb.num_fences = 0; 3174 3175 eb.batch_flags = 0; 3176 if (args->flags & I915_EXEC_SECURE) { 3177 if (GRAPHICS_VER(i915) >= 11) 3178 return -ENODEV; 3179 3180 /* Return -EPERM to trigger fallback code on old binaries. */ 3181 if (!HAS_SECURE_BATCHES(i915)) 3182 return -EPERM; 3183 3184 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) 3185 return -EPERM; 3186 3187 eb.batch_flags |= I915_DISPATCH_SECURE; 3188 } 3189 if (args->flags & I915_EXEC_IS_PINNED) 3190 eb.batch_flags |= I915_DISPATCH_PINNED; 3191 3192 err = parse_execbuf2_extensions(args, &eb); 3193 if (err) 3194 goto err_ext; 3195 3196 err = add_fence_array(&eb); 3197 if (err) 3198 goto err_ext; 3199 3200 #define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT) 3201 if (args->flags & IN_FENCES) { 3202 if ((args->flags & IN_FENCES) == IN_FENCES) 3203 return -EINVAL; 3204 3205 in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); 3206 if (!in_fence) { 3207 err = -EINVAL; 3208 goto err_ext; 3209 } 3210 } 3211 #undef IN_FENCES 3212 3213 if (args->flags & I915_EXEC_FENCE_OUT) { 3214 out_fence_fd = get_unused_fd_flags(O_CLOEXEC); 3215 if (out_fence_fd < 0) { 3216 err = out_fence_fd; 3217 goto err_in_fence; 3218 } 3219 } 3220 3221 err = eb_create(&eb); 3222 if (err) 3223 goto err_out_fence; 3224 3225 GEM_BUG_ON(!eb.lut_size); 3226 3227 err = eb_select_context(&eb); 3228 if (unlikely(err)) 3229 goto err_destroy; 3230 3231 err = eb_select_engine(&eb); 3232 if (unlikely(err)) 3233 goto err_context; 3234 3235 err = eb_lookup_vmas(&eb); 3236 if (err) { 3237 eb_release_vmas(&eb, true, true); 3238 goto err_engine; 3239 } 3240 3241 i915_gem_ww_ctx_init(&eb.ww, true); 3242 3243 err = eb_relocate_parse(&eb); 3244 if (err) { 3245 /* 3246 * If the user expects the execobject.offset and 3247 * reloc.presumed_offset to be an exact match, 3248 * as for using NO_RELOC, then we cannot update 3249 * the execobject.offset until we have completed 3250 * relocation. 
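		 *
		 * Clearing __EXEC_HAS_RELOC here makes
		 * i915_gem_execbuffer2_ioctl() skip copying the (possibly
		 * half-updated) offsets back to userspace, so the user's
		 * presumed offsets are left untouched on failure.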
		 */
		args->flags &= ~__EXEC_HAS_RELOC;
		goto err_vma;
	}

	ww_acquire_done(&eb.ww.ctx);

	batch = eb.batch->vma;

	/* All GPU relocation batches must be submitted prior to the user rq */
	GEM_BUG_ON(eb.reloc_cache.rq);

	/* Allocate a request for this batch buffer nice and early. */
	eb.request = i915_request_create(eb.context);
	if (IS_ERR(eb.request)) {
		err = PTR_ERR(eb.request);
		goto err_vma;
	}

	if (in_fence) {
		if (args->flags & I915_EXEC_FENCE_SUBMIT)
			err = i915_request_await_execution(eb.request,
							   in_fence,
							   eb.engine->bond_execute);
		else
			err = i915_request_await_dma_fence(eb.request,
							   in_fence);
		if (err < 0)
			goto err_request;
	}

	if (eb.fences) {
		err = await_fence_array(&eb);
		if (err)
			goto err_request;
	}

	if (out_fence_fd != -1) {
		out_fence = sync_file_create(&eb.request->fence);
		if (!out_fence) {
			err = -ENOMEM;
			goto err_request;
		}
	}

	/*
	 * Whilst this request exists, batch_obj will be on the
	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
	 * inactive_list and lose its active reference. Hence we do not need
	 * to explicitly hold another reference here.
	 */
	eb.request->batch = batch;
	if (eb.batch_pool)
		intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request);

	trace_i915_request_queue(eb.request, eb.batch_flags);
	err = eb_submit(&eb, batch);

err_request:
	i915_request_get(eb.request);
	err = eb_request_add(&eb, err);

	if (eb.fences)
		signal_fence_array(&eb);

	if (out_fence) {
		if (err == 0) {
			fd_install(out_fence_fd, out_fence->file);
			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
			args->rsvd2 |= (u64)out_fence_fd << 32;
			out_fence_fd = -1;
		} else {
			fput(out_fence->file);
		}
	}
	i915_request_put(eb.request);

err_vma:
	eb_release_vmas(&eb, true, true);
	if (eb.trampoline)
		i915_vma_unpin(eb.trampoline);
	WARN_ON(err == -EDEADLK);
	i915_gem_ww_ctx_fini(&eb.ww);

	if (eb.batch_pool)
		intel_gt_buffer_pool_put(eb.batch_pool);
	if (eb.reloc_pool)
		intel_gt_buffer_pool_put(eb.reloc_pool);
	if (eb.reloc_context)
		intel_context_put(eb.reloc_context);
err_engine:
	eb_put_engine(&eb);
err_context:
	i915_gem_context_put(eb.gem_context);
err_destroy:
	eb_destroy(&eb);
err_out_fence:
	if (out_fence_fd != -1)
		put_unused_fd(out_fence_fd);
err_in_fence:
	dma_fence_put(in_fence);
err_ext:
	put_fence_array(eb.fences, eb.num_fences);
	return err;
}

static size_t eb_element_size(void)
{
	return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
}

static bool check_buffer_count(size_t count)
{
	const size_t sz = eb_element_size();

	/*
	 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
	 * array size (see eb_create()). Otherwise, we can accept an array as
	 * large as can be addressed (though use large arrays at your peril)!
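	 *
	 * Note that the exec list allocated in i915_gem_execbuffer2_ioctl()
	 * reserves two slots beyond this count for the extra entries
	 * eb_parse() may append (the shadow batch and, when dispatching via
	 * the GGTT, the secure batch).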
3371 */ 3372 3373 return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1); 3374 } 3375 3376 int 3377 i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, 3378 struct drm_file *file) 3379 { 3380 struct drm_i915_private *i915 = to_i915(dev); 3381 struct drm_i915_gem_execbuffer2 *args = data; 3382 struct drm_i915_gem_exec_object2 *exec2_list; 3383 const size_t count = args->buffer_count; 3384 int err; 3385 3386 if (!check_buffer_count(count)) { 3387 drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count); 3388 return -EINVAL; 3389 } 3390 3391 err = i915_gem_check_execbuffer(args); 3392 if (err) 3393 return err; 3394 3395 /* Allocate extra slots for use by the command parser */ 3396 exec2_list = kvmalloc_array(count + 2, eb_element_size(), 3397 __GFP_NOWARN | GFP_KERNEL); 3398 if (exec2_list == NULL) { 3399 drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n", 3400 count); 3401 return -ENOMEM; 3402 } 3403 if (copy_from_user(exec2_list, 3404 u64_to_user_ptr(args->buffers_ptr), 3405 sizeof(*exec2_list) * count)) { 3406 drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count); 3407 kvfree(exec2_list); 3408 return -EFAULT; 3409 } 3410 3411 err = i915_gem_do_execbuffer(dev, file, args, exec2_list); 3412 3413 /* 3414 * Now that we have begun execution of the batchbuffer, we ignore 3415 * any new error after this point. Also given that we have already 3416 * updated the associated relocations, we try to write out the current 3417 * object locations irrespective of any error. 3418 */ 3419 if (args->flags & __EXEC_HAS_RELOC) { 3420 struct drm_i915_gem_exec_object2 __user *user_exec_list = 3421 u64_to_user_ptr(args->buffers_ptr); 3422 unsigned int i; 3423 3424 /* Copy the new buffer offsets back to the user's exec list. */ 3425 /* 3426 * Note: count * sizeof(*user_exec_list) does not overflow, 3427 * because we checked 'count' in check_buffer_count(). 3428 * 3429 * And this range already got effectively checked earlier 3430 * when we did the "copy_from_user()" above. 3431 */ 3432 if (!user_write_access_begin(user_exec_list, 3433 count * sizeof(*user_exec_list))) 3434 goto end; 3435 3436 for (i = 0; i < args->buffer_count; i++) { 3437 if (!(exec2_list[i].offset & UPDATE)) 3438 continue; 3439 3440 exec2_list[i].offset = 3441 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); 3442 unsafe_put_user(exec2_list[i].offset, 3443 &user_exec_list[i].offset, 3444 end_user); 3445 } 3446 end_user: 3447 user_write_access_end(); 3448 end:; 3449 } 3450 3451 args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; 3452 kvfree(exec2_list); 3453 return err; 3454 } 3455 3456 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 3457 #include "selftests/i915_gem_execbuffer.c" 3458 #endif 3459