/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008,2010 Intel Corporation
 */

#include <linux/intel-iommu.h>
#include <linux/reservation.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>

#include <drm/drm_syncobj.h>
#include <drm/i915_drm.h>

#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_ioctls.h"
#include "gt/intel_context.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"

#include "i915_gem_ioctls.h"
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_trace.h"
#include "intel_drv.h"

enum {
	FORCE_CPU_RELOC = 1,
	FORCE_GTT_RELOC,
	FORCE_GPU_RELOC,
#define DBG_FORCE_RELOC 0 /* choose one of the above! */
};

#define __EXEC_OBJECT_HAS_REF		BIT(31)
#define __EXEC_OBJECT_HAS_PIN		BIT(30)
#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)

#define __EXEC_HAS_RELOC	BIT(31)
#define __EXEC_VALIDATED	BIT(30)
#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
#define UPDATE			PIN_OFFSET_FIXED

#define BATCH_OFFSET_BIAS (256*1024)

#define __I915_EXEC_ILLEGAL_FLAGS \
	(__I915_EXEC_UNKNOWN_FLAGS | \
	 I915_EXEC_CONSTANTS_MASK  | \
	 I915_EXEC_RESOURCE_STREAMER)

/* Catch emission of unexpected errors for CI! */
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#undef EINVAL
#define EINVAL ({ \
	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
	22; \
})
#endif

/**
 * DOC: User command execution
 *
 * Userspace submits commands to be executed on the GPU as an instruction
 * stream within a GEM object we call a batchbuffer. These instructions may
 * refer to other GEM objects containing auxiliary state such as kernels,
 * samplers, render targets and even secondary batchbuffers. Userspace does
 * not know where in the GPU memory these objects reside and so before the
 * batchbuffer is passed to the GPU for execution, those addresses in the
 * batchbuffer and auxiliary objects are updated. This is known as relocation,
 * or patching. To try and avoid having to relocate each object on the next
 * execution, userspace is told the location of those objects in this pass,
 * but this remains just a hint as the kernel may choose a new location for
 * any object in the future.
 *
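 * As a rough illustration (not lifted from any real userspace driver, and
 * with purely hypothetical handles and offsets), a batch containing a single
 * pointer to another buffer could describe that pointer to the kernel as::
 *
 *	struct drm_i915_gem_relocation_entry reloc = {
 *		.target_handle = target_bo_handle,
 *		.delta = 0,
 *		.offset = 128,
 *		.presumed_offset = last_known_gpu_addr,
 *		.read_domains = I915_GEM_DOMAIN_RENDER,
 *		.write_domain = 0,
 *	};
 *	struct drm_i915_gem_exec_object2 obj = {
 *		.handle = batch_bo_handle,
 *		.relocation_count = 1,
 *		.relocs_ptr = (uintptr_t)&reloc,
 *	};
 *
 * Here .offset is the location of the pointer within the batch, .delta is the
 * offset within the target to point at, and .presumed_offset is the address
 * hint reported by a previous execbuf. If the target has not moved since then,
 * the kernel can skip rewriting the batch entirely.
 *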
 * At the level of talking to the hardware, submitting a batchbuffer for the
 * GPU to execute is to add content to a buffer from which the HW
 * command streamer is reading.
 *
 * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
 *    Execlists, this command is not placed on the same buffer as the
 *    remaining items.
 *
 * 2. Add a command to invalidate caches to the buffer.
 *
 * 3. Add a batchbuffer start command to the buffer; the start command is
 *    essentially a token together with the GPU address of the batchbuffer
 *    to be executed.
 *
 * 4. Add a pipeline flush to the buffer.
 *
 * 5. Add a memory write command to the buffer to record when the GPU
 *    is done executing the batchbuffer. The memory write writes the
 *    global sequence number of the request, ``i915_request::global_seqno``;
 *    the i915 driver uses the current value in the register to determine
 *    if the GPU has completed the batchbuffer.
 *
 * 6. Add a user interrupt command to the buffer. This command instructs
 *    the GPU to issue an interrupt when the command, pipeline flush and
 *    memory write are completed.
 *
 * 7. Inform the hardware of the additional commands added to the buffer
 *    (by updating the tail pointer).
 *
 * Processing an execbuf ioctl is conceptually split up into a few phases.
 *
 * 1. Validation - Ensure all the pointers, handles and flags are valid.
 * 2. Reservation - Assign GPU address space for every object
 * 3. Relocation - Update any addresses to point to the final locations
 * 4. Serialisation - Order the request with respect to its dependencies
 * 5. Construction - Construct a request to execute the batchbuffer
 * 6. Submission (at some point in the future execution)
 *
 * Reserving resources for the execbuf is the most complicated phase. We
 * neither want to have to migrate the object in the address space, nor do
 * we want to have to update any relocations pointing to this object. Ideally,
 * we want to leave the object where it is and for all the existing relocations
 * to match. If the object is given a new address, or if userspace thinks the
 * object is elsewhere, we have to parse all the relocation entries and update
 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
 * all the target addresses in all of its objects match the value in the
 * relocation entries and that they all match the presumed offsets given by the
 * list of execbuffer objects. Using this knowledge, we know that if we haven't
 * moved any buffers, all the relocation entries are valid and we can skip
 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
 * hang.) The requirements for using I915_EXEC_NO_RELOC are:
 *
 * The addresses written in the objects must match the corresponding
 * reloc.presumed_offset which in turn must match the corresponding
 * execobject.offset.
 *
 * Any render targets written to in the batch must be flagged with
 * EXEC_OBJECT_WRITE.
 *
 * To avoid stalling, execobject.offset should match the current
 * address of that object within the active context.
 *
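 * A minimal sketch of how userspace might satisfy those requirements (all
 * handles, offsets and sizes here are hypothetical) is::
 *
 *	obj.offset = last_known_gpu_addr;
 *	obj.flags |= EXEC_OBJECT_WRITE;
 *
 *	struct drm_i915_gem_execbuffer2 execbuf = {
 *		.buffers_ptr = (uintptr_t)exec_objects,
 *		.buffer_count = nr_objects,
 *		.batch_len = batch_size,
 *		.flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 *
 * where obj.offset equals reloc.presumed_offset for every relocation against
 * that object, and EXEC_OBJECT_WRITE is set on every buffer the batch writes
 * to. Should any object have moved in the meantime, the kernel simply falls
 * back to processing the relocation entries as usual.
 *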
 * The reservation is done in multiple phases. First we try to keep any
 * object already bound in its current location, so long as it meets the
 * constraints imposed by the new execbuffer. Any object left unbound after the
 * first pass is then fitted into any available idle space. If an object does
 * not fit, all objects are removed from the reservation and the process rerun
 * after sorting the objects into a priority order (more difficult to fit
 * objects are tried first). Failing that, the entire VM is cleared and we try
 * to fit the execbuf one last time before concluding that it simply will not
 * fit.
 *
 * A small complication to all of this is that we allow userspace not only to
 * specify an alignment and a size for the object in the address space, but
 * we also allow userspace to specify the exact offset. Such objects are
 * simpler to place (the location is known a priori); all we have to do is make
 * sure the space is available.
 *
 * Once all the objects are in place, patching up the buried pointers to point
 * to the final locations is a fairly simple job of walking over the relocation
 * entry arrays, looking up the right address and rewriting the value into
 * the object. Simple! ... The relocation entries are stored in user memory
 * and so to access them we have to copy them into a local buffer. That copy
 * has to avoid taking any pagefaults as they may lead back to a GEM object
 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
 * the relocation into multiple passes. First we try to do everything within an
 * atomic context (avoiding the pagefaults), which requires that we never wait.
 * If we detect that we may wait, or if we need to fault, then we have to fall
 * back to a slower path. The slowpath has to drop the mutex. (Can you hear the
 * alarm bells yet?) Dropping the mutex means that we lose all the state we
 * have built up so far for the execbuf and we must reset any global data.
 * However, we do leave the objects pinned in their final locations - which is
 * a potential issue for concurrent execbufs. Once we have left the mutex, we
 * can allocate and copy all the relocation entries into a large array at our
 * leisure, reacquire the mutex, reclaim all the objects and other state and
 * then proceed to update any incorrect addresses with the objects.
 *
 * As we process the relocation entries, we maintain a record of whether the
 * object is being written to. Using NO_RELOC, we expect userspace to provide
 * this information instead. We also check whether we can skip the relocation
 * by comparing the expected value inside the relocation entry with the target's
 * final address. If they differ, we have to map the current object and rewrite
 * the 4 or 8 byte pointer within.
 *
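 * In outline, the two-pass copy described above looks roughly like the
 * following (a simplified sketch; the real implementation later in this file
 * differs in detail)::
 *
 *	pagefault_disable();
 *	copied = __copy_from_user_inatomic(tmp, urelocs, size);
 *	pagefault_enable();
 *	if (copied) {
 *		mutex_unlock(&dev->struct_mutex);
 *		copy_from_user(tmp, urelocs, size);
 *		mutex_lock(&dev->struct_mutex);
 *	}
 *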
 * Serialising an execbuf is quite simple according to the rules of the GEM
 * ABI. Execution within each context is ordered by the order of submission.
 * Writes to any GEM object are in order of submission and are exclusive. Reads
 * from a GEM object are unordered with respect to other reads, but ordered by
 * writes. A write submitted after a read cannot occur before the read, and
 * similarly any read submitted after a write cannot occur before the write.
 * Writes are ordered between engines such that only one write occurs at any
 * time (completing any reads beforehand) - using semaphores where available
 * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any
 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
 * reads before starting, and any read (either using set-domain or pread) must
 * flush all GPU writes before starting. (Note we only employ a barrier before;
 * we currently rely on userspace not concurrently starting a new execution
 * whilst reading or writing to an object. This may be an advantage or not
 * depending on how much you trust userspace not to shoot themselves in the
 * foot.) Serialisation may just result in the request being inserted into
 * a DAG awaiting its turn, but the simplest approach is to wait on the CPU
 * until all dependencies are resolved.
 *
 * After all of that, it is just a matter of closing the request and handing it
 * to the hardware (well, leaving it in a queue to be executed). However, we
 * also offer the ability for batchbuffers to be run with elevated privileges
 * so that they can access otherwise hidden registers. (Used to adjust L3 cache
 * etc.) Before any batch is given extra privileges we first must check that it
 * contains no nefarious instructions: we check that each instruction is from
 * our whitelist and that all registers are also from an allowed list. We first
 * copy the user's batchbuffer to a shadow (so that the user doesn't have
 * access to it, either by the CPU or GPU as we scan it) and then parse each
 * instruction. If everything is ok, we set a flag telling the hardware to run
 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
 */

struct i915_execbuffer {
	struct drm_i915_private *i915; /** i915 backpointer */
	struct drm_file *file; /** per-file lookup tables and limits */
	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
	struct i915_vma **vma;
	unsigned int *flags;

	struct intel_engine_cs *engine; /** engine to queue the request to */
	struct intel_context *context; /* logical state for the request */
	struct i915_gem_context *gem_context; /** caller's context */

	struct i915_request *request; /** our request to build */
	struct i915_vma *batch; /** identity of the batch obj/vma */

	/** actual size of execobj[] as we may extend it for the cmdparser */
	unsigned int buffer_count;

	/** list of vma not yet bound during reservation phase */
	struct list_head unbound;

	/** list of vma that have execobj.relocation_count */
	struct list_head relocs;

	/**
	 * Track the most recently used object for relocations, as we
	 * frequently have to perform multiple relocations within the same
	 * obj/page
	 */
	struct reloc_cache {
		struct drm_mm_node node; /** temporary GTT binding */
		unsigned long vaddr; /** Current kmap address */
		unsigned long page; /** Currently mapped page index */
		unsigned int gen; /** Cached value of INTEL_GEN */
		bool use_64bit_reloc : 1;
		bool has_llc : 1;
		bool has_fence : 1;
		bool needs_unfenced : 1;

		struct i915_request *rq;
		u32 *rq_cmd;
		unsigned int rq_size;
	} reloc_cache;

	u64 invalid_flags; /** Set of execobj.flags that are invalid */
	u32 context_flags; /** Set of execobj.flags to insert from the ctx */

	u32 batch_start_offset; /** Location within object of batch */
	u32 batch_len; /** Length of batch within object */
	u32 batch_flags; /** Flags composed for emit_bb_start() */

	/**
	 * Indicate either the size of the hashtable used to resolve
	 * relocation handles, or if negative that we are using a direct
	 * index into the execobj[].
	 */
	int lut_size;
	struct hlist_head *buckets; /** ht for relocation handles */
};

#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])

/*
 * Used to convert any address to canonical form.
 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
 * addresses to be in a canonical form:
 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
 * canonical form [63:48] == [47]."
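 *
 * For example (illustrative values only), 0x0000800000000000 has bit 47 set
 * and so must be presented to the hardware in its sign-extended, canonical
 * form 0xffff800000000000, whereas 0x00007ffffffff000 already satisfies
 * [63:48] == [47] and is left unchanged by gen8_canonical_addr() below.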
284 */ 285 #define GEN8_HIGH_ADDRESS_BIT 47 286 static inline u64 gen8_canonical_addr(u64 address) 287 { 288 return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); 289 } 290 291 static inline u64 gen8_noncanonical_addr(u64 address) 292 { 293 return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); 294 } 295 296 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) 297 { 298 return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; 299 } 300 301 static int eb_create(struct i915_execbuffer *eb) 302 { 303 if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { 304 unsigned int size = 1 + ilog2(eb->buffer_count); 305 306 /* 307 * Without a 1:1 association between relocation handles and 308 * the execobject[] index, we instead create a hashtable. 309 * We size it dynamically based on available memory, starting 310 * first with 1:1 assocative hash and scaling back until 311 * the allocation succeeds. 312 * 313 * Later on we use a positive lut_size to indicate we are 314 * using this hashtable, and a negative value to indicate a 315 * direct lookup. 316 */ 317 do { 318 gfp_t flags; 319 320 /* While we can still reduce the allocation size, don't 321 * raise a warning and allow the allocation to fail. 322 * On the last pass though, we want to try as hard 323 * as possible to perform the allocation and warn 324 * if it fails. 325 */ 326 flags = GFP_KERNEL; 327 if (size > 1) 328 flags |= __GFP_NORETRY | __GFP_NOWARN; 329 330 eb->buckets = kzalloc(sizeof(struct hlist_head) << size, 331 flags); 332 if (eb->buckets) 333 break; 334 } while (--size); 335 336 if (unlikely(!size)) 337 return -ENOMEM; 338 339 eb->lut_size = size; 340 } else { 341 eb->lut_size = -eb->buffer_count; 342 } 343 344 return 0; 345 } 346 347 static bool 348 eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, 349 const struct i915_vma *vma, 350 unsigned int flags) 351 { 352 if (vma->node.size < entry->pad_to_size) 353 return true; 354 355 if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) 356 return true; 357 358 if (flags & EXEC_OBJECT_PINNED && 359 vma->node.start != entry->offset) 360 return true; 361 362 if (flags & __EXEC_OBJECT_NEEDS_BIAS && 363 vma->node.start < BATCH_OFFSET_BIAS) 364 return true; 365 366 if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && 367 (vma->node.start + vma->node.size - 1) >> 32) 368 return true; 369 370 if (flags & __EXEC_OBJECT_NEEDS_MAP && 371 !i915_vma_is_map_and_fenceable(vma)) 372 return true; 373 374 return false; 375 } 376 377 static inline bool 378 eb_pin_vma(struct i915_execbuffer *eb, 379 const struct drm_i915_gem_exec_object2 *entry, 380 struct i915_vma *vma) 381 { 382 unsigned int exec_flags = *vma->exec_flags; 383 u64 pin_flags; 384 385 if (vma->node.size) 386 pin_flags = vma->node.start; 387 else 388 pin_flags = entry->offset & PIN_OFFSET_MASK; 389 390 pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED; 391 if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT)) 392 pin_flags |= PIN_GLOBAL; 393 394 if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) 395 return false; 396 397 if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { 398 if (unlikely(i915_vma_pin_fence(vma))) { 399 i915_vma_unpin(vma); 400 return false; 401 } 402 403 if (vma->fence) 404 exec_flags |= __EXEC_OBJECT_HAS_FENCE; 405 } 406 407 *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN; 408 return !eb_vma_misplaced(entry, vma, exec_flags); 409 } 410 411 static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) 412 { 413 GEM_BUG_ON(!(flags & 
__EXEC_OBJECT_HAS_PIN)); 414 415 if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) 416 __i915_vma_unpin_fence(vma); 417 418 __i915_vma_unpin(vma); 419 } 420 421 static inline void 422 eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags) 423 { 424 if (!(*flags & __EXEC_OBJECT_HAS_PIN)) 425 return; 426 427 __eb_unreserve_vma(vma, *flags); 428 *flags &= ~__EXEC_OBJECT_RESERVED; 429 } 430 431 static int 432 eb_validate_vma(struct i915_execbuffer *eb, 433 struct drm_i915_gem_exec_object2 *entry, 434 struct i915_vma *vma) 435 { 436 if (unlikely(entry->flags & eb->invalid_flags)) 437 return -EINVAL; 438 439 if (unlikely(entry->alignment && !is_power_of_2(entry->alignment))) 440 return -EINVAL; 441 442 /* 443 * Offset can be used as input (EXEC_OBJECT_PINNED), reject 444 * any non-page-aligned or non-canonical addresses. 445 */ 446 if (unlikely(entry->flags & EXEC_OBJECT_PINNED && 447 entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK))) 448 return -EINVAL; 449 450 /* pad_to_size was once a reserved field, so sanitize it */ 451 if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) { 452 if (unlikely(offset_in_page(entry->pad_to_size))) 453 return -EINVAL; 454 } else { 455 entry->pad_to_size = 0; 456 } 457 458 if (unlikely(vma->exec_flags)) { 459 DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", 460 entry->handle, (int)(entry - eb->exec)); 461 return -EINVAL; 462 } 463 464 /* 465 * From drm_mm perspective address space is continuous, 466 * so from this point we're always using non-canonical 467 * form internally. 468 */ 469 entry->offset = gen8_noncanonical_addr(entry->offset); 470 471 if (!eb->reloc_cache.has_fence) { 472 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; 473 } else { 474 if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE || 475 eb->reloc_cache.needs_unfenced) && 476 i915_gem_object_is_tiled(vma->obj)) 477 entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; 478 } 479 480 if (!(entry->flags & EXEC_OBJECT_PINNED)) 481 entry->flags |= eb->context_flags; 482 483 return 0; 484 } 485 486 static int 487 eb_add_vma(struct i915_execbuffer *eb, 488 unsigned int i, unsigned batch_idx, 489 struct i915_vma *vma) 490 { 491 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; 492 int err; 493 494 GEM_BUG_ON(i915_vma_is_closed(vma)); 495 496 if (!(eb->args->flags & __EXEC_VALIDATED)) { 497 err = eb_validate_vma(eb, entry, vma); 498 if (unlikely(err)) 499 return err; 500 } 501 502 if (eb->lut_size > 0) { 503 vma->exec_handle = entry->handle; 504 hlist_add_head(&vma->exec_node, 505 &eb->buckets[hash_32(entry->handle, 506 eb->lut_size)]); 507 } 508 509 if (entry->relocation_count) 510 list_add_tail(&vma->reloc_link, &eb->relocs); 511 512 /* 513 * Stash a pointer from the vma to execobj, so we can query its flags, 514 * size, alignment etc as provided by the user. Also we stash a pointer 515 * to the vma inside the execobj so that we can use a direct lookup 516 * to find the right target VMA when doing relocations. 517 */ 518 eb->vma[i] = vma; 519 eb->flags[i] = entry->flags; 520 vma->exec_flags = &eb->flags[i]; 521 522 /* 523 * SNA is doing fancy tricks with compressing batch buffers, which leads 524 * to negative relocation deltas. Usually that works out ok since the 525 * relocate address is still positive, except when the batch is placed 526 * very low in the GTT. Ensure this doesn't happen. 527 * 528 * Note that actual hangs have only been observed on gen7, but for 529 * paranoia do it everywhere. 
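 *
 * As a purely illustrative example, a self-referencing relocation with a
 * delta of -4096 against a batch bound at GTT offset 0 would compute a
 * negative (wrapped) GPU address; requiring the batch to sit above
 * BATCH_OFFSET_BIAS (256KiB) keeps such sums positive.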
530 */ 531 if (i == batch_idx) { 532 if (entry->relocation_count && 533 !(eb->flags[i] & EXEC_OBJECT_PINNED)) 534 eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS; 535 if (eb->reloc_cache.has_fence) 536 eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE; 537 538 eb->batch = vma; 539 } 540 541 err = 0; 542 if (eb_pin_vma(eb, entry, vma)) { 543 if (entry->offset != vma->node.start) { 544 entry->offset = vma->node.start | UPDATE; 545 eb->args->flags |= __EXEC_HAS_RELOC; 546 } 547 } else { 548 eb_unreserve_vma(vma, vma->exec_flags); 549 550 list_add_tail(&vma->exec_link, &eb->unbound); 551 if (drm_mm_node_allocated(&vma->node)) 552 err = i915_vma_unbind(vma); 553 if (unlikely(err)) 554 vma->exec_flags = NULL; 555 } 556 return err; 557 } 558 559 static inline int use_cpu_reloc(const struct reloc_cache *cache, 560 const struct drm_i915_gem_object *obj) 561 { 562 if (!i915_gem_object_has_struct_page(obj)) 563 return false; 564 565 if (DBG_FORCE_RELOC == FORCE_CPU_RELOC) 566 return true; 567 568 if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) 569 return false; 570 571 return (cache->has_llc || 572 obj->cache_dirty || 573 obj->cache_level != I915_CACHE_NONE); 574 } 575 576 static int eb_reserve_vma(const struct i915_execbuffer *eb, 577 struct i915_vma *vma) 578 { 579 struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); 580 unsigned int exec_flags = *vma->exec_flags; 581 u64 pin_flags; 582 int err; 583 584 pin_flags = PIN_USER | PIN_NONBLOCK; 585 if (exec_flags & EXEC_OBJECT_NEEDS_GTT) 586 pin_flags |= PIN_GLOBAL; 587 588 /* 589 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, 590 * limit address to the first 4GBs for unflagged objects. 591 */ 592 if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) 593 pin_flags |= PIN_ZONE_4G; 594 595 if (exec_flags & __EXEC_OBJECT_NEEDS_MAP) 596 pin_flags |= PIN_MAPPABLE; 597 598 if (exec_flags & EXEC_OBJECT_PINNED) { 599 pin_flags |= entry->offset | PIN_OFFSET_FIXED; 600 pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */ 601 } else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) { 602 pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; 603 } 604 605 err = i915_vma_pin(vma, 606 entry->pad_to_size, entry->alignment, 607 pin_flags); 608 if (err) 609 return err; 610 611 if (entry->offset != vma->node.start) { 612 entry->offset = vma->node.start | UPDATE; 613 eb->args->flags |= __EXEC_HAS_RELOC; 614 } 615 616 if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { 617 err = i915_vma_pin_fence(vma); 618 if (unlikely(err)) { 619 i915_vma_unpin(vma); 620 return err; 621 } 622 623 if (vma->fence) 624 exec_flags |= __EXEC_OBJECT_HAS_FENCE; 625 } 626 627 *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN; 628 GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags)); 629 630 return 0; 631 } 632 633 static int eb_reserve(struct i915_execbuffer *eb) 634 { 635 const unsigned int count = eb->buffer_count; 636 struct list_head last; 637 struct i915_vma *vma; 638 unsigned int i, pass; 639 int err; 640 641 /* 642 * Attempt to pin all of the buffers into the GTT. 643 * This is done in 3 phases: 644 * 645 * 1a. Unbind all objects that do not match the GTT constraints for 646 * the execbuffer (fenceable, mappable, alignment etc). 647 * 1b. Increment pin count for already bound objects. 648 * 2. Bind new objects. 649 * 3. Decrement pin count. 650 * 651 * This avoid unnecessary unbinding of later objects in order to make 652 * room for the earlier objects *unless* we need to defragment. 
653 */ 654 655 pass = 0; 656 err = 0; 657 do { 658 list_for_each_entry(vma, &eb->unbound, exec_link) { 659 err = eb_reserve_vma(eb, vma); 660 if (err) 661 break; 662 } 663 if (err != -ENOSPC) 664 return err; 665 666 /* Resort *all* the objects into priority order */ 667 INIT_LIST_HEAD(&eb->unbound); 668 INIT_LIST_HEAD(&last); 669 for (i = 0; i < count; i++) { 670 unsigned int flags = eb->flags[i]; 671 struct i915_vma *vma = eb->vma[i]; 672 673 if (flags & EXEC_OBJECT_PINNED && 674 flags & __EXEC_OBJECT_HAS_PIN) 675 continue; 676 677 eb_unreserve_vma(vma, &eb->flags[i]); 678 679 if (flags & EXEC_OBJECT_PINNED) 680 /* Pinned must have their slot */ 681 list_add(&vma->exec_link, &eb->unbound); 682 else if (flags & __EXEC_OBJECT_NEEDS_MAP) 683 /* Map require the lowest 256MiB (aperture) */ 684 list_add_tail(&vma->exec_link, &eb->unbound); 685 else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) 686 /* Prioritise 4GiB region for restricted bo */ 687 list_add(&vma->exec_link, &last); 688 else 689 list_add_tail(&vma->exec_link, &last); 690 } 691 list_splice_tail(&last, &eb->unbound); 692 693 switch (pass++) { 694 case 0: 695 break; 696 697 case 1: 698 /* Too fragmented, unbind everything and retry */ 699 err = i915_gem_evict_vm(eb->context->vm); 700 if (err) 701 return err; 702 break; 703 704 default: 705 return -ENOSPC; 706 } 707 } while (1); 708 } 709 710 static unsigned int eb_batch_index(const struct i915_execbuffer *eb) 711 { 712 if (eb->args->flags & I915_EXEC_BATCH_FIRST) 713 return 0; 714 else 715 return eb->buffer_count - 1; 716 } 717 718 static int eb_select_context(struct i915_execbuffer *eb) 719 { 720 struct i915_gem_context *ctx; 721 722 ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); 723 if (unlikely(!ctx)) 724 return -ENOENT; 725 726 eb->gem_context = ctx; 727 if (ctx->vm) 728 eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; 729 730 eb->context_flags = 0; 731 if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags)) 732 eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS; 733 734 return 0; 735 } 736 737 static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) 738 { 739 struct i915_request *rq; 740 741 /* 742 * Completely unscientific finger-in-the-air estimates for suitable 743 * maximum user request size (to avoid blocking) and then backoff. 744 */ 745 if (intel_ring_update_space(ring) >= PAGE_SIZE) 746 return NULL; 747 748 /* 749 * Find a request that after waiting upon, there will be at least half 750 * the ring available. The hysteresis allows us to compete for the 751 * shared ring and should mean that we sleep less often prior to 752 * claiming our resources, but not so long that the ring completely 753 * drains before we can submit our next request. 754 */ 755 list_for_each_entry(rq, &ring->request_list, ring_link) { 756 if (__intel_ring_space(rq->postfix, 757 ring->emit, ring->size) > ring->size / 2) 758 break; 759 } 760 if (&rq->ring_link == &ring->request_list) 761 return NULL; /* weird, we will check again later for real */ 762 763 return i915_request_get(rq); 764 } 765 766 static int eb_wait_for_ring(const struct i915_execbuffer *eb) 767 { 768 struct i915_request *rq; 769 int ret = 0; 770 771 /* 772 * Apply a light amount of backpressure to prevent excessive hogs 773 * from blocking waiting for space whilst holding struct_mutex and 774 * keeping all of their resources pinned. 
775 */ 776 777 rq = __eb_wait_for_ring(eb->context->ring); 778 if (rq) { 779 mutex_unlock(&eb->i915->drm.struct_mutex); 780 781 if (i915_request_wait(rq, 782 I915_WAIT_INTERRUPTIBLE, 783 MAX_SCHEDULE_TIMEOUT) < 0) 784 ret = -EINTR; 785 786 i915_request_put(rq); 787 788 mutex_lock(&eb->i915->drm.struct_mutex); 789 } 790 791 return ret; 792 } 793 794 static int eb_lookup_vmas(struct i915_execbuffer *eb) 795 { 796 struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; 797 struct drm_i915_gem_object *obj; 798 unsigned int i, batch; 799 int err; 800 801 if (unlikely(i915_gem_context_is_banned(eb->gem_context))) 802 return -EIO; 803 804 INIT_LIST_HEAD(&eb->relocs); 805 INIT_LIST_HEAD(&eb->unbound); 806 807 batch = eb_batch_index(eb); 808 809 mutex_lock(&eb->gem_context->mutex); 810 if (unlikely(i915_gem_context_is_closed(eb->gem_context))) { 811 err = -ENOENT; 812 goto err_ctx; 813 } 814 815 for (i = 0; i < eb->buffer_count; i++) { 816 u32 handle = eb->exec[i].handle; 817 struct i915_lut_handle *lut; 818 struct i915_vma *vma; 819 820 vma = radix_tree_lookup(handles_vma, handle); 821 if (likely(vma)) 822 goto add_vma; 823 824 obj = i915_gem_object_lookup(eb->file, handle); 825 if (unlikely(!obj)) { 826 err = -ENOENT; 827 goto err_vma; 828 } 829 830 vma = i915_vma_instance(obj, eb->context->vm, NULL); 831 if (IS_ERR(vma)) { 832 err = PTR_ERR(vma); 833 goto err_obj; 834 } 835 836 lut = i915_lut_handle_alloc(); 837 if (unlikely(!lut)) { 838 err = -ENOMEM; 839 goto err_obj; 840 } 841 842 err = radix_tree_insert(handles_vma, handle, vma); 843 if (unlikely(err)) { 844 i915_lut_handle_free(lut); 845 goto err_obj; 846 } 847 848 /* transfer ref to lut */ 849 if (!atomic_fetch_inc(&vma->open_count)) 850 i915_vma_reopen(vma); 851 lut->handle = handle; 852 lut->ctx = eb->gem_context; 853 854 i915_gem_object_lock(obj); 855 list_add(&lut->obj_link, &obj->lut_list); 856 i915_gem_object_unlock(obj); 857 858 add_vma: 859 err = eb_add_vma(eb, i, batch, vma); 860 if (unlikely(err)) 861 goto err_vma; 862 863 GEM_BUG_ON(vma != eb->vma[i]); 864 GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); 865 GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && 866 eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i])); 867 } 868 869 mutex_unlock(&eb->gem_context->mutex); 870 871 eb->args->flags |= __EXEC_VALIDATED; 872 return eb_reserve(eb); 873 874 err_obj: 875 i915_gem_object_put(obj); 876 err_vma: 877 eb->vma[i] = NULL; 878 err_ctx: 879 mutex_unlock(&eb->gem_context->mutex); 880 return err; 881 } 882 883 static struct i915_vma * 884 eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) 885 { 886 if (eb->lut_size < 0) { 887 if (handle >= -eb->lut_size) 888 return NULL; 889 return eb->vma[handle]; 890 } else { 891 struct hlist_head *head; 892 struct i915_vma *vma; 893 894 head = &eb->buckets[hash_32(handle, eb->lut_size)]; 895 hlist_for_each_entry(vma, head, exec_node) { 896 if (vma->exec_handle == handle) 897 return vma; 898 } 899 return NULL; 900 } 901 } 902 903 static void eb_release_vmas(const struct i915_execbuffer *eb) 904 { 905 const unsigned int count = eb->buffer_count; 906 unsigned int i; 907 908 for (i = 0; i < count; i++) { 909 struct i915_vma *vma = eb->vma[i]; 910 unsigned int flags = eb->flags[i]; 911 912 if (!vma) 913 break; 914 915 GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); 916 vma->exec_flags = NULL; 917 eb->vma[i] = NULL; 918 919 if (flags & __EXEC_OBJECT_HAS_PIN) 920 __eb_unreserve_vma(vma, flags); 921 922 if (flags & __EXEC_OBJECT_HAS_REF) 923 i915_vma_put(vma); 924 } 925 } 926 927 static 
void eb_reset_vmas(const struct i915_execbuffer *eb) 928 { 929 eb_release_vmas(eb); 930 if (eb->lut_size > 0) 931 memset(eb->buckets, 0, 932 sizeof(struct hlist_head) << eb->lut_size); 933 } 934 935 static void eb_destroy(const struct i915_execbuffer *eb) 936 { 937 GEM_BUG_ON(eb->reloc_cache.rq); 938 939 if (eb->lut_size > 0) 940 kfree(eb->buckets); 941 } 942 943 static inline u64 944 relocation_target(const struct drm_i915_gem_relocation_entry *reloc, 945 const struct i915_vma *target) 946 { 947 return gen8_canonical_addr((int)reloc->delta + target->node.start); 948 } 949 950 static void reloc_cache_init(struct reloc_cache *cache, 951 struct drm_i915_private *i915) 952 { 953 cache->page = -1; 954 cache->vaddr = 0; 955 /* Must be a variable in the struct to allow GCC to unroll. */ 956 cache->gen = INTEL_GEN(i915); 957 cache->has_llc = HAS_LLC(i915); 958 cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); 959 cache->has_fence = cache->gen < 4; 960 cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; 961 cache->node.allocated = false; 962 cache->rq = NULL; 963 cache->rq_size = 0; 964 } 965 966 static inline void *unmask_page(unsigned long p) 967 { 968 return (void *)(uintptr_t)(p & PAGE_MASK); 969 } 970 971 static inline unsigned int unmask_flags(unsigned long p) 972 { 973 return p & ~PAGE_MASK; 974 } 975 976 #define KMAP 0x4 /* after CLFLUSH_FLAGS */ 977 978 static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) 979 { 980 struct drm_i915_private *i915 = 981 container_of(cache, struct i915_execbuffer, reloc_cache)->i915; 982 return &i915->ggtt; 983 } 984 985 static void reloc_gpu_flush(struct reloc_cache *cache) 986 { 987 GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32)); 988 cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; 989 990 __i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size); 991 i915_gem_object_unpin_map(cache->rq->batch->obj); 992 993 intel_gt_chipset_flush(cache->rq->engine->gt); 994 995 i915_request_add(cache->rq); 996 cache->rq = NULL; 997 } 998 999 static void reloc_cache_reset(struct reloc_cache *cache) 1000 { 1001 void *vaddr; 1002 1003 if (cache->rq) 1004 reloc_gpu_flush(cache); 1005 1006 if (!cache->vaddr) 1007 return; 1008 1009 vaddr = unmask_page(cache->vaddr); 1010 if (cache->vaddr & KMAP) { 1011 if (cache->vaddr & CLFLUSH_AFTER) 1012 mb(); 1013 1014 kunmap_atomic(vaddr); 1015 i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); 1016 } else { 1017 wmb(); 1018 io_mapping_unmap_atomic((void __iomem *)vaddr); 1019 if (cache->node.allocated) { 1020 struct i915_ggtt *ggtt = cache_to_ggtt(cache); 1021 1022 ggtt->vm.clear_range(&ggtt->vm, 1023 cache->node.start, 1024 cache->node.size); 1025 drm_mm_remove_node(&cache->node); 1026 } else { 1027 i915_vma_unpin((struct i915_vma *)cache->node.mm); 1028 } 1029 } 1030 1031 cache->vaddr = 0; 1032 cache->page = -1; 1033 } 1034 1035 static void *reloc_kmap(struct drm_i915_gem_object *obj, 1036 struct reloc_cache *cache, 1037 unsigned long page) 1038 { 1039 void *vaddr; 1040 1041 if (cache->vaddr) { 1042 kunmap_atomic(unmask_page(cache->vaddr)); 1043 } else { 1044 unsigned int flushes; 1045 int err; 1046 1047 err = i915_gem_object_prepare_write(obj, &flushes); 1048 if (err) 1049 return ERR_PTR(err); 1050 1051 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); 1052 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); 1053 1054 cache->vaddr = flushes | KMAP; 1055 cache->node.mm = (void *)obj; 1056 if (flushes) 1057 mb(); 1058 } 1059 1060 vaddr = 
kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); 1061 cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; 1062 cache->page = page; 1063 1064 return vaddr; 1065 } 1066 1067 static void *reloc_iomap(struct drm_i915_gem_object *obj, 1068 struct reloc_cache *cache, 1069 unsigned long page) 1070 { 1071 struct i915_ggtt *ggtt = cache_to_ggtt(cache); 1072 unsigned long offset; 1073 void *vaddr; 1074 1075 if (cache->vaddr) { 1076 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); 1077 } else { 1078 struct i915_vma *vma; 1079 int err; 1080 1081 if (use_cpu_reloc(cache, obj)) 1082 return NULL; 1083 1084 i915_gem_object_lock(obj); 1085 err = i915_gem_object_set_to_gtt_domain(obj, true); 1086 i915_gem_object_unlock(obj); 1087 if (err) 1088 return ERR_PTR(err); 1089 1090 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1091 PIN_MAPPABLE | 1092 PIN_NONBLOCK | 1093 PIN_NONFAULT); 1094 if (IS_ERR(vma)) { 1095 memset(&cache->node, 0, sizeof(cache->node)); 1096 err = drm_mm_insert_node_in_range 1097 (&ggtt->vm.mm, &cache->node, 1098 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 1099 0, ggtt->mappable_end, 1100 DRM_MM_INSERT_LOW); 1101 if (err) /* no inactive aperture space, use cpu reloc */ 1102 return NULL; 1103 } else { 1104 err = i915_vma_put_fence(vma); 1105 if (err) { 1106 i915_vma_unpin(vma); 1107 return ERR_PTR(err); 1108 } 1109 1110 cache->node.start = vma->node.start; 1111 cache->node.mm = (void *)vma; 1112 } 1113 } 1114 1115 offset = cache->node.start; 1116 if (cache->node.allocated) { 1117 wmb(); 1118 ggtt->vm.insert_page(&ggtt->vm, 1119 i915_gem_object_get_dma_address(obj, page), 1120 offset, I915_CACHE_NONE, 0); 1121 } else { 1122 offset += page << PAGE_SHIFT; 1123 } 1124 1125 vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, 1126 offset); 1127 cache->page = page; 1128 cache->vaddr = (unsigned long)vaddr; 1129 1130 return vaddr; 1131 } 1132 1133 static void *reloc_vaddr(struct drm_i915_gem_object *obj, 1134 struct reloc_cache *cache, 1135 unsigned long page) 1136 { 1137 void *vaddr; 1138 1139 if (cache->page == page) { 1140 vaddr = unmask_page(cache->vaddr); 1141 } else { 1142 vaddr = NULL; 1143 if ((cache->vaddr & KMAP) == 0) 1144 vaddr = reloc_iomap(obj, cache, page); 1145 if (!vaddr) 1146 vaddr = reloc_kmap(obj, cache, page); 1147 } 1148 1149 return vaddr; 1150 } 1151 1152 static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) 1153 { 1154 if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { 1155 if (flushes & CLFLUSH_BEFORE) { 1156 clflushopt(addr); 1157 mb(); 1158 } 1159 1160 *addr = value; 1161 1162 /* 1163 * Writes to the same cacheline are serialised by the CPU 1164 * (including clflush). On the write path, we only require 1165 * that it hits memory in an orderly fashion and place 1166 * mb barriers at the start and end of the relocation phase 1167 * to ensure ordering of clflush wrt to the system. 
1168 */ 1169 if (flushes & CLFLUSH_AFTER) 1170 clflushopt(addr); 1171 } else 1172 *addr = value; 1173 } 1174 1175 static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) 1176 { 1177 struct drm_i915_gem_object *obj = vma->obj; 1178 int err; 1179 1180 i915_vma_lock(vma); 1181 1182 if (obj->cache_dirty & ~obj->cache_coherent) 1183 i915_gem_clflush_object(obj, 0); 1184 obj->write_domain = 0; 1185 1186 err = i915_request_await_object(rq, vma->obj, true); 1187 if (err == 0) 1188 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 1189 1190 i915_vma_unlock(vma); 1191 1192 return err; 1193 } 1194 1195 static int __reloc_gpu_alloc(struct i915_execbuffer *eb, 1196 struct i915_vma *vma, 1197 unsigned int len) 1198 { 1199 struct reloc_cache *cache = &eb->reloc_cache; 1200 struct drm_i915_gem_object *obj; 1201 struct i915_request *rq; 1202 struct i915_vma *batch; 1203 u32 *cmd; 1204 int err; 1205 1206 obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE); 1207 if (IS_ERR(obj)) 1208 return PTR_ERR(obj); 1209 1210 cmd = i915_gem_object_pin_map(obj, 1211 cache->has_llc ? 1212 I915_MAP_FORCE_WB : 1213 I915_MAP_FORCE_WC); 1214 i915_gem_object_unpin_pages(obj); 1215 if (IS_ERR(cmd)) 1216 return PTR_ERR(cmd); 1217 1218 batch = i915_vma_instance(obj, vma->vm, NULL); 1219 if (IS_ERR(batch)) { 1220 err = PTR_ERR(batch); 1221 goto err_unmap; 1222 } 1223 1224 err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); 1225 if (err) 1226 goto err_unmap; 1227 1228 rq = i915_request_create(eb->context); 1229 if (IS_ERR(rq)) { 1230 err = PTR_ERR(rq); 1231 goto err_unpin; 1232 } 1233 1234 err = reloc_move_to_gpu(rq, vma); 1235 if (err) 1236 goto err_request; 1237 1238 err = eb->engine->emit_bb_start(rq, 1239 batch->node.start, PAGE_SIZE, 1240 cache->gen > 5 ? 
0 : I915_DISPATCH_SECURE); 1241 if (err) 1242 goto skip_request; 1243 1244 i915_vma_lock(batch); 1245 GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); 1246 err = i915_vma_move_to_active(batch, rq, 0); 1247 i915_vma_unlock(batch); 1248 if (err) 1249 goto skip_request; 1250 1251 rq->batch = batch; 1252 i915_vma_unpin(batch); 1253 1254 cache->rq = rq; 1255 cache->rq_cmd = cmd; 1256 cache->rq_size = 0; 1257 1258 /* Return with batch mapping (cmd) still pinned */ 1259 return 0; 1260 1261 skip_request: 1262 i915_request_skip(rq, err); 1263 err_request: 1264 i915_request_add(rq); 1265 err_unpin: 1266 i915_vma_unpin(batch); 1267 err_unmap: 1268 i915_gem_object_unpin_map(obj); 1269 return err; 1270 } 1271 1272 static u32 *reloc_gpu(struct i915_execbuffer *eb, 1273 struct i915_vma *vma, 1274 unsigned int len) 1275 { 1276 struct reloc_cache *cache = &eb->reloc_cache; 1277 u32 *cmd; 1278 1279 if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) 1280 reloc_gpu_flush(cache); 1281 1282 if (unlikely(!cache->rq)) { 1283 int err; 1284 1285 /* If we need to copy for the cmdparser, we will stall anyway */ 1286 if (eb_use_cmdparser(eb)) 1287 return ERR_PTR(-EWOULDBLOCK); 1288 1289 if (!intel_engine_can_store_dword(eb->engine)) 1290 return ERR_PTR(-ENODEV); 1291 1292 err = __reloc_gpu_alloc(eb, vma, len); 1293 if (unlikely(err)) 1294 return ERR_PTR(err); 1295 } 1296 1297 cmd = cache->rq_cmd + cache->rq_size; 1298 cache->rq_size += len; 1299 1300 return cmd; 1301 } 1302 1303 static u64 1304 relocate_entry(struct i915_vma *vma, 1305 const struct drm_i915_gem_relocation_entry *reloc, 1306 struct i915_execbuffer *eb, 1307 const struct i915_vma *target) 1308 { 1309 u64 offset = reloc->offset; 1310 u64 target_offset = relocation_target(reloc, target); 1311 bool wide = eb->reloc_cache.use_64bit_reloc; 1312 void *vaddr; 1313 1314 if (!eb->reloc_cache.vaddr && 1315 (DBG_FORCE_RELOC == FORCE_GPU_RELOC || 1316 !reservation_object_test_signaled_rcu(vma->resv, true))) { 1317 const unsigned int gen = eb->reloc_cache.gen; 1318 unsigned int len; 1319 u32 *batch; 1320 u64 addr; 1321 1322 if (wide) 1323 len = offset & 7 ? 
8 : 5; 1324 else if (gen >= 4) 1325 len = 4; 1326 else 1327 len = 3; 1328 1329 batch = reloc_gpu(eb, vma, len); 1330 if (IS_ERR(batch)) 1331 goto repeat; 1332 1333 addr = gen8_canonical_addr(vma->node.start + offset); 1334 if (wide) { 1335 if (offset & 7) { 1336 *batch++ = MI_STORE_DWORD_IMM_GEN4; 1337 *batch++ = lower_32_bits(addr); 1338 *batch++ = upper_32_bits(addr); 1339 *batch++ = lower_32_bits(target_offset); 1340 1341 addr = gen8_canonical_addr(addr + 4); 1342 1343 *batch++ = MI_STORE_DWORD_IMM_GEN4; 1344 *batch++ = lower_32_bits(addr); 1345 *batch++ = upper_32_bits(addr); 1346 *batch++ = upper_32_bits(target_offset); 1347 } else { 1348 *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; 1349 *batch++ = lower_32_bits(addr); 1350 *batch++ = upper_32_bits(addr); 1351 *batch++ = lower_32_bits(target_offset); 1352 *batch++ = upper_32_bits(target_offset); 1353 } 1354 } else if (gen >= 6) { 1355 *batch++ = MI_STORE_DWORD_IMM_GEN4; 1356 *batch++ = 0; 1357 *batch++ = addr; 1358 *batch++ = target_offset; 1359 } else if (gen >= 4) { 1360 *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1361 *batch++ = 0; 1362 *batch++ = addr; 1363 *batch++ = target_offset; 1364 } else { 1365 *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; 1366 *batch++ = addr; 1367 *batch++ = target_offset; 1368 } 1369 1370 goto out; 1371 } 1372 1373 repeat: 1374 vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT); 1375 if (IS_ERR(vaddr)) 1376 return PTR_ERR(vaddr); 1377 1378 clflush_write32(vaddr + offset_in_page(offset), 1379 lower_32_bits(target_offset), 1380 eb->reloc_cache.vaddr); 1381 1382 if (wide) { 1383 offset += sizeof(u32); 1384 target_offset >>= 32; 1385 wide = false; 1386 goto repeat; 1387 } 1388 1389 out: 1390 return target->node.start | UPDATE; 1391 } 1392 1393 static u64 1394 eb_relocate_entry(struct i915_execbuffer *eb, 1395 struct i915_vma *vma, 1396 const struct drm_i915_gem_relocation_entry *reloc) 1397 { 1398 struct i915_vma *target; 1399 int err; 1400 1401 /* we've already hold a reference to all valid objects */ 1402 target = eb_get_vma(eb, reloc->target_handle); 1403 if (unlikely(!target)) 1404 return -ENOENT; 1405 1406 /* Validate that the target is in a valid r/w GPU domain */ 1407 if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { 1408 DRM_DEBUG("reloc with multiple write domains: " 1409 "target %d offset %d " 1410 "read %08x write %08x", 1411 reloc->target_handle, 1412 (int) reloc->offset, 1413 reloc->read_domains, 1414 reloc->write_domain); 1415 return -EINVAL; 1416 } 1417 if (unlikely((reloc->write_domain | reloc->read_domains) 1418 & ~I915_GEM_GPU_DOMAINS)) { 1419 DRM_DEBUG("reloc with read/write non-GPU domains: " 1420 "target %d offset %d " 1421 "read %08x write %08x", 1422 reloc->target_handle, 1423 (int) reloc->offset, 1424 reloc->read_domains, 1425 reloc->write_domain); 1426 return -EINVAL; 1427 } 1428 1429 if (reloc->write_domain) { 1430 *target->exec_flags |= EXEC_OBJECT_WRITE; 1431 1432 /* 1433 * Sandybridge PPGTT errata: We need a global gtt mapping 1434 * for MI and pipe_control writes because the gpu doesn't 1435 * properly redirect them through the ppgtt for non_secure 1436 * batchbuffers. 
1437 */ 1438 if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && 1439 IS_GEN(eb->i915, 6)) { 1440 err = i915_vma_bind(target, target->obj->cache_level, 1441 PIN_GLOBAL); 1442 if (WARN_ONCE(err, 1443 "Unexpected failure to bind target VMA!")) 1444 return err; 1445 } 1446 } 1447 1448 /* 1449 * If the relocation already has the right value in it, no 1450 * more work needs to be done. 1451 */ 1452 if (!DBG_FORCE_RELOC && 1453 gen8_canonical_addr(target->node.start) == reloc->presumed_offset) 1454 return 0; 1455 1456 /* Check that the relocation address is valid... */ 1457 if (unlikely(reloc->offset > 1458 vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) { 1459 DRM_DEBUG("Relocation beyond object bounds: " 1460 "target %d offset %d size %d.\n", 1461 reloc->target_handle, 1462 (int)reloc->offset, 1463 (int)vma->size); 1464 return -EINVAL; 1465 } 1466 if (unlikely(reloc->offset & 3)) { 1467 DRM_DEBUG("Relocation not 4-byte aligned: " 1468 "target %d offset %d.\n", 1469 reloc->target_handle, 1470 (int)reloc->offset); 1471 return -EINVAL; 1472 } 1473 1474 /* 1475 * If we write into the object, we need to force the synchronisation 1476 * barrier, either with an asynchronous clflush or if we executed the 1477 * patching using the GPU (though that should be serialised by the 1478 * timeline). To be completely sure, and since we are required to 1479 * do relocations we are already stalling, disable the user's opt 1480 * out of our synchronisation. 1481 */ 1482 *vma->exec_flags &= ~EXEC_OBJECT_ASYNC; 1483 1484 /* and update the user's relocation entry */ 1485 return relocate_entry(vma, reloc, eb, target); 1486 } 1487 1488 static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma) 1489 { 1490 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) 1491 struct drm_i915_gem_relocation_entry stack[N_RELOC(512)]; 1492 struct drm_i915_gem_relocation_entry __user *urelocs; 1493 const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); 1494 unsigned int remain; 1495 1496 urelocs = u64_to_user_ptr(entry->relocs_ptr); 1497 remain = entry->relocation_count; 1498 if (unlikely(remain > N_RELOC(ULONG_MAX))) 1499 return -EINVAL; 1500 1501 /* 1502 * We must check that the entire relocation array is safe 1503 * to read. However, if the array is not writable the user loses 1504 * the updated relocation values. 1505 */ 1506 if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs)))) 1507 return -EFAULT; 1508 1509 do { 1510 struct drm_i915_gem_relocation_entry *r = stack; 1511 unsigned int count = 1512 min_t(unsigned int, remain, ARRAY_SIZE(stack)); 1513 unsigned int copied; 1514 1515 /* 1516 * This is the fast path and we cannot handle a pagefault 1517 * whilst holding the struct mutex lest the user pass in the 1518 * relocations contained within a mmaped bo. For in such a case 1519 * we, the page fault handler would call i915_gem_fault() and 1520 * we would try to acquire the struct mutex again. Obviously 1521 * this is bad and so lockdep complains vehemently. 
1522 */ 1523 pagefault_disable(); 1524 copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); 1525 pagefault_enable(); 1526 if (unlikely(copied)) { 1527 remain = -EFAULT; 1528 goto out; 1529 } 1530 1531 remain -= count; 1532 do { 1533 u64 offset = eb_relocate_entry(eb, vma, r); 1534 1535 if (likely(offset == 0)) { 1536 } else if ((s64)offset < 0) { 1537 remain = (int)offset; 1538 goto out; 1539 } else { 1540 /* 1541 * Note that reporting an error now 1542 * leaves everything in an inconsistent 1543 * state as we have *already* changed 1544 * the relocation value inside the 1545 * object. As we have not changed the 1546 * reloc.presumed_offset or will not 1547 * change the execobject.offset, on the 1548 * call we may not rewrite the value 1549 * inside the object, leaving it 1550 * dangling and causing a GPU hang. Unless 1551 * userspace dynamically rebuilds the 1552 * relocations on each execbuf rather than 1553 * presume a static tree. 1554 * 1555 * We did previously check if the relocations 1556 * were writable (access_ok), an error now 1557 * would be a strange race with mprotect, 1558 * having already demonstrated that we 1559 * can read from this userspace address. 1560 */ 1561 offset = gen8_canonical_addr(offset & ~UPDATE); 1562 if (unlikely(__put_user(offset, &urelocs[r-stack].presumed_offset))) { 1563 remain = -EFAULT; 1564 goto out; 1565 } 1566 } 1567 } while (r++, --count); 1568 urelocs += ARRAY_SIZE(stack); 1569 } while (remain); 1570 out: 1571 reloc_cache_reset(&eb->reloc_cache); 1572 return remain; 1573 } 1574 1575 static int 1576 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma) 1577 { 1578 const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); 1579 struct drm_i915_gem_relocation_entry *relocs = 1580 u64_to_ptr(typeof(*relocs), entry->relocs_ptr); 1581 unsigned int i; 1582 int err; 1583 1584 for (i = 0; i < entry->relocation_count; i++) { 1585 u64 offset = eb_relocate_entry(eb, vma, &relocs[i]); 1586 1587 if ((s64)offset < 0) { 1588 err = (int)offset; 1589 goto err; 1590 } 1591 } 1592 err = 0; 1593 err: 1594 reloc_cache_reset(&eb->reloc_cache); 1595 return err; 1596 } 1597 1598 static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) 1599 { 1600 const char __user *addr, *end; 1601 unsigned long size; 1602 char __maybe_unused c; 1603 1604 size = entry->relocation_count; 1605 if (size == 0) 1606 return 0; 1607 1608 if (size > N_RELOC(ULONG_MAX)) 1609 return -EINVAL; 1610 1611 addr = u64_to_user_ptr(entry->relocs_ptr); 1612 size *= sizeof(struct drm_i915_gem_relocation_entry); 1613 if (!access_ok(addr, size)) 1614 return -EFAULT; 1615 1616 end = addr + size; 1617 for (; addr < end; addr += PAGE_SIZE) { 1618 int err = __get_user(c, addr); 1619 if (err) 1620 return err; 1621 } 1622 return __get_user(c, end - 1); 1623 } 1624 1625 static int eb_copy_relocations(const struct i915_execbuffer *eb) 1626 { 1627 const unsigned int count = eb->buffer_count; 1628 unsigned int i; 1629 int err; 1630 1631 for (i = 0; i < count; i++) { 1632 const unsigned int nreloc = eb->exec[i].relocation_count; 1633 struct drm_i915_gem_relocation_entry __user *urelocs; 1634 struct drm_i915_gem_relocation_entry *relocs; 1635 unsigned long size; 1636 unsigned long copied; 1637 1638 if (nreloc == 0) 1639 continue; 1640 1641 err = check_relocations(&eb->exec[i]); 1642 if (err) 1643 goto err; 1644 1645 urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); 1646 size = nreloc * sizeof(*relocs); 1647 1648 relocs = kvmalloc_array(size, 1, GFP_KERNEL); 
1649 if (!relocs) { 1650 err = -ENOMEM; 1651 goto err; 1652 } 1653 1654 /* copy_from_user is limited to < 4GiB */ 1655 copied = 0; 1656 do { 1657 unsigned int len = 1658 min_t(u64, BIT_ULL(31), size - copied); 1659 1660 if (__copy_from_user((char *)relocs + copied, 1661 (char __user *)urelocs + copied, 1662 len)) { 1663 end_user: 1664 user_access_end(); 1665 end: 1666 kvfree(relocs); 1667 err = -EFAULT; 1668 goto err; 1669 } 1670 1671 copied += len; 1672 } while (copied < size); 1673 1674 /* 1675 * As we do not update the known relocation offsets after 1676 * relocating (due to the complexities in lock handling), 1677 * we need to mark them as invalid now so that we force the 1678 * relocation processing next time. Just in case the target 1679 * object is evicted and then rebound into its old 1680 * presumed_offset before the next execbuffer - if that 1681 * happened we would make the mistake of assuming that the 1682 * relocations were valid. 1683 */ 1684 if (!user_access_begin(urelocs, size)) 1685 goto end; 1686 1687 for (copied = 0; copied < nreloc; copied++) 1688 unsafe_put_user(-1, 1689 &urelocs[copied].presumed_offset, 1690 end_user); 1691 user_access_end(); 1692 1693 eb->exec[i].relocs_ptr = (uintptr_t)relocs; 1694 } 1695 1696 return 0; 1697 1698 err: 1699 while (i--) { 1700 struct drm_i915_gem_relocation_entry *relocs = 1701 u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); 1702 if (eb->exec[i].relocation_count) 1703 kvfree(relocs); 1704 } 1705 return err; 1706 } 1707 1708 static int eb_prefault_relocations(const struct i915_execbuffer *eb) 1709 { 1710 const unsigned int count = eb->buffer_count; 1711 unsigned int i; 1712 1713 if (unlikely(i915_modparams.prefault_disable)) 1714 return 0; 1715 1716 for (i = 0; i < count; i++) { 1717 int err; 1718 1719 err = check_relocations(&eb->exec[i]); 1720 if (err) 1721 return err; 1722 } 1723 1724 return 0; 1725 } 1726 1727 static noinline int eb_relocate_slow(struct i915_execbuffer *eb) 1728 { 1729 struct drm_device *dev = &eb->i915->drm; 1730 bool have_copy = false; 1731 struct i915_vma *vma; 1732 int err = 0; 1733 1734 repeat: 1735 if (signal_pending(current)) { 1736 err = -ERESTARTSYS; 1737 goto out; 1738 } 1739 1740 /* We may process another execbuffer during the unlock... */ 1741 eb_reset_vmas(eb); 1742 mutex_unlock(&dev->struct_mutex); 1743 1744 /* 1745 * We take 3 passes through the slowpatch. 1746 * 1747 * 1 - we try to just prefault all the user relocation entries and 1748 * then attempt to reuse the atomic pagefault disabled fast path again. 1749 * 1750 * 2 - we copy the user entries to a local buffer here outside of the 1751 * local and allow ourselves to wait upon any rendering before 1752 * relocations 1753 * 1754 * 3 - we already have a local copy of the relocation entries, but 1755 * were interrupted (EAGAIN) whilst waiting for the objects, try again. 
1756 */ 1757 if (!err) { 1758 err = eb_prefault_relocations(eb); 1759 } else if (!have_copy) { 1760 err = eb_copy_relocations(eb); 1761 have_copy = err == 0; 1762 } else { 1763 cond_resched(); 1764 err = 0; 1765 } 1766 if (err) { 1767 mutex_lock(&dev->struct_mutex); 1768 goto out; 1769 } 1770 1771 /* A frequent cause for EAGAIN are currently unavailable client pages */ 1772 flush_workqueue(eb->i915->mm.userptr_wq); 1773 1774 err = i915_mutex_lock_interruptible(dev); 1775 if (err) { 1776 mutex_lock(&dev->struct_mutex); 1777 goto out; 1778 } 1779 1780 /* reacquire the objects */ 1781 err = eb_lookup_vmas(eb); 1782 if (err) 1783 goto err; 1784 1785 GEM_BUG_ON(!eb->batch); 1786 1787 list_for_each_entry(vma, &eb->relocs, reloc_link) { 1788 if (!have_copy) { 1789 pagefault_disable(); 1790 err = eb_relocate_vma(eb, vma); 1791 pagefault_enable(); 1792 if (err) 1793 goto repeat; 1794 } else { 1795 err = eb_relocate_vma_slow(eb, vma); 1796 if (err) 1797 goto err; 1798 } 1799 } 1800 1801 /* 1802 * Leave the user relocations as are, this is the painfully slow path, 1803 * and we want to avoid the complication of dropping the lock whilst 1804 * having buffers reserved in the aperture and so causing spurious 1805 * ENOSPC for random operations. 1806 */ 1807 1808 err: 1809 if (err == -EAGAIN) 1810 goto repeat; 1811 1812 out: 1813 if (have_copy) { 1814 const unsigned int count = eb->buffer_count; 1815 unsigned int i; 1816 1817 for (i = 0; i < count; i++) { 1818 const struct drm_i915_gem_exec_object2 *entry = 1819 &eb->exec[i]; 1820 struct drm_i915_gem_relocation_entry *relocs; 1821 1822 if (!entry->relocation_count) 1823 continue; 1824 1825 relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr); 1826 kvfree(relocs); 1827 } 1828 } 1829 1830 return err; 1831 } 1832 1833 static int eb_relocate(struct i915_execbuffer *eb) 1834 { 1835 if (eb_lookup_vmas(eb)) 1836 goto slow; 1837 1838 /* The objects are in their final locations, apply the relocations. 
*/ 1839 if (eb->args->flags & __EXEC_HAS_RELOC) { 1840 struct i915_vma *vma; 1841 1842 list_for_each_entry(vma, &eb->relocs, reloc_link) { 1843 if (eb_relocate_vma(eb, vma)) 1844 goto slow; 1845 } 1846 } 1847 1848 return 0; 1849 1850 slow: 1851 return eb_relocate_slow(eb); 1852 } 1853 1854 static int eb_move_to_gpu(struct i915_execbuffer *eb) 1855 { 1856 const unsigned int count = eb->buffer_count; 1857 struct ww_acquire_ctx acquire; 1858 unsigned int i; 1859 int err = 0; 1860 1861 ww_acquire_init(&acquire, &reservation_ww_class); 1862 1863 for (i = 0; i < count; i++) { 1864 struct i915_vma *vma = eb->vma[i]; 1865 1866 err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire); 1867 if (!err) 1868 continue; 1869 1870 GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */ 1871 1872 if (err == -EDEADLK) { 1873 GEM_BUG_ON(i == 0); 1874 do { 1875 int j = i - 1; 1876 1877 ww_mutex_unlock(&eb->vma[j]->resv->lock); 1878 1879 swap(eb->flags[i], eb->flags[j]); 1880 swap(eb->vma[i], eb->vma[j]); 1881 eb->vma[i]->exec_flags = &eb->flags[i]; 1882 } while (--i); 1883 GEM_BUG_ON(vma != eb->vma[0]); 1884 vma->exec_flags = &eb->flags[0]; 1885 1886 err = ww_mutex_lock_slow_interruptible(&vma->resv->lock, 1887 &acquire); 1888 } 1889 if (err) 1890 break; 1891 } 1892 ww_acquire_done(&acquire); 1893 1894 while (i--) { 1895 unsigned int flags = eb->flags[i]; 1896 struct i915_vma *vma = eb->vma[i]; 1897 struct drm_i915_gem_object *obj = vma->obj; 1898 1899 assert_vma_held(vma); 1900 1901 if (flags & EXEC_OBJECT_CAPTURE) { 1902 struct i915_capture_list *capture; 1903 1904 capture = kmalloc(sizeof(*capture), GFP_KERNEL); 1905 if (capture) { 1906 capture->next = eb->request->capture_list; 1907 capture->vma = vma; 1908 eb->request->capture_list = capture; 1909 } 1910 } 1911 1912 /* 1913 * If the GPU is not _reading_ through the CPU cache, we need 1914 * to make sure that any writes (both previous GPU writes from 1915 * before a change in snooping levels and normal CPU writes) 1916 * caught in that cache are flushed to main memory. 1917 * 1918 * We want to say 1919 * obj->cache_dirty && 1920 * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) 1921 * but gcc's optimiser doesn't handle that as well and emits 1922 * two jumps instead of one. Maybe one day... 1923 */ 1924 if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) { 1925 if (i915_gem_clflush_object(obj, 0)) 1926 flags &= ~EXEC_OBJECT_ASYNC; 1927 } 1928 1929 if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) { 1930 err = i915_request_await_object 1931 (eb->request, obj, flags & EXEC_OBJECT_WRITE); 1932 } 1933 1934 if (err == 0) 1935 err = i915_vma_move_to_active(vma, eb->request, flags); 1936 1937 i915_vma_unlock(vma); 1938 1939 __eb_unreserve_vma(vma, flags); 1940 vma->exec_flags = NULL; 1941 1942 if (unlikely(flags & __EXEC_OBJECT_HAS_REF)) 1943 i915_vma_put(vma); 1944 } 1945 ww_acquire_fini(&acquire); 1946 1947 if (unlikely(err)) 1948 goto err_skip; 1949 1950 eb->exec = NULL; 1951 1952 /* Unconditionally flush any chipset caches (for streaming writes). 
	eb->exec = NULL;

	/* Unconditionally flush any chipset caches (for streaming writes). */
	intel_gt_chipset_flush(eb->engine->gt);
	return 0;

err_skip:
	i915_request_skip(eb->request, err);
	return err;
}

static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
		return false;

	/* Kernel clipping was a DRI1 misfeature */
	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
		if (exec->num_cliprects || exec->cliprects_ptr)
			return false;
	}

	if (exec->DR4 == 0xffffffff) {
		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
		exec->DR4 = 0;
	}
	if (exec->DR1 || exec->DR4)
		return false;

	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
		return false;

	return true;
}

static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
{
	u32 *cs;
	int i;

	if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) {
		DRM_DEBUG("sol reset is gen7/rcs only\n");
		return -EINVAL;
	}

	cs = intel_ring_begin(rq, 4 * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(4);
	for (i = 0; i < 4; i++) {
		*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
		*cs++ = 0;
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
{
	struct drm_i915_gem_object *shadow_batch_obj;
	struct i915_vma *vma;
	int err;

	shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
						   PAGE_ALIGN(eb->batch_len));
	if (IS_ERR(shadow_batch_obj))
		return ERR_CAST(shadow_batch_obj);

	err = intel_engine_cmd_parser(eb->engine,
				      eb->batch->obj,
				      shadow_batch_obj,
				      eb->batch_start_offset,
				      eb->batch_len,
				      is_master);
	if (err) {
		if (err == -EACCES) /* unhandled chained batch */
			vma = NULL;
		else
			vma = ERR_PTR(err);
		goto out;
	}

	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		goto out;

	eb->vma[eb->buffer_count] = i915_vma_get(vma);
	eb->flags[eb->buffer_count] =
		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
	vma->exec_flags = &eb->flags[eb->buffer_count];
	eb->buffer_count++;

out:
	i915_gem_object_unpin_pages(shadow_batch_obj);
	return vma;
}

static void
add_to_client(struct i915_request *rq, struct drm_file *file)
{
	rq->file_priv = file->driver_priv;
	list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
}
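
/*
 * eb_submit - flush the objects into the GPU domain, apply any last-minute
 * workarounds and then emit the batchbuffer start into the ring.
 */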
static int eb_submit(struct i915_execbuffer *eb)
{
	int err;

	err = eb_move_to_gpu(eb);
	if (err)
		return err;

	if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
		err = i915_reset_gen7_sol_offsets(eb->request);
		if (err)
			return err;
	}

	/*
	 * After we completed waiting for other engines (using HW semaphores)
	 * then we can signal that this request/batch is ready to run. This
	 * allows us to determine if the batch is still waiting on the GPU
	 * or actually running by checking the breadcrumb.
	 */
	if (eb->engine->emit_init_breadcrumb) {
		err = eb->engine->emit_init_breadcrumb(eb->request);
		if (err)
			return err;
	}

	err = eb->engine->emit_bb_start(eb->request,
					eb->batch->node.start +
					eb->batch_start_offset,
					eb->batch_len,
					eb->batch_flags);
	if (err)
		return err;

	return 0;
}

/*
 * Find one BSD ring to dispatch the corresponding BSD command.
 * The engine index is returned.
 */
static unsigned int
gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
			 struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Check whether the file_priv has already selected one ring. */
	if ((int)file_priv->bsd_engine < 0)
		file_priv->bsd_engine = atomic_fetch_xor(1,
			&dev_priv->mm.bsd_engine_dispatch_index);

	return file_priv->bsd_engine;
}

static const enum intel_engine_id user_ring_map[] = {
	[I915_EXEC_DEFAULT]	= RCS0,
	[I915_EXEC_RENDER]	= RCS0,
	[I915_EXEC_BLT]		= BCS0,
	[I915_EXEC_BSD]		= VCS0,
	[I915_EXEC_VEBOX]	= VECS0
};

static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
{
	int err;

	/*
	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
	 * EIO if the GPU is already wedged.
	 */
	err = intel_gt_terminally_wedged(ce->engine->gt);
	if (err)
		return err;

	/*
	 * Pinning the contexts may generate requests in order to acquire
	 * GGTT space, so do this first before we reserve a seqno for
	 * ourselves.
	 */
	err = intel_context_pin(ce);
	if (err)
		return err;

	eb->engine = ce->engine;
	eb->context = ce;
	return 0;
}

static void eb_unpin_context(struct i915_execbuffer *eb)
{
	intel_context_unpin(eb->context);
}

static unsigned int
eb_select_legacy_ring(struct i915_execbuffer *eb,
		      struct drm_file *file,
		      struct drm_i915_gem_execbuffer2 *args)
{
	struct drm_i915_private *i915 = eb->i915;
	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;

	if (user_ring_id != I915_EXEC_BSD &&
	    (args->flags & I915_EXEC_BSD_MASK)) {
		DRM_DEBUG("execbuf with non bsd ring but with invalid "
			  "bsd dispatch flags: %d\n", (int)(args->flags));
		return -1;
	}

	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) {
		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;

		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
			bsd_idx = gen8_dispatch_bsd_engine(i915, file);
		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
			   bsd_idx <= I915_EXEC_BSD_RING2) {
			bsd_idx >>= I915_EXEC_BSD_SHIFT;
			bsd_idx--;
		} else {
			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
				  bsd_idx);
			return -1;
		}

		return _VCS(bsd_idx);
	}

	if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
		return -1;
	}

	return user_ring_map[user_ring_id];
}
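
/*
 * eb_select_engine - resolve the engine requested in the execbuf flags,
 * either through the context's user-supplied engine map or the legacy ring
 * map above, and pin its context for the duration of the submission.
 */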
static int
eb_select_engine(struct i915_execbuffer *eb,
		 struct drm_file *file,
		 struct drm_i915_gem_execbuffer2 *args)
{
	struct intel_context *ce;
	unsigned int idx;
	int err;

	if (i915_gem_context_user_engines(eb->gem_context))
		idx = args->flags & I915_EXEC_RING_MASK;
	else
		idx = eb_select_legacy_ring(eb, file, args);

	ce = i915_gem_context_get_engine(eb->gem_context, idx);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = eb_pin_context(eb, ce);
	intel_context_put(ce);

	return err;
}

static void
__free_fence_array(struct drm_syncobj **fences, unsigned int n)
{
	while (n--)
		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
	kvfree(fences);
}

static struct drm_syncobj **
get_fence_array(struct drm_i915_gem_execbuffer2 *args,
		struct drm_file *file)
{
	const unsigned long nfences = args->num_cliprects;
	struct drm_i915_gem_exec_fence __user *user;
	struct drm_syncobj **fences;
	unsigned long n;
	int err;

	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
		return NULL;

	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
	if (nfences > min_t(unsigned long,
			    ULONG_MAX / sizeof(*user),
			    SIZE_MAX / sizeof(*fences)))
		return ERR_PTR(-EINVAL);

	user = u64_to_user_ptr(args->cliprects_ptr);
	if (!access_ok(user, nfences * sizeof(*user)))
		return ERR_PTR(-EFAULT);

	fences = kvmalloc_array(nfences, sizeof(*fences),
				__GFP_NOWARN | GFP_KERNEL);
	if (!fences)
		return ERR_PTR(-ENOMEM);

	for (n = 0; n < nfences; n++) {
		struct drm_i915_gem_exec_fence fence;
		struct drm_syncobj *syncobj;

		if (__copy_from_user(&fence, user++, sizeof(fence))) {
			err = -EFAULT;
			goto err;
		}

		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
			err = -EINVAL;
			goto err;
		}

		syncobj = drm_syncobj_find(file, fence.handle);
		if (!syncobj) {
			DRM_DEBUG("Invalid syncobj handle provided\n");
			err = -ENOENT;
			goto err;
		}

		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);

		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
	}

	return fences;

err:
	__free_fence_array(fences, n);
	return ERR_PTR(err);
}

static void
put_fence_array(struct drm_i915_gem_execbuffer2 *args,
		struct drm_syncobj **fences)
{
	if (fences)
		__free_fence_array(fences, args->num_cliprects);
}

static int
await_fence_array(struct i915_execbuffer *eb,
		  struct drm_syncobj **fences)
{
	const unsigned int nfences = eb->args->num_cliprects;
	unsigned int n;
	int err;

	for (n = 0; n < nfences; n++) {
		struct drm_syncobj *syncobj;
		struct dma_fence *fence;
		unsigned int flags;

		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
		if (!(flags & I915_EXEC_FENCE_WAIT))
			continue;

		fence = drm_syncobj_fence_get(syncobj);
		if (!fence)
			return -EINVAL;

		err = i915_request_await_dma_fence(eb->request, fence);
		dma_fence_put(fence);
		if (err < 0)
			return err;
	}

	return 0;
}
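
/*
 * signal_fence_array - attach this request's fence to every syncobj that
 * userspace flagged with I915_EXEC_FENCE_SIGNAL, so that those syncobjs
 * are signalled when the batch completes.
 */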
static void
signal_fence_array(struct i915_execbuffer *eb,
		   struct drm_syncobj **fences)
{
	const unsigned int nfences = eb->args->num_cliprects;
	struct dma_fence * const fence = &eb->request->fence;
	unsigned int n;

	for (n = 0; n < nfences; n++) {
		struct drm_syncobj *syncobj;
		unsigned int flags;

		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
		if (!(flags & I915_EXEC_FENCE_SIGNAL))
			continue;

		drm_syncobj_replace_fence(syncobj, fence);
	}
}

static int
i915_gem_do_execbuffer(struct drm_device *dev,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec,
		       struct drm_syncobj **fences)
{
	struct i915_execbuffer eb;
	struct dma_fence *in_fence = NULL;
	struct dma_fence *exec_fence = NULL;
	struct sync_file *out_fence = NULL;
	int out_fence_fd = -1;
	int err;

	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
		     ~__EXEC_OBJECT_UNKNOWN_FLAGS);

	eb.i915 = to_i915(dev);
	eb.file = file;
	eb.args = args;
	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
		args->flags |= __EXEC_HAS_RELOC;

	eb.exec = exec;
	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
	eb.vma[0] = NULL;
	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);

	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
	reloc_cache_init(&eb.reloc_cache, eb.i915);

	eb.buffer_count = args->buffer_count;
	eb.batch_start_offset = args->batch_start_offset;
	eb.batch_len = args->batch_len;

	eb.batch_flags = 0;
	if (args->flags & I915_EXEC_SECURE) {
		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
			return -EPERM;

		eb.batch_flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		eb.batch_flags |= I915_DISPATCH_PINNED;

	if (args->flags & I915_EXEC_FENCE_IN) {
		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
		if (!in_fence)
			return -EINVAL;
	}

	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
		if (in_fence) {
			err = -EINVAL;
			goto err_in_fence;
		}

		exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
		if (!exec_fence) {
			err = -EINVAL;
			goto err_in_fence;
		}
	}

	if (args->flags & I915_EXEC_FENCE_OUT) {
		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
		if (out_fence_fd < 0) {
			err = out_fence_fd;
			goto err_exec_fence;
		}
	}

	err = eb_create(&eb);
	if (err)
		goto err_out_fence;

	GEM_BUG_ON(!eb.lut_size);

	err = eb_select_context(&eb);
	if (unlikely(err))
		goto err_destroy;

	/*
	 * Take a local wakeref for preparing to dispatch the execbuf as
	 * we expect to access the hardware fairly frequently in the
	 * process. Upon first dispatch, we acquire another prolonged
	 * wakeref that we hold until the GPU has been idle for at least
	 * 100ms.
	 */
	intel_gt_pm_get(&eb.i915->gt);

	err = i915_mutex_lock_interruptible(dev);
	if (err)
		goto err_rpm;

	err = eb_select_engine(&eb, file, args);
	if (unlikely(err))
		goto err_unlock;

	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
	if (unlikely(err))
		goto err_engine;

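	/*
	 * Reserve address space for every object and patch up any stale
	 * relocations. On the slow path eb_relocate() may copy the user
	 * relocation entries and temporarily drop struct_mutex.
	 */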
	err = eb_relocate(&eb);
	if (err) {
		/*
		 * If the user expects the execobject.offset and
		 * reloc.presumed_offset to be an exact match,
		 * as for using NO_RELOC, then we cannot update
		 * the execobject.offset until we have completed
		 * relocation.
		 */
		args->flags &= ~__EXEC_HAS_RELOC;
		goto err_vma;
	}

	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		err = -EINVAL;
		goto err_vma;
	}
	if (eb.batch_start_offset > eb.batch->size ||
	    eb.batch_len > eb.batch->size - eb.batch_start_offset) {
		DRM_DEBUG("Attempting to use out-of-bounds batch\n");
		err = -EINVAL;
		goto err_vma;
	}

	if (eb_use_cmdparser(&eb)) {
		struct i915_vma *vma;

		vma = eb_parse(&eb, drm_is_current_master(file));
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto err_vma;
		}

		if (vma) {
			/*
			 * Batch parsed and accepted:
			 *
			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
			 * bit from MI_BATCH_BUFFER_START commands issued in
			 * the dispatch_execbuffer implementations. We
			 * specifically don't want that set on batches the
			 * command parser has accepted.
			 */
			eb.batch_flags |= I915_DISPATCH_SECURE;
			eb.batch_start_offset = 0;
			eb.batch = vma;
		}
	}

	if (eb.batch_len == 0)
		eb.batch_len = eb.batch->size - eb.batch_start_offset;

	/*
	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but bdw mucks it up again.
	 */
	if (eb.batch_flags & I915_DISPATCH_SECURE) {
		struct i915_vma *vma;

		/*
		 * So on first glance it looks freaky that we pin the batch here
		 * outside of the reservation loop. But:
		 * - The batch is already pinned into the relevant ppgtt, so we
		 *   already have the backing storage fully allocated.
		 * - No other BO uses the global gtt (well contexts, but meh),
		 *   so we don't really have issues with multiple objects not
		 *   fitting due to fragmentation.
		 * So this is actually safe.
		 */
		vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto err_vma;
		}

		eb.batch = vma;
	}

	/* All GPU relocation batches must be submitted prior to the user rq */
	GEM_BUG_ON(eb.reloc_cache.rq);

	/* Allocate a request for this batch buffer nice and early. */
	eb.request = i915_request_create(eb.context);
	if (IS_ERR(eb.request)) {
		err = PTR_ERR(eb.request);
		goto err_batch_unpin;
	}

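	/*
	 * Queue up the incoming fences this request must wait upon before it
	 * may be submitted, and, if requested, prepare an out-fence fd for
	 * userspace to wait upon in turn.
	 */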
	if (in_fence) {
		err = i915_request_await_dma_fence(eb.request, in_fence);
		if (err < 0)
			goto err_request;
	}

	if (exec_fence) {
		err = i915_request_await_execution(eb.request, exec_fence,
						   eb.engine->bond_execute);
		if (err < 0)
			goto err_request;
	}

	if (fences) {
		err = await_fence_array(&eb, fences);
		if (err)
			goto err_request;
	}

	if (out_fence_fd != -1) {
		out_fence = sync_file_create(&eb.request->fence);
		if (!out_fence) {
			err = -ENOMEM;
			goto err_request;
		}
	}

	/*
	 * Whilst this request exists, batch_obj will be on the
	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
	 * inactive_list and lose its active reference. Hence we do not need
	 * to explicitly hold another reference here.
	 */
	eb.request->batch = eb.batch;

	trace_i915_request_queue(eb.request, eb.batch_flags);
	err = eb_submit(&eb);
err_request:
	add_to_client(eb.request, file);
	i915_request_add(eb.request);

	if (fences)
		signal_fence_array(&eb, fences);

	if (out_fence) {
		if (err == 0) {
			fd_install(out_fence_fd, out_fence->file);
			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
			args->rsvd2 |= (u64)out_fence_fd << 32;
			out_fence_fd = -1;
		} else {
			fput(out_fence->file);
		}
	}

err_batch_unpin:
	if (eb.batch_flags & I915_DISPATCH_SECURE)
		i915_vma_unpin(eb.batch);
err_vma:
	if (eb.exec)
		eb_release_vmas(&eb);
err_engine:
	eb_unpin_context(&eb);
err_unlock:
	mutex_unlock(&dev->struct_mutex);
err_rpm:
	intel_gt_pm_put(&eb.i915->gt);
	i915_gem_context_put(eb.gem_context);
err_destroy:
	eb_destroy(&eb);
err_out_fence:
	if (out_fence_fd != -1)
		put_unused_fd(out_fence_fd);
err_exec_fence:
	dma_fence_put(exec_fence);
err_in_fence:
	dma_fence_put(in_fence);
	return err;
}
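
/*
 * Each exec object is stored alongside a vma pointer and a flags slot in a
 * single allocation (carved up as eb.vma[] and eb.flags[] in
 * i915_gem_do_execbuffer()), so size the per-element allocation to cover
 * all three.
 */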
static size_t eb_element_size(void)
{
	return (sizeof(struct drm_i915_gem_exec_object2) +
		sizeof(struct i915_vma *) +
		sizeof(unsigned int));
}

static bool check_buffer_count(size_t count)
{
	const size_t sz = eb_element_size();

	/*
	 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
	 * array size (see eb_create()). Otherwise, we can accept an array as
	 * large as can be addressed (though use large arrays at your peril)!
	 */

	return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	const size_t count = args->buffer_count;
	unsigned int i;
	int err;

	if (!check_buffer_count(count)) {
		DRM_DEBUG("execbuf with %zd buffers\n", count);
		return -EINVAL;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	if (!i915_gem_check_execbuffer(&exec2))
		return -EINVAL;

	/* Copy in the exec list from userland */
	exec_list = kvmalloc_array(count, sizeof(*exec_list),
				   __GFP_NOWARN | GFP_KERNEL);
	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
				    __GFP_NOWARN | GFP_KERNEL);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		kvfree(exec_list);
		kvfree(exec2_list);
		return -ENOMEM;
	}
	err = copy_from_user(exec_list,
			     u64_to_user_ptr(args->buffers_ptr),
			     sizeof(*exec_list) * count);
	if (err) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, err);
		kvfree(exec_list);
		kvfree(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_GEN(to_i915(dev)) < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);

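	/*
	 * As with execbuffer2, write the updated offsets back to userspace
	 * even if the submission failed: the relocations have already been
	 * applied, so the presumed offsets must be kept in sync.
	 */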
	if (exec2.flags & __EXEC_HAS_RELOC) {
		struct drm_i915_gem_exec_object __user *user_exec_list =
			u64_to_user_ptr(args->buffers_ptr);

		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++) {
			if (!(exec2_list[i].offset & UPDATE))
				continue;

			exec2_list[i].offset =
				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
			exec2_list[i].offset &= PIN_OFFSET_MASK;
			if (__copy_to_user(&user_exec_list[i].offset,
					   &exec2_list[i].offset,
					   sizeof(user_exec_list[i].offset)))
				break;
		}
	}

	kvfree(exec_list);
	kvfree(exec2_list);
	return err;
}

int
i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
			   struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list;
	struct drm_syncobj **fences = NULL;
	const size_t count = args->buffer_count;
	int err;

	if (!check_buffer_count(count)) {
		DRM_DEBUG("execbuf2 with %zd buffers\n", count);
		return -EINVAL;
	}

	if (!i915_gem_check_execbuffer(args))
		return -EINVAL;

	/* Allocate an extra slot for use by the command parser */
	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
				    __GFP_NOWARN | GFP_KERNEL);
	if (exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %zd buffers\n",
			  count);
		return -ENOMEM;
	}
	if (copy_from_user(exec2_list,
			   u64_to_user_ptr(args->buffers_ptr),
			   sizeof(*exec2_list) * count)) {
		DRM_DEBUG("copy %zd exec entries failed\n", count);
		kvfree(exec2_list);
		return -EFAULT;
	}

	if (args->flags & I915_EXEC_FENCE_ARRAY) {
		fences = get_fence_array(args, file);
		if (IS_ERR(fences)) {
			kvfree(exec2_list);
			return PTR_ERR(fences);
		}
	}

	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);

	/*
	 * Now that we have begun execution of the batchbuffer, we ignore
	 * any new error after this point. Also given that we have already
	 * updated the associated relocations, we try to write out the current
	 * object locations irrespective of any error.
	 */
	if (args->flags & __EXEC_HAS_RELOC) {
		struct drm_i915_gem_exec_object2 __user *user_exec_list =
			u64_to_user_ptr(args->buffers_ptr);
		unsigned int i;

		/*
		 * Copy the new buffer offsets back to the user's exec list.
		 *
		 * Note: count * sizeof(*user_exec_list) does not overflow,
		 * because we checked 'count' in check_buffer_count().
		 *
		 * And this range already got effectively checked earlier
		 * when we did the "copy_from_user()" above.
		 */
		if (!user_access_begin(user_exec_list,
				       count * sizeof(*user_exec_list)))
			goto end;

		for (i = 0; i < args->buffer_count; i++) {
			if (!(exec2_list[i].offset & UPDATE))
				continue;

			exec2_list[i].offset =
				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
			unsafe_put_user(exec2_list[i].offset,
					&user_exec_list[i].offset,
					end_user);
		}
end_user:
		user_access_end();
end:;
	}

	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
	put_fence_array(args, fences);
	kvfree(exec2_list);
	return err;
}