1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2008,2010 Intel Corporation
5  */
6 
7 #include <linux/intel-iommu.h>
8 #include <linux/dma-resv.h>
9 #include <linux/sync_file.h>
10 #include <linux/uaccess.h>
11 
12 #include <drm/drm_syncobj.h>
13 #include <drm/i915_drm.h>
14 
15 #include "display/intel_frontbuffer.h"
16 
17 #include "gem/i915_gem_ioctls.h"
18 #include "gt/intel_context.h"
19 #include "gt/intel_engine_pool.h"
20 #include "gt/intel_gt.h"
21 #include "gt/intel_gt_pm.h"
22 #include "gt/intel_ring.h"
23 
24 #include "i915_drv.h"
25 #include "i915_gem_clflush.h"
26 #include "i915_gem_context.h"
27 #include "i915_gem_ioctls.h"
28 #include "i915_trace.h"
29 
30 enum {
31 	FORCE_CPU_RELOC = 1,
32 	FORCE_GTT_RELOC,
33 	FORCE_GPU_RELOC,
34 #define DBG_FORCE_RELOC 0 /* choose one of the above! */
35 };
36 
37 #define __EXEC_OBJECT_HAS_REF		BIT(31)
38 #define __EXEC_OBJECT_HAS_PIN		BIT(30)
39 #define __EXEC_OBJECT_HAS_FENCE		BIT(29)
40 #define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
41 #define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
42 #define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
43 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
44 
45 #define __EXEC_HAS_RELOC	BIT(31)
46 #define __EXEC_VALIDATED	BIT(30)
47 #define __EXEC_INTERNAL_FLAGS	(~0u << 30)
48 #define UPDATE			PIN_OFFSET_FIXED
49 
50 #define BATCH_OFFSET_BIAS (256*1024)
51 
52 #define __I915_EXEC_ILLEGAL_FLAGS \
53 	(__I915_EXEC_UNKNOWN_FLAGS | \
54 	 I915_EXEC_CONSTANTS_MASK  | \
55 	 I915_EXEC_RESOURCE_STREAMER)
56 
57 /* Catch emission of unexpected errors for CI! */
58 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
59 #undef EINVAL
60 #define EINVAL ({ \
61 	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
62 	22; \
63 })
64 #endif
65 
66 /**
67  * DOC: User command execution
68  *
69  * Userspace submits commands to be executed on the GPU as an instruction
70  * stream within a GEM object we call a batchbuffer. These instructions may
71  * refer to other GEM objects containing auxiliary state such as kernels,
72  * samplers, render targets and even secondary batchbuffers. Userspace does
73  * not know where in the GPU memory these objects reside and so before the
74  * batchbuffer is passed to the GPU for execution, those addresses in the
75  * batchbuffer and auxiliary objects are updated. This is known as relocation,
76  * or patching. To try and avoid having to relocate each object on the next
77  * execution, userspace is told the location of those objects in this pass,
78  * but this remains just a hint as the kernel may choose a new location for
79  * any object in the future.
80  *
81  * At the level of talking to the hardware, submitting a batchbuffer for the
82  * GPU to execute amounts to adding content to a buffer from which the HW
83  * command streamer is reading.
84  *
85  * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
86  *    Execlists, this command is not placed on the same buffer as the
87  *    remaining items.
88  *
89  * 2. Add a command to invalidate caches to the buffer.
90  *
91  * 3. Add a batchbuffer start command to the buffer; the start command is
92  *    essentially a token together with the GPU address of the batchbuffer
93  *    to be executed.
94  *
95  * 4. Add a pipeline flush to the buffer.
96  *
97  * 5. Add a memory write command to the buffer to record when the GPU
98  *    is done executing the batchbuffer. The memory write writes the
99  *    global sequence number of the request, ``i915_request::global_seqno``;
100  *    the i915 driver uses the current value in the register to determine
101  *    if the GPU has completed the batchbuffer.
102  *
103  * 6. Add a user interrupt command to the buffer. This command instructs
104  *    the GPU to issue an interrupt when the command, pipeline flush and
105  *    memory write are completed.
106  *
107  * 7. Inform the hardware of the additional commands added to the buffer
108  *    (by updating the tail pointer).
109  *
110  * Processing an execbuf ioctl is conceptually split up into a few phases.
111  *
112  * 1. Validation - Ensure all the pointers, handles and flags are valid.
113  * 2. Reservation - Assign GPU address space for every object
114  * 3. Relocation - Update any addresses to point to the final locations
115  * 4. Serialisation - Order the request with respect to its dependencies
116  * 5. Construction - Construct a request to execute the batchbuffer
117  * 6. Submission (at some point in the future execution)
118  *
119  * Reserving resources for the execbuf is the most complicated phase. We
120  * neither want to have to migrate the object in the address space, nor do
121  * we want to have to update any relocations pointing to this object. Ideally,
122  * we want to leave the object where it is and for all the existing relocations
123  * to match. If the object is given a new address, or if userspace thinks the
124  * object is elsewhere, we have to parse all the relocation entries and update
125  * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
126  * all the target addresses in all of its objects match the value in the
127  * relocation entries and that they all match the presumed offsets given by the
128  * list of execbuffer objects. Using this knowledge, we know that if we haven't
129  * moved any buffers, all the relocation entries are valid and we can skip
130  * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
131  * hang.) The requirements for using I915_EXEC_NO_RELOC (illustrated below) are:
132  *
133  *      The addresses written in the objects must match the corresponding
134  *      reloc.presumed_offset which in turn must match the corresponding
135  *      execobject.offset.
136  *
137  *      Any render targets written to in the batch must be flagged with
138  *      EXEC_OBJECT_WRITE.
139  *
140  *      To avoid stalling, execobject.offset should match the current
141  *      address of that object within the active context.
142  *
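 * As a concrete illustration, the NO_RELOC contract amounts to something
 * like the following userspace sketch (hedged: the handles, addresses and
 * the libdrm call are illustrative only, not lifted from any real client)::
 *
 *	struct drm_i915_gem_relocation_entry reloc = {
 *		.target_handle = target_handle,
 *		.offset = reloc_offset_in_batch,
 *		.presumed_offset = target_gpu_addr,
 *		.read_domains = I915_GEM_DOMAIN_RENDER,
 *		.write_domain = I915_GEM_DOMAIN_RENDER,
 *	};
 *	struct drm_i915_gem_exec_object2 objects[2] = {
 *		{ .handle = target_handle, .offset = target_gpu_addr,
 *		  .flags = EXEC_OBJECT_WRITE },
 *		{ .handle = batch_handle, .offset = batch_gpu_addr,
 *		  .relocation_count = 1,
 *		  .relocs_ptr = (uintptr_t)&reloc },
 *	};
 *	struct drm_i915_gem_execbuffer2 execbuf = {
 *		.buffers_ptr = (uintptr_t)objects,
 *		.buffer_count = 2,
 *		.flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 *
 * So long as the kernel has not moved either object, no relocation entry is
 * read; if it has, the batch contents are patched and the new location is
 * reported back through execobject.offset.
 *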
143  * The reservation is done in multiple phases. First we try to keep any
144  * object already bound in its current location - so long as it meets the
145  * constraints imposed by the new execbuffer. Any object left unbound after the
146  * first pass is then fitted into any available idle space. If an object does
147  * not fit, all objects are removed from the reservation and the process is rerun
148  * after sorting the objects into a priority order (more difficult to fit
149  * objects are tried first). Failing that, the entire VM is cleared and we try
150  * to fit the execbuf one last time before concluding that it simply will not
151  * fit.
152  *
153  * A small complication to all of this is that we allow userspace not only to
154  * specify an alignment and a size for the object in the address space, but
155  * we also allow userspace to specify the exact offset. These objects are
156  * simpler to place (the location is known a priori); all we have to do is make
157  * sure the space is available.
158  *
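 * For instance (again a hedged sketch; the address is purely illustrative),
 * an execobject can be soft-pinned at a fixed, page-aligned GPU address
 * with::
 *
 *	objects[0].offset = 0x100000;
 *	objects[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 *
 * and the kernel then only has to check that the requested range is free
 * (or can be made free) rather than pick a location itself.
 *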
159  * Once all the objects are in place, patching up the buried pointers to point
160  * to the final locations is a fairly simple job of walking over the relocation
161  * entry arrays, looking up the right address and rewriting the value into
162  * the object. Simple! ... The relocation entries are stored in user memory
163  * and so to access them we have to copy them into a local buffer. That copy
164  * has to avoid taking any pagefaults as they may lead back to a GEM object
165  * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
166  * the relocation into multiple passes. First we try to do everything within an
167  * atomic context (avoid the pagefaults) which requires that we never wait. If
168  * we detect that we may wait, or if we need to fault, then we have to fallback
169  * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
170  * bells yet?) Dropping the mutex means that we lose all the state we have
171  * built up so far for the execbuf and we must reset any global data. However,
172  * we do leave the objects pinned in their final locations - which is a
173  * potential issue for concurrent execbufs. Once we have left the mutex, we can
174  * allocate and copy all the relocation entries into a large array at our
175  * leisure, reacquire the mutex, reclaim all the objects and other state and
176  * then proceed to update any incorrect addresses within the objects.
177  *
178  * As we process the relocation entries, we maintain a record of whether the
179  * object is being written to. Using NO_RELOC, we expect userspace to provide
180  * this information instead. We also check whether we can skip the relocation
181  * by comparing the expected value inside the relocation entry with the target's
182  * final address. If they differ, we have to map the current object and rewrite
183  * the 4 or 8 byte pointer within.
184  *
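 * Conceptually (a simplified sketch of the CPU path only, ignoring the
 * kmap/iomap and clflush handling implemented below), each such rewrite is
 * just::
 *
 *	u64 value = target->node.start + reloc->delta;
 *
 *	if (use_64bit_reloc)
 *		*(u64 *)(vaddr + reloc->offset) = gen8_canonical_addr(value);
 *	else
 *		*(u32 *)(vaddr + reloc->offset) = lower_32_bits(value);
 *
 * The GPU path achieves the same effect by emitting MI_STORE_DWORD_IMM
 * commands into a small batch of its own (see relocate_entry()).
 *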
185  * Serialising an execbuf is quite simple according to the rules of the GEM
186  * ABI. Execution within each context is ordered by the order of submission.
187  * Writes to any GEM object are in order of submission and are exclusive. Reads
188  * from a GEM object are unordered with respect to other reads, but ordered by
189  * writes. A write submitted after a read cannot occur before the read, and
190  * similarly any read submitted after a write cannot occur before the write.
191  * Writes are ordered between engines such that only one write occurs at any
192  * time (completing any reads beforehand) - using semaphores where available
193  * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any
194  * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
195  * reads before starting, and any read (either using set-domain or pread) must
196  * flush all GPU writes before starting. (Note we only employ a barrier before,
197  * we currently rely on userspace not concurrently starting a new execution
198  * whilst reading or writing to an object. This may be an advantage or not
199  * depending on how much you trust userspace not to shoot themselves in the
200  * foot.) Serialisation may just result in the request being inserted into
201  * a DAG awaiting its turn, but most simple is to wait on the CPU until
202  * all dependencies are resolved.
203  *
204  * After all of that, it is just a matter of closing the request and handing it to
205  * the hardware (well, leaving it in a queue to be executed). However, we also
206  * offer the ability for batchbuffers to be run with elevated privileges so
207  * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
208  * Before any batch is given extra privileges we first must check that it
209  * contains no nefarious instructions: we check that each instruction is from
210  * our whitelist and all registers are also from an allowed list. We first
211  * copy the user's batchbuffer to a shadow (so that the user doesn't have
212  * access to it, either by the CPU or GPU as we scan it) and then parse each
213  * instruction. If everything is ok, we set a flag telling the hardware to run
214  * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
215  */
216 
217 struct i915_execbuffer {
218 	struct drm_i915_private *i915; /** i915 backpointer */
219 	struct drm_file *file; /** per-file lookup tables and limits */
220 	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
221 	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
222 	struct i915_vma **vma;
223 	unsigned int *flags;
224 
225 	struct intel_engine_cs *engine; /** engine to queue the request to */
226 	struct intel_context *context; /* logical state for the request */
227 	struct i915_gem_context *gem_context; /** caller's context */
228 
229 	struct i915_request *request; /** our request to build */
230 	struct i915_vma *batch; /** identity of the batch obj/vma */
231 
232 	/** actual size of execobj[] as we may extend it for the cmdparser */
233 	unsigned int buffer_count;
234 
235 	/** list of vma not yet bound during reservation phase */
236 	struct list_head unbound;
237 
238 	/** list of vma that have execobj.relocation_count */
239 	struct list_head relocs;
240 
241 	/**
242 	 * Track the most recently used object for relocations, as we
243 	 * frequently have to perform multiple relocations within the same
244 	 * obj/page
245 	 */
246 	struct reloc_cache {
247 		struct drm_mm_node node; /** temporary GTT binding */
248 		unsigned long vaddr; /** Current kmap address */
249 		unsigned long page; /** Currently mapped page index */
250 		unsigned int gen; /** Cached value of INTEL_GEN */
251 		bool use_64bit_reloc : 1;
252 		bool has_llc : 1;
253 		bool has_fence : 1;
254 		bool needs_unfenced : 1;
255 
256 		struct intel_context *ce;
257 		struct i915_request *rq;
258 		u32 *rq_cmd;
259 		unsigned int rq_size;
260 	} reloc_cache;
261 
262 	u64 invalid_flags; /** Set of execobj.flags that are invalid */
263 	u32 context_flags; /** Set of execobj.flags to insert from the ctx */
264 
265 	u32 batch_start_offset; /** Location within object of batch */
266 	u32 batch_len; /** Length of batch within object */
267 	u32 batch_flags; /** Flags composed for emit_bb_start() */
268 
269 	/**
270 	 * Indicate either the size of the hashtable used to resolve
271 	 * relocation handles, or if negative that we are using a direct
272 	 * index into the execobj[].
273 	 */
274 	int lut_size;
275 	struct hlist_head *buckets; /** ht for relocation handles */
276 };
277 
278 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
279 
280 /*
281  * Used to convert any address to canonical form.
282  * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
283  * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
284  * addresses to be in a canonical form:
285  * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
286  * canonical form [63:48] == [47]."
287  */
288 #define GEN8_HIGH_ADDRESS_BIT 47
289 static inline u64 gen8_canonical_addr(u64 address)
290 {
291 	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
292 }
293 
294 static inline u64 gen8_noncanonical_addr(u64 address)
295 {
296 	return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
297 }
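
/*
 * For example, sign-extending from bit 47 maps 0x0000800000000000 to the
 * canonical 0xffff800000000000, while gen8_noncanonical_addr() simply masks
 * the upper 16 bits off again, recovering 0x0000800000000000.
 */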
298 
299 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
300 {
301 	return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
302 }
303 
304 static int eb_create(struct i915_execbuffer *eb)
305 {
306 	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
307 		unsigned int size = 1 + ilog2(eb->buffer_count);
308 
309 		/*
310 		 * Without a 1:1 association between relocation handles and
311 		 * the execobject[] index, we instead create a hashtable.
312 		 * We size it dynamically based on available memory, starting
313 		 * first with a 1:1 associative hash and scaling back until
314 		 * the allocation succeeds.
315 		 *
316 		 * Later on we use a positive lut_size to indicate we are
317 		 * using this hashtable, and a negative value to indicate a
318 		 * direct lookup.
319 		 */
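		/*
		 * For example (with purely illustrative numbers): for
		 * buffer_count == 1024, ilog2() gives 10, so the first
		 * attempt is for 1 << 11 == 2048 buckets, halved on each
		 * allocation failure.
		 */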
320 		do {
321 			gfp_t flags;
322 
323 			/* While we can still reduce the allocation size, don't
324 			 * raise a warning and allow the allocation to fail.
325 			 * On the last pass though, we want to try as hard
326 			 * as possible to perform the allocation and warn
327 			 * if it fails.
328 			 */
329 			flags = GFP_KERNEL;
330 			if (size > 1)
331 				flags |= __GFP_NORETRY | __GFP_NOWARN;
332 
333 			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
334 					      flags);
335 			if (eb->buckets)
336 				break;
337 		} while (--size);
338 
339 		if (unlikely(!size))
340 			return -ENOMEM;
341 
342 		eb->lut_size = size;
343 	} else {
344 		eb->lut_size = -eb->buffer_count;
345 	}
346 
347 	return 0;
348 }
349 
350 static bool
351 eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
352 		 const struct i915_vma *vma,
353 		 unsigned int flags)
354 {
355 	if (vma->node.size < entry->pad_to_size)
356 		return true;
357 
358 	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
359 		return true;
360 
361 	if (flags & EXEC_OBJECT_PINNED &&
362 	    vma->node.start != entry->offset)
363 		return true;
364 
365 	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
366 	    vma->node.start < BATCH_OFFSET_BIAS)
367 		return true;
368 
369 	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
370 	    (vma->node.start + vma->node.size - 1) >> 32)
371 		return true;
372 
373 	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
374 	    !i915_vma_is_map_and_fenceable(vma))
375 		return true;
376 
377 	return false;
378 }
379 
380 static inline bool
381 eb_pin_vma(struct i915_execbuffer *eb,
382 	   const struct drm_i915_gem_exec_object2 *entry,
383 	   struct i915_vma *vma)
384 {
385 	unsigned int exec_flags = *vma->exec_flags;
386 	u64 pin_flags;
387 
388 	if (vma->node.size)
389 		pin_flags = vma->node.start;
390 	else
391 		pin_flags = entry->offset & PIN_OFFSET_MASK;
392 
393 	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
394 	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
395 		pin_flags |= PIN_GLOBAL;
396 
397 	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
398 		return false;
399 
400 	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
401 		if (unlikely(i915_vma_pin_fence(vma))) {
402 			i915_vma_unpin(vma);
403 			return false;
404 		}
405 
406 		if (vma->fence)
407 			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
408 	}
409 
410 	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
411 	return !eb_vma_misplaced(entry, vma, exec_flags);
412 }
413 
414 static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
415 {
416 	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
417 
418 	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
419 		__i915_vma_unpin_fence(vma);
420 
421 	__i915_vma_unpin(vma);
422 }
423 
424 static inline void
425 eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
426 {
427 	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
428 		return;
429 
430 	__eb_unreserve_vma(vma, *flags);
431 	*flags &= ~__EXEC_OBJECT_RESERVED;
432 }
433 
434 static int
435 eb_validate_vma(struct i915_execbuffer *eb,
436 		struct drm_i915_gem_exec_object2 *entry,
437 		struct i915_vma *vma)
438 {
439 	if (unlikely(entry->flags & eb->invalid_flags))
440 		return -EINVAL;
441 
442 	if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
443 		return -EINVAL;
444 
445 	/*
446 	 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
447 	 * any non-page-aligned or non-canonical addresses.
448 	 */
449 	if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
450 		     entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
451 		return -EINVAL;
452 
453 	/* pad_to_size was once a reserved field, so sanitize it */
454 	if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
455 		if (unlikely(offset_in_page(entry->pad_to_size)))
456 			return -EINVAL;
457 	} else {
458 		entry->pad_to_size = 0;
459 	}
460 
461 	if (unlikely(vma->exec_flags)) {
462 		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
463 			  entry->handle, (int)(entry - eb->exec));
464 		return -EINVAL;
465 	}
466 
467 	/*
468 	 * From the drm_mm perspective the address space is continuous,
469 	 * so from this point we're always using non-canonical
470 	 * form internally.
471 	 */
472 	entry->offset = gen8_noncanonical_addr(entry->offset);
473 
474 	if (!eb->reloc_cache.has_fence) {
475 		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
476 	} else {
477 		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
478 		     eb->reloc_cache.needs_unfenced) &&
479 		    i915_gem_object_is_tiled(vma->obj))
480 			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
481 	}
482 
483 	if (!(entry->flags & EXEC_OBJECT_PINNED))
484 		entry->flags |= eb->context_flags;
485 
486 	return 0;
487 }
488 
489 static int
490 eb_add_vma(struct i915_execbuffer *eb,
491 	   unsigned int i, unsigned batch_idx,
492 	   struct i915_vma *vma)
493 {
494 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
495 	int err;
496 
497 	GEM_BUG_ON(i915_vma_is_closed(vma));
498 
499 	if (!(eb->args->flags & __EXEC_VALIDATED)) {
500 		err = eb_validate_vma(eb, entry, vma);
501 		if (unlikely(err))
502 			return err;
503 	}
504 
505 	if (eb->lut_size > 0) {
506 		vma->exec_handle = entry->handle;
507 		hlist_add_head(&vma->exec_node,
508 			       &eb->buckets[hash_32(entry->handle,
509 						    eb->lut_size)]);
510 	}
511 
512 	if (entry->relocation_count)
513 		list_add_tail(&vma->reloc_link, &eb->relocs);
514 
515 	/*
516 	 * Stash a pointer from the vma to execobj, so we can query its flags,
517 	 * size, alignment etc as provided by the user. Also we stash a pointer
518 	 * to the vma inside the execobj so that we can use a direct lookup
519 	 * to find the right target VMA when doing relocations.
520 	 */
521 	eb->vma[i] = vma;
522 	eb->flags[i] = entry->flags;
523 	vma->exec_flags = &eb->flags[i];
524 
525 	/*
526 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
527 	 * to negative relocation deltas. Usually that works out ok since the
528 	 * relocate address is still positive, except when the batch is placed
529 	 * very low in the GTT. Ensure this doesn't happen.
530 	 *
531 	 * Note that actual hangs have only been observed on gen7, but for
532 	 * paranoia do it everywhere.
533 	 */
534 	if (i == batch_idx) {
535 		if (entry->relocation_count &&
536 		    !(eb->flags[i] & EXEC_OBJECT_PINNED))
537 			eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
538 		if (eb->reloc_cache.has_fence)
539 			eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
540 
541 		eb->batch = vma;
542 	}
543 
544 	err = 0;
545 	if (eb_pin_vma(eb, entry, vma)) {
546 		if (entry->offset != vma->node.start) {
547 			entry->offset = vma->node.start | UPDATE;
548 			eb->args->flags |= __EXEC_HAS_RELOC;
549 		}
550 	} else {
551 		eb_unreserve_vma(vma, vma->exec_flags);
552 
553 		list_add_tail(&vma->exec_link, &eb->unbound);
554 		if (drm_mm_node_allocated(&vma->node))
555 			err = i915_vma_unbind(vma);
556 		if (unlikely(err))
557 			vma->exec_flags = NULL;
558 	}
559 	return err;
560 }
561 
562 static inline int use_cpu_reloc(const struct reloc_cache *cache,
563 				const struct drm_i915_gem_object *obj)
564 {
565 	if (!i915_gem_object_has_struct_page(obj))
566 		return false;
567 
568 	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
569 		return true;
570 
571 	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
572 		return false;
573 
574 	return (cache->has_llc ||
575 		obj->cache_dirty ||
576 		obj->cache_level != I915_CACHE_NONE);
577 }
578 
579 static int eb_reserve_vma(const struct i915_execbuffer *eb,
580 			  struct i915_vma *vma)
581 {
582 	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
583 	unsigned int exec_flags = *vma->exec_flags;
584 	u64 pin_flags;
585 	int err;
586 
587 	pin_flags = PIN_USER | PIN_NONBLOCK;
588 	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
589 		pin_flags |= PIN_GLOBAL;
590 
591 	/*
592 	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
593 	 * limit address to the first 4GBs for unflagged objects.
594 	 */
595 	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
596 		pin_flags |= PIN_ZONE_4G;
597 
598 	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
599 		pin_flags |= PIN_MAPPABLE;
600 
601 	if (exec_flags & EXEC_OBJECT_PINNED) {
602 		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
603 		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
604 	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
605 		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
606 	}
607 
608 	err = i915_vma_pin(vma,
609 			   entry->pad_to_size, entry->alignment,
610 			   pin_flags);
611 	if (err)
612 		return err;
613 
614 	if (entry->offset != vma->node.start) {
615 		entry->offset = vma->node.start | UPDATE;
616 		eb->args->flags |= __EXEC_HAS_RELOC;
617 	}
618 
619 	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
620 		err = i915_vma_pin_fence(vma);
621 		if (unlikely(err)) {
622 			i915_vma_unpin(vma);
623 			return err;
624 		}
625 
626 		if (vma->fence)
627 			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
628 	}
629 
630 	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
631 	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
632 
633 	return 0;
634 }
635 
636 static int eb_reserve(struct i915_execbuffer *eb)
637 {
638 	const unsigned int count = eb->buffer_count;
639 	struct list_head last;
640 	struct i915_vma *vma;
641 	unsigned int i, pass;
642 	int err;
643 
644 	/*
645 	 * Attempt to pin all of the buffers into the GTT.
646 	 * This is done in 3 phases:
647 	 *
648 	 * 1a. Unbind all objects that do not match the GTT constraints for
649 	 *     the execbuffer (fenceable, mappable, alignment etc).
650 	 * 1b. Increment pin count for already bound objects.
651 	 * 2.  Bind new objects.
652 	 * 3.  Decrement pin count.
653 	 *
654 	 * This avoids unnecessary unbinding of later objects in order to make
655 	 * room for the earlier objects *unless* we need to defragment.
656 	 */
657 
658 	pass = 0;
659 	err = 0;
660 	do {
661 		list_for_each_entry(vma, &eb->unbound, exec_link) {
662 			err = eb_reserve_vma(eb, vma);
663 			if (err)
664 				break;
665 		}
666 		if (err != -ENOSPC)
667 			return err;
668 
669 		/* Resort *all* the objects into priority order */
670 		INIT_LIST_HEAD(&eb->unbound);
671 		INIT_LIST_HEAD(&last);
672 		for (i = 0; i < count; i++) {
673 			unsigned int flags = eb->flags[i];
674 			struct i915_vma *vma = eb->vma[i];
675 
676 			if (flags & EXEC_OBJECT_PINNED &&
677 			    flags & __EXEC_OBJECT_HAS_PIN)
678 				continue;
679 
680 			eb_unreserve_vma(vma, &eb->flags[i]);
681 
682 			if (flags & EXEC_OBJECT_PINNED)
683 				/* Pinned objects must have their slot */
684 				list_add(&vma->exec_link, &eb->unbound);
685 			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
686 				/* Mappable objects require the lowest 256MiB (aperture) */
687 				list_add_tail(&vma->exec_link, &eb->unbound);
688 			else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
689 				/* Prioritise 4GiB region for restricted bo */
690 				list_add(&vma->exec_link, &last);
691 			else
692 				list_add_tail(&vma->exec_link, &last);
693 		}
694 		list_splice_tail(&last, &eb->unbound);
695 
696 		switch (pass++) {
697 		case 0:
698 			break;
699 
700 		case 1:
701 			/* Too fragmented, unbind everything and retry */
702 			mutex_lock(&eb->context->vm->mutex);
703 			err = i915_gem_evict_vm(eb->context->vm);
704 			mutex_unlock(&eb->context->vm->mutex);
705 			if (err)
706 				return err;
707 			break;
708 
709 		default:
710 			return -ENOSPC;
711 		}
712 	} while (1);
713 }
714 
715 static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
716 {
717 	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
718 		return 0;
719 	else
720 		return eb->buffer_count - 1;
721 }
722 
723 static int eb_select_context(struct i915_execbuffer *eb)
724 {
725 	struct i915_gem_context *ctx;
726 
727 	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
728 	if (unlikely(!ctx))
729 		return -ENOENT;
730 
731 	eb->gem_context = ctx;
732 	if (rcu_access_pointer(ctx->vm))
733 		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
734 
735 	eb->context_flags = 0;
736 	if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
737 		eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
738 
739 	return 0;
740 }
741 
742 static int eb_lookup_vmas(struct i915_execbuffer *eb)
743 {
744 	struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
745 	struct drm_i915_gem_object *obj;
746 	unsigned int i, batch;
747 	int err;
748 
749 	if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
750 		return -EIO;
751 
752 	INIT_LIST_HEAD(&eb->relocs);
753 	INIT_LIST_HEAD(&eb->unbound);
754 
755 	batch = eb_batch_index(eb);
756 
757 	mutex_lock(&eb->gem_context->mutex);
758 	if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
759 		err = -ENOENT;
760 		goto err_ctx;
761 	}
762 
763 	for (i = 0; i < eb->buffer_count; i++) {
764 		u32 handle = eb->exec[i].handle;
765 		struct i915_lut_handle *lut;
766 		struct i915_vma *vma;
767 
768 		vma = radix_tree_lookup(handles_vma, handle);
769 		if (likely(vma))
770 			goto add_vma;
771 
772 		obj = i915_gem_object_lookup(eb->file, handle);
773 		if (unlikely(!obj)) {
774 			err = -ENOENT;
775 			goto err_vma;
776 		}
777 
778 		vma = i915_vma_instance(obj, eb->context->vm, NULL);
779 		if (IS_ERR(vma)) {
780 			err = PTR_ERR(vma);
781 			goto err_obj;
782 		}
783 
784 		lut = i915_lut_handle_alloc();
785 		if (unlikely(!lut)) {
786 			err = -ENOMEM;
787 			goto err_obj;
788 		}
789 
790 		err = radix_tree_insert(handles_vma, handle, vma);
791 		if (unlikely(err)) {
792 			i915_lut_handle_free(lut);
793 			goto err_obj;
794 		}
795 
796 		/* transfer ref to lut */
797 		if (!atomic_fetch_inc(&vma->open_count))
798 			i915_vma_reopen(vma);
799 		lut->handle = handle;
800 		lut->ctx = eb->gem_context;
801 
802 		i915_gem_object_lock(obj);
803 		list_add(&lut->obj_link, &obj->lut_list);
804 		i915_gem_object_unlock(obj);
805 
806 add_vma:
807 		err = eb_add_vma(eb, i, batch, vma);
808 		if (unlikely(err))
809 			goto err_vma;
810 
811 		GEM_BUG_ON(vma != eb->vma[i]);
812 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
813 		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
814 			   eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
815 	}
816 
817 	mutex_unlock(&eb->gem_context->mutex);
818 
819 	eb->args->flags |= __EXEC_VALIDATED;
820 	return eb_reserve(eb);
821 
822 err_obj:
823 	i915_gem_object_put(obj);
824 err_vma:
825 	eb->vma[i] = NULL;
826 err_ctx:
827 	mutex_unlock(&eb->gem_context->mutex);
828 	return err;
829 }
830 
831 static struct i915_vma *
832 eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
833 {
834 	if (eb->lut_size < 0) {
835 		if (handle >= -eb->lut_size)
836 			return NULL;
837 		return eb->vma[handle];
838 	} else {
839 		struct hlist_head *head;
840 		struct i915_vma *vma;
841 
842 		head = &eb->buckets[hash_32(handle, eb->lut_size)];
843 		hlist_for_each_entry(vma, head, exec_node) {
844 			if (vma->exec_handle == handle)
845 				return vma;
846 		}
847 		return NULL;
848 	}
849 }
850 
851 static void eb_release_vmas(const struct i915_execbuffer *eb)
852 {
853 	const unsigned int count = eb->buffer_count;
854 	unsigned int i;
855 
856 	for (i = 0; i < count; i++) {
857 		struct i915_vma *vma = eb->vma[i];
858 		unsigned int flags = eb->flags[i];
859 
860 		if (!vma)
861 			break;
862 
863 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
864 		vma->exec_flags = NULL;
865 		eb->vma[i] = NULL;
866 
867 		if (flags & __EXEC_OBJECT_HAS_PIN)
868 			__eb_unreserve_vma(vma, flags);
869 
870 		if (flags & __EXEC_OBJECT_HAS_REF)
871 			i915_vma_put(vma);
872 	}
873 }
874 
875 static void eb_reset_vmas(const struct i915_execbuffer *eb)
876 {
877 	eb_release_vmas(eb);
878 	if (eb->lut_size > 0)
879 		memset(eb->buckets, 0,
880 		       sizeof(struct hlist_head) << eb->lut_size);
881 }
882 
883 static void eb_destroy(const struct i915_execbuffer *eb)
884 {
885 	GEM_BUG_ON(eb->reloc_cache.rq);
886 
887 	if (eb->reloc_cache.ce)
888 		intel_context_put(eb->reloc_cache.ce);
889 
890 	if (eb->lut_size > 0)
891 		kfree(eb->buckets);
892 }
893 
894 static inline u64
895 relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
896 		  const struct i915_vma *target)
897 {
898 	return gen8_canonical_addr((int)reloc->delta + target->node.start);
899 }
900 
901 static void reloc_cache_init(struct reloc_cache *cache,
902 			     struct drm_i915_private *i915)
903 {
904 	cache->page = -1;
905 	cache->vaddr = 0;
906 	/* Must be a variable in the struct to allow GCC to unroll. */
907 	cache->gen = INTEL_GEN(i915);
908 	cache->has_llc = HAS_LLC(i915);
909 	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
910 	cache->has_fence = cache->gen < 4;
911 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
912 	cache->node.flags = 0;
913 	cache->ce = NULL;
914 	cache->rq = NULL;
915 	cache->rq_size = 0;
916 }
917 
918 static inline void *unmask_page(unsigned long p)
919 {
920 	return (void *)(uintptr_t)(p & PAGE_MASK);
921 }
922 
923 static inline unsigned int unmask_flags(unsigned long p)
924 {
925 	return p & ~PAGE_MASK;
926 }
927 
928 #define KMAP 0x4 /* after CLFLUSH_FLAGS */
929 
930 static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
931 {
932 	struct drm_i915_private *i915 =
933 		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
934 	return &i915->ggtt;
935 }
936 
937 static void reloc_gpu_flush(struct reloc_cache *cache)
938 {
939 	GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32));
940 	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
941 
942 	__i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size);
943 	i915_gem_object_unpin_map(cache->rq->batch->obj);
944 
945 	intel_gt_chipset_flush(cache->rq->engine->gt);
946 
947 	i915_request_add(cache->rq);
948 	cache->rq = NULL;
949 }
950 
951 static void reloc_cache_reset(struct reloc_cache *cache)
952 {
953 	void *vaddr;
954 
955 	if (cache->rq)
956 		reloc_gpu_flush(cache);
957 
958 	if (!cache->vaddr)
959 		return;
960 
961 	vaddr = unmask_page(cache->vaddr);
962 	if (cache->vaddr & KMAP) {
963 		if (cache->vaddr & CLFLUSH_AFTER)
964 			mb();
965 
966 		kunmap_atomic(vaddr);
967 		i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
968 	} else {
969 		struct i915_ggtt *ggtt = cache_to_ggtt(cache);
970 
971 		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
972 		io_mapping_unmap_atomic((void __iomem *)vaddr);
973 
974 		if (drm_mm_node_allocated(&cache->node)) {
975 			ggtt->vm.clear_range(&ggtt->vm,
976 					     cache->node.start,
977 					     cache->node.size);
978 			mutex_lock(&ggtt->vm.mutex);
979 			drm_mm_remove_node(&cache->node);
980 			mutex_unlock(&ggtt->vm.mutex);
981 		} else {
982 			i915_vma_unpin((struct i915_vma *)cache->node.mm);
983 		}
984 	}
985 
986 	cache->vaddr = 0;
987 	cache->page = -1;
988 }
989 
990 static void *reloc_kmap(struct drm_i915_gem_object *obj,
991 			struct reloc_cache *cache,
992 			unsigned long page)
993 {
994 	void *vaddr;
995 
996 	if (cache->vaddr) {
997 		kunmap_atomic(unmask_page(cache->vaddr));
998 	} else {
999 		unsigned int flushes;
1000 		int err;
1001 
1002 		err = i915_gem_object_prepare_write(obj, &flushes);
1003 		if (err)
1004 			return ERR_PTR(err);
1005 
1006 		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
1007 		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
1008 
1009 		cache->vaddr = flushes | KMAP;
1010 		cache->node.mm = (void *)obj;
1011 		if (flushes)
1012 			mb();
1013 	}
1014 
1015 	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
1016 	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
1017 	cache->page = page;
1018 
1019 	return vaddr;
1020 }
1021 
1022 static void *reloc_iomap(struct drm_i915_gem_object *obj,
1023 			 struct reloc_cache *cache,
1024 			 unsigned long page)
1025 {
1026 	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1027 	unsigned long offset;
1028 	void *vaddr;
1029 
1030 	if (cache->vaddr) {
1031 		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1032 		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
1033 	} else {
1034 		struct i915_vma *vma;
1035 		int err;
1036 
1037 		if (i915_gem_object_is_tiled(obj))
1038 			return ERR_PTR(-EINVAL);
1039 
1040 		if (use_cpu_reloc(cache, obj))
1041 			return NULL;
1042 
1043 		i915_gem_object_lock(obj);
1044 		err = i915_gem_object_set_to_gtt_domain(obj, true);
1045 		i915_gem_object_unlock(obj);
1046 		if (err)
1047 			return ERR_PTR(err);
1048 
1049 		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1050 					       PIN_MAPPABLE |
1051 					       PIN_NONBLOCK /* NOWARN */ |
1052 					       PIN_NOEVICT);
1053 		if (IS_ERR(vma)) {
1054 			memset(&cache->node, 0, sizeof(cache->node));
1055 			mutex_lock(&ggtt->vm.mutex);
1056 			err = drm_mm_insert_node_in_range
1057 				(&ggtt->vm.mm, &cache->node,
1058 				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
1059 				 0, ggtt->mappable_end,
1060 				 DRM_MM_INSERT_LOW);
1061 			mutex_unlock(&ggtt->vm.mutex);
1062 			if (err) /* no inactive aperture space, use cpu reloc */
1063 				return NULL;
1064 		} else {
1065 			cache->node.start = vma->node.start;
1066 			cache->node.mm = (void *)vma;
1067 		}
1068 	}
1069 
1070 	offset = cache->node.start;
1071 	if (drm_mm_node_allocated(&cache->node)) {
1072 		ggtt->vm.insert_page(&ggtt->vm,
1073 				     i915_gem_object_get_dma_address(obj, page),
1074 				     offset, I915_CACHE_NONE, 0);
1075 	} else {
1076 		offset += page << PAGE_SHIFT;
1077 	}
1078 
1079 	vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
1080 							 offset);
1081 	cache->page = page;
1082 	cache->vaddr = (unsigned long)vaddr;
1083 
1084 	return vaddr;
1085 }
1086 
1087 static void *reloc_vaddr(struct drm_i915_gem_object *obj,
1088 			 struct reloc_cache *cache,
1089 			 unsigned long page)
1090 {
1091 	void *vaddr;
1092 
1093 	if (cache->page == page) {
1094 		vaddr = unmask_page(cache->vaddr);
1095 	} else {
1096 		vaddr = NULL;
1097 		if ((cache->vaddr & KMAP) == 0)
1098 			vaddr = reloc_iomap(obj, cache, page);
1099 		if (!vaddr)
1100 			vaddr = reloc_kmap(obj, cache, page);
1101 	}
1102 
1103 	return vaddr;
1104 }
1105 
1106 static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
1107 {
1108 	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
1109 		if (flushes & CLFLUSH_BEFORE) {
1110 			clflushopt(addr);
1111 			mb();
1112 		}
1113 
1114 		*addr = value;
1115 
1116 		/*
1117 		 * Writes to the same cacheline are serialised by the CPU
1118 		 * (including clflush). On the write path, we only require
1119 		 * that it hits memory in an orderly fashion and we place
1120 		 * mb barriers at the start and end of the relocation phase
1121 		 * to ensure ordering of clflush wrt the system.
1122 		 */
1123 		if (flushes & CLFLUSH_AFTER)
1124 			clflushopt(addr);
1125 	} else
1126 		*addr = value;
1127 }
1128 
1129 static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
1130 {
1131 	struct drm_i915_gem_object *obj = vma->obj;
1132 	int err;
1133 
1134 	i915_vma_lock(vma);
1135 
1136 	if (obj->cache_dirty & ~obj->cache_coherent)
1137 		i915_gem_clflush_object(obj, 0);
1138 	obj->write_domain = 0;
1139 
1140 	err = i915_request_await_object(rq, vma->obj, true);
1141 	if (err == 0)
1142 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1143 
1144 	i915_vma_unlock(vma);
1145 
1146 	return err;
1147 }
1148 
1149 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
1150 			     struct i915_vma *vma,
1151 			     unsigned int len)
1152 {
1153 	struct reloc_cache *cache = &eb->reloc_cache;
1154 	struct intel_engine_pool_node *pool;
1155 	struct i915_request *rq;
1156 	struct i915_vma *batch;
1157 	u32 *cmd;
1158 	int err;
1159 
1160 	pool = intel_engine_get_pool(eb->engine, PAGE_SIZE);
1161 	if (IS_ERR(pool))
1162 		return PTR_ERR(pool);
1163 
1164 	cmd = i915_gem_object_pin_map(pool->obj,
1165 				      cache->has_llc ?
1166 				      I915_MAP_FORCE_WB :
1167 				      I915_MAP_FORCE_WC);
1168 	if (IS_ERR(cmd)) {
1169 		err = PTR_ERR(cmd);
1170 		goto out_pool;
1171 	}
1172 
1173 	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
1174 	if (IS_ERR(batch)) {
1175 		err = PTR_ERR(batch);
1176 		goto err_unmap;
1177 	}
1178 
1179 	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
1180 	if (err)
1181 		goto err_unmap;
1182 
1183 	rq = intel_context_create_request(cache->ce);
1184 	if (IS_ERR(rq)) {
1185 		err = PTR_ERR(rq);
1186 		goto err_unpin;
1187 	}
1188 
1189 	err = intel_engine_pool_mark_active(pool, rq);
1190 	if (err)
1191 		goto err_request;
1192 
1193 	err = reloc_move_to_gpu(rq, vma);
1194 	if (err)
1195 		goto err_request;
1196 
1197 	err = eb->engine->emit_bb_start(rq,
1198 					batch->node.start, PAGE_SIZE,
1199 					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
1200 	if (err)
1201 		goto skip_request;
1202 
1203 	i915_vma_lock(batch);
1204 	err = i915_request_await_object(rq, batch->obj, false);
1205 	if (err == 0)
1206 		err = i915_vma_move_to_active(batch, rq, 0);
1207 	i915_vma_unlock(batch);
1208 	if (err)
1209 		goto skip_request;
1210 
1211 	rq->batch = batch;
1212 	i915_vma_unpin(batch);
1213 
1214 	cache->rq = rq;
1215 	cache->rq_cmd = cmd;
1216 	cache->rq_size = 0;
1217 
1218 	/* Return with batch mapping (cmd) still pinned */
1219 	goto out_pool;
1220 
1221 skip_request:
1222 	i915_request_skip(rq, err);
1223 err_request:
1224 	i915_request_add(rq);
1225 err_unpin:
1226 	i915_vma_unpin(batch);
1227 err_unmap:
1228 	i915_gem_object_unpin_map(pool->obj);
1229 out_pool:
1230 	intel_engine_pool_put(pool);
1231 	return err;
1232 }
1233 
1234 static u32 *reloc_gpu(struct i915_execbuffer *eb,
1235 		      struct i915_vma *vma,
1236 		      unsigned int len)
1237 {
1238 	struct reloc_cache *cache = &eb->reloc_cache;
1239 	u32 *cmd;
1240 
1241 	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
1242 		reloc_gpu_flush(cache);
1243 
1244 	if (unlikely(!cache->rq)) {
1245 		int err;
1246 
1247 		/* If we need to copy for the cmdparser, we will stall anyway */
1248 		if (eb_use_cmdparser(eb))
1249 			return ERR_PTR(-EWOULDBLOCK);
1250 
1251 		if (!intel_engine_can_store_dword(eb->engine))
1252 			return ERR_PTR(-ENODEV);
1253 
1254 		if (!cache->ce) {
1255 			struct intel_context *ce;
1256 
1257 			/*
1258 			 * The CS pre-parser can pre-fetch commands across
1259 			 * memory sync points and starting gen12 it is able to
1260 			 * pre-fetch across BB_START and BB_END boundaries
1261 			 * (within the same context). We therefore use a
1262 			 * separate context gen12+ to guarantee that the reloc
1263 			 * writes land before the parser gets to the target
1264 			 * memory location.
1265 			 */
1266 			if (cache->gen >= 12)
1267 				ce = intel_context_create(eb->context->gem_context,
1268 							  eb->engine);
1269 			else
1270 				ce = intel_context_get(eb->context);
1271 			if (IS_ERR(ce))
1272 				return ERR_CAST(ce);
1273 
1274 			cache->ce = ce;
1275 		}
1276 
1277 		err = __reloc_gpu_alloc(eb, vma, len);
1278 		if (unlikely(err))
1279 			return ERR_PTR(err);
1280 	}
1281 
1282 	cmd = cache->rq_cmd + cache->rq_size;
1283 	cache->rq_size += len;
1284 
1285 	return cmd;
1286 }
1287 
1288 static u64
1289 relocate_entry(struct i915_vma *vma,
1290 	       const struct drm_i915_gem_relocation_entry *reloc,
1291 	       struct i915_execbuffer *eb,
1292 	       const struct i915_vma *target)
1293 {
1294 	u64 offset = reloc->offset;
1295 	u64 target_offset = relocation_target(reloc, target);
1296 	bool wide = eb->reloc_cache.use_64bit_reloc;
1297 	void *vaddr;
1298 
1299 	if (!eb->reloc_cache.vaddr &&
1300 	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
1301 	     !dma_resv_test_signaled_rcu(vma->resv, true))) {
1302 		const unsigned int gen = eb->reloc_cache.gen;
1303 		unsigned int len;
1304 		u32 *batch;
1305 		u64 addr;
1306 
1307 		if (wide)
1308 			len = offset & 7 ? 8 : 5;
1309 		else if (gen >= 4)
1310 			len = 4;
1311 		else
1312 			len = 3;
1313 
1314 		batch = reloc_gpu(eb, vma, len);
1315 		if (IS_ERR(batch))
1316 			goto repeat;
1317 
1318 		addr = gen8_canonical_addr(vma->node.start + offset);
1319 		if (wide) {
1320 			if (offset & 7) {
1321 				*batch++ = MI_STORE_DWORD_IMM_GEN4;
1322 				*batch++ = lower_32_bits(addr);
1323 				*batch++ = upper_32_bits(addr);
1324 				*batch++ = lower_32_bits(target_offset);
1325 
1326 				addr = gen8_canonical_addr(addr + 4);
1327 
1328 				*batch++ = MI_STORE_DWORD_IMM_GEN4;
1329 				*batch++ = lower_32_bits(addr);
1330 				*batch++ = upper_32_bits(addr);
1331 				*batch++ = upper_32_bits(target_offset);
1332 			} else {
1333 				*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
1334 				*batch++ = lower_32_bits(addr);
1335 				*batch++ = upper_32_bits(addr);
1336 				*batch++ = lower_32_bits(target_offset);
1337 				*batch++ = upper_32_bits(target_offset);
1338 			}
1339 		} else if (gen >= 6) {
1340 			*batch++ = MI_STORE_DWORD_IMM_GEN4;
1341 			*batch++ = 0;
1342 			*batch++ = addr;
1343 			*batch++ = target_offset;
1344 		} else if (gen >= 4) {
1345 			*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1346 			*batch++ = 0;
1347 			*batch++ = addr;
1348 			*batch++ = target_offset;
1349 		} else {
1350 			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
1351 			*batch++ = addr;
1352 			*batch++ = target_offset;
1353 		}
1354 
1355 		goto out;
1356 	}
1357 
1358 repeat:
1359 	vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
1360 	if (IS_ERR(vaddr))
1361 		return PTR_ERR(vaddr);
1362 
1363 	clflush_write32(vaddr + offset_in_page(offset),
1364 			lower_32_bits(target_offset),
1365 			eb->reloc_cache.vaddr);
1366 
1367 	if (wide) {
1368 		offset += sizeof(u32);
1369 		target_offset >>= 32;
1370 		wide = false;
1371 		goto repeat;
1372 	}
1373 
1374 out:
1375 	return target->node.start | UPDATE;
1376 }
1377 
1378 static u64
1379 eb_relocate_entry(struct i915_execbuffer *eb,
1380 		  struct i915_vma *vma,
1381 		  const struct drm_i915_gem_relocation_entry *reloc)
1382 {
1383 	struct i915_vma *target;
1384 	int err;
1385 
1386 	/* we already hold a reference to all valid objects */
1387 	target = eb_get_vma(eb, reloc->target_handle);
1388 	if (unlikely(!target))
1389 		return -ENOENT;
1390 
1391 	/* Validate that the target is in a valid r/w GPU domain */
1392 	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
1393 		DRM_DEBUG("reloc with multiple write domains: "
1394 			  "target %d offset %d "
1395 			  "read %08x write %08x",
1396 			  reloc->target_handle,
1397 			  (int) reloc->offset,
1398 			  reloc->read_domains,
1399 			  reloc->write_domain);
1400 		return -EINVAL;
1401 	}
1402 	if (unlikely((reloc->write_domain | reloc->read_domains)
1403 		     & ~I915_GEM_GPU_DOMAINS)) {
1404 		DRM_DEBUG("reloc with read/write non-GPU domains: "
1405 			  "target %d offset %d "
1406 			  "read %08x write %08x",
1407 			  reloc->target_handle,
1408 			  (int) reloc->offset,
1409 			  reloc->read_domains,
1410 			  reloc->write_domain);
1411 		return -EINVAL;
1412 	}
1413 
1414 	if (reloc->write_domain) {
1415 		*target->exec_flags |= EXEC_OBJECT_WRITE;
1416 
1417 		/*
1418 		 * Sandybridge PPGTT errata: We need a global gtt mapping
1419 		 * for MI and pipe_control writes because the gpu doesn't
1420 		 * properly redirect them through the ppgtt for non_secure
1421 		 * batchbuffers.
1422 		 */
1423 		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
1424 		    IS_GEN(eb->i915, 6)) {
1425 			err = i915_vma_bind(target, target->obj->cache_level,
1426 					    PIN_GLOBAL, NULL);
1427 			if (WARN_ONCE(err,
1428 				      "Unexpected failure to bind target VMA!"))
1429 				return err;
1430 		}
1431 	}
1432 
1433 	/*
1434 	 * If the relocation already has the right value in it, no
1435 	 * more work needs to be done.
1436 	 */
1437 	if (!DBG_FORCE_RELOC &&
1438 	    gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
1439 		return 0;
1440 
1441 	/* Check that the relocation address is valid... */
1442 	if (unlikely(reloc->offset >
1443 		     vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
1444 		DRM_DEBUG("Relocation beyond object bounds: "
1445 			  "target %d offset %d size %d.\n",
1446 			  reloc->target_handle,
1447 			  (int)reloc->offset,
1448 			  (int)vma->size);
1449 		return -EINVAL;
1450 	}
1451 	if (unlikely(reloc->offset & 3)) {
1452 		DRM_DEBUG("Relocation not 4-byte aligned: "
1453 			  "target %d offset %d.\n",
1454 			  reloc->target_handle,
1455 			  (int)reloc->offset);
1456 		return -EINVAL;
1457 	}
1458 
1459 	/*
1460 	 * If we write into the object, we need to force the synchronisation
1461 	 * barrier, either with an asynchronous clflush or if we executed the
1462 	 * patching using the GPU (though that should be serialised by the
1463 	 * timeline). To be completely sure, and since by doing the
1464 	 * relocations we are already stalling, disable the user's opt-out
1465 	 * of our synchronisation.
1466 	 */
1467 	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
1468 
1469 	/* and update the user's relocation entry */
1470 	return relocate_entry(vma, reloc, eb, target);
1471 }
1472 
1473 static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
1474 {
1475 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
1476 	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
1477 	struct drm_i915_gem_relocation_entry __user *urelocs;
1478 	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
1479 	unsigned int remain;
1480 
1481 	urelocs = u64_to_user_ptr(entry->relocs_ptr);
1482 	remain = entry->relocation_count;
1483 	if (unlikely(remain > N_RELOC(ULONG_MAX)))
1484 		return -EINVAL;
1485 
1486 	/*
1487 	 * We must check that the entire relocation array is safe
1488 	 * to read. However, if the array is not writable the user loses
1489 	 * the updated relocation values.
1490 	 */
1491 	if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs))))
1492 		return -EFAULT;
1493 
1494 	do {
1495 		struct drm_i915_gem_relocation_entry *r = stack;
1496 		unsigned int count =
1497 			min_t(unsigned int, remain, ARRAY_SIZE(stack));
1498 		unsigned int copied;
1499 
1500 		/*
1501 		 * This is the fast path and we cannot handle a pagefault
1502 		 * whilst holding the struct mutex lest the user pass in the
1503 		 * relocations contained within an mmapped bo. In such a case,
1504 		 * the page fault handler would call i915_gem_fault() and
1505 		 * we would try to acquire the struct mutex again. Obviously
1506 		 * this is bad and so lockdep complains vehemently.
1507 		 */
1508 		pagefault_disable();
1509 		copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
1510 		pagefault_enable();
1511 		if (unlikely(copied)) {
1512 			remain = -EFAULT;
1513 			goto out;
1514 		}
1515 
1516 		remain -= count;
1517 		do {
1518 			u64 offset = eb_relocate_entry(eb, vma, r);
1519 
1520 			if (likely(offset == 0)) {
1521 			} else if ((s64)offset < 0) {
1522 				remain = (int)offset;
1523 				goto out;
1524 			} else {
1525 				/*
1526 				 * Note that reporting an error now
1527 				 * leaves everything in an inconsistent
1528 				 * state as we have *already* changed
1529 				 * the relocation value inside the
1530 				 * object. As we have not changed the
1531 				 * reloc.presumed_offset and will not
1532 				 * change the execobject.offset, on the
1533 				 * next call we may not rewrite the value
1534 				 * inside the object, leaving it
1535 				 * dangling and causing a GPU hang. Unless
1536 				 * userspace dynamically rebuilds the
1537 				 * relocations on each execbuf rather than
1538 				 * presume a static tree.
1539 				 *
1540 				 * We did previously check if the relocations
1541 				 * were writable (access_ok); an error now
1542 				 * would be a strange race with mprotect,
1543 				 * having already demonstrated that we
1544 				 * can read from this userspace address.
1545 				 */
1546 				offset = gen8_canonical_addr(offset & ~UPDATE);
1547 				if (unlikely(__put_user(offset, &urelocs[r-stack].presumed_offset))) {
1548 					remain = -EFAULT;
1549 					goto out;
1550 				}
1551 			}
1552 		} while (r++, --count);
1553 		urelocs += ARRAY_SIZE(stack);
1554 	} while (remain);
1555 out:
1556 	reloc_cache_reset(&eb->reloc_cache);
1557 	return remain;
1558 }
1559 
1560 static int
1561 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
1562 {
1563 	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
1564 	struct drm_i915_gem_relocation_entry *relocs =
1565 		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1566 	unsigned int i;
1567 	int err;
1568 
1569 	for (i = 0; i < entry->relocation_count; i++) {
1570 		u64 offset = eb_relocate_entry(eb, vma, &relocs[i]);
1571 
1572 		if ((s64)offset < 0) {
1573 			err = (int)offset;
1574 			goto err;
1575 		}
1576 	}
1577 	err = 0;
1578 err:
1579 	reloc_cache_reset(&eb->reloc_cache);
1580 	return err;
1581 }
1582 
1583 static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1584 {
1585 	const char __user *addr, *end;
1586 	unsigned long size;
1587 	char __maybe_unused c;
1588 
1589 	size = entry->relocation_count;
1590 	if (size == 0)
1591 		return 0;
1592 
1593 	if (size > N_RELOC(ULONG_MAX))
1594 		return -EINVAL;
1595 
1596 	addr = u64_to_user_ptr(entry->relocs_ptr);
1597 	size *= sizeof(struct drm_i915_gem_relocation_entry);
1598 	if (!access_ok(addr, size))
1599 		return -EFAULT;
1600 
1601 	end = addr + size;
1602 	for (; addr < end; addr += PAGE_SIZE) {
1603 		int err = __get_user(c, addr);
1604 		if (err)
1605 			return err;
1606 	}
1607 	return __get_user(c, end - 1);
1608 }
1609 
1610 static int eb_copy_relocations(const struct i915_execbuffer *eb)
1611 {
1612 	struct drm_i915_gem_relocation_entry *relocs;
1613 	const unsigned int count = eb->buffer_count;
1614 	unsigned int i;
1615 	int err;
1616 
1617 	for (i = 0; i < count; i++) {
1618 		const unsigned int nreloc = eb->exec[i].relocation_count;
1619 		struct drm_i915_gem_relocation_entry __user *urelocs;
1620 		unsigned long size;
1621 		unsigned long copied;
1622 
1623 		if (nreloc == 0)
1624 			continue;
1625 
1626 		err = check_relocations(&eb->exec[i]);
1627 		if (err)
1628 			goto err;
1629 
1630 		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
1631 		size = nreloc * sizeof(*relocs);
1632 
1633 		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
1634 		if (!relocs) {
1635 			err = -ENOMEM;
1636 			goto err;
1637 		}
1638 
1639 		/* copy_from_user is limited to < 4GiB */
1640 		copied = 0;
1641 		do {
1642 			unsigned int len =
1643 				min_t(u64, BIT_ULL(31), size - copied);
1644 
1645 			if (__copy_from_user((char *)relocs + copied,
1646 					     (char __user *)urelocs + copied,
1647 					     len))
1648 				goto end;
1649 
1650 			copied += len;
1651 		} while (copied < size);
1652 
1653 		/*
1654 		 * As we do not update the known relocation offsets after
1655 		 * relocating (due to the complexities in lock handling),
1656 		 * we need to mark them as invalid now so that we force the
1657 		 * relocation processing next time. Just in case the target
1658 		 * object is evicted and then rebound into its old
1659 		 * presumed_offset before the next execbuffer - if that
1660 		 * happened we would make the mistake of assuming that the
1661 		 * relocations were valid.
1662 		 */
1663 		if (!user_access_begin(urelocs, size))
1664 			goto end;
1665 
1666 		for (copied = 0; copied < nreloc; copied++)
1667 			unsafe_put_user(-1,
1668 					&urelocs[copied].presumed_offset,
1669 					end_user);
1670 		user_access_end();
1671 
1672 		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1673 	}
1674 
1675 	return 0;
1676 
1677 end_user:
1678 	user_access_end();
1679 end:
1680 	kvfree(relocs);
1681 	err = -EFAULT;
1682 err:
1683 	while (i--) {
1684 		relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
1685 		if (eb->exec[i].relocation_count)
1686 			kvfree(relocs);
1687 	}
1688 	return err;
1689 }
1690 
1691 static int eb_prefault_relocations(const struct i915_execbuffer *eb)
1692 {
1693 	const unsigned int count = eb->buffer_count;
1694 	unsigned int i;
1695 
1696 	if (unlikely(i915_modparams.prefault_disable))
1697 		return 0;
1698 
1699 	for (i = 0; i < count; i++) {
1700 		int err;
1701 
1702 		err = check_relocations(&eb->exec[i]);
1703 		if (err)
1704 			return err;
1705 	}
1706 
1707 	return 0;
1708 }
1709 
1710 static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
1711 {
1712 	struct drm_device *dev = &eb->i915->drm;
1713 	bool have_copy = false;
1714 	struct i915_vma *vma;
1715 	int err = 0;
1716 
1717 repeat:
1718 	if (signal_pending(current)) {
1719 		err = -ERESTARTSYS;
1720 		goto out;
1721 	}
1722 
1723 	/* We may process another execbuffer during the unlock... */
1724 	eb_reset_vmas(eb);
1725 	mutex_unlock(&dev->struct_mutex);
1726 
1727 	/*
1728 	 * We take 3 passes through the slowpath.
1729 	 *
1730 	 * 1 - we try to just prefault all the user relocation entries and
1731 	 * then attempt to reuse the atomic pagefault disabled fast path again.
1732 	 *
1733 	 * 2 - we copy the user entries to a local buffer here outside of the
1734 	 * lock and allow ourselves to wait upon any rendering before
1735 	 * performing the relocations.
1736 	 *
1737 	 * 3 - we already have a local copy of the relocation entries, but
1738 	 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1739 	 */
1740 	if (!err) {
1741 		err = eb_prefault_relocations(eb);
1742 	} else if (!have_copy) {
1743 		err = eb_copy_relocations(eb);
1744 		have_copy = err == 0;
1745 	} else {
1746 		cond_resched();
1747 		err = 0;
1748 	}
1749 	if (err) {
1750 		mutex_lock(&dev->struct_mutex);
1751 		goto out;
1752 	}
1753 
	/* A frequent cause of EAGAIN is currently unavailable client pages */
1755 	flush_workqueue(eb->i915->mm.userptr_wq);
1756 
1757 	err = i915_mutex_lock_interruptible(dev);
1758 	if (err) {
1759 		mutex_lock(&dev->struct_mutex);
1760 		goto out;
1761 	}
1762 
1763 	/* reacquire the objects */
1764 	err = eb_lookup_vmas(eb);
1765 	if (err)
1766 		goto err;
1767 
1768 	GEM_BUG_ON(!eb->batch);
1769 
1770 	list_for_each_entry(vma, &eb->relocs, reloc_link) {
1771 		if (!have_copy) {
1772 			pagefault_disable();
1773 			err = eb_relocate_vma(eb, vma);
1774 			pagefault_enable();
1775 			if (err)
1776 				goto repeat;
1777 		} else {
1778 			err = eb_relocate_vma_slow(eb, vma);
1779 			if (err)
1780 				goto err;
1781 		}
1782 	}
1783 
1784 	/*
1785 	 * Leave the user relocations as are, this is the painfully slow path,
1786 	 * and we want to avoid the complication of dropping the lock whilst
1787 	 * having buffers reserved in the aperture and so causing spurious
1788 	 * ENOSPC for random operations.
1789 	 */
1790 
1791 err:
1792 	if (err == -EAGAIN)
1793 		goto repeat;
1794 
1795 out:
1796 	if (have_copy) {
1797 		const unsigned int count = eb->buffer_count;
1798 		unsigned int i;
1799 
1800 		for (i = 0; i < count; i++) {
1801 			const struct drm_i915_gem_exec_object2 *entry =
1802 				&eb->exec[i];
1803 			struct drm_i915_gem_relocation_entry *relocs;
1804 
1805 			if (!entry->relocation_count)
1806 				continue;
1807 
1808 			relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1809 			kvfree(relocs);
1810 		}
1811 	}
1812 
1813 	return err;
1814 }
1815 
1816 static int eb_relocate(struct i915_execbuffer *eb)
1817 {
1818 	if (eb_lookup_vmas(eb))
1819 		goto slow;
1820 
1821 	/* The objects are in their final locations, apply the relocations. */
1822 	if (eb->args->flags & __EXEC_HAS_RELOC) {
1823 		struct i915_vma *vma;
1824 
1825 		list_for_each_entry(vma, &eb->relocs, reloc_link) {
1826 			if (eb_relocate_vma(eb, vma))
1827 				goto slow;
1828 		}
1829 	}
1830 
1831 	return 0;
1832 
1833 slow:
1834 	return eb_relocate_slow(eb);
1835 }
1836 
1837 static int eb_move_to_gpu(struct i915_execbuffer *eb)
1838 {
1839 	const unsigned int count = eb->buffer_count;
1840 	struct ww_acquire_ctx acquire;
1841 	unsigned int i;
1842 	int err = 0;
1843 
1844 	ww_acquire_init(&acquire, &reservation_ww_class);
1845 
1846 	for (i = 0; i < count; i++) {
1847 		struct i915_vma *vma = eb->vma[i];
1848 
1849 		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
1850 		if (!err)
1851 			continue;
1852 
1853 		GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */
1854 
1855 		if (err == -EDEADLK) {
1856 			GEM_BUG_ON(i == 0);
1857 			do {
1858 				int j = i - 1;
1859 
1860 				ww_mutex_unlock(&eb->vma[j]->resv->lock);
1861 
1862 				swap(eb->flags[i], eb->flags[j]);
1863 				swap(eb->vma[i],  eb->vma[j]);
1864 				eb->vma[i]->exec_flags = &eb->flags[i];
1865 			} while (--i);
1866 			GEM_BUG_ON(vma != eb->vma[0]);
1867 			vma->exec_flags = &eb->flags[0];
1868 
1869 			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
1870 							       &acquire);
1871 		}
1872 		if (err)
1873 			break;
1874 	}
1875 	ww_acquire_done(&acquire);
1876 
1877 	while (i--) {
1878 		unsigned int flags = eb->flags[i];
1879 		struct i915_vma *vma = eb->vma[i];
1880 		struct drm_i915_gem_object *obj = vma->obj;
1881 
1882 		assert_vma_held(vma);
1883 
1884 		if (flags & EXEC_OBJECT_CAPTURE) {
1885 			struct i915_capture_list *capture;
1886 
1887 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
1888 			if (capture) {
1889 				capture->next = eb->request->capture_list;
1890 				capture->vma = vma;
1891 				eb->request->capture_list = capture;
1892 			}
1893 		}
1894 
1895 		/*
1896 		 * If the GPU is not _reading_ through the CPU cache, we need
1897 		 * to make sure that any writes (both previous GPU writes from
1898 		 * before a change in snooping levels and normal CPU writes)
1899 		 * caught in that cache are flushed to main memory.
1900 		 *
1901 		 * We want to say
1902 		 *   obj->cache_dirty &&
1903 		 *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
1904 		 * but gcc's optimiser doesn't handle that as well and emits
1905 		 * two jumps instead of one. Maybe one day...
1906 		 */
1907 		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
1908 			if (i915_gem_clflush_object(obj, 0))
1909 				flags &= ~EXEC_OBJECT_ASYNC;
1910 		}
1911 
1912 		if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
1913 			err = i915_request_await_object
1914 				(eb->request, obj, flags & EXEC_OBJECT_WRITE);
1915 		}
1916 
1917 		if (err == 0)
1918 			err = i915_vma_move_to_active(vma, eb->request, flags);
1919 
1920 		i915_vma_unlock(vma);
1921 
1922 		__eb_unreserve_vma(vma, flags);
1923 		vma->exec_flags = NULL;
1924 
1925 		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
1926 			i915_vma_put(vma);
1927 	}
1928 	ww_acquire_fini(&acquire);
1929 
1930 	if (unlikely(err))
1931 		goto err_skip;
1932 
1933 	eb->exec = NULL;
1934 
1935 	/* Unconditionally flush any chipset caches (for streaming writes). */
1936 	intel_gt_chipset_flush(eb->engine->gt);
1937 	return 0;
1938 
1939 err_skip:
1940 	i915_request_skip(eb->request, err);
1941 	return err;
1942 }
1943 
1944 static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1945 {
1946 	if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
1947 		return false;
1948 
1949 	/* Kernel clipping was a DRI1 misfeature */
1950 	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
1951 		if (exec->num_cliprects || exec->cliprects_ptr)
1952 			return false;
1953 	}
1954 
1955 	if (exec->DR4 == 0xffffffff) {
1956 		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1957 		exec->DR4 = 0;
1958 	}
1959 	if (exec->DR1 || exec->DR4)
1960 		return false;
1961 
1962 	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
1963 		return false;
1964 
1965 	return true;
1966 }
1967 
1968 static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
1969 {
1970 	u32 *cs;
1971 	int i;
1972 
1973 	if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) {
1974 		DRM_DEBUG("sol reset is gen7/rcs only\n");
1975 		return -EINVAL;
1976 	}
1977 
1978 	cs = intel_ring_begin(rq, 4 * 2 + 2);
1979 	if (IS_ERR(cs))
1980 		return PTR_ERR(cs);
1981 
1982 	*cs++ = MI_LOAD_REGISTER_IMM(4);
1983 	for (i = 0; i < 4; i++) {
1984 		*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
1985 		*cs++ = 0;
1986 	}
1987 	*cs++ = MI_NOOP;
1988 	intel_ring_advance(rq, cs);
1989 
1990 	return 0;
1991 }
1992 
1993 static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
1994 {
1995 	struct intel_engine_pool_node *pool;
1996 	struct i915_vma *vma;
1997 	int err;
1998 
1999 	pool = intel_engine_get_pool(eb->engine, eb->batch_len);
2000 	if (IS_ERR(pool))
2001 		return ERR_CAST(pool);
2002 
2003 	err = intel_engine_cmd_parser(eb->engine,
2004 				      eb->batch->obj,
2005 				      pool->obj,
2006 				      eb->batch_start_offset,
2007 				      eb->batch_len,
2008 				      is_master);
2009 	if (err) {
2010 		if (err == -EACCES) /* unhandled chained batch */
2011 			vma = NULL;
2012 		else
2013 			vma = ERR_PTR(err);
2014 		goto err;
2015 	}
2016 
2017 	vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0);
2018 	if (IS_ERR(vma))
2019 		goto err;
2020 
2021 	eb->vma[eb->buffer_count] = i915_vma_get(vma);
2022 	eb->flags[eb->buffer_count] =
2023 		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
2024 	vma->exec_flags = &eb->flags[eb->buffer_count];
2025 	eb->buffer_count++;
2026 
2027 	vma->private = pool;
2028 	return vma;
2029 
2030 err:
2031 	intel_engine_pool_put(pool);
2032 	return vma;
2033 }
2034 
2035 static void
2036 add_to_client(struct i915_request *rq, struct drm_file *file)
2037 {
2038 	struct drm_i915_file_private *file_priv = file->driver_priv;
2039 
2040 	rq->file_priv = file_priv;
2041 
2042 	spin_lock(&file_priv->mm.lock);
2043 	list_add_tail(&rq->client_link, &file_priv->mm.request_list);
2044 	spin_unlock(&file_priv->mm.lock);
2045 }
2046 
2047 static int eb_submit(struct i915_execbuffer *eb)
2048 {
2049 	int err;
2050 
2051 	err = eb_move_to_gpu(eb);
2052 	if (err)
2053 		return err;
2054 
2055 	if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
2056 		err = i915_reset_gen7_sol_offsets(eb->request);
2057 		if (err)
2058 			return err;
2059 	}
2060 
2061 	/*
2062 	 * After we completed waiting for other engines (using HW semaphores)
2063 	 * then we can signal that this request/batch is ready to run. This
2064 	 * allows us to determine if the batch is still waiting on the GPU
2065 	 * or actually running by checking the breadcrumb.
2066 	 */
2067 	if (eb->engine->emit_init_breadcrumb) {
2068 		err = eb->engine->emit_init_breadcrumb(eb->request);
2069 		if (err)
2070 			return err;
2071 	}
2072 
2073 	err = eb->engine->emit_bb_start(eb->request,
2074 					eb->batch->node.start +
2075 					eb->batch_start_offset,
2076 					eb->batch_len,
2077 					eb->batch_flags);
2078 	if (err)
2079 		return err;
2080 
2081 	if (i915_gem_context_nopreempt(eb->gem_context))
2082 		eb->request->flags |= I915_REQUEST_NOPREEMPT;
2083 
2084 	return 0;
2085 }
2086 
2087 static int num_vcs_engines(const struct drm_i915_private *i915)
2088 {
2089 	return hweight64(INTEL_INFO(i915)->engine_mask &
2090 			 GENMASK_ULL(VCS0 + I915_MAX_VCS - 1, VCS0));
2091 }
2092 
2093 /*
2094  * Find one BSD ring to dispatch the corresponding BSD command.
2095  * The engine index is returned.
2096  */
2097 static unsigned int
2098 gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
2099 			 struct drm_file *file)
2100 {
2101 	struct drm_i915_file_private *file_priv = file->driver_priv;
2102 
2103 	/* Check whether the file_priv has already selected one ring. */
2104 	if ((int)file_priv->bsd_engine < 0)
2105 		file_priv->bsd_engine =
2106 			get_random_int() % num_vcs_engines(dev_priv);
2107 
2108 	return file_priv->bsd_engine;
2109 }
2110 
2111 static const enum intel_engine_id user_ring_map[] = {
2112 	[I915_EXEC_DEFAULT]	= RCS0,
2113 	[I915_EXEC_RENDER]	= RCS0,
2114 	[I915_EXEC_BLT]		= BCS0,
2115 	[I915_EXEC_BSD]		= VCS0,
2116 	[I915_EXEC_VEBOX]	= VECS0
2117 };
2118 
2119 static struct i915_request *eb_throttle(struct intel_context *ce)
2120 {
2121 	struct intel_ring *ring = ce->ring;
2122 	struct intel_timeline *tl = ce->timeline;
2123 	struct i915_request *rq;
2124 
2125 	/*
2126 	 * Completely unscientific finger-in-the-air estimates for suitable
2127 	 * maximum user request size (to avoid blocking) and then backoff.
2128 	 */
2129 	if (intel_ring_update_space(ring) >= PAGE_SIZE)
2130 		return NULL;
2131 
2132 	/*
2133 	 * Find a request that after waiting upon, there will be at least half
2134 	 * the ring available. The hysteresis allows us to compete for the
2135 	 * shared ring and should mean that we sleep less often prior to
2136 	 * claiming our resources, but not so long that the ring completely
2137 	 * drains before we can submit our next request.
2138 	 */
2139 	list_for_each_entry(rq, &tl->requests, link) {
2140 		if (rq->ring != ring)
2141 			continue;
2142 
2143 		if (__intel_ring_space(rq->postfix,
2144 				       ring->emit, ring->size) > ring->size / 2)
2145 			break;
2146 	}
2147 	if (&rq->link == &tl->requests)
2148 		return NULL; /* weird, we will check again later for real */
2149 
2150 	return i915_request_get(rq);
2151 }
2152 
2153 static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
2154 {
2155 	struct intel_timeline *tl;
2156 	struct i915_request *rq;
2157 	int err;
2158 
2159 	/*
2160 	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
2161 	 * EIO if the GPU is already wedged.
2162 	 */
2163 	err = intel_gt_terminally_wedged(ce->engine->gt);
2164 	if (err)
2165 		return err;
2166 
2167 	/*
2168 	 * Pinning the contexts may generate requests in order to acquire
2169 	 * GGTT space, so do this first before we reserve a seqno for
2170 	 * ourselves.
2171 	 */
2172 	err = intel_context_pin(ce);
2173 	if (err)
2174 		return err;
2175 
2176 	/*
2177 	 * Take a local wakeref for preparing to dispatch the execbuf as
2178 	 * we expect to access the hardware fairly frequently in the
2179 	 * process, and require the engine to be kept awake between accesses.
2180 	 * Upon dispatch, we acquire another prolonged wakeref that we hold
2181 	 * until the timeline is idle, which in turn releases the wakeref
2182 	 * taken on the engine, and the parent device.
2183 	 */
2184 	tl = intel_context_timeline_lock(ce);
2185 	if (IS_ERR(tl)) {
2186 		err = PTR_ERR(tl);
2187 		goto err_unpin;
2188 	}
2189 
2190 	intel_context_enter(ce);
2191 	rq = eb_throttle(ce);
2192 
2193 	intel_context_timeline_unlock(tl);
2194 
2195 	if (rq) {
2196 		if (i915_request_wait(rq,
2197 				      I915_WAIT_INTERRUPTIBLE,
2198 				      MAX_SCHEDULE_TIMEOUT) < 0) {
2199 			i915_request_put(rq);
2200 			err = -EINTR;
2201 			goto err_exit;
2202 		}
2203 
2204 		i915_request_put(rq);
2205 	}
2206 
2207 	eb->engine = ce->engine;
2208 	eb->context = ce;
2209 	return 0;
2210 
2211 err_exit:
2212 	mutex_lock(&tl->mutex);
2213 	intel_context_exit(ce);
2214 	intel_context_timeline_unlock(tl);
2215 err_unpin:
2216 	intel_context_unpin(ce);
2217 	return err;
2218 }
2219 
2220 static void eb_unpin_engine(struct i915_execbuffer *eb)
2221 {
2222 	struct intel_context *ce = eb->context;
2223 	struct intel_timeline *tl = ce->timeline;
2224 
2225 	mutex_lock(&tl->mutex);
2226 	intel_context_exit(ce);
2227 	mutex_unlock(&tl->mutex);
2228 
2229 	intel_context_unpin(ce);
2230 }
2231 
2232 static unsigned int
2233 eb_select_legacy_ring(struct i915_execbuffer *eb,
2234 		      struct drm_file *file,
2235 		      struct drm_i915_gem_execbuffer2 *args)
2236 {
2237 	struct drm_i915_private *i915 = eb->i915;
2238 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
2239 
2240 	if (user_ring_id != I915_EXEC_BSD &&
2241 	    (args->flags & I915_EXEC_BSD_MASK)) {
2242 		DRM_DEBUG("execbuf with non bsd ring but with invalid "
2243 			  "bsd dispatch flags: %d\n", (int)(args->flags));
2244 		return -1;
2245 	}
2246 
2247 	if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) {
2248 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
2249 
2250 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
2251 			bsd_idx = gen8_dispatch_bsd_engine(i915, file);
2252 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
2253 			   bsd_idx <= I915_EXEC_BSD_RING2) {
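			/*
			 * RING1/RING2 place 1 and 2 in the BSD selector
			 * field; shift down and subtract one to get the
			 * 0-based VCS index for _VCS().
			 */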
2254 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
2255 			bsd_idx--;
2256 		} else {
2257 			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
2258 				  bsd_idx);
2259 			return -1;
2260 		}
2261 
2262 		return _VCS(bsd_idx);
2263 	}
2264 
2265 	if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
2266 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
2267 		return -1;
2268 	}
2269 
2270 	return user_ring_map[user_ring_id];
2271 }
2272 
2273 static int
2274 eb_pin_engine(struct i915_execbuffer *eb,
2275 	      struct drm_file *file,
2276 	      struct drm_i915_gem_execbuffer2 *args)
2277 {
2278 	struct intel_context *ce;
2279 	unsigned int idx;
2280 	int err;
2281 
2282 	if (i915_gem_context_user_engines(eb->gem_context))
2283 		idx = args->flags & I915_EXEC_RING_MASK;
2284 	else
2285 		idx = eb_select_legacy_ring(eb, file, args);
2286 
2287 	ce = i915_gem_context_get_engine(eb->gem_context, idx);
2288 	if (IS_ERR(ce))
2289 		return PTR_ERR(ce);
2290 
2291 	err = __eb_pin_engine(eb, ce);
2292 	intel_context_put(ce);
2293 
2294 	return err;
2295 }
2296 
2297 static void
2298 __free_fence_array(struct drm_syncobj **fences, unsigned int n)
2299 {
2300 	while (n--)
2301 		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
2302 	kvfree(fences);
2303 }
2304 
2305 static struct drm_syncobj **
2306 get_fence_array(struct drm_i915_gem_execbuffer2 *args,
2307 		struct drm_file *file)
2308 {
2309 	const unsigned long nfences = args->num_cliprects;
2310 	struct drm_i915_gem_exec_fence __user *user;
2311 	struct drm_syncobj **fences;
2312 	unsigned long n;
2313 	int err;
2314 
2315 	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
2316 		return NULL;
2317 
2318 	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
2319 	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2320 	if (nfences > min_t(unsigned long,
2321 			    ULONG_MAX / sizeof(*user),
2322 			    SIZE_MAX / sizeof(*fences)))
2323 		return ERR_PTR(-EINVAL);
2324 
2325 	user = u64_to_user_ptr(args->cliprects_ptr);
2326 	if (!access_ok(user, nfences * sizeof(*user)))
2327 		return ERR_PTR(-EFAULT);
2328 
2329 	fences = kvmalloc_array(nfences, sizeof(*fences),
2330 				__GFP_NOWARN | GFP_KERNEL);
2331 	if (!fences)
2332 		return ERR_PTR(-ENOMEM);
2333 
2334 	for (n = 0; n < nfences; n++) {
2335 		struct drm_i915_gem_exec_fence fence;
2336 		struct drm_syncobj *syncobj;
2337 
2338 		if (__copy_from_user(&fence, user++, sizeof(fence))) {
2339 			err = -EFAULT;
2340 			goto err;
2341 		}
2342 
2343 		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
2344 			err = -EINVAL;
2345 			goto err;
2346 		}
2347 
2348 		syncobj = drm_syncobj_find(file, fence.handle);
2349 		if (!syncobj) {
2350 			DRM_DEBUG("Invalid syncobj handle provided\n");
2351 			err = -ENOENT;
2352 			goto err;
2353 		}
2354 
2355 		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2356 			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2357 
2358 		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
2359 	}
2360 
2361 	return fences;
2362 
2363 err:
2364 	__free_fence_array(fences, n);
2365 	return ERR_PTR(err);
2366 }
2367 
2368 static void
2369 put_fence_array(struct drm_i915_gem_execbuffer2 *args,
2370 		struct drm_syncobj **fences)
2371 {
2372 	if (fences)
2373 		__free_fence_array(fences, args->num_cliprects);
2374 }
2375 
2376 static int
2377 await_fence_array(struct i915_execbuffer *eb,
2378 		  struct drm_syncobj **fences)
2379 {
2380 	const unsigned int nfences = eb->args->num_cliprects;
2381 	unsigned int n;
2382 	int err;
2383 
2384 	for (n = 0; n < nfences; n++) {
2385 		struct drm_syncobj *syncobj;
2386 		struct dma_fence *fence;
2387 		unsigned int flags;
2388 
2389 		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2390 		if (!(flags & I915_EXEC_FENCE_WAIT))
2391 			continue;
2392 
2393 		fence = drm_syncobj_fence_get(syncobj);
2394 		if (!fence)
2395 			return -EINVAL;
2396 
2397 		err = i915_request_await_dma_fence(eb->request, fence);
2398 		dma_fence_put(fence);
2399 		if (err < 0)
2400 			return err;
2401 	}
2402 
2403 	return 0;
2404 }
2405 
2406 static void
2407 signal_fence_array(struct i915_execbuffer *eb,
2408 		   struct drm_syncobj **fences)
2409 {
2410 	const unsigned int nfences = eb->args->num_cliprects;
2411 	struct dma_fence * const fence = &eb->request->fence;
2412 	unsigned int n;
2413 
2414 	for (n = 0; n < nfences; n++) {
2415 		struct drm_syncobj *syncobj;
2416 		unsigned int flags;
2417 
2418 		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2419 		if (!(flags & I915_EXEC_FENCE_SIGNAL))
2420 			continue;
2421 
2422 		drm_syncobj_replace_fence(syncobj, fence);
2423 	}
2424 }
2425 
2426 static int
2427 i915_gem_do_execbuffer(struct drm_device *dev,
2428 		       struct drm_file *file,
2429 		       struct drm_i915_gem_execbuffer2 *args,
2430 		       struct drm_i915_gem_exec_object2 *exec,
2431 		       struct drm_syncobj **fences)
2432 {
2433 	struct i915_execbuffer eb;
2434 	struct dma_fence *in_fence = NULL;
2435 	struct dma_fence *exec_fence = NULL;
2436 	struct sync_file *out_fence = NULL;
2437 	int out_fence_fd = -1;
2438 	int err;
2439 
2440 	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
2441 	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
2442 		     ~__EXEC_OBJECT_UNKNOWN_FLAGS);
2443 
2444 	eb.i915 = to_i915(dev);
2445 	eb.file = file;
2446 	eb.args = args;
2447 	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
2448 		args->flags |= __EXEC_HAS_RELOC;
2449 
2450 	eb.exec = exec;
2451 	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
2452 	eb.vma[0] = NULL;
2453 	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
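	/*
	 * eb.exec, eb.vma and eb.flags share a single allocation of
	 * buffer_count + 1 slots of eb_element_size() bytes each: the exec
	 * objects first (plus one spare slot for the cmdparser), then the
	 * vma pointers, then the per-object flags carved out above.
	 */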
2454 
2455 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
2456 	reloc_cache_init(&eb.reloc_cache, eb.i915);
2457 
2458 	eb.buffer_count = args->buffer_count;
2459 	eb.batch_start_offset = args->batch_start_offset;
2460 	eb.batch_len = args->batch_len;
2461 
2462 	eb.batch_flags = 0;
2463 	if (args->flags & I915_EXEC_SECURE) {
		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
			return -EPERM;
2466 
2467 		eb.batch_flags |= I915_DISPATCH_SECURE;
2468 	}
2469 	if (args->flags & I915_EXEC_IS_PINNED)
2470 		eb.batch_flags |= I915_DISPATCH_PINNED;
2471 
2472 	if (args->flags & I915_EXEC_FENCE_IN) {
2473 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
2474 		if (!in_fence)
2475 			return -EINVAL;
2476 	}
2477 
2478 	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
2479 		if (in_fence) {
2480 			err = -EINVAL;
2481 			goto err_in_fence;
2482 		}
2483 
2484 		exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
2485 		if (!exec_fence) {
2486 			err = -EINVAL;
2487 			goto err_in_fence;
2488 		}
2489 	}
2490 
2491 	if (args->flags & I915_EXEC_FENCE_OUT) {
2492 		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
2493 		if (out_fence_fd < 0) {
2494 			err = out_fence_fd;
2495 			goto err_exec_fence;
2496 		}
2497 	}
2498 
2499 	err = eb_create(&eb);
2500 	if (err)
2501 		goto err_out_fence;
2502 
2503 	GEM_BUG_ON(!eb.lut_size);
2504 
2505 	err = eb_select_context(&eb);
2506 	if (unlikely(err))
2507 		goto err_destroy;
2508 
2509 	err = eb_pin_engine(&eb, file, args);
2510 	if (unlikely(err))
2511 		goto err_context;
2512 
2513 	err = i915_mutex_lock_interruptible(dev);
2514 	if (err)
2515 		goto err_engine;
2516 
2517 	err = eb_relocate(&eb);
2518 	if (err) {
2519 		/*
2520 		 * If the user expects the execobject.offset and
2521 		 * reloc.presumed_offset to be an exact match,
2522 		 * as for using NO_RELOC, then we cannot update
2523 		 * the execobject.offset until we have completed
2524 		 * relocation.
2525 		 */
2526 		args->flags &= ~__EXEC_HAS_RELOC;
2527 		goto err_vma;
2528 	}
2529 
2530 	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
2531 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
2532 		err = -EINVAL;
2533 		goto err_vma;
2534 	}
2535 	if (eb.batch_start_offset > eb.batch->size ||
2536 	    eb.batch_len > eb.batch->size - eb.batch_start_offset) {
2537 		DRM_DEBUG("Attempting to use out-of-bounds batch\n");
2538 		err = -EINVAL;
2539 		goto err_vma;
2540 	}
2541 
2542 	if (eb_use_cmdparser(&eb)) {
2543 		struct i915_vma *vma;
2544 
2545 		vma = eb_parse(&eb, drm_is_current_master(file));
2546 		if (IS_ERR(vma)) {
2547 			err = PTR_ERR(vma);
2548 			goto err_vma;
2549 		}
2550 
2551 		if (vma) {
2552 			/*
2553 			 * Batch parsed and accepted:
2554 			 *
2555 			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
2556 			 * bit from MI_BATCH_BUFFER_START commands issued in
2557 			 * the dispatch_execbuffer implementations. We
2558 			 * specifically don't want that set on batches the
2559 			 * command parser has accepted.
2560 			 */
2561 			eb.batch_flags |= I915_DISPATCH_SECURE;
2562 			eb.batch_start_offset = 0;
2563 			eb.batch = vma;
2564 		}
2565 	}
2566 
2567 	if (eb.batch_len == 0)
2568 		eb.batch_len = eb.batch->size - eb.batch_start_offset;
2569 
2570 	/*
2571 	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
2572 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
2573 	 * hsw should have this fixed, but bdw mucks it up again. */
2574 	if (eb.batch_flags & I915_DISPATCH_SECURE) {
2575 		struct i915_vma *vma;
2576 
2577 		/*
		 * So at first glance it looks freaky that we pin the batch here
2579 		 * outside of the reservation loop. But:
2580 		 * - The batch is already pinned into the relevant ppgtt, so we
2581 		 *   already have the backing storage fully allocated.
2582 		 * - No other BO uses the global gtt (well contexts, but meh),
2583 		 *   so we don't really have issues with multiple objects not
2584 		 *   fitting due to fragmentation.
2585 		 * So this is actually safe.
2586 		 */
2587 		vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0);
2588 		if (IS_ERR(vma)) {
2589 			err = PTR_ERR(vma);
2590 			goto err_vma;
2591 		}
2592 
2593 		eb.batch = vma;
2594 	}
2595 
2596 	/* All GPU relocation batches must be submitted prior to the user rq */
2597 	GEM_BUG_ON(eb.reloc_cache.rq);
2598 
2599 	/* Allocate a request for this batch buffer nice and early. */
2600 	eb.request = i915_request_create(eb.context);
2601 	if (IS_ERR(eb.request)) {
2602 		err = PTR_ERR(eb.request);
2603 		goto err_batch_unpin;
2604 	}
2605 
2606 	if (in_fence) {
2607 		err = i915_request_await_dma_fence(eb.request, in_fence);
2608 		if (err < 0)
2609 			goto err_request;
2610 	}
2611 
2612 	if (exec_fence) {
2613 		err = i915_request_await_execution(eb.request, exec_fence,
2614 						   eb.engine->bond_execute);
2615 		if (err < 0)
2616 			goto err_request;
2617 	}
2618 
2619 	if (fences) {
2620 		err = await_fence_array(&eb, fences);
2621 		if (err)
2622 			goto err_request;
2623 	}
2624 
2625 	if (out_fence_fd != -1) {
2626 		out_fence = sync_file_create(&eb.request->fence);
2627 		if (!out_fence) {
2628 			err = -ENOMEM;
2629 			goto err_request;
2630 		}
2631 	}
2632 
2633 	/*
2634 	 * Whilst this request exists, batch_obj will be on the
2635 	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
2637 	 * inactive_list and lose its active reference. Hence we do not need
2638 	 * to explicitly hold another reference here.
2639 	 */
2640 	eb.request->batch = eb.batch;
2641 	if (eb.batch->private)
2642 		intel_engine_pool_mark_active(eb.batch->private, eb.request);
2643 
2644 	trace_i915_request_queue(eb.request, eb.batch_flags);
2645 	err = eb_submit(&eb);
2646 err_request:
2647 	add_to_client(eb.request, file);
2648 	i915_request_add(eb.request);
2649 
2650 	if (fences)
2651 		signal_fence_array(&eb, fences);
2652 
2653 	if (out_fence) {
2654 		if (err == 0) {
2655 			fd_install(out_fence_fd, out_fence->file);
2656 			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
2657 			args->rsvd2 |= (u64)out_fence_fd << 32;
2658 			out_fence_fd = -1;
2659 		} else {
2660 			fput(out_fence->file);
2661 		}
2662 	}
2663 
2664 err_batch_unpin:
2665 	if (eb.batch_flags & I915_DISPATCH_SECURE)
2666 		i915_vma_unpin(eb.batch);
2667 	if (eb.batch->private)
2668 		intel_engine_pool_put(eb.batch->private);
2669 err_vma:
2670 	if (eb.exec)
2671 		eb_release_vmas(&eb);
2672 	mutex_unlock(&dev->struct_mutex);
2673 err_engine:
2674 	eb_unpin_engine(&eb);
2675 err_context:
2676 	i915_gem_context_put(eb.gem_context);
2677 err_destroy:
2678 	eb_destroy(&eb);
2679 err_out_fence:
2680 	if (out_fence_fd != -1)
2681 		put_unused_fd(out_fence_fd);
2682 err_exec_fence:
2683 	dma_fence_put(exec_fence);
2684 err_in_fence:
2685 	dma_fence_put(in_fence);
2686 	return err;
2687 }
2688 
2689 static size_t eb_element_size(void)
2690 {
2691 	return (sizeof(struct drm_i915_gem_exec_object2) +
2692 		sizeof(struct i915_vma *) +
2693 		sizeof(unsigned int));
2694 }
2695 
2696 static bool check_buffer_count(size_t count)
2697 {
2698 	const size_t sz = eb_element_size();
2699 
2700 	/*
2701 	 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
2702 	 * array size (see eb_create()). Otherwise, we can accept an array as
2703 	 * large as can be addressed (though use large arrays at your peril)!
2704 	 */
2705 
2706 	return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
2707 }
2708 
2709 /*
2710  * Legacy execbuffer just creates an exec2 list from the original exec object
2711  * list array and passes it to the real function.
2712  */
2713 int
2714 i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
2715 			  struct drm_file *file)
2716 {
2717 	struct drm_i915_gem_execbuffer *args = data;
2718 	struct drm_i915_gem_execbuffer2 exec2;
2719 	struct drm_i915_gem_exec_object *exec_list = NULL;
2720 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
2721 	const size_t count = args->buffer_count;
2722 	unsigned int i;
2723 	int err;
2724 
2725 	if (!check_buffer_count(count)) {
2726 		DRM_DEBUG("execbuf2 with %zd buffers\n", count);
2727 		return -EINVAL;
2728 	}
2729 
2730 	exec2.buffers_ptr = args->buffers_ptr;
2731 	exec2.buffer_count = args->buffer_count;
2732 	exec2.batch_start_offset = args->batch_start_offset;
2733 	exec2.batch_len = args->batch_len;
2734 	exec2.DR1 = args->DR1;
2735 	exec2.DR4 = args->DR4;
2736 	exec2.num_cliprects = args->num_cliprects;
2737 	exec2.cliprects_ptr = args->cliprects_ptr;
2738 	exec2.flags = I915_EXEC_RENDER;
2739 	i915_execbuffer2_set_context_id(exec2, 0);
2740 
2741 	if (!i915_gem_check_execbuffer(&exec2))
2742 		return -EINVAL;
2743 
2744 	/* Copy in the exec list from userland */
2745 	exec_list = kvmalloc_array(count, sizeof(*exec_list),
2746 				   __GFP_NOWARN | GFP_KERNEL);
2747 	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
2748 				    __GFP_NOWARN | GFP_KERNEL);
2749 	if (exec_list == NULL || exec2_list == NULL) {
2750 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
2751 			  args->buffer_count);
2752 		kvfree(exec_list);
2753 		kvfree(exec2_list);
2754 		return -ENOMEM;
2755 	}
2756 	err = copy_from_user(exec_list,
2757 			     u64_to_user_ptr(args->buffers_ptr),
2758 			     sizeof(*exec_list) * count);
2759 	if (err) {
2760 		DRM_DEBUG("copy %d exec entries failed %d\n",
2761 			  args->buffer_count, err);
2762 		kvfree(exec_list);
2763 		kvfree(exec2_list);
2764 		return -EFAULT;
2765 	}
2766 
2767 	for (i = 0; i < args->buffer_count; i++) {
2768 		exec2_list[i].handle = exec_list[i].handle;
2769 		exec2_list[i].relocation_count = exec_list[i].relocation_count;
2770 		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
2771 		exec2_list[i].alignment = exec_list[i].alignment;
2772 		exec2_list[i].offset = exec_list[i].offset;
2773 		if (INTEL_GEN(to_i915(dev)) < 4)
2774 			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
2775 		else
2776 			exec2_list[i].flags = 0;
2777 	}
2778 
2779 	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
2780 	if (exec2.flags & __EXEC_HAS_RELOC) {
2781 		struct drm_i915_gem_exec_object __user *user_exec_list =
2782 			u64_to_user_ptr(args->buffers_ptr);
2783 
2784 		/* Copy the new buffer offsets back to the user's exec list. */
2785 		for (i = 0; i < args->buffer_count; i++) {
2786 			if (!(exec2_list[i].offset & UPDATE))
2787 				continue;
2788 
2789 			exec2_list[i].offset =
2790 				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2791 			exec2_list[i].offset &= PIN_OFFSET_MASK;
2792 			if (__copy_to_user(&user_exec_list[i].offset,
2793 					   &exec2_list[i].offset,
2794 					   sizeof(user_exec_list[i].offset)))
2795 				break;
2796 		}
2797 	}
2798 
2799 	kvfree(exec_list);
2800 	kvfree(exec2_list);
2801 	return err;
2802 }
2803 
2804 int
2805 i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
2806 			   struct drm_file *file)
2807 {
2808 	struct drm_i915_gem_execbuffer2 *args = data;
2809 	struct drm_i915_gem_exec_object2 *exec2_list;
2810 	struct drm_syncobj **fences = NULL;
2811 	const size_t count = args->buffer_count;
2812 	int err;
2813 
2814 	if (!check_buffer_count(count)) {
2815 		DRM_DEBUG("execbuf2 with %zd buffers\n", count);
2816 		return -EINVAL;
2817 	}
2818 
2819 	if (!i915_gem_check_execbuffer(args))
2820 		return -EINVAL;
2821 
2822 	/* Allocate an extra slot for use by the command parser */
2823 	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
2824 				    __GFP_NOWARN | GFP_KERNEL);
2825 	if (exec2_list == NULL) {
2826 		DRM_DEBUG("Failed to allocate exec list for %zd buffers\n",
2827 			  count);
2828 		return -ENOMEM;
2829 	}
2830 	if (copy_from_user(exec2_list,
2831 			   u64_to_user_ptr(args->buffers_ptr),
2832 			   sizeof(*exec2_list) * count)) {
2833 		DRM_DEBUG("copy %zd exec entries failed\n", count);
2834 		kvfree(exec2_list);
2835 		return -EFAULT;
2836 	}
2837 
2838 	if (args->flags & I915_EXEC_FENCE_ARRAY) {
2839 		fences = get_fence_array(args, file);
2840 		if (IS_ERR(fences)) {
2841 			kvfree(exec2_list);
2842 			return PTR_ERR(fences);
2843 		}
2844 	}
2845 
2846 	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
2847 
2848 	/*
2849 	 * Now that we have begun execution of the batchbuffer, we ignore
2850 	 * any new error after this point. Also given that we have already
2851 	 * updated the associated relocations, we try to write out the current
2852 	 * object locations irrespective of any error.
2853 	 */
2854 	if (args->flags & __EXEC_HAS_RELOC) {
2855 		struct drm_i915_gem_exec_object2 __user *user_exec_list =
2856 			u64_to_user_ptr(args->buffers_ptr);
2857 		unsigned int i;
2858 
		/*
		 * Copy the new buffer offsets back to the user's exec list.
		 *
		 * Note: count * sizeof(*user_exec_list) does not overflow,
		 * because we checked 'count' in check_buffer_count().
		 *
		 * And this range already got effectively checked earlier
		 * when we did the "copy_from_user()" above.
		 */
2867 		if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
2868 			goto end;
2869 
2870 		for (i = 0; i < args->buffer_count; i++) {
2871 			if (!(exec2_list[i].offset & UPDATE))
2872 				continue;
2873 
2874 			exec2_list[i].offset =
2875 				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2876 			unsafe_put_user(exec2_list[i].offset,
2877 					&user_exec_list[i].offset,
2878 					end_user);
2879 		}
2880 end_user:
2881 		user_access_end();
2882 end:;
2883 	}
2884 
2885 	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
2886 	put_fence_array(args, fences);
2887 	kvfree(exec2_list);
2888 	return err;
2889 }
2890