/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2012-2014 Intel Corporation
 */

#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>

#include <drm/i915_drm.h>

#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "intel_drv.h"

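/*
 * i915_mm_struct wraps a client mm_struct so that every userptr object
 * created by the same process shares one mmu-notifier. Instances live in
 * dev_priv->mm_structs, hashed by the mm_struct pointer, and are reference
 * counted; the final kref_put defers the heavyweight teardown to a worker
 * (see __i915_mm_struct_free()).
 */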
struct i915_mm_struct {
	struct mm_struct *mm;
	struct drm_i915_private *i915;
	struct i915_mmu_notifier *mn;
	struct hlist_node node;
	struct kref kref;
	struct work_struct work;
};

#if defined(CONFIG_MMU_NOTIFIER)
#include <linux/interval_tree.h>

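/*
 * One i915_mmu_notifier is registered per client mm. It tracks the userptr
 * objects belonging to that mm in an interval tree indexed by their user
 * address range, so that an invalidation callback can find every
 * overlapping object under mn->lock.
 */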
struct i915_mmu_notifier {
	spinlock_t lock;
	struct hlist_node node;
	struct mmu_notifier mn;
	struct rb_root_cached objects;
	struct i915_mm_struct *mm;
};

struct i915_mmu_object {
	struct i915_mmu_notifier *mn;
	struct drm_i915_gem_object *obj;
	struct interval_tree_node it;
};

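/* The interval tree is protected by mn->lock; callers must hold it. */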
static void add_object(struct i915_mmu_object *mo)
{
	GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
	interval_tree_insert(&mo->it, &mo->mn->objects);
}

static void del_object(struct i915_mmu_object *mo)
{
	if (RB_EMPTY_NODE(&mo->it.rb))
		return;

	interval_tree_remove(&mo->it, &mo->mn->objects);
	RB_CLEAR_NODE(&mo->it.rb);
}

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
	struct i915_mmu_object *mo = obj->userptr.mmu_object;

	/*
	 * During mm_invalidate_range we need to cancel any userptr that
	 * overlaps the range being invalidated. Doing so requires the
	 * struct_mutex, and that risks recursion. In order to cause
	 * recursion, the user must alias the userptr address space with
	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
	 * to invalidate that mmapping, mm_invalidate_range is called with
	 * the userptr address *and* the struct_mutex held. To prevent that
	 * we set a flag under the i915_mmu_notifier spinlock to indicate
	 * whether this object is valid.
	 */
	if (!mo)
		return;

	spin_lock(&mo->mn->lock);
	if (value)
		add_object(mo);
	else
		del_object(mo);
	spin_unlock(&mo->mn->lock);
}

88 
89 static int
90 userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
91 				  const struct mmu_notifier_range *range)
92 {
93 	struct i915_mmu_notifier *mn =
94 		container_of(_mn, struct i915_mmu_notifier, mn);
95 	struct interval_tree_node *it;
96 	struct mutex *unlock = NULL;
97 	unsigned long end;
98 	int ret = 0;
99 
100 	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
101 		return 0;
102 
103 	/* interval ranges are inclusive, but invalidate range is exclusive */
104 	end = range->end - 1;
105 
106 	spin_lock(&mn->lock);
107 	it = interval_tree_iter_first(&mn->objects, range->start, end);
108 	while (it) {
109 		struct drm_i915_gem_object *obj;
110 
111 		if (!mmu_notifier_range_blockable(range)) {
112 			ret = -EAGAIN;
113 			break;
114 		}
115 
116 		/*
117 		 * The mmu_object is released late when destroying the
118 		 * GEM object so it is entirely possible to gain a
119 		 * reference on an object in the process of being freed
120 		 * since our serialisation is via the spinlock and not
121 		 * the struct_mutex - and consequently use it after it
122 		 * is freed and then double free it. To prevent that
123 		 * use-after-free we only acquire a reference on the
124 		 * object if it is not in the process of being destroyed.
125 		 */
126 		obj = container_of(it, struct i915_mmu_object, it)->obj;
127 		if (!kref_get_unless_zero(&obj->base.refcount)) {
128 			it = interval_tree_iter_next(it, range->start, end);
129 			continue;
130 		}
131 		spin_unlock(&mn->lock);
132 
133 		if (!unlock) {
134 			unlock = &mn->mm->i915->drm.struct_mutex;
135 
136 			switch (mutex_trylock_recursive(unlock)) {
137 			default:
138 			case MUTEX_TRYLOCK_FAILED:
139 				if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
140 					i915_gem_object_put(obj);
141 					return -EINTR;
142 				}
143 				/* fall through */
144 			case MUTEX_TRYLOCK_SUCCESS:
145 				break;
146 
147 			case MUTEX_TRYLOCK_RECURSIVE:
148 				unlock = ERR_PTR(-EEXIST);
149 				break;
150 			}
151 		}
152 
153 		ret = i915_gem_object_unbind(obj,
154 					     I915_GEM_OBJECT_UNBIND_ACTIVE);
155 		if (ret == 0)
156 			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
157 		i915_gem_object_put(obj);
158 		if (ret)
159 			goto unlock;
160 
161 		spin_lock(&mn->lock);
162 
		/*
		 * As we do not (yet) protect the mmu from concurrent insertion
		 * over this range, there is no guarantee that this search will
		 * terminate given a pathological workload.
		 */
		it = interval_tree_iter_first(&mn->objects, range->start, end);
	}
	spin_unlock(&mn->lock);

unlock:
	if (!IS_ERR_OR_NULL(unlock))
		mutex_unlock(unlock);

	return ret;
}

static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
	.invalidate_range_start = userptr_mn_invalidate_range_start,
};

static struct i915_mmu_notifier *
i915_mmu_notifier_create(struct i915_mm_struct *mm)
{
	struct i915_mmu_notifier *mn;

	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
	if (mn == NULL)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&mn->lock);
	mn->mn.ops = &i915_gem_userptr_notifier;
	mn->objects = RB_ROOT_CACHED;
	mn->mm = mm;

	return mn;
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
	struct i915_mmu_object *mo;

	mo = fetch_and_zero(&obj->userptr.mmu_object);
	if (!mo)
		return;

	spin_lock(&mo->mn->lock);
	del_object(mo);
	spin_unlock(&mo->mn->lock);
	kfree(mo);
}

static struct i915_mmu_notifier *
i915_mmu_notifier_find(struct i915_mm_struct *mm)
{
	struct i915_mmu_notifier *mn;
	int err = 0;

	mn = mm->mn;
	if (mn)
		return mn;

	mn = i915_mmu_notifier_create(mm);
	if (IS_ERR(mn))
		err = PTR_ERR(mn);

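	/*
	 * Even if creation failed, take the locks and recheck mm->mn: a
	 * concurrent caller may already have installed a notifier, in which
	 * case we can use theirs and discard our own error. Registration
	 * itself requires mmap_sem (write) for __mmu_notifier_register()
	 * and mm_lock to publish mm->mn.
	 */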
	down_write(&mm->mm->mmap_sem);
	mutex_lock(&mm->i915->mm_lock);
	if (mm->mn == NULL && !err) {
		/* Protected by mmap_sem (write-lock) */
		err = __mmu_notifier_register(&mn->mn, mm->mm);
		if (!err) {
			/* Protected by mm_lock */
			mm->mn = fetch_and_zero(&mn);
		}
	} else if (mm->mn) {
		/*
		 * Someone else raced and successfully installed the mmu
		 * notifier, so we can cancel our own errors.
		 */
		err = 0;
	}
	mutex_unlock(&mm->i915->mm_lock);
	up_write(&mm->mm->mmap_sem);

	if (mn && !IS_ERR(mn))
		kfree(mn);

	return err ? ERR_PTR(err) : mm->mn;
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
				    unsigned flags)
{
	struct i915_mmu_notifier *mn;
	struct i915_mmu_object *mo;

	if (flags & I915_USERPTR_UNSYNCHRONIZED)
		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;

	if (WARN_ON(obj->userptr.mm == NULL))
		return -EINVAL;

	mn = i915_mmu_notifier_find(obj->userptr.mm);
	if (IS_ERR(mn))
		return PTR_ERR(mn);

	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
	if (!mo)
		return -ENOMEM;

	mo->mn = mn;
	mo->obj = obj;
	mo->it.start = obj->userptr.ptr;
	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
	RB_CLEAR_NODE(&mo->it.rb);

	obj->userptr.mmu_object = mo;
	return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct mm_struct *mm)
{
	if (mn == NULL)
		return;

	mmu_notifier_unregister(&mn->mn, mm);
	kfree(mn);
}

#else

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
				    unsigned flags)
{
	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
		return -ENODEV;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct mm_struct *mm)
{
}

#endif

static struct i915_mm_struct *
__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
{
	struct i915_mm_struct *mm;

	/* Protected by dev_priv->mm_lock */
	hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
		if (mm->mm == real)
			return mm;

	return NULL;
}

static int
i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_mm_struct *mm;
	int ret = 0;

	/* During release of the GEM object we hold the struct_mutex. This
	 * precludes us from calling mmput() at that time as that may be
	 * the last reference and so call exit_mmap(). exit_mmap() will
	 * attempt to reap the vma, and if we were holding a GTT mmap
	 * would then call drm_gem_vm_close() and attempt to reacquire
	 * the struct mutex. So in order to avoid that recursion, we have
	 * to defer releasing the mm reference until after we drop the
	 * struct_mutex, i.e. we need to schedule a worker to do the clean
	 * up.
	 */
	mutex_lock(&dev_priv->mm_lock);
	mm = __i915_mm_struct_find(dev_priv, current->mm);
	if (mm == NULL) {
		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
		if (mm == NULL) {
			ret = -ENOMEM;
			goto out;
		}

		kref_init(&mm->kref);
		mm->i915 = to_i915(obj->base.dev);

		mm->mm = current->mm;
		mmgrab(current->mm);

		mm->mn = NULL;

		/* Protected by dev_priv->mm_lock */
		hash_add(dev_priv->mm_structs,
			 &mm->node, (unsigned long)mm->mm);
	} else
		kref_get(&mm->kref);

	obj->userptr.mm = mm;
out:
	mutex_unlock(&dev_priv->mm_lock);
	return ret;
}

static void
__i915_mm_struct_free__worker(struct work_struct *work)
{
	struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);
	i915_mmu_notifier_free(mm->mn, mm->mm);
	mmdrop(mm->mm);
	kfree(mm);
}

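/*
 * Called via kref_put_mutex() with dev_priv->mm_lock already held; we drop
 * the lock here and defer the mmdrop/notifier teardown to a worker.
 */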
static void
__i915_mm_struct_free(struct kref *kref)
{
	struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);

	/* Protected by dev_priv->mm_lock */
	hash_del(&mm->node);
	mutex_unlock(&mm->i915->mm_lock);

	INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
	queue_work(mm->i915->mm.userptr_wq, &mm->work);
}

static void
i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
{
	if (obj->userptr.mm == NULL)
		return;

	kref_put_mutex(&obj->userptr.mm->kref,
		       __i915_mm_struct_free,
		       &to_i915(obj->base.dev)->mm_lock);
	obj->userptr.mm = NULL;
}

struct get_pages_work {
	struct work_struct work;
	struct drm_i915_gem_object *obj;
	struct task_struct *task;
};

static struct sg_table *
__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
			       struct page **pvec, int num_pages)
{
	unsigned int max_segment = i915_sg_segment_size();
	struct sg_table *st;
	unsigned int sg_page_sizes;
	int ret;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return ERR_PTR(-ENOMEM);

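	/*
	 * Try to pack the pages into as few sg segments as the hardware
	 * allows; if DMA mapping of those segments then fails, retry below
	 * with single-page segments.
	 */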
alloc_table:
	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
					  0, num_pages << PAGE_SHIFT,
					  max_segment,
					  GFP_KERNEL);
	if (ret) {
		kfree(st);
		return ERR_PTR(ret);
	}

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		sg_free_table(st);

		if (max_segment > PAGE_SIZE) {
			max_segment = PAGE_SIZE;
			goto alloc_table;
		}

		kfree(st);
		return ERR_PTR(ret);
	}

	sg_page_sizes = i915_sg_page_sizes(st->sgl);

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return st;
}

static void
__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
	struct get_pages_work *work = container_of(_work, typeof(*work), work);
	struct drm_i915_gem_object *obj = work->obj;
	const int npages = obj->base.size >> PAGE_SHIFT;
	struct page **pvec;
	int pinned, ret;

	ret = -ENOMEM;
	pinned = 0;

	pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
	if (pvec != NULL) {
		struct mm_struct *mm = obj->userptr.mm->mm;
		unsigned int flags = 0;

		if (!i915_gem_object_is_readonly(obj))
			flags |= FOLL_WRITE;

		ret = -EFAULT;
		if (mmget_not_zero(mm)) {
			down_read(&mm->mmap_sem);
			while (pinned < npages) {
				ret = get_user_pages_remote
					(work->task, mm,
					 obj->userptr.ptr + pinned * PAGE_SIZE,
					 npages - pinned,
					 flags,
					 pvec + pinned, NULL, NULL);
				if (ret < 0)
					break;

				pinned += ret;
			}
			up_read(&mm->mmap_sem);
			mmput(mm);
		}
	}

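	/*
	 * Only publish our result if this worker has not been cancelled or
	 * superseded in the meantime (obj->userptr.work still points at us);
	 * otherwise the pinned pages are simply released again below.
	 */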
	mutex_lock(&obj->mm.lock);
	if (obj->userptr.work == &work->work) {
		struct sg_table *pages = ERR_PTR(ret);

		if (pinned == npages) {
			pages = __i915_gem_userptr_alloc_pages(obj, pvec,
							       npages);
			if (!IS_ERR(pages)) {
				pinned = 0;
				pages = NULL;
			}
		}

		obj->userptr.work = ERR_CAST(pages);
		if (IS_ERR(pages))
			__i915_gem_userptr_set_active(obj, false);
	}
	mutex_unlock(&obj->mm.lock);

	release_pages(pvec, pinned);
	kvfree(pvec);

	i915_gem_object_put(obj);
	put_task_struct(work->task);
	kfree(work);
}

static struct sg_table *
__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
{
	struct get_pages_work *work;

	/* Spawn a worker so that we can acquire the
	 * user pages without holding our mutex. Access
	 * to the user pages requires mmap_sem, and we have
	 * a strict lock ordering of mmap_sem, struct_mutex -
	 * we already hold struct_mutex here and so cannot
	 * call gup without encountering a lock inversion.
	 *
	 * Userspace will keep on repeating the operation
	 * (thanks to EAGAIN) until either we hit the fast
	 * path or the worker completes. If the worker is
	 * cancelled or superseded, the task is still run
	 * but its results are ignored. (This leads to
	 * complications in that we may have a stray object
	 * refcount that we need to be wary of when
	 * checking for existing objects during creation.)
	 * If the worker encounters an error, it reports
	 * that error back to this function through
	 * obj->userptr.work = ERR_PTR.
	 */
	work = kmalloc(sizeof(*work), GFP_KERNEL);
	if (work == NULL)
		return ERR_PTR(-ENOMEM);

	obj->userptr.work = &work->work;

	work->obj = i915_gem_object_get(obj);

	work->task = current;
	get_task_struct(work->task);

	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
	queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);

	return ERR_PTR(-EAGAIN);
}


static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
	const int num_pages = obj->base.size >> PAGE_SHIFT;
	struct mm_struct *mm = obj->userptr.mm->mm;
	struct page **pvec;
	struct sg_table *pages;
	bool active;
	int pinned;

	/* If userspace engineers the replacement of these pages in the vma
	 * between us binding them into the GTT and the completion of
	 * rendering... that is their loss. If they change the mapping of
	 * their pages they need to create a new bo to point to the new vma.
	 *
	 * However, that still leaves open the possibility of the vma
	 * being copied upon fork, which falls under the same userspace
	 * synchronisation issue as a regular bo, except that this time
	 * the process may not be expecting that a particular piece of
	 * memory is tied to the GPU.
	 *
	 * Fortunately, we can hook into the mmu_notifier in order to
	 * discard the page references prior to anything nasty happening
	 * to the vma (discard or cloning), which should prevent the more
	 * egregious cases from causing harm.
	 */

	if (obj->userptr.work) {
		/* active flag should still be held for the pending work */
		if (IS_ERR(obj->userptr.work))
			return PTR_ERR(obj->userptr.work);
		else
			return -EAGAIN;
	}

	pvec = NULL;
	pinned = 0;

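	/*
	 * Fast path: when called from the creating process we may try
	 * __get_user_pages_fast() directly; anything we fail to pin here is
	 * handed off to the worker via __i915_gem_userptr_get_pages_schedule().
	 */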
	if (mm == current->mm) {
		pvec = kvmalloc_array(num_pages, sizeof(struct page *),
				      GFP_KERNEL |
				      __GFP_NORETRY |
				      __GFP_NOWARN);
		if (pvec) /* defer to worker if malloc fails */
			pinned = __get_user_pages_fast(obj->userptr.ptr,
						       num_pages,
						       !i915_gem_object_is_readonly(obj),
						       pvec);
	}

	active = false;
	if (pinned < 0) {
		pages = ERR_PTR(pinned);
		pinned = 0;
	} else if (pinned < num_pages) {
		pages = __i915_gem_userptr_get_pages_schedule(obj);
		active = pages == ERR_PTR(-EAGAIN);
	} else {
		pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
		active = !IS_ERR(pages);
	}
	if (active)
		__i915_gem_userptr_set_active(obj, true);

	if (IS_ERR(pages))
		release_pages(pvec, pinned);
	kvfree(pvec);

	return PTR_ERR_OR_ZERO(pages);
}

static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct page *page;

	/* Cancel any inflight work and force them to restart their gup */
	obj->userptr.work = NULL;
	__i915_gem_userptr_set_active(obj, false);
	if (!pages)
		return;

	__i915_gem_object_release_shmem(obj, pages, true);
	i915_gem_gtt_finish_pages(obj, pages);

	/*
	 * We always mark objects as dirty when they are used by the GPU,
	 * just in case. However, if we set the vma as being read-only we know
	 * that the object will never have been written to.
	 */
	if (i915_gem_object_is_readonly(obj))
		obj->mm.dirty = false;

	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty)
			/*
			 * As this may not be anonymous memory (e.g. shmem)
			 * but exist on a real mapping, we have to lock
			 * the page in order to dirty it -- holding
			 * the page reference is not sufficient to
			 * prevent the inode from being truncated.
			 * Play safe and take the lock.
			 */
			set_page_dirty_lock(page);

		mark_page_accessed(page);
		put_page(page);
	}
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}

static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
	i915_gem_userptr_release__mmu_notifier(obj);
	i915_gem_userptr_release__mm_struct(obj);
}

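/*
 * If the object was created with I915_USERPTR_UNSYNCHRONIZED it has no
 * mmu-notifier yet; set one up before the pages can be exported via
 * dma-buf so that we can still revoke them upon invalidation.
 */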
static int
i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
{
	if (obj->userptr.mmu_object)
		return 0;

	return i915_gem_userptr_init__mmu_notifier(obj, 0);
}

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE |
		 I915_GEM_OBJECT_ASYNC_CANCEL,
	.get_pages = i915_gem_userptr_get_pages,
	.put_pages = i915_gem_userptr_put_pages,
	.dmabuf_export = i915_gem_userptr_dmabuf_export,
	.release = i915_gem_userptr_release,
};

/*
 * Creates a new mm object that wraps some normal memory from the process
 * context - user memory.
 *
 * We impose several restrictions upon the memory being mapped
 * into the GPU.
 * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
 * 2. It must be normal system memory, not a pointer into another map of IO
 *    space (e.g. it must not be a GTT mmapping of another object).
 * 3. We only allow a bo as large as we could in theory map into the GTT,
 *    that is we limit the size to the total size of the GTT.
 * 4. The bo is marked as being snoopable. The backing pages are left
 *    accessible directly by the CPU, but reads and writes by the GPU may
 *    incur the cost of a snoop (unless you have an LLC architecture).
 *
 * Synchronisation between multiple users and the GPU is left to userspace
 * through the normal set-domain-ioctl. The kernel will enforce that the
 * GPU relinquishes the VMA before it is returned back to the system
 * i.e. upon free(), munmap() or process termination. However, the userspace
 * malloc() library may not immediately relinquish the VMA after free() and
 * instead reuse it whilst the GPU is still reading and writing to the VMA.
 * Caveat emptor.
 *
 * Also note that the object created here is not currently a "first class"
 * object, in that several ioctls are banned. These are the CPU access
 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 * direct access via your pointer rather than use those ioctls. Another
 * restriction is that we do not allow userptr surfaces to be pinned to the
 * hardware and so we reject any attempt to create a framebuffer out of a
 * userptr.
 *
 * If you think this is a good interface to use to pass GPU memory between
 * drivers, please use dma-buf instead. In fact, wherever possible use
 * dma-buf instead.
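 *
 * As a purely illustrative userspace sketch (assuming libdrm's drmIoctl()
 * wrapper and a page-aligned allocation of a page-multiple size at ptr),
 * the returned arg.handle is an ordinary GEM handle:
 *
 *	struct drm_i915_gem_userptr arg = {
 *		.user_ptr = (uintptr_t)ptr,
 *		.user_size = size,
 *		.flags = 0,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
 *		return -errno;
 *	handle = arg.handle;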
 */
int
i915_gem_userptr_ioctl(struct drm_device *dev,
		       void *data,
		       struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_userptr *args = data;
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
		/* We cannot support coherent userptr objects on hw without
		 * LLC where snooping is also broken.
		 */
		return -ENODEV;
	}

	if (args->flags & ~(I915_USERPTR_READ_ONLY |
			    I915_USERPTR_UNSYNCHRONIZED))
		return -EINVAL;

	if (!args->user_size)
		return -EINVAL;

	if (offset_in_page(args->user_ptr | args->user_size))
		return -EINVAL;

	if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
		return -EFAULT;

	if (args->flags & I915_USERPTR_READ_ONLY) {
		struct i915_address_space *vm;

		/*
		 * On almost all of the older hw, we cannot tell the GPU that
		 * a page is readonly.
		 */
		vm = dev_priv->kernel_context->vm;
		if (!vm || !vm->has_read_only)
			return -ENODEV;
	}

	obj = i915_gem_object_alloc();
	if (obj == NULL)
		return -ENOMEM;

	drm_gem_private_object_init(dev, &obj->base, args->user_size);
	i915_gem_object_init(obj, &i915_gem_userptr_ops);
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	obj->userptr.ptr = args->user_ptr;
	if (args->flags & I915_USERPTR_READ_ONLY)
		i915_gem_object_set_readonly(obj);

	/* And keep a pointer to the current->mm for resolving the user pages
	 * at binding. This means that we need to hook into the mmu_notifier
	 * in order to detect if the mmu is destroyed.
	 */
	ret = i915_gem_userptr_init__mm_struct(obj);
	if (ret == 0)
		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
	if (ret == 0)
		ret = drm_gem_handle_create(file, &obj->base, &handle);

	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	args->handle = handle;
	return 0;
}

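/*
 * The userptr workqueue serves both the deferred get_user_pages workers
 * and the deferred i915_mm_struct teardown (see __i915_mm_struct_free()).
 */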
int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
	mutex_init(&dev_priv->mm_lock);
	hash_init(dev_priv->mm_structs);

	dev_priv->mm.userptr_wq =
		alloc_workqueue("i915-userptr-acquire",
				WQ_HIGHPRI | WQ_UNBOUND,
				0);
	if (!dev_priv->mm.userptr_wq)
		return -ENOMEM;

	return 0;
}

void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
{
	destroy_workqueue(dev_priv->mm.userptr_wq);
}