xref: /openbmc/linux/drivers/iommu/iommufd/device.c (revision d7955ce4)
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
#include <linux/iommufd.h>
#include <linux/slab.h>
#include <linux/iommu.h>

#include "io_pagetable.h"
#include "iommufd_private.h"

static bool allow_unsafe_interrupts;
module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(
	allow_unsafe_interrupts,
	"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
	"the MSI interrupt window. Enabling this is a security weakness.");
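
/*
 * Usage sketch: when iommufd is built as a module the parameter can be set at
 * load time, and since it is marked S_IWUSR root can also change it at
 * runtime, e.g.:
 *
 *	modprobe iommufd allow_unsafe_interrupts=1
 *	echo 1 > /sys/module/iommufd/parameters/allow_unsafe_interrupts
 */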

/*
 * Destroy callback for IOMMUFD_OBJ_DEVICE objects; releases what
 * iommufd_device_bind() took.
 */
void iommufd_device_destroy(struct iommufd_object *obj)
{
	struct iommufd_device *idev =
		container_of(obj, struct iommufd_device, obj);

	iommu_device_release_dma_owner(idev->dev);
	iommu_group_put(idev->group);
	if (!iommufd_selftest_is_mock_dev(idev->dev))
		iommufd_ctx_put(idev->ictx);
}

/**
 * iommufd_device_bind - Bind a physical device to an iommu fd
 * @ictx: iommufd file descriptor
 * @dev: Pointer to a physical device struct
 * @id: Output ID number to return to userspace for this device
 *
 * A successful bind establishes ownership of the device and returns a
 * struct iommufd_device pointer; otherwise an error pointer is returned.
 *
 * A driver using this API must set driver_managed_dma and must not touch
 * the device until this routine succeeds and establishes ownership.
 *
 * Binding a PCI device places the entire RID under iommufd control.
 *
 * The caller must undo this with iommufd_device_unbind()
 */
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
					   struct device *dev, u32 *id)
{
	struct iommufd_device *idev;
	struct iommu_group *group;
	int rc;

	/*
	 * iommufd always sets IOMMU_CACHE because we offer no way for userspace
	 * to restore cache coherency.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
		return ERR_PTR(-EINVAL);

	group = iommu_group_get(dev);
	if (!group)
		return ERR_PTR(-ENODEV);

	rc = iommu_device_claim_dma_owner(dev, ictx);
	if (rc)
		goto out_group_put;

	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
	if (IS_ERR(idev)) {
		rc = PTR_ERR(idev);
		goto out_release_owner;
	}
	idev->ictx = ictx;
	if (!iommufd_selftest_is_mock_dev(dev))
		iommufd_ctx_get(ictx);
	idev->dev = dev;
	idev->enforce_cache_coherency =
		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
	/* The calling driver is a user until iommufd_device_unbind() */
	refcount_inc(&idev->obj.users);
	/* group refcount moves into iommufd_device */
	idev->group = group;

	/*
	 * If the caller fails after this success it must call
	 * iommufd_device_unbind() which is safe since we hold this refcount.
	 * This also means the device is a leaf in the graph and no other object
	 * can take a reference on it.
	 */
	iommufd_object_finalize(ictx, &idev->obj);
	*id = idev->obj.id;
	return idev;

out_release_owner:
	iommu_device_release_dma_owner(dev);
out_group_put:
	iommu_group_put(group);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
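
/*
 * Usage sketch (hypothetical my_* names, assuming the driver sets
 * driver_managed_dma and obtains the iommufd_ctx from the fd userspace
 * supplied, e.g. with iommufd_ctx_from_file()):
 *
 *	static int my_driver_bind(struct my_driver *md, struct iommufd_ctx *ictx)
 *	{
 *		struct iommufd_device *idev;
 *		u32 dev_id;
 *
 *		idev = iommufd_device_bind(ictx, md->dev, &dev_id);
 *		if (IS_ERR(idev))
 *			return PTR_ERR(idev);
 *		md->idev = idev;
 *		md->dev_id = dev_id;	// returned to userspace
 *		return 0;
 *	}
 *
 * Teardown is the mirror image: iommufd_device_unbind(md->idev);
 */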

/**
 * iommufd_ctx_has_group - True if any device within the group is bound
 *                         to the ictx
 * @ictx: iommufd file descriptor
 * @group: Pointer to a physical iommu_group struct
 *
 * True if any device within the group has been bound to this ictx, e.g. via
 * iommufd_device_bind(), therefore implying ictx ownership of the group.
 */
bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
{
	struct iommufd_object *obj;
	unsigned long index;

	if (!ictx || !group)
		return false;

	xa_lock(&ictx->objects);
	xa_for_each(&ictx->objects, index, obj) {
		if (obj->type == IOMMUFD_OBJ_DEVICE &&
		    container_of(obj, struct iommufd_device, obj)->group == group) {
			xa_unlock(&ictx->objects);
			return true;
		}
	}
	xa_unlock(&ictx->objects);
	return false;
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);

/**
 * iommufd_device_unbind - Undo iommufd_device_bind()
 * @idev: Device returned by iommufd_device_bind()
 *
 * Release the device from iommufd control. DMA ownership returns to unowned,
 * with DMA controlled by the DMA API. This invalidates the iommufd_device
 * pointer; other APIs that consume it must not be called concurrently.
 */
void iommufd_device_unbind(struct iommufd_device *idev)
{
	bool was_destroyed;

	was_destroyed = iommufd_object_destroy_user(idev->ictx, &idev->obj);
	WARN_ON(!was_destroyed);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);

struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
{
	return idev->ictx;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD);

u32 iommufd_device_to_id(struct iommufd_device *idev)
{
	return idev->obj.id;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD);
static int iommufd_device_setup_msi(struct iommufd_device *idev,
				    struct iommufd_hw_pagetable *hwpt,
				    phys_addr_t sw_msi_start)
{
	int rc;

	/*
	 * If the IOMMU driver gives an IOMMU_RESV_SW_MSI then it is asking us
	 * to call iommu_get_msi_cookie() on its behalf. This is necessary to
	 * set up the MSI window so iommu_dma_prepare_msi() can install pages
	 * into our domain after request_irq(). If it is not done interrupts
	 * will not work on this domain.
	 *
	 * FIXME: This is conceptually broken for iommufd since we want to allow
	 * userspace to change the domains, eg switch from an identity IOAS to a
	 * DMA IOAS. There is currently no way to create an MSI window that
	 * matches what the IRQ layer actually expects in a newly created
	 * domain.
	 */
	if (sw_msi_start != PHYS_ADDR_MAX && !hwpt->msi_cookie) {
		rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start);
		if (rc)
			return rc;

		/*
		 * iommu_get_msi_cookie() can only be called once per domain,
		 * it returns -EBUSY on later calls.
		 */
		hwpt->msi_cookie = true;
	}

	/*
	 * For historical compat with VFIO the insecure interrupt path is
	 * allowed if the module parameter is set. Insecure means that a MemWr
	 * operation from the device (eg a simple DMA) can trigger an
	 * interrupt outside this iommufd context.
	 */
	if (!iommufd_selftest_is_mock_dev(idev->dev) &&
	    !iommu_group_has_isolated_msi(idev->group)) {
		if (!allow_unsafe_interrupts)
			return -EPERM;

		dev_warn(
			idev->dev,
			"MSI interrupts are not secure, they cannot be isolated by the platform. "
			"Check that platform features like interrupt remapping are enabled. "
			"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
	}
	return 0;
}

/* True if any device on @hwpt's device list belongs to @group */
static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt,
					   struct iommu_group *group)
{
	struct iommufd_device *cur_dev;

	lockdep_assert_held(&hwpt->devices_lock);

	list_for_each_entry(cur_dev, &hwpt->devices, devices_item)
		if (cur_dev->group == group)
			return true;
	return false;
}

int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev)
{
	phys_addr_t sw_msi_start = PHYS_ADDR_MAX;
	int rc;

	lockdep_assert_held(&hwpt->devices_lock);

	if (WARN_ON(idev->hwpt))
		return -EINVAL;

	/*
	 * Try to upgrade the domain we have, it is an iommu driver bug to
	 * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail
	 * enforce_cache_coherency when there are no devices attached to the
	 * domain.
	 */
	if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) {
		if (hwpt->domain->ops->enforce_cache_coherency)
			hwpt->enforce_cache_coherency =
				hwpt->domain->ops->enforce_cache_coherency(
					hwpt->domain);
		if (!hwpt->enforce_cache_coherency) {
			WARN_ON(list_empty(&hwpt->devices));
			return -EINVAL;
		}
	}

	rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev,
						   idev->group, &sw_msi_start);
	if (rc)
		return rc;

	rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start);
	if (rc)
		goto err_unresv;

	/*
	 * FIXME: Hack around missing a device-centric iommu api, only attach to
	 * the group once for the first device that is in the group.
	 */
	if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
		rc = iommu_attach_group(hwpt->domain, idev->group);
		if (rc)
			goto err_unresv;
	}
	return 0;
err_unresv:
	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
	return rc;
}

void iommufd_hw_pagetable_detach(struct iommufd_hw_pagetable *hwpt,
				 struct iommufd_device *idev)
{
	/*
	 * @idev is not on hwpt->devices when this runs, so the group is only
	 * detached once no other bound device in the same group remains
	 * attached to this hwpt.
	 */
	if (!iommufd_hw_pagetable_has_group(hwpt, idev->group))
		iommu_detach_group(hwpt->domain, idev->group);
	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
}

static int iommufd_device_do_attach(struct iommufd_device *idev,
				    struct iommufd_hw_pagetable *hwpt)
{
	int rc;

	mutex_lock(&hwpt->devices_lock);
	rc = iommufd_hw_pagetable_attach(hwpt, idev);
	if (rc)
		goto out_unlock;

	idev->hwpt = hwpt;
	refcount_inc(&hwpt->obj.users);
	list_add(&idev->devices_item, &hwpt->devices);
out_unlock:
	mutex_unlock(&hwpt->devices_lock);
	return rc;
}

/*
 * When automatically managing the domains we search for a compatible domain in
 * the IOAS and if one is found use it, otherwise create a new domain.
 * Automatic domain selection will never pick a manually created domain.
 */
static int iommufd_device_auto_get_domain(struct iommufd_device *idev,
					  struct iommufd_ioas *ioas)
{
	struct iommufd_hw_pagetable *hwpt;
	int rc;

	/*
	 * There is no differentiation when domains are allocated, so any domain
	 * that is willing to attach to the device is interchangeable with any
	 * other.
	 */
	mutex_lock(&ioas->mutex);
	list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) {
		if (!hwpt->auto_domain)
			continue;

		if (!iommufd_lock_obj(&hwpt->obj))
			continue;
		rc = iommufd_device_do_attach(idev, hwpt);
		iommufd_put_object(&hwpt->obj);

		/*
		 * -EINVAL means the domain is incompatible with the device.
		 * Other error codes should propagate to userspace as failure.
		 * Success means the domain is attached.
		 */
		if (rc == -EINVAL)
			continue;
		goto out_unlock;
	}

	hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, true);
	if (IS_ERR(hwpt)) {
		rc = PTR_ERR(hwpt);
		goto out_unlock;
	}
	hwpt->auto_domain = true;

	mutex_unlock(&ioas->mutex);
	iommufd_object_finalize(idev->ictx, &hwpt->obj);
	return 0;
out_unlock:
	mutex_unlock(&ioas->mutex);
	return rc;
}

/**
 * iommufd_device_attach - Connect a device to an iommu_domain
 * @idev: device to attach
 * @pt_id: Input an IOMMUFD_OBJ_IOAS or IOMMUFD_OBJ_HW_PAGETABLE ID.
 *         Output the IOMMUFD_OBJ_HW_PAGETABLE ID.
 *
 * This connects the device to an iommu_domain, either automatically or manually
 * selected. Once this completes the device can do DMA.
 *
 * The caller should return the resulting pt_id back to userspace.
 * This function is undone by calling iommufd_device_detach().
 */
int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
{
	struct iommufd_object *pt_obj;
	int rc;

	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
	if (IS_ERR(pt_obj))
		return PTR_ERR(pt_obj);

	switch (pt_obj->type) {
	case IOMMUFD_OBJ_HW_PAGETABLE: {
		struct iommufd_hw_pagetable *hwpt =
			container_of(pt_obj, struct iommufd_hw_pagetable, obj);

		rc = iommufd_device_do_attach(idev, hwpt);
		if (rc)
			goto out_put_pt_obj;
		break;
	}
	case IOMMUFD_OBJ_IOAS: {
		struct iommufd_ioas *ioas =
			container_of(pt_obj, struct iommufd_ioas, obj);

		rc = iommufd_device_auto_get_domain(idev, ioas);
		if (rc)
			goto out_put_pt_obj;
		break;
	}
	default:
		rc = -EINVAL;
		goto out_put_pt_obj;
	}

	refcount_inc(&idev->obj.users);
	*pt_id = idev->hwpt->obj.id;
	rc = 0;

out_put_pt_obj:
	iommufd_put_object(pt_obj);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
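
/*
 * Usage sketch (hypothetical my_* names): userspace hands the driver an IOAS
 * or hwpt ID and the driver reports back the hwpt ID that was actually used:
 *
 *	u32 pt_id = user_pt_id;
 *	int rc;
 *
 *	rc = iommufd_device_attach(md->idev, &pt_id);
 *	if (rc)
 *		return rc;
 *	md->attached_pt_id = pt_id;	// return this to userspace
 *
 * Undo with iommufd_device_detach(md->idev) before unbinding.
 */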

/**
 * iommufd_device_detach - Disconnect a device from an iommu_domain
 * @idev: device to detach
 *
 * Undo iommufd_device_attach(). This disconnects the idev from the previously
 * attached pt_id. The device returns to a blocked DMA translation.
 */
void iommufd_device_detach(struct iommufd_device *idev)
{
	struct iommufd_hw_pagetable *hwpt = idev->hwpt;

	mutex_lock(&hwpt->devices_lock);
	list_del(&idev->devices_item);
	idev->hwpt = NULL;
	iommufd_hw_pagetable_detach(hwpt, idev);
	mutex_unlock(&hwpt->devices_lock);

	if (hwpt->auto_domain)
		iommufd_object_destroy_user(idev->ictx, &hwpt->obj);
	else
		refcount_dec(&hwpt->obj.users);

	refcount_dec(&idev->obj.users);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);

void iommufd_access_destroy_object(struct iommufd_object *obj)
{
	struct iommufd_access *access =
		container_of(obj, struct iommufd_access, obj);

	if (access->ioas) {
		iopt_remove_access(&access->ioas->iopt, access);
		refcount_dec(&access->ioas->obj.users);
		access->ioas = NULL;
	}
	iommufd_ctx_put(access->ictx);
}

/**
 * iommufd_access_create - Create an iommufd_access
 * @ictx: iommufd file descriptor
 * @ops: Driver's ops to associate with the access
 * @data: Opaque data to pass into ops functions
 * @id: Output ID number to return to userspace for this access
 *
 * An iommufd_access allows a driver to read/write to the IOAS without using
 * DMA. The underlying CPU memory can be accessed using the
 * iommufd_access_pin_pages() or iommufd_access_rw() functions.
 *
 * The provided ops are required to use iommufd_access_pin_pages().
 */
struct iommufd_access *
iommufd_access_create(struct iommufd_ctx *ictx,
		      const struct iommufd_access_ops *ops, void *data, u32 *id)
{
	struct iommufd_access *access;

	/*
	 * There is no uAPI for the access object, but to keep things symmetric
	 * use the object infrastructure anyhow.
	 */
	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
	if (IS_ERR(access))
		return access;

	access->data = data;
	access->ops = ops;

	if (ops->needs_pin_pages)
		access->iova_alignment = PAGE_SIZE;
	else
		access->iova_alignment = 1;

	/* The calling driver is a user until iommufd_access_destroy() */
	refcount_inc(&access->obj.users);
	access->ictx = ictx;
	iommufd_ctx_get(ictx);
	iommufd_object_finalize(ictx, &access->obj);
	*id = access->obj.id;
	mutex_init(&access->ioas_lock);
	return access;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);

/**
 * iommufd_access_destroy - Destroy an iommufd_access
 * @access: The access to destroy
 *
 * The caller must stop using the access before destroying it.
 */
void iommufd_access_destroy(struct iommufd_access *access)
{
	bool was_destroyed;

	was_destroyed = iommufd_object_destroy_user(access->ictx, &access->obj);
	WARN_ON(!was_destroyed);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);

/* Disconnect the access from its IOAS; pairs with iommufd_access_attach() */
void iommufd_access_detach(struct iommufd_access *access)
{
	struct iommufd_ioas *cur_ioas = access->ioas;

	mutex_lock(&access->ioas_lock);
	if (WARN_ON(!access->ioas))
		goto out;
	/*
	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
	 */
	access->ioas = NULL;

	if (access->ops->unmap) {
		mutex_unlock(&access->ioas_lock);
		access->ops->unmap(access->data, 0, ULONG_MAX);
		mutex_lock(&access->ioas_lock);
	}
	iopt_remove_access(&cur_ioas->iopt, access);
	refcount_dec(&cur_ioas->obj.users);
out:
	access->ioas_unpin = NULL;
	mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);

/* Connect the access to the IOAS identified by @ioas_id */
int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
{
	struct iommufd_ioas *new_ioas;
	int rc = 0;

	mutex_lock(&access->ioas_lock);
	if (WARN_ON(access->ioas || access->ioas_unpin)) {
		mutex_unlock(&access->ioas_lock);
		return -EINVAL;
	}

	new_ioas = iommufd_get_ioas(access->ictx, ioas_id);
	if (IS_ERR(new_ioas)) {
		mutex_unlock(&access->ioas_lock);
		return PTR_ERR(new_ioas);
	}

	rc = iopt_add_access(&new_ioas->iopt, access);
	if (rc) {
		mutex_unlock(&access->ioas_lock);
		iommufd_put_object(&new_ioas->obj);
		return rc;
	}
	iommufd_ref_to_users(&new_ioas->obj);

	access->ioas = new_ioas;
	access->ioas_unpin = new_ioas;
	mutex_unlock(&access->ioas_lock);
	return 0;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);
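
/*
 * Usage sketch (hypothetical my_* names; struct iommufd_access_ops is
 * declared in include/linux/iommufd.h): the typical lifecycle for an
 * emulated/mdev-style driver with no physical IOMMU is create, attach, use,
 * detach, destroy:
 *
 *	static const struct iommufd_access_ops my_access_ops = {
 *		.needs_pin_pages = 1,
 *		.unmap = my_access_unmap,
 *	};
 *
 *	access = iommufd_access_create(ictx, &my_access_ops, md, &access_id);
 *	if (IS_ERR(access))
 *		return PTR_ERR(access);
 *	rc = iommufd_access_attach(access, ioas_id);
 *	if (rc) {
 *		iommufd_access_destroy(access);
 *		return rc;
 *	}
 *	// ... iommufd_access_pin_pages()/iommufd_access_rw() on the IOAS ...
 *	iommufd_access_detach(access);
 *	iommufd_access_destroy(access);
 */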

/**
 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
 * @iopt: iopt to work on
 * @iova: Starting iova in the iopt
 * @length: Number of bytes
 *
 * After this function returns there should be no users attached to the pages
 * linked to this iopt that intersect with iova,length. Anyone that has attached
 * a user through iommufd_access_pin_pages() needs to detach it through
 * iommufd_access_unpin_pages() before this function returns.
 *
 * iommufd_access_destroy() will wait for any outstanding unmap callback to
 * complete. Once iommufd_access_destroy() returns, no unmap ops are running or
 * will run in the future. Due to this a driver must not create locking that
 * prevents unmap from completing while iommufd_access_destroy() is running.
 */
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length)
{
	struct iommufd_ioas *ioas =
		container_of(iopt, struct iommufd_ioas, iopt);
	struct iommufd_access *access;
	unsigned long index;

	xa_lock(&ioas->iopt.access_list);
	xa_for_each(&ioas->iopt.access_list, index, access) {
		if (!iommufd_lock_obj(&access->obj))
			continue;
		xa_unlock(&ioas->iopt.access_list);

		access->ops->unmap(access->data, iova, length);

		iommufd_put_object(&access->obj);
		xa_lock(&ioas->iopt.access_list);
	}
	xa_unlock(&ioas->iopt.access_list);
}
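
/*
 * Sketch of the other side of this contract (hypothetical my_* names): the
 * driver's unmap callback must stop using the range and drop any pins that
 * intersect it before returning, using the exact iova/length it pinned with:
 *
 *	static void my_access_unmap(void *data, unsigned long iova,
 *				    unsigned long length)
 *	{
 *		struct my_driver *md = data;
 *
 *		// quiesce whatever emulation uses this IOVA range, then unpin
 *		// each previously pinned range that intersects it:
 *		iommufd_access_unpin_pages(md->access, md->pin_iova, md->pin_len);
 *	}
 */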

/**
 * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages()
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 *
 * Release pages previously returned by iommufd_access_pin_pages(). The caller
 * must stop accessing the pages before calling this. The iova/length must
 * exactly match those given to iommufd_access_pin_pages().
 */
void iommufd_access_unpin_pages(struct iommufd_access *access,
				unsigned long iova, unsigned long length)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	unsigned long last_iova;
	struct iopt_area *area;

	if (WARN_ON(!length) ||
	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
		return;

	mutex_lock(&access->ioas_lock);
	/*
	 * The driver must be doing something wrong if it calls this before an
	 * iommufd_access_attach() or after an iommufd_access_detach().
	 */
	if (WARN_ON(!access->ioas_unpin)) {
		mutex_unlock(&access->ioas_lock);
		return;
	}
	iopt = &access->ioas_unpin->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
		iopt_area_remove_access(
			area, iopt_area_iova_to_index(area, iter.cur_iova),
			iopt_area_iova_to_index(
				area,
				min(last_iova, iopt_area_last_iova(area))));
	WARN_ON(!iopt_area_contig_done(&iter));
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);

static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
{
	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
		return false;

	if (!iopt_area_contig_done(iter) &&
	    (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
	     PAGE_SIZE) != (PAGE_SIZE - 1))
		return false;
	return true;
}

static bool check_area_prot(struct iopt_area *area, unsigned int flags)
{
	if (flags & IOMMUFD_ACCESS_RW_WRITE)
		return area->iommu_prot & IOMMU_WRITE;
	return area->iommu_prot & IOMMU_READ;
}

/**
 * iommufd_access_pin_pages() - Return a list of pages under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 * @out_pages: Output page list
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Reads @length bytes starting at iova and returns the struct page * pointers.
 * These can be kmap'd by the caller for CPU access.
 *
 * The caller must perform iommufd_access_unpin_pages() when done to balance
 * this.
 *
 * This API always requires a page aligned iova. This happens naturally if the
 * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
 * smaller alignments have corner cases where this API can fail on otherwise
 * aligned iova.
 */
int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
			     unsigned long length, struct page **out_pages,
			     unsigned int flags)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	/* Driver's ops don't support pin_pages */
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
		return -EINVAL;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long last_index = iopt_area_iova_to_index(area, last);
		unsigned long index =
			iopt_area_iova_to_index(area, iter.cur_iova);

		if (area->prevent_access ||
		    !iopt_area_contig_is_aligned(&iter)) {
			rc = -EINVAL;
			goto err_remove;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_remove;
		}

		rc = iopt_area_add_access(area, index, last_index, out_pages,
					  flags);
		if (rc)
			goto err_remove;
		out_pages += last_index - index + 1;
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_remove;
	}

	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return 0;

err_remove:
	if (iova < iter.cur_iova) {
		last_iova = iter.cur_iova - 1;
		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
			iopt_area_remove_access(
				area,
				iopt_area_iova_to_index(area, iter.cur_iova),
				iopt_area_iova_to_index(
					area, min(last_iova,
						  iopt_area_last_iova(area))));
	}
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
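
/*
 * Usage sketch (hypothetical names, iova assumed to be PAGE_SIZE aligned as
 * required above): pin one page, touch it from the CPU, then unpin with the
 * exact same iova/length:
 *
 *	struct page *page;
 *	void *va;
 *	int rc;
 *
 *	rc = iommufd_access_pin_pages(access, iova, PAGE_SIZE, &page,
 *				      IOMMUFD_ACCESS_RW_WRITE);
 *	if (rc)
 *		return rc;
 *	va = kmap_local_page(page);
 *	// ... CPU reads/writes through va ...
 *	kunmap_local(va);
 *	iommufd_access_unpin_pages(access, iova, PAGE_SIZE);
 */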

/**
 * iommufd_access_rw - Read or write data under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @data: Kernel buffer to copy to/from
 * @length: Number of bytes to access
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Copy data between the kernel buffer @data and the IOVA range given by
 * @iova/@length. If @flags indicates IOMMUFD_ACCESS_RW_KTHREAD then a large
 * copy can be optimized by switching it to copy_to/from_user().
 */
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
		      void *data, size_t length, unsigned int flags)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	struct iopt_area *area;
	unsigned long last_iova;
	int rc;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long bytes = (last - iter.cur_iova) + 1;

		if (area->prevent_access) {
			rc = -EINVAL;
			goto err_out;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_out;
		}

		rc = iopt_pages_rw_access(
			area->pages, iopt_area_start_byte(area, iter.cur_iova),
			data, bytes, flags);
		if (rc)
			goto err_out;
		data += bytes;
	}
	if (!iopt_area_contig_done(&iter))
		rc = -ENOENT;
err_out:
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
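
/*
 * Usage sketch (hypothetical names): copy a small descriptor out of the IOAS
 * without pinning; flags of 0 read the IOAS into the kernel buffer, while
 * adding IOMMUFD_ACCESS_RW_WRITE writes the buffer out instead:
 *
 *	struct my_desc desc;
 *	int rc;
 *
 *	rc = iommufd_access_rw(access, desc_iova, &desc, sizeof(desc), 0);
 *	if (rc)
 *		return rc;
 */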
828