/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 */
5f394576eSJason Gunthorpe #ifndef __IO_PAGETABLE_H
6f394576eSJason Gunthorpe #define __IO_PAGETABLE_H
7f394576eSJason Gunthorpe 
8f394576eSJason Gunthorpe #include <linux/interval_tree.h>
9f394576eSJason Gunthorpe #include <linux/mutex.h>
10f394576eSJason Gunthorpe #include <linux/kref.h>
11f394576eSJason Gunthorpe #include <linux/xarray.h>
12f394576eSJason Gunthorpe 
13f394576eSJason Gunthorpe #include "iommufd_private.h"
14f394576eSJason Gunthorpe 
15f394576eSJason Gunthorpe struct iommu_domain;
16f394576eSJason Gunthorpe 
17f394576eSJason Gunthorpe /*
18f394576eSJason Gunthorpe  * Each io_pagetable is composed of intervals of areas which cover regions of
19f394576eSJason Gunthorpe  * the iova that are backed by something. iova not covered by areas is not
20f394576eSJason Gunthorpe  * populated in the page table. Each area is fully populated with pages.
21f394576eSJason Gunthorpe  *
22f394576eSJason Gunthorpe  * iovas are in byte units, but must be iopt->iova_alignment aligned.
23f394576eSJason Gunthorpe  *
24f394576eSJason Gunthorpe  * pages can be NULL, this means some other thread is still working on setting
25f394576eSJason Gunthorpe  * up or tearing down the area. When observed under the write side of the
26f394576eSJason Gunthorpe  * domain_rwsem a NULL pages must mean the area is still being setup and no
27f394576eSJason Gunthorpe  * domains are filled.
28f394576eSJason Gunthorpe  *
29f394576eSJason Gunthorpe  * storage_domain points at an arbitrary iommu_domain that is holding the PFNs
30f394576eSJason Gunthorpe  * for this area. It is locked by the pages->mutex. This simplifies the locking
31f394576eSJason Gunthorpe  * as the pages code can rely on the storage_domain without having to get the
32f394576eSJason Gunthorpe  * iopt->domains_rwsem.
33f394576eSJason Gunthorpe  *
34f394576eSJason Gunthorpe  * The io_pagetable::iova_rwsem protects node
35f394576eSJason Gunthorpe  * The iopt_pages::mutex protects pages_node
36*c9b8a83aSJason Gunthorpe  * iopt and iommu_prot are immutable
37f394576eSJason Gunthorpe  * The pages::mutex protects num_accesses
38f394576eSJason Gunthorpe  */
39f394576eSJason Gunthorpe struct iopt_area {
40f394576eSJason Gunthorpe 	struct interval_tree_node node;
41f394576eSJason Gunthorpe 	struct interval_tree_node pages_node;
42f394576eSJason Gunthorpe 	struct io_pagetable *iopt;
43f394576eSJason Gunthorpe 	struct iopt_pages *pages;
44f394576eSJason Gunthorpe 	struct iommu_domain *storage_domain;
45f394576eSJason Gunthorpe 	/* How many bytes into the first page the area starts */
46f394576eSJason Gunthorpe 	unsigned int page_offset;
47f394576eSJason Gunthorpe 	/* IOMMU_READ, IOMMU_WRITE, etc */
48f394576eSJason Gunthorpe 	int iommu_prot;
4951fe6141SJason Gunthorpe 	bool prevent_access : 1;
50f394576eSJason Gunthorpe 	unsigned int num_accesses;
51f394576eSJason Gunthorpe };
52f394576eSJason Gunthorpe 
5351fe6141SJason Gunthorpe struct iopt_allowed {
5451fe6141SJason Gunthorpe 	struct interval_tree_node node;
5551fe6141SJason Gunthorpe };
5651fe6141SJason Gunthorpe 
5751fe6141SJason Gunthorpe struct iopt_reserved {
5851fe6141SJason Gunthorpe 	struct interval_tree_node node;
5951fe6141SJason Gunthorpe 	void *owner;
6051fe6141SJason Gunthorpe };
6151fe6141SJason Gunthorpe 
/*
 * Area <-> iommu_domain fill/unfill/unmap entry points. Only the prototypes
 * live in this header.
 */
int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages);
void iopt_area_unfill_domains(struct iopt_area *area,
			      struct iopt_pages *pages);

int iopt_area_fill_domain(struct iopt_area *area,
			  struct iommu_domain *domain);
void iopt_area_unfill_domain(struct iopt_area *area,
			     struct iopt_pages *pages,
			     struct iommu_domain *domain);
void iopt_area_unmap_domain(struct iopt_area *area,
			    struct iommu_domain *domain);
708d160cd4SJason Gunthorpe 
iopt_area_index(struct iopt_area * area)71f394576eSJason Gunthorpe static inline unsigned long iopt_area_index(struct iopt_area *area)
72f394576eSJason Gunthorpe {
73f394576eSJason Gunthorpe 	return area->pages_node.start;
74f394576eSJason Gunthorpe }
75f394576eSJason Gunthorpe 
iopt_area_last_index(struct iopt_area * area)76f394576eSJason Gunthorpe static inline unsigned long iopt_area_last_index(struct iopt_area *area)
77f394576eSJason Gunthorpe {
78f394576eSJason Gunthorpe 	return area->pages_node.last;
79f394576eSJason Gunthorpe }
80f394576eSJason Gunthorpe 
iopt_area_iova(struct iopt_area * area)81f394576eSJason Gunthorpe static inline unsigned long iopt_area_iova(struct iopt_area *area)
82f394576eSJason Gunthorpe {
83f394576eSJason Gunthorpe 	return area->node.start;
84f394576eSJason Gunthorpe }
85f394576eSJason Gunthorpe 
iopt_area_last_iova(struct iopt_area * area)86f394576eSJason Gunthorpe static inline unsigned long iopt_area_last_iova(struct iopt_area *area)
87f394576eSJason Gunthorpe {
88f394576eSJason Gunthorpe 	return area->node.last;
89f394576eSJason Gunthorpe }
90f394576eSJason Gunthorpe 
iopt_area_length(struct iopt_area * area)918d160cd4SJason Gunthorpe static inline size_t iopt_area_length(struct iopt_area *area)
928d160cd4SJason Gunthorpe {
938d160cd4SJason Gunthorpe 	return (area->node.last - area->node.start) + 1;
948d160cd4SJason Gunthorpe }
958d160cd4SJason Gunthorpe 
9651fe6141SJason Gunthorpe /*
9751fe6141SJason Gunthorpe  * Number of bytes from the start of the iopt_pages that the iova begins.
9851fe6141SJason Gunthorpe  * iopt_area_start_byte() / PAGE_SIZE encodes the starting page index
9951fe6141SJason Gunthorpe  * iopt_area_start_byte() % PAGE_SIZE encodes the offset within that page
10051fe6141SJason Gunthorpe  */
iopt_area_start_byte(struct iopt_area * area,unsigned long iova)10151fe6141SJason Gunthorpe static inline unsigned long iopt_area_start_byte(struct iopt_area *area,
10251fe6141SJason Gunthorpe 						 unsigned long iova)
10351fe6141SJason Gunthorpe {
10452f52858SJason Gunthorpe 	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
10552f52858SJason Gunthorpe 		WARN_ON(iova < iopt_area_iova(area) ||
10652f52858SJason Gunthorpe 			iova > iopt_area_last_iova(area));
10751fe6141SJason Gunthorpe 	return (iova - iopt_area_iova(area)) + area->page_offset +
10851fe6141SJason Gunthorpe 	       iopt_area_index(area) * PAGE_SIZE;
10951fe6141SJason Gunthorpe }
11051fe6141SJason Gunthorpe 
iopt_area_iova_to_index(struct iopt_area * area,unsigned long iova)11151fe6141SJason Gunthorpe static inline unsigned long iopt_area_iova_to_index(struct iopt_area *area,
11251fe6141SJason Gunthorpe 						    unsigned long iova)
11351fe6141SJason Gunthorpe {
11451fe6141SJason Gunthorpe 	return iopt_area_start_byte(area, iova) / PAGE_SIZE;
11551fe6141SJason Gunthorpe }
11651fe6141SJason Gunthorpe 
1178d160cd4SJason Gunthorpe #define __make_iopt_iter(name)                                                 \
1188d160cd4SJason Gunthorpe 	static inline struct iopt_##name *iopt_##name##_iter_first(            \
1198d160cd4SJason Gunthorpe 		struct io_pagetable *iopt, unsigned long start,                \
1208d160cd4SJason Gunthorpe 		unsigned long last)                                            \
1218d160cd4SJason Gunthorpe 	{                                                                      \
1228d160cd4SJason Gunthorpe 		struct interval_tree_node *node;                               \
1238d160cd4SJason Gunthorpe 									       \
1248d160cd4SJason Gunthorpe 		lockdep_assert_held(&iopt->iova_rwsem);                        \
1258d160cd4SJason Gunthorpe 		node = interval_tree_iter_first(&iopt->name##_itree, start,    \
1268d160cd4SJason Gunthorpe 						last);                         \
1278d160cd4SJason Gunthorpe 		if (!node)                                                     \
1288d160cd4SJason Gunthorpe 			return NULL;                                           \
1298d160cd4SJason Gunthorpe 		return container_of(node, struct iopt_##name, node);           \
1308d160cd4SJason Gunthorpe 	}                                                                      \
1318d160cd4SJason Gunthorpe 	static inline struct iopt_##name *iopt_##name##_iter_next(             \
1328d160cd4SJason Gunthorpe 		struct iopt_##name *last_node, unsigned long start,            \
1338d160cd4SJason Gunthorpe 		unsigned long last)                                            \
1348d160cd4SJason Gunthorpe 	{                                                                      \
1358d160cd4SJason Gunthorpe 		struct interval_tree_node *node;                               \
1368d160cd4SJason Gunthorpe 									       \
1378d160cd4SJason Gunthorpe 		node = interval_tree_iter_next(&last_node->node, start, last); \
1388d160cd4SJason Gunthorpe 		if (!node)                                                     \
1398d160cd4SJason Gunthorpe 			return NULL;                                           \
1408d160cd4SJason Gunthorpe 		return container_of(node, struct iopt_##name, node);           \
1418d160cd4SJason Gunthorpe 	}
1428d160cd4SJason Gunthorpe 
1438d160cd4SJason Gunthorpe __make_iopt_iter(area)
14451fe6141SJason Gunthorpe __make_iopt_iter(allowed)
14551fe6141SJason Gunthorpe __make_iopt_iter(reserved)
14651fe6141SJason Gunthorpe 
/*
 * Cursor used by iopt_area_contig_init()/iopt_area_contig_next() and tested
 * by iopt_area_contig_done().
 */
struct iopt_area_contig_iter {
	unsigned long cur_iova;
	unsigned long last_iova;
	struct iopt_area *area;
};
/* Start and advance a contiguous-area walk; only the prototypes live here. */
struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
					struct io_pagetable *iopt,
					unsigned long iova,
					unsigned long last_iova);

struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter);
15751fe6141SJason Gunthorpe 
iopt_area_contig_done(struct iopt_area_contig_iter * iter)15851fe6141SJason Gunthorpe static inline bool iopt_area_contig_done(struct iopt_area_contig_iter *iter)
15951fe6141SJason Gunthorpe {
16051fe6141SJason Gunthorpe 	return iter->area && iter->last_iova <= iopt_area_last_iova(iter->area);
16151fe6141SJason Gunthorpe }
16251fe6141SJason Gunthorpe 
/*
 * Walk the run of contiguous areas spanning the iova,last_iova range. The
 * loop stops as soon as iopt_area_contig_init()/iopt_area_contig_next() hand
 * back NULL, so the caller must test iopt_area_contig_done() afterwards to
 * find out whether contiguous areas existed for the range.
 */
#define iopt_for_each_contig_area(iter, area, iopt, iova, last_iova)        \
	for (area = iopt_area_contig_init(iter, iopt, iova, last_iova);     \
	     area;                                                          \
	     area = iopt_area_contig_next(iter))
1718d160cd4SJason Gunthorpe 
/*
 * Page accounting modes.
 * NOTE(review): presumably the values stored in iopt_pages::account_mode -
 * confirm against the users of that field.
 */
enum {
	IOPT_PAGES_ACCOUNT_NONE	= 0,
	IOPT_PAGES_ACCOUNT_USER	= 1,
	IOPT_PAGES_ACCOUNT_MM	= 2,
};
177f394576eSJason Gunthorpe 
178f394576eSJason Gunthorpe /*
179f394576eSJason Gunthorpe  * This holds a pinned page list for multiple areas of IO address space. The
180f394576eSJason Gunthorpe  * pages always originate from a linear chunk of userspace VA. Multiple
181f394576eSJason Gunthorpe  * io_pagetable's, through their iopt_area's, can share a single iopt_pages
182f394576eSJason Gunthorpe  * which avoids multi-pinning and double accounting of page consumption.
183f394576eSJason Gunthorpe  *
184f394576eSJason Gunthorpe  * indexes in this structure are measured in PAGE_SIZE units, are 0 based from
185f394576eSJason Gunthorpe  * the start of the uptr and extend to npages. pages are pinned dynamically
186f394576eSJason Gunthorpe  * according to the intervals in the access_itree and domains_itree, npinned
187f394576eSJason Gunthorpe  * records the current number of pages pinned.
188f394576eSJason Gunthorpe  */
189f394576eSJason Gunthorpe struct iopt_pages {
190f394576eSJason Gunthorpe 	struct kref kref;
191f394576eSJason Gunthorpe 	struct mutex mutex;
192f394576eSJason Gunthorpe 	size_t npages;
193f394576eSJason Gunthorpe 	size_t npinned;
194f394576eSJason Gunthorpe 	size_t last_npinned;
195f394576eSJason Gunthorpe 	struct task_struct *source_task;
196f394576eSJason Gunthorpe 	struct mm_struct *source_mm;
197f394576eSJason Gunthorpe 	struct user_struct *source_user;
198f394576eSJason Gunthorpe 	void __user *uptr;
199f394576eSJason Gunthorpe 	bool writable:1;
200f394576eSJason Gunthorpe 	u8 account_mode;
201f394576eSJason Gunthorpe 
202f394576eSJason Gunthorpe 	struct xarray pinned_pfns;
203f394576eSJason Gunthorpe 	/* Of iopt_pages_access::node */
204f394576eSJason Gunthorpe 	struct rb_root_cached access_itree;
205f394576eSJason Gunthorpe 	/* Of iopt_area::pages_node */
206f394576eSJason Gunthorpe 	struct rb_root_cached domains_itree;
207f394576eSJason Gunthorpe };
208f394576eSJason Gunthorpe 
2098d160cd4SJason Gunthorpe struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
2108d160cd4SJason Gunthorpe 				    bool writable);
2118d160cd4SJason Gunthorpe void iopt_release_pages(struct kref *kref);
iopt_put_pages(struct iopt_pages * pages)2128d160cd4SJason Gunthorpe static inline void iopt_put_pages(struct iopt_pages *pages)
2138d160cd4SJason Gunthorpe {
2148d160cd4SJason Gunthorpe 	kref_put(&pages->kref, iopt_release_pages);
2158d160cd4SJason Gunthorpe }
2168d160cd4SJason Gunthorpe 
/* iopt_pages xarray helpers; only the prototypes live in this header. */
void iopt_pages_fill_from_xarray(struct iopt_pages *pages,
				 unsigned long start, unsigned long last,
				 struct page **out_pages);
int iopt_pages_fill_xarray(struct iopt_pages *pages,
			   unsigned long start, unsigned long last,
			   struct page **out_pages);
void iopt_pages_unfill_xarray(struct iopt_pages *pages,
			      unsigned long start, unsigned long last);

/* Area/pages access helpers; only the prototypes live in this header. */
int iopt_area_add_access(struct iopt_area *area,
			 unsigned long start, unsigned long last,
			 struct page **out_pages, unsigned int flags);
void iopt_area_remove_access(struct iopt_area *area,
			     unsigned long start, unsigned long last);
int iopt_pages_rw_access(struct iopt_pages *pages,
			 unsigned long start_byte, void *data,
			 unsigned long length, unsigned int flags);
2318d160cd4SJason Gunthorpe 
2328d160cd4SJason Gunthorpe /*
2338d160cd4SJason Gunthorpe  * Each interval represents an active iopt_access_pages(), it acts as an
2348d160cd4SJason Gunthorpe  * interval lock that keeps the PFNs pinned and stored in the xarray.
2358d160cd4SJason Gunthorpe  */
2368d160cd4SJason Gunthorpe struct iopt_pages_access {
2378d160cd4SJason Gunthorpe 	struct interval_tree_node node;
2388d160cd4SJason Gunthorpe 	unsigned int users;
2398d160cd4SJason Gunthorpe };
2408d160cd4SJason Gunthorpe 
241f394576eSJason Gunthorpe #endif
242