// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 *
 * The iopt_pages is the center of the storage and motion of PFNs. Each
 * iopt_pages represents a logical linear array of full PFNs. The array is 0
 * based and has npages in it. Accessors use 'index' to refer to the entry in
 * this logical array, regardless of its storage location.
 *
 * PFNs are stored in a tiered scheme:
 *  1) iopt_pages::pinned_pfns xarray
 *  2) An iommu_domain
 *  3) The origin of the PFNs, i.e. the userspace pointer
 *
 * PFNs have to be copied between all combinations of tiers, depending on the
 * configuration.
 *
 * When a PFN is taken out of the userspace pointer it is pinned exactly once.
 * The storage locations of the PFN's index are tracked in the two interval
 * trees. If no interval includes the index then it is not pinned.
 *
 * If access_itree includes the PFN's index then an in-kernel access has
 * requested the page. The PFN is stored in the xarray so other requestors can
 * continue to find it.
 *
 * If the domains_itree includes the PFN's index then an iommu_domain is storing
 * the PFN and it can be read back using iommu_iova_to_phys(). To avoid
 * duplicating storage the xarray is not used if only iommu_domains are using
 * the PFN's index.
 *
 * As a general principle this is designed so that destroy never fails. This
 * means removing an iommu_domain or releasing an in-kernel access will not
 * fail due to insufficient memory. In practice this means some cases have to
 * hold PFNs in the xarray even though they are also being stored in an
 * iommu_domain.
 *
 * While the iopt_pages can use an iommu_domain as storage, it does not have an
 * IOVA itself. Instead the iopt_area represents a range of IOVA and uses the
 * iopt_pages as the PFN provider. Multiple iopt_areas can share the iopt_pages
 * and reference their own slice of the PFN array, with sub page granularity.
 *
 * In this file the term 'last' indicates an inclusive and closed interval, eg
 * [0,0] refers to a single PFN. 'end' means an open range, eg [0,0) refers to
 * no PFNs.
 *
 * Be cautious of overflow. An IOVA can go all the way up to U64_MAX, so
 * last_iova + 1 can overflow. An iopt_pages index will always be much less
 * than ULONG_MAX so last_index + 1 cannot overflow.
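 *
 * For example, an iopt_pages with npages == 2 covers indexes 0 and 1, so its
 * last_index is 1 (closed interval) while its end index is 2 (open range).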
 */
#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/sched/mm.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/iommufd.h>

#include "io_pagetable.h"
#include "double_span.h"

#ifndef CONFIG_IOMMUFD_TEST
#define TEMP_MEMORY_LIMIT 65536
#else
#define TEMP_MEMORY_LIMIT iommufd_test_memory_limit
#endif
#define BATCH_BACKUP_SIZE 32

/*
 * More memory makes pin_user_pages() and the batching more efficient, but as
 * this is only a performance optimization don't try too hard to get it. A 64k
 * allocation can hold about 26M of 4k pages and 13G of 2M pages in a
 * pfn_batch. Various destroy paths cannot fail and provide a small amount of
 * stack memory as a backup contingency. If backup_len is given this cannot
 * fail.
 */
static void *temp_kmalloc(size_t *size, void *backup, size_t backup_len)
{
	void *res;

	if (WARN_ON(*size == 0))
		return NULL;

	if (*size < backup_len)
		return backup;

	if (!backup && iommufd_should_fail())
		return NULL;

	*size = min_t(size_t, *size, TEMP_MEMORY_LIMIT);
	res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
	if (res)
		return res;
	*size = PAGE_SIZE;
	if (backup_len) {
		res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
		if (res)
			return res;
		*size = backup_len;
		return backup;
	}
	return kmalloc(*size, GFP_KERNEL);
}

void interval_tree_double_span_iter_update(
	struct interval_tree_double_span_iter *iter)
{
	unsigned long last_hole = ULONG_MAX;
	unsigned int i;

	for (i = 0; i != ARRAY_SIZE(iter->spans); i++) {
		if (interval_tree_span_iter_done(&iter->spans[i])) {
			iter->is_used = -1;
			return;
		}

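		/* A hole in either tree clamps how far a used span can extend */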
		if (iter->spans[i].is_hole) {
			last_hole = min(last_hole, iter->spans[i].last_hole);
			continue;
		}

		iter->is_used = i + 1;
		iter->start_used = iter->spans[i].start_used;
		iter->last_used = min(iter->spans[i].last_used, last_hole);
		return;
	}

	iter->is_used = 0;
	iter->start_hole = iter->spans[0].start_hole;
	iter->last_hole =
		min(iter->spans[0].last_hole, iter->spans[1].last_hole);
}

void interval_tree_double_span_iter_first(
	struct interval_tree_double_span_iter *iter,
	struct rb_root_cached *itree1, struct rb_root_cached *itree2,
	unsigned long first_index, unsigned long last_index)
{
	unsigned int i;

	iter->itrees[0] = itree1;
	iter->itrees[1] = itree2;
	for (i = 0; i != ARRAY_SIZE(iter->spans); i++)
		interval_tree_span_iter_first(&iter->spans[i], iter->itrees[i],
					      first_index, last_index);
	interval_tree_double_span_iter_update(iter);
}

void interval_tree_double_span_iter_next(
	struct interval_tree_double_span_iter *iter)
{
	unsigned int i;

	if (iter->is_used == -1 ||
	    iter->last_hole == iter->spans[0].last_index) {
		iter->is_used = -1;
		return;
	}

	for (i = 0; i != ARRAY_SIZE(iter->spans); i++)
		interval_tree_span_iter_advance(
			&iter->spans[i], iter->itrees[i], iter->last_hole + 1);
	interval_tree_double_span_iter_update(iter);
}

static void iopt_pages_add_npinned(struct iopt_pages *pages, size_t npages)
{
	int rc;

	rc = check_add_overflow(pages->npinned, npages, &pages->npinned);
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(rc || pages->npinned > pages->npages);
}

static void iopt_pages_sub_npinned(struct iopt_pages *pages, size_t npages)
{
	int rc;

	rc = check_sub_overflow(pages->npinned, npages, &pages->npinned);
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(rc || pages->npinned > pages->npages);
}

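/* Unpin pages pinned on an error path and fix the pin accounting */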
static void iopt_pages_err_unpin(struct iopt_pages *pages,
				 unsigned long start_index,
				 unsigned long last_index,
				 struct page **page_list)
{
	unsigned long npages = last_index - start_index + 1;

	unpin_user_pages(page_list, npages);
	iopt_pages_sub_npinned(pages, npages);
}

/*
 * index is the number of PAGE_SIZE units from the start of the area's
 * iopt_pages. If the iova is sub page-size then the area has an iova that
 * covers a portion of the first and last pages in the range.
 */
static unsigned long iopt_area_index_to_iova(struct iopt_area *area,
					     unsigned long index)
{
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(index < iopt_area_index(area) ||
			index > iopt_area_last_index(area));
	index -= iopt_area_index(area);
	if (index == 0)
		return iopt_area_iova(area);
	return iopt_area_iova(area) - area->page_offset + index * PAGE_SIZE;
}

static unsigned long iopt_area_index_to_iova_last(struct iopt_area *area,
						  unsigned long index)
{
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(index < iopt_area_index(area) ||
			index > iopt_area_last_index(area));
	if (index == iopt_area_last_index(area))
		return iopt_area_last_iova(area);
	return iopt_area_iova(area) - area->page_offset +
	       (index - iopt_area_index(area) + 1) * PAGE_SIZE - 1;
}

static void iommu_unmap_nofail(struct iommu_domain *domain, unsigned long iova,
			       size_t size)
{
	size_t ret;

	ret = iommu_unmap(domain, iova, size);
	/*
	 * It is a logic error in this code or a driver bug if the IOMMU unmaps
	 * something other than exactly as requested. This implies that the
	 * iommu driver may not fail unmap for reasons beyond bad arguments.
	 * Particularly, the iommu driver may not do a memory allocation on the
	 * unmap path.
	 */
	WARN_ON(ret != size);
}

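/* Unmap the iova range covering [start_index, last_index] from the domain */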
static void iopt_area_unmap_domain_range(struct iopt_area *area,
					 struct iommu_domain *domain,
					 unsigned long start_index,
					 unsigned long last_index)
{
	unsigned long start_iova = iopt_area_index_to_iova(area, start_index);

	iommu_unmap_nofail(domain, start_iova,
			   iopt_area_index_to_iova_last(area, last_index) -
				   start_iova + 1);
}

static struct iopt_area *iopt_pages_find_domain_area(struct iopt_pages *pages,
						     unsigned long index)
{
	struct interval_tree_node *node;

	node = interval_tree_iter_first(&pages->domains_itree, index, index);
	if (!node)
		return NULL;
	return container_of(node, struct iopt_area, pages_node);
}

/*
 * A simple datastructure to hold a vector of PFNs, optimized for contiguous
 * PFNs. This is used as a temporary holding memory for shuttling pfns from one
 * place to another. Generally everything is made more efficient if operations
 * work on the largest possible grouping of pfns. eg fewer lock/unlock cycles,
 * better cache locality, etc
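 *
 * pfns[i] holds the first pfn of a run of npfns[i] contiguous pfns; end is
 * the number of runs in use and total_pfns is the sum of all npfns[i].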
 */
struct pfn_batch {
	unsigned long *pfns;
	u32 *npfns;
	unsigned int array_size;
	unsigned int end;
	unsigned int total_pfns;
};

static void batch_clear(struct pfn_batch *batch)
{
	batch->total_pfns = 0;
	batch->end = 0;
	batch->pfns[0] = 0;
	batch->npfns[0] = 0;
}

/*
 * Carry means we carry a portion of the final hugepage over to the front of
 * the batch
 */
static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns)
{
	if (!keep_pfns)
		return batch_clear(batch);

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(!batch->end ||
			batch->npfns[batch->end - 1] < keep_pfns);

	batch->total_pfns = keep_pfns;
	batch->pfns[0] = batch->pfns[batch->end - 1] +
			 (batch->npfns[batch->end - 1] - keep_pfns);
	batch->npfns[0] = keep_pfns;
	batch->end = 1;
}

static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns)
{
	if (!batch->total_pfns)
		return;
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(batch->total_pfns != batch->npfns[0]);
	skip_pfns = min(batch->total_pfns, skip_pfns);
	batch->pfns[0] += skip_pfns;
	batch->npfns[0] -= skip_pfns;
	batch->total_pfns -= skip_pfns;
}

static int __batch_init(struct pfn_batch *batch, size_t max_pages, void *backup,
			size_t backup_len)
{
	const size_t elmsz = sizeof(*batch->pfns) + sizeof(*batch->npfns);
	size_t size = max_pages * elmsz;

	batch->pfns = temp_kmalloc(&size, backup, backup_len);
	if (!batch->pfns)
		return -ENOMEM;
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(size < elmsz))
		return -EINVAL;
	batch->array_size = size / elmsz;
	batch->npfns = (u32 *)(batch->pfns + batch->array_size);
	batch_clear(batch);
	return 0;
}

static int batch_init(struct pfn_batch *batch, size_t max_pages)
{
	return __batch_init(batch, max_pages, NULL, 0);
}

static void batch_init_backup(struct pfn_batch *batch, size_t max_pages,
			      void *backup, size_t backup_len)
{
	__batch_init(batch, max_pages, backup, backup_len);
}

static void batch_destroy(struct pfn_batch *batch, void *backup)
{
	if (batch->pfns != backup)
		kfree(batch->pfns);
}

/* true if the pfn was added, false otherwise */
static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn)
{
	const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns));

	if (batch->end &&
	    pfn == batch->pfns[batch->end - 1] + batch->npfns[batch->end - 1] &&
	    batch->npfns[batch->end - 1] != MAX_NPFNS) {
		batch->npfns[batch->end - 1]++;
		batch->total_pfns++;
		return true;
	}
	if (batch->end == batch->array_size)
		return false;
	batch->total_pfns++;
	batch->pfns[batch->end] = pfn;
	batch->npfns[batch->end] = 1;
	batch->end++;
	return true;
}

/*
 * Fill the batch with pfns from the domain. When the batch is full, or it
 * reaches last_index, the function will return. The caller should use
 * batch->total_pfns to determine the starting point for the next iteration.
 */
static void batch_from_domain(struct pfn_batch *batch,
			      struct iommu_domain *domain,
			      struct iopt_area *area, unsigned long start_index,
			      unsigned long last_index)
{
	unsigned int page_offset = 0;
	unsigned long iova;
	phys_addr_t phys;

	iova = iopt_area_index_to_iova(area, start_index);
	if (start_index == iopt_area_index(area))
		page_offset = area->page_offset;
	while (start_index <= last_index) {
		/*
		 * This is pretty slow, it would be nice to get the page size
		 * back from the driver, or have the driver directly fill the
		 * batch.
		 */
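		/* Subtract the offset so a partial first page yields its pfn */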
		phys = iommu_iova_to_phys(domain, iova) - page_offset;
		if (!batch_add_pfn(batch, PHYS_PFN(phys)))
			return;
		iova += PAGE_SIZE - page_offset;
		page_offset = 0;
		start_index++;
	}
}

static struct page **raw_pages_from_domain(struct iommu_domain *domain,
					   struct iopt_area *area,
					   unsigned long start_index,
					   unsigned long last_index,
					   struct page **out_pages)
{
	unsigned int page_offset = 0;
	unsigned long iova;
	phys_addr_t phys;

	iova = iopt_area_index_to_iova(area, start_index);
	if (start_index == iopt_area_index(area))
		page_offset = area->page_offset;
	while (start_index <= last_index) {
		phys = iommu_iova_to_phys(domain, iova) - page_offset;
		*(out_pages++) = pfn_to_page(PHYS_PFN(phys));
		iova += PAGE_SIZE - page_offset;
		page_offset = 0;
		start_index++;
	}
	return out_pages;
}

/* Continues reading a domain until we reach a discontinuity in the pfns. */
static void batch_from_domain_continue(struct pfn_batch *batch,
				       struct iommu_domain *domain,
				       struct iopt_area *area,
				       unsigned long start_index,
				       unsigned long last_index)
{
	unsigned int array_size = batch->array_size;

	batch->array_size = batch->end;
	batch_from_domain(batch, domain, area, start_index, last_index);
	batch->array_size = array_size;
}

/*
 * This is part of the VFIO compatibility support for VFIO_TYPE1_IOMMU. That
 * mode permits splitting a mapped area up, and then one of the splits is
 * unmapped. Doing this normally would cause us to violate our invariant of
 * pairing map/unmap. Thus, to support old VFIO compatibility, disable support
 * for batching consecutive PFNs. All PFNs mapped into the iommu are done in
 * PAGE_SIZE units, not larger or smaller.
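 * eg an 8k mapping could later be split and just one 4k half unmapped; that
 * only works if each 4k page was originally mapped as its own unit.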
 */
static int batch_iommu_map_small(struct iommu_domain *domain,
				 unsigned long iova, phys_addr_t paddr,
				 size_t size, int prot)
{
	unsigned long start_iova = iova;
	int rc;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
		WARN_ON(paddr % PAGE_SIZE || iova % PAGE_SIZE ||
			size % PAGE_SIZE);

	while (size) {
		rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot,
			       GFP_KERNEL_ACCOUNT);
		if (rc)
			goto err_unmap;
		iova += PAGE_SIZE;
		paddr += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	return 0;

err_unmap:
	if (start_iova != iova)
		iommu_unmap_nofail(domain, start_iova, iova - start_iova);
	return rc;
}

static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain,
			   struct iopt_area *area, unsigned long start_index)
{
	bool disable_large_pages = area->iopt->disable_large_pages;
	unsigned long last_iova = iopt_area_last_iova(area);
	unsigned int page_offset = 0;
	unsigned long start_iova;
	unsigned long next_iova;
	unsigned int cur = 0;
	unsigned long iova;
	int rc;

	/* The first index might be a partial page */
	if (start_index == iopt_area_index(area))
		page_offset = area->page_offset;
	next_iova = iova = start_iova =
		iopt_area_index_to_iova(area, start_index);
	while (cur < batch->end) {
		next_iova = min(last_iova + 1,
				next_iova + batch->npfns[cur] * PAGE_SIZE -
					page_offset);
		if (disable_large_pages)
			rc = batch_iommu_map_small(
				domain, iova,
				PFN_PHYS(batch->pfns[cur]) + page_offset,
				next_iova - iova, area->iommu_prot);
		else
			rc = iommu_map(domain, iova,
				       PFN_PHYS(batch->pfns[cur]) + page_offset,
				       next_iova - iova, area->iommu_prot,
				       GFP_KERNEL_ACCOUNT);
		if (rc)
			goto err_unmap;
		iova = next_iova;
		page_offset = 0;
		cur++;
	}
	return 0;
err_unmap:
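	/* Back out everything this call mapped so far */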
	if (start_iova != iova)
		iommu_unmap_nofail(domain, start_iova, iova - start_iova);
	return rc;
}

static void batch_from_xarray(struct pfn_batch *batch, struct xarray *xa,
			      unsigned long start_index,
			      unsigned long last_index)
{
	XA_STATE(xas, xa, start_index);
	void *entry;

	rcu_read_lock();
	while (true) {
		entry = xas_next(&xas);
		if (xas_retry(&xas, entry))
			continue;
		WARN_ON(!xa_is_value(entry));
		if (!batch_add_pfn(batch, xa_to_value(entry)) ||
		    start_index == last_index)
			break;
		start_index++;
	}
	rcu_read_unlock();
}

static void batch_from_xarray_clear(struct pfn_batch *batch, struct xarray *xa,
				    unsigned long start_index,
				    unsigned long last_index)
{
	XA_STATE(xas, xa, start_index);
	void *entry;

	xas_lock(&xas);
	while (true) {
		entry = xas_next(&xas);
		if (xas_retry(&xas, entry))
			continue;
		WARN_ON(!xa_is_value(entry));
		if (!batch_add_pfn(batch, xa_to_value(entry)))
			break;
		xas_store(&xas, NULL);
		if (start_index == last_index)
			break;
		start_index++;
	}
	xas_unlock(&xas);
}

static void clear_xarray(struct xarray *xa, unsigned long start_index,
			 unsigned long last_index)
{
	XA_STATE(xas, xa, start_index);
	void *entry;

	xas_lock(&xas);
	xas_for_each(&xas, entry, last_index)
		xas_store(&xas, NULL);
	xas_unlock(&xas);
}

static int pages_to_xarray(struct xarray *xa, unsigned long start_index,
			   unsigned long last_index, struct page **pages)
{
	struct page **end_pages = pages + (last_index - start_index) + 1;
	struct page **half_pages = pages + (end_pages - pages) / 2;
	XA_STATE(xas, xa, start_index);

	do {
		void *old;

		xas_lock(&xas);
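		/* Store one value entry per pfn, retrying via xas_nomem() */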
		while (pages != end_pages) {
			/* xarray does not participate in fault injection */
			if (pages == half_pages && iommufd_should_fail()) {
				xas_set_err(&xas, -EINVAL);
				xas_unlock(&xas);
				/* aka xas_destroy() */
				xas_nomem(&xas, GFP_KERNEL);
				goto err_clear;
			}

			old = xas_store(&xas, xa_mk_value(page_to_pfn(*pages)));
			if (xas_error(&xas))
				break;
			WARN_ON(old);
			pages++;
			xas_next(&xas);
		}
		xas_unlock(&xas);
	} while (xas_nomem(&xas, GFP_KERNEL));

err_clear:
	if (xas_error(&xas)) {
		if (xas.xa_index != start_index)
			clear_xarray(xa, start_index, xas.xa_index - 1);
		return xas_error(&xas);
	}
	return 0;
}

static void batch_from_pages(struct pfn_batch *batch, struct page **pages,
			     size_t npages)
{
	struct page **end = pages + npages;

	for (; pages != end; pages++)
		if (!batch_add_pfn(batch, page_to_pfn(*pages)))
			break;
}

static void batch_unpin(struct pfn_batch *batch, struct iopt_pages *pages,
			unsigned int first_page_off, size_t npages)
{
	unsigned int cur = 0;

	while (first_page_off) {
		if (batch->npfns[cur] > first_page_off)
			break;
		first_page_off -= batch->npfns[cur];
		cur++;
	}

	while (npages) {
		size_t to_unpin = min_t(size_t, npages,
					batch->npfns[cur] - first_page_off);

		unpin_user_page_range_dirty_lock(
			pfn_to_page(batch->pfns[cur] + first_page_off),
			to_unpin, pages->writable);
		iopt_pages_sub_npinned(pages, to_unpin);
		cur++;
		first_page_off = 0;
		npages -= to_unpin;
	}
}

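/* Copy to or from the page's memory depending on IOMMUFD_ACCESS_RW_WRITE */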
static void copy_data_page(struct page *page, void *data, unsigned long offset,
			   size_t length, unsigned int flags)
{
	void *mem;

	mem = kmap_local_page(page);
	if (flags & IOMMUFD_ACCESS_RW_WRITE) {
		memcpy(mem + offset, data, length);
		set_page_dirty_lock(page);
	} else {
		memcpy(data, mem + offset, length);
	}
	kunmap_local(mem);
}

static unsigned long batch_rw(struct pfn_batch *batch, void *data,
			      unsigned long offset, unsigned long length,
			      unsigned int flags)
{
	unsigned long copied = 0;
	unsigned int npage = 0;
	unsigned int cur = 0;

	while (cur < batch->end) {
		unsigned long bytes = min(length, PAGE_SIZE - offset);

		copy_data_page(pfn_to_page(batch->pfns[cur] + npage), data,
			       offset, bytes, flags);
		offset = 0;
		length -= bytes;
		data += bytes;
		copied += bytes;
		npage++;
		if (npage == batch->npfns[cur]) {
			npage = 0;
			cur++;
		}
		if (!length)
			break;
	}
	return copied;
}

/* pfn_reader_user is just the pin_user_pages() path */
struct pfn_reader_user {
	struct page **upages;
	size_t upages_len;
	unsigned long upages_start;
	unsigned long upages_end;
	unsigned int gup_flags;
	/*
	 * 1 means mmget() and mmap_read_lock(), 0 means only mmget(), -1 is
	 * neither
	 */
	int locked;
};

static void pfn_reader_user_init(struct pfn_reader_user *user,
				 struct iopt_pages *pages)
{
	user->upages = NULL;
	user->upages_start = 0;
	user->upages_end = 0;
	user->locked = -1;

	user->gup_flags = FOLL_LONGTERM;
	if (pages->writable)
		user->gup_flags |= FOLL_WRITE;
}

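/* Drop the mmap lock and mm reference, and free the pinned page list */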
static void pfn_reader_user_destroy(struct pfn_reader_user *user,
				    struct iopt_pages *pages)
{
	if (user->locked != -1) {
		if (user->locked)
			mmap_read_unlock(pages->source_mm);
		if (pages->source_mm != current->mm)
			mmput(pages->source_mm);
		user->locked = -1;
	}

	kfree(user->upages);
	user->upages = NULL;
}

static int pfn_reader_user_pin(struct pfn_reader_user *user,
			       struct iopt_pages *pages,
			       unsigned long start_index,
			       unsigned long last_index)
{
	bool remote_mm = pages->source_mm != current->mm;
	unsigned long npages;
	uintptr_t uptr;
	long rc;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    WARN_ON(last_index < start_index))
		return -EINVAL;

	if (!user->upages) {
		/* All undone in pfn_reader_destroy() */
		user->upages_len =
			(last_index - start_index + 1) * sizeof(*user->upages);
		user->upages = temp_kmalloc(&user->upages_len, NULL, 0);
		if (!user->upages)
			return -ENOMEM;
	}

	if (user->locked == -1) {
		/*
		 * The majority of usages will run the map task within the mm
		 * providing the pages, so we can optimize into
		 * get_user_pages_fast()
		 */
		if (remote_mm) {
			if (!mmget_not_zero(pages->source_mm))
				return -EFAULT;
		}
		user->locked = 0;
	}

	npages = min_t(unsigned long, last_index - start_index + 1,
		       user->upages_len / sizeof(*user->upages));

	if (iommufd_should_fail())
		return -EFAULT;

	uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE);
	if (!remote_mm)
		rc = pin_user_pages_fast(uptr, npages, user->gup_flags,
					 user->upages);
	else {
		if (!user->locked) {
			mmap_read_lock(pages->source_mm);
			user->locked = 1;
		}
		rc = pin_user_pages_remote(pages->source_mm, uptr, npages,
					   user->gup_flags, user->upages,
					   &user->locked);
	}
	if (rc <= 0) {
		if (WARN_ON(!rc))
			return -EFAULT;
		return rc;
	}
	iopt_pages_add_npinned(pages, rc);
	user->upages_start = start_index;
	user->upages_end = start_index + rc;
	return 0;
}

"modern" and faster accounting method used by io_uring */ 804f394576eSJason Gunthorpe static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) 805f394576eSJason Gunthorpe { 806f394576eSJason Gunthorpe unsigned long lock_limit; 807f394576eSJason Gunthorpe unsigned long cur_pages; 808f394576eSJason Gunthorpe unsigned long new_pages; 809f394576eSJason Gunthorpe 810f394576eSJason Gunthorpe lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >> 811f394576eSJason Gunthorpe PAGE_SHIFT; 812f394576eSJason Gunthorpe do { 813f394576eSJason Gunthorpe cur_pages = atomic_long_read(&pages->source_user->locked_vm); 814f394576eSJason Gunthorpe new_pages = cur_pages + npages; 815f394576eSJason Gunthorpe if (new_pages > lock_limit) 816f394576eSJason Gunthorpe return -ENOMEM; 817f394576eSJason Gunthorpe } while (atomic_long_cmpxchg(&pages->source_user->locked_vm, cur_pages, 818f394576eSJason Gunthorpe new_pages) != cur_pages); 819f394576eSJason Gunthorpe return 0; 820f394576eSJason Gunthorpe } 821f394576eSJason Gunthorpe 822f394576eSJason Gunthorpe static void decr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) 823f394576eSJason Gunthorpe { 824f394576eSJason Gunthorpe if (WARN_ON(atomic_long_read(&pages->source_user->locked_vm) < npages)) 825f394576eSJason Gunthorpe return; 826f394576eSJason Gunthorpe atomic_long_sub(npages, &pages->source_user->locked_vm); 827f394576eSJason Gunthorpe } 828f394576eSJason Gunthorpe 829f394576eSJason Gunthorpe /* This is the accounting method used for compatibility with VFIO */ 830f394576eSJason Gunthorpe static int update_mm_locked_vm(struct iopt_pages *pages, unsigned long npages, 831f394576eSJason Gunthorpe bool inc, struct pfn_reader_user *user) 832f394576eSJason Gunthorpe { 833f394576eSJason Gunthorpe bool do_put = false; 834f394576eSJason Gunthorpe int rc; 835f394576eSJason Gunthorpe 836f394576eSJason Gunthorpe if (user && user->locked) { 837f394576eSJason Gunthorpe mmap_read_unlock(pages->source_mm); 838f394576eSJason Gunthorpe user->locked = 0; 839f394576eSJason Gunthorpe /* If we had the lock then we also have a get */ 840f394576eSJason Gunthorpe } else if ((!user || !user->upages) && 841f394576eSJason Gunthorpe pages->source_mm != current->mm) { 842f394576eSJason Gunthorpe if (!mmget_not_zero(pages->source_mm)) 843f394576eSJason Gunthorpe return -EINVAL; 844f394576eSJason Gunthorpe do_put = true; 845f394576eSJason Gunthorpe } 846f394576eSJason Gunthorpe 847f394576eSJason Gunthorpe mmap_write_lock(pages->source_mm); 848f394576eSJason Gunthorpe rc = __account_locked_vm(pages->source_mm, npages, inc, 849f394576eSJason Gunthorpe pages->source_task, false); 850f394576eSJason Gunthorpe mmap_write_unlock(pages->source_mm); 851f394576eSJason Gunthorpe 852f394576eSJason Gunthorpe if (do_put) 853f394576eSJason Gunthorpe mmput(pages->source_mm); 854f394576eSJason Gunthorpe return rc; 855f394576eSJason Gunthorpe } 856f394576eSJason Gunthorpe 857f394576eSJason Gunthorpe static int do_update_pinned(struct iopt_pages *pages, unsigned long npages, 858f394576eSJason Gunthorpe bool inc, struct pfn_reader_user *user) 859f394576eSJason Gunthorpe { 860f394576eSJason Gunthorpe int rc = 0; 861f394576eSJason Gunthorpe 862f394576eSJason Gunthorpe switch (pages->account_mode) { 863f394576eSJason Gunthorpe case IOPT_PAGES_ACCOUNT_NONE: 864f394576eSJason Gunthorpe break; 865f394576eSJason Gunthorpe case IOPT_PAGES_ACCOUNT_USER: 866f394576eSJason Gunthorpe if (inc) 867f394576eSJason Gunthorpe rc = incr_user_locked_vm(pages, npages); 868f394576eSJason 
		else
			decr_user_locked_vm(pages, npages);
		break;
	case IOPT_PAGES_ACCOUNT_MM:
		rc = update_mm_locked_vm(pages, npages, inc, user);
		break;
	}
	if (rc)
		return rc;

	pages->last_npinned = pages->npinned;
	if (inc)
		atomic64_add(npages, &pages->source_mm->pinned_vm);
	else
		atomic64_sub(npages, &pages->source_mm->pinned_vm);
	return 0;
}

static void update_unpinned(struct iopt_pages *pages)
{
	if (WARN_ON(pages->npinned > pages->last_npinned))
		return;
	if (pages->npinned == pages->last_npinned)
		return;
	do_update_pinned(pages, pages->last_npinned - pages->npinned, false,
			 NULL);
}

/*
 * Updating the number of pages pinned is done after the pages have been read
 * and processed. If the user lacked the limit then the error unwind will unpin
 * everything that was just pinned. This is because it is expensive to
 * calculate how many pages we have already pinned within a range to generate
 * an accurate prediction in advance of doing the work to actually pin them.
 */
static int pfn_reader_user_update_pinned(struct pfn_reader_user *user,
					 struct iopt_pages *pages)
{
	unsigned long npages;
	bool inc;

	lockdep_assert_held(&pages->mutex);

	if (pages->npinned == pages->last_npinned)
		return 0;

	if (pages->npinned < pages->last_npinned) {
		npages = pages->last_npinned - pages->npinned;
		inc = false;
	} else {
		if (iommufd_should_fail())
			return -ENOMEM;
		npages = pages->npinned - pages->last_npinned;
		inc = true;
	}
	return do_update_pinned(pages, npages, inc, user);
}

/*
 * PFNs are stored in three places, in order of preference:
 * - The iopt_pages xarray. This is only populated if there is an
 *   iopt_pages_access
 * - The iommu_domain under an area
 * - The original PFN source, ie pages->source_mm
 *
 * This iterator reads the pfns optimizing to load according to the
 * above order.
 */
struct pfn_reader {
	struct iopt_pages *pages;
	struct interval_tree_double_span_iter span;
	struct pfn_batch batch;
	unsigned long batch_start_index;
	unsigned long batch_end_index;
	unsigned long last_index;

	struct pfn_reader_user user;
};

static int pfn_reader_update_pinned(struct pfn_reader *pfns)
{
	return pfn_reader_user_update_pinned(&pfns->user, pfns->pages);
}

/*
 * The batch can contain a mixture of pages that are still in use and pages
 * that need to be unpinned. Unpin only pages that are not held anywhere else.
 */
static void pfn_reader_unpin(struct pfn_reader *pfns)
{
	unsigned long last = pfns->batch_end_index - 1;
	unsigned long start = pfns->batch_start_index;
	struct interval_tree_double_span_iter span;
	struct iopt_pages *pages = pfns->pages;

	lockdep_assert_held(&pages->mutex);

	interval_tree_for_each_double_span(&span, &pages->access_itree,
					   &pages->domains_itree, start, last) {
		if (span.is_used)
			continue;

		batch_unpin(&pfns->batch, pages, span.start_hole - start,
			    span.last_hole - span.start_hole + 1);
	}
}

/* Process a single span to load it from the proper storage */
static int pfn_reader_fill_span(struct pfn_reader *pfns)
{
	struct interval_tree_double_span_iter *span = &pfns->span;
	unsigned long start_index = pfns->batch_end_index;
	struct iopt_area *area;
	int rc;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    WARN_ON(span->last_used < start_index))
		return -EINVAL;

	if (span->is_used == 1) {
		batch_from_xarray(&pfns->batch, &pfns->pages->pinned_pfns,
				  start_index, span->last_used);
		return 0;
	}

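	/* is_used == 2 means the span is covered by the domains_itree */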
	if (span->is_used == 2) {
		/*
		 * Pull as many pages from the first domain we find in the
		 * target span. If it is too small then we will be called again
		 * and we'll find another area.
		 */
		area = iopt_pages_find_domain_area(pfns->pages, start_index);
		if (WARN_ON(!area))
			return -EINVAL;

		/* The storage_domain cannot change without the pages mutex */
		batch_from_domain(
			&pfns->batch, area->storage_domain, area, start_index,
			min(iopt_area_last_index(area), span->last_used));
		return 0;
	}

	if (start_index >= pfns->user.upages_end) {
		rc = pfn_reader_user_pin(&pfns->user, pfns->pages, start_index,
					 span->last_hole);
		if (rc)
			return rc;
	}

	batch_from_pages(&pfns->batch,
			 pfns->user.upages +
				 (start_index - pfns->user.upages_start),
			 pfns->user.upages_end - start_index);
	return 0;
}

static bool pfn_reader_done(struct pfn_reader *pfns)
{
	return pfns->batch_start_index == pfns->last_index + 1;
}

static int pfn_reader_next(struct pfn_reader *pfns)
{
	int rc;

	batch_clear(&pfns->batch);
	pfns->batch_start_index = pfns->batch_end_index;

	while (pfns->batch_end_index != pfns->last_index + 1) {
		unsigned int npfns = pfns->batch.total_pfns;

		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
		    WARN_ON(interval_tree_double_span_iter_done(&pfns->span)))
			return -EINVAL;

		rc = pfn_reader_fill_span(pfns);
		if (rc)
			return rc;

		if (WARN_ON(!pfns->batch.total_pfns))
			return -EINVAL;

		pfns->batch_end_index =
			pfns->batch_start_index + pfns->batch.total_pfns;
		if (pfns->batch_end_index == pfns->span.last_used + 1)
			interval_tree_double_span_iter_next(&pfns->span);

		/* Batch is full */
		if (npfns == pfns->batch.total_pfns)
			return 0;
	}
	return 0;
}
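
/* Prepare a reader over [start_index, last_index]; pfn_reader_next() fills batches */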

static int pfn_reader_init(struct pfn_reader *pfns, struct iopt_pages *pages,
			   unsigned long start_index, unsigned long last_index)
{
	int rc;

	lockdep_assert_held(&pages->mutex);

	pfns->pages = pages;
	pfns->batch_start_index = start_index;
	pfns->batch_end_index = start_index;
	pfns->last_index = last_index;
	pfn_reader_user_init(&pfns->user, pages);
	rc = batch_init(&pfns->batch, last_index - start_index + 1);
	if (rc)
		return rc;
	interval_tree_double_span_iter_first(&pfns->span, &pages->access_itree,
					     &pages->domains_itree, start_index,
					     last_index);
	return 0;
}

/*
 * There are many assertions regarding the state of pages->npinned vs
 * pages->last_npinned, for instance something like unmapping a domain must
 * only decrement the npinned, and pfn_reader_destroy() must be called only
 * after all the pins are updated. This is fine for success flows, but error
 * flows sometimes need to release the pins held inside the pfn_reader before
 * going on to complete unmapping and releasing pins held in domains.
 */
static void pfn_reader_release_pins(struct pfn_reader *pfns)
{
	struct iopt_pages *pages = pfns->pages;

	if (pfns->user.upages_end > pfns->batch_end_index) {
		size_t npages = pfns->user.upages_end - pfns->batch_end_index;

		/* Any pages not transferred to the batch are just unpinned */
		unpin_user_pages(pfns->user.upages + (pfns->batch_end_index -
						      pfns->user.upages_start),
				 npages);
		iopt_pages_sub_npinned(pages, npages);
		pfns->user.upages_end = pfns->batch_end_index;
	}
	if (pfns->batch_start_index != pfns->batch_end_index) {
		pfn_reader_unpin(pfns);
		pfns->batch_start_index = pfns->batch_end_index;
	}
}

static void pfn_reader_destroy(struct pfn_reader *pfns)
{
	struct iopt_pages *pages = pfns->pages;

	pfn_reader_release_pins(pfns);
	pfn_reader_user_destroy(&pfns->user, pfns->pages);
	batch_destroy(&pfns->batch, NULL);
	WARN_ON(pages->last_npinned != pages->npinned);
}

static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages,
			    unsigned long start_index, unsigned long last_index)
{
	int rc;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    WARN_ON(last_index < start_index))
		return -EINVAL;

	rc = pfn_reader_init(pfns, pages, start_index, last_index);
	if (rc)
		return rc;
	rc = pfn_reader_next(pfns);
	if (rc) {
		pfn_reader_destroy(pfns);
		return rc;
	}
	return 0;
}
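
/*
 * iopt_alloc_pages() below anchors the logical PFN array at the page
 * containing uptr, so a misaligned uptr grows npages. A worked example
 * (values invented, assuming PAGE_SIZE is 0x1000):
 *
 *	uptr = 0x1f00, length = 0x200
 *	pages->uptr   = ALIGN_DOWN(0x1f00, 0x1000)          = 0x1000
 *	pages->npages = DIV_ROUND_UP(0x200 + 0xf00, 0x1000) = 2
 *
 * even though the request itself is only 0x200 bytes long.
 */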

struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
				    bool writable)
{
	struct iopt_pages *pages;
	unsigned long end;

	/*
	 * The iommu API uses size_t as the length; this also protects the
	 * DIV_ROUND_UP below from overflow.
	 */
	if (length > SIZE_MAX - PAGE_SIZE || length == 0)
		return ERR_PTR(-EINVAL);

	if (check_add_overflow((unsigned long)uptr, length, &end))
		return ERR_PTR(-EOVERFLOW);

	pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	kref_init(&pages->kref);
	xa_init_flags(&pages->pinned_pfns, XA_FLAGS_ACCOUNT);
	mutex_init(&pages->mutex);
	pages->source_mm = current->mm;
	mmgrab(pages->source_mm);
	pages->uptr = (void __user *)ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE);
	pages->npages = DIV_ROUND_UP(length + (uptr - pages->uptr), PAGE_SIZE);
	pages->access_itree = RB_ROOT_CACHED;
	pages->domains_itree = RB_ROOT_CACHED;
	pages->writable = writable;
	if (capable(CAP_IPC_LOCK))
		pages->account_mode = IOPT_PAGES_ACCOUNT_NONE;
	else
		pages->account_mode = IOPT_PAGES_ACCOUNT_USER;
	pages->source_task = current->group_leader;
	get_task_struct(current->group_leader);
	pages->source_user = get_uid(current_user());
	return pages;
}

void iopt_release_pages(struct kref *kref)
{
	struct iopt_pages *pages = container_of(kref, struct iopt_pages, kref);

	WARN_ON(!RB_EMPTY_ROOT(&pages->access_itree.rb_root));
	WARN_ON(!RB_EMPTY_ROOT(&pages->domains_itree.rb_root));
	WARN_ON(pages->npinned);
	WARN_ON(!xa_empty(&pages->pinned_pfns));
	mmdrop(pages->source_mm);
	mutex_destroy(&pages->mutex);
	put_task_struct(pages->source_task);
	free_uid(pages->source_user);
	kfree(pages);
}

static void
iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area,
		       struct iopt_pages *pages, struct iommu_domain *domain,
		       unsigned long start_index, unsigned long last_index,
		       unsigned long *unmapped_end_index,
		       unsigned long real_last_index)
{
	while (start_index <= last_index) {
		unsigned long batch_last_index;

		if (*unmapped_end_index <= last_index) {
			unsigned long start =
				max(start_index, *unmapped_end_index);

			if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
			    batch->total_pfns)
				WARN_ON(*unmapped_end_index -
						batch->total_pfns !=
					start_index);
			batch_from_domain(batch, domain, area, start,
					  last_index);
			batch_last_index = start_index + batch->total_pfns - 1;
		} else {
			batch_last_index = last_index;
		}

		if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
			WARN_ON(batch_last_index > real_last_index);

		/*
		 * unmaps must always 'cut' at a place where the pfns are not
		 * contiguous to pair with the maps that always install
		 * contiguous pages. Thus, if we have to stop unpinning in the
		 * middle of the domains we need to keep reading pfns until we
		 * find a cut point to do the unmap. The pfns we read are
		 * carried over and either skipped or integrated into the next
		 * batch.
		 */
		if (batch_last_index == last_index &&
		    last_index != real_last_index)
			batch_from_domain_continue(batch, domain, area,
						   last_index + 1,
						   real_last_index);

		if (*unmapped_end_index <= batch_last_index) {
			iopt_area_unmap_domain_range(
				area, domain, *unmapped_end_index,
				start_index + batch->total_pfns - 1);
			*unmapped_end_index = start_index + batch->total_pfns;
		}

		/* unpin must follow unmap */
		batch_unpin(batch, pages, 0,
			    batch_last_index - start_index + 1);
		start_index = batch_last_index + 1;

		batch_clear_carry(batch,
				  *unmapped_end_index - batch_last_index - 1);
	}
}
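
/*
 * A concrete illustration of the carry described above (indices invented for
 * the example): the hole [0, 6] must be unpinned, an access still covers
 * [7, 12], and the domain mapped all of [0, 12] as one contiguous run of
 * PFNs. The unmap cannot cut inside the contiguous run, so the batch reads
 * ahead to index 12, all of [0, 12] is unmapped, only [0, 6] is unpinned,
 * and PFNs 7-12 stay at the front of the batch as the carry that
 * batch_skip_carry() consumes when the access span is walked.
 */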

static void __iopt_area_unfill_domain(struct iopt_area *area,
				      struct iopt_pages *pages,
				      struct iommu_domain *domain,
				      unsigned long last_index)
{
	struct interval_tree_double_span_iter span;
	unsigned long start_index = iopt_area_index(area);
	unsigned long unmapped_end_index = start_index;
	u64 backup[BATCH_BACKUP_SIZE];
	struct pfn_batch batch;

	lockdep_assert_held(&pages->mutex);

	/*
	 * For security we must not unpin something that is still DMA mapped,
	 * so this must unmap any IOVA before we go ahead and unpin the pages.
	 * This creates a complexity where we need to skip over unpinning pages
	 * held in the xarray, but continue to unmap from the domain.
	 *
	 * The domain unmap cannot stop in the middle of a contiguous range of
	 * PFNs. To solve this problem the unpinning step will read ahead to the
	 * end of any contiguous span, unmap that whole span, and then only
	 * unpin the leading part that does not have any accesses. The residual
	 * PFNs that were unmapped but not unpinned are called a "carry" in the
	 * batch as they are moved to the front of the PFN list and continue on
	 * to the next iteration(s).
	 */
	batch_init_backup(&batch, last_index + 1, backup, sizeof(backup));
	interval_tree_for_each_double_span(&span, &pages->domains_itree,
					   &pages->access_itree, start_index,
					   last_index) {
		if (span.is_used) {
			batch_skip_carry(&batch,
					 span.last_used - span.start_used + 1);
			continue;
		}
		iopt_area_unpin_domain(&batch, area, pages, domain,
				       span.start_hole, span.last_hole,
				       &unmapped_end_index, last_index);
	}
	/*
	 * If the range ends in an access then we do the residual unmap without
	 * any unpins.
	 */
	if (unmapped_end_index != last_index + 1)
		iopt_area_unmap_domain_range(area, domain, unmapped_end_index,
					     last_index);
	WARN_ON(batch.total_pfns);
	batch_destroy(&batch, backup);
	update_unpinned(pages);
}

static void iopt_area_unfill_partial_domain(struct iopt_area *area,
					    struct iopt_pages *pages,
					    struct iommu_domain *domain,
					    unsigned long end_index)
{
	if (end_index != iopt_area_index(area))
		__iopt_area_unfill_domain(area, pages, domain, end_index - 1);
}

/**
 * iopt_area_unmap_domain() - Unmap without unpinning PFNs in a domain
 * @area: The IOVA range to unmap
 * @domain: The domain to unmap
 *
 * The caller must know that unpinning is not required, usually because there
 * are other domains in the iopt.
 */
void iopt_area_unmap_domain(struct iopt_area *area, struct iommu_domain *domain)
{
	iommu_unmap_nofail(domain, iopt_area_iova(area),
			   iopt_area_length(area));
}

/**
 * iopt_area_unfill_domain() - Unmap and unpin PFNs in a domain
 * @area: IOVA area to use
 * @pages: page supplier for the area (area->pages is NULL)
 * @domain: Domain to unmap from
 *
 * The domain should be removed from the domains_itree before calling. The
 * domain will always be unmapped, but the PFNs may not be unpinned if there are
 * still accesses.
 */
void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages,
			     struct iommu_domain *domain)
{
	__iopt_area_unfill_domain(area, pages, domain,
				  iopt_area_last_index(area));
}
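
/*
 * Ordering sketch for a full teardown, as iopt_area_unfill_domains() further
 * below does it (illustrative excerpt, not a new helper): every other domain
 * is unmapped first, then the area is taken out of the domains_itree, and
 * only then is the storage domain unfilled so the PFNs can be unpinned:
 *
 *	iopt_area_unmap_domain_range(area, other_domain, ...);
 *	interval_tree_remove(&area->pages_node, &pages->domains_itree);
 *	iopt_area_unfill_domain(area, pages, area->storage_domain);
 */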

/**
 * iopt_area_fill_domain() - Map PFNs from the area into a domain
 * @area: IOVA area to use
 * @domain: Domain to load PFNs into
 *
 * Read the pfns from the area's underlying iopt_pages and map them into the
 * given domain. Called when attaching a new domain to an io_pagetable.
 */
int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain)
{
	unsigned long done_end_index;
	struct pfn_reader pfns;
	int rc;

	lockdep_assert_held(&area->pages->mutex);

	rc = pfn_reader_first(&pfns, area->pages, iopt_area_index(area),
			      iopt_area_last_index(area));
	if (rc)
		return rc;

	while (!pfn_reader_done(&pfns)) {
		done_end_index = pfns.batch_start_index;
		rc = batch_to_domain(&pfns.batch, domain, area,
				     pfns.batch_start_index);
		if (rc)
			goto out_unmap;
		done_end_index = pfns.batch_end_index;

		rc = pfn_reader_next(&pfns);
		if (rc)
			goto out_unmap;
	}

	rc = pfn_reader_update_pinned(&pfns);
	if (rc)
		goto out_unmap;
	goto out_destroy;

out_unmap:
	pfn_reader_release_pins(&pfns);
	iopt_area_unfill_partial_domain(area, area->pages, domain,
					done_end_index);
out_destroy:
	pfn_reader_destroy(&pfns);
	return rc;
}
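
/*
 * Note how iopt_area_fill_domain() above tracks done_end_index: it is only
 * rolled forward after batch_to_domain() fully succeeds, so on error the
 * partial unfill covers exactly the indexes that were mapped.
 * iopt_area_fill_domains() below extends the same idea to several domains
 * with done_first_end_index/done_all_end_index, since domains earlier in the
 * xarray may have accepted a batch that a later one rejected.
 */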

/**
 * iopt_area_fill_domains() - Install PFNs into the area's domains
 * @area: The area to act on
 * @pages: The pages associated with the area (area->pages is NULL)
 *
 * Called during area creation. The area is freshly created and not inserted in
 * the domains_itree yet. PFNs are read and loaded into every domain held in the
 * area's io_pagetable and the area is installed in the domains_itree.
 *
 * On failure all domains are left unchanged.
 */
int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages)
{
	unsigned long done_first_end_index;
	unsigned long done_all_end_index;
	struct iommu_domain *domain;
	unsigned long unmap_index;
	struct pfn_reader pfns;
	unsigned long index;
	int rc;

	lockdep_assert_held(&area->iopt->domains_rwsem);

	if (xa_empty(&area->iopt->domains))
		return 0;

	mutex_lock(&pages->mutex);
	rc = pfn_reader_first(&pfns, pages, iopt_area_index(area),
			      iopt_area_last_index(area));
	if (rc)
		goto out_unlock;

	while (!pfn_reader_done(&pfns)) {
		done_first_end_index = pfns.batch_end_index;
		done_all_end_index = pfns.batch_start_index;
		xa_for_each(&area->iopt->domains, index, domain) {
			rc = batch_to_domain(&pfns.batch, domain, area,
					     pfns.batch_start_index);
			if (rc)
				goto out_unmap;
		}
		done_all_end_index = done_first_end_index;

		rc = pfn_reader_next(&pfns);
		if (rc)
			goto out_unmap;
	}
	rc = pfn_reader_update_pinned(&pfns);
	if (rc)
		goto out_unmap;

	area->storage_domain = xa_load(&area->iopt->domains, 0);
	interval_tree_insert(&area->pages_node, &pages->domains_itree);
	goto out_destroy;

out_unmap:
	pfn_reader_release_pins(&pfns);
	xa_for_each(&area->iopt->domains, unmap_index, domain) {
		unsigned long end_index;

		if (unmap_index < index)
			end_index = done_first_end_index;
		else
			end_index = done_all_end_index;

		/*
		 * The area is not yet part of the domains_itree so we have to
		 * manage the unpinning specially. The last domain does the
		 * unpin, every other domain is just unmapped.
		 */
		if (unmap_index != area->iopt->next_domain_id - 1) {
			if (end_index != iopt_area_index(area))
				iopt_area_unmap_domain_range(
					area, domain, iopt_area_index(area),
					end_index - 1);
		} else {
			iopt_area_unfill_partial_domain(area, pages, domain,
							end_index);
		}
	}
out_destroy:
	pfn_reader_destroy(&pfns);
out_unlock:
	mutex_unlock(&pages->mutex);
	return rc;
}

/**
 * iopt_area_unfill_domains() - unmap PFNs from the area's domains
 * @area: The area to act on
 * @pages: The pages associated with the area (area->pages is NULL)
 *
 * Called during area destruction. This unmaps the IOVAs covered by all the
 * area's domains and releases the PFNs.
 */
void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages)
{
	struct io_pagetable *iopt = area->iopt;
	struct iommu_domain *domain;
	unsigned long index;

	lockdep_assert_held(&iopt->domains_rwsem);

	mutex_lock(&pages->mutex);
	if (!area->storage_domain)
		goto out_unlock;

	xa_for_each(&iopt->domains, index, domain)
		if (domain != area->storage_domain)
			iopt_area_unmap_domain_range(
				area, domain, iopt_area_index(area),
				iopt_area_last_index(area));

	interval_tree_remove(&area->pages_node, &pages->domains_itree);
	iopt_area_unfill_domain(area, pages, area->storage_domain);
	area->storage_domain = NULL;
out_unlock:
	mutex_unlock(&pages->mutex);
}

static void iopt_pages_unpin_xarray(struct pfn_batch *batch,
				    struct iopt_pages *pages,
				    unsigned long start_index,
				    unsigned long end_index)
{
	while (start_index <= end_index) {
		batch_from_xarray_clear(batch, &pages->pinned_pfns, start_index,
					end_index);
		batch_unpin(batch, pages, 0, batch->total_pfns);
		start_index += batch->total_pfns;
		batch_clear(batch);
	}
}
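
/*
 * Reminder of the double span iterator convention relied on throughout this
 * file: span.is_used is 1 when the span is covered by the first tree passed
 * in, 2 when covered by the second, and 0 for a hole covered by neither.
 * Callers pick the tree order to match their priorities, e.g.
 * iopt_pages_unfill_xarray() below passes (access_itree, domains_itree), so
 * is_used == 2 means "pinned only on behalf of a domain".
 */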

/**
 * iopt_pages_unfill_xarray() - Update the xarray after removing an access
 * @pages: The pages to act on
 * @start_index: Starting PFN index
 * @last_index: Last PFN index
 *
 * Called when an iopt_pages_access is removed, removes pages from the itree.
 * The access should already be removed from the access_itree.
 */
void iopt_pages_unfill_xarray(struct iopt_pages *pages,
			      unsigned long start_index,
			      unsigned long last_index)
{
	struct interval_tree_double_span_iter span;
	u64 backup[BATCH_BACKUP_SIZE];
	struct pfn_batch batch;
	bool batch_inited = false;

	lockdep_assert_held(&pages->mutex);

	interval_tree_for_each_double_span(&span, &pages->access_itree,
					   &pages->domains_itree, start_index,
					   last_index) {
		if (!span.is_used) {
			if (!batch_inited) {
				batch_init_backup(&batch,
						  last_index - start_index + 1,
						  backup, sizeof(backup));
				batch_inited = true;
			}
			iopt_pages_unpin_xarray(&batch, pages, span.start_hole,
						span.last_hole);
		} else if (span.is_used == 2) {
			/* Covered by a domain */
			clear_xarray(&pages->pinned_pfns, span.start_used,
				     span.last_used);
		}
		/* Otherwise covered by an existing access */
	}
	if (batch_inited)
		batch_destroy(&batch, backup);
	update_unpinned(pages);
}

/**
 * iopt_pages_fill_from_xarray() - Fast path for reading PFNs
 * @pages: The pages to act on
 * @start_index: The first page index in the range
 * @last_index: The last page index in the range
 * @out_pages: The output array to return the pages
 *
 * This can be called if the caller is holding a refcount on an
 * iopt_pages_access that is known to have already been filled. It quickly reads
 * the pages directly from the xarray.
 *
 * This is part of the SW iommu interface to read pages for in-kernel use.
 */
void iopt_pages_fill_from_xarray(struct iopt_pages *pages,
				 unsigned long start_index,
				 unsigned long last_index,
				 struct page **out_pages)
{
	XA_STATE(xas, &pages->pinned_pfns, start_index);
	void *entry;

	rcu_read_lock();
	while (start_index <= last_index) {
		entry = xas_next(&xas);
		if (xas_retry(&xas, entry))
			continue;
		WARN_ON(!xa_is_value(entry));
		*(out_pages++) = pfn_to_page(xa_to_value(entry));
		start_index++;
	}
	rcu_read_unlock();
}

static int iopt_pages_fill_from_domain(struct iopt_pages *pages,
				       unsigned long start_index,
				       unsigned long last_index,
				       struct page **out_pages)
{
	while (start_index != last_index + 1) {
		unsigned long domain_last;
		struct iopt_area *area;

		area = iopt_pages_find_domain_area(pages, start_index);
		if (WARN_ON(!area))
			return -EINVAL;

		domain_last = min(iopt_area_last_index(area), last_index);
		out_pages = raw_pages_from_domain(area->storage_domain, area,
						  start_index, domain_last,
						  out_pages);
		start_index = domain_last + 1;
	}
	return 0;
}

static int iopt_pages_fill_from_mm(struct iopt_pages *pages,
				   struct pfn_reader_user *user,
				   unsigned long start_index,
				   unsigned long last_index,
				   struct page **out_pages)
{
	unsigned long cur_index = start_index;
	int rc;

	while (cur_index != last_index + 1) {
		user->upages = out_pages + (cur_index - start_index);
		rc = pfn_reader_user_pin(user, pages, cur_index, last_index);
		if (rc)
			goto out_unpin;
		cur_index = user->upages_end;
	}
	return 0;

out_unpin:
	if (start_index != cur_index)
		iopt_pages_err_unpin(pages, start_index, cur_index - 1,
				     out_pages);
	return rc;
}
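
/*
 * A worked trace of the tiering done by iopt_pages_fill_xarray() below (span
 * layout invented for the example): filling [0, 9] when [0, 3] is already in
 * the xarray, [4, 6] is only in a domain and [7, 9] is a hole:
 *
 *	[0, 3]: iopt_pages_fill_from_xarray() copies the pages out
 *	[4, 6]: iopt_pages_fill_from_domain(), then pages_to_xarray()
 *	[7, 9]: iopt_pages_fill_from_mm() pins, then pages_to_xarray()
 *
 * A failure after some spans succeeded unwinds with
 * iopt_pages_unfill_xarray() up to xa_end.
 */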

/**
 * iopt_pages_fill_xarray() - Read PFNs
 * @pages: The pages to act on
 * @start_index: The first page index in the range
 * @last_index: The last page index in the range
 * @out_pages: The output array to return the pages, may be NULL
 *
 * This populates the xarray and returns the pages in out_pages. As the slow
 * path this is able to copy pages from other storage tiers into the xarray.
 *
 * On failure the xarray is left unchanged.
 *
 * This is part of the SW iommu interface to read pages for in-kernel use.
 */
int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start_index,
			   unsigned long last_index, struct page **out_pages)
{
	struct interval_tree_double_span_iter span;
	unsigned long xa_end = start_index;
	struct pfn_reader_user user;
	int rc;

	lockdep_assert_held(&pages->mutex);

	pfn_reader_user_init(&user, pages);
	user.upages_len = (last_index - start_index + 1) * sizeof(*out_pages);
	interval_tree_for_each_double_span(&span, &pages->access_itree,
					   &pages->domains_itree, start_index,
					   last_index) {
		struct page **cur_pages;

		if (span.is_used == 1) {
			cur_pages = out_pages + (span.start_used - start_index);
			iopt_pages_fill_from_xarray(pages, span.start_used,
						    span.last_used, cur_pages);
			continue;
		}

		if (span.is_used == 2) {
			cur_pages = out_pages + (span.start_used - start_index);
			iopt_pages_fill_from_domain(pages, span.start_used,
						    span.last_used, cur_pages);
			rc = pages_to_xarray(&pages->pinned_pfns,
					     span.start_used, span.last_used,
					     cur_pages);
			if (rc)
				goto out_clean_xa;
			xa_end = span.last_used + 1;
			continue;
		}

		/* hole */
		cur_pages = out_pages + (span.start_hole - start_index);
		rc = iopt_pages_fill_from_mm(pages, &user, span.start_hole,
					     span.last_hole, cur_pages);
		if (rc)
			goto out_clean_xa;
		rc = pages_to_xarray(&pages->pinned_pfns, span.start_hole,
				     span.last_hole, cur_pages);
		if (rc) {
			iopt_pages_err_unpin(pages, span.start_hole,
					     span.last_hole, cur_pages);
			goto out_clean_xa;
		}
		xa_end = span.last_hole + 1;
	}
	rc = pfn_reader_user_update_pinned(&user, pages);
	if (rc)
		goto out_clean_xa;
	user.upages = NULL;
	pfn_reader_user_destroy(&user, pages);
	return 0;

out_clean_xa:
	if (start_index != xa_end)
		iopt_pages_unfill_xarray(pages, start_index, xa_end - 1);
	user.upages = NULL;
	pfn_reader_user_destroy(&user, pages);
	return rc;
}

/*
 * This uses the pfn_reader instead of taking a shortcut by using the mm. It can
 * do every scenario and is fully consistent with what an iommu_domain would
 * see.
 */
static int iopt_pages_rw_slow(struct iopt_pages *pages,
			      unsigned long start_index,
			      unsigned long last_index, unsigned long offset,
			      void *data, unsigned long length,
			      unsigned int flags)
{
	struct pfn_reader pfns;
	int rc;

	mutex_lock(&pages->mutex);

	rc = pfn_reader_first(&pfns, pages, start_index, last_index);
	if (rc)
		goto out_unlock;

	while (!pfn_reader_done(&pfns)) {
		unsigned long done;

		done = batch_rw(&pfns.batch, data, offset, length, flags);
		data += done;
		length -= done;
		offset = 0;
		pfn_reader_unpin(&pfns);

		rc = pfn_reader_next(&pfns);
		if (rc)
			goto out_destroy;
	}
	if (WARN_ON(length != 0))
		rc = -EINVAL;
out_destroy:
	pfn_reader_destroy(&pfns);
out_unlock:
	mutex_unlock(&pages->mutex);
	return rc;
}
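
/*
 * Three paths implement the rw access, in decreasing strictness and cost:
 * iopt_pages_rw_slow() above goes through the pfn_reader and is fully
 * consistent with what a DMA through an iommu_domain would see,
 * iopt_pages_rw_page() below pins a single page directly, and
 * iopt_pages_rw_access() can fall back to plain
 * copy_to_user()/copy_from_user() when the mm allows it.
 */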

/*
 * A medium speed path that still allows DMA inconsistencies, but doesn't do any
 * memory allocations or interval tree searches.
 */
static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index,
			      unsigned long offset, void *data,
			      unsigned long length, unsigned int flags)
{
	struct page *page = NULL;
	int rc;

	if (!mmget_not_zero(pages->source_mm))
		return iopt_pages_rw_slow(pages, index, index, offset, data,
					  length, flags);

	if (iommufd_should_fail()) {
		rc = -EINVAL;
		goto out_mmput;
	}

	mmap_read_lock(pages->source_mm);
	rc = pin_user_pages_remote(
		pages->source_mm, (uintptr_t)(pages->uptr + index * PAGE_SIZE),
		1, (flags & IOMMUFD_ACCESS_RW_WRITE) ? FOLL_WRITE : 0, &page,
		NULL);
	mmap_read_unlock(pages->source_mm);
	if (rc != 1) {
		if (WARN_ON(rc >= 0))
			rc = -EINVAL;
		goto out_mmput;
	}
	copy_data_page(page, data, offset, length, flags);
	unpin_user_page(page);
	rc = 0;

out_mmput:
	mmput(pages->source_mm);
	return rc;
}
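
/*
 * The single page fast path only applies when the copy stays inside one page.
 * For example (values invented, PAGE_SIZE of 0x1000), start_byte = 0x1ffe and
 * length = 4 gives start_index = 1 but last_index = 2, so
 * iopt_pages_rw_access() below must take the multi-page path even for a
 * 4 byte copy.
 */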

/**
 * iopt_pages_rw_access - Copy to/from a linear slice of the pages
 * @pages: pages to act on
 * @start_byte: First byte of pages to copy to/from
 * @data: Kernel buffer to get/put the data
 * @length: Number of bytes to copy
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * This will find each page in the range, kmap it and then memcpy to/from
 * the given kernel buffer.
 */
int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte,
			 void *data, unsigned long length, unsigned int flags)
{
	unsigned long start_index = start_byte / PAGE_SIZE;
	unsigned long last_index = (start_byte + length - 1) / PAGE_SIZE;
	bool change_mm = current->mm != pages->source_mm;
	int rc = 0;

	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    (flags & __IOMMUFD_ACCESS_RW_SLOW_PATH))
		change_mm = true;

	if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable)
		return -EPERM;

	if (!(flags & IOMMUFD_ACCESS_RW_KTHREAD) && change_mm) {
		if (start_index == last_index)
			return iopt_pages_rw_page(pages, start_index,
						  start_byte % PAGE_SIZE, data,
						  length, flags);
		return iopt_pages_rw_slow(pages, start_index, last_index,
					  start_byte % PAGE_SIZE, data, length,
					  flags);
	}

	/*
	 * Try to copy using copy_to_user(). We do this as a fast path and
	 * ignore any pinning inconsistencies, unlike a real DMA path.
	 */
	if (change_mm) {
		if (!mmget_not_zero(pages->source_mm))
			return iopt_pages_rw_slow(pages, start_index,
						  last_index,
						  start_byte % PAGE_SIZE, data,
						  length, flags);
		kthread_use_mm(pages->source_mm);
	}

	if (flags & IOMMUFD_ACCESS_RW_WRITE) {
		if (copy_to_user(pages->uptr + start_byte, data, length))
			rc = -EFAULT;
	} else {
		if (copy_from_user(data, pages->uptr + start_byte, length))
			rc = -EFAULT;
	}

	if (change_mm) {
		kthread_unuse_mm(pages->source_mm);
		mmput(pages->source_mm);
	}

	return rc;
}

static struct iopt_pages_access *
iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index,
			    unsigned long last)
{
	struct interval_tree_node *node;

	lockdep_assert_held(&pages->mutex);

	/* There can be overlapping ranges in this interval tree */
	for (node = interval_tree_iter_first(&pages->access_itree, index, last);
	     node; node = interval_tree_iter_next(node, index, last))
		if (node->start == index && node->last == last)
			return container_of(node, struct iopt_pages_access,
					    node);
	return NULL;
}

/**
 * iopt_area_add_access() - Record an in-kernel access for PFNs
 * @area: The source of PFNs
 * @start_index: First page index
 * @last_index: Inclusive last page index
 * @out_pages: Output list of struct page's representing the PFNs
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Record that an in-kernel access will be accessing the pages, ensure they are
 * pinned, and return the PFNs as a simple list of 'struct page *'.
 *
 * This should be undone through a matching call to iopt_area_remove_access()
 */
int iopt_area_add_access(struct iopt_area *area, unsigned long start_index,
			 unsigned long last_index, struct page **out_pages,
			 unsigned int flags)
{
	struct iopt_pages *pages = area->pages;
	struct iopt_pages_access *access;
	int rc;

	if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable)
		return -EPERM;

	mutex_lock(&pages->mutex);
	access = iopt_pages_get_exact_access(pages, start_index, last_index);
	if (access) {
		area->num_accesses++;
		access->users++;
		iopt_pages_fill_from_xarray(pages, start_index, last_index,
					    out_pages);
		mutex_unlock(&pages->mutex);
		return 0;
	}

	access = kzalloc(sizeof(*access), GFP_KERNEL_ACCOUNT);
	if (!access) {
		rc = -ENOMEM;
		goto err_unlock;
	}

	rc = iopt_pages_fill_xarray(pages, start_index, last_index, out_pages);
	if (rc)
		goto err_free;

	access->node.start = start_index;
	access->node.last = last_index;
	access->users = 1;
	area->num_accesses++;
	interval_tree_insert(&access->node, &pages->access_itree);
	mutex_unlock(&pages->mutex);
	return 0;

err_free:
	kfree(access);
err_unlock:
	mutex_unlock(&pages->mutex);
	return rc;
}

/**
 * iopt_area_remove_access() - Release an in-kernel access for PFNs
 * @area: The source of PFNs
 * @start_index: First page index
 * @last_index: Inclusive last page index
 *
 * Undo iopt_area_add_access() and unpin the pages if necessary. The caller
 * must stop using the PFNs before calling this.
 */
void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index,
			     unsigned long last_index)
{
	struct iopt_pages *pages = area->pages;
	struct iopt_pages_access *access;

	mutex_lock(&pages->mutex);
	access = iopt_pages_get_exact_access(pages, start_index, last_index);
	if (WARN_ON(!access))
		goto out_unlock;

	WARN_ON(area->num_accesses == 0 || access->users == 0);
	area->num_accesses--;
	access->users--;
	if (access->users)
		goto out_unlock;

	interval_tree_remove(&access->node, &pages->access_itree);
	iopt_pages_unfill_xarray(pages, start_index, last_index);
	kfree(access);
out_unlock:
	mutex_unlock(&pages->mutex);
}
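
/*
 * Sketch of the pairing contract (illustrative values and flags): an
 * in-kernel user that wants read access to pages [idx, idx + n - 1] does
 * roughly
 *
 *	rc = iopt_area_add_access(area, idx, idx + n - 1, out_pages, 0);
 *	if (rc)
 *		return rc;
 *	// ... use out_pages ...
 *	iopt_area_remove_access(area, idx, idx + n - 1);
 *
 * with identical index ranges in both calls, since
 * iopt_pages_get_exact_access() only matches exact [start, last] pairs.
 */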