xref: /openbmc/linux/drivers/vfio/iova_bitmap.c (revision 913447d0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2022, Oracle and/or its affiliates.
4  * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
5  */
6 #include <linux/iova_bitmap.h>
7 #include <linux/mm.h>
8 #include <linux/slab.h>
9 #include <linux/highmem.h>
10 
/* Number of bitmap bits held by one page worth of bitmap data. */
#define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
12 
/*
 * struct iova_bitmap_map - A bitmap representing an IOVA range
 *
 * Main data structure for tracking mapped user pages of bitmap data.
 *
 * For example, for something recording dirty IOVAs, it will be provided a
 * struct iova_bitmap structure, as a general structure for iterating the
 * total IOVA range. The struct iova_bitmap_map, though, represents the
 * subset of said IOVA space that is pinned by its parent structure (struct
 * iova_bitmap).
 *
 * The user does not need to know the exact location of the bits in the
 * bitmap. From the user's perspective the only API available is
 * iova_bitmap_set(), which records the IOVA *range* in the bitmap by
 * setting the corresponding bits.
 *
 * The bitmap is an array of u64 where each bit represents an IOVA range
 * of (1 << pgshift). With @iova taken relative to the base IOVA of the
 * bitmap, the bit that tracks a given IOVA is:
 *
 *   data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
 */
struct iova_bitmap_map {
	/* base IOVA representing bit 0 of the first page */
	unsigned long iova;

	/* log2 of the page size covered by each bit of the bitmap */
	unsigned long pgshift;

	/* byte offset, within the first pinned user page, of the bitmap data */
	unsigned long pgoff;

	/* number of pages currently pinned (0 when nothing is pinned) */
	unsigned long npages;

	/* pinned pages representing the bitmap data */
	struct page **pages;
};
50 
/*
 * struct iova_bitmap - The IOVA bitmap object
 *
 * Main data structure for iterating over the bitmap data.
 *
 * Abstracts the pinning work and iterates in IOVA ranges.
 * It uses a windowing scheme and pins the bitmap in relatively
 * big ranges, as described below.
 *
 * The bitmap object uses one base page to store all the pinned pages
 * pointers related to the bitmap. For sizeof(struct page*) == 8 it stores
 * 512 struct page pointers which, if the base page size is 4K, means
 * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is
 * also 4K then the range window to iterate is 64G.
 *
 * For example, iterating on a total IOVA range of 4G..128G, it will walk
 * through this set of ranges:
 *
 *    4G  -  68G-1 (64G)
 *    68G - 128G-1 (60G)
 *
 * An example of the APIs on how to use/iterate over the IOVA bitmap:
 *
 *   bitmap = iova_bitmap_alloc(iova, length, page_size, data);
 *   if (IS_ERR(bitmap))
 *       return PTR_ERR(bitmap);
 *
 *   ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn);
 *
 *   iova_bitmap_free(bitmap);
 *
 * Each iteration of the @dirty_reporter_fn is called with a unique @iova
 * and @length argument, indicating the current range available through the
 * iova_bitmap. The @dirty_reporter_fn uses iova_bitmap_set() to mark dirty
 * areas (@iova_length) within that provided range, as follows:
 *
 *   iova_bitmap_set(bitmap, iova, iova_length);
 *
 * The internals of the object use an index @mapped_base_index that indexes
 * which u64 word of the bitmap is mapped, up to @mapped_total_index.
 * It keeps being incremented until @mapped_total_index is reached, while
 * mapping up to a maximum of PAGE_SIZE / sizeof(struct page*) pages at a
 * time.
 *
 * The IOVA bitmap is usually located on what tracks DMA mapped ranges or
 * some form of IOVA range tracking that co-relates to the user passed
 * bitmap.
 */
struct iova_bitmap {
	/* IOVA range representing the currently mapped bitmap data */
	struct iova_bitmap_map mapped;

	/* userspace address of the bitmap */
	u64 __user *bitmap;

	/* index of the first u64 word that @mapped points to */
	unsigned long mapped_base_index;

	/* total number of u64 words needed to cover @length */
	unsigned long mapped_total_index;

	/* base IOVA of the whole bitmap */
	unsigned long iova;

	/* length of the IOVA range for the whole bitmap */
	size_t length;
};
117 
118 /*
119  * Converts a relative IOVA to a bitmap index.
120  * This function provides the index into the u64 array (bitmap::bitmap)
121  * for a given IOVA offset.
122  * Relative IOVA means relative to the bitmap::mapped base IOVA
123  * (stored in mapped::iova). All computations in this file are done using
124  * relative IOVAs and thus avoid an extra subtraction against mapped::iova.
125  * The user API iova_bitmap_set() always uses a regular absolute IOVAs.
126  */
127 static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap,
128 						 unsigned long iova)
129 {
130 	unsigned long pgsize = 1 << bitmap->mapped.pgshift;
131 
132 	return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize);
133 }
134 
135 /*
136  * Converts a bitmap index to a *relative* IOVA.
137  */
138 static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap *bitmap,
139 						 unsigned long index)
140 {
141 	unsigned long pgshift = bitmap->mapped.pgshift;
142 
143 	return (index * BITS_PER_TYPE(*bitmap->bitmap)) << pgshift;
144 }
145 
146 /*
147  * Returns the base IOVA of the mapped range.
148  */
149 static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap)
150 {
151 	unsigned long skip = bitmap->mapped_base_index;
152 
153 	return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip);
154 }
155 
156 /*
157  * Pins the bitmap user pages for the current range window.
158  * This is internal to IOVA bitmap and called when advancing the
159  * index (@mapped_base_index) or allocating the bitmap.
160  */
161 static int iova_bitmap_get(struct iova_bitmap *bitmap)
162 {
163 	struct iova_bitmap_map *mapped = &bitmap->mapped;
164 	unsigned long npages;
165 	u64 __user *addr;
166 	long ret;
167 
168 	/*
169 	 * @mapped_base_index is the index of the currently mapped u64 words
170 	 * that we have access. Anything before @mapped_base_index is not
171 	 * mapped. The range @mapped_base_index .. @mapped_total_index-1 is
172 	 * mapped but capped at a maximum number of pages.
173 	 */
174 	npages = DIV_ROUND_UP((bitmap->mapped_total_index -
175 			       bitmap->mapped_base_index) *
176 			       sizeof(*bitmap->bitmap), PAGE_SIZE);
177 
178 	/*
179 	 * We always cap at max number of 'struct page' a base page can fit.
180 	 * This is, for example, on x86 means 2M of bitmap data max.
181 	 */
182 	npages = min(npages,  PAGE_SIZE / sizeof(struct page *));
183 
184 	/*
185 	 * Bitmap address to be pinned is calculated via pointer arithmetic
186 	 * with bitmap u64 word index.
187 	 */
188 	addr = bitmap->bitmap + bitmap->mapped_base_index;
189 
190 	ret = pin_user_pages_fast((unsigned long)addr, npages,
191 				  FOLL_WRITE, mapped->pages);
192 	if (ret <= 0)
193 		return -EFAULT;
194 
195 	mapped->npages = (unsigned long)ret;
196 	/* Base IOVA where @pages point to i.e. bit 0 of the first page */
197 	mapped->iova = iova_bitmap_mapped_iova(bitmap);
198 
199 	/*
200 	 * offset of the page where pinned pages bit 0 is located.
201 	 * This handles the case where the bitmap is not PAGE_SIZE
202 	 * aligned.
203 	 */
204 	mapped->pgoff = offset_in_page(addr);
205 	return 0;
206 }
207 
208 /*
209  * Unpins the bitmap user pages and clears @npages
210  * (un)pinning is abstracted from API user and it's done when advancing
211  * the index or freeing the bitmap.
212  */
213 static void iova_bitmap_put(struct iova_bitmap *bitmap)
214 {
215 	struct iova_bitmap_map *mapped = &bitmap->mapped;
216 
217 	if (mapped->npages) {
218 		unpin_user_pages(mapped->pages, mapped->npages);
219 		mapped->npages = 0;
220 	}
221 }
222 
223 /**
224  * iova_bitmap_alloc() - Allocates an IOVA bitmap object
225  * @iova: Start address of the IOVA range
226  * @length: Length of the IOVA range
227  * @page_size: Page size of the IOVA bitmap. It defines what each bit
228  *             granularity represents
229  * @data: Userspace address of the bitmap
230  *
231  * Allocates an IOVA object and initializes all its fields including the
232  * first user pages of @data.
233  *
234  * Return: A pointer to a newly allocated struct iova_bitmap
235  * or ERR_PTR() on error.
236  */
237 struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
238 				      unsigned long page_size, u64 __user *data)
239 {
240 	struct iova_bitmap_map *mapped;
241 	struct iova_bitmap *bitmap;
242 	int rc;
243 
244 	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
245 	if (!bitmap)
246 		return ERR_PTR(-ENOMEM);
247 
248 	mapped = &bitmap->mapped;
249 	mapped->pgshift = __ffs(page_size);
250 	bitmap->bitmap = data;
251 	bitmap->mapped_total_index =
252 		iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
253 	bitmap->iova = iova;
254 	bitmap->length = length;
255 	mapped->iova = iova;
256 	mapped->pages = (struct page **)__get_free_page(GFP_KERNEL);
257 	if (!mapped->pages) {
258 		rc = -ENOMEM;
259 		goto err;
260 	}
261 
262 	rc = iova_bitmap_get(bitmap);
263 	if (rc)
264 		goto err;
265 	return bitmap;
266 
267 err:
268 	iova_bitmap_free(bitmap);
269 	return ERR_PTR(rc);
270 }
271 
272 /**
273  * iova_bitmap_free() - Frees an IOVA bitmap object
274  * @bitmap: IOVA bitmap to free
275  *
276  * It unpins and releases pages array memory and clears any leftover
277  * state.
278  */
279 void iova_bitmap_free(struct iova_bitmap *bitmap)
280 {
281 	struct iova_bitmap_map *mapped = &bitmap->mapped;
282 
283 	iova_bitmap_put(bitmap);
284 
285 	if (mapped->pages) {
286 		free_page((unsigned long)mapped->pages);
287 		mapped->pages = NULL;
288 	}
289 
290 	kfree(bitmap);
291 }
292 
293 /*
294  * Returns the remaining bitmap indexes from mapped_total_index to process for
295  * the currently pinned bitmap pages.
296  */
297 static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
298 {
299 	unsigned long remaining, bytes;
300 
301 	/* Cap to one page in the first iteration, if PAGE_SIZE unaligned. */
302 	bytes = !bitmap->mapped.pgoff ? bitmap->mapped.npages << PAGE_SHIFT :
303 					PAGE_SIZE - bitmap->mapped.pgoff;
304 
305 	remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
306 	remaining = min_t(unsigned long, remaining,
307 			  bytes / sizeof(*bitmap->bitmap));
308 
309 	return remaining;
310 }
311 
312 /*
313  * Returns the length of the mapped IOVA range.
314  */
315 static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap)
316 {
317 	unsigned long max_iova = bitmap->iova + bitmap->length - 1;
318 	unsigned long iova = iova_bitmap_mapped_iova(bitmap);
319 	unsigned long remaining;
320 
321 	/*
322 	 * iova_bitmap_mapped_remaining() returns a number of indexes which
323 	 * when converted to IOVA gives us a max length that the bitmap
324 	 * pinned data can cover. Afterwards, that is capped to
325 	 * only cover the IOVA range in @bitmap::iova .. @bitmap::length.
326 	 */
327 	remaining = iova_bitmap_index_to_offset(bitmap,
328 			iova_bitmap_mapped_remaining(bitmap));
329 
330 	if (iova + remaining - 1 > max_iova)
331 		remaining -= ((iova + remaining - 1) - max_iova);
332 
333 	return remaining;
334 }
335 
336 /*
337  * Returns true if there's not more data to iterate.
338  */
339 static bool iova_bitmap_done(struct iova_bitmap *bitmap)
340 {
341 	return bitmap->mapped_base_index >= bitmap->mapped_total_index;
342 }
343 
344 /*
345  * Advances to the next range, releases the current pinned
346  * pages and pins the next set of bitmap pages.
347  * Returns 0 on success or otherwise errno.
348  */
349 static int iova_bitmap_advance(struct iova_bitmap *bitmap)
350 {
351 	unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1;
352 	unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1;
353 
354 	bitmap->mapped_base_index += count;
355 
356 	iova_bitmap_put(bitmap);
357 	if (iova_bitmap_done(bitmap))
358 		return 0;
359 
360 	/* When advancing the index we pin the next set of bitmap pages */
361 	return iova_bitmap_get(bitmap);
362 }
363 
364 /**
365  * iova_bitmap_for_each() - Iterates over the bitmap
366  * @bitmap: IOVA bitmap to iterate
367  * @opaque: Additional argument to pass to the callback
368  * @fn: Function that gets called for each IOVA range
369  *
370  * Helper function to iterate over bitmap data representing a portion of IOVA
371  * space. It hides the complexity of iterating bitmaps and translating the
372  * mapped bitmap user pages into IOVA ranges to process.
373  *
374  * Return: 0 on success, and an error on failure either upon
375  * iteration or when the callback returns an error.
376  */
377 int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
378 			 iova_bitmap_fn_t fn)
379 {
380 	int ret = 0;
381 
382 	for (; !iova_bitmap_done(bitmap) && !ret;
383 	     ret = iova_bitmap_advance(bitmap)) {
384 		ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap),
385 			 iova_bitmap_mapped_length(bitmap), opaque);
386 		if (ret)
387 			break;
388 	}
389 
390 	return ret;
391 }
392 
393 /**
394  * iova_bitmap_set() - Records an IOVA range in bitmap
395  * @bitmap: IOVA bitmap
396  * @iova: IOVA to start
397  * @length: IOVA range length
398  *
399  * Set the bits corresponding to the range [iova .. iova+length-1] in
400  * the user bitmap.
401  *
402  * Return: The number of bits set.
403  */
404 void iova_bitmap_set(struct iova_bitmap *bitmap,
405 		     unsigned long iova, size_t length)
406 {
407 	struct iova_bitmap_map *mapped = &bitmap->mapped;
408 	unsigned long offset = (iova - mapped->iova) >> mapped->pgshift;
409 	unsigned long nbits = max_t(unsigned long, 1, length >> mapped->pgshift);
410 	unsigned long page_idx = offset / BITS_PER_PAGE;
411 	unsigned long page_offset = mapped->pgoff;
412 	void *kaddr;
413 
414 	offset = offset % BITS_PER_PAGE;
415 
416 	do {
417 		unsigned long size = min(BITS_PER_PAGE - offset, nbits);
418 
419 		kaddr = kmap_local_page(mapped->pages[page_idx]);
420 		bitmap_set(kaddr + page_offset, offset, size);
421 		kunmap_local(kaddr);
422 		page_offset = offset = 0;
423 		nbits -= size;
424 		page_idx++;
425 	} while (nbits > 0);
426 }
427 EXPORT_SYMBOL_GPL(iova_bitmap_set);
428