xref: /openbmc/linux/drivers/vfio/iova_bitmap.c (revision 9b84f0f7)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2022, Oracle and/or its affiliates.
4  * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
5  */
6 #include <linux/iova_bitmap.h>
7 #include <linux/mm.h>
8 #include <linux/highmem.h>
9 
/* Number of bitmap bits held by one base page of bitmap data */
#define BITS_PER_PAGE (BITS_PER_BYTE * PAGE_SIZE)
11 
/*
 * struct iova_bitmap_map - The pinned window of an IOVA bitmap
 *
 * Tracks the user pages of bitmap data that are currently pinned.
 *
 * A dirty tracker, for instance, is handed a struct iova_bitmap, which is
 * the general object for walking the whole IOVA range. The struct
 * iova_bitmap_map embedded in it describes only the subset of that IOVA
 * space whose bitmap pages are pinned at the moment.
 *
 * Users do not need to know the exact location of the bits in the bitmap.
 * The only API they use to record data is iova_bitmap_set(), which takes
 * an IOVA *range* and sets the bits that correspond to it.
 *
 * The bitmap is an array of u64 where each bit represents an IOVA range
 * of (1 << pgshift) bytes. With pfn = iova >> pgshift, computed on the
 * IOVA relative to the start of the bitmap, the bit for an IOVA is:
 *
 *   data[pfn / 64] & (1ULL << (pfn % 64))
 */
struct iova_bitmap_map {
	/* IOVA that bit 0 of the first pinned page stands for */
	unsigned long iova;

	/* log2 of the IOVA page size covered by a single bit */
	unsigned long pgshift;

	/* byte offset into the first pinned page where the bitmap data starts */
	unsigned long pgoff;

	/* how many entries of @pages are currently pinned */
	unsigned long npages;

	/* array of the pinned pages that hold the bitmap data */
	struct page **pages;
};
49 
50 /*
51  * struct iova_bitmap - The IOVA bitmap object
52  *
53  * Main data structure for iterating over the bitmap data.
54  *
55  * Abstracts the pinning work and iterates in IOVA ranges.
56  * It uses a windowing scheme and pins the bitmap in relatively
57  * big ranges e.g.
58  *
59  * The bitmap object uses one base page to store all the pinned pages
60  * pointers related to the bitmap. For sizeof(struct page*) == 8 it stores
61  * 512 struct page pointers which, if the base page size is 4K, it means
62  * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is
63  * also 4K then the range window to iterate is 64G.
64  *
65  * For example iterating on a total IOVA range of 4G..128G, it will walk
66  * through this set of ranges:
67  *
68  *    4G  -  68G-1 (64G)
69  *    68G - 128G-1 (64G)
70  *
71  * An example of the APIs on how to use/iterate over the IOVA bitmap:
72  *
73  *   bitmap = iova_bitmap_alloc(iova, length, page_size, data);
74  *   if (IS_ERR(bitmap))
75  *       return PTR_ERR(bitmap);
76  *
77  *   ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn);
78  *
79  *   iova_bitmap_free(bitmap);
80  *
81  * Each iteration of the @dirty_reporter_fn is called with a unique @iova
82  * and @length argument, indicating the current range available through the
83  * iova_bitmap. The @dirty_reporter_fn uses iova_bitmap_set() to mark dirty
84  * areas (@iova_length) within that provided range, as following:
85  *
86  *   iova_bitmap_set(bitmap, iova, iova_length);
87  *
88  * The internals of the object uses an index @mapped_base_index that indexes
89  * which u64 word of the bitmap is mapped, up to @mapped_total_index.
90  * Those keep being incremented until @mapped_total_index is reached while
91  * mapping up to PAGE_SIZE / sizeof(struct page*) maximum of pages.
92  *
93  * The IOVA bitmap is usually located on what tracks DMA mapped ranges or
94  * some form of IOVA range tracking that co-relates to the user passed
95  * bitmap.
96  */
97 struct iova_bitmap {
98 	/* IOVA range representing the currently mapped bitmap data */
99 	struct iova_bitmap_map mapped;
100 
101 	/* userspace address of the bitmap */
102 	u64 __user *bitmap;
103 
104 	/* u64 index that @mapped points to */
105 	unsigned long mapped_base_index;
106 
107 	/* how many u64 can we walk in total */
108 	unsigned long mapped_total_index;
109 
110 	/* base IOVA of the whole bitmap */
111 	unsigned long iova;
112 
113 	/* length of the IOVA range for the whole bitmap */
114 	size_t length;
115 };
116 
117 /*
118  * Converts a relative IOVA to a bitmap index.
119  * This function provides the index into the u64 array (bitmap::bitmap)
120  * for a given IOVA offset.
121  * Relative IOVA means relative to the bitmap::mapped base IOVA
122  * (stored in mapped::iova). All computations in this file are done using
123  * relative IOVAs and thus avoid an extra subtraction against mapped::iova.
124  * The user API iova_bitmap_set() always uses a regular absolute IOVAs.
125  */
126 static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap,
127 						 unsigned long iova)
128 {
129 	unsigned long pgsize = 1 << bitmap->mapped.pgshift;
130 
131 	return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize);
132 }
133 
134 /*
135  * Converts a bitmap index to a *relative* IOVA.
136  */
137 static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap *bitmap,
138 						 unsigned long index)
139 {
140 	unsigned long pgshift = bitmap->mapped.pgshift;
141 
142 	return (index * BITS_PER_TYPE(*bitmap->bitmap)) << pgshift;
143 }
144 
145 /*
146  * Returns the base IOVA of the mapped range.
147  */
148 static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap)
149 {
150 	unsigned long skip = bitmap->mapped_base_index;
151 
152 	return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip);
153 }
154 
155 /*
156  * Pins the bitmap user pages for the current range window.
157  * This is internal to IOVA bitmap and called when advancing the
158  * index (@mapped_base_index) or allocating the bitmap.
159  */
160 static int iova_bitmap_get(struct iova_bitmap *bitmap)
161 {
162 	struct iova_bitmap_map *mapped = &bitmap->mapped;
163 	unsigned long npages;
164 	u64 __user *addr;
165 	long ret;
166 
167 	/*
168 	 * @mapped_base_index is the index of the currently mapped u64 words
169 	 * that we have access. Anything before @mapped_base_index is not
170 	 * mapped. The range @mapped_base_index .. @mapped_total_index-1 is
171 	 * mapped but capped at a maximum number of pages.
172 	 */
173 	npages = DIV_ROUND_UP((bitmap->mapped_total_index -
174 			       bitmap->mapped_base_index) *
175 			       sizeof(*bitmap->bitmap), PAGE_SIZE);
176 
177 	/*
178 	 * We always cap at max number of 'struct page' a base page can fit.
179 	 * This is, for example, on x86 means 2M of bitmap data max.
180 	 */
181 	npages = min(npages,  PAGE_SIZE / sizeof(struct page *));
182 
183 	/*
184 	 * Bitmap address to be pinned is calculated via pointer arithmetic
185 	 * with bitmap u64 word index.
186 	 */
187 	addr = bitmap->bitmap + bitmap->mapped_base_index;
188 
189 	ret = pin_user_pages_fast((unsigned long)addr, npages,
190 				  FOLL_WRITE, mapped->pages);
191 	if (ret <= 0)
192 		return -EFAULT;
193 
194 	mapped->npages = (unsigned long)ret;
195 	/* Base IOVA where @pages point to i.e. bit 0 of the first page */
196 	mapped->iova = iova_bitmap_mapped_iova(bitmap);
197 
198 	/*
199 	 * offset of the page where pinned pages bit 0 is located.
200 	 * This handles the case where the bitmap is not PAGE_SIZE
201 	 * aligned.
202 	 */
203 	mapped->pgoff = offset_in_page(addr);
204 	return 0;
205 }
206 
207 /*
208  * Unpins the bitmap user pages and clears @npages
209  * (un)pinning is abstracted from API user and it's done when advancing
210  * the index or freeing the bitmap.
211  */
212 static void iova_bitmap_put(struct iova_bitmap *bitmap)
213 {
214 	struct iova_bitmap_map *mapped = &bitmap->mapped;
215 
216 	if (mapped->npages) {
217 		unpin_user_pages(mapped->pages, mapped->npages);
218 		mapped->npages = 0;
219 	}
220 }
221 
222 /**
223  * iova_bitmap_alloc() - Allocates an IOVA bitmap object
224  * @iova: Start address of the IOVA range
225  * @length: Length of the IOVA range
226  * @page_size: Page size of the IOVA bitmap. It defines what each bit
227  *             granularity represents
228  * @data: Userspace address of the bitmap
229  *
230  * Allocates an IOVA object and initializes all its fields including the
231  * first user pages of @data.
232  *
233  * Return: A pointer to a newly allocated struct iova_bitmap
234  * or ERR_PTR() on error.
235  */
236 struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
237 				      unsigned long page_size, u64 __user *data)
238 {
239 	struct iova_bitmap_map *mapped;
240 	struct iova_bitmap *bitmap;
241 	int rc;
242 
243 	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
244 	if (!bitmap)
245 		return ERR_PTR(-ENOMEM);
246 
247 	mapped = &bitmap->mapped;
248 	mapped->pgshift = __ffs(page_size);
249 	bitmap->bitmap = data;
250 	bitmap->mapped_total_index =
251 		iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
252 	bitmap->iova = iova;
253 	bitmap->length = length;
254 	mapped->iova = iova;
255 	mapped->pages = (struct page **)__get_free_page(GFP_KERNEL);
256 	if (!mapped->pages) {
257 		rc = -ENOMEM;
258 		goto err;
259 	}
260 
261 	rc = iova_bitmap_get(bitmap);
262 	if (rc)
263 		goto err;
264 	return bitmap;
265 
266 err:
267 	iova_bitmap_free(bitmap);
268 	return ERR_PTR(rc);
269 }
270 
271 /**
272  * iova_bitmap_free() - Frees an IOVA bitmap object
273  * @bitmap: IOVA bitmap to free
274  *
275  * It unpins and releases pages array memory and clears any leftover
276  * state.
277  */
278 void iova_bitmap_free(struct iova_bitmap *bitmap)
279 {
280 	struct iova_bitmap_map *mapped = &bitmap->mapped;
281 
282 	iova_bitmap_put(bitmap);
283 
284 	if (mapped->pages) {
285 		free_page((unsigned long)mapped->pages);
286 		mapped->pages = NULL;
287 	}
288 
289 	kfree(bitmap);
290 }
291 
292 /*
293  * Returns the remaining bitmap indexes from mapped_total_index to process for
294  * the currently pinned bitmap pages.
295  */
296 static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
297 {
298 	unsigned long remaining;
299 
300 	remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
301 	remaining = min_t(unsigned long, remaining,
302 	      (bitmap->mapped.npages << PAGE_SHIFT) / sizeof(*bitmap->bitmap));
303 
304 	return remaining;
305 }
306 
307 /*
308  * Returns the length of the mapped IOVA range.
309  */
310 static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap)
311 {
312 	unsigned long max_iova = bitmap->iova + bitmap->length - 1;
313 	unsigned long iova = iova_bitmap_mapped_iova(bitmap);
314 	unsigned long remaining;
315 
316 	/*
317 	 * iova_bitmap_mapped_remaining() returns a number of indexes which
318 	 * when converted to IOVA gives us a max length that the bitmap
319 	 * pinned data can cover. Afterwards, that is capped to
320 	 * only cover the IOVA range in @bitmap::iova .. @bitmap::length.
321 	 */
322 	remaining = iova_bitmap_index_to_offset(bitmap,
323 			iova_bitmap_mapped_remaining(bitmap));
324 
325 	if (iova + remaining - 1 > max_iova)
326 		remaining -= ((iova + remaining - 1) - max_iova);
327 
328 	return remaining;
329 }
330 
331 /*
332  * Returns true if there's not more data to iterate.
333  */
334 static bool iova_bitmap_done(struct iova_bitmap *bitmap)
335 {
336 	return bitmap->mapped_base_index >= bitmap->mapped_total_index;
337 }
338 
339 /*
340  * Advances to the next range, releases the current pinned
341  * pages and pins the next set of bitmap pages.
342  * Returns 0 on success or otherwise errno.
343  */
344 static int iova_bitmap_advance(struct iova_bitmap *bitmap)
345 {
346 	unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1;
347 	unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1;
348 
349 	bitmap->mapped_base_index += count;
350 
351 	iova_bitmap_put(bitmap);
352 	if (iova_bitmap_done(bitmap))
353 		return 0;
354 
355 	/* When advancing the index we pin the next set of bitmap pages */
356 	return iova_bitmap_get(bitmap);
357 }
358 
359 /**
360  * iova_bitmap_for_each() - Iterates over the bitmap
361  * @bitmap: IOVA bitmap to iterate
362  * @opaque: Additional argument to pass to the callback
363  * @fn: Function that gets called for each IOVA range
364  *
365  * Helper function to iterate over bitmap data representing a portion of IOVA
366  * space. It hides the complexity of iterating bitmaps and translating the
367  * mapped bitmap user pages into IOVA ranges to process.
368  *
369  * Return: 0 on success, and an error on failure either upon
370  * iteration or when the callback returns an error.
371  */
372 int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
373 			 iova_bitmap_fn_t fn)
374 {
375 	int ret = 0;
376 
377 	for (; !iova_bitmap_done(bitmap) && !ret;
378 	     ret = iova_bitmap_advance(bitmap)) {
379 		ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap),
380 			 iova_bitmap_mapped_length(bitmap), opaque);
381 		if (ret)
382 			break;
383 	}
384 
385 	return ret;
386 }
387 
388 /**
389  * iova_bitmap_set() - Records an IOVA range in bitmap
390  * @bitmap: IOVA bitmap
391  * @iova: IOVA to start
392  * @length: IOVA range length
393  *
394  * Set the bits corresponding to the range [iova .. iova+length-1] in
395  * the user bitmap.
396  *
397  * Return: The number of bits set.
398  */
399 void iova_bitmap_set(struct iova_bitmap *bitmap,
400 		     unsigned long iova, size_t length)
401 {
402 	struct iova_bitmap_map *mapped = &bitmap->mapped;
403 	unsigned long offset = (iova - mapped->iova) >> mapped->pgshift;
404 	unsigned long nbits = max_t(unsigned long, 1, length >> mapped->pgshift);
405 	unsigned long page_idx = offset / BITS_PER_PAGE;
406 	unsigned long page_offset = mapped->pgoff;
407 	void *kaddr;
408 
409 	offset = offset % BITS_PER_PAGE;
410 
411 	do {
412 		unsigned long size = min(BITS_PER_PAGE - offset, nbits);
413 
414 		kaddr = kmap_local_page(mapped->pages[page_idx]);
415 		bitmap_set(kaddr + page_offset, offset, size);
416 		kunmap_local(kaddr);
417 		page_offset = offset = 0;
418 		nbits -= size;
419 		page_idx++;
420 	} while (nbits > 0);
421 }
422 EXPORT_SYMBOL_GPL(iova_bitmap_set);
423