xref: /openbmc/linux/lib/iov_iter.c (revision d37cf9b63113f13d742713881ce691fc615d8b3b)
1  // SPDX-License-Identifier: GPL-2.0-only
2  #include <crypto/hash.h>
3  #include <linux/export.h>
4  #include <linux/bvec.h>
5  #include <linux/fault-inject-usercopy.h>
6  #include <linux/uio.h>
7  #include <linux/pagemap.h>
8  #include <linux/highmem.h>
9  #include <linux/slab.h>
10  #include <linux/vmalloc.h>
11  #include <linux/splice.h>
12  #include <linux/compat.h>
13  #include <net/checksum.h>
14  #include <linux/scatterlist.h>
15  #include <linux/instrumented.h>
16  
/*
 * covers ubuf and kbuf alike
 *
 * Walk the single contiguous buffer of a ubuf/kbuf iterator.  STEP is
 * handed 'base' (current position) and 'len' (bytes available) and must
 * evaluate to the number of bytes it did NOT process; the consumed
 * amount advances i->iov_offset and is reported back through n.
 */
#define iterate_buf(i, n, base, len, off, __p, STEP) {		\
	size_t __maybe_unused off = 0;				\
	len = n;						\
	base = __p + i->iov_offset;				\
	len -= (STEP);						\
	i->iov_offset += len;					\
	n = len;						\
}
26  
/*
 * covers iovec and kvec alike
 *
 * Walk the segment array __p starting i->iov_offset bytes into the
 * first segment.  For every non-empty chunk, STEP evaluates to the
 * number of bytes it did NOT process; a non-zero remainder terminates
 * the walk early (e.g. a faulting user copy).  On exit i->iov_offset
 * is the offset into the final segment, n holds the total bytes
 * consumed, and __p has been advanced so the caller can fix up
 * nr_segs and the segment pointer.
 */
#define iterate_iovec(i, n, base, len, off, __p, STEP) {	\
	size_t off = 0;						\
	size_t skip = i->iov_offset;				\
	do {							\
		len = min(n, __p->iov_len - skip);		\
		if (likely(len)) {				\
			base = __p->iov_base + skip;		\
			len -= (STEP);				\
			off += len;				\
			skip += len;				\
			n -= len;				\
			if (skip < __p->iov_len)		\
				break;				\
		}						\
		__p++;						\
		skip = 0;					\
	} while (n);						\
	i->iov_offset = skip;					\
	n = off;						\
}
48  
/*
 * Walk a bio_vec array.  Each chunk is bounded both by the segment
 * length and by a page boundary, so the relevant page can be mapped
 * with kmap_local_page() for the duration of STEP.  STEP evaluates to
 * the number of bytes left unprocessed; a non-zero 'left' ends the
 * walk early.  On exit i->iov_offset/n are updated as for
 * iterate_iovec(), with p advanced for the caller to fix up.
 */
#define iterate_bvec(i, n, base, len, off, p, STEP) {		\
	size_t off = 0;						\
	unsigned skip = i->iov_offset;				\
	while (n) {						\
		unsigned offset = p->bv_offset + skip;		\
		unsigned left;					\
		void *kaddr = kmap_local_page(p->bv_page +	\
					offset / PAGE_SIZE);	\
		base = kaddr + offset % PAGE_SIZE;		\
		len = min(min(n, (size_t)(p->bv_len - skip)),	\
		     (size_t)(PAGE_SIZE - offset % PAGE_SIZE));	\
		left = (STEP);					\
		kunmap_local(kaddr);				\
		len -= left;					\
		off += len;					\
		skip += len;					\
		if (skip == p->bv_len) {			\
			skip = 0;				\
			p++;					\
		}						\
		n -= len;					\
		if (left)					\
			break;					\
	}							\
	i->iov_offset = skip;					\
	n = off;						\
}
76  
/*
 * Walk the folios of an xarray-backed iterator, starting at byte
 * position i->xarray_start + i->iov_offset.  Each folio is mapped a
 * page-sized chunk at a time with kmap_local_folio() for STEP, which
 * evaluates to the bytes left unprocessed.  The local label __out is
 * needed to break out of the doubly-nested loop while still dropping
 * the RCU read lock.  On exit i->iov_offset is advanced and n holds
 * the bytes consumed.
 */
#define iterate_xarray(i, n, base, len, __off, STEP) {		\
	__label__ __out;					\
	size_t __off = 0;					\
	struct folio *folio;					\
	loff_t start = i->xarray_start + i->iov_offset;		\
	pgoff_t index = start / PAGE_SIZE;			\
	XA_STATE(xas, i->xarray, index);			\
								\
	len = PAGE_SIZE - offset_in_page(start);		\
	rcu_read_lock();					\
	xas_for_each(&xas, folio, ULONG_MAX) {			\
		unsigned left;					\
		size_t offset;					\
		if (xas_retry(&xas, folio))			\
			continue;				\
		if (WARN_ON(xa_is_value(folio)))		\
			break;					\
		if (WARN_ON(folio_test_hugetlb(folio)))		\
			break;					\
		offset = offset_in_folio(folio, start + __off);	\
		while (offset < folio_size(folio)) {		\
			base = kmap_local_folio(folio, offset);	\
			len = min(n, len);			\
			left = (STEP);				\
			kunmap_local(base);			\
			len -= left;				\
			__off += len;				\
			n -= len;				\
			if (left || n == 0)			\
				goto __out;			\
			offset += len;				\
			len = PAGE_SIZE;			\
		}						\
	}							\
__out:								\
	rcu_read_unlock();					\
	i->iov_offset += __off;					\
	n = __off;						\
}
116  
/*
 * Clamp n to the data remaining, dispatch to the walker matching the
 * iterator flavour -- I is the step used for user-backed memory, K the
 * step used for kernel memory -- then fix up the segment bookkeeping
 * and subtract the bytes actually processed from i->count.  n is left
 * holding that processed byte count.
 */
#define __iterate_and_advance(i, n, base, len, off, I, K) {	\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (likely(n)) {					\
		if (likely(iter_is_ubuf(i))) {			\
			void __user *base;			\
			size_t len;				\
			iterate_buf(i, n, base, len, off,	\
						i->ubuf, (I)) 	\
		} else if (likely(iter_is_iovec(i))) {		\
			const struct iovec *iov = iter_iov(i);	\
			void __user *base;			\
			size_t len;				\
			iterate_iovec(i, n, base, len, off,	\
						iov, (I))	\
			i->nr_segs -= iov - iter_iov(i);	\
			i->__iov = iov;				\
		} else if (iov_iter_is_bvec(i)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			void *base;				\
			size_t len;				\
			iterate_bvec(i, n, base, len, off,	\
						bvec, (K))	\
			i->nr_segs -= bvec - i->bvec;		\
			i->bvec = bvec;				\
		} else if (iov_iter_is_kvec(i)) {		\
			const struct kvec *kvec = i->kvec;	\
			void *base;				\
			size_t len;				\
			iterate_iovec(i, n, base, len, off,	\
						kvec, (K))	\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else if (iov_iter_is_xarray(i)) {		\
			void *base;				\
			size_t len;				\
			iterate_xarray(i, n, base, len, off,	\
							(K))	\
		}						\
		i->count -= n;					\
	}							\
}
/*
 * Same, but for callers whose kernel-side step cannot be short: K is
 * forced to evaluate to 0 bytes left, so only the user-copy step I can
 * terminate the walk early.
 */
#define iterate_and_advance(i, n, base, len, off, I, K) \
	__iterate_and_advance(i, n, base, len, off, I, ((void)(K),0))
161  
/*
 * Copy n bytes of kernel memory out to userspace, honouring usercopy
 * fault injection.  Returns the number of bytes NOT copied.
 */
static int copyout(void __user *to, const void *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (!access_ok(to, n))
		return n;
	instrument_copy_to_user(to, from, n);
	return raw_copy_to_user(to, from, n);
}
172  
/*
 * As copyout(), but via the non-faulting user copy.  Returns the
 * number of bytes NOT copied (all of n if the copy failed outright).
 */
static int copyout_nofault(void __user *to, const void *from, size_t n)
{
	long uncopied;

	if (should_fail_usercopy())
		return n;

	uncopied = copy_to_user_nofault(to, from, n);
	if (uncopied < 0)
		return n;
	return uncopied;
}
184  
/*
 * Copy n bytes in from userspace, honouring usercopy fault injection.
 * Returns the number of bytes NOT copied.
 */
static int copyin(void *to, const void __user *from, size_t n)
{
	size_t uncopied;

	if (should_fail_usercopy())
		return n;
	if (!access_ok(from, n))
		return n;
	instrument_copy_from_user_before(to, from, n);
	uncopied = raw_copy_from_user(to, from, n);
	instrument_copy_from_user_after(to, from, n, uncopied);
	return uncopied;
}
198  
/*
 * fault_in_iov_iter_readable - fault in iov iterator for reading
 * @i: iterator
 * @size: maximum length
 *
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * @size.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Returns the number of bytes not faulted in (like copy_to_user() and
 * copy_from_user()).
 *
 * Always returns 0 for non-userspace iterators.
 */
size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
{
	if (iter_is_ubuf(i)) {
		size_t n = min(size, iov_iter_count(i));
		/* fault_in_readable() returns the bytes NOT faulted in */
		n -= fault_in_readable(i->ubuf + i->iov_offset, n);
		return size - n;
	} else if (iter_is_iovec(i)) {
		size_t count = min(size, iov_iter_count(i));
		const struct iovec *p;
		size_t skip;

		/* 'size' now holds only the tail beyond the iterator's data */
		size -= count;
		for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) {
			size_t len = min(count, p->iov_len - skip);
			size_t ret;

			if (unlikely(!len))
				continue;
			ret = fault_in_readable(p->iov_base + skip, len);
			count -= len - ret;
			if (ret)
				break;
		}
		/* remaining un-faulted bytes plus the untouched tail */
		return count + size;
	}
	return 0;
}
EXPORT_SYMBOL(fault_in_iov_iter_readable);
240  
/*
 * fault_in_iov_iter_writeable - fault in iov iterator for writing
 * @i: iterator
 * @size: maximum length
 *
 * Faults in the iterator using get_user_pages(), i.e., without triggering
 * hardware page faults.  This is primarily useful when we already know that
 * some or all of the pages in @i aren't in memory.
 *
 * Returns the number of bytes not faulted in, like copy_to_user() and
 * copy_from_user().
 *
 * Always returns 0 for non-user-space iterators.
 */
size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
{
	if (iter_is_ubuf(i)) {
		size_t n = min(size, iov_iter_count(i));
		/* fault_in_safe_writeable() returns bytes NOT faulted in */
		n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n);
		return size - n;
	} else if (iter_is_iovec(i)) {
		size_t count = min(size, iov_iter_count(i));
		const struct iovec *p;
		size_t skip;

		/* 'size' now holds only the tail beyond the iterator's data */
		size -= count;
		for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) {
			size_t len = min(count, p->iov_len - skip);
			size_t ret;

			if (unlikely(!len))
				continue;
			ret = fault_in_safe_writeable(p->iov_base + skip, len);
			count -= len - ret;
			if (ret)
				break;
		}
		/* remaining un-faulted bytes plus the untouched tail */
		return count + size;
	}
	return 0;
}
EXPORT_SYMBOL(fault_in_iov_iter_writeable);
283  
/*
 * Initialise @i as a user-backed ITER_IOVEC iterator over @iov.
 * @direction is READ or WRITE (the data_source flag).
 */
void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	/* only READ or WRITE make sense here */
	WARN_ON(direction & ~(READ | WRITE));
	*i = (struct iov_iter) {
		.iter_type	= ITER_IOVEC,
		.user_backed	= true,
		.copy_mc	= false,
		.nofault	= false,
		.data_source	= direction,
		.count		= count,
		.__iov		= iov,
		.nr_segs	= nr_segs,
		.iov_offset	= 0,
	};
}
EXPORT_SYMBOL(iov_iter_init);
302  
/*
 * Copy @len bytes from @from to @to, folding the checksum of the
 * copied data into @sum at stream offset @off.
 */
static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	return csum_block_add(sum, csum_partial_copy_nocheck(from, to, len), off);
}
309  
/*
 * Copy @bytes from kernel memory at @addr into the iterator's buffers,
 * advancing the iterator.  Returns the number of bytes copied; a
 * faulting user copy can make it short.
 */
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	if (WARN_ON_ONCE(i->data_source))	/* must be a destination iter */
		return 0;
	if (user_backed_iter(i))
		might_fault();
	iterate_and_advance(i, bytes, base, len, off,
		copyout(base, addr + off, len),
		memcpy(base, addr + off, len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);
324  
325  #ifdef CONFIG_ARCH_HAS_COPY_MC
/*
 * Machine-check aware variant of copyout().  Returns the number of
 * bytes NOT copied.
 */
static int copyout_mc(void __user *to, const void *from, size_t n)
{
	if (!access_ok(to, n))
		return n;
	instrument_copy_to_user(to, from, n);
	return copy_mc_to_user((__force void *) to, from, n);
}
334  
/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
 * successfully copied.
 *
 * The main differences between this and typical _copy_to_iter().
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC and ITER_BVEC can return short copies.  Compare to
 *   copy_to_iter() where only ITER_IOVEC attempts might return a short copy.
 *
 * Return: number of bytes copied (may be %0)
 */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	if (WARN_ON_ONCE(i->data_source))
		return 0;
	if (user_backed_iter(i))
		might_fault();
	/*
	 * __iterate_and_advance (not iterate_and_advance) so that the
	 * kernel-side step's short-copy result is honoured too.
	 */
	__iterate_and_advance(i, bytes, base, len, off,
		copyout_mc(base, addr + off, len),
		copy_mc_to_kernel(base, addr + off, len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
373  #endif /* CONFIG_ARCH_HAS_COPY_MC */
374  
/*
 * Kernel-side copy step: use the machine-check-safe routine when the
 * iterator requests it, plain memcpy() otherwise.
 */
static void *memcpy_from_iter(struct iov_iter *i, void *to, const void *from,
				 size_t size)
{
	return iov_iter_is_copy_mc(i) ?
		(void *)copy_mc_to_kernel(to, from, size) :
		memcpy(to, from, size);
}
382  
/*
 * Copy @bytes from the iterator's buffers into kernel memory at @addr,
 * advancing the iterator.  Returns the number of bytes copied; a
 * faulting user copy can make it short.
 */
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	if (WARN_ON_ONCE(!i->data_source))	/* must be a source iter */
		return 0;

	if (user_backed_iter(i))
		might_fault();
	iterate_and_advance(i, bytes, base, len, off,
		copyin(addr + off, base, len),
		memcpy_from_iter(i, addr + off, base, len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);
398  
/*
 * As _copy_from_iter(), but the user-backed side uses the inatomic
 * nocache copy; kernel-backed segments still go through plain memcpy().
 */
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	if (WARN_ON_ONCE(!i->data_source))
		return 0;

	iterate_and_advance(i, bytes, base, len, off,
		__copy_from_user_inatomic_nocache(addr + off, base, len),
		memcpy(addr + off, base, len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);
412  
413  #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that guarantees all data is flushed for
 * all iterator types. The _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 *
 * Return: number of bytes copied (may be %0)
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	if (WARN_ON_ONCE(!i->data_source))
		return 0;

	/* both steps flush the destination through the cache */
	iterate_and_advance(i, bytes, base, len, off,
		__copy_from_user_flushcache(addr + off, base, len),
		memcpy_flushcache(addr + off, base, len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
443  #endif
444  
/*
 * Sanity-check that @offset + @n stays within @page (which may be part
 * of a compound page).  The sum 'v' doubles as an overflow check: if
 * n + offset wrapped, n <= v is false and the slow path WARNs.
 */
static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	/* account for the distance into the compound page */
	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (WARN_ON(n > v || v > page_size(head)))
		return false;
	return true;
}
467  
/*
 * Copy @bytes from @page (starting at @offset, possibly spanning
 * subpages of a compound page) into the iterator.  Returns the number
 * of bytes copied; stops early on a short copy.
 */
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t copied = 0;

	if (!page_copy_sane(page, offset, bytes))
		return 0;
	if (WARN_ON_ONCE(i->data_source))
		return 0;
	/* normalise to the first subpage actually touched */
	page += offset / PAGE_SIZE;
	offset %= PAGE_SIZE;
	for (;;) {
		void *kaddr = kmap_local_page(page);
		size_t chunk = min(bytes, (size_t)PAGE_SIZE - offset);

		chunk = _copy_to_iter(kaddr + offset, chunk, i);
		kunmap_local(kaddr);
		copied += chunk;
		bytes -= chunk;
		if (!bytes || !chunk)
			break;
		offset += chunk;
		if (offset == PAGE_SIZE) {
			page++;
			offset = 0;
		}
	}
	return copied;
}
EXPORT_SYMBOL(copy_page_to_iter);
496  
/*
 * Like copy_page_to_iter(), but user-backed destinations are written
 * with the non-faulting copy, so this is usable where page faults must
 * not be taken.  Returns the number of bytes copied (may be short).
 */
size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes,
				 struct iov_iter *i)
{
	size_t res = 0;

	if (!page_copy_sane(page, offset, bytes))
		return 0;
	if (WARN_ON_ONCE(i->data_source))
		return 0;
	page += offset / PAGE_SIZE; // first subpage
	offset %= PAGE_SIZE;
	while (1) {
		void *kaddr = kmap_local_page(page);
		size_t n = min(bytes, (size_t)PAGE_SIZE - offset);

		/* n comes back as the bytes actually transferred */
		iterate_and_advance(i, n, base, len, off,
			copyout_nofault(base, kaddr + offset + off, len),
			memcpy(base, kaddr + offset + off, len)
		)
		kunmap_local(kaddr);
		res += n;
		bytes -= n;
		if (!bytes || !n)
			break;
		offset += n;
		if (offset == PAGE_SIZE) {
			page++;
			offset = 0;
		}
	}
	return res;
}
EXPORT_SYMBOL(copy_page_to_iter_nofault);
530  
/*
 * Copy @bytes from the iterator into @page starting at @offset
 * (possibly spanning subpages of a compound page).  Returns the number
 * of bytes copied; stops early on a short copy.
 */
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t copied = 0;

	if (!page_copy_sane(page, offset, bytes))
		return 0;
	/* normalise to the first subpage actually touched */
	page += offset / PAGE_SIZE;
	offset %= PAGE_SIZE;
	for (;;) {
		void *kaddr = kmap_local_page(page);
		size_t chunk = min(bytes, (size_t)PAGE_SIZE - offset);

		chunk = _copy_from_iter(kaddr + offset, chunk, i);
		kunmap_local(kaddr);
		copied += chunk;
		bytes -= chunk;
		if (!bytes || !chunk)
			break;
		offset += chunk;
		if (offset == PAGE_SIZE) {
			page++;
			offset = 0;
		}
	}
	return copied;
}
EXPORT_SYMBOL(copy_page_from_iter);
557  
/*
 * Zero-fill the next @bytes of the iterator's buffers, advancing it.
 * Returns the number of bytes actually zeroed; a faulting clear_user()
 * can make it short.
 */
size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	iterate_and_advance(i, bytes, base, len, count,
		clear_user(base, len),
		memset(base, 0, len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);
568  
/*
 * Atomic-context copy from the iterator into @page.  When the page
 * needs kmap (highmem, or forced per-page mappings for debugging) the
 * copy must be chunked per subpage, since kmap_atomic() maps a single
 * page at a time; otherwise one pass covers the whole request.
 * Returns the number of bytes copied (may be short).
 */
size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
		size_t bytes, struct iov_iter *i)
{
	size_t n, copied = 0;
	/* true if kmap_atomic() gives us only one page at a time */
	bool uses_kmap = IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) ||
			 PageHighMem(page);

	if (!page_copy_sane(page, offset, bytes))
		return 0;
	if (WARN_ON_ONCE(!i->data_source))
		return 0;

	do {
		char *p;

		n = bytes - copied;
		if (uses_kmap) {
			/* clamp this pass to the current subpage */
			page += offset / PAGE_SIZE;
			offset %= PAGE_SIZE;
			n = min_t(size_t, n, PAGE_SIZE - offset);
		}

		p = kmap_atomic(page) + offset;
		iterate_and_advance(i, n, base, len, off,
			copyin(p + off, base, len),
			memcpy_from_iter(i, p + off, base, len)
		)
		kunmap_atomic(p);
		copied += n;
		offset += n;
	} while (uses_kmap && copied != bytes && n > 0);

	return copied;
}
EXPORT_SYMBOL(copy_page_from_iter_atomic);
604  
/*
 * Advance a bvec-backed iterator by @size bytes, skipping over any
 * fully-consumed segments and updating the residual segment offset.
 */
static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
	const struct bio_vec *bv, *last;

	if (!i->count)
		return;
	i->count -= size;

	/* convert to an offset from the start of the current segment */
	size += i->iov_offset;

	bv = i->bvec;
	last = bv + i->nr_segs;
	while (bv < last && size >= bv->bv_len) {
		size -= bv->bv_len;
		bv++;
	}
	i->iov_offset = size;
	i->nr_segs -= bv - i->bvec;
	i->bvec = bv;
}
624  
/*
 * Advance an iovec/kvec-backed iterator by @size bytes, skipping over
 * any fully-consumed segments and updating the residual offset.
 */
static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
{
	const struct iovec *iov, *last;

	if (!i->count)
		return;
	i->count -= size;

	/* convert to an offset from the start of the current segment */
	size += i->iov_offset;

	iov = iter_iov(i);
	last = iov + i->nr_segs;
	while (iov < last && size >= iov->iov_len) {
		size -= iov->iov_len;
		iov++;
	}
	i->iov_offset = size;
	i->nr_segs -= iov - iter_iov(i);
	i->__iov = iov;
}
643  
/*
 * Consume @size bytes from the front of the iterator, clamped to the
 * data remaining.  Segment bookkeeping is delegated per flavour.
 */
void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(i->count < size))
		size = i->count;
	if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) {
		/* a single contiguous range: just move the offset */
		i->iov_offset += size;
		i->count -= size;
	} else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
		/* iovec and kvec have identical layouts */
		iov_iter_iovec_advance(i, size);
	} else if (iov_iter_is_bvec(i)) {
		iov_iter_bvec_advance(i, size);
	} else if (iov_iter_is_discard(i)) {
		i->count -= size;
	}
}
EXPORT_SYMBOL(iov_iter_advance);
661  
/*
 * Step the iterator backwards by @unroll bytes.  Cheap if we stay
 * within the current segment; otherwise the segment array is walked
 * backwards.  ubuf/xarray iterators must never be reverted past their
 * starting point (see the BUG below).
 */
void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_discard(i)))
		return;
	/* fast path: still inside the current segment */
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) {
		BUG(); /* We should never go beyond the start of the specified
			* range since we might then be straying into pages that
			* aren't pinned.
			*/
	} else if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = iter_iov(i);
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->__iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);
708  
709  /*
710   * Return the count of just the current iov_iter segment.
711   */
iov_iter_single_seg_count(const struct iov_iter * i)712  size_t iov_iter_single_seg_count(const struct iov_iter *i)
713  {
714  	if (i->nr_segs > 1) {
715  		if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
716  			return min(i->count, iter_iov(i)->iov_len - i->iov_offset);
717  		if (iov_iter_is_bvec(i))
718  			return min(i->count, i->bvec->bv_len - i->iov_offset);
719  	}
720  	return i->count;
721  }
722  EXPORT_SYMBOL(iov_iter_single_seg_count);
723  
/*
 * Initialise @i as a kernel-backed ITER_KVEC iterator over @kvec.
 * @direction is READ or WRITE (the data_source flag).
 */
void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	*i = (struct iov_iter){
		.iter_type	= ITER_KVEC,
		.copy_mc	= false,
		.data_source	= direction,
		.count		= count,
		.kvec		= kvec,
		.nr_segs	= nr_segs,
		.iov_offset	= 0,
	};
}
EXPORT_SYMBOL(iov_iter_kvec);
740  
/*
 * Initialise @i as an ITER_BVEC iterator over the bio_vec array @bvec.
 * @direction is READ or WRITE (the data_source flag).
 */
void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	*i = (struct iov_iter){
		.iter_type	= ITER_BVEC,
		.copy_mc	= false,
		.data_source	= direction,
		.count		= count,
		.bvec		= bvec,
		.nr_segs	= nr_segs,
		.iov_offset	= 0,
	};
}
EXPORT_SYMBOL(iov_iter_bvec);
757  
758  /**
759   * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
760   * @i: The iterator to initialise.
761   * @direction: The direction of the transfer.
762   * @xarray: The xarray to access.
763   * @start: The start file position.
764   * @count: The size of the I/O buffer in bytes.
765   *
766   * Set up an I/O iterator to either draw data out of the pages attached to an
767   * inode or to inject data into those pages.  The pages *must* be prevented
768   * from evaporation, either by taking a ref on them or locking them by the
769   * caller.
770   */
iov_iter_xarray(struct iov_iter * i,unsigned int direction,struct xarray * xarray,loff_t start,size_t count)771  void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
772  		     struct xarray *xarray, loff_t start, size_t count)
773  {
774  	BUG_ON(direction & ~1);
775  	*i = (struct iov_iter) {
776  		.iter_type = ITER_XARRAY,
777  		.copy_mc = false,
778  		.data_source = direction,
779  		.xarray = xarray,
780  		.xarray_start = start,
781  		.count = count,
782  		.iov_offset = 0
783  	};
784  }
785  EXPORT_SYMBOL(iov_iter_xarray);
786  
787  /**
788   * iov_iter_discard - Initialise an I/O iterator that discards data
789   * @i: The iterator to initialise.
790   * @direction: The direction of the transfer.
791   * @count: The size of the I/O buffer in bytes.
792   *
793   * Set up an I/O iterator that just discards everything that's written to it.
794   * It's only available as a READ iterator.
795   */
iov_iter_discard(struct iov_iter * i,unsigned int direction,size_t count)796  void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
797  {
798  	BUG_ON(direction != READ);
799  	*i = (struct iov_iter){
800  		.iter_type = ITER_DISCARD,
801  		.copy_mc = false,
802  		.data_source = false,
803  		.count = count,
804  		.iov_offset = 0
805  	};
806  }
807  EXPORT_SYMBOL(iov_iter_discard);
808  
iov_iter_aligned_iovec(const struct iov_iter * i,unsigned addr_mask,unsigned len_mask)809  static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
810  				   unsigned len_mask)
811  {
812  	size_t size = i->count;
813  	size_t skip = i->iov_offset;
814  	unsigned k;
815  
816  	for (k = 0; k < i->nr_segs; k++, skip = 0) {
817  		const struct iovec *iov = iter_iov(i) + k;
818  		size_t len = iov->iov_len - skip;
819  
820  		if (len > size)
821  			len = size;
822  		if (len & len_mask)
823  			return false;
824  		if ((unsigned long)(iov->iov_base + skip) & addr_mask)
825  			return false;
826  
827  		size -= len;
828  		if (!size)
829  			break;
830  	}
831  	return true;
832  }
833  
iov_iter_aligned_bvec(const struct iov_iter * i,unsigned addr_mask,unsigned len_mask)834  static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
835  				  unsigned len_mask)
836  {
837  	size_t size = i->count;
838  	unsigned skip = i->iov_offset;
839  	unsigned k;
840  
841  	for (k = 0; k < i->nr_segs; k++, skip = 0) {
842  		size_t len = i->bvec[k].bv_len - skip;
843  
844  		if (len > size)
845  			len = size;
846  		if (len & len_mask)
847  			return false;
848  		if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask)
849  			return false;
850  
851  		size -= len;
852  		if (!size)
853  			break;
854  	}
855  	return true;
856  }
857  
858  /**
859   * iov_iter_is_aligned() - Check if the addresses and lengths of each segments
860   * 	are aligned to the parameters.
861   *
862   * @i: &struct iov_iter to restore
863   * @addr_mask: bit mask to check against the iov element's addresses
864   * @len_mask: bit mask to check against the iov element's lengths
865   *
866   * Return: false if any addresses or lengths intersect with the provided masks
867   */
iov_iter_is_aligned(const struct iov_iter * i,unsigned addr_mask,unsigned len_mask)868  bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
869  			 unsigned len_mask)
870  {
871  	if (likely(iter_is_ubuf(i))) {
872  		if (i->count & len_mask)
873  			return false;
874  		if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask)
875  			return false;
876  		return true;
877  	}
878  
879  	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
880  		return iov_iter_aligned_iovec(i, addr_mask, len_mask);
881  
882  	if (iov_iter_is_bvec(i))
883  		return iov_iter_aligned_bvec(i, addr_mask, len_mask);
884  
885  	if (iov_iter_is_xarray(i)) {
886  		if (i->count & len_mask)
887  			return false;
888  		if ((i->xarray_start + i->iov_offset) & addr_mask)
889  			return false;
890  	}
891  
892  	return true;
893  }
894  EXPORT_SYMBOL_GPL(iov_iter_is_aligned);
895  
iov_iter_alignment_iovec(const struct iov_iter * i)896  static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
897  {
898  	unsigned long res = 0;
899  	size_t size = i->count;
900  	size_t skip = i->iov_offset;
901  	unsigned k;
902  
903  	for (k = 0; k < i->nr_segs; k++, skip = 0) {
904  		const struct iovec *iov = iter_iov(i) + k;
905  		size_t len = iov->iov_len - skip;
906  		if (len) {
907  			res |= (unsigned long)iov->iov_base + skip;
908  			if (len > size)
909  				len = size;
910  			res |= len;
911  			size -= len;
912  			if (!size)
913  				break;
914  		}
915  	}
916  	return res;
917  }
918  
iov_iter_alignment_bvec(const struct iov_iter * i)919  static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
920  {
921  	unsigned res = 0;
922  	size_t size = i->count;
923  	unsigned skip = i->iov_offset;
924  	unsigned k;
925  
926  	for (k = 0; k < i->nr_segs; k++, skip = 0) {
927  		size_t len = i->bvec[k].bv_len - skip;
928  		res |= (unsigned long)i->bvec[k].bv_offset + skip;
929  		if (len > size)
930  			len = size;
931  		res |= len;
932  		size -= len;
933  		if (!size)
934  			break;
935  	}
936  	return res;
937  }
938  
iov_iter_alignment(const struct iov_iter * i)939  unsigned long iov_iter_alignment(const struct iov_iter *i)
940  {
941  	if (likely(iter_is_ubuf(i))) {
942  		size_t size = i->count;
943  		if (size)
944  			return ((unsigned long)i->ubuf + i->iov_offset) | size;
945  		return 0;
946  	}
947  
948  	/* iovec and kvec have identical layouts */
949  	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
950  		return iov_iter_alignment_iovec(i);
951  
952  	if (iov_iter_is_bvec(i))
953  		return iov_iter_alignment_bvec(i);
954  
955  	if (iov_iter_is_xarray(i))
956  		return (i->xarray_start + i->iov_offset) | i->count;
957  
958  	return 0;
959  }
960  EXPORT_SYMBOL(iov_iter_alignment);
961  
/*
 * OR together every interior boundary of the iovec array: each
 * segment's end address with the next segment's start address.  The
 * low bits of the result tell at what alignment the segments fail to
 * be virtually contiguous.  Only meaningful for ITER_IOVEC.
 */
unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	unsigned long v = 0;
	size_t size = i->count;
	unsigned k;

	/* a single user buffer has no gaps by definition */
	if (iter_is_ubuf(i))
		return 0;

	if (WARN_ON(!iter_is_iovec(i)))
		return ~0U;

	for (k = 0; k < i->nr_segs; k++) {
		const struct iovec *iov = iter_iov(i) + k;
		if (iov->iov_len) {
			unsigned long base = (unsigned long)iov->iov_base;
			if (v) // if not the first one
				res |= base | v; // this start | previous end
			v = base + iov->iov_len;
			if (size <= iov->iov_len)
				break;
			size -= iov->iov_len;
		}
	}
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);
990  
/*
 * Work out how many page pointers are needed to cover [start, start + size),
 * capped at @maxpages, and allocate the array if *res is still NULL.
 * Returns the usable count, or 0 on allocation failure.
 */
static int want_pages_array(struct page ***res, size_t size,
			    size_t start, unsigned int maxpages)
{
	unsigned int count = DIV_ROUND_UP(size + start, PAGE_SIZE);

	if (count > maxpages)
		count = maxpages;
	WARN_ON(!count);	// caller should've prevented that
	if (*res)
		return count;	/* caller supplied the array */
	*res = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL);
	return *res ? count : 0;
}
1006  
/*
 * Walk the xarray from @index, taking a reference on each backing page and
 * storing up to @nr_pages subpage pointers into @pages.  Returns the number
 * of pages captured.  The walk runs under rcu_read_lock().
 */
static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
					  pgoff_t index, unsigned int nr_pages)
{
	XA_STATE(xas, xa, index);
	struct page *page;
	unsigned int ret = 0;

	rcu_read_lock();
	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
		/* skip xarray-internal retry entries */
		if (xas_retry(&xas, page))
			continue;

		/* Has the page moved or been split? */
		if (unlikely(page != xas_reload(&xas))) {
			xas_reset(&xas);
			continue;
		}

		pages[ret] = find_subpage(page, xas.xa_index);
		get_page(pages[ret]);
		if (++ret == nr_pages)
			break;
	}
	rcu_read_unlock();
	return ret;
}
1033  
/*
 * Get-pages implementation for ITER_XARRAY: grab references on the pages
 * covering the current position, set *_start_offset to the offset into the
 * first page, advance the iterator and return the number of bytes covered
 * (0 if no pages were found, -ENOMEM if the array could not be allocated).
 */
static ssize_t iter_xarray_get_pages(struct iov_iter *i,
				     struct page ***pages, size_t maxsize,
				     unsigned maxpages, size_t *_start_offset)
{
	unsigned nr, offset, count;
	pgoff_t index;
	loff_t pos;

	pos = i->xarray_start + i->iov_offset;
	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;
	*_start_offset = offset;

	count = want_pages_array(pages, maxsize, offset, maxpages);
	if (!count)
		return -ENOMEM;
	nr = iter_xarray_populate_pages(*pages, i->xarray, index, count);
	if (nr == 0)
		return 0;

	/* may have found fewer pages than asked for */
	maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
	i->iov_offset += maxsize;
	i->count -= maxsize;
	return maxsize;
}
1059  
/* must be done on non-empty ITER_UBUF or ITER_IOVEC one */
/*
 * Return the user address of the first non-empty segment at the current
 * position, clamping *size to what that segment can supply.
 */
static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size)
{
	size_t skip;
	long k;

	if (iter_is_ubuf(i))
		return (unsigned long)i->ubuf + i->iov_offset;

	for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
		const struct iovec *iov = iter_iov(i) + k;
		size_t len = iov->iov_len - skip;

		if (unlikely(!len))
			continue;
		if (*size > len)
			*size = len;
		return (unsigned long)iov->iov_base + skip;
	}
	BUG(); // if it had been empty, we wouldn't get called
}
1081  
1082  /* must be done on non-empty ITER_BVEC one */
first_bvec_segment(const struct iov_iter * i,size_t * size,size_t * start)1083  static struct page *first_bvec_segment(const struct iov_iter *i,
1084  				       size_t *size, size_t *start)
1085  {
1086  	struct page *page;
1087  	size_t skip = i->iov_offset, len;
1088  
1089  	len = i->bvec->bv_len - skip;
1090  	if (*size > len)
1091  		*size = len;
1092  	skip += i->bvec->bv_offset;
1093  	page = i->bvec->bv_page + skip / PAGE_SIZE;
1094  	*start = skip % PAGE_SIZE;
1095  	return page;
1096  }
1097  
/*
 * Common engine for iov_iter_get_pages2() and iov_iter_get_pages_alloc2():
 * take references on the pages covering up to @maxsize bytes at the current
 * position (capped at MAX_RW_COUNT), store them in *pages (allocating the
 * array if *pages is NULL), set *start to the offset into the first page,
 * advance the iterator, and return the number of bytes covered or -errno.
 */
static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   unsigned int maxpages, size_t *start)
{
	unsigned int n, gup_flags = 0;

	if (maxsize > i->count)
		maxsize = i->count;
	if (!maxsize)
		return 0;
	if (maxsize > MAX_RW_COUNT)
		maxsize = MAX_RW_COUNT;

	if (likely(user_backed_iter(i))) {
		unsigned long addr;
		int res;

		/* reading from the iter means writing into the user pages */
		if (iov_iter_rw(i) != WRITE)
			gup_flags |= FOLL_WRITE;
		if (i->nofault)
			gup_flags |= FOLL_NOFAULT;

		addr = first_iovec_segment(i, &maxsize);
		*start = addr % PAGE_SIZE;
		addr &= PAGE_MASK;
		n = want_pages_array(pages, maxsize, *start, maxpages);
		if (!n)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n, gup_flags, *pages);
		if (unlikely(res <= 0))
			return res;
		/* may have pinned fewer pages than requested */
		maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - *start);
		iov_iter_advance(i, maxsize);
		return maxsize;
	}
	if (iov_iter_is_bvec(i)) {
		struct page **p;
		struct page *page;

		page = first_bvec_segment(i, &maxsize, start);
		n = want_pages_array(pages, maxsize, *start, maxpages);
		if (!n)
			return -ENOMEM;
		p = *pages;
		for (int k = 0; k < n; k++)
			get_page(p[k] = page + k);
		maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start);
		/* advance by hand; step to the next bvec only when exhausted */
		i->count -= maxsize;
		i->iov_offset += maxsize;
		if (i->iov_offset == i->bvec->bv_len) {
			i->iov_offset = 0;
			i->bvec++;
			i->nr_segs--;
		}
		return maxsize;
	}
	if (iov_iter_is_xarray(i))
		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
	return -EFAULT;
}
1158  
iov_iter_get_pages2(struct iov_iter * i,struct page ** pages,size_t maxsize,unsigned maxpages,size_t * start)1159  ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
1160  		size_t maxsize, unsigned maxpages, size_t *start)
1161  {
1162  	if (!maxpages)
1163  		return 0;
1164  	BUG_ON(!pages);
1165  
1166  	return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start);
1167  }
1168  EXPORT_SYMBOL(iov_iter_get_pages2);
1169  
iov_iter_get_pages_alloc2(struct iov_iter * i,struct page *** pages,size_t maxsize,size_t * start)1170  ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i,
1171  		struct page ***pages, size_t maxsize, size_t *start)
1172  {
1173  	ssize_t len;
1174  
1175  	*pages = NULL;
1176  
1177  	len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start);
1178  	if (len <= 0) {
1179  		kvfree(*pages);
1180  		*pages = NULL;
1181  	}
1182  	return len;
1183  }
1184  EXPORT_SYMBOL(iov_iter_get_pages_alloc2);
1185  
/*
 * Copy @bytes from iterator @i into @addr while folding the Internet
 * checksum of the copied data into *@csum.  Only valid on source
 * iterators; returns the number of bytes actually copied (the macro
 * rewrites @bytes to the amount processed).
 */
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	__wsum sum, next;
	sum = *csum;
	if (WARN_ON_ONCE(!i->data_source))
		return 0;

	iterate_and_advance(i, bytes, base, len, off, ({
		next = csum_and_copy_from_user(base, addr + off, len);
		sum = csum_block_add(sum, next, off);
		/* a zero csum from the helper signals a fault: short copy */
		next ? 0 : len;
	}), ({
		sum = csum_and_memcpy(addr + off, base, len, sum, off);
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);
1206  
/*
 * Copy @bytes from @addr into iterator @i while folding the Internet
 * checksum of the data into the csum_state at @_csstate.  Only valid on
 * destination iterators; returns the number of bytes actually copied.
 */
size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
			     struct iov_iter *i)
{
	struct csum_state *csstate = _csstate;
	__wsum sum, next;

	if (WARN_ON_ONCE(i->data_source))
		return 0;
	if (unlikely(iov_iter_is_discard(i))) {
		// can't use csum_memcpy() for that one - data is not copied
		csstate->csum = csum_block_add(csstate->csum,
					       csum_partial(addr, bytes, 0),
					       csstate->off);
		csstate->off += bytes;
		return bytes;
	}

	/* rotate the running csum to even alignment before accumulating */
	sum = csum_shift(csstate->csum, csstate->off);
	iterate_and_advance(i, bytes, base, len, off, ({
		next = csum_and_copy_to_user(addr + off, base, len);
		sum = csum_block_add(sum, next, off);
		/* a zero csum from the helper signals a fault: short copy */
		next ? 0 : len;
	}), ({
		sum = csum_and_memcpy(base, addr + off, len, sum, off);
	})
	)
	csstate->csum = csum_shift(sum, csstate->off);
	csstate->off += bytes;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);
1238  
/*
 * Copy @bytes from @addr into iterator @i and feed the copied data into the
 * ahash request @hashp.  Returns the number of bytes copied.  Without
 * CONFIG_CRYPTO_HASH this is a stub returning 0.
 */
size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
		struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO_HASH
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	/*
	 * NOTE(review): the return value of crypto_ahash_update() is
	 * ignored, so a hashing failure goes unreported to the caller -
	 * confirm this is intentional.
	 */
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);
1257  
iov_npages(const struct iov_iter * i,int maxpages)1258  static int iov_npages(const struct iov_iter *i, int maxpages)
1259  {
1260  	size_t skip = i->iov_offset, size = i->count;
1261  	const struct iovec *p;
1262  	int npages = 0;
1263  
1264  	for (p = iter_iov(i); size; skip = 0, p++) {
1265  		unsigned offs = offset_in_page(p->iov_base + skip);
1266  		size_t len = min(p->iov_len - skip, size);
1267  
1268  		if (len) {
1269  			size -= len;
1270  			npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
1271  			if (unlikely(npages > maxpages))
1272  				return maxpages;
1273  		}
1274  	}
1275  	return npages;
1276  }
1277  
bvec_npages(const struct iov_iter * i,int maxpages)1278  static int bvec_npages(const struct iov_iter *i, int maxpages)
1279  {
1280  	size_t skip = i->iov_offset, size = i->count;
1281  	const struct bio_vec *p;
1282  	int npages = 0;
1283  
1284  	for (p = i->bvec; size; skip = 0, p++) {
1285  		unsigned offs = (p->bv_offset + skip) % PAGE_SIZE;
1286  		size_t len = min(p->bv_len - skip, size);
1287  
1288  		size -= len;
1289  		npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
1290  		if (unlikely(npages > maxpages))
1291  			return maxpages;
1292  	}
1293  	return npages;
1294  }
1295  
iov_iter_npages(const struct iov_iter * i,int maxpages)1296  int iov_iter_npages(const struct iov_iter *i, int maxpages)
1297  {
1298  	if (unlikely(!i->count))
1299  		return 0;
1300  	if (likely(iter_is_ubuf(i))) {
1301  		unsigned offs = offset_in_page(i->ubuf + i->iov_offset);
1302  		int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE);
1303  		return min(npages, maxpages);
1304  	}
1305  	/* iovec and kvec have identical layouts */
1306  	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1307  		return iov_npages(i, maxpages);
1308  	if (iov_iter_is_bvec(i))
1309  		return bvec_npages(i, maxpages);
1310  	if (iov_iter_is_xarray(i)) {
1311  		unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
1312  		int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
1313  		return min(npages, maxpages);
1314  	}
1315  	return 0;
1316  }
1317  EXPORT_SYMBOL(iov_iter_npages);
1318  
/*
 * Duplicate @old into @new, deep-copying the segment array for iterator
 * flavours that have one.  Returns the duplicated array (NULL on allocation
 * failure), or NULL for flavours with nothing to duplicate.
 */
const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (iov_iter_is_bvec(new)) {
		new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
		return new->bvec;
	}
	if (iov_iter_is_kvec(new) || iter_is_iovec(new)) {
		/* iovec and kvec have identical layout */
		new->__iov = kmemdup(new->__iov,
				     new->nr_segs * sizeof(struct iovec),
				     flags);
		return new->__iov;
	}
	return NULL;
}
EXPORT_SYMBOL(dup_iter);
1334  
/*
 * Copy @nr_segs compat (32-bit) iovecs from userspace into @iov, widening
 * them to native struct iovec.  Returns 0, -EFAULT, or -EINVAL for a
 * negative length.
 * NOTE(review): __noclone presumably keeps the compiler from cloning a
 * variant that splits the user_access_begin()/end() window - confirm
 * against the commit that added it.
 */
static __noclone int copy_compat_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	const struct compat_iovec __user *uiov =
		(const struct compat_iovec __user *)uvec;
	int ret = -EFAULT, i;

	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
		return -EFAULT;

	for (i = 0; i < nr_segs; i++) {
		compat_uptr_t buf;
		compat_ssize_t len;

		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);

		/* check for compat_size_t not fitting in compat_ssize_t .. */
		if (len < 0) {
			ret = -EINVAL;
			goto uaccess_end;
		}
		iov[i].iov_base = compat_ptr(buf);
		iov[i].iov_len = len;
	}

	ret = 0;
uaccess_end:
	user_access_end();
	return ret;
}
1366  
/*
 * Copy @nr_segs native iovecs from userspace into @iov inside a single
 * user_access_begin()/end() window.  Returns 0, -EFAULT, or -EINVAL for a
 * negative length.  @nr_segs must be non-zero (do/while assumes it).
 */
static __noclone int copy_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uiov, unsigned long nr_segs)
{
	int ret = -EFAULT;

	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
		return -EFAULT;

	do {
		void __user *buf;
		ssize_t len;

		unsafe_get_user(len, &uiov->iov_len, uaccess_end);
		unsafe_get_user(buf, &uiov->iov_base, uaccess_end);

		/* check for size_t not fitting in ssize_t .. */
		if (unlikely(len < 0)) {
			ret = -EINVAL;
			goto uaccess_end;
		}
		iov->iov_base = buf;
		iov->iov_len = len;

		uiov++; iov++;
	} while (--nr_segs);

	ret = 0;
uaccess_end:
	user_access_end();
	return ret;
}
1398  
/*
 * Fetch @nr_segs iovecs from userspace into kernel memory, using the
 * caller's @fast_iov array (@fast_segs entries) when it is big enough,
 * else a freshly allocated one.  Returns the array (the caller frees it
 * iff it differs from @fast_iov) or an ERR_PTR.
 */
struct iovec *iovec_from_user(const struct iovec __user *uvec,
		unsigned long nr_segs, unsigned long fast_segs,
		struct iovec *fast_iov, bool compat)
{
	struct iovec *iov = fast_iov;
	int ret;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument was
	 * less than or equal to 0, or greater than {IOV_MAX}.  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0)
		return iov;
	if (nr_segs > UIO_MAXIOV)
		return ERR_PTR(-EINVAL);
	if (nr_segs > fast_segs) {
		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
		if (!iov)
			return ERR_PTR(-ENOMEM);
	}

	ret = unlikely(compat) ? copy_compat_iovec_from_user(iov, uvec, nr_segs)
			       : copy_iovec_from_user(iov, uvec, nr_segs);
	if (ret) {
		if (iov != fast_iov)
			kfree(iov);
		return ERR_PTR(ret);
	}

	return iov;
}
1433  
/*
 * Single segment iovec supplied by the user, import it as ITER_UBUF.
 */
static ssize_t __import_iovec_ubuf(int type, const struct iovec __user *uvec,
				   struct iovec **iovp, struct iov_iter *i,
				   bool compat)
{
	struct iovec *iov = *iovp;
	ssize_t ret;

	/* the caller's array is not handed back in the ubuf case */
	*iovp = NULL;

	if (compat)
		ret = copy_compat_iovec_from_user(iov, uvec, 1);
	else
		ret = copy_iovec_from_user(iov, uvec, 1);
	if (unlikely(ret))
		return ret;

	/* import_ubuf() applies the MAX_RW_COUNT cap and access_ok() check */
	ret = import_ubuf(type, iov->iov_base, iov->iov_len, i);
	if (unlikely(ret))
		return ret;
	return i->count;
}
1458  
/*
 * Common implementation of import_iovec(): copy the user's iovec array in,
 * validate each element, cap the total at MAX_RW_COUNT and initialize @i.
 * On return *iovp is either NULL (caller's array sufficed or error) or the
 * allocated array the caller must kfree().
 */
ssize_t __import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
		 struct iov_iter *i, bool compat)
{
	ssize_t total_len = 0;
	unsigned long seg;
	struct iovec *iov;

	/* single-segment fast path: build an ITER_UBUF instead */
	if (nr_segs == 1)
		return __import_iovec_ubuf(type, uvec, iovp, i, compat);

	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
	if (IS_ERR(iov)) {
		*iovp = NULL;
		return PTR_ERR(iov);
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL if
	 * an element length is < 0 when cast to ssize_t or if the total length
	 * would overflow the ssize_t return value of the system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	for (seg = 0; seg < nr_segs; seg++) {
		ssize_t len = (ssize_t)iov[seg].iov_len;

		if (!access_ok(iov[seg].iov_base, len)) {
			if (iov != *iovp)
				kfree(iov);
			*iovp = NULL;
			return -EFAULT;
		}

		/* truncate the trailing segment so the total fits the cap */
		if (len > MAX_RW_COUNT - total_len) {
			len = MAX_RW_COUNT - total_len;
			iov[seg].iov_len = len;
		}
		total_len += len;
	}

	iov_iter_init(i, type, iov, nr_segs, total_len);
	if (iov == *iovp)
		*iovp = NULL;
	else
		*iovp = iov;
	return total_len;
}
1508  
/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvec: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iovp.
 * @iovp: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iovp on return. Otherwise, a new
 * array will be allocated and the result placed in *@iovp. This means that
 * the caller may call kfree() on *@iovp regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: Negative error code on error, bytes imported on success
 */
ssize_t import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iovp, struct iov_iter *i)
{
	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
			      in_compat_syscall());
}
EXPORT_SYMBOL(import_iovec);
1539  
/*
 * Import a single user buffer range as an ITER_UBUF iterator, capping the
 * length at MAX_RW_COUNT and validating access.  @iov is unused and kept
 * only for API compatibility; the body is identical to import_ubuf(), so
 * delegate to it instead of duplicating the logic.
 */
int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	return import_ubuf(rw, buf, len, i);
}
EXPORT_SYMBOL(import_single_range);
1552  
/*
 * Set up @i as an ITER_UBUF over the single user range [buf, buf + len),
 * capping @len at the kernel-wide single-syscall I/O limit and verifying
 * the range with access_ok().  Returns 0 or -EFAULT.
 */
int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
{
	len = min_t(size_t, len, MAX_RW_COUNT);
	if (unlikely(!access_ok(buf, len)))
		return -EFAULT;

	iov_iter_ubuf(i, rw, buf, len);
	return 0;
}
EXPORT_SYMBOL_GPL(import_ubuf);
1564  
1565  /**
1566   * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
1567   *     iov_iter_save_state() was called.
1568   *
1569   * @i: &struct iov_iter to restore
1570   * @state: state to restore from
1571   *
1572   * Used after iov_iter_save_state() to bring restore @i, if operations may
1573   * have advanced it.
1574   *
1575   * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC
1576   */
iov_iter_restore(struct iov_iter * i,struct iov_iter_state * state)1577  void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
1578  {
1579  	if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) &&
1580  			 !iter_is_ubuf(i)) && !iov_iter_is_kvec(i))
1581  		return;
1582  	i->iov_offset = state->iov_offset;
1583  	i->count = state->count;
1584  	if (iter_is_ubuf(i))
1585  		return;
1586  	/*
1587  	 * For the *vec iters, nr_segs + iov is constant - if we increment
1588  	 * the vec, then we also decrement the nr_segs count. Hence we don't
1589  	 * need to track both of these, just one is enough and we can deduct
1590  	 * the other from that. ITER_KVEC and ITER_IOVEC are the same struct
1591  	 * size, so we can just increment the iov pointer as they are unionzed.
1592  	 * ITER_BVEC _may_ be the same size on some archs, but on others it is
1593  	 * not. Be safe and handle it separately.
1594  	 */
1595  	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
1596  	if (iov_iter_is_bvec(i))
1597  		i->bvec -= state->nr_segs - i->nr_segs;
1598  	else
1599  		i->__iov -= state->nr_segs - i->nr_segs;
1600  	i->nr_segs = state->nr_segs;
1601  }
1602  
/*
 * Extract a list of contiguous pages from an ITER_XARRAY iterator.  This does not
 * get references on the pages, nor does it get a pin on them.
 */
static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
					     struct page ***pages, size_t maxsize,
					     unsigned int maxpages,
					     iov_iter_extraction_t extraction_flags,
					     size_t *offset0)
{
	struct page *page, **p;
	unsigned int nr = 0, offset;
	loff_t pos = i->xarray_start + i->iov_offset;
	pgoff_t index = pos >> PAGE_SHIFT;
	XA_STATE(xas, i->xarray, index);

	offset = pos & ~PAGE_MASK;
	*offset0 = offset;

	maxpages = want_pages_array(pages, maxsize, offset, maxpages);
	if (!maxpages)
		return -ENOMEM;
	p = *pages;

	rcu_read_lock();
	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
		/* skip xarray-internal retry entries */
		if (xas_retry(&xas, page))
			continue;

		/* Has the page moved or been split? */
		if (unlikely(page != xas_reload(&xas))) {
			xas_reset(&xas);
			continue;
		}

		/* no get_page() here - no ref/pin is taken by design */
		p[nr++] = find_subpage(page, xas.xa_index);
		if (nr == maxpages)
			break;
	}
	rcu_read_unlock();

	/* may have found fewer pages than asked for */
	maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
	iov_iter_advance(i, maxsize);
	return maxsize;
}
1648  
/*
 * Extract a list of contiguous pages from an ITER_BVEC iterator.  This does
 * not get references on the pages, nor does it get a pin on them.
 */
static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
					   struct page ***pages, size_t maxsize,
					   unsigned int maxpages,
					   iov_iter_extraction_t extraction_flags,
					   size_t *offset0)
{
	struct page **p, *page;
	size_t skip = i->iov_offset, offset, size;
	int k;

	/* skip any leading zero-length segments */
	for (;;) {
		if (i->nr_segs == 0)
			return 0;
		size = min(maxsize, i->bvec->bv_len - skip);
		if (size)
			break;
		i->iov_offset = 0;
		i->nr_segs--;
		i->bvec++;
		skip = 0;
	}

	skip += i->bvec->bv_offset;
	page = i->bvec->bv_page + skip / PAGE_SIZE;
	offset = skip % PAGE_SIZE;
	*offset0 = offset;

	/* want_pages_array() caps maxpages to what [offset, size) spans */
	maxpages = want_pages_array(pages, size, offset, maxpages);
	if (!maxpages)
		return -ENOMEM;
	p = *pages;
	for (k = 0; k < maxpages; k++)
		p[k] = page + k;

	size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
	iov_iter_advance(i, size);
	return size;
}
1691  
/*
 * Extract a list of virtually contiguous pages from an ITER_KVEC iterator.
 * This does not get references on the pages, nor does it get a pin on them.
 */
static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i,
					   struct page ***pages, size_t maxsize,
					   unsigned int maxpages,
					   iov_iter_extraction_t extraction_flags,
					   size_t *offset0)
{
	struct page **p, *page;
	const void *kaddr;
	size_t skip = i->iov_offset, offset, len, size;
	int k;

	/* skip any leading zero-length segments */
	for (;;) {
		if (i->nr_segs == 0)
			return 0;
		size = min(maxsize, i->kvec->iov_len - skip);
		if (size)
			break;
		i->iov_offset = 0;
		i->nr_segs--;
		i->kvec++;
		skip = 0;
	}

	kaddr = i->kvec->iov_base + skip;
	offset = (unsigned long)kaddr & ~PAGE_MASK;
	*offset0 = offset;

	maxpages = want_pages_array(pages, size, offset, maxpages);
	if (!maxpages)
		return -ENOMEM;
	p = *pages;

	/* translate each page of the (virtually contiguous) kernel range */
	kaddr -= offset;
	len = offset + size;
	for (k = 0; k < maxpages; k++) {
		size_t seg = min_t(size_t, len, PAGE_SIZE);

		/* vmalloc/module memory needs a page-table walk */
		if (is_vmalloc_or_module_addr(kaddr))
			page = vmalloc_to_page(kaddr);
		else
			page = virt_to_page(kaddr);

		p[k] = page;
		len -= seg;
		kaddr += PAGE_SIZE;
	}

	size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
	iov_iter_advance(i, size);
	return size;
}
1747  
/*
 * Extract a list of contiguous pages from a user iterator and get a pin on
 * each of them.  This should only be used if the iterator is user-backed
 * (IOBUF/UBUF).
 *
 * It does not get refs on the pages, but the pages must be unpinned by the
 * caller once the transfer is complete.
 *
 * This is safe to be used where background IO/DMA *is* going to be modifying
 * the buffer; using a pin rather than a ref makes forces fork() to give the
 * child a copy of the page.
 */
static ssize_t iov_iter_extract_user_pages(struct iov_iter *i,
					   struct page ***pages,
					   size_t maxsize,
					   unsigned int maxpages,
					   iov_iter_extraction_t extraction_flags,
					   size_t *offset0)
{
	unsigned long addr;
	unsigned int gup_flags = 0;
	size_t offset;
	int res;

	/* data heading into the iterator's buffer: pages must be writable */
	if (i->data_source == ITER_DEST)
		gup_flags |= FOLL_WRITE;
	if (extraction_flags & ITER_ALLOW_P2PDMA)
		gup_flags |= FOLL_PCI_P2PDMA;
	if (i->nofault)
		gup_flags |= FOLL_NOFAULT;

	addr = first_iovec_segment(i, &maxsize);
	*offset0 = offset = addr % PAGE_SIZE;
	addr &= PAGE_MASK;
	maxpages = want_pages_array(pages, maxsize, offset, maxpages);
	if (!maxpages)
		return -ENOMEM;
	res = pin_user_pages_fast(addr, maxpages, gup_flags, *pages);
	if (unlikely(res <= 0))
		return res;
	/* may have pinned fewer pages than requested */
	maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - offset);
	iov_iter_advance(i, maxsize);
	return maxsize;
}
1792  
1793  /**
1794   * iov_iter_extract_pages - Extract a list of contiguous pages from an iterator
1795   * @i: The iterator to extract from
1796   * @pages: Where to return the list of pages
1797   * @maxsize: The maximum amount of iterator to extract
1798   * @maxpages: The maximum size of the list of pages
1799   * @extraction_flags: Flags to qualify request
1800   * @offset0: Where to return the starting offset into (*@pages)[0]
1801   *
1802   * Extract a list of contiguous pages from the current point of the iterator,
1803   * advancing the iterator.  The maximum number of pages and the maximum amount
1804   * of page contents can be set.
1805   *
1806   * If *@pages is NULL, a page list will be allocated to the required size and
1807   * *@pages will be set to its base.  If *@pages is not NULL, it will be assumed
1808   * that the caller allocated a page list at least @maxpages in size and this
1809   * will be filled in.
1810   *
1811   * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
1812   * be allowed on the pages extracted.
1813   *
1814   * The iov_iter_extract_will_pin() function can be used to query how cleanup
1815   * should be performed.
1816   *
1817   * Extra refs or pins on the pages may be obtained as follows:
1818   *
1819   *  (*) If the iterator is user-backed (ITER_IOVEC/ITER_UBUF), pins will be
1820   *      added to the pages, but refs will not be taken.
1821   *      iov_iter_extract_will_pin() will return true.
1822   *
1823   *  (*) If the iterator is ITER_KVEC, ITER_BVEC or ITER_XARRAY, the pages are
1824   *      merely listed; no extra refs or pins are obtained.
1825   *      iov_iter_extract_will_pin() will return 0.
1826   *
1827   * Note also:
1828   *
1829   *  (*) Use with ITER_DISCARD is not supported as that has no content.
1830   *
1831   * On success, the function sets *@pages to the new pagelist, if allocated, and
1832   * sets *offset0 to the offset into the first page.
1833   *
1834   * It may also return -ENOMEM and -EFAULT.
1835   */
iov_iter_extract_pages(struct iov_iter * i,struct page *** pages,size_t maxsize,unsigned int maxpages,iov_iter_extraction_t extraction_flags,size_t * offset0)1836  ssize_t iov_iter_extract_pages(struct iov_iter *i,
1837  			       struct page ***pages,
1838  			       size_t maxsize,
1839  			       unsigned int maxpages,
1840  			       iov_iter_extraction_t extraction_flags,
1841  			       size_t *offset0)
1842  {
1843  	maxsize = min_t(size_t, min_t(size_t, maxsize, i->count), MAX_RW_COUNT);
1844  	if (!maxsize)
1845  		return 0;
1846  
1847  	if (likely(user_backed_iter(i)))
1848  		return iov_iter_extract_user_pages(i, pages, maxsize,
1849  						   maxpages, extraction_flags,
1850  						   offset0);
1851  	if (iov_iter_is_kvec(i))
1852  		return iov_iter_extract_kvec_pages(i, pages, maxsize,
1853  						   maxpages, extraction_flags,
1854  						   offset0);
1855  	if (iov_iter_is_bvec(i))
1856  		return iov_iter_extract_bvec_pages(i, pages, maxsize,
1857  						   maxpages, extraction_flags,
1858  						   offset0);
1859  	if (iov_iter_is_xarray(i))
1860  		return iov_iter_extract_xarray_pages(i, pages, maxsize,
1861  						     maxpages, extraction_flags,
1862  						     offset0);
1863  	return -EFAULT;
1864  }
1865  EXPORT_SYMBOL_GPL(iov_iter_extract_pages);
1866