/* xref: /openbmc/linux/lib/iov_iter.c (revision fcb14cb1) */
// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

#define PIPE_PARANOIA /* for now */

/* covers ubuf and kbuf alike */
#define iterate_buf(i, n, base, len, off, __p, STEP) {		\
	size_t __maybe_unused off = 0;				\
	len = n;						\
	base = __p + i->iov_offset;				\
	len -= (STEP);						\
	i->iov_offset += len;					\
	n = len;						\
}

/* covers iovec and kvec alike */
#define iterate_iovec(i, n, base, len, off, __p, STEP) {	\
	size_t off = 0;						\
	size_t skip = i->iov_offset;				\
	do {							\
		len = min(n, __p->iov_len - skip);		\
		if (likely(len)) {				\
			base = __p->iov_base + skip;		\
			len -= (STEP);				\
			off += len;				\
			skip += len;				\
			n -= len;				\
			if (skip < __p->iov_len)		\
				break;				\
		}						\
		__p++;						\
		skip = 0;					\
	} while (n);						\
	i->iov_offset = skip;					\
	n = off;						\
}

#define iterate_bvec(i, n, base, len, off, p, STEP) {		\
	size_t off = 0;						\
	unsigned skip = i->iov_offset;				\
	while (n) {						\
		unsigned offset = p->bv_offset + skip;		\
		unsigned left;					\
		void *kaddr = kmap_local_page(p->bv_page +	\
					offset / PAGE_SIZE);	\
		base = kaddr + offset % PAGE_SIZE;		\
		len = min(min(n, (size_t)(p->bv_len - skip)),	\
		     (size_t)(PAGE_SIZE - offset % PAGE_SIZE));	\
		left = (STEP);					\
		kunmap_local(kaddr);				\
		len -= left;					\
		off += len;					\
		skip += len;					\
		if (skip == p->bv_len) {			\
			skip = 0;				\
			p++;					\
		}						\
		n -= len;					\
		if (left)					\
			break;					\
	}							\
	i->iov_offset = skip;					\
	n = off;						\
}

#define iterate_xarray(i, n, base, len, __off, STEP) {		\
	__label__ __out;					\
	size_t __off = 0;					\
	struct folio *folio;					\
	loff_t start = i->xarray_start + i->iov_offset;		\
	pgoff_t index = start / PAGE_SIZE;			\
	XA_STATE(xas, i->xarray, index);			\
								\
	len = PAGE_SIZE - offset_in_page(start);		\
	rcu_read_lock();					\
	xas_for_each(&xas, folio, ULONG_MAX) {			\
		unsigned left;					\
		size_t offset;					\
		if (xas_retry(&xas, folio))			\
			continue;				\
		if (WARN_ON(xa_is_value(folio)))		\
			break;					\
		if (WARN_ON(folio_test_hugetlb(folio)))		\
			break;					\
		offset = offset_in_folio(folio, start + __off);	\
		while (offset < folio_size(folio)) {		\
			base = kmap_local_folio(folio, offset);	\
			len = min(n, len);			\
			left = (STEP);				\
			kunmap_local(base);			\
			len -= left;				\
			__off += len;				\
			n -= len;				\
			if (left || n == 0)			\
				goto __out;			\
			offset += len;				\
			len = PAGE_SIZE;			\
		}						\
	}							\
__out:								\
	rcu_read_unlock();					\
	i->iov_offset += __off;					\
	n = __off;						\
}

#define __iterate_and_advance(i, n, base, len, off, I, K) {	\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (likely(n)) {					\
		if (likely(iter_is_ubuf(i))) {			\
			void __user *base;			\
			size_t len;				\
			iterate_buf(i, n, base, len, off,	\
						i->ubuf, (I))	\
		} else if (likely(iter_is_iovec(i))) {		\
			const struct iovec *iov = i->iov;	\
			void __user *base;			\
			size_t len;				\
			iterate_iovec(i, n, base, len, off,	\
						iov, (I))	\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		} else if (iov_iter_is_bvec(i)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			void *base;				\
			size_t len;				\
			iterate_bvec(i, n, base, len, off,	\
						bvec, (K))	\
			i->nr_segs -= bvec - i->bvec;		\
			i->bvec = bvec;				\
		} else if (iov_iter_is_kvec(i)) {		\
			const struct kvec *kvec = i->kvec;	\
			void *base;				\
			size_t len;				\
			iterate_iovec(i, n, base, len, off,	\
						kvec, (K))	\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else if (iov_iter_is_xarray(i)) {		\
			void *base;				\
			size_t len;				\
			iterate_xarray(i, n, base, len, off,	\
							(K))	\
		}						\
		i->count -= n;					\
	}							\
}
#define iterate_and_advance(i, n, base, len, off, I, K) \
	__iterate_and_advance(i, n, base, len, off, I, ((void)(K),0))
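
/*
 * Editor's sketch (not part of the original file): how the iteration
 * macros are consumed.  The I expression sees a user pointer in "base"
 * and must evaluate to the number of bytes it could NOT process; the K
 * expression sees a kernel pointer and does likewise.  This hypothetical
 * helper "processes" each user segment only by faulting it in, copying
 * nothing.
 */
static __maybe_unused size_t example_probe_iter(size_t bytes, struct iov_iter *i)
{
	iterate_and_advance(i, bytes, base, len, off,
		fault_in_readable(base, len),	/* user side: bytes not faulted in */
		0				/* kernel side: nothing left over */
	)
	return bytes;
}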

static int copyout(void __user *to, const void *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(from, n)) {
		instrument_copy_from_user(to, from, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	buf->ops = &page_cache_pipe_buf_ops;
	buf->flags = 0;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * fault_in_iov_iter_readable - fault in iov iterator for reading
 * @i: iterator
 * @size: maximum length
 *
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * @size.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Returns the number of bytes not faulted in (like copy_to_user() and
 * copy_from_user()).
 *
 * Always returns 0 for non-userspace iterators.
 */
size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
{
	if (iter_is_ubuf(i)) {
		size_t n = min(size, iov_iter_count(i));
		n -= fault_in_readable(i->ubuf + i->iov_offset, n);
		return size - n;
	} else if (iter_is_iovec(i)) {
		size_t count = min(size, iov_iter_count(i));
		const struct iovec *p;
		size_t skip;

		size -= count;
		for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
			size_t len = min(count, p->iov_len - skip);
			size_t ret;

			if (unlikely(!len))
				continue;
			ret = fault_in_readable(p->iov_base + skip, len);
			count -= len - ret;
			if (ret)
				break;
		}
		return count + size;
	}
	return 0;
}
EXPORT_SYMBOL(fault_in_iov_iter_readable);
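
/*
 * Editor's sketch: the classic buffered-write pattern.  Callers prefault
 * the source before copying with page faults disabled, and retry after a
 * short copy.  Since fault_in_iov_iter_readable() returns the number of
 * bytes NOT faulted in, equality with the chunk size means no progress
 * is possible.  Hypothetical helper, for illustration only.
 */
static __maybe_unused bool example_prefault_source(struct iov_iter *from, size_t chunk)
{
	return fault_in_iov_iter_readable(from, chunk) != chunk;
}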

/*
 * fault_in_iov_iter_writeable - fault in iov iterator for writing
 * @i: iterator
 * @size: maximum length
 *
 * Faults in the iterator using get_user_pages(), i.e., without triggering
 * hardware page faults.  This is primarily useful when we already know that
 * some or all of the pages in @i aren't in memory.
 *
 * Returns the number of bytes not faulted in, like copy_to_user() and
 * copy_from_user().
 *
 * Always returns 0 for non-user-space iterators.
 */
size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
{
	if (iter_is_ubuf(i)) {
		size_t n = min(size, iov_iter_count(i));
		n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n);
		return size - n;
	} else if (iter_is_iovec(i)) {
		size_t count = min(size, iov_iter_count(i));
		const struct iovec *p;
		size_t skip;

		size -= count;
		for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
			size_t len = min(count, p->iov_len - skip);
			size_t ret;

			if (unlikely(!len))
				continue;
			ret = fault_in_safe_writeable(p->iov_base + skip, len);
			count -= len - ret;
			if (ret)
				break;
		}
		return count + size;
	}
	return 0;
}
EXPORT_SYMBOL(fault_in_iov_iter_writeable);
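
/*
 * Editor's sketch: the read-side counterpart.  Filesystems that cannot
 * service page faults while holding locks prefault the destination with
 * the function above, then copy with faults disabled; a fault during the
 * copy simply yields a short count instead of a deadlock.
 */
static __maybe_unused size_t example_copy_to_iter_locked(const void *src,
							 size_t n, struct iov_iter *to)
{
	size_t copied;

	pagefault_disable();
	copied = copy_to_iter(src, n, to);	/* may return short */
	pagefault_enable();
	return copied;
}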

void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	*i = (struct iov_iter) {
		.iter_type = ITER_IOVEC,
		.nofault = false,
		.user_backed = true,
		.data_source = direction,
		.iov = iov,
		.nr_segs = nr_segs,
		.iov_offset = 0,
		.count = count
	};
}
EXPORT_SYMBOL(iov_iter_init);
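
/*
 * Editor's sketch: wrapping a single user buffer in an ITER_IOVEC.  With
 * direction WRITE the iterator is a data source, ready for
 * copy_from_iter().  "ubuf"/"len" stand in for values from userspace;
 * hypothetical helper, and note the iovec must outlive the iterator.
 */
static __maybe_unused size_t example_init_iter(void __user *ubuf, size_t len)
{
	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
	struct iov_iter iter;

	iov_iter_init(&iter, WRITE, &iov, 1, len);
	return iov_iter_count(&iter);	/* == len */
}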

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *iter_headp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int iter_head;
	size_t off;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &iter_head, &off);
	*iter_headp = iter_head;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[iter_head & p_mask].len += size;
			return size;
		}
		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
		iter_head++;
	}
	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;

		buf->ops = &default_pipe_buf_ops;
		buf->flags = 0;
		buf->page = page;
		buf->offset = 0;
		buf->len = min_t(ssize_t, left, PAGE_SIZE);
		left -= buf->len;
		iter_head++;
		pipe->head = iter_head;

		if (left == 0)
			return size;
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	__wsum next = csum_partial_copy_nocheck(from, to, len);
	return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
					 struct iov_iter *i, __wsum *sump)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	__wsum sum = *sump;
	size_t off = 0;
	unsigned int i_head;
	size_t r;

	if (!sanity(i))
		return 0;

	bytes = push_pipe(i, bytes, &i_head, &r);
	while (bytes) {
		size_t chunk = min_t(size_t, bytes, PAGE_SIZE - r);
		char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
		sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off);
		kunmap_local(p);
		i->head = i_head;
		i->iov_offset = r + chunk;
		bytes -= chunk;
		off += chunk;
		r = 0;
		i_head++;
	}
	*sump = sum;
	i->count -= off;
	return off;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (user_backed_iter(i))
		might_fault();
	iterate_and_advance(i, bytes, base, len, off,
		copyout(base, addr + off, len),
		memcpy(base, addr + off, len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);
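
/*
 * Editor's sketch: a typical read_iter-style producer.  copy_to_iter()
 * (the size-checking wrapper around _copy_to_iter() in linux/uio.h) may
 * copy less than requested on a user fault; the caller reports the short
 * count.  "dev_buf"/"avail" describe a hypothetical device buffer.
 */
static __maybe_unused ssize_t example_read_iter(struct iov_iter *to,
						const void *dev_buf, size_t avail)
{
	size_t want = min(iov_iter_count(to), avail);
	size_t copied;

	if (!want)
		return 0;
	copied = copy_to_iter(dev_buf, want, to);
	return copied ? copied : -EFAULT;
}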

#ifdef CONFIG_ARCH_HAS_COPY_MC
static int copyout_mc(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = copy_mc_to_user((__force void *) to, from, n);
	}
	return n;
}

static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	unsigned int valid = pipe->head;
	size_t n, off, xfer = 0;

	if (!sanity(i))
		return 0;

	n = push_pipe(i, bytes, &i_head, &off);
	while (n) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
		unsigned long rem;
		rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);
		chunk -= rem;
		kunmap_local(p);
		if (chunk) {
			i->head = i_head;
			i->iov_offset = off + chunk;
			xfer += chunk;
			valid = i_head + 1;
		}
		if (rem) {
			pipe->bufs[i_head & p_mask].len -= rem;
			pipe_discard_from(pipe, valid);
			break;
		}
		n -= chunk;
		off = 0;
		i_head++;
	}
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC, read(2) aborts and returns EIO or the number
 * of bytes successfully copied.
 *
 * The main differences between this and typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 *
 * Return: number of bytes copied (may be %0)
 */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_mc_pipe_to_iter(addr, bytes, i);
	if (user_backed_iter(i))
		might_fault();
	__iterate_and_advance(i, bytes, base, len, off,
		copyout_mc(base, addr + off, len),
		copy_mc_to_kernel(base, addr + off, len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */
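
#ifdef CONFIG_ARCH_HAS_COPY_MC
/*
 * Editor's sketch: the shape of a dax_copy_to_iter()-style caller.  A
 * short return means poisoned source memory was hit; the caller turns a
 * zero-byte result into -EIO.  "kaddr" is a hypothetical kernel mapping
 * of persistent memory.
 */
static __maybe_unused ssize_t example_dax_read(void *kaddr, size_t len,
					       struct iov_iter *to)
{
	size_t copied = _copy_mc_to_iter(kaddr, len, to);

	return copied ? copied : -EIO;
}
#endif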

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (user_backed_iter(i))
		might_fault();
	iterate_and_advance(i, bytes, base, len, off,
		copyin(addr + off, base, len),
		memcpy(addr + off, base, len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);
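
/*
 * Editor's sketch: a typical write_iter-style consumer.  copy_from_iter()
 * advances the iterator, so a retry after prefaulting picks up exactly
 * where the short copy stopped.  "staging"/"space" describe a
 * hypothetical kernel-side buffer.
 */
static __maybe_unused ssize_t example_write_iter(void *staging, size_t space,
						 struct iov_iter *from)
{
	size_t want = min(iov_iter_count(from), space);
	size_t copied;

	if (!want)
		return 0;
	copied = copy_from_iter(staging, want, from);
	return copied ? copied : -EFAULT;
}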

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, base, len, off,
		__copy_from_user_inatomic_nocache(addr + off, base, len),
		memcpy(addr + off, base, len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed
 * for all iterator types. _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 *
 * Return: number of bytes copied (may be %0)
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, base, len, off,
		__copy_from_user_flushcache(addr + off, base, len),
		memcpy_flushcache(addr + off, base, len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order to compute
	 * the page size.  However, we mostly deal with order-0 pages,
	 * so we can avoid a possible cache line miss for requests that
	 * fit all page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		return copy_page_to_iter_pipe(page, offset, bytes, i);
	} else {
		void *kaddr = kmap_local_page(page);
		size_t wanted = _copy_to_iter(kaddr + offset, bytes, i);
		kunmap_local(kaddr);
		return wanted;
	}
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t res = 0;
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	page += offset / PAGE_SIZE; // first subpage
	offset %= PAGE_SIZE;
	while (1) {
		size_t n = __copy_page_to_iter(page, offset,
				min(bytes, (size_t)PAGE_SIZE - offset), i);
		res += n;
		bytes -= n;
		if (!bytes || !n)
			break;
		offset += n;
		if (offset == PAGE_SIZE) {
			page++;
			offset = 0;
		}
	}
	return res;
}
EXPORT_SYMBOL(copy_page_to_iter);
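
/*
 * Editor's sketch: serving a read from an up-to-date page-cache page.
 * copy_page_to_iter() maps the page (subpage by subpage for compound
 * pages) and advances @to itself; a short return signals a user fault.
 * Locking and uptodate checks are elided; hypothetical helper.
 */
static __maybe_unused size_t example_send_page(struct page *page, size_t poff,
					       size_t plen, struct iov_iter *to)
{
	return copy_page_to_iter(page, poff, plen, to);
}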

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (page_copy_sane(page, offset, bytes)) {
		void *kaddr = kmap_local_page(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_local(kaddr);
		return wanted;
	}
	return 0;
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;

	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
		memset(p + off, 0, chunk);
		kunmap_local(p);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, base, len, count,
		clear_user(base, len),
		memset(base, 0, len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);
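
/*
 * Editor's sketch: reading a hole in a sparse file.  Rather than copying
 * from a zeroed bounce buffer, the reader zero-fills the destination
 * directly; the iterator advances as if data had been copied.
 */
static __maybe_unused size_t example_read_hole(size_t hole_len, struct iov_iter *to)
{
	return iov_iter_zero(min(hole_len, iov_iter_count(to)), to);
}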

size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
				  struct iov_iter *i)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, base, len, off,
		copyin(p + off, base, len),
		memcpy(p + off, base, len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(copy_page_from_iter_atomic);

static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_head = pipe->head;
	unsigned int p_mask = pipe->ring_size - 1;

	if (!pipe_empty(p_head, p_tail)) {
		struct pipe_buffer *buf;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;

		if (off) {
			buf = &pipe->bufs[i_head & p_mask];
			buf->len = off - buf->offset;
			i_head++;
		}
		while (p_head != i_head) {
			p_head--;
			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
		}

		pipe->head = p_head;
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (size) {
		struct pipe_buffer *buf;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset, left = size;

		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[i_head & p_mask].offset;
		while (1) {
			buf = &pipe->bufs[i_head & p_mask];
			if (left <= buf->len)
				break;
			left -= buf->len;
			i_head++;
		}
		i->head = i_head;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
	const struct bio_vec *bvec, *end;

	if (!i->count)
		return;
	i->count -= size;

	size += i->iov_offset;

	for (bvec = i->bvec, end = bvec + i->nr_segs; bvec < end; bvec++) {
		if (likely(size < bvec->bv_len))
			break;
		size -= bvec->bv_len;
	}
	i->iov_offset = size;
	i->nr_segs -= bvec - i->bvec;
	i->bvec = bvec;
}

static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
{
	const struct iovec *iov, *end;

	if (!i->count)
		return;
	i->count -= size;

	size += i->iov_offset; // from beginning of current segment
	for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) {
		if (likely(size < iov->iov_len))
			break;
		size -= iov->iov_len;
	}
	i->iov_offset = size;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(i->count < size))
		size = i->count;
	if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) {
		i->iov_offset += size;
		i->count -= size;
	} else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
		/* iovec and kvec have identical layouts */
		iov_iter_iovec_advance(i, size);
	} else if (iov_iter_is_bvec(i)) {
		iov_iter_bvec_advance(i, size);
	} else if (iov_iter_is_pipe(i)) {
		pipe_advance(i, size);
	} else if (iov_iter_is_discard(i)) {
		i->count -= size;
	}
}
EXPORT_SYMBOL(iov_iter_advance);
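
/*
 * Editor's sketch: callers that consume part of an iterator out of band
 * (say, a header transferred through some other path) advance it manually
 * so the remaining count and segment cursor stay in sync.
 */
static __maybe_unused void example_skip_header(struct iov_iter *i, size_t hdr_len)
{
	iov_iter_advance(i, min(hdr_len, iov_iter_count(i)));
}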

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		while (1) {
			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
			size_t n = off - b->offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && i_head == i->start_head) {
				off = 0;
				break;
			}
			i_head--;
			b = &pipe->bufs[i_head & p_mask];
			off = b->offset + b->len;
		}
		i->iov_offset = off;
		i->head = i_head;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) {
		BUG(); /* We should never go beyond the start of the specified
			* range since we might then be straying into pages that
			* aren't pinned.
			*/
	} else if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logic for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);
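
/*
 * Editor's sketch: the revert pattern.  A caller that copied data in but
 * then failed to commit it puts the iterator back so the next attempt
 * (or the error path) sees an unconsumed buffer.  The "committed" flag
 * stands in for whatever commit step a real caller performs.
 */
static __maybe_unused size_t example_try_commit(void *dst, size_t n,
						struct iov_iter *from, bool committed)
{
	size_t copied = copy_from_iter(dst, n, from);

	if (copied && !committed) {
		/* commit failed: put the bytes back for a retry */
		iov_iter_revert(from, copied);
		copied = 0;
	}
	return copied;
}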

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (i->nr_segs > 1) {
		if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
			return min(i->count, i->iov->iov_len - i->iov_offset);
		if (iov_iter_is_bvec(i))
			return min(i->count, i->bvec->bv_len - i->iov_offset);
	}
	return i->count;
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	*i = (struct iov_iter){
		.iter_type = ITER_KVEC,
		.data_source = direction,
		.kvec = kvec,
		.nr_segs = nr_segs,
		.iov_offset = 0,
		.count = count
	};
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	*i = (struct iov_iter){
		.iter_type = ITER_BVEC,
		.data_source = direction,
		.bvec = bvec,
		.nr_segs = nr_segs,
		.iov_offset = 0,
		.count = count
	};
}
EXPORT_SYMBOL(iov_iter_bvec);
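
/*
 * Editor's sketch: building iterators over kernel memory.  A kvec wraps
 * ordinary kernel pointers; a bvec wraps pages, which is what the block
 * layer and networking pass around.  The vectors must outlive the
 * iterators; hypothetical helper.
 */
static __maybe_unused void example_kernel_iters(void *buf, size_t len,
						struct page *page)
{
	struct kvec kv = { .iov_base = buf, .iov_len = len };
	struct bio_vec bv = { .bv_page = page, .bv_len = PAGE_SIZE, .bv_offset = 0 };
	struct iov_iter kiter, biter;

	iov_iter_kvec(&kiter, WRITE, &kv, 1, len);		/* source: buf */
	iov_iter_bvec(&biter, READ, &bv, 1, PAGE_SIZE);		/* destination: page */
}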

void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
	*i = (struct iov_iter){
		.iter_type = ITER_PIPE,
		.data_source = false,
		.pipe = pipe,
		.head = pipe->head,
		.start_head = pipe->head,
		.iov_offset = 0,
		.count = count
	};
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @xarray: The xarray to access.
 * @start: The start file position.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator to either draw data out of the pages attached to an
 * inode or to inject data into those pages.  The pages *must* be prevented
 * from evaporation, either by taking a ref on them or locking them by the
 * caller.
 */
void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
		     struct xarray *xarray, loff_t start, size_t count)
{
	BUG_ON(direction & ~1);
	*i = (struct iov_iter) {
		.iter_type = ITER_XARRAY,
		.data_source = direction,
		.xarray = xarray,
		.xarray_start = start,
		.count = count,
		.iov_offset = 0
	};
}
EXPORT_SYMBOL(iov_iter_xarray);
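
/*
 * Editor's sketch: netfs-style use, iterating straight over an inode's
 * page cache.  The caller must already hold references on (or locks
 * over) the pages in [pos, pos + len); hypothetical helper.
 */
static __maybe_unused void example_xarray_iter(struct address_space *mapping,
					       loff_t pos, size_t len,
					       struct iov_iter *iter)
{
	iov_iter_xarray(iter, READ, &mapping->i_pages, pos, len);
}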

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	*i = (struct iov_iter){
		.iter_type = ITER_DISCARD,
		.data_source = false,
		.count = count,
		.iov_offset = 0
	};
}
EXPORT_SYMBOL(iov_iter_discard);
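
/*
 * Editor's sketch: draining data nobody wants, e.g. skipping over part
 * of a stream.  Copies into a discard iterator decrement the count but
 * store nothing.
 */
static __maybe_unused size_t example_drain(const void *src, size_t len)
{
	struct iov_iter sink;

	iov_iter_discard(&sink, READ, len);
	return copy_to_iter(src, len, &sink);
}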

static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
				   unsigned len_mask)
{
	size_t size = i->count;
	size_t skip = i->iov_offset;
	unsigned k;

	for (k = 0; k < i->nr_segs; k++, skip = 0) {
		size_t len = i->iov[k].iov_len - skip;

		if (len > size)
			len = size;
		if (len & len_mask)
			return false;
		if ((unsigned long)(i->iov[k].iov_base + skip) & addr_mask)
			return false;

		size -= len;
		if (!size)
			break;
	}
	return true;
}

static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
				  unsigned len_mask)
{
	size_t size = i->count;
	unsigned skip = i->iov_offset;
	unsigned k;

	for (k = 0; k < i->nr_segs; k++, skip = 0) {
		size_t len = i->bvec[k].bv_len - skip;

		if (len > size)
			len = size;
		if (len & len_mask)
			return false;
		if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask)
			return false;

		size -= len;
		if (!size)
			break;
	}
	return true;
}

/**
 * iov_iter_is_aligned() - Check if the addresses and lengths of each segment
 *	are aligned to the parameters.
 *
 * @i: &struct iov_iter to check
 * @addr_mask: bit mask to check against the iov element's addresses
 * @len_mask: bit mask to check against the iov element's lengths
 *
 * Return: false if any addresses or lengths intersect with the provided masks
 */
1182cfa320f7SKeith Busch bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
1183cfa320f7SKeith Busch 			 unsigned len_mask)
1184cfa320f7SKeith Busch {
1185*fcb14cb1SAl Viro 	if (likely(iter_is_ubuf(i))) {
1186*fcb14cb1SAl Viro 		if (i->count & len_mask)
1187*fcb14cb1SAl Viro 			return false;
1188*fcb14cb1SAl Viro 		if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask)
1189*fcb14cb1SAl Viro 			return false;
1190*fcb14cb1SAl Viro 		return true;
1191*fcb14cb1SAl Viro 	}
1192*fcb14cb1SAl Viro 
1193cfa320f7SKeith Busch 	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1194cfa320f7SKeith Busch 		return iov_iter_aligned_iovec(i, addr_mask, len_mask);
1195cfa320f7SKeith Busch 
1196cfa320f7SKeith Busch 	if (iov_iter_is_bvec(i))
1197cfa320f7SKeith Busch 		return iov_iter_aligned_bvec(i, addr_mask, len_mask);
1198cfa320f7SKeith Busch 
1199cfa320f7SKeith Busch 	if (iov_iter_is_pipe(i)) {
1200cfa320f7SKeith Busch 		unsigned int p_mask = i->pipe->ring_size - 1;
1201cfa320f7SKeith Busch 		size_t size = i->count;
1202cfa320f7SKeith Busch 
1203cfa320f7SKeith Busch 		if (size & len_mask)
1204cfa320f7SKeith Busch 			return false;
1205cfa320f7SKeith Busch 		if (size && allocated(&i->pipe->bufs[i->head & p_mask])) {
1206cfa320f7SKeith Busch 			if (i->iov_offset & addr_mask)
1207cfa320f7SKeith Busch 				return false;
1208cfa320f7SKeith Busch 		}
1209cfa320f7SKeith Busch 
1210cfa320f7SKeith Busch 		return true;
1211cfa320f7SKeith Busch 	}
1212cfa320f7SKeith Busch 
1213cfa320f7SKeith Busch 	if (iov_iter_is_xarray(i)) {
1214cfa320f7SKeith Busch 		if (i->count & len_mask)
1215cfa320f7SKeith Busch 			return false;
1216cfa320f7SKeith Busch 		if ((i->xarray_start + i->iov_offset) & addr_mask)
1217cfa320f7SKeith Busch 			return false;
1218cfa320f7SKeith Busch 	}
1219cfa320f7SKeith Busch 
1220cfa320f7SKeith Busch 	return true;
1221cfa320f7SKeith Busch }
1222cfa320f7SKeith Busch EXPORT_SYMBOL_GPL(iov_iter_is_aligned);
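
/*
 * Illustrative sketch (editor's addition, not part of this file): a
 * hypothetical direct-I/O gate built on iov_iter_is_aligned(). For a
 * power-of-two logical block size lbs, passing lbs - 1 as both masks
 * rejects any segment address or length that is not a multiple of lbs.
 */
static bool dio_iter_aligned(const struct iov_iter *iter, unsigned int lbs)
{
	return iov_iter_is_aligned(iter, lbs - 1, lbs - 1);
}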
1223cfa320f7SKeith Busch 
12249221d2e3SAl Viro static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
1225d879cb83SAl Viro {
1226d879cb83SAl Viro 	unsigned long res = 0;
1227d879cb83SAl Viro 	size_t size = i->count;
12289221d2e3SAl Viro 	size_t skip = i->iov_offset;
12299221d2e3SAl Viro 	unsigned k;
1230d879cb83SAl Viro 
12319221d2e3SAl Viro 	for (k = 0; k < i->nr_segs; k++, skip = 0) {
12329221d2e3SAl Viro 		size_t len = i->iov[k].iov_len - skip;
12339221d2e3SAl Viro 		if (len) {
12349221d2e3SAl Viro 			res |= (unsigned long)i->iov[k].iov_base + skip;
12359221d2e3SAl Viro 			if (len > size)
12369221d2e3SAl Viro 				len = size;
12379221d2e3SAl Viro 			res |= len;
12389221d2e3SAl Viro 			size -= len;
12399221d2e3SAl Viro 			if (!size)
12409221d2e3SAl Viro 				break;
12419221d2e3SAl Viro 		}
12429221d2e3SAl Viro 	}
12439221d2e3SAl Viro 	return res;
12449221d2e3SAl Viro }
12459221d2e3SAl Viro 
12469221d2e3SAl Viro static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
12479221d2e3SAl Viro {
12489221d2e3SAl Viro 	unsigned res = 0;
12499221d2e3SAl Viro 	size_t size = i->count;
12509221d2e3SAl Viro 	unsigned skip = i->iov_offset;
12519221d2e3SAl Viro 	unsigned k;
12529221d2e3SAl Viro 
12539221d2e3SAl Viro 	for (k = 0; k < i->nr_segs; k++, skip = 0) {
12549221d2e3SAl Viro 		size_t len = i->bvec[k].bv_len - skip;
12559221d2e3SAl Viro 		res |= (unsigned long)i->bvec[k].bv_offset + skip;
12569221d2e3SAl Viro 		if (len > size)
12579221d2e3SAl Viro 			len = size;
12589221d2e3SAl Viro 		res |= len;
12599221d2e3SAl Viro 		size -= len;
12609221d2e3SAl Viro 		if (!size)
12619221d2e3SAl Viro 			break;
12629221d2e3SAl Viro 	}
12639221d2e3SAl Viro 	return res;
12649221d2e3SAl Viro }
12659221d2e3SAl Viro 
12669221d2e3SAl Viro unsigned long iov_iter_alignment(const struct iov_iter *i)
12679221d2e3SAl Viro {
1268*fcb14cb1SAl Viro 	if (likely(iter_is_ubuf(i))) {
1269*fcb14cb1SAl Viro 		size_t size = i->count;
1270*fcb14cb1SAl Viro 		if (size)
1271*fcb14cb1SAl Viro 			return ((unsigned long)i->ubuf + i->iov_offset) | size;
1272*fcb14cb1SAl Viro 		return 0;
1273*fcb14cb1SAl Viro 	}
1274*fcb14cb1SAl Viro 
12759221d2e3SAl Viro 	/* iovec and kvec have identical layouts */
12769221d2e3SAl Viro 	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
12779221d2e3SAl Viro 		return iov_iter_alignment_iovec(i);
12789221d2e3SAl Viro 
12799221d2e3SAl Viro 	if (iov_iter_is_bvec(i))
12809221d2e3SAl Viro 		return iov_iter_alignment_bvec(i);
12819221d2e3SAl Viro 
12829221d2e3SAl Viro 	if (iov_iter_is_pipe(i)) {
1283e0ff126eSJan Kara 		unsigned int p_mask = i->pipe->ring_size - 1;
12849221d2e3SAl Viro 		size_t size = i->count;
1285e0ff126eSJan Kara 
12868cefc107SDavid Howells 		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
1287241699cdSAl Viro 			return size | i->iov_offset;
1288241699cdSAl Viro 		return size;
1289241699cdSAl Viro 	}
12909221d2e3SAl Viro 
12919221d2e3SAl Viro 	if (iov_iter_is_xarray(i))
12923d14ec1fSDavid Howells 		return (i->xarray_start + i->iov_offset) | i->count;
12939221d2e3SAl Viro 
12949221d2e3SAl Viro 	return 0;
1295d879cb83SAl Viro }
1296d879cb83SAl Viro EXPORT_SYMBOL(iov_iter_alignment);
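
/*
 * Illustrative sketch (editor's addition): because iov_iter_alignment()
 * ORs every segment address and length together, a single AND against a
 * mask answers "is everything aligned to this boundary?", e.g. when
 * deciding whether an O_DIRECT fast path is usable.
 */
static bool iter_fits_blocksize(const struct iov_iter *iter, unsigned int bs)
{
	return (iov_iter_alignment(iter) & (bs - 1)) == 0;
}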
1297d879cb83SAl Viro 
1298357f435dSAl Viro unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1299357f435dSAl Viro {
1300357f435dSAl Viro 	unsigned long res = 0;
1301610c7a71SAl Viro 	unsigned long v = 0;
1302357f435dSAl Viro 	size_t size = i->count;
1303610c7a71SAl Viro 	unsigned k;
1304357f435dSAl Viro 
1305*fcb14cb1SAl Viro 	if (iter_is_ubuf(i))
1306*fcb14cb1SAl Viro 		return 0;
1307*fcb14cb1SAl Viro 
1308610c7a71SAl Viro 	if (WARN_ON(!iter_is_iovec(i)))
1309241699cdSAl Viro 		return ~0U;
1310241699cdSAl Viro 
1311610c7a71SAl Viro 	for (k = 0; k < i->nr_segs; k++) {
1312610c7a71SAl Viro 		if (i->iov[k].iov_len) {
1313610c7a71SAl Viro 			unsigned long base = (unsigned long)i->iov[k].iov_base;
1314610c7a71SAl Viro 			if (v) // if not the first one
1315610c7a71SAl Viro 				res |= base | v; // this start | previous end
1316610c7a71SAl Viro 			v = base + i->iov[k].iov_len;
1317610c7a71SAl Viro 			if (size <= i->iov[k].iov_len)
1318610c7a71SAl Viro 				break;
1319610c7a71SAl Viro 			size -= i->iov[k].iov_len;
1320610c7a71SAl Viro 		}
1321610c7a71SAl Viro 	}
1322357f435dSAl Viro 	return res;
1323357f435dSAl Viro }
1324357f435dSAl Viro EXPORT_SYMBOL(iov_iter_gap_alignment);
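
/*
 * Illustrative sketch (editor's addition): a queue with a virt_boundary
 * mask (NVMe-style) cannot accept an iovec whose inter-segment gaps
 * cross that boundary; blk_rq_map_user_iov() performs essentially this
 * test before deciding whether it must copy the data instead.
 */
static bool iter_gaps_ok(const struct iov_iter *iter, unsigned long boundary_mask)
{
	return (iov_iter_gap_alignment(iter) & boundary_mask) == 0;
}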
1325357f435dSAl Viro 
1326e76b6312SIlya Dryomov static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1327241699cdSAl Viro 				size_t maxsize,
1328241699cdSAl Viro 				struct page **pages,
13298cefc107SDavid Howells 				int iter_head,
1330241699cdSAl Viro 				size_t *start)
1331241699cdSAl Viro {
1332241699cdSAl Viro 	struct pipe_inode_info *pipe = i->pipe;
13338cefc107SDavid Howells 	unsigned int p_mask = pipe->ring_size - 1;
13348cefc107SDavid Howells 	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
1335241699cdSAl Viro 	if (!n)
1336241699cdSAl Viro 		return -EFAULT;
1337241699cdSAl Viro 
1338241699cdSAl Viro 	maxsize = n;
1339241699cdSAl Viro 	n += *start;
13401689c73aSAl Viro 	while (n > 0) {
13418cefc107SDavid Howells 		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
13428cefc107SDavid Howells 		iter_head++;
1343241699cdSAl Viro 		n -= PAGE_SIZE;
1344241699cdSAl Viro 	}
1345241699cdSAl Viro 
1346241699cdSAl Viro 	return maxsize;
1347241699cdSAl Viro }
1348241699cdSAl Viro 
1349241699cdSAl Viro static ssize_t pipe_get_pages(struct iov_iter *i,
1350241699cdSAl Viro 		   struct page **pages, size_t maxsize, unsigned maxpages,
1351241699cdSAl Viro 		   size_t *start)
1352241699cdSAl Viro {
13538cefc107SDavid Howells 	unsigned int iter_head, npages;
1354241699cdSAl Viro 	size_t capacity;
1355241699cdSAl Viro 
1356241699cdSAl Viro 	if (!sanity(i))
1357241699cdSAl Viro 		return -EFAULT;
1358241699cdSAl Viro 
13598cefc107SDavid Howells 	data_start(i, &iter_head, start);
13608cefc107SDavid Howells 	/* Amount of free space: some of this one + all after this one */
13618cefc107SDavid Howells 	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1362241699cdSAl Viro 	capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1363241699cdSAl Viro 
13648cefc107SDavid Howells 	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
1365241699cdSAl Viro }
1366241699cdSAl Viro 
13677ff50620SDavid Howells static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
13687ff50620SDavid Howells 					  pgoff_t index, unsigned int nr_pages)
13697ff50620SDavid Howells {
13707ff50620SDavid Howells 	XA_STATE(xas, xa, index);
13717ff50620SDavid Howells 	struct page *page;
13727ff50620SDavid Howells 	unsigned int ret = 0;
13737ff50620SDavid Howells 
13747ff50620SDavid Howells 	rcu_read_lock();
13757ff50620SDavid Howells 	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
13767ff50620SDavid Howells 		if (xas_retry(&xas, page))
13777ff50620SDavid Howells 			continue;
13787ff50620SDavid Howells 
13797ff50620SDavid Howells 		/* Has the page moved or been split? */
13807ff50620SDavid Howells 		if (unlikely(page != xas_reload(&xas))) {
13817ff50620SDavid Howells 			xas_reset(&xas);
13827ff50620SDavid Howells 			continue;
13837ff50620SDavid Howells 		}
13847ff50620SDavid Howells 
13857ff50620SDavid Howells 		pages[ret] = find_subpage(page, xas.xa_index);
13867ff50620SDavid Howells 		get_page(pages[ret]);
13877ff50620SDavid Howells 		if (++ret == nr_pages)
13887ff50620SDavid Howells 			break;
13897ff50620SDavid Howells 	}
13907ff50620SDavid Howells 	rcu_read_unlock();
13917ff50620SDavid Howells 	return ret;
13927ff50620SDavid Howells }
13937ff50620SDavid Howells 
13947ff50620SDavid Howells static ssize_t iter_xarray_get_pages(struct iov_iter *i,
13957ff50620SDavid Howells 				     struct page **pages, size_t maxsize,
13967ff50620SDavid Howells 				     unsigned maxpages, size_t *_start_offset)
13977ff50620SDavid Howells {
13987ff50620SDavid Howells 	unsigned nr, offset;
13997ff50620SDavid Howells 	pgoff_t index, count;
14006c776766SDavid Howells 	size_t size = maxsize;
14017ff50620SDavid Howells 	loff_t pos;
14027ff50620SDavid Howells 
14037ff50620SDavid Howells 	if (!size || !maxpages)
14047ff50620SDavid Howells 		return 0;
14057ff50620SDavid Howells 
14067ff50620SDavid Howells 	pos = i->xarray_start + i->iov_offset;
14077ff50620SDavid Howells 	index = pos >> PAGE_SHIFT;
14087ff50620SDavid Howells 	offset = pos & ~PAGE_MASK;
14097ff50620SDavid Howells 	*_start_offset = offset;
14107ff50620SDavid Howells 
14117ff50620SDavid Howells 	count = 1;
14127ff50620SDavid Howells 	if (size > PAGE_SIZE - offset) {
14137ff50620SDavid Howells 		size -= PAGE_SIZE - offset;
14147ff50620SDavid Howells 		count += size >> PAGE_SHIFT;
14157ff50620SDavid Howells 		size &= ~PAGE_MASK;
14167ff50620SDavid Howells 		if (size)
14177ff50620SDavid Howells 			count++;
14187ff50620SDavid Howells 	}
14197ff50620SDavid Howells 
14207ff50620SDavid Howells 	if (count > maxpages)
14217ff50620SDavid Howells 		count = maxpages;
14227ff50620SDavid Howells 
14237ff50620SDavid Howells 	nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
14247ff50620SDavid Howells 	if (nr == 0)
14257ff50620SDavid Howells 		return 0;
14267ff50620SDavid Howells 
14271c27f1fcSLinus Torvalds 	return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
14287ff50620SDavid Howells }
14297ff50620SDavid Howells 
1430*fcb14cb1SAl Viro /* must be done on a non-empty ITER_UBUF or ITER_IOVEC iterator */
1431dd45ab9dSAl Viro static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size)
14323d671ca6SAl Viro {
14333d671ca6SAl Viro 	size_t skip;
14343d671ca6SAl Viro 	long k;
14353d671ca6SAl Viro 
1436*fcb14cb1SAl Viro 	if (iter_is_ubuf(i))
1437*fcb14cb1SAl Viro 		return (unsigned long)i->ubuf + i->iov_offset;
1438*fcb14cb1SAl Viro 
14393d671ca6SAl Viro 	for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
14403d671ca6SAl Viro 		size_t len = i->iov[k].iov_len - skip;
14413d671ca6SAl Viro 
14423d671ca6SAl Viro 		if (unlikely(!len))
14433d671ca6SAl Viro 			continue;
144459dbd7d0SAl Viro 		if (*size > len)
14453d671ca6SAl Viro 			*size = len;
1446dd45ab9dSAl Viro 		return (unsigned long)i->iov[k].iov_base + skip;
14473d671ca6SAl Viro 	}
14483d671ca6SAl Viro 	BUG(); // if it had been empty, we wouldn't get called
14493d671ca6SAl Viro }
14503d671ca6SAl Viro 
14513d671ca6SAl Viro /* must be done on a non-empty ITER_BVEC iterator */
14523d671ca6SAl Viro static struct page *first_bvec_segment(const struct iov_iter *i,
145359dbd7d0SAl Viro 				       size_t *size, size_t *start)
14543d671ca6SAl Viro {
14553d671ca6SAl Viro 	struct page *page;
14563d671ca6SAl Viro 	size_t skip = i->iov_offset, len;
14573d671ca6SAl Viro 
14583d671ca6SAl Viro 	len = i->bvec->bv_len - skip;
145959dbd7d0SAl Viro 	if (*size > len)
146059dbd7d0SAl Viro 		*size = len;
14613d671ca6SAl Viro 	skip += i->bvec->bv_offset;
14623d671ca6SAl Viro 	page = i->bvec->bv_page + skip / PAGE_SIZE;
1463dda8e5d1SAl Viro 	*start = skip % PAGE_SIZE;
14643d671ca6SAl Viro 	return page;
14653d671ca6SAl Viro }
14663d671ca6SAl Viro 
1467d879cb83SAl Viro ssize_t iov_iter_get_pages(struct iov_iter *i,
1468d879cb83SAl Viro 		   struct page **pages, size_t maxsize, unsigned maxpages,
1469d879cb83SAl Viro 		   size_t *start)
1470d879cb83SAl Viro {
14713d671ca6SAl Viro 	int n, res;
14723d671ca6SAl Viro 
1473d879cb83SAl Viro 	if (maxsize > i->count)
1474d879cb83SAl Viro 		maxsize = i->count;
14753d671ca6SAl Viro 	if (!maxsize)
14763d671ca6SAl Viro 		return 0;
14777392ed17SAl Viro 	if (maxsize > MAX_RW_COUNT)
14787392ed17SAl Viro 		maxsize = MAX_RW_COUNT;
1479d879cb83SAl Viro 
1480*fcb14cb1SAl Viro 	if (likely(user_backed_iter(i))) {
14813337ab08SAndreas Gruenbacher 		unsigned int gup_flags = 0;
14823d671ca6SAl Viro 		unsigned long addr;
14839ea9ce04SDavid Howells 
14843337ab08SAndreas Gruenbacher 		if (iov_iter_rw(i) != WRITE)
14853337ab08SAndreas Gruenbacher 			gup_flags |= FOLL_WRITE;
14863337ab08SAndreas Gruenbacher 		if (i->nofault)
14873337ab08SAndreas Gruenbacher 			gup_flags |= FOLL_NOFAULT;
14883337ab08SAndreas Gruenbacher 
1489dd45ab9dSAl Viro 		addr = first_iovec_segment(i, &maxsize);
1490dd45ab9dSAl Viro 		*start = addr % PAGE_SIZE;
1491dd45ab9dSAl Viro 		addr &= PAGE_MASK;
149259dbd7d0SAl Viro 		n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1493dda8e5d1SAl Viro 		if (n > maxpages)
1494dda8e5d1SAl Viro 			n = maxpages;
14953337ab08SAndreas Gruenbacher 		res = get_user_pages_fast(addr, n, gup_flags, pages);
1496814a6674SAndreas Gruenbacher 		if (unlikely(res <= 0))
1497d879cb83SAl Viro 			return res;
149859dbd7d0SAl Viro 		return min_t(size_t, maxsize, res * PAGE_SIZE - *start);
14993d671ca6SAl Viro 	}
15003d671ca6SAl Viro 	if (iov_iter_is_bvec(i)) {
15013d671ca6SAl Viro 		struct page *page;
15023d671ca6SAl Viro 
150359dbd7d0SAl Viro 		page = first_bvec_segment(i, &maxsize, start);
150459dbd7d0SAl Viro 		n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1505dda8e5d1SAl Viro 		if (n > maxpages)
1506dda8e5d1SAl Viro 			n = maxpages;
1507dda8e5d1SAl Viro 		for (int k = 0; k < n; k++)
15083d671ca6SAl Viro 			get_page(*pages++ = page++);
150959dbd7d0SAl Viro 		return min_t(size_t, maxsize, n * PAGE_SIZE - *start);
15103d671ca6SAl Viro 	}
15113d671ca6SAl Viro 	if (iov_iter_is_pipe(i))
15123d671ca6SAl Viro 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
15133d671ca6SAl Viro 	if (iov_iter_is_xarray(i))
15143d671ca6SAl Viro 		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
1515d879cb83SAl Viro 	return -EFAULT;
1516d879cb83SAl Viro }
1517d879cb83SAl Viro EXPORT_SYMBOL(iov_iter_get_pages);
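
/*
 * Illustrative sketch (editor's addition): pinning the first chunk of
 * an iterator. iov_iter_get_pages() takes page references but does not
 * advance @i, so callers pair it with iov_iter_advance() and later drop
 * each reference with put_page().
 */
static ssize_t pin_first_chunk(struct iov_iter *i, struct page **pages,
			       unsigned maxpages, size_t *offset)
{
	ssize_t bytes = iov_iter_get_pages(i, pages, i->count, maxpages, offset);

	if (bytes > 0)
		iov_iter_advance(i, bytes);
	return bytes;	/* > 0: bytes covered; 0: empty iterator; < 0: error */
}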
1518d879cb83SAl Viro 
1519d879cb83SAl Viro static struct page **get_pages_array(size_t n)
1520d879cb83SAl Viro {
1521752ade68SMichal Hocko 	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1522d879cb83SAl Viro }
1523d879cb83SAl Viro 
1524241699cdSAl Viro static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1525241699cdSAl Viro 		   struct page ***pages, size_t maxsize,
1526241699cdSAl Viro 		   size_t *start)
1527241699cdSAl Viro {
1528241699cdSAl Viro 	struct page **p;
15298cefc107SDavid Howells 	unsigned int iter_head, npages;
1530d7760d63SIlya Dryomov 	ssize_t n;
1531241699cdSAl Viro 
1532241699cdSAl Viro 	if (!sanity(i))
1533241699cdSAl Viro 		return -EFAULT;
1534241699cdSAl Viro 
15358cefc107SDavid Howells 	data_start(i, &iter_head, start);
15368cefc107SDavid Howells 	/* Amount of free space: some of this one + all after this one */
15378cefc107SDavid Howells 	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1538241699cdSAl Viro 	n = npages * PAGE_SIZE - *start;
1539241699cdSAl Viro 	if (maxsize > n)
1540241699cdSAl Viro 		maxsize = n;
1541241699cdSAl Viro 	else
1542241699cdSAl Viro 		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1543241699cdSAl Viro 	p = get_pages_array(npages);
1544241699cdSAl Viro 	if (!p)
1545241699cdSAl Viro 		return -ENOMEM;
15468cefc107SDavid Howells 	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1547241699cdSAl Viro 	if (n > 0)
1548241699cdSAl Viro 		*pages = p;
1549241699cdSAl Viro 	else
1550241699cdSAl Viro 		kvfree(p);
1551241699cdSAl Viro 	return n;
1552241699cdSAl Viro }
1553241699cdSAl Viro 
15547ff50620SDavid Howells static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
15557ff50620SDavid Howells 					   struct page ***pages, size_t maxsize,
15567ff50620SDavid Howells 					   size_t *_start_offset)
15577ff50620SDavid Howells {
15587ff50620SDavid Howells 	struct page **p;
15597ff50620SDavid Howells 	unsigned nr, offset;
15607ff50620SDavid Howells 	pgoff_t index, count;
15616c776766SDavid Howells 	size_t size = maxsize;
15627ff50620SDavid Howells 	loff_t pos;
15637ff50620SDavid Howells 
15647ff50620SDavid Howells 	if (!size)
15657ff50620SDavid Howells 		return 0;
15667ff50620SDavid Howells 
15677ff50620SDavid Howells 	pos = i->xarray_start + i->iov_offset;
15687ff50620SDavid Howells 	index = pos >> PAGE_SHIFT;
15697ff50620SDavid Howells 	offset = pos & ~PAGE_MASK;
15707ff50620SDavid Howells 	*_start_offset = offset;
15717ff50620SDavid Howells 
15727ff50620SDavid Howells 	count = 1;
15737ff50620SDavid Howells 	if (size > PAGE_SIZE - offset) {
15747ff50620SDavid Howells 		size -= PAGE_SIZE - offset;
15757ff50620SDavid Howells 		count += size >> PAGE_SHIFT;
15767ff50620SDavid Howells 		size &= ~PAGE_MASK;
15777ff50620SDavid Howells 		if (size)
15787ff50620SDavid Howells 			count++;
15797ff50620SDavid Howells 	}
15807ff50620SDavid Howells 
15817ff50620SDavid Howells 	p = get_pages_array(count);
15827ff50620SDavid Howells 	if (!p)
15837ff50620SDavid Howells 		return -ENOMEM;
15847ff50620SDavid Howells 	*pages = p;
15857ff50620SDavid Howells 
15867ff50620SDavid Howells 	nr = iter_xarray_populate_pages(p, i->xarray, index, count);
15877ff50620SDavid Howells 	if (nr == 0)
15887ff50620SDavid Howells 		return 0;
15897ff50620SDavid Howells 
15901c27f1fcSLinus Torvalds 	return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
15917ff50620SDavid Howells }
15927ff50620SDavid Howells 
1593d879cb83SAl Viro ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1594d879cb83SAl Viro 		   struct page ***pages, size_t maxsize,
1595d879cb83SAl Viro 		   size_t *start)
1596d879cb83SAl Viro {
1597d879cb83SAl Viro 	struct page **p;
15983d671ca6SAl Viro 	int n, res;
1599d879cb83SAl Viro 
1600d879cb83SAl Viro 	if (maxsize > i->count)
1601d879cb83SAl Viro 		maxsize = i->count;
16023d671ca6SAl Viro 	if (!maxsize)
16033d671ca6SAl Viro 		return 0;
16047392ed17SAl Viro 	if (maxsize > MAX_RW_COUNT)
16057392ed17SAl Viro 		maxsize = MAX_RW_COUNT;
1606d879cb83SAl Viro 
1607*fcb14cb1SAl Viro 	if (likely(user_backed_iter(i))) {
16083337ab08SAndreas Gruenbacher 		unsigned int gup_flags = 0;
16093d671ca6SAl Viro 		unsigned long addr;
16109ea9ce04SDavid Howells 
16113337ab08SAndreas Gruenbacher 		if (iov_iter_rw(i) != WRITE)
16123337ab08SAndreas Gruenbacher 			gup_flags |= FOLL_WRITE;
16133337ab08SAndreas Gruenbacher 		if (i->nofault)
16143337ab08SAndreas Gruenbacher 			gup_flags |= FOLL_NOFAULT;
16153337ab08SAndreas Gruenbacher 
1616dd45ab9dSAl Viro 		addr = first_iovec_segment(i, &maxsize);
1617dd45ab9dSAl Viro 		*start = addr % PAGE_SIZE;
1618dd45ab9dSAl Viro 		addr &= PAGE_MASK;
161959dbd7d0SAl Viro 		n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1620d879cb83SAl Viro 		p = get_pages_array(n);
1621d879cb83SAl Viro 		if (!p)
1622d879cb83SAl Viro 			return -ENOMEM;
16233337ab08SAndreas Gruenbacher 		res = get_user_pages_fast(addr, n, gup_flags, p);
1624814a6674SAndreas Gruenbacher 		if (unlikely(res <= 0)) {
1625d879cb83SAl Viro 			kvfree(p);
1626814a6674SAndreas Gruenbacher 			*pages = NULL;
1627d879cb83SAl Viro 			return res;
1628d879cb83SAl Viro 		}
1629d879cb83SAl Viro 		*pages = p;
163059dbd7d0SAl Viro 		return min_t(size_t, maxsize, res * PAGE_SIZE - *start);
16313d671ca6SAl Viro 	}
16323d671ca6SAl Viro 	if (iov_iter_is_bvec(i)) {
16333d671ca6SAl Viro 		struct page *page;
16343d671ca6SAl Viro 
163559dbd7d0SAl Viro 		page = first_bvec_segment(i, &maxsize, start);
163659dbd7d0SAl Viro 		n = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
16373d671ca6SAl Viro 		*pages = p = get_pages_array(n);
1638d879cb83SAl Viro 		if (!p)
1639d879cb83SAl Viro 			return -ENOMEM;
1640dda8e5d1SAl Viro 		for (int k = 0; k < n; k++)
16413d671ca6SAl Viro 			get_page(*p++ = page++);
164259dbd7d0SAl Viro 		return min_t(size_t, maxsize, n * PAGE_SIZE - *start);
16433d671ca6SAl Viro 	}
16443d671ca6SAl Viro 	if (iov_iter_is_pipe(i))
16453d671ca6SAl Viro 		return pipe_get_pages_alloc(i, pages, maxsize, start);
16463d671ca6SAl Viro 	if (iov_iter_is_xarray(i))
16473d671ca6SAl Viro 		return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
1648d879cb83SAl Viro 	return -EFAULT;
1649d879cb83SAl Viro }
1650d879cb83SAl Viro EXPORT_SYMBOL(iov_iter_get_pages_alloc);
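
/*
 * Illustrative sketch (editor's addition): releasing what
 * iov_iter_get_pages_alloc() handed back. The array comes from
 * kvmalloc_array(), so kvfree() is the matching free, and each page
 * carries one reference taken by get_page().
 */
static void unpin_pages(struct page **pages, size_t bytes, size_t start)
{
	int k, npages = DIV_ROUND_UP(bytes + start, PAGE_SIZE);

	for (k = 0; k < npages; k++)
		put_page(pages[k]);
	kvfree(pages);
}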
1651d879cb83SAl Viro 
1652d879cb83SAl Viro size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1653d879cb83SAl Viro 			       struct iov_iter *i)
1654d879cb83SAl Viro {
1655d879cb83SAl Viro 	__wsum sum, next;
1656d879cb83SAl Viro 	sum = *csum;
16579ea9ce04SDavid Howells 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1658241699cdSAl Viro 		WARN_ON(1);
1659241699cdSAl Viro 		return 0;
1660241699cdSAl Viro 	}
16617baa5099SAl Viro 	iterate_and_advance(i, bytes, base, len, off, ({
16627baa5099SAl Viro 		next = csum_and_copy_from_user(base, addr + off, len);
1663d879cb83SAl Viro 		sum = csum_block_add(sum, next, off);
16647baa5099SAl Viro 		next ? 0 : len;
1665d879cb83SAl Viro 	}), ({
16667baa5099SAl Viro 		sum = csum_and_memcpy(addr + off, base, len, sum, off);
1667d879cb83SAl Viro 	})
1668d879cb83SAl Viro 	)
1669d879cb83SAl Viro 	*csum = sum;
1670d879cb83SAl Viro 	return bytes;
1671d879cb83SAl Viro }
1672d879cb83SAl Viro EXPORT_SYMBOL(csum_and_copy_from_iter);
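
/*
 * Illustrative sketch (editor's addition): receive-side usage, copying
 * user data in while folding it into a running checksum. A short return
 * means the copy faulted; networking callers drop the packet then.
 */
static int copy_from_iter_csummed(void *to, size_t len, __wsum *csum,
				  struct iov_iter *from)
{
	return csum_and_copy_from_iter(to, len, csum, from) == len ? 0 : -EFAULT;
}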
1673d879cb83SAl Viro 
167452cbd23aSWillem de Bruijn size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1675d879cb83SAl Viro 			     struct iov_iter *i)
1676d879cb83SAl Viro {
167752cbd23aSWillem de Bruijn 	struct csum_state *csstate = _csstate;
1678d879cb83SAl Viro 	__wsum sum, next;
167978e1f386SAl Viro 
168078e1f386SAl Viro 	if (unlikely(iov_iter_is_discard(i))) {
1681241699cdSAl Viro 		WARN_ON(1);	/* for now */
1682241699cdSAl Viro 		return 0;
1683241699cdSAl Viro 	}
16846852df12SAl Viro 
16856852df12SAl Viro 	sum = csum_shift(csstate->csum, csstate->off);
16866852df12SAl Viro 	if (unlikely(iov_iter_is_pipe(i)))
16876852df12SAl Viro 		bytes = csum_and_copy_to_pipe_iter(addr, bytes, i, &sum);
16886852df12SAl Viro 	else iterate_and_advance(i, bytes, base, len, off, ({
16897baa5099SAl Viro 		next = csum_and_copy_to_user(addr + off, base, len);
1690d879cb83SAl Viro 		sum = csum_block_add(sum, next, off);
16917baa5099SAl Viro 		next ? 0 : len;
1692d879cb83SAl Viro 	}), ({
16937baa5099SAl Viro 		sum = csum_and_memcpy(base, addr + off, len, sum, off);
1694d879cb83SAl Viro 	})
1695d879cb83SAl Viro 	)
1696594e450bSAl Viro 	csstate->csum = csum_shift(sum, csstate->off);
1697594e450bSAl Viro 	csstate->off += bytes;
1698d879cb83SAl Viro 	return bytes;
1699d879cb83SAl Viro }
1700d879cb83SAl Viro EXPORT_SYMBOL(csum_and_copy_to_iter);
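
/*
 * Illustrative sketch (editor's addition): the transmit-side twin.
 * struct csum_state carries the folded checksum and the byte offset
 * reached so far, so a payload can be copied out in several calls and
 * the per-chunk checksums still combine correctly.
 */
static size_t copy_two_chunks_csummed(const void *a, size_t alen,
				      const void *b, size_t blen,
				      __wsum *csum, struct iov_iter *to)
{
	struct csum_state cs = { .csum = *csum, .off = 0 };
	size_t done = csum_and_copy_to_iter(a, alen, &cs, to);

	if (done == alen)
		done += csum_and_copy_to_iter(b, blen, &cs, to);
	*csum = cs.csum;
	return done;
}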
1701d879cb83SAl Viro 
1702d05f4435SSagi Grimberg size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1703d05f4435SSagi Grimberg 		struct iov_iter *i)
1704d05f4435SSagi Grimberg {
17057999096fSHerbert Xu #ifdef CONFIG_CRYPTO_HASH
1706d05f4435SSagi Grimberg 	struct ahash_request *hash = hashp;
1707d05f4435SSagi Grimberg 	struct scatterlist sg;
1708d05f4435SSagi Grimberg 	size_t copied;
1709d05f4435SSagi Grimberg 
1710d05f4435SSagi Grimberg 	copied = copy_to_iter(addr, bytes, i);
1711d05f4435SSagi Grimberg 	sg_init_one(&sg, addr, copied);
1712d05f4435SSagi Grimberg 	ahash_request_set_crypt(hash, &sg, NULL, copied);
1713d05f4435SSagi Grimberg 	crypto_ahash_update(hash);
1714d05f4435SSagi Grimberg 	return copied;
171527fad74aSYueHaibing #else
171627fad74aSYueHaibing 	return 0;
171727fad74aSYueHaibing #endif
1718d05f4435SSagi Grimberg }
1719d05f4435SSagi Grimberg EXPORT_SYMBOL(hash_and_copy_to_iter);
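
/*
 * Illustrative sketch (editor's addition): copying a buffer out while
 * feeding the same bytes into a digest and finalizing it, roughly what
 * nvme-tcp does for its data digest. The caller supplies an already
 * allocated crypto_ahash transform; everything here is synchronous.
 */
static int copy_out_and_digest(struct crypto_ahash *tfm, const void *buf,
			       size_t len, u8 *digest, struct iov_iter *to)
{
	struct ahash_request *req = ahash_request_alloc(tfm, GFP_KERNEL);
	int ret;

	if (!req)
		return -ENOMEM;
	ahash_request_set_callback(req, 0, NULL, NULL);
	ret = crypto_ahash_init(req);
	if (!ret) {
		size_t copied = hash_and_copy_to_iter(buf, len, req, to);

		ahash_request_set_crypt(req, NULL, digest, 0);
		ret = crypto_ahash_final(req);
		if (!ret && copied != len)
			ret = -EFAULT;
	}
	ahash_request_free(req);
	return ret;
}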
1720d05f4435SSagi Grimberg 
172166531c65SAl Viro static int iov_npages(const struct iov_iter *i, int maxpages)
1722d879cb83SAl Viro {
172366531c65SAl Viro 	size_t skip = i->iov_offset, size = i->count;
172466531c65SAl Viro 	const struct iovec *p;
1725d879cb83SAl Viro 	int npages = 0;
1726d879cb83SAl Viro 
172766531c65SAl Viro 	for (p = i->iov; size; skip = 0, p++) {
172866531c65SAl Viro 		unsigned offs = offset_in_page(p->iov_base + skip);
172966531c65SAl Viro 		size_t len = min(p->iov_len - skip, size);
1730d879cb83SAl Viro 
173166531c65SAl Viro 		if (len) {
173266531c65SAl Viro 			size -= len;
173366531c65SAl Viro 			npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
173466531c65SAl Viro 			if (unlikely(npages > maxpages))
173566531c65SAl Viro 				return maxpages;
173666531c65SAl Viro 		}
173766531c65SAl Viro 	}
173866531c65SAl Viro 	return npages;
173966531c65SAl Viro }
174066531c65SAl Viro 
174166531c65SAl Viro static int bvec_npages(const struct iov_iter *i, int maxpages)
174266531c65SAl Viro {
174366531c65SAl Viro 	size_t skip = i->iov_offset, size = i->count;
174466531c65SAl Viro 	const struct bio_vec *p;
174566531c65SAl Viro 	int npages = 0;
174666531c65SAl Viro 
174766531c65SAl Viro 	for (p = i->bvec; size; skip = 0, p++) {
174866531c65SAl Viro 		unsigned offs = (p->bv_offset + skip) % PAGE_SIZE;
174966531c65SAl Viro 		size_t len = min(p->bv_len - skip, size);
175066531c65SAl Viro 
175166531c65SAl Viro 		size -= len;
175266531c65SAl Viro 		npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
175366531c65SAl Viro 		if (unlikely(npages > maxpages))
175466531c65SAl Viro 			return maxpages;
175566531c65SAl Viro 	}
175666531c65SAl Viro 	return npages;
175766531c65SAl Viro }
175866531c65SAl Viro 
175966531c65SAl Viro int iov_iter_npages(const struct iov_iter *i, int maxpages)
176066531c65SAl Viro {
176166531c65SAl Viro 	if (unlikely(!i->count))
176266531c65SAl Viro 		return 0;
1763*fcb14cb1SAl Viro 	if (likely(iter_is_ubuf(i))) {
1764*fcb14cb1SAl Viro 		unsigned offs = offset_in_page(i->ubuf + i->iov_offset);
1765*fcb14cb1SAl Viro 		int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE);
1766*fcb14cb1SAl Viro 		return min(npages, maxpages);
1767*fcb14cb1SAl Viro 	}
176866531c65SAl Viro 	/* iovec and kvec have identical layouts */
176966531c65SAl Viro 	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
177066531c65SAl Viro 		return iov_npages(i, maxpages);
177166531c65SAl Viro 	if (iov_iter_is_bvec(i))
177266531c65SAl Viro 		return bvec_npages(i, maxpages);
177366531c65SAl Viro 	if (iov_iter_is_pipe(i)) {
17748cefc107SDavid Howells 		unsigned int iter_head;
177566531c65SAl Viro 		int npages;
1776241699cdSAl Viro 		size_t off;
1777241699cdSAl Viro 
1778241699cdSAl Viro 		if (!sanity(i))
1779241699cdSAl Viro 			return 0;
1780241699cdSAl Viro 
17818cefc107SDavid Howells 		data_start(i, &iter_head, &off);
1782241699cdSAl Viro 		/* some of this one + all after this one */
178366531c65SAl Viro 		npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
178466531c65SAl Viro 		return min(npages, maxpages);
178566531c65SAl Viro 	}
178666531c65SAl Viro 	if (iov_iter_is_xarray(i)) {
1787e4f8df86SAl Viro 		unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
1788e4f8df86SAl Viro 		int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
178966531c65SAl Viro 		return min(npages, maxpages);
179066531c65SAl Viro 	}
179166531c65SAl Viro 	return 0;
1792d879cb83SAl Viro }
1793d879cb83SAl Viro EXPORT_SYMBOL(iov_iter_npages);
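
/*
 * Illustrative sketch (editor's addition, assumes <linux/bio.h>): asking
 * how many page slots a mapping of @i would need, capped the way the
 * block layer caps it at BIO_MAX_VECS when sizing a bio.
 */
static unsigned int iter_bio_vecs(const struct iov_iter *i)
{
	return iov_iter_npages(i, BIO_MAX_VECS);
}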
1794d879cb83SAl Viro 
1795d879cb83SAl Viro const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1796d879cb83SAl Viro {
1797d879cb83SAl Viro 	*new = *old;
179800e23707SDavid Howells 	if (unlikely(iov_iter_is_pipe(new))) {
1799241699cdSAl Viro 		WARN_ON(1);
1800241699cdSAl Viro 		return NULL;
1801241699cdSAl Viro 	}
180200e23707SDavid Howells 	if (iov_iter_is_bvec(new))
1803d879cb83SAl Viro 		return new->bvec = kmemdup(new->bvec,
1804d879cb83SAl Viro 				    new->nr_segs * sizeof(struct bio_vec),
1805d879cb83SAl Viro 				    flags);
1806*fcb14cb1SAl Viro 	else if (iov_iter_is_kvec(new) || iter_is_iovec(new))
1807d879cb83SAl Viro 		/* iovec and kvec have identical layout */
1808d879cb83SAl Viro 		return new->iov = kmemdup(new->iov,
1809d879cb83SAl Viro 				   new->nr_segs * sizeof(struct iovec),
1810d879cb83SAl Viro 				   flags);
1811*fcb14cb1SAl Viro 	return NULL;
1812d879cb83SAl Viro }
1813d879cb83SAl Viro EXPORT_SYMBOL(dup_iter);
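
/*
 * Illustrative sketch (editor's addition): snapshotting an iterator for
 * asynchronous use. dup_iter() deep-copies the segment array, so the
 * duplicate stays valid after the original advances; the returned
 * pointer is what the caller eventually passes to kfree().
 */
static int snapshot_iter(struct iov_iter *dst, struct iov_iter *src,
			 const void **arr_to_free)
{
	*arr_to_free = dup_iter(dst, src, GFP_KERNEL);
	if (!*arr_to_free &&
	    (iter_is_iovec(src) || iov_iter_is_kvec(src) || iov_iter_is_bvec(src)))
		return -ENOMEM;	/* kmemdup() of the segment array failed */
	return 0;
}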
1814bc917be8SAl Viro 
1815bfdc5970SChristoph Hellwig static int copy_compat_iovec_from_user(struct iovec *iov,
1816bfdc5970SChristoph Hellwig 		const struct iovec __user *uvec, unsigned long nr_segs)
1817bfdc5970SChristoph Hellwig {
1818bfdc5970SChristoph Hellwig 	const struct compat_iovec __user *uiov =
1819bfdc5970SChristoph Hellwig 		(const struct compat_iovec __user *)uvec;
1820bfdc5970SChristoph Hellwig 	int ret = -EFAULT, i;
1821bfdc5970SChristoph Hellwig 
1822a959a978SChristoph Hellwig 	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
1823bfdc5970SChristoph Hellwig 		return -EFAULT;
1824bfdc5970SChristoph Hellwig 
1825bfdc5970SChristoph Hellwig 	for (i = 0; i < nr_segs; i++) {
1826bfdc5970SChristoph Hellwig 		compat_uptr_t buf;
1827bfdc5970SChristoph Hellwig 		compat_ssize_t len;
1828bfdc5970SChristoph Hellwig 
1829bfdc5970SChristoph Hellwig 		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1830bfdc5970SChristoph Hellwig 		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1831bfdc5970SChristoph Hellwig 
1832bfdc5970SChristoph Hellwig 		/* check for compat_size_t not fitting in compat_ssize_t .. */
1833bfdc5970SChristoph Hellwig 		if (len < 0) {
1834bfdc5970SChristoph Hellwig 			ret = -EINVAL;
1835bfdc5970SChristoph Hellwig 			goto uaccess_end;
1836bfdc5970SChristoph Hellwig 		}
1837bfdc5970SChristoph Hellwig 		iov[i].iov_base = compat_ptr(buf);
1838bfdc5970SChristoph Hellwig 		iov[i].iov_len = len;
1839bfdc5970SChristoph Hellwig 	}
1840bfdc5970SChristoph Hellwig 
1841bfdc5970SChristoph Hellwig 	ret = 0;
1842bfdc5970SChristoph Hellwig uaccess_end:
1843bfdc5970SChristoph Hellwig 	user_access_end();
1844bfdc5970SChristoph Hellwig 	return ret;
1845bfdc5970SChristoph Hellwig }
1846bfdc5970SChristoph Hellwig 
1847bfdc5970SChristoph Hellwig static int copy_iovec_from_user(struct iovec *iov,
1848bfdc5970SChristoph Hellwig 		const struct iovec __user *uvec, unsigned long nr_segs)
1849fb041b59SDavid Laight {
1850fb041b59SDavid Laight 	unsigned long seg;
1851bfdc5970SChristoph Hellwig 
1852bfdc5970SChristoph Hellwig 	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1853bfdc5970SChristoph Hellwig 		return -EFAULT;
1854bfdc5970SChristoph Hellwig 	for (seg = 0; seg < nr_segs; seg++) {
1855bfdc5970SChristoph Hellwig 		if ((ssize_t)iov[seg].iov_len < 0)
1856bfdc5970SChristoph Hellwig 			return -EINVAL;
1857bfdc5970SChristoph Hellwig 	}
1858bfdc5970SChristoph Hellwig 
1859bfdc5970SChristoph Hellwig 	return 0;
1860bfdc5970SChristoph Hellwig }
1861bfdc5970SChristoph Hellwig 
1862bfdc5970SChristoph Hellwig struct iovec *iovec_from_user(const struct iovec __user *uvec,
1863bfdc5970SChristoph Hellwig 		unsigned long nr_segs, unsigned long fast_segs,
1864bfdc5970SChristoph Hellwig 		struct iovec *fast_iov, bool compat)
1865bfdc5970SChristoph Hellwig {
1866bfdc5970SChristoph Hellwig 	struct iovec *iov = fast_iov;
1867bfdc5970SChristoph Hellwig 	int ret;
1868fb041b59SDavid Laight 
1869fb041b59SDavid Laight 	/*
1870bfdc5970SChristoph Hellwig 	 * SuS says "The readv() function *may* fail if the iovcnt argument was
1871bfdc5970SChristoph Hellwig 	 * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
1872fb041b59SDavid Laight 	 * traditionally returned zero for zero segments, so...
1873fb041b59SDavid Laight 	 */
1874bfdc5970SChristoph Hellwig 	if (nr_segs == 0)
1875bfdc5970SChristoph Hellwig 		return iov;
1876bfdc5970SChristoph Hellwig 	if (nr_segs > UIO_MAXIOV)
1877bfdc5970SChristoph Hellwig 		return ERR_PTR(-EINVAL);
1878fb041b59SDavid Laight 	if (nr_segs > fast_segs) {
1879fb041b59SDavid Laight 		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
1880bfdc5970SChristoph Hellwig 		if (!iov)
1881bfdc5970SChristoph Hellwig 			return ERR_PTR(-ENOMEM);
1882fb041b59SDavid Laight 	}
1883bfdc5970SChristoph Hellwig 
1884bfdc5970SChristoph Hellwig 	if (compat)
1885bfdc5970SChristoph Hellwig 		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1886bfdc5970SChristoph Hellwig 	else
1887bfdc5970SChristoph Hellwig 		ret = copy_iovec_from_user(iov, uvec, nr_segs);
1888bfdc5970SChristoph Hellwig 	if (ret) {
1889bfdc5970SChristoph Hellwig 		if (iov != fast_iov)
1890bfdc5970SChristoph Hellwig 			kfree(iov);
1891bfdc5970SChristoph Hellwig 		return ERR_PTR(ret);
1892fb041b59SDavid Laight 	}
1893bfdc5970SChristoph Hellwig 
1894bfdc5970SChristoph Hellwig 	return iov;
1895bfdc5970SChristoph Hellwig }
1896bfdc5970SChristoph Hellwig 
1897bfdc5970SChristoph Hellwig ssize_t __import_iovec(int type, const struct iovec __user *uvec,
1898bfdc5970SChristoph Hellwig 		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
1899bfdc5970SChristoph Hellwig 		 struct iov_iter *i, bool compat)
1900bfdc5970SChristoph Hellwig {
1901bfdc5970SChristoph Hellwig 	ssize_t total_len = 0;
1902bfdc5970SChristoph Hellwig 	unsigned long seg;
1903bfdc5970SChristoph Hellwig 	struct iovec *iov;
1904bfdc5970SChristoph Hellwig 
1905bfdc5970SChristoph Hellwig 	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
1906bfdc5970SChristoph Hellwig 	if (IS_ERR(iov)) {
1907bfdc5970SChristoph Hellwig 		*iovp = NULL;
1908bfdc5970SChristoph Hellwig 		return PTR_ERR(iov);
1909fb041b59SDavid Laight 	}
1910fb041b59SDavid Laight 
1911fb041b59SDavid Laight 	/*
1912bfdc5970SChristoph Hellwig 	 * According to the Single Unix Specification we should return EINVAL if
1913bfdc5970SChristoph Hellwig 	 * an element length is < 0 when cast to ssize_t or if the total length
1914bfdc5970SChristoph Hellwig 	 * would overflow the ssize_t return value of the system call.
1915fb041b59SDavid Laight 	 *
1916fb041b59SDavid Laight 	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
1917fb041b59SDavid Laight 	 * overflow case.
1918fb041b59SDavid Laight 	 */
1919fb041b59SDavid Laight 	for (seg = 0; seg < nr_segs; seg++) {
1920fb041b59SDavid Laight 		ssize_t len = (ssize_t)iov[seg].iov_len;
1921fb041b59SDavid Laight 
1922bfdc5970SChristoph Hellwig 		if (!access_ok(iov[seg].iov_base, len)) {
1923bfdc5970SChristoph Hellwig 			if (iov != *iovp)
1924bfdc5970SChristoph Hellwig 				kfree(iov);
1925bfdc5970SChristoph Hellwig 			*iovp = NULL;
1926bfdc5970SChristoph Hellwig 			return -EFAULT;
1927fb041b59SDavid Laight 		}
1928bfdc5970SChristoph Hellwig 
1929bfdc5970SChristoph Hellwig 		if (len > MAX_RW_COUNT - total_len) {
1930bfdc5970SChristoph Hellwig 			len = MAX_RW_COUNT - total_len;
1931fb041b59SDavid Laight 			iov[seg].iov_len = len;
1932fb041b59SDavid Laight 		}
1933bfdc5970SChristoph Hellwig 		total_len += len;
1934fb041b59SDavid Laight 	}
1935bfdc5970SChristoph Hellwig 
1936bfdc5970SChristoph Hellwig 	iov_iter_init(i, type, iov, nr_segs, total_len);
1937bfdc5970SChristoph Hellwig 	if (iov == *iovp)
1938bfdc5970SChristoph Hellwig 		*iovp = NULL;
1939bfdc5970SChristoph Hellwig 	else
1940bfdc5970SChristoph Hellwig 		*iovp = iov;
1941bfdc5970SChristoph Hellwig 	return total_len;
1942fb041b59SDavid Laight }
1943fb041b59SDavid Laight 
1944ffecee4fSVegard Nossum /**
1945ffecee4fSVegard Nossum  * import_iovec() - Copy an array of &struct iovec from userspace
1946ffecee4fSVegard Nossum  *     into the kernel, check that it is valid, and initialize a new
1947ffecee4fSVegard Nossum  *     &struct iov_iter iterator to access it.
1948ffecee4fSVegard Nossum  *
1949ffecee4fSVegard Nossum  * @type: One of %READ or %WRITE.
1950bfdc5970SChristoph Hellwig  * @uvec: Pointer to the userspace array.
1951ffecee4fSVegard Nossum  * @fast_segs: Number of elements in the array at *@iovp.
1952ffecee4fSVegard Nossum  * @fast_segs: Number of elements in @iov.
1953bfdc5970SChristoph Hellwig  * @iovp: (input and output parameter) Pointer to pointer to (usually small
1954ffecee4fSVegard Nossum  *     on-stack) kernel array.
1955ffecee4fSVegard Nossum  * @i: Pointer to iterator that will be initialized on success.
1956ffecee4fSVegard Nossum  *
1957ffecee4fSVegard Nossum  * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
1958ffecee4fSVegard Nossum  * then this function places %NULL in *@iovp on return. Otherwise, a new
1959ffecee4fSVegard Nossum  * array will be allocated and the result placed in *@iovp. This means that
1960ffecee4fSVegard Nossum  * the caller may call kfree() on *@iovp regardless of whether the small
1961ffecee4fSVegard Nossum  * on-stack array was used or not (and regardless of whether this function
1962ffecee4fSVegard Nossum  * returns an error or not).
1963ffecee4fSVegard Nossum  *
196487e5e6daSJens Axboe  * Return: Negative error code on error, bytes imported on success
1965ffecee4fSVegard Nossum  */
1966bfdc5970SChristoph Hellwig ssize_t import_iovec(int type, const struct iovec __user *uvec,
1967bc917be8SAl Viro 		 unsigned nr_segs, unsigned fast_segs,
1968bfdc5970SChristoph Hellwig 		 struct iovec **iovp, struct iov_iter *i)
1969bc917be8SAl Viro {
197089cd35c5SChristoph Hellwig 	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
197189cd35c5SChristoph Hellwig 			      in_compat_syscall());
1972bc917be8SAl Viro }
1973bc917be8SAl Viro EXPORT_SYMBOL(import_iovec);
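
/*
 * Illustrative sketch (editor's addition): the canonical readv()-style
 * call site, mirroring fs/read_write.c. The small on-stack array avoids
 * an allocation for typical iovcnt, and kfree(iov) is always safe
 * afterwards, as the comment above spells out.
 */
static ssize_t demo_readv(struct file *file, const struct iovec __user *vec,
			  unsigned long vlen, loff_t *pos)
{
	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
	if (ret >= 0) {
		ret = vfs_iter_read(file, &iter, pos, 0);
		kfree(iov);
	}
	return ret;
}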
1974bc917be8SAl Viro 
1975bc917be8SAl Viro int import_single_range(int rw, void __user *buf, size_t len,
1976bc917be8SAl Viro 		 struct iovec *iov, struct iov_iter *i)
1977bc917be8SAl Viro {
1978bc917be8SAl Viro 	if (len > MAX_RW_COUNT)
1979bc917be8SAl Viro 		len = MAX_RW_COUNT;
198096d4f267SLinus Torvalds 	if (unlikely(!access_ok(buf, len)))
1981bc917be8SAl Viro 		return -EFAULT;
1982bc917be8SAl Viro 
1983bc917be8SAl Viro 	iov->iov_base = buf;
1984bc917be8SAl Viro 	iov->iov_len = len;
1985bc917be8SAl Viro 	iov_iter_init(i, rw, iov, 1, len);
1986bc917be8SAl Viro 	return 0;
1987bc917be8SAl Viro }
1988e1267585SAl Viro EXPORT_SYMBOL(import_single_range);
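
/*
 * Illustrative sketch (editor's addition): wrapping a single user
 * buffer, as plain write() paths do. The struct iovec must live as long
 * as the iterator does, so it sits on the caller's stack.
 */
static ssize_t demo_write(struct file *file, const char __user *buf,
			  size_t len, loff_t *pos)
{
	struct iovec iov;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_single_range(WRITE, (void __user *)buf, len, &iov, &iter);
	if (ret)
		return ret;
	return vfs_iter_write(file, &iter, pos, 0);
}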
19898fb0f47aSJens Axboe 
19908fb0f47aSJens Axboe /**
19918fb0f47aSJens Axboe  * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
19928fb0f47aSJens Axboe  *     iov_iter_save_state() was called.
19938fb0f47aSJens Axboe  *
19948fb0f47aSJens Axboe  * @i: &struct iov_iter to restore
19958fb0f47aSJens Axboe  * @state: state to restore from
19968fb0f47aSJens Axboe  *
19978fb0f47aSJens Axboe  * Used after iov_iter_save_state() to restore @i, if operations may
19988fb0f47aSJens Axboe  * have advanced it.
19998fb0f47aSJens Axboe  *
20008fb0f47aSJens Axboe  * Note: only works on ITER_IOVEC, ITER_BVEC, ITER_KVEC, and ITER_UBUF
20018fb0f47aSJens Axboe  */
20028fb0f47aSJens Axboe void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
20038fb0f47aSJens Axboe {
20048fb0f47aSJens Axboe 	if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) &&
2005*fcb14cb1SAl Viro 			 !iov_iter_is_kvec(i) && !iter_is_ubuf(i)))
20068fb0f47aSJens Axboe 		return;
20078fb0f47aSJens Axboe 	i->iov_offset = state->iov_offset;
20088fb0f47aSJens Axboe 	i->count = state->count;
2009*fcb14cb1SAl Viro 	if (iter_is_ubuf(i))
2010*fcb14cb1SAl Viro 		return;
20118fb0f47aSJens Axboe 	/*
20128fb0f47aSJens Axboe 	 * For the *vec iters, nr_segs + iov is constant - if we increment
20138fb0f47aSJens Axboe 	 * the vec, then we also decrement the nr_segs count. Hence we don't
20148fb0f47aSJens Axboe 	 * need to track both of these, just one is enough and we can deduct
20158fb0f47aSJens Axboe 	 * the other from that. ITER_KVEC and ITER_IOVEC are the same struct
20168fb0f47aSJens Axboe 	 * size, so we can just increment the iov pointer as they are unionized.
20178fb0f47aSJens Axboe 	 * ITER_BVEC _may_ be the same size on some archs, but on others it is
20188fb0f47aSJens Axboe 	 * not. Be safe and handle it separately.
20198fb0f47aSJens Axboe 	 */
20208fb0f47aSJens Axboe 	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
20218fb0f47aSJens Axboe 	if (iov_iter_is_bvec(i))
20228fb0f47aSJens Axboe 		i->bvec -= state->nr_segs - i->nr_segs;
20238fb0f47aSJens Axboe 	else
20248fb0f47aSJens Axboe 		i->iov -= state->nr_segs - i->nr_segs;
20258fb0f47aSJens Axboe 	i->nr_segs = state->nr_segs;
20268fb0f47aSJens Axboe }
2027
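
/*
 * Illustrative sketch (editor's addition): the save/restore pattern
 * used by callers such as io_uring: snapshot the iterator, attempt an
 * operation that may consume part of it, and roll back before retrying.
 */
static ssize_t read_or_rewind(struct file *file, struct iov_iter *iter,
			      loff_t *pos)
{
	struct iov_iter_state state;
	ssize_t ret;

	iov_iter_save_state(iter, &state);
	ret = vfs_iter_read(file, iter, pos, 0);
	if (ret == -EAGAIN)
		iov_iter_restore(iter, &state);	/* undo any partial advance */
	return ret;
}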