xref: /openbmc/linux/lib/iov_iter.c (revision aa563d7b)
1 #include <linux/export.h>
2 #include <linux/bvec.h>
3 #include <linux/uio.h>
4 #include <linux/pagemap.h>
5 #include <linux/slab.h>
6 #include <linux/vmalloc.h>
7 #include <linux/splice.h>
8 #include <net/checksum.h>
9 
10 #define PIPE_PARANOIA /* for now */
11 
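/*
 * Segment-walking helpers.  STEP is an expression evaluated once per
 * non-empty chunk described by __v; for iterate_iovec() it must return the
 * number of bytes it could NOT process (0 on full success), and a non-zero
 * result stops the walk early, e.g. on a user-copy fault.  kvec and bvec
 * steps cannot fail, so their STEP result is discarded.
 */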
12 #define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
13 	size_t left;					\
14 	size_t wanted = n;				\
15 	__p = i->iov;					\
16 	__v.iov_len = min(n, __p->iov_len - skip);	\
17 	if (likely(__v.iov_len)) {			\
18 		__v.iov_base = __p->iov_base + skip;	\
19 		left = (STEP);				\
20 		__v.iov_len -= left;			\
21 		skip += __v.iov_len;			\
22 		n -= __v.iov_len;			\
23 	} else {					\
24 		left = 0;				\
25 	}						\
26 	while (unlikely(!left && n)) {			\
27 		__p++;					\
28 		__v.iov_len = min(n, __p->iov_len);	\
29 		if (unlikely(!__v.iov_len))		\
30 			continue;			\
31 		__v.iov_base = __p->iov_base;		\
32 		left = (STEP);				\
33 		__v.iov_len -= left;			\
34 		skip = __v.iov_len;			\
35 		n -= __v.iov_len;			\
36 	}						\
37 	n = wanted - n;					\
38 }
39 
40 #define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
41 	size_t wanted = n;				\
42 	__p = i->kvec;					\
43 	__v.iov_len = min(n, __p->iov_len - skip);	\
44 	if (likely(__v.iov_len)) {			\
45 		__v.iov_base = __p->iov_base + skip;	\
46 		(void)(STEP);				\
47 		skip += __v.iov_len;			\
48 		n -= __v.iov_len;			\
49 	}						\
50 	while (unlikely(n)) {				\
51 		__p++;					\
52 		__v.iov_len = min(n, __p->iov_len);	\
53 		if (unlikely(!__v.iov_len))		\
54 			continue;			\
55 		__v.iov_base = __p->iov_base;		\
56 		(void)(STEP);				\
57 		skip = __v.iov_len;			\
58 		n -= __v.iov_len;			\
59 	}						\
60 	n = wanted;					\
61 }
62 
63 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
64 	struct bvec_iter __start;			\
65 	__start.bi_size = n;				\
66 	__start.bi_bvec_done = skip;			\
67 	__start.bi_idx = 0;				\
68 	for_each_bvec(__v, i->bvec, __bi, __start) {	\
69 		if (!__v.bv_len)			\
70 			continue;			\
71 		(void)(STEP);				\
72 	}						\
73 }
74 
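/*
 * iterate_all_kinds() dispatches on the iterator type and runs the matching
 * step expression (I for user iovecs, B for bio_vecs, K for kvecs) without
 * changing the iterator's position.  iterate_and_advance() below does the
 * same walk, but first clamps n to i->count and afterwards advances the
 * iterator past the bytes that were processed.
 */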
75 #define iterate_all_kinds(i, n, v, I, B, K) {			\
76 	if (likely(n)) {					\
77 		size_t skip = i->iov_offset;			\
78 		if (unlikely(i->type & ITER_BVEC)) {		\
79 			struct bio_vec v;			\
80 			struct bvec_iter __bi;			\
81 			iterate_bvec(i, n, v, __bi, skip, (B))	\
82 		} else if (unlikely(i->type & ITER_KVEC)) {	\
83 			const struct kvec *kvec;		\
84 			struct kvec v;				\
85 			iterate_kvec(i, n, v, kvec, skip, (K))	\
86 		} else {					\
87 			const struct iovec *iov;		\
88 			struct iovec v;				\
89 			iterate_iovec(i, n, v, iov, skip, (I))	\
90 		}						\
91 	}							\
92 }
93 
94 #define iterate_and_advance(i, n, v, I, B, K) {			\
95 	if (unlikely(i->count < n))				\
96 		n = i->count;					\
97 	if (i->count) {						\
98 		size_t skip = i->iov_offset;			\
99 		if (unlikely(i->type & ITER_BVEC)) {		\
100 			const struct bio_vec *bvec = i->bvec;	\
101 			struct bio_vec v;			\
102 			struct bvec_iter __bi;			\
103 			iterate_bvec(i, n, v, __bi, skip, (B))	\
104 			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
105 			i->nr_segs -= i->bvec - bvec;		\
106 			skip = __bi.bi_bvec_done;		\
107 		} else if (unlikely(i->type & ITER_KVEC)) {	\
108 			const struct kvec *kvec;		\
109 			struct kvec v;				\
110 			iterate_kvec(i, n, v, kvec, skip, (K))	\
111 			if (skip == kvec->iov_len) {		\
112 				kvec++;				\
113 				skip = 0;			\
114 			}					\
115 			i->nr_segs -= kvec - i->kvec;		\
116 			i->kvec = kvec;				\
117 		} else {					\
118 			const struct iovec *iov;		\
119 			struct iovec v;				\
120 			iterate_iovec(i, n, v, iov, skip, (I))	\
121 			if (skip == iov->iov_len) {		\
122 				iov++;				\
123 				skip = 0;			\
124 			}					\
125 			i->nr_segs -= iov - i->iov;		\
126 			i->iov = iov;				\
127 		}						\
128 		i->count -= n;					\
129 		i->iov_offset = skip;				\
130 	}							\
131 }
132 
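/*
 * Thin wrappers around the raw user-copy primitives: check access_ok()
 * first and return the number of bytes that could not be copied
 * (0 on complete success).
 */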
133 static int copyout(void __user *to, const void *from, size_t n)
134 {
135 	if (access_ok(VERIFY_WRITE, to, n)) {
136 		kasan_check_read(from, n);
137 		n = raw_copy_to_user(to, from, n);
138 	}
139 	return n;
140 }
141 
142 static int copyin(void *to, const void __user *from, size_t n)
143 {
144 	if (access_ok(VERIFY_READ, from, n)) {
145 		kasan_check_write(to, n);
146 		n = raw_copy_from_user(to, from, n);
147 	}
148 	return n;
149 }
150 
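/*
 * Copy from a page into the user iovecs of @i.  With CONFIG_HIGHMEM the
 * fast path prefaults the first destination chunk and copies under
 * kmap_atomic(); if that cannot finish, it falls back to the sleeping
 * kmap() copy below.  Returns the number of bytes actually copied and
 * advances the iterator by that amount.
 */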
151 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
152 			 struct iov_iter *i)
153 {
154 	size_t skip, copy, left, wanted;
155 	const struct iovec *iov;
156 	char __user *buf;
157 	void *kaddr, *from;
158 
159 	if (unlikely(bytes > i->count))
160 		bytes = i->count;
161 
162 	if (unlikely(!bytes))
163 		return 0;
164 
165 	might_fault();
166 	wanted = bytes;
167 	iov = i->iov;
168 	skip = i->iov_offset;
169 	buf = iov->iov_base + skip;
170 	copy = min(bytes, iov->iov_len - skip);
171 
172 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
173 		kaddr = kmap_atomic(page);
174 		from = kaddr + offset;
175 
176 		/* first chunk, usually the only one */
177 		left = copyout(buf, from, copy);
178 		copy -= left;
179 		skip += copy;
180 		from += copy;
181 		bytes -= copy;
182 
183 		while (unlikely(!left && bytes)) {
184 			iov++;
185 			buf = iov->iov_base;
186 			copy = min(bytes, iov->iov_len);
187 			left = copyout(buf, from, copy);
188 			copy -= left;
189 			skip = copy;
190 			from += copy;
191 			bytes -= copy;
192 		}
193 		if (likely(!bytes)) {
194 			kunmap_atomic(kaddr);
195 			goto done;
196 		}
197 		offset = from - kaddr;
198 		buf += copy;
199 		kunmap_atomic(kaddr);
200 		copy = min(bytes, iov->iov_len - skip);
201 	}
202 	/* Too bad - revert to non-atomic kmap */
203 
204 	kaddr = kmap(page);
205 	from = kaddr + offset;
206 	left = copyout(buf, from, copy);
207 	copy -= left;
208 	skip += copy;
209 	from += copy;
210 	bytes -= copy;
211 	while (unlikely(!left && bytes)) {
212 		iov++;
213 		buf = iov->iov_base;
214 		copy = min(bytes, iov->iov_len);
215 		left = copyout(buf, from, copy);
216 		copy -= left;
217 		skip = copy;
218 		from += copy;
219 		bytes -= copy;
220 	}
221 	kunmap(page);
222 
223 done:
224 	if (skip == iov->iov_len) {
225 		iov++;
226 		skip = 0;
227 	}
228 	i->count -= wanted - bytes;
229 	i->nr_segs -= iov - i->iov;
230 	i->iov = iov;
231 	i->iov_offset = skip;
232 	return wanted - bytes;
233 }
234 
235 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
236 			 struct iov_iter *i)
237 {
238 	size_t skip, copy, left, wanted;
239 	const struct iovec *iov;
240 	char __user *buf;
241 	void *kaddr, *to;
242 
243 	if (unlikely(bytes > i->count))
244 		bytes = i->count;
245 
246 	if (unlikely(!bytes))
247 		return 0;
248 
249 	might_fault();
250 	wanted = bytes;
251 	iov = i->iov;
252 	skip = i->iov_offset;
253 	buf = iov->iov_base + skip;
254 	copy = min(bytes, iov->iov_len - skip);
255 
256 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
257 		kaddr = kmap_atomic(page);
258 		to = kaddr + offset;
259 
260 		/* first chunk, usually the only one */
261 		left = copyin(to, buf, copy);
262 		copy -= left;
263 		skip += copy;
264 		to += copy;
265 		bytes -= copy;
266 
267 		while (unlikely(!left && bytes)) {
268 			iov++;
269 			buf = iov->iov_base;
270 			copy = min(bytes, iov->iov_len);
271 			left = copyin(to, buf, copy);
272 			copy -= left;
273 			skip = copy;
274 			to += copy;
275 			bytes -= copy;
276 		}
277 		if (likely(!bytes)) {
278 			kunmap_atomic(kaddr);
279 			goto done;
280 		}
281 		offset = to - kaddr;
282 		buf += copy;
283 		kunmap_atomic(kaddr);
284 		copy = min(bytes, iov->iov_len - skip);
285 	}
286 	/* Too bad - revert to non-atomic kmap */
287 
288 	kaddr = kmap(page);
289 	to = kaddr + offset;
290 	left = copyin(to, buf, copy);
291 	copy -= left;
292 	skip += copy;
293 	to += copy;
294 	bytes -= copy;
295 	while (unlikely(!left && bytes)) {
296 		iov++;
297 		buf = iov->iov_base;
298 		copy = min(bytes, iov->iov_len);
299 		left = copyin(to, buf, copy);
300 		copy -= left;
301 		skip = copy;
302 		to += copy;
303 		bytes -= copy;
304 	}
305 	kunmap(page);
306 
307 done:
308 	if (skip == iov->iov_len) {
309 		iov++;
310 		skip = 0;
311 	}
312 	i->count -= wanted - bytes;
313 	i->nr_segs -= iov - i->iov;
314 	i->iov = iov;
315 	i->iov_offset = skip;
316 	return wanted - bytes;
317 }
318 
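/*
 * With PIPE_PARANOIA enabled, sanity() verifies that an ITER_PIPE iterator
 * still points just past the data already in the pipe: either at the end of
 * the last occupied buffer (iov_offset != 0) or at the first free slot
 * (iov_offset == 0).  On a mismatch it dumps the pipe state and warns.
 */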
319 #ifdef PIPE_PARANOIA
320 static bool sanity(const struct iov_iter *i)
321 {
322 	struct pipe_inode_info *pipe = i->pipe;
323 	int idx = i->idx;
324 	int next = pipe->curbuf + pipe->nrbufs;
325 	if (i->iov_offset) {
326 		struct pipe_buffer *p;
327 		if (unlikely(!pipe->nrbufs))
328 			goto Bad;	// pipe must be non-empty
329 		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
330 			goto Bad;	// must be at the last buffer...
331 
332 		p = &pipe->bufs[idx];
333 		if (unlikely(p->offset + p->len != i->iov_offset))
334 			goto Bad;	// ... at the end of segment
335 	} else {
336 		if (idx != (next & (pipe->buffers - 1)))
337 			goto Bad;	// must be right after the last buffer
338 	}
339 	return true;
340 Bad:
341 	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
342 	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
343 			pipe->curbuf, pipe->nrbufs, pipe->buffers);
344 	for (idx = 0; idx < pipe->buffers; idx++)
345 		printk(KERN_ERR "[%p %p %d %d]\n",
346 			pipe->bufs[idx].ops,
347 			pipe->bufs[idx].page,
348 			pipe->bufs[idx].offset,
349 			pipe->bufs[idx].len);
350 	WARN_ON(1);
351 	return false;
352 }
353 #else
354 #define sanity(i) true
355 #endif
356 
357 static inline int next_idx(int idx, struct pipe_inode_info *pipe)
358 {
359 	return (idx + 1) & (pipe->buffers - 1);
360 }
361 
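/*
 * Splice-style copy: instead of copying data, take a reference on @page and
 * append it to the pipe as a page_cache_pipe_buf, merging with the last
 * buffer when the new range directly continues it.  Fails (returns 0) if
 * the pipe is already full.
 */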
362 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
363 			 struct iov_iter *i)
364 {
365 	struct pipe_inode_info *pipe = i->pipe;
366 	struct pipe_buffer *buf;
367 	size_t off;
368 	int idx;
369 
370 	if (unlikely(bytes > i->count))
371 		bytes = i->count;
372 
373 	if (unlikely(!bytes))
374 		return 0;
375 
376 	if (!sanity(i))
377 		return 0;
378 
379 	off = i->iov_offset;
380 	idx = i->idx;
381 	buf = &pipe->bufs[idx];
382 	if (off) {
383 		if (offset == off && buf->page == page) {
384 			/* merge with the last one */
385 			buf->len += bytes;
386 			i->iov_offset += bytes;
387 			goto out;
388 		}
389 		idx = next_idx(idx, pipe);
390 		buf = &pipe->bufs[idx];
391 	}
392 	if (idx == pipe->curbuf && pipe->nrbufs)
393 		return 0;
394 	pipe->nrbufs++;
395 	buf->ops = &page_cache_pipe_buf_ops;
396 	get_page(buf->page = page);
397 	buf->offset = offset;
398 	buf->len = bytes;
399 	i->iov_offset = offset + bytes;
400 	i->idx = idx;
401 out:
402 	i->count -= bytes;
403 	return bytes;
404 }
405 
406 /*
407  * Fault in one or more iovecs of the given iov_iter, to a maximum length
408  * of @bytes.  For each iovec, fault in each page that constitutes the iovec.
409  *
410  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
411  * because it is an invalid address).
412  */
413 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
414 {
415 	size_t skip = i->iov_offset;
416 	const struct iovec *iov;
417 	int err;
418 	struct iovec v;
419 
420 	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
421 		iterate_iovec(i, bytes, v, iov, skip, ({
422 			err = fault_in_pages_readable(v.iov_base, v.iov_len);
423 			if (unlikely(err))
424 			return err;
425 		0;}))
426 	}
427 	return 0;
428 }
429 EXPORT_SYMBOL(iov_iter_fault_in_readable);
430 
431 void iov_iter_init(struct iov_iter *i, unsigned int direction,
432 			const struct iovec *iov, unsigned long nr_segs,
433 			size_t count)
434 {
435 	WARN_ON(direction & ~(READ | WRITE));
436 	direction &= READ | WRITE;
437 
438 	/* It will get better.  Eventually... */
439 	if (uaccess_kernel()) {
440 		i->type = ITER_KVEC | direction;
441 		i->kvec = (struct kvec *)iov;
442 	} else {
443 		i->type = ITER_IOVEC | direction;
444 		i->iov = iov;
445 	}
446 	i->nr_segs = nr_segs;
447 	i->iov_offset = 0;
448 	i->count = count;
449 }
450 EXPORT_SYMBOL(iov_iter_init);
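
/*
 * Illustrative sketch only (not part of this file): a typical caller wraps
 * a user buffer in an iovec, initializes the iterator, and then uses
 * copy_to_iter()/copy_from_iter() from <linux/uio.h>.  The names ubuf, len
 * and kbuf below are hypothetical.
 *
 *	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
 *	struct iov_iter iter;
 *	size_t copied;
 *
 *	iov_iter_init(&iter, READ, &iov, 1, len);
 *	copied = copy_to_iter(kbuf, len, &iter);
 *
 * copy_to_iter() returns the number of bytes actually copied, which may be
 * short if a destination page cannot be faulted in.
 */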
451 
452 static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
453 {
454 	char *from = kmap_atomic(page);
455 	memcpy(to, from + offset, len);
456 	kunmap_atomic(from);
457 }
458 
459 static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
460 {
461 	char *to = kmap_atomic(page);
462 	memcpy(to + offset, from, len);
463 	kunmap_atomic(to);
464 }
465 
466 static void memzero_page(struct page *page, size_t offset, size_t len)
467 {
468 	char *addr = kmap_atomic(page);
469 	memset(addr + offset, 0, len);
470 	kunmap_atomic(addr);
471 }
472 
473 static inline bool allocated(struct pipe_buffer *buf)
474 {
475 	return buf->ops == &default_pipe_buf_ops;
476 }
477 
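/*
 * data_start() reports where new data would go in the pipe: the current
 * buffer and offset if the last buffer is one we allocated ourselves and
 * still has room, otherwise the next free slot at offset 0.
 */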
478 static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
479 {
480 	size_t off = i->iov_offset;
481 	int idx = i->idx;
482 	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
483 		idx = next_idx(idx, i->pipe);
484 		off = 0;
485 	}
486 	*idxp = idx;
487 	*offp = off;
488 }
489 
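/*
 * push_pipe() makes room for up to @size bytes: it tops up a partially
 * filled last buffer (if any) and then allocates fresh pages for new
 * buffers until either enough space is reserved or the pipe is full.
 * Returns the number of bytes of space actually secured and reports the
 * starting buffer index and offset through @idxp/@offp.
 */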
490 static size_t push_pipe(struct iov_iter *i, size_t size,
491 			int *idxp, size_t *offp)
492 {
493 	struct pipe_inode_info *pipe = i->pipe;
494 	size_t off;
495 	int idx;
496 	ssize_t left;
497 
498 	if (unlikely(size > i->count))
499 		size = i->count;
500 	if (unlikely(!size))
501 		return 0;
502 
503 	left = size;
504 	data_start(i, &idx, &off);
505 	*idxp = idx;
506 	*offp = off;
507 	if (off) {
508 		left -= PAGE_SIZE - off;
509 		if (left <= 0) {
510 			pipe->bufs[idx].len += size;
511 			return size;
512 		}
513 		pipe->bufs[idx].len = PAGE_SIZE;
514 		idx = next_idx(idx, pipe);
515 	}
516 	while (idx != pipe->curbuf || !pipe->nrbufs) {
517 		struct page *page = alloc_page(GFP_USER);
518 		if (!page)
519 			break;
520 		pipe->nrbufs++;
521 		pipe->bufs[idx].ops = &default_pipe_buf_ops;
522 		pipe->bufs[idx].page = page;
523 		pipe->bufs[idx].offset = 0;
524 		if (left <= PAGE_SIZE) {
525 			pipe->bufs[idx].len = left;
526 			return size;
527 		}
528 		pipe->bufs[idx].len = PAGE_SIZE;
529 		left -= PAGE_SIZE;
530 		idx = next_idx(idx, pipe);
531 	}
532 	return size - left;
533 }
534 
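/*
 * Copy @bytes from @addr into pipe buffers obtained via push_pipe(), one
 * page-sized chunk at a time, advancing the iterator as it goes.
 */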
535 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
536 				struct iov_iter *i)
537 {
538 	struct pipe_inode_info *pipe = i->pipe;
539 	size_t n, off;
540 	int idx;
541 
542 	if (!sanity(i))
543 		return 0;
544 
545 	bytes = n = push_pipe(i, bytes, &idx, &off);
546 	if (unlikely(!n))
547 		return 0;
548 	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
549 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
550 		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
551 		i->idx = idx;
552 		i->iov_offset = off + chunk;
553 		n -= chunk;
554 		addr += chunk;
555 	}
556 	i->count -= bytes;
557 	return bytes;
558 }
559 
560 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
561 {
562 	const char *from = addr;
563 	if (unlikely(iov_iter_is_pipe(i)))
564 		return copy_pipe_to_iter(addr, bytes, i);
565 	if (iter_is_iovec(i))
566 		might_fault();
567 	iterate_and_advance(i, bytes, v,
568 		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
569 		memcpy_to_page(v.bv_page, v.bv_offset,
570 			       (from += v.bv_len) - v.bv_len, v.bv_len),
571 		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
572 	)
573 
574 	return bytes;
575 }
576 EXPORT_SYMBOL(_copy_to_iter);
577 
578 #ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
579 static int copyout_mcsafe(void __user *to, const void *from, size_t n)
580 {
581 	if (access_ok(VERIFY_WRITE, to, n)) {
582 		kasan_check_read(from, n);
583 		n = copy_to_user_mcsafe((__force void *) to, from, n);
584 	}
585 	return n;
586 }
587 
588 static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
589 		const char *from, size_t len)
590 {
591 	unsigned long ret;
592 	char *to;
593 
594 	to = kmap_atomic(page);
595 	ret = memcpy_mcsafe(to + offset, from, len);
596 	kunmap_atomic(to);
597 
598 	return ret;
599 }
600 
601 static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
602 				struct iov_iter *i)
603 {
604 	struct pipe_inode_info *pipe = i->pipe;
605 	size_t n, off, xfer = 0;
606 	int idx;
607 
608 	if (!sanity(i))
609 		return 0;
610 
611 	bytes = n = push_pipe(i, bytes, &idx, &off);
612 	if (unlikely(!n))
613 		return 0;
614 	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
615 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
616 		unsigned long rem;
617 
618 		rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
619 				chunk);
620 		i->idx = idx;
621 		i->iov_offset = off + chunk - rem;
622 		xfer += chunk - rem;
623 		if (rem)
624 			break;
625 		n -= chunk;
626 		addr += chunk;
627 	}
628 	i->count -= xfer;
629 	return xfer;
630 }
631 
632 /**
633  * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
634  * @addr: source kernel address
635  * @bytes: total transfer length
636  * @iter: destination iterator
637  *
638  * The pmem driver arranges for filesystem-dax to use this facility via
639  * dax_copy_to_iter() for protecting read/write to persistent memory.
640  * Unless / until an architecture can guarantee identical performance
641  * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
642  * performance regression to switch more users to the mcsafe version.
643  *
644  * Otherwise, the main differences between this and a typical _copy_to_iter() are:
645  *
646  * * Typical tail/residue handling after a fault retries the copy
647  *   byte-by-byte until the fault happens again. Re-triggering machine
648  *   checks is potentially fatal so the implementation uses source
649  *   alignment and poison alignment assumptions to avoid re-triggering
650  *   hardware exceptions.
651  *
652  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
653  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
654  *   a short copy.
655  *
656  * See MCSAFE_TEST for self-test.
657  */
658 size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
659 {
660 	const char *from = addr;
661 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
662 
663 	if (unlikely(iov_iter_is_pipe(i)))
664 		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
665 	if (iter_is_iovec(i))
666 		might_fault();
667 	iterate_and_advance(i, bytes, v,
668 		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
669 		({
670 		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
671                                (from += v.bv_len) - v.bv_len, v.bv_len);
672 		if (rem) {
673 			curr_addr = (unsigned long) from;
674 			bytes = curr_addr - s_addr - rem;
675 			return bytes;
676 		}
677 		}),
678 		({
679 		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
680 				v.iov_len);
681 		if (rem) {
682 			curr_addr = (unsigned long) from;
683 			bytes = curr_addr - s_addr - rem;
684 			return bytes;
685 		}
686 		})
687 	)
688 
689 	return bytes;
690 }
691 EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
692 #endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
693 
694 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
695 {
696 	char *to = addr;
697 	if (unlikely(iov_iter_is_pipe(i))) {
698 		WARN_ON(1);
699 		return 0;
700 	}
701 	if (iter_is_iovec(i))
702 		might_fault();
703 	iterate_and_advance(i, bytes, v,
704 		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
705 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
706 				 v.bv_offset, v.bv_len),
707 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
708 	)
709 
710 	return bytes;
711 }
712 EXPORT_SYMBOL(_copy_from_iter);
713 
714 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
715 {
716 	char *to = addr;
717 	if (unlikely(iov_iter_is_pipe(i))) {
718 		WARN_ON(1);
719 		return false;
720 	}
721 	if (unlikely(i->count < bytes))
722 		return false;
723 
724 	if (iter_is_iovec(i))
725 		might_fault();
726 	iterate_all_kinds(i, bytes, v, ({
727 		if (copyin((to += v.iov_len) - v.iov_len,
728 				      v.iov_base, v.iov_len))
729 			return false;
730 		0;}),
731 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
732 				 v.bv_offset, v.bv_len),
733 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
734 	)
735 
736 	iov_iter_advance(i, bytes);
737 	return true;
738 }
739 EXPORT_SYMBOL(_copy_from_iter_full);
740 
741 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
742 {
743 	char *to = addr;
744 	if (unlikely(iov_iter_is_pipe(i))) {
745 		WARN_ON(1);
746 		return 0;
747 	}
748 	iterate_and_advance(i, bytes, v,
749 		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
750 					 v.iov_base, v.iov_len),
751 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
752 				 v.bv_offset, v.bv_len),
753 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
754 	)
755 
756 	return bytes;
757 }
758 EXPORT_SYMBOL(_copy_from_iter_nocache);
759 
760 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
761 /**
762  * _copy_from_iter_flushcache - write destination through cpu cache
763  * @addr: destination kernel address
764  * @bytes: total transfer length
765  * @iter: source iterator
766  *
767  * The pmem driver arranges for filesystem-dax to use this facility via
768  * dax_copy_from_iter() for ensuring that writes to persistent memory
769  * are flushed through the CPU cache. It is differentiated from
770  * _copy_from_iter_nocache() in that it guarantees all data is flushed for
771  * all iterator types. The _copy_from_iter_nocache() only attempts to
772  * bypass the cache for the ITER_IOVEC case, and on some archs may use
773  * instructions that strand dirty-data in the cache.
774  */
775 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
776 {
777 	char *to = addr;
778 	if (unlikely(iov_iter_is_pipe(i))) {
779 		WARN_ON(1);
780 		return 0;
781 	}
782 	iterate_and_advance(i, bytes, v,
783 		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
784 					 v.iov_base, v.iov_len),
785 		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
786 				 v.bv_offset, v.bv_len),
787 		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
788 			v.iov_len)
789 	)
790 
791 	return bytes;
792 }
793 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
794 #endif
795 
796 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
797 {
798 	char *to = addr;
799 	if (unlikely(iov_iter_is_pipe(i))) {
800 		WARN_ON(1);
801 		return false;
802 	}
803 	if (unlikely(i->count < bytes))
804 		return false;
805 	iterate_all_kinds(i, bytes, v, ({
806 		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
807 					     v.iov_base, v.iov_len))
808 			return false;
809 		0;}),
810 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
811 				 v.bv_offset, v.bv_len),
812 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
813 	)
814 
815 	iov_iter_advance(i, bytes);
816 	return true;
817 }
818 EXPORT_SYMBOL(_copy_from_iter_full_nocache);
819 
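/*
 * page_copy_sane() checks that offset + n stays within the (possibly
 * compound) page the caller handed us; out-of-range requests trigger a
 * WARN and are refused.
 */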
820 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
821 {
822 	struct page *head = compound_head(page);
823 	size_t v = n + offset + page_address(page) - page_address(head);
824 
825 	if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
826 		return true;
827 	WARN_ON(1);
828 	return false;
829 }
830 
831 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
832 			 struct iov_iter *i)
833 {
834 	if (unlikely(!page_copy_sane(page, offset, bytes)))
835 		return 0;
836 	if (i->type & (ITER_BVEC|ITER_KVEC)) {
837 		void *kaddr = kmap_atomic(page);
838 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
839 		kunmap_atomic(kaddr);
840 		return wanted;
841 	} else if (likely(!iov_iter_is_pipe(i)))
842 		return copy_page_to_iter_iovec(page, offset, bytes, i);
843 	else
844 		return copy_page_to_iter_pipe(page, offset, bytes, i);
845 }
846 EXPORT_SYMBOL(copy_page_to_iter);
847 
848 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
849 			 struct iov_iter *i)
850 {
851 	if (unlikely(!page_copy_sane(page, offset, bytes)))
852 		return 0;
853 	if (unlikely(iov_iter_is_pipe(i))) {
854 		WARN_ON(1);
855 		return 0;
856 	}
857 	if (i->type & (ITER_BVEC|ITER_KVEC)) {
858 		void *kaddr = kmap_atomic(page);
859 		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
860 		kunmap_atomic(kaddr);
861 		return wanted;
862 	} else
863 		return copy_page_from_iter_iovec(page, offset, bytes, i);
864 }
865 EXPORT_SYMBOL(copy_page_from_iter);
866 
867 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
868 {
869 	struct pipe_inode_info *pipe = i->pipe;
870 	size_t n, off;
871 	int idx;
872 
873 	if (!sanity(i))
874 		return 0;
875 
876 	bytes = n = push_pipe(i, bytes, &idx, &off);
877 	if (unlikely(!n))
878 		return 0;
879 
880 	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
881 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
882 		memzero_page(pipe->bufs[idx].page, off, chunk);
883 		i->idx = idx;
884 		i->iov_offset = off + chunk;
885 		n -= chunk;
886 	}
887 	i->count -= bytes;
888 	return bytes;
889 }
890 
891 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
892 {
893 	if (unlikely(iov_iter_is_pipe(i)))
894 		return pipe_zero(bytes, i);
895 	iterate_and_advance(i, bytes, v,
896 		clear_user(v.iov_base, v.iov_len),
897 		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
898 		memset(v.iov_base, 0, v.iov_len)
899 	)
900 
901 	return bytes;
902 }
903 EXPORT_SYMBOL(iov_iter_zero);
904 
905 size_t iov_iter_copy_from_user_atomic(struct page *page,
906 		struct iov_iter *i, unsigned long offset, size_t bytes)
907 {
908 	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
909 	if (unlikely(!page_copy_sane(page, offset, bytes))) {
910 		kunmap_atomic(kaddr);
911 		return 0;
912 	}
913 	if (unlikely(iov_iter_is_pipe(i))) {
914 		kunmap_atomic(kaddr);
915 		WARN_ON(1);
916 		return 0;
917 	}
918 	iterate_all_kinds(i, bytes, v,
919 		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
920 		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
921 				 v.bv_offset, v.bv_len),
922 		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
923 	)
924 	kunmap_atomic(kaddr);
925 	return bytes;
926 }
927 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
928 
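/*
 * pipe_truncate() drops every pipe buffer past the iterator's current
 * position, trimming the last kept buffer so it ends exactly at
 * i->iov_offset.  pipe_advance() below moves the position forward by
 * @size bytes and then truncates whatever lies beyond it.
 */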
929 static inline void pipe_truncate(struct iov_iter *i)
930 {
931 	struct pipe_inode_info *pipe = i->pipe;
932 	if (pipe->nrbufs) {
933 		size_t off = i->iov_offset;
934 		int idx = i->idx;
935 		int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
936 		if (off) {
937 			pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
938 			idx = next_idx(idx, pipe);
939 			nrbufs++;
940 		}
941 		while (pipe->nrbufs > nrbufs) {
942 			pipe_buf_release(pipe, &pipe->bufs[idx]);
943 			idx = next_idx(idx, pipe);
944 			pipe->nrbufs--;
945 		}
946 	}
947 }
948 
949 static void pipe_advance(struct iov_iter *i, size_t size)
950 {
951 	struct pipe_inode_info *pipe = i->pipe;
952 	if (unlikely(i->count < size))
953 		size = i->count;
954 	if (size) {
955 		struct pipe_buffer *buf;
956 		size_t off = i->iov_offset, left = size;
957 		int idx = i->idx;
958 		if (off) /* make it relative to the beginning of buffer */
959 			left += off - pipe->bufs[idx].offset;
960 		while (1) {
961 			buf = &pipe->bufs[idx];
962 			if (left <= buf->len)
963 				break;
964 			left -= buf->len;
965 			idx = next_idx(idx, pipe);
966 		}
967 		i->idx = idx;
968 		i->iov_offset = buf->offset + left;
969 	}
970 	i->count -= size;
971 	/* ... and discard everything past that point */
972 	pipe_truncate(i);
973 }
974 
975 void iov_iter_advance(struct iov_iter *i, size_t size)
976 {
977 	if (unlikely(iov_iter_is_pipe(i))) {
978 		pipe_advance(i, size);
979 		return;
980 	}
981 	iterate_and_advance(i, size, v, 0, 0, 0)
982 }
983 EXPORT_SYMBOL(iov_iter_advance);
984 
985 void iov_iter_revert(struct iov_iter *i, size_t unroll)
986 {
987 	if (!unroll)
988 		return;
989 	if (WARN_ON(unroll > MAX_RW_COUNT))
990 		return;
991 	i->count += unroll;
992 	if (unlikely(iov_iter_is_pipe(i))) {
993 		struct pipe_inode_info *pipe = i->pipe;
994 		int idx = i->idx;
995 		size_t off = i->iov_offset;
996 		while (1) {
997 			size_t n = off - pipe->bufs[idx].offset;
998 			if (unroll < n) {
999 				off -= unroll;
1000 				break;
1001 			}
1002 			unroll -= n;
1003 			if (!unroll && idx == i->start_idx) {
1004 				off = 0;
1005 				break;
1006 			}
1007 			if (!idx--)
1008 				idx = pipe->buffers - 1;
1009 			off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
1010 		}
1011 		i->iov_offset = off;
1012 		i->idx = idx;
1013 		pipe_truncate(i);
1014 		return;
1015 	}
1016 	if (unroll <= i->iov_offset) {
1017 		i->iov_offset -= unroll;
1018 		return;
1019 	}
1020 	unroll -= i->iov_offset;
1021 	if (iov_iter_is_bvec(i)) {
1022 		const struct bio_vec *bvec = i->bvec;
1023 		while (1) {
1024 			size_t n = (--bvec)->bv_len;
1025 			i->nr_segs++;
1026 			if (unroll <= n) {
1027 				i->bvec = bvec;
1028 				i->iov_offset = n - unroll;
1029 				return;
1030 			}
1031 			unroll -= n;
1032 		}
1033 	} else { /* same logic for iovec and kvec */
1034 		const struct iovec *iov = i->iov;
1035 		while (1) {
1036 			size_t n = (--iov)->iov_len;
1037 			i->nr_segs++;
1038 			if (unroll <= n) {
1039 				i->iov = iov;
1040 				i->iov_offset = n - unroll;
1041 				return;
1042 			}
1043 			unroll -= n;
1044 		}
1045 	}
1046 }
1047 EXPORT_SYMBOL(iov_iter_revert);
1048 
1049 /*
1050  * Return the count of just the current iov_iter segment.
1051  */
1052 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1053 {
1054 	if (unlikely(iov_iter_is_pipe(i)))
1055 		return i->count;	// it is a silly place, anyway
1056 	if (i->nr_segs == 1)
1057 		return i->count;
1058 	else if (iov_iter_is_bvec(i))
1059 		return min(i->count, i->bvec->bv_len - i->iov_offset);
1060 	else
1061 		return min(i->count, i->iov->iov_len - i->iov_offset);
1062 }
1063 EXPORT_SYMBOL(iov_iter_single_seg_count);
1064 
1065 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1066 			const struct kvec *kvec, unsigned long nr_segs,
1067 			size_t count)
1068 {
1069 	WARN_ON(direction & ~(READ | WRITE));
1070 	i->type = ITER_KVEC | (direction & (READ | WRITE));
1071 	i->kvec = kvec;
1072 	i->nr_segs = nr_segs;
1073 	i->iov_offset = 0;
1074 	i->count = count;
1075 }
1076 EXPORT_SYMBOL(iov_iter_kvec);
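
/*
 * Illustrative sketch only: wrapping a kernel buffer in an ITER_KVEC
 * iterator so it can be handed to code that consumes an iov_iter.  kbuf
 * and len are hypothetical.
 *
 *	struct kvec kv = { .iov_base = kbuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_kvec(&iter, READ, &kv, 1, len);
 */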
1077 
1078 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1079 			const struct bio_vec *bvec, unsigned long nr_segs,
1080 			size_t count)
1081 {
1082 	WARN_ON(direction & ~(READ | WRITE));
1083 	i->type = ITER_BVEC | (direction & (READ | WRITE));
1084 	i->bvec = bvec;
1085 	i->nr_segs = nr_segs;
1086 	i->iov_offset = 0;
1087 	i->count = count;
1088 }
1089 EXPORT_SYMBOL(iov_iter_bvec);
1090 
1091 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1092 			struct pipe_inode_info *pipe,
1093 			size_t count)
1094 {
1095 	BUG_ON(direction != READ);
1096 	WARN_ON(pipe->nrbufs == pipe->buffers);
1097 	i->type = ITER_PIPE | READ;
1098 	i->pipe = pipe;
1099 	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1100 	i->iov_offset = 0;
1101 	i->count = count;
1102 	i->start_idx = i->idx;
1103 }
1104 EXPORT_SYMBOL(iov_iter_pipe);
1105 
1106 unsigned long iov_iter_alignment(const struct iov_iter *i)
1107 {
1108 	unsigned long res = 0;
1109 	size_t size = i->count;
1110 
1111 	if (unlikely(iov_iter_is_pipe(i))) {
1112 		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
1113 			return size | i->iov_offset;
1114 		return size;
1115 	}
1116 	iterate_all_kinds(i, size, v,
1117 		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
1118 		res |= v.bv_offset | v.bv_len,
1119 		res |= (unsigned long)v.iov_base | v.iov_len
1120 	)
1121 	return res;
1122 }
1123 EXPORT_SYMBOL(iov_iter_alignment);
1124 
1125 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1126 {
1127 	unsigned long res = 0;
1128 	size_t size = i->count;
1129 
1130 	if (unlikely(iov_iter_is_pipe(i))) {
1131 		WARN_ON(1);
1132 		return ~0U;
1133 	}
1134 
1135 	iterate_all_kinds(i, size, v,
1136 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1137 			(size != v.iov_len ? size : 0), 0),
1138 		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1139 			(size != v.bv_len ? size : 0)),
1140 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1141 			(size != v.iov_len ? size : 0))
1142 		);
1143 	return res;
1144 }
1145 EXPORT_SYMBOL(iov_iter_gap_alignment);
1146 
1147 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1148 				size_t maxsize,
1149 				struct page **pages,
1150 				int idx,
1151 				size_t *start)
1152 {
1153 	struct pipe_inode_info *pipe = i->pipe;
1154 	ssize_t n = push_pipe(i, maxsize, &idx, start);
1155 	if (!n)
1156 		return -EFAULT;
1157 
1158 	maxsize = n;
1159 	n += *start;
1160 	while (n > 0) {
1161 		get_page(*pages++ = pipe->bufs[idx].page);
1162 		idx = next_idx(idx, pipe);
1163 		n -= PAGE_SIZE;
1164 	}
1165 
1166 	return maxsize;
1167 }
1168 
1169 static ssize_t pipe_get_pages(struct iov_iter *i,
1170 		   struct page **pages, size_t maxsize, unsigned maxpages,
1171 		   size_t *start)
1172 {
1173 	unsigned npages;
1174 	size_t capacity;
1175 	int idx;
1176 
1177 	if (!maxsize)
1178 		return 0;
1179 
1180 	if (!sanity(i))
1181 		return -EFAULT;
1182 
1183 	data_start(i, &idx, start);
1184 	/* some of this one + all after this one */
1185 	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1186 	capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1187 
1188 	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
1189 }
1190 
1191 ssize_t iov_iter_get_pages(struct iov_iter *i,
1192 		   struct page **pages, size_t maxsize, unsigned maxpages,
1193 		   size_t *start)
1194 {
1195 	if (maxsize > i->count)
1196 		maxsize = i->count;
1197 
1198 	if (unlikely(iov_iter_is_pipe(i)))
1199 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
1200 	iterate_all_kinds(i, maxsize, v, ({
1201 		unsigned long addr = (unsigned long)v.iov_base;
1202 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1203 		int n;
1204 		int res;
1205 
1206 		if (len > maxpages * PAGE_SIZE)
1207 			len = maxpages * PAGE_SIZE;
1208 		addr &= ~(PAGE_SIZE - 1);
1209 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1210 		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages);
1211 		if (unlikely(res < 0))
1212 			return res;
1213 		return (res == n ? len : res * PAGE_SIZE) - *start;
1214 	0;}),({
1215 		/* can't be more than PAGE_SIZE */
1216 		*start = v.bv_offset;
1217 		get_page(*pages = v.bv_page);
1218 		return v.bv_len;
1219 	}),({
1220 		return -EFAULT;
1221 	})
1222 	)
1223 	return 0;
1224 }
1225 EXPORT_SYMBOL(iov_iter_get_pages);
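
/*
 * Illustrative sketch only: pinning up to one page of the iterator's
 * current segment, e.g. for direct I/O.  Error handling is elided and the
 * variable names are hypothetical.
 *
 *	struct page *pages[1];
 *	size_t offset;
 *	ssize_t got;
 *
 *	got = iov_iter_get_pages(iter, pages, PAGE_SIZE, 1, &offset);
 *	if (got > 0) {
 *		... use pages[0] starting at offset for got bytes ...
 *		put_page(pages[0]);
 *	}
 */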
1226 
1227 static struct page **get_pages_array(size_t n)
1228 {
1229 	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1230 }
1231 
1232 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1233 		   struct page ***pages, size_t maxsize,
1234 		   size_t *start)
1235 {
1236 	struct page **p;
1237 	ssize_t n;
1238 	int idx;
1239 	int npages;
1240 
1241 	if (!maxsize)
1242 		return 0;
1243 
1244 	if (!sanity(i))
1245 		return -EFAULT;
1246 
1247 	data_start(i, &idx, start);
1248 	/* some of this one + all after this one */
1249 	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1250 	n = npages * PAGE_SIZE - *start;
1251 	if (maxsize > n)
1252 		maxsize = n;
1253 	else
1254 		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1255 	p = get_pages_array(npages);
1256 	if (!p)
1257 		return -ENOMEM;
1258 	n = __pipe_get_pages(i, maxsize, p, idx, start);
1259 	if (n > 0)
1260 		*pages = p;
1261 	else
1262 		kvfree(p);
1263 	return n;
1264 }
1265 
1266 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1267 		   struct page ***pages, size_t maxsize,
1268 		   size_t *start)
1269 {
1270 	struct page **p;
1271 
1272 	if (maxsize > i->count)
1273 		maxsize = i->count;
1274 
1275 	if (unlikely(iov_iter_is_pipe(i)))
1276 		return pipe_get_pages_alloc(i, pages, maxsize, start);
1277 	iterate_all_kinds(i, maxsize, v, ({
1278 		unsigned long addr = (unsigned long)v.iov_base;
1279 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1280 		int n;
1281 		int res;
1282 
1283 		addr &= ~(PAGE_SIZE - 1);
1284 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1285 		p = get_pages_array(n);
1286 		if (!p)
1287 			return -ENOMEM;
1288 		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p);
1289 		if (unlikely(res < 0)) {
1290 			kvfree(p);
1291 			return res;
1292 		}
1293 		*pages = p;
1294 		return (res == n ? len : res * PAGE_SIZE) - *start;
1295 	0;}),({
1296 		/* can't be more than PAGE_SIZE */
1297 		*start = v.bv_offset;
1298 		*pages = p = get_pages_array(1);
1299 		if (!p)
1300 			return -ENOMEM;
1301 		get_page(*p = v.bv_page);
1302 		return v.bv_len;
1303 	}),({
1304 		return -EFAULT;
1305 	})
1306 	)
1307 	return 0;
1308 }
1309 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1310 
1311 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1312 			       struct iov_iter *i)
1313 {
1314 	char *to = addr;
1315 	__wsum sum, next;
1316 	size_t off = 0;
1317 	sum = *csum;
1318 	if (unlikely(iov_iter_is_pipe(i))) {
1319 		WARN_ON(1);
1320 		return 0;
1321 	}
1322 	iterate_and_advance(i, bytes, v, ({
1323 		int err = 0;
1324 		next = csum_and_copy_from_user(v.iov_base,
1325 					       (to += v.iov_len) - v.iov_len,
1326 					       v.iov_len, 0, &err);
1327 		if (!err) {
1328 			sum = csum_block_add(sum, next, off);
1329 			off += v.iov_len;
1330 		}
1331 		err ? v.iov_len : 0;
1332 	}), ({
1333 		char *p = kmap_atomic(v.bv_page);
1334 		next = csum_partial_copy_nocheck(p + v.bv_offset,
1335 						 (to += v.bv_len) - v.bv_len,
1336 						 v.bv_len, 0);
1337 		kunmap_atomic(p);
1338 		sum = csum_block_add(sum, next, off);
1339 		off += v.bv_len;
1340 	}),({
1341 		next = csum_partial_copy_nocheck(v.iov_base,
1342 						 (to += v.iov_len) - v.iov_len,
1343 						 v.iov_len, 0);
1344 		sum = csum_block_add(sum, next, off);
1345 		off += v.iov_len;
1346 	})
1347 	)
1348 	*csum = sum;
1349 	return bytes;
1350 }
1351 EXPORT_SYMBOL(csum_and_copy_from_iter);
1352 
1353 bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1354 			       struct iov_iter *i)
1355 {
1356 	char *to = addr;
1357 	__wsum sum, next;
1358 	size_t off = 0;
1359 	sum = *csum;
1360 	if (unlikely(iov_iter_is_pipe(i))) {
1361 		WARN_ON(1);
1362 		return false;
1363 	}
1364 	if (unlikely(i->count < bytes))
1365 		return false;
1366 	iterate_all_kinds(i, bytes, v, ({
1367 		int err = 0;
1368 		next = csum_and_copy_from_user(v.iov_base,
1369 					       (to += v.iov_len) - v.iov_len,
1370 					       v.iov_len, 0, &err);
1371 		if (err)
1372 			return false;
1373 		sum = csum_block_add(sum, next, off);
1374 		off += v.iov_len;
1375 		0;
1376 	}), ({
1377 		char *p = kmap_atomic(v.bv_page);
1378 		next = csum_partial_copy_nocheck(p + v.bv_offset,
1379 						 (to += v.bv_len) - v.bv_len,
1380 						 v.bv_len, 0);
1381 		kunmap_atomic(p);
1382 		sum = csum_block_add(sum, next, off);
1383 		off += v.bv_len;
1384 	}),({
1385 		next = csum_partial_copy_nocheck(v.iov_base,
1386 						 (to += v.iov_len) - v.iov_len,
1387 						 v.iov_len, 0);
1388 		sum = csum_block_add(sum, next, off);
1389 		off += v.iov_len;
1390 	})
1391 	)
1392 	*csum = sum;
1393 	iov_iter_advance(i, bytes);
1394 	return true;
1395 }
1396 EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1397 
1398 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
1399 			     struct iov_iter *i)
1400 {
1401 	const char *from = addr;
1402 	__wsum sum, next;
1403 	size_t off = 0;
1404 	sum = *csum;
1405 	if (unlikely(iov_iter_is_pipe(i))) {
1406 		WARN_ON(1);	/* for now */
1407 		return 0;
1408 	}
1409 	iterate_and_advance(i, bytes, v, ({
1410 		int err = 0;
1411 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1412 					     v.iov_base,
1413 					     v.iov_len, 0, &err);
1414 		if (!err) {
1415 			sum = csum_block_add(sum, next, off);
1416 			off += v.iov_len;
1417 		}
1418 		err ? v.iov_len : 0;
1419 	}), ({
1420 		char *p = kmap_atomic(v.bv_page);
1421 		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
1422 						 p + v.bv_offset,
1423 						 v.bv_len, 0);
1424 		kunmap_atomic(p);
1425 		sum = csum_block_add(sum, next, off);
1426 		off += v.bv_len;
1427 	}),({
1428 		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
1429 						 v.iov_base,
1430 						 v.iov_len, 0);
1431 		sum = csum_block_add(sum, next, off);
1432 		off += v.iov_len;
1433 	})
1434 	)
1435 	*csum = sum;
1436 	return bytes;
1437 }
1438 EXPORT_SYMBOL(csum_and_copy_to_iter);
1439 
1440 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1441 {
1442 	size_t size = i->count;
1443 	int npages = 0;
1444 
1445 	if (!size)
1446 		return 0;
1447 
1448 	if (unlikely(iov_iter_is_pipe(i))) {
1449 		struct pipe_inode_info *pipe = i->pipe;
1450 		size_t off;
1451 		int idx;
1452 
1453 		if (!sanity(i))
1454 			return 0;
1455 
1456 		data_start(i, &idx, &off);
1457 		/* some of this one + all after this one */
1458 		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1459 		if (npages >= maxpages)
1460 			return maxpages;
1461 	} else iterate_all_kinds(i, size, v, ({
1462 		unsigned long p = (unsigned long)v.iov_base;
1463 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1464 			- p / PAGE_SIZE;
1465 		if (npages >= maxpages)
1466 			return maxpages;
1467 	0;}),({
1468 		npages++;
1469 		if (npages >= maxpages)
1470 			return maxpages;
1471 	}),({
1472 		unsigned long p = (unsigned long)v.iov_base;
1473 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1474 			- p / PAGE_SIZE;
1475 		if (npages >= maxpages)
1476 			return maxpages;
1477 	})
1478 	)
1479 	return npages;
1480 }
1481 EXPORT_SYMBOL(iov_iter_npages);
1482 
1483 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1484 {
1485 	*new = *old;
1486 	if (unlikely(iov_iter_is_pipe(new))) {
1487 		WARN_ON(1);
1488 		return NULL;
1489 	}
1490 	if (iov_iter_is_bvec(new))
1491 		return new->bvec = kmemdup(new->bvec,
1492 				    new->nr_segs * sizeof(struct bio_vec),
1493 				    flags);
1494 	else
1495 		/* iovec and kvec have identical layout */
1496 		return new->iov = kmemdup(new->iov,
1497 				   new->nr_segs * sizeof(struct iovec),
1498 				   flags);
1499 }
1500 EXPORT_SYMBOL(dup_iter);
1501 
1502 /**
1503  * import_iovec() - Copy an array of &struct iovec from userspace
1504  *     into the kernel, check that it is valid, and initialize a new
1505  *     &struct iov_iter iterator to access it.
1506  *
1507  * @type: One of %READ or %WRITE.
1508  * @uvector: Pointer to the userspace array.
1509  * @nr_segs: Number of elements in userspace array.
1510  * @fast_segs: Number of elements in @iov.
1511  * @iov: (input and output parameter) Pointer to pointer to (usually small
1512  *     on-stack) kernel array.
1513  * @i: Pointer to iterator that will be initialized on success.
1514  *
1515  * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1516  * then this function places %NULL in *@iov on return. Otherwise, a new
1517  * array will be allocated and the result placed in *@iov. This means that
1518  * the caller may call kfree() on *@iov regardless of whether the small
1519  * on-stack array was used or not (and regardless of whether this function
1520  * returns an error or not).
1521  *
1522  * Return: 0 on success or negative error code on error.
1523  */
1524 int import_iovec(int type, const struct iovec __user * uvector,
1525 		 unsigned nr_segs, unsigned fast_segs,
1526 		 struct iovec **iov, struct iov_iter *i)
1527 {
1528 	ssize_t n;
1529 	struct iovec *p;
1530 	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1531 				  *iov, &p);
1532 	if (n < 0) {
1533 		if (p != *iov)
1534 			kfree(p);
1535 		*iov = NULL;
1536 		return n;
1537 	}
1538 	iov_iter_init(i, type, p, nr_segs, n);
1539 	*iov = p == *iov ? NULL : p;
1540 	return 0;
1541 }
1542 EXPORT_SYMBOL(import_iovec);
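
/*
 * Illustrative sketch only: the usual calling pattern from a readv()-style
 * path.  uvec and nr are hypothetical.
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	int ret;
 *
 *	ret = import_iovec(READ, uvec, nr, UIO_FASTIOV, &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	... do the I/O using &iter ...
 *	kfree(iov);
 */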
1543 
1544 #ifdef CONFIG_COMPAT
1545 #include <linux/compat.h>
1546 
1547 int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
1548 		 unsigned nr_segs, unsigned fast_segs,
1549 		 struct iovec **iov, struct iov_iter *i)
1550 {
1551 	ssize_t n;
1552 	struct iovec *p;
1553 	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1554 				  *iov, &p);
1555 	if (n < 0) {
1556 		if (p != *iov)
1557 			kfree(p);
1558 		*iov = NULL;
1559 		return n;
1560 	}
1561 	iov_iter_init(i, type, p, nr_segs, n);
1562 	*iov = p == *iov ? NULL : p;
1563 	return 0;
1564 }
1565 #endif
1566 
1567 int import_single_range(int rw, void __user *buf, size_t len,
1568 		 struct iovec *iov, struct iov_iter *i)
1569 {
1570 	if (len > MAX_RW_COUNT)
1571 		len = MAX_RW_COUNT;
1572 	if (unlikely(!access_ok(!rw, buf, len)))
1573 		return -EFAULT;
1574 
1575 	iov->iov_base = buf;
1576 	iov->iov_len = len;
1577 	iov_iter_init(i, rw, iov, 1, len);
1578 	return 0;
1579 }
1580 EXPORT_SYMBOL(import_single_range);
1581 
1582 int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
1583 			    int (*f)(struct kvec *vec, void *context),
1584 			    void *context)
1585 {
1586 	struct kvec w;
1587 	int err = -EINVAL;
1588 	if (!bytes)
1589 		return 0;
1590 
1591 	iterate_all_kinds(i, bytes, v, -EINVAL, ({
1592 		w.iov_base = kmap(v.bv_page) + v.bv_offset;
1593 		w.iov_len = v.bv_len;
1594 		err = f(&w, context);
1595 		kunmap(v.bv_page);
1596 		err;}), ({
1597 		w = v;
1598 		err = f(&w, context);})
1599 	)
1600 	return err;
1601 }
1602 EXPORT_SYMBOL(iov_iter_for_each_range);
1603