/* xref: /openbmc/linux/lib/iov_iter.c (revision 5c67aa90) */
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <crypto/hash.h>
3 #include <linux/export.h>
4 #include <linux/bvec.h>
5 #include <linux/fault-inject-usercopy.h>
6 #include <linux/uio.h>
7 #include <linux/pagemap.h>
8 #include <linux/highmem.h>
9 #include <linux/slab.h>
10 #include <linux/vmalloc.h>
11 #include <linux/splice.h>
12 #include <linux/compat.h>
13 #include <net/checksum.h>
14 #include <linux/scatterlist.h>
15 #include <linux/instrumented.h>
16 
17 #define PIPE_PARANOIA /* for now */
18 
19 /* covers iovec and kvec alike */
20 #define iterate_iovec(i, n, __v, __p, skip, STEP) {		\
21 	size_t left;						\
22 	size_t wanted = n;					\
23 	do {							\
24 		__v.iov_len = min(n, __p->iov_len - skip);	\
25 		if (likely(__v.iov_len)) {			\
26 			__v.iov_base = __p->iov_base + skip;	\
27 			left = (STEP);				\
28 			__v.iov_len -= left;			\
29 			skip += __v.iov_len;			\
30 			n -= __v.iov_len;			\
31 			if (skip < __p->iov_len)		\
32 				break;				\
33 		}						\
34 		__p++;						\
35 		skip = 0;					\
36 	} while (n);						\
37 	n = wanted - n;						\
38 }
39 
40 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
41 	struct bvec_iter __start;			\
42 	__start.bi_size = n;				\
43 	__start.bi_bvec_done = skip;			\
44 	__start.bi_idx = 0;				\
45 	for_each_bvec(__v, i->bvec, __bi, __start) {	\
46 		(void)(STEP);				\
47 	}						\
48 }
49 
50 #define iterate_xarray(i, n, __v, skip, STEP) {		\
51 	struct page *head = NULL;				\
52 	size_t wanted = n, seg, offset;				\
53 	loff_t start = i->xarray_start + skip;			\
54 	pgoff_t index = start >> PAGE_SHIFT;			\
55 	int j;							\
56 								\
57 	XA_STATE(xas, i->xarray, index);			\
58 								\
59 	rcu_read_lock();						\
60 	xas_for_each(&xas, head, ULONG_MAX) {				\
61 		if (xas_retry(&xas, head))				\
62 			continue;					\
63 		if (WARN_ON(xa_is_value(head)))				\
64 			break;						\
65 		if (WARN_ON(PageHuge(head)))				\
66 			break;						\
67 		for (j = (head->index < index) ? index - head->index : 0; \
68 		     j < thp_nr_pages(head); j++) {			\
69 			__v.bv_page = head + j;				\
70 			offset = (i->xarray_start + skip) & ~PAGE_MASK;	\
71 			seg = PAGE_SIZE - offset;			\
72 			__v.bv_offset = offset;				\
73 			__v.bv_len = min(n, seg);			\
74 			(void)(STEP);					\
75 			n -= __v.bv_len;				\
76 			skip += __v.bv_len;				\
77 			if (n == 0)					\
78 				break;					\
79 		}							\
80 		if (n == 0)						\
81 			break;						\
82 	}							\
83 	rcu_read_unlock();					\
84 	n = wanted - n;						\
85 }
86 
87 #define iterate_and_advance(i, n, v, I, B, K, X) {		\
88 	if (unlikely(i->count < n))				\
89 		n = i->count;					\
90 	if (likely(n)) {					\
91 		size_t skip = i->iov_offset;			\
92 		if (likely(iter_is_iovec(i))) {			\
93 			const struct iovec *iov = i->iov;	\
94 			struct iovec v;				\
95 			iterate_iovec(i, n, v, iov, skip, (I))	\
96 			i->nr_segs -= iov - i->iov;		\
97 			i->iov = iov;				\
98 		} else if (iov_iter_is_bvec(i)) {		\
99 			const struct bio_vec *bvec = i->bvec;	\
100 			struct bio_vec v;			\
101 			struct bvec_iter __bi;			\
102 			iterate_bvec(i, n, v, __bi, skip, (B))	\
103 			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
104 			i->nr_segs -= i->bvec - bvec;		\
105 			skip = __bi.bi_bvec_done;		\
106 		} else if (iov_iter_is_kvec(i)) {		\
107 			const struct kvec *kvec = i->kvec;	\
108 			struct kvec v;				\
109 			iterate_iovec(i, n, v, kvec, skip,	\
110 						((void)(K),0))	\
111 			i->nr_segs -= kvec - i->kvec;		\
112 			i->kvec = kvec;				\
113 		} else if (iov_iter_is_xarray(i)) {		\
114 			struct bio_vec v;			\
115 			iterate_xarray(i, n, v, skip, (X))	\
116 		}						\
117 		i->count -= n;					\
118 		i->iov_offset = skip;				\
119 	}							\
120 }
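
/*
 * Usage sketch (illustrative, not part of the original source): callers of
 * iterate_and_advance() pass one step expression per backing type - I for
 * user iovecs, B for bio_vecs, K for kvecs, X for xarray pages - each seeing
 * the current segment through the cursor variable v.  _copy_to_iter() below
 * is the canonical shape:
 *
 *	iterate_and_advance(i, bytes, v,
 *		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
 *		memcpy_to_page(v.bv_page, v.bv_offset,
 *			       (from += v.bv_len) - v.bv_len, v.bv_len),
 *		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
 *		memcpy_to_page(v.bv_page, v.bv_offset,
 *			       (from += v.bv_len) - v.bv_len, v.bv_len)
 *	)
 *
 * The iovec step must evaluate to the number of bytes it failed to handle
 * (so a user-copy fault ends the walk early); the kvec, bvec and xarray
 * steps have their values discarded and are expected to consume the whole
 * segment.
 */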
121 
122 static int copyout(void __user *to, const void *from, size_t n)
123 {
124 	if (should_fail_usercopy())
125 		return n;
126 	if (access_ok(to, n)) {
127 		instrument_copy_to_user(to, from, n);
128 		n = raw_copy_to_user(to, from, n);
129 	}
130 	return n;
131 }
132 
133 static int copyin(void *to, const void __user *from, size_t n)
134 {
135 	if (should_fail_usercopy())
136 		return n;
137 	if (access_ok(from, n)) {
138 		instrument_copy_from_user(to, from, n);
139 		n = raw_copy_from_user(to, from, n);
140 	}
141 	return n;
142 }
143 
144 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
145 			 struct iov_iter *i)
146 {
147 	size_t skip, copy, left, wanted;
148 	const struct iovec *iov;
149 	char __user *buf;
150 	void *kaddr, *from;
151 
152 	if (unlikely(bytes > i->count))
153 		bytes = i->count;
154 
155 	if (unlikely(!bytes))
156 		return 0;
157 
158 	might_fault();
159 	wanted = bytes;
160 	iov = i->iov;
161 	skip = i->iov_offset;
162 	buf = iov->iov_base + skip;
163 	copy = min(bytes, iov->iov_len - skip);
164 
165 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
166 		kaddr = kmap_atomic(page);
167 		from = kaddr + offset;
168 
169 		/* first chunk, usually the only one */
170 		left = copyout(buf, from, copy);
171 		copy -= left;
172 		skip += copy;
173 		from += copy;
174 		bytes -= copy;
175 
176 		while (unlikely(!left && bytes)) {
177 			iov++;
178 			buf = iov->iov_base;
179 			copy = min(bytes, iov->iov_len);
180 			left = copyout(buf, from, copy);
181 			copy -= left;
182 			skip = copy;
183 			from += copy;
184 			bytes -= copy;
185 		}
186 		if (likely(!bytes)) {
187 			kunmap_atomic(kaddr);
188 			goto done;
189 		}
190 		offset = from - kaddr;
191 		buf += copy;
192 		kunmap_atomic(kaddr);
193 		copy = min(bytes, iov->iov_len - skip);
194 	}
195 	/* Too bad - revert to non-atomic kmap */
196 
197 	kaddr = kmap(page);
198 	from = kaddr + offset;
199 	left = copyout(buf, from, copy);
200 	copy -= left;
201 	skip += copy;
202 	from += copy;
203 	bytes -= copy;
204 	while (unlikely(!left && bytes)) {
205 		iov++;
206 		buf = iov->iov_base;
207 		copy = min(bytes, iov->iov_len);
208 		left = copyout(buf, from, copy);
209 		copy -= left;
210 		skip = copy;
211 		from += copy;
212 		bytes -= copy;
213 	}
214 	kunmap(page);
215 
216 done:
217 	if (skip == iov->iov_len) {
218 		iov++;
219 		skip = 0;
220 	}
221 	i->count -= wanted - bytes;
222 	i->nr_segs -= iov - i->iov;
223 	i->iov = iov;
224 	i->iov_offset = skip;
225 	return wanted - bytes;
226 }
227 
228 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
229 			 struct iov_iter *i)
230 {
231 	size_t skip, copy, left, wanted;
232 	const struct iovec *iov;
233 	char __user *buf;
234 	void *kaddr, *to;
235 
236 	if (unlikely(bytes > i->count))
237 		bytes = i->count;
238 
239 	if (unlikely(!bytes))
240 		return 0;
241 
242 	might_fault();
243 	wanted = bytes;
244 	iov = i->iov;
245 	skip = i->iov_offset;
246 	buf = iov->iov_base + skip;
247 	copy = min(bytes, iov->iov_len - skip);
248 
249 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
250 		kaddr = kmap_atomic(page);
251 		to = kaddr + offset;
252 
253 		/* first chunk, usually the only one */
254 		left = copyin(to, buf, copy);
255 		copy -= left;
256 		skip += copy;
257 		to += copy;
258 		bytes -= copy;
259 
260 		while (unlikely(!left && bytes)) {
261 			iov++;
262 			buf = iov->iov_base;
263 			copy = min(bytes, iov->iov_len);
264 			left = copyin(to, buf, copy);
265 			copy -= left;
266 			skip = copy;
267 			to += copy;
268 			bytes -= copy;
269 		}
270 		if (likely(!bytes)) {
271 			kunmap_atomic(kaddr);
272 			goto done;
273 		}
274 		offset = to - kaddr;
275 		buf += copy;
276 		kunmap_atomic(kaddr);
277 		copy = min(bytes, iov->iov_len - skip);
278 	}
279 	/* Too bad - revert to non-atomic kmap */
280 
281 	kaddr = kmap(page);
282 	to = kaddr + offset;
283 	left = copyin(to, buf, copy);
284 	copy -= left;
285 	skip += copy;
286 	to += copy;
287 	bytes -= copy;
288 	while (unlikely(!left && bytes)) {
289 		iov++;
290 		buf = iov->iov_base;
291 		copy = min(bytes, iov->iov_len);
292 		left = copyin(to, buf, copy);
293 		copy -= left;
294 		skip = copy;
295 		to += copy;
296 		bytes -= copy;
297 	}
298 	kunmap(page);
299 
300 done:
301 	if (skip == iov->iov_len) {
302 		iov++;
303 		skip = 0;
304 	}
305 	i->count -= wanted - bytes;
306 	i->nr_segs -= iov - i->iov;
307 	i->iov = iov;
308 	i->iov_offset = skip;
309 	return wanted - bytes;
310 }
311 
312 #ifdef PIPE_PARANOIA
313 static bool sanity(const struct iov_iter *i)
314 {
315 	struct pipe_inode_info *pipe = i->pipe;
316 	unsigned int p_head = pipe->head;
317 	unsigned int p_tail = pipe->tail;
318 	unsigned int p_mask = pipe->ring_size - 1;
319 	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
320 	unsigned int i_head = i->head;
321 	unsigned int idx;
322 
323 	if (i->iov_offset) {
324 		struct pipe_buffer *p;
325 		if (unlikely(p_occupancy == 0))
326 			goto Bad;	// pipe must be non-empty
327 		if (unlikely(i_head != p_head - 1))
328 			goto Bad;	// must be at the last buffer...
329 
330 		p = &pipe->bufs[i_head & p_mask];
331 		if (unlikely(p->offset + p->len != i->iov_offset))
332 			goto Bad;	// ... at the end of segment
333 	} else {
334 		if (i_head != p_head)
335 			goto Bad;	// must be right after the last buffer
336 	}
337 	return true;
338 Bad:
339 	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
340 	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
341 			p_head, p_tail, pipe->ring_size);
342 	for (idx = 0; idx < pipe->ring_size; idx++)
343 		printk(KERN_ERR "[%p %p %d %d]\n",
344 			pipe->bufs[idx].ops,
345 			pipe->bufs[idx].page,
346 			pipe->bufs[idx].offset,
347 			pipe->bufs[idx].len);
348 	WARN_ON(1);
349 	return false;
350 }
351 #else
352 #define sanity(i) true
353 #endif
354 
355 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
356 			 struct iov_iter *i)
357 {
358 	struct pipe_inode_info *pipe = i->pipe;
359 	struct pipe_buffer *buf;
360 	unsigned int p_tail = pipe->tail;
361 	unsigned int p_mask = pipe->ring_size - 1;
362 	unsigned int i_head = i->head;
363 	size_t off;
364 
365 	if (unlikely(bytes > i->count))
366 		bytes = i->count;
367 
368 	if (unlikely(!bytes))
369 		return 0;
370 
371 	if (!sanity(i))
372 		return 0;
373 
374 	off = i->iov_offset;
375 	buf = &pipe->bufs[i_head & p_mask];
376 	if (off) {
377 		if (offset == off && buf->page == page) {
378 			/* merge with the last one */
379 			buf->len += bytes;
380 			i->iov_offset += bytes;
381 			goto out;
382 		}
383 		i_head++;
384 		buf = &pipe->bufs[i_head & p_mask];
385 	}
386 	if (pipe_full(i_head, p_tail, pipe->max_usage))
387 		return 0;
388 
389 	buf->ops = &page_cache_pipe_buf_ops;
390 	get_page(page);
391 	buf->page = page;
392 	buf->offset = offset;
393 	buf->len = bytes;
394 
395 	pipe->head = i_head + 1;
396 	i->iov_offset = offset + bytes;
397 	i->head = i_head;
398 out:
399 	i->count -= bytes;
400 	return bytes;
401 }
402 
403 /*
404  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
405  * @bytes.  For each iovec, fault in each page that constitutes the iovec.
406  *
407  * Return 0 on success, or non-zero if the memory could not be accessed
408  * (e.g. because the address is invalid).
409  */
410 int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
411 {
412 	if (iter_is_iovec(i)) {
413 		const struct iovec *p;
414 		size_t skip;
415 
416 		if (bytes > i->count)
417 			bytes = i->count;
418 		for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
419 			size_t len = min(bytes, p->iov_len - skip);
420 			int err;
421 
422 			if (unlikely(!len))
423 				continue;
424 			err = fault_in_pages_readable(p->iov_base + skip, len);
425 			if (unlikely(err))
426 				return err;
427 			bytes -= len;
428 		}
429 	}
430 	return 0;
431 }
432 EXPORT_SYMBOL(iov_iter_fault_in_readable);
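
/*
 * Typical caller-side pattern (a sketch loosely modelled on the generic
 * write path; not part of this file): fault the user pages in up front,
 * then do the real copy with page faults disabled while fs locks are held:
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, bytes)))
 *		return -EFAULT;
 *	// ... lock the destination page ...
 *	copied = copy_page_from_iter_atomic(page, offset, bytes, i);
 *	// a short @copied means the source got faulted out again and the
 *	// caller normally retries the loop
 */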
433 
434 void iov_iter_init(struct iov_iter *i, unsigned int direction,
435 			const struct iovec *iov, unsigned long nr_segs,
436 			size_t count)
437 {
438 	WARN_ON(direction & ~(READ | WRITE));
439 	WARN_ON_ONCE(uaccess_kernel());
440 	*i = (struct iov_iter) {
441 		.iter_type = ITER_IOVEC,
442 		.data_source = direction,
443 		.iov = iov,
444 		.nr_segs = nr_segs,
445 		.iov_offset = 0,
446 		.count = count
447 	};
448 }
449 EXPORT_SYMBOL(iov_iter_init);
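
/*
 * Illustrative sketch (names are assumptions, not from this file): wrapping
 * a caller-supplied array of userspace iovecs for a read-style operation,
 * where data is subsequently copied *into* those buffers:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, READ, uvec, nr_segs, total_len);
 *	copied = copy_to_iter(kbuf, total_len, &iter);
 *
 * Most callers get here via import_iovec() further down, which validates
 * and copies the iovec array from userspace before calling iov_iter_init().
 */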
450 
451 static inline bool allocated(struct pipe_buffer *buf)
452 {
453 	return buf->ops == &default_pipe_buf_ops;
454 }
455 
456 static inline void data_start(const struct iov_iter *i,
457 			      unsigned int *iter_headp, size_t *offp)
458 {
459 	unsigned int p_mask = i->pipe->ring_size - 1;
460 	unsigned int iter_head = i->head;
461 	size_t off = i->iov_offset;
462 
463 	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
464 		    off == PAGE_SIZE)) {
465 		iter_head++;
466 		off = 0;
467 	}
468 	*iter_headp = iter_head;
469 	*offp = off;
470 }
471 
472 static size_t push_pipe(struct iov_iter *i, size_t size,
473 			int *iter_headp, size_t *offp)
474 {
475 	struct pipe_inode_info *pipe = i->pipe;
476 	unsigned int p_tail = pipe->tail;
477 	unsigned int p_mask = pipe->ring_size - 1;
478 	unsigned int iter_head;
479 	size_t off;
480 	ssize_t left;
481 
482 	if (unlikely(size > i->count))
483 		size = i->count;
484 	if (unlikely(!size))
485 		return 0;
486 
487 	left = size;
488 	data_start(i, &iter_head, &off);
489 	*iter_headp = iter_head;
490 	*offp = off;
491 	if (off) {
492 		left -= PAGE_SIZE - off;
493 		if (left <= 0) {
494 			pipe->bufs[iter_head & p_mask].len += size;
495 			return size;
496 		}
497 		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
498 		iter_head++;
499 	}
500 	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
501 		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
502 		struct page *page = alloc_page(GFP_USER);
503 		if (!page)
504 			break;
505 
506 		buf->ops = &default_pipe_buf_ops;
507 		buf->page = page;
508 		buf->offset = 0;
509 		buf->len = min_t(ssize_t, left, PAGE_SIZE);
510 		left -= buf->len;
511 		iter_head++;
512 		pipe->head = iter_head;
513 
514 		if (left == 0)
515 			return size;
516 	}
517 	return size - left;
518 }
519 
520 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
521 				struct iov_iter *i)
522 {
523 	struct pipe_inode_info *pipe = i->pipe;
524 	unsigned int p_mask = pipe->ring_size - 1;
525 	unsigned int i_head;
526 	size_t n, off;
527 
528 	if (!sanity(i))
529 		return 0;
530 
531 	bytes = n = push_pipe(i, bytes, &i_head, &off);
532 	if (unlikely(!n))
533 		return 0;
534 	do {
535 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
536 		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
537 		i->head = i_head;
538 		i->iov_offset = off + chunk;
539 		n -= chunk;
540 		addr += chunk;
541 		off = 0;
542 		i_head++;
543 	} while (n);
544 	i->count -= bytes;
545 	return bytes;
546 }
547 
548 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
549 			      __wsum sum, size_t off)
550 {
551 	__wsum next = csum_partial_copy_nocheck(from, to, len);
552 	return csum_block_add(sum, next, off);
553 }
554 
555 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
556 					 struct csum_state *csstate,
557 					 struct iov_iter *i)
558 {
559 	struct pipe_inode_info *pipe = i->pipe;
560 	unsigned int p_mask = pipe->ring_size - 1;
561 	__wsum sum = csstate->csum;
562 	size_t off = csstate->off;
563 	unsigned int i_head;
564 	size_t n, r;
565 
566 	if (!sanity(i))
567 		return 0;
568 
569 	bytes = n = push_pipe(i, bytes, &i_head, &r);
570 	if (unlikely(!n))
571 		return 0;
572 	do {
573 		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
574 		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
575 		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
576 		kunmap_atomic(p);
577 		i->head = i_head;
578 		i->iov_offset = r + chunk;
579 		n -= chunk;
580 		off += chunk;
581 		addr += chunk;
582 		r = 0;
583 		i_head++;
584 	} while (n);
585 	i->count -= bytes;
586 	csstate->csum = sum;
587 	csstate->off = off;
588 	return bytes;
589 }
590 
591 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
592 {
593 	const char *from = addr;
594 	if (unlikely(iov_iter_is_pipe(i)))
595 		return copy_pipe_to_iter(addr, bytes, i);
596 	if (iter_is_iovec(i))
597 		might_fault();
598 	iterate_and_advance(i, bytes, v,
599 		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
600 		memcpy_to_page(v.bv_page, v.bv_offset,
601 			       (from += v.bv_len) - v.bv_len, v.bv_len),
602 		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
603 		memcpy_to_page(v.bv_page, v.bv_offset,
604 			       (from += v.bv_len) - v.bv_len, v.bv_len)
605 	)
606 
607 	return bytes;
608 }
609 EXPORT_SYMBOL(_copy_to_iter);
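
/*
 * Usage sketch (illustrative only; dst, kbuf and len are assumed to come
 * from the caller): copying a kernel buffer through an ITER_KVEC iterator:
 *
 *	struct kvec kv = { .iov_base = dst, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_kvec(&iter, READ, &kv, 1, len);
 *	if (copy_to_iter(kbuf, len, &iter) != len)
 *		return -EFAULT;
 */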
610 
611 #ifdef CONFIG_ARCH_HAS_COPY_MC
612 static int copyout_mc(void __user *to, const void *from, size_t n)
613 {
614 	if (access_ok(to, n)) {
615 		instrument_copy_to_user(to, from, n);
616 		n = copy_mc_to_user((__force void *) to, from, n);
617 	}
618 	return n;
619 }
620 
621 static unsigned long copy_mc_to_page(struct page *page, size_t offset,
622 		const char *from, size_t len)
623 {
624 	unsigned long ret;
625 	char *to;
626 
627 	to = kmap_atomic(page);
628 	ret = copy_mc_to_kernel(to + offset, from, len);
629 	kunmap_atomic(to);
630 
631 	return ret;
632 }
633 
634 static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
635 				struct iov_iter *i)
636 {
637 	struct pipe_inode_info *pipe = i->pipe;
638 	unsigned int p_mask = pipe->ring_size - 1;
639 	unsigned int i_head;
640 	size_t n, off, xfer = 0;
641 
642 	if (!sanity(i))
643 		return 0;
644 
645 	bytes = n = push_pipe(i, bytes, &i_head, &off);
646 	if (unlikely(!n))
647 		return 0;
648 	do {
649 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
650 		unsigned long rem;
651 
652 		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
653 					    off, addr, chunk);
654 		i->head = i_head;
655 		i->iov_offset = off + chunk - rem;
656 		xfer += chunk - rem;
657 		if (rem)
658 			break;
659 		n -= chunk;
660 		addr += chunk;
661 		off = 0;
662 		i_head++;
663 	} while (n);
664 	i->count -= xfer;
665 	return xfer;
666 }
667 
668 /**
669  * _copy_mc_to_iter - copy to iter with source memory error exception handling
670  * @addr: source kernel address
671  * @bytes: total transfer length
672  * @i: destination iterator
673  *
674  * The pmem driver deploys this for the dax operation
675  * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
676  * block-layer). Upon #MC, read(2) aborts and returns EIO or the number of
677  * bytes successfully copied.
678  *
679  * The main differences between this and typical _copy_to_iter().
680  * The main differences between this and typical _copy_to_iter() are:
681  * * Typical tail/residue handling after a fault retries the copy
682  *   byte-by-byte until the fault happens again. Re-triggering machine
683  *   checks is potentially fatal so the implementation uses source
684  *   alignment and poison alignment assumptions to avoid re-triggering
685  *   hardware exceptions.
686  *
687  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
688  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
689  *   a short copy.
690  */
691 size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
692 {
693 	const char *from = addr;
694 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
695 
696 	if (unlikely(iov_iter_is_pipe(i)))
697 		return copy_mc_pipe_to_iter(addr, bytes, i);
698 	if (iter_is_iovec(i))
699 		might_fault();
700 	iterate_and_advance(i, bytes, v,
701 		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
702 			   v.iov_len),
703 		({
704 		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
705 				      (from += v.bv_len) - v.bv_len, v.bv_len);
706 		if (rem) {
707 			curr_addr = (unsigned long) from;
708 			bytes = curr_addr - s_addr - rem;
709 			return bytes;
710 		}
711 		}),
712 		({
713 		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
714 					- v.iov_len, v.iov_len);
715 		if (rem) {
716 			curr_addr = (unsigned long) from;
717 			bytes = curr_addr - s_addr - rem;
718 			return bytes;
719 		}
720 		}),
721 		({
722 		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
723 				      (from += v.bv_len) - v.bv_len, v.bv_len);
724 		if (rem) {
725 			curr_addr = (unsigned long) from;
726 			bytes = curr_addr - s_addr - rem;
727 			rcu_read_unlock();
728 			i->iov_offset += bytes;
729 			i->count -= bytes;
730 			return bytes;
731 		}
732 		})
733 	)
734 
735 	return bytes;
736 }
737 EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
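
/*
 * Caller-side sketch (illustrative; modelled on the dax read path described
 * above, names are assumptions): a short return means a machine check was
 * hit in the source, and the caller reports what was copied or -EIO:
 *
 *	copied = _copy_mc_to_iter(pmem_addr, len, iter);
 *	if (copied != len)
 *		return copied ? copied : -EIO;
 */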
738 #endif /* CONFIG_ARCH_HAS_COPY_MC */
739 
740 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
741 {
742 	char *to = addr;
743 	if (unlikely(iov_iter_is_pipe(i))) {
744 		WARN_ON(1);
745 		return 0;
746 	}
747 	if (iter_is_iovec(i))
748 		might_fault();
749 	iterate_and_advance(i, bytes, v,
750 		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
751 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
752 				 v.bv_offset, v.bv_len),
753 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
754 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
755 				 v.bv_offset, v.bv_len)
756 	)
757 
758 	return bytes;
759 }
760 EXPORT_SYMBOL(_copy_from_iter);
761 
762 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
763 {
764 	char *to = addr;
765 	if (unlikely(iov_iter_is_pipe(i))) {
766 		WARN_ON(1);
767 		return 0;
768 	}
769 	iterate_and_advance(i, bytes, v,
770 		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
771 					 v.iov_base, v.iov_len),
772 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
773 				 v.bv_offset, v.bv_len),
774 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
775 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
776 				 v.bv_offset, v.bv_len)
777 	)
778 
779 	return bytes;
780 }
781 EXPORT_SYMBOL(_copy_from_iter_nocache);
782 
783 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
784 /**
785  * _copy_from_iter_flushcache - write destination through cpu cache
786  * @addr: destination kernel address
787  * @bytes: total transfer length
788  * @i: source iterator
789  *
790  * The pmem driver arranges for filesystem-dax to use this facility via
791  * dax_copy_from_iter() for ensuring that writes to persistent memory
792  * are flushed through the CPU cache. It is differentiated from
793  * _copy_from_iter_nocache() in that it guarantees all data is flushed for
794  * all iterator types. _copy_from_iter_nocache() only attempts to
795  * bypass the cache for the ITER_IOVEC case, and on some archs may use
796  * instructions that strand dirty-data in the cache.
797  */
798 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
799 {
800 	char *to = addr;
801 	if (unlikely(iov_iter_is_pipe(i))) {
802 		WARN_ON(1);
803 		return 0;
804 	}
805 	iterate_and_advance(i, bytes, v,
806 		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
807 					 v.iov_base, v.iov_len),
808 		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
809 				 v.bv_offset, v.bv_len),
810 		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
811 			v.iov_len),
812 		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
813 				 v.bv_offset, v.bv_len)
814 	)
815 
816 	return bytes;
817 }
818 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
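
/*
 * Caller-side sketch (illustrative, names assumed): a pmem-style write path
 * that must not strand dirty data in the CPU cache:
 *
 *	copied = _copy_from_iter_flushcache(pmem_addr, len, iter);
 *	// the copied range is already flushed; no separate cache flush of
 *	// pmem_addr[0 .. copied) is required
 */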
819 #endif
820 
821 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
822 {
823 	struct page *head;
824 	size_t v = n + offset;
825 
826 	/*
827 	 * The general case needs to access the page order in order
828 	 * to compute the page size.
829 	 * However, we mostly deal with order-0 pages and thus can
830 	 * avoid a possible cache line miss for requests that fit
831 	 * within a single page regardless of the page order.
832 	 */
833 	if (n <= v && v <= PAGE_SIZE)
834 		return true;
835 
836 	head = compound_head(page);
837 	v += (page - head) << PAGE_SHIFT;
838 
839 	if (likely(n <= v && v <= (page_size(head))))
840 		return true;
841 	WARN_ON(1);
842 	return false;
843 }
844 
845 static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
846 			 struct iov_iter *i)
847 {
848 	if (likely(iter_is_iovec(i)))
849 		return copy_page_to_iter_iovec(page, offset, bytes, i);
850 	if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
851 		void *kaddr = kmap_atomic(page);
852 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
853 		kunmap_atomic(kaddr);
854 		return wanted;
855 	}
856 	if (iov_iter_is_pipe(i))
857 		return copy_page_to_iter_pipe(page, offset, bytes, i);
858 	if (unlikely(iov_iter_is_discard(i))) {
859 		if (unlikely(i->count < bytes))
860 			bytes = i->count;
861 		i->count -= bytes;
862 		return bytes;
863 	}
864 	WARN_ON(1);
865 	return 0;
866 }
867 
868 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
869 			 struct iov_iter *i)
870 {
871 	size_t res = 0;
872 	if (unlikely(!page_copy_sane(page, offset, bytes)))
873 		return 0;
874 	page += offset / PAGE_SIZE; // first subpage
875 	offset %= PAGE_SIZE;
876 	while (1) {
877 		size_t n = __copy_page_to_iter(page, offset,
878 				min(bytes, (size_t)PAGE_SIZE - offset), i);
879 		res += n;
880 		bytes -= n;
881 		if (!bytes || !n)
882 			break;
883 		offset += n;
884 		if (offset == PAGE_SIZE) {
885 			page++;
886 			offset = 0;
887 		}
888 	}
889 	return res;
890 }
891 EXPORT_SYMBOL(copy_page_to_iter);
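
/*
 * Usage sketch (simplified and illustrative, not taken from a real caller):
 * a read path handing a (possibly compound) page-cache page to whatever the
 * iterator describes:
 *
 *	copied = copy_page_to_iter(page, offset, bytes, iter);
 *	if (copied < bytes)
 *		return -EFAULT;	// e.g. the user buffer faulted
 */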
892 
893 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
894 			 struct iov_iter *i)
895 {
896 	if (unlikely(!page_copy_sane(page, offset, bytes)))
897 		return 0;
898 	if (likely(iter_is_iovec(i)))
899 		return copy_page_from_iter_iovec(page, offset, bytes, i);
900 	if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
901 		void *kaddr = kmap_atomic(page);
902 		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
903 		kunmap_atomic(kaddr);
904 		return wanted;
905 	}
906 	WARN_ON(1);
907 	return 0;
908 }
909 EXPORT_SYMBOL(copy_page_from_iter);
910 
911 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
912 {
913 	struct pipe_inode_info *pipe = i->pipe;
914 	unsigned int p_mask = pipe->ring_size - 1;
915 	unsigned int i_head;
916 	size_t n, off;
917 
918 	if (!sanity(i))
919 		return 0;
920 
921 	bytes = n = push_pipe(i, bytes, &i_head, &off);
922 	if (unlikely(!n))
923 		return 0;
924 
925 	do {
926 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
927 		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
928 		i->head = i_head;
929 		i->iov_offset = off + chunk;
930 		n -= chunk;
931 		off = 0;
932 		i_head++;
933 	} while (n);
934 	i->count -= bytes;
935 	return bytes;
936 }
937 
938 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
939 {
940 	if (unlikely(iov_iter_is_pipe(i)))
941 		return pipe_zero(bytes, i);
942 	iterate_and_advance(i, bytes, v,
943 		clear_user(v.iov_base, v.iov_len),
944 		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
945 		memset(v.iov_base, 0, v.iov_len),
946 		memzero_page(v.bv_page, v.bv_offset, v.bv_len)
947 	)
948 
949 	return bytes;
950 }
951 EXPORT_SYMBOL(iov_iter_zero);
952 
953 size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
954 				  struct iov_iter *i)
955 {
956 	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
957 	if (unlikely(!page_copy_sane(page, offset, bytes))) {
958 		kunmap_atomic(kaddr);
959 		return 0;
960 	}
961 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
962 		kunmap_atomic(kaddr);
963 		WARN_ON(1);
964 		return 0;
965 	}
966 	iterate_and_advance(i, bytes, v,
967 		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
968 		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
969 				 v.bv_offset, v.bv_len),
970 		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
971 		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
972 				 v.bv_offset, v.bv_len)
973 	)
974 	kunmap_atomic(kaddr);
975 	return bytes;
976 }
977 EXPORT_SYMBOL(copy_page_from_iter_atomic);
978 
979 static inline void pipe_truncate(struct iov_iter *i)
980 {
981 	struct pipe_inode_info *pipe = i->pipe;
982 	unsigned int p_tail = pipe->tail;
983 	unsigned int p_head = pipe->head;
984 	unsigned int p_mask = pipe->ring_size - 1;
985 
986 	if (!pipe_empty(p_head, p_tail)) {
987 		struct pipe_buffer *buf;
988 		unsigned int i_head = i->head;
989 		size_t off = i->iov_offset;
990 
991 		if (off) {
992 			buf = &pipe->bufs[i_head & p_mask];
993 			buf->len = off - buf->offset;
994 			i_head++;
995 		}
996 		while (p_head != i_head) {
997 			p_head--;
998 			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
999 		}
1000 
1001 		pipe->head = p_head;
1002 	}
1003 }
1004 
1005 static void pipe_advance(struct iov_iter *i, size_t size)
1006 {
1007 	struct pipe_inode_info *pipe = i->pipe;
1008 	if (size) {
1009 		struct pipe_buffer *buf;
1010 		unsigned int p_mask = pipe->ring_size - 1;
1011 		unsigned int i_head = i->head;
1012 		size_t off = i->iov_offset, left = size;
1013 
1014 		if (off) /* make it relative to the beginning of buffer */
1015 			left += off - pipe->bufs[i_head & p_mask].offset;
1016 		while (1) {
1017 			buf = &pipe->bufs[i_head & p_mask];
1018 			if (left <= buf->len)
1019 				break;
1020 			left -= buf->len;
1021 			i_head++;
1022 		}
1023 		i->head = i_head;
1024 		i->iov_offset = buf->offset + left;
1025 	}
1026 	i->count -= size;
1027 	/* ... and discard everything past that point */
1028 	pipe_truncate(i);
1029 }
1030 
1031 static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
1032 {
1033 	struct bvec_iter bi;
1034 
1035 	bi.bi_size = i->count;
1036 	bi.bi_bvec_done = i->iov_offset;
1037 	bi.bi_idx = 0;
1038 	bvec_iter_advance(i->bvec, &bi, size);
1039 
1040 	i->bvec += bi.bi_idx;
1041 	i->nr_segs -= bi.bi_idx;
1042 	i->count = bi.bi_size;
1043 	i->iov_offset = bi.bi_bvec_done;
1044 }
1045 
1046 static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
1047 {
1048 	const struct iovec *iov, *end;
1049 
1050 	if (!i->count)
1051 		return;
1052 	i->count -= size;
1053 
1054 	size += i->iov_offset; // from beginning of current segment
1055 	for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) {
1056 		if (likely(size < iov->iov_len))
1057 			break;
1058 		size -= iov->iov_len;
1059 	}
1060 	i->iov_offset = size;
1061 	i->nr_segs -= iov - i->iov;
1062 	i->iov = iov;
1063 }
1064 
1065 void iov_iter_advance(struct iov_iter *i, size_t size)
1066 {
1067 	if (unlikely(i->count < size))
1068 		size = i->count;
1069 	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
1070 		/* iovec and kvec have identical layouts */
1071 		iov_iter_iovec_advance(i, size);
1072 	} else if (iov_iter_is_bvec(i)) {
1073 		iov_iter_bvec_advance(i, size);
1074 	} else if (iov_iter_is_pipe(i)) {
1075 		pipe_advance(i, size);
1076 	} else if (unlikely(iov_iter_is_xarray(i))) {
1077 		i->iov_offset += size;
1078 		i->count -= size;
1079 	} else if (iov_iter_is_discard(i)) {
1080 		i->count -= size;
1081 	}
1082 }
1083 EXPORT_SYMBOL(iov_iter_advance);
1084 
1085 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1086 {
1087 	if (!unroll)
1088 		return;
1089 	if (WARN_ON(unroll > MAX_RW_COUNT))
1090 		return;
1091 	i->count += unroll;
1092 	if (unlikely(iov_iter_is_pipe(i))) {
1093 		struct pipe_inode_info *pipe = i->pipe;
1094 		unsigned int p_mask = pipe->ring_size - 1;
1095 		unsigned int i_head = i->head;
1096 		size_t off = i->iov_offset;
1097 		while (1) {
1098 			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
1099 			size_t n = off - b->offset;
1100 			if (unroll < n) {
1101 				off -= unroll;
1102 				break;
1103 			}
1104 			unroll -= n;
1105 			if (!unroll && i_head == i->start_head) {
1106 				off = 0;
1107 				break;
1108 			}
1109 			i_head--;
1110 			b = &pipe->bufs[i_head & p_mask];
1111 			off = b->offset + b->len;
1112 		}
1113 		i->iov_offset = off;
1114 		i->head = i_head;
1115 		pipe_truncate(i);
1116 		return;
1117 	}
1118 	if (unlikely(iov_iter_is_discard(i)))
1119 		return;
1120 	if (unroll <= i->iov_offset) {
1121 		i->iov_offset -= unroll;
1122 		return;
1123 	}
1124 	unroll -= i->iov_offset;
1125 	if (iov_iter_is_xarray(i)) {
1126 		BUG(); /* We should never go beyond the start of the specified
1127 			* range since we might then be straying into pages that
1128 			* aren't pinned.
1129 			*/
1130 	} else if (iov_iter_is_bvec(i)) {
1131 		const struct bio_vec *bvec = i->bvec;
1132 		while (1) {
1133 			size_t n = (--bvec)->bv_len;
1134 			i->nr_segs++;
1135 			if (unroll <= n) {
1136 				i->bvec = bvec;
1137 				i->iov_offset = n - unroll;
1138 				return;
1139 			}
1140 			unroll -= n;
1141 		}
1142 	} else { /* same logics for iovec and kvec */
1143 		const struct iovec *iov = i->iov;
1144 		while (1) {
1145 			size_t n = (--iov)->iov_len;
1146 			i->nr_segs++;
1147 			if (unroll <= n) {
1148 				i->iov = iov;
1149 				i->iov_offset = n - unroll;
1150 				return;
1151 			}
1152 			unroll -= n;
1153 		}
1154 	}
1155 }
1156 EXPORT_SYMBOL(iov_iter_revert);
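
/*
 * Illustrative pattern (names are assumptions): undoing consumption after a
 * lower layer advanced the iterator further than it actually used:
 *
 *	size_t before = iov_iter_count(iter);
 *	ret = submit_lower_layer(iter);		// advances @iter as it goes
 *	if (ret < 0)
 *		// put back everything consumed by the failed attempt
 *		iov_iter_revert(iter, before - iov_iter_count(iter));
 */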
1157 
1158 /*
1159  * Return the count of just the current iov_iter segment.
1160  */
1161 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1162 {
1163 	if (i->nr_segs > 1) {
1164 		if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1165 			return min(i->count, i->iov->iov_len - i->iov_offset);
1166 		if (iov_iter_is_bvec(i))
1167 			return min(i->count, i->bvec->bv_len - i->iov_offset);
1168 	}
1169 	return i->count;
1170 }
1171 EXPORT_SYMBOL(iov_iter_single_seg_count);
1172 
1173 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1174 			const struct kvec *kvec, unsigned long nr_segs,
1175 			size_t count)
1176 {
1177 	WARN_ON(direction & ~(READ | WRITE));
1178 	*i = (struct iov_iter){
1179 		.iter_type = ITER_KVEC,
1180 		.data_source = direction,
1181 		.kvec = kvec,
1182 		.nr_segs = nr_segs,
1183 		.iov_offset = 0,
1184 		.count = count
1185 	};
1186 }
1187 EXPORT_SYMBOL(iov_iter_kvec);
1188 
1189 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1190 			const struct bio_vec *bvec, unsigned long nr_segs,
1191 			size_t count)
1192 {
1193 	WARN_ON(direction & ~(READ | WRITE));
1194 	*i = (struct iov_iter){
1195 		.iter_type = ITER_BVEC,
1196 		.data_source = direction,
1197 		.bvec = bvec,
1198 		.nr_segs = nr_segs,
1199 		.iov_offset = 0,
1200 		.count = count
1201 	};
1202 }
1203 EXPORT_SYMBOL(iov_iter_bvec);
1204 
1205 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1206 			struct pipe_inode_info *pipe,
1207 			size_t count)
1208 {
1209 	BUG_ON(direction != READ);
1210 	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
1211 	*i = (struct iov_iter){
1212 		.iter_type = ITER_PIPE,
1213 		.data_source = false,
1214 		.pipe = pipe,
1215 		.head = pipe->head,
1216 		.start_head = pipe->head,
1217 		.iov_offset = 0,
1218 		.count = count
1219 	};
1220 }
1221 EXPORT_SYMBOL(iov_iter_pipe);
1222 
1223 /**
1224  * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
1225  * @i: The iterator to initialise.
1226  * @direction: The direction of the transfer.
1227  * @xarray: The xarray to access.
1228  * @start: The start file position.
1229  * @count: The size of the I/O buffer in bytes.
1230  *
1231  * Set up an I/O iterator to either draw data out of the pages attached to an
1232  * inode or to inject data into those pages.  The caller *must* prevent the
1233  * pages from evaporating (e.g. being reclaimed or freed), either by taking
1234  * a ref on them or by locking them.
1235  */
1236 void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
1237 		     struct xarray *xarray, loff_t start, size_t count)
1238 {
1239 	BUG_ON(direction & ~1);
1240 	*i = (struct iov_iter) {
1241 		.iter_type = ITER_XARRAY,
1242 		.data_source = direction,
1243 		.xarray = xarray,
1244 		.xarray_start = start,
1245 		.count = count,
1246 		.iov_offset = 0
1247 	};
1248 }
1249 EXPORT_SYMBOL(iov_iter_xarray);
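
/*
 * Setup sketch (illustrative; mapping, pos and len are assumptions): walking
 * the page cache of an inode for a given file range, with the pages already
 * pinned or locked by the caller as required above:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, len);
 *	// data can now be written into those pages with copy_to_iter() etc.
 */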
1250 
1251 /**
1252  * iov_iter_discard - Initialise an I/O iterator that discards data
1253  * @i: The iterator to initialise.
1254  * @direction: The direction of the transfer.
1255  * @count: The size of the I/O buffer in bytes.
1256  *
1257  * Set up an I/O iterator that just discards everything that's written to it.
1258  * It's only available as a READ iterator.
1259  */
1260 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1261 {
1262 	BUG_ON(direction != READ);
1263 	*i = (struct iov_iter){
1264 		.iter_type = ITER_DISCARD,
1265 		.data_source = false,
1266 		.count = count,
1267 		.iov_offset = 0
1268 	};
1269 }
1270 EXPORT_SYMBOL(iov_iter_discard);
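
/*
 * Usage sketch (illustrative): draining @len bytes from a producer that
 * insists on copying somewhere, without providing real backing memory:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_discard(&iter, READ, len);
 *	copy_to_iter(src, len, &iter);	// accepted and dropped
 */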
1271 
1272 static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
1273 {
1274 	unsigned long res = 0;
1275 	size_t size = i->count;
1276 	size_t skip = i->iov_offset;
1277 	unsigned k;
1278 
1279 	for (k = 0; k < i->nr_segs; k++, skip = 0) {
1280 		size_t len = i->iov[k].iov_len - skip;
1281 		if (len) {
1282 			res |= (unsigned long)i->iov[k].iov_base + skip;
1283 			if (len > size)
1284 				len = size;
1285 			res |= len;
1286 			size -= len;
1287 			if (!size)
1288 				break;
1289 		}
1290 	}
1291 	return res;
1292 }
1293 
1294 static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
1295 {
1296 	unsigned res = 0;
1297 	size_t size = i->count;
1298 	unsigned skip = i->iov_offset;
1299 	unsigned k;
1300 
1301 	for (k = 0; k < i->nr_segs; k++, skip = 0) {
1302 		size_t len = i->bvec[k].bv_len - skip;
1303 		res |= (unsigned long)i->bvec[k].bv_offset + skip;
1304 		if (len > size)
1305 			len = size;
1306 		res |= len;
1307 		size -= len;
1308 		if (!size)
1309 			break;
1310 	}
1311 	return res;
1312 }
1313 
1314 unsigned long iov_iter_alignment(const struct iov_iter *i)
1315 {
1316 	/* iovec and kvec have identical layouts */
1317 	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1318 		return iov_iter_alignment_iovec(i);
1319 
1320 	if (iov_iter_is_bvec(i))
1321 		return iov_iter_alignment_bvec(i);
1322 
1323 	if (iov_iter_is_pipe(i)) {
1324 		unsigned int p_mask = i->pipe->ring_size - 1;
1325 		size_t size = i->count;
1326 
1327 		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
1328 			return size | i->iov_offset;
1329 		return size;
1330 	}
1331 
1332 	if (iov_iter_is_xarray(i))
1333 		return (i->xarray_start + i->iov_offset) | i->count;
1334 
1335 	return 0;
1336 }
1337 EXPORT_SYMBOL(iov_iter_alignment);
1338 
1339 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1340 {
1341 	unsigned long res = 0;
1342 	unsigned long v = 0;
1343 	size_t size = i->count;
1344 	unsigned k;
1345 
1346 	if (WARN_ON(!iter_is_iovec(i)))
1347 		return ~0U;
1348 
1349 	for (k = 0; k < i->nr_segs; k++) {
1350 		if (i->iov[k].iov_len) {
1351 			unsigned long base = (unsigned long)i->iov[k].iov_base;
1352 			if (v) // if not the first one
1353 				res |= base | v; // this start | previous end
1354 			v = base + i->iov[k].iov_len;
1355 			if (size <= i->iov[k].iov_len)
1356 				break;
1357 			size -= i->iov[k].iov_len;
1358 		}
1359 	}
1360 	return res;
1361 }
1362 EXPORT_SYMBOL(iov_iter_gap_alignment);
1363 
1364 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1365 				size_t maxsize,
1366 				struct page **pages,
1367 				int iter_head,
1368 				size_t *start)
1369 {
1370 	struct pipe_inode_info *pipe = i->pipe;
1371 	unsigned int p_mask = pipe->ring_size - 1;
1372 	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
1373 	if (!n)
1374 		return -EFAULT;
1375 
1376 	maxsize = n;
1377 	n += *start;
1378 	while (n > 0) {
1379 		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1380 		iter_head++;
1381 		n -= PAGE_SIZE;
1382 	}
1383 
1384 	return maxsize;
1385 }
1386 
1387 static ssize_t pipe_get_pages(struct iov_iter *i,
1388 		   struct page **pages, size_t maxsize, unsigned maxpages,
1389 		   size_t *start)
1390 {
1391 	unsigned int iter_head, npages;
1392 	size_t capacity;
1393 
1394 	if (!sanity(i))
1395 		return -EFAULT;
1396 
1397 	data_start(i, &iter_head, start);
1398 	/* Amount of free space: some of this one + all after this one */
1399 	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1400 	capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1401 
1402 	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
1403 }
1404 
1405 static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
1406 					  pgoff_t index, unsigned int nr_pages)
1407 {
1408 	XA_STATE(xas, xa, index);
1409 	struct page *page;
1410 	unsigned int ret = 0;
1411 
1412 	rcu_read_lock();
1413 	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1414 		if (xas_retry(&xas, page))
1415 			continue;
1416 
1417 		/* Has the page moved or been split? */
1418 		if (unlikely(page != xas_reload(&xas))) {
1419 			xas_reset(&xas);
1420 			continue;
1421 		}
1422 
1423 		pages[ret] = find_subpage(page, xas.xa_index);
1424 		get_page(pages[ret]);
1425 		if (++ret == nr_pages)
1426 			break;
1427 	}
1428 	rcu_read_unlock();
1429 	return ret;
1430 }
1431 
1432 static ssize_t iter_xarray_get_pages(struct iov_iter *i,
1433 				     struct page **pages, size_t maxsize,
1434 				     unsigned maxpages, size_t *_start_offset)
1435 {
1436 	unsigned nr, offset;
1437 	pgoff_t index, count;
1438 	size_t size = maxsize, actual;
1439 	loff_t pos;
1440 
1441 	if (!size || !maxpages)
1442 		return 0;
1443 
1444 	pos = i->xarray_start + i->iov_offset;
1445 	index = pos >> PAGE_SHIFT;
1446 	offset = pos & ~PAGE_MASK;
1447 	*_start_offset = offset;
1448 
1449 	count = 1;
1450 	if (size > PAGE_SIZE - offset) {
1451 		size -= PAGE_SIZE - offset;
1452 		count += size >> PAGE_SHIFT;
1453 		size &= ~PAGE_MASK;
1454 		if (size)
1455 			count++;
1456 	}
1457 
1458 	if (count > maxpages)
1459 		count = maxpages;
1460 
1461 	nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
1462 	if (nr == 0)
1463 		return 0;
1464 
1465 	actual = PAGE_SIZE * nr;
1466 	actual -= offset;
1467 	if (nr == count && size > 0) {
1468 		unsigned last_offset = (nr > 1) ? 0 : offset;
1469 		actual -= PAGE_SIZE - (last_offset + size);
1470 	}
1471 	return actual;
1472 }
1473 
1474 /* must be called on a non-empty ITER_IOVEC iterator */
1475 static unsigned long first_iovec_segment(const struct iov_iter *i,
1476 					 size_t *size, size_t *start,
1477 					 size_t maxsize, unsigned maxpages)
1478 {
1479 	size_t skip;
1480 	long k;
1481 
1482 	for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
1483 		unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;
1484 		size_t len = i->iov[k].iov_len - skip;
1485 
1486 		if (unlikely(!len))
1487 			continue;
1488 		if (len > maxsize)
1489 			len = maxsize;
1490 		len += (*start = addr % PAGE_SIZE);
1491 		if (len > maxpages * PAGE_SIZE)
1492 			len = maxpages * PAGE_SIZE;
1493 		*size = len;
1494 		return addr & PAGE_MASK;
1495 	}
1496 	BUG(); // if it had been empty, we wouldn't get called
1497 }
1498 
1499 /* must be called on a non-empty ITER_BVEC iterator */
1500 static struct page *first_bvec_segment(const struct iov_iter *i,
1501 				       size_t *size, size_t *start,
1502 				       size_t maxsize, unsigned maxpages)
1503 {
1504 	struct page *page;
1505 	size_t skip = i->iov_offset, len;
1506 
1507 	len = i->bvec->bv_len - skip;
1508 	if (len > maxsize)
1509 		len = maxsize;
1510 	skip += i->bvec->bv_offset;
1511 	page = i->bvec->bv_page + skip / PAGE_SIZE;
1512 	len += (*start = skip % PAGE_SIZE);
1513 	if (len > maxpages * PAGE_SIZE)
1514 		len = maxpages * PAGE_SIZE;
1515 	*size = len;
1516 	return page;
1517 }
1518 
1519 ssize_t iov_iter_get_pages(struct iov_iter *i,
1520 		   struct page **pages, size_t maxsize, unsigned maxpages,
1521 		   size_t *start)
1522 {
1523 	size_t len;
1524 	int n, res;
1525 
1526 	if (maxsize > i->count)
1527 		maxsize = i->count;
1528 	if (!maxsize)
1529 		return 0;
1530 
1531 	if (likely(iter_is_iovec(i))) {
1532 		unsigned long addr;
1533 
1534 		addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
1535 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1536 		res = get_user_pages_fast(addr, n,
1537 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1538 				pages);
1539 		if (unlikely(res < 0))
1540 			return res;
1541 		return (res == n ? len : res * PAGE_SIZE) - *start;
1542 	}
1543 	if (iov_iter_is_bvec(i)) {
1544 		struct page *page;
1545 
1546 		page = first_bvec_segment(i, &len, start, maxsize, maxpages);
1547 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1548 		while (n--)
1549 			get_page(*pages++ = page++);
1550 		return len - *start;
1551 	}
1552 	if (iov_iter_is_pipe(i))
1553 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
1554 	if (iov_iter_is_xarray(i))
1555 		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
1556 	return -EFAULT;
1557 }
1558 EXPORT_SYMBOL(iov_iter_get_pages);
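
/*
 * Caller-side sketch (illustrative; NR and the error handling are
 * simplified assumptions): grabbing up to NR pages backing the start of the
 * iterator, e.g. to build a bio for direct I/O:
 *
 *	struct page *pages[NR];
 *	size_t offset;
 *	ssize_t bytes;
 *
 *	bytes = iov_iter_get_pages(iter, pages, LONG_MAX, NR, &offset);
 *	if (bytes <= 0)
 *		return bytes ? bytes : -EFAULT;
 *	// pages[0] starts at @offset; each returned page holds a reference
 *	// and must be released with put_page() when the caller is done
 */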
1559 
1560 static struct page **get_pages_array(size_t n)
1561 {
1562 	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1563 }
1564 
1565 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1566 		   struct page ***pages, size_t maxsize,
1567 		   size_t *start)
1568 {
1569 	struct page **p;
1570 	unsigned int iter_head, npages;
1571 	ssize_t n;
1572 
1573 	if (!sanity(i))
1574 		return -EFAULT;
1575 
1576 	data_start(i, &iter_head, start);
1577 	/* Amount of free space: some of this one + all after this one */
1578 	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1579 	n = npages * PAGE_SIZE - *start;
1580 	if (maxsize > n)
1581 		maxsize = n;
1582 	else
1583 		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1584 	p = get_pages_array(npages);
1585 	if (!p)
1586 		return -ENOMEM;
1587 	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1588 	if (n > 0)
1589 		*pages = p;
1590 	else
1591 		kvfree(p);
1592 	return n;
1593 }
1594 
1595 static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
1596 					   struct page ***pages, size_t maxsize,
1597 					   size_t *_start_offset)
1598 {
1599 	struct page **p;
1600 	unsigned nr, offset;
1601 	pgoff_t index, count;
1602 	size_t size = maxsize, actual;
1603 	loff_t pos;
1604 
1605 	if (!size)
1606 		return 0;
1607 
1608 	pos = i->xarray_start + i->iov_offset;
1609 	index = pos >> PAGE_SHIFT;
1610 	offset = pos & ~PAGE_MASK;
1611 	*_start_offset = offset;
1612 
1613 	count = 1;
1614 	if (size > PAGE_SIZE - offset) {
1615 		size -= PAGE_SIZE - offset;
1616 		count += size >> PAGE_SHIFT;
1617 		size &= ~PAGE_MASK;
1618 		if (size)
1619 			count++;
1620 	}
1621 
1622 	p = get_pages_array(count);
1623 	if (!p)
1624 		return -ENOMEM;
1625 	*pages = p;
1626 
1627 	nr = iter_xarray_populate_pages(p, i->xarray, index, count);
1628 	if (nr == 0)
1629 		return 0;
1630 
1631 	actual = PAGE_SIZE * nr;
1632 	actual -= offset;
1633 	if (nr == count && size > 0) {
1634 		unsigned last_offset = (nr > 1) ? 0 : offset;
1635 		actual -= PAGE_SIZE - (last_offset + size);
1636 	}
1637 	return actual;
1638 }
1639 
1640 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1641 		   struct page ***pages, size_t maxsize,
1642 		   size_t *start)
1643 {
1644 	struct page **p;
1645 	size_t len;
1646 	int n, res;
1647 
1648 	if (maxsize > i->count)
1649 		maxsize = i->count;
1650 	if (!maxsize)
1651 		return 0;
1652 
1653 	if (likely(iter_is_iovec(i))) {
1654 		unsigned long addr;
1655 
1656 		addr = first_iovec_segment(i, &len, start, maxsize, ~0U);
1657 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1658 		p = get_pages_array(n);
1659 		if (!p)
1660 			return -ENOMEM;
1661 		res = get_user_pages_fast(addr, n,
1662 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1663 		if (unlikely(res < 0)) {
1664 			kvfree(p);
1665 			return res;
1666 		}
1667 		*pages = p;
1668 		return (res == n ? len : res * PAGE_SIZE) - *start;
1669 	}
1670 	if (iov_iter_is_bvec(i)) {
1671 		struct page *page;
1672 
1673 		page = first_bvec_segment(i, &len, start, maxsize, ~0U);
1674 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1675 		*pages = p = get_pages_array(n);
1676 		if (!p)
1677 			return -ENOMEM;
1678 		while (n--)
1679 			get_page(*p++ = page++);
1680 		return len - *start;
1681 	}
1682 	if (iov_iter_is_pipe(i))
1683 		return pipe_get_pages_alloc(i, pages, maxsize, start);
1684 	if (iov_iter_is_xarray(i))
1685 		return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
1686 	return -EFAULT;
1687 }
1688 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1689 
1690 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1691 			       struct iov_iter *i)
1692 {
1693 	char *to = addr;
1694 	__wsum sum, next;
1695 	size_t off = 0;
1696 	sum = *csum;
1697 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1698 		WARN_ON(1);
1699 		return 0;
1700 	}
1701 	iterate_and_advance(i, bytes, v, ({
1702 		next = csum_and_copy_from_user(v.iov_base,
1703 					       (to += v.iov_len) - v.iov_len,
1704 					       v.iov_len);
1705 		if (next) {
1706 			sum = csum_block_add(sum, next, off);
1707 			off += v.iov_len;
1708 		}
1709 		next ? 0 : v.iov_len;
1710 	}), ({
1711 		char *p = kmap_atomic(v.bv_page);
1712 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1713 				      p + v.bv_offset, v.bv_len,
1714 				      sum, off);
1715 		kunmap_atomic(p);
1716 		off += v.bv_len;
1717 	}),({
1718 		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1719 				      v.iov_base, v.iov_len,
1720 				      sum, off);
1721 		off += v.iov_len;
1722 	}), ({
1723 		char *p = kmap_atomic(v.bv_page);
1724 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1725 				      p + v.bv_offset, v.bv_len,
1726 				      sum, off);
1727 		kunmap_atomic(p);
1728 		off += v.bv_len;
1729 	})
1730 	)
1731 	*csum = sum;
1732 	return bytes;
1733 }
1734 EXPORT_SYMBOL(csum_and_copy_from_iter);
1735 
1736 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1737 			     struct iov_iter *i)
1738 {
1739 	struct csum_state *csstate = _csstate;
1740 	const char *from = addr;
1741 	__wsum sum, next;
1742 	size_t off;
1743 
1744 	if (unlikely(iov_iter_is_pipe(i)))
1745 		return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
1746 
1747 	sum = csum_shift(csstate->csum, csstate->off);
1748 	off = 0;
1749 	if (unlikely(iov_iter_is_discard(i))) {
1750 		WARN_ON(1);	/* for now */
1751 		return 0;
1752 	}
1753 	iterate_and_advance(i, bytes, v, ({
1754 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1755 					     v.iov_base,
1756 					     v.iov_len);
1757 		if (next) {
1758 			sum = csum_block_add(sum, next, off);
1759 			off += v.iov_len;
1760 		}
1761 		next ? 0 : v.iov_len;
1762 	}), ({
1763 		char *p = kmap_atomic(v.bv_page);
1764 		sum = csum_and_memcpy(p + v.bv_offset,
1765 				      (from += v.bv_len) - v.bv_len,
1766 				      v.bv_len, sum, off);
1767 		kunmap_atomic(p);
1768 		off += v.bv_len;
1769 	}),({
1770 		sum = csum_and_memcpy(v.iov_base,
1771 				     (from += v.iov_len) - v.iov_len,
1772 				     v.iov_len, sum, off);
1773 		off += v.iov_len;
1774 	}), ({
1775 		char *p = kmap_atomic(v.bv_page);
1776 		sum = csum_and_memcpy(p + v.bv_offset,
1777 				      (from += v.bv_len) - v.bv_len,
1778 				      v.bv_len, sum, off);
1779 		kunmap_atomic(p);
1780 		off += v.bv_len;
1781 	})
1782 	)
1783 	csstate->csum = csum_shift(sum, csstate->off);
1784 	csstate->off += bytes;
1785 	return bytes;
1786 }
1787 EXPORT_SYMBOL(csum_and_copy_to_iter);
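
/*
 * Caller-side sketch (illustrative; modelled on the datagram helpers, names
 * are assumptions): folding a checksum into @csump while copying @len bytes
 * out to the destination iterator:
 *
 *	struct csum_state csdata = { .csum = *csump, .off = off };
 *
 *	if (csum_and_copy_to_iter(kaddr, len, &csdata, to) != len)
 *		return -EFAULT;
 *	*csump = csdata.csum;
 */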
1788 
1789 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1790 		struct iov_iter *i)
1791 {
1792 #ifdef CONFIG_CRYPTO_HASH
1793 	struct ahash_request *hash = hashp;
1794 	struct scatterlist sg;
1795 	size_t copied;
1796 
1797 	copied = copy_to_iter(addr, bytes, i);
1798 	sg_init_one(&sg, addr, copied);
1799 	ahash_request_set_crypt(hash, &sg, NULL, copied);
1800 	crypto_ahash_update(hash);
1801 	return copied;
1802 #else
1803 	return 0;
1804 #endif
1805 }
1806 EXPORT_SYMBOL(hash_and_copy_to_iter);
1807 
1808 static int iov_npages(const struct iov_iter *i, int maxpages)
1809 {
1810 	size_t skip = i->iov_offset, size = i->count;
1811 	const struct iovec *p;
1812 	int npages = 0;
1813 
1814 	for (p = i->iov; size; skip = 0, p++) {
1815 		unsigned offs = offset_in_page(p->iov_base + skip);
1816 		size_t len = min(p->iov_len - skip, size);
1817 
1818 		if (len) {
1819 			size -= len;
1820 			npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
1821 			if (unlikely(npages > maxpages))
1822 				return maxpages;
1823 		}
1824 	}
1825 	return npages;
1826 }
1827 
1828 static int bvec_npages(const struct iov_iter *i, int maxpages)
1829 {
1830 	size_t skip = i->iov_offset, size = i->count;
1831 	const struct bio_vec *p;
1832 	int npages = 0;
1833 
1834 	for (p = i->bvec; size; skip = 0, p++) {
1835 		unsigned offs = (p->bv_offset + skip) % PAGE_SIZE;
1836 		size_t len = min(p->bv_len - skip, size);
1837 
1838 		size -= len;
1839 		npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
1840 		if (unlikely(npages > maxpages))
1841 			return maxpages;
1842 	}
1843 	return npages;
1844 }
1845 
1846 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1847 {
1848 	if (unlikely(!i->count))
1849 		return 0;
1850 	/* iovec and kvec have identical layouts */
1851 	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1852 		return iov_npages(i, maxpages);
1853 	if (iov_iter_is_bvec(i))
1854 		return bvec_npages(i, maxpages);
1855 	if (iov_iter_is_pipe(i)) {
1856 		unsigned int iter_head;
1857 		int npages;
1858 		size_t off;
1859 
1860 		if (!sanity(i))
1861 			return 0;
1862 
1863 		data_start(i, &iter_head, &off);
1864 		/* some of this one + all after this one */
1865 		npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1866 		return min(npages, maxpages);
1867 	}
1868 	if (iov_iter_is_xarray(i)) {
1869 		unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
1870 		int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
1871 		return min(npages, maxpages);
1872 	}
1873 	return 0;
1874 }
1875 EXPORT_SYMBOL(iov_iter_npages);
1876 
1877 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1878 {
1879 	*new = *old;
1880 	if (unlikely(iov_iter_is_pipe(new))) {
1881 		WARN_ON(1);
1882 		return NULL;
1883 	}
1884 	if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
1885 		return NULL;
1886 	if (iov_iter_is_bvec(new))
1887 		return new->bvec = kmemdup(new->bvec,
1888 				    new->nr_segs * sizeof(struct bio_vec),
1889 				    flags);
1890 	else
1891 		/* iovec and kvec have identical layout */
1892 		return new->iov = kmemdup(new->iov,
1893 				   new->nr_segs * sizeof(struct iovec),
1894 				   flags);
1895 }
1896 EXPORT_SYMBOL(dup_iter);
1897 
1898 static int copy_compat_iovec_from_user(struct iovec *iov,
1899 		const struct iovec __user *uvec, unsigned long nr_segs)
1900 {
1901 	const struct compat_iovec __user *uiov =
1902 		(const struct compat_iovec __user *)uvec;
1903 	int ret = -EFAULT, i;
1904 
1905 	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
1906 		return -EFAULT;
1907 
1908 	for (i = 0; i < nr_segs; i++) {
1909 		compat_uptr_t buf;
1910 		compat_ssize_t len;
1911 
1912 		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1913 		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1914 
1915 		/* check for compat_size_t not fitting in compat_ssize_t ... */
1916 		if (len < 0) {
1917 			ret = -EINVAL;
1918 			goto uaccess_end;
1919 		}
1920 		iov[i].iov_base = compat_ptr(buf);
1921 		iov[i].iov_len = len;
1922 	}
1923 
1924 	ret = 0;
1925 uaccess_end:
1926 	user_access_end();
1927 	return ret;
1928 }
1929 
1930 static int copy_iovec_from_user(struct iovec *iov,
1931 		const struct iovec __user *uvec, unsigned long nr_segs)
1932 {
1933 	unsigned long seg;
1934 
1935 	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1936 		return -EFAULT;
1937 	for (seg = 0; seg < nr_segs; seg++) {
1938 		if ((ssize_t)iov[seg].iov_len < 0)
1939 			return -EINVAL;
1940 	}
1941 
1942 	return 0;
1943 }
1944 
1945 struct iovec *iovec_from_user(const struct iovec __user *uvec,
1946 		unsigned long nr_segs, unsigned long fast_segs,
1947 		struct iovec *fast_iov, bool compat)
1948 {
1949 	struct iovec *iov = fast_iov;
1950 	int ret;
1951 
1952 	/*
1953 	 * SuS says "The readv() function *may* fail if the iovcnt argument was
1954 	 * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
1955 	 * traditionally returned zero for zero segments, so...
1956 	 */
1957 	if (nr_segs == 0)
1958 		return iov;
1959 	if (nr_segs > UIO_MAXIOV)
1960 		return ERR_PTR(-EINVAL);
1961 	if (nr_segs > fast_segs) {
1962 		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
1963 		if (!iov)
1964 			return ERR_PTR(-ENOMEM);
1965 	}
1966 
1967 	if (compat)
1968 		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1969 	else
1970 		ret = copy_iovec_from_user(iov, uvec, nr_segs);
1971 	if (ret) {
1972 		if (iov != fast_iov)
1973 			kfree(iov);
1974 		return ERR_PTR(ret);
1975 	}
1976 
1977 	return iov;
1978 }
1979 
1980 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
1981 		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
1982 		 struct iov_iter *i, bool compat)
1983 {
1984 	ssize_t total_len = 0;
1985 	unsigned long seg;
1986 	struct iovec *iov;
1987 
1988 	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
1989 	if (IS_ERR(iov)) {
1990 		*iovp = NULL;
1991 		return PTR_ERR(iov);
1992 	}
1993 
1994 	/*
1995 	 * According to the Single Unix Specification we should return EINVAL if
1996 	 * an element length is < 0 when cast to ssize_t or if the total length
1997 	 * would overflow the ssize_t return value of the system call.
1998 	 *
1999 	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
2000 	 * overflow case.
2001 	 */
2002 	for (seg = 0; seg < nr_segs; seg++) {
2003 		ssize_t len = (ssize_t)iov[seg].iov_len;
2004 
2005 		if (!access_ok(iov[seg].iov_base, len)) {
2006 			if (iov != *iovp)
2007 				kfree(iov);
2008 			*iovp = NULL;
2009 			return -EFAULT;
2010 		}
2011 
2012 		if (len > MAX_RW_COUNT - total_len) {
2013 			len = MAX_RW_COUNT - total_len;
2014 			iov[seg].iov_len = len;
2015 		}
2016 		total_len += len;
2017 	}
2018 
2019 	iov_iter_init(i, type, iov, nr_segs, total_len);
2020 	if (iov == *iovp)
2021 		*iovp = NULL;
2022 	else
2023 		*iovp = iov;
2024 	return total_len;
2025 }
2026 
2027 /**
2028  * import_iovec() - Copy an array of &struct iovec from userspace
2029  *     into the kernel, check that it is valid, and initialize a new
2030  *     &struct iov_iter iterator to access it.
2031  *
2032  * @type: One of %READ or %WRITE.
2033  * @uvec: Pointer to the userspace array.
2034  * @nr_segs: Number of elements in userspace array.
2035  * @fast_segs: Number of elements in *@iovp (the caller-provided fast array).
2036  * @iovp: (input and output parameter) Pointer to pointer to (usually small
2037  *     on-stack) kernel array.
2038  * @i: Pointer to iterator that will be initialized on success.
2039  *
2040  * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
2041  * then this function places %NULL in *@iovp on return. Otherwise, a new
2042  * array will be allocated and the result placed in *@iovp. This means that
2043  * the caller may call kfree() on *@iovp regardless of whether the small
2044  * on-stack array was used or not (and regardless of whether this function
2045  * returns an error or not).
2046  *
2047  * Return: Negative error code on error, bytes imported on success
2048  */
2049 ssize_t import_iovec(int type, const struct iovec __user *uvec,
2050 		 unsigned nr_segs, unsigned fast_segs,
2051 		 struct iovec **iovp, struct iov_iter *i)
2052 {
2053 	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
2054 			      in_compat_syscall());
2055 }
2056 EXPORT_SYMBOL(import_iovec);
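
/*
 * Usage sketch (illustrative; mirrors the pattern used by the readv/writev
 * family): import a user iovec array, do the I/O, then free whatever
 * import_iovec() may have allocated:
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
 *			   &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_the_read(&iter);	// hypothetical consumer
 *	kfree(iov);			// NULL if the stack array was used
 *	return ret;
 */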
2057 
2058 int import_single_range(int rw, void __user *buf, size_t len,
2059 		 struct iovec *iov, struct iov_iter *i)
2060 {
2061 	if (len > MAX_RW_COUNT)
2062 		len = MAX_RW_COUNT;
2063 	if (unlikely(!access_ok(buf, len)))
2064 		return -EFAULT;
2065 
2066 	iov->iov_base = buf;
2067 	iov->iov_len = len;
2068 	iov_iter_init(i, rw, iov, 1, len);
2069 	return 0;
2070 }
2071 EXPORT_SYMBOL(import_single_range);
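
/*
 * Usage sketch (illustrative): the single-buffer counterpart used by plain
 * read()/write()-style paths; buf and len come straight from userspace:
 *
 *	struct iovec iov;
 *	struct iov_iter iter;
 *	int ret;
 *
 *	ret = import_single_range(READ, buf, len, &iov, &iter);
 *	if (ret)
 *		return ret;
 *	// &iter now describes at most MAX_RW_COUNT bytes of user memory
 */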
2072