xref: /openbmc/linux/lib/iov_iter.c (revision 185ac4d4)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <crypto/hash.h>
3 #include <linux/export.h>
4 #include <linux/bvec.h>
5 #include <linux/fault-inject-usercopy.h>
6 #include <linux/uio.h>
7 #include <linux/pagemap.h>
8 #include <linux/highmem.h>
9 #include <linux/slab.h>
10 #include <linux/vmalloc.h>
11 #include <linux/splice.h>
12 #include <linux/compat.h>
13 #include <net/checksum.h>
14 #include <linux/scatterlist.h>
15 #include <linux/instrumented.h>
16 
17 #define PIPE_PARANOIA /* for now */
18 
19 #define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
20 	size_t left;					\
21 	size_t wanted = n;				\
22 	__p = i->iov;					\
23 	__v.iov_len = min(n, __p->iov_len - skip);	\
24 	if (likely(__v.iov_len)) {			\
25 		__v.iov_base = __p->iov_base + skip;	\
26 		left = (STEP);				\
27 		__v.iov_len -= left;			\
28 		skip += __v.iov_len;			\
29 		n -= __v.iov_len;			\
30 	} else {					\
31 		left = 0;				\
32 	}						\
33 	while (unlikely(!left && n)) {			\
34 		__p++;					\
35 		__v.iov_len = min(n, __p->iov_len);	\
36 		if (unlikely(!__v.iov_len))		\
37 			continue;			\
38 		__v.iov_base = __p->iov_base;		\
39 		left = (STEP);				\
40 		__v.iov_len -= left;			\
41 		skip = __v.iov_len;			\
42 		n -= __v.iov_len;			\
43 	}						\
44 	n = wanted - n;					\
45 }
46 
47 #define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
48 	size_t wanted = n;				\
49 	__p = i->kvec;					\
50 	__v.iov_len = min(n, __p->iov_len - skip);	\
51 	if (likely(__v.iov_len)) {			\
52 		__v.iov_base = __p->iov_base + skip;	\
53 		(void)(STEP);				\
54 		skip += __v.iov_len;			\
55 		n -= __v.iov_len;			\
56 	}						\
57 	while (unlikely(n)) {				\
58 		__p++;					\
59 		__v.iov_len = min(n, __p->iov_len);	\
60 		if (unlikely(!__v.iov_len))		\
61 			continue;			\
62 		__v.iov_base = __p->iov_base;		\
63 		(void)(STEP);				\
64 		skip = __v.iov_len;			\
65 		n -= __v.iov_len;			\
66 	}						\
67 	n = wanted;					\
68 }
69 
70 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
71 	struct bvec_iter __start;			\
72 	__start.bi_size = n;				\
73 	__start.bi_bvec_done = skip;			\
74 	__start.bi_idx = 0;				\
75 	for_each_bvec(__v, i->bvec, __bi, __start) {	\
76 		(void)(STEP);				\
77 	}						\
78 }
79 
80 #define iterate_xarray(i, n, __v, skip, STEP) {		\
81 	struct page *head = NULL;				\
82 	size_t wanted = n, seg, offset;				\
83 	loff_t start = i->xarray_start + skip;			\
84 	pgoff_t index = start >> PAGE_SHIFT;			\
85 	int j;							\
86 								\
87 	XA_STATE(xas, i->xarray, index);			\
88 								\
89 	rcu_read_lock();						\
90 	xas_for_each(&xas, head, ULONG_MAX) {				\
91 		if (xas_retry(&xas, head))				\
92 			continue;					\
93 		if (WARN_ON(xa_is_value(head)))				\
94 			break;						\
95 		if (WARN_ON(PageHuge(head)))				\
96 			break;						\
97 		for (j = (head->index < index) ? index - head->index : 0; \
98 		     j < thp_nr_pages(head); j++) {			\
99 			__v.bv_page = head + j;				\
100 			offset = (i->xarray_start + skip) & ~PAGE_MASK;	\
101 			seg = PAGE_SIZE - offset;			\
102 			__v.bv_offset = offset;				\
103 			__v.bv_len = min(n, seg);			\
104 			(void)(STEP);					\
105 			n -= __v.bv_len;				\
106 			skip += __v.bv_len;				\
107 			if (n == 0)					\
108 				break;					\
109 		}							\
110 		if (n == 0)						\
111 			break;						\
112 	}							\
113 	rcu_read_unlock();					\
114 	n = wanted - n;						\
115 }
116 
117 #define iterate_all_kinds(i, n, v, I, B, K, X) {		\
118 	if (likely(n)) {					\
119 		size_t skip = i->iov_offset;			\
120 		if (likely(iter_is_iovec(i))) {			\
121 			const struct iovec *iov;		\
122 			struct iovec v;				\
123 			iterate_iovec(i, n, v, iov, skip, (I))	\
124 		} else if (iov_iter_is_bvec(i)) {		\
125 			struct bio_vec v;			\
126 			struct bvec_iter __bi;			\
127 			iterate_bvec(i, n, v, __bi, skip, (B))	\
128 		} else if (iov_iter_is_kvec(i)) {		\
129 			const struct kvec *kvec;		\
130 			struct kvec v;				\
131 			iterate_kvec(i, n, v, kvec, skip, (K))	\
132 		} else if (iov_iter_is_xarray(i)) {		\
133 			struct bio_vec v;			\
134 			iterate_xarray(i, n, v, skip, (X));	\
135 		}						\
136 	}							\
137 }
138 
139 #define iterate_and_advance(i, n, v, I, B, K, X) {		\
140 	if (unlikely(i->count < n))				\
141 		n = i->count;					\
142 	if (i->count) {						\
143 		size_t skip = i->iov_offset;			\
144 		if (likely(iter_is_iovec(i))) {			\
145 			const struct iovec *iov;		\
146 			struct iovec v;				\
147 			iterate_iovec(i, n, v, iov, skip, (I))	\
148 			if (skip == iov->iov_len) {		\
149 				iov++;				\
150 				skip = 0;			\
151 			}					\
152 			i->nr_segs -= iov - i->iov;		\
153 			i->iov = iov;				\
154 		} else if (iov_iter_is_bvec(i)) {		\
155 			const struct bio_vec *bvec = i->bvec;	\
156 			struct bio_vec v;			\
157 			struct bvec_iter __bi;			\
158 			iterate_bvec(i, n, v, __bi, skip, (B))	\
159 			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
160 			i->nr_segs -= i->bvec - bvec;		\
161 			skip = __bi.bi_bvec_done;		\
162 		} else if (iov_iter_is_kvec(i)) {		\
163 			const struct kvec *kvec;		\
164 			struct kvec v;				\
165 			iterate_kvec(i, n, v, kvec, skip, (K))	\
166 			if (skip == kvec->iov_len) {		\
167 				kvec++;				\
168 				skip = 0;			\
169 			}					\
170 			i->nr_segs -= kvec - i->kvec;		\
171 			i->kvec = kvec;				\
172 		} else if (iov_iter_is_xarray(i)) {		\
173 			struct bio_vec v;			\
174 			iterate_xarray(i, n, v, skip, (X))	\
175 		}						\
176 		i->count -= n;					\
177 		i->iov_offset = skip;				\
178 	}							\
179 }
180 
181 static int copyout(void __user *to, const void *from, size_t n)
182 {
183 	if (should_fail_usercopy())
184 		return n;
185 	if (access_ok(to, n)) {
186 		instrument_copy_to_user(to, from, n);
187 		n = raw_copy_to_user(to, from, n);
188 	}
189 	return n;
190 }
191 
192 static int copyin(void *to, const void __user *from, size_t n)
193 {
194 	if (should_fail_usercopy())
195 		return n;
196 	if (access_ok(from, n)) {
197 		instrument_copy_from_user(to, from, n);
198 		n = raw_copy_from_user(to, from, n);
199 	}
200 	return n;
201 }
202 
203 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
204 			 struct iov_iter *i)
205 {
206 	size_t skip, copy, left, wanted;
207 	const struct iovec *iov;
208 	char __user *buf;
209 	void *kaddr, *from;
210 
211 	if (unlikely(bytes > i->count))
212 		bytes = i->count;
213 
214 	if (unlikely(!bytes))
215 		return 0;
216 
217 	might_fault();
218 	wanted = bytes;
219 	iov = i->iov;
220 	skip = i->iov_offset;
221 	buf = iov->iov_base + skip;
222 	copy = min(bytes, iov->iov_len - skip);
223 
224 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
225 		kaddr = kmap_atomic(page);
226 		from = kaddr + offset;
227 
228 		/* first chunk, usually the only one */
229 		left = copyout(buf, from, copy);
230 		copy -= left;
231 		skip += copy;
232 		from += copy;
233 		bytes -= copy;
234 
235 		while (unlikely(!left && bytes)) {
236 			iov++;
237 			buf = iov->iov_base;
238 			copy = min(bytes, iov->iov_len);
239 			left = copyout(buf, from, copy);
240 			copy -= left;
241 			skip = copy;
242 			from += copy;
243 			bytes -= copy;
244 		}
245 		if (likely(!bytes)) {
246 			kunmap_atomic(kaddr);
247 			goto done;
248 		}
249 		offset = from - kaddr;
250 		buf += copy;
251 		kunmap_atomic(kaddr);
252 		copy = min(bytes, iov->iov_len - skip);
253 	}
254 	/* Too bad - revert to non-atomic kmap */
255 
256 	kaddr = kmap(page);
257 	from = kaddr + offset;
258 	left = copyout(buf, from, copy);
259 	copy -= left;
260 	skip += copy;
261 	from += copy;
262 	bytes -= copy;
263 	while (unlikely(!left && bytes)) {
264 		iov++;
265 		buf = iov->iov_base;
266 		copy = min(bytes, iov->iov_len);
267 		left = copyout(buf, from, copy);
268 		copy -= left;
269 		skip = copy;
270 		from += copy;
271 		bytes -= copy;
272 	}
273 	kunmap(page);
274 
275 done:
276 	if (skip == iov->iov_len) {
277 		iov++;
278 		skip = 0;
279 	}
280 	i->count -= wanted - bytes;
281 	i->nr_segs -= iov - i->iov;
282 	i->iov = iov;
283 	i->iov_offset = skip;
284 	return wanted - bytes;
285 }
286 
287 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
288 			 struct iov_iter *i)
289 {
290 	size_t skip, copy, left, wanted;
291 	const struct iovec *iov;
292 	char __user *buf;
293 	void *kaddr, *to;
294 
295 	if (unlikely(bytes > i->count))
296 		bytes = i->count;
297 
298 	if (unlikely(!bytes))
299 		return 0;
300 
301 	might_fault();
302 	wanted = bytes;
303 	iov = i->iov;
304 	skip = i->iov_offset;
305 	buf = iov->iov_base + skip;
306 	copy = min(bytes, iov->iov_len - skip);
307 
308 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
309 		kaddr = kmap_atomic(page);
310 		to = kaddr + offset;
311 
312 		/* first chunk, usually the only one */
313 		left = copyin(to, buf, copy);
314 		copy -= left;
315 		skip += copy;
316 		to += copy;
317 		bytes -= copy;
318 
319 		while (unlikely(!left && bytes)) {
320 			iov++;
321 			buf = iov->iov_base;
322 			copy = min(bytes, iov->iov_len);
323 			left = copyin(to, buf, copy);
324 			copy -= left;
325 			skip = copy;
326 			to += copy;
327 			bytes -= copy;
328 		}
329 		if (likely(!bytes)) {
330 			kunmap_atomic(kaddr);
331 			goto done;
332 		}
333 		offset = to - kaddr;
334 		buf += copy;
335 		kunmap_atomic(kaddr);
336 		copy = min(bytes, iov->iov_len - skip);
337 	}
338 	/* Too bad - revert to non-atomic kmap */
339 
340 	kaddr = kmap(page);
341 	to = kaddr + offset;
342 	left = copyin(to, buf, copy);
343 	copy -= left;
344 	skip += copy;
345 	to += copy;
346 	bytes -= copy;
347 	while (unlikely(!left && bytes)) {
348 		iov++;
349 		buf = iov->iov_base;
350 		copy = min(bytes, iov->iov_len);
351 		left = copyin(to, buf, copy);
352 		copy -= left;
353 		skip = copy;
354 		to += copy;
355 		bytes -= copy;
356 	}
357 	kunmap(page);
358 
359 done:
360 	if (skip == iov->iov_len) {
361 		iov++;
362 		skip = 0;
363 	}
364 	i->count -= wanted - bytes;
365 	i->nr_segs -= iov - i->iov;
366 	i->iov = iov;
367 	i->iov_offset = skip;
368 	return wanted - bytes;
369 }
370 
371 #ifdef PIPE_PARANOIA
372 static bool sanity(const struct iov_iter *i)
373 {
374 	struct pipe_inode_info *pipe = i->pipe;
375 	unsigned int p_head = pipe->head;
376 	unsigned int p_tail = pipe->tail;
377 	unsigned int p_mask = pipe->ring_size - 1;
378 	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
379 	unsigned int i_head = i->head;
380 	unsigned int idx;
381 
382 	if (i->iov_offset) {
383 		struct pipe_buffer *p;
384 		if (unlikely(p_occupancy == 0))
385 			goto Bad;	// pipe must be non-empty
386 		if (unlikely(i_head != p_head - 1))
387 			goto Bad;	// must be at the last buffer...
388 
389 		p = &pipe->bufs[i_head & p_mask];
390 		if (unlikely(p->offset + p->len != i->iov_offset))
391 			goto Bad;	// ... at the end of segment
392 	} else {
393 		if (i_head != p_head)
394 			goto Bad;	// must be right after the last buffer
395 	}
396 	return true;
397 Bad:
398 	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
399 	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
400 			p_head, p_tail, pipe->ring_size);
401 	for (idx = 0; idx < pipe->ring_size; idx++)
402 		printk(KERN_ERR "[%p %p %d %d]\n",
403 			pipe->bufs[idx].ops,
404 			pipe->bufs[idx].page,
405 			pipe->bufs[idx].offset,
406 			pipe->bufs[idx].len);
407 	WARN_ON(1);
408 	return false;
409 }
410 #else
411 #define sanity(i) true
412 #endif
413 
414 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
415 			 struct iov_iter *i)
416 {
417 	struct pipe_inode_info *pipe = i->pipe;
418 	struct pipe_buffer *buf;
419 	unsigned int p_tail = pipe->tail;
420 	unsigned int p_mask = pipe->ring_size - 1;
421 	unsigned int i_head = i->head;
422 	size_t off;
423 
424 	if (unlikely(bytes > i->count))
425 		bytes = i->count;
426 
427 	if (unlikely(!bytes))
428 		return 0;
429 
430 	if (!sanity(i))
431 		return 0;
432 
433 	off = i->iov_offset;
434 	buf = &pipe->bufs[i_head & p_mask];
435 	if (off) {
436 		if (offset == off && buf->page == page) {
437 			/* merge with the last one */
438 			buf->len += bytes;
439 			i->iov_offset += bytes;
440 			goto out;
441 		}
442 		i_head++;
443 		buf = &pipe->bufs[i_head & p_mask];
444 	}
445 	if (pipe_full(i_head, p_tail, pipe->max_usage))
446 		return 0;
447 
448 	buf->ops = &page_cache_pipe_buf_ops;
449 	get_page(page);
450 	buf->page = page;
451 	buf->offset = offset;
452 	buf->len = bytes;
453 
454 	pipe->head = i_head + 1;
455 	i->iov_offset = offset + bytes;
456 	i->head = i_head;
457 out:
458 	i->count -= bytes;
459 	return bytes;
460 }
461 
462 /*
463  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
464  * bytes.  For each iovec, fault in each page that constitutes the iovec.
465  *
466  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
467  * because it is an invalid address).
468  */
469 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
470 {
471 	size_t skip = i->iov_offset;
472 	const struct iovec *iov;
473 	int err;
474 	struct iovec v;
475 
476 	if (iter_is_iovec(i)) {
477 		iterate_iovec(i, bytes, v, iov, skip, ({
478 			err = fault_in_pages_readable(v.iov_base, v.iov_len);
479 			if (unlikely(err))
480 			return err;
481 				return err;
482 	}
483 	return 0;
484 }
485 EXPORT_SYMBOL(iov_iter_fault_in_readable);
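
/*
 * Illustrative sketch (not part of the original file): a buffered-write
 * loop along the lines of generic_perform_write() pre-faults the user pages
 * while no locks are held, so that the later atomic copy under the page
 * lock is unlikely to take a fault.  'status', 'page', 'offset' and 'bytes'
 * are placeholders for state kept by the surrounding loop.
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
 *		status = -EFAULT;
 *		break;
 *	}
 *	...
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 */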
486 
487 void iov_iter_init(struct iov_iter *i, unsigned int direction,
488 			const struct iovec *iov, unsigned long nr_segs,
489 			size_t count)
490 {
491 	WARN_ON(direction & ~(READ | WRITE));
492 	WARN_ON_ONCE(uaccess_kernel());
493 	*i = (struct iov_iter) {
494 		.iter_type = ITER_IOVEC,
495 		.data_source = direction,
496 		.iov = iov,
497 		.nr_segs = nr_segs,
498 		.iov_offset = 0,
499 		.count = count
500 	};
501 }
502 EXPORT_SYMBOL(iov_iter_init);
503 
504 static inline bool allocated(struct pipe_buffer *buf)
505 {
506 	return buf->ops == &default_pipe_buf_ops;
507 }
508 
509 static inline void data_start(const struct iov_iter *i,
510 			      unsigned int *iter_headp, size_t *offp)
511 {
512 	unsigned int p_mask = i->pipe->ring_size - 1;
513 	unsigned int iter_head = i->head;
514 	size_t off = i->iov_offset;
515 
516 	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
517 		    off == PAGE_SIZE)) {
518 		iter_head++;
519 		off = 0;
520 	}
521 	*iter_headp = iter_head;
522 	*offp = off;
523 }
524 
525 static size_t push_pipe(struct iov_iter *i, size_t size,
526 			int *iter_headp, size_t *offp)
527 {
528 	struct pipe_inode_info *pipe = i->pipe;
529 	unsigned int p_tail = pipe->tail;
530 	unsigned int p_mask = pipe->ring_size - 1;
531 	unsigned int iter_head;
532 	size_t off;
533 	ssize_t left;
534 
535 	if (unlikely(size > i->count))
536 		size = i->count;
537 	if (unlikely(!size))
538 		return 0;
539 
540 	left = size;
541 	data_start(i, &iter_head, &off);
542 	*iter_headp = iter_head;
543 	*offp = off;
544 	if (off) {
545 		left -= PAGE_SIZE - off;
546 		if (left <= 0) {
547 			pipe->bufs[iter_head & p_mask].len += size;
548 			return size;
549 		}
550 		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
551 		iter_head++;
552 	}
553 	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
554 		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
555 		struct page *page = alloc_page(GFP_USER);
556 		if (!page)
557 			break;
558 
559 		buf->ops = &default_pipe_buf_ops;
560 		buf->page = page;
561 		buf->offset = 0;
562 		buf->len = min_t(ssize_t, left, PAGE_SIZE);
563 		left -= buf->len;
564 		iter_head++;
565 		pipe->head = iter_head;
566 
567 		if (left == 0)
568 			return size;
569 	}
570 	return size - left;
571 }
572 
573 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
574 				struct iov_iter *i)
575 {
576 	struct pipe_inode_info *pipe = i->pipe;
577 	unsigned int p_mask = pipe->ring_size - 1;
578 	unsigned int i_head;
579 	size_t n, off;
580 
581 	if (!sanity(i))
582 		return 0;
583 
584 	bytes = n = push_pipe(i, bytes, &i_head, &off);
585 	if (unlikely(!n))
586 		return 0;
587 	do {
588 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
589 		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
590 		i->head = i_head;
591 		i->iov_offset = off + chunk;
592 		n -= chunk;
593 		addr += chunk;
594 		off = 0;
595 		i_head++;
596 	} while (n);
597 	i->count -= bytes;
598 	return bytes;
599 }
600 
601 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
602 			      __wsum sum, size_t off)
603 {
604 	__wsum next = csum_partial_copy_nocheck(from, to, len);
605 	return csum_block_add(sum, next, off);
606 }
607 
608 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
609 					 struct csum_state *csstate,
610 					 struct iov_iter *i)
611 {
612 	struct pipe_inode_info *pipe = i->pipe;
613 	unsigned int p_mask = pipe->ring_size - 1;
614 	__wsum sum = csstate->csum;
615 	size_t off = csstate->off;
616 	unsigned int i_head;
617 	size_t n, r;
618 
619 	if (!sanity(i))
620 		return 0;
621 
622 	bytes = n = push_pipe(i, bytes, &i_head, &r);
623 	if (unlikely(!n))
624 		return 0;
625 	do {
626 		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
627 		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
628 		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
629 		kunmap_atomic(p);
630 		i->head = i_head;
631 		i->iov_offset = r + chunk;
632 		n -= chunk;
633 		off += chunk;
634 		addr += chunk;
635 		r = 0;
636 		i_head++;
637 	} while (n);
638 	i->count -= bytes;
639 	csstate->csum = sum;
640 	csstate->off = off;
641 	return bytes;
642 }
643 
644 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
645 {
646 	const char *from = addr;
647 	if (unlikely(iov_iter_is_pipe(i)))
648 		return copy_pipe_to_iter(addr, bytes, i);
649 	if (iter_is_iovec(i))
650 		might_fault();
651 	iterate_and_advance(i, bytes, v,
652 		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
653 		memcpy_to_page(v.bv_page, v.bv_offset,
654 			       (from += v.bv_len) - v.bv_len, v.bv_len),
655 		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
656 		memcpy_to_page(v.bv_page, v.bv_offset,
657 			       (from += v.bv_len) - v.bv_len, v.bv_len)
658 	)
659 
660 	return bytes;
661 }
662 EXPORT_SYMBOL(_copy_to_iter);
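
/*
 * Illustrative sketch (not part of the original file): a minimal
 * ->read_iter() method that pushes a kernel buffer into the caller's
 * iterator through the copy_to_iter() wrapper.  'struct demo_dev' and its
 * 'buf'/'len' fields are hypothetical.
 *
 *	static ssize_t demo_read_iter(struct kiocb *iocb, struct iov_iter *to)
 *	{
 *		struct demo_dev *dev = iocb->ki_filp->private_data;
 *		size_t copied = copy_to_iter(dev->buf, dev->len, to);
 *
 *		if (!copied && iov_iter_count(to))
 *			return -EFAULT;
 *		return copied;
 *	}
 */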
663 
664 #ifdef CONFIG_ARCH_HAS_COPY_MC
665 static int copyout_mc(void __user *to, const void *from, size_t n)
666 {
667 	if (access_ok(to, n)) {
668 		instrument_copy_to_user(to, from, n);
669 		n = copy_mc_to_user((__force void *) to, from, n);
670 	}
671 	return n;
672 }
673 
674 static unsigned long copy_mc_to_page(struct page *page, size_t offset,
675 		const char *from, size_t len)
676 {
677 	unsigned long ret;
678 	char *to;
679 
680 	to = kmap_atomic(page);
681 	ret = copy_mc_to_kernel(to + offset, from, len);
682 	kunmap_atomic(to);
683 
684 	return ret;
685 }
686 
687 static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
688 				struct iov_iter *i)
689 {
690 	struct pipe_inode_info *pipe = i->pipe;
691 	unsigned int p_mask = pipe->ring_size - 1;
692 	unsigned int i_head;
693 	size_t n, off, xfer = 0;
694 
695 	if (!sanity(i))
696 		return 0;
697 
698 	bytes = n = push_pipe(i, bytes, &i_head, &off);
699 	if (unlikely(!n))
700 		return 0;
701 	do {
702 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
703 		unsigned long rem;
704 
705 		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
706 					    off, addr, chunk);
707 		i->head = i_head;
708 		i->iov_offset = off + chunk - rem;
709 		xfer += chunk - rem;
710 		if (rem)
711 			break;
712 		n -= chunk;
713 		addr += chunk;
714 		off = 0;
715 		i_head++;
716 	} while (n);
717 	i->count -= xfer;
718 	return xfer;
719 }
720 
721 /**
722  * _copy_mc_to_iter - copy to iter with source memory error exception handling
723  * @addr: source kernel address
724  * @bytes: total transfer length
725  * @i: destination iterator
726  *
727  * The pmem driver deploys this for the dax operation
728  * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
729  * block-layer). Upon #MC, read(2) aborts and returns EIO or the bytes
730  * successfully copied.
731  *
732  * The main differences between this and a typical _copy_to_iter() are:
733  *
734  * * Typical tail/residue handling after a fault retries the copy
735  *   byte-by-byte until the fault happens again. Re-triggering machine
736  *   checks is potentially fatal so the implementation uses source
737  *   alignment and poison alignment assumptions to avoid re-triggering
738  *   hardware exceptions.
739  *
740  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
741  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
742  *   a short copy.
743  */
744 size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
745 {
746 	const char *from = addr;
747 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
748 
749 	if (unlikely(iov_iter_is_pipe(i)))
750 		return copy_mc_pipe_to_iter(addr, bytes, i);
751 	if (iter_is_iovec(i))
752 		might_fault();
753 	iterate_and_advance(i, bytes, v,
754 		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
755 			   v.iov_len),
756 		({
757 		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
758 				      (from += v.bv_len) - v.bv_len, v.bv_len);
759 		if (rem) {
760 			curr_addr = (unsigned long) from;
761 			bytes = curr_addr - s_addr - rem;
762 			return bytes;
763 		}
764 		}),
765 		({
766 		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
767 					- v.iov_len, v.iov_len);
768 		if (rem) {
769 			curr_addr = (unsigned long) from;
770 			bytes = curr_addr - s_addr - rem;
771 			return bytes;
772 		}
773 		}),
774 		({
775 		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
776 				      (from += v.bv_len) - v.bv_len, v.bv_len);
777 		if (rem) {
778 			curr_addr = (unsigned long) from;
779 			bytes = curr_addr - s_addr - rem;
780 			rcu_read_unlock();
781 			i->iov_offset += bytes;
782 			i->count -= bytes;
783 			return bytes;
784 		}
785 		})
786 	)
787 
788 	return bytes;
789 }
790 EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
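
/*
 * Illustrative sketch (not part of the original file): roughly how a
 * pmem-style driver's dax_operations ->copy_to_iter method can forward to
 * _copy_mc_to_iter() so that a poisoned source is reported as a short copy
 * instead of re-triggering the machine check.  The function and argument
 * names are placeholders.
 *
 *	static size_t demo_dax_copy_to_iter(struct dax_device *dax_dev,
 *			pgoff_t pgoff, void *addr, size_t bytes,
 *			struct iov_iter *i)
 *	{
 *		return _copy_mc_to_iter(addr, bytes, i);
 *	}
 */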
791 #endif /* CONFIG_ARCH_HAS_COPY_MC */
792 
793 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
794 {
795 	char *to = addr;
796 	if (unlikely(iov_iter_is_pipe(i))) {
797 		WARN_ON(1);
798 		return 0;
799 	}
800 	if (iter_is_iovec(i))
801 		might_fault();
802 	iterate_and_advance(i, bytes, v,
803 		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
804 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
805 				 v.bv_offset, v.bv_len),
806 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
807 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
808 				 v.bv_offset, v.bv_len)
809 	)
810 
811 	return bytes;
812 }
813 EXPORT_SYMBOL(_copy_from_iter);
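
/*
 * Illustrative sketch (not part of the original file): a minimal
 * ->write_iter() method that gathers the (possibly scattered) source data
 * into one kernel buffer with copy_from_iter().  'demo_consume()' is
 * hypothetical.
 *
 *	static ssize_t demo_write_iter(struct kiocb *iocb, struct iov_iter *from)
 *	{
 *		size_t len = iov_iter_count(from);
 *		void *buf = kvmalloc(len, GFP_KERNEL);
 *
 *		if (!buf)
 *			return -ENOMEM;
 *		if (copy_from_iter(buf, len, from) != len) {
 *			kvfree(buf);
 *			return -EFAULT;
 *		}
 *		demo_consume(buf, len);
 *		kvfree(buf);
 *		return len;
 *	}
 */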
814 
815 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
816 {
817 	char *to = addr;
818 	if (unlikely(iov_iter_is_pipe(i))) {
819 		WARN_ON(1);
820 		return 0;
821 	}
822 	iterate_and_advance(i, bytes, v,
823 		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
824 					 v.iov_base, v.iov_len),
825 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
826 				 v.bv_offset, v.bv_len),
827 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
828 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
829 				 v.bv_offset, v.bv_len)
830 	)
831 
832 	return bytes;
833 }
834 EXPORT_SYMBOL(_copy_from_iter_nocache);
835 
836 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
837 /**
838  * _copy_from_iter_flushcache - write destination through cpu cache
839  * @addr: destination kernel address
840  * @bytes: total transfer length
841  * @i: source iterator
842  *
843  * The pmem driver arranges for filesystem-dax to use this facility via
844  * dax_copy_from_iter() for ensuring that writes to persistent memory
845  * are flushed through the CPU cache. It differs from
846  * _copy_from_iter_nocache() in that it guarantees all data is flushed for
847  * all iterator types, whereas _copy_from_iter_nocache() only attempts to
848  * bypass the cache for the ITER_IOVEC case, and on some archs may use
849  * instructions that strand dirty data in the cache.
850  */
851 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
852 {
853 	char *to = addr;
854 	if (unlikely(iov_iter_is_pipe(i))) {
855 		WARN_ON(1);
856 		return 0;
857 	}
858 	iterate_and_advance(i, bytes, v,
859 		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
860 					 v.iov_base, v.iov_len),
861 		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
862 				 v.bv_offset, v.bv_len),
863 		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
864 			v.iov_len),
865 		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
866 				 v.bv_offset, v.bv_len)
867 	)
868 
869 	return bytes;
870 }
871 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
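
/*
 * Illustrative sketch (not part of the original file): the matching dax
 * ->copy_from_iter method, in the spirit of the pmem usage described above,
 * forwarding to _copy_from_iter_flushcache() so the written data is pushed
 * out of the CPU cache towards persistent media.  Names are placeholders.
 *
 *	static size_t demo_dax_copy_from_iter(struct dax_device *dax_dev,
 *			pgoff_t pgoff, void *addr, size_t bytes,
 *			struct iov_iter *i)
 *	{
 *		return _copy_from_iter_flushcache(addr, bytes, i);
 *	}
 */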
872 #endif
873 
874 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
875 {
876 	struct page *head;
877 	size_t v = n + offset;
878 
879 	/*
880 	 * The general case needs to access the page order in order
881 	 * to compute the page size.
882 	 * However, we mostly deal with order-0 pages and thus can
883 	 * avoid a possible cache line miss for requests that fit all
884 	 * page orders.
885 	 */
886 	if (n <= v && v <= PAGE_SIZE)
887 		return true;
888 
889 	head = compound_head(page);
890 	v += (page - head) << PAGE_SHIFT;
891 
892 	if (likely(n <= v && v <= (page_size(head))))
893 		return true;
894 	WARN_ON(1);
895 	return false;
896 }
897 
898 static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
899 			 struct iov_iter *i)
900 {
901 	if (likely(iter_is_iovec(i)))
902 		return copy_page_to_iter_iovec(page, offset, bytes, i);
903 	if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
904 		void *kaddr = kmap_atomic(page);
905 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
906 		kunmap_atomic(kaddr);
907 		return wanted;
908 	}
909 	if (iov_iter_is_pipe(i))
910 		return copy_page_to_iter_pipe(page, offset, bytes, i);
911 	if (unlikely(iov_iter_is_discard(i))) {
912 		if (unlikely(i->count < bytes))
913 			bytes = i->count;
914 		i->count -= bytes;
915 		return bytes;
916 	}
917 	WARN_ON(1);
918 	return 0;
919 }
920 
921 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
922 			 struct iov_iter *i)
923 {
924 	size_t res = 0;
925 	if (unlikely(!page_copy_sane(page, offset, bytes)))
926 		return 0;
927 	page += offset / PAGE_SIZE; // first subpage
928 	offset %= PAGE_SIZE;
929 	while (1) {
930 		size_t n = __copy_page_to_iter(page, offset,
931 				min(bytes, (size_t)PAGE_SIZE - offset), i);
932 		res += n;
933 		bytes -= n;
934 		if (!bytes || !n)
935 			break;
936 		offset += n;
937 		if (offset == PAGE_SIZE) {
938 			page++;
939 			offset = 0;
940 		}
941 	}
942 	return res;
943 }
944 EXPORT_SYMBOL(copy_page_to_iter);
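
/*
 * Illustrative sketch (not part of the original file): the usual buffered
 * read pattern hands an up-to-date pagecache page to the caller's iterator
 * one chunk at a time.  'page', 'offset', 'bytes' and 'already_read' are
 * placeholders for state kept by the surrounding loop.
 *
 *	copied = copy_page_to_iter(page, offset, bytes, to);
 *	already_read += copied;
 *	if (copied < bytes)
 *		return already_read ? already_read : -EFAULT;
 */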
945 
946 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
947 			 struct iov_iter *i)
948 {
949 	if (unlikely(!page_copy_sane(page, offset, bytes)))
950 		return 0;
951 	if (likely(iter_is_iovec(i)))
952 		return copy_page_from_iter_iovec(page, offset, bytes, i);
953 	if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
954 		void *kaddr = kmap_atomic(page);
955 		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
956 		kunmap_atomic(kaddr);
957 		return wanted;
958 	}
959 	WARN_ON(1);
960 	return 0;
961 }
962 EXPORT_SYMBOL(copy_page_from_iter);
963 
964 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
965 {
966 	struct pipe_inode_info *pipe = i->pipe;
967 	unsigned int p_mask = pipe->ring_size - 1;
968 	unsigned int i_head;
969 	size_t n, off;
970 
971 	if (!sanity(i))
972 		return 0;
973 
974 	bytes = n = push_pipe(i, bytes, &i_head, &off);
975 	if (unlikely(!n))
976 		return 0;
977 
978 	do {
979 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
980 		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
981 		i->head = i_head;
982 		i->iov_offset = off + chunk;
983 		n -= chunk;
984 		off = 0;
985 		i_head++;
986 	} while (n);
987 	i->count -= bytes;
988 	return bytes;
989 }
990 
991 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
992 {
993 	if (unlikely(iov_iter_is_pipe(i)))
994 		return pipe_zero(bytes, i);
995 	iterate_and_advance(i, bytes, v,
996 		clear_user(v.iov_base, v.iov_len),
997 		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
998 		memset(v.iov_base, 0, v.iov_len),
999 		memzero_page(v.bv_page, v.bv_offset, v.bv_len)
1000 	)
1001 
1002 	return bytes;
1003 }
1004 EXPORT_SYMBOL(iov_iter_zero);
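
/*
 * Illustrative sketch (not part of the original file): a read that lands in
 * a hole (or runs past EOF) can satisfy the remainder of the request by
 * zero-filling the destination instead of copying.  'hole', 'kaddr' and
 * 'bytes' are placeholders.
 *
 *	if (hole)
 *		copied = iov_iter_zero(bytes, to);
 *	else
 *		copied = copy_to_iter(kaddr, bytes, to);
 */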
1005 
1006 size_t iov_iter_copy_from_user_atomic(struct page *page,
1007 		struct iov_iter *i, unsigned long offset, size_t bytes)
1008 {
1009 	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
1010 	if (unlikely(!page_copy_sane(page, offset, bytes))) {
1011 		kunmap_atomic(kaddr);
1012 		return 0;
1013 	}
1014 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1015 		kunmap_atomic(kaddr);
1016 		WARN_ON(1);
1017 		return 0;
1018 	}
1019 	iterate_all_kinds(i, bytes, v,
1020 		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1021 		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1022 				 v.bv_offset, v.bv_len),
1023 		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1024 		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1025 				 v.bv_offset, v.bv_len)
1026 	)
1027 	kunmap_atomic(kaddr);
1028 	return bytes;
1029 }
1030 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
1031 
1032 static inline void pipe_truncate(struct iov_iter *i)
1033 {
1034 	struct pipe_inode_info *pipe = i->pipe;
1035 	unsigned int p_tail = pipe->tail;
1036 	unsigned int p_head = pipe->head;
1037 	unsigned int p_mask = pipe->ring_size - 1;
1038 
1039 	if (!pipe_empty(p_head, p_tail)) {
1040 		struct pipe_buffer *buf;
1041 		unsigned int i_head = i->head;
1042 		size_t off = i->iov_offset;
1043 
1044 		if (off) {
1045 			buf = &pipe->bufs[i_head & p_mask];
1046 			buf->len = off - buf->offset;
1047 			i_head++;
1048 		}
1049 		while (p_head != i_head) {
1050 			p_head--;
1051 			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
1052 		}
1053 
1054 		pipe->head = p_head;
1055 	}
1056 }
1057 
1058 static void pipe_advance(struct iov_iter *i, size_t size)
1059 {
1060 	struct pipe_inode_info *pipe = i->pipe;
1061 	if (size) {
1062 		struct pipe_buffer *buf;
1063 		unsigned int p_mask = pipe->ring_size - 1;
1064 		unsigned int i_head = i->head;
1065 		size_t off = i->iov_offset, left = size;
1066 
1067 		if (off) /* make it relative to the beginning of buffer */
1068 			left += off - pipe->bufs[i_head & p_mask].offset;
1069 		while (1) {
1070 			buf = &pipe->bufs[i_head & p_mask];
1071 			if (left <= buf->len)
1072 				break;
1073 			left -= buf->len;
1074 			i_head++;
1075 		}
1076 		i->head = i_head;
1077 		i->iov_offset = buf->offset + left;
1078 	}
1079 	i->count -= size;
1080 	/* ... and discard everything past that point */
1081 	pipe_truncate(i);
1082 }
1083 
1084 static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
1085 {
1086 	struct bvec_iter bi;
1087 
1088 	bi.bi_size = i->count;
1089 	bi.bi_bvec_done = i->iov_offset;
1090 	bi.bi_idx = 0;
1091 	bvec_iter_advance(i->bvec, &bi, size);
1092 
1093 	i->bvec += bi.bi_idx;
1094 	i->nr_segs -= bi.bi_idx;
1095 	i->count = bi.bi_size;
1096 	i->iov_offset = bi.bi_bvec_done;
1097 }
1098 
1099 static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
1100 {
1101 	const struct iovec *iov, *end;
1102 
1103 	if (!i->count)
1104 		return;
1105 	i->count -= size;
1106 
1107 	size += i->iov_offset; // from beginning of current segment
1108 	for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) {
1109 		if (likely(size < iov->iov_len))
1110 			break;
1111 		size -= iov->iov_len;
1112 	}
1113 	i->iov_offset = size;
1114 	i->nr_segs -= iov - i->iov;
1115 	i->iov = iov;
1116 }
1117 
1118 void iov_iter_advance(struct iov_iter *i, size_t size)
1119 {
1120 	if (unlikely(i->count < size))
1121 		size = i->count;
1122 	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
1123 		/* iovec and kvec have identical layouts */
1124 		iov_iter_iovec_advance(i, size);
1125 	} else if (iov_iter_is_bvec(i)) {
1126 		iov_iter_bvec_advance(i, size);
1127 	} else if (iov_iter_is_pipe(i)) {
1128 		pipe_advance(i, size);
1129 	} else if (unlikely(iov_iter_is_xarray(i))) {
1130 		i->iov_offset += size;
1131 		i->count -= size;
1132 	} else if (iov_iter_is_discard(i)) {
1133 		i->count -= size;
1134 	}
1135 }
1136 EXPORT_SYMBOL(iov_iter_advance);
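
/*
 * Illustrative sketch (not part of the original file): when data is moved by
 * some means other than the copy helpers above (for example DMA straight
 * into pages obtained with iov_iter_get_pages()), the caller still has to
 * account for the transferred bytes by advancing the iterator itself.
 * 'done' is a placeholder for the byte count actually transferred.
 *
 *	iov_iter_advance(iter, done);
 *	if (iov_iter_count(iter))
 *		... submit the next chunk ...
 */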
1137 
1138 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1139 {
1140 	if (!unroll)
1141 		return;
1142 	if (WARN_ON(unroll > MAX_RW_COUNT))
1143 		return;
1144 	i->count += unroll;
1145 	if (unlikely(iov_iter_is_pipe(i))) {
1146 		struct pipe_inode_info *pipe = i->pipe;
1147 		unsigned int p_mask = pipe->ring_size - 1;
1148 		unsigned int i_head = i->head;
1149 		size_t off = i->iov_offset;
1150 		while (1) {
1151 			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
1152 			size_t n = off - b->offset;
1153 			if (unroll < n) {
1154 				off -= unroll;
1155 				break;
1156 			}
1157 			unroll -= n;
1158 			if (!unroll && i_head == i->start_head) {
1159 				off = 0;
1160 				break;
1161 			}
1162 			i_head--;
1163 			b = &pipe->bufs[i_head & p_mask];
1164 			off = b->offset + b->len;
1165 		}
1166 		i->iov_offset = off;
1167 		i->head = i_head;
1168 		pipe_truncate(i);
1169 		return;
1170 	}
1171 	if (unlikely(iov_iter_is_discard(i)))
1172 		return;
1173 	if (unroll <= i->iov_offset) {
1174 		i->iov_offset -= unroll;
1175 		return;
1176 	}
1177 	unroll -= i->iov_offset;
1178 	if (iov_iter_is_xarray(i)) {
1179 		BUG(); /* We should never go beyond the start of the specified
1180 			* range since we might then be straying into pages that
1181 			* aren't pinned.
1182 			*/
1183 	} else if (iov_iter_is_bvec(i)) {
1184 		const struct bio_vec *bvec = i->bvec;
1185 		while (1) {
1186 			size_t n = (--bvec)->bv_len;
1187 			i->nr_segs++;
1188 			if (unroll <= n) {
1189 				i->bvec = bvec;
1190 				i->iov_offset = n - unroll;
1191 				return;
1192 			}
1193 			unroll -= n;
1194 		}
1195 	} else { /* same logics for iovec and kvec */
1196 		const struct iovec *iov = i->iov;
1197 		while (1) {
1198 			size_t n = (--iov)->iov_len;
1199 			i->nr_segs++;
1200 			if (unroll <= n) {
1201 				i->iov = iov;
1202 				i->iov_offset = n - unroll;
1203 				return;
1204 			}
1205 			unroll -= n;
1206 		}
1207 	}
1208 }
1209 EXPORT_SYMBOL(iov_iter_revert);
1210 
1211 /*
1212  * Return the count of just the current iov_iter segment.
1213  */
1214 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1215 {
1216 	if (i->nr_segs > 1) {
1217 		if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1218 			return min(i->count, i->iov->iov_len - i->iov_offset);
1219 		if (iov_iter_is_bvec(i))
1220 			return min(i->count, i->bvec->bv_len - i->iov_offset);
1221 	}
1222 	return i->count;
1223 }
1224 EXPORT_SYMBOL(iov_iter_single_seg_count);
1225 
1226 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1227 			const struct kvec *kvec, unsigned long nr_segs,
1228 			size_t count)
1229 {
1230 	WARN_ON(direction & ~(READ | WRITE));
1231 	*i = (struct iov_iter){
1232 		.iter_type = ITER_KVEC,
1233 		.data_source = direction,
1234 		.kvec = kvec,
1235 		.nr_segs = nr_segs,
1236 		.iov_offset = 0,
1237 		.count = count
1238 	};
1239 }
1240 EXPORT_SYMBOL(iov_iter_kvec);
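
/*
 * Illustrative sketch (not part of the original file): building an ITER_KVEC
 * source iterator over two kernel buffers, e.g. a header plus a payload, so
 * a lower layer can consume both with a single copy_from_iter().  'hdr',
 * 'payload' and 'payload_len' are placeholders.
 *
 *	struct kvec kv[2] = {
 *		{ .iov_base = hdr,     .iov_len = sizeof(*hdr) },
 *		{ .iov_base = payload, .iov_len = payload_len },
 *	};
 *	struct iov_iter iter;
 *
 *	iov_iter_kvec(&iter, WRITE, kv, 2, sizeof(*hdr) + payload_len);
 */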
1241 
1242 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1243 			const struct bio_vec *bvec, unsigned long nr_segs,
1244 			size_t count)
1245 {
1246 	WARN_ON(direction & ~(READ | WRITE));
1247 	*i = (struct iov_iter){
1248 		.iter_type = ITER_BVEC,
1249 		.data_source = direction,
1250 		.bvec = bvec,
1251 		.nr_segs = nr_segs,
1252 		.iov_offset = 0,
1253 		.count = count
1254 	};
1255 }
1256 EXPORT_SYMBOL(iov_iter_bvec);
1257 
1258 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1259 			struct pipe_inode_info *pipe,
1260 			size_t count)
1261 {
1262 	BUG_ON(direction != READ);
1263 	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
1264 	*i = (struct iov_iter){
1265 		.iter_type = ITER_PIPE,
1266 		.data_source = false,
1267 		.pipe = pipe,
1268 		.head = pipe->head,
1269 		.start_head = pipe->head,
1270 		.iov_offset = 0,
1271 		.count = count
1272 	};
1273 }
1274 EXPORT_SYMBOL(iov_iter_pipe);
1275 
1276 /**
1277  * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
1278  * @i: The iterator to initialise.
1279  * @direction: The direction of the transfer.
1280  * @xarray: The xarray to access.
1281  * @start: The start file position.
1282  * @count: The size of the I/O buffer in bytes.
1283  *
1284  * Set up an I/O iterator to either draw data out of the pages attached to an
1285  * inode or to inject data into those pages.  The caller *must* prevent the
1286  * pages from being freed while the iterator is in use, either by taking a
1287  * ref on them or by locking them.
1288  */
1289 void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
1290 		     struct xarray *xarray, loff_t start, size_t count)
1291 {
1292 	BUG_ON(direction & ~1);
1293 	*i = (struct iov_iter) {
1294 		.iter_type = ITER_XARRAY,
1295 		.data_source = direction,
1296 		.xarray = xarray,
1297 		.xarray_start = start,
1298 		.count = count,
1299 		.iov_offset = 0
1300 	};
1301 }
1302 EXPORT_SYMBOL(iov_iter_xarray);
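
/*
 * Illustrative sketch (not part of the original file): a network-filesystem
 * read helper might describe a run of pagecache pages with an ITER_XARRAY
 * destination iterator and let the transport copy the reply straight into
 * them.  'mapping', 'pos' and 'len' are placeholders, and the pages covering
 * [pos, pos + len) are assumed to be locked or referenced as required above.
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, len);
 *	... receive into &iter ...
 */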
1303 
1304 /**
1305  * iov_iter_discard - Initialise an I/O iterator that discards data
1306  * @i: The iterator to initialise.
1307  * @direction: The direction of the transfer.
1308  * @count: The size of the I/O buffer in bytes.
1309  *
1310  * Set up an I/O iterator that just discards everything that's written to it.
1311  * It's only available as a READ iterator.
1312  */
1313 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1314 {
1315 	BUG_ON(direction != READ);
1316 	*i = (struct iov_iter){
1317 		.iter_type = ITER_DISCARD,
1318 		.data_source = false,
1319 		.count = count,
1320 		.iov_offset = 0
1321 	};
1322 }
1323 EXPORT_SYMBOL(iov_iter_discard);
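
/*
 * Illustrative sketch (not part of the original file): a receive path that
 * wants to skip 'resid' bytes it does not care about can point the producer
 * at a discard iterator; whatever gets "copied" into it is simply dropped
 * while the count is decremented.
 *
 *	struct iov_iter junk;
 *
 *	iov_iter_discard(&junk, READ, resid);
 *	... let the producer copy into &junk ...
 */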
1324 
1325 unsigned long iov_iter_alignment(const struct iov_iter *i)
1326 {
1327 	unsigned long res = 0;
1328 	size_t size = i->count;
1329 
1330 	if (unlikely(iov_iter_is_pipe(i))) {
1331 		unsigned int p_mask = i->pipe->ring_size - 1;
1332 
1333 		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
1334 			return size | i->iov_offset;
1335 		return size;
1336 	}
1337 	if (unlikely(iov_iter_is_xarray(i)))
1338 		return (i->xarray_start + i->iov_offset) | i->count;
1339 	iterate_all_kinds(i, size, v,
1340 		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
1341 		res |= v.bv_offset | v.bv_len,
1342 		res |= (unsigned long)v.iov_base | v.iov_len,
1343 		res |= v.bv_offset | v.bv_len
1344 	)
1345 	return res;
1346 }
1347 EXPORT_SYMBOL(iov_iter_alignment);
1348 
1349 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1350 {
1351 	unsigned long res = 0;
1352 	size_t size = i->count;
1353 
1354 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1355 		WARN_ON(1);
1356 		return ~0U;
1357 	}
1358 
1359 	iterate_all_kinds(i, size, v,
1360 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1361 			(size != v.iov_len ? size : 0), 0),
1362 		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1363 			(size != v.bv_len ? size : 0)),
1364 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1365 			(size != v.iov_len ? size : 0)),
1366 		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1367 			(size != v.bv_len ? size : 0))
1368 		);
1369 	return res;
1370 }
1371 EXPORT_SYMBOL(iov_iter_gap_alignment);
1372 
1373 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1374 				size_t maxsize,
1375 				struct page **pages,
1376 				int iter_head,
1377 				size_t *start)
1378 {
1379 	struct pipe_inode_info *pipe = i->pipe;
1380 	unsigned int p_mask = pipe->ring_size - 1;
1381 	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
1382 	if (!n)
1383 		return -EFAULT;
1384 
1385 	maxsize = n;
1386 	n += *start;
1387 	while (n > 0) {
1388 		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1389 		iter_head++;
1390 		n -= PAGE_SIZE;
1391 	}
1392 
1393 	return maxsize;
1394 }
1395 
1396 static ssize_t pipe_get_pages(struct iov_iter *i,
1397 		   struct page **pages, size_t maxsize, unsigned maxpages,
1398 		   size_t *start)
1399 {
1400 	unsigned int iter_head, npages;
1401 	size_t capacity;
1402 
1403 	if (!maxsize)
1404 		return 0;
1405 
1406 	if (!sanity(i))
1407 		return -EFAULT;
1408 
1409 	data_start(i, &iter_head, start);
1410 	/* Amount of free space: some of this one + all after this one */
1411 	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1412 	capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1413 
1414 	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
1415 }
1416 
1417 static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
1418 					  pgoff_t index, unsigned int nr_pages)
1419 {
1420 	XA_STATE(xas, xa, index);
1421 	struct page *page;
1422 	unsigned int ret = 0;
1423 
1424 	rcu_read_lock();
1425 	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1426 		if (xas_retry(&xas, page))
1427 			continue;
1428 
1429 		/* Has the page moved or been split? */
1430 		if (unlikely(page != xas_reload(&xas))) {
1431 			xas_reset(&xas);
1432 			continue;
1433 		}
1434 
1435 		pages[ret] = find_subpage(page, xas.xa_index);
1436 		get_page(pages[ret]);
1437 		if (++ret == nr_pages)
1438 			break;
1439 	}
1440 	rcu_read_unlock();
1441 	return ret;
1442 }
1443 
1444 static ssize_t iter_xarray_get_pages(struct iov_iter *i,
1445 				     struct page **pages, size_t maxsize,
1446 				     unsigned maxpages, size_t *_start_offset)
1447 {
1448 	unsigned nr, offset;
1449 	pgoff_t index, count;
1450 	size_t size = maxsize, actual;
1451 	loff_t pos;
1452 
1453 	if (!size || !maxpages)
1454 		return 0;
1455 
1456 	pos = i->xarray_start + i->iov_offset;
1457 	index = pos >> PAGE_SHIFT;
1458 	offset = pos & ~PAGE_MASK;
1459 	*_start_offset = offset;
1460 
1461 	count = 1;
1462 	if (size > PAGE_SIZE - offset) {
1463 		size -= PAGE_SIZE - offset;
1464 		count += size >> PAGE_SHIFT;
1465 		size &= ~PAGE_MASK;
1466 		if (size)
1467 			count++;
1468 	}
1469 
1470 	if (count > maxpages)
1471 		count = maxpages;
1472 
1473 	nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
1474 	if (nr == 0)
1475 		return 0;
1476 
1477 	actual = PAGE_SIZE * nr;
1478 	actual -= offset;
1479 	if (nr == count && size > 0) {
1480 		unsigned last_offset = (nr > 1) ? 0 : offset;
1481 		actual -= PAGE_SIZE - (last_offset + size);
1482 	}
1483 	return actual;
1484 }
1485 
1486 ssize_t iov_iter_get_pages(struct iov_iter *i,
1487 		   struct page **pages, size_t maxsize, unsigned maxpages,
1488 		   size_t *start)
1489 {
1490 	if (maxsize > i->count)
1491 		maxsize = i->count;
1492 
1493 	if (unlikely(iov_iter_is_pipe(i)))
1494 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
1495 	if (unlikely(iov_iter_is_xarray(i)))
1496 		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
1497 	if (unlikely(iov_iter_is_discard(i)))
1498 		return -EFAULT;
1499 
1500 	iterate_all_kinds(i, maxsize, v, ({
1501 		unsigned long addr = (unsigned long)v.iov_base;
1502 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1503 		int n;
1504 		int res;
1505 
1506 		if (len > maxpages * PAGE_SIZE)
1507 			len = maxpages * PAGE_SIZE;
1508 		addr &= ~(PAGE_SIZE - 1);
1509 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1510 		res = get_user_pages_fast(addr, n,
1511 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1512 				pages);
1513 		if (unlikely(res < 0))
1514 			return res;
1515 		return (res == n ? len : res * PAGE_SIZE) - *start;
1516 	0;}),({
1517 		/* can't be more than PAGE_SIZE */
1518 		*start = v.bv_offset;
1519 		get_page(*pages = v.bv_page);
1520 		return v.bv_len;
1521 	}),({
1522 		return -EFAULT;
1523 	}),
1524 	0
1525 	)
1526 	return 0;
1527 }
1528 EXPORT_SYMBOL(iov_iter_get_pages);
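
/*
 * Illustrative sketch (not part of the original file): a zero-copy path pins
 * the pages backing the next chunk of the iterator, uses them (e.g. for
 * DMA), and only then advances the iterator by the number of bytes obtained.
 * The caller owns the page references and must drop them with put_page().
 *
 *	struct page *pages[16];
 *	size_t offset;
 *	ssize_t bytes;
 *	int n, npages;
 *
 *	bytes = iov_iter_get_pages(i, pages, SIZE_MAX, ARRAY_SIZE(pages),
 *				   &offset);
 *	if (bytes <= 0)
 *		return bytes ? bytes : -EFAULT;
 *	npages = DIV_ROUND_UP(offset + bytes, PAGE_SIZE);
 *	... the data starts at 'offset' bytes into pages[0] ...
 *	iov_iter_advance(i, bytes);
 *	for (n = 0; n < npages; n++)
 *		put_page(pages[n]);
 */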
1529 
1530 static struct page **get_pages_array(size_t n)
1531 {
1532 	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1533 }
1534 
1535 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1536 		   struct page ***pages, size_t maxsize,
1537 		   size_t *start)
1538 {
1539 	struct page **p;
1540 	unsigned int iter_head, npages;
1541 	ssize_t n;
1542 
1543 	if (!maxsize)
1544 		return 0;
1545 
1546 	if (!sanity(i))
1547 		return -EFAULT;
1548 
1549 	data_start(i, &iter_head, start);
1550 	/* Amount of free space: some of this one + all after this one */
1551 	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1552 	n = npages * PAGE_SIZE - *start;
1553 	if (maxsize > n)
1554 		maxsize = n;
1555 	else
1556 		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1557 	p = get_pages_array(npages);
1558 	if (!p)
1559 		return -ENOMEM;
1560 	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1561 	if (n > 0)
1562 		*pages = p;
1563 	else
1564 		kvfree(p);
1565 	return n;
1566 }
1567 
1568 static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
1569 					   struct page ***pages, size_t maxsize,
1570 					   size_t *_start_offset)
1571 {
1572 	struct page **p;
1573 	unsigned nr, offset;
1574 	pgoff_t index, count;
1575 	size_t size = maxsize, actual;
1576 	loff_t pos;
1577 
1578 	if (!size)
1579 		return 0;
1580 
1581 	pos = i->xarray_start + i->iov_offset;
1582 	index = pos >> PAGE_SHIFT;
1583 	offset = pos & ~PAGE_MASK;
1584 	*_start_offset = offset;
1585 
1586 	count = 1;
1587 	if (size > PAGE_SIZE - offset) {
1588 		size -= PAGE_SIZE - offset;
1589 		count += size >> PAGE_SHIFT;
1590 		size &= ~PAGE_MASK;
1591 		if (size)
1592 			count++;
1593 	}
1594 
1595 	p = get_pages_array(count);
1596 	if (!p)
1597 		return -ENOMEM;
1598 	*pages = p;
1599 
1600 	nr = iter_xarray_populate_pages(p, i->xarray, index, count);
1601 	if (nr == 0)
1602 		return 0;
1603 
1604 	actual = PAGE_SIZE * nr;
1605 	actual -= offset;
1606 	if (nr == count && size > 0) {
1607 		unsigned last_offset = (nr > 1) ? 0 : offset;
1608 		actual -= PAGE_SIZE - (last_offset + size);
1609 	}
1610 	return actual;
1611 }
1612 
1613 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1614 		   struct page ***pages, size_t maxsize,
1615 		   size_t *start)
1616 {
1617 	struct page **p;
1618 
1619 	if (maxsize > i->count)
1620 		maxsize = i->count;
1621 
1622 	if (unlikely(iov_iter_is_pipe(i)))
1623 		return pipe_get_pages_alloc(i, pages, maxsize, start);
1624 	if (unlikely(iov_iter_is_xarray(i)))
1625 		return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
1626 	if (unlikely(iov_iter_is_discard(i)))
1627 		return -EFAULT;
1628 
1629 	iterate_all_kinds(i, maxsize, v, ({
1630 		unsigned long addr = (unsigned long)v.iov_base;
1631 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1632 		int n;
1633 		int res;
1634 
1635 		addr &= ~(PAGE_SIZE - 1);
1636 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1637 		p = get_pages_array(n);
1638 		if (!p)
1639 			return -ENOMEM;
1640 		res = get_user_pages_fast(addr, n,
1641 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1642 		if (unlikely(res < 0)) {
1643 			kvfree(p);
1644 			return res;
1645 		}
1646 		*pages = p;
1647 		return (res == n ? len : res * PAGE_SIZE) - *start;
1648 	0;}),({
1649 		/* can't be more than PAGE_SIZE */
1650 		*start = v.bv_offset;
1651 		*pages = p = get_pages_array(1);
1652 		if (!p)
1653 			return -ENOMEM;
1654 		get_page(*p = v.bv_page);
1655 		return v.bv_len;
1656 	}),({
1657 		return -EFAULT;
1658 	}), 0
1659 	)
1660 	return 0;
1661 }
1662 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1663 
1664 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1665 			       struct iov_iter *i)
1666 {
1667 	char *to = addr;
1668 	__wsum sum, next;
1669 	size_t off = 0;
1670 	sum = *csum;
1671 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1672 		WARN_ON(1);
1673 		return 0;
1674 	}
1675 	iterate_and_advance(i, bytes, v, ({
1676 		next = csum_and_copy_from_user(v.iov_base,
1677 					       (to += v.iov_len) - v.iov_len,
1678 					       v.iov_len);
1679 		if (next) {
1680 			sum = csum_block_add(sum, next, off);
1681 			off += v.iov_len;
1682 		}
1683 		next ? 0 : v.iov_len;
1684 	}), ({
1685 		char *p = kmap_atomic(v.bv_page);
1686 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1687 				      p + v.bv_offset, v.bv_len,
1688 				      sum, off);
1689 		kunmap_atomic(p);
1690 		off += v.bv_len;
1691 	}),({
1692 		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1693 				      v.iov_base, v.iov_len,
1694 				      sum, off);
1695 		off += v.iov_len;
1696 	}), ({
1697 		char *p = kmap_atomic(v.bv_page);
1698 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1699 				      p + v.bv_offset, v.bv_len,
1700 				      sum, off);
1701 		kunmap_atomic(p);
1702 		off += v.bv_len;
1703 	})
1704 	)
1705 	*csum = sum;
1706 	return bytes;
1707 }
1708 EXPORT_SYMBOL(csum_and_copy_from_iter);
1709 
1710 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1711 			     struct iov_iter *i)
1712 {
1713 	struct csum_state *csstate = _csstate;
1714 	const char *from = addr;
1715 	__wsum sum, next;
1716 	size_t off;
1717 
1718 	if (unlikely(iov_iter_is_pipe(i)))
1719 		return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
1720 
1721 	sum = csstate->csum;
1722 	off = csstate->off;
1723 	if (unlikely(iov_iter_is_discard(i))) {
1724 		WARN_ON(1);	/* for now */
1725 		return 0;
1726 	}
1727 	iterate_and_advance(i, bytes, v, ({
1728 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1729 					     v.iov_base,
1730 					     v.iov_len);
1731 		if (next) {
1732 			sum = csum_block_add(sum, next, off);
1733 			off += v.iov_len;
1734 		}
1735 		next ? 0 : v.iov_len;
1736 	}), ({
1737 		char *p = kmap_atomic(v.bv_page);
1738 		sum = csum_and_memcpy(p + v.bv_offset,
1739 				      (from += v.bv_len) - v.bv_len,
1740 				      v.bv_len, sum, off);
1741 		kunmap_atomic(p);
1742 		off += v.bv_len;
1743 	}),({
1744 		sum = csum_and_memcpy(v.iov_base,
1745 				     (from += v.iov_len) - v.iov_len,
1746 				     v.iov_len, sum, off);
1747 		off += v.iov_len;
1748 	}), ({
1749 		char *p = kmap_atomic(v.bv_page);
1750 		sum = csum_and_memcpy(p + v.bv_offset,
1751 				      (from += v.bv_len) - v.bv_len,
1752 				      v.bv_len, sum, off);
1753 		kunmap_atomic(p);
1754 		off += v.bv_len;
1755 	})
1756 	)
1757 	csstate->csum = sum;
1758 	csstate->off = off;
1759 	return bytes;
1760 }
1761 EXPORT_SYMBOL(csum_and_copy_to_iter);
1762 
1763 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1764 		struct iov_iter *i)
1765 {
1766 #ifdef CONFIG_CRYPTO_HASH
1767 	struct ahash_request *hash = hashp;
1768 	struct scatterlist sg;
1769 	size_t copied;
1770 
1771 	copied = copy_to_iter(addr, bytes, i);
1772 	sg_init_one(&sg, addr, copied);
1773 	ahash_request_set_crypt(hash, &sg, NULL, copied);
1774 	crypto_ahash_update(hash);
1775 	return copied;
1776 #else
1777 	return 0;
1778 #endif
1779 }
1780 EXPORT_SYMBOL(hash_and_copy_to_iter);
1781 
1782 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1783 {
1784 	size_t size = i->count;
1785 	int npages = 0;
1786 
1787 	if (!size)
1788 		return 0;
1789 	if (unlikely(iov_iter_is_discard(i)))
1790 		return 0;
1791 
1792 	if (unlikely(iov_iter_is_pipe(i))) {
1793 		struct pipe_inode_info *pipe = i->pipe;
1794 		unsigned int iter_head;
1795 		size_t off;
1796 
1797 		if (!sanity(i))
1798 			return 0;
1799 
1800 		data_start(i, &iter_head, &off);
1801 		/* some of this one + all after this one */
1802 		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
1803 		if (npages >= maxpages)
1804 			return maxpages;
1805 	} else if (unlikely(iov_iter_is_xarray(i))) {
1806 		unsigned offset;
1807 
1808 		offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;
1809 
1810 		npages = 1;
1811 		if (size > PAGE_SIZE - offset) {
1812 			size -= PAGE_SIZE - offset;
1813 			npages += size >> PAGE_SHIFT;
1814 			size &= ~PAGE_MASK;
1815 			if (size)
1816 				npages++;
1817 		}
1818 		if (npages >= maxpages)
1819 			return maxpages;
1820 	} else iterate_all_kinds(i, size, v, ({
1821 		unsigned long p = (unsigned long)v.iov_base;
1822 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1823 			- p / PAGE_SIZE;
1824 		if (npages >= maxpages)
1825 			return maxpages;
1826 	0;}),({
1827 		npages++;
1828 		if (npages >= maxpages)
1829 			return maxpages;
1830 	}),({
1831 		unsigned long p = (unsigned long)v.iov_base;
1832 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1833 			- p / PAGE_SIZE;
1834 		if (npages >= maxpages)
1835 			return maxpages;
1836 	}),
1837 	0
1838 	)
1839 	return npages;
1840 }
1841 EXPORT_SYMBOL(iov_iter_npages);
1842 
1843 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1844 {
1845 	*new = *old;
1846 	if (unlikely(iov_iter_is_pipe(new))) {
1847 		WARN_ON(1);
1848 		return NULL;
1849 	}
1850 	if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
1851 		return NULL;
1852 	if (iov_iter_is_bvec(new))
1853 		return new->bvec = kmemdup(new->bvec,
1854 				    new->nr_segs * sizeof(struct bio_vec),
1855 				    flags);
1856 	else
1857 		/* iovec and kvec have identical layout */
1858 		return new->iov = kmemdup(new->iov,
1859 				   new->nr_segs * sizeof(struct iovec),
1860 				   flags);
1861 }
1862 EXPORT_SYMBOL(dup_iter);
1863 
1864 static int copy_compat_iovec_from_user(struct iovec *iov,
1865 		const struct iovec __user *uvec, unsigned long nr_segs)
1866 {
1867 	const struct compat_iovec __user *uiov =
1868 		(const struct compat_iovec __user *)uvec;
1869 	int ret = -EFAULT, i;
1870 
1871 	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
1872 		return -EFAULT;
1873 
1874 	for (i = 0; i < nr_segs; i++) {
1875 		compat_uptr_t buf;
1876 		compat_ssize_t len;
1877 
1878 		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1879 		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1880 
1881 		/* check for compat_size_t not fitting in compat_ssize_t .. */
1882 		if (len < 0) {
1883 			ret = -EINVAL;
1884 			goto uaccess_end;
1885 		}
1886 		iov[i].iov_base = compat_ptr(buf);
1887 		iov[i].iov_len = len;
1888 	}
1889 
1890 	ret = 0;
1891 uaccess_end:
1892 	user_access_end();
1893 	return ret;
1894 }
1895 
1896 static int copy_iovec_from_user(struct iovec *iov,
1897 		const struct iovec __user *uvec, unsigned long nr_segs)
1898 {
1899 	unsigned long seg;
1900 
1901 	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1902 		return -EFAULT;
1903 	for (seg = 0; seg < nr_segs; seg++) {
1904 		if ((ssize_t)iov[seg].iov_len < 0)
1905 			return -EINVAL;
1906 	}
1907 
1908 	return 0;
1909 }
1910 
1911 struct iovec *iovec_from_user(const struct iovec __user *uvec,
1912 		unsigned long nr_segs, unsigned long fast_segs,
1913 		struct iovec *fast_iov, bool compat)
1914 {
1915 	struct iovec *iov = fast_iov;
1916 	int ret;
1917 
1918 	/*
1919 	 * SuS says "The readv() function *may* fail if the iovcnt argument was
1920 	 * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
1921 	 * traditionally returned zero for zero segments, so...
1922 	 */
1923 	if (nr_segs == 0)
1924 		return iov;
1925 	if (nr_segs > UIO_MAXIOV)
1926 		return ERR_PTR(-EINVAL);
1927 	if (nr_segs > fast_segs) {
1928 		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
1929 		if (!iov)
1930 			return ERR_PTR(-ENOMEM);
1931 	}
1932 
1933 	if (compat)
1934 		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1935 	else
1936 		ret = copy_iovec_from_user(iov, uvec, nr_segs);
1937 	if (ret) {
1938 		if (iov != fast_iov)
1939 			kfree(iov);
1940 		return ERR_PTR(ret);
1941 	}
1942 
1943 	return iov;
1944 }
1945 
1946 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
1947 		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
1948 		 struct iov_iter *i, bool compat)
1949 {
1950 	ssize_t total_len = 0;
1951 	unsigned long seg;
1952 	struct iovec *iov;
1953 
1954 	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
1955 	if (IS_ERR(iov)) {
1956 		*iovp = NULL;
1957 		return PTR_ERR(iov);
1958 	}
1959 
1960 	/*
1961 	 * According to the Single Unix Specification we should return EINVAL if
1962 	 * an element length is < 0 when cast to ssize_t or if the total length
1963 	 * would overflow the ssize_t return value of the system call.
1964 	 *
1965 	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
1966 	 * overflow case.
1967 	 */
1968 	for (seg = 0; seg < nr_segs; seg++) {
1969 		ssize_t len = (ssize_t)iov[seg].iov_len;
1970 
1971 		if (!access_ok(iov[seg].iov_base, len)) {
1972 			if (iov != *iovp)
1973 				kfree(iov);
1974 			*iovp = NULL;
1975 			return -EFAULT;
1976 		}
1977 
1978 		if (len > MAX_RW_COUNT - total_len) {
1979 			len = MAX_RW_COUNT - total_len;
1980 			iov[seg].iov_len = len;
1981 		}
1982 		total_len += len;
1983 	}
1984 
1985 	iov_iter_init(i, type, iov, nr_segs, total_len);
1986 	if (iov == *iovp)
1987 		*iovp = NULL;
1988 	else
1989 		*iovp = iov;
1990 	return total_len;
1991 }
1992 
1993 /**
1994  * import_iovec() - Copy an array of &struct iovec from userspace
1995  *     into the kernel, check that it is valid, and initialize a new
1996  *     &struct iov_iter iterator to access it.
1997  *
1998  * @type: One of %READ or %WRITE.
1999  * @uvec: Pointer to the userspace array.
2000  * @nr_segs: Number of elements in userspace array.
2001  * @fast_segs: Number of elements in the array pointed to by *@iovp.
2002  * @iovp: (input and output parameter) Pointer to pointer to (usually small
2003  *     on-stack) kernel array.
2004  * @i: Pointer to iterator that will be initialized on success.
2005  *
2006  * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
2007  * then this function places %NULL in *@iovp on return. Otherwise, a new
2008  * array will be allocated and the result placed in *@iovp. This means that
2009  * the caller may call kfree() on *@iovp regardless of whether the small
2010  * on-stack array was used or not (and regardless of whether this function
2011  * returns an error or not).
2012  *
2013  * Return: Negative error code on error, bytes imported on success
2014  */
2015 ssize_t import_iovec(int type, const struct iovec __user *uvec,
2016 		 unsigned nr_segs, unsigned fast_segs,
2017 		 struct iovec **iovp, struct iov_iter *i)
2018 {
2019 	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
2020 			      in_compat_syscall());
2021 }
2022 EXPORT_SYMBOL(import_iovec);
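
/*
 * Illustrative sketch (not part of the original file): the usual
 * readv()-style pattern.  import_iovec() either fills the small on-stack
 * array or allocates a bigger one, so the caller can unconditionally kfree()
 * the returned pointer afterwards.  'do_demo_read()' is hypothetical.
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
 *			   &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_demo_read(file, &iter);
 *	kfree(iov);
 *	return ret;
 */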
2023 
2024 int import_single_range(int rw, void __user *buf, size_t len,
2025 		 struct iovec *iov, struct iov_iter *i)
2026 {
2027 	if (len > MAX_RW_COUNT)
2028 		len = MAX_RW_COUNT;
2029 	if (unlikely(!access_ok(buf, len)))
2030 		return -EFAULT;
2031 
2032 	iov->iov_base = buf;
2033 	iov->iov_len = len;
2034 	iov_iter_init(i, rw, iov, 1, len);
2035 	return 0;
2036 }
2037 EXPORT_SYMBOL(import_single_range);
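
/*
 * Illustrative sketch (not part of the original file): the single-buffer
 * read(2) flavour of the above, for callers that have just one user pointer
 * and length.  'do_demo_read()' is hypothetical.
 *
 *	struct iovec iov;
 *	struct iov_iter iter;
 *	int ret;
 *
 *	ret = import_single_range(READ, buf, len, &iov, &iter);
 *	if (unlikely(ret))
 *		return ret;
 *	return do_demo_read(file, &iter);
 */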
2038