xref: /openbmc/linux/lib/iov_iter.c (revision 8cd54c1c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <crypto/hash.h>
3 #include <linux/export.h>
4 #include <linux/bvec.h>
5 #include <linux/fault-inject-usercopy.h>
6 #include <linux/uio.h>
7 #include <linux/pagemap.h>
8 #include <linux/highmem.h>
9 #include <linux/slab.h>
10 #include <linux/vmalloc.h>
11 #include <linux/splice.h>
12 #include <linux/compat.h>
13 #include <net/checksum.h>
14 #include <linux/scatterlist.h>
15 #include <linux/instrumented.h>
16 
17 #define PIPE_PARANOIA /* for now */
18 
19 #define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
20 	size_t left;					\
21 	size_t wanted = n;				\
22 	__p = i->iov;					\
23 	__v.iov_len = min(n, __p->iov_len - skip);	\
24 	if (likely(__v.iov_len)) {			\
25 		__v.iov_base = __p->iov_base + skip;	\
26 		left = (STEP);				\
27 		__v.iov_len -= left;			\
28 		skip += __v.iov_len;			\
29 		n -= __v.iov_len;			\
30 	} else {					\
31 		left = 0;				\
32 	}						\
33 	while (unlikely(!left && n)) {			\
34 		__p++;					\
35 		__v.iov_len = min(n, __p->iov_len);	\
36 		if (unlikely(!__v.iov_len))		\
37 			continue;			\
38 		__v.iov_base = __p->iov_base;		\
39 		left = (STEP);				\
40 		__v.iov_len -= left;			\
41 		skip = __v.iov_len;			\
42 		n -= __v.iov_len;			\
43 	}						\
44 	n = wanted - n;					\
45 }
46 
47 #define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
48 	size_t wanted = n;				\
49 	__p = i->kvec;					\
50 	__v.iov_len = min(n, __p->iov_len - skip);	\
51 	if (likely(__v.iov_len)) {			\
52 		__v.iov_base = __p->iov_base + skip;	\
53 		(void)(STEP);				\
54 		skip += __v.iov_len;			\
55 		n -= __v.iov_len;			\
56 	}						\
57 	while (unlikely(n)) {				\
58 		__p++;					\
59 		__v.iov_len = min(n, __p->iov_len);	\
60 		if (unlikely(!__v.iov_len))		\
61 			continue;			\
62 		__v.iov_base = __p->iov_base;		\
63 		(void)(STEP);				\
64 		skip = __v.iov_len;			\
65 		n -= __v.iov_len;			\
66 	}						\
67 	n = wanted;					\
68 }
69 
70 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
71 	struct bvec_iter __start;			\
72 	__start.bi_size = n;				\
73 	__start.bi_bvec_done = skip;			\
74 	__start.bi_idx = 0;				\
75 	for_each_bvec(__v, i->bvec, __bi, __start) {	\
76 		(void)(STEP);				\
77 	}						\
78 }
79 
80 #define iterate_xarray(i, n, __v, skip, STEP) {		\
81 	struct page *head = NULL;				\
82 	size_t wanted = n, seg, offset;				\
83 	loff_t start = i->xarray_start + skip;			\
84 	pgoff_t index = start >> PAGE_SHIFT;			\
85 	int j;							\
86 								\
87 	XA_STATE(xas, i->xarray, index);			\
88 								\
89 	rcu_read_lock();						\
90 	xas_for_each(&xas, head, ULONG_MAX) {				\
91 		if (xas_retry(&xas, head))				\
92 			continue;					\
93 		if (WARN_ON(xa_is_value(head)))				\
94 			break;						\
95 		if (WARN_ON(PageHuge(head)))				\
96 			break;						\
97 		for (j = (head->index < index) ? index - head->index : 0; \
98 		     j < thp_nr_pages(head); j++) {			\
99 			__v.bv_page = head + j;				\
100 			offset = (i->xarray_start + skip) & ~PAGE_MASK;	\
101 			seg = PAGE_SIZE - offset;			\
102 			__v.bv_offset = offset;				\
103 			__v.bv_len = min(n, seg);			\
104 			(void)(STEP);					\
105 			n -= __v.bv_len;				\
106 			skip += __v.bv_len;				\
107 			if (n == 0)					\
108 				break;					\
109 		}							\
110 		if (n == 0)						\
111 			break;						\
112 	}							\
113 	rcu_read_unlock();					\
114 	n = wanted - n;						\
115 }
116 
117 #define iterate_all_kinds(i, n, v, I, B, K, X) {		\
118 	if (likely(n)) {					\
119 		size_t skip = i->iov_offset;			\
120 		if (likely(iter_is_iovec(i))) {			\
121 			const struct iovec *iov;		\
122 			struct iovec v;				\
123 			iterate_iovec(i, n, v, iov, skip, (I))	\
124 		} else if (iov_iter_is_bvec(i)) {		\
125 			struct bio_vec v;			\
126 			struct bvec_iter __bi;			\
127 			iterate_bvec(i, n, v, __bi, skip, (B))	\
128 		} else if (iov_iter_is_kvec(i)) {		\
129 			const struct kvec *kvec;		\
130 			struct kvec v;				\
131 			iterate_kvec(i, n, v, kvec, skip, (K))	\
132 		} else if (iov_iter_is_xarray(i)) {		\
133 			struct bio_vec v;			\
134 			iterate_xarray(i, n, v, skip, (X));	\
135 		}						\
136 	}							\
137 }
138 
139 #define iterate_and_advance(i, n, v, I, B, K, X) {		\
140 	if (unlikely(i->count < n))				\
141 		n = i->count;					\
142 	if (i->count) {						\
143 		size_t skip = i->iov_offset;			\
144 		if (likely(iter_is_iovec(i))) {			\
145 			const struct iovec *iov;		\
146 			struct iovec v;				\
147 			iterate_iovec(i, n, v, iov, skip, (I))	\
148 			if (skip == iov->iov_len) {		\
149 				iov++;				\
150 				skip = 0;			\
151 			}					\
152 			i->nr_segs -= iov - i->iov;		\
153 			i->iov = iov;				\
154 		} else if (iov_iter_is_bvec(i)) {		\
155 			const struct bio_vec *bvec = i->bvec;	\
156 			struct bio_vec v;			\
157 			struct bvec_iter __bi;			\
158 			iterate_bvec(i, n, v, __bi, skip, (B))	\
159 			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
160 			i->nr_segs -= i->bvec - bvec;		\
161 			skip = __bi.bi_bvec_done;		\
162 		} else if (iov_iter_is_kvec(i)) {		\
163 			const struct kvec *kvec;		\
164 			struct kvec v;				\
165 			iterate_kvec(i, n, v, kvec, skip, (K))	\
166 			if (skip == kvec->iov_len) {		\
167 				kvec++;				\
168 				skip = 0;			\
169 			}					\
170 			i->nr_segs -= kvec - i->kvec;		\
171 			i->kvec = kvec;				\
172 		} else if (iov_iter_is_xarray(i)) {		\
173 			struct bio_vec v;			\
174 			iterate_xarray(i, n, v, skip, (X))	\
175 		}						\
176 		i->count -= n;					\
177 		i->iov_offset = skip;				\
178 	}							\
179 }
180 
181 static int copyout(void __user *to, const void *from, size_t n)
182 {
183 	if (should_fail_usercopy())
184 		return n;
185 	if (access_ok(to, n)) {
186 		instrument_copy_to_user(to, from, n);
187 		n = raw_copy_to_user(to, from, n);
188 	}
189 	return n;
190 }
191 
192 static int copyin(void *to, const void __user *from, size_t n)
193 {
194 	if (should_fail_usercopy())
195 		return n;
196 	if (access_ok(from, n)) {
197 		instrument_copy_from_user(to, from, n);
198 		n = raw_copy_from_user(to, from, n);
199 	}
200 	return n;
201 }
202 
203 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
204 			 struct iov_iter *i)
205 {
206 	size_t skip, copy, left, wanted;
207 	const struct iovec *iov;
208 	char __user *buf;
209 	void *kaddr, *from;
210 
211 	if (unlikely(bytes > i->count))
212 		bytes = i->count;
213 
214 	if (unlikely(!bytes))
215 		return 0;
216 
217 	might_fault();
218 	wanted = bytes;
219 	iov = i->iov;
220 	skip = i->iov_offset;
221 	buf = iov->iov_base + skip;
222 	copy = min(bytes, iov->iov_len - skip);
223 
224 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
225 		kaddr = kmap_atomic(page);
226 		from = kaddr + offset;
227 
228 		/* first chunk, usually the only one */
229 		left = copyout(buf, from, copy);
230 		copy -= left;
231 		skip += copy;
232 		from += copy;
233 		bytes -= copy;
234 
235 		while (unlikely(!left && bytes)) {
236 			iov++;
237 			buf = iov->iov_base;
238 			copy = min(bytes, iov->iov_len);
239 			left = copyout(buf, from, copy);
240 			copy -= left;
241 			skip = copy;
242 			from += copy;
243 			bytes -= copy;
244 		}
245 		if (likely(!bytes)) {
246 			kunmap_atomic(kaddr);
247 			goto done;
248 		}
249 		offset = from - kaddr;
250 		buf += copy;
251 		kunmap_atomic(kaddr);
252 		copy = min(bytes, iov->iov_len - skip);
253 	}
254 	/* Too bad - revert to non-atomic kmap */
255 
256 	kaddr = kmap(page);
257 	from = kaddr + offset;
258 	left = copyout(buf, from, copy);
259 	copy -= left;
260 	skip += copy;
261 	from += copy;
262 	bytes -= copy;
263 	while (unlikely(!left && bytes)) {
264 		iov++;
265 		buf = iov->iov_base;
266 		copy = min(bytes, iov->iov_len);
267 		left = copyout(buf, from, copy);
268 		copy -= left;
269 		skip = copy;
270 		from += copy;
271 		bytes -= copy;
272 	}
273 	kunmap(page);
274 
275 done:
276 	if (skip == iov->iov_len) {
277 		iov++;
278 		skip = 0;
279 	}
280 	i->count -= wanted - bytes;
281 	i->nr_segs -= iov - i->iov;
282 	i->iov = iov;
283 	i->iov_offset = skip;
284 	return wanted - bytes;
285 }
286 
287 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
288 			 struct iov_iter *i)
289 {
290 	size_t skip, copy, left, wanted;
291 	const struct iovec *iov;
292 	char __user *buf;
293 	void *kaddr, *to;
294 
295 	if (unlikely(bytes > i->count))
296 		bytes = i->count;
297 
298 	if (unlikely(!bytes))
299 		return 0;
300 
301 	might_fault();
302 	wanted = bytes;
303 	iov = i->iov;
304 	skip = i->iov_offset;
305 	buf = iov->iov_base + skip;
306 	copy = min(bytes, iov->iov_len - skip);
307 
308 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
309 		kaddr = kmap_atomic(page);
310 		to = kaddr + offset;
311 
312 		/* first chunk, usually the only one */
313 		left = copyin(to, buf, copy);
314 		copy -= left;
315 		skip += copy;
316 		to += copy;
317 		bytes -= copy;
318 
319 		while (unlikely(!left && bytes)) {
320 			iov++;
321 			buf = iov->iov_base;
322 			copy = min(bytes, iov->iov_len);
323 			left = copyin(to, buf, copy);
324 			copy -= left;
325 			skip = copy;
326 			to += copy;
327 			bytes -= copy;
328 		}
329 		if (likely(!bytes)) {
330 			kunmap_atomic(kaddr);
331 			goto done;
332 		}
333 		offset = to - kaddr;
334 		buf += copy;
335 		kunmap_atomic(kaddr);
336 		copy = min(bytes, iov->iov_len - skip);
337 	}
338 	/* Too bad - revert to non-atomic kmap */
339 
340 	kaddr = kmap(page);
341 	to = kaddr + offset;
342 	left = copyin(to, buf, copy);
343 	copy -= left;
344 	skip += copy;
345 	to += copy;
346 	bytes -= copy;
347 	while (unlikely(!left && bytes)) {
348 		iov++;
349 		buf = iov->iov_base;
350 		copy = min(bytes, iov->iov_len);
351 		left = copyin(to, buf, copy);
352 		copy -= left;
353 		skip = copy;
354 		to += copy;
355 		bytes -= copy;
356 	}
357 	kunmap(page);
358 
359 done:
360 	if (skip == iov->iov_len) {
361 		iov++;
362 		skip = 0;
363 	}
364 	i->count -= wanted - bytes;
365 	i->nr_segs -= iov - i->iov;
366 	i->iov = iov;
367 	i->iov_offset = skip;
368 	return wanted - bytes;
369 }
370 
371 #ifdef PIPE_PARANOIA
372 static bool sanity(const struct iov_iter *i)
373 {
374 	struct pipe_inode_info *pipe = i->pipe;
375 	unsigned int p_head = pipe->head;
376 	unsigned int p_tail = pipe->tail;
377 	unsigned int p_mask = pipe->ring_size - 1;
378 	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
379 	unsigned int i_head = i->head;
380 	unsigned int idx;
381 
382 	if (i->iov_offset) {
383 		struct pipe_buffer *p;
384 		if (unlikely(p_occupancy == 0))
385 			goto Bad;	// pipe must be non-empty
386 		if (unlikely(i_head != p_head - 1))
387 			goto Bad;	// must be at the last buffer...
388 
389 		p = &pipe->bufs[i_head & p_mask];
390 		if (unlikely(p->offset + p->len != i->iov_offset))
391 			goto Bad;	// ... at the end of segment
392 	} else {
393 		if (i_head != p_head)
394 			goto Bad;	// must be right after the last buffer
395 	}
396 	return true;
397 Bad:
398 	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
399 	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
400 			p_head, p_tail, pipe->ring_size);
401 	for (idx = 0; idx < pipe->ring_size; idx++)
402 		printk(KERN_ERR "[%p %p %d %d]\n",
403 			pipe->bufs[idx].ops,
404 			pipe->bufs[idx].page,
405 			pipe->bufs[idx].offset,
406 			pipe->bufs[idx].len);
407 	WARN_ON(1);
408 	return false;
409 }
410 #else
411 #define sanity(i) true
412 #endif
413 
414 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
415 			 struct iov_iter *i)
416 {
417 	struct pipe_inode_info *pipe = i->pipe;
418 	struct pipe_buffer *buf;
419 	unsigned int p_tail = pipe->tail;
420 	unsigned int p_mask = pipe->ring_size - 1;
421 	unsigned int i_head = i->head;
422 	size_t off;
423 
424 	if (unlikely(bytes > i->count))
425 		bytes = i->count;
426 
427 	if (unlikely(!bytes))
428 		return 0;
429 
430 	if (!sanity(i))
431 		return 0;
432 
433 	off = i->iov_offset;
434 	buf = &pipe->bufs[i_head & p_mask];
435 	if (off) {
436 		if (offset == off && buf->page == page) {
437 			/* merge with the last one */
438 			buf->len += bytes;
439 			i->iov_offset += bytes;
440 			goto out;
441 		}
442 		i_head++;
443 		buf = &pipe->bufs[i_head & p_mask];
444 	}
445 	if (pipe_full(i_head, p_tail, pipe->max_usage))
446 		return 0;
447 
448 	buf->ops = &page_cache_pipe_buf_ops;
449 	get_page(page);
450 	buf->page = page;
451 	buf->offset = offset;
452 	buf->len = bytes;
453 
454 	pipe->head = i_head + 1;
455 	i->iov_offset = offset + bytes;
456 	i->head = i_head;
457 out:
458 	i->count -= bytes;
459 	return bytes;
460 }
461 
462 /*
463  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
464  * @bytes.  For each iovec, fault in each page that constitutes the iovec.
465  *
466  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
467  * because it is an invalid address).
468  */
469 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
470 {
471 	size_t skip = i->iov_offset;
472 	const struct iovec *iov;
473 	int err;
474 	struct iovec v;
475 
476 	if (iter_is_iovec(i)) {
477 		iterate_iovec(i, bytes, v, iov, skip, ({
478 			err = fault_in_pages_readable(v.iov_base, v.iov_len);
479 			if (unlikely(err))
480 			return err;
481 		0;}))
482 	}
483 	return 0;
484 }
485 EXPORT_SYMBOL(iov_iter_fault_in_readable);
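
/*
 * Example (illustrative sketch): the usual caller of
 * iov_iter_fault_in_readable() is a buffered-write style loop that
 * pre-faults the user pages, copies from the iterator with page faults
 * disabled, and retries when the atomic copy comes up short.  The
 * surrounding variables (page, offset, status) are hypothetical.
 *
 *	size_t bytes = min_t(size_t, PAGE_SIZE - offset, iov_iter_count(i));
 *	size_t copied;
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
 *		status = -EFAULT;
 *		break;
 *	}
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 *	iov_iter_advance(i, copied);
 *
 * A zero 'copied' means a fault raced in despite the pre-fault; callers
 * simply go around the loop and try again.
 */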
486 
487 void iov_iter_init(struct iov_iter *i, unsigned int direction,
488 			const struct iovec *iov, unsigned long nr_segs,
489 			size_t count)
490 {
491 	WARN_ON(direction & ~(READ | WRITE));
492 	WARN_ON_ONCE(uaccess_kernel());
493 	*i = (struct iov_iter) {
494 		.iter_type = ITER_IOVEC,
495 		.data_source = direction,
496 		.iov = iov,
497 		.nr_segs = nr_segs,
498 		.iov_offset = 0,
499 		.count = count
500 	};
501 }
502 EXPORT_SYMBOL(iov_iter_init);
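
/*
 * Example (illustrative sketch): building an ITER_IOVEC iterator over a
 * single user buffer, the way a read(2)-style path might before handing
 * the iterator down the I/O stack.  'ubuf' and 'len' are assumed to come
 * from the caller; access_ok() checking is left to the usual helpers.
 *
 *	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, READ, &iov, 1, len);
 *
 * For exactly this one-buffer case, import_single_range() at the bottom
 * of this file bundles the access_ok() check with the initialisation.
 */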
503 
504 static inline bool allocated(struct pipe_buffer *buf)
505 {
506 	return buf->ops == &default_pipe_buf_ops;
507 }
508 
509 static inline void data_start(const struct iov_iter *i,
510 			      unsigned int *iter_headp, size_t *offp)
511 {
512 	unsigned int p_mask = i->pipe->ring_size - 1;
513 	unsigned int iter_head = i->head;
514 	size_t off = i->iov_offset;
515 
516 	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
517 		    off == PAGE_SIZE)) {
518 		iter_head++;
519 		off = 0;
520 	}
521 	*iter_headp = iter_head;
522 	*offp = off;
523 }
524 
525 static size_t push_pipe(struct iov_iter *i, size_t size,
526 			int *iter_headp, size_t *offp)
527 {
528 	struct pipe_inode_info *pipe = i->pipe;
529 	unsigned int p_tail = pipe->tail;
530 	unsigned int p_mask = pipe->ring_size - 1;
531 	unsigned int iter_head;
532 	size_t off;
533 	ssize_t left;
534 
535 	if (unlikely(size > i->count))
536 		size = i->count;
537 	if (unlikely(!size))
538 		return 0;
539 
540 	left = size;
541 	data_start(i, &iter_head, &off);
542 	*iter_headp = iter_head;
543 	*offp = off;
544 	if (off) {
545 		left -= PAGE_SIZE - off;
546 		if (left <= 0) {
547 			pipe->bufs[iter_head & p_mask].len += size;
548 			return size;
549 		}
550 		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
551 		iter_head++;
552 	}
553 	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
554 		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
555 		struct page *page = alloc_page(GFP_USER);
556 		if (!page)
557 			break;
558 
559 		buf->ops = &default_pipe_buf_ops;
560 		buf->page = page;
561 		buf->offset = 0;
562 		buf->len = min_t(ssize_t, left, PAGE_SIZE);
563 		left -= buf->len;
564 		iter_head++;
565 		pipe->head = iter_head;
566 
567 		if (left == 0)
568 			return size;
569 	}
570 	return size - left;
571 }
572 
573 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
574 				struct iov_iter *i)
575 {
576 	struct pipe_inode_info *pipe = i->pipe;
577 	unsigned int p_mask = pipe->ring_size - 1;
578 	unsigned int i_head;
579 	size_t n, off;
580 
581 	if (!sanity(i))
582 		return 0;
583 
584 	bytes = n = push_pipe(i, bytes, &i_head, &off);
585 	if (unlikely(!n))
586 		return 0;
587 	do {
588 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
589 		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
590 		i->head = i_head;
591 		i->iov_offset = off + chunk;
592 		n -= chunk;
593 		addr += chunk;
594 		off = 0;
595 		i_head++;
596 	} while (n);
597 	i->count -= bytes;
598 	return bytes;
599 }
600 
601 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
602 			      __wsum sum, size_t off)
603 {
604 	__wsum next = csum_partial_copy_nocheck(from, to, len);
605 	return csum_block_add(sum, next, off);
606 }
607 
608 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
609 					 struct csum_state *csstate,
610 					 struct iov_iter *i)
611 {
612 	struct pipe_inode_info *pipe = i->pipe;
613 	unsigned int p_mask = pipe->ring_size - 1;
614 	__wsum sum = csstate->csum;
615 	size_t off = csstate->off;
616 	unsigned int i_head;
617 	size_t n, r;
618 
619 	if (!sanity(i))
620 		return 0;
621 
622 	bytes = n = push_pipe(i, bytes, &i_head, &r);
623 	if (unlikely(!n))
624 		return 0;
625 	do {
626 		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
627 		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
628 		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
629 		kunmap_atomic(p);
630 		i->head = i_head;
631 		i->iov_offset = r + chunk;
632 		n -= chunk;
633 		off += chunk;
634 		addr += chunk;
635 		r = 0;
636 		i_head++;
637 	} while (n);
638 	i->count -= bytes;
639 	csstate->csum = sum;
640 	csstate->off = off;
641 	return bytes;
642 }
643 
644 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
645 {
646 	const char *from = addr;
647 	if (unlikely(iov_iter_is_pipe(i)))
648 		return copy_pipe_to_iter(addr, bytes, i);
649 	if (iter_is_iovec(i))
650 		might_fault();
651 	iterate_and_advance(i, bytes, v,
652 		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
653 		memcpy_to_page(v.bv_page, v.bv_offset,
654 			       (from += v.bv_len) - v.bv_len, v.bv_len),
655 		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
656 		memcpy_to_page(v.bv_page, v.bv_offset,
657 			       (from += v.bv_len) - v.bv_len, v.bv_len)
658 	)
659 
660 	return bytes;
661 }
662 EXPORT_SYMBOL(_copy_to_iter);
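
/*
 * Example (illustrative sketch): copying a kernel buffer out to whatever
 * the iterator describes (user iovecs, kvecs, bvec pages or a pipe).
 * Callers normally go through the copy_to_iter() wrapper in <linux/uio.h>,
 * which clamps the request to the iterator count; a short return means a
 * fault was hit part-way through a user address.
 *
 *	size_t copied = copy_to_iter(kbuf, len, &iter);
 *
 *	if (copied != len)
 *		return -EFAULT;
 */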
663 
664 #ifdef CONFIG_ARCH_HAS_COPY_MC
665 static int copyout_mc(void __user *to, const void *from, size_t n)
666 {
667 	if (access_ok(to, n)) {
668 		instrument_copy_to_user(to, from, n);
669 		n = copy_mc_to_user((__force void *) to, from, n);
670 	}
671 	return n;
672 }
673 
674 static unsigned long copy_mc_to_page(struct page *page, size_t offset,
675 		const char *from, size_t len)
676 {
677 	unsigned long ret;
678 	char *to;
679 
680 	to = kmap_atomic(page);
681 	ret = copy_mc_to_kernel(to + offset, from, len);
682 	kunmap_atomic(to);
683 
684 	return ret;
685 }
686 
687 static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
688 				struct iov_iter *i)
689 {
690 	struct pipe_inode_info *pipe = i->pipe;
691 	unsigned int p_mask = pipe->ring_size - 1;
692 	unsigned int i_head;
693 	size_t n, off, xfer = 0;
694 
695 	if (!sanity(i))
696 		return 0;
697 
698 	bytes = n = push_pipe(i, bytes, &i_head, &off);
699 	if (unlikely(!n))
700 		return 0;
701 	do {
702 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
703 		unsigned long rem;
704 
705 		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
706 					    off, addr, chunk);
707 		i->head = i_head;
708 		i->iov_offset = off + chunk - rem;
709 		xfer += chunk - rem;
710 		if (rem)
711 			break;
712 		n -= chunk;
713 		addr += chunk;
714 		off = 0;
715 		i_head++;
716 	} while (n);
717 	i->count -= xfer;
718 	return xfer;
719 }
720 
721 /**
722  * _copy_mc_to_iter - copy to iter with source memory error exception handling
723  * @addr: source kernel address
724  * @bytes: total transfer length
725  * @i: destination iterator
726  *
727  * The pmem driver deploys this for the dax operation
728  * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
729  * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
730  * block-layer). Upon #MC, read(2) aborts and returns -EIO or the number
731  * of bytes successfully copied.
732  * The main differences between this and typical _copy_to_iter().
733  * The main differences between this and typical _copy_to_iter() are:
734  * * Typical tail/residue handling after a fault retries the copy
735  *   byte-by-byte until the fault happens again. Re-triggering machine
736  *   checks is potentially fatal so the implementation uses source
737  *   alignment and poison alignment assumptions to avoid re-triggering
738  *   hardware exceptions.
739  *
740  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
741  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
742  *   a short copy.
743  */
744 size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
745 {
746 	const char *from = addr;
747 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
748 
749 	if (unlikely(iov_iter_is_pipe(i)))
750 		return copy_mc_pipe_to_iter(addr, bytes, i);
751 	if (iter_is_iovec(i))
752 		might_fault();
753 	iterate_and_advance(i, bytes, v,
754 		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
755 			   v.iov_len),
756 		({
757 		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
758 				      (from += v.bv_len) - v.bv_len, v.bv_len);
759 		if (rem) {
760 			curr_addr = (unsigned long) from;
761 			bytes = curr_addr - s_addr - rem;
762 			return bytes;
763 		}
764 		}),
765 		({
766 		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
767 					- v.iov_len, v.iov_len);
768 		if (rem) {
769 			curr_addr = (unsigned long) from;
770 			bytes = curr_addr - s_addr - rem;
771 			return bytes;
772 		}
773 		}),
774 		({
775 		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
776 				      (from += v.bv_len) - v.bv_len, v.bv_len);
777 		if (rem) {
778 			curr_addr = (unsigned long) from;
779 			bytes = curr_addr - s_addr - rem;
780 			rcu_read_unlock();
781 			i->iov_offset += bytes;
782 			i->count -= bytes;
783 			return bytes;
784 		}
785 		})
786 	)
787 
788 	return bytes;
789 }
790 EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
791 #endif /* CONFIG_ARCH_HAS_COPY_MC */
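
/*
 * Example (illustrative sketch): a dax_copy_to_iter()-style read path
 * would normally use the copy_mc_to_iter() wrapper from <linux/uio.h>,
 * which resolves to _copy_mc_to_iter() when CONFIG_ARCH_HAS_COPY_MC is
 * enabled and to plain copy_to_iter() otherwise.  A short return means
 * poisoned source memory (or a user fault) stopped the copy early.
 *
 *	size_t done = copy_mc_to_iter(kaddr + off, len, iter);
 *
 *	if (done < len)
 *		return done ? done : -EIO;
 *
 * The -EIO for zero progress mirrors the read(2) behaviour described in
 * the kernel-doc above; exact error handling is up to the caller.
 */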
792 
793 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
794 {
795 	char *to = addr;
796 	if (unlikely(iov_iter_is_pipe(i))) {
797 		WARN_ON(1);
798 		return 0;
799 	}
800 	if (iter_is_iovec(i))
801 		might_fault();
802 	iterate_and_advance(i, bytes, v,
803 		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
804 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
805 				 v.bv_offset, v.bv_len),
806 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
807 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
808 				 v.bv_offset, v.bv_len)
809 	)
810 
811 	return bytes;
812 }
813 EXPORT_SYMBOL(_copy_from_iter);
814 
815 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
816 {
817 	char *to = addr;
818 	if (unlikely(iov_iter_is_pipe(i))) {
819 		WARN_ON(1);
820 		return 0;
821 	}
822 	iterate_and_advance(i, bytes, v,
823 		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
824 					 v.iov_base, v.iov_len),
825 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
826 				 v.bv_offset, v.bv_len),
827 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
828 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
829 				 v.bv_offset, v.bv_len)
830 	)
831 
832 	return bytes;
833 }
834 EXPORT_SYMBOL(_copy_from_iter_nocache);
835 
836 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
837 /**
838  * _copy_from_iter_flushcache - write destination through cpu cache
839  * @addr: destination kernel address
840  * @bytes: total transfer length
841  * @i: source iterator
842  *
843  * The pmem driver arranges for filesystem-dax to use this facility via
844  * dax_copy_from_iter() for ensuring that writes to persistent memory
845  * are flushed through the CPU cache. It is differentiated from
846  * _copy_from_iter_nocache() in that guarantees all data is flushed for
847  * _copy_from_iter_nocache() in that it guarantees all data is flushed for
848  * all iterator types. _copy_from_iter_nocache() only attempts to
849  * instructions that strand dirty-data in the cache.
850  */
851 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
852 {
853 	char *to = addr;
854 	if (unlikely(iov_iter_is_pipe(i))) {
855 		WARN_ON(1);
856 		return 0;
857 	}
858 	iterate_and_advance(i, bytes, v,
859 		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
860 					 v.iov_base, v.iov_len),
861 		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
862 				 v.bv_offset, v.bv_len),
863 		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
864 			v.iov_len),
865 		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
866 				 v.bv_offset, v.bv_len)
867 	)
868 
869 	return bytes;
870 }
871 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
872 #endif
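
/*
 * Example (illustrative sketch): a pmem-style dax_copy_from_iter()
 * implementation calls _copy_from_iter_flushcache() directly so that data
 * headed for persistent memory is not left dirty in the CPU cache.  When
 * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE is not set, <linux/uio.h> maps the
 * name to _copy_from_iter_nocache() instead.
 *
 *	return _copy_from_iter_flushcache(pmem_addr + off, bytes, i);
 */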
873 
874 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
875 {
876 	struct page *head;
877 	size_t v = n + offset;
878 
879 	/*
880 	 * The general case needs to access the page order to compute
881 	 * the page size.
882 	 * However, we mostly deal with order-0 pages and thus can
883 	 * avoid a possible cache line miss for requests that fit
884 	 * within a single page, whatever the page order.
885 	 */
886 	if (n <= v && v <= PAGE_SIZE)
887 		return true;
888 
889 	head = compound_head(page);
890 	v += (page - head) << PAGE_SHIFT;
891 
892 	if (likely(n <= v && v <= (page_size(head))))
893 		return true;
894 	WARN_ON(1);
895 	return false;
896 }
897 
898 static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
899 			 struct iov_iter *i)
900 {
901 	if (likely(iter_is_iovec(i)))
902 		return copy_page_to_iter_iovec(page, offset, bytes, i);
903 	if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
904 		void *kaddr = kmap_atomic(page);
905 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
906 		kunmap_atomic(kaddr);
907 		return wanted;
908 	}
909 	if (iov_iter_is_pipe(i))
910 		return copy_page_to_iter_pipe(page, offset, bytes, i);
911 	if (unlikely(iov_iter_is_discard(i))) {
912 		if (unlikely(i->count < bytes))
913 			bytes = i->count;
914 		i->count -= bytes;
915 		return bytes;
916 	}
917 	WARN_ON(1);
918 	return 0;
919 }
920 
921 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
922 			 struct iov_iter *i)
923 {
924 	size_t res = 0;
925 	if (unlikely(!page_copy_sane(page, offset, bytes)))
926 		return 0;
927 	page += offset / PAGE_SIZE; // first subpage
928 	offset %= PAGE_SIZE;
929 	while (1) {
930 		size_t n = __copy_page_to_iter(page, offset,
931 				min(bytes, (size_t)PAGE_SIZE - offset), i);
932 		res += n;
933 		bytes -= n;
934 		if (!bytes || !n)
935 			break;
936 		offset += n;
937 		if (offset == PAGE_SIZE) {
938 			page++;
939 			offset = 0;
940 		}
941 	}
942 	return res;
943 }
944 EXPORT_SYMBOL(copy_page_to_iter);
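
/*
 * Example (illustrative sketch): a filemap-style buffered read copying
 * part of an up-to-date page-cache page into the caller's iterator.  The
 * short-copy check catches a fault on the destination user buffer.
 * 'page', 'offset' and 'count' are assumed to come from the read loop.
 *
 *	size_t copied = copy_page_to_iter(page, offset, count, iter);
 *
 *	if (copied < count)
 *		return -EFAULT;
 */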
945 
946 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
947 			 struct iov_iter *i)
948 {
949 	if (unlikely(!page_copy_sane(page, offset, bytes)))
950 		return 0;
951 	if (likely(iter_is_iovec(i)))
952 		return copy_page_from_iter_iovec(page, offset, bytes, i);
953 	if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
954 		void *kaddr = kmap_atomic(page);
955 		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
956 		kunmap_atomic(kaddr);
957 		return wanted;
958 	}
959 	WARN_ON(1);
960 	return 0;
961 }
962 EXPORT_SYMBOL(copy_page_from_iter);
963 
964 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
965 {
966 	struct pipe_inode_info *pipe = i->pipe;
967 	unsigned int p_mask = pipe->ring_size - 1;
968 	unsigned int i_head;
969 	size_t n, off;
970 
971 	if (!sanity(i))
972 		return 0;
973 
974 	bytes = n = push_pipe(i, bytes, &i_head, &off);
975 	if (unlikely(!n))
976 		return 0;
977 
978 	do {
979 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
980 		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
981 		i->head = i_head;
982 		i->iov_offset = off + chunk;
983 		n -= chunk;
984 		off = 0;
985 		i_head++;
986 	} while (n);
987 	i->count -= bytes;
988 	return bytes;
989 }
990 
991 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
992 {
993 	if (unlikely(iov_iter_is_pipe(i)))
994 		return pipe_zero(bytes, i);
995 	iterate_and_advance(i, bytes, v,
996 		clear_user(v.iov_base, v.iov_len),
997 		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
998 		memset(v.iov_base, 0, v.iov_len),
999 		memzero_page(v.bv_page, v.bv_offset, v.bv_len)
1000 	)
1001 
1002 	return bytes;
1003 }
1004 EXPORT_SYMBOL(iov_iter_zero);
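
/*
 * Example (illustrative sketch): zero-filling part of the destination,
 * as a read path might when it hits a hole in a sparse file instead of
 * real data.  'in_hole', 'len' and 'kaddr' are hypothetical.
 *
 *	if (in_hole)
 *		copied = iov_iter_zero(len, iter);
 *	else
 *		copied = copy_to_iter(kaddr, len, iter);
 */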
1005 
1006 size_t iov_iter_copy_from_user_atomic(struct page *page,
1007 		struct iov_iter *i, unsigned long offset, size_t bytes)
1008 {
1009 	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
1010 	if (unlikely(!page_copy_sane(page, offset, bytes))) {
1011 		kunmap_atomic(kaddr);
1012 		return 0;
1013 	}
1014 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1015 		kunmap_atomic(kaddr);
1016 		WARN_ON(1);
1017 		return 0;
1018 	}
1019 	iterate_all_kinds(i, bytes, v,
1020 		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1021 		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1022 				 v.bv_offset, v.bv_len),
1023 		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1024 		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1025 				 v.bv_offset, v.bv_len)
1026 	)
1027 	kunmap_atomic(kaddr);
1028 	return bytes;
1029 }
1030 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
1031 
1032 static inline void pipe_truncate(struct iov_iter *i)
1033 {
1034 	struct pipe_inode_info *pipe = i->pipe;
1035 	unsigned int p_tail = pipe->tail;
1036 	unsigned int p_head = pipe->head;
1037 	unsigned int p_mask = pipe->ring_size - 1;
1038 
1039 	if (!pipe_empty(p_head, p_tail)) {
1040 		struct pipe_buffer *buf;
1041 		unsigned int i_head = i->head;
1042 		size_t off = i->iov_offset;
1043 
1044 		if (off) {
1045 			buf = &pipe->bufs[i_head & p_mask];
1046 			buf->len = off - buf->offset;
1047 			i_head++;
1048 		}
1049 		while (p_head != i_head) {
1050 			p_head--;
1051 			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
1052 		}
1053 
1054 		pipe->head = p_head;
1055 	}
1056 }
1057 
1058 static void pipe_advance(struct iov_iter *i, size_t size)
1059 {
1060 	struct pipe_inode_info *pipe = i->pipe;
1061 	if (size) {
1062 		struct pipe_buffer *buf;
1063 		unsigned int p_mask = pipe->ring_size - 1;
1064 		unsigned int i_head = i->head;
1065 		size_t off = i->iov_offset, left = size;
1066 
1067 		if (off) /* make it relative to the beginning of buffer */
1068 			left += off - pipe->bufs[i_head & p_mask].offset;
1069 		while (1) {
1070 			buf = &pipe->bufs[i_head & p_mask];
1071 			if (left <= buf->len)
1072 				break;
1073 			left -= buf->len;
1074 			i_head++;
1075 		}
1076 		i->head = i_head;
1077 		i->iov_offset = buf->offset + left;
1078 	}
1079 	i->count -= size;
1080 	/* ... and discard everything past that point */
1081 	pipe_truncate(i);
1082 }
1083 
1084 static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
1085 {
1086 	struct bvec_iter bi;
1087 
1088 	bi.bi_size = i->count;
1089 	bi.bi_bvec_done = i->iov_offset;
1090 	bi.bi_idx = 0;
1091 	bvec_iter_advance(i->bvec, &bi, size);
1092 
1093 	i->bvec += bi.bi_idx;
1094 	i->nr_segs -= bi.bi_idx;
1095 	i->count = bi.bi_size;
1096 	i->iov_offset = bi.bi_bvec_done;
1097 }
1098 
1099 void iov_iter_advance(struct iov_iter *i, size_t size)
1100 {
1101 	if (unlikely(i->count < size))
1102 		size = i->count;
1103 	if (unlikely(iov_iter_is_pipe(i))) {
1104 		pipe_advance(i, size);
1105 		return;
1106 	}
1107 	if (unlikely(iov_iter_is_discard(i))) {
1108 		i->count -= size;
1109 		return;
1110 	}
1111 	if (unlikely(iov_iter_is_xarray(i))) {
1112 		i->iov_offset += size;
1113 		i->count -= size;
1114 		return;
1115 	}
1116 	if (iov_iter_is_bvec(i)) {
1117 		iov_iter_bvec_advance(i, size);
1118 		return;
1119 	}
1120 	iterate_and_advance(i, size, v, 0, 0, 0, 0)
1121 }
1122 EXPORT_SYMBOL(iov_iter_advance);
1123 
1124 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1125 {
1126 	if (!unroll)
1127 		return;
1128 	if (WARN_ON(unroll > MAX_RW_COUNT))
1129 		return;
1130 	i->count += unroll;
1131 	if (unlikely(iov_iter_is_pipe(i))) {
1132 		struct pipe_inode_info *pipe = i->pipe;
1133 		unsigned int p_mask = pipe->ring_size - 1;
1134 		unsigned int i_head = i->head;
1135 		size_t off = i->iov_offset;
1136 		while (1) {
1137 			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
1138 			size_t n = off - b->offset;
1139 			if (unroll < n) {
1140 				off -= unroll;
1141 				break;
1142 			}
1143 			unroll -= n;
1144 			if (!unroll && i_head == i->start_head) {
1145 				off = 0;
1146 				break;
1147 			}
1148 			i_head--;
1149 			b = &pipe->bufs[i_head & p_mask];
1150 			off = b->offset + b->len;
1151 		}
1152 		i->iov_offset = off;
1153 		i->head = i_head;
1154 		pipe_truncate(i);
1155 		return;
1156 	}
1157 	if (unlikely(iov_iter_is_discard(i)))
1158 		return;
1159 	if (unroll <= i->iov_offset) {
1160 		i->iov_offset -= unroll;
1161 		return;
1162 	}
1163 	unroll -= i->iov_offset;
1164 	if (iov_iter_is_xarray(i)) {
1165 		BUG(); /* We should never go beyond the start of the specified
1166 			* range since we might then be straying into pages that
1167 			* aren't pinned.
1168 			*/
1169 	} else if (iov_iter_is_bvec(i)) {
1170 		const struct bio_vec *bvec = i->bvec;
1171 		while (1) {
1172 			size_t n = (--bvec)->bv_len;
1173 			i->nr_segs++;
1174 			if (unroll <= n) {
1175 				i->bvec = bvec;
1176 				i->iov_offset = n - unroll;
1177 				return;
1178 			}
1179 			unroll -= n;
1180 		}
1181 	} else { /* same logics for iovec and kvec */
1182 		const struct iovec *iov = i->iov;
1183 		while (1) {
1184 			size_t n = (--iov)->iov_len;
1185 			i->nr_segs++;
1186 			if (unroll <= n) {
1187 				i->iov = iov;
1188 				i->iov_offset = n - unroll;
1189 				return;
1190 			}
1191 			unroll -= n;
1192 		}
1193 	}
1194 }
1195 EXPORT_SYMBOL(iov_iter_revert);
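
/*
 * Example (illustrative sketch): the advance/revert pairing.  A caller
 * that advanced the iterator optimistically can hand back whatever a
 * lower layer failed to consume.  'chunk' and the submit call are
 * hypothetical.
 *
 *	iov_iter_advance(iter, chunk);
 *	done = submit_one_chunk(req, chunk);
 *	if (done < chunk)
 *		iov_iter_revert(iter, chunk - done);
 */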
1196 
1197 /*
1198  * Return the count of just the current iov_iter segment.
1199  */
1200 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1201 {
1202 	if (i->nr_segs > 1) {
1203 		if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1204 			return min(i->count, i->iov->iov_len - i->iov_offset);
1205 		if (iov_iter_is_bvec(i))
1206 			return min(i->count, i->bvec->bv_len - i->iov_offset);
1207 	}
1208 	return i->count;
1209 }
1210 EXPORT_SYMBOL(iov_iter_single_seg_count);
1211 
1212 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1213 			const struct kvec *kvec, unsigned long nr_segs,
1214 			size_t count)
1215 {
1216 	WARN_ON(direction & ~(READ | WRITE));
1217 	*i = (struct iov_iter){
1218 		.iter_type = ITER_KVEC,
1219 		.data_source = direction,
1220 		.kvec = kvec,
1221 		.nr_segs = nr_segs,
1222 		.iov_offset = 0,
1223 		.count = count
1224 	};
1225 }
1226 EXPORT_SYMBOL(iov_iter_kvec);
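
/*
 * Example (illustrative sketch): wrapping a kernel buffer in an ITER_KVEC
 * iterator, the usual way in-kernel callers feed kernel memory to
 * interfaces that expect an iov_iter (kernel_read()-style helpers,
 * sock_recvmsg(), and so on).
 *
 *	struct kvec kv = { .iov_base = kbuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_kvec(&iter, READ, &kv, 1, len);
 */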
1227 
1228 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1229 			const struct bio_vec *bvec, unsigned long nr_segs,
1230 			size_t count)
1231 {
1232 	WARN_ON(direction & ~(READ | WRITE));
1233 	*i = (struct iov_iter){
1234 		.iter_type = ITER_BVEC,
1235 		.data_source = direction,
1236 		.bvec = bvec,
1237 		.nr_segs = nr_segs,
1238 		.iov_offset = 0,
1239 		.count = count
1240 	};
1241 }
1242 EXPORT_SYMBOL(iov_iter_bvec);
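
/*
 * Example (illustrative sketch): describing page-based memory with an
 * ITER_BVEC iterator, e.g. so a network filesystem or loop-style driver
 * can hand a page's contents to the I/O path without a bounce buffer.
 *
 *	struct bio_vec bv = {
 *		.bv_page	= page,
 *		.bv_offset	= 0,
 *		.bv_len		= PAGE_SIZE,
 *	};
 *	struct iov_iter iter;
 *
 *	iov_iter_bvec(&iter, WRITE, &bv, 1, PAGE_SIZE);
 */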
1243 
1244 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1245 			struct pipe_inode_info *pipe,
1246 			size_t count)
1247 {
1248 	BUG_ON(direction != READ);
1249 	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
1250 	*i = (struct iov_iter){
1251 		.iter_type = ITER_PIPE,
1252 		.data_source = false,
1253 		.pipe = pipe,
1254 		.head = pipe->head,
1255 		.start_head = pipe->head,
1256 		.iov_offset = 0,
1257 		.count = count
1258 	};
1259 }
1260 EXPORT_SYMBOL(iov_iter_pipe);
1261 
1262 /**
1263  * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
1264  * @i: The iterator to initialise.
1265  * @direction: The direction of the transfer.
1266  * @xarray: The xarray to access.
1267  * @start: The start file position.
1268  * @count: The size of the I/O buffer in bytes.
1269  *
1270  * Set up an I/O iterator to either draw data out of the pages attached to an
1271  * inode or to inject data into those pages.  The caller *must* prevent the
1272  * pages from being freed out from under the iterator, either by taking a
1273  * ref on them or by locking them.
1274  */
1275 void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
1276 		     struct xarray *xarray, loff_t start, size_t count)
1277 {
1278 	BUG_ON(direction & ~1);
1279 	*i = (struct iov_iter) {
1280 		.iter_type = ITER_XARRAY,
1281 		.data_source = direction,
1282 		.xarray = xarray,
1283 		.xarray_start = start,
1284 		.count = count,
1285 		.iov_offset = 0
1286 	};
1287 }
1288 EXPORT_SYMBOL(iov_iter_xarray);
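
/*
 * Example (illustrative sketch): letting the iterator walk pages already
 * present in an address_space, as a netfs-style helper might when pushing
 * cached data to a server.  Per the comment above, the pages in the
 * covered range are assumed to be locked or to have references held.
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_xarray(&iter, WRITE, &mapping->i_pages, pos, len);
 */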
1289 
1290 /**
1291  * iov_iter_discard - Initialise an I/O iterator that discards data
1292  * @i: The iterator to initialise.
1293  * @direction: The direction of the transfer.
1294  * @count: The size of the I/O buffer in bytes.
1295  *
1296  * Set up an I/O iterator that just discards everything that's written to it.
1297  * It's only available as a READ iterator.
1298  */
1299 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1300 {
1301 	BUG_ON(direction != READ);
1302 	*i = (struct iov_iter){
1303 		.iter_type = ITER_DISCARD,
1304 		.data_source = false,
1305 		.count = count,
1306 		.iov_offset = 0
1307 	};
1308 }
1309 EXPORT_SYMBOL(iov_iter_discard);
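
/*
 * Example (illustrative sketch): a discard iterator consumes and throws
 * away data from a source that still has to be drained, e.g. skipping
 * over 'count' bytes of a message the caller is not interested in.  The
 * datagram helper here is just one plausible consumer.
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_discard(&iter, READ, count);
 *	err = skb_copy_datagram_iter(skb, offset, &iter, count);
 */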
1310 
1311 unsigned long iov_iter_alignment(const struct iov_iter *i)
1312 {
1313 	unsigned long res = 0;
1314 	size_t size = i->count;
1315 
1316 	if (unlikely(iov_iter_is_pipe(i))) {
1317 		unsigned int p_mask = i->pipe->ring_size - 1;
1318 
1319 		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
1320 			return size | i->iov_offset;
1321 		return size;
1322 	}
1323 	if (unlikely(iov_iter_is_xarray(i)))
1324 		return (i->xarray_start + i->iov_offset) | i->count;
1325 	iterate_all_kinds(i, size, v,
1326 		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
1327 		res |= v.bv_offset | v.bv_len,
1328 		res |= (unsigned long)v.iov_base | v.iov_len,
1329 		res |= v.bv_offset | v.bv_len
1330 	)
1331 	return res;
1332 }
1333 EXPORT_SYMBOL(iov_iter_alignment);
1334 
1335 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1336 {
1337 	unsigned long res = 0;
1338 	size_t size = i->count;
1339 
1340 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1341 		WARN_ON(1);
1342 		return ~0U;
1343 	}
1344 
1345 	iterate_all_kinds(i, size, v,
1346 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1347 			(size != v.iov_len ? size : 0), 0),
1348 		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1349 			(size != v.bv_len ? size : 0)),
1350 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1351 			(size != v.iov_len ? size : 0)),
1352 		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1353 			(size != v.bv_len ? size : 0))
1354 		);
1355 	return res;
1356 }
1357 EXPORT_SYMBOL(iov_iter_gap_alignment);
1358 
1359 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1360 				size_t maxsize,
1361 				struct page **pages,
1362 				int iter_head,
1363 				size_t *start)
1364 {
1365 	struct pipe_inode_info *pipe = i->pipe;
1366 	unsigned int p_mask = pipe->ring_size - 1;
1367 	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
1368 	if (!n)
1369 		return -EFAULT;
1370 
1371 	maxsize = n;
1372 	n += *start;
1373 	while (n > 0) {
1374 		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1375 		iter_head++;
1376 		n -= PAGE_SIZE;
1377 	}
1378 
1379 	return maxsize;
1380 }
1381 
1382 static ssize_t pipe_get_pages(struct iov_iter *i,
1383 		   struct page **pages, size_t maxsize, unsigned maxpages,
1384 		   size_t *start)
1385 {
1386 	unsigned int iter_head, npages;
1387 	size_t capacity;
1388 
1389 	if (!maxsize)
1390 		return 0;
1391 
1392 	if (!sanity(i))
1393 		return -EFAULT;
1394 
1395 	data_start(i, &iter_head, start);
1396 	/* Amount of free space: some of this one + all after this one */
1397 	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1398 	capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1399 
1400 	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
1401 }
1402 
1403 static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
1404 					  pgoff_t index, unsigned int nr_pages)
1405 {
1406 	XA_STATE(xas, xa, index);
1407 	struct page *page;
1408 	unsigned int ret = 0;
1409 
1410 	rcu_read_lock();
1411 	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1412 		if (xas_retry(&xas, page))
1413 			continue;
1414 
1415 		/* Has the page moved or been split? */
1416 		if (unlikely(page != xas_reload(&xas))) {
1417 			xas_reset(&xas);
1418 			continue;
1419 		}
1420 
1421 		pages[ret] = find_subpage(page, xas.xa_index);
1422 		get_page(pages[ret]);
1423 		if (++ret == nr_pages)
1424 			break;
1425 	}
1426 	rcu_read_unlock();
1427 	return ret;
1428 }
1429 
1430 static ssize_t iter_xarray_get_pages(struct iov_iter *i,
1431 				     struct page **pages, size_t maxsize,
1432 				     unsigned maxpages, size_t *_start_offset)
1433 {
1434 	unsigned nr, offset;
1435 	pgoff_t index, count;
1436 	size_t size = maxsize, actual;
1437 	loff_t pos;
1438 
1439 	if (!size || !maxpages)
1440 		return 0;
1441 
1442 	pos = i->xarray_start + i->iov_offset;
1443 	index = pos >> PAGE_SHIFT;
1444 	offset = pos & ~PAGE_MASK;
1445 	*_start_offset = offset;
1446 
1447 	count = 1;
1448 	if (size > PAGE_SIZE - offset) {
1449 		size -= PAGE_SIZE - offset;
1450 		count += size >> PAGE_SHIFT;
1451 		size &= ~PAGE_MASK;
1452 		if (size)
1453 			count++;
1454 	}
1455 
1456 	if (count > maxpages)
1457 		count = maxpages;
1458 
1459 	nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
1460 	if (nr == 0)
1461 		return 0;
1462 
1463 	actual = PAGE_SIZE * nr;
1464 	actual -= offset;
1465 	if (nr == count && size > 0) {
1466 		unsigned last_offset = (nr > 1) ? 0 : offset;
1467 		actual -= PAGE_SIZE - (last_offset + size);
1468 	}
1469 	return actual;
1470 }
1471 
1472 ssize_t iov_iter_get_pages(struct iov_iter *i,
1473 		   struct page **pages, size_t maxsize, unsigned maxpages,
1474 		   size_t *start)
1475 {
1476 	if (maxsize > i->count)
1477 		maxsize = i->count;
1478 
1479 	if (unlikely(iov_iter_is_pipe(i)))
1480 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
1481 	if (unlikely(iov_iter_is_xarray(i)))
1482 		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
1483 	if (unlikely(iov_iter_is_discard(i)))
1484 		return -EFAULT;
1485 
1486 	iterate_all_kinds(i, maxsize, v, ({
1487 		unsigned long addr = (unsigned long)v.iov_base;
1488 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1489 		int n;
1490 		int res;
1491 
1492 		if (len > maxpages * PAGE_SIZE)
1493 			len = maxpages * PAGE_SIZE;
1494 		addr &= ~(PAGE_SIZE - 1);
1495 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1496 		res = get_user_pages_fast(addr, n,
1497 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1498 				pages);
1499 		if (unlikely(res < 0))
1500 			return res;
1501 		return (res == n ? len : res * PAGE_SIZE) - *start;
1502 	0;}),({
1503 		/* can't be more than PAGE_SIZE */
1504 		*start = v.bv_offset;
1505 		get_page(*pages = v.bv_page);
1506 		return v.bv_len;
1507 	}),({
1508 		return -EFAULT;
1509 	}),
1510 	0
1511 	)
1512 	return 0;
1513 }
1514 EXPORT_SYMBOL(iov_iter_get_pages);
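
/*
 * Example (illustrative sketch): pinning the user pages behind the next
 * chunk of an iterator, direct-I/O style.  The returned pages hold
 * references and must be released with put_page(); the iterator itself is
 * not advanced by iov_iter_get_pages(), so the caller does that once the
 * pages are in hand.  'want' is a hypothetical byte count.
 *
 *	struct page *pages[16];
 *	size_t offset;
 *	ssize_t bytes;
 *	int n, k;
 *
 *	bytes = iov_iter_get_pages(iter, pages, want, ARRAY_SIZE(pages),
 *				   &offset);
 *	if (bytes <= 0)
 *		return bytes ? bytes : -EFAULT;
 *	n = DIV_ROUND_UP(offset + bytes, PAGE_SIZE);
 *	(do the I/O against pages[0..n-1], starting at 'offset')
 *	for (k = 0; k < n; k++)
 *		put_page(pages[k]);
 *	iov_iter_advance(iter, bytes);
 */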
1515 
1516 static struct page **get_pages_array(size_t n)
1517 {
1518 	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1519 }
1520 
1521 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1522 		   struct page ***pages, size_t maxsize,
1523 		   size_t *start)
1524 {
1525 	struct page **p;
1526 	unsigned int iter_head, npages;
1527 	ssize_t n;
1528 
1529 	if (!maxsize)
1530 		return 0;
1531 
1532 	if (!sanity(i))
1533 		return -EFAULT;
1534 
1535 	data_start(i, &iter_head, start);
1536 	/* Amount of free space: some of this one + all after this one */
1537 	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1538 	n = npages * PAGE_SIZE - *start;
1539 	if (maxsize > n)
1540 		maxsize = n;
1541 	else
1542 		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1543 	p = get_pages_array(npages);
1544 	if (!p)
1545 		return -ENOMEM;
1546 	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1547 	if (n > 0)
1548 		*pages = p;
1549 	else
1550 		kvfree(p);
1551 	return n;
1552 }
1553 
1554 static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
1555 					   struct page ***pages, size_t maxsize,
1556 					   size_t *_start_offset)
1557 {
1558 	struct page **p;
1559 	unsigned nr, offset;
1560 	pgoff_t index, count;
1561 	size_t size = maxsize, actual;
1562 	loff_t pos;
1563 
1564 	if (!size)
1565 		return 0;
1566 
1567 	pos = i->xarray_start + i->iov_offset;
1568 	index = pos >> PAGE_SHIFT;
1569 	offset = pos & ~PAGE_MASK;
1570 	*_start_offset = offset;
1571 
1572 	count = 1;
1573 	if (size > PAGE_SIZE - offset) {
1574 		size -= PAGE_SIZE - offset;
1575 		count += size >> PAGE_SHIFT;
1576 		size &= ~PAGE_MASK;
1577 		if (size)
1578 			count++;
1579 	}
1580 
1581 	p = get_pages_array(count);
1582 	if (!p)
1583 		return -ENOMEM;
1584 	*pages = p;
1585 
1586 	nr = iter_xarray_populate_pages(p, i->xarray, index, count);
1587 	if (nr == 0)
1588 		return 0;
1589 
1590 	actual = PAGE_SIZE * nr;
1591 	actual -= offset;
1592 	if (nr == count && size > 0) {
1593 		unsigned last_offset = (nr > 1) ? 0 : offset;
1594 		actual -= PAGE_SIZE - (last_offset + size);
1595 	}
1596 	return actual;
1597 }
1598 
1599 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1600 		   struct page ***pages, size_t maxsize,
1601 		   size_t *start)
1602 {
1603 	struct page **p;
1604 
1605 	if (maxsize > i->count)
1606 		maxsize = i->count;
1607 
1608 	if (unlikely(iov_iter_is_pipe(i)))
1609 		return pipe_get_pages_alloc(i, pages, maxsize, start);
1610 	if (unlikely(iov_iter_is_xarray(i)))
1611 		return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
1612 	if (unlikely(iov_iter_is_discard(i)))
1613 		return -EFAULT;
1614 
1615 	iterate_all_kinds(i, maxsize, v, ({
1616 		unsigned long addr = (unsigned long)v.iov_base;
1617 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1618 		int n;
1619 		int res;
1620 
1621 		addr &= ~(PAGE_SIZE - 1);
1622 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1623 		p = get_pages_array(n);
1624 		if (!p)
1625 			return -ENOMEM;
1626 		res = get_user_pages_fast(addr, n,
1627 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1628 		if (unlikely(res < 0)) {
1629 			kvfree(p);
1630 			return res;
1631 		}
1632 		*pages = p;
1633 		return (res == n ? len : res * PAGE_SIZE) - *start;
1634 	0;}),({
1635 		/* can't be more than PAGE_SIZE */
1636 		*start = v.bv_offset;
1637 		*pages = p = get_pages_array(1);
1638 		if (!p)
1639 			return -ENOMEM;
1640 		get_page(*p = v.bv_page);
1641 		return v.bv_len;
1642 	}),({
1643 		return -EFAULT;
1644 	}), 0
1645 	)
1646 	return 0;
1647 }
1648 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1649 
1650 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1651 			       struct iov_iter *i)
1652 {
1653 	char *to = addr;
1654 	__wsum sum, next;
1655 	size_t off = 0;
1656 	sum = *csum;
1657 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1658 		WARN_ON(1);
1659 		return 0;
1660 	}
1661 	iterate_and_advance(i, bytes, v, ({
1662 		next = csum_and_copy_from_user(v.iov_base,
1663 					       (to += v.iov_len) - v.iov_len,
1664 					       v.iov_len);
1665 		if (next) {
1666 			sum = csum_block_add(sum, next, off);
1667 			off += v.iov_len;
1668 		}
1669 		next ? 0 : v.iov_len;
1670 	}), ({
1671 		char *p = kmap_atomic(v.bv_page);
1672 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1673 				      p + v.bv_offset, v.bv_len,
1674 				      sum, off);
1675 		kunmap_atomic(p);
1676 		off += v.bv_len;
1677 	}),({
1678 		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1679 				      v.iov_base, v.iov_len,
1680 				      sum, off);
1681 		off += v.iov_len;
1682 	}), ({
1683 		char *p = kmap_atomic(v.bv_page);
1684 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1685 				      p + v.bv_offset, v.bv_len,
1686 				      sum, off);
1687 		kunmap_atomic(p);
1688 		off += v.bv_len;
1689 	})
1690 	)
1691 	*csum = sum;
1692 	return bytes;
1693 }
1694 EXPORT_SYMBOL(csum_and_copy_from_iter);
1695 
1696 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1697 			     struct iov_iter *i)
1698 {
1699 	struct csum_state *csstate = _csstate;
1700 	const char *from = addr;
1701 	__wsum sum, next;
1702 	size_t off;
1703 
1704 	if (unlikely(iov_iter_is_pipe(i)))
1705 		return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
1706 
1707 	sum = csstate->csum;
1708 	off = csstate->off;
1709 	if (unlikely(iov_iter_is_discard(i))) {
1710 		WARN_ON(1);	/* for now */
1711 		return 0;
1712 	}
1713 	iterate_and_advance(i, bytes, v, ({
1714 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1715 					     v.iov_base,
1716 					     v.iov_len);
1717 		if (next) {
1718 			sum = csum_block_add(sum, next, off);
1719 			off += v.iov_len;
1720 		}
1721 		next ? 0 : v.iov_len;
1722 	}), ({
1723 		char *p = kmap_atomic(v.bv_page);
1724 		sum = csum_and_memcpy(p + v.bv_offset,
1725 				      (from += v.bv_len) - v.bv_len,
1726 				      v.bv_len, sum, off);
1727 		kunmap_atomic(p);
1728 		off += v.bv_len;
1729 	}),({
1730 		sum = csum_and_memcpy(v.iov_base,
1731 				     (from += v.iov_len) - v.iov_len,
1732 				     v.iov_len, sum, off);
1733 		off += v.iov_len;
1734 	}), ({
1735 		char *p = kmap_atomic(v.bv_page);
1736 		sum = csum_and_memcpy(p + v.bv_offset,
1737 				      (from += v.bv_len) - v.bv_len,
1738 				      v.bv_len, sum, off);
1739 		kunmap_atomic(p);
1740 		off += v.bv_len;
1741 	})
1742 	)
1743 	csstate->csum = sum;
1744 	csstate->off = off;
1745 	return bytes;
1746 }
1747 EXPORT_SYMBOL(csum_and_copy_to_iter);
1748 
1749 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1750 		struct iov_iter *i)
1751 {
1752 #ifdef CONFIG_CRYPTO_HASH
1753 	struct ahash_request *hash = hashp;
1754 	struct scatterlist sg;
1755 	size_t copied;
1756 
1757 	copied = copy_to_iter(addr, bytes, i);
1758 	sg_init_one(&sg, addr, copied);
1759 	ahash_request_set_crypt(hash, &sg, NULL, copied);
1760 	crypto_ahash_update(hash);
1761 	return copied;
1762 #else
1763 	return 0;
1764 #endif
1765 }
1766 EXPORT_SYMBOL(hash_and_copy_to_iter);
1767 
1768 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1769 {
1770 	size_t size = i->count;
1771 	int npages = 0;
1772 
1773 	if (!size)
1774 		return 0;
1775 	if (unlikely(iov_iter_is_discard(i)))
1776 		return 0;
1777 
1778 	if (unlikely(iov_iter_is_pipe(i))) {
1779 		struct pipe_inode_info *pipe = i->pipe;
1780 		unsigned int iter_head;
1781 		size_t off;
1782 
1783 		if (!sanity(i))
1784 			return 0;
1785 
1786 		data_start(i, &iter_head, &off);
1787 		/* some of this one + all after this one */
1788 		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
1789 		if (npages >= maxpages)
1790 			return maxpages;
1791 	} else if (unlikely(iov_iter_is_xarray(i))) {
1792 		unsigned offset;
1793 
1794 		offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;
1795 
1796 		npages = 1;
1797 		if (size > PAGE_SIZE - offset) {
1798 			size -= PAGE_SIZE - offset;
1799 			npages += size >> PAGE_SHIFT;
1800 			size &= ~PAGE_MASK;
1801 			if (size)
1802 				npages++;
1803 		}
1804 		if (npages >= maxpages)
1805 			return maxpages;
1806 	} else iterate_all_kinds(i, size, v, ({
1807 		unsigned long p = (unsigned long)v.iov_base;
1808 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1809 			- p / PAGE_SIZE;
1810 		if (npages >= maxpages)
1811 			return maxpages;
1812 	0;}),({
1813 		npages++;
1814 		if (npages >= maxpages)
1815 			return maxpages;
1816 	}),({
1817 		unsigned long p = (unsigned long)v.iov_base;
1818 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1819 			- p / PAGE_SIZE;
1820 		if (npages >= maxpages)
1821 			return maxpages;
1822 	}),
1823 	0
1824 	)
1825 	return npages;
1826 }
1827 EXPORT_SYMBOL(iov_iter_npages);
1828 
1829 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1830 {
1831 	*new = *old;
1832 	if (unlikely(iov_iter_is_pipe(new))) {
1833 		WARN_ON(1);
1834 		return NULL;
1835 	}
1836 	if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
1837 		return NULL;
1838 	if (iov_iter_is_bvec(new))
1839 		return new->bvec = kmemdup(new->bvec,
1840 				    new->nr_segs * sizeof(struct bio_vec),
1841 				    flags);
1842 	else
1843 		/* iovec and kvec have identical layout */
1844 		return new->iov = kmemdup(new->iov,
1845 				   new->nr_segs * sizeof(struct iovec),
1846 				   flags);
1847 }
1848 EXPORT_SYMBOL(dup_iter);
1849 
1850 static int copy_compat_iovec_from_user(struct iovec *iov,
1851 		const struct iovec __user *uvec, unsigned long nr_segs)
1852 {
1853 	const struct compat_iovec __user *uiov =
1854 		(const struct compat_iovec __user *)uvec;
1855 	int ret = -EFAULT, i;
1856 
1857 	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
1858 		return -EFAULT;
1859 
1860 	for (i = 0; i < nr_segs; i++) {
1861 		compat_uptr_t buf;
1862 		compat_ssize_t len;
1863 
1864 		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1865 		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1866 
1867 		/* check for compat_size_t not fitting in compat_ssize_t ... */
1868 		if (len < 0) {
1869 			ret = -EINVAL;
1870 			goto uaccess_end;
1871 		}
1872 		iov[i].iov_base = compat_ptr(buf);
1873 		iov[i].iov_len = len;
1874 	}
1875 
1876 	ret = 0;
1877 uaccess_end:
1878 	user_access_end();
1879 	return ret;
1880 }
1881 
1882 static int copy_iovec_from_user(struct iovec *iov,
1883 		const struct iovec __user *uvec, unsigned long nr_segs)
1884 {
1885 	unsigned long seg;
1886 
1887 	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1888 		return -EFAULT;
1889 	for (seg = 0; seg < nr_segs; seg++) {
1890 		if ((ssize_t)iov[seg].iov_len < 0)
1891 			return -EINVAL;
1892 	}
1893 
1894 	return 0;
1895 }
1896 
1897 struct iovec *iovec_from_user(const struct iovec __user *uvec,
1898 		unsigned long nr_segs, unsigned long fast_segs,
1899 		struct iovec *fast_iov, bool compat)
1900 {
1901 	struct iovec *iov = fast_iov;
1902 	int ret;
1903 
1904 	/*
1905 	 * SuS says "The readv() function *may* fail if the iovcnt argument was
1906 	 * less than or equal to 0, or greater than {IOV_MAX}.  Linux has
1907 	 * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
1908 	 */
1909 	if (nr_segs == 0)
1910 		return iov;
1911 	if (nr_segs > UIO_MAXIOV)
1912 		return ERR_PTR(-EINVAL);
1913 	if (nr_segs > fast_segs) {
1914 		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
1915 		if (!iov)
1916 			return ERR_PTR(-ENOMEM);
1917 	}
1918 
1919 	if (compat)
1920 		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1921 	else
1922 		ret = copy_iovec_from_user(iov, uvec, nr_segs);
1923 	if (ret) {
1924 		if (iov != fast_iov)
1925 			kfree(iov);
1926 		return ERR_PTR(ret);
1927 	}
1928 
1929 	return iov;
1930 }
1931 
1932 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
1933 		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
1934 		 struct iov_iter *i, bool compat)
1935 {
1936 	ssize_t total_len = 0;
1937 	unsigned long seg;
1938 	struct iovec *iov;
1939 
1940 	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
1941 	if (IS_ERR(iov)) {
1942 		*iovp = NULL;
1943 		return PTR_ERR(iov);
1944 	}
1945 
1946 	/*
1947 	 * According to the Single Unix Specification we should return EINVAL if
1948 	 * an element length is < 0 when cast to ssize_t or if the total length
1949 	 * would overflow the ssize_t return value of the system call.
1950 	 *
1951 	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
1952 	 * overflow case.
1953 	 */
1954 	for (seg = 0; seg < nr_segs; seg++) {
1955 		ssize_t len = (ssize_t)iov[seg].iov_len;
1956 
1957 		if (!access_ok(iov[seg].iov_base, len)) {
1958 			if (iov != *iovp)
1959 				kfree(iov);
1960 			*iovp = NULL;
1961 			return -EFAULT;
1962 		}
1963 
1964 		if (len > MAX_RW_COUNT - total_len) {
1965 			len = MAX_RW_COUNT - total_len;
1966 			iov[seg].iov_len = len;
1967 		}
1968 		total_len += len;
1969 	}
1970 
1971 	iov_iter_init(i, type, iov, nr_segs, total_len);
1972 	if (iov == *iovp)
1973 		*iovp = NULL;
1974 	else
1975 		*iovp = iov;
1976 	return total_len;
1977 }
1978 
1979 /**
1980  * import_iovec() - Copy an array of &struct iovec from userspace
1981  *     into the kernel, check that it is valid, and initialize a new
1982  *     &struct iov_iter iterator to access it.
1983  *
1984  * @type: One of %READ or %WRITE.
1985  * @uvec: Pointer to the userspace array.
1986  * @nr_segs: Number of elements in userspace array.
1987  * @fast_segs: Number of elements in *@iovp.
1988  * @iovp: (input and output parameter) Pointer to pointer to (usually small
1989  *     on-stack) kernel array.
1990  * @i: Pointer to iterator that will be initialized on success.
1991  *
1992  * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
1993  * then this function places %NULL in *@iovp on return. Otherwise, a new
1994  * array will be allocated and the result placed in *@iovp. This means that
1995  * the caller may call kfree() on *@iovp regardless of whether the small
1996  * on-stack array was used or not (and regardless of whether this function
1997  * returns an error or not).
1998  *
1999  * Return: Negative error code on error, bytes imported on success
2000  */
2001 ssize_t import_iovec(int type, const struct iovec __user *uvec,
2002 		 unsigned nr_segs, unsigned fast_segs,
2003 		 struct iovec **iovp, struct iov_iter *i)
2004 {
2005 	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
2006 			      in_compat_syscall());
2007 }
2008 EXPORT_SYMBOL(import_iovec);
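
/*
 * Example (illustrative sketch): the usual calling convention for
 * import_iovec() in a readv(2)-style syscall, with a small on-stack
 * array for the common case.  As the kernel-doc above notes, kfree() on
 * the returned pointer is safe whether or not the on-stack array ended up
 * being used.  do_the_io() is hypothetical.
 *
 *	struct iovec iovstack[UIO_FASTIOV];
 *	struct iovec *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
 *			   &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_the_io(&iter);
 *	kfree(iov);
 *	return ret;
 */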
2009 
2010 int import_single_range(int rw, void __user *buf, size_t len,
2011 		 struct iovec *iov, struct iov_iter *i)
2012 {
2013 	if (len > MAX_RW_COUNT)
2014 		len = MAX_RW_COUNT;
2015 	if (unlikely(!access_ok(buf, len)))
2016 		return -EFAULT;
2017 
2018 	iov->iov_base = buf;
2019 	iov->iov_len = len;
2020 	iov_iter_init(i, rw, iov, 1, len);
2021 	return 0;
2022 }
2023 EXPORT_SYMBOL(import_single_range);
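
/*
 * Example (illustrative sketch): import_single_range() is the one-buffer
 * counterpart of import_iovec(), used by read(2)/write(2)-style paths and
 * by io_uring for non-vectored requests.  The iovec must stay alive for
 * as long as the iterator is in use.
 *
 *	struct iovec iov;
 *	struct iov_iter iter;
 *	int ret;
 *
 *	ret = import_single_range(WRITE, ubuf, len, &iov, &iter);
 *	if (unlikely(ret))
 *		return ret;
 */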
2024