1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 27999096fSHerbert Xu #include <crypto/hash.h> 3d879cb83SAl Viro #include <linux/export.h> 42f8b5444SChristoph Hellwig #include <linux/bvec.h> 54d0e9df5SAlbert van der Linde #include <linux/fault-inject-usercopy.h> 6d879cb83SAl Viro #include <linux/uio.h> 7d879cb83SAl Viro #include <linux/pagemap.h> 828961998SIra Weiny #include <linux/highmem.h> 9d879cb83SAl Viro #include <linux/slab.h> 10d879cb83SAl Viro #include <linux/vmalloc.h> 11241699cdSAl Viro #include <linux/splice.h> 12bfdc5970SChristoph Hellwig #include <linux/compat.h> 13d879cb83SAl Viro #include <net/checksum.h> 14d05f4435SSagi Grimberg #include <linux/scatterlist.h> 15d0ef4c36SMarco Elver #include <linux/instrumented.h> 16d879cb83SAl Viro 17241699cdSAl Viro #define PIPE_PARANOIA /* for now */ 18241699cdSAl Viro 195c67aa90SAl Viro /* covers iovec and kvec alike */ 20a6e4ec7bSAl Viro #define iterate_iovec(i, n, base, len, off, __p, STEP) { \ 217baa5099SAl Viro size_t off = 0; \ 22a6e4ec7bSAl Viro size_t skip = i->iov_offset; \ 237a1bcb5dSAl Viro do { \ 247baa5099SAl Viro len = min(n, __p->iov_len - skip); \ 257baa5099SAl Viro if (likely(len)) { \ 267baa5099SAl Viro base = __p->iov_base + skip; \ 277baa5099SAl Viro len -= (STEP); \ 287baa5099SAl Viro off += len; \ 297baa5099SAl Viro skip += len; \ 307baa5099SAl Viro n -= len; \ 317a1bcb5dSAl Viro if (skip < __p->iov_len) \ 327a1bcb5dSAl Viro break; \ 33d879cb83SAl Viro } \ 34d879cb83SAl Viro __p++; \ 357a1bcb5dSAl Viro skip = 0; \ 367a1bcb5dSAl Viro } while (n); \ 37a6e4ec7bSAl Viro i->iov_offset = skip; \ 387baa5099SAl Viro n = off; \ 39d879cb83SAl Viro } 40d879cb83SAl Viro 41a6e4ec7bSAl Viro #define iterate_bvec(i, n, base, len, off, p, STEP) { \ 427baa5099SAl Viro size_t off = 0; \ 43a6e4ec7bSAl Viro unsigned skip = i->iov_offset; \ 447491a2bfSAl Viro while (n) { \ 457491a2bfSAl Viro unsigned offset = p->bv_offset + skip; \ 461b4fb5ffSAl Viro unsigned left; \ 4721b56c84SAl Viro void 
*kaddr = kmap_local_page(p->bv_page + \ 4821b56c84SAl Viro offset / PAGE_SIZE); \ 497baa5099SAl Viro base = kaddr + offset % PAGE_SIZE; \ 50a6e4ec7bSAl Viro len = min(min(n, (size_t)(p->bv_len - skip)), \ 517491a2bfSAl Viro (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \ 521b4fb5ffSAl Viro left = (STEP); \ 5321b56c84SAl Viro kunmap_local(kaddr); \ 547baa5099SAl Viro len -= left; \ 557baa5099SAl Viro off += len; \ 567baa5099SAl Viro skip += len; \ 577491a2bfSAl Viro if (skip == p->bv_len) { \ 587491a2bfSAl Viro skip = 0; \ 597491a2bfSAl Viro p++; \ 60d879cb83SAl Viro } \ 617baa5099SAl Viro n -= len; \ 621b4fb5ffSAl Viro if (left) \ 631b4fb5ffSAl Viro break; \ 647491a2bfSAl Viro } \ 65a6e4ec7bSAl Viro i->iov_offset = skip; \ 667baa5099SAl Viro n = off; \ 67d879cb83SAl Viro } 68d879cb83SAl Viro 69a6e4ec7bSAl Viro #define iterate_xarray(i, n, base, len, __off, STEP) { \ 701b4fb5ffSAl Viro __label__ __out; \ 71622838f3SAl Viro size_t __off = 0; \ 72821979f5SMatthew Wilcox (Oracle) struct folio *folio; \ 73a6e4ec7bSAl Viro loff_t start = i->xarray_start + i->iov_offset; \ 744b179e9aSAl Viro pgoff_t index = start / PAGE_SIZE; \ 757ff50620SDavid Howells XA_STATE(xas, i->xarray, index); \ 767ff50620SDavid Howells \ 77821979f5SMatthew Wilcox (Oracle) len = PAGE_SIZE - offset_in_page(start); \ 787ff50620SDavid Howells rcu_read_lock(); \ 79821979f5SMatthew Wilcox (Oracle) xas_for_each(&xas, folio, ULONG_MAX) { \ 801b4fb5ffSAl Viro unsigned left; \ 81821979f5SMatthew Wilcox (Oracle) size_t offset; \ 82821979f5SMatthew Wilcox (Oracle) if (xas_retry(&xas, folio)) \ 837ff50620SDavid Howells continue; \ 84821979f5SMatthew Wilcox (Oracle) if (WARN_ON(xa_is_value(folio))) \ 857ff50620SDavid Howells break; \ 86821979f5SMatthew Wilcox (Oracle) if (WARN_ON(folio_test_hugetlb(folio))) \ 877ff50620SDavid Howells break; \ 88821979f5SMatthew Wilcox (Oracle) offset = offset_in_folio(folio, start + __off); \ 89821979f5SMatthew Wilcox (Oracle) while (offset < folio_size(folio)) { \ 
90821979f5SMatthew Wilcox (Oracle) base = kmap_local_folio(folio, offset); \ 917baa5099SAl Viro len = min(n, len); \ 921b4fb5ffSAl Viro left = (STEP); \ 93821979f5SMatthew Wilcox (Oracle) kunmap_local(base); \ 947baa5099SAl Viro len -= left; \ 957baa5099SAl Viro __off += len; \ 967baa5099SAl Viro n -= len; \ 971b4fb5ffSAl Viro if (left || n == 0) \ 981b4fb5ffSAl Viro goto __out; \ 99821979f5SMatthew Wilcox (Oracle) offset += len; \ 100821979f5SMatthew Wilcox (Oracle) len = PAGE_SIZE; \ 1017ff50620SDavid Howells } \ 1027ff50620SDavid Howells } \ 1031b4fb5ffSAl Viro __out: \ 1047ff50620SDavid Howells rcu_read_unlock(); \ 105a6e4ec7bSAl Viro i->iov_offset += __off; \ 106622838f3SAl Viro n = __off; \ 1077ff50620SDavid Howells } 1087ff50620SDavid Howells 1097baa5099SAl Viro #define __iterate_and_advance(i, n, base, len, off, I, K) { \ 110dd254f5aSAl Viro if (unlikely(i->count < n)) \ 111dd254f5aSAl Viro n = i->count; \ 112f5da8354SAl Viro if (likely(n)) { \ 11328f38db7SAl Viro if (likely(iter_is_iovec(i))) { \ 1145c67aa90SAl Viro const struct iovec *iov = i->iov; \ 1157baa5099SAl Viro void __user *base; \ 1167baa5099SAl Viro size_t len; \ 1177baa5099SAl Viro iterate_iovec(i, n, base, len, off, \ 118a6e4ec7bSAl Viro iov, (I)) \ 119d879cb83SAl Viro i->nr_segs -= iov - i->iov; \ 120d879cb83SAl Viro i->iov = iov; \ 12128f38db7SAl Viro } else if (iov_iter_is_bvec(i)) { \ 12228f38db7SAl Viro const struct bio_vec *bvec = i->bvec; \ 1237baa5099SAl Viro void *base; \ 1247baa5099SAl Viro size_t len; \ 1257baa5099SAl Viro iterate_bvec(i, n, base, len, off, \ 126a6e4ec7bSAl Viro bvec, (K)) \ 1277491a2bfSAl Viro i->nr_segs -= bvec - i->bvec; \ 1287491a2bfSAl Viro i->bvec = bvec; \ 12928f38db7SAl Viro } else if (iov_iter_is_kvec(i)) { \ 1305c67aa90SAl Viro const struct kvec *kvec = i->kvec; \ 1317baa5099SAl Viro void *base; \ 1327baa5099SAl Viro size_t len; \ 1337baa5099SAl Viro iterate_iovec(i, n, base, len, off, \ 134a6e4ec7bSAl Viro kvec, (K)) \ 13528f38db7SAl Viro i->nr_segs -= 
kvec - i->kvec; \ 13628f38db7SAl Viro i->kvec = kvec; \ 13728f38db7SAl Viro } else if (iov_iter_is_xarray(i)) { \ 1387baa5099SAl Viro void *base; \ 1397baa5099SAl Viro size_t len; \ 1407baa5099SAl Viro iterate_xarray(i, n, base, len, off, \ 141a6e4ec7bSAl Viro (K)) \ 142d879cb83SAl Viro } \ 143d879cb83SAl Viro i->count -= n; \ 144dd254f5aSAl Viro } \ 145d879cb83SAl Viro } 1467baa5099SAl Viro #define iterate_and_advance(i, n, base, len, off, I, K) \ 1477baa5099SAl Viro __iterate_and_advance(i, n, base, len, off, I, ((void)(K),0)) 148d879cb83SAl Viro 14909fc68dcSAl Viro static int copyout(void __user *to, const void *from, size_t n) 15009fc68dcSAl Viro { 1514d0e9df5SAlbert van der Linde if (should_fail_usercopy()) 1524d0e9df5SAlbert van der Linde return n; 15396d4f267SLinus Torvalds if (access_ok(to, n)) { 154d0ef4c36SMarco Elver instrument_copy_to_user(to, from, n); 15509fc68dcSAl Viro n = raw_copy_to_user(to, from, n); 15609fc68dcSAl Viro } 15709fc68dcSAl Viro return n; 15809fc68dcSAl Viro } 15909fc68dcSAl Viro 16009fc68dcSAl Viro static int copyin(void *to, const void __user *from, size_t n) 16109fc68dcSAl Viro { 1624d0e9df5SAlbert van der Linde if (should_fail_usercopy()) 1634d0e9df5SAlbert van der Linde return n; 16496d4f267SLinus Torvalds if (access_ok(from, n)) { 165d0ef4c36SMarco Elver instrument_copy_from_user(to, from, n); 16609fc68dcSAl Viro n = raw_copy_from_user(to, from, n); 16709fc68dcSAl Viro } 16809fc68dcSAl Viro return n; 16909fc68dcSAl Viro } 17009fc68dcSAl Viro 171241699cdSAl Viro #ifdef PIPE_PARANOIA 172241699cdSAl Viro static bool sanity(const struct iov_iter *i) 173241699cdSAl Viro { 174241699cdSAl Viro struct pipe_inode_info *pipe = i->pipe; 1758cefc107SDavid Howells unsigned int p_head = pipe->head; 1768cefc107SDavid Howells unsigned int p_tail = pipe->tail; 1778cefc107SDavid Howells unsigned int p_mask = pipe->ring_size - 1; 1788cefc107SDavid Howells unsigned int p_occupancy = pipe_occupancy(p_head, p_tail); 1798cefc107SDavid Howells 
unsigned int i_head = i->head; 1808cefc107SDavid Howells unsigned int idx; 1818cefc107SDavid Howells 182241699cdSAl Viro if (i->iov_offset) { 183241699cdSAl Viro struct pipe_buffer *p; 1848cefc107SDavid Howells if (unlikely(p_occupancy == 0)) 185241699cdSAl Viro goto Bad; // pipe must be non-empty 1868cefc107SDavid Howells if (unlikely(i_head != p_head - 1)) 187241699cdSAl Viro goto Bad; // must be at the last buffer... 188241699cdSAl Viro 1898cefc107SDavid Howells p = &pipe->bufs[i_head & p_mask]; 190241699cdSAl Viro if (unlikely(p->offset + p->len != i->iov_offset)) 191241699cdSAl Viro goto Bad; // ... at the end of segment 192241699cdSAl Viro } else { 1938cefc107SDavid Howells if (i_head != p_head) 194241699cdSAl Viro goto Bad; // must be right after the last buffer 195241699cdSAl Viro } 196241699cdSAl Viro return true; 197241699cdSAl Viro Bad: 1988cefc107SDavid Howells printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset); 1998cefc107SDavid Howells printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n", 2008cefc107SDavid Howells p_head, p_tail, pipe->ring_size); 2018cefc107SDavid Howells for (idx = 0; idx < pipe->ring_size; idx++) 202241699cdSAl Viro printk(KERN_ERR "[%p %p %d %d]\n", 203241699cdSAl Viro pipe->bufs[idx].ops, 204241699cdSAl Viro pipe->bufs[idx].page, 205241699cdSAl Viro pipe->bufs[idx].offset, 206241699cdSAl Viro pipe->bufs[idx].len); 207241699cdSAl Viro WARN_ON(1); 208241699cdSAl Viro return false; 209241699cdSAl Viro } 210241699cdSAl Viro #else 211241699cdSAl Viro #define sanity(i) true 212241699cdSAl Viro #endif 213241699cdSAl Viro 214241699cdSAl Viro static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, 215241699cdSAl Viro struct iov_iter *i) 216241699cdSAl Viro { 217241699cdSAl Viro struct pipe_inode_info *pipe = i->pipe; 218241699cdSAl Viro struct pipe_buffer *buf; 2198cefc107SDavid Howells unsigned int p_tail = pipe->tail; 2208cefc107SDavid Howells unsigned int p_mask = pipe->ring_size - 1; 
2218cefc107SDavid Howells unsigned int i_head = i->head; 222241699cdSAl Viro size_t off; 223241699cdSAl Viro 224241699cdSAl Viro if (unlikely(bytes > i->count)) 225241699cdSAl Viro bytes = i->count; 226241699cdSAl Viro 227241699cdSAl Viro if (unlikely(!bytes)) 228241699cdSAl Viro return 0; 229241699cdSAl Viro 230241699cdSAl Viro if (!sanity(i)) 231241699cdSAl Viro return 0; 232241699cdSAl Viro 233241699cdSAl Viro off = i->iov_offset; 2348cefc107SDavid Howells buf = &pipe->bufs[i_head & p_mask]; 235241699cdSAl Viro if (off) { 236241699cdSAl Viro if (offset == off && buf->page == page) { 237241699cdSAl Viro /* merge with the last one */ 238241699cdSAl Viro buf->len += bytes; 239241699cdSAl Viro i->iov_offset += bytes; 240241699cdSAl Viro goto out; 241241699cdSAl Viro } 2428cefc107SDavid Howells i_head++; 2438cefc107SDavid Howells buf = &pipe->bufs[i_head & p_mask]; 244241699cdSAl Viro } 2456718b6f8SDavid Howells if (pipe_full(i_head, p_tail, pipe->max_usage)) 246241699cdSAl Viro return 0; 2478cefc107SDavid Howells 248241699cdSAl Viro buf->ops = &page_cache_pipe_buf_ops; 2499d2231c5SMax Kellermann buf->flags = 0; 2508cefc107SDavid Howells get_page(page); 2518cefc107SDavid Howells buf->page = page; 252241699cdSAl Viro buf->offset = offset; 253241699cdSAl Viro buf->len = bytes; 2548cefc107SDavid Howells 2558cefc107SDavid Howells pipe->head = i_head + 1; 256241699cdSAl Viro i->iov_offset = offset + bytes; 2578cefc107SDavid Howells i->head = i_head; 258241699cdSAl Viro out: 259241699cdSAl Viro i->count -= bytes; 260241699cdSAl Viro return bytes; 261241699cdSAl Viro } 262241699cdSAl Viro 263d879cb83SAl Viro /* 264a6294593SAndreas Gruenbacher * fault_in_iov_iter_readable - fault in iov iterator for reading 265a6294593SAndreas Gruenbacher * @i: iterator 266a6294593SAndreas Gruenbacher * @size: maximum length 267171a0203SAnton Altaparmakov * 268a6294593SAndreas Gruenbacher * Fault in one or more iovecs of the given iov_iter, to a maximum length of 269a6294593SAndreas 
Gruenbacher * @size. For each iovec, fault in each page that constitutes the iovec. 270a6294593SAndreas Gruenbacher * 271a6294593SAndreas Gruenbacher * Returns the number of bytes not faulted in (like copy_to_user() and 272a6294593SAndreas Gruenbacher * copy_from_user()). 273a6294593SAndreas Gruenbacher * 274a6294593SAndreas Gruenbacher * Always returns 0 for non-userspace iterators. 275171a0203SAnton Altaparmakov */ 276a6294593SAndreas Gruenbacher size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size) 277171a0203SAnton Altaparmakov { 2780e8f0d67SAl Viro if (iter_is_iovec(i)) { 279a6294593SAndreas Gruenbacher size_t count = min(size, iov_iter_count(i)); 2808409a0d2SAl Viro const struct iovec *p; 2818409a0d2SAl Viro size_t skip; 2828409a0d2SAl Viro 283a6294593SAndreas Gruenbacher size -= count; 284a6294593SAndreas Gruenbacher for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) { 285a6294593SAndreas Gruenbacher size_t len = min(count, p->iov_len - skip); 286a6294593SAndreas Gruenbacher size_t ret; 2878409a0d2SAl Viro 2888409a0d2SAl Viro if (unlikely(!len)) 2898409a0d2SAl Viro continue; 290a6294593SAndreas Gruenbacher ret = fault_in_readable(p->iov_base + skip, len); 291a6294593SAndreas Gruenbacher count -= len - ret; 292a6294593SAndreas Gruenbacher if (ret) 293a6294593SAndreas Gruenbacher break; 2948409a0d2SAl Viro } 295a6294593SAndreas Gruenbacher return count + size; 296171a0203SAnton Altaparmakov } 297171a0203SAnton Altaparmakov return 0; 298171a0203SAnton Altaparmakov } 299a6294593SAndreas Gruenbacher EXPORT_SYMBOL(fault_in_iov_iter_readable); 300171a0203SAnton Altaparmakov 301cdd591fcSAndreas Gruenbacher /* 302cdd591fcSAndreas Gruenbacher * fault_in_iov_iter_writeable - fault in iov iterator for writing 303cdd591fcSAndreas Gruenbacher * @i: iterator 304cdd591fcSAndreas Gruenbacher * @size: maximum length 305cdd591fcSAndreas Gruenbacher * 306cdd591fcSAndreas Gruenbacher * Faults in the iterator using get_user_pages(), i.e., without 
triggering 307cdd591fcSAndreas Gruenbacher * hardware page faults. This is primarily useful when we already know that 308cdd591fcSAndreas Gruenbacher * some or all of the pages in @i aren't in memory. 309cdd591fcSAndreas Gruenbacher * 310cdd591fcSAndreas Gruenbacher * Returns the number of bytes not faulted in, like copy_to_user() and 311cdd591fcSAndreas Gruenbacher * copy_from_user(). 312cdd591fcSAndreas Gruenbacher * 313cdd591fcSAndreas Gruenbacher * Always returns 0 for non-user-space iterators. 314cdd591fcSAndreas Gruenbacher */ 315cdd591fcSAndreas Gruenbacher size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size) 316cdd591fcSAndreas Gruenbacher { 317cdd591fcSAndreas Gruenbacher if (iter_is_iovec(i)) { 318cdd591fcSAndreas Gruenbacher size_t count = min(size, iov_iter_count(i)); 319cdd591fcSAndreas Gruenbacher const struct iovec *p; 320cdd591fcSAndreas Gruenbacher size_t skip; 321cdd591fcSAndreas Gruenbacher 322cdd591fcSAndreas Gruenbacher size -= count; 323cdd591fcSAndreas Gruenbacher for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) { 324cdd591fcSAndreas Gruenbacher size_t len = min(count, p->iov_len - skip); 325cdd591fcSAndreas Gruenbacher size_t ret; 326cdd591fcSAndreas Gruenbacher 327cdd591fcSAndreas Gruenbacher if (unlikely(!len)) 328cdd591fcSAndreas Gruenbacher continue; 329cdd591fcSAndreas Gruenbacher ret = fault_in_safe_writeable(p->iov_base + skip, len); 330cdd591fcSAndreas Gruenbacher count -= len - ret; 331cdd591fcSAndreas Gruenbacher if (ret) 332cdd591fcSAndreas Gruenbacher break; 333cdd591fcSAndreas Gruenbacher } 334cdd591fcSAndreas Gruenbacher return count + size; 335cdd591fcSAndreas Gruenbacher } 336cdd591fcSAndreas Gruenbacher return 0; 337cdd591fcSAndreas Gruenbacher } 338cdd591fcSAndreas Gruenbacher EXPORT_SYMBOL(fault_in_iov_iter_writeable); 339cdd591fcSAndreas Gruenbacher 340aa563d7bSDavid Howells void iov_iter_init(struct iov_iter *i, unsigned int direction, 341d879cb83SAl Viro const struct iovec *iov, unsigned 
long nr_segs, 342d879cb83SAl Viro size_t count) 343d879cb83SAl Viro { 344aa563d7bSDavid Howells WARN_ON(direction & ~(READ | WRITE)); 3458cd54c1cSAl Viro *i = (struct iov_iter) { 3468cd54c1cSAl Viro .iter_type = ITER_IOVEC, 3473337ab08SAndreas Gruenbacher .nofault = false, 3488cd54c1cSAl Viro .data_source = direction, 3498cd54c1cSAl Viro .iov = iov, 3508cd54c1cSAl Viro .nr_segs = nr_segs, 3518cd54c1cSAl Viro .iov_offset = 0, 3528cd54c1cSAl Viro .count = count 3538cd54c1cSAl Viro }; 354d879cb83SAl Viro } 355d879cb83SAl Viro EXPORT_SYMBOL(iov_iter_init); 356d879cb83SAl Viro 357241699cdSAl Viro static inline bool allocated(struct pipe_buffer *buf) 358241699cdSAl Viro { 359241699cdSAl Viro return buf->ops == &default_pipe_buf_ops; 360241699cdSAl Viro } 361241699cdSAl Viro 3628cefc107SDavid Howells static inline void data_start(const struct iov_iter *i, 3638cefc107SDavid Howells unsigned int *iter_headp, size_t *offp) 364241699cdSAl Viro { 3658cefc107SDavid Howells unsigned int p_mask = i->pipe->ring_size - 1; 3668cefc107SDavid Howells unsigned int iter_head = i->head; 367241699cdSAl Viro size_t off = i->iov_offset; 3688cefc107SDavid Howells 3698cefc107SDavid Howells if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) || 3708cefc107SDavid Howells off == PAGE_SIZE)) { 3718cefc107SDavid Howells iter_head++; 372241699cdSAl Viro off = 0; 373241699cdSAl Viro } 3748cefc107SDavid Howells *iter_headp = iter_head; 375241699cdSAl Viro *offp = off; 376241699cdSAl Viro } 377241699cdSAl Viro 378241699cdSAl Viro static size_t push_pipe(struct iov_iter *i, size_t size, 3798cefc107SDavid Howells int *iter_headp, size_t *offp) 380241699cdSAl Viro { 381241699cdSAl Viro struct pipe_inode_info *pipe = i->pipe; 3828cefc107SDavid Howells unsigned int p_tail = pipe->tail; 3838cefc107SDavid Howells unsigned int p_mask = pipe->ring_size - 1; 3848cefc107SDavid Howells unsigned int iter_head; 385241699cdSAl Viro size_t off; 386241699cdSAl Viro ssize_t left; 387241699cdSAl Viro 
388241699cdSAl Viro if (unlikely(size > i->count)) 389241699cdSAl Viro size = i->count; 390241699cdSAl Viro if (unlikely(!size)) 391241699cdSAl Viro return 0; 392241699cdSAl Viro 393241699cdSAl Viro left = size; 3948cefc107SDavid Howells data_start(i, &iter_head, &off); 3958cefc107SDavid Howells *iter_headp = iter_head; 396241699cdSAl Viro *offp = off; 397241699cdSAl Viro if (off) { 398241699cdSAl Viro left -= PAGE_SIZE - off; 399241699cdSAl Viro if (left <= 0) { 4008cefc107SDavid Howells pipe->bufs[iter_head & p_mask].len += size; 401241699cdSAl Viro return size; 402241699cdSAl Viro } 4038cefc107SDavid Howells pipe->bufs[iter_head & p_mask].len = PAGE_SIZE; 4048cefc107SDavid Howells iter_head++; 405241699cdSAl Viro } 4066718b6f8SDavid Howells while (!pipe_full(iter_head, p_tail, pipe->max_usage)) { 4078cefc107SDavid Howells struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask]; 408241699cdSAl Viro struct page *page = alloc_page(GFP_USER); 409241699cdSAl Viro if (!page) 410241699cdSAl Viro break; 4118cefc107SDavid Howells 4128cefc107SDavid Howells buf->ops = &default_pipe_buf_ops; 4139d2231c5SMax Kellermann buf->flags = 0; 4148cefc107SDavid Howells buf->page = page; 4158cefc107SDavid Howells buf->offset = 0; 4168cefc107SDavid Howells buf->len = min_t(ssize_t, left, PAGE_SIZE); 4178cefc107SDavid Howells left -= buf->len; 4188cefc107SDavid Howells iter_head++; 4198cefc107SDavid Howells pipe->head = iter_head; 4208cefc107SDavid Howells 4218cefc107SDavid Howells if (left == 0) 422241699cdSAl Viro return size; 423241699cdSAl Viro } 424241699cdSAl Viro return size - left; 425241699cdSAl Viro } 426241699cdSAl Viro 427241699cdSAl Viro static size_t copy_pipe_to_iter(const void *addr, size_t bytes, 428241699cdSAl Viro struct iov_iter *i) 429241699cdSAl Viro { 430241699cdSAl Viro struct pipe_inode_info *pipe = i->pipe; 4318cefc107SDavid Howells unsigned int p_mask = pipe->ring_size - 1; 4328cefc107SDavid Howells unsigned int i_head; 433241699cdSAl Viro size_t n, off; 
434241699cdSAl Viro 435241699cdSAl Viro if (!sanity(i)) 436241699cdSAl Viro return 0; 437241699cdSAl Viro 4388cefc107SDavid Howells bytes = n = push_pipe(i, bytes, &i_head, &off); 439241699cdSAl Viro if (unlikely(!n)) 440241699cdSAl Viro return 0; 4418cefc107SDavid Howells do { 442241699cdSAl Viro size_t chunk = min_t(size_t, n, PAGE_SIZE - off); 4438cefc107SDavid Howells memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); 4448cefc107SDavid Howells i->head = i_head; 445241699cdSAl Viro i->iov_offset = off + chunk; 446241699cdSAl Viro n -= chunk; 447241699cdSAl Viro addr += chunk; 4488cefc107SDavid Howells off = 0; 4498cefc107SDavid Howells i_head++; 4508cefc107SDavid Howells } while (n); 451241699cdSAl Viro i->count -= bytes; 452241699cdSAl Viro return bytes; 453241699cdSAl Viro } 454241699cdSAl Viro 455f9152895SAl Viro static __wsum csum_and_memcpy(void *to, const void *from, size_t len, 456f9152895SAl Viro __wsum sum, size_t off) 457f9152895SAl Viro { 458cc44c17bSAl Viro __wsum next = csum_partial_copy_nocheck(from, to, len); 459f9152895SAl Viro return csum_block_add(sum, next, off); 460f9152895SAl Viro } 461f9152895SAl Viro 46278e1f386SAl Viro static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, 4636852df12SAl Viro struct iov_iter *i, __wsum *sump) 46478e1f386SAl Viro { 46578e1f386SAl Viro struct pipe_inode_info *pipe = i->pipe; 4668cefc107SDavid Howells unsigned int p_mask = pipe->ring_size - 1; 4676852df12SAl Viro __wsum sum = *sump; 4686852df12SAl Viro size_t off = 0; 4698cefc107SDavid Howells unsigned int i_head; 4706852df12SAl Viro size_t r; 47178e1f386SAl Viro 47278e1f386SAl Viro if (!sanity(i)) 47378e1f386SAl Viro return 0; 47478e1f386SAl Viro 4756852df12SAl Viro bytes = push_pipe(i, bytes, &i_head, &r); 4766852df12SAl Viro while (bytes) { 4776852df12SAl Viro size_t chunk = min_t(size_t, bytes, PAGE_SIZE - r); 4782495bdccSAl Viro char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); 4796852df12SAl Viro sum = 
csum_and_memcpy(p + r, addr + off, chunk, sum, off); 4802495bdccSAl Viro kunmap_local(p); 4818cefc107SDavid Howells i->head = i_head; 48278e1f386SAl Viro i->iov_offset = r + chunk; 4836852df12SAl Viro bytes -= chunk; 48478e1f386SAl Viro off += chunk; 4858cefc107SDavid Howells r = 0; 4868cefc107SDavid Howells i_head++; 4876852df12SAl Viro } 4886852df12SAl Viro *sump = sum; 4896852df12SAl Viro i->count -= off; 4906852df12SAl Viro return off; 49178e1f386SAl Viro } 49278e1f386SAl Viro 493aa28de27SAl Viro size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) 494d879cb83SAl Viro { 49500e23707SDavid Howells if (unlikely(iov_iter_is_pipe(i))) 496241699cdSAl Viro return copy_pipe_to_iter(addr, bytes, i); 49709fc68dcSAl Viro if (iter_is_iovec(i)) 49809fc68dcSAl Viro might_fault(); 4997baa5099SAl Viro iterate_and_advance(i, bytes, base, len, off, 5007baa5099SAl Viro copyout(base, addr + off, len), 5017baa5099SAl Viro memcpy(base, addr + off, len) 502d879cb83SAl Viro ) 503d879cb83SAl Viro 504d879cb83SAl Viro return bytes; 505d879cb83SAl Viro } 506aa28de27SAl Viro EXPORT_SYMBOL(_copy_to_iter); 507d879cb83SAl Viro 508ec6347bbSDan Williams #ifdef CONFIG_ARCH_HAS_COPY_MC 509ec6347bbSDan Williams static int copyout_mc(void __user *to, const void *from, size_t n) 5108780356eSDan Williams { 51196d4f267SLinus Torvalds if (access_ok(to, n)) { 512d0ef4c36SMarco Elver instrument_copy_to_user(to, from, n); 513ec6347bbSDan Williams n = copy_mc_to_user((__force void *) to, from, n); 5148780356eSDan Williams } 5158780356eSDan Williams return n; 5168780356eSDan Williams } 5178780356eSDan Williams 518ec6347bbSDan Williams static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, 519ca146f6fSDan Williams struct iov_iter *i) 520ca146f6fSDan Williams { 521ca146f6fSDan Williams struct pipe_inode_info *pipe = i->pipe; 5228cefc107SDavid Howells unsigned int p_mask = pipe->ring_size - 1; 5238cefc107SDavid Howells unsigned int i_head; 524ca146f6fSDan Williams size_t n, off, 
xfer = 0; 525ca146f6fSDan Williams 526ca146f6fSDan Williams if (!sanity(i)) 527ca146f6fSDan Williams return 0; 528ca146f6fSDan Williams 5292a510a74SAl Viro n = push_pipe(i, bytes, &i_head, &off); 5302a510a74SAl Viro while (n) { 531ca146f6fSDan Williams size_t chunk = min_t(size_t, n, PAGE_SIZE - off); 5322a510a74SAl Viro char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); 533ca146f6fSDan Williams unsigned long rem; 5342a510a74SAl Viro rem = copy_mc_to_kernel(p + off, addr + xfer, chunk); 5352a510a74SAl Viro chunk -= rem; 5362a510a74SAl Viro kunmap_local(p); 5378cefc107SDavid Howells i->head = i_head; 5382a510a74SAl Viro i->iov_offset = off + chunk; 5392a510a74SAl Viro xfer += chunk; 540ca146f6fSDan Williams if (rem) 541ca146f6fSDan Williams break; 542ca146f6fSDan Williams n -= chunk; 5438cefc107SDavid Howells off = 0; 5448cefc107SDavid Howells i_head++; 5452a510a74SAl Viro } 546ca146f6fSDan Williams i->count -= xfer; 547ca146f6fSDan Williams return xfer; 548ca146f6fSDan Williams } 549ca146f6fSDan Williams 550bf3eeb9bSDan Williams /** 551ec6347bbSDan Williams * _copy_mc_to_iter - copy to iter with source memory error exception handling 552bf3eeb9bSDan Williams * @addr: source kernel address 553bf3eeb9bSDan Williams * @bytes: total transfer length 55444e55997SRandy Dunlap * @i: destination iterator 555bf3eeb9bSDan Williams * 556ec6347bbSDan Williams * The pmem driver deploys this for the dax operation 557ec6347bbSDan Williams * (dax_copy_to_iter()) for dax reads (bypass page-cache and the 558ec6347bbSDan Williams * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes 559ec6347bbSDan Williams * successfully copied. 560bf3eeb9bSDan Williams * 561ec6347bbSDan Williams * The main differences between this and typical _copy_to_iter(). 562bf3eeb9bSDan Williams * 563bf3eeb9bSDan Williams * * Typical tail/residue handling after a fault retries the copy 564bf3eeb9bSDan Williams * byte-by-byte until the fault happens again. 
Re-triggering machine 565bf3eeb9bSDan Williams * checks is potentially fatal so the implementation uses source 566bf3eeb9bSDan Williams * alignment and poison alignment assumptions to avoid re-triggering 567bf3eeb9bSDan Williams * hardware exceptions. 568bf3eeb9bSDan Williams * 569bf3eeb9bSDan Williams * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. 570bf3eeb9bSDan Williams * Compare to copy_to_iter() where only ITER_IOVEC attempts might return 571bf3eeb9bSDan Williams * a short copy. 57244e55997SRandy Dunlap * 57344e55997SRandy Dunlap * Return: number of bytes copied (may be %0) 574bf3eeb9bSDan Williams */ 575ec6347bbSDan Williams size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) 5768780356eSDan Williams { 57700e23707SDavid Howells if (unlikely(iov_iter_is_pipe(i))) 578ec6347bbSDan Williams return copy_mc_pipe_to_iter(addr, bytes, i); 5798780356eSDan Williams if (iter_is_iovec(i)) 5808780356eSDan Williams might_fault(); 5817baa5099SAl Viro __iterate_and_advance(i, bytes, base, len, off, 5827baa5099SAl Viro copyout_mc(base, addr + off, len), 5837baa5099SAl Viro copy_mc_to_kernel(base, addr + off, len) 5848780356eSDan Williams ) 5858780356eSDan Williams 5868780356eSDan Williams return bytes; 5878780356eSDan Williams } 588ec6347bbSDan Williams EXPORT_SYMBOL_GPL(_copy_mc_to_iter); 589ec6347bbSDan Williams #endif /* CONFIG_ARCH_HAS_COPY_MC */ 5908780356eSDan Williams 591aa28de27SAl Viro size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) 592d879cb83SAl Viro { 59300e23707SDavid Howells if (unlikely(iov_iter_is_pipe(i))) { 594241699cdSAl Viro WARN_ON(1); 595241699cdSAl Viro return 0; 596241699cdSAl Viro } 59709fc68dcSAl Viro if (iter_is_iovec(i)) 59809fc68dcSAl Viro might_fault(); 5997baa5099SAl Viro iterate_and_advance(i, bytes, base, len, off, 6007baa5099SAl Viro copyin(addr + off, base, len), 6017baa5099SAl Viro memcpy(addr + off, base, len) 602d879cb83SAl Viro ) 603d879cb83SAl Viro 604d879cb83SAl Viro return 
bytes; 605d879cb83SAl Viro } 606aa28de27SAl Viro EXPORT_SYMBOL(_copy_from_iter); 607d879cb83SAl Viro 608aa28de27SAl Viro size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) 609d879cb83SAl Viro { 61000e23707SDavid Howells if (unlikely(iov_iter_is_pipe(i))) { 611241699cdSAl Viro WARN_ON(1); 612241699cdSAl Viro return 0; 613241699cdSAl Viro } 6147baa5099SAl Viro iterate_and_advance(i, bytes, base, len, off, 6157baa5099SAl Viro __copy_from_user_inatomic_nocache(addr + off, base, len), 6167baa5099SAl Viro memcpy(addr + off, base, len) 617d879cb83SAl Viro ) 618d879cb83SAl Viro 619d879cb83SAl Viro return bytes; 620d879cb83SAl Viro } 621aa28de27SAl Viro EXPORT_SYMBOL(_copy_from_iter_nocache); 622d879cb83SAl Viro 6230aed55afSDan Williams #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE 624abd08d7dSDan Williams /** 625abd08d7dSDan Williams * _copy_from_iter_flushcache - write destination through cpu cache 626abd08d7dSDan Williams * @addr: destination kernel address 627abd08d7dSDan Williams * @bytes: total transfer length 62844e55997SRandy Dunlap * @i: source iterator 629abd08d7dSDan Williams * 630abd08d7dSDan Williams * The pmem driver arranges for filesystem-dax to use this facility via 631abd08d7dSDan Williams * dax_copy_from_iter() for ensuring that writes to persistent memory 632abd08d7dSDan Williams * are flushed through the CPU cache. It is differentiated from 633abd08d7dSDan Williams * _copy_from_iter_nocache() in that guarantees all data is flushed for 634abd08d7dSDan Williams * all iterator types. The _copy_from_iter_nocache() only attempts to 635abd08d7dSDan Williams * bypass the cache for the ITER_IOVEC case, and on some archs may use 636abd08d7dSDan Williams * instructions that strand dirty-data in the cache. 
63744e55997SRandy Dunlap * 63844e55997SRandy Dunlap * Return: number of bytes copied (may be %0) 639abd08d7dSDan Williams */ 6406a37e940SLinus Torvalds size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) 6410aed55afSDan Williams { 64200e23707SDavid Howells if (unlikely(iov_iter_is_pipe(i))) { 6430aed55afSDan Williams WARN_ON(1); 6440aed55afSDan Williams return 0; 6450aed55afSDan Williams } 6467baa5099SAl Viro iterate_and_advance(i, bytes, base, len, off, 6477baa5099SAl Viro __copy_from_user_flushcache(addr + off, base, len), 6487baa5099SAl Viro memcpy_flushcache(addr + off, base, len) 6490aed55afSDan Williams ) 6500aed55afSDan Williams 6510aed55afSDan Williams return bytes; 6520aed55afSDan Williams } 6536a37e940SLinus Torvalds EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache); 6540aed55afSDan Williams #endif 6550aed55afSDan Williams 65672e809edSAl Viro static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) 65772e809edSAl Viro { 6586daef95bSEric Dumazet struct page *head; 6596daef95bSEric Dumazet size_t v = n + offset; 6606daef95bSEric Dumazet 6616daef95bSEric Dumazet /* 6626daef95bSEric Dumazet * The general case needs to access the page order in order 6636daef95bSEric Dumazet * to compute the page size. 6646daef95bSEric Dumazet * However, we mostly deal with order-0 pages and thus can 6656daef95bSEric Dumazet * avoid a possible cache line miss for requests that fit all 6666daef95bSEric Dumazet * page orders. 
6676daef95bSEric Dumazet */ 6686daef95bSEric Dumazet if (n <= v && v <= PAGE_SIZE) 6696daef95bSEric Dumazet return true; 6706daef95bSEric Dumazet 6716daef95bSEric Dumazet head = compound_head(page); 6726daef95bSEric Dumazet v += (page - head) << PAGE_SHIFT; 673a90bcb86SPetar Penkov 674a50b854eSMatthew Wilcox (Oracle) if (likely(n <= v && v <= (page_size(head)))) 67572e809edSAl Viro return true; 67672e809edSAl Viro WARN_ON(1); 67772e809edSAl Viro return false; 67872e809edSAl Viro } 679cbbd26b8SAl Viro 68008aa6479SAl Viro static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes, 681d879cb83SAl Viro struct iov_iter *i) 682d879cb83SAl Viro { 68359bb69c6SAl Viro if (unlikely(iov_iter_is_pipe(i))) { 68459bb69c6SAl Viro return copy_page_to_iter_pipe(page, offset, bytes, i); 68559bb69c6SAl Viro } else { 686c1d4d6a9SAl Viro void *kaddr = kmap_local_page(page); 687c1d4d6a9SAl Viro size_t wanted = _copy_to_iter(kaddr + offset, bytes, i); 688c1d4d6a9SAl Viro kunmap_local(kaddr); 689d879cb83SAl Viro return wanted; 69028f38db7SAl Viro } 691d879cb83SAl Viro } 69208aa6479SAl Viro 69308aa6479SAl Viro size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, 69408aa6479SAl Viro struct iov_iter *i) 69508aa6479SAl Viro { 69608aa6479SAl Viro size_t res = 0; 69708aa6479SAl Viro if (unlikely(!page_copy_sane(page, offset, bytes))) 69808aa6479SAl Viro return 0; 69908aa6479SAl Viro page += offset / PAGE_SIZE; // first subpage 70008aa6479SAl Viro offset %= PAGE_SIZE; 70108aa6479SAl Viro while (1) { 70208aa6479SAl Viro size_t n = __copy_page_to_iter(page, offset, 70308aa6479SAl Viro min(bytes, (size_t)PAGE_SIZE - offset), i); 70408aa6479SAl Viro res += n; 70508aa6479SAl Viro bytes -= n; 70608aa6479SAl Viro if (!bytes || !n) 70708aa6479SAl Viro break; 70808aa6479SAl Viro offset += n; 70908aa6479SAl Viro if (offset == PAGE_SIZE) { 71008aa6479SAl Viro page++; 71108aa6479SAl Viro offset = 0; 71208aa6479SAl Viro } 71308aa6479SAl Viro } 71408aa6479SAl Viro 
	return res;
}
EXPORT_SYMBOL(copy_page_to_iter);

/*
 * Copy @bytes from the iterator into @page + @offset through a temporary
 * kernel mapping.  Returns the number of bytes copied, or 0 if the
 * requested range does not fit within the page.
 */
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (page_copy_sane(page, offset, bytes)) {
		void *kaddr = kmap_local_page(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_local(kaddr);
		return wanted;
	}
	return 0;
}
EXPORT_SYMBOL(copy_page_from_iter);

/* Fill @bytes of a pipe-backed iterator with zeroes. */
static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	/* reserve pipe buffer space for @bytes; may come back short */
	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;

	do {
		/* zero up to the end of the current pipe buffer page */
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
		memset(p + off, 0, chunk);
		kunmap_local(p);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	/* clear_user() for user segments, memset() for kernel ones */
	iterate_and_advance(i, bytes, base, len, count,
		clear_user(base, len),
		memset(base, 0, len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

/*
 * Copy @bytes from @i into @page + @offset with the page mapped via
 * kmap_atomic().  Pipe and discard iterators cannot be a data source
 * and are rejected with a warning.
 */
size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
				  struct iov_iter *i)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, base, len, off,
		copyin(p + off, base, len),
		memcpy(p + off, base, len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(copy_page_from_iter_atomic);

/*
 * Release every pipe buffer past the iterator's current position and trim
 * the length of the buffer the iterator currently points into.
 */
static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_head = pipe->head;
	unsigned int p_mask = pipe->ring_size - 1;

	if (!pipe_empty(p_head, p_tail)) {
		struct pipe_buffer *buf;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		if (off) {
			/* current buffer ends exactly at the iterator position */
			buf = &pipe->bufs[i_head & p_mask];
			buf->len = off - buf->offset;
			i_head++;
		}
		/* release every buffer between the iterator and the pipe head */
		while (p_head != i_head) {
			p_head--;
			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
		}

		pipe->head = p_head;
	}
}

/* Move a pipe-backed iterator forward by @size bytes, dropping what follows. */
static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (size) {
		struct pipe_buffer *buf;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset, left = size;

		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[i_head & p_mask].offset;
		/* walk forward to the buffer containing the new position */
		while (1) {
			buf = &pipe->bufs[i_head & p_mask];
			if (left <= buf->len)
				break;
			left -= buf->len;
			i_head++;
		}
		i->head = i_head;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ...
 and discard everything past that point */
	pipe_truncate(i);
}

/* Advance a bvec-backed iterator by @size bytes. */
static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
	const struct bio_vec *bvec, *end;

	if (!i->count)
		return;
	i->count -= size;

	size += i->iov_offset;

	/* skip whole segments; what remains of @size is the in-segment offset */
	for (bvec = i->bvec, end = bvec + i->nr_segs; bvec < end; bvec++) {
		if (likely(size < bvec->bv_len))
			break;
		size -= bvec->bv_len;
	}
	i->iov_offset = size;
	i->nr_segs -= bvec - i->bvec;
	i->bvec = bvec;
}

/* Advance an iovec/kvec-backed iterator by @size bytes. */
static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
{
	const struct iovec *iov, *end;

	if (!i->count)
		return;
	i->count -= size;

	size += i->iov_offset; // from beginning of current segment
	/* skip whole segments; what remains of @size is the in-segment offset */
	for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) {
		if (likely(size < iov->iov_len))
			break;
		size -= iov->iov_len;
	}
	i->iov_offset = size;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
}

/* Advance any flavour of iterator by @size bytes (clamped to i->count). */
void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(i->count < size))
		size = i->count;
	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
		/* iovec and kvec have identical layouts */
		iov_iter_iovec_advance(i, size);
	} else if (iov_iter_is_bvec(i)) {
		iov_iter_bvec_advance(i, size);
	} else if (iov_iter_is_pipe(i)) {
		pipe_advance(i, size);
	} else if (unlikely(iov_iter_is_xarray(i))) {
		/* xarray is one contiguous range: just bump the offset */
		i->iov_offset += size;
		i->count -= size;
	} else if (iov_iter_is_discard(i)) {
		i->count -= size;
	}
}
EXPORT_SYMBOL(iov_iter_advance);

/* Wind the iterator back by @unroll bytes, undoing a previous advance. */
void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		/* step backwards through the pipe buffers */
		while (1) {
			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
			size_t n = off - b->offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && i_head == i->start_head) {
				/* back at the point the iterator started from */
				off = 0;
				break;
			}
			i_head--;
			b = &pipe->bufs[i_head & p_mask];
			off = b->offset + b->len;
		}
		i->iov_offset = off;
		i->head = i_head;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		/* still within the current segment */
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_xarray(i)) {
		BUG(); /* We should never go beyond the start of the specified
			* range since we might then be straying into pages that
			* aren't pinned.
			*/
	} else if (iov_iter_is_bvec(i)) {
		/* walk back through the preceding segments */
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

/*
 * Return the count of just the current iov_iter segment.
981d879cb83SAl Viro */ 982d879cb83SAl Viro size_t iov_iter_single_seg_count(const struct iov_iter *i) 983d879cb83SAl Viro { 98428f38db7SAl Viro if (i->nr_segs > 1) { 98528f38db7SAl Viro if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) 98628f38db7SAl Viro return min(i->count, i->iov->iov_len - i->iov_offset); 9877ff50620SDavid Howells if (iov_iter_is_bvec(i)) 988d879cb83SAl Viro return min(i->count, i->bvec->bv_len - i->iov_offset); 98928f38db7SAl Viro } 99028f38db7SAl Viro return i->count; 991d879cb83SAl Viro } 992d879cb83SAl Viro EXPORT_SYMBOL(iov_iter_single_seg_count); 993d879cb83SAl Viro 994aa563d7bSDavid Howells void iov_iter_kvec(struct iov_iter *i, unsigned int direction, 995d879cb83SAl Viro const struct kvec *kvec, unsigned long nr_segs, 996d879cb83SAl Viro size_t count) 997d879cb83SAl Viro { 998aa563d7bSDavid Howells WARN_ON(direction & ~(READ | WRITE)); 9998cd54c1cSAl Viro *i = (struct iov_iter){ 10008cd54c1cSAl Viro .iter_type = ITER_KVEC, 10018cd54c1cSAl Viro .data_source = direction, 10028cd54c1cSAl Viro .kvec = kvec, 10038cd54c1cSAl Viro .nr_segs = nr_segs, 10048cd54c1cSAl Viro .iov_offset = 0, 10058cd54c1cSAl Viro .count = count 10068cd54c1cSAl Viro }; 1007d879cb83SAl Viro } 1008d879cb83SAl Viro EXPORT_SYMBOL(iov_iter_kvec); 1009d879cb83SAl Viro 1010aa563d7bSDavid Howells void iov_iter_bvec(struct iov_iter *i, unsigned int direction, 1011d879cb83SAl Viro const struct bio_vec *bvec, unsigned long nr_segs, 1012d879cb83SAl Viro size_t count) 1013d879cb83SAl Viro { 1014aa563d7bSDavid Howells WARN_ON(direction & ~(READ | WRITE)); 10158cd54c1cSAl Viro *i = (struct iov_iter){ 10168cd54c1cSAl Viro .iter_type = ITER_BVEC, 10178cd54c1cSAl Viro .data_source = direction, 10188cd54c1cSAl Viro .bvec = bvec, 10198cd54c1cSAl Viro .nr_segs = nr_segs, 10208cd54c1cSAl Viro .iov_offset = 0, 10218cd54c1cSAl Viro .count = count 10228cd54c1cSAl Viro }; 1023d879cb83SAl Viro } 1024d879cb83SAl Viro EXPORT_SYMBOL(iov_iter_bvec); 1025d879cb83SAl Viro 1026aa563d7bSDavid 
Howells void iov_iter_pipe(struct iov_iter *i, unsigned int direction, 1027241699cdSAl Viro struct pipe_inode_info *pipe, 1028241699cdSAl Viro size_t count) 1029241699cdSAl Viro { 1030aa563d7bSDavid Howells BUG_ON(direction != READ); 10318cefc107SDavid Howells WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size)); 10328cd54c1cSAl Viro *i = (struct iov_iter){ 10338cd54c1cSAl Viro .iter_type = ITER_PIPE, 10348cd54c1cSAl Viro .data_source = false, 10358cd54c1cSAl Viro .pipe = pipe, 10368cd54c1cSAl Viro .head = pipe->head, 10378cd54c1cSAl Viro .start_head = pipe->head, 10388cd54c1cSAl Viro .iov_offset = 0, 10398cd54c1cSAl Viro .count = count 10408cd54c1cSAl Viro }; 1041241699cdSAl Viro } 1042241699cdSAl Viro EXPORT_SYMBOL(iov_iter_pipe); 1043241699cdSAl Viro 10449ea9ce04SDavid Howells /** 10457ff50620SDavid Howells * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray 10467ff50620SDavid Howells * @i: The iterator to initialise. 10477ff50620SDavid Howells * @direction: The direction of the transfer. 10487ff50620SDavid Howells * @xarray: The xarray to access. 10497ff50620SDavid Howells * @start: The start file position. 10507ff50620SDavid Howells * @count: The size of the I/O buffer in bytes. 10517ff50620SDavid Howells * 10527ff50620SDavid Howells * Set up an I/O iterator to either draw data out of the pages attached to an 10537ff50620SDavid Howells * inode or to inject data into those pages. The pages *must* be prevented 10547ff50620SDavid Howells * from evaporation, either by taking a ref on them or locking them by the 10557ff50620SDavid Howells * caller. 
10567ff50620SDavid Howells */ 10577ff50620SDavid Howells void iov_iter_xarray(struct iov_iter *i, unsigned int direction, 10587ff50620SDavid Howells struct xarray *xarray, loff_t start, size_t count) 10597ff50620SDavid Howells { 10607ff50620SDavid Howells BUG_ON(direction & ~1); 10618cd54c1cSAl Viro *i = (struct iov_iter) { 10628cd54c1cSAl Viro .iter_type = ITER_XARRAY, 10638cd54c1cSAl Viro .data_source = direction, 10648cd54c1cSAl Viro .xarray = xarray, 10658cd54c1cSAl Viro .xarray_start = start, 10668cd54c1cSAl Viro .count = count, 10678cd54c1cSAl Viro .iov_offset = 0 10688cd54c1cSAl Viro }; 10697ff50620SDavid Howells } 10707ff50620SDavid Howells EXPORT_SYMBOL(iov_iter_xarray); 10717ff50620SDavid Howells 10727ff50620SDavid Howells /** 10739ea9ce04SDavid Howells * iov_iter_discard - Initialise an I/O iterator that discards data 10749ea9ce04SDavid Howells * @i: The iterator to initialise. 10759ea9ce04SDavid Howells * @direction: The direction of the transfer. 10769ea9ce04SDavid Howells * @count: The size of the I/O buffer in bytes. 10779ea9ce04SDavid Howells * 10789ea9ce04SDavid Howells * Set up an I/O iterator that just discards everything that's written to it. 10799ea9ce04SDavid Howells * It's only available as a READ iterator. 
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	*i = (struct iov_iter){
		.iter_type = ITER_DISCARD,
		.data_source = false,
		.count = count,
		.iov_offset = 0
	};
}
EXPORT_SYMBOL(iov_iter_discard);

/*
 * OR together the addresses and (clamped) lengths of every segment that
 * falls within i->count; the caller inspects the low bits of the result
 * to judge alignment.
 */
static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;
	size_t skip = i->iov_offset;
	unsigned k;

	for (k = 0; k < i->nr_segs; k++, skip = 0) {
		size_t len = i->iov[k].iov_len - skip;
		if (len) {
			res |= (unsigned long)i->iov[k].iov_base + skip;
			if (len > size)
				len = size;
			res |= len;
			size -= len;
			if (!size)
				break;
		}
	}
	return res;
}

/* Same idea as above, for bvec-backed iterators (in-page offsets). */
static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
{
	unsigned res = 0;
	size_t size = i->count;
	unsigned skip = i->iov_offset;
	unsigned k;

	for (k = 0; k < i->nr_segs; k++, skip = 0) {
		size_t len = i->bvec[k].bv_len - skip;
		res |= (unsigned long)i->bvec[k].bv_offset + skip;
		if (len > size)
			len = size;
		res |= len;
		size -= len;
		if (!size)
			break;
	}
	return res;
}

/*
 * Combined alignment of every address/length the iterator covers; the
 * caller tests the low bits of the returned value.
 */
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	/* iovec and kvec have identical layouts */
	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
		return iov_iter_alignment_iovec(i);

	if (iov_iter_is_bvec(i))
		return iov_iter_alignment_bvec(i);

	if (iov_iter_is_pipe(i)) {
		unsigned int p_mask = i->pipe->ring_size - 1;
		size_t size = i->count;

		/* a partially-filled head buffer contributes its offset */
		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
			return size | i->iov_offset;
		return size;
	}

	if (iov_iter_is_xarray(i))
		return (i->xarray_start + i->iov_offset) | i->count;

	return 0;
}
EXPORT_SYMBOL(iov_iter_alignment);

/*
 * OR of the "gaps" between consecutive segments: each segment's start
 * address combined with the previous segment's end address.  Only valid
 * for ITER_IOVEC iterators.
 */
unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	unsigned long v = 0;
	size_t size = i->count;
	unsigned k;

	if (WARN_ON(!iter_is_iovec(i)))
		return ~0U;

	for (k = 0; k < i->nr_segs; k++) {
		if (i->iov[k].iov_len) {
			unsigned long base = (unsigned long)i->iov[k].iov_base;
			if (v) // if not the first one
				res |= base | v; // this start | previous end
			v = base + i->iov[k].iov_len;
			if (size <= i->iov[k].iov_len)
				break;
			size -= i->iov[k].iov_len;
		}
	}
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

/*
 * Take references on the pipe buffer pages covering up to @maxsize bytes
 * starting at @iter_head.  Returns the number of bytes covered, or -EFAULT
 * if push_pipe() could not reserve any space.
 */
static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int iter_head,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;	/* round the byte count up to cover whole pages */
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
		iter_head++;
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
			struct page **pages, size_t maxsize, unsigned maxpages,
			size_t *start)
{
	unsigned int iter_head, npages;
	size_t capacity;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages =
pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

/*
 * Fill @pages with up to @nr_pages pointers from @xa starting at @index,
 * taking a reference on each page grabbed.  Returns how many were taken.
 */
static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
					  pgoff_t index, unsigned int nr_pages)
{
	XA_STATE(xas, xa, index);
	struct page *page;
	unsigned int ret = 0;

	rcu_read_lock();
	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
		if (xas_retry(&xas, page))
			continue;

		/* Has the page moved or been split? */
		if (unlikely(page != xas_reload(&xas))) {
			xas_reset(&xas);
			continue;
		}

		pages[ret] = find_subpage(page, xas.xa_index);
		get_page(pages[ret]);
		if (++ret == nr_pages)
			break;
	}
	rcu_read_unlock();
	return ret;
}

static ssize_t iter_xarray_get_pages(struct iov_iter *i,
				     struct page **pages, size_t maxsize,
				     unsigned maxpages, size_t *_start_offset)
{
	unsigned nr, offset;
	pgoff_t index, count;
	size_t size = maxsize, actual;
loff_t pos;

	if (!size || !maxpages)
		return 0;

	pos = i->xarray_start + i->iov_offset;
	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;
	*_start_offset = offset;

	/* how many pages does [pos, pos + size) touch? */
	count = 1;
	if (size > PAGE_SIZE - offset) {
		size -= PAGE_SIZE - offset;
		count += size >> PAGE_SHIFT;
		size &= ~PAGE_MASK;
		if (size)
			count++;
	}

	if (count > maxpages)
		count = maxpages;

	nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
	if (nr == 0)
		return 0;

	/* bytes covered: whole pages minus the head offset and tail slack */
	actual = PAGE_SIZE * nr;
	actual -= offset;
	if (nr == count && size > 0) {
		unsigned last_offset = (nr > 1) ?
0 : offset;
		actual -= PAGE_SIZE - (last_offset + size);
	}
	return actual;
}

/* must be done on non-empty ITER_IOVEC one */
static unsigned long first_iovec_segment(const struct iov_iter *i,
					 size_t *size, size_t *start,
					 size_t maxsize, unsigned maxpages)
{
	size_t skip;
	long k;

	/* find the first non-empty segment; report its page-aligned address,
	 * the in-page offset (*start) and the usable length (*size) */
	for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
		unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;
		size_t len = i->iov[k].iov_len - skip;

		if (unlikely(!len))
			continue;
		if (len > maxsize)
			len = maxsize;
		len += (*start = addr % PAGE_SIZE);
		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		*size = len;
		return addr & PAGE_MASK;
	}
	BUG(); // if it had been empty, we wouldn't get called
}

/* must be done on non-empty ITER_BVEC one */
static struct page *first_bvec_segment(const struct iov_iter *i,
				       size_t *size, size_t *start,
				       size_t maxsize, unsigned maxpages)
{
	struct page *page;
	size_t skip = i->iov_offset, len;

	len = i->bvec->bv_len - skip;
	if (len > maxsize)
		len = maxsize;
	skip += i->bvec->bv_offset;
	page = i->bvec->bv_page + skip /
PAGE_SIZE;
	len += (*start = skip % PAGE_SIZE);
	if (len > maxpages * PAGE_SIZE)
		len = maxpages * PAGE_SIZE;
	*size = len;
	return page;
}

/*
 * Pin up to @maxpages pages covering the first @maxsize bytes of the
 * iterator into @pages; *start receives the offset into the first page.
 * Returns the number of bytes covered, or a negative error.
 */
ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	size_t len;
	int n, res;

	if (maxsize > i->count)
		maxsize = i->count;
	if (!maxsize)
		return 0;

	if (likely(iter_is_iovec(i))) {
		unsigned int gup_flags = 0;
		unsigned long addr;

		/* a non-WRITE iterator means data will be written into the pages */
		if (iov_iter_rw(i) != WRITE)
			gup_flags |= FOLL_WRITE;
		if (i->nofault)
			gup_flags |= FOLL_NOFAULT;

		addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n, gup_flags, pages);
		if (unlikely(res <= 0))
			return res;
		return (res == n ?
len : res * PAGE_SIZE) - *start;
	}
	if (iov_iter_is_bvec(i)) {
		struct page *page;

		page = first_bvec_segment(i, &len, start, maxsize, maxpages);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		while (n--)
			get_page(*pages++ = page++);
		return len - *start;
	}
	if (iov_iter_is_pipe(i))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (iov_iter_is_xarray(i))
		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
	return -EFAULT;
}
EXPORT_SYMBOL(iov_iter_get_pages);

/* Allocate an array large enough for @n page pointers. */
static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

/* Like pipe_get_pages(), but allocates the page pointer array itself. */
static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	unsigned int iter_head, npages;
	ssize_t n;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);	/* nothing grabbed: free the array again */
	return n;
}

/* Like iter_xarray_get_pages(), but allocates the page pointer array. */
static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
					   struct page ***pages, size_t maxsize,
					   size_t *_start_offset)
{
	struct page **p;
	unsigned nr, offset;
	pgoff_t index, count;
	size_t size = maxsize, actual;
	loff_t pos;

	if (!size)
		return 0;

	pos = i->xarray_start + i->iov_offset;
	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;
	*_start_offset = offset;

	/* how many pages does [pos, pos + size) touch? */
	count = 1;
	if (size > PAGE_SIZE - offset) {
		size -= PAGE_SIZE - offset;
		count += size >> PAGE_SHIFT;
		size &= ~PAGE_MASK;
		if (size)
			count++;
	}

	p = get_pages_array(count);
	if (!p)
		return -ENOMEM;
	*pages = p;

	nr = iter_xarray_populate_pages(p, i->xarray, index, count);
	if (nr == 0)
		return 0;
14557ff50620SDavid Howells 14567ff50620SDavid Howells actual = PAGE_SIZE * nr; 14577ff50620SDavid Howells actual -= offset; 14587ff50620SDavid Howells if (nr == count && size > 0) { 14597ff50620SDavid Howells unsigned last_offset = (nr > 1) ? 0 : offset; 14607ff50620SDavid Howells actual -= PAGE_SIZE - (last_offset + size); 14617ff50620SDavid Howells } 14627ff50620SDavid Howells return actual; 14637ff50620SDavid Howells } 14647ff50620SDavid Howells 1465d879cb83SAl Viro ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, 1466d879cb83SAl Viro struct page ***pages, size_t maxsize, 1467d879cb83SAl Viro size_t *start) 1468d879cb83SAl Viro { 1469d879cb83SAl Viro struct page **p; 14703d671ca6SAl Viro size_t len; 14713d671ca6SAl Viro int n, res; 1472d879cb83SAl Viro 1473d879cb83SAl Viro if (maxsize > i->count) 1474d879cb83SAl Viro maxsize = i->count; 14753d671ca6SAl Viro if (!maxsize) 14763d671ca6SAl Viro return 0; 1477d879cb83SAl Viro 14783d671ca6SAl Viro if (likely(iter_is_iovec(i))) { 14793337ab08SAndreas Gruenbacher unsigned int gup_flags = 0; 14803d671ca6SAl Viro unsigned long addr; 14819ea9ce04SDavid Howells 14823337ab08SAndreas Gruenbacher if (iov_iter_rw(i) != WRITE) 14833337ab08SAndreas Gruenbacher gup_flags |= FOLL_WRITE; 14843337ab08SAndreas Gruenbacher if (i->nofault) 14853337ab08SAndreas Gruenbacher gup_flags |= FOLL_NOFAULT; 14863337ab08SAndreas Gruenbacher 14873d671ca6SAl Viro addr = first_iovec_segment(i, &len, start, maxsize, ~0U); 1488d879cb83SAl Viro n = DIV_ROUND_UP(len, PAGE_SIZE); 1489d879cb83SAl Viro p = get_pages_array(n); 1490d879cb83SAl Viro if (!p) 1491d879cb83SAl Viro return -ENOMEM; 14923337ab08SAndreas Gruenbacher res = get_user_pages_fast(addr, n, gup_flags, p); 1493814a6674SAndreas Gruenbacher if (unlikely(res <= 0)) { 1494d879cb83SAl Viro kvfree(p); 1495814a6674SAndreas Gruenbacher *pages = NULL; 1496d879cb83SAl Viro return res; 1497d879cb83SAl Viro } 1498d879cb83SAl Viro *pages = p; 1499d879cb83SAl Viro return (res == n ? 
len : res * PAGE_SIZE) - *start; 15003d671ca6SAl Viro } 15013d671ca6SAl Viro if (iov_iter_is_bvec(i)) { 15023d671ca6SAl Viro struct page *page; 15033d671ca6SAl Viro 15043d671ca6SAl Viro page = first_bvec_segment(i, &len, start, maxsize, ~0U); 15053d671ca6SAl Viro n = DIV_ROUND_UP(len, PAGE_SIZE); 15063d671ca6SAl Viro *pages = p = get_pages_array(n); 1507d879cb83SAl Viro if (!p) 1508d879cb83SAl Viro return -ENOMEM; 15093d671ca6SAl Viro while (n--) 15103d671ca6SAl Viro get_page(*p++ = page++); 15113d671ca6SAl Viro return len - *start; 15123d671ca6SAl Viro } 15133d671ca6SAl Viro if (iov_iter_is_pipe(i)) 15143d671ca6SAl Viro return pipe_get_pages_alloc(i, pages, maxsize, start); 15153d671ca6SAl Viro if (iov_iter_is_xarray(i)) 15163d671ca6SAl Viro return iter_xarray_get_pages_alloc(i, pages, maxsize, start); 1517d879cb83SAl Viro return -EFAULT; 1518d879cb83SAl Viro } 1519d879cb83SAl Viro EXPORT_SYMBOL(iov_iter_get_pages_alloc); 1520d879cb83SAl Viro 1521d879cb83SAl Viro size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, 1522d879cb83SAl Viro struct iov_iter *i) 1523d879cb83SAl Viro { 1524d879cb83SAl Viro __wsum sum, next; 1525d879cb83SAl Viro sum = *csum; 15269ea9ce04SDavid Howells if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { 1527241699cdSAl Viro WARN_ON(1); 1528241699cdSAl Viro return 0; 1529241699cdSAl Viro } 15307baa5099SAl Viro iterate_and_advance(i, bytes, base, len, off, ({ 15317baa5099SAl Viro next = csum_and_copy_from_user(base, addr + off, len); 1532d879cb83SAl Viro sum = csum_block_add(sum, next, off); 15337baa5099SAl Viro next ? 
0 : len; 1534d879cb83SAl Viro }), ({ 15357baa5099SAl Viro sum = csum_and_memcpy(addr + off, base, len, sum, off); 1536d879cb83SAl Viro }) 1537d879cb83SAl Viro ) 1538d879cb83SAl Viro *csum = sum; 1539d879cb83SAl Viro return bytes; 1540d879cb83SAl Viro } 1541d879cb83SAl Viro EXPORT_SYMBOL(csum_and_copy_from_iter); 1542d879cb83SAl Viro 154352cbd23aSWillem de Bruijn size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, 1544d879cb83SAl Viro struct iov_iter *i) 1545d879cb83SAl Viro { 154652cbd23aSWillem de Bruijn struct csum_state *csstate = _csstate; 1547d879cb83SAl Viro __wsum sum, next; 154878e1f386SAl Viro 154978e1f386SAl Viro if (unlikely(iov_iter_is_discard(i))) { 1550241699cdSAl Viro WARN_ON(1); /* for now */ 1551241699cdSAl Viro return 0; 1552241699cdSAl Viro } 15536852df12SAl Viro 15546852df12SAl Viro sum = csum_shift(csstate->csum, csstate->off); 15556852df12SAl Viro if (unlikely(iov_iter_is_pipe(i))) 15566852df12SAl Viro bytes = csum_and_copy_to_pipe_iter(addr, bytes, i, &sum); 15576852df12SAl Viro else iterate_and_advance(i, bytes, base, len, off, ({ 15587baa5099SAl Viro next = csum_and_copy_to_user(addr + off, base, len); 1559d879cb83SAl Viro sum = csum_block_add(sum, next, off); 15607baa5099SAl Viro next ? 
0 : len; 1561d879cb83SAl Viro }), ({ 15627baa5099SAl Viro sum = csum_and_memcpy(base, addr + off, len, sum, off); 1563d879cb83SAl Viro }) 1564d879cb83SAl Viro ) 1565594e450bSAl Viro csstate->csum = csum_shift(sum, csstate->off); 1566594e450bSAl Viro csstate->off += bytes; 1567d879cb83SAl Viro return bytes; 1568d879cb83SAl Viro } 1569d879cb83SAl Viro EXPORT_SYMBOL(csum_and_copy_to_iter); 1570d879cb83SAl Viro 1571d05f4435SSagi Grimberg size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, 1572d05f4435SSagi Grimberg struct iov_iter *i) 1573d05f4435SSagi Grimberg { 15747999096fSHerbert Xu #ifdef CONFIG_CRYPTO_HASH 1575d05f4435SSagi Grimberg struct ahash_request *hash = hashp; 1576d05f4435SSagi Grimberg struct scatterlist sg; 1577d05f4435SSagi Grimberg size_t copied; 1578d05f4435SSagi Grimberg 1579d05f4435SSagi Grimberg copied = copy_to_iter(addr, bytes, i); 1580d05f4435SSagi Grimberg sg_init_one(&sg, addr, copied); 1581d05f4435SSagi Grimberg ahash_request_set_crypt(hash, &sg, NULL, copied); 1582d05f4435SSagi Grimberg crypto_ahash_update(hash); 1583d05f4435SSagi Grimberg return copied; 158427fad74aSYueHaibing #else 158527fad74aSYueHaibing return 0; 158627fad74aSYueHaibing #endif 1587d05f4435SSagi Grimberg } 1588d05f4435SSagi Grimberg EXPORT_SYMBOL(hash_and_copy_to_iter); 1589d05f4435SSagi Grimberg 159066531c65SAl Viro static int iov_npages(const struct iov_iter *i, int maxpages) 1591d879cb83SAl Viro { 159266531c65SAl Viro size_t skip = i->iov_offset, size = i->count; 159366531c65SAl Viro const struct iovec *p; 1594d879cb83SAl Viro int npages = 0; 1595d879cb83SAl Viro 159666531c65SAl Viro for (p = i->iov; size; skip = 0, p++) { 159766531c65SAl Viro unsigned offs = offset_in_page(p->iov_base + skip); 159866531c65SAl Viro size_t len = min(p->iov_len - skip, size); 1599d879cb83SAl Viro 160066531c65SAl Viro if (len) { 160166531c65SAl Viro size -= len; 160266531c65SAl Viro npages += DIV_ROUND_UP(offs + len, PAGE_SIZE); 160366531c65SAl Viro if 
(unlikely(npages > maxpages)) 160466531c65SAl Viro return maxpages; 160566531c65SAl Viro } 160666531c65SAl Viro } 160766531c65SAl Viro return npages; 160866531c65SAl Viro } 160966531c65SAl Viro 161066531c65SAl Viro static int bvec_npages(const struct iov_iter *i, int maxpages) 161166531c65SAl Viro { 161266531c65SAl Viro size_t skip = i->iov_offset, size = i->count; 161366531c65SAl Viro const struct bio_vec *p; 161466531c65SAl Viro int npages = 0; 161566531c65SAl Viro 161666531c65SAl Viro for (p = i->bvec; size; skip = 0, p++) { 161766531c65SAl Viro unsigned offs = (p->bv_offset + skip) % PAGE_SIZE; 161866531c65SAl Viro size_t len = min(p->bv_len - skip, size); 161966531c65SAl Viro 162066531c65SAl Viro size -= len; 162166531c65SAl Viro npages += DIV_ROUND_UP(offs + len, PAGE_SIZE); 162266531c65SAl Viro if (unlikely(npages > maxpages)) 162366531c65SAl Viro return maxpages; 162466531c65SAl Viro } 162566531c65SAl Viro return npages; 162666531c65SAl Viro } 162766531c65SAl Viro 162866531c65SAl Viro int iov_iter_npages(const struct iov_iter *i, int maxpages) 162966531c65SAl Viro { 163066531c65SAl Viro if (unlikely(!i->count)) 163166531c65SAl Viro return 0; 163266531c65SAl Viro /* iovec and kvec have identical layouts */ 163366531c65SAl Viro if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) 163466531c65SAl Viro return iov_npages(i, maxpages); 163566531c65SAl Viro if (iov_iter_is_bvec(i)) 163666531c65SAl Viro return bvec_npages(i, maxpages); 163766531c65SAl Viro if (iov_iter_is_pipe(i)) { 16388cefc107SDavid Howells unsigned int iter_head; 163966531c65SAl Viro int npages; 1640241699cdSAl Viro size_t off; 1641241699cdSAl Viro 1642241699cdSAl Viro if (!sanity(i)) 1643241699cdSAl Viro return 0; 1644241699cdSAl Viro 16458cefc107SDavid Howells data_start(i, &iter_head, &off); 1646241699cdSAl Viro /* some of this one + all after this one */ 164766531c65SAl Viro npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); 164866531c65SAl Viro return min(npages, maxpages); 
164966531c65SAl Viro } 165066531c65SAl Viro if (iov_iter_is_xarray(i)) { 1651e4f8df86SAl Viro unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE; 1652e4f8df86SAl Viro int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE); 165366531c65SAl Viro return min(npages, maxpages); 165466531c65SAl Viro } 165566531c65SAl Viro return 0; 1656d879cb83SAl Viro } 1657d879cb83SAl Viro EXPORT_SYMBOL(iov_iter_npages); 1658d879cb83SAl Viro 1659d879cb83SAl Viro const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) 1660d879cb83SAl Viro { 1661d879cb83SAl Viro *new = *old; 166200e23707SDavid Howells if (unlikely(iov_iter_is_pipe(new))) { 1663241699cdSAl Viro WARN_ON(1); 1664241699cdSAl Viro return NULL; 1665241699cdSAl Viro } 16667ff50620SDavid Howells if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new))) 16679ea9ce04SDavid Howells return NULL; 166800e23707SDavid Howells if (iov_iter_is_bvec(new)) 1669d879cb83SAl Viro return new->bvec = kmemdup(new->bvec, 1670d879cb83SAl Viro new->nr_segs * sizeof(struct bio_vec), 1671d879cb83SAl Viro flags); 1672d879cb83SAl Viro else 1673d879cb83SAl Viro /* iovec and kvec have identical layout */ 1674d879cb83SAl Viro return new->iov = kmemdup(new->iov, 1675d879cb83SAl Viro new->nr_segs * sizeof(struct iovec), 1676d879cb83SAl Viro flags); 1677d879cb83SAl Viro } 1678d879cb83SAl Viro EXPORT_SYMBOL(dup_iter); 1679bc917be8SAl Viro 1680bfdc5970SChristoph Hellwig static int copy_compat_iovec_from_user(struct iovec *iov, 1681bfdc5970SChristoph Hellwig const struct iovec __user *uvec, unsigned long nr_segs) 1682bfdc5970SChristoph Hellwig { 1683bfdc5970SChristoph Hellwig const struct compat_iovec __user *uiov = 1684bfdc5970SChristoph Hellwig (const struct compat_iovec __user *)uvec; 1685bfdc5970SChristoph Hellwig int ret = -EFAULT, i; 1686bfdc5970SChristoph Hellwig 1687a959a978SChristoph Hellwig if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) 1688bfdc5970SChristoph Hellwig return -EFAULT; 
1689bfdc5970SChristoph Hellwig 1690bfdc5970SChristoph Hellwig for (i = 0; i < nr_segs; i++) { 1691bfdc5970SChristoph Hellwig compat_uptr_t buf; 1692bfdc5970SChristoph Hellwig compat_ssize_t len; 1693bfdc5970SChristoph Hellwig 1694bfdc5970SChristoph Hellwig unsafe_get_user(len, &uiov[i].iov_len, uaccess_end); 1695bfdc5970SChristoph Hellwig unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end); 1696bfdc5970SChristoph Hellwig 1697bfdc5970SChristoph Hellwig /* check for compat_size_t not fitting in compat_ssize_t .. */ 1698bfdc5970SChristoph Hellwig if (len < 0) { 1699bfdc5970SChristoph Hellwig ret = -EINVAL; 1700bfdc5970SChristoph Hellwig goto uaccess_end; 1701bfdc5970SChristoph Hellwig } 1702bfdc5970SChristoph Hellwig iov[i].iov_base = compat_ptr(buf); 1703bfdc5970SChristoph Hellwig iov[i].iov_len = len; 1704bfdc5970SChristoph Hellwig } 1705bfdc5970SChristoph Hellwig 1706bfdc5970SChristoph Hellwig ret = 0; 1707bfdc5970SChristoph Hellwig uaccess_end: 1708bfdc5970SChristoph Hellwig user_access_end(); 1709bfdc5970SChristoph Hellwig return ret; 1710bfdc5970SChristoph Hellwig } 1711bfdc5970SChristoph Hellwig 1712bfdc5970SChristoph Hellwig static int copy_iovec_from_user(struct iovec *iov, 1713bfdc5970SChristoph Hellwig const struct iovec __user *uvec, unsigned long nr_segs) 1714fb041b59SDavid Laight { 1715fb041b59SDavid Laight unsigned long seg; 1716bfdc5970SChristoph Hellwig 1717bfdc5970SChristoph Hellwig if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec))) 1718bfdc5970SChristoph Hellwig return -EFAULT; 1719bfdc5970SChristoph Hellwig for (seg = 0; seg < nr_segs; seg++) { 1720bfdc5970SChristoph Hellwig if ((ssize_t)iov[seg].iov_len < 0) 1721bfdc5970SChristoph Hellwig return -EINVAL; 1722bfdc5970SChristoph Hellwig } 1723bfdc5970SChristoph Hellwig 1724bfdc5970SChristoph Hellwig return 0; 1725bfdc5970SChristoph Hellwig } 1726bfdc5970SChristoph Hellwig 1727bfdc5970SChristoph Hellwig struct iovec *iovec_from_user(const struct iovec __user *uvec, 1728bfdc5970SChristoph 
Hellwig unsigned long nr_segs, unsigned long fast_segs, 1729bfdc5970SChristoph Hellwig struct iovec *fast_iov, bool compat) 1730bfdc5970SChristoph Hellwig { 1731bfdc5970SChristoph Hellwig struct iovec *iov = fast_iov; 1732bfdc5970SChristoph Hellwig int ret; 1733fb041b59SDavid Laight 1734fb041b59SDavid Laight /* 1735bfdc5970SChristoph Hellwig * SuS says "The readv() function *may* fail if the iovcnt argument was 1736bfdc5970SChristoph Hellwig * less than or equal to 0, or greater than {IOV_MAX}. Linux has 1737fb041b59SDavid Laight * traditionally returned zero for zero segments, so... 1738fb041b59SDavid Laight */ 1739bfdc5970SChristoph Hellwig if (nr_segs == 0) 1740bfdc5970SChristoph Hellwig return iov; 1741bfdc5970SChristoph Hellwig if (nr_segs > UIO_MAXIOV) 1742bfdc5970SChristoph Hellwig return ERR_PTR(-EINVAL); 1743fb041b59SDavid Laight if (nr_segs > fast_segs) { 1744fb041b59SDavid Laight iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL); 1745bfdc5970SChristoph Hellwig if (!iov) 1746bfdc5970SChristoph Hellwig return ERR_PTR(-ENOMEM); 1747fb041b59SDavid Laight } 1748bfdc5970SChristoph Hellwig 1749bfdc5970SChristoph Hellwig if (compat) 1750bfdc5970SChristoph Hellwig ret = copy_compat_iovec_from_user(iov, uvec, nr_segs); 1751bfdc5970SChristoph Hellwig else 1752bfdc5970SChristoph Hellwig ret = copy_iovec_from_user(iov, uvec, nr_segs); 1753bfdc5970SChristoph Hellwig if (ret) { 1754bfdc5970SChristoph Hellwig if (iov != fast_iov) 1755bfdc5970SChristoph Hellwig kfree(iov); 1756bfdc5970SChristoph Hellwig return ERR_PTR(ret); 1757fb041b59SDavid Laight } 1758bfdc5970SChristoph Hellwig 1759bfdc5970SChristoph Hellwig return iov; 1760bfdc5970SChristoph Hellwig } 1761bfdc5970SChristoph Hellwig 1762bfdc5970SChristoph Hellwig ssize_t __import_iovec(int type, const struct iovec __user *uvec, 1763bfdc5970SChristoph Hellwig unsigned nr_segs, unsigned fast_segs, struct iovec **iovp, 1764bfdc5970SChristoph Hellwig struct iov_iter *i, bool compat) 1765bfdc5970SChristoph 
Hellwig { 1766bfdc5970SChristoph Hellwig ssize_t total_len = 0; 1767bfdc5970SChristoph Hellwig unsigned long seg; 1768bfdc5970SChristoph Hellwig struct iovec *iov; 1769bfdc5970SChristoph Hellwig 1770bfdc5970SChristoph Hellwig iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat); 1771bfdc5970SChristoph Hellwig if (IS_ERR(iov)) { 1772bfdc5970SChristoph Hellwig *iovp = NULL; 1773bfdc5970SChristoph Hellwig return PTR_ERR(iov); 1774fb041b59SDavid Laight } 1775fb041b59SDavid Laight 1776fb041b59SDavid Laight /* 1777bfdc5970SChristoph Hellwig * According to the Single Unix Specification we should return EINVAL if 1778bfdc5970SChristoph Hellwig * an element length is < 0 when cast to ssize_t or if the total length 1779bfdc5970SChristoph Hellwig * would overflow the ssize_t return value of the system call. 1780fb041b59SDavid Laight * 1781fb041b59SDavid Laight * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 1782fb041b59SDavid Laight * overflow case. 1783fb041b59SDavid Laight */ 1784fb041b59SDavid Laight for (seg = 0; seg < nr_segs; seg++) { 1785fb041b59SDavid Laight ssize_t len = (ssize_t)iov[seg].iov_len; 1786fb041b59SDavid Laight 1787bfdc5970SChristoph Hellwig if (!access_ok(iov[seg].iov_base, len)) { 1788bfdc5970SChristoph Hellwig if (iov != *iovp) 1789bfdc5970SChristoph Hellwig kfree(iov); 1790bfdc5970SChristoph Hellwig *iovp = NULL; 1791bfdc5970SChristoph Hellwig return -EFAULT; 1792fb041b59SDavid Laight } 1793bfdc5970SChristoph Hellwig 1794bfdc5970SChristoph Hellwig if (len > MAX_RW_COUNT - total_len) { 1795bfdc5970SChristoph Hellwig len = MAX_RW_COUNT - total_len; 1796fb041b59SDavid Laight iov[seg].iov_len = len; 1797fb041b59SDavid Laight } 1798bfdc5970SChristoph Hellwig total_len += len; 1799fb041b59SDavid Laight } 1800bfdc5970SChristoph Hellwig 1801bfdc5970SChristoph Hellwig iov_iter_init(i, type, iov, nr_segs, total_len); 1802bfdc5970SChristoph Hellwig if (iov == *iovp) 1803bfdc5970SChristoph Hellwig *iovp = NULL; 1804bfdc5970SChristoph 
Hellwig else 1805bfdc5970SChristoph Hellwig *iovp = iov; 1806bfdc5970SChristoph Hellwig return total_len; 1807fb041b59SDavid Laight } 1808fb041b59SDavid Laight 1809ffecee4fSVegard Nossum /** 1810ffecee4fSVegard Nossum * import_iovec() - Copy an array of &struct iovec from userspace 1811ffecee4fSVegard Nossum * into the kernel, check that it is valid, and initialize a new 1812ffecee4fSVegard Nossum * &struct iov_iter iterator to access it. 1813ffecee4fSVegard Nossum * 1814ffecee4fSVegard Nossum * @type: One of %READ or %WRITE. 1815bfdc5970SChristoph Hellwig * @uvec: Pointer to the userspace array. 1816ffecee4fSVegard Nossum * @nr_segs: Number of elements in userspace array. 1817ffecee4fSVegard Nossum * @fast_segs: Number of elements in @iov. 1818bfdc5970SChristoph Hellwig * @iovp: (input and output parameter) Pointer to pointer to (usually small 1819ffecee4fSVegard Nossum * on-stack) kernel array. 1820ffecee4fSVegard Nossum * @i: Pointer to iterator that will be initialized on success. 1821ffecee4fSVegard Nossum * 1822ffecee4fSVegard Nossum * If the array pointed to by *@iov is large enough to hold all @nr_segs, 1823ffecee4fSVegard Nossum * then this function places %NULL in *@iov on return. Otherwise, a new 1824ffecee4fSVegard Nossum * array will be allocated and the result placed in *@iov. This means that 1825ffecee4fSVegard Nossum * the caller may call kfree() on *@iov regardless of whether the small 1826ffecee4fSVegard Nossum * on-stack array was used or not (and regardless of whether this function 1827ffecee4fSVegard Nossum * returns an error or not). 
1828ffecee4fSVegard Nossum * 182987e5e6daSJens Axboe * Return: Negative error code on error, bytes imported on success 1830ffecee4fSVegard Nossum */ 1831bfdc5970SChristoph Hellwig ssize_t import_iovec(int type, const struct iovec __user *uvec, 1832bc917be8SAl Viro unsigned nr_segs, unsigned fast_segs, 1833bfdc5970SChristoph Hellwig struct iovec **iovp, struct iov_iter *i) 1834bc917be8SAl Viro { 183589cd35c5SChristoph Hellwig return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, 183689cd35c5SChristoph Hellwig in_compat_syscall()); 1837bc917be8SAl Viro } 1838bc917be8SAl Viro EXPORT_SYMBOL(import_iovec); 1839bc917be8SAl Viro 1840bc917be8SAl Viro int import_single_range(int rw, void __user *buf, size_t len, 1841bc917be8SAl Viro struct iovec *iov, struct iov_iter *i) 1842bc917be8SAl Viro { 1843bc917be8SAl Viro if (len > MAX_RW_COUNT) 1844bc917be8SAl Viro len = MAX_RW_COUNT; 184596d4f267SLinus Torvalds if (unlikely(!access_ok(buf, len))) 1846bc917be8SAl Viro return -EFAULT; 1847bc917be8SAl Viro 1848bc917be8SAl Viro iov->iov_base = buf; 1849bc917be8SAl Viro iov->iov_len = len; 1850bc917be8SAl Viro iov_iter_init(i, rw, iov, 1, len); 1851bc917be8SAl Viro return 0; 1852bc917be8SAl Viro } 1853e1267585SAl Viro EXPORT_SYMBOL(import_single_range); 18548fb0f47aSJens Axboe 18558fb0f47aSJens Axboe /** 18568fb0f47aSJens Axboe * iov_iter_restore() - Restore a &struct iov_iter to the same state as when 18578fb0f47aSJens Axboe * iov_iter_save_state() was called. 18588fb0f47aSJens Axboe * 18598fb0f47aSJens Axboe * @i: &struct iov_iter to restore 18608fb0f47aSJens Axboe * @state: state to restore from 18618fb0f47aSJens Axboe * 18628fb0f47aSJens Axboe * Used after iov_iter_save_state() to bring restore @i, if operations may 18638fb0f47aSJens Axboe * have advanced it. 
18648fb0f47aSJens Axboe * 18658fb0f47aSJens Axboe * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC 18668fb0f47aSJens Axboe */ 18678fb0f47aSJens Axboe void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) 18688fb0f47aSJens Axboe { 18698fb0f47aSJens Axboe if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) && 18708fb0f47aSJens Axboe !iov_iter_is_kvec(i)) 18718fb0f47aSJens Axboe return; 18728fb0f47aSJens Axboe i->iov_offset = state->iov_offset; 18738fb0f47aSJens Axboe i->count = state->count; 18748fb0f47aSJens Axboe /* 18758fb0f47aSJens Axboe * For the *vec iters, nr_segs + iov is constant - if we increment 18768fb0f47aSJens Axboe * the vec, then we also decrement the nr_segs count. Hence we don't 18778fb0f47aSJens Axboe * need to track both of these, just one is enough and we can deduct 18788fb0f47aSJens Axboe * the other from that. ITER_KVEC and ITER_IOVEC are the same struct 18798fb0f47aSJens Axboe * size, so we can just increment the iov pointer as they are unionzed. 18808fb0f47aSJens Axboe * ITER_BVEC _may_ be the same size on some archs, but on others it is 18818fb0f47aSJens Axboe * not. Be safe and handle it separately. 18828fb0f47aSJens Axboe */ 18838fb0f47aSJens Axboe BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); 18848fb0f47aSJens Axboe if (iov_iter_is_bvec(i)) 18858fb0f47aSJens Axboe i->bvec -= state->nr_segs - i->nr_segs; 18868fb0f47aSJens Axboe else 18878fb0f47aSJens Axboe i->iov -= state->nr_segs - i->nr_segs; 18888fb0f47aSJens Axboe i->nr_segs = state->nr_segs; 18898fb0f47aSJens Axboe } 1890