15274f052SJens Axboe /* 25274f052SJens Axboe * "splice": joining two ropes together by interweaving their strands. 35274f052SJens Axboe * 45274f052SJens Axboe * This is the "extended pipe" functionality, where a pipe is used as 55274f052SJens Axboe * an arbitrary in-memory buffer. Think of a pipe as a small kernel 65274f052SJens Axboe * buffer that you can use to transfer data from one end to the other. 75274f052SJens Axboe * 85274f052SJens Axboe * The traditional unix read/write is extended with a "splice()" operation 95274f052SJens Axboe * that transfers data buffers to or from a pipe buffer. 105274f052SJens Axboe * 115274f052SJens Axboe * Named by Larry McVoy, original implementation from Linus, extended by 125274f052SJens Axboe * Jens to support splicing to files and fixing the initial implementation 135274f052SJens Axboe * bugs. 145274f052SJens Axboe * 155274f052SJens Axboe * Copyright (C) 2005 Jens Axboe <axboe@suse.de> 165274f052SJens Axboe * Copyright (C) 2005 Linus Torvalds <torvalds@osdl.org> 175274f052SJens Axboe * 185274f052SJens Axboe */ 195274f052SJens Axboe #include <linux/fs.h> 205274f052SJens Axboe #include <linux/file.h> 215274f052SJens Axboe #include <linux/pagemap.h> 225274f052SJens Axboe #include <linux/pipe_fs_i.h> 235274f052SJens Axboe #include <linux/mm_inline.h> 245abc97aaSJens Axboe #include <linux/swap.h> 254f6f0bd2SJens Axboe #include <linux/writeback.h> 264f6f0bd2SJens Axboe #include <linux/buffer_head.h> 27a0f06780SJeff Garzik #include <linux/module.h> 284f6f0bd2SJens Axboe #include <linux/syscalls.h> 295274f052SJens Axboe 305274f052SJens Axboe /* 315274f052SJens Axboe * Passed to the actors 325274f052SJens Axboe */ 335274f052SJens Axboe struct splice_desc { 345274f052SJens Axboe unsigned int len, total_len; /* current and remaining length */ 355274f052SJens Axboe unsigned int flags; /* splice flags */ 365274f052SJens Axboe struct file *file; /* file to read/write */ 375274f052SJens Axboe loff_t pos; /* file position */ 385274f052SJens Axboe }; 395274f052SJens Axboe 4083f9135bSJens Axboe /* 4183f9135bSJens Axboe * Attempt to steal a page from a pipe buffer. This should perhaps go into 4283f9135bSJens Axboe * a vm helper function, it's already simplified quite a bit by the 4383f9135bSJens Axboe * addition of remove_mapping(). If success is returned, the caller may 4483f9135bSJens Axboe * attempt to reuse this page for another destination. 4583f9135bSJens Axboe */ 465abc97aaSJens Axboe static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, 475abc97aaSJens Axboe struct pipe_buffer *buf) 485abc97aaSJens Axboe { 495abc97aaSJens Axboe struct page *page = buf->page; 504f6f0bd2SJens Axboe struct address_space *mapping = page_mapping(page); 515abc97aaSJens Axboe 525abc97aaSJens Axboe WARN_ON(!PageLocked(page)); 535abc97aaSJens Axboe WARN_ON(!PageUptodate(page)); 545abc97aaSJens Axboe 55ad8d6f0aSJens Axboe /* 56ad8d6f0aSJens Axboe * At least for ext2 with nobh option, we need to wait on writeback 57ad8d6f0aSJens Axboe * completing on this page, since we'll remove it from the pagecache. 58ad8d6f0aSJens Axboe * Otherwise truncate wont wait on the page, allowing the disk 59ad8d6f0aSJens Axboe * blocks to be reused by someone else before we actually wrote our 60ad8d6f0aSJens Axboe * data to them. fs corruption ensues. 61ad8d6f0aSJens Axboe */ 62ad8d6f0aSJens Axboe wait_on_page_writeback(page); 63ad8d6f0aSJens Axboe 644f6f0bd2SJens Axboe if (PagePrivate(page)) 654f6f0bd2SJens Axboe try_to_release_page(page, mapping_gfp_mask(mapping)); 664f6f0bd2SJens Axboe 674f6f0bd2SJens Axboe if (!remove_mapping(mapping, page)) 685abc97aaSJens Axboe return 1; 695abc97aaSJens Axboe 703e7ee3e7SJens Axboe buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; 715abc97aaSJens Axboe return 0; 725abc97aaSJens Axboe } 735abc97aaSJens Axboe 745274f052SJens Axboe static void page_cache_pipe_buf_release(struct pipe_inode_info *info, 755274f052SJens Axboe struct pipe_buffer *buf) 765274f052SJens Axboe { 775274f052SJens Axboe page_cache_release(buf->page); 785274f052SJens Axboe buf->page = NULL; 793e7ee3e7SJens Axboe buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); 805274f052SJens Axboe } 815274f052SJens Axboe 825274f052SJens Axboe static void *page_cache_pipe_buf_map(struct file *file, 835274f052SJens Axboe struct pipe_inode_info *info, 845274f052SJens Axboe struct pipe_buffer *buf) 855274f052SJens Axboe { 865274f052SJens Axboe struct page *page = buf->page; 8749d0b21bSJens Axboe int err; 885274f052SJens Axboe 895274f052SJens Axboe if (!PageUptodate(page)) { 9049d0b21bSJens Axboe lock_page(page); 915274f052SJens Axboe 9249d0b21bSJens Axboe /* 9349d0b21bSJens Axboe * Page got truncated/unhashed. This will cause a 0-byte 9449d0b21bSJens Axboe * splice, if this is the first page 9549d0b21bSJens Axboe */ 965274f052SJens Axboe if (!page->mapping) { 9749d0b21bSJens Axboe err = -ENODATA; 9849d0b21bSJens Axboe goto error; 995274f052SJens Axboe } 1005274f052SJens Axboe 10149d0b21bSJens Axboe /* 10249d0b21bSJens Axboe * uh oh, read-error from disk 10349d0b21bSJens Axboe */ 10449d0b21bSJens Axboe if (!PageUptodate(page)) { 10549d0b21bSJens Axboe err = -EIO; 10649d0b21bSJens Axboe goto error; 10749d0b21bSJens Axboe } 10849d0b21bSJens Axboe 10949d0b21bSJens Axboe /* 11049d0b21bSJens Axboe * page is ok afterall, fall through to mapping 11149d0b21bSJens Axboe */ 11249d0b21bSJens Axboe unlock_page(page); 11349d0b21bSJens Axboe } 11449d0b21bSJens Axboe 11549d0b21bSJens Axboe return kmap(page); 11649d0b21bSJens Axboe error: 11749d0b21bSJens Axboe unlock_page(page); 11849d0b21bSJens Axboe return ERR_PTR(err); 1195274f052SJens Axboe } 1205274f052SJens Axboe 1215274f052SJens Axboe static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 1225274f052SJens Axboe struct pipe_buffer *buf) 1235274f052SJens Axboe { 1245274f052SJens Axboe kunmap(buf->page); 1255274f052SJens Axboe } 1265274f052SJens Axboe 1275274f052SJens Axboe static struct pipe_buf_operations page_cache_pipe_buf_ops = { 1285274f052SJens Axboe .can_merge = 0, 1295274f052SJens Axboe .map = page_cache_pipe_buf_map, 1305274f052SJens Axboe .unmap = page_cache_pipe_buf_unmap, 1315274f052SJens Axboe .release = page_cache_pipe_buf_release, 1325abc97aaSJens Axboe .steal = page_cache_pipe_buf_steal, 1335274f052SJens Axboe }; 1345274f052SJens Axboe 13583f9135bSJens Axboe /* 13683f9135bSJens Axboe * Pipe output worker. This sets up our pipe format with the page cache 13783f9135bSJens Axboe * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 13883f9135bSJens Axboe */ 1393a326a2cSIngo Molnar static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 1405274f052SJens Axboe int nr_pages, unsigned long offset, 14129e35094SLinus Torvalds unsigned long len, unsigned int flags) 1425274f052SJens Axboe { 1435274f052SJens Axboe int ret, do_wakeup, i; 1445274f052SJens Axboe 1455274f052SJens Axboe ret = 0; 1465274f052SJens Axboe do_wakeup = 0; 1475274f052SJens Axboe i = 0; 1485274f052SJens Axboe 1493a326a2cSIngo Molnar if (pipe->inode) 1503a326a2cSIngo Molnar mutex_lock(&pipe->inode->i_mutex); 1515274f052SJens Axboe 1525274f052SJens Axboe for (;;) { 1535274f052SJens Axboe int bufs; 1545274f052SJens Axboe 1553a326a2cSIngo Molnar if (!pipe->readers) { 1565274f052SJens Axboe send_sig(SIGPIPE, current, 0); 1575274f052SJens Axboe if (!ret) 1585274f052SJens Axboe ret = -EPIPE; 1595274f052SJens Axboe break; 1605274f052SJens Axboe } 1615274f052SJens Axboe 1623a326a2cSIngo Molnar bufs = pipe->nrbufs; 1635274f052SJens Axboe if (bufs < PIPE_BUFFERS) { 1643a326a2cSIngo Molnar int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS - 1); 1653a326a2cSIngo Molnar struct pipe_buffer *buf = pipe->bufs + newbuf; 1665274f052SJens Axboe struct page *page = pages[i++]; 1675274f052SJens Axboe unsigned long this_len; 1685274f052SJens Axboe 1695274f052SJens Axboe this_len = PAGE_CACHE_SIZE - offset; 1705274f052SJens Axboe if (this_len > len) 1715274f052SJens Axboe this_len = len; 1725274f052SJens Axboe 1735274f052SJens Axboe buf->page = page; 1745274f052SJens Axboe buf->offset = offset; 1755274f052SJens Axboe buf->len = this_len; 1765274f052SJens Axboe buf->ops = &page_cache_pipe_buf_ops; 1773a326a2cSIngo Molnar pipe->nrbufs = ++bufs; 1785274f052SJens Axboe do_wakeup = 1; 1795274f052SJens Axboe 1805274f052SJens Axboe ret += this_len; 1815274f052SJens Axboe len -= this_len; 1825274f052SJens Axboe offset = 0; 1835274f052SJens Axboe if (!--nr_pages) 1845274f052SJens Axboe break; 1855274f052SJens Axboe if (!len) 1865274f052SJens Axboe break; 1875274f052SJens Axboe if (bufs < PIPE_BUFFERS) 1885274f052SJens Axboe continue; 1895274f052SJens Axboe 1905274f052SJens Axboe break; 1915274f052SJens Axboe } 1925274f052SJens Axboe 19329e35094SLinus Torvalds if (flags & SPLICE_F_NONBLOCK) { 19429e35094SLinus Torvalds if (!ret) 19529e35094SLinus Torvalds ret = -EAGAIN; 19629e35094SLinus Torvalds break; 19729e35094SLinus Torvalds } 19829e35094SLinus Torvalds 1995274f052SJens Axboe if (signal_pending(current)) { 2005274f052SJens Axboe if (!ret) 2015274f052SJens Axboe ret = -ERESTARTSYS; 2025274f052SJens Axboe break; 2035274f052SJens Axboe } 2045274f052SJens Axboe 2055274f052SJens Axboe if (do_wakeup) { 206c0bd1f65SJens Axboe smp_mb(); 2073a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 2083a326a2cSIngo Molnar wake_up_interruptible_sync(&pipe->wait); 2093a326a2cSIngo Molnar kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 2105274f052SJens Axboe do_wakeup = 0; 2115274f052SJens Axboe } 2125274f052SJens Axboe 2133a326a2cSIngo Molnar pipe->waiting_writers++; 2143a326a2cSIngo Molnar pipe_wait(pipe); 2153a326a2cSIngo Molnar pipe->waiting_writers--; 2165274f052SJens Axboe } 2175274f052SJens Axboe 2183a326a2cSIngo Molnar if (pipe->inode) 2193a326a2cSIngo Molnar mutex_unlock(&pipe->inode->i_mutex); 2205274f052SJens Axboe 2215274f052SJens Axboe if (do_wakeup) { 222c0bd1f65SJens Axboe smp_mb(); 2233a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 2243a326a2cSIngo Molnar wake_up_interruptible(&pipe->wait); 2253a326a2cSIngo Molnar kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 2265274f052SJens Axboe } 2275274f052SJens Axboe 2285274f052SJens Axboe while (i < nr_pages) 2295274f052SJens Axboe page_cache_release(pages[i++]); 2305274f052SJens Axboe 2315274f052SJens Axboe return ret; 2325274f052SJens Axboe } 2335274f052SJens Axboe 2343a326a2cSIngo Molnar static int 2353a326a2cSIngo Molnar __generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe, 23629e35094SLinus Torvalds size_t len, unsigned int flags) 2375274f052SJens Axboe { 2385274f052SJens Axboe struct address_space *mapping = in->f_mapping; 2395274f052SJens Axboe unsigned int offset, nr_pages; 24016c523ddSJens Axboe struct page *pages[PIPE_BUFFERS]; 2415274f052SJens Axboe struct page *page; 24216c523ddSJens Axboe pgoff_t index; 24316c523ddSJens Axboe int i; 2445274f052SJens Axboe 2455274f052SJens Axboe index = in->f_pos >> PAGE_CACHE_SHIFT; 2465274f052SJens Axboe offset = in->f_pos & ~PAGE_CACHE_MASK; 2475274f052SJens Axboe nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 2485274f052SJens Axboe 2495274f052SJens Axboe if (nr_pages > PIPE_BUFFERS) 2505274f052SJens Axboe nr_pages = PIPE_BUFFERS; 2515274f052SJens Axboe 2525274f052SJens Axboe /* 2530b749ce3SJens Axboe * initiate read-ahead on this page range. however, don't call into 2540b749ce3SJens Axboe * read-ahead if this is a non-zero offset (we are likely doing small 2550b749ce3SJens Axboe * chunk splice and the page is already there) for a single page. 2565274f052SJens Axboe */ 2570b749ce3SJens Axboe if (!offset || nr_pages > 1) 2585274f052SJens Axboe do_page_cache_readahead(mapping, in, index, nr_pages); 2595274f052SJens Axboe 2605274f052SJens Axboe /* 2615274f052SJens Axboe * now fill in the holes 2625274f052SJens Axboe */ 26316c523ddSJens Axboe for (i = 0; i < nr_pages; i++, index++) { 2645274f052SJens Axboe /* 2655274f052SJens Axboe * no page there, look one up / create it 2665274f052SJens Axboe */ 26716c523ddSJens Axboe page = find_or_create_page(mapping, index, 2685274f052SJens Axboe mapping_gfp_mask(mapping)); 2695274f052SJens Axboe if (!page) 2705274f052SJens Axboe break; 2715274f052SJens Axboe 2725274f052SJens Axboe if (PageUptodate(page)) 2735274f052SJens Axboe unlock_page(page); 2745274f052SJens Axboe else { 27516c523ddSJens Axboe int error = mapping->a_ops->readpage(in, page); 2765274f052SJens Axboe 2775274f052SJens Axboe if (unlikely(error)) { 2785274f052SJens Axboe page_cache_release(page); 2795274f052SJens Axboe break; 2805274f052SJens Axboe } 2815274f052SJens Axboe } 28216c523ddSJens Axboe pages[i] = page; 2835274f052SJens Axboe } 2845274f052SJens Axboe 28516c523ddSJens Axboe if (i) 28629e35094SLinus Torvalds return move_to_pipe(pipe, pages, i, offset, len, flags); 28716c523ddSJens Axboe 28816c523ddSJens Axboe return 0; 2895274f052SJens Axboe } 2905274f052SJens Axboe 29183f9135bSJens Axboe /** 29283f9135bSJens Axboe * generic_file_splice_read - splice data from file to a pipe 29383f9135bSJens Axboe * @in: file to splice from 29483f9135bSJens Axboe * @pipe: pipe to splice to 29583f9135bSJens Axboe * @len: number of bytes to splice 29683f9135bSJens Axboe * @flags: splice modifier flags 29783f9135bSJens Axboe * 29883f9135bSJens Axboe * Will read pages from given file and fill them into a pipe. 29983f9135bSJens Axboe * 30083f9135bSJens Axboe */ 3013a326a2cSIngo Molnar ssize_t generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe, 3025274f052SJens Axboe size_t len, unsigned int flags) 3035274f052SJens Axboe { 3045274f052SJens Axboe ssize_t spliced; 3055274f052SJens Axboe int ret; 3065274f052SJens Axboe 3075274f052SJens Axboe ret = 0; 3085274f052SJens Axboe spliced = 0; 3093a326a2cSIngo Molnar 3105274f052SJens Axboe while (len) { 31129e35094SLinus Torvalds ret = __generic_file_splice_read(in, pipe, len, flags); 3125274f052SJens Axboe 3135274f052SJens Axboe if (ret <= 0) 3145274f052SJens Axboe break; 3155274f052SJens Axboe 3165274f052SJens Axboe in->f_pos += ret; 3175274f052SJens Axboe len -= ret; 3185274f052SJens Axboe spliced += ret; 31929e35094SLinus Torvalds 32029e35094SLinus Torvalds if (!(flags & SPLICE_F_NONBLOCK)) 32129e35094SLinus Torvalds continue; 32229e35094SLinus Torvalds ret = -EAGAIN; 32329e35094SLinus Torvalds break; 3245274f052SJens Axboe } 3255274f052SJens Axboe 3265274f052SJens Axboe if (spliced) 3275274f052SJens Axboe return spliced; 3285274f052SJens Axboe 3295274f052SJens Axboe return ret; 3305274f052SJens Axboe } 3315274f052SJens Axboe 332059a8f37SJens Axboe EXPORT_SYMBOL(generic_file_splice_read); 333059a8f37SJens Axboe 3345274f052SJens Axboe /* 3354f6f0bd2SJens Axboe * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 3364f6f0bd2SJens Axboe * using sendpage(). 3375274f052SJens Axboe */ 3385274f052SJens Axboe static int pipe_to_sendpage(struct pipe_inode_info *info, 3395274f052SJens Axboe struct pipe_buffer *buf, struct splice_desc *sd) 3405274f052SJens Axboe { 3415274f052SJens Axboe struct file *file = sd->file; 3425274f052SJens Axboe loff_t pos = sd->pos; 3435274f052SJens Axboe unsigned int offset; 3445274f052SJens Axboe ssize_t ret; 3455274f052SJens Axboe void *ptr; 346b2b39fa4SJens Axboe int more; 3475274f052SJens Axboe 3485274f052SJens Axboe /* 3495274f052SJens Axboe * sub-optimal, but we are limited by the pipe ->map. we don't 3505274f052SJens Axboe * need a kmap'ed buffer here, we just want to make sure we 3515274f052SJens Axboe * have the page pinned if the pipe page originates from the 3525274f052SJens Axboe * page cache 3535274f052SJens Axboe */ 3545274f052SJens Axboe ptr = buf->ops->map(file, info, buf); 3555274f052SJens Axboe if (IS_ERR(ptr)) 3565274f052SJens Axboe return PTR_ERR(ptr); 3575274f052SJens Axboe 3585274f052SJens Axboe offset = pos & ~PAGE_CACHE_MASK; 359b2b39fa4SJens Axboe more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 3605274f052SJens Axboe 361b2b39fa4SJens Axboe ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 3625274f052SJens Axboe 3635274f052SJens Axboe buf->ops->unmap(info, buf); 3645274f052SJens Axboe if (ret == sd->len) 3655274f052SJens Axboe return 0; 3665274f052SJens Axboe 3675274f052SJens Axboe return -EIO; 3685274f052SJens Axboe } 3695274f052SJens Axboe 3705274f052SJens Axboe /* 3715274f052SJens Axboe * This is a little more tricky than the file -> pipe splicing. There are 3725274f052SJens Axboe * basically three cases: 3735274f052SJens Axboe * 3745274f052SJens Axboe * - Destination page already exists in the address space and there 3755274f052SJens Axboe * are users of it. For that case we have no other option that 3765274f052SJens Axboe * copying the data. Tough luck. 3775274f052SJens Axboe * - Destination page already exists in the address space, but there 3785274f052SJens Axboe * are no users of it. Make sure it's uptodate, then drop it. Fall 3795274f052SJens Axboe * through to last case. 3805274f052SJens Axboe * - Destination page does not exist, we can add the pipe page to 3815274f052SJens Axboe * the page cache and avoid the copy. 3825274f052SJens Axboe * 38383f9135bSJens Axboe * If asked to move pages to the output file (SPLICE_F_MOVE is set in 38483f9135bSJens Axboe * sd->flags), we attempt to migrate pages from the pipe to the output 38583f9135bSJens Axboe * file address space page cache. This is possible if no one else has 38683f9135bSJens Axboe * the pipe page referenced outside of the pipe and page cache. If 38783f9135bSJens Axboe * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create 38883f9135bSJens Axboe * a new page in the output file page cache and fill/dirty that. 3895274f052SJens Axboe */ 3905274f052SJens Axboe static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, 3915274f052SJens Axboe struct splice_desc *sd) 3925274f052SJens Axboe { 3935274f052SJens Axboe struct file *file = sd->file; 3945274f052SJens Axboe struct address_space *mapping = file->f_mapping; 3953e7ee3e7SJens Axboe gfp_t gfp_mask = mapping_gfp_mask(mapping); 3965274f052SJens Axboe unsigned int offset; 3975274f052SJens Axboe struct page *page; 3985274f052SJens Axboe pgoff_t index; 3995abc97aaSJens Axboe char *src; 4003e7ee3e7SJens Axboe int ret; 4015274f052SJens Axboe 4025274f052SJens Axboe /* 40349d0b21bSJens Axboe * make sure the data in this buffer is uptodate 4045274f052SJens Axboe */ 4055274f052SJens Axboe src = buf->ops->map(file, info, buf); 4065274f052SJens Axboe if (IS_ERR(src)) 4075274f052SJens Axboe return PTR_ERR(src); 4085274f052SJens Axboe 4095274f052SJens Axboe index = sd->pos >> PAGE_CACHE_SHIFT; 4105274f052SJens Axboe offset = sd->pos & ~PAGE_CACHE_MASK; 4115274f052SJens Axboe 4125abc97aaSJens Axboe /* 4135abc97aaSJens Axboe * reuse buf page, if SPLICE_F_MOVE is set 4145abc97aaSJens Axboe */ 4155abc97aaSJens Axboe if (sd->flags & SPLICE_F_MOVE) { 41683f9135bSJens Axboe /* 41783f9135bSJens Axboe * If steal succeeds, buf->page is now pruned from the vm 41883f9135bSJens Axboe * side (LRU and page cache) and we can reuse it. 41983f9135bSJens Axboe */ 4205abc97aaSJens Axboe if (buf->ops->steal(info, buf)) 4215abc97aaSJens Axboe goto find_page; 4225abc97aaSJens Axboe 42349d0b21bSJens Axboe /* 42449d0b21bSJens Axboe * this will also set the page locked 42549d0b21bSJens Axboe */ 4265abc97aaSJens Axboe page = buf->page; 4273e7ee3e7SJens Axboe if (add_to_page_cache(page, mapping, index, gfp_mask)) 4285abc97aaSJens Axboe goto find_page; 4293e7ee3e7SJens Axboe 4303e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_LRU)) 4313e7ee3e7SJens Axboe lru_cache_add(page); 4325abc97aaSJens Axboe } else { 4335274f052SJens Axboe find_page: 4345274f052SJens Axboe ret = -ENOMEM; 4353e7ee3e7SJens Axboe page = find_or_create_page(mapping, index, gfp_mask); 4365274f052SJens Axboe if (!page) 4379aefe431SDave Jones goto out_nomem; 4385274f052SJens Axboe 4395274f052SJens Axboe /* 4405274f052SJens Axboe * If the page is uptodate, it is also locked. If it isn't 4415274f052SJens Axboe * uptodate, we can mark it uptodate if we are filling the 4425274f052SJens Axboe * full page. Otherwise we need to read it in first... 4435274f052SJens Axboe */ 4445274f052SJens Axboe if (!PageUptodate(page)) { 4455274f052SJens Axboe if (sd->len < PAGE_CACHE_SIZE) { 4465274f052SJens Axboe ret = mapping->a_ops->readpage(file, page); 4475274f052SJens Axboe if (unlikely(ret)) 4485274f052SJens Axboe goto out; 4495274f052SJens Axboe 4505274f052SJens Axboe lock_page(page); 4515274f052SJens Axboe 4525274f052SJens Axboe if (!PageUptodate(page)) { 4535274f052SJens Axboe /* 4545274f052SJens Axboe * page got invalidated, repeat 4555274f052SJens Axboe */ 4565274f052SJens Axboe if (!page->mapping) { 4575274f052SJens Axboe unlock_page(page); 4585274f052SJens Axboe page_cache_release(page); 4595274f052SJens Axboe goto find_page; 4605274f052SJens Axboe } 4615274f052SJens Axboe ret = -EIO; 4625274f052SJens Axboe goto out; 4635274f052SJens Axboe } 4645274f052SJens Axboe } else { 4655274f052SJens Axboe WARN_ON(!PageLocked(page)); 4665274f052SJens Axboe SetPageUptodate(page); 4675274f052SJens Axboe } 4685274f052SJens Axboe } 4695abc97aaSJens Axboe } 4705274f052SJens Axboe 4715274f052SJens Axboe ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 4724f6f0bd2SJens Axboe if (ret == AOP_TRUNCATED_PAGE) { 4734f6f0bd2SJens Axboe page_cache_release(page); 4744f6f0bd2SJens Axboe goto find_page; 4754f6f0bd2SJens Axboe } else if (ret) 4765274f052SJens Axboe goto out; 4775274f052SJens Axboe 4783e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 4795abc97aaSJens Axboe char *dst = kmap_atomic(page, KM_USER0); 4805abc97aaSJens Axboe 4815274f052SJens Axboe memcpy(dst + offset, src + buf->offset, sd->len); 4825274f052SJens Axboe flush_dcache_page(page); 4835274f052SJens Axboe kunmap_atomic(dst, KM_USER0); 4845abc97aaSJens Axboe } 4855274f052SJens Axboe 4865274f052SJens Axboe ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 4874f6f0bd2SJens Axboe if (ret == AOP_TRUNCATED_PAGE) { 4884f6f0bd2SJens Axboe page_cache_release(page); 4894f6f0bd2SJens Axboe goto find_page; 4904f6f0bd2SJens Axboe } else if (ret) 4915274f052SJens Axboe goto out; 4925274f052SJens Axboe 493c7f21e4fSJens Axboe mark_page_accessed(page); 4944f6f0bd2SJens Axboe balance_dirty_pages_ratelimited(mapping); 4955274f052SJens Axboe out: 4963e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 4975274f052SJens Axboe page_cache_release(page); 4984f6f0bd2SJens Axboe unlock_page(page); 4994f6f0bd2SJens Axboe } 5009aefe431SDave Jones out_nomem: 5015274f052SJens Axboe buf->ops->unmap(info, buf); 5025274f052SJens Axboe return ret; 5035274f052SJens Axboe } 5045274f052SJens Axboe 5055274f052SJens Axboe typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, 5065274f052SJens Axboe struct splice_desc *); 5075274f052SJens Axboe 50883f9135bSJens Axboe /* 50983f9135bSJens Axboe * Pipe input worker. Most of this logic works like a regular pipe, the 51083f9135bSJens Axboe * key here is the 'actor' worker passed in that actually moves the data 51183f9135bSJens Axboe * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 51283f9135bSJens Axboe */ 5133a326a2cSIngo Molnar static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, 5145274f052SJens Axboe size_t len, unsigned int flags, 5155274f052SJens Axboe splice_actor *actor) 5165274f052SJens Axboe { 5175274f052SJens Axboe int ret, do_wakeup, err; 5185274f052SJens Axboe struct splice_desc sd; 5195274f052SJens Axboe 5205274f052SJens Axboe ret = 0; 5215274f052SJens Axboe do_wakeup = 0; 5225274f052SJens Axboe 5235274f052SJens Axboe sd.total_len = len; 5245274f052SJens Axboe sd.flags = flags; 5255274f052SJens Axboe sd.file = out; 5265274f052SJens Axboe sd.pos = out->f_pos; 5275274f052SJens Axboe 5283a326a2cSIngo Molnar if (pipe->inode) 5293a326a2cSIngo Molnar mutex_lock(&pipe->inode->i_mutex); 5305274f052SJens Axboe 5315274f052SJens Axboe for (;;) { 5323a326a2cSIngo Molnar int bufs = pipe->nrbufs; 5335274f052SJens Axboe 5345274f052SJens Axboe if (bufs) { 5353a326a2cSIngo Molnar int curbuf = pipe->curbuf; 5363a326a2cSIngo Molnar struct pipe_buffer *buf = pipe->bufs + curbuf; 5375274f052SJens Axboe struct pipe_buf_operations *ops = buf->ops; 5385274f052SJens Axboe 5395274f052SJens Axboe sd.len = buf->len; 5405274f052SJens Axboe if (sd.len > sd.total_len) 5415274f052SJens Axboe sd.len = sd.total_len; 5425274f052SJens Axboe 5433a326a2cSIngo Molnar err = actor(pipe, buf, &sd); 5445274f052SJens Axboe if (err) { 5455274f052SJens Axboe if (!ret && err != -ENODATA) 5465274f052SJens Axboe ret = err; 5475274f052SJens Axboe 5485274f052SJens Axboe break; 5495274f052SJens Axboe } 5505274f052SJens Axboe 5515274f052SJens Axboe ret += sd.len; 5525274f052SJens Axboe buf->offset += sd.len; 5535274f052SJens Axboe buf->len -= sd.len; 5545274f052SJens Axboe if (!buf->len) { 5555274f052SJens Axboe buf->ops = NULL; 5563a326a2cSIngo Molnar ops->release(pipe, buf); 5575274f052SJens Axboe curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1); 5583a326a2cSIngo Molnar pipe->curbuf = curbuf; 5593a326a2cSIngo Molnar pipe->nrbufs = --bufs; 5605274f052SJens Axboe do_wakeup = 1; 5615274f052SJens Axboe } 5625274f052SJens Axboe 5635274f052SJens Axboe sd.pos += sd.len; 5645274f052SJens Axboe sd.total_len -= sd.len; 5655274f052SJens Axboe if (!sd.total_len) 5665274f052SJens Axboe break; 5675274f052SJens Axboe } 5685274f052SJens Axboe 5695274f052SJens Axboe if (bufs) 5705274f052SJens Axboe continue; 5713a326a2cSIngo Molnar if (!pipe->writers) 5725274f052SJens Axboe break; 5733a326a2cSIngo Molnar if (!pipe->waiting_writers) { 5745274f052SJens Axboe if (ret) 5755274f052SJens Axboe break; 5765274f052SJens Axboe } 5775274f052SJens Axboe 57829e35094SLinus Torvalds if (flags & SPLICE_F_NONBLOCK) { 57929e35094SLinus Torvalds if (!ret) 58029e35094SLinus Torvalds ret = -EAGAIN; 58129e35094SLinus Torvalds break; 58229e35094SLinus Torvalds } 58329e35094SLinus Torvalds 5845274f052SJens Axboe if (signal_pending(current)) { 5855274f052SJens Axboe if (!ret) 5865274f052SJens Axboe ret = -ERESTARTSYS; 5875274f052SJens Axboe break; 5885274f052SJens Axboe } 5895274f052SJens Axboe 5905274f052SJens Axboe if (do_wakeup) { 591c0bd1f65SJens Axboe smp_mb(); 5923a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 5933a326a2cSIngo Molnar wake_up_interruptible_sync(&pipe->wait); 5943a326a2cSIngo Molnar kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 5955274f052SJens Axboe do_wakeup = 0; 5965274f052SJens Axboe } 5975274f052SJens Axboe 5983a326a2cSIngo Molnar pipe_wait(pipe); 5995274f052SJens Axboe } 6005274f052SJens Axboe 6013a326a2cSIngo Molnar if (pipe->inode) 6023a326a2cSIngo Molnar mutex_unlock(&pipe->inode->i_mutex); 6035274f052SJens Axboe 6045274f052SJens Axboe if (do_wakeup) { 605c0bd1f65SJens Axboe smp_mb(); 6063a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 6073a326a2cSIngo Molnar wake_up_interruptible(&pipe->wait); 6083a326a2cSIngo Molnar kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 6095274f052SJens Axboe } 6105274f052SJens Axboe 6115274f052SJens Axboe mutex_lock(&out->f_mapping->host->i_mutex); 6125274f052SJens Axboe out->f_pos = sd.pos; 6135274f052SJens Axboe mutex_unlock(&out->f_mapping->host->i_mutex); 6145274f052SJens Axboe return ret; 6155274f052SJens Axboe 6165274f052SJens Axboe } 6175274f052SJens Axboe 61883f9135bSJens Axboe /** 61983f9135bSJens Axboe * generic_file_splice_write - splice data from a pipe to a file 6203a326a2cSIngo Molnar * @pipe: pipe info 62183f9135bSJens Axboe * @out: file to write to 62283f9135bSJens Axboe * @len: number of bytes to splice 62383f9135bSJens Axboe * @flags: splice modifier flags 62483f9135bSJens Axboe * 62583f9135bSJens Axboe * Will either move or copy pages (determined by @flags options) from 62683f9135bSJens Axboe * the given pipe inode to the given file. 62783f9135bSJens Axboe * 62883f9135bSJens Axboe */ 6293a326a2cSIngo Molnar ssize_t 6303a326a2cSIngo Molnar generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, 6315274f052SJens Axboe size_t len, unsigned int flags) 6325274f052SJens Axboe { 6334f6f0bd2SJens Axboe struct address_space *mapping = out->f_mapping; 6343a326a2cSIngo Molnar ssize_t ret; 6353a326a2cSIngo Molnar 6363a326a2cSIngo Molnar ret = move_from_pipe(pipe, out, len, flags, pipe_to_file); 6374f6f0bd2SJens Axboe 6384f6f0bd2SJens Axboe /* 6394f6f0bd2SJens Axboe * if file or inode is SYNC and we actually wrote some data, sync it 6404f6f0bd2SJens Axboe */ 6414f6f0bd2SJens Axboe if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) 6424f6f0bd2SJens Axboe && ret > 0) { 6434f6f0bd2SJens Axboe struct inode *inode = mapping->host; 6444f6f0bd2SJens Axboe int err; 6454f6f0bd2SJens Axboe 6464f6f0bd2SJens Axboe mutex_lock(&inode->i_mutex); 6474f6f0bd2SJens Axboe err = generic_osync_inode(mapping->host, mapping, 6484f6f0bd2SJens Axboe OSYNC_METADATA|OSYNC_DATA); 6494f6f0bd2SJens Axboe mutex_unlock(&inode->i_mutex); 6504f6f0bd2SJens Axboe 6514f6f0bd2SJens Axboe if (err) 6524f6f0bd2SJens Axboe ret = err; 6534f6f0bd2SJens Axboe } 6544f6f0bd2SJens Axboe 6554f6f0bd2SJens Axboe return ret; 6565274f052SJens Axboe } 6575274f052SJens Axboe 658059a8f37SJens Axboe EXPORT_SYMBOL(generic_file_splice_write); 659059a8f37SJens Axboe 66083f9135bSJens Axboe /** 66183f9135bSJens Axboe * generic_splice_sendpage - splice data from a pipe to a socket 66283f9135bSJens Axboe * @inode: pipe inode 66383f9135bSJens Axboe * @out: socket to write to 66483f9135bSJens Axboe * @len: number of bytes to splice 66583f9135bSJens Axboe * @flags: splice modifier flags 66683f9135bSJens Axboe * 66783f9135bSJens Axboe * Will send @len bytes from the pipe to a network socket. No data copying 66883f9135bSJens Axboe * is involved. 66983f9135bSJens Axboe * 67083f9135bSJens Axboe */ 6713a326a2cSIngo Molnar ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 6725274f052SJens Axboe size_t len, unsigned int flags) 6735274f052SJens Axboe { 6743a326a2cSIngo Molnar return move_from_pipe(pipe, out, len, flags, pipe_to_sendpage); 6755274f052SJens Axboe } 6765274f052SJens Axboe 677059a8f37SJens Axboe EXPORT_SYMBOL(generic_splice_sendpage); 678a0f06780SJeff Garzik 67983f9135bSJens Axboe /* 68083f9135bSJens Axboe * Attempt to initiate a splice from pipe to file. 68183f9135bSJens Axboe */ 6823a326a2cSIngo Molnar static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, 683529565dcSIngo Molnar loff_t __user *off_out, size_t len, 684529565dcSIngo Molnar unsigned int flags) 6855274f052SJens Axboe { 6865274f052SJens Axboe loff_t pos; 6875274f052SJens Axboe int ret; 6885274f052SJens Axboe 6895274f052SJens Axboe if (!out->f_op || !out->f_op->splice_write) 6905274f052SJens Axboe return -EINVAL; 6915274f052SJens Axboe 6925274f052SJens Axboe if (!(out->f_mode & FMODE_WRITE)) 6935274f052SJens Axboe return -EBADF; 6945274f052SJens Axboe 695529565dcSIngo Molnar if (off_out && copy_from_user(&out->f_pos, off_out, sizeof(loff_t))) 696529565dcSIngo Molnar return -EFAULT; 697529565dcSIngo Molnar 6985274f052SJens Axboe pos = out->f_pos; 699529565dcSIngo Molnar 7005274f052SJens Axboe ret = rw_verify_area(WRITE, out, &pos, len); 7015274f052SJens Axboe if (unlikely(ret < 0)) 7025274f052SJens Axboe return ret; 7035274f052SJens Axboe 7045274f052SJens Axboe return out->f_op->splice_write(pipe, out, len, flags); 7055274f052SJens Axboe } 7065274f052SJens Axboe 70783f9135bSJens Axboe /* 70883f9135bSJens Axboe * Attempt to initiate a splice from a file to a pipe. 70983f9135bSJens Axboe */ 710529565dcSIngo Molnar static long do_splice_to(struct file *in, loff_t __user *off_in, 711529565dcSIngo Molnar struct pipe_inode_info *pipe, size_t len, 712529565dcSIngo Molnar unsigned int flags) 7135274f052SJens Axboe { 7145274f052SJens Axboe loff_t pos, isize, left; 7155274f052SJens Axboe int ret; 7165274f052SJens Axboe 7175274f052SJens Axboe if (!in->f_op || !in->f_op->splice_read) 7185274f052SJens Axboe return -EINVAL; 7195274f052SJens Axboe 7205274f052SJens Axboe if (!(in->f_mode & FMODE_READ)) 7215274f052SJens Axboe return -EBADF; 7225274f052SJens Axboe 723529565dcSIngo Molnar if (off_in && copy_from_user(&in->f_pos, off_in, sizeof(loff_t))) 724529565dcSIngo Molnar return -EFAULT; 725529565dcSIngo Molnar 7265274f052SJens Axboe pos = in->f_pos; 727529565dcSIngo Molnar 7285274f052SJens Axboe ret = rw_verify_area(READ, in, &pos, len); 7295274f052SJens Axboe if (unlikely(ret < 0)) 7305274f052SJens Axboe return ret; 7315274f052SJens Axboe 7325274f052SJens Axboe isize = i_size_read(in->f_mapping->host); 7335274f052SJens Axboe if (unlikely(in->f_pos >= isize)) 7345274f052SJens Axboe return 0; 7355274f052SJens Axboe 7365274f052SJens Axboe left = isize - in->f_pos; 7375274f052SJens Axboe if (left < len) 7385274f052SJens Axboe len = left; 7395274f052SJens Axboe 7405274f052SJens Axboe return in->f_op->splice_read(in, pipe, len, flags); 7415274f052SJens Axboe } 7425274f052SJens Axboe 74383f9135bSJens Axboe /* 74483f9135bSJens Axboe * Determine where to splice to/from. 74583f9135bSJens Axboe */ 746529565dcSIngo Molnar static long do_splice(struct file *in, loff_t __user *off_in, 747529565dcSIngo Molnar struct file *out, loff_t __user *off_out, 748529565dcSIngo Molnar size_t len, unsigned int flags) 7495274f052SJens Axboe { 7503a326a2cSIngo Molnar struct pipe_inode_info *pipe; 7515274f052SJens Axboe 752529565dcSIngo Molnar if (off_out && out->f_op->llseek == no_llseek) 753529565dcSIngo Molnar return -EINVAL; 754529565dcSIngo Molnar if (off_in && in->f_op->llseek == no_llseek) 755529565dcSIngo Molnar return -EINVAL; 756529565dcSIngo Molnar 7573a326a2cSIngo Molnar pipe = in->f_dentry->d_inode->i_pipe; 758529565dcSIngo Molnar if (pipe) { 759529565dcSIngo Molnar if (off_in) 760529565dcSIngo Molnar return -ESPIPE; 761529565dcSIngo Molnar 762529565dcSIngo Molnar return do_splice_from(pipe, out, off_out, len, flags); 763529565dcSIngo Molnar } 7645274f052SJens Axboe 7653a326a2cSIngo Molnar pipe = out->f_dentry->d_inode->i_pipe; 766529565dcSIngo Molnar if (pipe) { 767529565dcSIngo Molnar if (off_out) 768529565dcSIngo Molnar return -ESPIPE; 769529565dcSIngo Molnar 770529565dcSIngo Molnar return do_splice_to(in, off_in, pipe, len, flags); 771529565dcSIngo Molnar } 7725274f052SJens Axboe 7735274f052SJens Axboe return -EINVAL; 7745274f052SJens Axboe } 7755274f052SJens Axboe 776529565dcSIngo Molnar asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, 777529565dcSIngo Molnar int fd_out, loff_t __user *off_out, 778529565dcSIngo Molnar size_t len, unsigned int flags) 7795274f052SJens Axboe { 7805274f052SJens Axboe long error; 7815274f052SJens Axboe struct file *in, *out; 7825274f052SJens Axboe int fput_in, fput_out; 7835274f052SJens Axboe 7845274f052SJens Axboe if (unlikely(!len)) 7855274f052SJens Axboe return 0; 7865274f052SJens Axboe 7875274f052SJens Axboe error = -EBADF; 788529565dcSIngo Molnar in = fget_light(fd_in, &fput_in); 7895274f052SJens Axboe if (in) { 7905274f052SJens Axboe if (in->f_mode & FMODE_READ) { 791529565dcSIngo Molnar out = fget_light(fd_out, &fput_out); 7925274f052SJens Axboe if (out) { 7935274f052SJens Axboe if (out->f_mode & FMODE_WRITE) 794529565dcSIngo Molnar error = do_splice(in, off_in, 795529565dcSIngo Molnar out, off_out, 796529565dcSIngo Molnar len, flags); 7975274f052SJens Axboe fput_light(out, fput_out); 7985274f052SJens Axboe } 7995274f052SJens Axboe } 8005274f052SJens Axboe 8015274f052SJens Axboe fput_light(in, fput_in); 8025274f052SJens Axboe } 8035274f052SJens Axboe 8045274f052SJens Axboe return error; 8055274f052SJens Axboe } 806