15274f052SJens Axboe /* 25274f052SJens Axboe * "splice": joining two ropes together by interweaving their strands. 35274f052SJens Axboe * 45274f052SJens Axboe * This is the "extended pipe" functionality, where a pipe is used as 55274f052SJens Axboe * an arbitrary in-memory buffer. Think of a pipe as a small kernel 65274f052SJens Axboe * buffer that you can use to transfer data from one end to the other. 75274f052SJens Axboe * 85274f052SJens Axboe * The traditional unix read/write is extended with a "splice()" operation 95274f052SJens Axboe * that transfers data buffers to or from a pipe buffer. 105274f052SJens Axboe * 115274f052SJens Axboe * Named by Larry McVoy, original implementation from Linus, extended by 125274f052SJens Axboe * Jens to support splicing to files and fixing the initial implementation 135274f052SJens Axboe * bugs. 145274f052SJens Axboe * 155274f052SJens Axboe * Copyright (C) 2005 Jens Axboe <axboe@suse.de> 165274f052SJens Axboe * Copyright (C) 2005 Linus Torvalds <torvalds@osdl.org> 175274f052SJens Axboe * 185274f052SJens Axboe */ 195274f052SJens Axboe #include <linux/fs.h> 205274f052SJens Axboe #include <linux/file.h> 215274f052SJens Axboe #include <linux/pagemap.h> 225274f052SJens Axboe #include <linux/pipe_fs_i.h> 235274f052SJens Axboe #include <linux/mm_inline.h> 245abc97aaSJens Axboe #include <linux/swap.h> 254f6f0bd2SJens Axboe #include <linux/writeback.h> 264f6f0bd2SJens Axboe #include <linux/buffer_head.h> 27a0f06780SJeff Garzik #include <linux/module.h> 284f6f0bd2SJens Axboe #include <linux/syscalls.h> 295274f052SJens Axboe 305274f052SJens Axboe /* 315274f052SJens Axboe * Passed to the actors 325274f052SJens Axboe */ 335274f052SJens Axboe struct splice_desc { 345274f052SJens Axboe unsigned int len, total_len; /* current and remaining length */ 355274f052SJens Axboe unsigned int flags; /* splice flags */ 365274f052SJens Axboe struct file *file; /* file to read/write */ 375274f052SJens Axboe loff_t pos; /* file position */ 385274f052SJens Axboe }; 395274f052SJens Axboe 4083f9135bSJens Axboe /* 4183f9135bSJens Axboe * Attempt to steal a page from a pipe buffer. This should perhaps go into 4283f9135bSJens Axboe * a vm helper function, it's already simplified quite a bit by the 4383f9135bSJens Axboe * addition of remove_mapping(). If success is returned, the caller may 4483f9135bSJens Axboe * attempt to reuse this page for another destination. 4583f9135bSJens Axboe */ 465abc97aaSJens Axboe static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, 475abc97aaSJens Axboe struct pipe_buffer *buf) 485abc97aaSJens Axboe { 495abc97aaSJens Axboe struct page *page = buf->page; 504f6f0bd2SJens Axboe struct address_space *mapping = page_mapping(page); 515abc97aaSJens Axboe 525abc97aaSJens Axboe WARN_ON(!PageLocked(page)); 535abc97aaSJens Axboe WARN_ON(!PageUptodate(page)); 545abc97aaSJens Axboe 55ad8d6f0aSJens Axboe /* 56ad8d6f0aSJens Axboe * At least for ext2 with nobh option, we need to wait on writeback 57ad8d6f0aSJens Axboe * completing on this page, since we'll remove it from the pagecache. 58ad8d6f0aSJens Axboe * Otherwise truncate wont wait on the page, allowing the disk 59ad8d6f0aSJens Axboe * blocks to be reused by someone else before we actually wrote our 60ad8d6f0aSJens Axboe * data to them. fs corruption ensues. 61ad8d6f0aSJens Axboe */ 62ad8d6f0aSJens Axboe wait_on_page_writeback(page); 63ad8d6f0aSJens Axboe 644f6f0bd2SJens Axboe if (PagePrivate(page)) 654f6f0bd2SJens Axboe try_to_release_page(page, mapping_gfp_mask(mapping)); 664f6f0bd2SJens Axboe 674f6f0bd2SJens Axboe if (!remove_mapping(mapping, page)) 685abc97aaSJens Axboe return 1; 695abc97aaSJens Axboe 703e7ee3e7SJens Axboe buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; 715abc97aaSJens Axboe return 0; 725abc97aaSJens Axboe } 735abc97aaSJens Axboe 745274f052SJens Axboe static void page_cache_pipe_buf_release(struct pipe_inode_info *info, 755274f052SJens Axboe struct pipe_buffer *buf) 765274f052SJens Axboe { 775274f052SJens Axboe page_cache_release(buf->page); 785274f052SJens Axboe buf->page = NULL; 793e7ee3e7SJens Axboe buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); 805274f052SJens Axboe } 815274f052SJens Axboe 825274f052SJens Axboe static void *page_cache_pipe_buf_map(struct file *file, 835274f052SJens Axboe struct pipe_inode_info *info, 845274f052SJens Axboe struct pipe_buffer *buf) 855274f052SJens Axboe { 865274f052SJens Axboe struct page *page = buf->page; 8749d0b21bSJens Axboe int err; 885274f052SJens Axboe 895274f052SJens Axboe if (!PageUptodate(page)) { 9049d0b21bSJens Axboe lock_page(page); 915274f052SJens Axboe 9249d0b21bSJens Axboe /* 9349d0b21bSJens Axboe * Page got truncated/unhashed. This will cause a 0-byte 9449d0b21bSJens Axboe * splice, if this is the first page 9549d0b21bSJens Axboe */ 965274f052SJens Axboe if (!page->mapping) { 9749d0b21bSJens Axboe err = -ENODATA; 9849d0b21bSJens Axboe goto error; 995274f052SJens Axboe } 1005274f052SJens Axboe 10149d0b21bSJens Axboe /* 10249d0b21bSJens Axboe * uh oh, read-error from disk 10349d0b21bSJens Axboe */ 10449d0b21bSJens Axboe if (!PageUptodate(page)) { 10549d0b21bSJens Axboe err = -EIO; 10649d0b21bSJens Axboe goto error; 10749d0b21bSJens Axboe } 10849d0b21bSJens Axboe 10949d0b21bSJens Axboe /* 11049d0b21bSJens Axboe * page is ok afterall, fall through to mapping 11149d0b21bSJens Axboe */ 11249d0b21bSJens Axboe unlock_page(page); 11349d0b21bSJens Axboe } 11449d0b21bSJens Axboe 11549d0b21bSJens Axboe return kmap(page); 11649d0b21bSJens Axboe error: 11749d0b21bSJens Axboe unlock_page(page); 11849d0b21bSJens Axboe return ERR_PTR(err); 1195274f052SJens Axboe } 1205274f052SJens Axboe 1215274f052SJens Axboe static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 1225274f052SJens Axboe struct pipe_buffer *buf) 1235274f052SJens Axboe { 1245274f052SJens Axboe kunmap(buf->page); 1255274f052SJens Axboe } 1265274f052SJens Axboe 1275274f052SJens Axboe static struct pipe_buf_operations page_cache_pipe_buf_ops = { 1285274f052SJens Axboe .can_merge = 0, 1295274f052SJens Axboe .map = page_cache_pipe_buf_map, 1305274f052SJens Axboe .unmap = page_cache_pipe_buf_unmap, 1315274f052SJens Axboe .release = page_cache_pipe_buf_release, 1325abc97aaSJens Axboe .steal = page_cache_pipe_buf_steal, 1335274f052SJens Axboe }; 1345274f052SJens Axboe 13583f9135bSJens Axboe /* 13683f9135bSJens Axboe * Pipe output worker. This sets up our pipe format with the page cache 13783f9135bSJens Axboe * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 13883f9135bSJens Axboe */ 1393a326a2cSIngo Molnar static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 1405274f052SJens Axboe int nr_pages, unsigned long offset, 14129e35094SLinus Torvalds unsigned long len, unsigned int flags) 1425274f052SJens Axboe { 1435274f052SJens Axboe int ret, do_wakeup, i; 1445274f052SJens Axboe 1455274f052SJens Axboe ret = 0; 1465274f052SJens Axboe do_wakeup = 0; 1475274f052SJens Axboe i = 0; 1485274f052SJens Axboe 1493a326a2cSIngo Molnar if (pipe->inode) 1503a326a2cSIngo Molnar mutex_lock(&pipe->inode->i_mutex); 1515274f052SJens Axboe 1525274f052SJens Axboe for (;;) { 1535274f052SJens Axboe int bufs; 1545274f052SJens Axboe 1553a326a2cSIngo Molnar if (!pipe->readers) { 1565274f052SJens Axboe send_sig(SIGPIPE, current, 0); 1575274f052SJens Axboe if (!ret) 1585274f052SJens Axboe ret = -EPIPE; 1595274f052SJens Axboe break; 1605274f052SJens Axboe } 1615274f052SJens Axboe 1623a326a2cSIngo Molnar bufs = pipe->nrbufs; 1635274f052SJens Axboe if (bufs < PIPE_BUFFERS) { 1643a326a2cSIngo Molnar int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS - 1); 1653a326a2cSIngo Molnar struct pipe_buffer *buf = pipe->bufs + newbuf; 1665274f052SJens Axboe struct page *page = pages[i++]; 1675274f052SJens Axboe unsigned long this_len; 1685274f052SJens Axboe 1695274f052SJens Axboe this_len = PAGE_CACHE_SIZE - offset; 1705274f052SJens Axboe if (this_len > len) 1715274f052SJens Axboe this_len = len; 1725274f052SJens Axboe 1735274f052SJens Axboe buf->page = page; 1745274f052SJens Axboe buf->offset = offset; 1755274f052SJens Axboe buf->len = this_len; 1765274f052SJens Axboe buf->ops = &page_cache_pipe_buf_ops; 1773a326a2cSIngo Molnar pipe->nrbufs = ++bufs; 1785274f052SJens Axboe do_wakeup = 1; 1795274f052SJens Axboe 1805274f052SJens Axboe ret += this_len; 1815274f052SJens Axboe len -= this_len; 1825274f052SJens Axboe offset = 0; 1835274f052SJens Axboe if (!--nr_pages) 1845274f052SJens Axboe break; 1855274f052SJens Axboe if (!len) 1865274f052SJens Axboe break; 1875274f052SJens Axboe if (bufs < PIPE_BUFFERS) 1885274f052SJens Axboe continue; 1895274f052SJens Axboe 1905274f052SJens Axboe break; 1915274f052SJens Axboe } 1925274f052SJens Axboe 19329e35094SLinus Torvalds if (flags & SPLICE_F_NONBLOCK) { 19429e35094SLinus Torvalds if (!ret) 19529e35094SLinus Torvalds ret = -EAGAIN; 19629e35094SLinus Torvalds break; 19729e35094SLinus Torvalds } 19829e35094SLinus Torvalds 1995274f052SJens Axboe if (signal_pending(current)) { 2005274f052SJens Axboe if (!ret) 2015274f052SJens Axboe ret = -ERESTARTSYS; 2025274f052SJens Axboe break; 2035274f052SJens Axboe } 2045274f052SJens Axboe 2055274f052SJens Axboe if (do_wakeup) { 206c0bd1f65SJens Axboe smp_mb(); 2073a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 2083a326a2cSIngo Molnar wake_up_interruptible_sync(&pipe->wait); 2093a326a2cSIngo Molnar kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 2105274f052SJens Axboe do_wakeup = 0; 2115274f052SJens Axboe } 2125274f052SJens Axboe 2133a326a2cSIngo Molnar pipe->waiting_writers++; 2143a326a2cSIngo Molnar pipe_wait(pipe); 2153a326a2cSIngo Molnar pipe->waiting_writers--; 2165274f052SJens Axboe } 2175274f052SJens Axboe 2183a326a2cSIngo Molnar if (pipe->inode) 2193a326a2cSIngo Molnar mutex_unlock(&pipe->inode->i_mutex); 2205274f052SJens Axboe 2215274f052SJens Axboe if (do_wakeup) { 222c0bd1f65SJens Axboe smp_mb(); 2233a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 2243a326a2cSIngo Molnar wake_up_interruptible(&pipe->wait); 2253a326a2cSIngo Molnar kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 2265274f052SJens Axboe } 2275274f052SJens Axboe 2285274f052SJens Axboe while (i < nr_pages) 2295274f052SJens Axboe page_cache_release(pages[i++]); 2305274f052SJens Axboe 2315274f052SJens Axboe return ret; 2325274f052SJens Axboe } 2335274f052SJens Axboe 2343a326a2cSIngo Molnar static int 2353a326a2cSIngo Molnar __generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe, 23629e35094SLinus Torvalds size_t len, unsigned int flags) 2375274f052SJens Axboe { 2385274f052SJens Axboe struct address_space *mapping = in->f_mapping; 2395274f052SJens Axboe unsigned int offset, nr_pages; 24016c523ddSJens Axboe struct page *pages[PIPE_BUFFERS]; 2415274f052SJens Axboe struct page *page; 24216c523ddSJens Axboe pgoff_t index; 2437480a904SJens Axboe int i, error; 2445274f052SJens Axboe 2455274f052SJens Axboe index = in->f_pos >> PAGE_CACHE_SHIFT; 2465274f052SJens Axboe offset = in->f_pos & ~PAGE_CACHE_MASK; 2475274f052SJens Axboe nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 2485274f052SJens Axboe 2495274f052SJens Axboe if (nr_pages > PIPE_BUFFERS) 2505274f052SJens Axboe nr_pages = PIPE_BUFFERS; 2515274f052SJens Axboe 2525274f052SJens Axboe /* 2530b749ce3SJens Axboe * initiate read-ahead on this page range. however, don't call into 2540b749ce3SJens Axboe * read-ahead if this is a non-zero offset (we are likely doing small 2550b749ce3SJens Axboe * chunk splice and the page is already there) for a single page. 2565274f052SJens Axboe */ 2570b749ce3SJens Axboe if (!offset || nr_pages > 1) 2585274f052SJens Axboe do_page_cache_readahead(mapping, in, index, nr_pages); 2595274f052SJens Axboe 2605274f052SJens Axboe /* 2615274f052SJens Axboe * now fill in the holes 2625274f052SJens Axboe */ 2637480a904SJens Axboe error = 0; 26416c523ddSJens Axboe for (i = 0; i < nr_pages; i++, index++) { 2657480a904SJens Axboe find_page: 2665274f052SJens Axboe /* 2677480a904SJens Axboe * lookup the page for this index 2685274f052SJens Axboe */ 2697480a904SJens Axboe page = find_get_page(mapping, index); 2707480a904SJens Axboe if (!page) { 2717480a904SJens Axboe /* 2727480a904SJens Axboe * If in nonblock mode then dont block on 2737480a904SJens Axboe * readpage (we've kicked readahead so there 2747480a904SJens Axboe * will be asynchronous progress): 2757480a904SJens Axboe */ 2767480a904SJens Axboe if (flags & SPLICE_F_NONBLOCK) 2777480a904SJens Axboe break; 2787480a904SJens Axboe 2797480a904SJens Axboe /* 2807480a904SJens Axboe * page didn't exist, allocate one 2817480a904SJens Axboe */ 2827480a904SJens Axboe page = page_cache_alloc_cold(mapping); 2835274f052SJens Axboe if (!page) 2845274f052SJens Axboe break; 2855274f052SJens Axboe 2867480a904SJens Axboe error = add_to_page_cache_lru(page, mapping, index, 2877480a904SJens Axboe mapping_gfp_mask(mapping)); 2885274f052SJens Axboe if (unlikely(error)) { 2895274f052SJens Axboe page_cache_release(page); 2905274f052SJens Axboe break; 2915274f052SJens Axboe } 2927480a904SJens Axboe 2937480a904SJens Axboe goto readpage; 2945274f052SJens Axboe } 2957480a904SJens Axboe 2967480a904SJens Axboe /* 2977480a904SJens Axboe * If the page isn't uptodate, we may need to start io on it 2987480a904SJens Axboe */ 2997480a904SJens Axboe if (!PageUptodate(page)) { 3007480a904SJens Axboe lock_page(page); 3017480a904SJens Axboe 3027480a904SJens Axboe /* 3037480a904SJens Axboe * page was truncated, stop here. if this isn't the 3047480a904SJens Axboe * first page, we'll just complete what we already 3057480a904SJens Axboe * added 3067480a904SJens Axboe */ 3077480a904SJens Axboe if (!page->mapping) { 3087480a904SJens Axboe unlock_page(page); 3097480a904SJens Axboe page_cache_release(page); 3107480a904SJens Axboe break; 3117480a904SJens Axboe } 3127480a904SJens Axboe /* 3137480a904SJens Axboe * page was already under io and is now done, great 3147480a904SJens Axboe */ 3157480a904SJens Axboe if (PageUptodate(page)) { 3167480a904SJens Axboe unlock_page(page); 3177480a904SJens Axboe goto fill_it; 3187480a904SJens Axboe } 3197480a904SJens Axboe 3207480a904SJens Axboe readpage: 3217480a904SJens Axboe /* 3227480a904SJens Axboe * need to read in the page 3237480a904SJens Axboe */ 3247480a904SJens Axboe error = mapping->a_ops->readpage(in, page); 3257480a904SJens Axboe 3267480a904SJens Axboe if (unlikely(error)) { 3277480a904SJens Axboe page_cache_release(page); 3287480a904SJens Axboe if (error == AOP_TRUNCATED_PAGE) 3297480a904SJens Axboe goto find_page; 3307480a904SJens Axboe break; 3317480a904SJens Axboe } 3327480a904SJens Axboe } 3337480a904SJens Axboe fill_it: 33416c523ddSJens Axboe pages[i] = page; 3355274f052SJens Axboe } 3365274f052SJens Axboe 33716c523ddSJens Axboe if (i) 33829e35094SLinus Torvalds return move_to_pipe(pipe, pages, i, offset, len, flags); 33916c523ddSJens Axboe 3407480a904SJens Axboe return error; 3415274f052SJens Axboe } 3425274f052SJens Axboe 34383f9135bSJens Axboe /** 34483f9135bSJens Axboe * generic_file_splice_read - splice data from file to a pipe 34583f9135bSJens Axboe * @in: file to splice from 34683f9135bSJens Axboe * @pipe: pipe to splice to 34783f9135bSJens Axboe * @len: number of bytes to splice 34883f9135bSJens Axboe * @flags: splice modifier flags 34983f9135bSJens Axboe * 35083f9135bSJens Axboe * Will read pages from given file and fill them into a pipe. 35183f9135bSJens Axboe * 35283f9135bSJens Axboe */ 3533a326a2cSIngo Molnar ssize_t generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe, 3545274f052SJens Axboe size_t len, unsigned int flags) 3555274f052SJens Axboe { 3565274f052SJens Axboe ssize_t spliced; 3575274f052SJens Axboe int ret; 3585274f052SJens Axboe 3595274f052SJens Axboe ret = 0; 3605274f052SJens Axboe spliced = 0; 3613a326a2cSIngo Molnar 3625274f052SJens Axboe while (len) { 36329e35094SLinus Torvalds ret = __generic_file_splice_read(in, pipe, len, flags); 3645274f052SJens Axboe 3655274f052SJens Axboe if (ret <= 0) 3665274f052SJens Axboe break; 3675274f052SJens Axboe 3685274f052SJens Axboe in->f_pos += ret; 3695274f052SJens Axboe len -= ret; 3705274f052SJens Axboe spliced += ret; 37129e35094SLinus Torvalds 37229e35094SLinus Torvalds if (!(flags & SPLICE_F_NONBLOCK)) 37329e35094SLinus Torvalds continue; 37429e35094SLinus Torvalds ret = -EAGAIN; 37529e35094SLinus Torvalds break; 3765274f052SJens Axboe } 3775274f052SJens Axboe 3785274f052SJens Axboe if (spliced) 3795274f052SJens Axboe return spliced; 3805274f052SJens Axboe 3815274f052SJens Axboe return ret; 3825274f052SJens Axboe } 3835274f052SJens Axboe 384059a8f37SJens Axboe EXPORT_SYMBOL(generic_file_splice_read); 385059a8f37SJens Axboe 3865274f052SJens Axboe /* 3874f6f0bd2SJens Axboe * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 3884f6f0bd2SJens Axboe * using sendpage(). 3895274f052SJens Axboe */ 3905274f052SJens Axboe static int pipe_to_sendpage(struct pipe_inode_info *info, 3915274f052SJens Axboe struct pipe_buffer *buf, struct splice_desc *sd) 3925274f052SJens Axboe { 3935274f052SJens Axboe struct file *file = sd->file; 3945274f052SJens Axboe loff_t pos = sd->pos; 3955274f052SJens Axboe unsigned int offset; 3965274f052SJens Axboe ssize_t ret; 3975274f052SJens Axboe void *ptr; 398b2b39fa4SJens Axboe int more; 3995274f052SJens Axboe 4005274f052SJens Axboe /* 4015274f052SJens Axboe * sub-optimal, but we are limited by the pipe ->map. we don't 4025274f052SJens Axboe * need a kmap'ed buffer here, we just want to make sure we 4035274f052SJens Axboe * have the page pinned if the pipe page originates from the 4045274f052SJens Axboe * page cache 4055274f052SJens Axboe */ 4065274f052SJens Axboe ptr = buf->ops->map(file, info, buf); 4075274f052SJens Axboe if (IS_ERR(ptr)) 4085274f052SJens Axboe return PTR_ERR(ptr); 4095274f052SJens Axboe 4105274f052SJens Axboe offset = pos & ~PAGE_CACHE_MASK; 411b2b39fa4SJens Axboe more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 4125274f052SJens Axboe 413b2b39fa4SJens Axboe ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 4145274f052SJens Axboe 4155274f052SJens Axboe buf->ops->unmap(info, buf); 4165274f052SJens Axboe if (ret == sd->len) 4175274f052SJens Axboe return 0; 4185274f052SJens Axboe 4195274f052SJens Axboe return -EIO; 4205274f052SJens Axboe } 4215274f052SJens Axboe 4225274f052SJens Axboe /* 4235274f052SJens Axboe * This is a little more tricky than the file -> pipe splicing. There are 4245274f052SJens Axboe * basically three cases: 4255274f052SJens Axboe * 4265274f052SJens Axboe * - Destination page already exists in the address space and there 4275274f052SJens Axboe * are users of it. For that case we have no other option that 4285274f052SJens Axboe * copying the data. Tough luck. 4295274f052SJens Axboe * - Destination page already exists in the address space, but there 4305274f052SJens Axboe * are no users of it. Make sure it's uptodate, then drop it. Fall 4315274f052SJens Axboe * through to last case. 4325274f052SJens Axboe * - Destination page does not exist, we can add the pipe page to 4335274f052SJens Axboe * the page cache and avoid the copy. 4345274f052SJens Axboe * 43583f9135bSJens Axboe * If asked to move pages to the output file (SPLICE_F_MOVE is set in 43683f9135bSJens Axboe * sd->flags), we attempt to migrate pages from the pipe to the output 43783f9135bSJens Axboe * file address space page cache. This is possible if no one else has 43883f9135bSJens Axboe * the pipe page referenced outside of the pipe and page cache. If 43983f9135bSJens Axboe * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create 44083f9135bSJens Axboe * a new page in the output file page cache and fill/dirty that. 4415274f052SJens Axboe */ 4425274f052SJens Axboe static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, 4435274f052SJens Axboe struct splice_desc *sd) 4445274f052SJens Axboe { 4455274f052SJens Axboe struct file *file = sd->file; 4465274f052SJens Axboe struct address_space *mapping = file->f_mapping; 4473e7ee3e7SJens Axboe gfp_t gfp_mask = mapping_gfp_mask(mapping); 4485274f052SJens Axboe unsigned int offset; 4495274f052SJens Axboe struct page *page; 4505274f052SJens Axboe pgoff_t index; 4515abc97aaSJens Axboe char *src; 4523e7ee3e7SJens Axboe int ret; 4535274f052SJens Axboe 4545274f052SJens Axboe /* 45549d0b21bSJens Axboe * make sure the data in this buffer is uptodate 4565274f052SJens Axboe */ 4575274f052SJens Axboe src = buf->ops->map(file, info, buf); 4585274f052SJens Axboe if (IS_ERR(src)) 4595274f052SJens Axboe return PTR_ERR(src); 4605274f052SJens Axboe 4615274f052SJens Axboe index = sd->pos >> PAGE_CACHE_SHIFT; 4625274f052SJens Axboe offset = sd->pos & ~PAGE_CACHE_MASK; 4635274f052SJens Axboe 4645abc97aaSJens Axboe /* 4655abc97aaSJens Axboe * reuse buf page, if SPLICE_F_MOVE is set 4665abc97aaSJens Axboe */ 4675abc97aaSJens Axboe if (sd->flags & SPLICE_F_MOVE) { 46883f9135bSJens Axboe /* 46983f9135bSJens Axboe * If steal succeeds, buf->page is now pruned from the vm 47083f9135bSJens Axboe * side (LRU and page cache) and we can reuse it. 47183f9135bSJens Axboe */ 4725abc97aaSJens Axboe if (buf->ops->steal(info, buf)) 4735abc97aaSJens Axboe goto find_page; 4745abc97aaSJens Axboe 47549d0b21bSJens Axboe /* 47649d0b21bSJens Axboe * this will also set the page locked 47749d0b21bSJens Axboe */ 4785abc97aaSJens Axboe page = buf->page; 4793e7ee3e7SJens Axboe if (add_to_page_cache(page, mapping, index, gfp_mask)) 4805abc97aaSJens Axboe goto find_page; 4813e7ee3e7SJens Axboe 4823e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_LRU)) 4833e7ee3e7SJens Axboe lru_cache_add(page); 4845abc97aaSJens Axboe } else { 4855274f052SJens Axboe find_page: 4865274f052SJens Axboe ret = -ENOMEM; 4873e7ee3e7SJens Axboe page = find_or_create_page(mapping, index, gfp_mask); 4885274f052SJens Axboe if (!page) 4899aefe431SDave Jones goto out_nomem; 4905274f052SJens Axboe 4915274f052SJens Axboe /* 4925274f052SJens Axboe * If the page is uptodate, it is also locked. If it isn't 4935274f052SJens Axboe * uptodate, we can mark it uptodate if we are filling the 4945274f052SJens Axboe * full page. Otherwise we need to read it in first... 4955274f052SJens Axboe */ 4965274f052SJens Axboe if (!PageUptodate(page)) { 4975274f052SJens Axboe if (sd->len < PAGE_CACHE_SIZE) { 4985274f052SJens Axboe ret = mapping->a_ops->readpage(file, page); 4995274f052SJens Axboe if (unlikely(ret)) 5005274f052SJens Axboe goto out; 5015274f052SJens Axboe 5025274f052SJens Axboe lock_page(page); 5035274f052SJens Axboe 5045274f052SJens Axboe if (!PageUptodate(page)) { 5055274f052SJens Axboe /* 5065274f052SJens Axboe * page got invalidated, repeat 5075274f052SJens Axboe */ 5085274f052SJens Axboe if (!page->mapping) { 5095274f052SJens Axboe unlock_page(page); 5105274f052SJens Axboe page_cache_release(page); 5115274f052SJens Axboe goto find_page; 5125274f052SJens Axboe } 5135274f052SJens Axboe ret = -EIO; 5145274f052SJens Axboe goto out; 5155274f052SJens Axboe } 5165274f052SJens Axboe } else { 5175274f052SJens Axboe WARN_ON(!PageLocked(page)); 5185274f052SJens Axboe SetPageUptodate(page); 5195274f052SJens Axboe } 5205274f052SJens Axboe } 5215abc97aaSJens Axboe } 5225274f052SJens Axboe 5235274f052SJens Axboe ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 5244f6f0bd2SJens Axboe if (ret == AOP_TRUNCATED_PAGE) { 5254f6f0bd2SJens Axboe page_cache_release(page); 5264f6f0bd2SJens Axboe goto find_page; 5274f6f0bd2SJens Axboe } else if (ret) 5285274f052SJens Axboe goto out; 5295274f052SJens Axboe 5303e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 5315abc97aaSJens Axboe char *dst = kmap_atomic(page, KM_USER0); 5325abc97aaSJens Axboe 5335274f052SJens Axboe memcpy(dst + offset, src + buf->offset, sd->len); 5345274f052SJens Axboe flush_dcache_page(page); 5355274f052SJens Axboe kunmap_atomic(dst, KM_USER0); 5365abc97aaSJens Axboe } 5375274f052SJens Axboe 5385274f052SJens Axboe ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 5394f6f0bd2SJens Axboe if (ret == AOP_TRUNCATED_PAGE) { 5404f6f0bd2SJens Axboe page_cache_release(page); 5414f6f0bd2SJens Axboe goto find_page; 5424f6f0bd2SJens Axboe } else if (ret) 5435274f052SJens Axboe goto out; 5445274f052SJens Axboe 545c7f21e4fSJens Axboe mark_page_accessed(page); 5464f6f0bd2SJens Axboe balance_dirty_pages_ratelimited(mapping); 5475274f052SJens Axboe out: 5483e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 5495274f052SJens Axboe page_cache_release(page); 5504f6f0bd2SJens Axboe unlock_page(page); 5514f6f0bd2SJens Axboe } 5529aefe431SDave Jones out_nomem: 5535274f052SJens Axboe buf->ops->unmap(info, buf); 5545274f052SJens Axboe return ret; 5555274f052SJens Axboe } 5565274f052SJens Axboe 5575274f052SJens Axboe typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, 5585274f052SJens Axboe struct splice_desc *); 5595274f052SJens Axboe 56083f9135bSJens Axboe /* 56183f9135bSJens Axboe * Pipe input worker. Most of this logic works like a regular pipe, the 56283f9135bSJens Axboe * key here is the 'actor' worker passed in that actually moves the data 56383f9135bSJens Axboe * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 56483f9135bSJens Axboe */ 5653a326a2cSIngo Molnar static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, 5665274f052SJens Axboe size_t len, unsigned int flags, 5675274f052SJens Axboe splice_actor *actor) 5685274f052SJens Axboe { 5695274f052SJens Axboe int ret, do_wakeup, err; 5705274f052SJens Axboe struct splice_desc sd; 5715274f052SJens Axboe 5725274f052SJens Axboe ret = 0; 5735274f052SJens Axboe do_wakeup = 0; 5745274f052SJens Axboe 5755274f052SJens Axboe sd.total_len = len; 5765274f052SJens Axboe sd.flags = flags; 5775274f052SJens Axboe sd.file = out; 5785274f052SJens Axboe sd.pos = out->f_pos; 5795274f052SJens Axboe 5803a326a2cSIngo Molnar if (pipe->inode) 5813a326a2cSIngo Molnar mutex_lock(&pipe->inode->i_mutex); 5825274f052SJens Axboe 5835274f052SJens Axboe for (;;) { 5843a326a2cSIngo Molnar int bufs = pipe->nrbufs; 5855274f052SJens Axboe 5865274f052SJens Axboe if (bufs) { 5873a326a2cSIngo Molnar int curbuf = pipe->curbuf; 5883a326a2cSIngo Molnar struct pipe_buffer *buf = pipe->bufs + curbuf; 5895274f052SJens Axboe struct pipe_buf_operations *ops = buf->ops; 5905274f052SJens Axboe 5915274f052SJens Axboe sd.len = buf->len; 5925274f052SJens Axboe if (sd.len > sd.total_len) 5935274f052SJens Axboe sd.len = sd.total_len; 5945274f052SJens Axboe 5953a326a2cSIngo Molnar err = actor(pipe, buf, &sd); 5965274f052SJens Axboe if (err) { 5975274f052SJens Axboe if (!ret && err != -ENODATA) 5985274f052SJens Axboe ret = err; 5995274f052SJens Axboe 6005274f052SJens Axboe break; 6015274f052SJens Axboe } 6025274f052SJens Axboe 6035274f052SJens Axboe ret += sd.len; 6045274f052SJens Axboe buf->offset += sd.len; 6055274f052SJens Axboe buf->len -= sd.len; 6065274f052SJens Axboe if (!buf->len) { 6075274f052SJens Axboe buf->ops = NULL; 6083a326a2cSIngo Molnar ops->release(pipe, buf); 6095274f052SJens Axboe curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1); 6103a326a2cSIngo Molnar pipe->curbuf = curbuf; 6113a326a2cSIngo Molnar pipe->nrbufs = --bufs; 6125274f052SJens Axboe do_wakeup = 1; 6135274f052SJens Axboe } 6145274f052SJens Axboe 6155274f052SJens Axboe sd.pos += sd.len; 6165274f052SJens Axboe sd.total_len -= sd.len; 6175274f052SJens Axboe if (!sd.total_len) 6185274f052SJens Axboe break; 6195274f052SJens Axboe } 6205274f052SJens Axboe 6215274f052SJens Axboe if (bufs) 6225274f052SJens Axboe continue; 6233a326a2cSIngo Molnar if (!pipe->writers) 6245274f052SJens Axboe break; 6253a326a2cSIngo Molnar if (!pipe->waiting_writers) { 6265274f052SJens Axboe if (ret) 6275274f052SJens Axboe break; 6285274f052SJens Axboe } 6295274f052SJens Axboe 63029e35094SLinus Torvalds if (flags & SPLICE_F_NONBLOCK) { 63129e35094SLinus Torvalds if (!ret) 63229e35094SLinus Torvalds ret = -EAGAIN; 63329e35094SLinus Torvalds break; 63429e35094SLinus Torvalds } 63529e35094SLinus Torvalds 6365274f052SJens Axboe if (signal_pending(current)) { 6375274f052SJens Axboe if (!ret) 6385274f052SJens Axboe ret = -ERESTARTSYS; 6395274f052SJens Axboe break; 6405274f052SJens Axboe } 6415274f052SJens Axboe 6425274f052SJens Axboe if (do_wakeup) { 643c0bd1f65SJens Axboe smp_mb(); 6443a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 6453a326a2cSIngo Molnar wake_up_interruptible_sync(&pipe->wait); 6463a326a2cSIngo Molnar kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 6475274f052SJens Axboe do_wakeup = 0; 6485274f052SJens Axboe } 6495274f052SJens Axboe 6503a326a2cSIngo Molnar pipe_wait(pipe); 6515274f052SJens Axboe } 6525274f052SJens Axboe 6533a326a2cSIngo Molnar if (pipe->inode) 6543a326a2cSIngo Molnar mutex_unlock(&pipe->inode->i_mutex); 6555274f052SJens Axboe 6565274f052SJens Axboe if (do_wakeup) { 657c0bd1f65SJens Axboe smp_mb(); 6583a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 6593a326a2cSIngo Molnar wake_up_interruptible(&pipe->wait); 6603a326a2cSIngo Molnar kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 6615274f052SJens Axboe } 6625274f052SJens Axboe 6635274f052SJens Axboe mutex_lock(&out->f_mapping->host->i_mutex); 6645274f052SJens Axboe out->f_pos = sd.pos; 6655274f052SJens Axboe mutex_unlock(&out->f_mapping->host->i_mutex); 6665274f052SJens Axboe return ret; 6675274f052SJens Axboe 6685274f052SJens Axboe } 6695274f052SJens Axboe 67083f9135bSJens Axboe /** 67183f9135bSJens Axboe * generic_file_splice_write - splice data from a pipe to a file 6723a326a2cSIngo Molnar * @pipe: pipe info 67383f9135bSJens Axboe * @out: file to write to 67483f9135bSJens Axboe * @len: number of bytes to splice 67583f9135bSJens Axboe * @flags: splice modifier flags 67683f9135bSJens Axboe * 67783f9135bSJens Axboe * Will either move or copy pages (determined by @flags options) from 67883f9135bSJens Axboe * the given pipe inode to the given file. 67983f9135bSJens Axboe * 68083f9135bSJens Axboe */ 6813a326a2cSIngo Molnar ssize_t 6823a326a2cSIngo Molnar generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, 6835274f052SJens Axboe size_t len, unsigned int flags) 6845274f052SJens Axboe { 6854f6f0bd2SJens Axboe struct address_space *mapping = out->f_mapping; 6863a326a2cSIngo Molnar ssize_t ret; 6873a326a2cSIngo Molnar 6883a326a2cSIngo Molnar ret = move_from_pipe(pipe, out, len, flags, pipe_to_file); 6894f6f0bd2SJens Axboe 6904f6f0bd2SJens Axboe /* 6914f6f0bd2SJens Axboe * if file or inode is SYNC and we actually wrote some data, sync it 6924f6f0bd2SJens Axboe */ 6934f6f0bd2SJens Axboe if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) 6944f6f0bd2SJens Axboe && ret > 0) { 6954f6f0bd2SJens Axboe struct inode *inode = mapping->host; 6964f6f0bd2SJens Axboe int err; 6974f6f0bd2SJens Axboe 6984f6f0bd2SJens Axboe mutex_lock(&inode->i_mutex); 6994f6f0bd2SJens Axboe err = generic_osync_inode(mapping->host, mapping, 7004f6f0bd2SJens Axboe OSYNC_METADATA|OSYNC_DATA); 7014f6f0bd2SJens Axboe mutex_unlock(&inode->i_mutex); 7024f6f0bd2SJens Axboe 7034f6f0bd2SJens Axboe if (err) 7044f6f0bd2SJens Axboe ret = err; 7054f6f0bd2SJens Axboe } 7064f6f0bd2SJens Axboe 7074f6f0bd2SJens Axboe return ret; 7085274f052SJens Axboe } 7095274f052SJens Axboe 710059a8f37SJens Axboe EXPORT_SYMBOL(generic_file_splice_write); 711059a8f37SJens Axboe 71283f9135bSJens Axboe /** 71383f9135bSJens Axboe * generic_splice_sendpage - splice data from a pipe to a socket 71483f9135bSJens Axboe * @inode: pipe inode 71583f9135bSJens Axboe * @out: socket to write to 71683f9135bSJens Axboe * @len: number of bytes to splice 71783f9135bSJens Axboe * @flags: splice modifier flags 71883f9135bSJens Axboe * 71983f9135bSJens Axboe * Will send @len bytes from the pipe to a network socket. No data copying 72083f9135bSJens Axboe * is involved. 72183f9135bSJens Axboe * 72283f9135bSJens Axboe */ 7233a326a2cSIngo Molnar ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 7245274f052SJens Axboe size_t len, unsigned int flags) 7255274f052SJens Axboe { 7263a326a2cSIngo Molnar return move_from_pipe(pipe, out, len, flags, pipe_to_sendpage); 7275274f052SJens Axboe } 7285274f052SJens Axboe 729059a8f37SJens Axboe EXPORT_SYMBOL(generic_splice_sendpage); 730a0f06780SJeff Garzik 73183f9135bSJens Axboe /* 73283f9135bSJens Axboe * Attempt to initiate a splice from pipe to file. 73383f9135bSJens Axboe */ 7343a326a2cSIngo Molnar static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, 735b92ce558SJens Axboe size_t len, unsigned int flags) 7365274f052SJens Axboe { 7375274f052SJens Axboe loff_t pos; 7385274f052SJens Axboe int ret; 7395274f052SJens Axboe 7405274f052SJens Axboe if (!out->f_op || !out->f_op->splice_write) 7415274f052SJens Axboe return -EINVAL; 7425274f052SJens Axboe 7435274f052SJens Axboe if (!(out->f_mode & FMODE_WRITE)) 7445274f052SJens Axboe return -EBADF; 7455274f052SJens Axboe 7465274f052SJens Axboe pos = out->f_pos; 747529565dcSIngo Molnar 7485274f052SJens Axboe ret = rw_verify_area(WRITE, out, &pos, len); 7495274f052SJens Axboe if (unlikely(ret < 0)) 7505274f052SJens Axboe return ret; 7515274f052SJens Axboe 7525274f052SJens Axboe return out->f_op->splice_write(pipe, out, len, flags); 7535274f052SJens Axboe } 7545274f052SJens Axboe 75583f9135bSJens Axboe /* 75683f9135bSJens Axboe * Attempt to initiate a splice from a file to a pipe. 75783f9135bSJens Axboe */ 758b92ce558SJens Axboe static long do_splice_to(struct file *in, struct pipe_inode_info *pipe, 759b92ce558SJens Axboe size_t len, unsigned int flags) 7605274f052SJens Axboe { 7615274f052SJens Axboe loff_t pos, isize, left; 7625274f052SJens Axboe int ret; 7635274f052SJens Axboe 7645274f052SJens Axboe if (!in->f_op || !in->f_op->splice_read) 7655274f052SJens Axboe return -EINVAL; 7665274f052SJens Axboe 7675274f052SJens Axboe if (!(in->f_mode & FMODE_READ)) 7685274f052SJens Axboe return -EBADF; 7695274f052SJens Axboe 7705274f052SJens Axboe pos = in->f_pos; 771529565dcSIngo Molnar 7725274f052SJens Axboe ret = rw_verify_area(READ, in, &pos, len); 7735274f052SJens Axboe if (unlikely(ret < 0)) 7745274f052SJens Axboe return ret; 7755274f052SJens Axboe 7765274f052SJens Axboe isize = i_size_read(in->f_mapping->host); 7775274f052SJens Axboe if (unlikely(in->f_pos >= isize)) 7785274f052SJens Axboe return 0; 7795274f052SJens Axboe 7805274f052SJens Axboe left = isize - in->f_pos; 7815274f052SJens Axboe if (left < len) 7825274f052SJens Axboe len = left; 7835274f052SJens Axboe 7845274f052SJens Axboe return in->f_op->splice_read(in, pipe, len, flags); 7855274f052SJens Axboe } 7865274f052SJens Axboe 787b92ce558SJens Axboe long do_splice_direct(struct file *in, struct file *out, size_t len, 788b92ce558SJens Axboe unsigned int flags) 789b92ce558SJens Axboe { 790b92ce558SJens Axboe struct pipe_inode_info *pipe; 791b92ce558SJens Axboe long ret, bytes; 792b92ce558SJens Axboe umode_t i_mode; 793b92ce558SJens Axboe int i; 794b92ce558SJens Axboe 795b92ce558SJens Axboe /* 796b92ce558SJens Axboe * We require the input being a regular file, as we don't want to 797b92ce558SJens Axboe * randomly drop data for eg socket -> socket splicing. Use the 798b92ce558SJens Axboe * piped splicing for that! 799b92ce558SJens Axboe */ 800b92ce558SJens Axboe i_mode = in->f_dentry->d_inode->i_mode; 801b92ce558SJens Axboe if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) 802b92ce558SJens Axboe return -EINVAL; 803b92ce558SJens Axboe 804b92ce558SJens Axboe /* 805b92ce558SJens Axboe * neither in nor out is a pipe, setup an internal pipe attached to 806b92ce558SJens Axboe * 'out' and transfer the wanted data from 'in' to 'out' through that 807b92ce558SJens Axboe */ 808b92ce558SJens Axboe pipe = current->splice_pipe; 809b92ce558SJens Axboe if (!pipe) { 810b92ce558SJens Axboe pipe = alloc_pipe_info(NULL); 811b92ce558SJens Axboe if (!pipe) 812b92ce558SJens Axboe return -ENOMEM; 813b92ce558SJens Axboe 814b92ce558SJens Axboe /* 815b92ce558SJens Axboe * We don't have an immediate reader, but we'll read the stuff 816b92ce558SJens Axboe * out of the pipe right after the move_to_pipe(). So set 817b92ce558SJens Axboe * PIPE_READERS appropriately. 818b92ce558SJens Axboe */ 819b92ce558SJens Axboe pipe->readers = 1; 820b92ce558SJens Axboe 821b92ce558SJens Axboe current->splice_pipe = pipe; 822b92ce558SJens Axboe } 823b92ce558SJens Axboe 824b92ce558SJens Axboe /* 825b92ce558SJens Axboe * do the splice 826b92ce558SJens Axboe */ 827b92ce558SJens Axboe ret = 0; 828b92ce558SJens Axboe bytes = 0; 829b92ce558SJens Axboe 830b92ce558SJens Axboe while (len) { 831b92ce558SJens Axboe size_t read_len, max_read_len; 832b92ce558SJens Axboe 833b92ce558SJens Axboe /* 834b92ce558SJens Axboe * Do at most PIPE_BUFFERS pages worth of transfer: 835b92ce558SJens Axboe */ 836b92ce558SJens Axboe max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE)); 837b92ce558SJens Axboe 838b92ce558SJens Axboe ret = do_splice_to(in, pipe, max_read_len, flags); 839b92ce558SJens Axboe if (unlikely(ret < 0)) 840b92ce558SJens Axboe goto out_release; 841b92ce558SJens Axboe 842b92ce558SJens Axboe read_len = ret; 843b92ce558SJens Axboe 844b92ce558SJens Axboe /* 845b92ce558SJens Axboe * NOTE: nonblocking mode only applies to the input. We 846b92ce558SJens Axboe * must not do the output in nonblocking mode as then we 847b92ce558SJens Axboe * could get stuck data in the internal pipe: 848b92ce558SJens Axboe */ 849b92ce558SJens Axboe ret = do_splice_from(pipe, out, read_len, 850b92ce558SJens Axboe flags & ~SPLICE_F_NONBLOCK); 851b92ce558SJens Axboe if (unlikely(ret < 0)) 852b92ce558SJens Axboe goto out_release; 853b92ce558SJens Axboe 854b92ce558SJens Axboe bytes += ret; 855b92ce558SJens Axboe len -= ret; 856b92ce558SJens Axboe 857b92ce558SJens Axboe /* 858b92ce558SJens Axboe * In nonblocking mode, if we got back a short read then 859b92ce558SJens Axboe * that was due to either an IO error or due to the 860b92ce558SJens Axboe * pagecache entry not being there. In the IO error case 861b92ce558SJens Axboe * the _next_ splice attempt will produce a clean IO error 862b92ce558SJens Axboe * return value (not a short read), so in both cases it's 863b92ce558SJens Axboe * correct to break out of the loop here: 864b92ce558SJens Axboe */ 865b92ce558SJens Axboe if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len)) 866b92ce558SJens Axboe break; 867b92ce558SJens Axboe } 868b92ce558SJens Axboe 869b92ce558SJens Axboe pipe->nrbufs = pipe->curbuf = 0; 870b92ce558SJens Axboe 871b92ce558SJens Axboe return bytes; 872b92ce558SJens Axboe 873b92ce558SJens Axboe out_release: 874b92ce558SJens Axboe /* 875b92ce558SJens Axboe * If we did an incomplete transfer we must release 876b92ce558SJens Axboe * the pipe buffers in question: 877b92ce558SJens Axboe */ 878b92ce558SJens Axboe for (i = 0; i < PIPE_BUFFERS; i++) { 879b92ce558SJens Axboe struct pipe_buffer *buf = pipe->bufs + i; 880b92ce558SJens Axboe 881b92ce558SJens Axboe if (buf->ops) { 882b92ce558SJens Axboe buf->ops->release(pipe, buf); 883b92ce558SJens Axboe buf->ops = NULL; 884b92ce558SJens Axboe } 885b92ce558SJens Axboe } 886b92ce558SJens Axboe pipe->nrbufs = pipe->curbuf = 0; 887b92ce558SJens Axboe 888b92ce558SJens Axboe /* 889b92ce558SJens Axboe * If we transferred some data, return the number of bytes: 890b92ce558SJens Axboe */ 891b92ce558SJens Axboe if (bytes > 0) 892b92ce558SJens Axboe return bytes; 893b92ce558SJens Axboe 894b92ce558SJens Axboe return ret; 895b92ce558SJens Axboe } 896b92ce558SJens Axboe 897b92ce558SJens Axboe EXPORT_SYMBOL(do_splice_direct); 898b92ce558SJens Axboe 89983f9135bSJens Axboe /* 90083f9135bSJens Axboe * Determine where to splice to/from. 90183f9135bSJens Axboe */ 902529565dcSIngo Molnar static long do_splice(struct file *in, loff_t __user *off_in, 903529565dcSIngo Molnar struct file *out, loff_t __user *off_out, 904529565dcSIngo Molnar size_t len, unsigned int flags) 9055274f052SJens Axboe { 9063a326a2cSIngo Molnar struct pipe_inode_info *pipe; 9075274f052SJens Axboe 9083a326a2cSIngo Molnar pipe = in->f_dentry->d_inode->i_pipe; 909529565dcSIngo Molnar if (pipe) { 910529565dcSIngo Molnar if (off_in) 911529565dcSIngo Molnar return -ESPIPE; 912b92ce558SJens Axboe if (off_out) { 913b92ce558SJens Axboe if (out->f_op->llseek == no_llseek) 914b92ce558SJens Axboe return -EINVAL; 915b92ce558SJens Axboe if (copy_from_user(&out->f_pos, off_out, 916b92ce558SJens Axboe sizeof(loff_t))) 917b92ce558SJens Axboe return -EFAULT; 918b92ce558SJens Axboe } 919529565dcSIngo Molnar 920b92ce558SJens Axboe return do_splice_from(pipe, out, len, flags); 921529565dcSIngo Molnar } 9225274f052SJens Axboe 9233a326a2cSIngo Molnar pipe = out->f_dentry->d_inode->i_pipe; 924529565dcSIngo Molnar if (pipe) { 925529565dcSIngo Molnar if (off_out) 926529565dcSIngo Molnar return -ESPIPE; 927b92ce558SJens Axboe if (off_in) { 928b92ce558SJens Axboe if (in->f_op->llseek == no_llseek) 929b92ce558SJens Axboe return -EINVAL; 930b92ce558SJens Axboe if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t))) 931b92ce558SJens Axboe return -EFAULT; 932b92ce558SJens Axboe } 933529565dcSIngo Molnar 934b92ce558SJens Axboe return do_splice_to(in, pipe, len, flags); 935529565dcSIngo Molnar } 9365274f052SJens Axboe 9375274f052SJens Axboe return -EINVAL; 9385274f052SJens Axboe } 9395274f052SJens Axboe 940529565dcSIngo Molnar asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, 941529565dcSIngo Molnar int fd_out, loff_t __user *off_out, 942529565dcSIngo Molnar size_t len, unsigned int flags) 9435274f052SJens Axboe { 9445274f052SJens Axboe long error; 9455274f052SJens Axboe struct file *in, *out; 9465274f052SJens Axboe int fput_in, fput_out; 9475274f052SJens Axboe 9485274f052SJens Axboe if (unlikely(!len)) 9495274f052SJens Axboe return 0; 9505274f052SJens Axboe 9515274f052SJens Axboe error = -EBADF; 952529565dcSIngo Molnar in = fget_light(fd_in, &fput_in); 9535274f052SJens Axboe if (in) { 9545274f052SJens Axboe if (in->f_mode & FMODE_READ) { 955529565dcSIngo Molnar out = fget_light(fd_out, &fput_out); 9565274f052SJens Axboe if (out) { 9575274f052SJens Axboe if (out->f_mode & FMODE_WRITE) 958529565dcSIngo Molnar error = do_splice(in, off_in, 959529565dcSIngo Molnar out, off_out, 960529565dcSIngo Molnar len, flags); 9615274f052SJens Axboe fput_light(out, fput_out); 9625274f052SJens Axboe } 9635274f052SJens Axboe } 9645274f052SJens Axboe 9655274f052SJens Axboe fput_light(in, fput_in); 9665274f052SJens Axboe } 9675274f052SJens Axboe 9685274f052SJens Axboe return error; 9695274f052SJens Axboe } 970