15274f052SJens Axboe /* 25274f052SJens Axboe * "splice": joining two ropes together by interweaving their strands. 35274f052SJens Axboe * 45274f052SJens Axboe * This is the "extended pipe" functionality, where a pipe is used as 55274f052SJens Axboe * an arbitrary in-memory buffer. Think of a pipe as a small kernel 65274f052SJens Axboe * buffer that you can use to transfer data from one end to the other. 75274f052SJens Axboe * 85274f052SJens Axboe * The traditional unix read/write is extended with a "splice()" operation 95274f052SJens Axboe * that transfers data buffers to or from a pipe buffer. 105274f052SJens Axboe * 115274f052SJens Axboe * Named by Larry McVoy, original implementation from Linus, extended by 12c2058e06SJens Axboe * Jens to support splicing to files, network, direct splicing, etc and 13c2058e06SJens Axboe * fixing lots of bugs. 145274f052SJens Axboe * 15c2058e06SJens Axboe * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de> 16c2058e06SJens Axboe * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org> 17c2058e06SJens Axboe * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu> 185274f052SJens Axboe * 195274f052SJens Axboe */ 205274f052SJens Axboe #include <linux/fs.h> 215274f052SJens Axboe #include <linux/file.h> 225274f052SJens Axboe #include <linux/pagemap.h> 235274f052SJens Axboe #include <linux/pipe_fs_i.h> 245274f052SJens Axboe #include <linux/mm_inline.h> 255abc97aaSJens Axboe #include <linux/swap.h> 264f6f0bd2SJens Axboe #include <linux/writeback.h> 274f6f0bd2SJens Axboe #include <linux/buffer_head.h> 28a0f06780SJeff Garzik #include <linux/module.h> 294f6f0bd2SJens Axboe #include <linux/syscalls.h> 305274f052SJens Axboe 315274f052SJens Axboe /* 325274f052SJens Axboe * Passed to the actors 335274f052SJens Axboe */ 345274f052SJens Axboe struct splice_desc { 355274f052SJens Axboe unsigned int len, total_len; /* current and remaining length */ 365274f052SJens Axboe unsigned int flags; /* splice flags */ 375274f052SJens Axboe struct file *file; /* file to read/write */ 385274f052SJens Axboe loff_t pos; /* file position */ 395274f052SJens Axboe }; 405274f052SJens Axboe 4183f9135bSJens Axboe /* 4283f9135bSJens Axboe * Attempt to steal a page from a pipe buffer. This should perhaps go into 4383f9135bSJens Axboe * a vm helper function, it's already simplified quite a bit by the 4483f9135bSJens Axboe * addition of remove_mapping(). If success is returned, the caller may 4583f9135bSJens Axboe * attempt to reuse this page for another destination. 4683f9135bSJens Axboe */ 475abc97aaSJens Axboe static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, 485abc97aaSJens Axboe struct pipe_buffer *buf) 495abc97aaSJens Axboe { 505abc97aaSJens Axboe struct page *page = buf->page; 514f6f0bd2SJens Axboe struct address_space *mapping = page_mapping(page); 525abc97aaSJens Axboe 535abc97aaSJens Axboe WARN_ON(!PageLocked(page)); 545abc97aaSJens Axboe WARN_ON(!PageUptodate(page)); 555abc97aaSJens Axboe 56ad8d6f0aSJens Axboe /* 57ad8d6f0aSJens Axboe * At least for ext2 with nobh option, we need to wait on writeback 58ad8d6f0aSJens Axboe * completing on this page, since we'll remove it from the pagecache. 59ad8d6f0aSJens Axboe * Otherwise truncate wont wait on the page, allowing the disk 60ad8d6f0aSJens Axboe * blocks to be reused by someone else before we actually wrote our 61ad8d6f0aSJens Axboe * data to them. fs corruption ensues. 62ad8d6f0aSJens Axboe */ 63ad8d6f0aSJens Axboe wait_on_page_writeback(page); 64ad8d6f0aSJens Axboe 654f6f0bd2SJens Axboe if (PagePrivate(page)) 664f6f0bd2SJens Axboe try_to_release_page(page, mapping_gfp_mask(mapping)); 674f6f0bd2SJens Axboe 684f6f0bd2SJens Axboe if (!remove_mapping(mapping, page)) 695abc97aaSJens Axboe return 1; 705abc97aaSJens Axboe 713e7ee3e7SJens Axboe buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; 725abc97aaSJens Axboe return 0; 735abc97aaSJens Axboe } 745abc97aaSJens Axboe 755274f052SJens Axboe static void page_cache_pipe_buf_release(struct pipe_inode_info *info, 765274f052SJens Axboe struct pipe_buffer *buf) 775274f052SJens Axboe { 785274f052SJens Axboe page_cache_release(buf->page); 795274f052SJens Axboe buf->page = NULL; 803e7ee3e7SJens Axboe buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); 815274f052SJens Axboe } 825274f052SJens Axboe 835274f052SJens Axboe static void *page_cache_pipe_buf_map(struct file *file, 845274f052SJens Axboe struct pipe_inode_info *info, 855274f052SJens Axboe struct pipe_buffer *buf) 865274f052SJens Axboe { 875274f052SJens Axboe struct page *page = buf->page; 8849d0b21bSJens Axboe int err; 895274f052SJens Axboe 905274f052SJens Axboe if (!PageUptodate(page)) { 9149d0b21bSJens Axboe lock_page(page); 925274f052SJens Axboe 9349d0b21bSJens Axboe /* 9449d0b21bSJens Axboe * Page got truncated/unhashed. This will cause a 0-byte 9573d62d83SIngo Molnar * splice, if this is the first page. 9649d0b21bSJens Axboe */ 975274f052SJens Axboe if (!page->mapping) { 9849d0b21bSJens Axboe err = -ENODATA; 9949d0b21bSJens Axboe goto error; 1005274f052SJens Axboe } 1015274f052SJens Axboe 10249d0b21bSJens Axboe /* 10373d62d83SIngo Molnar * Uh oh, read-error from disk. 10449d0b21bSJens Axboe */ 10549d0b21bSJens Axboe if (!PageUptodate(page)) { 10649d0b21bSJens Axboe err = -EIO; 10749d0b21bSJens Axboe goto error; 10849d0b21bSJens Axboe } 10949d0b21bSJens Axboe 11049d0b21bSJens Axboe /* 11173d62d83SIngo Molnar * Page is ok afterall, fall through to mapping. 11249d0b21bSJens Axboe */ 11349d0b21bSJens Axboe unlock_page(page); 11449d0b21bSJens Axboe } 11549d0b21bSJens Axboe 11649d0b21bSJens Axboe return kmap(page); 11749d0b21bSJens Axboe error: 11849d0b21bSJens Axboe unlock_page(page); 11949d0b21bSJens Axboe return ERR_PTR(err); 1205274f052SJens Axboe } 1215274f052SJens Axboe 1225274f052SJens Axboe static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 1235274f052SJens Axboe struct pipe_buffer *buf) 1245274f052SJens Axboe { 1255274f052SJens Axboe kunmap(buf->page); 1265274f052SJens Axboe } 1275274f052SJens Axboe 1285274f052SJens Axboe static struct pipe_buf_operations page_cache_pipe_buf_ops = { 1295274f052SJens Axboe .can_merge = 0, 1305274f052SJens Axboe .map = page_cache_pipe_buf_map, 1315274f052SJens Axboe .unmap = page_cache_pipe_buf_unmap, 1325274f052SJens Axboe .release = page_cache_pipe_buf_release, 1335abc97aaSJens Axboe .steal = page_cache_pipe_buf_steal, 1345274f052SJens Axboe }; 1355274f052SJens Axboe 13683f9135bSJens Axboe /* 13783f9135bSJens Axboe * Pipe output worker. This sets up our pipe format with the page cache 13883f9135bSJens Axboe * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 13983f9135bSJens Axboe */ 1403a326a2cSIngo Molnar static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 1415274f052SJens Axboe int nr_pages, unsigned long offset, 14229e35094SLinus Torvalds unsigned long len, unsigned int flags) 1435274f052SJens Axboe { 1445274f052SJens Axboe int ret, do_wakeup, i; 1455274f052SJens Axboe 1465274f052SJens Axboe ret = 0; 1475274f052SJens Axboe do_wakeup = 0; 1485274f052SJens Axboe i = 0; 1495274f052SJens Axboe 1503a326a2cSIngo Molnar if (pipe->inode) 1513a326a2cSIngo Molnar mutex_lock(&pipe->inode->i_mutex); 1525274f052SJens Axboe 1535274f052SJens Axboe for (;;) { 1543a326a2cSIngo Molnar if (!pipe->readers) { 1555274f052SJens Axboe send_sig(SIGPIPE, current, 0); 1565274f052SJens Axboe if (!ret) 1575274f052SJens Axboe ret = -EPIPE; 1585274f052SJens Axboe break; 1595274f052SJens Axboe } 1605274f052SJens Axboe 1616f767b04SJens Axboe if (pipe->nrbufs < PIPE_BUFFERS) { 1626f767b04SJens Axboe int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); 1633a326a2cSIngo Molnar struct pipe_buffer *buf = pipe->bufs + newbuf; 1645274f052SJens Axboe struct page *page = pages[i++]; 1655274f052SJens Axboe unsigned long this_len; 1665274f052SJens Axboe 1675274f052SJens Axboe this_len = PAGE_CACHE_SIZE - offset; 1685274f052SJens Axboe if (this_len > len) 1695274f052SJens Axboe this_len = len; 1705274f052SJens Axboe 1715274f052SJens Axboe buf->page = page; 1725274f052SJens Axboe buf->offset = offset; 1735274f052SJens Axboe buf->len = this_len; 1745274f052SJens Axboe buf->ops = &page_cache_pipe_buf_ops; 1756f767b04SJens Axboe pipe->nrbufs++; 1766f767b04SJens Axboe if (pipe->inode) 1775274f052SJens Axboe do_wakeup = 1; 1785274f052SJens Axboe 1795274f052SJens Axboe ret += this_len; 1805274f052SJens Axboe len -= this_len; 1815274f052SJens Axboe offset = 0; 1825274f052SJens Axboe if (!--nr_pages) 1835274f052SJens Axboe break; 1845274f052SJens Axboe if (!len) 1855274f052SJens Axboe break; 1866f767b04SJens Axboe if (pipe->nrbufs < PIPE_BUFFERS) 1875274f052SJens Axboe continue; 1885274f052SJens Axboe 1895274f052SJens Axboe break; 1905274f052SJens Axboe } 1915274f052SJens Axboe 19229e35094SLinus Torvalds if (flags & SPLICE_F_NONBLOCK) { 19329e35094SLinus Torvalds if (!ret) 19429e35094SLinus Torvalds ret = -EAGAIN; 19529e35094SLinus Torvalds break; 19629e35094SLinus Torvalds } 19729e35094SLinus Torvalds 1985274f052SJens Axboe if (signal_pending(current)) { 1995274f052SJens Axboe if (!ret) 2005274f052SJens Axboe ret = -ERESTARTSYS; 2015274f052SJens Axboe break; 2025274f052SJens Axboe } 2035274f052SJens Axboe 2045274f052SJens Axboe if (do_wakeup) { 205c0bd1f65SJens Axboe smp_mb(); 2063a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 2073a326a2cSIngo Molnar wake_up_interruptible_sync(&pipe->wait); 2083a326a2cSIngo Molnar kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 2095274f052SJens Axboe do_wakeup = 0; 2105274f052SJens Axboe } 2115274f052SJens Axboe 2123a326a2cSIngo Molnar pipe->waiting_writers++; 2133a326a2cSIngo Molnar pipe_wait(pipe); 2143a326a2cSIngo Molnar pipe->waiting_writers--; 2155274f052SJens Axboe } 2165274f052SJens Axboe 2173a326a2cSIngo Molnar if (pipe->inode) 2183a326a2cSIngo Molnar mutex_unlock(&pipe->inode->i_mutex); 2195274f052SJens Axboe 2205274f052SJens Axboe if (do_wakeup) { 221c0bd1f65SJens Axboe smp_mb(); 2223a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 2233a326a2cSIngo Molnar wake_up_interruptible(&pipe->wait); 2243a326a2cSIngo Molnar kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 2255274f052SJens Axboe } 2265274f052SJens Axboe 2275274f052SJens Axboe while (i < nr_pages) 2285274f052SJens Axboe page_cache_release(pages[i++]); 2295274f052SJens Axboe 2305274f052SJens Axboe return ret; 2315274f052SJens Axboe } 2325274f052SJens Axboe 2333a326a2cSIngo Molnar static int 234cbb7e577SJens Axboe __generic_file_splice_read(struct file *in, loff_t *ppos, 235cbb7e577SJens Axboe struct pipe_inode_info *pipe, size_t len, 236cbb7e577SJens Axboe unsigned int flags) 2375274f052SJens Axboe { 2385274f052SJens Axboe struct address_space *mapping = in->f_mapping; 2395274f052SJens Axboe unsigned int offset, nr_pages; 24016c523ddSJens Axboe struct page *pages[PIPE_BUFFERS]; 2415274f052SJens Axboe struct page *page; 24216c523ddSJens Axboe pgoff_t index; 2437480a904SJens Axboe int i, error; 2445274f052SJens Axboe 245cbb7e577SJens Axboe index = *ppos >> PAGE_CACHE_SHIFT; 246cbb7e577SJens Axboe offset = *ppos & ~PAGE_CACHE_MASK; 2475274f052SJens Axboe nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 2485274f052SJens Axboe 2495274f052SJens Axboe if (nr_pages > PIPE_BUFFERS) 2505274f052SJens Axboe nr_pages = PIPE_BUFFERS; 2515274f052SJens Axboe 2525274f052SJens Axboe /* 25373d62d83SIngo Molnar * Initiate read-ahead on this page range. however, don't call into 2540b749ce3SJens Axboe * read-ahead if this is a non-zero offset (we are likely doing small 2550b749ce3SJens Axboe * chunk splice and the page is already there) for a single page. 2565274f052SJens Axboe */ 2570b749ce3SJens Axboe if (!offset || nr_pages > 1) 2585274f052SJens Axboe do_page_cache_readahead(mapping, in, index, nr_pages); 2595274f052SJens Axboe 2605274f052SJens Axboe /* 26173d62d83SIngo Molnar * Now fill in the holes: 2625274f052SJens Axboe */ 2637480a904SJens Axboe error = 0; 26416c523ddSJens Axboe for (i = 0; i < nr_pages; i++, index++) { 2657480a904SJens Axboe find_page: 2665274f052SJens Axboe /* 2677480a904SJens Axboe * lookup the page for this index 2685274f052SJens Axboe */ 2697480a904SJens Axboe page = find_get_page(mapping, index); 2707480a904SJens Axboe if (!page) { 2717480a904SJens Axboe /* 2727480a904SJens Axboe * If in nonblock mode then dont block on 2737480a904SJens Axboe * readpage (we've kicked readahead so there 2747480a904SJens Axboe * will be asynchronous progress): 2757480a904SJens Axboe */ 2767480a904SJens Axboe if (flags & SPLICE_F_NONBLOCK) 2777480a904SJens Axboe break; 2787480a904SJens Axboe 2797480a904SJens Axboe /* 2807480a904SJens Axboe * page didn't exist, allocate one 2817480a904SJens Axboe */ 2827480a904SJens Axboe page = page_cache_alloc_cold(mapping); 2835274f052SJens Axboe if (!page) 2845274f052SJens Axboe break; 2855274f052SJens Axboe 2867480a904SJens Axboe error = add_to_page_cache_lru(page, mapping, index, 2877480a904SJens Axboe mapping_gfp_mask(mapping)); 2885274f052SJens Axboe if (unlikely(error)) { 2895274f052SJens Axboe page_cache_release(page); 2905274f052SJens Axboe break; 2915274f052SJens Axboe } 2927480a904SJens Axboe 2937480a904SJens Axboe goto readpage; 2945274f052SJens Axboe } 2957480a904SJens Axboe 2967480a904SJens Axboe /* 2977480a904SJens Axboe * If the page isn't uptodate, we may need to start io on it 2987480a904SJens Axboe */ 2997480a904SJens Axboe if (!PageUptodate(page)) { 3007480a904SJens Axboe lock_page(page); 3017480a904SJens Axboe 3027480a904SJens Axboe /* 3037480a904SJens Axboe * page was truncated, stop here. if this isn't the 3047480a904SJens Axboe * first page, we'll just complete what we already 3057480a904SJens Axboe * added 3067480a904SJens Axboe */ 3077480a904SJens Axboe if (!page->mapping) { 3087480a904SJens Axboe unlock_page(page); 3097480a904SJens Axboe page_cache_release(page); 3107480a904SJens Axboe break; 3117480a904SJens Axboe } 3127480a904SJens Axboe /* 3137480a904SJens Axboe * page was already under io and is now done, great 3147480a904SJens Axboe */ 3157480a904SJens Axboe if (PageUptodate(page)) { 3167480a904SJens Axboe unlock_page(page); 3177480a904SJens Axboe goto fill_it; 3187480a904SJens Axboe } 3197480a904SJens Axboe 3207480a904SJens Axboe readpage: 3217480a904SJens Axboe /* 3227480a904SJens Axboe * need to read in the page 3237480a904SJens Axboe */ 3247480a904SJens Axboe error = mapping->a_ops->readpage(in, page); 3257480a904SJens Axboe 3267480a904SJens Axboe if (unlikely(error)) { 3277480a904SJens Axboe page_cache_release(page); 3287480a904SJens Axboe if (error == AOP_TRUNCATED_PAGE) 3297480a904SJens Axboe goto find_page; 3307480a904SJens Axboe break; 3317480a904SJens Axboe } 3327480a904SJens Axboe } 3337480a904SJens Axboe fill_it: 33416c523ddSJens Axboe pages[i] = page; 3355274f052SJens Axboe } 3365274f052SJens Axboe 33716c523ddSJens Axboe if (i) 33829e35094SLinus Torvalds return move_to_pipe(pipe, pages, i, offset, len, flags); 33916c523ddSJens Axboe 3407480a904SJens Axboe return error; 3415274f052SJens Axboe } 3425274f052SJens Axboe 34383f9135bSJens Axboe /** 34483f9135bSJens Axboe * generic_file_splice_read - splice data from file to a pipe 34583f9135bSJens Axboe * @in: file to splice from 34683f9135bSJens Axboe * @pipe: pipe to splice to 34783f9135bSJens Axboe * @len: number of bytes to splice 34883f9135bSJens Axboe * @flags: splice modifier flags 34983f9135bSJens Axboe * 35083f9135bSJens Axboe * Will read pages from given file and fill them into a pipe. 35183f9135bSJens Axboe */ 352cbb7e577SJens Axboe ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, 353cbb7e577SJens Axboe struct pipe_inode_info *pipe, size_t len, 354cbb7e577SJens Axboe unsigned int flags) 3555274f052SJens Axboe { 3565274f052SJens Axboe ssize_t spliced; 3575274f052SJens Axboe int ret; 3585274f052SJens Axboe 3595274f052SJens Axboe ret = 0; 3605274f052SJens Axboe spliced = 0; 3613a326a2cSIngo Molnar 3625274f052SJens Axboe while (len) { 363cbb7e577SJens Axboe ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 3645274f052SJens Axboe 3655274f052SJens Axboe if (ret <= 0) 3665274f052SJens Axboe break; 3675274f052SJens Axboe 368cbb7e577SJens Axboe *ppos += ret; 3695274f052SJens Axboe len -= ret; 3705274f052SJens Axboe spliced += ret; 37129e35094SLinus Torvalds 37229e35094SLinus Torvalds if (!(flags & SPLICE_F_NONBLOCK)) 37329e35094SLinus Torvalds continue; 37429e35094SLinus Torvalds ret = -EAGAIN; 37529e35094SLinus Torvalds break; 3765274f052SJens Axboe } 3775274f052SJens Axboe 3785274f052SJens Axboe if (spliced) 3795274f052SJens Axboe return spliced; 3805274f052SJens Axboe 3815274f052SJens Axboe return ret; 3825274f052SJens Axboe } 3835274f052SJens Axboe 384059a8f37SJens Axboe EXPORT_SYMBOL(generic_file_splice_read); 385059a8f37SJens Axboe 3865274f052SJens Axboe /* 3874f6f0bd2SJens Axboe * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 3884f6f0bd2SJens Axboe * using sendpage(). 3895274f052SJens Axboe */ 3905274f052SJens Axboe static int pipe_to_sendpage(struct pipe_inode_info *info, 3915274f052SJens Axboe struct pipe_buffer *buf, struct splice_desc *sd) 3925274f052SJens Axboe { 3935274f052SJens Axboe struct file *file = sd->file; 3945274f052SJens Axboe loff_t pos = sd->pos; 3955274f052SJens Axboe unsigned int offset; 3965274f052SJens Axboe ssize_t ret; 3975274f052SJens Axboe void *ptr; 398b2b39fa4SJens Axboe int more; 3995274f052SJens Axboe 4005274f052SJens Axboe /* 40173d62d83SIngo Molnar * Sub-optimal, but we are limited by the pipe ->map. We don't 4025274f052SJens Axboe * need a kmap'ed buffer here, we just want to make sure we 4035274f052SJens Axboe * have the page pinned if the pipe page originates from the 40473d62d83SIngo Molnar * page cache. 4055274f052SJens Axboe */ 4065274f052SJens Axboe ptr = buf->ops->map(file, info, buf); 4075274f052SJens Axboe if (IS_ERR(ptr)) 4085274f052SJens Axboe return PTR_ERR(ptr); 4095274f052SJens Axboe 4105274f052SJens Axboe offset = pos & ~PAGE_CACHE_MASK; 411b2b39fa4SJens Axboe more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 4125274f052SJens Axboe 413b2b39fa4SJens Axboe ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 4145274f052SJens Axboe 4155274f052SJens Axboe buf->ops->unmap(info, buf); 4165274f052SJens Axboe if (ret == sd->len) 4175274f052SJens Axboe return 0; 4185274f052SJens Axboe 4195274f052SJens Axboe return -EIO; 4205274f052SJens Axboe } 4215274f052SJens Axboe 4225274f052SJens Axboe /* 4235274f052SJens Axboe * This is a little more tricky than the file -> pipe splicing. There are 4245274f052SJens Axboe * basically three cases: 4255274f052SJens Axboe * 4265274f052SJens Axboe * - Destination page already exists in the address space and there 4275274f052SJens Axboe * are users of it. For that case we have no other option that 4285274f052SJens Axboe * copying the data. Tough luck. 4295274f052SJens Axboe * - Destination page already exists in the address space, but there 4305274f052SJens Axboe * are no users of it. Make sure it's uptodate, then drop it. Fall 4315274f052SJens Axboe * through to last case. 4325274f052SJens Axboe * - Destination page does not exist, we can add the pipe page to 4335274f052SJens Axboe * the page cache and avoid the copy. 4345274f052SJens Axboe * 43583f9135bSJens Axboe * If asked to move pages to the output file (SPLICE_F_MOVE is set in 43683f9135bSJens Axboe * sd->flags), we attempt to migrate pages from the pipe to the output 43783f9135bSJens Axboe * file address space page cache. This is possible if no one else has 43883f9135bSJens Axboe * the pipe page referenced outside of the pipe and page cache. If 43983f9135bSJens Axboe * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create 44083f9135bSJens Axboe * a new page in the output file page cache and fill/dirty that. 4415274f052SJens Axboe */ 4425274f052SJens Axboe static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, 4435274f052SJens Axboe struct splice_desc *sd) 4445274f052SJens Axboe { 4455274f052SJens Axboe struct file *file = sd->file; 4465274f052SJens Axboe struct address_space *mapping = file->f_mapping; 4473e7ee3e7SJens Axboe gfp_t gfp_mask = mapping_gfp_mask(mapping); 4485274f052SJens Axboe unsigned int offset; 4495274f052SJens Axboe struct page *page; 4505274f052SJens Axboe pgoff_t index; 4515abc97aaSJens Axboe char *src; 4523e7ee3e7SJens Axboe int ret; 4535274f052SJens Axboe 4545274f052SJens Axboe /* 45549d0b21bSJens Axboe * make sure the data in this buffer is uptodate 4565274f052SJens Axboe */ 4575274f052SJens Axboe src = buf->ops->map(file, info, buf); 4585274f052SJens Axboe if (IS_ERR(src)) 4595274f052SJens Axboe return PTR_ERR(src); 4605274f052SJens Axboe 4615274f052SJens Axboe index = sd->pos >> PAGE_CACHE_SHIFT; 4625274f052SJens Axboe offset = sd->pos & ~PAGE_CACHE_MASK; 4635274f052SJens Axboe 4645abc97aaSJens Axboe /* 46573d62d83SIngo Molnar * Reuse buf page, if SPLICE_F_MOVE is set. 4665abc97aaSJens Axboe */ 4675abc97aaSJens Axboe if (sd->flags & SPLICE_F_MOVE) { 46883f9135bSJens Axboe /* 46983f9135bSJens Axboe * If steal succeeds, buf->page is now pruned from the vm 47083f9135bSJens Axboe * side (LRU and page cache) and we can reuse it. 47183f9135bSJens Axboe */ 4725abc97aaSJens Axboe if (buf->ops->steal(info, buf)) 4735abc97aaSJens Axboe goto find_page; 4745abc97aaSJens Axboe 47549d0b21bSJens Axboe /* 47649d0b21bSJens Axboe * this will also set the page locked 47749d0b21bSJens Axboe */ 4785abc97aaSJens Axboe page = buf->page; 4793e7ee3e7SJens Axboe if (add_to_page_cache(page, mapping, index, gfp_mask)) 4805abc97aaSJens Axboe goto find_page; 4813e7ee3e7SJens Axboe 4823e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_LRU)) 4833e7ee3e7SJens Axboe lru_cache_add(page); 4845abc97aaSJens Axboe } else { 4855274f052SJens Axboe find_page: 4865274f052SJens Axboe ret = -ENOMEM; 4873e7ee3e7SJens Axboe page = find_or_create_page(mapping, index, gfp_mask); 4885274f052SJens Axboe if (!page) 4899aefe431SDave Jones goto out_nomem; 4905274f052SJens Axboe 4915274f052SJens Axboe /* 4925274f052SJens Axboe * If the page is uptodate, it is also locked. If it isn't 4935274f052SJens Axboe * uptodate, we can mark it uptodate if we are filling the 4945274f052SJens Axboe * full page. Otherwise we need to read it in first... 4955274f052SJens Axboe */ 4965274f052SJens Axboe if (!PageUptodate(page)) { 4975274f052SJens Axboe if (sd->len < PAGE_CACHE_SIZE) { 4985274f052SJens Axboe ret = mapping->a_ops->readpage(file, page); 4995274f052SJens Axboe if (unlikely(ret)) 5005274f052SJens Axboe goto out; 5015274f052SJens Axboe 5025274f052SJens Axboe lock_page(page); 5035274f052SJens Axboe 5045274f052SJens Axboe if (!PageUptodate(page)) { 5055274f052SJens Axboe /* 50673d62d83SIngo Molnar * Page got invalidated, repeat. 5075274f052SJens Axboe */ 5085274f052SJens Axboe if (!page->mapping) { 5095274f052SJens Axboe unlock_page(page); 5105274f052SJens Axboe page_cache_release(page); 5115274f052SJens Axboe goto find_page; 5125274f052SJens Axboe } 5135274f052SJens Axboe ret = -EIO; 5145274f052SJens Axboe goto out; 5155274f052SJens Axboe } 5165274f052SJens Axboe } else { 5175274f052SJens Axboe WARN_ON(!PageLocked(page)); 5185274f052SJens Axboe SetPageUptodate(page); 5195274f052SJens Axboe } 5205274f052SJens Axboe } 5215abc97aaSJens Axboe } 5225274f052SJens Axboe 5235274f052SJens Axboe ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 5244f6f0bd2SJens Axboe if (ret == AOP_TRUNCATED_PAGE) { 5254f6f0bd2SJens Axboe page_cache_release(page); 5264f6f0bd2SJens Axboe goto find_page; 5274f6f0bd2SJens Axboe } else if (ret) 5285274f052SJens Axboe goto out; 5295274f052SJens Axboe 5303e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 5315abc97aaSJens Axboe char *dst = kmap_atomic(page, KM_USER0); 5325abc97aaSJens Axboe 5335274f052SJens Axboe memcpy(dst + offset, src + buf->offset, sd->len); 5345274f052SJens Axboe flush_dcache_page(page); 5355274f052SJens Axboe kunmap_atomic(dst, KM_USER0); 5365abc97aaSJens Axboe } 5375274f052SJens Axboe 5385274f052SJens Axboe ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 5394f6f0bd2SJens Axboe if (ret == AOP_TRUNCATED_PAGE) { 5404f6f0bd2SJens Axboe page_cache_release(page); 5414f6f0bd2SJens Axboe goto find_page; 5424f6f0bd2SJens Axboe } else if (ret) 5435274f052SJens Axboe goto out; 5445274f052SJens Axboe 545c7f21e4fSJens Axboe mark_page_accessed(page); 5464f6f0bd2SJens Axboe balance_dirty_pages_ratelimited(mapping); 5475274f052SJens Axboe out: 5483e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 5495274f052SJens Axboe page_cache_release(page); 5504f6f0bd2SJens Axboe unlock_page(page); 5514f6f0bd2SJens Axboe } 5529aefe431SDave Jones out_nomem: 5535274f052SJens Axboe buf->ops->unmap(info, buf); 5545274f052SJens Axboe return ret; 5555274f052SJens Axboe } 5565274f052SJens Axboe 5575274f052SJens Axboe typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, 5585274f052SJens Axboe struct splice_desc *); 5595274f052SJens Axboe 56083f9135bSJens Axboe /* 56183f9135bSJens Axboe * Pipe input worker. Most of this logic works like a regular pipe, the 56283f9135bSJens Axboe * key here is the 'actor' worker passed in that actually moves the data 56383f9135bSJens Axboe * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 56483f9135bSJens Axboe */ 5653a326a2cSIngo Molnar static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, 566cbb7e577SJens Axboe loff_t *ppos, size_t len, unsigned int flags, 5675274f052SJens Axboe splice_actor *actor) 5685274f052SJens Axboe { 5695274f052SJens Axboe int ret, do_wakeup, err; 5705274f052SJens Axboe struct splice_desc sd; 5715274f052SJens Axboe 5725274f052SJens Axboe ret = 0; 5735274f052SJens Axboe do_wakeup = 0; 5745274f052SJens Axboe 5755274f052SJens Axboe sd.total_len = len; 5765274f052SJens Axboe sd.flags = flags; 5775274f052SJens Axboe sd.file = out; 578cbb7e577SJens Axboe sd.pos = *ppos; 5795274f052SJens Axboe 5803a326a2cSIngo Molnar if (pipe->inode) 5813a326a2cSIngo Molnar mutex_lock(&pipe->inode->i_mutex); 5825274f052SJens Axboe 5835274f052SJens Axboe for (;;) { 5846f767b04SJens Axboe if (pipe->nrbufs) { 5856f767b04SJens Axboe struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; 5865274f052SJens Axboe struct pipe_buf_operations *ops = buf->ops; 5875274f052SJens Axboe 5885274f052SJens Axboe sd.len = buf->len; 5895274f052SJens Axboe if (sd.len > sd.total_len) 5905274f052SJens Axboe sd.len = sd.total_len; 5915274f052SJens Axboe 5923a326a2cSIngo Molnar err = actor(pipe, buf, &sd); 5935274f052SJens Axboe if (err) { 5945274f052SJens Axboe if (!ret && err != -ENODATA) 5955274f052SJens Axboe ret = err; 5965274f052SJens Axboe 5975274f052SJens Axboe break; 5985274f052SJens Axboe } 5995274f052SJens Axboe 6005274f052SJens Axboe ret += sd.len; 6015274f052SJens Axboe buf->offset += sd.len; 6025274f052SJens Axboe buf->len -= sd.len; 60373d62d83SIngo Molnar 6045274f052SJens Axboe if (!buf->len) { 6055274f052SJens Axboe buf->ops = NULL; 6063a326a2cSIngo Molnar ops->release(pipe, buf); 6076f767b04SJens Axboe pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); 6086f767b04SJens Axboe pipe->nrbufs--; 6096f767b04SJens Axboe if (pipe->inode) 6105274f052SJens Axboe do_wakeup = 1; 6115274f052SJens Axboe } 6125274f052SJens Axboe 6135274f052SJens Axboe sd.pos += sd.len; 6145274f052SJens Axboe sd.total_len -= sd.len; 6155274f052SJens Axboe if (!sd.total_len) 6165274f052SJens Axboe break; 6175274f052SJens Axboe } 6185274f052SJens Axboe 6196f767b04SJens Axboe if (pipe->nrbufs) 6205274f052SJens Axboe continue; 6213a326a2cSIngo Molnar if (!pipe->writers) 6225274f052SJens Axboe break; 6233a326a2cSIngo Molnar if (!pipe->waiting_writers) { 6245274f052SJens Axboe if (ret) 6255274f052SJens Axboe break; 6265274f052SJens Axboe } 6275274f052SJens Axboe 62829e35094SLinus Torvalds if (flags & SPLICE_F_NONBLOCK) { 62929e35094SLinus Torvalds if (!ret) 63029e35094SLinus Torvalds ret = -EAGAIN; 63129e35094SLinus Torvalds break; 63229e35094SLinus Torvalds } 63329e35094SLinus Torvalds 6345274f052SJens Axboe if (signal_pending(current)) { 6355274f052SJens Axboe if (!ret) 6365274f052SJens Axboe ret = -ERESTARTSYS; 6375274f052SJens Axboe break; 6385274f052SJens Axboe } 6395274f052SJens Axboe 6405274f052SJens Axboe if (do_wakeup) { 641c0bd1f65SJens Axboe smp_mb(); 6423a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 6433a326a2cSIngo Molnar wake_up_interruptible_sync(&pipe->wait); 6443a326a2cSIngo Molnar kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 6455274f052SJens Axboe do_wakeup = 0; 6465274f052SJens Axboe } 6475274f052SJens Axboe 6483a326a2cSIngo Molnar pipe_wait(pipe); 6495274f052SJens Axboe } 6505274f052SJens Axboe 6513a326a2cSIngo Molnar if (pipe->inode) 6523a326a2cSIngo Molnar mutex_unlock(&pipe->inode->i_mutex); 6535274f052SJens Axboe 6545274f052SJens Axboe if (do_wakeup) { 655c0bd1f65SJens Axboe smp_mb(); 6563a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 6573a326a2cSIngo Molnar wake_up_interruptible(&pipe->wait); 6583a326a2cSIngo Molnar kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 6595274f052SJens Axboe } 6605274f052SJens Axboe 6615274f052SJens Axboe return ret; 6625274f052SJens Axboe } 6635274f052SJens Axboe 66483f9135bSJens Axboe /** 66583f9135bSJens Axboe * generic_file_splice_write - splice data from a pipe to a file 6663a326a2cSIngo Molnar * @pipe: pipe info 66783f9135bSJens Axboe * @out: file to write to 66883f9135bSJens Axboe * @len: number of bytes to splice 66983f9135bSJens Axboe * @flags: splice modifier flags 67083f9135bSJens Axboe * 67183f9135bSJens Axboe * Will either move or copy pages (determined by @flags options) from 67283f9135bSJens Axboe * the given pipe inode to the given file. 67383f9135bSJens Axboe * 67483f9135bSJens Axboe */ 6753a326a2cSIngo Molnar ssize_t 6763a326a2cSIngo Molnar generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, 677cbb7e577SJens Axboe loff_t *ppos, size_t len, unsigned int flags) 6785274f052SJens Axboe { 6794f6f0bd2SJens Axboe struct address_space *mapping = out->f_mapping; 6803a326a2cSIngo Molnar ssize_t ret; 6813a326a2cSIngo Molnar 682cbb7e577SJens Axboe ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 6834f6f0bd2SJens Axboe 6844f6f0bd2SJens Axboe /* 68573d62d83SIngo Molnar * If file or inode is SYNC and we actually wrote some data, sync it. 6864f6f0bd2SJens Axboe */ 6874f6f0bd2SJens Axboe if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) 6884f6f0bd2SJens Axboe && ret > 0) { 6894f6f0bd2SJens Axboe struct inode *inode = mapping->host; 6904f6f0bd2SJens Axboe int err; 6914f6f0bd2SJens Axboe 6924f6f0bd2SJens Axboe mutex_lock(&inode->i_mutex); 6934f6f0bd2SJens Axboe err = generic_osync_inode(mapping->host, mapping, 6944f6f0bd2SJens Axboe OSYNC_METADATA|OSYNC_DATA); 6954f6f0bd2SJens Axboe mutex_unlock(&inode->i_mutex); 6964f6f0bd2SJens Axboe 6974f6f0bd2SJens Axboe if (err) 6984f6f0bd2SJens Axboe ret = err; 6994f6f0bd2SJens Axboe } 7004f6f0bd2SJens Axboe 7014f6f0bd2SJens Axboe return ret; 7025274f052SJens Axboe } 7035274f052SJens Axboe 704059a8f37SJens Axboe EXPORT_SYMBOL(generic_file_splice_write); 705059a8f37SJens Axboe 70683f9135bSJens Axboe /** 70783f9135bSJens Axboe * generic_splice_sendpage - splice data from a pipe to a socket 70883f9135bSJens Axboe * @inode: pipe inode 70983f9135bSJens Axboe * @out: socket to write to 71083f9135bSJens Axboe * @len: number of bytes to splice 71183f9135bSJens Axboe * @flags: splice modifier flags 71283f9135bSJens Axboe * 71383f9135bSJens Axboe * Will send @len bytes from the pipe to a network socket. No data copying 71483f9135bSJens Axboe * is involved. 71583f9135bSJens Axboe * 71683f9135bSJens Axboe */ 7173a326a2cSIngo Molnar ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 718cbb7e577SJens Axboe loff_t *ppos, size_t len, unsigned int flags) 7195274f052SJens Axboe { 720cbb7e577SJens Axboe return move_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); 7215274f052SJens Axboe } 7225274f052SJens Axboe 723059a8f37SJens Axboe EXPORT_SYMBOL(generic_splice_sendpage); 724a0f06780SJeff Garzik 72583f9135bSJens Axboe /* 72683f9135bSJens Axboe * Attempt to initiate a splice from pipe to file. 72783f9135bSJens Axboe */ 7283a326a2cSIngo Molnar static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, 729cbb7e577SJens Axboe loff_t *ppos, size_t len, unsigned int flags) 7305274f052SJens Axboe { 7315274f052SJens Axboe int ret; 7325274f052SJens Axboe 73349570e9bSJens Axboe if (unlikely(!out->f_op || !out->f_op->splice_write)) 7345274f052SJens Axboe return -EINVAL; 7355274f052SJens Axboe 73649570e9bSJens Axboe if (unlikely(!(out->f_mode & FMODE_WRITE))) 7375274f052SJens Axboe return -EBADF; 7385274f052SJens Axboe 739cbb7e577SJens Axboe ret = rw_verify_area(WRITE, out, ppos, len); 7405274f052SJens Axboe if (unlikely(ret < 0)) 7415274f052SJens Axboe return ret; 7425274f052SJens Axboe 743cbb7e577SJens Axboe return out->f_op->splice_write(pipe, out, ppos, len, flags); 7445274f052SJens Axboe } 7455274f052SJens Axboe 74683f9135bSJens Axboe /* 74783f9135bSJens Axboe * Attempt to initiate a splice from a file to a pipe. 74883f9135bSJens Axboe */ 749cbb7e577SJens Axboe static long do_splice_to(struct file *in, loff_t *ppos, 750cbb7e577SJens Axboe struct pipe_inode_info *pipe, size_t len, 751cbb7e577SJens Axboe unsigned int flags) 7525274f052SJens Axboe { 753cbb7e577SJens Axboe loff_t isize, left; 7545274f052SJens Axboe int ret; 7555274f052SJens Axboe 75649570e9bSJens Axboe if (unlikely(!in->f_op || !in->f_op->splice_read)) 7575274f052SJens Axboe return -EINVAL; 7585274f052SJens Axboe 75949570e9bSJens Axboe if (unlikely(!(in->f_mode & FMODE_READ))) 7605274f052SJens Axboe return -EBADF; 7615274f052SJens Axboe 762cbb7e577SJens Axboe ret = rw_verify_area(READ, in, ppos, len); 7635274f052SJens Axboe if (unlikely(ret < 0)) 7645274f052SJens Axboe return ret; 7655274f052SJens Axboe 7665274f052SJens Axboe isize = i_size_read(in->f_mapping->host); 767cbb7e577SJens Axboe if (unlikely(*ppos >= isize)) 7685274f052SJens Axboe return 0; 7695274f052SJens Axboe 770cbb7e577SJens Axboe left = isize - *ppos; 77149570e9bSJens Axboe if (unlikely(left < len)) 7725274f052SJens Axboe len = left; 7735274f052SJens Axboe 774cbb7e577SJens Axboe return in->f_op->splice_read(in, ppos, pipe, len, flags); 7755274f052SJens Axboe } 7765274f052SJens Axboe 777cbb7e577SJens Axboe long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 778cbb7e577SJens Axboe size_t len, unsigned int flags) 779b92ce558SJens Axboe { 780b92ce558SJens Axboe struct pipe_inode_info *pipe; 781b92ce558SJens Axboe long ret, bytes; 782cbb7e577SJens Axboe loff_t out_off; 783b92ce558SJens Axboe umode_t i_mode; 784b92ce558SJens Axboe int i; 785b92ce558SJens Axboe 786b92ce558SJens Axboe /* 787b92ce558SJens Axboe * We require the input being a regular file, as we don't want to 788b92ce558SJens Axboe * randomly drop data for eg socket -> socket splicing. Use the 789b92ce558SJens Axboe * piped splicing for that! 790b92ce558SJens Axboe */ 791b92ce558SJens Axboe i_mode = in->f_dentry->d_inode->i_mode; 792b92ce558SJens Axboe if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) 793b92ce558SJens Axboe return -EINVAL; 794b92ce558SJens Axboe 795b92ce558SJens Axboe /* 796b92ce558SJens Axboe * neither in nor out is a pipe, setup an internal pipe attached to 797b92ce558SJens Axboe * 'out' and transfer the wanted data from 'in' to 'out' through that 798b92ce558SJens Axboe */ 799b92ce558SJens Axboe pipe = current->splice_pipe; 80049570e9bSJens Axboe if (unlikely(!pipe)) { 801b92ce558SJens Axboe pipe = alloc_pipe_info(NULL); 802b92ce558SJens Axboe if (!pipe) 803b92ce558SJens Axboe return -ENOMEM; 804b92ce558SJens Axboe 805b92ce558SJens Axboe /* 806b92ce558SJens Axboe * We don't have an immediate reader, but we'll read the stuff 807b92ce558SJens Axboe * out of the pipe right after the move_to_pipe(). So set 808b92ce558SJens Axboe * PIPE_READERS appropriately. 809b92ce558SJens Axboe */ 810b92ce558SJens Axboe pipe->readers = 1; 811b92ce558SJens Axboe 812b92ce558SJens Axboe current->splice_pipe = pipe; 813b92ce558SJens Axboe } 814b92ce558SJens Axboe 815b92ce558SJens Axboe /* 81673d62d83SIngo Molnar * Do the splice. 817b92ce558SJens Axboe */ 818b92ce558SJens Axboe ret = 0; 819b92ce558SJens Axboe bytes = 0; 820cbb7e577SJens Axboe out_off = 0; 821b92ce558SJens Axboe 822b92ce558SJens Axboe while (len) { 823b92ce558SJens Axboe size_t read_len, max_read_len; 824b92ce558SJens Axboe 825b92ce558SJens Axboe /* 826b92ce558SJens Axboe * Do at most PIPE_BUFFERS pages worth of transfer: 827b92ce558SJens Axboe */ 828b92ce558SJens Axboe max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE)); 829b92ce558SJens Axboe 830cbb7e577SJens Axboe ret = do_splice_to(in, ppos, pipe, max_read_len, flags); 831b92ce558SJens Axboe if (unlikely(ret < 0)) 832b92ce558SJens Axboe goto out_release; 833b92ce558SJens Axboe 834b92ce558SJens Axboe read_len = ret; 835b92ce558SJens Axboe 836b92ce558SJens Axboe /* 837b92ce558SJens Axboe * NOTE: nonblocking mode only applies to the input. We 838b92ce558SJens Axboe * must not do the output in nonblocking mode as then we 839b92ce558SJens Axboe * could get stuck data in the internal pipe: 840b92ce558SJens Axboe */ 841cbb7e577SJens Axboe ret = do_splice_from(pipe, out, &out_off, read_len, 842b92ce558SJens Axboe flags & ~SPLICE_F_NONBLOCK); 843b92ce558SJens Axboe if (unlikely(ret < 0)) 844b92ce558SJens Axboe goto out_release; 845b92ce558SJens Axboe 846b92ce558SJens Axboe bytes += ret; 847b92ce558SJens Axboe len -= ret; 848b92ce558SJens Axboe 849b92ce558SJens Axboe /* 850b92ce558SJens Axboe * In nonblocking mode, if we got back a short read then 851b92ce558SJens Axboe * that was due to either an IO error or due to the 852b92ce558SJens Axboe * pagecache entry not being there. In the IO error case 853b92ce558SJens Axboe * the _next_ splice attempt will produce a clean IO error 854b92ce558SJens Axboe * return value (not a short read), so in both cases it's 855b92ce558SJens Axboe * correct to break out of the loop here: 856b92ce558SJens Axboe */ 857b92ce558SJens Axboe if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len)) 858b92ce558SJens Axboe break; 859b92ce558SJens Axboe } 860b92ce558SJens Axboe 861b92ce558SJens Axboe pipe->nrbufs = pipe->curbuf = 0; 862b92ce558SJens Axboe 863b92ce558SJens Axboe return bytes; 864b92ce558SJens Axboe 865b92ce558SJens Axboe out_release: 866b92ce558SJens Axboe /* 867b92ce558SJens Axboe * If we did an incomplete transfer we must release 868b92ce558SJens Axboe * the pipe buffers in question: 869b92ce558SJens Axboe */ 870b92ce558SJens Axboe for (i = 0; i < PIPE_BUFFERS; i++) { 871b92ce558SJens Axboe struct pipe_buffer *buf = pipe->bufs + i; 872b92ce558SJens Axboe 873b92ce558SJens Axboe if (buf->ops) { 874b92ce558SJens Axboe buf->ops->release(pipe, buf); 875b92ce558SJens Axboe buf->ops = NULL; 876b92ce558SJens Axboe } 877b92ce558SJens Axboe } 878b92ce558SJens Axboe pipe->nrbufs = pipe->curbuf = 0; 879b92ce558SJens Axboe 880b92ce558SJens Axboe /* 881b92ce558SJens Axboe * If we transferred some data, return the number of bytes: 882b92ce558SJens Axboe */ 883b92ce558SJens Axboe if (bytes > 0) 884b92ce558SJens Axboe return bytes; 885b92ce558SJens Axboe 886b92ce558SJens Axboe return ret; 887b92ce558SJens Axboe } 888b92ce558SJens Axboe 889b92ce558SJens Axboe EXPORT_SYMBOL(do_splice_direct); 890b92ce558SJens Axboe 89183f9135bSJens Axboe /* 89283f9135bSJens Axboe * Determine where to splice to/from. 89383f9135bSJens Axboe */ 894529565dcSIngo Molnar static long do_splice(struct file *in, loff_t __user *off_in, 895529565dcSIngo Molnar struct file *out, loff_t __user *off_out, 896529565dcSIngo Molnar size_t len, unsigned int flags) 8975274f052SJens Axboe { 8983a326a2cSIngo Molnar struct pipe_inode_info *pipe; 899cbb7e577SJens Axboe loff_t offset, *off; 9005274f052SJens Axboe 9013a326a2cSIngo Molnar pipe = in->f_dentry->d_inode->i_pipe; 902529565dcSIngo Molnar if (pipe) { 903529565dcSIngo Molnar if (off_in) 904529565dcSIngo Molnar return -ESPIPE; 905b92ce558SJens Axboe if (off_out) { 906b92ce558SJens Axboe if (out->f_op->llseek == no_llseek) 907b92ce558SJens Axboe return -EINVAL; 908cbb7e577SJens Axboe if (copy_from_user(&offset, off_out, sizeof(loff_t))) 909b92ce558SJens Axboe return -EFAULT; 910cbb7e577SJens Axboe off = &offset; 911cbb7e577SJens Axboe } else 912cbb7e577SJens Axboe off = &out->f_pos; 913529565dcSIngo Molnar 914cbb7e577SJens Axboe return do_splice_from(pipe, out, off, len, flags); 915529565dcSIngo Molnar } 9165274f052SJens Axboe 9173a326a2cSIngo Molnar pipe = out->f_dentry->d_inode->i_pipe; 918529565dcSIngo Molnar if (pipe) { 919529565dcSIngo Molnar if (off_out) 920529565dcSIngo Molnar return -ESPIPE; 921b92ce558SJens Axboe if (off_in) { 922b92ce558SJens Axboe if (in->f_op->llseek == no_llseek) 923b92ce558SJens Axboe return -EINVAL; 924cbb7e577SJens Axboe if (copy_from_user(&offset, off_in, sizeof(loff_t))) 925b92ce558SJens Axboe return -EFAULT; 926cbb7e577SJens Axboe off = &offset; 927cbb7e577SJens Axboe } else 928cbb7e577SJens Axboe off = &in->f_pos; 929529565dcSIngo Molnar 930cbb7e577SJens Axboe return do_splice_to(in, off, pipe, len, flags); 931529565dcSIngo Molnar } 9325274f052SJens Axboe 9335274f052SJens Axboe return -EINVAL; 9345274f052SJens Axboe } 9355274f052SJens Axboe 936529565dcSIngo Molnar asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, 937529565dcSIngo Molnar int fd_out, loff_t __user *off_out, 938529565dcSIngo Molnar size_t len, unsigned int flags) 9395274f052SJens Axboe { 9405274f052SJens Axboe long error; 9415274f052SJens Axboe struct file *in, *out; 9425274f052SJens Axboe int fput_in, fput_out; 9435274f052SJens Axboe 9445274f052SJens Axboe if (unlikely(!len)) 9455274f052SJens Axboe return 0; 9465274f052SJens Axboe 9475274f052SJens Axboe error = -EBADF; 948529565dcSIngo Molnar in = fget_light(fd_in, &fput_in); 9495274f052SJens Axboe if (in) { 9505274f052SJens Axboe if (in->f_mode & FMODE_READ) { 951529565dcSIngo Molnar out = fget_light(fd_out, &fput_out); 9525274f052SJens Axboe if (out) { 9535274f052SJens Axboe if (out->f_mode & FMODE_WRITE) 954529565dcSIngo Molnar error = do_splice(in, off_in, 955529565dcSIngo Molnar out, off_out, 956529565dcSIngo Molnar len, flags); 9575274f052SJens Axboe fput_light(out, fput_out); 9585274f052SJens Axboe } 9595274f052SJens Axboe } 9605274f052SJens Axboe 9615274f052SJens Axboe fput_light(in, fput_in); 9625274f052SJens Axboe } 9635274f052SJens Axboe 9645274f052SJens Axboe return error; 9655274f052SJens Axboe } 966