15274f052SJens Axboe /* 25274f052SJens Axboe * "splice": joining two ropes together by interweaving their strands. 35274f052SJens Axboe * 45274f052SJens Axboe * This is the "extended pipe" functionality, where a pipe is used as 55274f052SJens Axboe * an arbitrary in-memory buffer. Think of a pipe as a small kernel 65274f052SJens Axboe * buffer that you can use to transfer data from one end to the other. 75274f052SJens Axboe * 85274f052SJens Axboe * The traditional unix read/write is extended with a "splice()" operation 95274f052SJens Axboe * that transfers data buffers to or from a pipe buffer. 105274f052SJens Axboe * 115274f052SJens Axboe * Named by Larry McVoy, original implementation from Linus, extended by 125274f052SJens Axboe * Jens to support splicing to files and fixing the initial implementation 135274f052SJens Axboe * bugs. 145274f052SJens Axboe * 155274f052SJens Axboe * Copyright (C) 2005 Jens Axboe <axboe@suse.de> 165274f052SJens Axboe * Copyright (C) 2005 Linus Torvalds <torvalds@osdl.org> 175274f052SJens Axboe * 185274f052SJens Axboe */ 195274f052SJens Axboe #include <linux/fs.h> 205274f052SJens Axboe #include <linux/file.h> 215274f052SJens Axboe #include <linux/pagemap.h> 225274f052SJens Axboe #include <linux/pipe_fs_i.h> 235274f052SJens Axboe #include <linux/mm_inline.h> 245abc97aaSJens Axboe #include <linux/swap.h> 254f6f0bd2SJens Axboe #include <linux/writeback.h> 264f6f0bd2SJens Axboe #include <linux/buffer_head.h> 27a0f06780SJeff Garzik #include <linux/module.h> 284f6f0bd2SJens Axboe #include <linux/syscalls.h> 295274f052SJens Axboe 305274f052SJens Axboe /* 315274f052SJens Axboe * Passed to the actors 325274f052SJens Axboe */ 335274f052SJens Axboe struct splice_desc { 345274f052SJens Axboe unsigned int len, total_len; /* current and remaining length */ 355274f052SJens Axboe unsigned int flags; /* splice flags */ 365274f052SJens Axboe struct file *file; /* file to read/write */ 375274f052SJens Axboe loff_t pos; /* file position */ 385274f052SJens Axboe }; 395274f052SJens Axboe 4083f9135bSJens Axboe /* 4183f9135bSJens Axboe * Attempt to steal a page from a pipe buffer. This should perhaps go into 4283f9135bSJens Axboe * a vm helper function, it's already simplified quite a bit by the 4383f9135bSJens Axboe * addition of remove_mapping(). If success is returned, the caller may 4483f9135bSJens Axboe * attempt to reuse this page for another destination. 4583f9135bSJens Axboe */ 465abc97aaSJens Axboe static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, 475abc97aaSJens Axboe struct pipe_buffer *buf) 485abc97aaSJens Axboe { 495abc97aaSJens Axboe struct page *page = buf->page; 504f6f0bd2SJens Axboe struct address_space *mapping = page_mapping(page); 515abc97aaSJens Axboe 525abc97aaSJens Axboe WARN_ON(!PageLocked(page)); 535abc97aaSJens Axboe WARN_ON(!PageUptodate(page)); 545abc97aaSJens Axboe 55ad8d6f0aSJens Axboe /* 56ad8d6f0aSJens Axboe * At least for ext2 with nobh option, we need to wait on writeback 57ad8d6f0aSJens Axboe * completing on this page, since we'll remove it from the pagecache. 58ad8d6f0aSJens Axboe * Otherwise truncate wont wait on the page, allowing the disk 59ad8d6f0aSJens Axboe * blocks to be reused by someone else before we actually wrote our 60ad8d6f0aSJens Axboe * data to them. fs corruption ensues. 61ad8d6f0aSJens Axboe */ 62ad8d6f0aSJens Axboe wait_on_page_writeback(page); 63ad8d6f0aSJens Axboe 644f6f0bd2SJens Axboe if (PagePrivate(page)) 654f6f0bd2SJens Axboe try_to_release_page(page, mapping_gfp_mask(mapping)); 664f6f0bd2SJens Axboe 674f6f0bd2SJens Axboe if (!remove_mapping(mapping, page)) 685abc97aaSJens Axboe return 1; 695abc97aaSJens Axboe 703e7ee3e7SJens Axboe buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; 715abc97aaSJens Axboe return 0; 725abc97aaSJens Axboe } 735abc97aaSJens Axboe 745274f052SJens Axboe static void page_cache_pipe_buf_release(struct pipe_inode_info *info, 755274f052SJens Axboe struct pipe_buffer *buf) 765274f052SJens Axboe { 775274f052SJens Axboe page_cache_release(buf->page); 785274f052SJens Axboe buf->page = NULL; 793e7ee3e7SJens Axboe buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); 805274f052SJens Axboe } 815274f052SJens Axboe 825274f052SJens Axboe static void *page_cache_pipe_buf_map(struct file *file, 835274f052SJens Axboe struct pipe_inode_info *info, 845274f052SJens Axboe struct pipe_buffer *buf) 855274f052SJens Axboe { 865274f052SJens Axboe struct page *page = buf->page; 8749d0b21bSJens Axboe int err; 885274f052SJens Axboe 895274f052SJens Axboe if (!PageUptodate(page)) { 9049d0b21bSJens Axboe lock_page(page); 915274f052SJens Axboe 9249d0b21bSJens Axboe /* 9349d0b21bSJens Axboe * Page got truncated/unhashed. This will cause a 0-byte 9449d0b21bSJens Axboe * splice, if this is the first page 9549d0b21bSJens Axboe */ 965274f052SJens Axboe if (!page->mapping) { 9749d0b21bSJens Axboe err = -ENODATA; 9849d0b21bSJens Axboe goto error; 995274f052SJens Axboe } 1005274f052SJens Axboe 10149d0b21bSJens Axboe /* 10249d0b21bSJens Axboe * uh oh, read-error from disk 10349d0b21bSJens Axboe */ 10449d0b21bSJens Axboe if (!PageUptodate(page)) { 10549d0b21bSJens Axboe err = -EIO; 10649d0b21bSJens Axboe goto error; 10749d0b21bSJens Axboe } 10849d0b21bSJens Axboe 10949d0b21bSJens Axboe /* 11049d0b21bSJens Axboe * page is ok afterall, fall through to mapping 11149d0b21bSJens Axboe */ 11249d0b21bSJens Axboe unlock_page(page); 11349d0b21bSJens Axboe } 11449d0b21bSJens Axboe 11549d0b21bSJens Axboe return kmap(page); 11649d0b21bSJens Axboe error: 11749d0b21bSJens Axboe unlock_page(page); 11849d0b21bSJens Axboe return ERR_PTR(err); 1195274f052SJens Axboe } 1205274f052SJens Axboe 1215274f052SJens Axboe static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 1225274f052SJens Axboe struct pipe_buffer *buf) 1235274f052SJens Axboe { 1245274f052SJens Axboe kunmap(buf->page); 1255274f052SJens Axboe } 1265274f052SJens Axboe 1275274f052SJens Axboe static struct pipe_buf_operations page_cache_pipe_buf_ops = { 1285274f052SJens Axboe .can_merge = 0, 1295274f052SJens Axboe .map = page_cache_pipe_buf_map, 1305274f052SJens Axboe .unmap = page_cache_pipe_buf_unmap, 1315274f052SJens Axboe .release = page_cache_pipe_buf_release, 1325abc97aaSJens Axboe .steal = page_cache_pipe_buf_steal, 1335274f052SJens Axboe }; 1345274f052SJens Axboe 13583f9135bSJens Axboe /* 13683f9135bSJens Axboe * Pipe output worker. This sets up our pipe format with the page cache 13783f9135bSJens Axboe * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 13883f9135bSJens Axboe */ 1393a326a2cSIngo Molnar static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 1405274f052SJens Axboe int nr_pages, unsigned long offset, 14129e35094SLinus Torvalds unsigned long len, unsigned int flags) 1425274f052SJens Axboe { 1435274f052SJens Axboe int ret, do_wakeup, i; 1445274f052SJens Axboe 1455274f052SJens Axboe ret = 0; 1465274f052SJens Axboe do_wakeup = 0; 1475274f052SJens Axboe i = 0; 1485274f052SJens Axboe 1493a326a2cSIngo Molnar if (pipe->inode) 1503a326a2cSIngo Molnar mutex_lock(&pipe->inode->i_mutex); 1515274f052SJens Axboe 1525274f052SJens Axboe for (;;) { 1533a326a2cSIngo Molnar if (!pipe->readers) { 1545274f052SJens Axboe send_sig(SIGPIPE, current, 0); 1555274f052SJens Axboe if (!ret) 1565274f052SJens Axboe ret = -EPIPE; 1575274f052SJens Axboe break; 1585274f052SJens Axboe } 1595274f052SJens Axboe 1606f767b04SJens Axboe if (pipe->nrbufs < PIPE_BUFFERS) { 1616f767b04SJens Axboe int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); 1623a326a2cSIngo Molnar struct pipe_buffer *buf = pipe->bufs + newbuf; 1635274f052SJens Axboe struct page *page = pages[i++]; 1645274f052SJens Axboe unsigned long this_len; 1655274f052SJens Axboe 1665274f052SJens Axboe this_len = PAGE_CACHE_SIZE - offset; 1675274f052SJens Axboe if (this_len > len) 1685274f052SJens Axboe this_len = len; 1695274f052SJens Axboe 1705274f052SJens Axboe buf->page = page; 1715274f052SJens Axboe buf->offset = offset; 1725274f052SJens Axboe buf->len = this_len; 1735274f052SJens Axboe buf->ops = &page_cache_pipe_buf_ops; 1746f767b04SJens Axboe pipe->nrbufs++; 1756f767b04SJens Axboe if (pipe->inode) 1765274f052SJens Axboe do_wakeup = 1; 1775274f052SJens Axboe 1785274f052SJens Axboe ret += this_len; 1795274f052SJens Axboe len -= this_len; 1805274f052SJens Axboe offset = 0; 1815274f052SJens Axboe if (!--nr_pages) 1825274f052SJens Axboe break; 1835274f052SJens Axboe if (!len) 1845274f052SJens Axboe break; 1856f767b04SJens Axboe if (pipe->nrbufs < PIPE_BUFFERS) 1865274f052SJens Axboe continue; 1875274f052SJens Axboe 1885274f052SJens Axboe break; 1895274f052SJens Axboe } 1905274f052SJens Axboe 19129e35094SLinus Torvalds if (flags & SPLICE_F_NONBLOCK) { 19229e35094SLinus Torvalds if (!ret) 19329e35094SLinus Torvalds ret = -EAGAIN; 19429e35094SLinus Torvalds break; 19529e35094SLinus Torvalds } 19629e35094SLinus Torvalds 1975274f052SJens Axboe if (signal_pending(current)) { 1985274f052SJens Axboe if (!ret) 1995274f052SJens Axboe ret = -ERESTARTSYS; 2005274f052SJens Axboe break; 2015274f052SJens Axboe } 2025274f052SJens Axboe 2035274f052SJens Axboe if (do_wakeup) { 204c0bd1f65SJens Axboe smp_mb(); 2053a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 2063a326a2cSIngo Molnar wake_up_interruptible_sync(&pipe->wait); 2073a326a2cSIngo Molnar kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 2085274f052SJens Axboe do_wakeup = 0; 2095274f052SJens Axboe } 2105274f052SJens Axboe 2113a326a2cSIngo Molnar pipe->waiting_writers++; 2123a326a2cSIngo Molnar pipe_wait(pipe); 2133a326a2cSIngo Molnar pipe->waiting_writers--; 2145274f052SJens Axboe } 2155274f052SJens Axboe 2163a326a2cSIngo Molnar if (pipe->inode) 2173a326a2cSIngo Molnar mutex_unlock(&pipe->inode->i_mutex); 2185274f052SJens Axboe 2195274f052SJens Axboe if (do_wakeup) { 220c0bd1f65SJens Axboe smp_mb(); 2213a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 2223a326a2cSIngo Molnar wake_up_interruptible(&pipe->wait); 2233a326a2cSIngo Molnar kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 2245274f052SJens Axboe } 2255274f052SJens Axboe 2265274f052SJens Axboe while (i < nr_pages) 2275274f052SJens Axboe page_cache_release(pages[i++]); 2285274f052SJens Axboe 2295274f052SJens Axboe return ret; 2305274f052SJens Axboe } 2315274f052SJens Axboe 2323a326a2cSIngo Molnar static int 2333a326a2cSIngo Molnar __generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe, 23429e35094SLinus Torvalds size_t len, unsigned int flags) 2355274f052SJens Axboe { 2365274f052SJens Axboe struct address_space *mapping = in->f_mapping; 2375274f052SJens Axboe unsigned int offset, nr_pages; 23816c523ddSJens Axboe struct page *pages[PIPE_BUFFERS]; 2395274f052SJens Axboe struct page *page; 24016c523ddSJens Axboe pgoff_t index; 2417480a904SJens Axboe int i, error; 2425274f052SJens Axboe 2435274f052SJens Axboe index = in->f_pos >> PAGE_CACHE_SHIFT; 2445274f052SJens Axboe offset = in->f_pos & ~PAGE_CACHE_MASK; 2455274f052SJens Axboe nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 2465274f052SJens Axboe 2475274f052SJens Axboe if (nr_pages > PIPE_BUFFERS) 2485274f052SJens Axboe nr_pages = PIPE_BUFFERS; 2495274f052SJens Axboe 2505274f052SJens Axboe /* 2510b749ce3SJens Axboe * initiate read-ahead on this page range. however, don't call into 2520b749ce3SJens Axboe * read-ahead if this is a non-zero offset (we are likely doing small 2530b749ce3SJens Axboe * chunk splice and the page is already there) for a single page. 2545274f052SJens Axboe */ 2550b749ce3SJens Axboe if (!offset || nr_pages > 1) 2565274f052SJens Axboe do_page_cache_readahead(mapping, in, index, nr_pages); 2575274f052SJens Axboe 2585274f052SJens Axboe /* 2595274f052SJens Axboe * now fill in the holes 2605274f052SJens Axboe */ 2617480a904SJens Axboe error = 0; 26216c523ddSJens Axboe for (i = 0; i < nr_pages; i++, index++) { 2637480a904SJens Axboe find_page: 2645274f052SJens Axboe /* 2657480a904SJens Axboe * lookup the page for this index 2665274f052SJens Axboe */ 2677480a904SJens Axboe page = find_get_page(mapping, index); 2687480a904SJens Axboe if (!page) { 2697480a904SJens Axboe /* 2707480a904SJens Axboe * If in nonblock mode then dont block on 2717480a904SJens Axboe * readpage (we've kicked readahead so there 2727480a904SJens Axboe * will be asynchronous progress): 2737480a904SJens Axboe */ 2747480a904SJens Axboe if (flags & SPLICE_F_NONBLOCK) 2757480a904SJens Axboe break; 2767480a904SJens Axboe 2777480a904SJens Axboe /* 2787480a904SJens Axboe * page didn't exist, allocate one 2797480a904SJens Axboe */ 2807480a904SJens Axboe page = page_cache_alloc_cold(mapping); 2815274f052SJens Axboe if (!page) 2825274f052SJens Axboe break; 2835274f052SJens Axboe 2847480a904SJens Axboe error = add_to_page_cache_lru(page, mapping, index, 2857480a904SJens Axboe mapping_gfp_mask(mapping)); 2865274f052SJens Axboe if (unlikely(error)) { 2875274f052SJens Axboe page_cache_release(page); 2885274f052SJens Axboe break; 2895274f052SJens Axboe } 2907480a904SJens Axboe 2917480a904SJens Axboe goto readpage; 2925274f052SJens Axboe } 2937480a904SJens Axboe 2947480a904SJens Axboe /* 2957480a904SJens Axboe * If the page isn't uptodate, we may need to start io on it 2967480a904SJens Axboe */ 2977480a904SJens Axboe if (!PageUptodate(page)) { 2987480a904SJens Axboe lock_page(page); 2997480a904SJens Axboe 3007480a904SJens Axboe /* 3017480a904SJens Axboe * page was truncated, stop here. if this isn't the 3027480a904SJens Axboe * first page, we'll just complete what we already 3037480a904SJens Axboe * added 3047480a904SJens Axboe */ 3057480a904SJens Axboe if (!page->mapping) { 3067480a904SJens Axboe unlock_page(page); 3077480a904SJens Axboe page_cache_release(page); 3087480a904SJens Axboe break; 3097480a904SJens Axboe } 3107480a904SJens Axboe /* 3117480a904SJens Axboe * page was already under io and is now done, great 3127480a904SJens Axboe */ 3137480a904SJens Axboe if (PageUptodate(page)) { 3147480a904SJens Axboe unlock_page(page); 3157480a904SJens Axboe goto fill_it; 3167480a904SJens Axboe } 3177480a904SJens Axboe 3187480a904SJens Axboe readpage: 3197480a904SJens Axboe /* 3207480a904SJens Axboe * need to read in the page 3217480a904SJens Axboe */ 3227480a904SJens Axboe error = mapping->a_ops->readpage(in, page); 3237480a904SJens Axboe 3247480a904SJens Axboe if (unlikely(error)) { 3257480a904SJens Axboe page_cache_release(page); 3267480a904SJens Axboe if (error == AOP_TRUNCATED_PAGE) 3277480a904SJens Axboe goto find_page; 3287480a904SJens Axboe break; 3297480a904SJens Axboe } 3307480a904SJens Axboe } 3317480a904SJens Axboe fill_it: 33216c523ddSJens Axboe pages[i] = page; 3335274f052SJens Axboe } 3345274f052SJens Axboe 33516c523ddSJens Axboe if (i) 33629e35094SLinus Torvalds return move_to_pipe(pipe, pages, i, offset, len, flags); 33716c523ddSJens Axboe 3387480a904SJens Axboe return error; 3395274f052SJens Axboe } 3405274f052SJens Axboe 34183f9135bSJens Axboe /** 34283f9135bSJens Axboe * generic_file_splice_read - splice data from file to a pipe 34383f9135bSJens Axboe * @in: file to splice from 34483f9135bSJens Axboe * @pipe: pipe to splice to 34583f9135bSJens Axboe * @len: number of bytes to splice 34683f9135bSJens Axboe * @flags: splice modifier flags 34783f9135bSJens Axboe * 34883f9135bSJens Axboe * Will read pages from given file and fill them into a pipe. 34983f9135bSJens Axboe */ 3503a326a2cSIngo Molnar ssize_t generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe, 3515274f052SJens Axboe size_t len, unsigned int flags) 3525274f052SJens Axboe { 3535274f052SJens Axboe ssize_t spliced; 3545274f052SJens Axboe int ret; 3555274f052SJens Axboe 3565274f052SJens Axboe ret = 0; 3575274f052SJens Axboe spliced = 0; 3583a326a2cSIngo Molnar 3595274f052SJens Axboe while (len) { 36029e35094SLinus Torvalds ret = __generic_file_splice_read(in, pipe, len, flags); 3615274f052SJens Axboe 3625274f052SJens Axboe if (ret <= 0) 3635274f052SJens Axboe break; 3645274f052SJens Axboe 3655274f052SJens Axboe in->f_pos += ret; 3665274f052SJens Axboe len -= ret; 3675274f052SJens Axboe spliced += ret; 36829e35094SLinus Torvalds 36929e35094SLinus Torvalds if (!(flags & SPLICE_F_NONBLOCK)) 37029e35094SLinus Torvalds continue; 37129e35094SLinus Torvalds ret = -EAGAIN; 37229e35094SLinus Torvalds break; 3735274f052SJens Axboe } 3745274f052SJens Axboe 3755274f052SJens Axboe if (spliced) 3765274f052SJens Axboe return spliced; 3775274f052SJens Axboe 3785274f052SJens Axboe return ret; 3795274f052SJens Axboe } 3805274f052SJens Axboe 381059a8f37SJens Axboe EXPORT_SYMBOL(generic_file_splice_read); 382059a8f37SJens Axboe 3835274f052SJens Axboe /* 3844f6f0bd2SJens Axboe * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 3854f6f0bd2SJens Axboe * using sendpage(). 3865274f052SJens Axboe */ 3875274f052SJens Axboe static int pipe_to_sendpage(struct pipe_inode_info *info, 3885274f052SJens Axboe struct pipe_buffer *buf, struct splice_desc *sd) 3895274f052SJens Axboe { 3905274f052SJens Axboe struct file *file = sd->file; 3915274f052SJens Axboe loff_t pos = sd->pos; 3925274f052SJens Axboe unsigned int offset; 3935274f052SJens Axboe ssize_t ret; 3945274f052SJens Axboe void *ptr; 395b2b39fa4SJens Axboe int more; 3965274f052SJens Axboe 3975274f052SJens Axboe /* 3985274f052SJens Axboe * sub-optimal, but we are limited by the pipe ->map. we don't 3995274f052SJens Axboe * need a kmap'ed buffer here, we just want to make sure we 4005274f052SJens Axboe * have the page pinned if the pipe page originates from the 4015274f052SJens Axboe * page cache 4025274f052SJens Axboe */ 4035274f052SJens Axboe ptr = buf->ops->map(file, info, buf); 4045274f052SJens Axboe if (IS_ERR(ptr)) 4055274f052SJens Axboe return PTR_ERR(ptr); 4065274f052SJens Axboe 4075274f052SJens Axboe offset = pos & ~PAGE_CACHE_MASK; 408b2b39fa4SJens Axboe more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 4095274f052SJens Axboe 410b2b39fa4SJens Axboe ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 4115274f052SJens Axboe 4125274f052SJens Axboe buf->ops->unmap(info, buf); 4135274f052SJens Axboe if (ret == sd->len) 4145274f052SJens Axboe return 0; 4155274f052SJens Axboe 4165274f052SJens Axboe return -EIO; 4175274f052SJens Axboe } 4185274f052SJens Axboe 4195274f052SJens Axboe /* 4205274f052SJens Axboe * This is a little more tricky than the file -> pipe splicing. There are 4215274f052SJens Axboe * basically three cases: 4225274f052SJens Axboe * 4235274f052SJens Axboe * - Destination page already exists in the address space and there 4245274f052SJens Axboe * are users of it. For that case we have no other option that 4255274f052SJens Axboe * copying the data. Tough luck. 4265274f052SJens Axboe * - Destination page already exists in the address space, but there 4275274f052SJens Axboe * are no users of it. Make sure it's uptodate, then drop it. Fall 4285274f052SJens Axboe * through to last case. 4295274f052SJens Axboe * - Destination page does not exist, we can add the pipe page to 4305274f052SJens Axboe * the page cache and avoid the copy. 4315274f052SJens Axboe * 43283f9135bSJens Axboe * If asked to move pages to the output file (SPLICE_F_MOVE is set in 43383f9135bSJens Axboe * sd->flags), we attempt to migrate pages from the pipe to the output 43483f9135bSJens Axboe * file address space page cache. This is possible if no one else has 43583f9135bSJens Axboe * the pipe page referenced outside of the pipe and page cache. If 43683f9135bSJens Axboe * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create 43783f9135bSJens Axboe * a new page in the output file page cache and fill/dirty that. 4385274f052SJens Axboe */ 4395274f052SJens Axboe static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, 4405274f052SJens Axboe struct splice_desc *sd) 4415274f052SJens Axboe { 4425274f052SJens Axboe struct file *file = sd->file; 4435274f052SJens Axboe struct address_space *mapping = file->f_mapping; 4443e7ee3e7SJens Axboe gfp_t gfp_mask = mapping_gfp_mask(mapping); 4455274f052SJens Axboe unsigned int offset; 4465274f052SJens Axboe struct page *page; 4475274f052SJens Axboe pgoff_t index; 4485abc97aaSJens Axboe char *src; 4493e7ee3e7SJens Axboe int ret; 4505274f052SJens Axboe 4515274f052SJens Axboe /* 45249d0b21bSJens Axboe * make sure the data in this buffer is uptodate 4535274f052SJens Axboe */ 4545274f052SJens Axboe src = buf->ops->map(file, info, buf); 4555274f052SJens Axboe if (IS_ERR(src)) 4565274f052SJens Axboe return PTR_ERR(src); 4575274f052SJens Axboe 4585274f052SJens Axboe index = sd->pos >> PAGE_CACHE_SHIFT; 4595274f052SJens Axboe offset = sd->pos & ~PAGE_CACHE_MASK; 4605274f052SJens Axboe 4615abc97aaSJens Axboe /* 4625abc97aaSJens Axboe * reuse buf page, if SPLICE_F_MOVE is set 4635abc97aaSJens Axboe */ 4645abc97aaSJens Axboe if (sd->flags & SPLICE_F_MOVE) { 46583f9135bSJens Axboe /* 46683f9135bSJens Axboe * If steal succeeds, buf->page is now pruned from the vm 46783f9135bSJens Axboe * side (LRU and page cache) and we can reuse it. 46883f9135bSJens Axboe */ 4695abc97aaSJens Axboe if (buf->ops->steal(info, buf)) 4705abc97aaSJens Axboe goto find_page; 4715abc97aaSJens Axboe 47249d0b21bSJens Axboe /* 47349d0b21bSJens Axboe * this will also set the page locked 47449d0b21bSJens Axboe */ 4755abc97aaSJens Axboe page = buf->page; 4763e7ee3e7SJens Axboe if (add_to_page_cache(page, mapping, index, gfp_mask)) 4775abc97aaSJens Axboe goto find_page; 4783e7ee3e7SJens Axboe 4793e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_LRU)) 4803e7ee3e7SJens Axboe lru_cache_add(page); 4815abc97aaSJens Axboe } else { 4825274f052SJens Axboe find_page: 4835274f052SJens Axboe ret = -ENOMEM; 4843e7ee3e7SJens Axboe page = find_or_create_page(mapping, index, gfp_mask); 4855274f052SJens Axboe if (!page) 4869aefe431SDave Jones goto out_nomem; 4875274f052SJens Axboe 4885274f052SJens Axboe /* 4895274f052SJens Axboe * If the page is uptodate, it is also locked. If it isn't 4905274f052SJens Axboe * uptodate, we can mark it uptodate if we are filling the 4915274f052SJens Axboe * full page. Otherwise we need to read it in first... 4925274f052SJens Axboe */ 4935274f052SJens Axboe if (!PageUptodate(page)) { 4945274f052SJens Axboe if (sd->len < PAGE_CACHE_SIZE) { 4955274f052SJens Axboe ret = mapping->a_ops->readpage(file, page); 4965274f052SJens Axboe if (unlikely(ret)) 4975274f052SJens Axboe goto out; 4985274f052SJens Axboe 4995274f052SJens Axboe lock_page(page); 5005274f052SJens Axboe 5015274f052SJens Axboe if (!PageUptodate(page)) { 5025274f052SJens Axboe /* 5035274f052SJens Axboe * page got invalidated, repeat 5045274f052SJens Axboe */ 5055274f052SJens Axboe if (!page->mapping) { 5065274f052SJens Axboe unlock_page(page); 5075274f052SJens Axboe page_cache_release(page); 5085274f052SJens Axboe goto find_page; 5095274f052SJens Axboe } 5105274f052SJens Axboe ret = -EIO; 5115274f052SJens Axboe goto out; 5125274f052SJens Axboe } 5135274f052SJens Axboe } else { 5145274f052SJens Axboe WARN_ON(!PageLocked(page)); 5155274f052SJens Axboe SetPageUptodate(page); 5165274f052SJens Axboe } 5175274f052SJens Axboe } 5185abc97aaSJens Axboe } 5195274f052SJens Axboe 5205274f052SJens Axboe ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 5214f6f0bd2SJens Axboe if (ret == AOP_TRUNCATED_PAGE) { 5224f6f0bd2SJens Axboe page_cache_release(page); 5234f6f0bd2SJens Axboe goto find_page; 5244f6f0bd2SJens Axboe } else if (ret) 5255274f052SJens Axboe goto out; 5265274f052SJens Axboe 5273e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 5285abc97aaSJens Axboe char *dst = kmap_atomic(page, KM_USER0); 5295abc97aaSJens Axboe 5305274f052SJens Axboe memcpy(dst + offset, src + buf->offset, sd->len); 5315274f052SJens Axboe flush_dcache_page(page); 5325274f052SJens Axboe kunmap_atomic(dst, KM_USER0); 5335abc97aaSJens Axboe } 5345274f052SJens Axboe 5355274f052SJens Axboe ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 5364f6f0bd2SJens Axboe if (ret == AOP_TRUNCATED_PAGE) { 5374f6f0bd2SJens Axboe page_cache_release(page); 5384f6f0bd2SJens Axboe goto find_page; 5394f6f0bd2SJens Axboe } else if (ret) 5405274f052SJens Axboe goto out; 5415274f052SJens Axboe 542c7f21e4fSJens Axboe mark_page_accessed(page); 5434f6f0bd2SJens Axboe balance_dirty_pages_ratelimited(mapping); 5445274f052SJens Axboe out: 5453e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 5465274f052SJens Axboe page_cache_release(page); 5474f6f0bd2SJens Axboe unlock_page(page); 5484f6f0bd2SJens Axboe } 5499aefe431SDave Jones out_nomem: 5505274f052SJens Axboe buf->ops->unmap(info, buf); 5515274f052SJens Axboe return ret; 5525274f052SJens Axboe } 5535274f052SJens Axboe 5545274f052SJens Axboe typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, 5555274f052SJens Axboe struct splice_desc *); 5565274f052SJens Axboe 55783f9135bSJens Axboe /* 55883f9135bSJens Axboe * Pipe input worker. Most of this logic works like a regular pipe, the 55983f9135bSJens Axboe * key here is the 'actor' worker passed in that actually moves the data 56083f9135bSJens Axboe * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 56183f9135bSJens Axboe */ 5623a326a2cSIngo Molnar static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, 5635274f052SJens Axboe size_t len, unsigned int flags, 5645274f052SJens Axboe splice_actor *actor) 5655274f052SJens Axboe { 5665274f052SJens Axboe int ret, do_wakeup, err; 5675274f052SJens Axboe struct splice_desc sd; 5685274f052SJens Axboe 5695274f052SJens Axboe ret = 0; 5705274f052SJens Axboe do_wakeup = 0; 5715274f052SJens Axboe 5725274f052SJens Axboe sd.total_len = len; 5735274f052SJens Axboe sd.flags = flags; 5745274f052SJens Axboe sd.file = out; 5755274f052SJens Axboe sd.pos = out->f_pos; 5765274f052SJens Axboe 5773a326a2cSIngo Molnar if (pipe->inode) 5783a326a2cSIngo Molnar mutex_lock(&pipe->inode->i_mutex); 5795274f052SJens Axboe 5805274f052SJens Axboe for (;;) { 5816f767b04SJens Axboe if (pipe->nrbufs) { 5826f767b04SJens Axboe struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; 5835274f052SJens Axboe struct pipe_buf_operations *ops = buf->ops; 5845274f052SJens Axboe 5855274f052SJens Axboe sd.len = buf->len; 5865274f052SJens Axboe if (sd.len > sd.total_len) 5875274f052SJens Axboe sd.len = sd.total_len; 5885274f052SJens Axboe 5893a326a2cSIngo Molnar err = actor(pipe, buf, &sd); 5905274f052SJens Axboe if (err) { 5915274f052SJens Axboe if (!ret && err != -ENODATA) 5925274f052SJens Axboe ret = err; 5935274f052SJens Axboe 5945274f052SJens Axboe break; 5955274f052SJens Axboe } 5965274f052SJens Axboe 5975274f052SJens Axboe ret += sd.len; 5985274f052SJens Axboe buf->offset += sd.len; 5995274f052SJens Axboe buf->len -= sd.len; 6005274f052SJens Axboe if (!buf->len) { 6015274f052SJens Axboe buf->ops = NULL; 6023a326a2cSIngo Molnar ops->release(pipe, buf); 6036f767b04SJens Axboe pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); 6046f767b04SJens Axboe pipe->nrbufs--; 6056f767b04SJens Axboe if (pipe->inode) 6065274f052SJens Axboe do_wakeup = 1; 6075274f052SJens Axboe } 6085274f052SJens Axboe 6095274f052SJens Axboe sd.pos += sd.len; 6105274f052SJens Axboe sd.total_len -= sd.len; 6115274f052SJens Axboe if (!sd.total_len) 6125274f052SJens Axboe break; 6135274f052SJens Axboe } 6145274f052SJens Axboe 6156f767b04SJens Axboe if (pipe->nrbufs) 6165274f052SJens Axboe continue; 6173a326a2cSIngo Molnar if (!pipe->writers) 6185274f052SJens Axboe break; 6193a326a2cSIngo Molnar if (!pipe->waiting_writers) { 6205274f052SJens Axboe if (ret) 6215274f052SJens Axboe break; 6225274f052SJens Axboe } 6235274f052SJens Axboe 62429e35094SLinus Torvalds if (flags & SPLICE_F_NONBLOCK) { 62529e35094SLinus Torvalds if (!ret) 62629e35094SLinus Torvalds ret = -EAGAIN; 62729e35094SLinus Torvalds break; 62829e35094SLinus Torvalds } 62929e35094SLinus Torvalds 6305274f052SJens Axboe if (signal_pending(current)) { 6315274f052SJens Axboe if (!ret) 6325274f052SJens Axboe ret = -ERESTARTSYS; 6335274f052SJens Axboe break; 6345274f052SJens Axboe } 6355274f052SJens Axboe 6365274f052SJens Axboe if (do_wakeup) { 637c0bd1f65SJens Axboe smp_mb(); 6383a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 6393a326a2cSIngo Molnar wake_up_interruptible_sync(&pipe->wait); 6403a326a2cSIngo Molnar kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 6415274f052SJens Axboe do_wakeup = 0; 6425274f052SJens Axboe } 6435274f052SJens Axboe 6443a326a2cSIngo Molnar pipe_wait(pipe); 6455274f052SJens Axboe } 6465274f052SJens Axboe 6473a326a2cSIngo Molnar if (pipe->inode) 6483a326a2cSIngo Molnar mutex_unlock(&pipe->inode->i_mutex); 6495274f052SJens Axboe 6505274f052SJens Axboe if (do_wakeup) { 651c0bd1f65SJens Axboe smp_mb(); 6523a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 6533a326a2cSIngo Molnar wake_up_interruptible(&pipe->wait); 6543a326a2cSIngo Molnar kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 6555274f052SJens Axboe } 6565274f052SJens Axboe 6575274f052SJens Axboe out->f_pos = sd.pos; 6585274f052SJens Axboe return ret; 6595274f052SJens Axboe 6605274f052SJens Axboe } 6615274f052SJens Axboe 66283f9135bSJens Axboe /** 66383f9135bSJens Axboe * generic_file_splice_write - splice data from a pipe to a file 6643a326a2cSIngo Molnar * @pipe: pipe info 66583f9135bSJens Axboe * @out: file to write to 66683f9135bSJens Axboe * @len: number of bytes to splice 66783f9135bSJens Axboe * @flags: splice modifier flags 66883f9135bSJens Axboe * 66983f9135bSJens Axboe * Will either move or copy pages (determined by @flags options) from 67083f9135bSJens Axboe * the given pipe inode to the given file. 67183f9135bSJens Axboe * 67283f9135bSJens Axboe */ 6733a326a2cSIngo Molnar ssize_t 6743a326a2cSIngo Molnar generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, 6755274f052SJens Axboe size_t len, unsigned int flags) 6765274f052SJens Axboe { 6774f6f0bd2SJens Axboe struct address_space *mapping = out->f_mapping; 6783a326a2cSIngo Molnar ssize_t ret; 6793a326a2cSIngo Molnar 6803a326a2cSIngo Molnar ret = move_from_pipe(pipe, out, len, flags, pipe_to_file); 6814f6f0bd2SJens Axboe 6824f6f0bd2SJens Axboe /* 6834f6f0bd2SJens Axboe * if file or inode is SYNC and we actually wrote some data, sync it 6844f6f0bd2SJens Axboe */ 6854f6f0bd2SJens Axboe if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) 6864f6f0bd2SJens Axboe && ret > 0) { 6874f6f0bd2SJens Axboe struct inode *inode = mapping->host; 6884f6f0bd2SJens Axboe int err; 6894f6f0bd2SJens Axboe 6904f6f0bd2SJens Axboe mutex_lock(&inode->i_mutex); 6914f6f0bd2SJens Axboe err = generic_osync_inode(mapping->host, mapping, 6924f6f0bd2SJens Axboe OSYNC_METADATA|OSYNC_DATA); 6934f6f0bd2SJens Axboe mutex_unlock(&inode->i_mutex); 6944f6f0bd2SJens Axboe 6954f6f0bd2SJens Axboe if (err) 6964f6f0bd2SJens Axboe ret = err; 6974f6f0bd2SJens Axboe } 6984f6f0bd2SJens Axboe 6994f6f0bd2SJens Axboe return ret; 7005274f052SJens Axboe } 7015274f052SJens Axboe 702059a8f37SJens Axboe EXPORT_SYMBOL(generic_file_splice_write); 703059a8f37SJens Axboe 70483f9135bSJens Axboe /** 70583f9135bSJens Axboe * generic_splice_sendpage - splice data from a pipe to a socket 70683f9135bSJens Axboe * @inode: pipe inode 70783f9135bSJens Axboe * @out: socket to write to 70883f9135bSJens Axboe * @len: number of bytes to splice 70983f9135bSJens Axboe * @flags: splice modifier flags 71083f9135bSJens Axboe * 71183f9135bSJens Axboe * Will send @len bytes from the pipe to a network socket. No data copying 71283f9135bSJens Axboe * is involved. 71383f9135bSJens Axboe * 71483f9135bSJens Axboe */ 7153a326a2cSIngo Molnar ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 7165274f052SJens Axboe size_t len, unsigned int flags) 7175274f052SJens Axboe { 7183a326a2cSIngo Molnar return move_from_pipe(pipe, out, len, flags, pipe_to_sendpage); 7195274f052SJens Axboe } 7205274f052SJens Axboe 721059a8f37SJens Axboe EXPORT_SYMBOL(generic_splice_sendpage); 722a0f06780SJeff Garzik 72383f9135bSJens Axboe /* 72483f9135bSJens Axboe * Attempt to initiate a splice from pipe to file. 72583f9135bSJens Axboe */ 7263a326a2cSIngo Molnar static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, 727b92ce558SJens Axboe size_t len, unsigned int flags) 7285274f052SJens Axboe { 7295274f052SJens Axboe loff_t pos; 7305274f052SJens Axboe int ret; 7315274f052SJens Axboe 73249570e9bSJens Axboe if (unlikely(!out->f_op || !out->f_op->splice_write)) 7335274f052SJens Axboe return -EINVAL; 7345274f052SJens Axboe 73549570e9bSJens Axboe if (unlikely(!(out->f_mode & FMODE_WRITE))) 7365274f052SJens Axboe return -EBADF; 7375274f052SJens Axboe 7385274f052SJens Axboe pos = out->f_pos; 739529565dcSIngo Molnar 7405274f052SJens Axboe ret = rw_verify_area(WRITE, out, &pos, len); 7415274f052SJens Axboe if (unlikely(ret < 0)) 7425274f052SJens Axboe return ret; 7435274f052SJens Axboe 7445274f052SJens Axboe return out->f_op->splice_write(pipe, out, len, flags); 7455274f052SJens Axboe } 7465274f052SJens Axboe 74783f9135bSJens Axboe /* 74883f9135bSJens Axboe * Attempt to initiate a splice from a file to a pipe. 74983f9135bSJens Axboe */ 750b92ce558SJens Axboe static long do_splice_to(struct file *in, struct pipe_inode_info *pipe, 751b92ce558SJens Axboe size_t len, unsigned int flags) 7525274f052SJens Axboe { 7535274f052SJens Axboe loff_t pos, isize, left; 7545274f052SJens Axboe int ret; 7555274f052SJens Axboe 75649570e9bSJens Axboe if (unlikely(!in->f_op || !in->f_op->splice_read)) 7575274f052SJens Axboe return -EINVAL; 7585274f052SJens Axboe 75949570e9bSJens Axboe if (unlikely(!(in->f_mode & FMODE_READ))) 7605274f052SJens Axboe return -EBADF; 7615274f052SJens Axboe 7625274f052SJens Axboe pos = in->f_pos; 763529565dcSIngo Molnar 7645274f052SJens Axboe ret = rw_verify_area(READ, in, &pos, len); 7655274f052SJens Axboe if (unlikely(ret < 0)) 7665274f052SJens Axboe return ret; 7675274f052SJens Axboe 7685274f052SJens Axboe isize = i_size_read(in->f_mapping->host); 7695274f052SJens Axboe if (unlikely(in->f_pos >= isize)) 7705274f052SJens Axboe return 0; 7715274f052SJens Axboe 7725274f052SJens Axboe left = isize - in->f_pos; 77349570e9bSJens Axboe if (unlikely(left < len)) 7745274f052SJens Axboe len = left; 7755274f052SJens Axboe 7765274f052SJens Axboe return in->f_op->splice_read(in, pipe, len, flags); 7775274f052SJens Axboe } 7785274f052SJens Axboe 779b92ce558SJens Axboe long do_splice_direct(struct file *in, struct file *out, size_t len, 780b92ce558SJens Axboe unsigned int flags) 781b92ce558SJens Axboe { 782b92ce558SJens Axboe struct pipe_inode_info *pipe; 783b92ce558SJens Axboe long ret, bytes; 784b92ce558SJens Axboe umode_t i_mode; 785b92ce558SJens Axboe int i; 786b92ce558SJens Axboe 787b92ce558SJens Axboe /* 788b92ce558SJens Axboe * We require the input being a regular file, as we don't want to 789b92ce558SJens Axboe * randomly drop data for eg socket -> socket splicing. Use the 790b92ce558SJens Axboe * piped splicing for that! 791b92ce558SJens Axboe */ 792b92ce558SJens Axboe i_mode = in->f_dentry->d_inode->i_mode; 793b92ce558SJens Axboe if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) 794b92ce558SJens Axboe return -EINVAL; 795b92ce558SJens Axboe 796b92ce558SJens Axboe /* 797b92ce558SJens Axboe * neither in nor out is a pipe, setup an internal pipe attached to 798b92ce558SJens Axboe * 'out' and transfer the wanted data from 'in' to 'out' through that 799b92ce558SJens Axboe */ 800b92ce558SJens Axboe pipe = current->splice_pipe; 80149570e9bSJens Axboe if (unlikely(!pipe)) { 802b92ce558SJens Axboe pipe = alloc_pipe_info(NULL); 803b92ce558SJens Axboe if (!pipe) 804b92ce558SJens Axboe return -ENOMEM; 805b92ce558SJens Axboe 806b92ce558SJens Axboe /* 807b92ce558SJens Axboe * We don't have an immediate reader, but we'll read the stuff 808b92ce558SJens Axboe * out of the pipe right after the move_to_pipe(). So set 809b92ce558SJens Axboe * PIPE_READERS appropriately. 810b92ce558SJens Axboe */ 811b92ce558SJens Axboe pipe->readers = 1; 812b92ce558SJens Axboe 813b92ce558SJens Axboe current->splice_pipe = pipe; 814b92ce558SJens Axboe } 815b92ce558SJens Axboe 816b92ce558SJens Axboe /* 817b92ce558SJens Axboe * do the splice 818b92ce558SJens Axboe */ 819b92ce558SJens Axboe ret = 0; 820b92ce558SJens Axboe bytes = 0; 821b92ce558SJens Axboe 822b92ce558SJens Axboe while (len) { 823b92ce558SJens Axboe size_t read_len, max_read_len; 824b92ce558SJens Axboe 825b92ce558SJens Axboe /* 826b92ce558SJens Axboe * Do at most PIPE_BUFFERS pages worth of transfer: 827b92ce558SJens Axboe */ 828b92ce558SJens Axboe max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE)); 829b92ce558SJens Axboe 830b92ce558SJens Axboe ret = do_splice_to(in, pipe, max_read_len, flags); 831b92ce558SJens Axboe if (unlikely(ret < 0)) 832b92ce558SJens Axboe goto out_release; 833b92ce558SJens Axboe 834b92ce558SJens Axboe read_len = ret; 835b92ce558SJens Axboe 836b92ce558SJens Axboe /* 837b92ce558SJens Axboe * NOTE: nonblocking mode only applies to the input. We 838b92ce558SJens Axboe * must not do the output in nonblocking mode as then we 839b92ce558SJens Axboe * could get stuck data in the internal pipe: 840b92ce558SJens Axboe */ 841b92ce558SJens Axboe ret = do_splice_from(pipe, out, read_len, 842b92ce558SJens Axboe flags & ~SPLICE_F_NONBLOCK); 843b92ce558SJens Axboe if (unlikely(ret < 0)) 844b92ce558SJens Axboe goto out_release; 845b92ce558SJens Axboe 846b92ce558SJens Axboe bytes += ret; 847b92ce558SJens Axboe len -= ret; 848b92ce558SJens Axboe 849b92ce558SJens Axboe /* 850b92ce558SJens Axboe * In nonblocking mode, if we got back a short read then 851b92ce558SJens Axboe * that was due to either an IO error or due to the 852b92ce558SJens Axboe * pagecache entry not being there. In the IO error case 853b92ce558SJens Axboe * the _next_ splice attempt will produce a clean IO error 854b92ce558SJens Axboe * return value (not a short read), so in both cases it's 855b92ce558SJens Axboe * correct to break out of the loop here: 856b92ce558SJens Axboe */ 857b92ce558SJens Axboe if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len)) 858b92ce558SJens Axboe break; 859b92ce558SJens Axboe } 860b92ce558SJens Axboe 861b92ce558SJens Axboe pipe->nrbufs = pipe->curbuf = 0; 862b92ce558SJens Axboe 863b92ce558SJens Axboe return bytes; 864b92ce558SJens Axboe 865b92ce558SJens Axboe out_release: 866b92ce558SJens Axboe /* 867b92ce558SJens Axboe * If we did an incomplete transfer we must release 868b92ce558SJens Axboe * the pipe buffers in question: 869b92ce558SJens Axboe */ 870b92ce558SJens Axboe for (i = 0; i < PIPE_BUFFERS; i++) { 871b92ce558SJens Axboe struct pipe_buffer *buf = pipe->bufs + i; 872b92ce558SJens Axboe 873b92ce558SJens Axboe if (buf->ops) { 874b92ce558SJens Axboe buf->ops->release(pipe, buf); 875b92ce558SJens Axboe buf->ops = NULL; 876b92ce558SJens Axboe } 877b92ce558SJens Axboe } 878b92ce558SJens Axboe pipe->nrbufs = pipe->curbuf = 0; 879b92ce558SJens Axboe 880b92ce558SJens Axboe /* 881b92ce558SJens Axboe * If we transferred some data, return the number of bytes: 882b92ce558SJens Axboe */ 883b92ce558SJens Axboe if (bytes > 0) 884b92ce558SJens Axboe return bytes; 885b92ce558SJens Axboe 886b92ce558SJens Axboe return ret; 887b92ce558SJens Axboe } 888b92ce558SJens Axboe 889b92ce558SJens Axboe EXPORT_SYMBOL(do_splice_direct); 890b92ce558SJens Axboe 89183f9135bSJens Axboe /* 89283f9135bSJens Axboe * Determine where to splice to/from. 89383f9135bSJens Axboe */ 894529565dcSIngo Molnar static long do_splice(struct file *in, loff_t __user *off_in, 895529565dcSIngo Molnar struct file *out, loff_t __user *off_out, 896529565dcSIngo Molnar size_t len, unsigned int flags) 8975274f052SJens Axboe { 8983a326a2cSIngo Molnar struct pipe_inode_info *pipe; 8995274f052SJens Axboe 9003a326a2cSIngo Molnar pipe = in->f_dentry->d_inode->i_pipe; 901529565dcSIngo Molnar if (pipe) { 902529565dcSIngo Molnar if (off_in) 903529565dcSIngo Molnar return -ESPIPE; 904b92ce558SJens Axboe if (off_out) { 905b92ce558SJens Axboe if (out->f_op->llseek == no_llseek) 906b92ce558SJens Axboe return -EINVAL; 907b92ce558SJens Axboe if (copy_from_user(&out->f_pos, off_out, 908b92ce558SJens Axboe sizeof(loff_t))) 909b92ce558SJens Axboe return -EFAULT; 910b92ce558SJens Axboe } 911529565dcSIngo Molnar 912b92ce558SJens Axboe return do_splice_from(pipe, out, len, flags); 913529565dcSIngo Molnar } 9145274f052SJens Axboe 9153a326a2cSIngo Molnar pipe = out->f_dentry->d_inode->i_pipe; 916529565dcSIngo Molnar if (pipe) { 917529565dcSIngo Molnar if (off_out) 918529565dcSIngo Molnar return -ESPIPE; 919b92ce558SJens Axboe if (off_in) { 920b92ce558SJens Axboe if (in->f_op->llseek == no_llseek) 921b92ce558SJens Axboe return -EINVAL; 922b92ce558SJens Axboe if (copy_from_user(&in->f_pos, off_in, sizeof(loff_t))) 923b92ce558SJens Axboe return -EFAULT; 924b92ce558SJens Axboe } 925529565dcSIngo Molnar 926b92ce558SJens Axboe return do_splice_to(in, pipe, len, flags); 927529565dcSIngo Molnar } 9285274f052SJens Axboe 9295274f052SJens Axboe return -EINVAL; 9305274f052SJens Axboe } 9315274f052SJens Axboe 932529565dcSIngo Molnar asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, 933529565dcSIngo Molnar int fd_out, loff_t __user *off_out, 934529565dcSIngo Molnar size_t len, unsigned int flags) 9355274f052SJens Axboe { 9365274f052SJens Axboe long error; 9375274f052SJens Axboe struct file *in, *out; 9385274f052SJens Axboe int fput_in, fput_out; 9395274f052SJens Axboe 9405274f052SJens Axboe if (unlikely(!len)) 9415274f052SJens Axboe return 0; 9425274f052SJens Axboe 9435274f052SJens Axboe error = -EBADF; 944529565dcSIngo Molnar in = fget_light(fd_in, &fput_in); 9455274f052SJens Axboe if (in) { 9465274f052SJens Axboe if (in->f_mode & FMODE_READ) { 947529565dcSIngo Molnar out = fget_light(fd_out, &fput_out); 9485274f052SJens Axboe if (out) { 9495274f052SJens Axboe if (out->f_mode & FMODE_WRITE) 950529565dcSIngo Molnar error = do_splice(in, off_in, 951529565dcSIngo Molnar out, off_out, 952529565dcSIngo Molnar len, flags); 9535274f052SJens Axboe fput_light(out, fput_out); 9545274f052SJens Axboe } 9555274f052SJens Axboe } 9565274f052SJens Axboe 9575274f052SJens Axboe fput_light(in, fput_in); 9585274f052SJens Axboe } 9595274f052SJens Axboe 9605274f052SJens Axboe return error; 9615274f052SJens Axboe } 962