15274f052SJens Axboe /* 25274f052SJens Axboe * "splice": joining two ropes together by interweaving their strands. 35274f052SJens Axboe * 45274f052SJens Axboe * This is the "extended pipe" functionality, where a pipe is used as 55274f052SJens Axboe * an arbitrary in-memory buffer. Think of a pipe as a small kernel 65274f052SJens Axboe * buffer that you can use to transfer data from one end to the other. 75274f052SJens Axboe * 85274f052SJens Axboe * The traditional unix read/write is extended with a "splice()" operation 95274f052SJens Axboe * that transfers data buffers to or from a pipe buffer. 105274f052SJens Axboe * 115274f052SJens Axboe * Named by Larry McVoy, original implementation from Linus, extended by 12c2058e06SJens Axboe * Jens to support splicing to files, network, direct splicing, etc and 13c2058e06SJens Axboe * fixing lots of bugs. 145274f052SJens Axboe * 15c2058e06SJens Axboe * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de> 16c2058e06SJens Axboe * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org> 17c2058e06SJens Axboe * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu> 185274f052SJens Axboe * 195274f052SJens Axboe */ 205274f052SJens Axboe #include <linux/fs.h> 215274f052SJens Axboe #include <linux/file.h> 225274f052SJens Axboe #include <linux/pagemap.h> 235274f052SJens Axboe #include <linux/pipe_fs_i.h> 245274f052SJens Axboe #include <linux/mm_inline.h> 255abc97aaSJens Axboe #include <linux/swap.h> 264f6f0bd2SJens Axboe #include <linux/writeback.h> 274f6f0bd2SJens Axboe #include <linux/buffer_head.h> 28a0f06780SJeff Garzik #include <linux/module.h> 294f6f0bd2SJens Axboe #include <linux/syscalls.h> 305274f052SJens Axboe 315274f052SJens Axboe /* 325274f052SJens Axboe * Passed to the actors 335274f052SJens Axboe */ 345274f052SJens Axboe struct splice_desc { 355274f052SJens Axboe unsigned int len, total_len; /* current and remaining length */ 365274f052SJens Axboe unsigned int flags; /* splice flags */ 375274f052SJens Axboe struct file *file; 
/* file to read/write */ 385274f052SJens Axboe loff_t pos; /* file position */ 395274f052SJens Axboe }; 405274f052SJens Axboe 4183f9135bSJens Axboe /* 4283f9135bSJens Axboe * Attempt to steal a page from a pipe buffer. This should perhaps go into 4383f9135bSJens Axboe * a vm helper function, it's already simplified quite a bit by the 4483f9135bSJens Axboe * addition of remove_mapping(). If success is returned, the caller may 4583f9135bSJens Axboe * attempt to reuse this page for another destination. 4683f9135bSJens Axboe */ 475abc97aaSJens Axboe static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, 485abc97aaSJens Axboe struct pipe_buffer *buf) 495abc97aaSJens Axboe { 505abc97aaSJens Axboe struct page *page = buf->page; 514f6f0bd2SJens Axboe struct address_space *mapping = page_mapping(page); 525abc97aaSJens Axboe 535abc97aaSJens Axboe WARN_ON(!PageLocked(page)); 545abc97aaSJens Axboe WARN_ON(!PageUptodate(page)); 555abc97aaSJens Axboe 56ad8d6f0aSJens Axboe /* 57ad8d6f0aSJens Axboe * At least for ext2 with nobh option, we need to wait on writeback 58ad8d6f0aSJens Axboe * completing on this page, since we'll remove it from the pagecache. 59ad8d6f0aSJens Axboe * Otherwise truncate wont wait on the page, allowing the disk 60ad8d6f0aSJens Axboe * blocks to be reused by someone else before we actually wrote our 61ad8d6f0aSJens Axboe * data to them. fs corruption ensues. 
62ad8d6f0aSJens Axboe */ 63ad8d6f0aSJens Axboe wait_on_page_writeback(page); 64ad8d6f0aSJens Axboe 654f6f0bd2SJens Axboe if (PagePrivate(page)) 664f6f0bd2SJens Axboe try_to_release_page(page, mapping_gfp_mask(mapping)); 674f6f0bd2SJens Axboe 684f6f0bd2SJens Axboe if (!remove_mapping(mapping, page)) 695abc97aaSJens Axboe return 1; 705abc97aaSJens Axboe 713e7ee3e7SJens Axboe buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; 725abc97aaSJens Axboe return 0; 735abc97aaSJens Axboe } 745abc97aaSJens Axboe 755274f052SJens Axboe static void page_cache_pipe_buf_release(struct pipe_inode_info *info, 765274f052SJens Axboe struct pipe_buffer *buf) 775274f052SJens Axboe { 785274f052SJens Axboe page_cache_release(buf->page); 795274f052SJens Axboe buf->page = NULL; 803e7ee3e7SJens Axboe buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); 815274f052SJens Axboe } 825274f052SJens Axboe 835274f052SJens Axboe static void *page_cache_pipe_buf_map(struct file *file, 845274f052SJens Axboe struct pipe_inode_info *info, 855274f052SJens Axboe struct pipe_buffer *buf) 865274f052SJens Axboe { 875274f052SJens Axboe struct page *page = buf->page; 8849d0b21bSJens Axboe int err; 895274f052SJens Axboe 905274f052SJens Axboe if (!PageUptodate(page)) { 9149d0b21bSJens Axboe lock_page(page); 925274f052SJens Axboe 9349d0b21bSJens Axboe /* 9449d0b21bSJens Axboe * Page got truncated/unhashed. This will cause a 0-byte 9573d62d83SIngo Molnar * splice, if this is the first page. 9649d0b21bSJens Axboe */ 975274f052SJens Axboe if (!page->mapping) { 9849d0b21bSJens Axboe err = -ENODATA; 9949d0b21bSJens Axboe goto error; 1005274f052SJens Axboe } 1015274f052SJens Axboe 10249d0b21bSJens Axboe /* 10373d62d83SIngo Molnar * Uh oh, read-error from disk. 
10449d0b21bSJens Axboe */ 10549d0b21bSJens Axboe if (!PageUptodate(page)) { 10649d0b21bSJens Axboe err = -EIO; 10749d0b21bSJens Axboe goto error; 10849d0b21bSJens Axboe } 10949d0b21bSJens Axboe 11049d0b21bSJens Axboe /* 11173d62d83SIngo Molnar * Page is ok afterall, fall through to mapping. 11249d0b21bSJens Axboe */ 11349d0b21bSJens Axboe unlock_page(page); 11449d0b21bSJens Axboe } 11549d0b21bSJens Axboe 11649d0b21bSJens Axboe return kmap(page); 11749d0b21bSJens Axboe error: 11849d0b21bSJens Axboe unlock_page(page); 11949d0b21bSJens Axboe return ERR_PTR(err); 1205274f052SJens Axboe } 1215274f052SJens Axboe 1225274f052SJens Axboe static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 1235274f052SJens Axboe struct pipe_buffer *buf) 1245274f052SJens Axboe { 1255274f052SJens Axboe kunmap(buf->page); 1265274f052SJens Axboe } 1275274f052SJens Axboe 12870524490SJens Axboe static void page_cache_pipe_buf_get(struct pipe_inode_info *info, 12970524490SJens Axboe struct pipe_buffer *buf) 13070524490SJens Axboe { 13170524490SJens Axboe page_cache_get(buf->page); 13270524490SJens Axboe } 13370524490SJens Axboe 1345274f052SJens Axboe static struct pipe_buf_operations page_cache_pipe_buf_ops = { 1355274f052SJens Axboe .can_merge = 0, 1365274f052SJens Axboe .map = page_cache_pipe_buf_map, 1375274f052SJens Axboe .unmap = page_cache_pipe_buf_unmap, 1385274f052SJens Axboe .release = page_cache_pipe_buf_release, 1395abc97aaSJens Axboe .steal = page_cache_pipe_buf_steal, 14070524490SJens Axboe .get = page_cache_pipe_buf_get, 1415274f052SJens Axboe }; 1425274f052SJens Axboe 14383f9135bSJens Axboe /* 14483f9135bSJens Axboe * Pipe output worker. This sets up our pipe format with the page cache 14583f9135bSJens Axboe * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 
14683f9135bSJens Axboe */ 1473a326a2cSIngo Molnar static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 14891ad66efSJens Axboe int nr_pages, unsigned long len, 14991ad66efSJens Axboe unsigned int offset, unsigned int flags) 1505274f052SJens Axboe { 1515274f052SJens Axboe int ret, do_wakeup, i; 1525274f052SJens Axboe 1535274f052SJens Axboe ret = 0; 1545274f052SJens Axboe do_wakeup = 0; 1555274f052SJens Axboe i = 0; 1565274f052SJens Axboe 1573a326a2cSIngo Molnar if (pipe->inode) 1583a326a2cSIngo Molnar mutex_lock(&pipe->inode->i_mutex); 1595274f052SJens Axboe 1605274f052SJens Axboe for (;;) { 1613a326a2cSIngo Molnar if (!pipe->readers) { 1625274f052SJens Axboe send_sig(SIGPIPE, current, 0); 1635274f052SJens Axboe if (!ret) 1645274f052SJens Axboe ret = -EPIPE; 1655274f052SJens Axboe break; 1665274f052SJens Axboe } 1675274f052SJens Axboe 1686f767b04SJens Axboe if (pipe->nrbufs < PIPE_BUFFERS) { 1696f767b04SJens Axboe int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); 1703a326a2cSIngo Molnar struct pipe_buffer *buf = pipe->bufs + newbuf; 1715274f052SJens Axboe struct page *page = pages[i++]; 1725274f052SJens Axboe unsigned long this_len; 1735274f052SJens Axboe 1745274f052SJens Axboe this_len = PAGE_CACHE_SIZE - offset; 1755274f052SJens Axboe if (this_len > len) 1765274f052SJens Axboe this_len = len; 1775274f052SJens Axboe 1785274f052SJens Axboe buf->page = page; 1795274f052SJens Axboe buf->offset = offset; 1805274f052SJens Axboe buf->len = this_len; 1815274f052SJens Axboe buf->ops = &page_cache_pipe_buf_ops; 1826f767b04SJens Axboe pipe->nrbufs++; 1836f767b04SJens Axboe if (pipe->inode) 1845274f052SJens Axboe do_wakeup = 1; 1855274f052SJens Axboe 1865274f052SJens Axboe ret += this_len; 1875274f052SJens Axboe len -= this_len; 1885274f052SJens Axboe offset = 0; 1895274f052SJens Axboe if (!--nr_pages) 1905274f052SJens Axboe break; 1915274f052SJens Axboe if (!len) 1925274f052SJens Axboe break; 1936f767b04SJens Axboe if (pipe->nrbufs < 
PIPE_BUFFERS) 1945274f052SJens Axboe continue; 1955274f052SJens Axboe 1965274f052SJens Axboe break; 1975274f052SJens Axboe } 1985274f052SJens Axboe 19929e35094SLinus Torvalds if (flags & SPLICE_F_NONBLOCK) { 20029e35094SLinus Torvalds if (!ret) 20129e35094SLinus Torvalds ret = -EAGAIN; 20229e35094SLinus Torvalds break; 20329e35094SLinus Torvalds } 20429e35094SLinus Torvalds 2055274f052SJens Axboe if (signal_pending(current)) { 2065274f052SJens Axboe if (!ret) 2075274f052SJens Axboe ret = -ERESTARTSYS; 2085274f052SJens Axboe break; 2095274f052SJens Axboe } 2105274f052SJens Axboe 2115274f052SJens Axboe if (do_wakeup) { 212c0bd1f65SJens Axboe smp_mb(); 2133a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 2143a326a2cSIngo Molnar wake_up_interruptible_sync(&pipe->wait); 2153a326a2cSIngo Molnar kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 2165274f052SJens Axboe do_wakeup = 0; 2175274f052SJens Axboe } 2185274f052SJens Axboe 2193a326a2cSIngo Molnar pipe->waiting_writers++; 2203a326a2cSIngo Molnar pipe_wait(pipe); 2213a326a2cSIngo Molnar pipe->waiting_writers--; 2225274f052SJens Axboe } 2235274f052SJens Axboe 2243a326a2cSIngo Molnar if (pipe->inode) 2253a326a2cSIngo Molnar mutex_unlock(&pipe->inode->i_mutex); 2265274f052SJens Axboe 2275274f052SJens Axboe if (do_wakeup) { 228c0bd1f65SJens Axboe smp_mb(); 2293a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 2303a326a2cSIngo Molnar wake_up_interruptible(&pipe->wait); 2313a326a2cSIngo Molnar kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 2325274f052SJens Axboe } 2335274f052SJens Axboe 2345274f052SJens Axboe while (i < nr_pages) 2355274f052SJens Axboe page_cache_release(pages[i++]); 2365274f052SJens Axboe 2375274f052SJens Axboe return ret; 2385274f052SJens Axboe } 2395274f052SJens Axboe 2403a326a2cSIngo Molnar static int 241cbb7e577SJens Axboe __generic_file_splice_read(struct file *in, loff_t *ppos, 242cbb7e577SJens Axboe struct pipe_inode_info *pipe, size_t len, 243cbb7e577SJens Axboe unsigned int 
flags) 2445274f052SJens Axboe { 2455274f052SJens Axboe struct address_space *mapping = in->f_mapping; 24691ad66efSJens Axboe unsigned int loff, offset, nr_pages; 24716c523ddSJens Axboe struct page *pages[PIPE_BUFFERS]; 2485274f052SJens Axboe struct page *page; 24991ad66efSJens Axboe pgoff_t index, end_index; 25091ad66efSJens Axboe loff_t isize; 25191ad66efSJens Axboe size_t bytes; 2527480a904SJens Axboe int i, error; 2535274f052SJens Axboe 254cbb7e577SJens Axboe index = *ppos >> PAGE_CACHE_SHIFT; 25591ad66efSJens Axboe loff = offset = *ppos & ~PAGE_CACHE_MASK; 2565274f052SJens Axboe nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 2575274f052SJens Axboe 2585274f052SJens Axboe if (nr_pages > PIPE_BUFFERS) 2595274f052SJens Axboe nr_pages = PIPE_BUFFERS; 2605274f052SJens Axboe 2615274f052SJens Axboe /* 26273d62d83SIngo Molnar * Initiate read-ahead on this page range. however, don't call into 2630b749ce3SJens Axboe * read-ahead if this is a non-zero offset (we are likely doing small 2640b749ce3SJens Axboe * chunk splice and the page is already there) for a single page. 
2655274f052SJens Axboe */ 2660b749ce3SJens Axboe if (!offset || nr_pages > 1) 2675274f052SJens Axboe do_page_cache_readahead(mapping, in, index, nr_pages); 2685274f052SJens Axboe 2695274f052SJens Axboe /* 27073d62d83SIngo Molnar * Now fill in the holes: 2715274f052SJens Axboe */ 2727480a904SJens Axboe error = 0; 27391ad66efSJens Axboe bytes = 0; 27416c523ddSJens Axboe for (i = 0; i < nr_pages; i++, index++) { 2757480a904SJens Axboe find_page: 2765274f052SJens Axboe /* 2777480a904SJens Axboe * lookup the page for this index 2785274f052SJens Axboe */ 2797480a904SJens Axboe page = find_get_page(mapping, index); 2807480a904SJens Axboe if (!page) { 2817480a904SJens Axboe /* 2827480a904SJens Axboe * page didn't exist, allocate one 2837480a904SJens Axboe */ 2847480a904SJens Axboe page = page_cache_alloc_cold(mapping); 2855274f052SJens Axboe if (!page) 2865274f052SJens Axboe break; 2875274f052SJens Axboe 2887480a904SJens Axboe error = add_to_page_cache_lru(page, mapping, index, 2897480a904SJens Axboe mapping_gfp_mask(mapping)); 2905274f052SJens Axboe if (unlikely(error)) { 2915274f052SJens Axboe page_cache_release(page); 2925274f052SJens Axboe break; 2935274f052SJens Axboe } 2947480a904SJens Axboe 2957480a904SJens Axboe goto readpage; 2965274f052SJens Axboe } 2977480a904SJens Axboe 2987480a904SJens Axboe /* 2997480a904SJens Axboe * If the page isn't uptodate, we may need to start io on it 3007480a904SJens Axboe */ 3017480a904SJens Axboe if (!PageUptodate(page)) { 302c4f895cbSJens Axboe /* 303c4f895cbSJens Axboe * If in nonblock mode then dont block on waiting 304c4f895cbSJens Axboe * for an in-flight io page 305c4f895cbSJens Axboe */ 306c4f895cbSJens Axboe if (flags & SPLICE_F_NONBLOCK) 307c4f895cbSJens Axboe break; 308c4f895cbSJens Axboe 3097480a904SJens Axboe lock_page(page); 3107480a904SJens Axboe 3117480a904SJens Axboe /* 3127480a904SJens Axboe * page was truncated, stop here. 
if this isn't the 3137480a904SJens Axboe * first page, we'll just complete what we already 3147480a904SJens Axboe * added 3157480a904SJens Axboe */ 3167480a904SJens Axboe if (!page->mapping) { 3177480a904SJens Axboe unlock_page(page); 3187480a904SJens Axboe page_cache_release(page); 3197480a904SJens Axboe break; 3207480a904SJens Axboe } 3217480a904SJens Axboe /* 3227480a904SJens Axboe * page was already under io and is now done, great 3237480a904SJens Axboe */ 3247480a904SJens Axboe if (PageUptodate(page)) { 3257480a904SJens Axboe unlock_page(page); 3267480a904SJens Axboe goto fill_it; 3277480a904SJens Axboe } 3287480a904SJens Axboe 3297480a904SJens Axboe readpage: 3307480a904SJens Axboe /* 3317480a904SJens Axboe * need to read in the page 3327480a904SJens Axboe */ 3337480a904SJens Axboe error = mapping->a_ops->readpage(in, page); 3347480a904SJens Axboe 3357480a904SJens Axboe if (unlikely(error)) { 3367480a904SJens Axboe page_cache_release(page); 3377480a904SJens Axboe if (error == AOP_TRUNCATED_PAGE) 3387480a904SJens Axboe goto find_page; 3397480a904SJens Axboe break; 3407480a904SJens Axboe } 34191ad66efSJens Axboe 34291ad66efSJens Axboe /* 34391ad66efSJens Axboe * i_size must be checked after ->readpage(). 
34491ad66efSJens Axboe */ 34591ad66efSJens Axboe isize = i_size_read(mapping->host); 34691ad66efSJens Axboe end_index = (isize - 1) >> PAGE_CACHE_SHIFT; 34791ad66efSJens Axboe if (unlikely(!isize || index > end_index)) { 34891ad66efSJens Axboe page_cache_release(page); 34991ad66efSJens Axboe break; 35091ad66efSJens Axboe } 35191ad66efSJens Axboe 35291ad66efSJens Axboe /* 35391ad66efSJens Axboe * if this is the last page, see if we need to shrink 35491ad66efSJens Axboe * the length and stop 35591ad66efSJens Axboe */ 35691ad66efSJens Axboe if (end_index == index) { 35791ad66efSJens Axboe loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); 35891ad66efSJens Axboe if (bytes + loff > isize) { 35991ad66efSJens Axboe page_cache_release(page); 36091ad66efSJens Axboe break; 36191ad66efSJens Axboe } 36291ad66efSJens Axboe /* 36391ad66efSJens Axboe * force quit after adding this page 36491ad66efSJens Axboe */ 36591ad66efSJens Axboe nr_pages = i; 36691ad66efSJens Axboe } 3677480a904SJens Axboe } 3687480a904SJens Axboe fill_it: 36916c523ddSJens Axboe pages[i] = page; 37091ad66efSJens Axboe bytes += PAGE_CACHE_SIZE - loff; 37191ad66efSJens Axboe loff = 0; 3725274f052SJens Axboe } 3735274f052SJens Axboe 37416c523ddSJens Axboe if (i) 37591ad66efSJens Axboe return move_to_pipe(pipe, pages, i, bytes, offset, flags); 37616c523ddSJens Axboe 3777480a904SJens Axboe return error; 3785274f052SJens Axboe } 3795274f052SJens Axboe 38083f9135bSJens Axboe /** 38183f9135bSJens Axboe * generic_file_splice_read - splice data from file to a pipe 38283f9135bSJens Axboe * @in: file to splice from 38383f9135bSJens Axboe * @pipe: pipe to splice to 38483f9135bSJens Axboe * @len: number of bytes to splice 38583f9135bSJens Axboe * @flags: splice modifier flags 38683f9135bSJens Axboe * 38783f9135bSJens Axboe * Will read pages from given file and fill them into a pipe. 
38883f9135bSJens Axboe */ 389cbb7e577SJens Axboe ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, 390cbb7e577SJens Axboe struct pipe_inode_info *pipe, size_t len, 391cbb7e577SJens Axboe unsigned int flags) 3925274f052SJens Axboe { 3935274f052SJens Axboe ssize_t spliced; 3945274f052SJens Axboe int ret; 3955274f052SJens Axboe 3965274f052SJens Axboe ret = 0; 3975274f052SJens Axboe spliced = 0; 3983a326a2cSIngo Molnar 3995274f052SJens Axboe while (len) { 400cbb7e577SJens Axboe ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 4015274f052SJens Axboe 402c4f895cbSJens Axboe if (ret < 0) 4035274f052SJens Axboe break; 404c4f895cbSJens Axboe else if (!ret) { 405c4f895cbSJens Axboe if (spliced) 406c4f895cbSJens Axboe break; 407c4f895cbSJens Axboe if (flags & SPLICE_F_NONBLOCK) { 408c4f895cbSJens Axboe ret = -EAGAIN; 409c4f895cbSJens Axboe break; 410c4f895cbSJens Axboe } 411c4f895cbSJens Axboe } 4125274f052SJens Axboe 413cbb7e577SJens Axboe *ppos += ret; 4145274f052SJens Axboe len -= ret; 4155274f052SJens Axboe spliced += ret; 4165274f052SJens Axboe } 4175274f052SJens Axboe 4185274f052SJens Axboe if (spliced) 4195274f052SJens Axboe return spliced; 4205274f052SJens Axboe 4215274f052SJens Axboe return ret; 4225274f052SJens Axboe } 4235274f052SJens Axboe 424059a8f37SJens Axboe EXPORT_SYMBOL(generic_file_splice_read); 425059a8f37SJens Axboe 4265274f052SJens Axboe /* 4274f6f0bd2SJens Axboe * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 4284f6f0bd2SJens Axboe * using sendpage(). 
4295274f052SJens Axboe */ 4305274f052SJens Axboe static int pipe_to_sendpage(struct pipe_inode_info *info, 4315274f052SJens Axboe struct pipe_buffer *buf, struct splice_desc *sd) 4325274f052SJens Axboe { 4335274f052SJens Axboe struct file *file = sd->file; 4345274f052SJens Axboe loff_t pos = sd->pos; 4355274f052SJens Axboe unsigned int offset; 4365274f052SJens Axboe ssize_t ret; 4375274f052SJens Axboe void *ptr; 438b2b39fa4SJens Axboe int more; 4395274f052SJens Axboe 4405274f052SJens Axboe /* 44173d62d83SIngo Molnar * Sub-optimal, but we are limited by the pipe ->map. We don't 4425274f052SJens Axboe * need a kmap'ed buffer here, we just want to make sure we 4435274f052SJens Axboe * have the page pinned if the pipe page originates from the 44473d62d83SIngo Molnar * page cache. 4455274f052SJens Axboe */ 4465274f052SJens Axboe ptr = buf->ops->map(file, info, buf); 4475274f052SJens Axboe if (IS_ERR(ptr)) 4485274f052SJens Axboe return PTR_ERR(ptr); 4495274f052SJens Axboe 4505274f052SJens Axboe offset = pos & ~PAGE_CACHE_MASK; 451b2b39fa4SJens Axboe more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 4525274f052SJens Axboe 453b2b39fa4SJens Axboe ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 4545274f052SJens Axboe 4555274f052SJens Axboe buf->ops->unmap(info, buf); 4565274f052SJens Axboe if (ret == sd->len) 4575274f052SJens Axboe return 0; 4585274f052SJens Axboe 4595274f052SJens Axboe return -EIO; 4605274f052SJens Axboe } 4615274f052SJens Axboe 4625274f052SJens Axboe /* 4635274f052SJens Axboe * This is a little more tricky than the file -> pipe splicing. There are 4645274f052SJens Axboe * basically three cases: 4655274f052SJens Axboe * 4665274f052SJens Axboe * - Destination page already exists in the address space and there 4675274f052SJens Axboe * are users of it. For that case we have no other option that 4685274f052SJens Axboe * copying the data. Tough luck. 
4695274f052SJens Axboe * - Destination page already exists in the address space, but there 4705274f052SJens Axboe * are no users of it. Make sure it's uptodate, then drop it. Fall 4715274f052SJens Axboe * through to last case. 4725274f052SJens Axboe * - Destination page does not exist, we can add the pipe page to 4735274f052SJens Axboe * the page cache and avoid the copy. 4745274f052SJens Axboe * 47583f9135bSJens Axboe * If asked to move pages to the output file (SPLICE_F_MOVE is set in 47683f9135bSJens Axboe * sd->flags), we attempt to migrate pages from the pipe to the output 47783f9135bSJens Axboe * file address space page cache. This is possible if no one else has 47883f9135bSJens Axboe * the pipe page referenced outside of the pipe and page cache. If 47983f9135bSJens Axboe * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create 48083f9135bSJens Axboe * a new page in the output file page cache and fill/dirty that. 4815274f052SJens Axboe */ 4825274f052SJens Axboe static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, 4835274f052SJens Axboe struct splice_desc *sd) 4845274f052SJens Axboe { 4855274f052SJens Axboe struct file *file = sd->file; 4865274f052SJens Axboe struct address_space *mapping = file->f_mapping; 4873e7ee3e7SJens Axboe gfp_t gfp_mask = mapping_gfp_mask(mapping); 4885274f052SJens Axboe unsigned int offset; 4895274f052SJens Axboe struct page *page; 4905274f052SJens Axboe pgoff_t index; 4915abc97aaSJens Axboe char *src; 4923e7ee3e7SJens Axboe int ret; 4935274f052SJens Axboe 4945274f052SJens Axboe /* 49549d0b21bSJens Axboe * make sure the data in this buffer is uptodate 4965274f052SJens Axboe */ 4975274f052SJens Axboe src = buf->ops->map(file, info, buf); 4985274f052SJens Axboe if (IS_ERR(src)) 4995274f052SJens Axboe return PTR_ERR(src); 5005274f052SJens Axboe 5015274f052SJens Axboe index = sd->pos >> PAGE_CACHE_SHIFT; 5025274f052SJens Axboe offset = sd->pos & ~PAGE_CACHE_MASK; 5035274f052SJens Axboe 
5045abc97aaSJens Axboe /* 50573d62d83SIngo Molnar * Reuse buf page, if SPLICE_F_MOVE is set. 5065abc97aaSJens Axboe */ 5075abc97aaSJens Axboe if (sd->flags & SPLICE_F_MOVE) { 50883f9135bSJens Axboe /* 50983f9135bSJens Axboe * If steal succeeds, buf->page is now pruned from the vm 51083f9135bSJens Axboe * side (LRU and page cache) and we can reuse it. 51183f9135bSJens Axboe */ 5125abc97aaSJens Axboe if (buf->ops->steal(info, buf)) 5135abc97aaSJens Axboe goto find_page; 5145abc97aaSJens Axboe 51549d0b21bSJens Axboe /* 51649d0b21bSJens Axboe * this will also set the page locked 51749d0b21bSJens Axboe */ 5185abc97aaSJens Axboe page = buf->page; 5193e7ee3e7SJens Axboe if (add_to_page_cache(page, mapping, index, gfp_mask)) 5205abc97aaSJens Axboe goto find_page; 5213e7ee3e7SJens Axboe 5223e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_LRU)) 5233e7ee3e7SJens Axboe lru_cache_add(page); 5245abc97aaSJens Axboe } else { 5255274f052SJens Axboe find_page: 5265274f052SJens Axboe ret = -ENOMEM; 5273e7ee3e7SJens Axboe page = find_or_create_page(mapping, index, gfp_mask); 5285274f052SJens Axboe if (!page) 5299aefe431SDave Jones goto out_nomem; 5305274f052SJens Axboe 5315274f052SJens Axboe /* 5325274f052SJens Axboe * If the page is uptodate, it is also locked. If it isn't 5335274f052SJens Axboe * uptodate, we can mark it uptodate if we are filling the 5345274f052SJens Axboe * full page. Otherwise we need to read it in first... 5355274f052SJens Axboe */ 5365274f052SJens Axboe if (!PageUptodate(page)) { 5375274f052SJens Axboe if (sd->len < PAGE_CACHE_SIZE) { 5385274f052SJens Axboe ret = mapping->a_ops->readpage(file, page); 5395274f052SJens Axboe if (unlikely(ret)) 5405274f052SJens Axboe goto out; 5415274f052SJens Axboe 5425274f052SJens Axboe lock_page(page); 5435274f052SJens Axboe 5445274f052SJens Axboe if (!PageUptodate(page)) { 5455274f052SJens Axboe /* 54673d62d83SIngo Molnar * Page got invalidated, repeat. 
5475274f052SJens Axboe */ 5485274f052SJens Axboe if (!page->mapping) { 5495274f052SJens Axboe unlock_page(page); 5505274f052SJens Axboe page_cache_release(page); 5515274f052SJens Axboe goto find_page; 5525274f052SJens Axboe } 5535274f052SJens Axboe ret = -EIO; 5545274f052SJens Axboe goto out; 5555274f052SJens Axboe } 5565274f052SJens Axboe } else { 5575274f052SJens Axboe WARN_ON(!PageLocked(page)); 5585274f052SJens Axboe SetPageUptodate(page); 5595274f052SJens Axboe } 5605274f052SJens Axboe } 5615abc97aaSJens Axboe } 5625274f052SJens Axboe 5635274f052SJens Axboe ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 5644f6f0bd2SJens Axboe if (ret == AOP_TRUNCATED_PAGE) { 5654f6f0bd2SJens Axboe page_cache_release(page); 5664f6f0bd2SJens Axboe goto find_page; 5674f6f0bd2SJens Axboe } else if (ret) 5685274f052SJens Axboe goto out; 5695274f052SJens Axboe 5703e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 5715abc97aaSJens Axboe char *dst = kmap_atomic(page, KM_USER0); 5725abc97aaSJens Axboe 5735274f052SJens Axboe memcpy(dst + offset, src + buf->offset, sd->len); 5745274f052SJens Axboe flush_dcache_page(page); 5755274f052SJens Axboe kunmap_atomic(dst, KM_USER0); 5765abc97aaSJens Axboe } 5775274f052SJens Axboe 5785274f052SJens Axboe ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 5794f6f0bd2SJens Axboe if (ret == AOP_TRUNCATED_PAGE) { 5804f6f0bd2SJens Axboe page_cache_release(page); 5814f6f0bd2SJens Axboe goto find_page; 5824f6f0bd2SJens Axboe } else if (ret) 5835274f052SJens Axboe goto out; 5845274f052SJens Axboe 585c7f21e4fSJens Axboe mark_page_accessed(page); 5864f6f0bd2SJens Axboe balance_dirty_pages_ratelimited(mapping); 5875274f052SJens Axboe out: 5883e7ee3e7SJens Axboe if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 5895274f052SJens Axboe page_cache_release(page); 5904f6f0bd2SJens Axboe unlock_page(page); 5914f6f0bd2SJens Axboe } 5929aefe431SDave Jones out_nomem: 5935274f052SJens Axboe buf->ops->unmap(info, buf); 5945274f052SJens Axboe 
return ret; 5955274f052SJens Axboe } 5965274f052SJens Axboe 5975274f052SJens Axboe typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, 5985274f052SJens Axboe struct splice_desc *); 5995274f052SJens Axboe 60083f9135bSJens Axboe /* 60183f9135bSJens Axboe * Pipe input worker. Most of this logic works like a regular pipe, the 60283f9135bSJens Axboe * key here is the 'actor' worker passed in that actually moves the data 60383f9135bSJens Axboe * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 60483f9135bSJens Axboe */ 6053a326a2cSIngo Molnar static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, 606cbb7e577SJens Axboe loff_t *ppos, size_t len, unsigned int flags, 6075274f052SJens Axboe splice_actor *actor) 6085274f052SJens Axboe { 6095274f052SJens Axboe int ret, do_wakeup, err; 6105274f052SJens Axboe struct splice_desc sd; 6115274f052SJens Axboe 6125274f052SJens Axboe ret = 0; 6135274f052SJens Axboe do_wakeup = 0; 6145274f052SJens Axboe 6155274f052SJens Axboe sd.total_len = len; 6165274f052SJens Axboe sd.flags = flags; 6175274f052SJens Axboe sd.file = out; 618cbb7e577SJens Axboe sd.pos = *ppos; 6195274f052SJens Axboe 6203a326a2cSIngo Molnar if (pipe->inode) 6213a326a2cSIngo Molnar mutex_lock(&pipe->inode->i_mutex); 6225274f052SJens Axboe 6235274f052SJens Axboe for (;;) { 6246f767b04SJens Axboe if (pipe->nrbufs) { 6256f767b04SJens Axboe struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; 6265274f052SJens Axboe struct pipe_buf_operations *ops = buf->ops; 6275274f052SJens Axboe 6285274f052SJens Axboe sd.len = buf->len; 6295274f052SJens Axboe if (sd.len > sd.total_len) 6305274f052SJens Axboe sd.len = sd.total_len; 6315274f052SJens Axboe 6323a326a2cSIngo Molnar err = actor(pipe, buf, &sd); 6335274f052SJens Axboe if (err) { 6345274f052SJens Axboe if (!ret && err != -ENODATA) 6355274f052SJens Axboe ret = err; 6365274f052SJens Axboe 6375274f052SJens Axboe break; 6385274f052SJens Axboe } 6395274f052SJens Axboe 
6405274f052SJens Axboe ret += sd.len; 6415274f052SJens Axboe buf->offset += sd.len; 6425274f052SJens Axboe buf->len -= sd.len; 64373d62d83SIngo Molnar 6445274f052SJens Axboe if (!buf->len) { 6455274f052SJens Axboe buf->ops = NULL; 6463a326a2cSIngo Molnar ops->release(pipe, buf); 6476f767b04SJens Axboe pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); 6486f767b04SJens Axboe pipe->nrbufs--; 6496f767b04SJens Axboe if (pipe->inode) 6505274f052SJens Axboe do_wakeup = 1; 6515274f052SJens Axboe } 6525274f052SJens Axboe 6535274f052SJens Axboe sd.pos += sd.len; 6545274f052SJens Axboe sd.total_len -= sd.len; 6555274f052SJens Axboe if (!sd.total_len) 6565274f052SJens Axboe break; 6575274f052SJens Axboe } 6585274f052SJens Axboe 6596f767b04SJens Axboe if (pipe->nrbufs) 6605274f052SJens Axboe continue; 6613a326a2cSIngo Molnar if (!pipe->writers) 6625274f052SJens Axboe break; 6633a326a2cSIngo Molnar if (!pipe->waiting_writers) { 6645274f052SJens Axboe if (ret) 6655274f052SJens Axboe break; 6665274f052SJens Axboe } 6675274f052SJens Axboe 66829e35094SLinus Torvalds if (flags & SPLICE_F_NONBLOCK) { 66929e35094SLinus Torvalds if (!ret) 67029e35094SLinus Torvalds ret = -EAGAIN; 67129e35094SLinus Torvalds break; 67229e35094SLinus Torvalds } 67329e35094SLinus Torvalds 6745274f052SJens Axboe if (signal_pending(current)) { 6755274f052SJens Axboe if (!ret) 6765274f052SJens Axboe ret = -ERESTARTSYS; 6775274f052SJens Axboe break; 6785274f052SJens Axboe } 6795274f052SJens Axboe 6805274f052SJens Axboe if (do_wakeup) { 681c0bd1f65SJens Axboe smp_mb(); 6823a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 6833a326a2cSIngo Molnar wake_up_interruptible_sync(&pipe->wait); 6843a326a2cSIngo Molnar kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 6855274f052SJens Axboe do_wakeup = 0; 6865274f052SJens Axboe } 6875274f052SJens Axboe 6883a326a2cSIngo Molnar pipe_wait(pipe); 6895274f052SJens Axboe } 6905274f052SJens Axboe 6913a326a2cSIngo Molnar if (pipe->inode) 6923a326a2cSIngo Molnar 
mutex_unlock(&pipe->inode->i_mutex); 6935274f052SJens Axboe 6945274f052SJens Axboe if (do_wakeup) { 695c0bd1f65SJens Axboe smp_mb(); 6963a326a2cSIngo Molnar if (waitqueue_active(&pipe->wait)) 6973a326a2cSIngo Molnar wake_up_interruptible(&pipe->wait); 6983a326a2cSIngo Molnar kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 6995274f052SJens Axboe } 7005274f052SJens Axboe 7015274f052SJens Axboe return ret; 7025274f052SJens Axboe } 7035274f052SJens Axboe

/**
 * generic_file_splice_write - splice data from a pipe to a file
 * @pipe:	pipe info
 * @out:	file to write to
 * @ppos:	position in @out; advanced by the number of bytes spliced
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Will either move or copy pages (determined by @flags options) from
 * the given pipe to the given file, by running move_from_pipe() with
 * pipe_to_file as the actor.
 *
 * If data was transferred and either @out was opened O_SYNC or its inode
 * is IS_SYNC, data and metadata are synced before returning; a failing
 * sync replaces the byte count as the return value.
 */
ssize_t
generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
			  loff_t *ppos, size_t len, unsigned int flags)
{
	struct address_space *mapping = out->f_mapping;
	struct inode *inode;
	ssize_t spliced;
	int err;

	spliced = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
	if (spliced <= 0)
		return spliced;

	inode = mapping->host;
	*ppos += spliced;

	/*
	 * Only a SYNC file or inode needs the explicit sync below.
	 */
	if (likely(!(out->f_flags & O_SYNC) && !IS_SYNC(inode)))
		return spliced;

	mutex_lock(&inode->i_mutex);
	err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA);
	mutex_unlock(&inode->i_mutex);

	return err ? err : spliced;
}

EXPORT_SYMBOL(generic_file_splice_write);

/**
 * generic_splice_sendpage - splice data from a pipe to a socket
 * @pipe:	pipe info
 * @out:	socket to write to
 * @ppos:	position, handed through to move_from_pipe() unchanged
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Will send @len bytes from the pipe to a network socket. No data copying
 * is involved. The work is done by move_from_pipe() with pipe_to_sendpage
 * as the actor, and its result is returned as is.
 */
ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
				loff_t *ppos, size_t len, unsigned int flags)
{
	ssize_t sent = move_from_pipe(pipe, out, ppos, len, flags,
				      pipe_to_sendpage);

	return sent;
}

EXPORT_SYMBOL(generic_splice_sendpage);

/*
 * Attempt to initiate a splice from pipe to file.
77183f9135bSJens Axboe */ 7723a326a2cSIngo Molnar static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, 773cbb7e577SJens Axboe loff_t *ppos, size_t len, unsigned int flags) 7745274f052SJens Axboe { 7755274f052SJens Axboe int ret; 7765274f052SJens Axboe 77749570e9bSJens Axboe if (unlikely(!out->f_op || !out->f_op->splice_write)) 7785274f052SJens Axboe return -EINVAL; 7795274f052SJens Axboe 78049570e9bSJens Axboe if (unlikely(!(out->f_mode & FMODE_WRITE))) 7815274f052SJens Axboe return -EBADF; 7825274f052SJens Axboe 783cbb7e577SJens Axboe ret = rw_verify_area(WRITE, out, ppos, len); 7845274f052SJens Axboe if (unlikely(ret < 0)) 7855274f052SJens Axboe return ret; 7865274f052SJens Axboe 787cbb7e577SJens Axboe return out->f_op->splice_write(pipe, out, ppos, len, flags); 7885274f052SJens Axboe } 7895274f052SJens Axboe 79083f9135bSJens Axboe /* 79183f9135bSJens Axboe * Attempt to initiate a splice from a file to a pipe. 79283f9135bSJens Axboe */ 793cbb7e577SJens Axboe static long do_splice_to(struct file *in, loff_t *ppos, 794cbb7e577SJens Axboe struct pipe_inode_info *pipe, size_t len, 795cbb7e577SJens Axboe unsigned int flags) 7965274f052SJens Axboe { 797cbb7e577SJens Axboe loff_t isize, left; 7985274f052SJens Axboe int ret; 7995274f052SJens Axboe 80049570e9bSJens Axboe if (unlikely(!in->f_op || !in->f_op->splice_read)) 8015274f052SJens Axboe return -EINVAL; 8025274f052SJens Axboe 80349570e9bSJens Axboe if (unlikely(!(in->f_mode & FMODE_READ))) 8045274f052SJens Axboe return -EBADF; 8055274f052SJens Axboe 806cbb7e577SJens Axboe ret = rw_verify_area(READ, in, ppos, len); 8075274f052SJens Axboe if (unlikely(ret < 0)) 8085274f052SJens Axboe return ret; 8095274f052SJens Axboe 8105274f052SJens Axboe isize = i_size_read(in->f_mapping->host); 811cbb7e577SJens Axboe if (unlikely(*ppos >= isize)) 8125274f052SJens Axboe return 0; 8135274f052SJens Axboe 814cbb7e577SJens Axboe left = isize - *ppos; 81549570e9bSJens Axboe if (unlikely(left < len)) 
8165274f052SJens Axboe len = left; 8175274f052SJens Axboe 818cbb7e577SJens Axboe return in->f_op->splice_read(in, ppos, pipe, len, flags); 8195274f052SJens Axboe } 8205274f052SJens Axboe 821cbb7e577SJens Axboe long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 822cbb7e577SJens Axboe size_t len, unsigned int flags) 823b92ce558SJens Axboe { 824b92ce558SJens Axboe struct pipe_inode_info *pipe; 825b92ce558SJens Axboe long ret, bytes; 826cbb7e577SJens Axboe loff_t out_off; 827b92ce558SJens Axboe umode_t i_mode; 828b92ce558SJens Axboe int i; 829b92ce558SJens Axboe 830b92ce558SJens Axboe /* 831b92ce558SJens Axboe * We require the input being a regular file, as we don't want to 832b92ce558SJens Axboe * randomly drop data for eg socket -> socket splicing. Use the 833b92ce558SJens Axboe * piped splicing for that! 834b92ce558SJens Axboe */ 835b92ce558SJens Axboe i_mode = in->f_dentry->d_inode->i_mode; 836b92ce558SJens Axboe if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) 837b92ce558SJens Axboe return -EINVAL; 838b92ce558SJens Axboe 839b92ce558SJens Axboe /* 840b92ce558SJens Axboe * neither in nor out is a pipe, setup an internal pipe attached to 841b92ce558SJens Axboe * 'out' and transfer the wanted data from 'in' to 'out' through that 842b92ce558SJens Axboe */ 843b92ce558SJens Axboe pipe = current->splice_pipe; 84449570e9bSJens Axboe if (unlikely(!pipe)) { 845b92ce558SJens Axboe pipe = alloc_pipe_info(NULL); 846b92ce558SJens Axboe if (!pipe) 847b92ce558SJens Axboe return -ENOMEM; 848b92ce558SJens Axboe 849b92ce558SJens Axboe /* 850b92ce558SJens Axboe * We don't have an immediate reader, but we'll read the stuff 851b92ce558SJens Axboe * out of the pipe right after the move_to_pipe(). So set 852b92ce558SJens Axboe * PIPE_READERS appropriately. 
853b92ce558SJens Axboe */ 854b92ce558SJens Axboe pipe->readers = 1; 855b92ce558SJens Axboe 856b92ce558SJens Axboe current->splice_pipe = pipe; 857b92ce558SJens Axboe } 858b92ce558SJens Axboe 859b92ce558SJens Axboe /* 86073d62d83SIngo Molnar * Do the splice. 861b92ce558SJens Axboe */ 862b92ce558SJens Axboe ret = 0; 863b92ce558SJens Axboe bytes = 0; 864cbb7e577SJens Axboe out_off = 0; 865b92ce558SJens Axboe 866b92ce558SJens Axboe while (len) { 867b92ce558SJens Axboe size_t read_len, max_read_len; 868b92ce558SJens Axboe 869b92ce558SJens Axboe /* 870b92ce558SJens Axboe * Do at most PIPE_BUFFERS pages worth of transfer: 871b92ce558SJens Axboe */ 872b92ce558SJens Axboe max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE)); 873b92ce558SJens Axboe 874cbb7e577SJens Axboe ret = do_splice_to(in, ppos, pipe, max_read_len, flags); 875b92ce558SJens Axboe if (unlikely(ret < 0)) 876b92ce558SJens Axboe goto out_release; 877b92ce558SJens Axboe 878b92ce558SJens Axboe read_len = ret; 879b92ce558SJens Axboe 880b92ce558SJens Axboe /* 881b92ce558SJens Axboe * NOTE: nonblocking mode only applies to the input. We 882b92ce558SJens Axboe * must not do the output in nonblocking mode as then we 883b92ce558SJens Axboe * could get stuck data in the internal pipe: 884b92ce558SJens Axboe */ 885cbb7e577SJens Axboe ret = do_splice_from(pipe, out, &out_off, read_len, 886b92ce558SJens Axboe flags & ~SPLICE_F_NONBLOCK); 887b92ce558SJens Axboe if (unlikely(ret < 0)) 888b92ce558SJens Axboe goto out_release; 889b92ce558SJens Axboe 890b92ce558SJens Axboe bytes += ret; 891b92ce558SJens Axboe len -= ret; 892b92ce558SJens Axboe 893b92ce558SJens Axboe /* 894b92ce558SJens Axboe * In nonblocking mode, if we got back a short read then 895b92ce558SJens Axboe * that was due to either an IO error or due to the 896b92ce558SJens Axboe * pagecache entry not being there. 
In the IO error case 897b92ce558SJens Axboe * the _next_ splice attempt will produce a clean IO error 898b92ce558SJens Axboe * return value (not a short read), so in both cases it's 899b92ce558SJens Axboe * correct to break out of the loop here: 900b92ce558SJens Axboe */ 901b92ce558SJens Axboe if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len)) 902b92ce558SJens Axboe break; 903b92ce558SJens Axboe } 904b92ce558SJens Axboe 905b92ce558SJens Axboe pipe->nrbufs = pipe->curbuf = 0; 906b92ce558SJens Axboe 907b92ce558SJens Axboe return bytes; 908b92ce558SJens Axboe 909b92ce558SJens Axboe out_release: 910b92ce558SJens Axboe /* 911b92ce558SJens Axboe * If we did an incomplete transfer we must release 912b92ce558SJens Axboe * the pipe buffers in question: 913b92ce558SJens Axboe */ 914b92ce558SJens Axboe for (i = 0; i < PIPE_BUFFERS; i++) { 915b92ce558SJens Axboe struct pipe_buffer *buf = pipe->bufs + i; 916b92ce558SJens Axboe 917b92ce558SJens Axboe if (buf->ops) { 918b92ce558SJens Axboe buf->ops->release(pipe, buf); 919b92ce558SJens Axboe buf->ops = NULL; 920b92ce558SJens Axboe } 921b92ce558SJens Axboe } 922b92ce558SJens Axboe pipe->nrbufs = pipe->curbuf = 0; 923b92ce558SJens Axboe 924b92ce558SJens Axboe /* 925b92ce558SJens Axboe * If we transferred some data, return the number of bytes: 926b92ce558SJens Axboe */ 927b92ce558SJens Axboe if (bytes > 0) 928b92ce558SJens Axboe return bytes; 929b92ce558SJens Axboe 930b92ce558SJens Axboe return ret; 931b92ce558SJens Axboe } 932b92ce558SJens Axboe 933b92ce558SJens Axboe EXPORT_SYMBOL(do_splice_direct); 934b92ce558SJens Axboe 93583f9135bSJens Axboe /* 93683f9135bSJens Axboe * Determine where to splice to/from. 
93783f9135bSJens Axboe */ 938529565dcSIngo Molnar static long do_splice(struct file *in, loff_t __user *off_in, 939529565dcSIngo Molnar struct file *out, loff_t __user *off_out, 940529565dcSIngo Molnar size_t len, unsigned int flags) 9415274f052SJens Axboe { 9423a326a2cSIngo Molnar struct pipe_inode_info *pipe; 943cbb7e577SJens Axboe loff_t offset, *off; 944a4514ebdSJens Axboe long ret; 9455274f052SJens Axboe 9463a326a2cSIngo Molnar pipe = in->f_dentry->d_inode->i_pipe; 947529565dcSIngo Molnar if (pipe) { 948529565dcSIngo Molnar if (off_in) 949529565dcSIngo Molnar return -ESPIPE; 950b92ce558SJens Axboe if (off_out) { 951b92ce558SJens Axboe if (out->f_op->llseek == no_llseek) 952b92ce558SJens Axboe return -EINVAL; 953cbb7e577SJens Axboe if (copy_from_user(&offset, off_out, sizeof(loff_t))) 954b92ce558SJens Axboe return -EFAULT; 955cbb7e577SJens Axboe off = &offset; 956cbb7e577SJens Axboe } else 957cbb7e577SJens Axboe off = &out->f_pos; 958529565dcSIngo Molnar 959a4514ebdSJens Axboe ret = do_splice_from(pipe, out, off, len, flags); 960a4514ebdSJens Axboe 961a4514ebdSJens Axboe if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) 962a4514ebdSJens Axboe ret = -EFAULT; 963a4514ebdSJens Axboe 964a4514ebdSJens Axboe return ret; 965529565dcSIngo Molnar } 9665274f052SJens Axboe 9673a326a2cSIngo Molnar pipe = out->f_dentry->d_inode->i_pipe; 968529565dcSIngo Molnar if (pipe) { 969529565dcSIngo Molnar if (off_out) 970529565dcSIngo Molnar return -ESPIPE; 971b92ce558SJens Axboe if (off_in) { 972b92ce558SJens Axboe if (in->f_op->llseek == no_llseek) 973b92ce558SJens Axboe return -EINVAL; 974cbb7e577SJens Axboe if (copy_from_user(&offset, off_in, sizeof(loff_t))) 975b92ce558SJens Axboe return -EFAULT; 976cbb7e577SJens Axboe off = &offset; 977cbb7e577SJens Axboe } else 978cbb7e577SJens Axboe off = &in->f_pos; 979529565dcSIngo Molnar 980a4514ebdSJens Axboe ret = do_splice_to(in, off, pipe, len, flags); 981a4514ebdSJens Axboe 982a4514ebdSJens Axboe if (off_in && 
copy_to_user(off_in, off, sizeof(loff_t))) 983a4514ebdSJens Axboe ret = -EFAULT; 984a4514ebdSJens Axboe 985a4514ebdSJens Axboe return ret; 986529565dcSIngo Molnar } 9875274f052SJens Axboe 9885274f052SJens Axboe return -EINVAL; 9895274f052SJens Axboe } 9905274f052SJens Axboe 991529565dcSIngo Molnar asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, 992529565dcSIngo Molnar int fd_out, loff_t __user *off_out, 993529565dcSIngo Molnar size_t len, unsigned int flags) 9945274f052SJens Axboe { 9955274f052SJens Axboe long error; 9965274f052SJens Axboe struct file *in, *out; 9975274f052SJens Axboe int fput_in, fput_out; 9985274f052SJens Axboe 9995274f052SJens Axboe if (unlikely(!len)) 10005274f052SJens Axboe return 0; 10015274f052SJens Axboe 10025274f052SJens Axboe error = -EBADF; 1003529565dcSIngo Molnar in = fget_light(fd_in, &fput_in); 10045274f052SJens Axboe if (in) { 10055274f052SJens Axboe if (in->f_mode & FMODE_READ) { 1006529565dcSIngo Molnar out = fget_light(fd_out, &fput_out); 10075274f052SJens Axboe if (out) { 10085274f052SJens Axboe if (out->f_mode & FMODE_WRITE) 1009529565dcSIngo Molnar error = do_splice(in, off_in, 1010529565dcSIngo Molnar out, off_out, 1011529565dcSIngo Molnar len, flags); 10125274f052SJens Axboe fput_light(out, fput_out); 10135274f052SJens Axboe } 10145274f052SJens Axboe } 10155274f052SJens Axboe 10165274f052SJens Axboe fput_light(in, fput_in); 10175274f052SJens Axboe } 10185274f052SJens Axboe 10195274f052SJens Axboe return error; 10205274f052SJens Axboe } 102170524490SJens Axboe 102270524490SJens Axboe /* 102370524490SJens Axboe * Link contents of ipipe to opipe. 
102470524490SJens Axboe */ 102570524490SJens Axboe static int link_pipe(struct pipe_inode_info *ipipe, 102670524490SJens Axboe struct pipe_inode_info *opipe, 102770524490SJens Axboe size_t len, unsigned int flags) 102870524490SJens Axboe { 102970524490SJens Axboe struct pipe_buffer *ibuf, *obuf; 10302a27250eSJens Axboe int ret, do_wakeup, i, ipipe_first; 10312a27250eSJens Axboe 10322a27250eSJens Axboe ret = do_wakeup = ipipe_first = 0; 103370524490SJens Axboe 103470524490SJens Axboe /* 103570524490SJens Axboe * Potential ABBA deadlock, work around it by ordering lock 103670524490SJens Axboe * grabbing by inode address. Otherwise two different processes 103770524490SJens Axboe * could deadlock (one doing tee from A -> B, the other from B -> A). 103870524490SJens Axboe */ 103970524490SJens Axboe if (ipipe->inode < opipe->inode) { 10402a27250eSJens Axboe ipipe_first = 1; 104170524490SJens Axboe mutex_lock(&ipipe->inode->i_mutex); 104270524490SJens Axboe mutex_lock(&opipe->inode->i_mutex); 104370524490SJens Axboe } else { 104470524490SJens Axboe mutex_lock(&opipe->inode->i_mutex); 104570524490SJens Axboe mutex_lock(&ipipe->inode->i_mutex); 104670524490SJens Axboe } 104770524490SJens Axboe 104870524490SJens Axboe for (i = 0;; i++) { 104970524490SJens Axboe if (!opipe->readers) { 105070524490SJens Axboe send_sig(SIGPIPE, current, 0); 105170524490SJens Axboe if (!ret) 105270524490SJens Axboe ret = -EPIPE; 105370524490SJens Axboe break; 105470524490SJens Axboe } 105570524490SJens Axboe if (ipipe->nrbufs - i) { 105670524490SJens Axboe ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); 105770524490SJens Axboe 105870524490SJens Axboe /* 105970524490SJens Axboe * If we have room, fill this buffer 106070524490SJens Axboe */ 106170524490SJens Axboe if (opipe->nrbufs < PIPE_BUFFERS) { 106270524490SJens Axboe int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); 106370524490SJens Axboe 106470524490SJens Axboe /* 106570524490SJens Axboe * Get a reference to 
this pipe buffer, 106670524490SJens Axboe * so we can copy the contents over. 106770524490SJens Axboe */ 106870524490SJens Axboe ibuf->ops->get(ipipe, ibuf); 106970524490SJens Axboe 107070524490SJens Axboe obuf = opipe->bufs + nbuf; 107170524490SJens Axboe *obuf = *ibuf; 107270524490SJens Axboe 107370524490SJens Axboe if (obuf->len > len) 107470524490SJens Axboe obuf->len = len; 107570524490SJens Axboe 107670524490SJens Axboe opipe->nrbufs++; 107770524490SJens Axboe do_wakeup = 1; 107870524490SJens Axboe ret += obuf->len; 107970524490SJens Axboe len -= obuf->len; 108070524490SJens Axboe 108170524490SJens Axboe if (!len) 108270524490SJens Axboe break; 108370524490SJens Axboe if (opipe->nrbufs < PIPE_BUFFERS) 108470524490SJens Axboe continue; 108570524490SJens Axboe } 108670524490SJens Axboe 108770524490SJens Axboe /* 108870524490SJens Axboe * We have input available, but no output room. 10892a27250eSJens Axboe * If we already copied data, return that. If we 10902a27250eSJens Axboe * need to drop the opipe lock, it must be ordered 10912a27250eSJens Axboe * last to avoid deadlocks. 
109270524490SJens Axboe */ 10932a27250eSJens Axboe if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) { 109470524490SJens Axboe if (!ret) 109570524490SJens Axboe ret = -EAGAIN; 109670524490SJens Axboe break; 109770524490SJens Axboe } 109870524490SJens Axboe if (signal_pending(current)) { 109970524490SJens Axboe if (!ret) 110070524490SJens Axboe ret = -ERESTARTSYS; 110170524490SJens Axboe break; 110270524490SJens Axboe } 110370524490SJens Axboe if (do_wakeup) { 110470524490SJens Axboe smp_mb(); 110570524490SJens Axboe if (waitqueue_active(&opipe->wait)) 110670524490SJens Axboe wake_up_interruptible(&opipe->wait); 110770524490SJens Axboe kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); 110870524490SJens Axboe do_wakeup = 0; 110970524490SJens Axboe } 111070524490SJens Axboe 111170524490SJens Axboe opipe->waiting_writers++; 111270524490SJens Axboe pipe_wait(opipe); 111370524490SJens Axboe opipe->waiting_writers--; 111470524490SJens Axboe continue; 111570524490SJens Axboe } 111670524490SJens Axboe 111770524490SJens Axboe /* 111870524490SJens Axboe * No input buffers, do the usual checks for available 111970524490SJens Axboe * writers and blocking and wait if necessary 112070524490SJens Axboe */ 112170524490SJens Axboe if (!ipipe->writers) 112270524490SJens Axboe break; 112370524490SJens Axboe if (!ipipe->waiting_writers) { 112470524490SJens Axboe if (ret) 112570524490SJens Axboe break; 112670524490SJens Axboe } 11272a27250eSJens Axboe /* 11282a27250eSJens Axboe * pipe_wait() drops the ipipe mutex. To avoid deadlocks 11292a27250eSJens Axboe * with another process, we can only safely do that if 11302a27250eSJens Axboe * the ipipe lock is ordered last. 
11312a27250eSJens Axboe */ 11322a27250eSJens Axboe if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) { 113370524490SJens Axboe if (!ret) 113470524490SJens Axboe ret = -EAGAIN; 113570524490SJens Axboe break; 113670524490SJens Axboe } 113770524490SJens Axboe if (signal_pending(current)) { 113870524490SJens Axboe if (!ret) 113970524490SJens Axboe ret = -ERESTARTSYS; 114070524490SJens Axboe break; 114170524490SJens Axboe } 114270524490SJens Axboe 114370524490SJens Axboe if (waitqueue_active(&ipipe->wait)) 114470524490SJens Axboe wake_up_interruptible_sync(&ipipe->wait); 114570524490SJens Axboe kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT); 114670524490SJens Axboe 114770524490SJens Axboe pipe_wait(ipipe); 114870524490SJens Axboe } 114970524490SJens Axboe 115070524490SJens Axboe mutex_unlock(&ipipe->inode->i_mutex); 115170524490SJens Axboe mutex_unlock(&opipe->inode->i_mutex); 115270524490SJens Axboe 115370524490SJens Axboe if (do_wakeup) { 115470524490SJens Axboe smp_mb(); 115570524490SJens Axboe if (waitqueue_active(&opipe->wait)) 115670524490SJens Axboe wake_up_interruptible(&opipe->wait); 115770524490SJens Axboe kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); 115870524490SJens Axboe } 115970524490SJens Axboe 116070524490SJens Axboe return ret; 116170524490SJens Axboe } 116270524490SJens Axboe 116370524490SJens Axboe /* 116470524490SJens Axboe * This is a tee(1) implementation that works on pipes. It doesn't copy 116570524490SJens Axboe * any data, it simply references the 'in' pages on the 'out' pipe. 116670524490SJens Axboe * The 'flags' used are the SPLICE_F_* variants, currently the only 116770524490SJens Axboe * applicable one is SPLICE_F_NONBLOCK. 
116870524490SJens Axboe */ 116970524490SJens Axboe static long do_tee(struct file *in, struct file *out, size_t len, 117070524490SJens Axboe unsigned int flags) 117170524490SJens Axboe { 117270524490SJens Axboe struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; 117370524490SJens Axboe struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; 117470524490SJens Axboe 117570524490SJens Axboe /* 117670524490SJens Axboe * Link ipipe to the two output pipes, consuming as we go along. 117770524490SJens Axboe */ 117870524490SJens Axboe if (ipipe && opipe) 117970524490SJens Axboe return link_pipe(ipipe, opipe, len, flags); 118070524490SJens Axboe 118170524490SJens Axboe return -EINVAL; 118270524490SJens Axboe } 118370524490SJens Axboe 118470524490SJens Axboe asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags) 118570524490SJens Axboe { 118670524490SJens Axboe struct file *in; 118770524490SJens Axboe int error, fput_in; 118870524490SJens Axboe 118970524490SJens Axboe if (unlikely(!len)) 119070524490SJens Axboe return 0; 119170524490SJens Axboe 119270524490SJens Axboe error = -EBADF; 119370524490SJens Axboe in = fget_light(fdin, &fput_in); 119470524490SJens Axboe if (in) { 119570524490SJens Axboe if (in->f_mode & FMODE_READ) { 119670524490SJens Axboe int fput_out; 119770524490SJens Axboe struct file *out = fget_light(fdout, &fput_out); 119870524490SJens Axboe 119970524490SJens Axboe if (out) { 120070524490SJens Axboe if (out->f_mode & FMODE_WRITE) 120170524490SJens Axboe error = do_tee(in, out, len, flags); 120270524490SJens Axboe fput_light(out, fput_out); 120370524490SJens Axboe } 120470524490SJens Axboe } 120570524490SJens Axboe fput_light(in, fput_in); 120670524490SJens Axboe } 120770524490SJens Axboe 120870524490SJens Axboe return error; 120970524490SJens Axboe } 1210