xref: /openbmc/linux/fs/splice.c (revision 6130f531)
15274f052SJens Axboe /*
25274f052SJens Axboe  * "splice": joining two ropes together by interweaving their strands.
35274f052SJens Axboe  *
45274f052SJens Axboe  * This is the "extended pipe" functionality, where a pipe is used as
55274f052SJens Axboe  * an arbitrary in-memory buffer. Think of a pipe as a small kernel
65274f052SJens Axboe  * buffer that you can use to transfer data from one end to the other.
75274f052SJens Axboe  *
85274f052SJens Axboe  * The traditional unix read/write is extended with a "splice()" operation
95274f052SJens Axboe  * that transfers data buffers to or from a pipe buffer.
105274f052SJens Axboe  *
115274f052SJens Axboe  * Named by Larry McVoy, original implementation from Linus, extended by
12c2058e06SJens Axboe  * Jens to support splicing to files, network, direct splicing, etc and
13c2058e06SJens Axboe  * fixing lots of bugs.
145274f052SJens Axboe  *
150fe23479SJens Axboe  * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk>
16c2058e06SJens Axboe  * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
17c2058e06SJens Axboe  * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
185274f052SJens Axboe  *
195274f052SJens Axboe  */
205274f052SJens Axboe #include <linux/fs.h>
215274f052SJens Axboe #include <linux/file.h>
225274f052SJens Axboe #include <linux/pagemap.h>
23d6b29d7cSJens Axboe #include <linux/splice.h>
2408e552c6SKAMEZAWA Hiroyuki #include <linux/memcontrol.h>
255274f052SJens Axboe #include <linux/mm_inline.h>
265abc97aaSJens Axboe #include <linux/swap.h>
274f6f0bd2SJens Axboe #include <linux/writeback.h>
28630d9c47SPaul Gortmaker #include <linux/export.h>
294f6f0bd2SJens Axboe #include <linux/syscalls.h>
30912d35f8SJens Axboe #include <linux/uio.h>
3129ce2058SJames Morris #include <linux/security.h>
325a0e3ad6STejun Heo #include <linux/gfp.h>
3335f9c09fSEric Dumazet #include <linux/socket.h>
3476b021d0SAl Viro #include <linux/compat.h>
3506ae43f3SAl Viro #include "internal.h"
365274f052SJens Axboe 
3783f9135bSJens Axboe /*
3883f9135bSJens Axboe  * Attempt to steal a page from a pipe buffer. This should perhaps go into
3983f9135bSJens Axboe  * a vm helper function, it's already simplified quite a bit by the
4083f9135bSJens Axboe  * addition of remove_mapping(). If success is returned, the caller may
4183f9135bSJens Axboe  * attempt to reuse this page for another destination.
4283f9135bSJens Axboe  */
4376ad4d11SJens Axboe static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
445abc97aaSJens Axboe 				     struct pipe_buffer *buf)
455abc97aaSJens Axboe {
465abc97aaSJens Axboe 	struct page *page = buf->page;
479e94cd4fSJens Axboe 	struct address_space *mapping;
485abc97aaSJens Axboe 
499e0267c2SJens Axboe 	lock_page(page);
509e0267c2SJens Axboe 
519e94cd4fSJens Axboe 	mapping = page_mapping(page);
529e94cd4fSJens Axboe 	if (mapping) {
535abc97aaSJens Axboe 		WARN_ON(!PageUptodate(page));
545abc97aaSJens Axboe 
55ad8d6f0aSJens Axboe 		/*
569e94cd4fSJens Axboe 		 * At least for ext2 with nobh option, we need to wait on
579e94cd4fSJens Axboe 		 * writeback completing on this page, since we'll remove it
589e94cd4fSJens Axboe 		 * from the pagecache.  Otherwise truncate wont wait on the
599e94cd4fSJens Axboe 		 * page, allowing the disk blocks to be reused by someone else
609e94cd4fSJens Axboe 		 * before we actually wrote our data to them. fs corruption
619e94cd4fSJens Axboe 		 * ensues.
62ad8d6f0aSJens Axboe 		 */
63ad8d6f0aSJens Axboe 		wait_on_page_writeback(page);
64ad8d6f0aSJens Axboe 
65266cf658SDavid Howells 		if (page_has_private(page) &&
66266cf658SDavid Howells 		    !try_to_release_page(page, GFP_KERNEL))
67ca39d651SJens Axboe 			goto out_unlock;
684f6f0bd2SJens Axboe 
699e94cd4fSJens Axboe 		/*
709e94cd4fSJens Axboe 		 * If we succeeded in removing the mapping, set LRU flag
719e94cd4fSJens Axboe 		 * and return good.
729e94cd4fSJens Axboe 		 */
739e94cd4fSJens Axboe 		if (remove_mapping(mapping, page)) {
741432873aSJens Axboe 			buf->flags |= PIPE_BUF_FLAG_LRU;
755abc97aaSJens Axboe 			return 0;
765abc97aaSJens Axboe 		}
779e94cd4fSJens Axboe 	}
789e94cd4fSJens Axboe 
799e94cd4fSJens Axboe 	/*
809e94cd4fSJens Axboe 	 * Raced with truncate or failed to remove page from current
819e94cd4fSJens Axboe 	 * address space, unlock and return failure.
829e94cd4fSJens Axboe 	 */
83ca39d651SJens Axboe out_unlock:
849e94cd4fSJens Axboe 	unlock_page(page);
859e94cd4fSJens Axboe 	return 1;
869e94cd4fSJens Axboe }
875abc97aaSJens Axboe 
8876ad4d11SJens Axboe static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
895274f052SJens Axboe 					struct pipe_buffer *buf)
905274f052SJens Axboe {
915274f052SJens Axboe 	page_cache_release(buf->page);
921432873aSJens Axboe 	buf->flags &= ~PIPE_BUF_FLAG_LRU;
935274f052SJens Axboe }
945274f052SJens Axboe 
950845718dSJens Axboe /*
960845718dSJens Axboe  * Check whether the contents of buf is OK to access. Since the content
970845718dSJens Axboe  * is a page cache page, IO may be in flight.
980845718dSJens Axboe  */
99cac36bb0SJens Axboe static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
1005274f052SJens Axboe 				       struct pipe_buffer *buf)
1015274f052SJens Axboe {
1025274f052SJens Axboe 	struct page *page = buf->page;
10349d0b21bSJens Axboe 	int err;
1045274f052SJens Axboe 
1055274f052SJens Axboe 	if (!PageUptodate(page)) {
10649d0b21bSJens Axboe 		lock_page(page);
1075274f052SJens Axboe 
10849d0b21bSJens Axboe 		/*
10949d0b21bSJens Axboe 		 * Page got truncated/unhashed. This will cause a 0-byte
11073d62d83SIngo Molnar 		 * splice, if this is the first page.
11149d0b21bSJens Axboe 		 */
1125274f052SJens Axboe 		if (!page->mapping) {
11349d0b21bSJens Axboe 			err = -ENODATA;
11449d0b21bSJens Axboe 			goto error;
1155274f052SJens Axboe 		}
1165274f052SJens Axboe 
11749d0b21bSJens Axboe 		/*
11873d62d83SIngo Molnar 		 * Uh oh, read-error from disk.
11949d0b21bSJens Axboe 		 */
12049d0b21bSJens Axboe 		if (!PageUptodate(page)) {
12149d0b21bSJens Axboe 			err = -EIO;
12249d0b21bSJens Axboe 			goto error;
12349d0b21bSJens Axboe 		}
12449d0b21bSJens Axboe 
12549d0b21bSJens Axboe 		/*
126f84d7519SJens Axboe 		 * Page is ok afterall, we are done.
12749d0b21bSJens Axboe 		 */
12849d0b21bSJens Axboe 		unlock_page(page);
12949d0b21bSJens Axboe 	}
13049d0b21bSJens Axboe 
131f84d7519SJens Axboe 	return 0;
13249d0b21bSJens Axboe error:
13349d0b21bSJens Axboe 	unlock_page(page);
134f84d7519SJens Axboe 	return err;
13570524490SJens Axboe }
13670524490SJens Axboe 
137708e3508SHugh Dickins const struct pipe_buf_operations page_cache_pipe_buf_ops = {
1385274f052SJens Axboe 	.can_merge = 0,
139cac36bb0SJens Axboe 	.confirm = page_cache_pipe_buf_confirm,
1405274f052SJens Axboe 	.release = page_cache_pipe_buf_release,
1415abc97aaSJens Axboe 	.steal = page_cache_pipe_buf_steal,
142f84d7519SJens Axboe 	.get = generic_pipe_buf_get,
1435274f052SJens Axboe };
1445274f052SJens Axboe 
145912d35f8SJens Axboe static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
146912d35f8SJens Axboe 				    struct pipe_buffer *buf)
147912d35f8SJens Axboe {
1487afa6fd0SJens Axboe 	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
149912d35f8SJens Axboe 		return 1;
1507afa6fd0SJens Axboe 
1511432873aSJens Axboe 	buf->flags |= PIPE_BUF_FLAG_LRU;
152330ab716SJens Axboe 	return generic_pipe_buf_steal(pipe, buf);
153912d35f8SJens Axboe }
154912d35f8SJens Axboe 
155d4c3cca9SEric Dumazet static const struct pipe_buf_operations user_page_pipe_buf_ops = {
156912d35f8SJens Axboe 	.can_merge = 0,
157cac36bb0SJens Axboe 	.confirm = generic_pipe_buf_confirm,
158912d35f8SJens Axboe 	.release = page_cache_pipe_buf_release,
159912d35f8SJens Axboe 	.steal = user_page_pipe_buf_steal,
160f84d7519SJens Axboe 	.get = generic_pipe_buf_get,
161912d35f8SJens Axboe };
162912d35f8SJens Axboe 
163825cdcb1SNamhyung Kim static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
164825cdcb1SNamhyung Kim {
165825cdcb1SNamhyung Kim 	smp_mb();
166825cdcb1SNamhyung Kim 	if (waitqueue_active(&pipe->wait))
167825cdcb1SNamhyung Kim 		wake_up_interruptible(&pipe->wait);
168825cdcb1SNamhyung Kim 	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
169825cdcb1SNamhyung Kim }
170825cdcb1SNamhyung Kim 
171932cc6d4SJens Axboe /**
172932cc6d4SJens Axboe  * splice_to_pipe - fill passed data into a pipe
173932cc6d4SJens Axboe  * @pipe:	pipe to fill
174932cc6d4SJens Axboe  * @spd:	data to fill
175932cc6d4SJens Axboe  *
176932cc6d4SJens Axboe  * Description:
17779685b8dSRandy Dunlap  *    @spd contains a map of pages and len/offset tuples, along with
178932cc6d4SJens Axboe  *    the struct pipe_buf_operations associated with these pages. This
179932cc6d4SJens Axboe  *    function will link that data to the pipe.
180932cc6d4SJens Axboe  *
18183f9135bSJens Axboe  */
182d6b29d7cSJens Axboe ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
183912d35f8SJens Axboe 		       struct splice_pipe_desc *spd)
1845274f052SJens Axboe {
18500de00bdSJens Axboe 	unsigned int spd_pages = spd->nr_pages;
186912d35f8SJens Axboe 	int ret, do_wakeup, page_nr;
1875274f052SJens Axboe 
1885274f052SJens Axboe 	ret = 0;
1895274f052SJens Axboe 	do_wakeup = 0;
190912d35f8SJens Axboe 	page_nr = 0;
1915274f052SJens Axboe 
19261e0d47cSMiklos Szeredi 	pipe_lock(pipe);
1935274f052SJens Axboe 
1945274f052SJens Axboe 	for (;;) {
1953a326a2cSIngo Molnar 		if (!pipe->readers) {
1965274f052SJens Axboe 			send_sig(SIGPIPE, current, 0);
1975274f052SJens Axboe 			if (!ret)
1985274f052SJens Axboe 				ret = -EPIPE;
1995274f052SJens Axboe 			break;
2005274f052SJens Axboe 		}
2015274f052SJens Axboe 
20235f3d14dSJens Axboe 		if (pipe->nrbufs < pipe->buffers) {
20335f3d14dSJens Axboe 			int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
2043a326a2cSIngo Molnar 			struct pipe_buffer *buf = pipe->bufs + newbuf;
2055274f052SJens Axboe 
206912d35f8SJens Axboe 			buf->page = spd->pages[page_nr];
207912d35f8SJens Axboe 			buf->offset = spd->partial[page_nr].offset;
208912d35f8SJens Axboe 			buf->len = spd->partial[page_nr].len;
209497f9625SJens Axboe 			buf->private = spd->partial[page_nr].private;
210912d35f8SJens Axboe 			buf->ops = spd->ops;
2117afa6fd0SJens Axboe 			if (spd->flags & SPLICE_F_GIFT)
2127afa6fd0SJens Axboe 				buf->flags |= PIPE_BUF_FLAG_GIFT;
2137afa6fd0SJens Axboe 
2146f767b04SJens Axboe 			pipe->nrbufs++;
215912d35f8SJens Axboe 			page_nr++;
216912d35f8SJens Axboe 			ret += buf->len;
217912d35f8SJens Axboe 
2186447a3cfSAl Viro 			if (pipe->files)
2195274f052SJens Axboe 				do_wakeup = 1;
2205274f052SJens Axboe 
221912d35f8SJens Axboe 			if (!--spd->nr_pages)
2225274f052SJens Axboe 				break;
22335f3d14dSJens Axboe 			if (pipe->nrbufs < pipe->buffers)
2245274f052SJens Axboe 				continue;
2255274f052SJens Axboe 
2265274f052SJens Axboe 			break;
2275274f052SJens Axboe 		}
2285274f052SJens Axboe 
229912d35f8SJens Axboe 		if (spd->flags & SPLICE_F_NONBLOCK) {
23029e35094SLinus Torvalds 			if (!ret)
23129e35094SLinus Torvalds 				ret = -EAGAIN;
23229e35094SLinus Torvalds 			break;
23329e35094SLinus Torvalds 		}
23429e35094SLinus Torvalds 
2355274f052SJens Axboe 		if (signal_pending(current)) {
2365274f052SJens Axboe 			if (!ret)
2375274f052SJens Axboe 				ret = -ERESTARTSYS;
2385274f052SJens Axboe 			break;
2395274f052SJens Axboe 		}
2405274f052SJens Axboe 
2415274f052SJens Axboe 		if (do_wakeup) {
242c0bd1f65SJens Axboe 			smp_mb();
2433a326a2cSIngo Molnar 			if (waitqueue_active(&pipe->wait))
2443a326a2cSIngo Molnar 				wake_up_interruptible_sync(&pipe->wait);
2453a326a2cSIngo Molnar 			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
2465274f052SJens Axboe 			do_wakeup = 0;
2475274f052SJens Axboe 		}
2485274f052SJens Axboe 
2493a326a2cSIngo Molnar 		pipe->waiting_writers++;
2503a326a2cSIngo Molnar 		pipe_wait(pipe);
2513a326a2cSIngo Molnar 		pipe->waiting_writers--;
2525274f052SJens Axboe 	}
2535274f052SJens Axboe 
25461e0d47cSMiklos Szeredi 	pipe_unlock(pipe);
2555274f052SJens Axboe 
256825cdcb1SNamhyung Kim 	if (do_wakeup)
257825cdcb1SNamhyung Kim 		wakeup_pipe_readers(pipe);
2585274f052SJens Axboe 
25900de00bdSJens Axboe 	while (page_nr < spd_pages)
260bbdfc2f7SJens Axboe 		spd->spd_release(spd, page_nr++);
2615274f052SJens Axboe 
2625274f052SJens Axboe 	return ret;
2635274f052SJens Axboe }
2645274f052SJens Axboe 
265708e3508SHugh Dickins void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
266bbdfc2f7SJens Axboe {
267bbdfc2f7SJens Axboe 	page_cache_release(spd->pages[i]);
268bbdfc2f7SJens Axboe }
269bbdfc2f7SJens Axboe 
27035f3d14dSJens Axboe /*
27135f3d14dSJens Axboe  * Check if we need to grow the arrays holding pages and partial page
27235f3d14dSJens Axboe  * descriptions.
27335f3d14dSJens Axboe  */
274047fe360SEric Dumazet int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
27535f3d14dSJens Axboe {
276047fe360SEric Dumazet 	unsigned int buffers = ACCESS_ONCE(pipe->buffers);
277047fe360SEric Dumazet 
278047fe360SEric Dumazet 	spd->nr_pages_max = buffers;
279047fe360SEric Dumazet 	if (buffers <= PIPE_DEF_BUFFERS)
28035f3d14dSJens Axboe 		return 0;
28135f3d14dSJens Axboe 
282047fe360SEric Dumazet 	spd->pages = kmalloc(buffers * sizeof(struct page *), GFP_KERNEL);
283047fe360SEric Dumazet 	spd->partial = kmalloc(buffers * sizeof(struct partial_page), GFP_KERNEL);
28435f3d14dSJens Axboe 
28535f3d14dSJens Axboe 	if (spd->pages && spd->partial)
28635f3d14dSJens Axboe 		return 0;
28735f3d14dSJens Axboe 
28835f3d14dSJens Axboe 	kfree(spd->pages);
28935f3d14dSJens Axboe 	kfree(spd->partial);
29035f3d14dSJens Axboe 	return -ENOMEM;
29135f3d14dSJens Axboe }
29235f3d14dSJens Axboe 
293047fe360SEric Dumazet void splice_shrink_spd(struct splice_pipe_desc *spd)
29435f3d14dSJens Axboe {
295047fe360SEric Dumazet 	if (spd->nr_pages_max <= PIPE_DEF_BUFFERS)
29635f3d14dSJens Axboe 		return;
29735f3d14dSJens Axboe 
29835f3d14dSJens Axboe 	kfree(spd->pages);
29935f3d14dSJens Axboe 	kfree(spd->partial);
30035f3d14dSJens Axboe }
30135f3d14dSJens Axboe 
3023a326a2cSIngo Molnar static int
303cbb7e577SJens Axboe __generic_file_splice_read(struct file *in, loff_t *ppos,
304cbb7e577SJens Axboe 			   struct pipe_inode_info *pipe, size_t len,
305cbb7e577SJens Axboe 			   unsigned int flags)
3065274f052SJens Axboe {
3075274f052SJens Axboe 	struct address_space *mapping = in->f_mapping;
308d8983910SFengguang Wu 	unsigned int loff, nr_pages, req_pages;
30935f3d14dSJens Axboe 	struct page *pages[PIPE_DEF_BUFFERS];
31035f3d14dSJens Axboe 	struct partial_page partial[PIPE_DEF_BUFFERS];
3115274f052SJens Axboe 	struct page *page;
31291ad66efSJens Axboe 	pgoff_t index, end_index;
31391ad66efSJens Axboe 	loff_t isize;
314eb20796bSJens Axboe 	int error, page_nr;
315912d35f8SJens Axboe 	struct splice_pipe_desc spd = {
316912d35f8SJens Axboe 		.pages = pages,
317912d35f8SJens Axboe 		.partial = partial,
318047fe360SEric Dumazet 		.nr_pages_max = PIPE_DEF_BUFFERS,
319912d35f8SJens Axboe 		.flags = flags,
320912d35f8SJens Axboe 		.ops = &page_cache_pipe_buf_ops,
321bbdfc2f7SJens Axboe 		.spd_release = spd_release_page,
322912d35f8SJens Axboe 	};
3235274f052SJens Axboe 
32435f3d14dSJens Axboe 	if (splice_grow_spd(pipe, &spd))
32535f3d14dSJens Axboe 		return -ENOMEM;
32635f3d14dSJens Axboe 
327cbb7e577SJens Axboe 	index = *ppos >> PAGE_CACHE_SHIFT;
328912d35f8SJens Axboe 	loff = *ppos & ~PAGE_CACHE_MASK;
329d8983910SFengguang Wu 	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
330047fe360SEric Dumazet 	nr_pages = min(req_pages, spd.nr_pages_max);
3315274f052SJens Axboe 
3325274f052SJens Axboe 	/*
333eb20796bSJens Axboe 	 * Lookup the (hopefully) full range of pages we need.
33482aa5d61SJens Axboe 	 */
33535f3d14dSJens Axboe 	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
336431a4820SFengguang Wu 	index += spd.nr_pages;
337eb20796bSJens Axboe 
3385274f052SJens Axboe 	/*
339eb20796bSJens Axboe 	 * If find_get_pages_contig() returned fewer pages than we needed,
340431a4820SFengguang Wu 	 * readahead/allocate the rest and fill in the holes.
341eb20796bSJens Axboe 	 */
342431a4820SFengguang Wu 	if (spd.nr_pages < nr_pages)
343cf914a7dSRusty Russell 		page_cache_sync_readahead(mapping, &in->f_ra, in,
344cf914a7dSRusty Russell 				index, req_pages - spd.nr_pages);
345431a4820SFengguang Wu 
346932cc6d4SJens Axboe 	error = 0;
347eb20796bSJens Axboe 	while (spd.nr_pages < nr_pages) {
348eb20796bSJens Axboe 		/*
349eb20796bSJens Axboe 		 * Page could be there, find_get_pages_contig() breaks on
350eb20796bSJens Axboe 		 * the first hole.
3515274f052SJens Axboe 		 */
3527480a904SJens Axboe 		page = find_get_page(mapping, index);
3537480a904SJens Axboe 		if (!page) {
354e27dedd8SJens Axboe 			/*
355eb20796bSJens Axboe 			 * page didn't exist, allocate one.
3567480a904SJens Axboe 			 */
3577480a904SJens Axboe 			page = page_cache_alloc_cold(mapping);
3585274f052SJens Axboe 			if (!page)
3595274f052SJens Axboe 				break;
3605274f052SJens Axboe 
3617480a904SJens Axboe 			error = add_to_page_cache_lru(page, mapping, index,
3620ae0b5d0SNick Piggin 						GFP_KERNEL);
3635274f052SJens Axboe 			if (unlikely(error)) {
3645274f052SJens Axboe 				page_cache_release(page);
365a0548871SJens Axboe 				if (error == -EEXIST)
366a0548871SJens Axboe 					continue;
3675274f052SJens Axboe 				break;
3685274f052SJens Axboe 			}
369eb20796bSJens Axboe 			/*
370eb20796bSJens Axboe 			 * add_to_page_cache() locks the page, unlock it
371eb20796bSJens Axboe 			 * to avoid convoluting the logic below even more.
372eb20796bSJens Axboe 			 */
373eb20796bSJens Axboe 			unlock_page(page);
3745274f052SJens Axboe 		}
3757480a904SJens Axboe 
37635f3d14dSJens Axboe 		spd.pages[spd.nr_pages++] = page;
377eb20796bSJens Axboe 		index++;
378eb20796bSJens Axboe 	}
379eb20796bSJens Axboe 
380eb20796bSJens Axboe 	/*
381eb20796bSJens Axboe 	 * Now loop over the map and see if we need to start IO on any
382eb20796bSJens Axboe 	 * pages, fill in the partial map, etc.
383eb20796bSJens Axboe 	 */
384eb20796bSJens Axboe 	index = *ppos >> PAGE_CACHE_SHIFT;
385eb20796bSJens Axboe 	nr_pages = spd.nr_pages;
386eb20796bSJens Axboe 	spd.nr_pages = 0;
387eb20796bSJens Axboe 	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
388eb20796bSJens Axboe 		unsigned int this_len;
389eb20796bSJens Axboe 
390eb20796bSJens Axboe 		if (!len)
391eb20796bSJens Axboe 			break;
392eb20796bSJens Axboe 
393eb20796bSJens Axboe 		/*
394eb20796bSJens Axboe 		 * this_len is the max we'll use from this page
395eb20796bSJens Axboe 		 */
396eb20796bSJens Axboe 		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
39735f3d14dSJens Axboe 		page = spd.pages[page_nr];
398eb20796bSJens Axboe 
399a08a166fSFengguang Wu 		if (PageReadahead(page))
400cf914a7dSRusty Russell 			page_cache_async_readahead(mapping, &in->f_ra, in,
401d8983910SFengguang Wu 					page, index, req_pages - page_nr);
402a08a166fSFengguang Wu 
4037480a904SJens Axboe 		/*
4047480a904SJens Axboe 		 * If the page isn't uptodate, we may need to start io on it
4057480a904SJens Axboe 		 */
4067480a904SJens Axboe 		if (!PageUptodate(page)) {
4077480a904SJens Axboe 			lock_page(page);
4087480a904SJens Axboe 
4097480a904SJens Axboe 			/*
41032502b84SMiklos Szeredi 			 * Page was truncated, or invalidated by the
41132502b84SMiklos Szeredi 			 * filesystem.  Redo the find/create, but this time the
41232502b84SMiklos Szeredi 			 * page is kept locked, so there's no chance of another
41332502b84SMiklos Szeredi 			 * race with truncate/invalidate.
4147480a904SJens Axboe 			 */
4157480a904SJens Axboe 			if (!page->mapping) {
4167480a904SJens Axboe 				unlock_page(page);
41732502b84SMiklos Szeredi 				page = find_or_create_page(mapping, index,
41832502b84SMiklos Szeredi 						mapping_gfp_mask(mapping));
41932502b84SMiklos Szeredi 
42032502b84SMiklos Szeredi 				if (!page) {
42132502b84SMiklos Szeredi 					error = -ENOMEM;
4227480a904SJens Axboe 					break;
4237480a904SJens Axboe 				}
42435f3d14dSJens Axboe 				page_cache_release(spd.pages[page_nr]);
42535f3d14dSJens Axboe 				spd.pages[page_nr] = page;
42632502b84SMiklos Szeredi 			}
4277480a904SJens Axboe 			/*
4287480a904SJens Axboe 			 * page was already under io and is now done, great
4297480a904SJens Axboe 			 */
4307480a904SJens Axboe 			if (PageUptodate(page)) {
4317480a904SJens Axboe 				unlock_page(page);
4327480a904SJens Axboe 				goto fill_it;
4337480a904SJens Axboe 			}
4347480a904SJens Axboe 
4357480a904SJens Axboe 			/*
4367480a904SJens Axboe 			 * need to read in the page
4377480a904SJens Axboe 			 */
4387480a904SJens Axboe 			error = mapping->a_ops->readpage(in, page);
4397480a904SJens Axboe 			if (unlikely(error)) {
440eb20796bSJens Axboe 				/*
441eb20796bSJens Axboe 				 * We really should re-lookup the page here,
442eb20796bSJens Axboe 				 * but it complicates things a lot. Instead
443eb20796bSJens Axboe 				 * lets just do what we already stored, and
444eb20796bSJens Axboe 				 * we'll get it the next time we are called.
445eb20796bSJens Axboe 				 */
4467480a904SJens Axboe 				if (error == AOP_TRUNCATED_PAGE)
447eb20796bSJens Axboe 					error = 0;
448eb20796bSJens Axboe 
4497480a904SJens Axboe 				break;
4507480a904SJens Axboe 			}
451620a324bSJens Axboe 		}
452620a324bSJens Axboe fill_it:
45391ad66efSJens Axboe 		/*
454620a324bSJens Axboe 		 * i_size must be checked after PageUptodate.
45591ad66efSJens Axboe 		 */
45691ad66efSJens Axboe 		isize = i_size_read(mapping->host);
45791ad66efSJens Axboe 		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
458eb20796bSJens Axboe 		if (unlikely(!isize || index > end_index))
45991ad66efSJens Axboe 			break;
46091ad66efSJens Axboe 
46191ad66efSJens Axboe 		/*
46291ad66efSJens Axboe 		 * if this is the last page, see if we need to shrink
46391ad66efSJens Axboe 		 * the length and stop
46491ad66efSJens Axboe 		 */
46591ad66efSJens Axboe 		if (end_index == index) {
466475ecadeSHugh Dickins 			unsigned int plen;
467475ecadeSHugh Dickins 
468475ecadeSHugh Dickins 			/*
469475ecadeSHugh Dickins 			 * max good bytes in this page
470475ecadeSHugh Dickins 			 */
471475ecadeSHugh Dickins 			plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
472475ecadeSHugh Dickins 			if (plen <= loff)
47391ad66efSJens Axboe 				break;
474475ecadeSHugh Dickins 
47591ad66efSJens Axboe 			/*
47691ad66efSJens Axboe 			 * force quit after adding this page
47791ad66efSJens Axboe 			 */
478475ecadeSHugh Dickins 			this_len = min(this_len, plen - loff);
479eb20796bSJens Axboe 			len = this_len;
48091ad66efSJens Axboe 		}
481620a324bSJens Axboe 
48235f3d14dSJens Axboe 		spd.partial[page_nr].offset = loff;
48335f3d14dSJens Axboe 		spd.partial[page_nr].len = this_len;
48482aa5d61SJens Axboe 		len -= this_len;
48591ad66efSJens Axboe 		loff = 0;
486eb20796bSJens Axboe 		spd.nr_pages++;
487eb20796bSJens Axboe 		index++;
4885274f052SJens Axboe 	}
4895274f052SJens Axboe 
490eb20796bSJens Axboe 	/*
491475ecadeSHugh Dickins 	 * Release any pages at the end, if we quit early. 'page_nr' is how far
492eb20796bSJens Axboe 	 * we got, 'nr_pages' is how many pages are in the map.
493eb20796bSJens Axboe 	 */
494eb20796bSJens Axboe 	while (page_nr < nr_pages)
49535f3d14dSJens Axboe 		page_cache_release(spd.pages[page_nr++]);
496f4e6b498SFengguang Wu 	in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
497eb20796bSJens Axboe 
498912d35f8SJens Axboe 	if (spd.nr_pages)
49935f3d14dSJens Axboe 		error = splice_to_pipe(pipe, &spd);
50016c523ddSJens Axboe 
501047fe360SEric Dumazet 	splice_shrink_spd(&spd);
5027480a904SJens Axboe 	return error;
5035274f052SJens Axboe }
5045274f052SJens Axboe 
50583f9135bSJens Axboe /**
50683f9135bSJens Axboe  * generic_file_splice_read - splice data from file to a pipe
50783f9135bSJens Axboe  * @in:		file to splice from
508932cc6d4SJens Axboe  * @ppos:	position in @in
50983f9135bSJens Axboe  * @pipe:	pipe to splice to
51083f9135bSJens Axboe  * @len:	number of bytes to splice
51183f9135bSJens Axboe  * @flags:	splice modifier flags
51283f9135bSJens Axboe  *
513932cc6d4SJens Axboe  * Description:
514932cc6d4SJens Axboe  *    Will read pages from given file and fill them into a pipe. Can be
515932cc6d4SJens Axboe  *    used as long as the address_space operations for the source implements
516932cc6d4SJens Axboe  *    a readpage() hook.
517932cc6d4SJens Axboe  *
51883f9135bSJens Axboe  */
519cbb7e577SJens Axboe ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
520cbb7e577SJens Axboe 				 struct pipe_inode_info *pipe, size_t len,
521cbb7e577SJens Axboe 				 unsigned int flags)
5225274f052SJens Axboe {
523d366d398SJens Axboe 	loff_t isize, left;
5248191ecd1SJens Axboe 	int ret;
525d366d398SJens Axboe 
526d366d398SJens Axboe 	isize = i_size_read(in->f_mapping->host);
527d366d398SJens Axboe 	if (unlikely(*ppos >= isize))
528d366d398SJens Axboe 		return 0;
529d366d398SJens Axboe 
530d366d398SJens Axboe 	left = isize - *ppos;
531d366d398SJens Axboe 	if (unlikely(left < len))
532d366d398SJens Axboe 		len = left;
5335274f052SJens Axboe 
534cbb7e577SJens Axboe 	ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
535723590edSMiklos Szeredi 	if (ret > 0) {
536cbb7e577SJens Axboe 		*ppos += ret;
537723590edSMiklos Szeredi 		file_accessed(in);
538723590edSMiklos Szeredi 	}
5395274f052SJens Axboe 
5405274f052SJens Axboe 	return ret;
5415274f052SJens Axboe }
542059a8f37SJens Axboe EXPORT_SYMBOL(generic_file_splice_read);
543059a8f37SJens Axboe 
5446818173bSMiklos Szeredi static const struct pipe_buf_operations default_pipe_buf_ops = {
5456818173bSMiklos Szeredi 	.can_merge = 0,
5466818173bSMiklos Szeredi 	.confirm = generic_pipe_buf_confirm,
5476818173bSMiklos Szeredi 	.release = generic_pipe_buf_release,
5486818173bSMiklos Szeredi 	.steal = generic_pipe_buf_steal,
5496818173bSMiklos Szeredi 	.get = generic_pipe_buf_get,
5506818173bSMiklos Szeredi };
5516818173bSMiklos Szeredi 
/*
 * Steal callback that always refuses: unconditionally returns 1, the
 * same value the other steal callbacks in this file use for failure.
 */
static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	return 1;
}
55728a625cbSMiklos Szeredi 
55828a625cbSMiklos Szeredi /* Pipe buffer operations for a socket and similar. */
55928a625cbSMiklos Szeredi const struct pipe_buf_operations nosteal_pipe_buf_ops = {
56028a625cbSMiklos Szeredi 	.can_merge = 0,
56128a625cbSMiklos Szeredi 	.confirm = generic_pipe_buf_confirm,
56228a625cbSMiklos Szeredi 	.release = generic_pipe_buf_release,
56328a625cbSMiklos Szeredi 	.steal = generic_pipe_buf_nosteal,
56428a625cbSMiklos Szeredi 	.get = generic_pipe_buf_get,
56528a625cbSMiklos Szeredi };
56628a625cbSMiklos Szeredi EXPORT_SYMBOL(nosteal_pipe_buf_ops);
56728a625cbSMiklos Szeredi 
5686818173bSMiklos Szeredi static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
5696818173bSMiklos Szeredi 			    unsigned long vlen, loff_t offset)
5706818173bSMiklos Szeredi {
5716818173bSMiklos Szeredi 	mm_segment_t old_fs;
5726818173bSMiklos Szeredi 	loff_t pos = offset;
5736818173bSMiklos Szeredi 	ssize_t res;
5746818173bSMiklos Szeredi 
5756818173bSMiklos Szeredi 	old_fs = get_fs();
5766818173bSMiklos Szeredi 	set_fs(get_ds());
5776818173bSMiklos Szeredi 	/* The cast to a user pointer is valid due to the set_fs() */
5786818173bSMiklos Szeredi 	res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
5796818173bSMiklos Szeredi 	set_fs(old_fs);
5806818173bSMiklos Szeredi 
5816818173bSMiklos Szeredi 	return res;
5826818173bSMiklos Szeredi }
5836818173bSMiklos Szeredi 
5847bb307e8SAl Viro ssize_t kernel_write(struct file *file, const char *buf, size_t count,
585b2858d7dSMiklos Szeredi 			    loff_t pos)
5860b0a47f5SMiklos Szeredi {
5870b0a47f5SMiklos Szeredi 	mm_segment_t old_fs;
5880b0a47f5SMiklos Szeredi 	ssize_t res;
5890b0a47f5SMiklos Szeredi 
5900b0a47f5SMiklos Szeredi 	old_fs = get_fs();
5910b0a47f5SMiklos Szeredi 	set_fs(get_ds());
5920b0a47f5SMiklos Szeredi 	/* The cast to a user pointer is valid due to the set_fs() */
5937bb307e8SAl Viro 	res = vfs_write(file, (__force const char __user *)buf, count, &pos);
5940b0a47f5SMiklos Szeredi 	set_fs(old_fs);
5950b0a47f5SMiklos Szeredi 
5960b0a47f5SMiklos Szeredi 	return res;
5970b0a47f5SMiklos Szeredi }
5987bb307e8SAl Viro EXPORT_SYMBOL(kernel_write);
5990b0a47f5SMiklos Szeredi 
6006818173bSMiklos Szeredi ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
6016818173bSMiklos Szeredi 				 struct pipe_inode_info *pipe, size_t len,
6026818173bSMiklos Szeredi 				 unsigned int flags)
6036818173bSMiklos Szeredi {
6046818173bSMiklos Szeredi 	unsigned int nr_pages;
6056818173bSMiklos Szeredi 	unsigned int nr_freed;
6066818173bSMiklos Szeredi 	size_t offset;
60735f3d14dSJens Axboe 	struct page *pages[PIPE_DEF_BUFFERS];
60835f3d14dSJens Axboe 	struct partial_page partial[PIPE_DEF_BUFFERS];
60935f3d14dSJens Axboe 	struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
6106818173bSMiklos Szeredi 	ssize_t res;
6116818173bSMiklos Szeredi 	size_t this_len;
6126818173bSMiklos Szeredi 	int error;
6136818173bSMiklos Szeredi 	int i;
6146818173bSMiklos Szeredi 	struct splice_pipe_desc spd = {
6156818173bSMiklos Szeredi 		.pages = pages,
6166818173bSMiklos Szeredi 		.partial = partial,
617047fe360SEric Dumazet 		.nr_pages_max = PIPE_DEF_BUFFERS,
6186818173bSMiklos Szeredi 		.flags = flags,
6196818173bSMiklos Szeredi 		.ops = &default_pipe_buf_ops,
6206818173bSMiklos Szeredi 		.spd_release = spd_release_page,
6216818173bSMiklos Szeredi 	};
6226818173bSMiklos Szeredi 
62335f3d14dSJens Axboe 	if (splice_grow_spd(pipe, &spd))
62435f3d14dSJens Axboe 		return -ENOMEM;
62535f3d14dSJens Axboe 
62635f3d14dSJens Axboe 	res = -ENOMEM;
62735f3d14dSJens Axboe 	vec = __vec;
628047fe360SEric Dumazet 	if (spd.nr_pages_max > PIPE_DEF_BUFFERS) {
629047fe360SEric Dumazet 		vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL);
63035f3d14dSJens Axboe 		if (!vec)
63135f3d14dSJens Axboe 			goto shrink_ret;
63235f3d14dSJens Axboe 	}
63335f3d14dSJens Axboe 
6346818173bSMiklos Szeredi 	offset = *ppos & ~PAGE_CACHE_MASK;
6356818173bSMiklos Szeredi 	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
6366818173bSMiklos Szeredi 
637047fe360SEric Dumazet 	for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
6386818173bSMiklos Szeredi 		struct page *page;
6396818173bSMiklos Szeredi 
6404f231228SJens Axboe 		page = alloc_page(GFP_USER);
6416818173bSMiklos Szeredi 		error = -ENOMEM;
6426818173bSMiklos Szeredi 		if (!page)
6436818173bSMiklos Szeredi 			goto err;
6446818173bSMiklos Szeredi 
6456818173bSMiklos Szeredi 		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
6464f231228SJens Axboe 		vec[i].iov_base = (void __user *) page_address(page);
6476818173bSMiklos Szeredi 		vec[i].iov_len = this_len;
64835f3d14dSJens Axboe 		spd.pages[i] = page;
6496818173bSMiklos Szeredi 		spd.nr_pages++;
6506818173bSMiklos Szeredi 		len -= this_len;
6516818173bSMiklos Szeredi 		offset = 0;
6526818173bSMiklos Szeredi 	}
6536818173bSMiklos Szeredi 
6546818173bSMiklos Szeredi 	res = kernel_readv(in, vec, spd.nr_pages, *ppos);
65577f6bf57SAndrew Morton 	if (res < 0) {
65677f6bf57SAndrew Morton 		error = res;
6576818173bSMiklos Szeredi 		goto err;
65877f6bf57SAndrew Morton 	}
6596818173bSMiklos Szeredi 
6606818173bSMiklos Szeredi 	error = 0;
6616818173bSMiklos Szeredi 	if (!res)
6626818173bSMiklos Szeredi 		goto err;
6636818173bSMiklos Szeredi 
6646818173bSMiklos Szeredi 	nr_freed = 0;
6656818173bSMiklos Szeredi 	for (i = 0; i < spd.nr_pages; i++) {
6666818173bSMiklos Szeredi 		this_len = min_t(size_t, vec[i].iov_len, res);
66735f3d14dSJens Axboe 		spd.partial[i].offset = 0;
66835f3d14dSJens Axboe 		spd.partial[i].len = this_len;
6696818173bSMiklos Szeredi 		if (!this_len) {
67035f3d14dSJens Axboe 			__free_page(spd.pages[i]);
67135f3d14dSJens Axboe 			spd.pages[i] = NULL;
6726818173bSMiklos Szeredi 			nr_freed++;
6736818173bSMiklos Szeredi 		}
6746818173bSMiklos Szeredi 		res -= this_len;
6756818173bSMiklos Szeredi 	}
6766818173bSMiklos Szeredi 	spd.nr_pages -= nr_freed;
6776818173bSMiklos Szeredi 
6786818173bSMiklos Szeredi 	res = splice_to_pipe(pipe, &spd);
6796818173bSMiklos Szeredi 	if (res > 0)
6806818173bSMiklos Szeredi 		*ppos += res;
6816818173bSMiklos Szeredi 
68235f3d14dSJens Axboe shrink_ret:
68335f3d14dSJens Axboe 	if (vec != __vec)
68435f3d14dSJens Axboe 		kfree(vec);
685047fe360SEric Dumazet 	splice_shrink_spd(&spd);
6866818173bSMiklos Szeredi 	return res;
6876818173bSMiklos Szeredi 
6886818173bSMiklos Szeredi err:
6894f231228SJens Axboe 	for (i = 0; i < spd.nr_pages; i++)
69035f3d14dSJens Axboe 		__free_page(spd.pages[i]);
6914f231228SJens Axboe 
69235f3d14dSJens Axboe 	res = error;
69335f3d14dSJens Axboe 	goto shrink_ret;
6946818173bSMiklos Szeredi }
6956818173bSMiklos Szeredi EXPORT_SYMBOL(default_file_splice_read);
6966818173bSMiklos Szeredi 
6975274f052SJens Axboe /*
6984f6f0bd2SJens Axboe  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
699016b661eSJens Axboe  * using sendpage(). Return the number of bytes sent.
7005274f052SJens Axboe  */
70176ad4d11SJens Axboe static int pipe_to_sendpage(struct pipe_inode_info *pipe,
7025274f052SJens Axboe 			    struct pipe_buffer *buf, struct splice_desc *sd)
7035274f052SJens Axboe {
7046a14b90bSJens Axboe 	struct file *file = sd->u.file;
7055274f052SJens Axboe 	loff_t pos = sd->pos;
706a8adbe37SMichał Mirosław 	int more;
7075274f052SJens Axboe 
70872c2d531SAl Viro 	if (!likely(file->f_op->sendpage))
709a8adbe37SMichał Mirosław 		return -EINVAL;
710a8adbe37SMichał Mirosław 
71135f9c09fSEric Dumazet 	more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
712ae62ca7bSEric Dumazet 
713ae62ca7bSEric Dumazet 	if (sd->len < sd->total_len && pipe->nrbufs > 1)
71435f9c09fSEric Dumazet 		more |= MSG_SENDPAGE_NOTLAST;
715ae62ca7bSEric Dumazet 
716a8adbe37SMichał Mirosław 	return file->f_op->sendpage(file, buf->page, buf->offset,
717f84d7519SJens Axboe 				    sd->len, &pos, more);
7185274f052SJens Axboe }
7195274f052SJens Axboe 
7205274f052SJens Axboe /*
7215274f052SJens Axboe  * This is a little more tricky than the file -> pipe splicing. There are
7225274f052SJens Axboe  * basically three cases:
7235274f052SJens Axboe  *
7245274f052SJens Axboe  *	- Destination page already exists in the address space and there
7255274f052SJens Axboe  *	  are users of it. For that case we have no other option that
7265274f052SJens Axboe  *	  copying the data. Tough luck.
7275274f052SJens Axboe  *	- Destination page already exists in the address space, but there
7285274f052SJens Axboe  *	  are no users of it. Make sure it's uptodate, then drop it. Fall
7295274f052SJens Axboe  *	  through to last case.
7305274f052SJens Axboe  *	- Destination page does not exist, we can add the pipe page to
7315274f052SJens Axboe  *	  the page cache and avoid the copy.
7325274f052SJens Axboe  *
73383f9135bSJens Axboe  * If asked to move pages to the output file (SPLICE_F_MOVE is set in
73483f9135bSJens Axboe  * sd->flags), we attempt to migrate pages from the pipe to the output
73583f9135bSJens Axboe  * file address space page cache. This is possible if no one else has
73683f9135bSJens Axboe  * the pipe page referenced outside of the pipe and page cache. If
73783f9135bSJens Axboe  * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
73883f9135bSJens Axboe  * a new page in the output file page cache and fill/dirty that.
7395274f052SJens Axboe  */
740328eaabaSMiklos Szeredi int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
7415274f052SJens Axboe 		 struct splice_desc *sd)
7425274f052SJens Axboe {
7436a14b90bSJens Axboe 	struct file *file = sd->u.file;
7445274f052SJens Axboe 	struct address_space *mapping = file->f_mapping;
745016b661eSJens Axboe 	unsigned int offset, this_len;
7465274f052SJens Axboe 	struct page *page;
747afddba49SNick Piggin 	void *fsdata;
7483e7ee3e7SJens Axboe 	int ret;
7495274f052SJens Axboe 
7505274f052SJens Axboe 	offset = sd->pos & ~PAGE_CACHE_MASK;
7515274f052SJens Axboe 
752016b661eSJens Axboe 	this_len = sd->len;
753016b661eSJens Axboe 	if (this_len + offset > PAGE_CACHE_SIZE)
754016b661eSJens Axboe 		this_len = PAGE_CACHE_SIZE - offset;
755016b661eSJens Axboe 
756afddba49SNick Piggin 	ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
757afddba49SNick Piggin 				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
7589e0267c2SJens Axboe 	if (unlikely(ret))
759afddba49SNick Piggin 		goto out;
7605274f052SJens Axboe 
7610568b409SJens Axboe 	if (buf->page != page) {
762fbb32750SAl Viro 		char *src = kmap_atomic(buf->page);
763e8e3c3d6SCong Wang 		char *dst = kmap_atomic(page);
7645abc97aaSJens Axboe 
765016b661eSJens Axboe 		memcpy(dst + offset, src + buf->offset, this_len);
7665274f052SJens Axboe 		flush_dcache_page(page);
767e8e3c3d6SCong Wang 		kunmap_atomic(dst);
768fbb32750SAl Viro 		kunmap_atomic(src);
7695abc97aaSJens Axboe 	}
770afddba49SNick Piggin 	ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
771afddba49SNick Piggin 				page, fsdata);
7720568b409SJens Axboe out:
7735274f052SJens Axboe 	return ret;
7745274f052SJens Axboe }
775328eaabaSMiklos Szeredi EXPORT_SYMBOL(pipe_to_file);
7765274f052SJens Axboe 
777b3c2d2ddSMiklos Szeredi static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
778b3c2d2ddSMiklos Szeredi {
779b3c2d2ddSMiklos Szeredi 	smp_mb();
780b3c2d2ddSMiklos Szeredi 	if (waitqueue_active(&pipe->wait))
781b3c2d2ddSMiklos Szeredi 		wake_up_interruptible(&pipe->wait);
782b3c2d2ddSMiklos Szeredi 	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
783b3c2d2ddSMiklos Szeredi }
784b3c2d2ddSMiklos Szeredi 
785b3c2d2ddSMiklos Szeredi /**
786b3c2d2ddSMiklos Szeredi  * splice_from_pipe_feed - feed available data from a pipe to a file
787b3c2d2ddSMiklos Szeredi  * @pipe:	pipe to splice from
788b3c2d2ddSMiklos Szeredi  * @sd:		information to @actor
789b3c2d2ddSMiklos Szeredi  * @actor:	handler that splices the data
790b3c2d2ddSMiklos Szeredi  *
791b3c2d2ddSMiklos Szeredi  * Description:
792b3c2d2ddSMiklos Szeredi  *    This function loops over the pipe and calls @actor to do the
793b3c2d2ddSMiklos Szeredi  *    actual moving of a single struct pipe_buffer to the desired
794b3c2d2ddSMiklos Szeredi  *    destination.  It returns when there's no more buffers left in
795b3c2d2ddSMiklos Szeredi  *    the pipe or if the requested number of bytes (@sd->total_len)
796b3c2d2ddSMiklos Szeredi  *    have been copied.  It returns a positive number (one) if the
797b3c2d2ddSMiklos Szeredi  *    pipe needs to be filled with more data, zero if the required
798b3c2d2ddSMiklos Szeredi  *    number of bytes have been copied and -errno on error.
799b3c2d2ddSMiklos Szeredi  *
800b3c2d2ddSMiklos Szeredi  *    This, together with splice_from_pipe_{begin,end,next}, may be
801b3c2d2ddSMiklos Szeredi  *    used to implement the functionality of __splice_from_pipe() when
802b3c2d2ddSMiklos Szeredi  *    locking is required around copying the pipe buffers to the
803b3c2d2ddSMiklos Szeredi  *    destination.
804b3c2d2ddSMiklos Szeredi  */
805b3c2d2ddSMiklos Szeredi int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
806b3c2d2ddSMiklos Szeredi 			  splice_actor *actor)
807b3c2d2ddSMiklos Szeredi {
808b3c2d2ddSMiklos Szeredi 	int ret;
809b3c2d2ddSMiklos Szeredi 
810b3c2d2ddSMiklos Szeredi 	while (pipe->nrbufs) {
811b3c2d2ddSMiklos Szeredi 		struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
812b3c2d2ddSMiklos Szeredi 		const struct pipe_buf_operations *ops = buf->ops;
813b3c2d2ddSMiklos Szeredi 
814b3c2d2ddSMiklos Szeredi 		sd->len = buf->len;
815b3c2d2ddSMiklos Szeredi 		if (sd->len > sd->total_len)
816b3c2d2ddSMiklos Szeredi 			sd->len = sd->total_len;
817b3c2d2ddSMiklos Szeredi 
818a8adbe37SMichał Mirosław 		ret = buf->ops->confirm(pipe, buf);
819a8adbe37SMichał Mirosław 		if (unlikely(ret)) {
820b3c2d2ddSMiklos Szeredi 			if (ret == -ENODATA)
821b3c2d2ddSMiklos Szeredi 				ret = 0;
822b3c2d2ddSMiklos Szeredi 			return ret;
823b3c2d2ddSMiklos Szeredi 		}
824a8adbe37SMichał Mirosław 
825a8adbe37SMichał Mirosław 		ret = actor(pipe, buf, sd);
826a8adbe37SMichał Mirosław 		if (ret <= 0)
827a8adbe37SMichał Mirosław 			return ret;
828a8adbe37SMichał Mirosław 
829b3c2d2ddSMiklos Szeredi 		buf->offset += ret;
830b3c2d2ddSMiklos Szeredi 		buf->len -= ret;
831b3c2d2ddSMiklos Szeredi 
832b3c2d2ddSMiklos Szeredi 		sd->num_spliced += ret;
833b3c2d2ddSMiklos Szeredi 		sd->len -= ret;
834b3c2d2ddSMiklos Szeredi 		sd->pos += ret;
835b3c2d2ddSMiklos Szeredi 		sd->total_len -= ret;
836b3c2d2ddSMiklos Szeredi 
837b3c2d2ddSMiklos Szeredi 		if (!buf->len) {
838b3c2d2ddSMiklos Szeredi 			buf->ops = NULL;
839b3c2d2ddSMiklos Szeredi 			ops->release(pipe, buf);
84035f3d14dSJens Axboe 			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
841b3c2d2ddSMiklos Szeredi 			pipe->nrbufs--;
8426447a3cfSAl Viro 			if (pipe->files)
843b3c2d2ddSMiklos Szeredi 				sd->need_wakeup = true;
844b3c2d2ddSMiklos Szeredi 		}
845b3c2d2ddSMiklos Szeredi 
846b3c2d2ddSMiklos Szeredi 		if (!sd->total_len)
847b3c2d2ddSMiklos Szeredi 			return 0;
848b3c2d2ddSMiklos Szeredi 	}
849b3c2d2ddSMiklos Szeredi 
850b3c2d2ddSMiklos Szeredi 	return 1;
851b3c2d2ddSMiklos Szeredi }
852b3c2d2ddSMiklos Szeredi EXPORT_SYMBOL(splice_from_pipe_feed);
853b3c2d2ddSMiklos Szeredi 
854b3c2d2ddSMiklos Szeredi /**
855b3c2d2ddSMiklos Szeredi  * splice_from_pipe_next - wait for some data to splice from
856b3c2d2ddSMiklos Szeredi  * @pipe:	pipe to splice from
857b3c2d2ddSMiklos Szeredi  * @sd:		information about the splice operation
858b3c2d2ddSMiklos Szeredi  *
859b3c2d2ddSMiklos Szeredi  * Description:
860b3c2d2ddSMiklos Szeredi  *    This function will wait for some data and return a positive
861b3c2d2ddSMiklos Szeredi  *    value (one) if pipe buffers are available.  It will return zero
862b3c2d2ddSMiklos Szeredi  *    or -errno if no more data needs to be spliced.
863b3c2d2ddSMiklos Szeredi  */
864b3c2d2ddSMiklos Szeredi int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
865b3c2d2ddSMiklos Szeredi {
866b3c2d2ddSMiklos Szeredi 	while (!pipe->nrbufs) {
867b3c2d2ddSMiklos Szeredi 		if (!pipe->writers)
868b3c2d2ddSMiklos Szeredi 			return 0;
869b3c2d2ddSMiklos Szeredi 
870b3c2d2ddSMiklos Szeredi 		if (!pipe->waiting_writers && sd->num_spliced)
871b3c2d2ddSMiklos Szeredi 			return 0;
872b3c2d2ddSMiklos Szeredi 
873b3c2d2ddSMiklos Szeredi 		if (sd->flags & SPLICE_F_NONBLOCK)
874b3c2d2ddSMiklos Szeredi 			return -EAGAIN;
875b3c2d2ddSMiklos Szeredi 
876b3c2d2ddSMiklos Szeredi 		if (signal_pending(current))
877b3c2d2ddSMiklos Szeredi 			return -ERESTARTSYS;
878b3c2d2ddSMiklos Szeredi 
879b3c2d2ddSMiklos Szeredi 		if (sd->need_wakeup) {
880b3c2d2ddSMiklos Szeredi 			wakeup_pipe_writers(pipe);
881b3c2d2ddSMiklos Szeredi 			sd->need_wakeup = false;
882b3c2d2ddSMiklos Szeredi 		}
883b3c2d2ddSMiklos Szeredi 
884b3c2d2ddSMiklos Szeredi 		pipe_wait(pipe);
885b3c2d2ddSMiklos Szeredi 	}
886b3c2d2ddSMiklos Szeredi 
887b3c2d2ddSMiklos Szeredi 	return 1;
888b3c2d2ddSMiklos Szeredi }
889b3c2d2ddSMiklos Szeredi EXPORT_SYMBOL(splice_from_pipe_next);
890b3c2d2ddSMiklos Szeredi 
891b3c2d2ddSMiklos Szeredi /**
892b3c2d2ddSMiklos Szeredi  * splice_from_pipe_begin - start splicing from pipe
893b80901bbSRandy Dunlap  * @sd:		information about the splice operation
894b3c2d2ddSMiklos Szeredi  *
895b3c2d2ddSMiklos Szeredi  * Description:
896b3c2d2ddSMiklos Szeredi  *    This function should be called before a loop containing
897b3c2d2ddSMiklos Szeredi  *    splice_from_pipe_next() and splice_from_pipe_feed() to
898b3c2d2ddSMiklos Szeredi  *    initialize the necessary fields of @sd.
899b3c2d2ddSMiklos Szeredi  */
900b3c2d2ddSMiklos Szeredi void splice_from_pipe_begin(struct splice_desc *sd)
901b3c2d2ddSMiklos Szeredi {
902b3c2d2ddSMiklos Szeredi 	sd->num_spliced = 0;
903b3c2d2ddSMiklos Szeredi 	sd->need_wakeup = false;
904b3c2d2ddSMiklos Szeredi }
905b3c2d2ddSMiklos Szeredi EXPORT_SYMBOL(splice_from_pipe_begin);
906b3c2d2ddSMiklos Szeredi 
907b3c2d2ddSMiklos Szeredi /**
908b3c2d2ddSMiklos Szeredi  * splice_from_pipe_end - finish splicing from pipe
909b3c2d2ddSMiklos Szeredi  * @pipe:	pipe to splice from
910b3c2d2ddSMiklos Szeredi  * @sd:		information about the splice operation
911b3c2d2ddSMiklos Szeredi  *
912b3c2d2ddSMiklos Szeredi  * Description:
913b3c2d2ddSMiklos Szeredi  *    This function will wake up pipe writers if necessary.  It should
914b3c2d2ddSMiklos Szeredi  *    be called after a loop containing splice_from_pipe_next() and
915b3c2d2ddSMiklos Szeredi  *    splice_from_pipe_feed().
916b3c2d2ddSMiklos Szeredi  */
917b3c2d2ddSMiklos Szeredi void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
918b3c2d2ddSMiklos Szeredi {
919b3c2d2ddSMiklos Szeredi 	if (sd->need_wakeup)
920b3c2d2ddSMiklos Szeredi 		wakeup_pipe_writers(pipe);
921b3c2d2ddSMiklos Szeredi }
922b3c2d2ddSMiklos Szeredi EXPORT_SYMBOL(splice_from_pipe_end);
923b3c2d2ddSMiklos Szeredi 
924932cc6d4SJens Axboe /**
925932cc6d4SJens Axboe  * __splice_from_pipe - splice data from a pipe to given actor
926932cc6d4SJens Axboe  * @pipe:	pipe to splice from
927932cc6d4SJens Axboe  * @sd:		information to @actor
928932cc6d4SJens Axboe  * @actor:	handler that splices the data
929932cc6d4SJens Axboe  *
930932cc6d4SJens Axboe  * Description:
931932cc6d4SJens Axboe  *    This function does little more than loop over the pipe and call
932932cc6d4SJens Axboe  *    @actor to do the actual moving of a single struct pipe_buffer to
933932cc6d4SJens Axboe  *    the desired destination. See pipe_to_file, pipe_to_sendpage, or
934932cc6d4SJens Axboe  *    pipe_to_user.
935932cc6d4SJens Axboe  *
93683f9135bSJens Axboe  */
937c66ab6faSJens Axboe ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
938c66ab6faSJens Axboe 			   splice_actor *actor)
9395274f052SJens Axboe {
940b3c2d2ddSMiklos Szeredi 	int ret;
9415274f052SJens Axboe 
942b3c2d2ddSMiklos Szeredi 	splice_from_pipe_begin(sd);
943b3c2d2ddSMiklos Szeredi 	do {
944b3c2d2ddSMiklos Szeredi 		ret = splice_from_pipe_next(pipe, sd);
945b3c2d2ddSMiklos Szeredi 		if (ret > 0)
946b3c2d2ddSMiklos Szeredi 			ret = splice_from_pipe_feed(pipe, sd, actor);
947b3c2d2ddSMiklos Szeredi 	} while (ret > 0);
948b3c2d2ddSMiklos Szeredi 	splice_from_pipe_end(pipe, sd);
9495274f052SJens Axboe 
950b3c2d2ddSMiklos Szeredi 	return sd->num_spliced ? sd->num_spliced : ret;
9515274f052SJens Axboe }
95240bee44eSMark Fasheh EXPORT_SYMBOL(__splice_from_pipe);
9535274f052SJens Axboe 
954932cc6d4SJens Axboe /**
955932cc6d4SJens Axboe  * splice_from_pipe - splice data from a pipe to a file
956932cc6d4SJens Axboe  * @pipe:	pipe to splice from
957932cc6d4SJens Axboe  * @out:	file to splice to
958932cc6d4SJens Axboe  * @ppos:	position in @out
959932cc6d4SJens Axboe  * @len:	how many bytes to splice
960932cc6d4SJens Axboe  * @flags:	splice modifier flags
961932cc6d4SJens Axboe  * @actor:	handler that splices the data
962932cc6d4SJens Axboe  *
963932cc6d4SJens Axboe  * Description:
9642933970bSMiklos Szeredi  *    See __splice_from_pipe. This function locks the pipe inode,
965932cc6d4SJens Axboe  *    otherwise it's identical to __splice_from_pipe().
966932cc6d4SJens Axboe  *
967932cc6d4SJens Axboe  */
9686da61809SMark Fasheh ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
9696da61809SMark Fasheh 			 loff_t *ppos, size_t len, unsigned int flags,
9706da61809SMark Fasheh 			 splice_actor *actor)
9716da61809SMark Fasheh {
9726da61809SMark Fasheh 	ssize_t ret;
973c66ab6faSJens Axboe 	struct splice_desc sd = {
974c66ab6faSJens Axboe 		.total_len = len,
975c66ab6faSJens Axboe 		.flags = flags,
976c66ab6faSJens Axboe 		.pos = *ppos,
9776a14b90bSJens Axboe 		.u.file = out,
978c66ab6faSJens Axboe 	};
9796da61809SMark Fasheh 
98061e0d47cSMiklos Szeredi 	pipe_lock(pipe);
981c66ab6faSJens Axboe 	ret = __splice_from_pipe(pipe, &sd, actor);
98261e0d47cSMiklos Szeredi 	pipe_unlock(pipe);
9836da61809SMark Fasheh 
9846da61809SMark Fasheh 	return ret;
9856da61809SMark Fasheh }
9866da61809SMark Fasheh 
9876da61809SMark Fasheh /**
98883f9135bSJens Axboe  * generic_file_splice_write - splice data from a pipe to a file
9893a326a2cSIngo Molnar  * @pipe:	pipe info
99083f9135bSJens Axboe  * @out:	file to write to
991932cc6d4SJens Axboe  * @ppos:	position in @out
99283f9135bSJens Axboe  * @len:	number of bytes to splice
99383f9135bSJens Axboe  * @flags:	splice modifier flags
99483f9135bSJens Axboe  *
995932cc6d4SJens Axboe  * Description:
99683f9135bSJens Axboe  *    Will either move or copy pages (determined by @flags options) from
99783f9135bSJens Axboe  *    the given pipe inode to the given file.
99883f9135bSJens Axboe  *
99983f9135bSJens Axboe  */
10003a326a2cSIngo Molnar ssize_t
10013a326a2cSIngo Molnar generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
1002cbb7e577SJens Axboe 			  loff_t *ppos, size_t len, unsigned int flags)
10035274f052SJens Axboe {
10044f6f0bd2SJens Axboe 	struct address_space *mapping = out->f_mapping;
10058c34e2d6SJens Axboe 	struct inode *inode = mapping->host;
10067f3d4ee1SMiklos Szeredi 	struct splice_desc sd = {
10077f3d4ee1SMiklos Szeredi 		.total_len = len,
10087f3d4ee1SMiklos Szeredi 		.flags = flags,
10097f3d4ee1SMiklos Szeredi 		.pos = *ppos,
10107f3d4ee1SMiklos Szeredi 		.u.file = out,
10117f3d4ee1SMiklos Szeredi 	};
10123a326a2cSIngo Molnar 	ssize_t ret;
10138c34e2d6SJens Axboe 
101461e0d47cSMiklos Szeredi 	pipe_lock(pipe);
1015eb443e5aSMiklos Szeredi 
1016eb443e5aSMiklos Szeredi 	splice_from_pipe_begin(&sd);
1017eb443e5aSMiklos Szeredi 	do {
1018eb443e5aSMiklos Szeredi 		ret = splice_from_pipe_next(pipe, &sd);
1019eb443e5aSMiklos Szeredi 		if (ret <= 0)
1020eb443e5aSMiklos Szeredi 			break;
1021eb443e5aSMiklos Szeredi 
1022eb443e5aSMiklos Szeredi 		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
1023eb443e5aSMiklos Szeredi 		ret = file_remove_suid(out);
1024723590edSMiklos Szeredi 		if (!ret) {
1025c3b2da31SJosef Bacik 			ret = file_update_time(out);
1026c3b2da31SJosef Bacik 			if (!ret)
1027c3b2da31SJosef Bacik 				ret = splice_from_pipe_feed(pipe, &sd,
1028c3b2da31SJosef Bacik 							    pipe_to_file);
1029723590edSMiklos Szeredi 		}
1030eb443e5aSMiklos Szeredi 		mutex_unlock(&inode->i_mutex);
1031eb443e5aSMiklos Szeredi 	} while (ret > 0);
1032eb443e5aSMiklos Szeredi 	splice_from_pipe_end(pipe, &sd);
1033eb443e5aSMiklos Szeredi 
103461e0d47cSMiklos Szeredi 	pipe_unlock(pipe);
1035eb443e5aSMiklos Szeredi 
1036eb443e5aSMiklos Szeredi 	if (sd.num_spliced)
1037eb443e5aSMiklos Szeredi 		ret = sd.num_spliced;
1038eb443e5aSMiklos Szeredi 
1039a4514ebdSJens Axboe 	if (ret > 0) {
10407f3d4ee1SMiklos Szeredi 		int err;
10417f3d4ee1SMiklos Szeredi 
1042148f948bSJan Kara 		err = generic_write_sync(out, *ppos, ret);
10434f6f0bd2SJens Axboe 		if (err)
10444f6f0bd2SJens Axboe 			ret = err;
1045148f948bSJan Kara 		else
1046148f948bSJan Kara 			*ppos += ret;
1047d0e1d66bSNamjae Jeon 		balance_dirty_pages_ratelimited(mapping);
1048a4514ebdSJens Axboe 	}
10494f6f0bd2SJens Axboe 
10504f6f0bd2SJens Axboe 	return ret;
10515274f052SJens Axboe }
10525274f052SJens Axboe 
1053059a8f37SJens Axboe EXPORT_SYMBOL(generic_file_splice_write);
1054059a8f37SJens Axboe 
1055b2858d7dSMiklos Szeredi static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1056b2858d7dSMiklos Szeredi 			  struct splice_desc *sd)
10570b0a47f5SMiklos Szeredi {
1058b2858d7dSMiklos Szeredi 	int ret;
1059b2858d7dSMiklos Szeredi 	void *data;
106006ae43f3SAl Viro 	loff_t tmp = sd->pos;
1061b2858d7dSMiklos Szeredi 
1062fbb32750SAl Viro 	data = kmap(buf->page);
106306ae43f3SAl Viro 	ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
1064fbb32750SAl Viro 	kunmap(buf->page);
1065b2858d7dSMiklos Szeredi 
1066b2858d7dSMiklos Szeredi 	return ret;
10670b0a47f5SMiklos Szeredi }
10680b0a47f5SMiklos Szeredi 
10690b0a47f5SMiklos Szeredi static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
10700b0a47f5SMiklos Szeredi 					 struct file *out, loff_t *ppos,
10710b0a47f5SMiklos Szeredi 					 size_t len, unsigned int flags)
10720b0a47f5SMiklos Szeredi {
1073b2858d7dSMiklos Szeredi 	ssize_t ret;
10740b0a47f5SMiklos Szeredi 
1075b2858d7dSMiklos Szeredi 	ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
1076b2858d7dSMiklos Szeredi 	if (ret > 0)
1077b2858d7dSMiklos Szeredi 		*ppos += ret;
10780b0a47f5SMiklos Szeredi 
1079b2858d7dSMiklos Szeredi 	return ret;
10800b0a47f5SMiklos Szeredi }
10810b0a47f5SMiklos Szeredi 
108283f9135bSJens Axboe /**
108383f9135bSJens Axboe  * generic_splice_sendpage - splice data from a pipe to a socket
1084932cc6d4SJens Axboe  * @pipe:	pipe to splice from
108583f9135bSJens Axboe  * @out:	socket to write to
1086932cc6d4SJens Axboe  * @ppos:	position in @out
108783f9135bSJens Axboe  * @len:	number of bytes to splice
108883f9135bSJens Axboe  * @flags:	splice modifier flags
108983f9135bSJens Axboe  *
1090932cc6d4SJens Axboe  * Description:
109183f9135bSJens Axboe  *    Will send @len bytes from the pipe to a network socket. No data copying
109283f9135bSJens Axboe  *    is involved.
109383f9135bSJens Axboe  *
109483f9135bSJens Axboe  */
10953a326a2cSIngo Molnar ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
1096cbb7e577SJens Axboe 				loff_t *ppos, size_t len, unsigned int flags)
10975274f052SJens Axboe {
109800522fb4SJens Axboe 	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
10995274f052SJens Axboe }
11005274f052SJens Axboe 
1101059a8f37SJens Axboe EXPORT_SYMBOL(generic_splice_sendpage);
1102a0f06780SJeff Garzik 
110383f9135bSJens Axboe /*
110483f9135bSJens Axboe  * Attempt to initiate a splice from pipe to file.
110583f9135bSJens Axboe  */
11063a326a2cSIngo Molnar static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
1107cbb7e577SJens Axboe 			   loff_t *ppos, size_t len, unsigned int flags)
11085274f052SJens Axboe {
11090b0a47f5SMiklos Szeredi 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
11100b0a47f5SMiklos Szeredi 				loff_t *, size_t, unsigned int);
11115274f052SJens Axboe 
111272c2d531SAl Viro 	if (out->f_op->splice_write)
11130b0a47f5SMiklos Szeredi 		splice_write = out->f_op->splice_write;
1114cc56f7deSChangli Gao 	else
11150b0a47f5SMiklos Szeredi 		splice_write = default_file_splice_write;
11160b0a47f5SMiklos Szeredi 
1117500368f7SAl Viro 	return splice_write(pipe, out, ppos, len, flags);
11185274f052SJens Axboe }
11195274f052SJens Axboe 
112083f9135bSJens Axboe /*
112183f9135bSJens Axboe  * Attempt to initiate a splice from a file to a pipe.
112283f9135bSJens Axboe  */
1123cbb7e577SJens Axboe static long do_splice_to(struct file *in, loff_t *ppos,
1124cbb7e577SJens Axboe 			 struct pipe_inode_info *pipe, size_t len,
1125cbb7e577SJens Axboe 			 unsigned int flags)
11265274f052SJens Axboe {
11276818173bSMiklos Szeredi 	ssize_t (*splice_read)(struct file *, loff_t *,
11286818173bSMiklos Szeredi 			       struct pipe_inode_info *, size_t, unsigned int);
11295274f052SJens Axboe 	int ret;
11305274f052SJens Axboe 
113149570e9bSJens Axboe 	if (unlikely(!(in->f_mode & FMODE_READ)))
11325274f052SJens Axboe 		return -EBADF;
11335274f052SJens Axboe 
1134cbb7e577SJens Axboe 	ret = rw_verify_area(READ, in, ppos, len);
11355274f052SJens Axboe 	if (unlikely(ret < 0))
11365274f052SJens Axboe 		return ret;
11375274f052SJens Axboe 
113872c2d531SAl Viro 	if (in->f_op->splice_read)
11396818173bSMiklos Szeredi 		splice_read = in->f_op->splice_read;
1140cc56f7deSChangli Gao 	else
11416818173bSMiklos Szeredi 		splice_read = default_file_splice_read;
11426818173bSMiklos Szeredi 
11436818173bSMiklos Szeredi 	return splice_read(in, ppos, pipe, len, flags);
11445274f052SJens Axboe }
11455274f052SJens Axboe 
1146932cc6d4SJens Axboe /**
1147932cc6d4SJens Axboe  * splice_direct_to_actor - splices data directly between two non-pipes
1148932cc6d4SJens Axboe  * @in:		file to splice from
1149932cc6d4SJens Axboe  * @sd:		actor information on where to splice to
1150932cc6d4SJens Axboe  * @actor:	handles the data splicing
1151932cc6d4SJens Axboe  *
1152932cc6d4SJens Axboe  * Description:
1153932cc6d4SJens Axboe  *    This is a special case helper to splice directly between two
1154932cc6d4SJens Axboe  *    points, without requiring an explicit pipe. Internally an allocated
1155932cc6d4SJens Axboe  *    pipe is cached in the process, and reused during the lifetime of
1156932cc6d4SJens Axboe  *    that process.
1157932cc6d4SJens Axboe  *
1158c66ab6faSJens Axboe  */
1159c66ab6faSJens Axboe ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1160c66ab6faSJens Axboe 			       splice_direct_actor *actor)
1161b92ce558SJens Axboe {
1162b92ce558SJens Axboe 	struct pipe_inode_info *pipe;
1163b92ce558SJens Axboe 	long ret, bytes;
1164b92ce558SJens Axboe 	umode_t i_mode;
1165c66ab6faSJens Axboe 	size_t len;
1166c66ab6faSJens Axboe 	int i, flags;
1167b92ce558SJens Axboe 
1168b92ce558SJens Axboe 	/*
1169b92ce558SJens Axboe 	 * We require the input being a regular file, as we don't want to
1170b92ce558SJens Axboe 	 * randomly drop data for eg socket -> socket splicing. Use the
1171b92ce558SJens Axboe 	 * piped splicing for that!
1172b92ce558SJens Axboe 	 */
1173496ad9aaSAl Viro 	i_mode = file_inode(in)->i_mode;
1174b92ce558SJens Axboe 	if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
1175b92ce558SJens Axboe 		return -EINVAL;
1176b92ce558SJens Axboe 
1177b92ce558SJens Axboe 	/*
1178b92ce558SJens Axboe 	 * neither in nor out is a pipe, setup an internal pipe attached to
1179b92ce558SJens Axboe 	 * 'out' and transfer the wanted data from 'in' to 'out' through that
1180b92ce558SJens Axboe 	 */
1181b92ce558SJens Axboe 	pipe = current->splice_pipe;
118249570e9bSJens Axboe 	if (unlikely(!pipe)) {
11837bee130eSAl Viro 		pipe = alloc_pipe_info();
1184b92ce558SJens Axboe 		if (!pipe)
1185b92ce558SJens Axboe 			return -ENOMEM;
1186b92ce558SJens Axboe 
1187b92ce558SJens Axboe 		/*
1188b92ce558SJens Axboe 		 * We don't have an immediate reader, but we'll read the stuff
118900522fb4SJens Axboe 		 * out of the pipe right after the splice_to_pipe(). So set
1190b92ce558SJens Axboe 		 * PIPE_READERS appropriately.
1191b92ce558SJens Axboe 		 */
1192b92ce558SJens Axboe 		pipe->readers = 1;
1193b92ce558SJens Axboe 
1194b92ce558SJens Axboe 		current->splice_pipe = pipe;
1195b92ce558SJens Axboe 	}
1196b92ce558SJens Axboe 
1197b92ce558SJens Axboe 	/*
119873d62d83SIngo Molnar 	 * Do the splice.
1199b92ce558SJens Axboe 	 */
1200b92ce558SJens Axboe 	ret = 0;
1201b92ce558SJens Axboe 	bytes = 0;
1202c66ab6faSJens Axboe 	len = sd->total_len;
1203c66ab6faSJens Axboe 	flags = sd->flags;
1204c66ab6faSJens Axboe 
1205c66ab6faSJens Axboe 	/*
1206c66ab6faSJens Axboe 	 * Don't block on output, we have to drain the direct pipe.
1207c66ab6faSJens Axboe 	 */
1208c66ab6faSJens Axboe 	sd->flags &= ~SPLICE_F_NONBLOCK;
1209b92ce558SJens Axboe 
1210b92ce558SJens Axboe 	while (len) {
121151a92c0fSJens Axboe 		size_t read_len;
1212a82c53a0STom Zanussi 		loff_t pos = sd->pos, prev_pos = pos;
1213b92ce558SJens Axboe 
1214bcd4f3acSJens Axboe 		ret = do_splice_to(in, &pos, pipe, len, flags);
121551a92c0fSJens Axboe 		if (unlikely(ret <= 0))
1216b92ce558SJens Axboe 			goto out_release;
1217b92ce558SJens Axboe 
1218b92ce558SJens Axboe 		read_len = ret;
1219c66ab6faSJens Axboe 		sd->total_len = read_len;
1220b92ce558SJens Axboe 
1221b92ce558SJens Axboe 		/*
1222b92ce558SJens Axboe 		 * NOTE: nonblocking mode only applies to the input. We
1223b92ce558SJens Axboe 		 * must not do the output in nonblocking mode as then we
1224b92ce558SJens Axboe 		 * could get stuck data in the internal pipe:
1225b92ce558SJens Axboe 		 */
1226c66ab6faSJens Axboe 		ret = actor(pipe, sd);
1227a82c53a0STom Zanussi 		if (unlikely(ret <= 0)) {
1228a82c53a0STom Zanussi 			sd->pos = prev_pos;
1229b92ce558SJens Axboe 			goto out_release;
1230a82c53a0STom Zanussi 		}
1231b92ce558SJens Axboe 
1232b92ce558SJens Axboe 		bytes += ret;
1233b92ce558SJens Axboe 		len -= ret;
1234bcd4f3acSJens Axboe 		sd->pos = pos;
1235b92ce558SJens Axboe 
1236a82c53a0STom Zanussi 		if (ret < read_len) {
1237a82c53a0STom Zanussi 			sd->pos = prev_pos + ret;
123851a92c0fSJens Axboe 			goto out_release;
1239b92ce558SJens Axboe 		}
1240a82c53a0STom Zanussi 	}
1241b92ce558SJens Axboe 
12429e97198dSJens Axboe done:
1243b92ce558SJens Axboe 	pipe->nrbufs = pipe->curbuf = 0;
12449e97198dSJens Axboe 	file_accessed(in);
1245b92ce558SJens Axboe 	return bytes;
1246b92ce558SJens Axboe 
1247b92ce558SJens Axboe out_release:
1248b92ce558SJens Axboe 	/*
1249b92ce558SJens Axboe 	 * If we did an incomplete transfer we must release
1250b92ce558SJens Axboe 	 * the pipe buffers in question:
1251b92ce558SJens Axboe 	 */
125235f3d14dSJens Axboe 	for (i = 0; i < pipe->buffers; i++) {
1253b92ce558SJens Axboe 		struct pipe_buffer *buf = pipe->bufs + i;
1254b92ce558SJens Axboe 
1255b92ce558SJens Axboe 		if (buf->ops) {
1256b92ce558SJens Axboe 			buf->ops->release(pipe, buf);
1257b92ce558SJens Axboe 			buf->ops = NULL;
1258b92ce558SJens Axboe 		}
1259b92ce558SJens Axboe 	}
1260b92ce558SJens Axboe 
12619e97198dSJens Axboe 	if (!bytes)
12629e97198dSJens Axboe 		bytes = ret;
1263b92ce558SJens Axboe 
12649e97198dSJens Axboe 	goto done;
1265c66ab6faSJens Axboe }
1266c66ab6faSJens Axboe EXPORT_SYMBOL(splice_direct_to_actor);
1267c66ab6faSJens Axboe 
1268c66ab6faSJens Axboe static int direct_splice_actor(struct pipe_inode_info *pipe,
1269c66ab6faSJens Axboe 			       struct splice_desc *sd)
1270c66ab6faSJens Axboe {
12716a14b90bSJens Axboe 	struct file *file = sd->u.file;
1272c66ab6faSJens Axboe 
12737995bd28SAl Viro 	return do_splice_from(pipe, file, sd->opos, sd->total_len,
12742cb4b05eSChangli Gao 			      sd->flags);
1275c66ab6faSJens Axboe }
1276c66ab6faSJens Axboe 
1277932cc6d4SJens Axboe /**
1278932cc6d4SJens Axboe  * do_splice_direct - splices data directly between two files
1279932cc6d4SJens Axboe  * @in:		file to splice from
1280932cc6d4SJens Axboe  * @ppos:	input file offset
1281932cc6d4SJens Axboe  * @out:	file to splice to
1282acdb37c3SRandy Dunlap  * @opos:	output file offset
1283932cc6d4SJens Axboe  * @len:	number of bytes to splice
1284932cc6d4SJens Axboe  * @flags:	splice modifier flags
1285932cc6d4SJens Axboe  *
1286932cc6d4SJens Axboe  * Description:
1287932cc6d4SJens Axboe  *    For use by do_sendfile(). splice can easily emulate sendfile, but
1288932cc6d4SJens Axboe  *    doing it in the application would incur an extra system call
1289932cc6d4SJens Axboe  *    (splice in + splice out, as compared to just sendfile()). So this helper
1290932cc6d4SJens Axboe  *    can splice directly through a process-private pipe.
1291932cc6d4SJens Axboe  *
1292932cc6d4SJens Axboe  */
1293c66ab6faSJens Axboe long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
12947995bd28SAl Viro 		      loff_t *opos, size_t len, unsigned int flags)
1295c66ab6faSJens Axboe {
1296c66ab6faSJens Axboe 	struct splice_desc sd = {
1297c66ab6faSJens Axboe 		.len		= len,
1298c66ab6faSJens Axboe 		.total_len	= len,
1299c66ab6faSJens Axboe 		.flags		= flags,
1300c66ab6faSJens Axboe 		.pos		= *ppos,
13016a14b90bSJens Axboe 		.u.file		= out,
13027995bd28SAl Viro 		.opos		= opos,
1303c66ab6faSJens Axboe 	};
130451a92c0fSJens Axboe 	long ret;
1305c66ab6faSJens Axboe 
130618c67cb9SAl Viro 	if (unlikely(!(out->f_mode & FMODE_WRITE)))
130718c67cb9SAl Viro 		return -EBADF;
130818c67cb9SAl Viro 
130918c67cb9SAl Viro 	if (unlikely(out->f_flags & O_APPEND))
131018c67cb9SAl Viro 		return -EINVAL;
131118c67cb9SAl Viro 
131218c67cb9SAl Viro 	ret = rw_verify_area(WRITE, out, opos, len);
131318c67cb9SAl Viro 	if (unlikely(ret < 0))
131418c67cb9SAl Viro 		return ret;
131518c67cb9SAl Viro 
1316c66ab6faSJens Axboe 	ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
131751a92c0fSJens Axboe 	if (ret > 0)
1318a82c53a0STom Zanussi 		*ppos = sd.pos;
131951a92c0fSJens Axboe 
1320c66ab6faSJens Axboe 	return ret;
1321b92ce558SJens Axboe }
1322b92ce558SJens Axboe 
13237c77f0b3SMiklos Szeredi static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
13247c77f0b3SMiklos Szeredi 			       struct pipe_inode_info *opipe,
13257c77f0b3SMiklos Szeredi 			       size_t len, unsigned int flags);
1326ddac0d39SJens Axboe 
1327ddac0d39SJens Axboe /*
132883f9135bSJens Axboe  * Determine where to splice to/from.
132983f9135bSJens Axboe  */
1330529565dcSIngo Molnar static long do_splice(struct file *in, loff_t __user *off_in,
1331529565dcSIngo Molnar 		      struct file *out, loff_t __user *off_out,
1332529565dcSIngo Molnar 		      size_t len, unsigned int flags)
13335274f052SJens Axboe {
13347c77f0b3SMiklos Szeredi 	struct pipe_inode_info *ipipe;
13357c77f0b3SMiklos Szeredi 	struct pipe_inode_info *opipe;
13367995bd28SAl Viro 	loff_t offset;
1337a4514ebdSJens Axboe 	long ret;
13385274f052SJens Axboe 
133971993e62SLinus Torvalds 	ipipe = get_pipe_info(in);
134071993e62SLinus Torvalds 	opipe = get_pipe_info(out);
13417c77f0b3SMiklos Szeredi 
13427c77f0b3SMiklos Szeredi 	if (ipipe && opipe) {
13437c77f0b3SMiklos Szeredi 		if (off_in || off_out)
13447c77f0b3SMiklos Szeredi 			return -ESPIPE;
13457c77f0b3SMiklos Szeredi 
13467c77f0b3SMiklos Szeredi 		if (!(in->f_mode & FMODE_READ))
13477c77f0b3SMiklos Szeredi 			return -EBADF;
13487c77f0b3SMiklos Szeredi 
13497c77f0b3SMiklos Szeredi 		if (!(out->f_mode & FMODE_WRITE))
13507c77f0b3SMiklos Szeredi 			return -EBADF;
13517c77f0b3SMiklos Szeredi 
13527c77f0b3SMiklos Szeredi 		/* Splicing to self would be fun, but... */
13537c77f0b3SMiklos Szeredi 		if (ipipe == opipe)
13547c77f0b3SMiklos Szeredi 			return -EINVAL;
13557c77f0b3SMiklos Szeredi 
13567c77f0b3SMiklos Szeredi 		return splice_pipe_to_pipe(ipipe, opipe, len, flags);
13577c77f0b3SMiklos Szeredi 	}
13587c77f0b3SMiklos Szeredi 
13597c77f0b3SMiklos Szeredi 	if (ipipe) {
1360529565dcSIngo Molnar 		if (off_in)
1361529565dcSIngo Molnar 			return -ESPIPE;
1362b92ce558SJens Axboe 		if (off_out) {
136319c9a49bSChangli Gao 			if (!(out->f_mode & FMODE_PWRITE))
1364b92ce558SJens Axboe 				return -EINVAL;
1365cbb7e577SJens Axboe 			if (copy_from_user(&offset, off_out, sizeof(loff_t)))
1366b92ce558SJens Axboe 				return -EFAULT;
13677995bd28SAl Viro 		} else {
13687995bd28SAl Viro 			offset = out->f_pos;
13697995bd28SAl Viro 		}
1370529565dcSIngo Molnar 
137118c67cb9SAl Viro 		if (unlikely(!(out->f_mode & FMODE_WRITE)))
137218c67cb9SAl Viro 			return -EBADF;
137318c67cb9SAl Viro 
137418c67cb9SAl Viro 		if (unlikely(out->f_flags & O_APPEND))
137518c67cb9SAl Viro 			return -EINVAL;
137618c67cb9SAl Viro 
137718c67cb9SAl Viro 		ret = rw_verify_area(WRITE, out, &offset, len);
137818c67cb9SAl Viro 		if (unlikely(ret < 0))
137918c67cb9SAl Viro 			return ret;
138018c67cb9SAl Viro 
1381500368f7SAl Viro 		file_start_write(out);
13827995bd28SAl Viro 		ret = do_splice_from(ipipe, out, &offset, len, flags);
1383500368f7SAl Viro 		file_end_write(out);
1384a4514ebdSJens Axboe 
13857995bd28SAl Viro 		if (!off_out)
13867995bd28SAl Viro 			out->f_pos = offset;
13877995bd28SAl Viro 		else if (copy_to_user(off_out, &offset, sizeof(loff_t)))
1388a4514ebdSJens Axboe 			ret = -EFAULT;
1389a4514ebdSJens Axboe 
1390a4514ebdSJens Axboe 		return ret;
1391529565dcSIngo Molnar 	}
13925274f052SJens Axboe 
13937c77f0b3SMiklos Szeredi 	if (opipe) {
1394529565dcSIngo Molnar 		if (off_out)
1395529565dcSIngo Molnar 			return -ESPIPE;
1396b92ce558SJens Axboe 		if (off_in) {
139719c9a49bSChangli Gao 			if (!(in->f_mode & FMODE_PREAD))
1398b92ce558SJens Axboe 				return -EINVAL;
1399cbb7e577SJens Axboe 			if (copy_from_user(&offset, off_in, sizeof(loff_t)))
1400b92ce558SJens Axboe 				return -EFAULT;
14017995bd28SAl Viro 		} else {
14027995bd28SAl Viro 			offset = in->f_pos;
14037995bd28SAl Viro 		}
1404529565dcSIngo Molnar 
14057995bd28SAl Viro 		ret = do_splice_to(in, &offset, opipe, len, flags);
1406a4514ebdSJens Axboe 
14077995bd28SAl Viro 		if (!off_in)
14087995bd28SAl Viro 			in->f_pos = offset;
14097995bd28SAl Viro 		else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
1410a4514ebdSJens Axboe 			ret = -EFAULT;
1411a4514ebdSJens Axboe 
1412a4514ebdSJens Axboe 		return ret;
1413529565dcSIngo Molnar 	}
14145274f052SJens Axboe 
14155274f052SJens Axboe 	return -EINVAL;
14165274f052SJens Axboe }
14175274f052SJens Axboe 
1418912d35f8SJens Axboe /*
1419912d35f8SJens Axboe  * Map an iov into an array of pages and offset/length tupples. With the
1420912d35f8SJens Axboe  * partial_page structure, we can map several non-contiguous ranges into
1421912d35f8SJens Axboe  * our ones pages[] map instead of splitting that operation into pieces.
1422912d35f8SJens Axboe  * Could easily be exported as a generic helper for other users, in which
1423912d35f8SJens Axboe  * case one would probably want to add a 'max_nr_pages' parameter as well.
1424912d35f8SJens Axboe  */
1425912d35f8SJens Axboe static int get_iovec_page_array(const struct iovec __user *iov,
1426912d35f8SJens Axboe 				unsigned int nr_vecs, struct page **pages,
1427bd1a68b5SEric Dumazet 				struct partial_page *partial, bool aligned,
142835f3d14dSJens Axboe 				unsigned int pipe_buffers)
1429912d35f8SJens Axboe {
1430912d35f8SJens Axboe 	int buffers = 0, error = 0;
1431912d35f8SJens Axboe 
1432912d35f8SJens Axboe 	while (nr_vecs) {
1433912d35f8SJens Axboe 		unsigned long off, npages;
143475723957SLinus Torvalds 		struct iovec entry;
1435912d35f8SJens Axboe 		void __user *base;
1436912d35f8SJens Axboe 		size_t len;
1437912d35f8SJens Axboe 		int i;
1438912d35f8SJens Axboe 
143975723957SLinus Torvalds 		error = -EFAULT;
1440bc40d73cSNick Piggin 		if (copy_from_user(&entry, iov, sizeof(entry)))
1441912d35f8SJens Axboe 			break;
144275723957SLinus Torvalds 
144375723957SLinus Torvalds 		base = entry.iov_base;
144475723957SLinus Torvalds 		len = entry.iov_len;
1445912d35f8SJens Axboe 
1446912d35f8SJens Axboe 		/*
1447912d35f8SJens Axboe 		 * Sanity check this iovec. 0 read succeeds.
1448912d35f8SJens Axboe 		 */
144975723957SLinus Torvalds 		error = 0;
1450912d35f8SJens Axboe 		if (unlikely(!len))
1451912d35f8SJens Axboe 			break;
1452912d35f8SJens Axboe 		error = -EFAULT;
1453712a30e6SBastian Blank 		if (!access_ok(VERIFY_READ, base, len))
1454912d35f8SJens Axboe 			break;
1455912d35f8SJens Axboe 
1456912d35f8SJens Axboe 		/*
1457912d35f8SJens Axboe 		 * Get this base offset and number of pages, then map
1458912d35f8SJens Axboe 		 * in the user pages.
1459912d35f8SJens Axboe 		 */
1460912d35f8SJens Axboe 		off = (unsigned long) base & ~PAGE_MASK;
14617afa6fd0SJens Axboe 
14627afa6fd0SJens Axboe 		/*
14637afa6fd0SJens Axboe 		 * If asked for alignment, the offset must be zero and the
14647afa6fd0SJens Axboe 		 * length a multiple of the PAGE_SIZE.
14657afa6fd0SJens Axboe 		 */
14667afa6fd0SJens Axboe 		error = -EINVAL;
14677afa6fd0SJens Axboe 		if (aligned && (off || len & ~PAGE_MASK))
14687afa6fd0SJens Axboe 			break;
14697afa6fd0SJens Axboe 
1470912d35f8SJens Axboe 		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
147135f3d14dSJens Axboe 		if (npages > pipe_buffers - buffers)
147235f3d14dSJens Axboe 			npages = pipe_buffers - buffers;
1473912d35f8SJens Axboe 
1474bc40d73cSNick Piggin 		error = get_user_pages_fast((unsigned long)base, npages,
1475bc40d73cSNick Piggin 					0, &pages[buffers]);
1476912d35f8SJens Axboe 
1477912d35f8SJens Axboe 		if (unlikely(error <= 0))
1478912d35f8SJens Axboe 			break;
1479912d35f8SJens Axboe 
1480912d35f8SJens Axboe 		/*
1481912d35f8SJens Axboe 		 * Fill this contiguous range into the partial page map.
1482912d35f8SJens Axboe 		 */
1483912d35f8SJens Axboe 		for (i = 0; i < error; i++) {
14847591489aSJens Axboe 			const int plen = min_t(size_t, len, PAGE_SIZE - off);
1485912d35f8SJens Axboe 
1486912d35f8SJens Axboe 			partial[buffers].offset = off;
1487912d35f8SJens Axboe 			partial[buffers].len = plen;
1488912d35f8SJens Axboe 
1489912d35f8SJens Axboe 			off = 0;
1490912d35f8SJens Axboe 			len -= plen;
1491912d35f8SJens Axboe 			buffers++;
1492912d35f8SJens Axboe 		}
1493912d35f8SJens Axboe 
1494912d35f8SJens Axboe 		/*
1495912d35f8SJens Axboe 		 * We didn't complete this iov, stop here since it probably
1496912d35f8SJens Axboe 		 * means we have to move some of this into a pipe to
1497912d35f8SJens Axboe 		 * be able to continue.
1498912d35f8SJens Axboe 		 */
1499912d35f8SJens Axboe 		if (len)
1500912d35f8SJens Axboe 			break;
1501912d35f8SJens Axboe 
1502912d35f8SJens Axboe 		/*
1503912d35f8SJens Axboe 		 * Don't continue if we mapped fewer pages than we asked for,
1504912d35f8SJens Axboe 		 * or if we mapped the max number of pages that we have
1505912d35f8SJens Axboe 		 * room for.
1506912d35f8SJens Axboe 		 */
150735f3d14dSJens Axboe 		if (error < npages || buffers == pipe_buffers)
1508912d35f8SJens Axboe 			break;
1509912d35f8SJens Axboe 
1510912d35f8SJens Axboe 		nr_vecs--;
1511912d35f8SJens Axboe 		iov++;
1512912d35f8SJens Axboe 	}
1513912d35f8SJens Axboe 
1514912d35f8SJens Axboe 	if (buffers)
1515912d35f8SJens Axboe 		return buffers;
1516912d35f8SJens Axboe 
1517912d35f8SJens Axboe 	return error;
1518912d35f8SJens Axboe }
1519912d35f8SJens Axboe 
15206a14b90bSJens Axboe static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
15216a14b90bSJens Axboe 			struct splice_desc *sd)
15226a14b90bSJens Axboe {
15236130f531SAl Viro 	int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data);
15246130f531SAl Viro 	return n == sd->len ? n : -EFAULT;
15256a14b90bSJens Axboe }
15266a14b90bSJens Axboe 
15276a14b90bSJens Axboe /*
15286a14b90bSJens Axboe  * For lack of a better implementation, implement vmsplice() to userspace
15296a14b90bSJens Axboe  * as a simple copy of the pipes pages to the user iov.
15306a14b90bSJens Axboe  */
15316130f531SAl Viro static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
15326a14b90bSJens Axboe 			     unsigned long nr_segs, unsigned int flags)
15336a14b90bSJens Axboe {
15346a14b90bSJens Axboe 	struct pipe_inode_info *pipe;
15356a14b90bSJens Axboe 	struct splice_desc sd;
15366a14b90bSJens Axboe 	long ret;
15376130f531SAl Viro 	struct iovec iovstack[UIO_FASTIOV];
15386130f531SAl Viro 	struct iovec *iov = iovstack;
15396130f531SAl Viro 	struct iov_iter iter;
15406130f531SAl Viro 	ssize_t count = 0;
15416a14b90bSJens Axboe 
154271993e62SLinus Torvalds 	pipe = get_pipe_info(file);
15436a14b90bSJens Axboe 	if (!pipe)
15446a14b90bSJens Axboe 		return -EBADF;
15456a14b90bSJens Axboe 
15466130f531SAl Viro 	ret = rw_copy_check_uvector(READ, uiov, nr_segs,
15476130f531SAl Viro 				    ARRAY_SIZE(iovstack), iovstack, &iov);
15486130f531SAl Viro 	if (ret <= 0)
15496130f531SAl Viro 		return ret;
15506a14b90bSJens Axboe 
15516130f531SAl Viro 	iov_iter_init(&iter, iov, nr_segs, count, 0);
15528811930dSJens Axboe 
15536a14b90bSJens Axboe 	sd.len = 0;
15546130f531SAl Viro 	sd.total_len = count;
15556a14b90bSJens Axboe 	sd.flags = flags;
15566130f531SAl Viro 	sd.u.data = &iter;
15576a14b90bSJens Axboe 	sd.pos = 0;
15586a14b90bSJens Axboe 
15596130f531SAl Viro 	pipe_lock(pipe);
15606130f531SAl Viro 	ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
156161e0d47cSMiklos Szeredi 	pipe_unlock(pipe);
15626a14b90bSJens Axboe 
15636130f531SAl Viro 	if (iov != iovstack)
15646130f531SAl Viro 		kfree(iov);
15656a14b90bSJens Axboe 
15666a14b90bSJens Axboe 	return ret;
15676a14b90bSJens Axboe }
15686a14b90bSJens Axboe 
1569912d35f8SJens Axboe /*
1570912d35f8SJens Axboe  * vmsplice splices a user address range into a pipe. It can be thought of
1571912d35f8SJens Axboe  * as splice-from-memory, where the regular splice is splice-from-file (or
1572912d35f8SJens Axboe  * to file). In both cases the output is a pipe, naturally.
1573912d35f8SJens Axboe  */
15746a14b90bSJens Axboe static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1575912d35f8SJens Axboe 			     unsigned long nr_segs, unsigned int flags)
1576912d35f8SJens Axboe {
1577ddac0d39SJens Axboe 	struct pipe_inode_info *pipe;
157835f3d14dSJens Axboe 	struct page *pages[PIPE_DEF_BUFFERS];
157935f3d14dSJens Axboe 	struct partial_page partial[PIPE_DEF_BUFFERS];
1580912d35f8SJens Axboe 	struct splice_pipe_desc spd = {
1581912d35f8SJens Axboe 		.pages = pages,
1582912d35f8SJens Axboe 		.partial = partial,
1583047fe360SEric Dumazet 		.nr_pages_max = PIPE_DEF_BUFFERS,
1584912d35f8SJens Axboe 		.flags = flags,
1585912d35f8SJens Axboe 		.ops = &user_page_pipe_buf_ops,
1586bbdfc2f7SJens Axboe 		.spd_release = spd_release_page,
1587912d35f8SJens Axboe 	};
158835f3d14dSJens Axboe 	long ret;
1589912d35f8SJens Axboe 
159071993e62SLinus Torvalds 	pipe = get_pipe_info(file);
1591ddac0d39SJens Axboe 	if (!pipe)
1592912d35f8SJens Axboe 		return -EBADF;
1593912d35f8SJens Axboe 
159435f3d14dSJens Axboe 	if (splice_grow_spd(pipe, &spd))
159535f3d14dSJens Axboe 		return -ENOMEM;
1596912d35f8SJens Axboe 
159735f3d14dSJens Axboe 	spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
1598bd1a68b5SEric Dumazet 					    spd.partial, false,
1599047fe360SEric Dumazet 					    spd.nr_pages_max);
160035f3d14dSJens Axboe 	if (spd.nr_pages <= 0)
160135f3d14dSJens Axboe 		ret = spd.nr_pages;
160235f3d14dSJens Axboe 	else
160335f3d14dSJens Axboe 		ret = splice_to_pipe(pipe, &spd);
160435f3d14dSJens Axboe 
1605047fe360SEric Dumazet 	splice_shrink_spd(&spd);
160635f3d14dSJens Axboe 	return ret;
1607912d35f8SJens Axboe }
1608912d35f8SJens Axboe 
16096a14b90bSJens Axboe /*
16106a14b90bSJens Axboe  * Note that vmsplice only really supports true splicing _from_ user memory
16116a14b90bSJens Axboe  * to a pipe, not the other way around. Splicing from user memory is a simple
16126a14b90bSJens Axboe  * operation that can be supported without any funky alignment restrictions
16136a14b90bSJens Axboe  * or nasty vm tricks. We simply map in the user memory and fill them into
16146a14b90bSJens Axboe  * a pipe. The reverse isn't quite as easy, though. There are two possible
16156a14b90bSJens Axboe  * solutions for that:
16166a14b90bSJens Axboe  *
16176a14b90bSJens Axboe  *	- memcpy() the data internally, at which point we might as well just
16186a14b90bSJens Axboe  *	  do a regular read() on the buffer anyway.
16196a14b90bSJens Axboe  *	- Lots of nasty vm tricks, that are neither fast nor flexible (it
16206a14b90bSJens Axboe  *	  has restriction limitations on both ends of the pipe).
16216a14b90bSJens Axboe  *
16226a14b90bSJens Axboe  * Currently we punt and implement it as a normal copy, see pipe_to_user().
16236a14b90bSJens Axboe  *
16246a14b90bSJens Axboe  */
1625836f92adSHeiko Carstens SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
1626836f92adSHeiko Carstens 		unsigned long, nr_segs, unsigned int, flags)
1627912d35f8SJens Axboe {
16282903ff01SAl Viro 	struct fd f;
1629912d35f8SJens Axboe 	long error;
1630912d35f8SJens Axboe 
16316a14b90bSJens Axboe 	if (unlikely(nr_segs > UIO_MAXIOV))
16326a14b90bSJens Axboe 		return -EINVAL;
16336a14b90bSJens Axboe 	else if (unlikely(!nr_segs))
16346a14b90bSJens Axboe 		return 0;
16356a14b90bSJens Axboe 
1636912d35f8SJens Axboe 	error = -EBADF;
16372903ff01SAl Viro 	f = fdget(fd);
16382903ff01SAl Viro 	if (f.file) {
16392903ff01SAl Viro 		if (f.file->f_mode & FMODE_WRITE)
16402903ff01SAl Viro 			error = vmsplice_to_pipe(f.file, iov, nr_segs, flags);
16412903ff01SAl Viro 		else if (f.file->f_mode & FMODE_READ)
16422903ff01SAl Viro 			error = vmsplice_to_user(f.file, iov, nr_segs, flags);
1643912d35f8SJens Axboe 
16442903ff01SAl Viro 		fdput(f);
1645912d35f8SJens Axboe 	}
1646912d35f8SJens Axboe 
1647912d35f8SJens Axboe 	return error;
1648912d35f8SJens Axboe }
1649912d35f8SJens Axboe 
#ifdef CONFIG_COMPAT
/*
 * Compat entry point for vmsplice(): convert the compat_iovec array
 * element by element into a native iovec array placed in the area
 * returned by compat_alloc_user_space(), then call the native syscall.
 */
COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32,
		    unsigned int, nr_segs, unsigned int, flags)
{
	struct iovec __user *native;
	unsigned idx;

	if (nr_segs > UIO_MAXIOV)
		return -EINVAL;

	native = compat_alloc_user_space(nr_segs * sizeof(struct iovec));

	for (idx = 0; idx < nr_segs; idx++) {
		struct compat_iovec seg;

		/* Fetch the compat entry ... */
		if (get_user(seg.iov_base, &iov32[idx].iov_base))
			return -EFAULT;
		if (get_user(seg.iov_len, &iov32[idx].iov_len))
			return -EFAULT;

		/* ... and store it in native layout. */
		if (put_user(compat_ptr(seg.iov_base), &native[idx].iov_base))
			return -EFAULT;
		if (put_user(seg.iov_len, &native[idx].iov_len))
			return -EFAULT;
	}

	return sys_vmsplice(fd, native, nr_segs, flags);
}
#endif
167076b021d0SAl Viro 
1671836f92adSHeiko Carstens SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
1672836f92adSHeiko Carstens 		int, fd_out, loff_t __user *, off_out,
1673836f92adSHeiko Carstens 		size_t, len, unsigned int, flags)
16745274f052SJens Axboe {
16752903ff01SAl Viro 	struct fd in, out;
16765274f052SJens Axboe 	long error;
16775274f052SJens Axboe 
16785274f052SJens Axboe 	if (unlikely(!len))
16795274f052SJens Axboe 		return 0;
16805274f052SJens Axboe 
16815274f052SJens Axboe 	error = -EBADF;
16822903ff01SAl Viro 	in = fdget(fd_in);
16832903ff01SAl Viro 	if (in.file) {
16842903ff01SAl Viro 		if (in.file->f_mode & FMODE_READ) {
16852903ff01SAl Viro 			out = fdget(fd_out);
16862903ff01SAl Viro 			if (out.file) {
16872903ff01SAl Viro 				if (out.file->f_mode & FMODE_WRITE)
16882903ff01SAl Viro 					error = do_splice(in.file, off_in,
16892903ff01SAl Viro 							  out.file, off_out,
1690529565dcSIngo Molnar 							  len, flags);
16912903ff01SAl Viro 				fdput(out);
16925274f052SJens Axboe 			}
16935274f052SJens Axboe 		}
16942903ff01SAl Viro 		fdput(in);
16955274f052SJens Axboe 	}
16965274f052SJens Axboe 	return error;
16975274f052SJens Axboe }
169870524490SJens Axboe 
169970524490SJens Axboe /*
1700aadd06e5SJens Axboe  * Make sure there's data to read. Wait for input if we can, otherwise
1701aadd06e5SJens Axboe  * return an appropriate error.
1702aadd06e5SJens Axboe  */
17037c77f0b3SMiklos Szeredi static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1704aadd06e5SJens Axboe {
1705aadd06e5SJens Axboe 	int ret;
1706aadd06e5SJens Axboe 
1707aadd06e5SJens Axboe 	/*
1708aadd06e5SJens Axboe 	 * Check ->nrbufs without the inode lock first. This function
1709aadd06e5SJens Axboe 	 * is speculative anyways, so missing one is ok.
1710aadd06e5SJens Axboe 	 */
1711aadd06e5SJens Axboe 	if (pipe->nrbufs)
1712aadd06e5SJens Axboe 		return 0;
1713aadd06e5SJens Axboe 
1714aadd06e5SJens Axboe 	ret = 0;
171561e0d47cSMiklos Szeredi 	pipe_lock(pipe);
1716aadd06e5SJens Axboe 
1717aadd06e5SJens Axboe 	while (!pipe->nrbufs) {
1718aadd06e5SJens Axboe 		if (signal_pending(current)) {
1719aadd06e5SJens Axboe 			ret = -ERESTARTSYS;
1720aadd06e5SJens Axboe 			break;
1721aadd06e5SJens Axboe 		}
1722aadd06e5SJens Axboe 		if (!pipe->writers)
1723aadd06e5SJens Axboe 			break;
1724aadd06e5SJens Axboe 		if (!pipe->waiting_writers) {
1725aadd06e5SJens Axboe 			if (flags & SPLICE_F_NONBLOCK) {
1726aadd06e5SJens Axboe 				ret = -EAGAIN;
1727aadd06e5SJens Axboe 				break;
1728aadd06e5SJens Axboe 			}
1729aadd06e5SJens Axboe 		}
1730aadd06e5SJens Axboe 		pipe_wait(pipe);
1731aadd06e5SJens Axboe 	}
1732aadd06e5SJens Axboe 
173361e0d47cSMiklos Szeredi 	pipe_unlock(pipe);
1734aadd06e5SJens Axboe 	return ret;
1735aadd06e5SJens Axboe }
1736aadd06e5SJens Axboe 
1737aadd06e5SJens Axboe /*
1738aadd06e5SJens Axboe  * Make sure there's writeable room. Wait for room if we can, otherwise
1739aadd06e5SJens Axboe  * return an appropriate error.
1740aadd06e5SJens Axboe  */
17417c77f0b3SMiklos Szeredi static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1742aadd06e5SJens Axboe {
1743aadd06e5SJens Axboe 	int ret;
1744aadd06e5SJens Axboe 
1745aadd06e5SJens Axboe 	/*
1746aadd06e5SJens Axboe 	 * Check ->nrbufs without the inode lock first. This function
1747aadd06e5SJens Axboe 	 * is speculative anyways, so missing one is ok.
1748aadd06e5SJens Axboe 	 */
174935f3d14dSJens Axboe 	if (pipe->nrbufs < pipe->buffers)
1750aadd06e5SJens Axboe 		return 0;
1751aadd06e5SJens Axboe 
1752aadd06e5SJens Axboe 	ret = 0;
175361e0d47cSMiklos Szeredi 	pipe_lock(pipe);
1754aadd06e5SJens Axboe 
175535f3d14dSJens Axboe 	while (pipe->nrbufs >= pipe->buffers) {
1756aadd06e5SJens Axboe 		if (!pipe->readers) {
1757aadd06e5SJens Axboe 			send_sig(SIGPIPE, current, 0);
1758aadd06e5SJens Axboe 			ret = -EPIPE;
1759aadd06e5SJens Axboe 			break;
1760aadd06e5SJens Axboe 		}
1761aadd06e5SJens Axboe 		if (flags & SPLICE_F_NONBLOCK) {
1762aadd06e5SJens Axboe 			ret = -EAGAIN;
1763aadd06e5SJens Axboe 			break;
1764aadd06e5SJens Axboe 		}
1765aadd06e5SJens Axboe 		if (signal_pending(current)) {
1766aadd06e5SJens Axboe 			ret = -ERESTARTSYS;
1767aadd06e5SJens Axboe 			break;
1768aadd06e5SJens Axboe 		}
1769aadd06e5SJens Axboe 		pipe->waiting_writers++;
1770aadd06e5SJens Axboe 		pipe_wait(pipe);
1771aadd06e5SJens Axboe 		pipe->waiting_writers--;
1772aadd06e5SJens Axboe 	}
1773aadd06e5SJens Axboe 
177461e0d47cSMiklos Szeredi 	pipe_unlock(pipe);
1775aadd06e5SJens Axboe 	return ret;
1776aadd06e5SJens Axboe }
1777aadd06e5SJens Axboe 
1778aadd06e5SJens Axboe /*
17797c77f0b3SMiklos Szeredi  * Splice contents of ipipe to opipe.
17807c77f0b3SMiklos Szeredi  */
17817c77f0b3SMiklos Szeredi static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
17827c77f0b3SMiklos Szeredi 			       struct pipe_inode_info *opipe,
17837c77f0b3SMiklos Szeredi 			       size_t len, unsigned int flags)
17847c77f0b3SMiklos Szeredi {
17857c77f0b3SMiklos Szeredi 	struct pipe_buffer *ibuf, *obuf;
17867c77f0b3SMiklos Szeredi 	int ret = 0, nbuf;
17877c77f0b3SMiklos Szeredi 	bool input_wakeup = false;
17887c77f0b3SMiklos Szeredi 
17897c77f0b3SMiklos Szeredi 
17907c77f0b3SMiklos Szeredi retry:
17917c77f0b3SMiklos Szeredi 	ret = ipipe_prep(ipipe, flags);
17927c77f0b3SMiklos Szeredi 	if (ret)
17937c77f0b3SMiklos Szeredi 		return ret;
17947c77f0b3SMiklos Szeredi 
17957c77f0b3SMiklos Szeredi 	ret = opipe_prep(opipe, flags);
17967c77f0b3SMiklos Szeredi 	if (ret)
17977c77f0b3SMiklos Szeredi 		return ret;
17987c77f0b3SMiklos Szeredi 
17997c77f0b3SMiklos Szeredi 	/*
18007c77f0b3SMiklos Szeredi 	 * Potential ABBA deadlock, work around it by ordering lock
18017c77f0b3SMiklos Szeredi 	 * grabbing by pipe info address. Otherwise two different processes
18027c77f0b3SMiklos Szeredi 	 * could deadlock (one doing tee from A -> B, the other from B -> A).
18037c77f0b3SMiklos Szeredi 	 */
18047c77f0b3SMiklos Szeredi 	pipe_double_lock(ipipe, opipe);
18057c77f0b3SMiklos Szeredi 
18067c77f0b3SMiklos Szeredi 	do {
18077c77f0b3SMiklos Szeredi 		if (!opipe->readers) {
18087c77f0b3SMiklos Szeredi 			send_sig(SIGPIPE, current, 0);
18097c77f0b3SMiklos Szeredi 			if (!ret)
18107c77f0b3SMiklos Szeredi 				ret = -EPIPE;
18117c77f0b3SMiklos Szeredi 			break;
18127c77f0b3SMiklos Szeredi 		}
18137c77f0b3SMiklos Szeredi 
18147c77f0b3SMiklos Szeredi 		if (!ipipe->nrbufs && !ipipe->writers)
18157c77f0b3SMiklos Szeredi 			break;
18167c77f0b3SMiklos Szeredi 
18177c77f0b3SMiklos Szeredi 		/*
18187c77f0b3SMiklos Szeredi 		 * Cannot make any progress, because either the input
18197c77f0b3SMiklos Szeredi 		 * pipe is empty or the output pipe is full.
18207c77f0b3SMiklos Szeredi 		 */
182135f3d14dSJens Axboe 		if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) {
18227c77f0b3SMiklos Szeredi 			/* Already processed some buffers, break */
18237c77f0b3SMiklos Szeredi 			if (ret)
18247c77f0b3SMiklos Szeredi 				break;
18257c77f0b3SMiklos Szeredi 
18267c77f0b3SMiklos Szeredi 			if (flags & SPLICE_F_NONBLOCK) {
18277c77f0b3SMiklos Szeredi 				ret = -EAGAIN;
18287c77f0b3SMiklos Szeredi 				break;
18297c77f0b3SMiklos Szeredi 			}
18307c77f0b3SMiklos Szeredi 
18317c77f0b3SMiklos Szeredi 			/*
18327c77f0b3SMiklos Szeredi 			 * We raced with another reader/writer and haven't
18337c77f0b3SMiklos Szeredi 			 * managed to process any buffers.  A zero return
18347c77f0b3SMiklos Szeredi 			 * value means EOF, so retry instead.
18357c77f0b3SMiklos Szeredi 			 */
18367c77f0b3SMiklos Szeredi 			pipe_unlock(ipipe);
18377c77f0b3SMiklos Szeredi 			pipe_unlock(opipe);
18387c77f0b3SMiklos Szeredi 			goto retry;
18397c77f0b3SMiklos Szeredi 		}
18407c77f0b3SMiklos Szeredi 
18417c77f0b3SMiklos Szeredi 		ibuf = ipipe->bufs + ipipe->curbuf;
184235f3d14dSJens Axboe 		nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
18437c77f0b3SMiklos Szeredi 		obuf = opipe->bufs + nbuf;
18447c77f0b3SMiklos Szeredi 
18457c77f0b3SMiklos Szeredi 		if (len >= ibuf->len) {
18467c77f0b3SMiklos Szeredi 			/*
18477c77f0b3SMiklos Szeredi 			 * Simply move the whole buffer from ipipe to opipe
18487c77f0b3SMiklos Szeredi 			 */
18497c77f0b3SMiklos Szeredi 			*obuf = *ibuf;
18507c77f0b3SMiklos Szeredi 			ibuf->ops = NULL;
18517c77f0b3SMiklos Szeredi 			opipe->nrbufs++;
185235f3d14dSJens Axboe 			ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1);
18537c77f0b3SMiklos Szeredi 			ipipe->nrbufs--;
18547c77f0b3SMiklos Szeredi 			input_wakeup = true;
18557c77f0b3SMiklos Szeredi 		} else {
18567c77f0b3SMiklos Szeredi 			/*
18577c77f0b3SMiklos Szeredi 			 * Get a reference to this pipe buffer,
18587c77f0b3SMiklos Szeredi 			 * so we can copy the contents over.
18597c77f0b3SMiklos Szeredi 			 */
18607c77f0b3SMiklos Szeredi 			ibuf->ops->get(ipipe, ibuf);
18617c77f0b3SMiklos Szeredi 			*obuf = *ibuf;
18627c77f0b3SMiklos Szeredi 
18637c77f0b3SMiklos Szeredi 			/*
18647c77f0b3SMiklos Szeredi 			 * Don't inherit the gift flag, we need to
18657c77f0b3SMiklos Szeredi 			 * prevent multiple steals of this page.
18667c77f0b3SMiklos Szeredi 			 */
18677c77f0b3SMiklos Szeredi 			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
18687c77f0b3SMiklos Szeredi 
18697c77f0b3SMiklos Szeredi 			obuf->len = len;
18707c77f0b3SMiklos Szeredi 			opipe->nrbufs++;
18717c77f0b3SMiklos Szeredi 			ibuf->offset += obuf->len;
18727c77f0b3SMiklos Szeredi 			ibuf->len -= obuf->len;
18737c77f0b3SMiklos Szeredi 		}
18747c77f0b3SMiklos Szeredi 		ret += obuf->len;
18757c77f0b3SMiklos Szeredi 		len -= obuf->len;
18767c77f0b3SMiklos Szeredi 	} while (len);
18777c77f0b3SMiklos Szeredi 
18787c77f0b3SMiklos Szeredi 	pipe_unlock(ipipe);
18797c77f0b3SMiklos Szeredi 	pipe_unlock(opipe);
18807c77f0b3SMiklos Szeredi 
18817c77f0b3SMiklos Szeredi 	/*
18827c77f0b3SMiklos Szeredi 	 * If we put data in the output pipe, wakeup any potential readers.
18837c77f0b3SMiklos Szeredi 	 */
1884825cdcb1SNamhyung Kim 	if (ret > 0)
1885825cdcb1SNamhyung Kim 		wakeup_pipe_readers(opipe);
1886825cdcb1SNamhyung Kim 
18877c77f0b3SMiklos Szeredi 	if (input_wakeup)
18887c77f0b3SMiklos Szeredi 		wakeup_pipe_writers(ipipe);
18897c77f0b3SMiklos Szeredi 
18907c77f0b3SMiklos Szeredi 	return ret;
18917c77f0b3SMiklos Szeredi }
18927c77f0b3SMiklos Szeredi 
18937c77f0b3SMiklos Szeredi /*
189470524490SJens Axboe  * Link contents of ipipe to opipe.
189570524490SJens Axboe  */
189670524490SJens Axboe static int link_pipe(struct pipe_inode_info *ipipe,
189770524490SJens Axboe 		     struct pipe_inode_info *opipe,
189870524490SJens Axboe 		     size_t len, unsigned int flags)
189970524490SJens Axboe {
190070524490SJens Axboe 	struct pipe_buffer *ibuf, *obuf;
1901aadd06e5SJens Axboe 	int ret = 0, i = 0, nbuf;
190270524490SJens Axboe 
190370524490SJens Axboe 	/*
190470524490SJens Axboe 	 * Potential ABBA deadlock, work around it by ordering lock
190561e0d47cSMiklos Szeredi 	 * grabbing by pipe info address. Otherwise two different processes
190670524490SJens Axboe 	 * could deadlock (one doing tee from A -> B, the other from B -> A).
190770524490SJens Axboe 	 */
190861e0d47cSMiklos Szeredi 	pipe_double_lock(ipipe, opipe);
190970524490SJens Axboe 
1910aadd06e5SJens Axboe 	do {
191170524490SJens Axboe 		if (!opipe->readers) {
191270524490SJens Axboe 			send_sig(SIGPIPE, current, 0);
191370524490SJens Axboe 			if (!ret)
191470524490SJens Axboe 				ret = -EPIPE;
191570524490SJens Axboe 			break;
191670524490SJens Axboe 		}
191770524490SJens Axboe 
191870524490SJens Axboe 		/*
1919aadd06e5SJens Axboe 		 * If we have iterated all input buffers or ran out of
1920aadd06e5SJens Axboe 		 * output room, break.
192170524490SJens Axboe 		 */
192235f3d14dSJens Axboe 		if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers)
1923aadd06e5SJens Axboe 			break;
1924aadd06e5SJens Axboe 
192535f3d14dSJens Axboe 		ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1));
192635f3d14dSJens Axboe 		nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
192770524490SJens Axboe 
192870524490SJens Axboe 		/*
192970524490SJens Axboe 		 * Get a reference to this pipe buffer,
193070524490SJens Axboe 		 * so we can copy the contents over.
193170524490SJens Axboe 		 */
193270524490SJens Axboe 		ibuf->ops->get(ipipe, ibuf);
193370524490SJens Axboe 
193470524490SJens Axboe 		obuf = opipe->bufs + nbuf;
193570524490SJens Axboe 		*obuf = *ibuf;
193670524490SJens Axboe 
19377afa6fd0SJens Axboe 		/*
19387afa6fd0SJens Axboe 		 * Don't inherit the gift flag, we need to
19397afa6fd0SJens Axboe 		 * prevent multiple steals of this page.
19407afa6fd0SJens Axboe 		 */
19417afa6fd0SJens Axboe 		obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
19427afa6fd0SJens Axboe 
194370524490SJens Axboe 		if (obuf->len > len)
194470524490SJens Axboe 			obuf->len = len;
194570524490SJens Axboe 
194670524490SJens Axboe 		opipe->nrbufs++;
194770524490SJens Axboe 		ret += obuf->len;
194870524490SJens Axboe 		len -= obuf->len;
1949aadd06e5SJens Axboe 		i++;
1950aadd06e5SJens Axboe 	} while (len);
195170524490SJens Axboe 
195202cf01aeSJens Axboe 	/*
195302cf01aeSJens Axboe 	 * return EAGAIN if we have the potential of some data in the
195402cf01aeSJens Axboe 	 * future, otherwise just return 0
195502cf01aeSJens Axboe 	 */
195602cf01aeSJens Axboe 	if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
195702cf01aeSJens Axboe 		ret = -EAGAIN;
195802cf01aeSJens Axboe 
195961e0d47cSMiklos Szeredi 	pipe_unlock(ipipe);
196061e0d47cSMiklos Szeredi 	pipe_unlock(opipe);
196170524490SJens Axboe 
1962aadd06e5SJens Axboe 	/*
1963aadd06e5SJens Axboe 	 * If we put data in the output pipe, wakeup any potential readers.
1964aadd06e5SJens Axboe 	 */
1965825cdcb1SNamhyung Kim 	if (ret > 0)
1966825cdcb1SNamhyung Kim 		wakeup_pipe_readers(opipe);
196770524490SJens Axboe 
196870524490SJens Axboe 	return ret;
196970524490SJens Axboe }
197070524490SJens Axboe 
197170524490SJens Axboe /*
197270524490SJens Axboe  * This is a tee(1) implementation that works on pipes. It doesn't copy
197370524490SJens Axboe  * any data, it simply references the 'in' pages on the 'out' pipe.
197470524490SJens Axboe  * The 'flags' used are the SPLICE_F_* variants, currently the only
197570524490SJens Axboe  * applicable one is SPLICE_F_NONBLOCK.
197670524490SJens Axboe  */
197770524490SJens Axboe static long do_tee(struct file *in, struct file *out, size_t len,
197870524490SJens Axboe 		   unsigned int flags)
197970524490SJens Axboe {
198071993e62SLinus Torvalds 	struct pipe_inode_info *ipipe = get_pipe_info(in);
198171993e62SLinus Torvalds 	struct pipe_inode_info *opipe = get_pipe_info(out);
1982aadd06e5SJens Axboe 	int ret = -EINVAL;
198370524490SJens Axboe 
198470524490SJens Axboe 	/*
1985aadd06e5SJens Axboe 	 * Duplicate the contents of ipipe to opipe without actually
1986aadd06e5SJens Axboe 	 * copying the data.
198770524490SJens Axboe 	 */
1988aadd06e5SJens Axboe 	if (ipipe && opipe && ipipe != opipe) {
1989aadd06e5SJens Axboe 		/*
1990aadd06e5SJens Axboe 		 * Keep going, unless we encounter an error. The ipipe/opipe
1991aadd06e5SJens Axboe 		 * ordering doesn't really matter.
1992aadd06e5SJens Axboe 		 */
19937c77f0b3SMiklos Szeredi 		ret = ipipe_prep(ipipe, flags);
1994aadd06e5SJens Axboe 		if (!ret) {
19957c77f0b3SMiklos Szeredi 			ret = opipe_prep(opipe, flags);
199602cf01aeSJens Axboe 			if (!ret)
1997aadd06e5SJens Axboe 				ret = link_pipe(ipipe, opipe, len, flags);
1998aadd06e5SJens Axboe 		}
1999aadd06e5SJens Axboe 	}
200070524490SJens Axboe 
2001aadd06e5SJens Axboe 	return ret;
200270524490SJens Axboe }
200370524490SJens Axboe 
2004836f92adSHeiko Carstens SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
200570524490SJens Axboe {
20062903ff01SAl Viro 	struct fd in;
20072903ff01SAl Viro 	int error;
200870524490SJens Axboe 
200970524490SJens Axboe 	if (unlikely(!len))
201070524490SJens Axboe 		return 0;
201170524490SJens Axboe 
201270524490SJens Axboe 	error = -EBADF;
20132903ff01SAl Viro 	in = fdget(fdin);
20142903ff01SAl Viro 	if (in.file) {
20152903ff01SAl Viro 		if (in.file->f_mode & FMODE_READ) {
20162903ff01SAl Viro 			struct fd out = fdget(fdout);
20172903ff01SAl Viro 			if (out.file) {
20182903ff01SAl Viro 				if (out.file->f_mode & FMODE_WRITE)
20192903ff01SAl Viro 					error = do_tee(in.file, out.file,
20202903ff01SAl Viro 							len, flags);
20212903ff01SAl Viro 				fdput(out);
202270524490SJens Axboe 			}
202370524490SJens Axboe 		}
20242903ff01SAl Viro  		fdput(in);
202570524490SJens Axboe  	}
202670524490SJens Axboe 
202770524490SJens Axboe 	return error;
202870524490SJens Axboe }
2029