1 /* 2 * linux/fs/pipe.c 3 * 4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds 5 */ 6 7 #include <linux/mm.h> 8 #include <linux/file.h> 9 #include <linux/poll.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/fs.h> 14 #include <linux/mount.h> 15 #include <linux/pipe_fs_i.h> 16 #include <linux/uio.h> 17 #include <linux/highmem.h> 18 #include <linux/pagemap.h> 19 #include <linux/audit.h> 20 #include <linux/syscalls.h> 21 22 #include <asm/uaccess.h> 23 #include <asm/ioctls.h> 24 25 /* 26 * We use a start+len construction, which provides full use of the 27 * allocated memory. 28 * -- Florian Coosmann (FGC) 29 * 30 * Reads with count = 0 should always return 0. 31 * -- Julian Bradfield 1999-06-07. 32 * 33 * FIFOs and Pipes now generate SIGIO for both readers and writers. 34 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16 35 * 36 * pipe_read & write cleanup 37 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 38 */ 39 40 static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) 41 { 42 if (pipe->inode) 43 mutex_lock_nested(&pipe->inode->i_mutex, subclass); 44 } 45 46 void pipe_lock(struct pipe_inode_info *pipe) 47 { 48 /* 49 * pipe_lock() nests non-pipe inode locks (for writing to a file) 50 */ 51 pipe_lock_nested(pipe, I_MUTEX_PARENT); 52 } 53 EXPORT_SYMBOL(pipe_lock); 54 55 void pipe_unlock(struct pipe_inode_info *pipe) 56 { 57 if (pipe->inode) 58 mutex_unlock(&pipe->inode->i_mutex); 59 } 60 EXPORT_SYMBOL(pipe_unlock); 61 62 void pipe_double_lock(struct pipe_inode_info *pipe1, 63 struct pipe_inode_info *pipe2) 64 { 65 BUG_ON(pipe1 == pipe2); 66 67 if (pipe1 < pipe2) { 68 pipe_lock_nested(pipe1, I_MUTEX_PARENT); 69 pipe_lock_nested(pipe2, I_MUTEX_CHILD); 70 } else { 71 pipe_lock_nested(pipe2, I_MUTEX_PARENT); 72 pipe_lock_nested(pipe1, I_MUTEX_CHILD); 73 } 74 } 75 76 /* Drop the inode semaphore and wait for a pipe event, atomically */ 77 void pipe_wait(struct pipe_inode_info *pipe) 78 { 79 DEFINE_WAIT(wait); 80 81 /* 82 * Pipes are system-local resources, so sleeping on them 83 * is considered a noninteractive wait: 84 */ 85 prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); 86 pipe_unlock(pipe); 87 schedule(); 88 finish_wait(&pipe->wait, &wait); 89 pipe_lock(pipe); 90 } 91 92 static int 93 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, 94 int atomic) 95 { 96 unsigned long copy; 97 98 while (len > 0) { 99 while (!iov->iov_len) 100 iov++; 101 copy = min_t(unsigned long, len, iov->iov_len); 102 103 if (atomic) { 104 if (__copy_from_user_inatomic(to, iov->iov_base, copy)) 105 return -EFAULT; 106 } else { 107 if (copy_from_user(to, iov->iov_base, copy)) 108 return -EFAULT; 109 } 110 to += copy; 111 len -= copy; 112 iov->iov_base += copy; 113 iov->iov_len -= copy; 114 } 115 return 0; 116 } 117 118 static int 119 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len, 120 int atomic) 121 { 122 unsigned long copy; 123 124 while (len > 0) { 125 while (!iov->iov_len) 126 iov++; 127 copy = min_t(unsigned long, len, iov->iov_len); 128 129 if (atomic) { 130 if (__copy_to_user_inatomic(iov->iov_base, from, copy)) 131 return -EFAULT; 132 } else { 133 if (copy_to_user(iov->iov_base, from, copy)) 134 return -EFAULT; 135 } 136 from += copy; 137 len -= copy; 138 iov->iov_base += copy; 139 iov->iov_len -= copy; 140 } 141 return 0; 142 } 143 144 /* 145 * Attempt to pre-fault in the user memory, so we can use atomic copies. 146 * Returns the number of bytes not faulted in. 147 */ 148 static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len) 149 { 150 while (!iov->iov_len) 151 iov++; 152 153 while (len > 0) { 154 unsigned long this_len; 155 156 this_len = min_t(unsigned long, len, iov->iov_len); 157 if (fault_in_pages_writeable(iov->iov_base, this_len)) 158 break; 159 160 len -= this_len; 161 iov++; 162 } 163 164 return len; 165 } 166 167 /* 168 * Pre-fault in the user memory, so we can use atomic copies. 169 */ 170 static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len) 171 { 172 while (!iov->iov_len) 173 iov++; 174 175 while (len > 0) { 176 unsigned long this_len; 177 178 this_len = min_t(unsigned long, len, iov->iov_len); 179 fault_in_pages_readable(iov->iov_base, this_len); 180 len -= this_len; 181 iov++; 182 } 183 } 184 185 static void anon_pipe_buf_release(struct pipe_inode_info *pipe, 186 struct pipe_buffer *buf) 187 { 188 struct page *page = buf->page; 189 190 /* 191 * If nobody else uses this page, and we don't already have a 192 * temporary page, let's keep track of it as a one-deep 193 * allocation cache. (Otherwise just release our reference to it) 194 */ 195 if (page_count(page) == 1 && !pipe->tmp_page) 196 pipe->tmp_page = page; 197 else 198 page_cache_release(page); 199 } 200 201 /** 202 * generic_pipe_buf_map - virtually map a pipe buffer 203 * @pipe: the pipe that the buffer belongs to 204 * @buf: the buffer that should be mapped 205 * @atomic: whether to use an atomic map 206 * 207 * Description: 208 * This function returns a kernel virtual address mapping for the 209 * pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided 210 * and the caller has to be careful not to fault before calling 211 * the unmap function. 212 * 213 * Note that this function occupies KM_USER0 if @atomic != 0. 214 */ 215 void *generic_pipe_buf_map(struct pipe_inode_info *pipe, 216 struct pipe_buffer *buf, int atomic) 217 { 218 if (atomic) { 219 buf->flags |= PIPE_BUF_FLAG_ATOMIC; 220 return kmap_atomic(buf->page, KM_USER0); 221 } 222 223 return kmap(buf->page); 224 } 225 226 /** 227 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer 228 * @pipe: the pipe that the buffer belongs to 229 * @buf: the buffer that should be unmapped 230 * @map_data: the data that the mapping function returned 231 * 232 * Description: 233 * This function undoes the mapping that ->map() provided. 234 */ 235 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, 236 struct pipe_buffer *buf, void *map_data) 237 { 238 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { 239 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; 240 kunmap_atomic(map_data, KM_USER0); 241 } else 242 kunmap(buf->page); 243 } 244 245 /** 246 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer 247 * @pipe: the pipe that the buffer belongs to 248 * @buf: the buffer to attempt to steal 249 * 250 * Description: 251 * This function attempts to steal the &struct page attached to 252 * @buf. If successful, this function returns 0 and returns with 253 * the page locked. The caller may then reuse the page for whatever 254 * he wishes; the typical use is insertion into a different file 255 * page cache. 256 */ 257 int generic_pipe_buf_steal(struct pipe_inode_info *pipe, 258 struct pipe_buffer *buf) 259 { 260 struct page *page = buf->page; 261 262 /* 263 * A reference of one is golden, that means that the owner of this 264 * page is the only one holding a reference to it. lock the page 265 * and return OK. 266 */ 267 if (page_count(page) == 1) { 268 lock_page(page); 269 return 0; 270 } 271 272 return 1; 273 } 274 275 /** 276 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer 277 * @pipe: the pipe that the buffer belongs to 278 * @buf: the buffer to get a reference to 279 * 280 * Description: 281 * This function grabs an extra reference to @buf. It's used in 282 * in the tee() system call, when we duplicate the buffers in one 283 * pipe into another. 284 */ 285 void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) 286 { 287 page_cache_get(buf->page); 288 } 289 290 /** 291 * generic_pipe_buf_confirm - verify contents of the pipe buffer 292 * @info: the pipe that the buffer belongs to 293 * @buf: the buffer to confirm 294 * 295 * Description: 296 * This function does nothing, because the generic pipe code uses 297 * pages that are always good when inserted into the pipe. 298 */ 299 int generic_pipe_buf_confirm(struct pipe_inode_info *info, 300 struct pipe_buffer *buf) 301 { 302 return 0; 303 } 304 305 /** 306 * generic_pipe_buf_release - put a reference to a &struct pipe_buffer 307 * @pipe: the pipe that the buffer belongs to 308 * @buf: the buffer to put a reference to 309 * 310 * Description: 311 * This function releases a reference to @buf. 312 */ 313 void generic_pipe_buf_release(struct pipe_inode_info *pipe, 314 struct pipe_buffer *buf) 315 { 316 page_cache_release(buf->page); 317 } 318 319 static const struct pipe_buf_operations anon_pipe_buf_ops = { 320 .can_merge = 1, 321 .map = generic_pipe_buf_map, 322 .unmap = generic_pipe_buf_unmap, 323 .confirm = generic_pipe_buf_confirm, 324 .release = anon_pipe_buf_release, 325 .steal = generic_pipe_buf_steal, 326 .get = generic_pipe_buf_get, 327 }; 328 329 static ssize_t 330 pipe_read(struct kiocb *iocb, const struct iovec *_iov, 331 unsigned long nr_segs, loff_t pos) 332 { 333 struct file *filp = iocb->ki_filp; 334 struct inode *inode = filp->f_path.dentry->d_inode; 335 struct pipe_inode_info *pipe; 336 int do_wakeup; 337 ssize_t ret; 338 struct iovec *iov = (struct iovec *)_iov; 339 size_t total_len; 340 341 total_len = iov_length(iov, nr_segs); 342 /* Null read succeeds. */ 343 if (unlikely(total_len == 0)) 344 return 0; 345 346 do_wakeup = 0; 347 ret = 0; 348 mutex_lock(&inode->i_mutex); 349 pipe = inode->i_pipe; 350 for (;;) { 351 int bufs = pipe->nrbufs; 352 if (bufs) { 353 int curbuf = pipe->curbuf; 354 struct pipe_buffer *buf = pipe->bufs + curbuf; 355 const struct pipe_buf_operations *ops = buf->ops; 356 void *addr; 357 size_t chars = buf->len; 358 int error, atomic; 359 360 if (chars > total_len) 361 chars = total_len; 362 363 error = ops->confirm(pipe, buf); 364 if (error) { 365 if (!ret) 366 error = ret; 367 break; 368 } 369 370 atomic = !iov_fault_in_pages_write(iov, chars); 371 redo: 372 addr = ops->map(pipe, buf, atomic); 373 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); 374 ops->unmap(pipe, buf, addr); 375 if (unlikely(error)) { 376 /* 377 * Just retry with the slow path if we failed. 378 */ 379 if (atomic) { 380 atomic = 0; 381 goto redo; 382 } 383 if (!ret) 384 ret = error; 385 break; 386 } 387 ret += chars; 388 buf->offset += chars; 389 buf->len -= chars; 390 if (!buf->len) { 391 buf->ops = NULL; 392 ops->release(pipe, buf); 393 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); 394 pipe->curbuf = curbuf; 395 pipe->nrbufs = --bufs; 396 do_wakeup = 1; 397 } 398 total_len -= chars; 399 if (!total_len) 400 break; /* common path: read succeeded */ 401 } 402 if (bufs) /* More to do? */ 403 continue; 404 if (!pipe->writers) 405 break; 406 if (!pipe->waiting_writers) { 407 /* syscall merging: Usually we must not sleep 408 * if O_NONBLOCK is set, or if we got some data. 409 * But if a writer sleeps in kernel space, then 410 * we can wait for that data without violating POSIX. 411 */ 412 if (ret) 413 break; 414 if (filp->f_flags & O_NONBLOCK) { 415 ret = -EAGAIN; 416 break; 417 } 418 } 419 if (signal_pending(current)) { 420 if (!ret) 421 ret = -ERESTARTSYS; 422 break; 423 } 424 if (do_wakeup) { 425 wake_up_interruptible_sync(&pipe->wait); 426 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 427 } 428 pipe_wait(pipe); 429 } 430 mutex_unlock(&inode->i_mutex); 431 432 /* Signal writers asynchronously that there is more room. */ 433 if (do_wakeup) { 434 wake_up_interruptible_sync(&pipe->wait); 435 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 436 } 437 if (ret > 0) 438 file_accessed(filp); 439 return ret; 440 } 441 442 static ssize_t 443 pipe_write(struct kiocb *iocb, const struct iovec *_iov, 444 unsigned long nr_segs, loff_t ppos) 445 { 446 struct file *filp = iocb->ki_filp; 447 struct inode *inode = filp->f_path.dentry->d_inode; 448 struct pipe_inode_info *pipe; 449 ssize_t ret; 450 int do_wakeup; 451 struct iovec *iov = (struct iovec *)_iov; 452 size_t total_len; 453 ssize_t chars; 454 455 total_len = iov_length(iov, nr_segs); 456 /* Null write succeeds. */ 457 if (unlikely(total_len == 0)) 458 return 0; 459 460 do_wakeup = 0; 461 ret = 0; 462 mutex_lock(&inode->i_mutex); 463 pipe = inode->i_pipe; 464 465 if (!pipe->readers) { 466 send_sig(SIGPIPE, current, 0); 467 ret = -EPIPE; 468 goto out; 469 } 470 471 /* We try to merge small writes */ 472 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ 473 if (pipe->nrbufs && chars != 0) { 474 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & 475 (PIPE_BUFFERS-1); 476 struct pipe_buffer *buf = pipe->bufs + lastbuf; 477 const struct pipe_buf_operations *ops = buf->ops; 478 int offset = buf->offset + buf->len; 479 480 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 481 int error, atomic = 1; 482 void *addr; 483 484 error = ops->confirm(pipe, buf); 485 if (error) 486 goto out; 487 488 iov_fault_in_pages_read(iov, chars); 489 redo1: 490 addr = ops->map(pipe, buf, atomic); 491 error = pipe_iov_copy_from_user(offset + addr, iov, 492 chars, atomic); 493 ops->unmap(pipe, buf, addr); 494 ret = error; 495 do_wakeup = 1; 496 if (error) { 497 if (atomic) { 498 atomic = 0; 499 goto redo1; 500 } 501 goto out; 502 } 503 buf->len += chars; 504 total_len -= chars; 505 ret = chars; 506 if (!total_len) 507 goto out; 508 } 509 } 510 511 for (;;) { 512 int bufs; 513 514 if (!pipe->readers) { 515 send_sig(SIGPIPE, current, 0); 516 if (!ret) 517 ret = -EPIPE; 518 break; 519 } 520 bufs = pipe->nrbufs; 521 if (bufs < PIPE_BUFFERS) { 522 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); 523 struct pipe_buffer *buf = pipe->bufs + newbuf; 524 struct page *page = pipe->tmp_page; 525 char *src; 526 int error, atomic = 1; 527 528 if (!page) { 529 page = alloc_page(GFP_HIGHUSER); 530 if (unlikely(!page)) { 531 ret = ret ? : -ENOMEM; 532 break; 533 } 534 pipe->tmp_page = page; 535 } 536 /* Always wake up, even if the copy fails. Otherwise 537 * we lock up (O_NONBLOCK-)readers that sleep due to 538 * syscall merging. 539 * FIXME! Is this really true? 540 */ 541 do_wakeup = 1; 542 chars = PAGE_SIZE; 543 if (chars > total_len) 544 chars = total_len; 545 546 iov_fault_in_pages_read(iov, chars); 547 redo2: 548 if (atomic) 549 src = kmap_atomic(page, KM_USER0); 550 else 551 src = kmap(page); 552 553 error = pipe_iov_copy_from_user(src, iov, chars, 554 atomic); 555 if (atomic) 556 kunmap_atomic(src, KM_USER0); 557 else 558 kunmap(page); 559 560 if (unlikely(error)) { 561 if (atomic) { 562 atomic = 0; 563 goto redo2; 564 } 565 if (!ret) 566 ret = error; 567 break; 568 } 569 ret += chars; 570 571 /* Insert it into the buffer array */ 572 buf->page = page; 573 buf->ops = &anon_pipe_buf_ops; 574 buf->offset = 0; 575 buf->len = chars; 576 pipe->nrbufs = ++bufs; 577 pipe->tmp_page = NULL; 578 579 total_len -= chars; 580 if (!total_len) 581 break; 582 } 583 if (bufs < PIPE_BUFFERS) 584 continue; 585 if (filp->f_flags & O_NONBLOCK) { 586 if (!ret) 587 ret = -EAGAIN; 588 break; 589 } 590 if (signal_pending(current)) { 591 if (!ret) 592 ret = -ERESTARTSYS; 593 break; 594 } 595 if (do_wakeup) { 596 wake_up_interruptible_sync(&pipe->wait); 597 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 598 do_wakeup = 0; 599 } 600 pipe->waiting_writers++; 601 pipe_wait(pipe); 602 pipe->waiting_writers--; 603 } 604 out: 605 mutex_unlock(&inode->i_mutex); 606 if (do_wakeup) { 607 wake_up_interruptible_sync(&pipe->wait); 608 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 609 } 610 if (ret > 0) 611 file_update_time(filp); 612 return ret; 613 } 614 615 static ssize_t 616 bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos) 617 { 618 return -EBADF; 619 } 620 621 static ssize_t 622 bad_pipe_w(struct file *filp, const char __user *buf, size_t count, 623 loff_t *ppos) 624 { 625 return -EBADF; 626 } 627 628 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 629 { 630 struct inode *inode = filp->f_path.dentry->d_inode; 631 struct pipe_inode_info *pipe; 632 int count, buf, nrbufs; 633 634 switch (cmd) { 635 case FIONREAD: 636 mutex_lock(&inode->i_mutex); 637 pipe = inode->i_pipe; 638 count = 0; 639 buf = pipe->curbuf; 640 nrbufs = pipe->nrbufs; 641 while (--nrbufs >= 0) { 642 count += pipe->bufs[buf].len; 643 buf = (buf+1) & (PIPE_BUFFERS-1); 644 } 645 mutex_unlock(&inode->i_mutex); 646 647 return put_user(count, (int __user *)arg); 648 default: 649 return -EINVAL; 650 } 651 } 652 653 /* No kernel lock held - fine */ 654 static unsigned int 655 pipe_poll(struct file *filp, poll_table *wait) 656 { 657 unsigned int mask; 658 struct inode *inode = filp->f_path.dentry->d_inode; 659 struct pipe_inode_info *pipe = inode->i_pipe; 660 int nrbufs; 661 662 poll_wait(filp, &pipe->wait, wait); 663 664 /* Reading only -- no need for acquiring the semaphore. */ 665 nrbufs = pipe->nrbufs; 666 mask = 0; 667 if (filp->f_mode & FMODE_READ) { 668 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0; 669 if (!pipe->writers && filp->f_version != pipe->w_counter) 670 mask |= POLLHUP; 671 } 672 673 if (filp->f_mode & FMODE_WRITE) { 674 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; 675 /* 676 * Most Unices do not set POLLERR for FIFOs but on Linux they 677 * behave exactly like pipes for poll(). 678 */ 679 if (!pipe->readers) 680 mask |= POLLERR; 681 } 682 683 return mask; 684 } 685 686 static int 687 pipe_release(struct inode *inode, int decr, int decw) 688 { 689 struct pipe_inode_info *pipe; 690 691 mutex_lock(&inode->i_mutex); 692 pipe = inode->i_pipe; 693 pipe->readers -= decr; 694 pipe->writers -= decw; 695 696 if (!pipe->readers && !pipe->writers) { 697 free_pipe_info(inode); 698 } else { 699 wake_up_interruptible_sync(&pipe->wait); 700 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 701 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 702 } 703 mutex_unlock(&inode->i_mutex); 704 705 return 0; 706 } 707 708 static int 709 pipe_read_fasync(int fd, struct file *filp, int on) 710 { 711 struct inode *inode = filp->f_path.dentry->d_inode; 712 int retval; 713 714 mutex_lock(&inode->i_mutex); 715 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers); 716 mutex_unlock(&inode->i_mutex); 717 718 return retval; 719 } 720 721 722 static int 723 pipe_write_fasync(int fd, struct file *filp, int on) 724 { 725 struct inode *inode = filp->f_path.dentry->d_inode; 726 int retval; 727 728 mutex_lock(&inode->i_mutex); 729 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers); 730 mutex_unlock(&inode->i_mutex); 731 732 return retval; 733 } 734 735 736 static int 737 pipe_rdwr_fasync(int fd, struct file *filp, int on) 738 { 739 struct inode *inode = filp->f_path.dentry->d_inode; 740 struct pipe_inode_info *pipe = inode->i_pipe; 741 int retval; 742 743 mutex_lock(&inode->i_mutex); 744 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); 745 if (retval >= 0) { 746 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); 747 if (retval < 0) /* this can happen only if on == T */ 748 fasync_helper(-1, filp, 0, &pipe->fasync_readers); 749 } 750 mutex_unlock(&inode->i_mutex); 751 return retval; 752 } 753 754 755 static int 756 pipe_read_release(struct inode *inode, struct file *filp) 757 { 758 return pipe_release(inode, 1, 0); 759 } 760 761 static int 762 pipe_write_release(struct inode *inode, struct file *filp) 763 { 764 return pipe_release(inode, 0, 1); 765 } 766 767 static int 768 pipe_rdwr_release(struct inode *inode, struct file *filp) 769 { 770 int decr, decw; 771 772 decr = (filp->f_mode & FMODE_READ) != 0; 773 decw = (filp->f_mode & FMODE_WRITE) != 0; 774 return pipe_release(inode, decr, decw); 775 } 776 777 static int 778 pipe_read_open(struct inode *inode, struct file *filp) 779 { 780 int ret = -ENOENT; 781 782 mutex_lock(&inode->i_mutex); 783 784 if (inode->i_pipe) { 785 ret = 0; 786 inode->i_pipe->readers++; 787 } 788 789 mutex_unlock(&inode->i_mutex); 790 791 return ret; 792 } 793 794 static int 795 pipe_write_open(struct inode *inode, struct file *filp) 796 { 797 int ret = -ENOENT; 798 799 mutex_lock(&inode->i_mutex); 800 801 if (inode->i_pipe) { 802 ret = 0; 803 inode->i_pipe->writers++; 804 } 805 806 mutex_unlock(&inode->i_mutex); 807 808 return ret; 809 } 810 811 static int 812 pipe_rdwr_open(struct inode *inode, struct file *filp) 813 { 814 int ret = -ENOENT; 815 816 mutex_lock(&inode->i_mutex); 817 818 if (inode->i_pipe) { 819 ret = 0; 820 if (filp->f_mode & FMODE_READ) 821 inode->i_pipe->readers++; 822 if (filp->f_mode & FMODE_WRITE) 823 inode->i_pipe->writers++; 824 } 825 826 mutex_unlock(&inode->i_mutex); 827 828 return ret; 829 } 830 831 /* 832 * The file_operations structs are not static because they 833 * are also used in linux/fs/fifo.c to do operations on FIFOs. 834 * 835 * Pipes reuse fifos' file_operations structs. 836 */ 837 const struct file_operations read_pipefifo_fops = { 838 .llseek = no_llseek, 839 .read = do_sync_read, 840 .aio_read = pipe_read, 841 .write = bad_pipe_w, 842 .poll = pipe_poll, 843 .unlocked_ioctl = pipe_ioctl, 844 .open = pipe_read_open, 845 .release = pipe_read_release, 846 .fasync = pipe_read_fasync, 847 }; 848 849 const struct file_operations write_pipefifo_fops = { 850 .llseek = no_llseek, 851 .read = bad_pipe_r, 852 .write = do_sync_write, 853 .aio_write = pipe_write, 854 .poll = pipe_poll, 855 .unlocked_ioctl = pipe_ioctl, 856 .open = pipe_write_open, 857 .release = pipe_write_release, 858 .fasync = pipe_write_fasync, 859 }; 860 861 const struct file_operations rdwr_pipefifo_fops = { 862 .llseek = no_llseek, 863 .read = do_sync_read, 864 .aio_read = pipe_read, 865 .write = do_sync_write, 866 .aio_write = pipe_write, 867 .poll = pipe_poll, 868 .unlocked_ioctl = pipe_ioctl, 869 .open = pipe_rdwr_open, 870 .release = pipe_rdwr_release, 871 .fasync = pipe_rdwr_fasync, 872 }; 873 874 struct pipe_inode_info * alloc_pipe_info(struct inode *inode) 875 { 876 struct pipe_inode_info *pipe; 877 878 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); 879 if (pipe) { 880 init_waitqueue_head(&pipe->wait); 881 pipe->r_counter = pipe->w_counter = 1; 882 pipe->inode = inode; 883 } 884 885 return pipe; 886 } 887 888 void __free_pipe_info(struct pipe_inode_info *pipe) 889 { 890 int i; 891 892 for (i = 0; i < PIPE_BUFFERS; i++) { 893 struct pipe_buffer *buf = pipe->bufs + i; 894 if (buf->ops) 895 buf->ops->release(pipe, buf); 896 } 897 if (pipe->tmp_page) 898 __free_page(pipe->tmp_page); 899 kfree(pipe); 900 } 901 902 void free_pipe_info(struct inode *inode) 903 { 904 __free_pipe_info(inode->i_pipe); 905 inode->i_pipe = NULL; 906 } 907 908 static struct vfsmount *pipe_mnt __read_mostly; 909 910 /* 911 * pipefs_dname() is called from d_path(). 912 */ 913 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen) 914 { 915 return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]", 916 dentry->d_inode->i_ino); 917 } 918 919 static const struct dentry_operations pipefs_dentry_operations = { 920 .d_dname = pipefs_dname, 921 }; 922 923 static struct inode * get_pipe_inode(void) 924 { 925 struct inode *inode = new_inode(pipe_mnt->mnt_sb); 926 struct pipe_inode_info *pipe; 927 928 if (!inode) 929 goto fail_inode; 930 931 pipe = alloc_pipe_info(inode); 932 if (!pipe) 933 goto fail_iput; 934 inode->i_pipe = pipe; 935 936 pipe->readers = pipe->writers = 1; 937 inode->i_fop = &rdwr_pipefifo_fops; 938 939 /* 940 * Mark the inode dirty from the very beginning, 941 * that way it will never be moved to the dirty 942 * list because "mark_inode_dirty()" will think 943 * that it already _is_ on the dirty list. 944 */ 945 inode->i_state = I_DIRTY; 946 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; 947 inode->i_uid = current_fsuid(); 948 inode->i_gid = current_fsgid(); 949 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 950 951 return inode; 952 953 fail_iput: 954 iput(inode); 955 956 fail_inode: 957 return NULL; 958 } 959 960 struct file *create_write_pipe(int flags) 961 { 962 int err; 963 struct inode *inode; 964 struct file *f; 965 struct path path; 966 struct qstr name = { .name = "" }; 967 968 err = -ENFILE; 969 inode = get_pipe_inode(); 970 if (!inode) 971 goto err; 972 973 err = -ENOMEM; 974 path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); 975 if (!path.dentry) 976 goto err_inode; 977 path.mnt = mntget(pipe_mnt); 978 979 path.dentry->d_op = &pipefs_dentry_operations; 980 d_instantiate(path.dentry, inode); 981 982 err = -ENFILE; 983 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); 984 if (!f) 985 goto err_dentry; 986 f->f_mapping = inode->i_mapping; 987 988 f->f_flags = O_WRONLY | (flags & O_NONBLOCK); 989 f->f_version = 0; 990 991 return f; 992 993 err_dentry: 994 free_pipe_info(inode); 995 path_put(&path); 996 return ERR_PTR(err); 997 998 err_inode: 999 free_pipe_info(inode); 1000 iput(inode); 1001 err: 1002 return ERR_PTR(err); 1003 } 1004 1005 void free_write_pipe(struct file *f) 1006 { 1007 free_pipe_info(f->f_dentry->d_inode); 1008 path_put(&f->f_path); 1009 put_filp(f); 1010 } 1011 1012 struct file *create_read_pipe(struct file *wrf, int flags) 1013 { 1014 /* Grab pipe from the writer */ 1015 struct file *f = alloc_file(&wrf->f_path, FMODE_READ, 1016 &read_pipefifo_fops); 1017 if (!f) 1018 return ERR_PTR(-ENFILE); 1019 1020 path_get(&wrf->f_path); 1021 f->f_flags = O_RDONLY | (flags & O_NONBLOCK); 1022 1023 return f; 1024 } 1025 1026 int do_pipe_flags(int *fd, int flags) 1027 { 1028 struct file *fw, *fr; 1029 int error; 1030 int fdw, fdr; 1031 1032 if (flags & ~(O_CLOEXEC | O_NONBLOCK)) 1033 return -EINVAL; 1034 1035 fw = create_write_pipe(flags); 1036 if (IS_ERR(fw)) 1037 return PTR_ERR(fw); 1038 fr = create_read_pipe(fw, flags); 1039 error = PTR_ERR(fr); 1040 if (IS_ERR(fr)) 1041 goto err_write_pipe; 1042 1043 error = get_unused_fd_flags(flags); 1044 if (error < 0) 1045 goto err_read_pipe; 1046 fdr = error; 1047 1048 error = get_unused_fd_flags(flags); 1049 if (error < 0) 1050 goto err_fdr; 1051 fdw = error; 1052 1053 audit_fd_pair(fdr, fdw); 1054 fd_install(fdr, fr); 1055 fd_install(fdw, fw); 1056 fd[0] = fdr; 1057 fd[1] = fdw; 1058 1059 return 0; 1060 1061 err_fdr: 1062 put_unused_fd(fdr); 1063 err_read_pipe: 1064 path_put(&fr->f_path); 1065 put_filp(fr); 1066 err_write_pipe: 1067 free_write_pipe(fw); 1068 return error; 1069 } 1070 1071 /* 1072 * sys_pipe() is the normal C calling standard for creating 1073 * a pipe. It's not the way Unix traditionally does this, though. 1074 */ 1075 SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) 1076 { 1077 int fd[2]; 1078 int error; 1079 1080 error = do_pipe_flags(fd, flags); 1081 if (!error) { 1082 if (copy_to_user(fildes, fd, sizeof(fd))) { 1083 sys_close(fd[0]); 1084 sys_close(fd[1]); 1085 error = -EFAULT; 1086 } 1087 } 1088 return error; 1089 } 1090 1091 SYSCALL_DEFINE1(pipe, int __user *, fildes) 1092 { 1093 return sys_pipe2(fildes, 0); 1094 } 1095 1096 /* 1097 * pipefs should _never_ be mounted by userland - too much of security hassle, 1098 * no real gain from having the whole whorehouse mounted. So we don't need 1099 * any operations on the root directory. However, we need a non-trivial 1100 * d_name - pipe: will go nicely and kill the special-casing in procfs. 1101 */ 1102 static int pipefs_get_sb(struct file_system_type *fs_type, 1103 int flags, const char *dev_name, void *data, 1104 struct vfsmount *mnt) 1105 { 1106 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt); 1107 } 1108 1109 static struct file_system_type pipe_fs_type = { 1110 .name = "pipefs", 1111 .get_sb = pipefs_get_sb, 1112 .kill_sb = kill_anon_super, 1113 }; 1114 1115 static int __init init_pipe_fs(void) 1116 { 1117 int err = register_filesystem(&pipe_fs_type); 1118 1119 if (!err) { 1120 pipe_mnt = kern_mount(&pipe_fs_type); 1121 if (IS_ERR(pipe_mnt)) { 1122 err = PTR_ERR(pipe_mnt); 1123 unregister_filesystem(&pipe_fs_type); 1124 } 1125 } 1126 return err; 1127 } 1128 1129 static void __exit exit_pipe_fs(void) 1130 { 1131 unregister_filesystem(&pipe_fs_type); 1132 mntput(pipe_mnt); 1133 } 1134 1135 fs_initcall(init_pipe_fs); 1136 module_exit(exit_pipe_fs); 1137