1 /* 2 * linux/fs/pipe.c 3 * 4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds 5 */ 6 7 #include <linux/mm.h> 8 #include <linux/file.h> 9 #include <linux/poll.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/fs.h> 14 #include <linux/mount.h> 15 #include <linux/pipe_fs_i.h> 16 #include <linux/uio.h> 17 #include <linux/highmem.h> 18 #include <linux/pagemap.h> 19 #include <linux/audit.h> 20 #include <linux/syscalls.h> 21 22 #include <asm/uaccess.h> 23 #include <asm/ioctls.h> 24 25 /* 26 * We use a start+len construction, which provides full use of the 27 * allocated memory. 28 * -- Florian Coosmann (FGC) 29 * 30 * Reads with count = 0 should always return 0. 31 * -- Julian Bradfield 1999-06-07. 32 * 33 * FIFOs and Pipes now generate SIGIO for both readers and writers. 34 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16 35 * 36 * pipe_read & write cleanup 37 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 38 */ 39 40 static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) 41 { 42 if (pipe->inode) 43 mutex_lock_nested(&pipe->inode->i_mutex, subclass); 44 } 45 46 void pipe_lock(struct pipe_inode_info *pipe) 47 { 48 /* 49 * pipe_lock() nests non-pipe inode locks (for writing to a file) 50 */ 51 pipe_lock_nested(pipe, I_MUTEX_PARENT); 52 } 53 EXPORT_SYMBOL(pipe_lock); 54 55 void pipe_unlock(struct pipe_inode_info *pipe) 56 { 57 if (pipe->inode) 58 mutex_unlock(&pipe->inode->i_mutex); 59 } 60 EXPORT_SYMBOL(pipe_unlock); 61 62 void pipe_double_lock(struct pipe_inode_info *pipe1, 63 struct pipe_inode_info *pipe2) 64 { 65 BUG_ON(pipe1 == pipe2); 66 67 if (pipe1 < pipe2) { 68 pipe_lock_nested(pipe1, I_MUTEX_PARENT); 69 pipe_lock_nested(pipe2, I_MUTEX_CHILD); 70 } else { 71 pipe_lock_nested(pipe2, I_MUTEX_PARENT); 72 pipe_lock_nested(pipe1, I_MUTEX_CHILD); 73 } 74 } 75 76 /* Drop the inode semaphore and wait for a pipe event, atomically */ 77 void pipe_wait(struct pipe_inode_info *pipe) 78 { 79 DEFINE_WAIT(wait); 80 81 /* 82 * Pipes are system-local resources, so sleeping on them 83 * is considered a noninteractive wait: 84 */ 85 prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); 86 pipe_unlock(pipe); 87 schedule(); 88 finish_wait(&pipe->wait, &wait); 89 pipe_lock(pipe); 90 } 91 92 static int 93 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, 94 int atomic) 95 { 96 unsigned long copy; 97 98 while (len > 0) { 99 while (!iov->iov_len) 100 iov++; 101 copy = min_t(unsigned long, len, iov->iov_len); 102 103 if (atomic) { 104 if (__copy_from_user_inatomic(to, iov->iov_base, copy)) 105 return -EFAULT; 106 } else { 107 if (copy_from_user(to, iov->iov_base, copy)) 108 return -EFAULT; 109 } 110 to += copy; 111 len -= copy; 112 iov->iov_base += copy; 113 iov->iov_len -= copy; 114 } 115 return 0; 116 } 117 118 static int 119 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len, 120 int atomic) 121 { 122 unsigned long copy; 123 124 while (len > 0) { 125 while (!iov->iov_len) 126 iov++; 127 copy = min_t(unsigned long, len, iov->iov_len); 128 129 if (atomic) { 130 if (__copy_to_user_inatomic(iov->iov_base, from, copy)) 131 return -EFAULT; 132 } else { 133 if (copy_to_user(iov->iov_base, from, copy)) 134 return -EFAULT; 135 } 136 from += copy; 137 len -= copy; 138 iov->iov_base += copy; 139 iov->iov_len -= copy; 140 } 141 return 0; 142 } 143 144 /* 145 * Attempt to pre-fault in the user memory, so we can use atomic copies. 146 * Returns the number of bytes not faulted in. 147 */ 148 static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len) 149 { 150 while (!iov->iov_len) 151 iov++; 152 153 while (len > 0) { 154 unsigned long this_len; 155 156 this_len = min_t(unsigned long, len, iov->iov_len); 157 if (fault_in_pages_writeable(iov->iov_base, this_len)) 158 break; 159 160 len -= this_len; 161 iov++; 162 } 163 164 return len; 165 } 166 167 /* 168 * Pre-fault in the user memory, so we can use atomic copies. 169 */ 170 static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len) 171 { 172 while (!iov->iov_len) 173 iov++; 174 175 while (len > 0) { 176 unsigned long this_len; 177 178 this_len = min_t(unsigned long, len, iov->iov_len); 179 fault_in_pages_readable(iov->iov_base, this_len); 180 len -= this_len; 181 iov++; 182 } 183 } 184 185 static void anon_pipe_buf_release(struct pipe_inode_info *pipe, 186 struct pipe_buffer *buf) 187 { 188 struct page *page = buf->page; 189 190 /* 191 * If nobody else uses this page, and we don't already have a 192 * temporary page, let's keep track of it as a one-deep 193 * allocation cache. (Otherwise just release our reference to it) 194 */ 195 if (page_count(page) == 1 && !pipe->tmp_page) 196 pipe->tmp_page = page; 197 else 198 page_cache_release(page); 199 } 200 201 /** 202 * generic_pipe_buf_map - virtually map a pipe buffer 203 * @pipe: the pipe that the buffer belongs to 204 * @buf: the buffer that should be mapped 205 * @atomic: whether to use an atomic map 206 * 207 * Description: 208 * This function returns a kernel virtual address mapping for the 209 * pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided 210 * and the caller has to be careful not to fault before calling 211 * the unmap function. 212 * 213 * Note that this function occupies KM_USER0 if @atomic != 0. 214 */ 215 void *generic_pipe_buf_map(struct pipe_inode_info *pipe, 216 struct pipe_buffer *buf, int atomic) 217 { 218 if (atomic) { 219 buf->flags |= PIPE_BUF_FLAG_ATOMIC; 220 return kmap_atomic(buf->page, KM_USER0); 221 } 222 223 return kmap(buf->page); 224 } 225 226 /** 227 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer 228 * @pipe: the pipe that the buffer belongs to 229 * @buf: the buffer that should be unmapped 230 * @map_data: the data that the mapping function returned 231 * 232 * Description: 233 * This function undoes the mapping that ->map() provided. 234 */ 235 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, 236 struct pipe_buffer *buf, void *map_data) 237 { 238 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { 239 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; 240 kunmap_atomic(map_data, KM_USER0); 241 } else 242 kunmap(buf->page); 243 } 244 245 /** 246 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer 247 * @pipe: the pipe that the buffer belongs to 248 * @buf: the buffer to attempt to steal 249 * 250 * Description: 251 * This function attempts to steal the &struct page attached to 252 * @buf. If successful, this function returns 0 and returns with 253 * the page locked. The caller may then reuse the page for whatever 254 * he wishes; the typical use is insertion into a different file 255 * page cache. 256 */ 257 int generic_pipe_buf_steal(struct pipe_inode_info *pipe, 258 struct pipe_buffer *buf) 259 { 260 struct page *page = buf->page; 261 262 /* 263 * A reference of one is golden, that means that the owner of this 264 * page is the only one holding a reference to it. lock the page 265 * and return OK. 266 */ 267 if (page_count(page) == 1) { 268 lock_page(page); 269 return 0; 270 } 271 272 return 1; 273 } 274 275 /** 276 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer 277 * @pipe: the pipe that the buffer belongs to 278 * @buf: the buffer to get a reference to 279 * 280 * Description: 281 * This function grabs an extra reference to @buf. It's used in 282 * in the tee() system call, when we duplicate the buffers in one 283 * pipe into another. 284 */ 285 void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) 286 { 287 page_cache_get(buf->page); 288 } 289 290 /** 291 * generic_pipe_buf_confirm - verify contents of the pipe buffer 292 * @info: the pipe that the buffer belongs to 293 * @buf: the buffer to confirm 294 * 295 * Description: 296 * This function does nothing, because the generic pipe code uses 297 * pages that are always good when inserted into the pipe. 298 */ 299 int generic_pipe_buf_confirm(struct pipe_inode_info *info, 300 struct pipe_buffer *buf) 301 { 302 return 0; 303 } 304 305 /** 306 * generic_pipe_buf_release - put a reference to a &struct pipe_buffer 307 * @pipe: the pipe that the buffer belongs to 308 * @buf: the buffer to put a reference to 309 * 310 * Description: 311 * This function releases a reference to @buf. 312 */ 313 void generic_pipe_buf_release(struct pipe_inode_info *pipe, 314 struct pipe_buffer *buf) 315 { 316 page_cache_release(buf->page); 317 } 318 319 static const struct pipe_buf_operations anon_pipe_buf_ops = { 320 .can_merge = 1, 321 .map = generic_pipe_buf_map, 322 .unmap = generic_pipe_buf_unmap, 323 .confirm = generic_pipe_buf_confirm, 324 .release = anon_pipe_buf_release, 325 .steal = generic_pipe_buf_steal, 326 .get = generic_pipe_buf_get, 327 }; 328 329 static ssize_t 330 pipe_read(struct kiocb *iocb, const struct iovec *_iov, 331 unsigned long nr_segs, loff_t pos) 332 { 333 struct file *filp = iocb->ki_filp; 334 struct inode *inode = filp->f_path.dentry->d_inode; 335 struct pipe_inode_info *pipe; 336 int do_wakeup; 337 ssize_t ret; 338 struct iovec *iov = (struct iovec *)_iov; 339 size_t total_len; 340 341 total_len = iov_length(iov, nr_segs); 342 /* Null read succeeds. */ 343 if (unlikely(total_len == 0)) 344 return 0; 345 346 do_wakeup = 0; 347 ret = 0; 348 mutex_lock(&inode->i_mutex); 349 pipe = inode->i_pipe; 350 for (;;) { 351 int bufs = pipe->nrbufs; 352 if (bufs) { 353 int curbuf = pipe->curbuf; 354 struct pipe_buffer *buf = pipe->bufs + curbuf; 355 const struct pipe_buf_operations *ops = buf->ops; 356 void *addr; 357 size_t chars = buf->len; 358 int error, atomic; 359 360 if (chars > total_len) 361 chars = total_len; 362 363 error = ops->confirm(pipe, buf); 364 if (error) { 365 if (!ret) 366 error = ret; 367 break; 368 } 369 370 atomic = !iov_fault_in_pages_write(iov, chars); 371 redo: 372 addr = ops->map(pipe, buf, atomic); 373 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); 374 ops->unmap(pipe, buf, addr); 375 if (unlikely(error)) { 376 /* 377 * Just retry with the slow path if we failed. 378 */ 379 if (atomic) { 380 atomic = 0; 381 goto redo; 382 } 383 if (!ret) 384 ret = error; 385 break; 386 } 387 ret += chars; 388 buf->offset += chars; 389 buf->len -= chars; 390 if (!buf->len) { 391 buf->ops = NULL; 392 ops->release(pipe, buf); 393 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); 394 pipe->curbuf = curbuf; 395 pipe->nrbufs = --bufs; 396 do_wakeup = 1; 397 } 398 total_len -= chars; 399 if (!total_len) 400 break; /* common path: read succeeded */ 401 } 402 if (bufs) /* More to do? */ 403 continue; 404 if (!pipe->writers) 405 break; 406 if (!pipe->waiting_writers) { 407 /* syscall merging: Usually we must not sleep 408 * if O_NONBLOCK is set, or if we got some data. 409 * But if a writer sleeps in kernel space, then 410 * we can wait for that data without violating POSIX. 411 */ 412 if (ret) 413 break; 414 if (filp->f_flags & O_NONBLOCK) { 415 ret = -EAGAIN; 416 break; 417 } 418 } 419 if (signal_pending(current)) { 420 if (!ret) 421 ret = -ERESTARTSYS; 422 break; 423 } 424 if (do_wakeup) { 425 wake_up_interruptible_sync(&pipe->wait); 426 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 427 } 428 pipe_wait(pipe); 429 } 430 mutex_unlock(&inode->i_mutex); 431 432 /* Signal writers asynchronously that there is more room. */ 433 if (do_wakeup) { 434 wake_up_interruptible_sync(&pipe->wait); 435 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 436 } 437 if (ret > 0) 438 file_accessed(filp); 439 return ret; 440 } 441 442 static ssize_t 443 pipe_write(struct kiocb *iocb, const struct iovec *_iov, 444 unsigned long nr_segs, loff_t ppos) 445 { 446 struct file *filp = iocb->ki_filp; 447 struct inode *inode = filp->f_path.dentry->d_inode; 448 struct pipe_inode_info *pipe; 449 ssize_t ret; 450 int do_wakeup; 451 struct iovec *iov = (struct iovec *)_iov; 452 size_t total_len; 453 ssize_t chars; 454 455 total_len = iov_length(iov, nr_segs); 456 /* Null write succeeds. */ 457 if (unlikely(total_len == 0)) 458 return 0; 459 460 do_wakeup = 0; 461 ret = 0; 462 mutex_lock(&inode->i_mutex); 463 pipe = inode->i_pipe; 464 465 if (!pipe->readers) { 466 send_sig(SIGPIPE, current, 0); 467 ret = -EPIPE; 468 goto out; 469 } 470 471 /* We try to merge small writes */ 472 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ 473 if (pipe->nrbufs && chars != 0) { 474 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & 475 (PIPE_BUFFERS-1); 476 struct pipe_buffer *buf = pipe->bufs + lastbuf; 477 const struct pipe_buf_operations *ops = buf->ops; 478 int offset = buf->offset + buf->len; 479 480 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 481 int error, atomic = 1; 482 void *addr; 483 484 error = ops->confirm(pipe, buf); 485 if (error) 486 goto out; 487 488 iov_fault_in_pages_read(iov, chars); 489 redo1: 490 addr = ops->map(pipe, buf, atomic); 491 error = pipe_iov_copy_from_user(offset + addr, iov, 492 chars, atomic); 493 ops->unmap(pipe, buf, addr); 494 ret = error; 495 do_wakeup = 1; 496 if (error) { 497 if (atomic) { 498 atomic = 0; 499 goto redo1; 500 } 501 goto out; 502 } 503 buf->len += chars; 504 total_len -= chars; 505 ret = chars; 506 if (!total_len) 507 goto out; 508 } 509 } 510 511 for (;;) { 512 int bufs; 513 514 if (!pipe->readers) { 515 send_sig(SIGPIPE, current, 0); 516 if (!ret) 517 ret = -EPIPE; 518 break; 519 } 520 bufs = pipe->nrbufs; 521 if (bufs < PIPE_BUFFERS) { 522 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); 523 struct pipe_buffer *buf = pipe->bufs + newbuf; 524 struct page *page = pipe->tmp_page; 525 char *src; 526 int error, atomic = 1; 527 528 if (!page) { 529 page = alloc_page(GFP_HIGHUSER); 530 if (unlikely(!page)) { 531 ret = ret ? : -ENOMEM; 532 break; 533 } 534 pipe->tmp_page = page; 535 } 536 /* Always wake up, even if the copy fails. Otherwise 537 * we lock up (O_NONBLOCK-)readers that sleep due to 538 * syscall merging. 539 * FIXME! Is this really true? 540 */ 541 do_wakeup = 1; 542 chars = PAGE_SIZE; 543 if (chars > total_len) 544 chars = total_len; 545 546 iov_fault_in_pages_read(iov, chars); 547 redo2: 548 if (atomic) 549 src = kmap_atomic(page, KM_USER0); 550 else 551 src = kmap(page); 552 553 error = pipe_iov_copy_from_user(src, iov, chars, 554 atomic); 555 if (atomic) 556 kunmap_atomic(src, KM_USER0); 557 else 558 kunmap(page); 559 560 if (unlikely(error)) { 561 if (atomic) { 562 atomic = 0; 563 goto redo2; 564 } 565 if (!ret) 566 ret = error; 567 break; 568 } 569 ret += chars; 570 571 /* Insert it into the buffer array */ 572 buf->page = page; 573 buf->ops = &anon_pipe_buf_ops; 574 buf->offset = 0; 575 buf->len = chars; 576 pipe->nrbufs = ++bufs; 577 pipe->tmp_page = NULL; 578 579 total_len -= chars; 580 if (!total_len) 581 break; 582 } 583 if (bufs < PIPE_BUFFERS) 584 continue; 585 if (filp->f_flags & O_NONBLOCK) { 586 if (!ret) 587 ret = -EAGAIN; 588 break; 589 } 590 if (signal_pending(current)) { 591 if (!ret) 592 ret = -ERESTARTSYS; 593 break; 594 } 595 if (do_wakeup) { 596 wake_up_interruptible_sync(&pipe->wait); 597 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 598 do_wakeup = 0; 599 } 600 pipe->waiting_writers++; 601 pipe_wait(pipe); 602 pipe->waiting_writers--; 603 } 604 out: 605 mutex_unlock(&inode->i_mutex); 606 if (do_wakeup) { 607 wake_up_interruptible_sync(&pipe->wait); 608 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 609 } 610 if (ret > 0) 611 file_update_time(filp); 612 return ret; 613 } 614 615 static ssize_t 616 bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos) 617 { 618 return -EBADF; 619 } 620 621 static ssize_t 622 bad_pipe_w(struct file *filp, const char __user *buf, size_t count, 623 loff_t *ppos) 624 { 625 return -EBADF; 626 } 627 628 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 629 { 630 struct inode *inode = filp->f_path.dentry->d_inode; 631 struct pipe_inode_info *pipe; 632 int count, buf, nrbufs; 633 634 switch (cmd) { 635 case FIONREAD: 636 mutex_lock(&inode->i_mutex); 637 pipe = inode->i_pipe; 638 count = 0; 639 buf = pipe->curbuf; 640 nrbufs = pipe->nrbufs; 641 while (--nrbufs >= 0) { 642 count += pipe->bufs[buf].len; 643 buf = (buf+1) & (PIPE_BUFFERS-1); 644 } 645 mutex_unlock(&inode->i_mutex); 646 647 return put_user(count, (int __user *)arg); 648 default: 649 return -EINVAL; 650 } 651 } 652 653 /* No kernel lock held - fine */ 654 static unsigned int 655 pipe_poll(struct file *filp, poll_table *wait) 656 { 657 unsigned int mask; 658 struct inode *inode = filp->f_path.dentry->d_inode; 659 struct pipe_inode_info *pipe = inode->i_pipe; 660 int nrbufs; 661 662 poll_wait(filp, &pipe->wait, wait); 663 664 /* Reading only -- no need for acquiring the semaphore. */ 665 nrbufs = pipe->nrbufs; 666 mask = 0; 667 if (filp->f_mode & FMODE_READ) { 668 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0; 669 if (!pipe->writers && filp->f_version != pipe->w_counter) 670 mask |= POLLHUP; 671 } 672 673 if (filp->f_mode & FMODE_WRITE) { 674 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; 675 /* 676 * Most Unices do not set POLLERR for FIFOs but on Linux they 677 * behave exactly like pipes for poll(). 678 */ 679 if (!pipe->readers) 680 mask |= POLLERR; 681 } 682 683 return mask; 684 } 685 686 static int 687 pipe_release(struct inode *inode, int decr, int decw) 688 { 689 struct pipe_inode_info *pipe; 690 691 mutex_lock(&inode->i_mutex); 692 pipe = inode->i_pipe; 693 pipe->readers -= decr; 694 pipe->writers -= decw; 695 696 if (!pipe->readers && !pipe->writers) { 697 free_pipe_info(inode); 698 } else { 699 wake_up_interruptible_sync(&pipe->wait); 700 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 701 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 702 } 703 mutex_unlock(&inode->i_mutex); 704 705 return 0; 706 } 707 708 static int 709 pipe_read_fasync(int fd, struct file *filp, int on) 710 { 711 struct inode *inode = filp->f_path.dentry->d_inode; 712 int retval; 713 714 mutex_lock(&inode->i_mutex); 715 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers); 716 mutex_unlock(&inode->i_mutex); 717 718 return retval; 719 } 720 721 722 static int 723 pipe_write_fasync(int fd, struct file *filp, int on) 724 { 725 struct inode *inode = filp->f_path.dentry->d_inode; 726 int retval; 727 728 mutex_lock(&inode->i_mutex); 729 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers); 730 mutex_unlock(&inode->i_mutex); 731 732 return retval; 733 } 734 735 736 static int 737 pipe_rdwr_fasync(int fd, struct file *filp, int on) 738 { 739 struct inode *inode = filp->f_path.dentry->d_inode; 740 struct pipe_inode_info *pipe = inode->i_pipe; 741 int retval; 742 743 mutex_lock(&inode->i_mutex); 744 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); 745 if (retval >= 0) { 746 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); 747 if (retval < 0) /* this can happen only if on == T */ 748 fasync_helper(-1, filp, 0, &pipe->fasync_readers); 749 } 750 mutex_unlock(&inode->i_mutex); 751 return retval; 752 } 753 754 755 static int 756 pipe_read_release(struct inode *inode, struct file *filp) 757 { 758 return pipe_release(inode, 1, 0); 759 } 760 761 static int 762 pipe_write_release(struct inode *inode, struct file *filp) 763 { 764 return pipe_release(inode, 0, 1); 765 } 766 767 static int 768 pipe_rdwr_release(struct inode *inode, struct file *filp) 769 { 770 int decr, decw; 771 772 decr = (filp->f_mode & FMODE_READ) != 0; 773 decw = (filp->f_mode & FMODE_WRITE) != 0; 774 return pipe_release(inode, decr, decw); 775 } 776 777 static int 778 pipe_read_open(struct inode *inode, struct file *filp) 779 { 780 /* We could have perhaps used atomic_t, but this and friends 781 below are the only places. So it doesn't seem worthwhile. */ 782 mutex_lock(&inode->i_mutex); 783 inode->i_pipe->readers++; 784 mutex_unlock(&inode->i_mutex); 785 786 return 0; 787 } 788 789 static int 790 pipe_write_open(struct inode *inode, struct file *filp) 791 { 792 mutex_lock(&inode->i_mutex); 793 inode->i_pipe->writers++; 794 mutex_unlock(&inode->i_mutex); 795 796 return 0; 797 } 798 799 static int 800 pipe_rdwr_open(struct inode *inode, struct file *filp) 801 { 802 mutex_lock(&inode->i_mutex); 803 if (filp->f_mode & FMODE_READ) 804 inode->i_pipe->readers++; 805 if (filp->f_mode & FMODE_WRITE) 806 inode->i_pipe->writers++; 807 mutex_unlock(&inode->i_mutex); 808 809 return 0; 810 } 811 812 /* 813 * The file_operations structs are not static because they 814 * are also used in linux/fs/fifo.c to do operations on FIFOs. 815 * 816 * Pipes reuse fifos' file_operations structs. 817 */ 818 const struct file_operations read_pipefifo_fops = { 819 .llseek = no_llseek, 820 .read = do_sync_read, 821 .aio_read = pipe_read, 822 .write = bad_pipe_w, 823 .poll = pipe_poll, 824 .unlocked_ioctl = pipe_ioctl, 825 .open = pipe_read_open, 826 .release = pipe_read_release, 827 .fasync = pipe_read_fasync, 828 }; 829 830 const struct file_operations write_pipefifo_fops = { 831 .llseek = no_llseek, 832 .read = bad_pipe_r, 833 .write = do_sync_write, 834 .aio_write = pipe_write, 835 .poll = pipe_poll, 836 .unlocked_ioctl = pipe_ioctl, 837 .open = pipe_write_open, 838 .release = pipe_write_release, 839 .fasync = pipe_write_fasync, 840 }; 841 842 const struct file_operations rdwr_pipefifo_fops = { 843 .llseek = no_llseek, 844 .read = do_sync_read, 845 .aio_read = pipe_read, 846 .write = do_sync_write, 847 .aio_write = pipe_write, 848 .poll = pipe_poll, 849 .unlocked_ioctl = pipe_ioctl, 850 .open = pipe_rdwr_open, 851 .release = pipe_rdwr_release, 852 .fasync = pipe_rdwr_fasync, 853 }; 854 855 struct pipe_inode_info * alloc_pipe_info(struct inode *inode) 856 { 857 struct pipe_inode_info *pipe; 858 859 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); 860 if (pipe) { 861 init_waitqueue_head(&pipe->wait); 862 pipe->r_counter = pipe->w_counter = 1; 863 pipe->inode = inode; 864 } 865 866 return pipe; 867 } 868 869 void __free_pipe_info(struct pipe_inode_info *pipe) 870 { 871 int i; 872 873 for (i = 0; i < PIPE_BUFFERS; i++) { 874 struct pipe_buffer *buf = pipe->bufs + i; 875 if (buf->ops) 876 buf->ops->release(pipe, buf); 877 } 878 if (pipe->tmp_page) 879 __free_page(pipe->tmp_page); 880 kfree(pipe); 881 } 882 883 void free_pipe_info(struct inode *inode) 884 { 885 __free_pipe_info(inode->i_pipe); 886 inode->i_pipe = NULL; 887 } 888 889 static struct vfsmount *pipe_mnt __read_mostly; 890 static int pipefs_delete_dentry(struct dentry *dentry) 891 { 892 /* 893 * At creation time, we pretended this dentry was hashed 894 * (by clearing DCACHE_UNHASHED bit in d_flags) 895 * At delete time, we restore the truth : not hashed. 896 * (so that dput() can proceed correctly) 897 */ 898 dentry->d_flags |= DCACHE_UNHASHED; 899 return 0; 900 } 901 902 /* 903 * pipefs_dname() is called from d_path(). 904 */ 905 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen) 906 { 907 return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]", 908 dentry->d_inode->i_ino); 909 } 910 911 static const struct dentry_operations pipefs_dentry_operations = { 912 .d_delete = pipefs_delete_dentry, 913 .d_dname = pipefs_dname, 914 }; 915 916 static struct inode * get_pipe_inode(void) 917 { 918 struct inode *inode = new_inode(pipe_mnt->mnt_sb); 919 struct pipe_inode_info *pipe; 920 921 if (!inode) 922 goto fail_inode; 923 924 pipe = alloc_pipe_info(inode); 925 if (!pipe) 926 goto fail_iput; 927 inode->i_pipe = pipe; 928 929 pipe->readers = pipe->writers = 1; 930 inode->i_fop = &rdwr_pipefifo_fops; 931 932 /* 933 * Mark the inode dirty from the very beginning, 934 * that way it will never be moved to the dirty 935 * list because "mark_inode_dirty()" will think 936 * that it already _is_ on the dirty list. 937 */ 938 inode->i_state = I_DIRTY; 939 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; 940 inode->i_uid = current_fsuid(); 941 inode->i_gid = current_fsgid(); 942 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 943 944 return inode; 945 946 fail_iput: 947 iput(inode); 948 949 fail_inode: 950 return NULL; 951 } 952 953 struct file *create_write_pipe(int flags) 954 { 955 int err; 956 struct inode *inode; 957 struct file *f; 958 struct dentry *dentry; 959 struct qstr name = { .name = "" }; 960 961 err = -ENFILE; 962 inode = get_pipe_inode(); 963 if (!inode) 964 goto err; 965 966 err = -ENOMEM; 967 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); 968 if (!dentry) 969 goto err_inode; 970 971 dentry->d_op = &pipefs_dentry_operations; 972 /* 973 * We dont want to publish this dentry into global dentry hash table. 974 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED 975 * This permits a working /proc/$pid/fd/XXX on pipes 976 */ 977 dentry->d_flags &= ~DCACHE_UNHASHED; 978 d_instantiate(dentry, inode); 979 980 err = -ENFILE; 981 f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops); 982 if (!f) 983 goto err_dentry; 984 f->f_mapping = inode->i_mapping; 985 986 f->f_flags = O_WRONLY | (flags & O_NONBLOCK); 987 f->f_version = 0; 988 989 return f; 990 991 err_dentry: 992 free_pipe_info(inode); 993 dput(dentry); 994 return ERR_PTR(err); 995 996 err_inode: 997 free_pipe_info(inode); 998 iput(inode); 999 err: 1000 return ERR_PTR(err); 1001 } 1002 1003 void free_write_pipe(struct file *f) 1004 { 1005 free_pipe_info(f->f_dentry->d_inode); 1006 path_put(&f->f_path); 1007 put_filp(f); 1008 } 1009 1010 struct file *create_read_pipe(struct file *wrf, int flags) 1011 { 1012 struct file *f = get_empty_filp(); 1013 if (!f) 1014 return ERR_PTR(-ENFILE); 1015 1016 /* Grab pipe from the writer */ 1017 f->f_path = wrf->f_path; 1018 path_get(&wrf->f_path); 1019 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; 1020 1021 f->f_pos = 0; 1022 f->f_flags = O_RDONLY | (flags & O_NONBLOCK); 1023 f->f_op = &read_pipefifo_fops; 1024 f->f_mode = FMODE_READ; 1025 f->f_version = 0; 1026 1027 return f; 1028 } 1029 1030 int do_pipe_flags(int *fd, int flags) 1031 { 1032 struct file *fw, *fr; 1033 int error; 1034 int fdw, fdr; 1035 1036 if (flags & ~(O_CLOEXEC | O_NONBLOCK)) 1037 return -EINVAL; 1038 1039 fw = create_write_pipe(flags); 1040 if (IS_ERR(fw)) 1041 return PTR_ERR(fw); 1042 fr = create_read_pipe(fw, flags); 1043 error = PTR_ERR(fr); 1044 if (IS_ERR(fr)) 1045 goto err_write_pipe; 1046 1047 error = get_unused_fd_flags(flags); 1048 if (error < 0) 1049 goto err_read_pipe; 1050 fdr = error; 1051 1052 error = get_unused_fd_flags(flags); 1053 if (error < 0) 1054 goto err_fdr; 1055 fdw = error; 1056 1057 audit_fd_pair(fdr, fdw); 1058 fd_install(fdr, fr); 1059 fd_install(fdw, fw); 1060 fd[0] = fdr; 1061 fd[1] = fdw; 1062 1063 return 0; 1064 1065 err_fdr: 1066 put_unused_fd(fdr); 1067 err_read_pipe: 1068 path_put(&fr->f_path); 1069 put_filp(fr); 1070 err_write_pipe: 1071 free_write_pipe(fw); 1072 return error; 1073 } 1074 1075 /* 1076 * sys_pipe() is the normal C calling standard for creating 1077 * a pipe. It's not the way Unix traditionally does this, though. 1078 */ 1079 SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) 1080 { 1081 int fd[2]; 1082 int error; 1083 1084 error = do_pipe_flags(fd, flags); 1085 if (!error) { 1086 if (copy_to_user(fildes, fd, sizeof(fd))) { 1087 sys_close(fd[0]); 1088 sys_close(fd[1]); 1089 error = -EFAULT; 1090 } 1091 } 1092 return error; 1093 } 1094 1095 SYSCALL_DEFINE1(pipe, int __user *, fildes) 1096 { 1097 return sys_pipe2(fildes, 0); 1098 } 1099 1100 /* 1101 * pipefs should _never_ be mounted by userland - too much of security hassle, 1102 * no real gain from having the whole whorehouse mounted. So we don't need 1103 * any operations on the root directory. However, we need a non-trivial 1104 * d_name - pipe: will go nicely and kill the special-casing in procfs. 1105 */ 1106 static int pipefs_get_sb(struct file_system_type *fs_type, 1107 int flags, const char *dev_name, void *data, 1108 struct vfsmount *mnt) 1109 { 1110 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt); 1111 } 1112 1113 static struct file_system_type pipe_fs_type = { 1114 .name = "pipefs", 1115 .get_sb = pipefs_get_sb, 1116 .kill_sb = kill_anon_super, 1117 }; 1118 1119 static int __init init_pipe_fs(void) 1120 { 1121 int err = register_filesystem(&pipe_fs_type); 1122 1123 if (!err) { 1124 pipe_mnt = kern_mount(&pipe_fs_type); 1125 if (IS_ERR(pipe_mnt)) { 1126 err = PTR_ERR(pipe_mnt); 1127 unregister_filesystem(&pipe_fs_type); 1128 } 1129 } 1130 return err; 1131 } 1132 1133 static void __exit exit_pipe_fs(void) 1134 { 1135 unregister_filesystem(&pipe_fs_type); 1136 mntput(pipe_mnt); 1137 } 1138 1139 fs_initcall(init_pipe_fs); 1140 module_exit(exit_pipe_fs); 1141