/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/audit.h>

#include <asm/uaccess.h>
#include <asm/ioctls.h>

/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */

/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
	DEFINE_WAIT(wait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(&pipe->wait, &wait,
			TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
	if (pipe->inode)
		mutex_unlock(&pipe->inode->i_mutex);
	schedule();
	finish_wait(&pipe->wait, &wait);
	if (pipe->inode)
		mutex_lock(&pipe->inode->i_mutex);
}

static int
pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
			int atomic)
{
	unsigned long copy;

	while (len > 0) {
		while (!iov->iov_len)
			iov++;
		copy = min_t(unsigned long, len, iov->iov_len);

		if (atomic) {
			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
				return -EFAULT;
		} else {
			if (copy_from_user(to, iov->iov_base, copy))
				return -EFAULT;
		}
		to += copy;
		len -= copy;
		iov->iov_base += copy;
		iov->iov_len -= copy;
	}
	return 0;
}

static int
pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
		      int atomic)
{
	unsigned long copy;

	while (len > 0) {
		while (!iov->iov_len)
			iov++;
		copy = min_t(unsigned long, len, iov->iov_len);

		if (atomic) {
			if (__copy_to_user_inatomic(iov->iov_base, from, copy))
				return -EFAULT;
		} else {
			if (copy_to_user(iov->iov_base, from, copy))
				return -EFAULT;
		}
		from += copy;
		len -= copy;
		iov->iov_base += copy;
		iov->iov_len -= copy;
	}
	return 0;
}

/*
 * Attempt to pre-fault in the user memory, so we can use atomic copies.
 * Returns the number of bytes not faulted in.
 */
static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
{
	while (!iov->iov_len)
		iov++;

	while (len > 0) {
		unsigned long this_len;

		this_len = min_t(unsigned long, len, iov->iov_len);
		if (fault_in_pages_writeable(iov->iov_base, this_len))
			break;

		len -= this_len;
		iov++;
	}

	return len;
}
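/*
 * A sketch (illustrative only, not a definition used by this file) of how
 * pipe_read() and pipe_write() below pair the fault-in helpers above and
 * below with an "atomic first, slow path on failure" copy:
 *
 *	atomic = !iov_fault_in_pages_write(iov, chars);
 * redo:
 *	addr = ops->map(pipe, buf, atomic);
 *	error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
 *	ops->unmap(pipe, buf, addr);
 *	if (error && atomic) {
 *		atomic = 0;
 *		goto redo;
 *	}
 *
 * Pre-faulting the user pages makes the atomic (non-sleeping, kmap_atomic)
 * copy likely to succeed; if a page is reclaimed in the meantime, the
 * atomic copy fails with -EFAULT and the regular, sleeping path is retried.
 */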
/*
 * Pre-fault in the user memory, so we can use atomic copies.
 */
static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
{
	while (!iov->iov_len)
		iov++;

	while (len > 0) {
		unsigned long this_len;

		this_len = min_t(unsigned long, len, iov->iov_len);
		fault_in_pages_readable(iov->iov_base, this_len);
		len -= this_len;
		iov++;
	}
}

static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * If nobody else uses this page, and we don't already have a
	 * temporary page, let's keep track of it as a one-deep
	 * allocation cache. (Otherwise just release our reference to it)
	 */
	if (page_count(page) == 1 && !pipe->tmp_page)
		pipe->tmp_page = page;
	else
		page_cache_release(page);
}

void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
			   struct pipe_buffer *buf, int atomic)
{
	if (atomic) {
		buf->flags |= PIPE_BUF_FLAG_ATOMIC;
		return kmap_atomic(buf->page, KM_USER0);
	}

	return kmap(buf->page);
}

void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
			    struct pipe_buffer *buf, void *map_data)
{
	if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
		buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
		kunmap_atomic(map_data, KM_USER0);
	} else
		kunmap(buf->page);
}

int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
			   struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	if (page_count(page) == 1) {
		lock_page(page);
		return 0;
	}

	return 1;
}

void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	page_cache_get(buf->page);
}

int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	return 0;
}

static const struct pipe_buf_operations anon_pipe_buf_ops = {
	.can_merge = 1,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.pin = generic_pipe_buf_pin,
	.release = anon_pipe_buf_release,
	.steal = generic_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};
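/*
 * How a reader drives a pipe_buffer through these operations, roughly
 * (a simplified sketch of the sequence in pipe_read() below):
 *
 *	error = ops->pin(pipe, buf);		-- ensure the page is usable
 *	addr  = ops->map(pipe, buf, atomic);	-- kmap it
 *	...copy out of addr + buf->offset...
 *	ops->unmap(pipe, buf, addr);
 *	if (buf->len == 0)
 *		ops->release(pipe, buf);	-- drop or cache the page
 *
 * ->steal and ->get are not called in this file; they exist for users
 * like splice that move page references instead of copying data.
 */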
static ssize_t
pipe_read(struct kiocb *iocb, const struct iovec *_iov,
	  unsigned long nr_segs, loff_t pos)
{
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;
	for (;;) {
		int bufs = pipe->nrbufs;
		if (bufs) {
			int curbuf = pipe->curbuf;
			struct pipe_buffer *buf = pipe->bufs + curbuf;
			const struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error, atomic;

			if (chars > total_len)
				chars = total_len;

			error = ops->pin(pipe, buf);
			if (error) {
				if (!ret)
					ret = error;
				break;
			}

			atomic = !iov_fault_in_pages_write(iov, chars);
redo:
			addr = ops->map(pipe, buf, atomic);
			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
			ops->unmap(pipe, buf, addr);
			if (unlikely(error)) {
				/*
				 * Just retry with the slow path if we failed.
				 */
				if (atomic) {
					atomic = 0;
					goto redo;
				}
				if (!ret)
					ret = error;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				buf->ops = NULL;
				ops->release(pipe, buf);
				curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
				pipe->curbuf = curbuf;
				pipe->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		if (!pipe->writers)
			break;
		if (!pipe->waiting_writers) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
		}
		pipe_wait(pipe);
	}
	mutex_unlock(&inode->i_mutex);

	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}
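/*
 * Worked example of the start+len ring arithmetic used above and below:
 * with PIPE_BUFFERS == 16, curbuf == 14 and nrbufs == 3, the occupied
 * slots are 14, 15 and 0 -- (curbuf + i) & (PIPE_BUFFERS-1) for
 * i = 0..nrbufs-1. A reader consumes slot curbuf and advances it; a
 * writer fills slot (curbuf + nrbufs) & (PIPE_BUFFERS-1) and bumps
 * nrbufs. The mask trick requires PIPE_BUFFERS to be a power of two.
 */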
static ssize_t
pipe_write(struct kiocb *iocb, const struct iovec *_iov,
	   unsigned long nr_segs, loff_t ppos)
{
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe;
	ssize_t ret;
	int do_wakeup;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;
	ssize_t chars;

	total_len = iov_length(iov, nr_segs);
	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;

	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

	/* We try to merge small writes */
	chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
	if (pipe->nrbufs && chars != 0) {
		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
							(PIPE_BUFFERS-1);
		struct pipe_buffer *buf = pipe->bufs + lastbuf;
		const struct pipe_buf_operations *ops = buf->ops;
		int offset = buf->offset + buf->len;

		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
			int error, atomic = 1;
			void *addr;

			error = ops->pin(pipe, buf);
			if (error)
				goto out;

			iov_fault_in_pages_read(iov, chars);
redo1:
			addr = ops->map(pipe, buf, atomic);
			error = pipe_iov_copy_from_user(offset + addr, iov,
							chars, atomic);
			ops->unmap(pipe, buf, addr);
			ret = error;
			do_wakeup = 1;
			if (error) {
				if (atomic) {
					atomic = 0;
					goto redo1;
				}
				goto out;
			}
			buf->len += chars;
			total_len -= chars;
			ret = chars;
			if (!total_len)
				goto out;
		}
	}
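	/*
	 * The small write could not be merged (or there is more data to
	 * go): fall through to the main loop, which copies the remainder
	 * a page at a time into freshly allocated buffer slots.
	 */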
	for (;;) {
		int bufs;

		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}
		bufs = pipe->nrbufs;
		if (bufs < PIPE_BUFFERS) {
			int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
			struct pipe_buffer *buf = pipe->bufs + newbuf;
			struct page *page = pipe->tmp_page;
			char *src;
			int error, atomic = 1;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				pipe->tmp_page = page;
			}
			/* Always wake up, even if the copy fails. Otherwise
			 * we lock up (O_NONBLOCK-)readers that sleep due to
			 * syscall merging.
			 * FIXME! Is this really true?
			 */
			do_wakeup = 1;
			chars = PAGE_SIZE;
			if (chars > total_len)
				chars = total_len;

			iov_fault_in_pages_read(iov, chars);
redo2:
			if (atomic)
				src = kmap_atomic(page, KM_USER0);
			else
				src = kmap(page);

			error = pipe_iov_copy_from_user(src, iov, chars,
							atomic);
			if (atomic)
				kunmap_atomic(src, KM_USER0);
			else
				kunmap(page);

			if (unlikely(error)) {
				if (atomic) {
					atomic = 0;
					goto redo2;
				}
				if (!ret)
					ret = error;
				break;
			}
			ret += chars;

			/* Insert it into the buffer array */
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = chars;
			pipe->nrbufs = ++bufs;
			pipe->tmp_page = NULL;

			total_len -= chars;
			if (!total_len)
				break;
		}
		if (bufs < PIPE_BUFFERS)
			continue;
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret)
				ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
			do_wakeup = 0;
		}
		pipe->waiting_writers++;
		pipe_wait(pipe);
		pipe->waiting_writers--;
	}
out:
	mutex_unlock(&inode->i_mutex);
	if (do_wakeup) {
		wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	}
	if (ret > 0)
		file_update_time(filp);
	return ret;
}

static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}

static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
	   loff_t *ppos)
{
	return -EBADF;
}

static int
pipe_ioctl(struct inode *pino, struct file *filp,
	   unsigned int cmd, unsigned long arg)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe;
	int count, buf, nrbufs;

	switch (cmd) {
		case FIONREAD:
			mutex_lock(&inode->i_mutex);
			pipe = inode->i_pipe;
			count = 0;
			buf = pipe->curbuf;
			nrbufs = pipe->nrbufs;
			while (--nrbufs >= 0) {
				count += pipe->bufs[buf].len;
				buf = (buf+1) & (PIPE_BUFFERS-1);
			}
			mutex_unlock(&inode->i_mutex);

			return put_user(count, (int __user *)arg);
		default:
			return -EINVAL;
	}
}
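/*
 * Illustrative userspace use of the FIONREAD ioctl handled above (fds as
 * returned by pipe(2); a sketch, not part of this file):
 *
 *	int avail;
 *	if (ioctl(fd[0], FIONREAD, &avail) == 0)
 *		printf("%d bytes waiting in the pipe\n", avail);
 */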
/* No kernel lock held - fine */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
	unsigned int mask;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe = inode->i_pipe;
	int nrbufs;

	poll_wait(filp, &pipe->wait, wait);

	/* Reading only -- no need for acquiring the semaphore. */
	nrbufs = pipe->nrbufs;
	mask = 0;
	if (filp->f_mode & FMODE_READ) {
		mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
		if (!pipe->writers && filp->f_version != pipe->w_counter)
			mask |= POLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
		/*
		 * Most Unices do not set POLLERR for FIFOs but on Linux they
		 * behave exactly like pipes for poll().
		 */
		if (!pipe->readers)
			mask |= POLLERR;
	}

	return mask;
}

static int
pipe_release(struct inode *inode, int decr, int decw)
{
	struct pipe_inode_info *pipe;

	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;
	pipe->readers -= decr;
	pipe->writers -= decw;

	if (!pipe->readers && !pipe->writers) {
		free_pipe_info(inode);
	} else {
		wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	mutex_unlock(&inode->i_mutex);

	return 0;
}

static int
pipe_read_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	int retval;

	mutex_lock(&inode->i_mutex);
	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
	mutex_unlock(&inode->i_mutex);

	if (retval < 0)
		return retval;

	return 0;
}


static int
pipe_write_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	int retval;

	mutex_lock(&inode->i_mutex);
	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
	mutex_unlock(&inode->i_mutex);

	if (retval < 0)
		return retval;

	return 0;
}


static int
pipe_rdwr_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe = inode->i_pipe;
	int retval;

	mutex_lock(&inode->i_mutex);

	retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);

	if (retval >= 0)
		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);

	mutex_unlock(&inode->i_mutex);

	if (retval < 0)
		return retval;

	return 0;
}


static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	pipe_read_fasync(-1, filp, 0);
	return pipe_release(inode, 1, 0);
}

static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	pipe_write_fasync(-1, filp, 0);
	return pipe_release(inode, 0, 1);
}

static int
pipe_rdwr_release(struct inode *inode, struct file *filp)
{
	int decr, decw;

	pipe_rdwr_fasync(-1, filp, 0);
	decr = (filp->f_mode & FMODE_READ) != 0;
	decw = (filp->f_mode & FMODE_WRITE) != 0;
	return pipe_release(inode, decr, decw);
}

static int
pipe_read_open(struct inode *inode, struct file *filp)
{
	/* We could perhaps have used atomic_t, but this and its friends
	   below are the only places that touch the counters. So it
	   doesn't seem worthwhile. */
	mutex_lock(&inode->i_mutex);
	inode->i_pipe->readers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}

static int
pipe_write_open(struct inode *inode, struct file *filp)
{
	mutex_lock(&inode->i_mutex);
	inode->i_pipe->writers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}

static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
	mutex_lock(&inode->i_mutex);
	if (filp->f_mode & FMODE_READ)
		inode->i_pipe->readers++;
	if (filp->f_mode & FMODE_WRITE)
		inode->i_pipe->writers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}
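/*
 * Summary of the pairing above: every open bumps the matching counter
 * under i_mutex and every release drops it; pipe_release() frees the
 * pipe once both counts reach zero.
 *
 *	pipe_read_open()   readers++	pipe_read_release()   readers--
 *	pipe_write_open()  writers++	pipe_write_release()  writers--
 *	pipe_rdwr_open()   per f_mode	pipe_rdwr_release()   per f_mode
 */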
/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 */
const struct file_operations read_fifo_fops = {
	.llseek		= no_llseek,
	.read		= do_sync_read,
	.aio_read	= pipe_read,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

const struct file_operations write_fifo_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= do_sync_write,
	.aio_write	= pipe_write,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

const struct file_operations rdwr_fifo_fops = {
	.llseek		= no_llseek,
	.read		= do_sync_read,
	.aio_read	= pipe_read,
	.write		= do_sync_write,
	.aio_write	= pipe_write,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};

static const struct file_operations read_pipe_fops = {
	.llseek		= no_llseek,
	.read		= do_sync_read,
	.aio_read	= pipe_read,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

static const struct file_operations write_pipe_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= do_sync_write,
	.aio_write	= pipe_write,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

static const struct file_operations rdwr_pipe_fops = {
	.llseek		= no_llseek,
	.read		= do_sync_read,
	.aio_read	= pipe_read,
	.write		= do_sync_write,
	.aio_write	= pipe_write,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};

struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
{
	struct pipe_inode_info *pipe;

	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
	if (pipe) {
		init_waitqueue_head(&pipe->wait);
		pipe->r_counter = pipe->w_counter = 1;
		pipe->inode = inode;
	}

	return pipe;
}

void __free_pipe_info(struct pipe_inode_info *pipe)
{
	int i;

	for (i = 0; i < PIPE_BUFFERS; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;
		if (buf->ops)
			buf->ops->release(pipe, buf);
	}
	if (pipe->tmp_page)
		__free_page(pipe->tmp_page);
	kfree(pipe);
}

void free_pipe_info(struct inode *inode)
{
	__free_pipe_info(inode->i_pipe);
	inode->i_pipe = NULL;
}

static struct vfsmount *pipe_mnt __read_mostly;

static int pipefs_delete_dentry(struct dentry *dentry)
{
	/*
	 * At creation time, we pretended this dentry was hashed
	 * (by clearing the DCACHE_UNHASHED bit in d_flags).
	 * At delete time, we restore the truth: not hashed.
	 * (so that dput() can proceed correctly)
	 */
	dentry->d_flags |= DCACHE_UNHASHED;
	return 0;
}
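/*
 * With ->d_dname in place, d_path() on a pipe dentry yields names of the
 * form "pipe:[<inode>]", which is what readlink() reports for pipe fds
 * under /proc, e.g. (illustrative shell output):
 *
 *	$ readlink /proc/self/fd/3
 *	pipe:[4097]
 */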
/*
 * pipefs_dname() is called from d_path().
 */
static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
	return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
				dentry->d_inode->i_ino);
}

static struct dentry_operations pipefs_dentry_operations = {
	.d_delete	= pipefs_delete_dentry,
	.d_dname	= pipefs_dname,
};

static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	pipe = alloc_pipe_info(inode);
	if (!pipe)
		goto fail_iput;
	inode->i_pipe = pipe;

	pipe->readers = pipe->writers = 1;
	inode->i_fop = &rdwr_pipe_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}
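/*
 * The two halves of a pipe are built below from one pipefs inode:
 * create_write_pipe() allocates the inode, dentry and writing struct
 * file; create_read_pipe() then shares that dentry (via dget()) for the
 * reading struct file, and do_pipe() wires both into file descriptors.
 */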
struct file *create_write_pipe(void)
{
	int err;
	struct inode *inode;
	struct file *f;
	struct dentry *dentry;
	struct qstr name = { .name = "" };

	f = get_empty_filp();
	if (!f)
		return ERR_PTR(-ENFILE);
	err = -ENFILE;
	inode = get_pipe_inode();
	if (!inode)
		goto err_file;

	err = -ENOMEM;
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
	if (!dentry)
		goto err_inode;

	dentry->d_op = &pipefs_dentry_operations;
	/*
	 * We don't want to publish this dentry into the global dentry
	 * hash table, so we pretend the dentry is already hashed, by
	 * unsetting DCACHE_UNHASHED. This permits a working
	 * /proc/$pid/fd/XXX on pipes.
	 */
	dentry->d_flags &= ~DCACHE_UNHASHED;
	d_instantiate(dentry, inode);
	f->f_path.mnt = mntget(pipe_mnt);
	f->f_path.dentry = dentry;
	f->f_mapping = inode->i_mapping;

	f->f_flags = O_WRONLY;
	f->f_op = &write_pipe_fops;
	f->f_mode = FMODE_WRITE;
	f->f_version = 0;

	return f;

err_inode:
	free_pipe_info(inode);
	iput(inode);
err_file:
	put_filp(f);
	return ERR_PTR(err);
}

void free_write_pipe(struct file *f)
{
	free_pipe_info(f->f_path.dentry->d_inode);
	dput(f->f_path.dentry);
	mntput(f->f_path.mnt);
	put_filp(f);
}

struct file *create_read_pipe(struct file *wrf)
{
	struct file *f = get_empty_filp();
	if (!f)
		return ERR_PTR(-ENFILE);

	/* Grab pipe from the writer */
	f->f_path.mnt = mntget(wrf->f_path.mnt);
	f->f_path.dentry = dget(wrf->f_path.dentry);
	f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;

	f->f_pos = 0;
	f->f_flags = O_RDONLY;
	f->f_op = &read_pipe_fops;
	f->f_mode = FMODE_READ;
	f->f_version = 0;

	return f;
}

int do_pipe(int *fd)
{
	struct file *fw, *fr;
	int error;
	int fdw, fdr;

	fw = create_write_pipe();
	if (IS_ERR(fw))
		return PTR_ERR(fw);
	fr = create_read_pipe(fw);
	error = PTR_ERR(fr);
	if (IS_ERR(fr))
		goto err_write_pipe;

	error = get_unused_fd();
	if (error < 0)
		goto err_read_pipe;
	fdr = error;

	error = get_unused_fd();
	if (error < 0)
		goto err_fdr;
	fdw = error;

	error = audit_fd_pair(fdr, fdw);
	if (error < 0)
		goto err_fdw;

	fd_install(fdr, fr);
	fd_install(fdw, fw);
	fd[0] = fdr;
	fd[1] = fdw;

	return 0;

err_fdw:
	put_unused_fd(fdw);
err_fdr:
	put_unused_fd(fdr);
err_read_pipe:
	dput(fr->f_path.dentry);
	mntput(fr->f_path.mnt);
	put_filp(fr);
err_write_pipe:
	free_write_pipe(fw);
	return error;
}

/*
 * pipefs should _never_ be mounted by userland - too much of a security
 * hassle, no real gain from having the whole whorehouse mounted. So we
 * don't need any operations on the root directory. However, we need a
 * non-trivial d_name - pipe: will go nicely and kill the special-casing
 * in procfs.
 */
static int pipefs_get_sb(struct file_system_type *fs_type,
			 int flags, const char *dev_name, void *data,
			 struct vfsmount *mnt)
{
	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
}

static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.get_sb		= pipefs_get_sb,
	.kill_sb	= kill_anon_super,
};

static int __init init_pipe_fs(void)
{
	int err = register_filesystem(&pipe_fs_type);

	if (!err) {
		pipe_mnt = kern_mount(&pipe_fs_type);
		if (IS_ERR(pipe_mnt)) {
			err = PTR_ERR(pipe_mnt);
			unregister_filesystem(&pipe_fs_type);
		}
	}
	return err;
}

static void __exit exit_pipe_fs(void)
{
	unregister_filesystem(&pipe_fs_type);
	mntput(pipe_mnt);
}

fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);
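/*
 * Illustrative userspace view of do_pipe() above (reached through the
 * pipe(2) system call; a sketch, not part of this file):
 *
 *	int fd[2];
 *
 *	if (pipe(fd) == 0) {
 *		write(fd[1], "hi", 2);		-- fd[1] is the write end
 *		read(fd[0], buf, 2);		-- fd[0] is the read end
 *	}
 */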