1 /* 2 * linux/fs/pipe.c 3 * 4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds 5 */ 6 7 #include <linux/mm.h> 8 #include <linux/file.h> 9 #include <linux/poll.h> 10 #include <linux/slab.h> 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/fs.h> 14 #include <linux/mount.h> 15 #include <linux/pipe_fs_i.h> 16 #include <linux/uio.h> 17 #include <linux/highmem.h> 18 #include <linux/pagemap.h> 19 #include <linux/audit.h> 20 #include <linux/syscalls.h> 21 22 #include <asm/uaccess.h> 23 #include <asm/ioctls.h> 24 25 /* 26 * We use a start+len construction, which provides full use of the 27 * allocated memory. 28 * -- Florian Coosmann (FGC) 29 * 30 * Reads with count = 0 should always return 0. 31 * -- Julian Bradfield 1999-06-07. 32 * 33 * FIFOs and Pipes now generate SIGIO for both readers and writers. 34 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16 35 * 36 * pipe_read & write cleanup 37 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 38 */ 39 40 static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) 41 { 42 if (pipe->inode) 43 mutex_lock_nested(&pipe->inode->i_mutex, subclass); 44 } 45 46 void pipe_lock(struct pipe_inode_info *pipe) 47 { 48 /* 49 * pipe_lock() nests non-pipe inode locks (for writing to a file) 50 */ 51 pipe_lock_nested(pipe, I_MUTEX_PARENT); 52 } 53 EXPORT_SYMBOL(pipe_lock); 54 55 void pipe_unlock(struct pipe_inode_info *pipe) 56 { 57 if (pipe->inode) 58 mutex_unlock(&pipe->inode->i_mutex); 59 } 60 EXPORT_SYMBOL(pipe_unlock); 61 62 void pipe_double_lock(struct pipe_inode_info *pipe1, 63 struct pipe_inode_info *pipe2) 64 { 65 BUG_ON(pipe1 == pipe2); 66 67 if (pipe1 < pipe2) { 68 pipe_lock_nested(pipe1, I_MUTEX_PARENT); 69 pipe_lock_nested(pipe2, I_MUTEX_CHILD); 70 } else { 71 pipe_lock_nested(pipe2, I_MUTEX_CHILD); 72 pipe_lock_nested(pipe1, I_MUTEX_PARENT); 73 } 74 } 75 76 /* Drop the inode semaphore and wait for a pipe event, atomically */ 77 void pipe_wait(struct pipe_inode_info *pipe) 78 { 79 DEFINE_WAIT(wait); 80 81 /* 82 * Pipes are system-local resources, so sleeping on them 83 * is considered a noninteractive wait: 84 */ 85 prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); 86 pipe_unlock(pipe); 87 schedule(); 88 finish_wait(&pipe->wait, &wait); 89 pipe_lock(pipe); 90 } 91 92 static int 93 pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, 94 int atomic) 95 { 96 unsigned long copy; 97 98 while (len > 0) { 99 while (!iov->iov_len) 100 iov++; 101 copy = min_t(unsigned long, len, iov->iov_len); 102 103 if (atomic) { 104 if (__copy_from_user_inatomic(to, iov->iov_base, copy)) 105 return -EFAULT; 106 } else { 107 if (copy_from_user(to, iov->iov_base, copy)) 108 return -EFAULT; 109 } 110 to += copy; 111 len -= copy; 112 iov->iov_base += copy; 113 iov->iov_len -= copy; 114 } 115 return 0; 116 } 117 118 static int 119 pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len, 120 int atomic) 121 { 122 unsigned long copy; 123 124 while (len > 0) { 125 while (!iov->iov_len) 126 iov++; 127 copy = min_t(unsigned long, len, iov->iov_len); 128 129 if (atomic) { 130 if (__copy_to_user_inatomic(iov->iov_base, from, copy)) 131 return -EFAULT; 132 } else { 133 if (copy_to_user(iov->iov_base, from, copy)) 134 return -EFAULT; 135 } 136 from += copy; 137 len -= copy; 138 iov->iov_base += copy; 139 iov->iov_len -= copy; 140 } 141 return 0; 142 } 143 144 /* 145 * Attempt to pre-fault in the user memory, so we can use atomic copies. 146 * Returns the number of bytes not faulted in. 147 */ 148 static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len) 149 { 150 while (!iov->iov_len) 151 iov++; 152 153 while (len > 0) { 154 unsigned long this_len; 155 156 this_len = min_t(unsigned long, len, iov->iov_len); 157 if (fault_in_pages_writeable(iov->iov_base, this_len)) 158 break; 159 160 len -= this_len; 161 iov++; 162 } 163 164 return len; 165 } 166 167 /* 168 * Pre-fault in the user memory, so we can use atomic copies. 169 */ 170 static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len) 171 { 172 while (!iov->iov_len) 173 iov++; 174 175 while (len > 0) { 176 unsigned long this_len; 177 178 this_len = min_t(unsigned long, len, iov->iov_len); 179 fault_in_pages_readable(iov->iov_base, this_len); 180 len -= this_len; 181 iov++; 182 } 183 } 184 185 static void anon_pipe_buf_release(struct pipe_inode_info *pipe, 186 struct pipe_buffer *buf) 187 { 188 struct page *page = buf->page; 189 190 /* 191 * If nobody else uses this page, and we don't already have a 192 * temporary page, let's keep track of it as a one-deep 193 * allocation cache. (Otherwise just release our reference to it) 194 */ 195 if (page_count(page) == 1 && !pipe->tmp_page) 196 pipe->tmp_page = page; 197 else 198 page_cache_release(page); 199 } 200 201 /** 202 * generic_pipe_buf_map - virtually map a pipe buffer 203 * @pipe: the pipe that the buffer belongs to 204 * @buf: the buffer that should be mapped 205 * @atomic: whether to use an atomic map 206 * 207 * Description: 208 * This function returns a kernel virtual address mapping for the 209 * pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided 210 * and the caller has to be careful not to fault before calling 211 * the unmap function. 212 * 213 * Note that this function occupies KM_USER0 if @atomic != 0. 214 */ 215 void *generic_pipe_buf_map(struct pipe_inode_info *pipe, 216 struct pipe_buffer *buf, int atomic) 217 { 218 if (atomic) { 219 buf->flags |= PIPE_BUF_FLAG_ATOMIC; 220 return kmap_atomic(buf->page, KM_USER0); 221 } 222 223 return kmap(buf->page); 224 } 225 226 /** 227 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer 228 * @pipe: the pipe that the buffer belongs to 229 * @buf: the buffer that should be unmapped 230 * @map_data: the data that the mapping function returned 231 * 232 * Description: 233 * This function undoes the mapping that ->map() provided. 234 */ 235 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, 236 struct pipe_buffer *buf, void *map_data) 237 { 238 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { 239 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; 240 kunmap_atomic(map_data, KM_USER0); 241 } else 242 kunmap(buf->page); 243 } 244 245 /** 246 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer 247 * @pipe: the pipe that the buffer belongs to 248 * @buf: the buffer to attempt to steal 249 * 250 * Description: 251 * This function attempts to steal the &struct page attached to 252 * @buf. If successful, this function returns 0 and returns with 253 * the page locked. The caller may then reuse the page for whatever 254 * he wishes; the typical use is insertion into a different file 255 * page cache. 256 */ 257 int generic_pipe_buf_steal(struct pipe_inode_info *pipe, 258 struct pipe_buffer *buf) 259 { 260 struct page *page = buf->page; 261 262 /* 263 * A reference of one is golden, that means that the owner of this 264 * page is the only one holding a reference to it. lock the page 265 * and return OK. 266 */ 267 if (page_count(page) == 1) { 268 lock_page(page); 269 return 0; 270 } 271 272 return 1; 273 } 274 275 /** 276 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer 277 * @pipe: the pipe that the buffer belongs to 278 * @buf: the buffer to get a reference to 279 * 280 * Description: 281 * This function grabs an extra reference to @buf. It's used in 282 * in the tee() system call, when we duplicate the buffers in one 283 * pipe into another. 284 */ 285 void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) 286 { 287 page_cache_get(buf->page); 288 } 289 290 /** 291 * generic_pipe_buf_confirm - verify contents of the pipe buffer 292 * @info: the pipe that the buffer belongs to 293 * @buf: the buffer to confirm 294 * 295 * Description: 296 * This function does nothing, because the generic pipe code uses 297 * pages that are always good when inserted into the pipe. 298 */ 299 int generic_pipe_buf_confirm(struct pipe_inode_info *info, 300 struct pipe_buffer *buf) 301 { 302 return 0; 303 } 304 305 static const struct pipe_buf_operations anon_pipe_buf_ops = { 306 .can_merge = 1, 307 .map = generic_pipe_buf_map, 308 .unmap = generic_pipe_buf_unmap, 309 .confirm = generic_pipe_buf_confirm, 310 .release = anon_pipe_buf_release, 311 .steal = generic_pipe_buf_steal, 312 .get = generic_pipe_buf_get, 313 }; 314 315 static ssize_t 316 pipe_read(struct kiocb *iocb, const struct iovec *_iov, 317 unsigned long nr_segs, loff_t pos) 318 { 319 struct file *filp = iocb->ki_filp; 320 struct inode *inode = filp->f_path.dentry->d_inode; 321 struct pipe_inode_info *pipe; 322 int do_wakeup; 323 ssize_t ret; 324 struct iovec *iov = (struct iovec *)_iov; 325 size_t total_len; 326 327 total_len = iov_length(iov, nr_segs); 328 /* Null read succeeds. */ 329 if (unlikely(total_len == 0)) 330 return 0; 331 332 do_wakeup = 0; 333 ret = 0; 334 mutex_lock(&inode->i_mutex); 335 pipe = inode->i_pipe; 336 for (;;) { 337 int bufs = pipe->nrbufs; 338 if (bufs) { 339 int curbuf = pipe->curbuf; 340 struct pipe_buffer *buf = pipe->bufs + curbuf; 341 const struct pipe_buf_operations *ops = buf->ops; 342 void *addr; 343 size_t chars = buf->len; 344 int error, atomic; 345 346 if (chars > total_len) 347 chars = total_len; 348 349 error = ops->confirm(pipe, buf); 350 if (error) { 351 if (!ret) 352 error = ret; 353 break; 354 } 355 356 atomic = !iov_fault_in_pages_write(iov, chars); 357 redo: 358 addr = ops->map(pipe, buf, atomic); 359 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); 360 ops->unmap(pipe, buf, addr); 361 if (unlikely(error)) { 362 /* 363 * Just retry with the slow path if we failed. 364 */ 365 if (atomic) { 366 atomic = 0; 367 goto redo; 368 } 369 if (!ret) 370 ret = error; 371 break; 372 } 373 ret += chars; 374 buf->offset += chars; 375 buf->len -= chars; 376 if (!buf->len) { 377 buf->ops = NULL; 378 ops->release(pipe, buf); 379 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); 380 pipe->curbuf = curbuf; 381 pipe->nrbufs = --bufs; 382 do_wakeup = 1; 383 } 384 total_len -= chars; 385 if (!total_len) 386 break; /* common path: read succeeded */ 387 } 388 if (bufs) /* More to do? */ 389 continue; 390 if (!pipe->writers) 391 break; 392 if (!pipe->waiting_writers) { 393 /* syscall merging: Usually we must not sleep 394 * if O_NONBLOCK is set, or if we got some data. 395 * But if a writer sleeps in kernel space, then 396 * we can wait for that data without violating POSIX. 397 */ 398 if (ret) 399 break; 400 if (filp->f_flags & O_NONBLOCK) { 401 ret = -EAGAIN; 402 break; 403 } 404 } 405 if (signal_pending(current)) { 406 if (!ret) 407 ret = -ERESTARTSYS; 408 break; 409 } 410 if (do_wakeup) { 411 wake_up_interruptible_sync(&pipe->wait); 412 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 413 } 414 pipe_wait(pipe); 415 } 416 mutex_unlock(&inode->i_mutex); 417 418 /* Signal writers asynchronously that there is more room. */ 419 if (do_wakeup) { 420 wake_up_interruptible_sync(&pipe->wait); 421 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 422 } 423 if (ret > 0) 424 file_accessed(filp); 425 return ret; 426 } 427 428 static ssize_t 429 pipe_write(struct kiocb *iocb, const struct iovec *_iov, 430 unsigned long nr_segs, loff_t ppos) 431 { 432 struct file *filp = iocb->ki_filp; 433 struct inode *inode = filp->f_path.dentry->d_inode; 434 struct pipe_inode_info *pipe; 435 ssize_t ret; 436 int do_wakeup; 437 struct iovec *iov = (struct iovec *)_iov; 438 size_t total_len; 439 ssize_t chars; 440 441 total_len = iov_length(iov, nr_segs); 442 /* Null write succeeds. */ 443 if (unlikely(total_len == 0)) 444 return 0; 445 446 do_wakeup = 0; 447 ret = 0; 448 mutex_lock(&inode->i_mutex); 449 pipe = inode->i_pipe; 450 451 if (!pipe->readers) { 452 send_sig(SIGPIPE, current, 0); 453 ret = -EPIPE; 454 goto out; 455 } 456 457 /* We try to merge small writes */ 458 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ 459 if (pipe->nrbufs && chars != 0) { 460 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) & 461 (PIPE_BUFFERS-1); 462 struct pipe_buffer *buf = pipe->bufs + lastbuf; 463 const struct pipe_buf_operations *ops = buf->ops; 464 int offset = buf->offset + buf->len; 465 466 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 467 int error, atomic = 1; 468 void *addr; 469 470 error = ops->confirm(pipe, buf); 471 if (error) 472 goto out; 473 474 iov_fault_in_pages_read(iov, chars); 475 redo1: 476 addr = ops->map(pipe, buf, atomic); 477 error = pipe_iov_copy_from_user(offset + addr, iov, 478 chars, atomic); 479 ops->unmap(pipe, buf, addr); 480 ret = error; 481 do_wakeup = 1; 482 if (error) { 483 if (atomic) { 484 atomic = 0; 485 goto redo1; 486 } 487 goto out; 488 } 489 buf->len += chars; 490 total_len -= chars; 491 ret = chars; 492 if (!total_len) 493 goto out; 494 } 495 } 496 497 for (;;) { 498 int bufs; 499 500 if (!pipe->readers) { 501 send_sig(SIGPIPE, current, 0); 502 if (!ret) 503 ret = -EPIPE; 504 break; 505 } 506 bufs = pipe->nrbufs; 507 if (bufs < PIPE_BUFFERS) { 508 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); 509 struct pipe_buffer *buf = pipe->bufs + newbuf; 510 struct page *page = pipe->tmp_page; 511 char *src; 512 int error, atomic = 1; 513 514 if (!page) { 515 page = alloc_page(GFP_HIGHUSER); 516 if (unlikely(!page)) { 517 ret = ret ? : -ENOMEM; 518 break; 519 } 520 pipe->tmp_page = page; 521 } 522 /* Always wake up, even if the copy fails. Otherwise 523 * we lock up (O_NONBLOCK-)readers that sleep due to 524 * syscall merging. 525 * FIXME! Is this really true? 526 */ 527 do_wakeup = 1; 528 chars = PAGE_SIZE; 529 if (chars > total_len) 530 chars = total_len; 531 532 iov_fault_in_pages_read(iov, chars); 533 redo2: 534 if (atomic) 535 src = kmap_atomic(page, KM_USER0); 536 else 537 src = kmap(page); 538 539 error = pipe_iov_copy_from_user(src, iov, chars, 540 atomic); 541 if (atomic) 542 kunmap_atomic(src, KM_USER0); 543 else 544 kunmap(page); 545 546 if (unlikely(error)) { 547 if (atomic) { 548 atomic = 0; 549 goto redo2; 550 } 551 if (!ret) 552 ret = error; 553 break; 554 } 555 ret += chars; 556 557 /* Insert it into the buffer array */ 558 buf->page = page; 559 buf->ops = &anon_pipe_buf_ops; 560 buf->offset = 0; 561 buf->len = chars; 562 pipe->nrbufs = ++bufs; 563 pipe->tmp_page = NULL; 564 565 total_len -= chars; 566 if (!total_len) 567 break; 568 } 569 if (bufs < PIPE_BUFFERS) 570 continue; 571 if (filp->f_flags & O_NONBLOCK) { 572 if (!ret) 573 ret = -EAGAIN; 574 break; 575 } 576 if (signal_pending(current)) { 577 if (!ret) 578 ret = -ERESTARTSYS; 579 break; 580 } 581 if (do_wakeup) { 582 wake_up_interruptible_sync(&pipe->wait); 583 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 584 do_wakeup = 0; 585 } 586 pipe->waiting_writers++; 587 pipe_wait(pipe); 588 pipe->waiting_writers--; 589 } 590 out: 591 mutex_unlock(&inode->i_mutex); 592 if (do_wakeup) { 593 wake_up_interruptible_sync(&pipe->wait); 594 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 595 } 596 if (ret > 0) 597 file_update_time(filp); 598 return ret; 599 } 600 601 static ssize_t 602 bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos) 603 { 604 return -EBADF; 605 } 606 607 static ssize_t 608 bad_pipe_w(struct file *filp, const char __user *buf, size_t count, 609 loff_t *ppos) 610 { 611 return -EBADF; 612 } 613 614 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 615 { 616 struct inode *inode = filp->f_path.dentry->d_inode; 617 struct pipe_inode_info *pipe; 618 int count, buf, nrbufs; 619 620 switch (cmd) { 621 case FIONREAD: 622 mutex_lock(&inode->i_mutex); 623 pipe = inode->i_pipe; 624 count = 0; 625 buf = pipe->curbuf; 626 nrbufs = pipe->nrbufs; 627 while (--nrbufs >= 0) { 628 count += pipe->bufs[buf].len; 629 buf = (buf+1) & (PIPE_BUFFERS-1); 630 } 631 mutex_unlock(&inode->i_mutex); 632 633 return put_user(count, (int __user *)arg); 634 default: 635 return -EINVAL; 636 } 637 } 638 639 /* No kernel lock held - fine */ 640 static unsigned int 641 pipe_poll(struct file *filp, poll_table *wait) 642 { 643 unsigned int mask; 644 struct inode *inode = filp->f_path.dentry->d_inode; 645 struct pipe_inode_info *pipe = inode->i_pipe; 646 int nrbufs; 647 648 poll_wait(filp, &pipe->wait, wait); 649 650 /* Reading only -- no need for acquiring the semaphore. */ 651 nrbufs = pipe->nrbufs; 652 mask = 0; 653 if (filp->f_mode & FMODE_READ) { 654 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0; 655 if (!pipe->writers && filp->f_version != pipe->w_counter) 656 mask |= POLLHUP; 657 } 658 659 if (filp->f_mode & FMODE_WRITE) { 660 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0; 661 /* 662 * Most Unices do not set POLLERR for FIFOs but on Linux they 663 * behave exactly like pipes for poll(). 664 */ 665 if (!pipe->readers) 666 mask |= POLLERR; 667 } 668 669 return mask; 670 } 671 672 static int 673 pipe_release(struct inode *inode, int decr, int decw) 674 { 675 struct pipe_inode_info *pipe; 676 677 mutex_lock(&inode->i_mutex); 678 pipe = inode->i_pipe; 679 pipe->readers -= decr; 680 pipe->writers -= decw; 681 682 if (!pipe->readers && !pipe->writers) { 683 free_pipe_info(inode); 684 } else { 685 wake_up_interruptible_sync(&pipe->wait); 686 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 687 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 688 } 689 mutex_unlock(&inode->i_mutex); 690 691 return 0; 692 } 693 694 static int 695 pipe_read_fasync(int fd, struct file *filp, int on) 696 { 697 struct inode *inode = filp->f_path.dentry->d_inode; 698 int retval; 699 700 mutex_lock(&inode->i_mutex); 701 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers); 702 mutex_unlock(&inode->i_mutex); 703 704 return retval; 705 } 706 707 708 static int 709 pipe_write_fasync(int fd, struct file *filp, int on) 710 { 711 struct inode *inode = filp->f_path.dentry->d_inode; 712 int retval; 713 714 mutex_lock(&inode->i_mutex); 715 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers); 716 mutex_unlock(&inode->i_mutex); 717 718 return retval; 719 } 720 721 722 static int 723 pipe_rdwr_fasync(int fd, struct file *filp, int on) 724 { 725 struct inode *inode = filp->f_path.dentry->d_inode; 726 struct pipe_inode_info *pipe = inode->i_pipe; 727 int retval; 728 729 mutex_lock(&inode->i_mutex); 730 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); 731 if (retval >= 0) { 732 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); 733 if (retval < 0) /* this can happen only if on == T */ 734 fasync_helper(-1, filp, 0, &pipe->fasync_readers); 735 } 736 mutex_unlock(&inode->i_mutex); 737 return retval; 738 } 739 740 741 static int 742 pipe_read_release(struct inode *inode, struct file *filp) 743 { 744 return pipe_release(inode, 1, 0); 745 } 746 747 static int 748 pipe_write_release(struct inode *inode, struct file *filp) 749 { 750 return pipe_release(inode, 0, 1); 751 } 752 753 static int 754 pipe_rdwr_release(struct inode *inode, struct file *filp) 755 { 756 int decr, decw; 757 758 decr = (filp->f_mode & FMODE_READ) != 0; 759 decw = (filp->f_mode & FMODE_WRITE) != 0; 760 return pipe_release(inode, decr, decw); 761 } 762 763 static int 764 pipe_read_open(struct inode *inode, struct file *filp) 765 { 766 /* We could have perhaps used atomic_t, but this and friends 767 below are the only places. So it doesn't seem worthwhile. */ 768 mutex_lock(&inode->i_mutex); 769 inode->i_pipe->readers++; 770 mutex_unlock(&inode->i_mutex); 771 772 return 0; 773 } 774 775 static int 776 pipe_write_open(struct inode *inode, struct file *filp) 777 { 778 mutex_lock(&inode->i_mutex); 779 inode->i_pipe->writers++; 780 mutex_unlock(&inode->i_mutex); 781 782 return 0; 783 } 784 785 static int 786 pipe_rdwr_open(struct inode *inode, struct file *filp) 787 { 788 mutex_lock(&inode->i_mutex); 789 if (filp->f_mode & FMODE_READ) 790 inode->i_pipe->readers++; 791 if (filp->f_mode & FMODE_WRITE) 792 inode->i_pipe->writers++; 793 mutex_unlock(&inode->i_mutex); 794 795 return 0; 796 } 797 798 /* 799 * The file_operations structs are not static because they 800 * are also used in linux/fs/fifo.c to do operations on FIFOs. 801 * 802 * Pipes reuse fifos' file_operations structs. 803 */ 804 const struct file_operations read_pipefifo_fops = { 805 .llseek = no_llseek, 806 .read = do_sync_read, 807 .aio_read = pipe_read, 808 .write = bad_pipe_w, 809 .poll = pipe_poll, 810 .unlocked_ioctl = pipe_ioctl, 811 .open = pipe_read_open, 812 .release = pipe_read_release, 813 .fasync = pipe_read_fasync, 814 }; 815 816 const struct file_operations write_pipefifo_fops = { 817 .llseek = no_llseek, 818 .read = bad_pipe_r, 819 .write = do_sync_write, 820 .aio_write = pipe_write, 821 .poll = pipe_poll, 822 .unlocked_ioctl = pipe_ioctl, 823 .open = pipe_write_open, 824 .release = pipe_write_release, 825 .fasync = pipe_write_fasync, 826 }; 827 828 const struct file_operations rdwr_pipefifo_fops = { 829 .llseek = no_llseek, 830 .read = do_sync_read, 831 .aio_read = pipe_read, 832 .write = do_sync_write, 833 .aio_write = pipe_write, 834 .poll = pipe_poll, 835 .unlocked_ioctl = pipe_ioctl, 836 .open = pipe_rdwr_open, 837 .release = pipe_rdwr_release, 838 .fasync = pipe_rdwr_fasync, 839 }; 840 841 struct pipe_inode_info * alloc_pipe_info(struct inode *inode) 842 { 843 struct pipe_inode_info *pipe; 844 845 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); 846 if (pipe) { 847 init_waitqueue_head(&pipe->wait); 848 pipe->r_counter = pipe->w_counter = 1; 849 pipe->inode = inode; 850 } 851 852 return pipe; 853 } 854 855 void __free_pipe_info(struct pipe_inode_info *pipe) 856 { 857 int i; 858 859 for (i = 0; i < PIPE_BUFFERS; i++) { 860 struct pipe_buffer *buf = pipe->bufs + i; 861 if (buf->ops) 862 buf->ops->release(pipe, buf); 863 } 864 if (pipe->tmp_page) 865 __free_page(pipe->tmp_page); 866 kfree(pipe); 867 } 868 869 void free_pipe_info(struct inode *inode) 870 { 871 __free_pipe_info(inode->i_pipe); 872 inode->i_pipe = NULL; 873 } 874 875 static struct vfsmount *pipe_mnt __read_mostly; 876 static int pipefs_delete_dentry(struct dentry *dentry) 877 { 878 /* 879 * At creation time, we pretended this dentry was hashed 880 * (by clearing DCACHE_UNHASHED bit in d_flags) 881 * At delete time, we restore the truth : not hashed. 882 * (so that dput() can proceed correctly) 883 */ 884 dentry->d_flags |= DCACHE_UNHASHED; 885 return 0; 886 } 887 888 /* 889 * pipefs_dname() is called from d_path(). 890 */ 891 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen) 892 { 893 return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]", 894 dentry->d_inode->i_ino); 895 } 896 897 static const struct dentry_operations pipefs_dentry_operations = { 898 .d_delete = pipefs_delete_dentry, 899 .d_dname = pipefs_dname, 900 }; 901 902 static struct inode * get_pipe_inode(void) 903 { 904 struct inode *inode = new_inode(pipe_mnt->mnt_sb); 905 struct pipe_inode_info *pipe; 906 907 if (!inode) 908 goto fail_inode; 909 910 pipe = alloc_pipe_info(inode); 911 if (!pipe) 912 goto fail_iput; 913 inode->i_pipe = pipe; 914 915 pipe->readers = pipe->writers = 1; 916 inode->i_fop = &rdwr_pipefifo_fops; 917 918 /* 919 * Mark the inode dirty from the very beginning, 920 * that way it will never be moved to the dirty 921 * list because "mark_inode_dirty()" will think 922 * that it already _is_ on the dirty list. 923 */ 924 inode->i_state = I_DIRTY; 925 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; 926 inode->i_uid = current_fsuid(); 927 inode->i_gid = current_fsgid(); 928 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 929 930 return inode; 931 932 fail_iput: 933 iput(inode); 934 935 fail_inode: 936 return NULL; 937 } 938 939 struct file *create_write_pipe(int flags) 940 { 941 int err; 942 struct inode *inode; 943 struct file *f; 944 struct dentry *dentry; 945 struct qstr name = { .name = "" }; 946 947 err = -ENFILE; 948 inode = get_pipe_inode(); 949 if (!inode) 950 goto err; 951 952 err = -ENOMEM; 953 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); 954 if (!dentry) 955 goto err_inode; 956 957 dentry->d_op = &pipefs_dentry_operations; 958 /* 959 * We dont want to publish this dentry into global dentry hash table. 960 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED 961 * This permits a working /proc/$pid/fd/XXX on pipes 962 */ 963 dentry->d_flags &= ~DCACHE_UNHASHED; 964 d_instantiate(dentry, inode); 965 966 err = -ENFILE; 967 f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops); 968 if (!f) 969 goto err_dentry; 970 f->f_mapping = inode->i_mapping; 971 972 f->f_flags = O_WRONLY | (flags & O_NONBLOCK); 973 f->f_version = 0; 974 975 return f; 976 977 err_dentry: 978 free_pipe_info(inode); 979 dput(dentry); 980 return ERR_PTR(err); 981 982 err_inode: 983 free_pipe_info(inode); 984 iput(inode); 985 err: 986 return ERR_PTR(err); 987 } 988 989 void free_write_pipe(struct file *f) 990 { 991 free_pipe_info(f->f_dentry->d_inode); 992 path_put(&f->f_path); 993 put_filp(f); 994 } 995 996 struct file *create_read_pipe(struct file *wrf, int flags) 997 { 998 struct file *f = get_empty_filp(); 999 if (!f) 1000 return ERR_PTR(-ENFILE); 1001 1002 /* Grab pipe from the writer */ 1003 f->f_path = wrf->f_path; 1004 path_get(&wrf->f_path); 1005 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; 1006 1007 f->f_pos = 0; 1008 f->f_flags = O_RDONLY | (flags & O_NONBLOCK); 1009 f->f_op = &read_pipefifo_fops; 1010 f->f_mode = FMODE_READ; 1011 f->f_version = 0; 1012 1013 return f; 1014 } 1015 1016 int do_pipe_flags(int *fd, int flags) 1017 { 1018 struct file *fw, *fr; 1019 int error; 1020 int fdw, fdr; 1021 1022 if (flags & ~(O_CLOEXEC | O_NONBLOCK)) 1023 return -EINVAL; 1024 1025 fw = create_write_pipe(flags); 1026 if (IS_ERR(fw)) 1027 return PTR_ERR(fw); 1028 fr = create_read_pipe(fw, flags); 1029 error = PTR_ERR(fr); 1030 if (IS_ERR(fr)) 1031 goto err_write_pipe; 1032 1033 error = get_unused_fd_flags(flags); 1034 if (error < 0) 1035 goto err_read_pipe; 1036 fdr = error; 1037 1038 error = get_unused_fd_flags(flags); 1039 if (error < 0) 1040 goto err_fdr; 1041 fdw = error; 1042 1043 audit_fd_pair(fdr, fdw); 1044 fd_install(fdr, fr); 1045 fd_install(fdw, fw); 1046 fd[0] = fdr; 1047 fd[1] = fdw; 1048 1049 return 0; 1050 1051 err_fdr: 1052 put_unused_fd(fdr); 1053 err_read_pipe: 1054 path_put(&fr->f_path); 1055 put_filp(fr); 1056 err_write_pipe: 1057 free_write_pipe(fw); 1058 return error; 1059 } 1060 1061 /* 1062 * sys_pipe() is the normal C calling standard for creating 1063 * a pipe. It's not the way Unix traditionally does this, though. 1064 */ 1065 SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) 1066 { 1067 int fd[2]; 1068 int error; 1069 1070 error = do_pipe_flags(fd, flags); 1071 if (!error) { 1072 if (copy_to_user(fildes, fd, sizeof(fd))) { 1073 sys_close(fd[0]); 1074 sys_close(fd[1]); 1075 error = -EFAULT; 1076 } 1077 } 1078 return error; 1079 } 1080 1081 SYSCALL_DEFINE1(pipe, int __user *, fildes) 1082 { 1083 return sys_pipe2(fildes, 0); 1084 } 1085 1086 /* 1087 * pipefs should _never_ be mounted by userland - too much of security hassle, 1088 * no real gain from having the whole whorehouse mounted. So we don't need 1089 * any operations on the root directory. However, we need a non-trivial 1090 * d_name - pipe: will go nicely and kill the special-casing in procfs. 1091 */ 1092 static int pipefs_get_sb(struct file_system_type *fs_type, 1093 int flags, const char *dev_name, void *data, 1094 struct vfsmount *mnt) 1095 { 1096 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt); 1097 } 1098 1099 static struct file_system_type pipe_fs_type = { 1100 .name = "pipefs", 1101 .get_sb = pipefs_get_sb, 1102 .kill_sb = kill_anon_super, 1103 }; 1104 1105 static int __init init_pipe_fs(void) 1106 { 1107 int err = register_filesystem(&pipe_fs_type); 1108 1109 if (!err) { 1110 pipe_mnt = kern_mount(&pipe_fs_type); 1111 if (IS_ERR(pipe_mnt)) { 1112 err = PTR_ERR(pipe_mnt); 1113 unregister_filesystem(&pipe_fs_type); 1114 } 1115 } 1116 return err; 1117 } 1118 1119 static void __exit exit_pipe_fs(void) 1120 { 1121 unregister_filesystem(&pipe_fs_type); 1122 mntput(pipe_mnt); 1123 } 1124 1125 fs_initcall(init_pipe_fs); 1126 module_exit(exit_pipe_fs); 1127