Lines Matching +full:sub +full:- +full:sampled

1 // SPDX-License-Identifier: GPL-2.0-only
3 * fs/direct-io.c
16 * added support for non-aligned IO.
59 * is determined on a per-invocation basis. When talking to the filesystem
61 * down by dio->blkfactor. Similarly, fs-blocksize quantities are converted
76 finer. blkfactor=2 means 1/4-block
78 unsigned start_zero_done; /* flag: sub-blocksize zeroing has
161 return sdio->tail - sdio->head; in dio_pages_present()
169 struct page **pages = dio->pages; in dio_refill_pages()
170 const enum req_op dio_op = dio->opf & REQ_OP_MASK; in dio_refill_pages()
173 ret = iov_iter_extract_pages(sdio->iter, &pages, LONG_MAX, in dio_refill_pages()
174 DIO_PAGES, 0, &sdio->from); in dio_refill_pages()
176 if (ret < 0 && sdio->blocks_available && dio_op == REQ_OP_WRITE) { in dio_refill_pages()
182 if (dio->page_errors == 0) in dio_refill_pages()
183 dio->page_errors = ret; in dio_refill_pages()
184 dio->pages[0] = ZERO_PAGE(0); in dio_refill_pages()
185 sdio->head = 0; in dio_refill_pages()
186 sdio->tail = 1; in dio_refill_pages()
187 sdio->from = 0; in dio_refill_pages()
188 sdio->to = PAGE_SIZE; in dio_refill_pages()
193 ret += sdio->from; in dio_refill_pages()
194 sdio->head = 0; in dio_refill_pages()
195 sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE; in dio_refill_pages()
196 sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1; in dio_refill_pages()
219 return dio->pages[sdio->head]; in dio_get_page()
224 if (dio->is_pinned) in dio_pin_page()
230 if (dio->is_pinned) in dio_unpin_page()
235 * dio_complete() - called when all DIO BIO I/O has been completed
247 const enum req_op dio_op = dio->opf & REQ_OP_MASK; in dio_complete()
248 loff_t offset = dio->iocb->ki_pos; in dio_complete()
255 * In that case -EIOCBQUEUED is in fact not an error we want in dio_complete()
258 if (ret == -EIOCBQUEUED) in dio_complete()
261 if (dio->result) { in dio_complete()
262 transferred = dio->result; in dio_complete()
266 ((offset + transferred) > dio->i_size)) in dio_complete()
267 transferred = dio->i_size - offset; in dio_complete()
269 if (unlikely(ret == -EFAULT) && transferred) in dio_complete()
274 ret = dio->page_errors; in dio_complete()
276 ret = dio->io_error; in dio_complete()
280 if (dio->end_io) { in dio_complete()
282 err = dio->end_io(dio->iocb, offset, ret, dio->private); in dio_complete()
289 * non-direct readahead, or faulted in by get_user_pages() if the source in dio_complete()
294 * And this page cache invalidation has to be after dio->end_io(), as in dio_complete()
301 kiocb_invalidate_post_direct_write(dio->iocb, ret); in dio_complete()
303 inode_dio_end(dio->inode); in dio_complete()
311 dio->iocb->ki_pos += transferred; in dio_complete()
314 ret = generic_write_sync(dio->iocb, ret); in dio_complete()
315 dio->iocb->ki_complete(dio->iocb, ret); in dio_complete()
336 struct dio *dio = bio->bi_private; in dio_bio_end_aio()
337 const enum req_op dio_op = dio->opf & REQ_OP_MASK; in dio_bio_end_aio()
345 spin_lock_irqsave(&dio->bio_lock, flags); in dio_bio_end_aio()
346 remaining = --dio->refcount; in dio_bio_end_aio()
347 if (remaining == 1 && dio->waiter) in dio_bio_end_aio()
348 wake_up_process(dio->waiter); in dio_bio_end_aio()
349 spin_unlock_irqrestore(&dio->bio_lock, flags); in dio_bio_end_aio()
360 if (dio->result) in dio_bio_end_aio()
361 defer_completion = dio->defer_completion || in dio_bio_end_aio()
363 dio->inode->i_mapping->nrpages); in dio_bio_end_aio()
365 INIT_WORK(&dio->complete_work, dio_aio_complete_work); in dio_bio_end_aio()
366 queue_work(dio->inode->i_sb->s_dio_done_wq, in dio_bio_end_aio()
367 &dio->complete_work); in dio_bio_end_aio()
375 * The BIO completion handler simply queues the BIO up for the process-context
379 * implement a singly-linked list of completed BIOs, at dio->bio_list.
383 struct dio *dio = bio->bi_private; in dio_bio_end_io()
386 spin_lock_irqsave(&dio->bio_lock, flags); in dio_bio_end_io()
387 bio->bi_private = dio->bio_list; in dio_bio_end_io()
388 dio->bio_list = bio; in dio_bio_end_io()
389 if (--dio->refcount == 1 && dio->waiter) in dio_bio_end_io()
390 wake_up_process(dio->waiter); in dio_bio_end_io()
391 spin_unlock_irqrestore(&dio->bio_lock, flags); in dio_bio_end_io()
405 bio = bio_alloc(bdev, nr_vecs, dio->opf, GFP_KERNEL); in dio_bio_alloc()
406 bio->bi_iter.bi_sector = first_sector; in dio_bio_alloc()
407 if (dio->is_async) in dio_bio_alloc()
408 bio->bi_end_io = dio_bio_end_aio; in dio_bio_alloc()
410 bio->bi_end_io = dio_bio_end_io; in dio_bio_alloc()
411 if (dio->is_pinned) in dio_bio_alloc()
413 sdio->bio = bio; in dio_bio_alloc()
414 sdio->logical_offset_in_bio = sdio->cur_page_fs_offset; in dio_bio_alloc()
422 * bios hold a dio reference between submit_bio and ->end_io.
426 const enum req_op dio_op = dio->opf & REQ_OP_MASK; in dio_bio_submit()
427 struct bio *bio = sdio->bio; in dio_bio_submit()
430 bio->bi_private = dio; in dio_bio_submit()
432 spin_lock_irqsave(&dio->bio_lock, flags); in dio_bio_submit()
433 dio->refcount++; in dio_bio_submit()
434 spin_unlock_irqrestore(&dio->bio_lock, flags); in dio_bio_submit()
436 if (dio->is_async && dio_op == REQ_OP_READ && dio->should_dirty) in dio_bio_submit()
439 dio->bio_disk = bio->bi_bdev->bd_disk; in dio_bio_submit()
443 sdio->bio = NULL; in dio_bio_submit()
444 sdio->boundary = 0; in dio_bio_submit()
445 sdio->logical_offset_in_bio = 0; in dio_bio_submit()
453 if (dio->is_pinned) in dio_cleanup()
454 unpin_user_pages(dio->pages + sdio->head, in dio_cleanup()
455 sdio->tail - sdio->head); in dio_cleanup()
456 sdio->head = sdio->tail; in dio_cleanup()
462 * all bios have been issued so that dio->refcount can only decrease. This
470 spin_lock_irqsave(&dio->bio_lock, flags); in dio_await_one()
478 while (dio->refcount > 1 && dio->bio_list == NULL) { in dio_await_one()
480 dio->waiter = current; in dio_await_one()
481 spin_unlock_irqrestore(&dio->bio_lock, flags); in dio_await_one()
484 spin_lock_irqsave(&dio->bio_lock, flags); in dio_await_one()
485 dio->waiter = NULL; in dio_await_one()
487 if (dio->bio_list) { in dio_await_one()
488 bio = dio->bio_list; in dio_await_one()
489 dio->bio_list = bio->bi_private; in dio_await_one()
491 spin_unlock_irqrestore(&dio->bio_lock, flags); in dio_await_one()
500 blk_status_t err = bio->bi_status; in dio_bio_complete()
501 const enum req_op dio_op = dio->opf & REQ_OP_MASK; in dio_bio_complete()
502 bool should_dirty = dio_op == REQ_OP_READ && dio->should_dirty; in dio_bio_complete()
505 if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT)) in dio_bio_complete()
506 dio->io_error = -EAGAIN; in dio_bio_complete()
508 dio->io_error = -EIO; in dio_bio_complete()
511 if (dio->is_async && should_dirty) { in dio_bio_complete()
521 * Wait on and process all in-flight BIOs. This must only be called once
524 * errors are propagated through dio->io_error and should be propagated via
548 if (sdio->reap_counter++ >= 64) { in dio_bio_reap()
549 while (dio->bio_list) { in dio_bio_reap()
554 spin_lock_irqsave(&dio->bio_lock, flags); in dio_bio_reap()
555 bio = dio->bio_list; in dio_bio_reap()
556 dio->bio_list = bio->bi_private; in dio_bio_reap()
557 spin_unlock_irqrestore(&dio->bio_lock, flags); in dio_bio_reap()
562 sdio->reap_counter = 0; in dio_bio_reap()
569 struct super_block *sb = dio->inode->i_sb; in dio_set_defer_completion()
571 if (dio->defer_completion) in dio_set_defer_completion()
573 dio->defer_completion = true; in dio_set_defer_completion()
574 if (!sb->s_dio_done_wq) in dio_set_defer_completion()
581 * of available blocks at sdio->blocks_available. These are in units of the
585 * it uses the passed inode-relative block number as the file offset, as usual.
587 * get_block() is passed the number of i_blkbits-sized blocks which direct_io
590 * If the fs has mapped a lot of blocks, it should populate bh->b_size to
592 * bh->b_blocknr.
597 * In the case of filesystem holes: the fs may return an arbitrarily-large
599 * buffer_mapped(). However the direct-io code will only process holes one
600 * block at a time - it will repeatedly call get_block() as it walks the hole.
605 const enum req_op dio_op = dio->opf & REQ_OP_MASK; in get_more_blocks()
607 sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ in get_more_blocks()
608 sector_t fs_endblk; /* Into file, in filesystem-sized blocks */ in get_more_blocks()
609 unsigned long fs_count; /* Number of filesystem-sized blocks */ in get_more_blocks()
611 unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor; in get_more_blocks()
618 ret = dio->page_errors; in get_more_blocks()
620 BUG_ON(sdio->block_in_file >= sdio->final_block_in_request); in get_more_blocks()
621 fs_startblk = sdio->block_in_file >> sdio->blkfactor; in get_more_blocks()
622 fs_endblk = (sdio->final_block_in_request - 1) >> in get_more_blocks()
623 sdio->blkfactor; in get_more_blocks()
624 fs_count = fs_endblk - fs_startblk + 1; in get_more_blocks()
626 map_bh->b_state = 0; in get_more_blocks()
627 map_bh->b_size = fs_count << i_blkbits; in get_more_blocks()
641 if (dio->flags & DIO_SKIP_HOLES) { in get_more_blocks()
642 i_size = i_size_read(dio->inode); in get_more_blocks()
643 if (i_size && fs_startblk <= (i_size - 1) >> i_blkbits) in get_more_blocks()
647 ret = (*sdio->get_block)(dio->inode, fs_startblk, in get_more_blocks()
651 dio->private = map_bh->b_private; in get_more_blocks()
671 sector = start_sector << (sdio->blkbits - 9); in dio_new_bio()
672 nr_pages = bio_max_segs(sdio->pages_in_io); in dio_new_bio()
674 dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages); in dio_new_bio()
675 sdio->boundary = 0; in dio_new_bio()
683 * the just-added page.
685 * Return zero on success. Non-zero means the caller needs to start a new BIO.
691 ret = bio_add_page(sdio->bio, sdio->cur_page, in dio_bio_add_page()
692 sdio->cur_page_len, sdio->cur_page_offset); in dio_bio_add_page()
693 if (ret == sdio->cur_page_len) { in dio_bio_add_page()
697 if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE) in dio_bio_add_page()
698 sdio->pages_in_io--; in dio_bio_add_page()
699 dio_pin_page(dio, sdio->cur_page); in dio_bio_add_page()
700 sdio->final_block_in_bio = sdio->cur_page_block + in dio_bio_add_page()
701 (sdio->cur_page_len >> sdio->blkbits); in dio_bio_add_page()
712 * starts on-disk at cur_page_block.
724 if (sdio->bio) { in dio_send_cur_page()
725 loff_t cur_offset = sdio->cur_page_fs_offset; in dio_send_cur_page()
726 loff_t bio_next_offset = sdio->logical_offset_in_bio + in dio_send_cur_page()
727 sdio->bio->bi_iter.bi_size; in dio_send_cur_page()
732 * Btrfs cannot handle having logically non-contiguous requests in dio_send_cur_page()
735 * Logical: [0-4095][HOLE][8192-12287] in dio_send_cur_page()
736 * Physical: [0-4095] [4096-8191] in dio_send_cur_page()
743 if (sdio->final_block_in_bio != sdio->cur_page_block || in dio_send_cur_page()
748 if (sdio->bio == NULL) { in dio_send_cur_page()
749 ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh); in dio_send_cur_page()
756 ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh); in dio_send_cur_page()
774 * The chunk of page starts on-disk at blocknr.
776 * We perform deferred IO, by recording the last-submitted page inside our
788 const enum req_op dio_op = dio->opf & REQ_OP_MASK; in submit_page_section()
790 int boundary = sdio->boundary; /* dio_send_cur_page may clear it */ in submit_page_section()
802 if (sdio->cur_page == page && in submit_page_section()
803 sdio->cur_page_offset + sdio->cur_page_len == offset && in submit_page_section()
804 sdio->cur_page_block + in submit_page_section()
805 (sdio->cur_page_len >> sdio->blkbits) == blocknr) { in submit_page_section()
806 sdio->cur_page_len += len; in submit_page_section()
813 if (sdio->cur_page) { in submit_page_section()
815 dio_unpin_page(dio, sdio->cur_page); in submit_page_section()
816 sdio->cur_page = NULL; in submit_page_section()
822 sdio->cur_page = page; in submit_page_section()
823 sdio->cur_page_offset = offset; in submit_page_section()
824 sdio->cur_page_len = len; in submit_page_section()
825 sdio->cur_page_block = blocknr; in submit_page_section()
826 sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits; in submit_page_section()
834 if (sdio->bio) in submit_page_section()
836 dio_unpin_page(dio, sdio->cur_page); in submit_page_section()
837 sdio->cur_page = NULL; in submit_page_section()
844 * the block for us, we need to fill-in the unused portion of the
845 * block with zeros. This happens only if user-buffer, fileoffset or
846 * io length is not filesystem block-size multiple.
859 sdio->start_zero_done = 1; in dio_zero_block()
860 if (!sdio->blkfactor || !buffer_new(map_bh)) in dio_zero_block()
863 dio_blocks_per_fs_block = 1 << sdio->blkfactor; in dio_zero_block()
864 this_chunk_blocks = sdio->block_in_file & (dio_blocks_per_fs_block - 1); in dio_zero_block()
874 this_chunk_blocks = dio_blocks_per_fs_block - this_chunk_blocks; in dio_zero_block()
876 this_chunk_bytes = this_chunk_blocks << sdio->blkbits; in dio_zero_block()
880 sdio->next_block_for_io, map_bh)) in dio_zero_block()
883 sdio->next_block_for_io += this_chunk_blocks; in dio_zero_block()
892 * happily perform page-sized but 512-byte aligned IOs. It is important that
895 * So what we do is to permit the ->get_block function to populate bh.b_size
898 * For best results, the blockdev should be set up with 512-byte i_blkbits and
905 const enum req_op dio_op = dio->opf & REQ_OP_MASK; in do_direct_IO()
906 const unsigned blkbits = sdio->blkbits; in do_direct_IO()
907 const unsigned i_blkbits = blkbits + sdio->blkfactor; in do_direct_IO()
910 while (sdio->block_in_file < sdio->final_block_in_request) { in do_direct_IO()
919 from = sdio->head ? 0 : sdio->from; in do_direct_IO()
920 to = (sdio->head == sdio->tail - 1) ? sdio->to : PAGE_SIZE; in do_direct_IO()
921 sdio->head++; in do_direct_IO()
928 if (sdio->blocks_available == 0) { in do_direct_IO()
943 sdio->blocks_available = in do_direct_IO()
944 map_bh->b_size >> blkbits; in do_direct_IO()
945 sdio->next_block_for_io = in do_direct_IO()
946 map_bh->b_blocknr << sdio->blkfactor; in do_direct_IO()
949 map_bh->b_bdev, in do_direct_IO()
950 map_bh->b_blocknr, in do_direct_IO()
951 map_bh->b_size >> i_blkbits); in do_direct_IO()
954 if (!sdio->blkfactor) in do_direct_IO()
957 blkmask = (1 << sdio->blkfactor) - 1; in do_direct_IO()
958 dio_remainder = (sdio->block_in_file & blkmask); in do_direct_IO()
962 * starts partway into a fs-block, in do_direct_IO()
963 * dio_remainder will be non-zero. If the IO in do_direct_IO()
969 * on-disk in do_direct_IO()
972 sdio->next_block_for_io += dio_remainder; in do_direct_IO()
973 sdio->blocks_available -= dio_remainder; in do_direct_IO()
980 /* AKPM: eargh, -ENOTBLK is a hack */ in do_direct_IO()
983 return -ENOTBLK; in do_direct_IO()
990 i_size_aligned = ALIGN(i_size_read(dio->inode), in do_direct_IO()
992 if (sdio->block_in_file >= in do_direct_IO()
999 sdio->block_in_file++; in do_direct_IO()
1001 dio->result += 1 << blkbits; in do_direct_IO()
1010 if (unlikely(sdio->blkfactor && !sdio->start_zero_done)) in do_direct_IO()
1017 this_chunk_blocks = sdio->blocks_available; in do_direct_IO()
1018 u = (to - from) >> blkbits; in do_direct_IO()
1021 u = sdio->final_block_in_request - sdio->block_in_file; in do_direct_IO()
1027 if (this_chunk_blocks == sdio->blocks_available) in do_direct_IO()
1028 sdio->boundary = buffer_boundary(map_bh); in do_direct_IO()
1032 sdio->next_block_for_io, in do_direct_IO()
1038 sdio->next_block_for_io += this_chunk_blocks; in do_direct_IO()
1040 sdio->block_in_file += this_chunk_blocks; in do_direct_IO()
1042 dio->result += this_chunk_bytes; in do_direct_IO()
1043 sdio->blocks_available -= this_chunk_blocks; in do_direct_IO()
1045 BUG_ON(sdio->block_in_file > sdio->final_block_in_request); in do_direct_IO()
1046 if (sdio->block_in_file == sdio->final_block_in_request) in do_direct_IO()
1067 * return code that the caller will hand to ->complete(). in drop_refcount()
1073 spin_lock_irqsave(&dio->bio_lock, flags); in drop_refcount()
1074 ret2 = --dio->refcount; in drop_refcount()
1075 spin_unlock_irqrestore(&dio->bio_lock, flags); in drop_refcount()
1083 * - if the flags value contains DIO_LOCKING we use a fancy locking
1088 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1109 unsigned i_blkbits = READ_ONCE(inode->i_blkbits); in __blockdev_direct_IO()
1111 unsigned blocksize_mask = (1 << blkbits) - 1; in __blockdev_direct_IO()
1112 ssize_t retval = -EINVAL; in __blockdev_direct_IO()
1114 loff_t offset = iocb->ki_pos; in __blockdev_direct_IO()
1133 return -ENOMEM; in __blockdev_direct_IO()
1141 dio->flags = flags; in __blockdev_direct_IO()
1142 if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) { in __blockdev_direct_IO()
1146 dio->is_pinned = iov_iter_extract_will_pin(iter); in __blockdev_direct_IO()
1148 /* Once we sampled i_size check for reads beyond EOF */ in __blockdev_direct_IO()
1149 dio->i_size = i_size_read(inode); in __blockdev_direct_IO()
1150 if (iov_iter_rw(iter) == READ && offset >= dio->i_size) { in __blockdev_direct_IO()
1158 blocksize_mask = (1 << blkbits) - 1; in __blockdev_direct_IO()
1163 if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) { in __blockdev_direct_IO()
1164 struct address_space *mapping = iocb->ki_filp->f_mapping; in __blockdev_direct_IO()
1166 retval = filemap_write_and_wait_range(mapping, offset, end - 1); in __blockdev_direct_IO()
1178 dio->is_async = false; in __blockdev_direct_IO()
1180 dio->is_async = false; in __blockdev_direct_IO()
1182 dio->is_async = true; in __blockdev_direct_IO()
1184 dio->inode = inode; in __blockdev_direct_IO()
1186 dio->opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; in __blockdev_direct_IO()
1187 if (iocb->ki_flags & IOCB_NOWAIT) in __blockdev_direct_IO()
1188 dio->opf |= REQ_NOWAIT; in __blockdev_direct_IO()
1190 dio->opf = REQ_OP_READ; in __blockdev_direct_IO()
1195 * so that we can call ->fsync. in __blockdev_direct_IO()
1197 if (dio->is_async && iov_iter_rw(iter) == WRITE) { in __blockdev_direct_IO()
1201 else if (!dio->inode->i_sb->s_dio_done_wq) { in __blockdev_direct_IO()
1207 retval = sb_init_dio_done_wq(dio->inode->i_sb); in __blockdev_direct_IO()
1220 sdio.blkfactor = i_blkbits - blkbits; in __blockdev_direct_IO()
1224 dio->end_io = end_io; in __blockdev_direct_IO()
1225 sdio.final_block_in_bio = -1; in __blockdev_direct_IO()
1226 sdio.next_block_for_io = -1; in __blockdev_direct_IO()
1228 dio->iocb = iocb; in __blockdev_direct_IO()
1230 spin_lock_init(&dio->bio_lock); in __blockdev_direct_IO()
1231 dio->refcount = 1; in __blockdev_direct_IO()
1233 dio->should_dirty = user_backed_iter(iter) && iov_iter_rw(iter) == READ; in __blockdev_direct_IO()
1238 * In case of non-aligned buffers, we may need 2 more in __blockdev_direct_IO()
1252 if (retval == -ENOTBLK) { in __blockdev_direct_IO()
1260 * There may be some unwritten disk at the end of a part-written in __blockdev_direct_IO()
1261 * fs-block-sized block. Go zero that now. in __blockdev_direct_IO()
1290 if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING)) in __blockdev_direct_IO()
1291 inode_unlock(dio->inode); in __blockdev_direct_IO()
1297 * call aio_complete is when we return -EIOCBQUEUED, so we key on that. in __blockdev_direct_IO()
1298 * This had *better* be the only place that raises -EIOCBQUEUED. in __blockdev_direct_IO()
1300 BUG_ON(retval == -EIOCBQUEUED); in __blockdev_direct_IO()
1301 if (dio->is_async && retval == 0 && dio->result && in __blockdev_direct_IO()
1302 (iov_iter_rw(iter) == READ || dio->result == count)) in __blockdev_direct_IO()
1303 retval = -EIOCBQUEUED; in __blockdev_direct_IO()
1310 BUG_ON(retval != -EIOCBQUEUED); in __blockdev_direct_IO()
1315 if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) in __blockdev_direct_IO()