/*
 * Copyright (C) 2009-2011 Red Hat, Inc.
 *
 * Author: Mikulas Patocka <mpatocka@redhat.com>
 *
 * This file is released under the GPL.
 */

#include <linux/dm-bufio.h>

#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/jiffies.h>
#include <linux/vmalloc.h>
#include <linux/shrinker.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/stacktrace.h>
#include <linux/jump_label.h>

#define DM_MSG_PREFIX "bufio"

/*
 * Memory management policy:
 *	Limit the number of buffers to DM_BUFIO_MEMORY_PERCENT of main memory
 *	or DM_BUFIO_VMALLOC_PERCENT of vmalloc memory (whichever is lower).
 *	Always allocate at least DM_BUFIO_MIN_BUFFERS buffers.
 *	Start background writeback when the number of dirty buffers exceeds
 *	DM_BUFIO_WRITEBACK_RATIO times the number of clean buffers.
 */
#define DM_BUFIO_MIN_BUFFERS		8

#define DM_BUFIO_MEMORY_PERCENT		2
#define DM_BUFIO_VMALLOC_PERCENT	25
#define DM_BUFIO_WRITEBACK_RATIO	3
#define DM_BUFIO_LOW_WATERMARK_RATIO	16

/*
 * Check buffer ages in this interval (seconds)
 */
#define DM_BUFIO_WORK_TIMER_SECS	30

/*
 * Free buffers when they are older than this (seconds)
 */
#define DM_BUFIO_DEFAULT_AGE_SECS	300

/*
 * The number of bytes of cached data to keep around.
 */
#define DM_BUFIO_DEFAULT_RETAIN_BYTES	(256 * 1024)

/*
 * Align buffer writes to this boundary.
 * Tests show that SSDs have the highest IOPS when using 4k writes.
 */
#define DM_BUFIO_WRITE_ALIGN		4096

/*
 * dm_buffer->list_mode
 */
#define LIST_CLEAN	0
#define LIST_DIRTY	1
#define LIST_SIZE	2

/*
 * Linking of buffers:
 *	All buffers are linked to buffer_tree with their node field.
 *
 *	Clean buffers that are not being written (B_WRITING not set)
 *	are linked to lru[LIST_CLEAN] with their lru_list field.
 *
 *	Dirty and clean buffers that are being written are linked to
 *	lru[LIST_DIRTY] with their lru_list field. When the write
 *	finishes, the buffer cannot be relinked immediately (because we
 *	are in an interrupt context and relinking requires process
 *	context), so some clean-not-writing buffers can be held on
 *	dirty_lru too.  They are later added to lru in the process
 *	context.
 */
struct dm_bufio_client {
	struct mutex lock;
	spinlock_t spinlock;
	unsigned long spinlock_flags;

	struct list_head lru[LIST_SIZE];
	unsigned long n_buffers[LIST_SIZE];

	struct block_device *bdev;
	unsigned block_size;
	s8 sectors_per_block_bits;
	void (*alloc_callback)(struct dm_buffer *);
	void (*write_callback)(struct dm_buffer *);
	bool no_sleep;

	struct kmem_cache *slab_buffer;
	struct kmem_cache *slab_cache;
	struct dm_io_client *dm_io;

	struct list_head reserved_buffers;
	unsigned need_reserved_buffers;

	unsigned minimum_buffers;

	struct rb_root buffer_tree;
	wait_queue_head_t free_buffer_wait;

	sector_t start;

	int async_write_error;

	struct list_head client_list;

	struct shrinker shrinker;
	struct work_struct shrink_work;
	atomic_long_t need_shrink;
};

/*
 * Buffer state bits.
 */
#define B_READING	0
#define B_WRITING	1
#define B_DIRTY		2

/*
 * Describes how the block was allocated:
 * kmem_cache_alloc(), __get_free_pages() or vmalloc().
 * See the comment at alloc_buffer_data.
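 *
 * Blocks that are smaller than PAGE_SIZE or not a power of two come from a
 * per-client slab cache; other blocks are allocated with __get_free_pages()
 * or vmalloc().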
 */
enum data_mode {
	DATA_MODE_SLAB = 0,
	DATA_MODE_GET_FREE_PAGES = 1,
	DATA_MODE_VMALLOC = 2,
	DATA_MODE_LIMIT = 3
};

struct dm_buffer {
	struct rb_node node;
	struct list_head lru_list;
	struct list_head global_list;
	sector_t block;
	void *data;
	unsigned char data_mode;		/* DATA_MODE_* */
	unsigned char list_mode;		/* LIST_* */
	blk_status_t read_error;
	blk_status_t write_error;
	unsigned accessed;
	unsigned hold_count;
	unsigned long state;
	unsigned long last_accessed;
	unsigned dirty_start;
	unsigned dirty_end;
	unsigned write_start;
	unsigned write_end;
	struct dm_bufio_client *c;
	struct list_head write_list;
	void (*end_io)(struct dm_buffer *, blk_status_t);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
#define MAX_STACK 10
	unsigned int stack_len;
	unsigned long stack_entries[MAX_STACK];
#endif
};

static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);

/*----------------------------------------------------------------*/

#define dm_bufio_in_request()	(!!current->bio_list)

static void dm_bufio_lock(struct dm_bufio_client *c)
{
	if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep)
		spin_lock_irqsave_nested(&c->spinlock, c->spinlock_flags, dm_bufio_in_request());
	else
		mutex_lock_nested(&c->lock, dm_bufio_in_request());
}

static int dm_bufio_trylock(struct dm_bufio_client *c)
{
	if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep)
		return spin_trylock_irqsave(&c->spinlock, c->spinlock_flags);
	else
		return mutex_trylock(&c->lock);
}

static void dm_bufio_unlock(struct dm_bufio_client *c)
{
	if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep)
		spin_unlock_irqrestore(&c->spinlock, c->spinlock_flags);
	else
		mutex_unlock(&c->lock);
}

/*----------------------------------------------------------------*/

/*
 * Default cache size: available memory divided by the ratio.
 */
static unsigned long dm_bufio_default_cache_size;

/*
 * Total cache size set by the user.
 */
static unsigned long dm_bufio_cache_size;

/*
 * A copy of dm_bufio_cache_size because dm_bufio_cache_size can change
 * at any time.  If it disagrees, the user has changed cache size.
 */
static unsigned long dm_bufio_cache_size_latch;

static DEFINE_SPINLOCK(global_spinlock);

static LIST_HEAD(global_queue);

static unsigned long global_num = 0;

/*
 * Buffers are freed after this timeout
 */
static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;

static unsigned long dm_bufio_peak_allocated;
static unsigned long dm_bufio_allocated_kmem_cache;
static unsigned long dm_bufio_allocated_get_free_pages;
static unsigned long dm_bufio_allocated_vmalloc;
static unsigned long dm_bufio_current_allocated;

/*----------------------------------------------------------------*/

/*
 * The current number of clients.
 */
static int dm_bufio_client_count;

/*
 * The list of all clients.
 */
static LIST_HEAD(dm_bufio_all_clients);

/*
 * This mutex protects dm_bufio_cache_size_latch and dm_bufio_client_count
 */
static DEFINE_MUTEX(dm_bufio_clients_lock);

static struct workqueue_struct *dm_bufio_wq;
static struct delayed_work dm_bufio_cleanup_old_work;
static struct work_struct dm_bufio_replacement_work;


#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
static void buffer_record_stack(struct dm_buffer *b)
{
	b->stack_len = stack_trace_save(b->stack_entries, MAX_STACK, 2);
}
#endif

/*----------------------------------------------------------------
 * A red/black tree acts as an index for all the buffers.
 *--------------------------------------------------------------*/
static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
{
	struct rb_node *n = c->buffer_tree.rb_node;
	struct dm_buffer *b;

	while (n) {
		b = container_of(n, struct dm_buffer, node);

		if (b->block == block)
			return b;

		n = block < b->block ? n->rb_left : n->rb_right;
	}

	return NULL;
}

static struct dm_buffer *__find_next(struct dm_bufio_client *c, sector_t block)
{
	struct rb_node *n = c->buffer_tree.rb_node;
	struct dm_buffer *b;
	struct dm_buffer *best = NULL;

	while (n) {
		b = container_of(n, struct dm_buffer, node);

		if (b->block == block)
			return b;

		if (block <= b->block) {
			n = n->rb_left;
			best = b;
		} else {
			n = n->rb_right;
		}
	}

	return best;
}

static void __insert(struct dm_bufio_client *c, struct dm_buffer *b)
{
	struct rb_node **new = &c->buffer_tree.rb_node, *parent = NULL;
	struct dm_buffer *found;

	while (*new) {
		found = container_of(*new, struct dm_buffer, node);

		if (found->block == b->block) {
			BUG_ON(found != b);
			return;
		}

		parent = *new;
		new = b->block < found->block ?
			&found->node.rb_left : &found->node.rb_right;
	}

	rb_link_node(&b->node, parent, new);
	rb_insert_color(&b->node, &c->buffer_tree);
}

static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)
{
	rb_erase(&b->node, &c->buffer_tree);
}

/*----------------------------------------------------------------*/

static void adjust_total_allocated(struct dm_buffer *b, bool unlink)
{
	unsigned char data_mode;
	long diff;

	static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
		&dm_bufio_allocated_kmem_cache,
		&dm_bufio_allocated_get_free_pages,
		&dm_bufio_allocated_vmalloc,
	};

	data_mode = b->data_mode;
	diff = (long)b->c->block_size;
	if (unlink)
		diff = -diff;

	spin_lock(&global_spinlock);

	*class_ptr[data_mode] += diff;

	dm_bufio_current_allocated += diff;

	if (dm_bufio_current_allocated > dm_bufio_peak_allocated)
		dm_bufio_peak_allocated = dm_bufio_current_allocated;

	b->accessed = 1;

	if (!unlink) {
		list_add(&b->global_list, &global_queue);
		global_num++;
		if (dm_bufio_current_allocated > dm_bufio_cache_size)
			queue_work(dm_bufio_wq, &dm_bufio_replacement_work);
	} else {
		list_del(&b->global_list);
		global_num--;
	}

	spin_unlock(&global_spinlock);
}

/*
 * Change the number of clients and recalculate per-client limit.
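 * Must be called with dm_bufio_clients_lock held.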
 */
static void __cache_size_refresh(void)
{
	BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock));
	BUG_ON(dm_bufio_client_count < 0);

	dm_bufio_cache_size_latch = READ_ONCE(dm_bufio_cache_size);

	/*
	 * Use default if set to 0 and report the actual cache size used.
	 */
	if (!dm_bufio_cache_size_latch) {
		(void)cmpxchg(&dm_bufio_cache_size, 0,
			      dm_bufio_default_cache_size);
		dm_bufio_cache_size_latch = dm_bufio_default_cache_size;
	}
}

/*
 * Allocating buffer data.
 *
 * Small buffers are allocated with kmem_cache, to use space optimally.
 *
 * For large buffers, we choose between get_free_pages and vmalloc.
 * Each has advantages and disadvantages.
 *
 * __get_free_pages can randomly fail if the memory is fragmented.
 * __vmalloc won't randomly fail, but vmalloc space is limited (it may be
 * as low as 128M) so using it for caching is not appropriate.
 *
 * If the allocation may fail we use __get_free_pages. Memory fragmentation
 * won't have a fatal effect here, but it just causes flushes of some other
 * buffers and more I/O will be performed. Don't use __get_free_pages if it
 * always fails (i.e. order >= MAX_ORDER).
 *
 * If the allocation shouldn't fail we use __vmalloc. This is only for the
 * initial reserve allocation, so there's no risk of wasting all vmalloc
 * space.
 */
static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
			       unsigned char *data_mode)
{
	if (unlikely(c->slab_cache != NULL)) {
		*data_mode = DATA_MODE_SLAB;
		return kmem_cache_alloc(c->slab_cache, gfp_mask);
	}

	if (c->block_size <= KMALLOC_MAX_SIZE &&
	    gfp_mask & __GFP_NORETRY) {
		*data_mode = DATA_MODE_GET_FREE_PAGES;
		return (void *)__get_free_pages(gfp_mask,
				c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
	}

	*data_mode = DATA_MODE_VMALLOC;

	/*
	 * __vmalloc allocates the data pages and auxiliary structures with
	 * gfp_flags that were specified, but pagetables are always allocated
	 * with GFP_KERNEL, no matter what was specified as gfp_mask.
	 *
	 * Consequently, we must set per-process flag PF_MEMALLOC_NOIO so that
	 * all allocations done by this process (including pagetables) are done
	 * as if GFP_NOIO was specified.
	 */
	if (gfp_mask & __GFP_NORETRY) {
		unsigned noio_flag = memalloc_noio_save();
		void *ptr = __vmalloc(c->block_size, gfp_mask);

		memalloc_noio_restore(noio_flag);
		return ptr;
	}

	return __vmalloc(c->block_size, gfp_mask);
}

/*
 * Free buffer's data.
 */
static void free_buffer_data(struct dm_bufio_client *c,
			     void *data, unsigned char data_mode)
{
	switch (data_mode) {
	case DATA_MODE_SLAB:
		kmem_cache_free(c->slab_cache, data);
		break;

	case DATA_MODE_GET_FREE_PAGES:
		free_pages((unsigned long)data,
			   c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
		break;

	case DATA_MODE_VMALLOC:
		vfree(data);
		break;

	default:
		DMCRIT("dm_bufio_free_buffer_data: bad data mode: %d",
		       data_mode);
		BUG();
	}
}

/*
 * Allocate buffer and its data.
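 * The buffer's data_mode records which allocator provided the data, so that
 * free_buffer_data() can release it with the matching function.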
 */
static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
{
	struct dm_buffer *b = kmem_cache_alloc(c->slab_buffer, gfp_mask);

	if (!b)
		return NULL;

	b->c = c;

	b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode);
	if (!b->data) {
		kmem_cache_free(c->slab_buffer, b);
		return NULL;
	}

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	b->stack_len = 0;
#endif
	return b;
}

/*
 * Free buffer and its data.
 */
static void free_buffer(struct dm_buffer *b)
{
	struct dm_bufio_client *c = b->c;

	free_buffer_data(c, b->data, b->data_mode);
	kmem_cache_free(c->slab_buffer, b);
}

/*
 * Link buffer to the buffer tree and clean or dirty queue.
 */
static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
{
	struct dm_bufio_client *c = b->c;

	c->n_buffers[dirty]++;
	b->block = block;
	b->list_mode = dirty;
	list_add(&b->lru_list, &c->lru[dirty]);
	__insert(b->c, b);
	b->last_accessed = jiffies;

	adjust_total_allocated(b, false);
}

/*
 * Unlink buffer from the buffer tree and dirty or clean queue.
 */
static void __unlink_buffer(struct dm_buffer *b)
{
	struct dm_bufio_client *c = b->c;

	BUG_ON(!c->n_buffers[b->list_mode]);

	c->n_buffers[b->list_mode]--;
	__remove(b->c, b);
	list_del(&b->lru_list);

	adjust_total_allocated(b, true);
}

/*
 * Place the buffer to the head of dirty or clean LRU queue.
 */
static void __relink_lru(struct dm_buffer *b, int dirty)
{
	struct dm_bufio_client *c = b->c;

	b->accessed = 1;

	BUG_ON(!c->n_buffers[b->list_mode]);

	c->n_buffers[b->list_mode]--;
	c->n_buffers[dirty]++;
	b->list_mode = dirty;
	list_move(&b->lru_list, &c->lru[dirty]);
	b->last_accessed = jiffies;
}

/*----------------------------------------------------------------
 * Submit I/O on the buffer.
 *
 * Bio interface is faster but it has some problems:
 *	the vector list is limited (increasing this limit increases
 *	memory-consumption per buffer, so it is not viable);
 *
 *	the memory must be direct-mapped, not vmalloced;
 *
 * If the buffer is not vmalloced, try using the bio interface.
 *
 * If the buffer is big, if it is vmalloced or if the underlying device
 * rejects the bio because it is too large, use dm-io layer to do the I/O.
 * The dm-io layer splits the I/O into multiple requests, avoiding the above
 * shortcomings.
 *--------------------------------------------------------------*/

/*
 * dm-io completion routine. It just calls b->end_io, pretending
 * that the request was handled directly with bio interface.
 */
static void dmio_complete(unsigned long error, void *context)
{
	struct dm_buffer *b = context;

	b->end_io(b, unlikely(error != 0) ?
		  BLK_STS_IOERR : 0);
}

static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector,
		     unsigned n_sectors, unsigned offset)
{
	int r;
	struct dm_io_request io_req = {
		.bi_opf = op,
		.notify.fn = dmio_complete,
		.notify.context = b,
		.client = b->c->dm_io,
	};
	struct dm_io_region region = {
		.bdev = b->c->bdev,
		.sector = sector,
		.count = n_sectors,
	};

	if (b->data_mode != DATA_MODE_VMALLOC) {
		io_req.mem.type = DM_IO_KMEM;
		io_req.mem.ptr.addr = (char *)b->data + offset;
	} else {
		io_req.mem.type = DM_IO_VMA;
		io_req.mem.ptr.vma = (char *)b->data + offset;
	}

	r = dm_io(&io_req, 1, &region, NULL);
	if (unlikely(r))
		b->end_io(b, errno_to_blk_status(r));
}

static void bio_complete(struct bio *bio)
{
	struct dm_buffer *b = bio->bi_private;
	blk_status_t status = bio->bi_status;
	bio_uninit(bio);
	kfree(bio);
	b->end_io(b, status);
}

static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector,
		    unsigned n_sectors, unsigned offset)
{
	struct bio *bio;
	char *ptr;
	unsigned vec_size, len;

	vec_size = b->c->block_size >> PAGE_SHIFT;
	if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
		vec_size += 2;

	bio = bio_kmalloc(vec_size, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
	if (!bio) {
dmio:
		use_dmio(b, op, sector, n_sectors, offset);
		return;
	}
	bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, op);
	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = bio_complete;
	bio->bi_private = b;

	ptr = (char *)b->data + offset;
	len = n_sectors << SECTOR_SHIFT;

	do {
		unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
		if (!bio_add_page(bio, virt_to_page(ptr), this_step,
				  offset_in_page(ptr))) {
			bio_put(bio);
			goto dmio;
		}

		len -= this_step;
		ptr += this_step;
	} while (len > 0);

	submit_bio(bio);
}

static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block)
{
	sector_t sector;

	if (likely(c->sectors_per_block_bits >= 0))
		sector = block << c->sectors_per_block_bits;
	else
		sector = block * (c->block_size >> SECTOR_SHIFT);
	sector += c->start;

	return sector;
}

static void submit_io(struct dm_buffer *b, enum req_op op,
		      void (*end_io)(struct dm_buffer *, blk_status_t))
{
	unsigned n_sectors;
	sector_t sector;
	unsigned offset, end;

	b->end_io = end_io;

	sector = block_to_sector(b->c, b->block);

	if (op != REQ_OP_WRITE) {
		n_sectors = b->c->block_size >> SECTOR_SHIFT;
		offset = 0;
	} else {
		if (b->c->write_callback)
			b->c->write_callback(b);
		offset = b->write_start;
		end = b->write_end;
		offset &= -DM_BUFIO_WRITE_ALIGN;
		end += DM_BUFIO_WRITE_ALIGN - 1;
		end &= -DM_BUFIO_WRITE_ALIGN;
		if (unlikely(end > b->c->block_size))
			end = b->c->block_size;

		sector += offset >> SECTOR_SHIFT;
		n_sectors = (end - offset) >> SECTOR_SHIFT;
	}

	if (b->data_mode != DATA_MODE_VMALLOC)
		use_bio(b, op, sector, n_sectors, offset);
	else
		use_dmio(b, op, sector, n_sectors, offset);
}

/*----------------------------------------------------------------
 * Writing dirty buffers
 *--------------------------------------------------------------*/

/*
 * The endio routine for write.
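 * Called via b->end_io from dmio_complete() or bio_complete() when the
 * write finishes.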
 *
 * Set the error, clear B_WRITING bit and wake anyone who was waiting on
 * it.
 */
static void write_endio(struct dm_buffer *b, blk_status_t status)
{
	b->write_error = status;
	if (unlikely(status)) {
		struct dm_bufio_client *c = b->c;

		(void)cmpxchg(&c->async_write_error, 0,
				blk_status_to_errno(status));
	}

	BUG_ON(!test_bit(B_WRITING, &b->state));

	smp_mb__before_atomic();
	clear_bit(B_WRITING, &b->state);
	smp_mb__after_atomic();

	wake_up_bit(&b->state, B_WRITING);
}

/*
 * Initiate a write on a dirty buffer, but don't wait for it.
 *
 * - If the buffer is not dirty, exit.
 * - If there is a previous write going on, wait for it to finish (we can't
 *   have two writes on the same buffer simultaneously).
 * - Submit our write and don't wait on it. We set B_WRITING indicating
 *   that there is a write in progress.
 */
static void __write_dirty_buffer(struct dm_buffer *b,
				 struct list_head *write_list)
{
	if (!test_bit(B_DIRTY, &b->state))
		return;

	clear_bit(B_DIRTY, &b->state);
	wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);

	b->write_start = b->dirty_start;
	b->write_end = b->dirty_end;

	if (!write_list)
		submit_io(b, REQ_OP_WRITE, write_endio);
	else
		list_add_tail(&b->write_list, write_list);
}

static void __flush_write_list(struct list_head *write_list)
{
	struct blk_plug plug;
	blk_start_plug(&plug);
	while (!list_empty(write_list)) {
		struct dm_buffer *b =
			list_entry(write_list->next, struct dm_buffer, write_list);
		list_del(&b->write_list);
		submit_io(b, REQ_OP_WRITE, write_endio);
		cond_resched();
	}
	blk_finish_plug(&plug);
}

/*
 * Wait until any activity on the buffer finishes.  Possibly write the
 * buffer if it is dirty.  When this function finishes, there is no I/O
 * running on the buffer and the buffer is not dirty.
 */
static void __make_buffer_clean(struct dm_buffer *b)
{
	BUG_ON(b->hold_count);

	if (!b->state)	/* fast case */
		return;

	wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
	__write_dirty_buffer(b, NULL);
	wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
}

/*
 * Find some buffer that is not held by anybody, clean it, unlink it and
 * return it.
 */
static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
{
	struct dm_buffer *b;

	list_for_each_entry_reverse(b, &c->lru[LIST_CLEAN], lru_list) {
		BUG_ON(test_bit(B_WRITING, &b->state));
		BUG_ON(test_bit(B_DIRTY, &b->state));

		if (!b->hold_count) {
			__make_buffer_clean(b);
			__unlink_buffer(b);
			return b;
		}
		cond_resched();
	}

	list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
		BUG_ON(test_bit(B_READING, &b->state));

		if (!b->hold_count) {
			__make_buffer_clean(b);
			__unlink_buffer(b);
			return b;
		}
		cond_resched();
	}

	return NULL;
}

/*
 * Wait until some other threads free some buffer or release hold count on
 * some buffer.
 *
 * This function is entered with c->lock held, drops it and regains it
 * before exiting.
 */
static void __wait_for_free_buffer(struct dm_bufio_client *c)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&c->free_buffer_wait, &wait);
	set_current_state(TASK_UNINTERRUPTIBLE);
	dm_bufio_unlock(c);

	io_schedule();

	remove_wait_queue(&c->free_buffer_wait, &wait);

	dm_bufio_lock(c);
}

enum new_flag {
	NF_FRESH = 0,
	NF_READ = 1,
	NF_GET = 2,
	NF_PREFETCH = 3
};

/*
 * Allocate a new buffer. If the allocation is not possible, wait until
 * some other thread frees a buffer.
 *
 * May drop the lock and regain it.
 */
static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf)
{
	struct dm_buffer *b;
	bool tried_noio_alloc = false;

	/*
	 * dm-bufio is resistant to allocation failures (it just keeps
	 * one buffer reserved in case all the allocations fail).
	 * So set flags to not try too hard:
	 *	GFP_NOWAIT: don't wait; if we need to sleep we'll release our
	 *		    mutex and wait ourselves.
	 *	__GFP_NORETRY: don't retry and rather return failure
	 *	__GFP_NOMEMALLOC: don't use emergency reserves
	 *	__GFP_NOWARN: don't print a warning in case of failure
	 *
	 * For debugging, if we set the cache size to 1, no new buffers will
	 * be allocated.
	 */
	while (1) {
		if (dm_bufio_cache_size_latch != 1) {
			b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
			if (b)
				return b;
		}

		if (nf == NF_PREFETCH)
			return NULL;

		if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) {
			dm_bufio_unlock(c);
			b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
			dm_bufio_lock(c);
			if (b)
				return b;
			tried_noio_alloc = true;
		}

		if (!list_empty(&c->reserved_buffers)) {
			b = list_entry(c->reserved_buffers.next,
				       struct dm_buffer, lru_list);
			list_del(&b->lru_list);
			c->need_reserved_buffers++;

			return b;
		}

		b = __get_unclaimed_buffer(c);
		if (b)
			return b;

		__wait_for_free_buffer(c);
	}
}

static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c, enum new_flag nf)
{
	struct dm_buffer *b = __alloc_buffer_wait_no_callback(c, nf);

	if (!b)
		return NULL;

	if (c->alloc_callback)
		c->alloc_callback(b);

	return b;
}

/*
 * Free a buffer and wake other threads waiting for free buffers.
 */
static void __free_buffer_wake(struct dm_buffer *b)
{
	struct dm_bufio_client *c = b->c;

	if (!c->need_reserved_buffers)
		free_buffer(b);
	else {
		list_add(&b->lru_list, &c->reserved_buffers);
		c->need_reserved_buffers--;
	}

	wake_up(&c->free_buffer_wait);
}

static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait,
					struct list_head *write_list)
{
	struct dm_buffer *b, *tmp;

	list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
		BUG_ON(test_bit(B_READING, &b->state));

		if (!test_bit(B_DIRTY, &b->state) &&
		    !test_bit(B_WRITING, &b->state)) {
			__relink_lru(b, LIST_CLEAN);
			continue;
		}

		if (no_wait && test_bit(B_WRITING, &b->state))
			return;

		__write_dirty_buffer(b, write_list);
		cond_resched();
	}
}

/*
 * Check if we're over the watermark.
 * If the number of dirty buffers exceeds DM_BUFIO_WRITEBACK_RATIO times the
 * number of clean buffers, start background writeback of the dirty ones.
 */
static void __check_watermark(struct dm_bufio_client *c,
			      struct list_head *write_list)
{
	if (c->n_buffers[LIST_DIRTY] > c->n_buffers[LIST_CLEAN] * DM_BUFIO_WRITEBACK_RATIO)
		__write_dirty_buffers_async(c, 1, write_list);
}

/*----------------------------------------------------------------
 * Getting a buffer
 *--------------------------------------------------------------*/

static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
				     enum new_flag nf, int *need_submit,
				     struct list_head *write_list)
{
	struct dm_buffer *b, *new_b = NULL;

	*need_submit = 0;

	b = __find(c, block);
	if (b)
		goto found_buffer;

	if (nf == NF_GET)
		return NULL;

	new_b = __alloc_buffer_wait(c, nf);
	if (!new_b)
		return NULL;

	/*
	 * We've had a period where the mutex was unlocked, so need to
	 * recheck the buffer tree.
	 */
	b = __find(c, block);
	if (b) {
		__free_buffer_wake(new_b);
		goto found_buffer;
	}

	__check_watermark(c, write_list);

	b = new_b;
	b->hold_count = 1;
	b->read_error = 0;
	b->write_error = 0;
	__link_buffer(b, block, LIST_CLEAN);

	if (nf == NF_FRESH) {
		b->state = 0;
		return b;
	}

	b->state = 1 << B_READING;
	*need_submit = 1;

	return b;

found_buffer:
	if (nf == NF_PREFETCH)
		return NULL;
	/*
	 * Note: it is essential that we don't wait for the buffer to be
	 * read if dm_bufio_get function is used. Both dm_bufio_get and
	 * dm_bufio_prefetch can be used in the driver request routine.
	 * If the user called both dm_bufio_prefetch and dm_bufio_get on
	 * the same buffer, it would deadlock if we waited.
	 */
	if (nf == NF_GET && unlikely(test_bit(B_READING, &b->state)))
		return NULL;

	b->hold_count++;
	__relink_lru(b, test_bit(B_DIRTY, &b->state) ||
		     test_bit(B_WRITING, &b->state));
	return b;
}

/*
 * The endio routine for reading: set the error, clear the bit and wake up
 * anyone waiting on the buffer.
 */
static void read_endio(struct dm_buffer *b, blk_status_t status)
{
	b->read_error = status;

	BUG_ON(!test_bit(B_READING, &b->state));

	smp_mb__before_atomic();
	clear_bit(B_READING, &b->state);
	smp_mb__after_atomic();

	wake_up_bit(&b->state, B_READING);
}

/*
 * A common routine for dm_bufio_new and dm_bufio_read.  Operation of these
 * functions is similar except that dm_bufio_new doesn't read the
 * buffer from the disk (assuming that the caller overwrites all the data
 * and uses dm_bufio_mark_buffer_dirty to write new data back).
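 *
 * If the read fails, the buffer is released and an ERR_PTR is returned
 * instead of a pointer to the data.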
 */
static void *new_read(struct dm_bufio_client *c, sector_t block,
		      enum new_flag nf, struct dm_buffer **bp)
{
	int need_submit;
	struct dm_buffer *b;

	LIST_HEAD(write_list);

	dm_bufio_lock(c);
	b = __bufio_new(c, block, nf, &need_submit, &write_list);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	if (b && b->hold_count == 1)
		buffer_record_stack(b);
#endif
	dm_bufio_unlock(c);

	__flush_write_list(&write_list);

	if (!b)
		return NULL;

	if (need_submit)
		submit_io(b, REQ_OP_READ, read_endio);

	wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);

	if (b->read_error) {
		int error = blk_status_to_errno(b->read_error);

		dm_bufio_release(b);

		return ERR_PTR(error);
	}

	*bp = b;

	return b->data;
}

void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
		   struct dm_buffer **bp)
{
	return new_read(c, block, NF_GET, bp);
}
EXPORT_SYMBOL_GPL(dm_bufio_get);

void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
		    struct dm_buffer **bp)
{
	BUG_ON(dm_bufio_in_request());

	return new_read(c, block, NF_READ, bp);
}
EXPORT_SYMBOL_GPL(dm_bufio_read);

void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
		   struct dm_buffer **bp)
{
	BUG_ON(dm_bufio_in_request());

	return new_read(c, block, NF_FRESH, bp);
}
EXPORT_SYMBOL_GPL(dm_bufio_new);

void dm_bufio_prefetch(struct dm_bufio_client *c,
		       sector_t block, unsigned n_blocks)
{
	struct blk_plug plug;

	LIST_HEAD(write_list);

	BUG_ON(dm_bufio_in_request());

	blk_start_plug(&plug);
	dm_bufio_lock(c);

	for (; n_blocks--; block++) {
		int need_submit;
		struct dm_buffer *b;
		b = __bufio_new(c, block, NF_PREFETCH, &need_submit,
				&write_list);
		if (unlikely(!list_empty(&write_list))) {
			dm_bufio_unlock(c);
			blk_finish_plug(&plug);
			__flush_write_list(&write_list);
			blk_start_plug(&plug);
			dm_bufio_lock(c);
		}
		if (unlikely(b != NULL)) {
			dm_bufio_unlock(c);

			if (need_submit)
				submit_io(b, REQ_OP_READ, read_endio);
			dm_bufio_release(b);

			cond_resched();

			if (!n_blocks)
				goto flush_plug;
			dm_bufio_lock(c);
		}
	}

	dm_bufio_unlock(c);

flush_plug:
	blk_finish_plug(&plug);
}
EXPORT_SYMBOL_GPL(dm_bufio_prefetch);

void dm_bufio_release(struct dm_buffer *b)
{
	struct dm_bufio_client *c = b->c;

	dm_bufio_lock(c);

	BUG_ON(!b->hold_count);

	b->hold_count--;
	if (!b->hold_count) {
		wake_up(&c->free_buffer_wait);

		/*
		 * If there were errors on the buffer, and the buffer is not
		 * to be written, free the buffer. There is no point in caching
		 * an invalid buffer.
		 */
		if ((b->read_error || b->write_error) &&
		    !test_bit(B_READING, &b->state) &&
		    !test_bit(B_WRITING, &b->state) &&
		    !test_bit(B_DIRTY, &b->state)) {
			__unlink_buffer(b);
			__free_buffer_wake(b);
		}
	}

	dm_bufio_unlock(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_release);

void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
					unsigned start, unsigned end)
{
	struct dm_bufio_client *c = b->c;

	BUG_ON(start >= end);
	BUG_ON(end > b->c->block_size);

	dm_bufio_lock(c);

	BUG_ON(test_bit(B_READING, &b->state));

	if (!test_and_set_bit(B_DIRTY, &b->state)) {
		b->dirty_start = start;
		b->dirty_end = end;
		__relink_lru(b, LIST_DIRTY);
	} else {
		if (start < b->dirty_start)
			b->dirty_start = start;
		if (end > b->dirty_end)
			b->dirty_end = end;
	}

	dm_bufio_unlock(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty);

void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
{
	dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size);
}
EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);

void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
{
	LIST_HEAD(write_list);

	BUG_ON(dm_bufio_in_request());

	dm_bufio_lock(c);
	__write_dirty_buffers_async(c, 0, &write_list);
	dm_bufio_unlock(c);
	__flush_write_list(&write_list);
}
EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);

/*
 * For performance, it is essential that the buffers are written asynchronously
 * and simultaneously (so that the block layer can merge the writes) and then
 * waited upon.
 *
 * Finally, we flush hardware disk cache.
 */
int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
{
	int a, f;
	unsigned long buffers_processed = 0;
	struct dm_buffer *b, *tmp;

	LIST_HEAD(write_list);

	dm_bufio_lock(c);
	__write_dirty_buffers_async(c, 0, &write_list);
	dm_bufio_unlock(c);
	__flush_write_list(&write_list);
	dm_bufio_lock(c);

again:
	list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
		int dropped_lock = 0;

		if (buffers_processed < c->n_buffers[LIST_DIRTY])
			buffers_processed++;

		BUG_ON(test_bit(B_READING, &b->state));

		if (test_bit(B_WRITING, &b->state)) {
			if (buffers_processed < c->n_buffers[LIST_DIRTY]) {
				dropped_lock = 1;
				b->hold_count++;
				dm_bufio_unlock(c);
				wait_on_bit_io(&b->state, B_WRITING,
					       TASK_UNINTERRUPTIBLE);
				dm_bufio_lock(c);
				b->hold_count--;
			} else
				wait_on_bit_io(&b->state, B_WRITING,
					       TASK_UNINTERRUPTIBLE);
		}

		if (!test_bit(B_DIRTY, &b->state) &&
		    !test_bit(B_WRITING, &b->state))
			__relink_lru(b, LIST_CLEAN);

		cond_resched();

		/*
		 * If we dropped the lock, the list is no longer consistent,
		 * so we must restart the search.
		 *
		 * In the most common case, the buffer just processed is
		 * relinked to the clean list, so we won't loop scanning the
		 * same buffer again and again.
		 *
		 * This may livelock if there is another thread simultaneously
		 * dirtying buffers, so we count the number of buffers walked
		 * and if it exceeds the total number of buffers, it means that
		 * someone is doing some writes simultaneously with us.  In
		 * this case, stop, dropping the lock.
		 */
		if (dropped_lock)
			goto again;
	}
	wake_up(&c->free_buffer_wait);
	dm_bufio_unlock(c);

	a = xchg(&c->async_write_error, 0);
	f = dm_bufio_issue_flush(c);
	if (a)
		return a;

	return f;
}
EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);

/*
 * Use dm-io to send an empty barrier to flush the device.
 */
int dm_bufio_issue_flush(struct dm_bufio_client *c)
{
	struct dm_io_request io_req = {
		.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC,
		.mem.type = DM_IO_KMEM,
		.mem.ptr.addr = NULL,
		.client = c->dm_io,
	};
	struct dm_io_region io_reg = {
		.bdev = c->bdev,
		.sector = 0,
		.count = 0,
	};

	BUG_ON(dm_bufio_in_request());

	return dm_io(&io_req, 1, &io_reg, NULL);
}
EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);

/*
 * Use dm-io to send a discard request to the device.
 */
int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t count)
{
	struct dm_io_request io_req = {
		.bi_opf = REQ_OP_DISCARD | REQ_SYNC,
		.mem.type = DM_IO_KMEM,
		.mem.ptr.addr = NULL,
		.client = c->dm_io,
	};
	struct dm_io_region io_reg = {
		.bdev = c->bdev,
		.sector = block_to_sector(c, block),
		.count = block_to_sector(c, count),
	};

	BUG_ON(dm_bufio_in_request());

	return dm_io(&io_req, 1, &io_reg, NULL);
}
EXPORT_SYMBOL_GPL(dm_bufio_issue_discard);

/*
 * We first delete any other buffer that may be at that new location.
 *
 * Then, we write the buffer to the original location if it was dirty.
 *
 * Then, if we are the only one who is holding the buffer, relink the buffer
 * in the buffer tree for the new location.
 *
 * If there was someone else holding the buffer, we write it to the new
 * location but not relink it, because that other user needs to have the buffer
 * at the same place.
 */
void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
{
	struct dm_bufio_client *c = b->c;
	struct dm_buffer *new;

	BUG_ON(dm_bufio_in_request());

	dm_bufio_lock(c);

retry:
	new = __find(c, new_block);
	if (new) {
		if (new->hold_count) {
			__wait_for_free_buffer(c);
			goto retry;
		}

		/*
		 * FIXME: Is there any point waiting for a write that's going
		 * to be overwritten in a bit?
		 */
		__make_buffer_clean(new);
		__unlink_buffer(new);
		__free_buffer_wake(new);
	}

	BUG_ON(!b->hold_count);
	BUG_ON(test_bit(B_READING, &b->state));

	__write_dirty_buffer(b, NULL);
	if (b->hold_count == 1) {
		wait_on_bit_io(&b->state, B_WRITING,
			       TASK_UNINTERRUPTIBLE);
		set_bit(B_DIRTY, &b->state);
		b->dirty_start = 0;
		b->dirty_end = c->block_size;
		__unlink_buffer(b);
		__link_buffer(b, new_block, LIST_DIRTY);
	} else {
		sector_t old_block;
		wait_on_bit_lock_io(&b->state, B_WRITING,
				    TASK_UNINTERRUPTIBLE);
		/*
		 * Relink buffer to "new_block" so that write_callback
		 * sees "new_block" as a block number.
		 * After the write, link the buffer back to old_block.
		 * All this must be done under the bufio lock, so that block
		 * number change isn't visible to other threads.
		 */
		old_block = b->block;
		__unlink_buffer(b);
		__link_buffer(b, new_block, b->list_mode);
		submit_io(b, REQ_OP_WRITE, write_endio);
		wait_on_bit_io(&b->state, B_WRITING,
			       TASK_UNINTERRUPTIBLE);
		__unlink_buffer(b);
		__link_buffer(b, old_block, b->list_mode);
	}

	dm_bufio_unlock(c);
	dm_bufio_release(b);
}
EXPORT_SYMBOL_GPL(dm_bufio_release_move);

static void forget_buffer_locked(struct dm_buffer *b)
{
	if (likely(!b->hold_count) && likely(!b->state)) {
		__unlink_buffer(b);
		__free_buffer_wake(b);
	}
}

/*
 * Free the given buffer.
 *
 * This is just a hint; if the buffer is in use or dirty, this function
 * does nothing.
 */
void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)
{
	struct dm_buffer *b;

	dm_bufio_lock(c);

	b = __find(c, block);
	if (b)
		forget_buffer_locked(b);

	dm_bufio_unlock(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_forget);

void dm_bufio_forget_buffers(struct dm_bufio_client *c, sector_t block, sector_t n_blocks)
{
	struct dm_buffer *b;
	sector_t end_block = block + n_blocks;

	while (block < end_block) {
		dm_bufio_lock(c);

		b = __find_next(c, block);
		if (b) {
			block = b->block + 1;
			forget_buffer_locked(b);
		}

		dm_bufio_unlock(c);

		if (!b)
			break;
	}

}
EXPORT_SYMBOL_GPL(dm_bufio_forget_buffers);

void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
{
	c->minimum_buffers = n;
}
EXPORT_SYMBOL_GPL(dm_bufio_set_minimum_buffers);

unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
{
	return c->block_size;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);

sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
{
	sector_t s = bdev_nr_sectors(c->bdev);
	if (s >= c->start)
		s -= c->start;
	else
		s = 0;
	if (likely(c->sectors_per_block_bits >= 0))
		s >>= c->sectors_per_block_bits;
	else
		sector_div(s, c->block_size >> SECTOR_SHIFT);
	return s;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);

struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c)
{
	return c->dm_io;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client);

sector_t dm_bufio_get_block_number(struct dm_buffer *b)
{
	return b->block;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_block_number);

void *dm_bufio_get_block_data(struct dm_buffer *b)
{
	return b->data;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_block_data);

void *dm_bufio_get_aux_data(struct dm_buffer *b)
{
	return b + 1;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_aux_data);

struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b)
{
	return b->c;
}
EXPORT_SYMBOL_GPL(dm_bufio_get_client);

static void drop_buffers(struct dm_bufio_client *c)
{
	struct dm_buffer *b;
	int i;
	bool warned = false;

	BUG_ON(dm_bufio_in_request());

	/*
	 * An optimization so that the buffers are not written one-by-one.
	 */
	dm_bufio_write_dirty_buffers_async(c);

	dm_bufio_lock(c);

	while ((b = __get_unclaimed_buffer(c)))
		__free_buffer_wake(b);

	for (i = 0; i < LIST_SIZE; i++)
		list_for_each_entry(b, &c->lru[i], lru_list) {
			WARN_ON(!warned);
			warned = true;
			DMERR("leaked buffer %llx, hold count %u, list %d",
			      (unsigned long long)b->block, b->hold_count, i);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			stack_trace_print(b->stack_entries, b->stack_len, 1);
			/* mark unclaimed to avoid BUG_ON below */
			b->hold_count = 0;
#endif
		}

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	while ((b = __get_unclaimed_buffer(c)))
		__free_buffer_wake(b);
#endif

	for (i = 0; i < LIST_SIZE; i++)
		BUG_ON(!list_empty(&c->lru[i]));

	dm_bufio_unlock(c);
}

/*
 * We may not be able to evict this buffer if IO pending or the client
 * is still using it.  Caller is expected to know buffer is too old.
 *
 * And if GFP_NOFS is used, we must not do any I/O because we hold
 * dm_bufio_clients_lock and we would risk deadlock if the I/O gets
 * rerouted to different bufio client.
 */
static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
{
	if (!(gfp & __GFP_FS)) {
		if (test_bit(B_READING, &b->state) ||
		    test_bit(B_WRITING, &b->state) ||
		    test_bit(B_DIRTY, &b->state))
			return false;
	}

	if (b->hold_count)
		return false;

	__make_buffer_clean(b);
	__unlink_buffer(b);
	__free_buffer_wake(b);

	return true;
}

static unsigned long get_retain_buffers(struct dm_bufio_client *c)
{
	unsigned long retain_bytes = READ_ONCE(dm_bufio_retain_bytes);
	if (likely(c->sectors_per_block_bits >= 0))
		retain_bytes >>= c->sectors_per_block_bits + SECTOR_SHIFT;
	else
		retain_bytes /= c->block_size;
	return retain_bytes;
}

static void __scan(struct dm_bufio_client *c)
{
	int l;
	struct dm_buffer *b, *tmp;
	unsigned long freed = 0;
	unsigned long count = c->n_buffers[LIST_CLEAN] +
			      c->n_buffers[LIST_DIRTY];
	unsigned long retain_target = get_retain_buffers(c);

	for (l = 0; l < LIST_SIZE; l++) {
		list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
			if (count - freed <= retain_target)
				atomic_long_set(&c->need_shrink, 0);
			if (!atomic_long_read(&c->need_shrink))
				return;
			if (__try_evict_buffer(b, GFP_KERNEL)) {
				atomic_long_dec(&c->need_shrink);
				freed++;
			}
			cond_resched();
		}
	}
}

static void shrink_work(struct work_struct *w)
{
	struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work);

	dm_bufio_lock(c);
	__scan(c);
	dm_bufio_unlock(c);
}

static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	struct dm_bufio_client *c;

	c = container_of(shrink, struct dm_bufio_client, shrinker);
	atomic_long_add(sc->nr_to_scan, &c->need_shrink);
	queue_work(dm_bufio_wq, &c->shrink_work);

	return sc->nr_to_scan;
}

static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
	struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
	unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
			      READ_ONCE(c->n_buffers[LIST_DIRTY]);
	unsigned long retain_target = get_retain_buffers(c);
	unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink);

	if (unlikely(count < retain_target))
		count = 0;
	else
		count -= retain_target;

	if (unlikely(count < queued_for_cleanup))
		count = 0;
	else
		count -= queued_for_cleanup;

	return count;
}

/*
 * Create the buffering interface
 */
struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
					       unsigned reserved_buffers, unsigned aux_size,
					       void (*alloc_callback)(struct dm_buffer *),
					       void (*write_callback)(struct dm_buffer *),
					       unsigned int flags)
{
	int r;
	struct dm_bufio_client *c;
	unsigned i;
	char slab_name[27];

	if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
		DMERR("%s: block size not specified or is not multiple of 512b", __func__);
		r = -EINVAL;
		goto bad_client;
	}

	c = kzalloc(sizeof(*c), GFP_KERNEL);
	if (!c) {
		r = -ENOMEM;
		goto bad_client;
	}
	c->buffer_tree = RB_ROOT;

	c->bdev = bdev;
	c->block_size = block_size;
	if (is_power_of_2(block_size))
		c->sectors_per_block_bits = __ffs(block_size) - SECTOR_SHIFT;
	else
		c->sectors_per_block_bits = -1;

	c->alloc_callback = alloc_callback;
	c->write_callback = write_callback;

	if (flags & DM_BUFIO_CLIENT_NO_SLEEP) {
		c->no_sleep = true;
		static_branch_inc(&no_sleep_enabled);
	}

	for (i = 0; i < LIST_SIZE; i++) {
		INIT_LIST_HEAD(&c->lru[i]);
		c->n_buffers[i] = 0;
	}

	mutex_init(&c->lock);
	spin_lock_init(&c->spinlock);
	INIT_LIST_HEAD(&c->reserved_buffers);
	c->need_reserved_buffers = reserved_buffers;

	dm_bufio_set_minimum_buffers(c, DM_BUFIO_MIN_BUFFERS);

	init_waitqueue_head(&c->free_buffer_wait);
	c->async_write_error = 0;

	c->dm_io = dm_io_client_create();
	if (IS_ERR(c->dm_io)) {
		r = PTR_ERR(c->dm_io);
		goto bad_dm_io;
	}

	if (block_size <= KMALLOC_MAX_SIZE &&
	    (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
		unsigned align = min(1U << __ffs(block_size), (unsigned)PAGE_SIZE);
		snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", block_size);
		c->slab_cache = kmem_cache_create(slab_name, block_size, align,
						  SLAB_RECLAIM_ACCOUNT, NULL);
		if (!c->slab_cache) {
			r = -ENOMEM;
			goto bad;
		}
	}
	if (aux_size)
		snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer-%u", aux_size);
	else
		snprintf(slab_name, sizeof slab_name, "dm_bufio_buffer");
	c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
					   0, SLAB_RECLAIM_ACCOUNT, NULL);
	if (!c->slab_buffer) {
		r = -ENOMEM;
		goto bad;
	}

	while (c->need_reserved_buffers) {
		struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);

		if (!b) {
			r = -ENOMEM;
			goto bad;
		}
		__free_buffer_wake(b);
	}

	INIT_WORK(&c->shrink_work, shrink_work);
	atomic_long_set(&c->need_shrink, 0);

	c->shrinker.count_objects = dm_bufio_shrink_count;
	c->shrinker.scan_objects = dm_bufio_shrink_scan;
	c->shrinker.seeks = 1;
	c->shrinker.batch = 0;
	r = register_shrinker(&c->shrinker, "md-%s:(%u:%u)", slab_name,
			      MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
	if (r)
		goto bad;

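	/*
	 * Make the new client visible to the cleanup worker and include it
	 * in the global cache size accounting.
	 */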
	mutex_lock(&dm_bufio_clients_lock);
	dm_bufio_client_count++;
	list_add(&c->client_list, &dm_bufio_all_clients);
	__cache_size_refresh();
	mutex_unlock(&dm_bufio_clients_lock);

	return c;

bad:
	while (!list_empty(&c->reserved_buffers)) {
		struct dm_buffer *b = list_entry(c->reserved_buffers.next,
						 struct dm_buffer, lru_list);
		list_del(&b->lru_list);
		free_buffer(b);
	}
	kmem_cache_destroy(c->slab_cache);
	kmem_cache_destroy(c->slab_buffer);
	dm_io_client_destroy(c->dm_io);
bad_dm_io:
	mutex_destroy(&c->lock);
	kfree(c);
bad_client:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_bufio_client_create);

/*
 * Free the buffering interface.
 * It is required that there are no references on any buffers.
 */
void dm_bufio_client_destroy(struct dm_bufio_client *c)
{
	unsigned i;

	drop_buffers(c);

	unregister_shrinker(&c->shrinker);
	flush_work(&c->shrink_work);

	mutex_lock(&dm_bufio_clients_lock);

	list_del(&c->client_list);
	dm_bufio_client_count--;
	__cache_size_refresh();

	mutex_unlock(&dm_bufio_clients_lock);

	BUG_ON(!RB_EMPTY_ROOT(&c->buffer_tree));
	BUG_ON(c->need_reserved_buffers);

	while (!list_empty(&c->reserved_buffers)) {
		struct dm_buffer *b = list_entry(c->reserved_buffers.next,
						 struct dm_buffer, lru_list);
		list_del(&b->lru_list);
		free_buffer(b);
	}

	for (i = 0; i < LIST_SIZE; i++)
		if (c->n_buffers[i])
			DMERR("leaked buffer count %d: %ld", i, c->n_buffers[i]);

	for (i = 0; i < LIST_SIZE; i++)
		BUG_ON(c->n_buffers[i]);

	kmem_cache_destroy(c->slab_cache);
	kmem_cache_destroy(c->slab_buffer);
	dm_io_client_destroy(c->dm_io);
	mutex_destroy(&c->lock);
	if (c->no_sleep)
		static_branch_dec(&no_sleep_enabled);
	kfree(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);

void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start)
{
	c->start = start;
}
EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);

static unsigned get_max_age_hz(void)
{
	unsigned max_age = READ_ONCE(dm_bufio_max_age);

	if (max_age > UINT_MAX / HZ)
		max_age = UINT_MAX / HZ;

	return max_age * HZ;
}

static bool older_than(struct dm_buffer *b, unsigned long age_hz)
{
	return time_after_eq(jiffies, b->last_accessed + age_hz);
}

static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
{
	struct dm_buffer *b, *tmp;
	unsigned long retain_target = get_retain_buffers(c);
	unsigned long count;
	LIST_HEAD(write_list);

	dm_bufio_lock(c);

	__check_watermark(c, &write_list);
	if (unlikely(!list_empty(&write_list))) {
		dm_bufio_unlock(c);
		__flush_write_list(&write_list);
		dm_bufio_lock(c);
	}

	count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
	list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_CLEAN], lru_list) {
		if (count <= retain_target)
			break;

		if (!older_than(b, age_hz))
			break;

		if (__try_evict_buffer(b, 0))
			count--;

		cond_resched();
	}

	dm_bufio_unlock(c);
}

static void do_global_cleanup(struct work_struct *w)
{
	struct dm_bufio_client *locked_client = NULL;
	struct dm_bufio_client *current_client;
	struct dm_buffer *b;
	unsigned spinlock_hold_count;
	unsigned long threshold = dm_bufio_cache_size -
		dm_bufio_cache_size / DM_BUFIO_LOW_WATERMARK_RATIO;
	unsigned long loops = global_num * 2;

	mutex_lock(&dm_bufio_clients_lock);

	while (1) {
		cond_resched();

		spin_lock(&global_spinlock);
		if (unlikely(dm_bufio_current_allocated <= threshold))
			break;

		spinlock_hold_count = 0;
get_next:
		if (!loops--)
			break;
		if (unlikely(list_empty(&global_queue)))
			break;
		b = list_entry(global_queue.prev, struct dm_buffer, global_list);

		if (b->accessed) {
			b->accessed = 0;
			list_move(&b->global_list, &global_queue);
			if (likely(++spinlock_hold_count < 16))
				goto get_next;
			spin_unlock(&global_spinlock);
			continue;
		}

		current_client = b->c;
		if (unlikely(current_client != locked_client)) {
			if (locked_client)
				dm_bufio_unlock(locked_client);

			if (!dm_bufio_trylock(current_client)) {
				spin_unlock(&global_spinlock);
				dm_bufio_lock(current_client);
				locked_client = current_client;
				continue;
			}

			locked_client = current_client;
		}

		spin_unlock(&global_spinlock);

		if (unlikely(!__try_evict_buffer(b, GFP_KERNEL))) {
			spin_lock(&global_spinlock);
			list_move(&b->global_list, &global_queue);
			spin_unlock(&global_spinlock);
		}
	}

	spin_unlock(&global_spinlock);

	if (locked_client)
		dm_bufio_unlock(locked_client);

	mutex_unlock(&dm_bufio_clients_lock);
}

static void cleanup_old_buffers(void)
{
	unsigned long max_age_hz = get_max_age_hz();
	struct dm_bufio_client *c;

	mutex_lock(&dm_bufio_clients_lock);

	__cache_size_refresh();

	list_for_each_entry(c, &dm_bufio_all_clients, client_list)
		__evict_old_buffers(c, max_age_hz);

	mutex_unlock(&dm_bufio_clients_lock);
}

static void work_fn(struct work_struct *w)
{
	cleanup_old_buffers();

	queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
			   DM_BUFIO_WORK_TIMER_SECS * HZ);
}

/*----------------------------------------------------------------
 * Module setup
 *--------------------------------------------------------------*/

/*
 * This is called only once for the whole dm_bufio module.
 * It initializes memory limit.
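 * The default cache size is DM_BUFIO_MEMORY_PERCENT of low memory, further
 * capped at DM_BUFIO_VMALLOC_PERCENT of the vmalloc area on MMU systems.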
 */
static int __init dm_bufio_init(void)
{
	__u64 mem;

	dm_bufio_allocated_kmem_cache = 0;
	dm_bufio_allocated_get_free_pages = 0;
	dm_bufio_allocated_vmalloc = 0;
	dm_bufio_current_allocated = 0;

	mem = (__u64)mult_frac(totalram_pages() - totalhigh_pages(),
			       DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;

	if (mem > ULONG_MAX)
		mem = ULONG_MAX;

#ifdef CONFIG_MMU
	if (mem > mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100))
		mem = mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100);
#endif

	dm_bufio_default_cache_size = mem;

	mutex_lock(&dm_bufio_clients_lock);
	__cache_size_refresh();
	mutex_unlock(&dm_bufio_clients_lock);

	dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0);
	if (!dm_bufio_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn);
	INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup);
	queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
			   DM_BUFIO_WORK_TIMER_SECS * HZ);

	return 0;
}

/*
 * This is called once when unloading the dm_bufio module.
 */
static void __exit dm_bufio_exit(void)
{
	int bug = 0;

	cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
	destroy_workqueue(dm_bufio_wq);

	if (dm_bufio_client_count) {
		DMCRIT("%s: dm_bufio_client_count leaked: %d",
			__func__, dm_bufio_client_count);
		bug = 1;
	}

	if (dm_bufio_current_allocated) {
		DMCRIT("%s: dm_bufio_current_allocated leaked: %lu",
			__func__, dm_bufio_current_allocated);
		bug = 1;
	}

	if (dm_bufio_allocated_get_free_pages) {
		DMCRIT("%s: dm_bufio_allocated_get_free_pages leaked: %lu",
		       __func__, dm_bufio_allocated_get_free_pages);
		bug = 1;
	}

	if (dm_bufio_allocated_vmalloc) {
		DMCRIT("%s: dm_bufio_vmalloc leaked: %lu",
		       __func__, dm_bufio_allocated_vmalloc);
		bug = 1;
	}

	BUG_ON(bug);
}

module_init(dm_bufio_init)
module_exit(dm_bufio_exit)

module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");

module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");

module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");

module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");

module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, S_IRUGO);
MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc");

module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, S_IRUGO);
MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages");

module_param_named(allocated_vmalloc_bytes, dm_bufio_allocated_vmalloc, ulong, S_IRUGO);
MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc");

module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, S_IRUGO);
MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache");

MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
MODULE_DESCRIPTION(DM_NAME " buffered I/O library");
MODULE_LICENSE("GPL");