/*
 * Copyright (C) 2015 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-cache-background-tracker.h"
#include "dm-cache-policy-internal.h"
#include "dm-cache-policy.h"
#include "dm.h"

#include <linux/hash.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/math64.h>

#define DM_MSG_PREFIX "cache-policy-smq"

/*----------------------------------------------------------------*/

/*
 * Safe division functions that return zero on divide by zero.
 */
static unsigned safe_div(unsigned n, unsigned d)
{
	return d ? n / d : 0u;
}

static unsigned safe_mod(unsigned n, unsigned d)
{
	return d ? n % d : 0u;
}

/*----------------------------------------------------------------*/

struct entry {
	unsigned hash_next:28;
	unsigned prev:28;
	unsigned next:28;
	unsigned level:6;
	bool dirty:1;
	bool allocated:1;
	bool sentinel:1;
	bool pending_work:1;

	dm_oblock_t oblock;
};

/*----------------------------------------------------------------*/

#define INDEXER_NULL ((1u << 28u) - 1u)

/*
 * An entry_space manages a set of entries that we use for the queues.
 * The clean and dirty queues share entries, so this object is separate
 * from the queue itself.
 */
struct entry_space {
	struct entry *begin;
	struct entry *end;
};

static int space_init(struct entry_space *es, unsigned nr_entries)
{
	if (!nr_entries) {
		es->begin = es->end = NULL;
		return 0;
	}

	es->begin = vzalloc(sizeof(struct entry) * nr_entries);
	if (!es->begin)
		return -ENOMEM;

	es->end = es->begin + nr_entries;
	return 0;
}

static void space_exit(struct entry_space *es)
{
	vfree(es->begin);
}

static struct entry *__get_entry(struct entry_space *es, unsigned block)
{
	struct entry *e;

	e = es->begin + block;
	BUG_ON(e >= es->end);

	return e;
}

static unsigned to_index(struct entry_space *es, struct entry *e)
{
	BUG_ON(e < es->begin || e >= es->end);
	return e - es->begin;
}

static struct entry *to_entry(struct entry_space *es, unsigned block)
{
	if (block == INDEXER_NULL)
		return NULL;

	return __get_entry(es, block);
}

/*----------------------------------------------------------------*/

struct ilist {
	unsigned nr_elts;	/* excluding sentinel entries */
	unsigned head, tail;
};

static void l_init(struct ilist *l)
{
	l->nr_elts = 0;
	l->head = l->tail = INDEXER_NULL;
}

static struct entry *l_head(struct entry_space *es, struct ilist *l)
{
	return to_entry(es, l->head);
}

static struct entry *l_tail(struct entry_space *es, struct ilist *l)
{
	return to_entry(es, l->tail);
}

static struct entry *l_next(struct entry_space *es, struct entry *e)
{
	return to_entry(es, e->next);
}

static struct entry *l_prev(struct entry_space *es, struct entry *e)
{
	return to_entry(es, e->prev);
}

static bool l_empty(struct ilist *l)
{
	return l->head == INDEXER_NULL;
}

static void l_add_head(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *head = l_head(es, l);

	e->next = l->head;
	e->prev = INDEXER_NULL;

	if (head)
		head->prev = l->head = to_index(es, e);
	else
		l->head = l->tail = to_index(es, e);

	if (!e->sentinel)
		l->nr_elts++;
}
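
/*
 * Illustrative sketch (not used by the policy): entries are linked by
 * 28-bit indices into the entry_space rather than by pointer, with
 * INDEXER_NULL standing in for NULL.  The helper below just walks a list
 * with the index-based accessors, counting non-sentinel entries much like
 * the ilist's nr_elts field does.
 */
static inline unsigned __maybe_unused l_count_example(struct entry_space *es,
						      struct ilist *l)
{
	unsigned n = 0;
	struct entry *e;

	/* l_head()/l_next() translate the stored indices back to pointers. */
	for (e = l_head(es, l); e; e = l_next(es, e))
		if (!e->sentinel)
			n++;

	return n;
}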

static void l_add_tail(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *tail = l_tail(es, l);

	e->next = INDEXER_NULL;
	e->prev = l->tail;

	if (tail)
		tail->next = l->tail = to_index(es, e);
	else
		l->head = l->tail = to_index(es, e);

	if (!e->sentinel)
		l->nr_elts++;
}

static void l_add_before(struct entry_space *es, struct ilist *l,
			 struct entry *old, struct entry *e)
{
	struct entry *prev = l_prev(es, old);

	if (!prev)
		l_add_head(es, l, e);

	else {
		e->prev = old->prev;
		e->next = to_index(es, old);
		prev->next = old->prev = to_index(es, e);

		if (!e->sentinel)
			l->nr_elts++;
	}
}

static void l_del(struct entry_space *es, struct ilist *l, struct entry *e)
{
	struct entry *prev = l_prev(es, e);
	struct entry *next = l_next(es, e);

	if (prev)
		prev->next = e->next;
	else
		l->head = e->next;

	if (next)
		next->prev = e->prev;
	else
		l->tail = e->prev;

	if (!e->sentinel)
		l->nr_elts--;
}

static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l)
{
	struct entry *e;

	for (e = l_tail(es, l); e; e = l_prev(es, e))
		if (!e->sentinel) {
			l_del(es, l, e);
			return e;
		}

	return NULL;
}

/*----------------------------------------------------------------*/

/*
 * The stochastic-multi-queue is a set of lru lists stacked into levels.
 * Entries are moved up levels when they are used, which loosely orders the
 * most accessed entries in the top levels and least in the bottom.  This
 * structure is *much* better than a single lru list.
 */
#define MAX_LEVELS 64u

struct queue {
	struct entry_space *es;

	unsigned nr_elts;
	unsigned nr_levels;
	struct ilist qs[MAX_LEVELS];

	/*
	 * We maintain a count of the number of entries we would like in each
	 * level.
	 */
	unsigned last_target_nr_elts;
	unsigned nr_top_levels;
	unsigned nr_in_top_levels;
	unsigned target_count[MAX_LEVELS];
};

static void q_init(struct queue *q, struct entry_space *es, unsigned nr_levels)
{
	unsigned i;

	q->es = es;
	q->nr_elts = 0;
	q->nr_levels = nr_levels;

	for (i = 0; i < q->nr_levels; i++) {
		l_init(q->qs + i);
		q->target_count[i] = 0u;
	}

	q->last_target_nr_elts = 0u;
	q->nr_top_levels = 0u;
	q->nr_in_top_levels = 0u;
}

static unsigned q_size(struct queue *q)
{
	return q->nr_elts;
}

/*
 * Insert an entry to the back of the given level.
 */
static void q_push(struct queue *q, struct entry *e)
{
	BUG_ON(e->pending_work);

	if (!e->sentinel)
		q->nr_elts++;

	l_add_tail(q->es, q->qs + e->level, e);
}

static void q_push_front(struct queue *q, struct entry *e)
{
	BUG_ON(e->pending_work);

	if (!e->sentinel)
		q->nr_elts++;

	l_add_head(q->es, q->qs + e->level, e);
}

static void q_push_before(struct queue *q, struct entry *old, struct entry *e)
{
	BUG_ON(e->pending_work);

	if (!e->sentinel)
		q->nr_elts++;

	l_add_before(q->es, q->qs + e->level, old, e);
}

static void q_del(struct queue *q, struct entry *e)
{
	l_del(q->es, q->qs + e->level, e);
	if (!e->sentinel)
		q->nr_elts--;
}

/*
 * Return the oldest entry of the lowest populated level.
 */
static struct entry *q_peek(struct queue *q, unsigned max_level, bool can_cross_sentinel)
{
	unsigned level;
	struct entry *e;

	max_level = min(max_level, q->nr_levels);

	for (level = 0; level < max_level; level++)
		for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
			if (e->sentinel) {
				if (can_cross_sentinel)
					continue;
				else
					break;
			}

			return e;
		}

	return NULL;
}

static struct entry *q_pop(struct queue *q)
{
	struct entry *e = q_peek(q, q->nr_levels, true);

	if (e)
		q_del(q, e);

	return e;
}

/*
 * This function assumes there is a non-sentinel entry to pop.  It's only
 * used by redistribute, so we know this is true.  It also doesn't adjust
 * the q->nr_elts count.
 */
static struct entry *__redist_pop_from(struct queue *q, unsigned level)
{
	struct entry *e;

	for (; level < q->nr_levels; level++)
		for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e))
			if (!e->sentinel) {
				l_del(q->es, q->qs + e->level, e);
				return e;
			}

	return NULL;
}

static void q_set_targets_subrange_(struct queue *q, unsigned nr_elts, unsigned lbegin, unsigned lend)
{
	unsigned level, nr_levels, entries_per_level, remainder;

	BUG_ON(lbegin > lend);
	BUG_ON(lend > q->nr_levels);
	nr_levels = lend - lbegin;
	entries_per_level = safe_div(nr_elts, nr_levels);
	remainder = safe_mod(nr_elts, nr_levels);

	for (level = lbegin; level < lend; level++)
		q->target_count[level] =
			(level < (lbegin + remainder)) ? entries_per_level + 1u : entries_per_level;
}

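/*
 * Worked example: distributing nr_elts = 10 entries over the levels
 * [lbegin, lend) = [0, 4) gives entries_per_level = 2 and remainder = 2,
 * so the per level targets become 3, 3, 2, 2 - the first 'remainder'
 * levels each take one extra entry.
 */
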
/*
 * Typically we have fewer elements in the top few levels, which allows us
 * to adjust the promote threshold nicely.
 */
static void q_set_targets(struct queue *q)
{
	if (q->last_target_nr_elts == q->nr_elts)
		return;

	q->last_target_nr_elts = q->nr_elts;

	if (q->nr_top_levels > q->nr_levels)
		q_set_targets_subrange_(q, q->nr_elts, 0, q->nr_levels);

	else {
		q_set_targets_subrange_(q, q->nr_in_top_levels,
					q->nr_levels - q->nr_top_levels, q->nr_levels);

		if (q->nr_in_top_levels < q->nr_elts)
			q_set_targets_subrange_(q, q->nr_elts - q->nr_in_top_levels,
						0, q->nr_levels - q->nr_top_levels);
		else
			q_set_targets_subrange_(q, 0, 0, q->nr_levels - q->nr_top_levels);
	}
}

static void q_redistribute(struct queue *q)
{
	unsigned target, level;
	struct ilist *l, *l_above;
	struct entry *e;

	q_set_targets(q);

	for (level = 0u; level < q->nr_levels - 1u; level++) {
		l = q->qs + level;
		target = q->target_count[level];

		/*
		 * Pull down some entries from the level above.
		 */
		while (l->nr_elts < target) {
			e = __redist_pop_from(q, level + 1u);
			if (!e) {
				/* bug in nr_elts */
				break;
			}

			e->level = level;
			l_add_tail(q->es, l, e);
		}

		/*
		 * Push some entries up.
		 */
		l_above = q->qs + level + 1u;
		while (l->nr_elts > target) {
			e = l_pop_tail(q->es, l);

			if (!e)
				/* bug in nr_elts */
				break;

			e->level = level + 1u;
			l_add_tail(q->es, l_above, e);
		}
	}
}

static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels,
		      struct entry *s1, struct entry *s2)
{
	struct entry *de;
	unsigned sentinels_passed = 0;
	unsigned new_level = min(q->nr_levels - 1u, e->level + extra_levels);

	/* try and find an entry to swap with */
	if (extra_levels && (e->level < q->nr_levels - 1u)) {
		for (de = l_head(q->es, q->qs + new_level); de && de->sentinel; de = l_next(q->es, de))
			sentinels_passed++;

		if (de) {
			q_del(q, de);
			de->level = e->level;
			if (s1) {
				switch (sentinels_passed) {
				case 0:
					q_push_before(q, s1, de);
					break;

				case 1:
					q_push_before(q, s2, de);
					break;

				default:
					q_push(q, de);
				}
			} else
				q_push(q, de);
		}
	}

	q_del(q, e);
	e->level = new_level;
	q_push(q, e);
}

/*----------------------------------------------------------------*/

#define FP_SHIFT 8
#define SIXTEENTH (1u << (FP_SHIFT - 4u))
#define EIGHTH (1u << (FP_SHIFT - 3u))

struct stats {
	unsigned hit_threshold;
	unsigned hits;
	unsigned misses;
};

enum performance {
	Q_POOR,
	Q_FAIR,
	Q_WELL
};

static void stats_init(struct stats *s, unsigned nr_levels)
{
	s->hit_threshold = (nr_levels * 3u) / 4u;
	s->hits = 0u;
	s->misses = 0u;
}

static void stats_reset(struct stats *s)
{
	s->hits = s->misses = 0u;
}

static void stats_level_accessed(struct stats *s, unsigned level)
{
	if (level >= s->hit_threshold)
		s->hits++;
	else
		s->misses++;
}

static void stats_miss(struct stats *s)
{
	s->misses++;
}

/*
 * There are times when we don't have any confidence in the hotspot queue,
 * such as when a fresh cache is created and the blocks have been spread
 * out across the levels, or when the I/O load changes.  We detect this by
 * seeing how often a lookup is in the top levels of the hotspot queue.
 */
static enum performance stats_assess(struct stats *s)
{
	unsigned confidence = safe_div(s->hits << FP_SHIFT, s->hits + s->misses);

	if (confidence < SIXTEENTH)
		return Q_POOR;

	else if (confidence < EIGHTH)
		return Q_FAIR;

	else
		return Q_WELL;
}

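/*
 * Worked example: with FP_SHIFT == 8, SIXTEENTH is 16 and EIGHTH is 32.
 * 5 hits and 95 misses give confidence = (5 << 8) / 100 = 12, i.e. Q_POOR;
 * 20 hits and 80 misses give (20 << 8) / 100 = 51, i.e. Q_WELL.
 */
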
/*----------------------------------------------------------------*/

struct smq_hash_table {
	struct entry_space *es;
	unsigned long long hash_bits;
	unsigned *buckets;
};

/*
 * All cache entries are stored in a chained hash table.  To save space we
 * use indexing again, and only store indexes to the next entry.
 */
static int h_init(struct smq_hash_table *ht, struct entry_space *es, unsigned nr_entries)
{
	unsigned i, nr_buckets;

	ht->es = es;
	nr_buckets = roundup_pow_of_two(max(nr_entries / 4u, 16u));
	ht->hash_bits = __ffs(nr_buckets);

	ht->buckets = vmalloc(sizeof(*ht->buckets) * nr_buckets);
	if (!ht->buckets)
		return -ENOMEM;

	for (i = 0; i < nr_buckets; i++)
		ht->buckets[i] = INDEXER_NULL;

	return 0;
}

static void h_exit(struct smq_hash_table *ht)
{
	vfree(ht->buckets);
}

static struct entry *h_head(struct smq_hash_table *ht, unsigned bucket)
{
	return to_entry(ht->es, ht->buckets[bucket]);
}

static struct entry *h_next(struct smq_hash_table *ht, struct entry *e)
{
	return to_entry(ht->es, e->hash_next);
}

static void __h_insert(struct smq_hash_table *ht, unsigned bucket, struct entry *e)
{
	e->hash_next = ht->buckets[bucket];
	ht->buckets[bucket] = to_index(ht->es, e);
}

static void h_insert(struct smq_hash_table *ht, struct entry *e)
{
	unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
	__h_insert(ht, h, e);
}

static struct entry *__h_lookup(struct smq_hash_table *ht, unsigned h, dm_oblock_t oblock,
				struct entry **prev)
{
	struct entry *e;

	*prev = NULL;
	for (e = h_head(ht, h); e; e = h_next(ht, e)) {
		if (e->oblock == oblock)
			return e;

		*prev = e;
	}

	return NULL;
}

static void __h_unlink(struct smq_hash_table *ht, unsigned h,
		       struct entry *e, struct entry *prev)
{
	if (prev)
		prev->hash_next = e->hash_next;
	else
		ht->buckets[h] = e->hash_next;
}

/*
 * Also moves the looked-up entry to the front of its bucket.
 */
static struct entry *h_lookup(struct smq_hash_table *ht, dm_oblock_t oblock)
{
	struct entry *e, *prev;
	unsigned h = hash_64(from_oblock(oblock), ht->hash_bits);

	e = __h_lookup(ht, h, oblock, &prev);
	if (e && prev) {
		/*
		 * Move to the front because this entry is likely
		 * to be hit again.
		 */
		__h_unlink(ht, h, e, prev);
		__h_insert(ht, h, e);
	}

	return e;
}

static void h_remove(struct smq_hash_table *ht, struct entry *e)
{
	unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
	struct entry *prev;

	/*
	 * The downside of using a singly linked list is that we have to
	 * iterate the bucket to remove an item.
	 */
	e = __h_lookup(ht, h, e->oblock, &prev);
	if (e)
		__h_unlink(ht, h, e, prev);
}

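/*
 * Sizing note: h_init() above aims for roughly four entries per bucket,
 * with a floor of 16 buckets.  For example, 10000 cache entries give
 * roundup_pow_of_two(2500) = 4096 buckets and hash_bits = 12.
 */
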
/*----------------------------------------------------------------*/

struct entry_alloc {
	struct entry_space *es;
	unsigned begin;

	unsigned nr_allocated;
	struct ilist free;
};

static void init_allocator(struct entry_alloc *ea, struct entry_space *es,
			   unsigned begin, unsigned end)
{
	unsigned i;

	ea->es = es;
	ea->nr_allocated = 0u;
	ea->begin = begin;

	l_init(&ea->free);
	for (i = begin; i != end; i++)
		l_add_tail(ea->es, &ea->free, __get_entry(ea->es, i));
}

static void init_entry(struct entry *e)
{
	/*
	 * We can't memset because that would clear the hotspot and
	 * sentinel bits which remain constant.
	 */
	e->hash_next = INDEXER_NULL;
	e->next = INDEXER_NULL;
	e->prev = INDEXER_NULL;
	e->level = 0u;
	e->dirty = true;	/* FIXME: audit */
	e->allocated = true;
	e->sentinel = false;
	e->pending_work = false;
}

static struct entry *alloc_entry(struct entry_alloc *ea)
{
	struct entry *e;

	if (l_empty(&ea->free))
		return NULL;

	e = l_pop_tail(ea->es, &ea->free);
	init_entry(e);
	ea->nr_allocated++;

	return e;
}

/*
 * This assumes the cblock hasn't already been allocated.
 */
static struct entry *alloc_particular_entry(struct entry_alloc *ea, unsigned i)
{
	struct entry *e = __get_entry(ea->es, ea->begin + i);

	BUG_ON(e->allocated);

	l_del(ea->es, &ea->free, e);
	init_entry(e);
	ea->nr_allocated++;

	return e;
}

static void free_entry(struct entry_alloc *ea, struct entry *e)
{
	BUG_ON(!ea->nr_allocated);
	BUG_ON(!e->allocated);

	ea->nr_allocated--;
	e->allocated = false;
	l_add_tail(ea->es, &ea->free, e);
}

static bool allocator_empty(struct entry_alloc *ea)
{
	return l_empty(&ea->free);
}

static unsigned get_index(struct entry_alloc *ea, struct entry *e)
{
	return to_index(ea->es, e) - ea->begin;
}

static struct entry *get_entry(struct entry_alloc *ea, unsigned index)
{
	return __get_entry(ea->es, ea->begin + index);
}

/*----------------------------------------------------------------*/

#define NR_HOTSPOT_LEVELS 64u
#define NR_CACHE_LEVELS 64u

#define WRITEBACK_PERIOD (10ul * HZ)
#define DEMOTE_PERIOD (60ul * HZ)

#define HOTSPOT_UPDATE_PERIOD (HZ)
#define CACHE_UPDATE_PERIOD (60ul * HZ)

struct smq_policy {
	struct dm_cache_policy policy;

	/* protects everything */
	spinlock_t lock;
	dm_cblock_t cache_size;
	sector_t cache_block_size;

	sector_t hotspot_block_size;
	unsigned nr_hotspot_blocks;
	unsigned cache_blocks_per_hotspot_block;
	unsigned hotspot_level_jump;

	struct entry_space es;
	struct entry_alloc writeback_sentinel_alloc;
	struct entry_alloc demote_sentinel_alloc;
	struct entry_alloc hotspot_alloc;
	struct entry_alloc cache_alloc;

	unsigned long *hotspot_hit_bits;
	unsigned long *cache_hit_bits;

	/*
	 * We maintain three queues of entries.  The cache proper, consisting
	 * of a clean and a dirty queue, holds the currently active mappings.
	 * The hotspot queue uses a larger block size to track blocks that
	 * are being hit frequently and are potential candidates for
	 * promotion to the cache.
	 */
	struct queue hotspot;
	struct queue clean;
	struct queue dirty;

	struct stats hotspot_stats;
	struct stats cache_stats;

	/*
	 * Keeps track of time, incremented by the core.  We use this to
	 * avoid attributing multiple hits within the same tick.
	 */
	unsigned tick;

	/*
	 * The hash tables allow us to quickly find an entry by origin
	 * block.
	 */
	struct smq_hash_table table;
	struct smq_hash_table hotspot_table;

	bool current_writeback_sentinels;
	unsigned long next_writeback_period;

	bool current_demote_sentinels;
	unsigned long next_demote_period;

	unsigned write_promote_level;
	unsigned read_promote_level;

	unsigned long next_hotspot_period;
	unsigned long next_cache_period;

	struct background_tracker *bg_work;

	bool migrations_allowed;
};

/*----------------------------------------------------------------*/

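/*
 * Each sentinel allocator holds two banks of NR_CACHE_LEVELS sentinels;
 * 'which' selects the bank, so the current and previous sets can be
 * swapped each period without any reallocation.
 */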
static struct entry *get_sentinel(struct entry_alloc *ea, unsigned level, bool which)
{
	return get_entry(ea, which ? level : NR_CACHE_LEVELS + level);
}

static struct entry *writeback_sentinel(struct smq_policy *mq, unsigned level)
{
	return get_sentinel(&mq->writeback_sentinel_alloc, level, mq->current_writeback_sentinels);
}

static struct entry *demote_sentinel(struct smq_policy *mq, unsigned level)
{
	return get_sentinel(&mq->demote_sentinel_alloc, level, mq->current_demote_sentinels);
}

static void __update_writeback_sentinels(struct smq_policy *mq)
{
	unsigned level;
	struct queue *q = &mq->dirty;
	struct entry *sentinel;

	for (level = 0; level < q->nr_levels; level++) {
		sentinel = writeback_sentinel(mq, level);
		q_del(q, sentinel);
		q_push(q, sentinel);
	}
}

static void __update_demote_sentinels(struct smq_policy *mq)
{
	unsigned level;
	struct queue *q = &mq->clean;
	struct entry *sentinel;

	for (level = 0; level < q->nr_levels; level++) {
		sentinel = demote_sentinel(mq, level);
		q_del(q, sentinel);
		q_push(q, sentinel);
	}
}

static void update_sentinels(struct smq_policy *mq)
{
	if (time_after(jiffies, mq->next_writeback_period)) {
		mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
		mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
		__update_writeback_sentinels(mq);
	}

	if (time_after(jiffies, mq->next_demote_period)) {
		mq->next_demote_period = jiffies + DEMOTE_PERIOD;
		mq->current_demote_sentinels = !mq->current_demote_sentinels;
		__update_demote_sentinels(mq);
	}
}

static void __sentinels_init(struct smq_policy *mq)
{
	unsigned level;
	struct entry *sentinel;

	for (level = 0; level < NR_CACHE_LEVELS; level++) {
		sentinel = writeback_sentinel(mq, level);
		sentinel->level = level;
		q_push(&mq->dirty, sentinel);

		sentinel = demote_sentinel(mq, level);
		sentinel->level = level;
		q_push(&mq->clean, sentinel);
	}
}

static void sentinels_init(struct smq_policy *mq)
{
	mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
	mq->next_demote_period = jiffies + DEMOTE_PERIOD;

	mq->current_writeback_sentinels = false;
	mq->current_demote_sentinels = false;
	__sentinels_init(mq);

	mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
	mq->current_demote_sentinels = !mq->current_demote_sentinels;
	__sentinels_init(mq);
}

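/*
 * Rough summary of how the sentinels are used: update_sentinels() flips the
 * active set and pushes it to the back of each level once per
 * WRITEBACK_PERIOD / DEMOTE_PERIOD.  Because q_peek() with
 * can_cross_sentinel == false stops at the first sentinel in a level, the
 * writeback path (see queue_writeback() below) tends to pick dirty entries
 * that have sat in the queue, unhit, for at least one period.
 */
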
/*----------------------------------------------------------------*/

static void del_queue(struct smq_policy *mq, struct entry *e)
{
	q_del(e->dirty ? &mq->dirty : &mq->clean, e);
}

static void push_queue(struct smq_policy *mq, struct entry *e)
{
	if (e->dirty)
		q_push(&mq->dirty, e);
	else
		q_push(&mq->clean, e);
}

// !h, !q, a -> h, q, a
static void push(struct smq_policy *mq, struct entry *e)
{
	h_insert(&mq->table, e);
	if (!e->pending_work)
		push_queue(mq, e);
}

static void push_queue_front(struct smq_policy *mq, struct entry *e)
{
	if (e->dirty)
		q_push_front(&mq->dirty, e);
	else
		q_push_front(&mq->clean, e);
}

static void push_front(struct smq_policy *mq, struct entry *e)
{
	h_insert(&mq->table, e);
	if (!e->pending_work)
		push_queue_front(mq, e);
}

static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
{
	return to_cblock(get_index(&mq->cache_alloc, e));
}

static void requeue(struct smq_policy *mq, struct entry *e)
{
	/*
	 * Pending work has temporarily been taken out of the queues.
	 */
	if (e->pending_work)
		return;

	if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) {
		if (!e->dirty) {
			q_requeue(&mq->clean, e, 1u, NULL, NULL);
			return;
		}

		q_requeue(&mq->dirty, e, 1u,
			  get_sentinel(&mq->writeback_sentinel_alloc, e->level, !mq->current_writeback_sentinels),
			  get_sentinel(&mq->writeback_sentinel_alloc, e->level, mq->current_writeback_sentinels));
	}
}

static unsigned default_promote_level(struct smq_policy *mq)
{
	/*
	 * The promote level depends on the current performance of the
	 * cache.
	 *
	 * If the cache is performing badly, then we can't afford
	 * to promote much without causing performance to drop below that
	 * of the origin device.
	 *
	 * If the cache is performing well, then we don't need to promote
	 * much.  If it isn't broken, don't fix it.
	 *
	 * If the cache is middling then we promote more.
	 *
	 * This scheme reminds me of a graph of entropy vs probability of a
	 * binary variable.
	 */
	static unsigned table[] = {1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1};

	unsigned hits = mq->cache_stats.hits;
	unsigned misses = mq->cache_stats.misses;
	unsigned index = safe_div(hits << 4u, hits + misses);
	return table[index];
}

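/*
 * Worked example for the table above: equal hits and misses give
 * index = (hits << 4) / (hits + misses) = 8 and a promote level of 7
 * (a middling cache promotes aggressively); a near perfect hit ratio
 * gives index 16 and a promote level of just 1.
 */
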
static void update_promote_levels(struct smq_policy *mq)
{
	/*
	 * If there are unused cache entries then we want to be really
	 * eager to promote.
	 */
	unsigned threshold_level = allocator_empty(&mq->cache_alloc) ?
		default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u);

	threshold_level = max(threshold_level, NR_HOTSPOT_LEVELS);

	/*
	 * If the hotspot queue is performing badly then we have little
	 * confidence that we know which blocks to promote.  So we cut down
	 * the number of promotions.
	 */
	switch (stats_assess(&mq->hotspot_stats)) {
	case Q_POOR:
		threshold_level /= 4u;
		break;

	case Q_FAIR:
		threshold_level /= 2u;
		break;

	case Q_WELL:
		break;
	}

	mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level;
	mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level);
}

/*
 * If the hotspot queue is performing badly, then we try to move entries
 * around more quickly.
 */
static void update_level_jump(struct smq_policy *mq)
{
	switch (stats_assess(&mq->hotspot_stats)) {
	case Q_POOR:
		mq->hotspot_level_jump = 4u;
		break;

	case Q_FAIR:
		mq->hotspot_level_jump = 2u;
		break;

	case Q_WELL:
		mq->hotspot_level_jump = 1u;
		break;
	}
}

static void end_hotspot_period(struct smq_policy *mq)
{
	clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);
	update_promote_levels(mq);

	if (time_after(jiffies, mq->next_hotspot_period)) {
		update_level_jump(mq);
		q_redistribute(&mq->hotspot);
		stats_reset(&mq->hotspot_stats);
		mq->next_hotspot_period = jiffies + HOTSPOT_UPDATE_PERIOD;
	}
}

static void end_cache_period(struct smq_policy *mq)
{
	if (time_after(jiffies, mq->next_cache_period)) {
		clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));

		q_redistribute(&mq->dirty);
		q_redistribute(&mq->clean);
		stats_reset(&mq->cache_stats);

		mq->next_cache_period = jiffies + CACHE_UPDATE_PERIOD;
	}
}

/*----------------------------------------------------------------*/

/*
 * Targets are given as a percentage.
 */
#define CLEAN_TARGET 25u
#define FREE_TARGET 25u

static unsigned percent_to_target(struct smq_policy *mq, unsigned p)
{
	return from_cblock(mq->cache_size) * p / 100u;
}

static bool clean_target_met(struct smq_policy *mq, bool idle)
{
	/*
	 * Cache entries may not be populated.  So we cannot rely on the
	 * size of the clean queue.
	 */
	if (idle) {
		/*
		 * We'd like to clean everything.
		 */
		return q_size(&mq->dirty) == 0u;
	}

	/*
	 * If we're busy we don't worry about cleaning at all.
	 */
	return true;
}

static bool free_target_met(struct smq_policy *mq)
{
	unsigned nr_free;

	nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated;
	return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
		percent_to_target(mq, FREE_TARGET);
}

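/*
 * Worked example: with a 10000 block cache, FREE_TARGET (25%) means
 * free_target_met() wants at least 2500 blocks either unallocated or
 * already queued for demotion.
 */
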
/*----------------------------------------------------------------*/

static void mark_pending(struct smq_policy *mq, struct entry *e)
{
	BUG_ON(e->sentinel);
	BUG_ON(!e->allocated);
	BUG_ON(e->pending_work);
	e->pending_work = true;
}

static void clear_pending(struct smq_policy *mq, struct entry *e)
{
	BUG_ON(!e->pending_work);
	e->pending_work = false;
}

static void queue_writeback(struct smq_policy *mq)
{
	int r;
	struct policy_work work;
	struct entry *e;

	e = q_peek(&mq->dirty, mq->dirty.nr_levels, !mq->migrations_allowed);
	if (e) {
		mark_pending(mq, e);
		q_del(&mq->dirty, e);

		work.op = POLICY_WRITEBACK;
		work.oblock = e->oblock;
		work.cblock = infer_cblock(mq, e);

		r = btracker_queue(mq->bg_work, &work, NULL);
		WARN_ON_ONCE(r); // FIXME: finish, I think we have to get rid of this race.
	}
}

static void queue_demotion(struct smq_policy *mq)
{
	struct policy_work work;
	struct entry *e;

	if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
		return;

	e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
	if (!e) {
		if (!clean_target_met(mq, true))
			queue_writeback(mq);
		return;
	}

	mark_pending(mq, e);
	q_del(&mq->clean, e);

	work.op = POLICY_DEMOTE;
	work.oblock = e->oblock;
	work.cblock = infer_cblock(mq, e);
	btracker_queue(mq->bg_work, &work, NULL);
}

static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
			    struct policy_work **workp)
{
	struct entry *e;
	struct policy_work work;

	if (!mq->migrations_allowed)
		return;

	if (allocator_empty(&mq->cache_alloc)) {
		/*
		 * We always claim to be 'idle' to ensure some demotions happen
		 * with continuous loads.
		 */
		if (!free_target_met(mq))
			queue_demotion(mq);
		return;
	}

	if (btracker_promotion_already_present(mq->bg_work, oblock))
		return;

	/*
	 * We allocate the entry now to reserve the cblock.  If the
	 * background work is aborted we must remember to free it.
	 */
	e = alloc_entry(&mq->cache_alloc);
	BUG_ON(!e);
	e->pending_work = true;
	work.op = POLICY_PROMOTE;
	work.oblock = oblock;
	work.cblock = infer_cblock(mq, e);
	btracker_queue(mq->bg_work, &work, workp);
}

/*----------------------------------------------------------------*/

enum promote_result {
	PROMOTE_NOT,
	PROMOTE_TEMPORARY,
	PROMOTE_PERMANENT
};

/*
 * Converts a boolean into a promote result.
 */
static enum promote_result maybe_promote(bool promote)
{
	return promote ? PROMOTE_PERMANENT : PROMOTE_NOT;
}

static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e,
					  int data_dir, bool fast_promote)
{
	if (data_dir == WRITE) {
		if (!allocator_empty(&mq->cache_alloc) && fast_promote)
			return PROMOTE_TEMPORARY;

		return maybe_promote(hs_e->level >= mq->write_promote_level);
	} else
		return maybe_promote(hs_e->level >= mq->read_promote_level);
}

static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
{
	sector_t r = from_oblock(b);
	(void) sector_div(r, mq->cache_blocks_per_hotspot_block);
	return to_oblock(r);
}

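/*
 * Illustrative example: to_hblock() maps an origin block to the hotspot
 * block containing it.  With, say, 16 cache blocks per hotspot block,
 * origin block 1000 falls in hotspot block 62.
 */
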
static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b)
{
	unsigned hi;
	dm_oblock_t hb = to_hblock(mq, b);
	struct entry *e = h_lookup(&mq->hotspot_table, hb);

	if (e) {
		stats_level_accessed(&mq->hotspot_stats, e->level);

		hi = get_index(&mq->hotspot_alloc, e);
		q_requeue(&mq->hotspot, e,
			  test_and_set_bit(hi, mq->hotspot_hit_bits) ?
			  0u : mq->hotspot_level_jump,
			  NULL, NULL);

	} else {
		stats_miss(&mq->hotspot_stats);

		e = alloc_entry(&mq->hotspot_alloc);
		if (!e) {
			e = q_pop(&mq->hotspot);
			if (e) {
				h_remove(&mq->hotspot_table, e);
				hi = get_index(&mq->hotspot_alloc, e);
				clear_bit(hi, mq->hotspot_hit_bits);
			}

		}

		if (e) {
			e->oblock = hb;
			q_push(&mq->hotspot, e);
			h_insert(&mq->hotspot_table, e);
		}
	}

	return e;
}

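/*
 * Note: the hotspot_hit_bits bitset throttles level jumps - only the first
 * hit on a hotspot block within an update period moves it up
 * hotspot_level_jump levels; further hits just requeue it at its current
 * level.  The bits are cleared again in end_hotspot_period().
 */
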
/*----------------------------------------------------------------*/

/*
 * Public interface, via the policy struct.  See dm-cache-policy.h for a
 * description of these.
 */

static struct smq_policy *to_smq_policy(struct dm_cache_policy *p)
{
	return container_of(p, struct smq_policy, policy);
}

static void smq_destroy(struct dm_cache_policy *p)
{
	struct smq_policy *mq = to_smq_policy(p);

	btracker_destroy(mq->bg_work);
	h_exit(&mq->hotspot_table);
	h_exit(&mq->table);
	free_bitset(mq->hotspot_hit_bits);
	free_bitset(mq->cache_hit_bits);
	space_exit(&mq->es);
	kfree(mq);
}

/*----------------------------------------------------------------*/

static int __lookup(struct smq_policy *mq, dm_oblock_t oblock, dm_cblock_t *cblock,
		    int data_dir, bool fast_copy,
		    struct policy_work **work, bool *background_work)
{
	struct entry *e, *hs_e;
	enum promote_result pr;

	*background_work = false;

	e = h_lookup(&mq->table, oblock);
	if (e) {
		stats_level_accessed(&mq->cache_stats, e->level);

		requeue(mq, e);
		*cblock = infer_cblock(mq, e);
		return 0;

	} else {
		stats_miss(&mq->cache_stats);

		/*
		 * The hotspot queue only gets updated with misses.
		 */
		hs_e = update_hotspot_queue(mq, oblock);

		pr = should_promote(mq, hs_e, data_dir, fast_copy);
		if (pr != PROMOTE_NOT) {
			queue_promotion(mq, oblock, work);
			*background_work = true;
		}

		return -ENOENT;
	}
}

static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
		      int data_dir, bool fast_copy,
		      bool *background_work)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = __lookup(mq, oblock, cblock,
		     data_dir, fast_copy,
		     NULL, background_work);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

static int smq_lookup_with_work(struct dm_cache_policy *p,
				dm_oblock_t oblock, dm_cblock_t *cblock,
				int data_dir, bool fast_copy,
				struct policy_work **work)
{
	int r;
	bool background_queued;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = __lookup(mq, oblock, cblock, data_dir, fast_copy, work, &background_queued);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
				   struct policy_work **result)
{
	int r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = btracker_issue(mq->bg_work, result);
	if (r == -ENODATA) {
		if (!clean_target_met(mq, idle)) {
			queue_writeback(mq);
			r = btracker_issue(mq->bg_work, result);
		}
	}
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

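/*
 * Legend for the single-letter state annotations used below (and on push()
 * above): 'h' - the entry is in the hash table, 'q' - it is on the
 * clean/dirty queue, 'a' - it is allocated from cache_alloc; '!' negates.
 */
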
/*
 * We need to clear any pending work flags that have been set, and in the
 * case of promotion, free the entry for the destination cblock.
 */
static void __complete_background_work(struct smq_policy *mq,
				       struct policy_work *work,
				       bool success)
{
	struct entry *e = get_entry(&mq->cache_alloc,
				    from_cblock(work->cblock));

	switch (work->op) {
	case POLICY_PROMOTE:
		// !h, !q, a
		clear_pending(mq, e);
		if (success) {
			e->oblock = work->oblock;
			e->level = NR_CACHE_LEVELS - 1;
			push(mq, e);
			// h, q, a
		} else {
			free_entry(&mq->cache_alloc, e);
			// !h, !q, !a
		}
		break;

	case POLICY_DEMOTE:
		// h, !q, a
		if (success) {
			h_remove(&mq->table, e);
			free_entry(&mq->cache_alloc, e);
			// !h, !q, !a
		} else {
			clear_pending(mq, e);
			push_queue(mq, e);
			// h, q, a
		}
		break;

	case POLICY_WRITEBACK:
		// h, !q, a
		clear_pending(mq, e);
		push_queue(mq, e);
		// h, q, a
		break;
	}

	btracker_complete(mq->bg_work, work);
}

static void smq_complete_background_work(struct dm_cache_policy *p,
					 struct policy_work *work,
					 bool success)
{
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	__complete_background_work(mq, work, success);
	spin_unlock_irqrestore(&mq->lock, flags);
}

// in_hash(oblock) -> in_hash(oblock)
static void __smq_set_clear_dirty(struct smq_policy *mq, dm_cblock_t cblock, bool set)
{
	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));

	if (e->pending_work)
		e->dirty = set;
	else {
		del_queue(mq, e);
		e->dirty = set;
		push_queue(mq, e);
	}
}

static void smq_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	__smq_set_clear_dirty(mq, cblock, true);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static void smq_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->lock, flags);
	__smq_set_clear_dirty(mq, cblock, false);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static unsigned random_level(dm_cblock_t cblock)
{
	return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
}

static int smq_load_mapping(struct dm_cache_policy *p,
			    dm_oblock_t oblock, dm_cblock_t cblock,
			    bool dirty, uint32_t hint, bool hint_valid)
{
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e;

	e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
	e->oblock = oblock;
	e->dirty = dirty;
	e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
	e->pending_work = false;

	/*
	 * When we load mappings we push ahead of both sentinels in order to
	 * allow demotions and cleaning to occur immediately.
	 */
	push_front(mq, e);

	return 0;
}

static int smq_invalidate_mapping(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));

	if (!e->allocated)
		return -ENODATA;

	// FIXME: what if this block has pending background work?
	del_queue(mq, e);
	h_remove(&mq->table, e);
	free_entry(&mq->cache_alloc, e);
	return 0;
}

static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	struct smq_policy *mq = to_smq_policy(p);
	struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));

	if (!e->allocated)
		return 0;

	return e->level;
}

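/*
 * Note: the per-block hint saved and restored above is simply the entry's
 * queue level, so a reloaded cache starts out with roughly the same
 * ordering it had when it was shut down.
 */
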
static dm_cblock_t smq_residency(struct dm_cache_policy *p)
{
	dm_cblock_t r;
	unsigned long flags;
	struct smq_policy *mq = to_smq_policy(p);

	spin_lock_irqsave(&mq->lock, flags);
	r = to_cblock(mq->cache_alloc.nr_allocated);
	spin_unlock_irqrestore(&mq->lock, flags);

	return r;
}

static void smq_tick(struct dm_cache_policy *p, bool can_block)
{
	struct smq_policy *mq = to_smq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->lock, flags);
	mq->tick++;
	update_sentinels(mq);
	end_hotspot_period(mq);
	end_cache_period(mq);
	spin_unlock_irqrestore(&mq->lock, flags);
}

static void smq_allow_migrations(struct dm_cache_policy *p, bool allow)
{
	struct smq_policy *mq = to_smq_policy(p);
	mq->migrations_allowed = allow;
}

/*
 * smq has no config values, but the old mq policy did.  To avoid breaking
 * software we continue to accept these configurables for the mq policy,
 * but they have no effect.
 */
static int mq_set_config_value(struct dm_cache_policy *p,
			       const char *key, const char *value)
{
	unsigned long tmp;

	if (kstrtoul(value, 10, &tmp))
		return -EINVAL;

	if (!strcasecmp(key, "random_threshold") ||
	    !strcasecmp(key, "sequential_threshold") ||
	    !strcasecmp(key, "discard_promote_adjustment") ||
	    !strcasecmp(key, "read_promote_adjustment") ||
	    !strcasecmp(key, "write_promote_adjustment")) {
		DMWARN("tunable '%s' no longer has any effect, mq policy is now an alias for smq", key);
		return 0;
	}

	return -EINVAL;
}

static int mq_emit_config_values(struct dm_cache_policy *p, char *result,
				 unsigned maxlen, ssize_t *sz_ptr)
{
	ssize_t sz = *sz_ptr;

	DMEMIT("10 random_threshold 0 "
	       "sequential_threshold 0 "
	       "discard_promote_adjustment 0 "
	       "read_promote_adjustment 0 "
	       "write_promote_adjustment 0 ");

	*sz_ptr = sz;
	return 0;
}

/* Init the policy plugin interface function pointers. */
static void init_policy_functions(struct smq_policy *mq, bool mimic_mq)
{
	mq->policy.destroy = smq_destroy;
	mq->policy.lookup = smq_lookup;
	mq->policy.lookup_with_work = smq_lookup_with_work;
	mq->policy.get_background_work = smq_get_background_work;
	mq->policy.complete_background_work = smq_complete_background_work;
	mq->policy.set_dirty = smq_set_dirty;
	mq->policy.clear_dirty = smq_clear_dirty;
	mq->policy.load_mapping = smq_load_mapping;
	mq->policy.invalidate_mapping = smq_invalidate_mapping;
	mq->policy.get_hint = smq_get_hint;
	mq->policy.residency = smq_residency;
	mq->policy.tick = smq_tick;
	mq->policy.allow_migrations = smq_allow_migrations;

	if (mimic_mq) {
		mq->policy.set_config_value = mq_set_config_value;
		mq->policy.emit_config_values = mq_emit_config_values;
	}
}

static bool too_many_hotspot_blocks(sector_t origin_size,
				    sector_t hotspot_block_size,
				    unsigned nr_hotspot_blocks)
{
	return (hotspot_block_size * nr_hotspot_blocks) > origin_size;
}

static void calc_hotspot_params(sector_t origin_size,
				sector_t cache_block_size,
				unsigned nr_cache_blocks,
				sector_t *hotspot_block_size,
				unsigned *nr_hotspot_blocks)
{
	*hotspot_block_size = cache_block_size * 16u;
	*nr_hotspot_blocks = max(nr_cache_blocks / 4u, 1024u);

	while ((*hotspot_block_size > cache_block_size) &&
	       too_many_hotspot_blocks(origin_size, *hotspot_block_size, *nr_hotspot_blocks))
		*hotspot_block_size /= 2u;
}

static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
					    sector_t origin_size,
					    sector_t cache_block_size,
					    bool mimic_mq,
					    bool migrations_allowed)
{
	unsigned i;
	unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
	unsigned total_sentinels = 2u * nr_sentinels_per_queue;
	struct smq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);

	if (!mq)
		return NULL;

	init_policy_functions(mq, mimic_mq);
	mq->cache_size = cache_size;
	mq->cache_block_size = cache_block_size;

	calc_hotspot_params(origin_size, cache_block_size, from_cblock(cache_size),
			    &mq->hotspot_block_size, &mq->nr_hotspot_blocks);

	mq->cache_blocks_per_hotspot_block = div64_u64(mq->hotspot_block_size, mq->cache_block_size);
	mq->hotspot_level_jump = 1u;
	if (space_init(&mq->es, total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size))) {
		DMERR("couldn't initialize entry space");
		goto bad_pool_init;
	}

	init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue);
	for (i = 0; i < nr_sentinels_per_queue; i++)
		get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true;

	init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels);
	for (i = 0; i < nr_sentinels_per_queue; i++)
		get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true;

	init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels,
		       total_sentinels + mq->nr_hotspot_blocks);

	init_allocator(&mq->cache_alloc, &mq->es,
		       total_sentinels + mq->nr_hotspot_blocks,
		       total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size));

	mq->hotspot_hit_bits = alloc_bitset(mq->nr_hotspot_blocks);
	if (!mq->hotspot_hit_bits) {
		DMERR("couldn't allocate hotspot hit bitset");
		goto bad_hotspot_hit_bits;
	}
	clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);

	if (from_cblock(cache_size)) {
		mq->cache_hit_bits = alloc_bitset(from_cblock(cache_size));
		if (!mq->cache_hit_bits) {
			DMERR("couldn't allocate cache hit bitset");
			goto bad_cache_hit_bits;
		}
		clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));
	} else
		mq->cache_hit_bits = NULL;

	mq->tick = 0;
	spin_lock_init(&mq->lock);

	q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS);
	mq->hotspot.nr_top_levels = 8;
	mq->hotspot.nr_in_top_levels = min(mq->nr_hotspot_blocks / NR_HOTSPOT_LEVELS,
					   from_cblock(mq->cache_size) / mq->cache_blocks_per_hotspot_block);

	q_init(&mq->clean, &mq->es, NR_CACHE_LEVELS);
	q_init(&mq->dirty, &mq->es, NR_CACHE_LEVELS);

	stats_init(&mq->hotspot_stats, NR_HOTSPOT_LEVELS);
	stats_init(&mq->cache_stats, NR_CACHE_LEVELS);

	if (h_init(&mq->table, &mq->es, from_cblock(cache_size)))
		goto bad_alloc_table;

	if (h_init(&mq->hotspot_table, &mq->es, mq->nr_hotspot_blocks))
		goto bad_alloc_hotspot_table;

	sentinels_init(mq);
	mq->write_promote_level = mq->read_promote_level = NR_HOTSPOT_LEVELS;

	mq->next_hotspot_period = jiffies;
	mq->next_cache_period = jiffies;

	mq->bg_work = btracker_create(10240); /* FIXME: hard coded value */
	if (!mq->bg_work)
		goto bad_btracker;

	mq->migrations_allowed = migrations_allowed;

	return &mq->policy;

bad_btracker:
	h_exit(&mq->hotspot_table);
bad_alloc_hotspot_table:
	h_exit(&mq->table);
bad_alloc_table:
	free_bitset(mq->cache_hit_bits);
bad_cache_hit_bits:
	free_bitset(mq->hotspot_hit_bits);
bad_hotspot_hit_bits:
	space_exit(&mq->es);
bad_pool_init:
	kfree(mq);

	return NULL;
}

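/*
 * Layout of the entry space set up above: one contiguous array holding the
 * writeback sentinels, the demote sentinels, the hotspot entries and the
 * cache entries, in that order.  Each entry_alloc hands out indices from
 * its own region only.
 */
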
static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
					  sector_t origin_size,
					  sector_t cache_block_size)
{
	return __smq_create(cache_size, origin_size, cache_block_size, false, true);
}

static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
					 sector_t origin_size,
					 sector_t cache_block_size)
{
	return __smq_create(cache_size, origin_size, cache_block_size, true, true);
}

static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size,
					      sector_t origin_size,
					      sector_t cache_block_size)
{
	return __smq_create(cache_size, origin_size, cache_block_size, false, false);
}

/*----------------------------------------------------------------*/

static struct dm_cache_policy_type smq_policy_type = {
	.name = "smq",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = smq_create
};

static struct dm_cache_policy_type mq_policy_type = {
	.name = "mq",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = mq_create,
};

static struct dm_cache_policy_type cleaner_policy_type = {
	.name = "cleaner",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = cleaner_create,
};

static struct dm_cache_policy_type default_policy_type = {
	.name = "default",
	.version = {2, 0, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = smq_create,
	.real = &smq_policy_type
};

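/*
 * All four policy types above share version 2.0.0 and a 4 byte per-block
 * hint; smq uses that hint to persist the queue level (see smq_get_hint()).
 */
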
static int __init smq_init(void)
{
	int r;

	r = dm_cache_policy_register(&smq_policy_type);
	if (r) {
		DMERR("register failed %d", r);
		return -ENOMEM;
	}

	r = dm_cache_policy_register(&mq_policy_type);
	if (r) {
		DMERR("register failed (as mq) %d", r);
		goto out_mq;
	}

	r = dm_cache_policy_register(&cleaner_policy_type);
	if (r) {
		DMERR("register failed (as cleaner) %d", r);
		goto out_cleaner;
	}

	r = dm_cache_policy_register(&default_policy_type);
	if (r) {
		DMERR("register failed (as default) %d", r);
		goto out_default;
	}

	return 0;

out_default:
	dm_cache_policy_unregister(&cleaner_policy_type);
out_cleaner:
	dm_cache_policy_unregister(&mq_policy_type);
out_mq:
	dm_cache_policy_unregister(&smq_policy_type);

	return -ENOMEM;
}

static void __exit smq_exit(void)
{
	dm_cache_policy_unregister(&cleaner_policy_type);
	dm_cache_policy_unregister(&smq_policy_type);
	dm_cache_policy_unregister(&mq_policy_type);
	dm_cache_policy_unregister(&default_policy_type);
}

module_init(smq_init);
module_exit(smq_exit);

MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("smq cache policy");

MODULE_ALIAS("dm-cache-default");
MODULE_ALIAS("dm-cache-mq");
MODULE_ALIAS("dm-cache-cleaner");