/*
 * linux/fs/mbcache.c
 * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */

/*
 * Filesystem Meta Information Block Cache (mbcache)
 *
 * The mbcache caches blocks of block devices that need to be located
 * by their device/block number, as well as by other criteria (such
 * as the block's contents).
 *
 * There can only be one cache entry in a cache per device and block number.
 * Additional indexes need not be unique in this sense. The number of
 * additional indexes (=other criteria) can be hardwired at compile time
 * or specified at cache create time.
 *
 * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
 * in the cache. A valid entry is in the main hash tables of the cache,
 * and may also be in the lru list. An invalid entry is not in any hashes
 * or lists.
 *
 * A valid cache entry is only in the lru list if no handles refer to it.
 * Invalid cache entries will be freed when the last handle to the cache
 * entry is released. Entries that cannot be freed immediately are put
 * back on the lru list.
 */

/*
 * Lock descriptions and usage:
 *
 * Each hash chain of both the block and index hash tables now contains
 * a built-in lock used to serialize accesses to the hash chain.
 *
 * Accesses to the global data structures mb_cache_list and mb_cache_lru_list
 * are serialized via the global spinlock mb_cache_spinlock.
 *
 * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
 * accesses to its local data, such as e_used and e_queued.
 *
 * Lock ordering:
 *
 * Each block hash chain's lock has the highest lock order, followed by an
 * index hash chain's lock, mb_cache_bg_lock (used to implement
 * mb_cache_entry's lock), and mb_cache_spinlock, with the lowest order.
 * While holding either a block or index hash chain lock, a thread can
 * acquire an mb_cache_bg_lock, which in turn can also acquire
 * mb_cache_spinlock.
 *
 * Synchronization:
 *
 * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and
 * index hash chains, they need to lock the corresponding hash chain. For
 * each mb_cache_entry within the chain, they also need to lock the
 * mb_cache_entry itself to prevent a simultaneous release or free of the
 * entry and to serialize accesses to its e_used and e_queued members.
 *
 * To avoid a dangling reference to an already freed mb_cache_entry, an
 * mb_cache_entry is only freed when it is not on a block hash chain and is
 * no longer referenced, i.e. both e_used and e_queued are 0. When an
 * mb_cache_entry is explicitly freed it is first removed from its block
 * hash chain.
 */
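/*
 * Typical cache lifecycle (illustrative sketch only, not part of this file;
 * the "foo_xattr_cache" name and the "bdev" variable are hypothetical):
 *
 *	At module init time a filesystem creates one cache, e.g.
 *
 *		foo_xattr_cache = mb_cache_create("foo_xattr", 6);
 *
 *	While a device is mounted, entries are allocated, inserted, looked up
 *	and released through the mb_cache_entry_* functions below. When a
 *	filesystem instance goes away, that device's entries are dropped with
 *
 *		mb_cache_shrink(bdev);
 *
 *	and at module exit time the cache itself is torn down with
 *
 *		mb_cache_destroy(foo_xattr_cache);
 */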
#include <linux/kernel.h>
#include <linux/module.h>

#include <linux/hash.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/list_bl.h>
#include <linux/mbcache.h>
#include <linux/init.h>
#include <linux/blockgroup_lock.h>

#ifdef MB_CACHE_DEBUG
# define mb_debug(f...) do { \
		printk(KERN_DEBUG f); \
		printk("\n"); \
	} while (0)
#define mb_assert(c) do { if (!(c)) \
		printk(KERN_ERR "assertion " #c " failed\n"); \
	} while(0)
#else
# define mb_debug(f...) do { } while(0)
# define mb_assert(c) do { } while(0)
#endif
#define mb_error(f...) do { \
		printk(KERN_ERR f); \
		printk("\n"); \
	} while(0)

#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)

#define MB_CACHE_ENTRY_LOCK_BITS	__builtin_log2(NR_BG_LOCKS)
#define MB_CACHE_ENTRY_LOCK_INDEX(ce)	\
	(hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))

static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
static struct blockgroup_lock *mb_cache_bg_lock;
static struct kmem_cache *mb_cache_kmem_cache;

MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
MODULE_LICENSE("GPL");

EXPORT_SYMBOL(mb_cache_create);
EXPORT_SYMBOL(mb_cache_shrink);
EXPORT_SYMBOL(mb_cache_destroy);
EXPORT_SYMBOL(mb_cache_entry_alloc);
EXPORT_SYMBOL(mb_cache_entry_insert);
EXPORT_SYMBOL(mb_cache_entry_release);
EXPORT_SYMBOL(mb_cache_entry_free);
EXPORT_SYMBOL(mb_cache_entry_get);
#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
EXPORT_SYMBOL(mb_cache_entry_find_first);
EXPORT_SYMBOL(mb_cache_entry_find_next);
#endif

/*
 * Global data: list of all mbcache's, lru list, and a spinlock for
 * accessing cache data structures on SMP machines. The lru list is
 * global across all mbcaches.
 */

static LIST_HEAD(mb_cache_list);
static LIST_HEAD(mb_cache_lru_list);
static DEFINE_SPINLOCK(mb_cache_spinlock);

static inline void
__spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
{
	spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
		MB_CACHE_ENTRY_LOCK_INDEX(ce)));
}

static inline void
__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
{
	spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
		MB_CACHE_ENTRY_LOCK_INDEX(ce)));
}

static inline int
__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
{
	return !hlist_bl_unhashed(&ce->e_block_list);
}


static inline void
__mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
{
	if (__mb_cache_entry_is_block_hashed(ce))
		hlist_bl_del_init(&ce->e_block_list);
}

static inline int
__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
{
	return !hlist_bl_unhashed(&ce->e_index.o_list);
}

static inline void
__mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
{
	if (__mb_cache_entry_is_index_hashed(ce))
		hlist_bl_del_init(&ce->e_index.o_list);
}

/*
 * __mb_cache_entry_unhash_unlock()
 *
 * This function is called to unhash the entry from both the block and
 * index hash chains.
 * It assumes both the block and index hash chains are locked upon entry.
 * It also unlocks both hash chains upon exit.
 */
static inline void
__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
{
	__mb_cache_entry_unhash_index(ce);
	hlist_bl_unlock(ce->e_index_hash_p);
	__mb_cache_entry_unhash_block(ce);
	hlist_bl_unlock(ce->e_block_hash_p);
}

static void
__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
{
	struct mb_cache *cache = ce->e_cache;

	mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
	kmem_cache_free(cache->c_entry_cache, ce);
	atomic_dec(&cache->c_entry_count);
}
static void
__mb_cache_entry_release(struct mb_cache_entry *ce)
{
	/* First lock the entry to serialize access to its local data. */
	__spin_lock_mb_cache_entry(ce);
	/* Wake up all processes queuing for this cache entry. */
	if (ce->e_queued)
		wake_up_all(&mb_cache_queue);
	if (ce->e_used >= MB_CACHE_WRITER)
		ce->e_used -= MB_CACHE_WRITER;
	/*
	 * Make sure that all cache entries on lru_list have
	 * both e_used and e_queued equal to 0.
	 */
	ce->e_used--;
	if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
		if (!__mb_cache_entry_is_block_hashed(ce)) {
			__spin_unlock_mb_cache_entry(ce);
			goto forget;
		}
		/*
		 * Need access to lru list, first drop entry lock,
		 * then reacquire the lock in the proper order.
		 */
		spin_lock(&mb_cache_spinlock);
		if (list_empty(&ce->e_lru_list))
			list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
		spin_unlock(&mb_cache_spinlock);
	}
	__spin_unlock_mb_cache_entry(ce);
	return;
forget:
	mb_assert(list_empty(&ce->e_lru_list));
	__mb_cache_entry_forget(ce, GFP_KERNEL);
}

/*
 * mb_cache_shrink_scan()  memory pressure callback
 *
 * This function is called by the kernel memory management when memory
 * gets low.
 *
 * @shrink: (ignored)
 * @sc: shrink_control passed from reclaim
 *
 * Returns the number of objects freed.
 */
static unsigned long
mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
	LIST_HEAD(free_list);
	struct mb_cache_entry *entry, *tmp;
	int nr_to_scan = sc->nr_to_scan;
	gfp_t gfp_mask = sc->gfp_mask;
	unsigned long freed = 0;

	mb_debug("trying to free %d entries", nr_to_scan);
	spin_lock(&mb_cache_spinlock);
	while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
		struct mb_cache_entry *ce =
			list_entry(mb_cache_lru_list.next,
				struct mb_cache_entry, e_lru_list);
		list_del_init(&ce->e_lru_list);
		if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
			continue;
		spin_unlock(&mb_cache_spinlock);
		/* Prevent any find or get operation on the entry */
		hlist_bl_lock(ce->e_block_hash_p);
		hlist_bl_lock(ce->e_index_hash_p);
		/* Ignore if it is touched by a find/get */
		if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
			!list_empty(&ce->e_lru_list)) {
			hlist_bl_unlock(ce->e_index_hash_p);
			hlist_bl_unlock(ce->e_block_hash_p);
			spin_lock(&mb_cache_spinlock);
			continue;
		}
		__mb_cache_entry_unhash_unlock(ce);
		list_add_tail(&ce->e_lru_list, &free_list);
		spin_lock(&mb_cache_spinlock);
	}
	spin_unlock(&mb_cache_spinlock);

	list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
		__mb_cache_entry_forget(entry, gfp_mask);
		freed++;
	}
	return freed;
}

static unsigned long
mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
	struct mb_cache *cache;
	unsigned long count = 0;

	spin_lock(&mb_cache_spinlock);
	list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
		mb_debug("cache %s (%d)", cache->c_name,
			  atomic_read(&cache->c_entry_count));
		count += atomic_read(&cache->c_entry_count);
	}
	spin_unlock(&mb_cache_spinlock);

	return vfs_pressure_ratio(count);
}

static struct shrinker mb_cache_shrinker = {
	.count_objects = mb_cache_shrink_count,
	.scan_objects = mb_cache_shrink_scan,
	.seeks = DEFAULT_SEEKS,
};
/*
 * mb_cache_create()  create a new cache
 *
 * All entries in one cache are of equal size. Cache entries may be from
 * multiple devices. If this is the first mbcache created, registers
 * the cache with kernel memory management. Returns NULL if no more
 * memory was available.
 *
 * @name: name of the cache (informal)
 * @bucket_bits: log2(number of hash buckets)
 */
struct mb_cache *
mb_cache_create(const char *name, int bucket_bits)
{
	int n, bucket_count = 1 << bucket_bits;
	struct mb_cache *cache = NULL;

	if (!mb_cache_bg_lock) {
		mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
			GFP_KERNEL);
		if (!mb_cache_bg_lock)
			return NULL;
		bgl_lock_init(mb_cache_bg_lock);
	}

	cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
	if (!cache)
		return NULL;
	cache->c_name = name;
	atomic_set(&cache->c_entry_count, 0);
	cache->c_bucket_bits = bucket_bits;
	cache->c_block_hash = kmalloc(bucket_count *
		sizeof(struct hlist_bl_head), GFP_KERNEL);
	if (!cache->c_block_hash)
		goto fail;
	for (n = 0; n < bucket_count; n++)
		INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
	cache->c_index_hash = kmalloc(bucket_count *
		sizeof(struct hlist_bl_head), GFP_KERNEL);
	if (!cache->c_index_hash)
		goto fail;
	for (n = 0; n < bucket_count; n++)
		INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
	if (!mb_cache_kmem_cache) {
		mb_cache_kmem_cache = kmem_cache_create(name,
			sizeof(struct mb_cache_entry), 0,
			SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
		if (!mb_cache_kmem_cache)
			goto fail2;
	}
	cache->c_entry_cache = mb_cache_kmem_cache;

	/*
	 * Set an upper limit on the number of cache entries so that the hash
	 * chains won't grow too long.
	 */
	cache->c_max_entries = bucket_count << 4;

	spin_lock(&mb_cache_spinlock);
	list_add(&cache->c_cache_list, &mb_cache_list);
	spin_unlock(&mb_cache_spinlock);
	return cache;

fail2:
	kfree(cache->c_index_hash);

fail:
	kfree(cache->c_block_hash);
	kfree(cache);
	return NULL;
}


/*
 * mb_cache_shrink()
 *
 * Removes all cache entries of a device from the cache. All cache entries
 * currently in use cannot be freed, and thus remain in the cache. All others
 * are freed.
 *
 * @bdev: which device's cache entries to shrink
 */
void
mb_cache_shrink(struct block_device *bdev)
{
	LIST_HEAD(free_list);
	struct list_head *l;
	struct mb_cache_entry *ce, *tmp;

	l = &mb_cache_lru_list;
	spin_lock(&mb_cache_spinlock);
	while (!list_is_last(l, &mb_cache_lru_list)) {
		l = l->next;
		ce = list_entry(l, struct mb_cache_entry, e_lru_list);
		if (ce->e_bdev == bdev) {
			list_del_init(&ce->e_lru_list);
			if (ce->e_used || ce->e_queued ||
			    atomic_read(&ce->e_refcnt))
				continue;
			spin_unlock(&mb_cache_spinlock);
			/*
			 * Prevent any find or get operation on the entry.
			 */
			hlist_bl_lock(ce->e_block_hash_p);
			hlist_bl_lock(ce->e_index_hash_p);
			/* Ignore if it is touched by a find/get */
			if (ce->e_used || ce->e_queued ||
			    atomic_read(&ce->e_refcnt) ||
			    !list_empty(&ce->e_lru_list)) {
				hlist_bl_unlock(ce->e_index_hash_p);
				hlist_bl_unlock(ce->e_block_hash_p);
				l = &mb_cache_lru_list;
				spin_lock(&mb_cache_spinlock);
				continue;
			}
			__mb_cache_entry_unhash_unlock(ce);
			mb_assert(!(ce->e_used || ce->e_queued ||
				    atomic_read(&ce->e_refcnt)));
			list_add_tail(&ce->e_lru_list, &free_list);
			l = &mb_cache_lru_list;
			spin_lock(&mb_cache_spinlock);
		}
	}
	spin_unlock(&mb_cache_spinlock);

	list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
		__mb_cache_entry_forget(ce, GFP_KERNEL);
	}
}


/*
 * mb_cache_destroy()
 *
 * Shrinks the cache to its minimum possible size (hopefully 0 entries),
 * and then destroys it. If this was the last mbcache, un-registers the
 * mbcache from kernel memory management.
 */
void
mb_cache_destroy(struct mb_cache *cache)
{
	LIST_HEAD(free_list);
	struct mb_cache_entry *ce, *tmp;

	spin_lock(&mb_cache_spinlock);
	list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
		if (ce->e_cache == cache)
			list_move_tail(&ce->e_lru_list, &free_list);
	}
	list_del(&cache->c_cache_list);
	spin_unlock(&mb_cache_spinlock);

	list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
		list_del_init(&ce->e_lru_list);
		/*
		 * Prevent any find or get operation on the entry.
		 */
		hlist_bl_lock(ce->e_block_hash_p);
		hlist_bl_lock(ce->e_index_hash_p);
		mb_assert(!(ce->e_used || ce->e_queued ||
			    atomic_read(&ce->e_refcnt)));
		__mb_cache_entry_unhash_unlock(ce);
		__mb_cache_entry_forget(ce, GFP_KERNEL);
	}

	if (atomic_read(&cache->c_entry_count) > 0) {
		mb_error("cache %s: %d orphaned entries",
			  cache->c_name,
			  atomic_read(&cache->c_entry_count));
	}

	if (list_empty(&mb_cache_list)) {
		kmem_cache_destroy(mb_cache_kmem_cache);
		mb_cache_kmem_cache = NULL;
	}
	kfree(cache->c_index_hash);
	kfree(cache->c_block_hash);
	kfree(cache);
}

/*
 * mb_cache_entry_alloc()
 *
 * Allocates a new cache entry. The new entry will not be valid initially,
 * and thus cannot be looked up yet. It should be filled with data, and
 * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
 * if no more memory was available.
 */
struct mb_cache_entry *
mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
{
	struct mb_cache_entry *ce;

	if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
		struct list_head *l;

		l = &mb_cache_lru_list;
		spin_lock(&mb_cache_spinlock);
		while (!list_is_last(l, &mb_cache_lru_list)) {
			l = l->next;
			ce = list_entry(l, struct mb_cache_entry, e_lru_list);
			if (ce->e_cache == cache) {
				list_del_init(&ce->e_lru_list);
				if (ce->e_used || ce->e_queued ||
				    atomic_read(&ce->e_refcnt))
					continue;
				spin_unlock(&mb_cache_spinlock);
				/*
				 * Prevent any find or get operation on the
				 * entry.
				 */
				hlist_bl_lock(ce->e_block_hash_p);
				hlist_bl_lock(ce->e_index_hash_p);
				/* Ignore if it is touched by a find/get */
				if (ce->e_used || ce->e_queued ||
				    atomic_read(&ce->e_refcnt) ||
				    !list_empty(&ce->e_lru_list)) {
					hlist_bl_unlock(ce->e_index_hash_p);
					hlist_bl_unlock(ce->e_block_hash_p);
					l = &mb_cache_lru_list;
					spin_lock(&mb_cache_spinlock);
					continue;
				}
				mb_assert(list_empty(&ce->e_lru_list));
				mb_assert(!(ce->e_used || ce->e_queued ||
					    atomic_read(&ce->e_refcnt)));
				__mb_cache_entry_unhash_unlock(ce);
				goto found;
			}
		}
		spin_unlock(&mb_cache_spinlock);
	}

	ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
	if (!ce)
		return NULL;
	atomic_inc(&cache->c_entry_count);
	INIT_LIST_HEAD(&ce->e_lru_list);
	INIT_HLIST_BL_NODE(&ce->e_block_list);
	INIT_HLIST_BL_NODE(&ce->e_index.o_list);
	ce->e_cache = cache;
	ce->e_queued = 0;
	atomic_set(&ce->e_refcnt, 0);
found:
	ce->e_block_hash_p = &cache->c_block_hash[0];
	ce->e_index_hash_p = &cache->c_index_hash[0];
	ce->e_used = 1 + MB_CACHE_WRITER;
	return ce;
}


/*
 * mb_cache_entry_insert()
 *
 * Inserts an entry that was allocated using mb_cache_entry_alloc() into
 * the cache. After this, the cache entry can be looked up, but is not yet
 * in the lru list as the caller still holds a handle to it. Returns 0 on
 * success, or -EBUSY if a cache entry for that device + block exists
 * already (this may happen after a failed lookup, if another process
 * has inserted the same cache entry in the meantime).
 *
 * @bdev: device the cache entry belongs to
 * @block: block number
 * @key: lookup key
 */
int
mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
		      sector_t block, unsigned int key)
{
	struct mb_cache *cache = ce->e_cache;
	unsigned int bucket;
	struct hlist_bl_node *l;
	struct hlist_bl_head *block_hash_p;
	struct hlist_bl_head *index_hash_p;
	struct mb_cache_entry *lce;

	mb_assert(ce);
	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
			   cache->c_bucket_bits);
	block_hash_p = &cache->c_block_hash[bucket];
	hlist_bl_lock(block_hash_p);
	hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
		if (lce->e_bdev == bdev && lce->e_block == block) {
			hlist_bl_unlock(block_hash_p);
			return -EBUSY;
		}
	}
	mb_assert(!__mb_cache_entry_is_block_hashed(ce));
	__mb_cache_entry_unhash_block(ce);
	__mb_cache_entry_unhash_index(ce);
	ce->e_bdev = bdev;
	ce->e_block = block;
	ce->e_block_hash_p = block_hash_p;
	ce->e_index.o_key = key;
	hlist_bl_add_head(&ce->e_block_list, block_hash_p);
	hlist_bl_unlock(block_hash_p);
	bucket = hash_long(key, cache->c_bucket_bits);
	index_hash_p = &cache->c_index_hash[bucket];
	hlist_bl_lock(index_hash_p);
	ce->e_index_hash_p = index_hash_p;
	hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
	hlist_bl_unlock(index_hash_p);
	return 0;
}
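
/*
 * Illustrative caller pattern for the allocate/insert path (a sketch only,
 * not part of this file; "cache", "bdev", "block" and "key" stand for
 * whatever the caller has at hand):
 *
 *	struct mb_cache_entry *ce;
 *	int error;
 *
 *	ce = mb_cache_entry_alloc(cache, GFP_NOFS);
 *	if (!ce)
 *		return;				(nothing gets cached)
 *	error = mb_cache_entry_insert(ce, bdev, block, key);
 *	if (error)
 *		mb_cache_entry_free(ce);	(e.g. -EBUSY: duplicate entry)
 *	else
 *		mb_cache_entry_release(ce);	(drop the handle; the valid
 *						 entry moves to the lru list)
 */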
/*
 * mb_cache_entry_release()
 *
 * Release a handle to a cache entry. When the last handle to a cache entry
 * is released it is either freed (if it is invalid) or otherwise inserted
 * into the lru list.
 */
void
mb_cache_entry_release(struct mb_cache_entry *ce)
{
	__mb_cache_entry_release(ce);
}


/*
 * mb_cache_entry_free()
 *
 * Unhashes the entry from both the block and index hash chains, making it
 * invalid, and releases the caller's handle. The entry itself is freed as
 * soon as it is no longer referenced.
 */
void
mb_cache_entry_free(struct mb_cache_entry *ce)
{
	mb_assert(ce);
	mb_assert(list_empty(&ce->e_lru_list));
	hlist_bl_lock(ce->e_index_hash_p);
	__mb_cache_entry_unhash_index(ce);
	hlist_bl_unlock(ce->e_index_hash_p);
	hlist_bl_lock(ce->e_block_hash_p);
	__mb_cache_entry_unhash_block(ce);
	hlist_bl_unlock(ce->e_block_hash_p);
	__mb_cache_entry_release(ce);
}


/*
 * mb_cache_entry_get()
 *
 * Get a cache entry by device / block number. (There can only be one entry
 * in the cache per device and block.) Returns NULL if no such cache entry
 * exists. The returned cache entry is locked for exclusive access ("single
 * writer").
 */
struct mb_cache_entry *
mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
		   sector_t block)
{
	unsigned int bucket;
	struct hlist_bl_node *l;
	struct mb_cache_entry *ce;
	struct hlist_bl_head *block_hash_p;

	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
			   cache->c_bucket_bits);
	block_hash_p = &cache->c_block_hash[bucket];
	/* First serialize access to the block's corresponding hash chain. */
	hlist_bl_lock(block_hash_p);
	hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
		mb_assert(ce->e_block_hash_p == block_hash_p);
		if (ce->e_bdev == bdev && ce->e_block == block) {
			/*
			 * Prevent a free from removing the entry.
			 */
			atomic_inc(&ce->e_refcnt);
			hlist_bl_unlock(block_hash_p);
			__spin_lock_mb_cache_entry(ce);
			atomic_dec(&ce->e_refcnt);
			if (ce->e_used > 0) {
				DEFINE_WAIT(wait);
				while (ce->e_used > 0) {
					ce->e_queued++;
					prepare_to_wait(&mb_cache_queue, &wait,
							TASK_UNINTERRUPTIBLE);
					__spin_unlock_mb_cache_entry(ce);
					schedule();
					__spin_lock_mb_cache_entry(ce);
					ce->e_queued--;
				}
				finish_wait(&mb_cache_queue, &wait);
			}
			ce->e_used += 1 + MB_CACHE_WRITER;
			__spin_unlock_mb_cache_entry(ce);

			if (!list_empty(&ce->e_lru_list)) {
				spin_lock(&mb_cache_spinlock);
				list_del_init(&ce->e_lru_list);
				spin_unlock(&mb_cache_spinlock);
			}
			if (!__mb_cache_entry_is_block_hashed(ce)) {
				__mb_cache_entry_release(ce);
				return NULL;
			}
			return ce;
		}
	}
	hlist_bl_unlock(block_hash_p);
	return NULL;
}
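
/*
 * Illustrative invalidation pattern (a sketch only, not part of this file):
 * when a caller frees the on-disk block that an entry describes, it can
 * look the entry up exclusively and drop it:
 *
 *	ce = mb_cache_entry_get(cache, bdev, block);
 *	if (ce)
 *		mb_cache_entry_free(ce);	(unhash and release; the entry
 *						 is freed once unreferenced)
 */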
#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)

static struct mb_cache_entry *
__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
		      struct block_device *bdev, unsigned int key)
{

	/* The index hash chain is already locked by the caller. */
	while (l != NULL) {
		struct mb_cache_entry *ce =
			hlist_bl_entry(l, struct mb_cache_entry,
				e_index.o_list);
		mb_assert(ce->e_index_hash_p == head);
		if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
			/*
			 * Prevent a free from removing the entry.
			 */
			atomic_inc(&ce->e_refcnt);
			hlist_bl_unlock(head);
			__spin_lock_mb_cache_entry(ce);
			atomic_dec(&ce->e_refcnt);
			ce->e_used++;
			/* Incrementing before holding the lock gives readers
			   priority over writers. */
			if (ce->e_used >= MB_CACHE_WRITER) {
				DEFINE_WAIT(wait);

				while (ce->e_used >= MB_CACHE_WRITER) {
					ce->e_queued++;
					prepare_to_wait(&mb_cache_queue, &wait,
							TASK_UNINTERRUPTIBLE);
					__spin_unlock_mb_cache_entry(ce);
					schedule();
					__spin_lock_mb_cache_entry(ce);
					ce->e_queued--;
				}
				finish_wait(&mb_cache_queue, &wait);
			}
			__spin_unlock_mb_cache_entry(ce);
			if (!list_empty(&ce->e_lru_list)) {
				spin_lock(&mb_cache_spinlock);
				list_del_init(&ce->e_lru_list);
				spin_unlock(&mb_cache_spinlock);
			}
			if (!__mb_cache_entry_is_block_hashed(ce)) {
				__mb_cache_entry_release(ce);
				return ERR_PTR(-EAGAIN);
			}
			return ce;
		}
		l = l->next;
	}
	hlist_bl_unlock(head);
	return NULL;
}


/*
 * mb_cache_entry_find_first()
 *
 * Find the first cache entry on a given device with a certain key in
 * an additional index. Additional matches can be found with
 * mb_cache_entry_find_next(). Returns NULL if no match was found. The
 * returned cache entry is locked for shared access ("multiple readers").
 *
 * @cache: the cache to search
 * @bdev: the device the cache entry should belong to
 * @key: the key in the index
 */
struct mb_cache_entry *
mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
			  unsigned int key)
{
	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
	struct hlist_bl_node *l;
	struct mb_cache_entry *ce = NULL;
	struct hlist_bl_head *index_hash_p;

	index_hash_p = &cache->c_index_hash[bucket];
	hlist_bl_lock(index_hash_p);
	if (!hlist_bl_empty(index_hash_p)) {
		l = hlist_bl_first(index_hash_p);
		ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
	} else
		hlist_bl_unlock(index_hash_p);
	return ce;
}


/*
 * mb_cache_entry_find_next()
 *
 * Find the next cache entry on a given device with a certain key in an
 * additional index. Returns NULL if no match could be found. The previous
 * entry is automatically released, so that mb_cache_entry_find_next() can
 * be called like this:
 *
 * entry = mb_cache_entry_find_first();
 * while (entry) {
 * 	...
 * 	entry = mb_cache_entry_find_next(entry, ...);
 * }
 *
 * @prev: The previous match
 * @bdev: the device the cache entry should belong to
 * @key: the key in the index
 */
struct mb_cache_entry *
mb_cache_entry_find_next(struct mb_cache_entry *prev,
			 struct block_device *bdev, unsigned int key)
{
	struct mb_cache *cache = prev->e_cache;
	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
	struct hlist_bl_node *l;
	struct mb_cache_entry *ce;
	struct hlist_bl_head *index_hash_p;

	index_hash_p = &cache->c_index_hash[bucket];
	mb_assert(prev->e_index_hash_p == index_hash_p);
	hlist_bl_lock(index_hash_p);
	mb_assert(!hlist_bl_empty(index_hash_p));
	l = prev->e_index.o_list.next;
	ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
	__mb_cache_entry_release(prev);
	return ce;
}

#endif  /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */

static int __init init_mbcache(void)
{
	register_shrinker(&mb_cache_shrinker);
	return 0;
}

static void __exit exit_mbcache(void)
{
	unregister_shrinker(&mb_cache_shrinker);
}

module_init(init_mbcache)
module_exit(exit_mbcache)