1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0 265d45231SKent Overstreet /* 365d45231SKent Overstreet * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com> 465d45231SKent Overstreet * 565d45231SKent Overstreet * Uses a block device as cache for other block devices; optimized for SSDs. 665d45231SKent Overstreet * All allocation is done in buckets, which should match the erase block size 765d45231SKent Overstreet * of the device. 865d45231SKent Overstreet * 965d45231SKent Overstreet * Buckets containing cached data are kept on a heap sorted by priority; 1065d45231SKent Overstreet * bucket priority is increased on cache hit, and periodically all the buckets 1165d45231SKent Overstreet * on the heap have their priority scaled down. This currently is just used as 1265d45231SKent Overstreet * an LRU but in the future should allow for more intelligent heuristics. 1365d45231SKent Overstreet * 1465d45231SKent Overstreet * Buckets have an 8 bit counter; freeing is accomplished by incrementing the 1565d45231SKent Overstreet * counter. Garbage collection is used to remove stale pointers. 1665d45231SKent Overstreet * 1765d45231SKent Overstreet * Indexing is done via a btree; nodes are not necessarily fully sorted, rather 1865d45231SKent Overstreet * as keys are inserted we only sort the pages that have not yet been written. 1965d45231SKent Overstreet * When garbage collection is run, we resort the entire node. 2065d45231SKent Overstreet * 215fb94e9cSMauro Carvalho Chehab * All configuration is done via sysfs; see Documentation/admin-guide/bcache.rst. 2265d45231SKent Overstreet */ 2365d45231SKent Overstreet 2465d45231SKent Overstreet #include "bcache.h" 2565d45231SKent Overstreet #include "btree.h" 2665d45231SKent Overstreet #include "debug.h" 2765d45231SKent Overstreet #include "extents.h" 2865d45231SKent Overstreet #include "writeback.h" 2965d45231SKent Overstreet 3065d45231SKent Overstreet static void sort_key_next(struct btree_iter *iter, 3165d45231SKent Overstreet struct btree_iter_set *i) 3265d45231SKent Overstreet { 3365d45231SKent Overstreet i->k = bkey_next(i->k); 3465d45231SKent Overstreet 3565d45231SKent Overstreet if (i->k == i->end) 3665d45231SKent Overstreet *i = iter->data[--iter->used]; 3765d45231SKent Overstreet } 3865d45231SKent Overstreet 3965d45231SKent Overstreet static bool bch_key_sort_cmp(struct btree_iter_set l, 4065d45231SKent Overstreet struct btree_iter_set r) 4165d45231SKent Overstreet { 4265d45231SKent Overstreet int64_t c = bkey_cmp(l.k, r.k); 4365d45231SKent Overstreet 4465d45231SKent Overstreet return c ? c > 0 : l.k < r.k; 4565d45231SKent Overstreet } 4665d45231SKent Overstreet 4765d45231SKent Overstreet static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) 4865d45231SKent Overstreet { 496f10f7d1SColy Li unsigned int i; 5065d45231SKent Overstreet 5165d45231SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++) 5265d45231SKent Overstreet if (ptr_available(c, k, i)) { 5365d45231SKent Overstreet struct cache *ca = PTR_CACHE(c, k, i); 5465d45231SKent Overstreet size_t bucket = PTR_BUCKET_NR(c, k, i); 5565d45231SKent Overstreet size_t r = bucket_remainder(c, PTR_OFFSET(k, i)); 5665d45231SKent Overstreet 5765d45231SKent Overstreet if (KEY_SIZE(k) + r > c->sb.bucket_size || 5865d45231SKent Overstreet bucket < ca->sb.first_bucket || 5965d45231SKent Overstreet bucket >= ca->sb.nbuckets) 6065d45231SKent Overstreet return true; 6165d45231SKent Overstreet } 6265d45231SKent Overstreet 6365d45231SKent Overstreet return false; 6465d45231SKent Overstreet } 6565d45231SKent Overstreet 66dc9d98d6SKent Overstreet /* Common among btree and extent ptrs */ 67dc9d98d6SKent Overstreet 68dc9d98d6SKent Overstreet static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k) 69dc9d98d6SKent Overstreet { 706f10f7d1SColy Li unsigned int i; 71dc9d98d6SKent Overstreet 72dc9d98d6SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++) 73dc9d98d6SKent Overstreet if (ptr_available(c, k, i)) { 74dc9d98d6SKent Overstreet struct cache *ca = PTR_CACHE(c, k, i); 75dc9d98d6SKent Overstreet size_t bucket = PTR_BUCKET_NR(c, k, i); 76dc9d98d6SKent Overstreet size_t r = bucket_remainder(c, PTR_OFFSET(k, i)); 77dc9d98d6SKent Overstreet 78dc9d98d6SKent Overstreet if (KEY_SIZE(k) + r > c->sb.bucket_size) 79dc9d98d6SKent Overstreet return "bad, length too big"; 80dc9d98d6SKent Overstreet if (bucket < ca->sb.first_bucket) 81dc9d98d6SKent Overstreet return "bad, short offset"; 82dc9d98d6SKent Overstreet if (bucket >= ca->sb.nbuckets) 83dc9d98d6SKent Overstreet return "bad, offset past end of device"; 84dc9d98d6SKent Overstreet if (ptr_stale(c, k, i)) 85dc9d98d6SKent Overstreet return "stale"; 86dc9d98d6SKent Overstreet } 87dc9d98d6SKent Overstreet 88dc9d98d6SKent Overstreet if (!bkey_cmp(k, &ZERO_KEY)) 89dc9d98d6SKent Overstreet return "bad, null key"; 90dc9d98d6SKent Overstreet if (!KEY_PTRS(k)) 91dc9d98d6SKent Overstreet return "bad, no pointers"; 92dc9d98d6SKent Overstreet if (!KEY_SIZE(k)) 93dc9d98d6SKent Overstreet return "zeroed key"; 94dc9d98d6SKent Overstreet return ""; 95dc9d98d6SKent Overstreet } 96dc9d98d6SKent Overstreet 97dc9d98d6SKent Overstreet void bch_extent_to_text(char *buf, size_t size, const struct bkey *k) 98dc9d98d6SKent Overstreet { 996f10f7d1SColy Li unsigned int i = 0; 100dc9d98d6SKent Overstreet char *out = buf, *end = buf + size; 101dc9d98d6SKent Overstreet 102dc9d98d6SKent Overstreet #define p(...) (out += scnprintf(out, end - out, __VA_ARGS__)) 103dc9d98d6SKent Overstreet 104dc9d98d6SKent Overstreet p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_START(k), KEY_SIZE(k)); 105dc9d98d6SKent Overstreet 106dc9d98d6SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++) { 107dc9d98d6SKent Overstreet if (i) 108dc9d98d6SKent Overstreet p(", "); 109dc9d98d6SKent Overstreet 110dc9d98d6SKent Overstreet if (PTR_DEV(k, i) == PTR_CHECK_DEV) 111dc9d98d6SKent Overstreet p("check dev"); 112dc9d98d6SKent Overstreet else 113dc9d98d6SKent Overstreet p("%llu:%llu gen %llu", PTR_DEV(k, i), 114dc9d98d6SKent Overstreet PTR_OFFSET(k, i), PTR_GEN(k, i)); 115dc9d98d6SKent Overstreet } 116dc9d98d6SKent Overstreet 117dc9d98d6SKent Overstreet p("]"); 118dc9d98d6SKent Overstreet 119dc9d98d6SKent Overstreet if (KEY_DIRTY(k)) 120dc9d98d6SKent Overstreet p(" dirty"); 121dc9d98d6SKent Overstreet if (KEY_CSUM(k)) 122dc9d98d6SKent Overstreet p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]); 123dc9d98d6SKent Overstreet #undef p 124dc9d98d6SKent Overstreet } 125dc9d98d6SKent Overstreet 126dc9d98d6SKent Overstreet static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k) 127dc9d98d6SKent Overstreet { 128dc9d98d6SKent Overstreet struct btree *b = container_of(keys, struct btree, keys); 1296f10f7d1SColy Li unsigned int j; 130dc9d98d6SKent Overstreet char buf[80]; 131dc9d98d6SKent Overstreet 132dc9d98d6SKent Overstreet bch_extent_to_text(buf, sizeof(buf), k); 13346f5aa88SJoe Perches pr_cont(" %s", buf); 134dc9d98d6SKent Overstreet 135dc9d98d6SKent Overstreet for (j = 0; j < KEY_PTRS(k); j++) { 136dc9d98d6SKent Overstreet size_t n = PTR_BUCKET_NR(b->c, k, j); 137dc9d98d6SKent Overstreet 13846f5aa88SJoe Perches pr_cont(" bucket %zu", n); 139dc9d98d6SKent Overstreet if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets) 14046f5aa88SJoe Perches pr_cont(" prio %i", 141dc9d98d6SKent Overstreet PTR_BUCKET(b->c, k, j)->prio); 142dc9d98d6SKent Overstreet } 143dc9d98d6SKent Overstreet 14446f5aa88SJoe Perches pr_cont(" %s\n", bch_ptr_status(b->c, k)); 145dc9d98d6SKent Overstreet } 146dc9d98d6SKent Overstreet 14765d45231SKent Overstreet /* Btree ptrs */ 14865d45231SKent Overstreet 14965d45231SKent Overstreet bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k) 15065d45231SKent Overstreet { 15165d45231SKent Overstreet char buf[80]; 15265d45231SKent Overstreet 15365d45231SKent Overstreet if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k)) 15465d45231SKent Overstreet goto bad; 15565d45231SKent Overstreet 15665d45231SKent Overstreet if (__ptr_invalid(c, k)) 15765d45231SKent Overstreet goto bad; 15865d45231SKent Overstreet 15965d45231SKent Overstreet return false; 16065d45231SKent Overstreet bad: 161dc9d98d6SKent Overstreet bch_extent_to_text(buf, sizeof(buf), k); 16265d45231SKent Overstreet cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k)); 16365d45231SKent Overstreet return true; 16465d45231SKent Overstreet } 16565d45231SKent Overstreet 166a85e968eSKent Overstreet static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k) 16765d45231SKent Overstreet { 168a85e968eSKent Overstreet struct btree *b = container_of(bk, struct btree, keys); 1691fae7cf0SColy Li 17065d45231SKent Overstreet return __bch_btree_ptr_invalid(b->c, k); 17165d45231SKent Overstreet } 17265d45231SKent Overstreet 17365d45231SKent Overstreet static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k) 17465d45231SKent Overstreet { 1756f10f7d1SColy Li unsigned int i; 17665d45231SKent Overstreet char buf[80]; 17765d45231SKent Overstreet struct bucket *g; 17865d45231SKent Overstreet 17965d45231SKent Overstreet if (mutex_trylock(&b->c->bucket_lock)) { 18065d45231SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++) 18165d45231SKent Overstreet if (ptr_available(b->c, k, i)) { 18265d45231SKent Overstreet g = PTR_BUCKET(b->c, k, i); 18365d45231SKent Overstreet 18465d45231SKent Overstreet if (KEY_DIRTY(k) || 18565d45231SKent Overstreet g->prio != BTREE_PRIO || 18665d45231SKent Overstreet (b->c->gc_mark_valid && 18765d45231SKent Overstreet GC_MARK(g) != GC_MARK_METADATA)) 18865d45231SKent Overstreet goto err; 18965d45231SKent Overstreet } 19065d45231SKent Overstreet 19165d45231SKent Overstreet mutex_unlock(&b->c->bucket_lock); 19265d45231SKent Overstreet } 19365d45231SKent Overstreet 19465d45231SKent Overstreet return false; 19565d45231SKent Overstreet err: 19665d45231SKent Overstreet mutex_unlock(&b->c->bucket_lock); 197dc9d98d6SKent Overstreet bch_extent_to_text(buf, sizeof(buf), k); 19865d45231SKent Overstreet btree_bug(b, 1993a2fd9d5SKent Overstreet "inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu", 20065d45231SKent Overstreet buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), 2013a2fd9d5SKent Overstreet g->prio, g->gen, g->last_gc, GC_MARK(g)); 20265d45231SKent Overstreet return true; 20365d45231SKent Overstreet } 20465d45231SKent Overstreet 205a85e968eSKent Overstreet static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k) 20665d45231SKent Overstreet { 207a85e968eSKent Overstreet struct btree *b = container_of(bk, struct btree, keys); 2086f10f7d1SColy Li unsigned int i; 20965d45231SKent Overstreet 21065d45231SKent Overstreet if (!bkey_cmp(k, &ZERO_KEY) || 21165d45231SKent Overstreet !KEY_PTRS(k) || 212a85e968eSKent Overstreet bch_ptr_invalid(bk, k)) 21365d45231SKent Overstreet return true; 21465d45231SKent Overstreet 21565d45231SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++) 21665d45231SKent Overstreet if (!ptr_available(b->c, k, i) || 21765d45231SKent Overstreet ptr_stale(b->c, k, i)) 21865d45231SKent Overstreet return true; 21965d45231SKent Overstreet 22065d45231SKent Overstreet if (expensive_debug_checks(b->c) && 22165d45231SKent Overstreet btree_ptr_bad_expensive(b, k)) 22265d45231SKent Overstreet return true; 22365d45231SKent Overstreet 22465d45231SKent Overstreet return false; 22565d45231SKent Overstreet } 22665d45231SKent Overstreet 227829a60b9SKent Overstreet static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk, 228829a60b9SKent Overstreet struct bkey *insert, 229829a60b9SKent Overstreet struct btree_iter *iter, 230829a60b9SKent Overstreet struct bkey *replace_key) 231829a60b9SKent Overstreet { 232829a60b9SKent Overstreet struct btree *b = container_of(bk, struct btree, keys); 233829a60b9SKent Overstreet 234829a60b9SKent Overstreet if (!KEY_OFFSET(insert)) 235829a60b9SKent Overstreet btree_current_write(b)->prio_blocked++; 236829a60b9SKent Overstreet 237829a60b9SKent Overstreet return false; 238829a60b9SKent Overstreet } 239829a60b9SKent Overstreet 24065d45231SKent Overstreet const struct btree_keys_ops bch_btree_keys_ops = { 24165d45231SKent Overstreet .sort_cmp = bch_key_sort_cmp, 242829a60b9SKent Overstreet .insert_fixup = bch_btree_ptr_insert_fixup, 24365d45231SKent Overstreet .key_invalid = bch_btree_ptr_invalid, 24465d45231SKent Overstreet .key_bad = bch_btree_ptr_bad, 245dc9d98d6SKent Overstreet .key_to_text = bch_extent_to_text, 246dc9d98d6SKent Overstreet .key_dump = bch_bkey_dump, 24765d45231SKent Overstreet }; 24865d45231SKent Overstreet 24965d45231SKent Overstreet /* Extents */ 25065d45231SKent Overstreet 25165d45231SKent Overstreet /* 25265d45231SKent Overstreet * Returns true if l > r - unless l == r, in which case returns true if l is 25365d45231SKent Overstreet * older than r. 25465d45231SKent Overstreet * 25565d45231SKent Overstreet * Necessary for btree_sort_fixup() - if there are multiple keys that compare 25665d45231SKent Overstreet * equal in different sets, we have to process them newest to oldest. 25765d45231SKent Overstreet */ 25865d45231SKent Overstreet static bool bch_extent_sort_cmp(struct btree_iter_set l, 25965d45231SKent Overstreet struct btree_iter_set r) 26065d45231SKent Overstreet { 26165d45231SKent Overstreet int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); 26265d45231SKent Overstreet 26365d45231SKent Overstreet return c ? c > 0 : l.k < r.k; 26465d45231SKent Overstreet } 26565d45231SKent Overstreet 26665d45231SKent Overstreet static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, 26765d45231SKent Overstreet struct bkey *tmp) 26865d45231SKent Overstreet { 26965d45231SKent Overstreet while (iter->used > 1) { 27065d45231SKent Overstreet struct btree_iter_set *top = iter->data, *i = top + 1; 27165d45231SKent Overstreet 27265d45231SKent Overstreet if (iter->used > 2 && 27365d45231SKent Overstreet bch_extent_sort_cmp(i[0], i[1])) 27465d45231SKent Overstreet i++; 27565d45231SKent Overstreet 27665d45231SKent Overstreet if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) 27765d45231SKent Overstreet break; 27865d45231SKent Overstreet 27965d45231SKent Overstreet if (!KEY_SIZE(i->k)) { 28065d45231SKent Overstreet sort_key_next(iter, i); 28165d45231SKent Overstreet heap_sift(iter, i - top, bch_extent_sort_cmp); 28265d45231SKent Overstreet continue; 28365d45231SKent Overstreet } 28465d45231SKent Overstreet 28565d45231SKent Overstreet if (top->k > i->k) { 28665d45231SKent Overstreet if (bkey_cmp(top->k, i->k) >= 0) 28765d45231SKent Overstreet sort_key_next(iter, i); 28865d45231SKent Overstreet else 28965d45231SKent Overstreet bch_cut_front(top->k, i->k); 29065d45231SKent Overstreet 29165d45231SKent Overstreet heap_sift(iter, i - top, bch_extent_sort_cmp); 29265d45231SKent Overstreet } else { 29365d45231SKent Overstreet /* can't happen because of comparison func */ 29465d45231SKent Overstreet BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); 29565d45231SKent Overstreet 29665d45231SKent Overstreet if (bkey_cmp(i->k, top->k) < 0) { 29765d45231SKent Overstreet bkey_copy(tmp, top->k); 29865d45231SKent Overstreet 29965d45231SKent Overstreet bch_cut_back(&START_KEY(i->k), tmp); 30065d45231SKent Overstreet bch_cut_front(i->k, top->k); 30165d45231SKent Overstreet heap_sift(iter, 0, bch_extent_sort_cmp); 30265d45231SKent Overstreet 30365d45231SKent Overstreet return tmp; 30465d45231SKent Overstreet } else { 30565d45231SKent Overstreet bch_cut_back(&START_KEY(i->k), top->k); 30665d45231SKent Overstreet } 30765d45231SKent Overstreet } 30865d45231SKent Overstreet } 30965d45231SKent Overstreet 31065d45231SKent Overstreet return NULL; 31165d45231SKent Overstreet } 31265d45231SKent Overstreet 313cb851149SJohn Sheu static void bch_subtract_dirty(struct bkey *k, 314cb851149SJohn Sheu struct cache_set *c, 315cb851149SJohn Sheu uint64_t offset, 316cb851149SJohn Sheu int sectors) 317cb851149SJohn Sheu { 318cb851149SJohn Sheu if (KEY_DIRTY(k)) 319cb851149SJohn Sheu bcache_dev_sectors_dirty_add(c, KEY_INODE(k), 320cb851149SJohn Sheu offset, -sectors); 321cb851149SJohn Sheu } 322cb851149SJohn Sheu 323829a60b9SKent Overstreet static bool bch_extent_insert_fixup(struct btree_keys *b, 324829a60b9SKent Overstreet struct bkey *insert, 325829a60b9SKent Overstreet struct btree_iter *iter, 326829a60b9SKent Overstreet struct bkey *replace_key) 327829a60b9SKent Overstreet { 328829a60b9SKent Overstreet struct cache_set *c = container_of(b, struct btree, keys)->c; 329829a60b9SKent Overstreet 330829a60b9SKent Overstreet uint64_t old_offset; 3316f10f7d1SColy Li unsigned int old_size, sectors_found = 0; 332829a60b9SKent Overstreet 333829a60b9SKent Overstreet BUG_ON(!KEY_OFFSET(insert)); 334829a60b9SKent Overstreet BUG_ON(!KEY_SIZE(insert)); 335829a60b9SKent Overstreet 336829a60b9SKent Overstreet while (1) { 337829a60b9SKent Overstreet struct bkey *k = bch_btree_iter_next(iter); 3381fae7cf0SColy Li 339829a60b9SKent Overstreet if (!k) 340829a60b9SKent Overstreet break; 341829a60b9SKent Overstreet 342829a60b9SKent Overstreet if (bkey_cmp(&START_KEY(k), insert) >= 0) { 343829a60b9SKent Overstreet if (KEY_SIZE(k)) 344829a60b9SKent Overstreet break; 345829a60b9SKent Overstreet else 346829a60b9SKent Overstreet continue; 347829a60b9SKent Overstreet } 348829a60b9SKent Overstreet 349829a60b9SKent Overstreet if (bkey_cmp(k, &START_KEY(insert)) <= 0) 350829a60b9SKent Overstreet continue; 351829a60b9SKent Overstreet 352829a60b9SKent Overstreet old_offset = KEY_START(k); 353829a60b9SKent Overstreet old_size = KEY_SIZE(k); 354829a60b9SKent Overstreet 355829a60b9SKent Overstreet /* 356829a60b9SKent Overstreet * We might overlap with 0 size extents; we can't skip these 357829a60b9SKent Overstreet * because if they're in the set we're inserting to we have to 358829a60b9SKent Overstreet * adjust them so they don't overlap with the key we're 359829a60b9SKent Overstreet * inserting. But we don't want to check them for replace 360829a60b9SKent Overstreet * operations. 361829a60b9SKent Overstreet */ 362829a60b9SKent Overstreet 363829a60b9SKent Overstreet if (replace_key && KEY_SIZE(k)) { 364829a60b9SKent Overstreet /* 365829a60b9SKent Overstreet * k might have been split since we inserted/found the 366829a60b9SKent Overstreet * key we're replacing 367829a60b9SKent Overstreet */ 3686f10f7d1SColy Li unsigned int i; 369829a60b9SKent Overstreet uint64_t offset = KEY_START(k) - 370829a60b9SKent Overstreet KEY_START(replace_key); 371829a60b9SKent Overstreet 372829a60b9SKent Overstreet /* But it must be a subset of the replace key */ 373829a60b9SKent Overstreet if (KEY_START(k) < KEY_START(replace_key) || 374829a60b9SKent Overstreet KEY_OFFSET(k) > KEY_OFFSET(replace_key)) 375829a60b9SKent Overstreet goto check_failed; 376829a60b9SKent Overstreet 377829a60b9SKent Overstreet /* We didn't find a key that we were supposed to */ 378829a60b9SKent Overstreet if (KEY_START(k) > KEY_START(insert) + sectors_found) 379829a60b9SKent Overstreet goto check_failed; 380829a60b9SKent Overstreet 3813bdad1e4SNicholas Swenson if (!bch_bkey_equal_header(k, replace_key)) 382829a60b9SKent Overstreet goto check_failed; 383829a60b9SKent Overstreet 384829a60b9SKent Overstreet /* skip past gen */ 385829a60b9SKent Overstreet offset <<= 8; 386829a60b9SKent Overstreet 387829a60b9SKent Overstreet BUG_ON(!KEY_PTRS(replace_key)); 388829a60b9SKent Overstreet 389829a60b9SKent Overstreet for (i = 0; i < KEY_PTRS(replace_key); i++) 390829a60b9SKent Overstreet if (k->ptr[i] != replace_key->ptr[i] + offset) 391829a60b9SKent Overstreet goto check_failed; 392829a60b9SKent Overstreet 393829a60b9SKent Overstreet sectors_found = KEY_OFFSET(k) - KEY_START(insert); 394829a60b9SKent Overstreet } 395829a60b9SKent Overstreet 396829a60b9SKent Overstreet if (bkey_cmp(insert, k) < 0 && 397829a60b9SKent Overstreet bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) { 398829a60b9SKent Overstreet /* 399829a60b9SKent Overstreet * We overlapped in the middle of an existing key: that 400829a60b9SKent Overstreet * means we have to split the old key. But we have to do 401829a60b9SKent Overstreet * slightly different things depending on whether the 402829a60b9SKent Overstreet * old key has been written out yet. 403829a60b9SKent Overstreet */ 404829a60b9SKent Overstreet 405829a60b9SKent Overstreet struct bkey *top; 406829a60b9SKent Overstreet 407cb851149SJohn Sheu bch_subtract_dirty(k, c, KEY_START(insert), 408cb851149SJohn Sheu KEY_SIZE(insert)); 409829a60b9SKent Overstreet 410829a60b9SKent Overstreet if (bkey_written(b, k)) { 411829a60b9SKent Overstreet /* 412829a60b9SKent Overstreet * We insert a new key to cover the top of the 413829a60b9SKent Overstreet * old key, and the old key is modified in place 414829a60b9SKent Overstreet * to represent the bottom split. 415829a60b9SKent Overstreet * 416829a60b9SKent Overstreet * It's completely arbitrary whether the new key 417829a60b9SKent Overstreet * is the top or the bottom, but it has to match 418829a60b9SKent Overstreet * up with what btree_sort_fixup() does - it 419829a60b9SKent Overstreet * doesn't check for this kind of overlap, it 420829a60b9SKent Overstreet * depends on us inserting a new key for the top 421829a60b9SKent Overstreet * here. 422829a60b9SKent Overstreet */ 423829a60b9SKent Overstreet top = bch_bset_search(b, bset_tree_last(b), 424829a60b9SKent Overstreet insert); 425829a60b9SKent Overstreet bch_bset_insert(b, top, k); 426829a60b9SKent Overstreet } else { 427829a60b9SKent Overstreet BKEY_PADDED(key) temp; 428829a60b9SKent Overstreet bkey_copy(&temp.key, k); 429829a60b9SKent Overstreet bch_bset_insert(b, k, &temp.key); 430829a60b9SKent Overstreet top = bkey_next(k); 431829a60b9SKent Overstreet } 432829a60b9SKent Overstreet 433829a60b9SKent Overstreet bch_cut_front(insert, top); 434829a60b9SKent Overstreet bch_cut_back(&START_KEY(insert), k); 435829a60b9SKent Overstreet bch_bset_fix_invalidated_key(b, k); 436829a60b9SKent Overstreet goto out; 437829a60b9SKent Overstreet } 438829a60b9SKent Overstreet 439829a60b9SKent Overstreet if (bkey_cmp(insert, k) < 0) { 440829a60b9SKent Overstreet bch_cut_front(insert, k); 441829a60b9SKent Overstreet } else { 442829a60b9SKent Overstreet if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) 443829a60b9SKent Overstreet old_offset = KEY_START(insert); 444829a60b9SKent Overstreet 445829a60b9SKent Overstreet if (bkey_written(b, k) && 446829a60b9SKent Overstreet bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) { 447829a60b9SKent Overstreet /* 448829a60b9SKent Overstreet * Completely overwrote, so we don't have to 449829a60b9SKent Overstreet * invalidate the binary search tree 450829a60b9SKent Overstreet */ 451829a60b9SKent Overstreet bch_cut_front(k, k); 452829a60b9SKent Overstreet } else { 453829a60b9SKent Overstreet __bch_cut_back(&START_KEY(insert), k); 454829a60b9SKent Overstreet bch_bset_fix_invalidated_key(b, k); 455829a60b9SKent Overstreet } 456829a60b9SKent Overstreet } 457829a60b9SKent Overstreet 458cb851149SJohn Sheu bch_subtract_dirty(k, c, old_offset, old_size - KEY_SIZE(k)); 459829a60b9SKent Overstreet } 460829a60b9SKent Overstreet 461829a60b9SKent Overstreet check_failed: 462829a60b9SKent Overstreet if (replace_key) { 463829a60b9SKent Overstreet if (!sectors_found) { 464829a60b9SKent Overstreet return true; 465829a60b9SKent Overstreet } else if (sectors_found < KEY_SIZE(insert)) { 466829a60b9SKent Overstreet SET_KEY_OFFSET(insert, KEY_OFFSET(insert) - 467829a60b9SKent Overstreet (KEY_SIZE(insert) - sectors_found)); 468829a60b9SKent Overstreet SET_KEY_SIZE(insert, sectors_found); 469829a60b9SKent Overstreet } 470829a60b9SKent Overstreet } 471829a60b9SKent Overstreet out: 472829a60b9SKent Overstreet if (KEY_DIRTY(insert)) 473829a60b9SKent Overstreet bcache_dev_sectors_dirty_add(c, KEY_INODE(insert), 474829a60b9SKent Overstreet KEY_START(insert), 475829a60b9SKent Overstreet KEY_SIZE(insert)); 476829a60b9SKent Overstreet 477829a60b9SKent Overstreet return false; 478829a60b9SKent Overstreet } 479829a60b9SKent Overstreet 4809aa61a99SKent Overstreet bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k) 48165d45231SKent Overstreet { 48265d45231SKent Overstreet char buf[80]; 48365d45231SKent Overstreet 48465d45231SKent Overstreet if (!KEY_SIZE(k)) 48565d45231SKent Overstreet return true; 48665d45231SKent Overstreet 48765d45231SKent Overstreet if (KEY_SIZE(k) > KEY_OFFSET(k)) 48865d45231SKent Overstreet goto bad; 48965d45231SKent Overstreet 4909aa61a99SKent Overstreet if (__ptr_invalid(c, k)) 49165d45231SKent Overstreet goto bad; 49265d45231SKent Overstreet 49365d45231SKent Overstreet return false; 49465d45231SKent Overstreet bad: 495dc9d98d6SKent Overstreet bch_extent_to_text(buf, sizeof(buf), k); 4969aa61a99SKent Overstreet cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k)); 49765d45231SKent Overstreet return true; 49865d45231SKent Overstreet } 49965d45231SKent Overstreet 5009aa61a99SKent Overstreet static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k) 5019aa61a99SKent Overstreet { 5029aa61a99SKent Overstreet struct btree *b = container_of(bk, struct btree, keys); 5031fae7cf0SColy Li 5049aa61a99SKent Overstreet return __bch_extent_invalid(b->c, k); 5059aa61a99SKent Overstreet } 5069aa61a99SKent Overstreet 50765d45231SKent Overstreet static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k, 5086f10f7d1SColy Li unsigned int ptr) 50965d45231SKent Overstreet { 51065d45231SKent Overstreet struct bucket *g = PTR_BUCKET(b->c, k, ptr); 51165d45231SKent Overstreet char buf[80]; 51265d45231SKent Overstreet 51365d45231SKent Overstreet if (mutex_trylock(&b->c->bucket_lock)) { 51465d45231SKent Overstreet if (b->c->gc_mark_valid && 5154fe6a816SKent Overstreet (!GC_MARK(g) || 5164fe6a816SKent Overstreet GC_MARK(g) == GC_MARK_METADATA || 5174fe6a816SKent Overstreet (GC_MARK(g) != GC_MARK_DIRTY && KEY_DIRTY(k)))) 51865d45231SKent Overstreet goto err; 51965d45231SKent Overstreet 52065d45231SKent Overstreet if (g->prio == BTREE_PRIO) 52165d45231SKent Overstreet goto err; 52265d45231SKent Overstreet 52365d45231SKent Overstreet mutex_unlock(&b->c->bucket_lock); 52465d45231SKent Overstreet } 52565d45231SKent Overstreet 52665d45231SKent Overstreet return false; 52765d45231SKent Overstreet err: 52865d45231SKent Overstreet mutex_unlock(&b->c->bucket_lock); 529dc9d98d6SKent Overstreet bch_extent_to_text(buf, sizeof(buf), k); 53065d45231SKent Overstreet btree_bug(b, 5313a2fd9d5SKent Overstreet "inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu", 53265d45231SKent Overstreet buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin), 5333a2fd9d5SKent Overstreet g->prio, g->gen, g->last_gc, GC_MARK(g)); 53465d45231SKent Overstreet return true; 53565d45231SKent Overstreet } 53665d45231SKent Overstreet 537a85e968eSKent Overstreet static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) 53865d45231SKent Overstreet { 539a85e968eSKent Overstreet struct btree *b = container_of(bk, struct btree, keys); 5406f10f7d1SColy Li unsigned int i, stale; 54158ac3230STang Junhui char buf[80]; 54265d45231SKent Overstreet 54365d45231SKent Overstreet if (!KEY_PTRS(k) || 544a85e968eSKent Overstreet bch_extent_invalid(bk, k)) 54565d45231SKent Overstreet return true; 54665d45231SKent Overstreet 54765d45231SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++) 54865d45231SKent Overstreet if (!ptr_available(b->c, k, i)) 54965d45231SKent Overstreet return true; 55065d45231SKent Overstreet 55165d45231SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++) { 55265d45231SKent Overstreet stale = ptr_stale(b->c, k, i); 55365d45231SKent Overstreet 55458ac3230STang Junhui if (stale && KEY_DIRTY(k)) { 55558ac3230STang Junhui bch_extent_to_text(buf, sizeof(buf), k); 55646f5aa88SJoe Perches pr_info("stale dirty pointer, stale %u, key: %s\n", 55758ac3230STang Junhui stale, buf); 55858ac3230STang Junhui } 55958ac3230STang Junhui 560149d0efaSColy Li btree_bug_on(stale > BUCKET_GC_GEN_MAX, b, 56165d45231SKent Overstreet "key too stale: %i, need_gc %u", 56265d45231SKent Overstreet stale, b->c->need_gc); 56365d45231SKent Overstreet 56465d45231SKent Overstreet if (stale) 56565d45231SKent Overstreet return true; 56665d45231SKent Overstreet 56765d45231SKent Overstreet if (expensive_debug_checks(b->c) && 56865d45231SKent Overstreet bch_extent_bad_expensive(b, k, i)) 56965d45231SKent Overstreet return true; 57065d45231SKent Overstreet } 57165d45231SKent Overstreet 57265d45231SKent Overstreet return false; 57365d45231SKent Overstreet } 57465d45231SKent Overstreet 57565d45231SKent Overstreet static uint64_t merge_chksums(struct bkey *l, struct bkey *r) 57665d45231SKent Overstreet { 57765d45231SKent Overstreet return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) & 57865d45231SKent Overstreet ~((uint64_t)1 << 63); 57965d45231SKent Overstreet } 58065d45231SKent Overstreet 581b0d30981SColy Li static bool bch_extent_merge(struct btree_keys *bk, 582b0d30981SColy Li struct bkey *l, 583b0d30981SColy Li struct bkey *r) 58465d45231SKent Overstreet { 585a85e968eSKent Overstreet struct btree *b = container_of(bk, struct btree, keys); 5866f10f7d1SColy Li unsigned int i; 58765d45231SKent Overstreet 58865d45231SKent Overstreet if (key_merging_disabled(b->c)) 58965d45231SKent Overstreet return false; 59065d45231SKent Overstreet 59165d45231SKent Overstreet for (i = 0; i < KEY_PTRS(l); i++) 592cf33c1eeSHuacai Chen if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || 59365d45231SKent Overstreet PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) 59465d45231SKent Overstreet return false; 59565d45231SKent Overstreet 59665d45231SKent Overstreet /* Keys with no pointers aren't restricted to one bucket and could 59765d45231SKent Overstreet * overflow KEY_SIZE 59865d45231SKent Overstreet */ 59965d45231SKent Overstreet if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) { 60065d45231SKent Overstreet SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l)); 60165d45231SKent Overstreet SET_KEY_SIZE(l, USHRT_MAX); 60265d45231SKent Overstreet 60365d45231SKent Overstreet bch_cut_front(l, r); 60465d45231SKent Overstreet return false; 60565d45231SKent Overstreet } 60665d45231SKent Overstreet 60765d45231SKent Overstreet if (KEY_CSUM(l)) { 60865d45231SKent Overstreet if (KEY_CSUM(r)) 60965d45231SKent Overstreet l->ptr[KEY_PTRS(l)] = merge_chksums(l, r); 61065d45231SKent Overstreet else 61165d45231SKent Overstreet SET_KEY_CSUM(l, 0); 61265d45231SKent Overstreet } 61365d45231SKent Overstreet 61465d45231SKent Overstreet SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r)); 61565d45231SKent Overstreet SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r)); 61665d45231SKent Overstreet 61765d45231SKent Overstreet return true; 61865d45231SKent Overstreet } 61965d45231SKent Overstreet 62065d45231SKent Overstreet const struct btree_keys_ops bch_extent_keys_ops = { 62165d45231SKent Overstreet .sort_cmp = bch_extent_sort_cmp, 62265d45231SKent Overstreet .sort_fixup = bch_extent_sort_fixup, 623829a60b9SKent Overstreet .insert_fixup = bch_extent_insert_fixup, 62465d45231SKent Overstreet .key_invalid = bch_extent_invalid, 62565d45231SKent Overstreet .key_bad = bch_extent_bad, 62665d45231SKent Overstreet .key_merge = bch_extent_merge, 627dc9d98d6SKent Overstreet .key_to_text = bch_extent_to_text, 628dc9d98d6SKent Overstreet .key_dump = bch_bkey_dump, 62965d45231SKent Overstreet .is_extents = true, 63065d45231SKent Overstreet }; 631