1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
265d45231SKent Overstreet /*
365d45231SKent Overstreet * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
465d45231SKent Overstreet *
565d45231SKent Overstreet * Uses a block device as cache for other block devices; optimized for SSDs.
665d45231SKent Overstreet * All allocation is done in buckets, which should match the erase block size
765d45231SKent Overstreet * of the device.
865d45231SKent Overstreet *
965d45231SKent Overstreet * Buckets containing cached data are kept on a heap sorted by priority;
1065d45231SKent Overstreet * bucket priority is increased on cache hit, and periodically all the buckets
1165d45231SKent Overstreet * on the heap have their priority scaled down. This currently is just used as
1265d45231SKent Overstreet * an LRU but in the future should allow for more intelligent heuristics.
1365d45231SKent Overstreet *
1465d45231SKent Overstreet * Buckets have an 8 bit counter; freeing is accomplished by incrementing the
1565d45231SKent Overstreet * counter. Garbage collection is used to remove stale pointers.
1665d45231SKent Overstreet *
1765d45231SKent Overstreet * Indexing is done via a btree; nodes are not necessarily fully sorted, rather
1865d45231SKent Overstreet * as keys are inserted we only sort the pages that have not yet been written.
1965d45231SKent Overstreet * When garbage collection is run, we resort the entire node.
2065d45231SKent Overstreet *
215fb94e9cSMauro Carvalho Chehab * All configuration is done via sysfs; see Documentation/admin-guide/bcache.rst.
2265d45231SKent Overstreet */
2365d45231SKent Overstreet
2465d45231SKent Overstreet #include "bcache.h"
2565d45231SKent Overstreet #include "btree.h"
2665d45231SKent Overstreet #include "debug.h"
2765d45231SKent Overstreet #include "extents.h"
2865d45231SKent Overstreet #include "writeback.h"
2965d45231SKent Overstreet
sort_key_next(struct btree_iter * iter,struct btree_iter_set * i)3065d45231SKent Overstreet static void sort_key_next(struct btree_iter *iter,
3165d45231SKent Overstreet struct btree_iter_set *i)
3265d45231SKent Overstreet {
3365d45231SKent Overstreet i->k = bkey_next(i->k);
3465d45231SKent Overstreet
3565d45231SKent Overstreet if (i->k == i->end)
3665d45231SKent Overstreet *i = iter->data[--iter->used];
3765d45231SKent Overstreet }
3865d45231SKent Overstreet
bch_key_sort_cmp(struct btree_iter_set l,struct btree_iter_set r)3965d45231SKent Overstreet static bool bch_key_sort_cmp(struct btree_iter_set l,
4065d45231SKent Overstreet struct btree_iter_set r)
4165d45231SKent Overstreet {
4265d45231SKent Overstreet int64_t c = bkey_cmp(l.k, r.k);
4365d45231SKent Overstreet
4465d45231SKent Overstreet return c ? c > 0 : l.k < r.k;
4565d45231SKent Overstreet }
4665d45231SKent Overstreet
__ptr_invalid(struct cache_set * c,const struct bkey * k)4765d45231SKent Overstreet static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
4865d45231SKent Overstreet {
496f10f7d1SColy Li unsigned int i;
5065d45231SKent Overstreet
5165d45231SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++)
5265d45231SKent Overstreet if (ptr_available(c, k, i)) {
53*11e9560eSChristoph Hellwig struct cache *ca = c->cache;
5465d45231SKent Overstreet size_t bucket = PTR_BUCKET_NR(c, k, i);
5565d45231SKent Overstreet size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
5665d45231SKent Overstreet
574a784266SColy Li if (KEY_SIZE(k) + r > c->cache->sb.bucket_size ||
5865d45231SKent Overstreet bucket < ca->sb.first_bucket ||
5965d45231SKent Overstreet bucket >= ca->sb.nbuckets)
6065d45231SKent Overstreet return true;
6165d45231SKent Overstreet }
6265d45231SKent Overstreet
6365d45231SKent Overstreet return false;
6465d45231SKent Overstreet }
6565d45231SKent Overstreet
66dc9d98d6SKent Overstreet /* Common among btree and extent ptrs */
67dc9d98d6SKent Overstreet
bch_ptr_status(struct cache_set * c,const struct bkey * k)68dc9d98d6SKent Overstreet static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
69dc9d98d6SKent Overstreet {
706f10f7d1SColy Li unsigned int i;
71dc9d98d6SKent Overstreet
72dc9d98d6SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++)
73dc9d98d6SKent Overstreet if (ptr_available(c, k, i)) {
74*11e9560eSChristoph Hellwig struct cache *ca = c->cache;
75dc9d98d6SKent Overstreet size_t bucket = PTR_BUCKET_NR(c, k, i);
76dc9d98d6SKent Overstreet size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
77dc9d98d6SKent Overstreet
784a784266SColy Li if (KEY_SIZE(k) + r > c->cache->sb.bucket_size)
79dc9d98d6SKent Overstreet return "bad, length too big";
80dc9d98d6SKent Overstreet if (bucket < ca->sb.first_bucket)
81dc9d98d6SKent Overstreet return "bad, short offset";
82dc9d98d6SKent Overstreet if (bucket >= ca->sb.nbuckets)
83dc9d98d6SKent Overstreet return "bad, offset past end of device";
84dc9d98d6SKent Overstreet if (ptr_stale(c, k, i))
85dc9d98d6SKent Overstreet return "stale";
86dc9d98d6SKent Overstreet }
87dc9d98d6SKent Overstreet
88dc9d98d6SKent Overstreet if (!bkey_cmp(k, &ZERO_KEY))
89dc9d98d6SKent Overstreet return "bad, null key";
90dc9d98d6SKent Overstreet if (!KEY_PTRS(k))
91dc9d98d6SKent Overstreet return "bad, no pointers";
92dc9d98d6SKent Overstreet if (!KEY_SIZE(k))
93dc9d98d6SKent Overstreet return "zeroed key";
94dc9d98d6SKent Overstreet return "";
95dc9d98d6SKent Overstreet }
96dc9d98d6SKent Overstreet
bch_extent_to_text(char * buf,size_t size,const struct bkey * k)97dc9d98d6SKent Overstreet void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
98dc9d98d6SKent Overstreet {
996f10f7d1SColy Li unsigned int i = 0;
100dc9d98d6SKent Overstreet char *out = buf, *end = buf + size;
101dc9d98d6SKent Overstreet
102dc9d98d6SKent Overstreet #define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
103dc9d98d6SKent Overstreet
104dc9d98d6SKent Overstreet p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_START(k), KEY_SIZE(k));
105dc9d98d6SKent Overstreet
106dc9d98d6SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++) {
107dc9d98d6SKent Overstreet if (i)
108dc9d98d6SKent Overstreet p(", ");
109dc9d98d6SKent Overstreet
110dc9d98d6SKent Overstreet if (PTR_DEV(k, i) == PTR_CHECK_DEV)
111dc9d98d6SKent Overstreet p("check dev");
112dc9d98d6SKent Overstreet else
113dc9d98d6SKent Overstreet p("%llu:%llu gen %llu", PTR_DEV(k, i),
114dc9d98d6SKent Overstreet PTR_OFFSET(k, i), PTR_GEN(k, i));
115dc9d98d6SKent Overstreet }
116dc9d98d6SKent Overstreet
117dc9d98d6SKent Overstreet p("]");
118dc9d98d6SKent Overstreet
119dc9d98d6SKent Overstreet if (KEY_DIRTY(k))
120dc9d98d6SKent Overstreet p(" dirty");
121dc9d98d6SKent Overstreet if (KEY_CSUM(k))
122dc9d98d6SKent Overstreet p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
123dc9d98d6SKent Overstreet #undef p
124dc9d98d6SKent Overstreet }
125dc9d98d6SKent Overstreet
bch_bkey_dump(struct btree_keys * keys,const struct bkey * k)126dc9d98d6SKent Overstreet static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
127dc9d98d6SKent Overstreet {
128dc9d98d6SKent Overstreet struct btree *b = container_of(keys, struct btree, keys);
1296f10f7d1SColy Li unsigned int j;
130dc9d98d6SKent Overstreet char buf[80];
131dc9d98d6SKent Overstreet
132dc9d98d6SKent Overstreet bch_extent_to_text(buf, sizeof(buf), k);
13346f5aa88SJoe Perches pr_cont(" %s", buf);
134dc9d98d6SKent Overstreet
135dc9d98d6SKent Overstreet for (j = 0; j < KEY_PTRS(k); j++) {
136dc9d98d6SKent Overstreet size_t n = PTR_BUCKET_NR(b->c, k, j);
137dc9d98d6SKent Overstreet
13846f5aa88SJoe Perches pr_cont(" bucket %zu", n);
1394a784266SColy Li if (n >= b->c->cache->sb.first_bucket && n < b->c->cache->sb.nbuckets)
14046f5aa88SJoe Perches pr_cont(" prio %i",
141dc9d98d6SKent Overstreet PTR_BUCKET(b->c, k, j)->prio);
142dc9d98d6SKent Overstreet }
143dc9d98d6SKent Overstreet
14446f5aa88SJoe Perches pr_cont(" %s\n", bch_ptr_status(b->c, k));
145dc9d98d6SKent Overstreet }
146dc9d98d6SKent Overstreet
14765d45231SKent Overstreet /* Btree ptrs */
14865d45231SKent Overstreet
__bch_btree_ptr_invalid(struct cache_set * c,const struct bkey * k)14965d45231SKent Overstreet bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
15065d45231SKent Overstreet {
15165d45231SKent Overstreet char buf[80];
15265d45231SKent Overstreet
15365d45231SKent Overstreet if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
15465d45231SKent Overstreet goto bad;
15565d45231SKent Overstreet
15665d45231SKent Overstreet if (__ptr_invalid(c, k))
15765d45231SKent Overstreet goto bad;
15865d45231SKent Overstreet
15965d45231SKent Overstreet return false;
16065d45231SKent Overstreet bad:
161dc9d98d6SKent Overstreet bch_extent_to_text(buf, sizeof(buf), k);
16265d45231SKent Overstreet cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
16365d45231SKent Overstreet return true;
16465d45231SKent Overstreet }
16565d45231SKent Overstreet
bch_btree_ptr_invalid(struct btree_keys * bk,const struct bkey * k)166a85e968eSKent Overstreet static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k)
16765d45231SKent Overstreet {
168a85e968eSKent Overstreet struct btree *b = container_of(bk, struct btree, keys);
1691fae7cf0SColy Li
17065d45231SKent Overstreet return __bch_btree_ptr_invalid(b->c, k);
17165d45231SKent Overstreet }
17265d45231SKent Overstreet
btree_ptr_bad_expensive(struct btree * b,const struct bkey * k)17365d45231SKent Overstreet static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
17465d45231SKent Overstreet {
1756f10f7d1SColy Li unsigned int i;
17665d45231SKent Overstreet char buf[80];
17765d45231SKent Overstreet struct bucket *g;
17865d45231SKent Overstreet
17965d45231SKent Overstreet if (mutex_trylock(&b->c->bucket_lock)) {
18065d45231SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++)
18165d45231SKent Overstreet if (ptr_available(b->c, k, i)) {
18265d45231SKent Overstreet g = PTR_BUCKET(b->c, k, i);
18365d45231SKent Overstreet
18465d45231SKent Overstreet if (KEY_DIRTY(k) ||
18565d45231SKent Overstreet g->prio != BTREE_PRIO ||
18665d45231SKent Overstreet (b->c->gc_mark_valid &&
18765d45231SKent Overstreet GC_MARK(g) != GC_MARK_METADATA))
18865d45231SKent Overstreet goto err;
18965d45231SKent Overstreet }
19065d45231SKent Overstreet
19165d45231SKent Overstreet mutex_unlock(&b->c->bucket_lock);
19265d45231SKent Overstreet }
19365d45231SKent Overstreet
19465d45231SKent Overstreet return false;
19565d45231SKent Overstreet err:
19665d45231SKent Overstreet mutex_unlock(&b->c->bucket_lock);
197dc9d98d6SKent Overstreet bch_extent_to_text(buf, sizeof(buf), k);
19865d45231SKent Overstreet btree_bug(b,
1993a2fd9d5SKent Overstreet "inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu",
20065d45231SKent Overstreet buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
2013a2fd9d5SKent Overstreet g->prio, g->gen, g->last_gc, GC_MARK(g));
20265d45231SKent Overstreet return true;
20365d45231SKent Overstreet }
20465d45231SKent Overstreet
bch_btree_ptr_bad(struct btree_keys * bk,const struct bkey * k)205a85e968eSKent Overstreet static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k)
20665d45231SKent Overstreet {
207a85e968eSKent Overstreet struct btree *b = container_of(bk, struct btree, keys);
2086f10f7d1SColy Li unsigned int i;
20965d45231SKent Overstreet
21065d45231SKent Overstreet if (!bkey_cmp(k, &ZERO_KEY) ||
21165d45231SKent Overstreet !KEY_PTRS(k) ||
212a85e968eSKent Overstreet bch_ptr_invalid(bk, k))
21365d45231SKent Overstreet return true;
21465d45231SKent Overstreet
21565d45231SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++)
21665d45231SKent Overstreet if (!ptr_available(b->c, k, i) ||
21765d45231SKent Overstreet ptr_stale(b->c, k, i))
21865d45231SKent Overstreet return true;
21965d45231SKent Overstreet
22065d45231SKent Overstreet if (expensive_debug_checks(b->c) &&
22165d45231SKent Overstreet btree_ptr_bad_expensive(b, k))
22265d45231SKent Overstreet return true;
22365d45231SKent Overstreet
22465d45231SKent Overstreet return false;
22565d45231SKent Overstreet }
22665d45231SKent Overstreet
bch_btree_ptr_insert_fixup(struct btree_keys * bk,struct bkey * insert,struct btree_iter * iter,struct bkey * replace_key)227829a60b9SKent Overstreet static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk,
228829a60b9SKent Overstreet struct bkey *insert,
229829a60b9SKent Overstreet struct btree_iter *iter,
230829a60b9SKent Overstreet struct bkey *replace_key)
231829a60b9SKent Overstreet {
232829a60b9SKent Overstreet struct btree *b = container_of(bk, struct btree, keys);
233829a60b9SKent Overstreet
234829a60b9SKent Overstreet if (!KEY_OFFSET(insert))
235829a60b9SKent Overstreet btree_current_write(b)->prio_blocked++;
236829a60b9SKent Overstreet
237829a60b9SKent Overstreet return false;
238829a60b9SKent Overstreet }
239829a60b9SKent Overstreet
24065d45231SKent Overstreet const struct btree_keys_ops bch_btree_keys_ops = {
24165d45231SKent Overstreet .sort_cmp = bch_key_sort_cmp,
242829a60b9SKent Overstreet .insert_fixup = bch_btree_ptr_insert_fixup,
24365d45231SKent Overstreet .key_invalid = bch_btree_ptr_invalid,
24465d45231SKent Overstreet .key_bad = bch_btree_ptr_bad,
245dc9d98d6SKent Overstreet .key_to_text = bch_extent_to_text,
246dc9d98d6SKent Overstreet .key_dump = bch_bkey_dump,
24765d45231SKent Overstreet };
24865d45231SKent Overstreet
24965d45231SKent Overstreet /* Extents */
25065d45231SKent Overstreet
25165d45231SKent Overstreet /*
25265d45231SKent Overstreet * Returns true if l > r - unless l == r, in which case returns true if l is
25365d45231SKent Overstreet * older than r.
25465d45231SKent Overstreet *
25565d45231SKent Overstreet * Necessary for btree_sort_fixup() - if there are multiple keys that compare
25665d45231SKent Overstreet * equal in different sets, we have to process them newest to oldest.
25765d45231SKent Overstreet */
bch_extent_sort_cmp(struct btree_iter_set l,struct btree_iter_set r)25865d45231SKent Overstreet static bool bch_extent_sort_cmp(struct btree_iter_set l,
25965d45231SKent Overstreet struct btree_iter_set r)
26065d45231SKent Overstreet {
26165d45231SKent Overstreet int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
26265d45231SKent Overstreet
26365d45231SKent Overstreet return c ? c > 0 : l.k < r.k;
26465d45231SKent Overstreet }
26565d45231SKent Overstreet
bch_extent_sort_fixup(struct btree_iter * iter,struct bkey * tmp)26665d45231SKent Overstreet static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
26765d45231SKent Overstreet struct bkey *tmp)
26865d45231SKent Overstreet {
26965d45231SKent Overstreet while (iter->used > 1) {
27065d45231SKent Overstreet struct btree_iter_set *top = iter->data, *i = top + 1;
27165d45231SKent Overstreet
27265d45231SKent Overstreet if (iter->used > 2 &&
27365d45231SKent Overstreet bch_extent_sort_cmp(i[0], i[1]))
27465d45231SKent Overstreet i++;
27565d45231SKent Overstreet
27665d45231SKent Overstreet if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
27765d45231SKent Overstreet break;
27865d45231SKent Overstreet
27965d45231SKent Overstreet if (!KEY_SIZE(i->k)) {
28065d45231SKent Overstreet sort_key_next(iter, i);
28165d45231SKent Overstreet heap_sift(iter, i - top, bch_extent_sort_cmp);
28265d45231SKent Overstreet continue;
28365d45231SKent Overstreet }
28465d45231SKent Overstreet
28565d45231SKent Overstreet if (top->k > i->k) {
28665d45231SKent Overstreet if (bkey_cmp(top->k, i->k) >= 0)
28765d45231SKent Overstreet sort_key_next(iter, i);
28865d45231SKent Overstreet else
28965d45231SKent Overstreet bch_cut_front(top->k, i->k);
29065d45231SKent Overstreet
29165d45231SKent Overstreet heap_sift(iter, i - top, bch_extent_sort_cmp);
29265d45231SKent Overstreet } else {
29365d45231SKent Overstreet /* can't happen because of comparison func */
29465d45231SKent Overstreet BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
29565d45231SKent Overstreet
29665d45231SKent Overstreet if (bkey_cmp(i->k, top->k) < 0) {
29765d45231SKent Overstreet bkey_copy(tmp, top->k);
29865d45231SKent Overstreet
29965d45231SKent Overstreet bch_cut_back(&START_KEY(i->k), tmp);
30065d45231SKent Overstreet bch_cut_front(i->k, top->k);
30165d45231SKent Overstreet heap_sift(iter, 0, bch_extent_sort_cmp);
30265d45231SKent Overstreet
30365d45231SKent Overstreet return tmp;
30465d45231SKent Overstreet } else {
30565d45231SKent Overstreet bch_cut_back(&START_KEY(i->k), top->k);
30665d45231SKent Overstreet }
30765d45231SKent Overstreet }
30865d45231SKent Overstreet }
30965d45231SKent Overstreet
31065d45231SKent Overstreet return NULL;
31165d45231SKent Overstreet }
31265d45231SKent Overstreet
bch_subtract_dirty(struct bkey * k,struct cache_set * c,uint64_t offset,int sectors)313cb851149SJohn Sheu static void bch_subtract_dirty(struct bkey *k,
314cb851149SJohn Sheu struct cache_set *c,
315cb851149SJohn Sheu uint64_t offset,
316cb851149SJohn Sheu int sectors)
317cb851149SJohn Sheu {
318cb851149SJohn Sheu if (KEY_DIRTY(k))
319cb851149SJohn Sheu bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
320cb851149SJohn Sheu offset, -sectors);
321cb851149SJohn Sheu }
322cb851149SJohn Sheu
bch_extent_insert_fixup(struct btree_keys * b,struct bkey * insert,struct btree_iter * iter,struct bkey * replace_key)323829a60b9SKent Overstreet static bool bch_extent_insert_fixup(struct btree_keys *b,
324829a60b9SKent Overstreet struct bkey *insert,
325829a60b9SKent Overstreet struct btree_iter *iter,
326829a60b9SKent Overstreet struct bkey *replace_key)
327829a60b9SKent Overstreet {
328829a60b9SKent Overstreet struct cache_set *c = container_of(b, struct btree, keys)->c;
329829a60b9SKent Overstreet
330829a60b9SKent Overstreet uint64_t old_offset;
3316f10f7d1SColy Li unsigned int old_size, sectors_found = 0;
332829a60b9SKent Overstreet
333829a60b9SKent Overstreet BUG_ON(!KEY_OFFSET(insert));
334829a60b9SKent Overstreet BUG_ON(!KEY_SIZE(insert));
335829a60b9SKent Overstreet
336829a60b9SKent Overstreet while (1) {
337829a60b9SKent Overstreet struct bkey *k = bch_btree_iter_next(iter);
3381fae7cf0SColy Li
339829a60b9SKent Overstreet if (!k)
340829a60b9SKent Overstreet break;
341829a60b9SKent Overstreet
342829a60b9SKent Overstreet if (bkey_cmp(&START_KEY(k), insert) >= 0) {
343829a60b9SKent Overstreet if (KEY_SIZE(k))
344829a60b9SKent Overstreet break;
345829a60b9SKent Overstreet else
346829a60b9SKent Overstreet continue;
347829a60b9SKent Overstreet }
348829a60b9SKent Overstreet
349829a60b9SKent Overstreet if (bkey_cmp(k, &START_KEY(insert)) <= 0)
350829a60b9SKent Overstreet continue;
351829a60b9SKent Overstreet
352829a60b9SKent Overstreet old_offset = KEY_START(k);
353829a60b9SKent Overstreet old_size = KEY_SIZE(k);
354829a60b9SKent Overstreet
355829a60b9SKent Overstreet /*
356829a60b9SKent Overstreet * We might overlap with 0 size extents; we can't skip these
357829a60b9SKent Overstreet * because if they're in the set we're inserting to we have to
358829a60b9SKent Overstreet * adjust them so they don't overlap with the key we're
359829a60b9SKent Overstreet * inserting. But we don't want to check them for replace
360829a60b9SKent Overstreet * operations.
361829a60b9SKent Overstreet */
362829a60b9SKent Overstreet
363829a60b9SKent Overstreet if (replace_key && KEY_SIZE(k)) {
364829a60b9SKent Overstreet /*
365829a60b9SKent Overstreet * k might have been split since we inserted/found the
366829a60b9SKent Overstreet * key we're replacing
367829a60b9SKent Overstreet */
3686f10f7d1SColy Li unsigned int i;
369829a60b9SKent Overstreet uint64_t offset = KEY_START(k) -
370829a60b9SKent Overstreet KEY_START(replace_key);
371829a60b9SKent Overstreet
372829a60b9SKent Overstreet /* But it must be a subset of the replace key */
373829a60b9SKent Overstreet if (KEY_START(k) < KEY_START(replace_key) ||
374829a60b9SKent Overstreet KEY_OFFSET(k) > KEY_OFFSET(replace_key))
375829a60b9SKent Overstreet goto check_failed;
376829a60b9SKent Overstreet
377829a60b9SKent Overstreet /* We didn't find a key that we were supposed to */
378829a60b9SKent Overstreet if (KEY_START(k) > KEY_START(insert) + sectors_found)
379829a60b9SKent Overstreet goto check_failed;
380829a60b9SKent Overstreet
3813bdad1e4SNicholas Swenson if (!bch_bkey_equal_header(k, replace_key))
382829a60b9SKent Overstreet goto check_failed;
383829a60b9SKent Overstreet
384829a60b9SKent Overstreet /* skip past gen */
385829a60b9SKent Overstreet offset <<= 8;
386829a60b9SKent Overstreet
387829a60b9SKent Overstreet BUG_ON(!KEY_PTRS(replace_key));
388829a60b9SKent Overstreet
389829a60b9SKent Overstreet for (i = 0; i < KEY_PTRS(replace_key); i++)
390829a60b9SKent Overstreet if (k->ptr[i] != replace_key->ptr[i] + offset)
391829a60b9SKent Overstreet goto check_failed;
392829a60b9SKent Overstreet
393829a60b9SKent Overstreet sectors_found = KEY_OFFSET(k) - KEY_START(insert);
394829a60b9SKent Overstreet }
395829a60b9SKent Overstreet
396829a60b9SKent Overstreet if (bkey_cmp(insert, k) < 0 &&
397829a60b9SKent Overstreet bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) {
398829a60b9SKent Overstreet /*
399829a60b9SKent Overstreet * We overlapped in the middle of an existing key: that
400829a60b9SKent Overstreet * means we have to split the old key. But we have to do
401829a60b9SKent Overstreet * slightly different things depending on whether the
402829a60b9SKent Overstreet * old key has been written out yet.
403829a60b9SKent Overstreet */
404829a60b9SKent Overstreet
405829a60b9SKent Overstreet struct bkey *top;
406829a60b9SKent Overstreet
407cb851149SJohn Sheu bch_subtract_dirty(k, c, KEY_START(insert),
408cb851149SJohn Sheu KEY_SIZE(insert));
409829a60b9SKent Overstreet
410829a60b9SKent Overstreet if (bkey_written(b, k)) {
411829a60b9SKent Overstreet /*
412829a60b9SKent Overstreet * We insert a new key to cover the top of the
413829a60b9SKent Overstreet * old key, and the old key is modified in place
414829a60b9SKent Overstreet * to represent the bottom split.
415829a60b9SKent Overstreet *
416829a60b9SKent Overstreet * It's completely arbitrary whether the new key
417829a60b9SKent Overstreet * is the top or the bottom, but it has to match
418829a60b9SKent Overstreet * up with what btree_sort_fixup() does - it
419829a60b9SKent Overstreet * doesn't check for this kind of overlap, it
420829a60b9SKent Overstreet * depends on us inserting a new key for the top
421829a60b9SKent Overstreet * here.
422829a60b9SKent Overstreet */
423829a60b9SKent Overstreet top = bch_bset_search(b, bset_tree_last(b),
424829a60b9SKent Overstreet insert);
425829a60b9SKent Overstreet bch_bset_insert(b, top, k);
426829a60b9SKent Overstreet } else {
427829a60b9SKent Overstreet BKEY_PADDED(key) temp;
428829a60b9SKent Overstreet bkey_copy(&temp.key, k);
429829a60b9SKent Overstreet bch_bset_insert(b, k, &temp.key);
430829a60b9SKent Overstreet top = bkey_next(k);
431829a60b9SKent Overstreet }
432829a60b9SKent Overstreet
433829a60b9SKent Overstreet bch_cut_front(insert, top);
434829a60b9SKent Overstreet bch_cut_back(&START_KEY(insert), k);
435829a60b9SKent Overstreet bch_bset_fix_invalidated_key(b, k);
436829a60b9SKent Overstreet goto out;
437829a60b9SKent Overstreet }
438829a60b9SKent Overstreet
439829a60b9SKent Overstreet if (bkey_cmp(insert, k) < 0) {
440829a60b9SKent Overstreet bch_cut_front(insert, k);
441829a60b9SKent Overstreet } else {
442829a60b9SKent Overstreet if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0)
443829a60b9SKent Overstreet old_offset = KEY_START(insert);
444829a60b9SKent Overstreet
445829a60b9SKent Overstreet if (bkey_written(b, k) &&
446829a60b9SKent Overstreet bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) {
447829a60b9SKent Overstreet /*
448829a60b9SKent Overstreet * Completely overwrote, so we don't have to
449829a60b9SKent Overstreet * invalidate the binary search tree
450829a60b9SKent Overstreet */
451829a60b9SKent Overstreet bch_cut_front(k, k);
452829a60b9SKent Overstreet } else {
453829a60b9SKent Overstreet __bch_cut_back(&START_KEY(insert), k);
454829a60b9SKent Overstreet bch_bset_fix_invalidated_key(b, k);
455829a60b9SKent Overstreet }
456829a60b9SKent Overstreet }
457829a60b9SKent Overstreet
458cb851149SJohn Sheu bch_subtract_dirty(k, c, old_offset, old_size - KEY_SIZE(k));
459829a60b9SKent Overstreet }
460829a60b9SKent Overstreet
461829a60b9SKent Overstreet check_failed:
462829a60b9SKent Overstreet if (replace_key) {
463829a60b9SKent Overstreet if (!sectors_found) {
464829a60b9SKent Overstreet return true;
465829a60b9SKent Overstreet } else if (sectors_found < KEY_SIZE(insert)) {
466829a60b9SKent Overstreet SET_KEY_OFFSET(insert, KEY_OFFSET(insert) -
467829a60b9SKent Overstreet (KEY_SIZE(insert) - sectors_found));
468829a60b9SKent Overstreet SET_KEY_SIZE(insert, sectors_found);
469829a60b9SKent Overstreet }
470829a60b9SKent Overstreet }
471829a60b9SKent Overstreet out:
472829a60b9SKent Overstreet if (KEY_DIRTY(insert))
473829a60b9SKent Overstreet bcache_dev_sectors_dirty_add(c, KEY_INODE(insert),
474829a60b9SKent Overstreet KEY_START(insert),
475829a60b9SKent Overstreet KEY_SIZE(insert));
476829a60b9SKent Overstreet
477829a60b9SKent Overstreet return false;
478829a60b9SKent Overstreet }
479829a60b9SKent Overstreet
__bch_extent_invalid(struct cache_set * c,const struct bkey * k)4809aa61a99SKent Overstreet bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
48165d45231SKent Overstreet {
48265d45231SKent Overstreet char buf[80];
48365d45231SKent Overstreet
48465d45231SKent Overstreet if (!KEY_SIZE(k))
48565d45231SKent Overstreet return true;
48665d45231SKent Overstreet
48765d45231SKent Overstreet if (KEY_SIZE(k) > KEY_OFFSET(k))
48865d45231SKent Overstreet goto bad;
48965d45231SKent Overstreet
4909aa61a99SKent Overstreet if (__ptr_invalid(c, k))
49165d45231SKent Overstreet goto bad;
49265d45231SKent Overstreet
49365d45231SKent Overstreet return false;
49465d45231SKent Overstreet bad:
495dc9d98d6SKent Overstreet bch_extent_to_text(buf, sizeof(buf), k);
4969aa61a99SKent Overstreet cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
49765d45231SKent Overstreet return true;
49865d45231SKent Overstreet }
49965d45231SKent Overstreet
bch_extent_invalid(struct btree_keys * bk,const struct bkey * k)5009aa61a99SKent Overstreet static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
5019aa61a99SKent Overstreet {
5029aa61a99SKent Overstreet struct btree *b = container_of(bk, struct btree, keys);
5031fae7cf0SColy Li
5049aa61a99SKent Overstreet return __bch_extent_invalid(b->c, k);
5059aa61a99SKent Overstreet }
5069aa61a99SKent Overstreet
bch_extent_bad_expensive(struct btree * b,const struct bkey * k,unsigned int ptr)50765d45231SKent Overstreet static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
5086f10f7d1SColy Li unsigned int ptr)
50965d45231SKent Overstreet {
51065d45231SKent Overstreet struct bucket *g = PTR_BUCKET(b->c, k, ptr);
51165d45231SKent Overstreet char buf[80];
51265d45231SKent Overstreet
51365d45231SKent Overstreet if (mutex_trylock(&b->c->bucket_lock)) {
51465d45231SKent Overstreet if (b->c->gc_mark_valid &&
5154fe6a816SKent Overstreet (!GC_MARK(g) ||
5164fe6a816SKent Overstreet GC_MARK(g) == GC_MARK_METADATA ||
5174fe6a816SKent Overstreet (GC_MARK(g) != GC_MARK_DIRTY && KEY_DIRTY(k))))
51865d45231SKent Overstreet goto err;
51965d45231SKent Overstreet
52065d45231SKent Overstreet if (g->prio == BTREE_PRIO)
52165d45231SKent Overstreet goto err;
52265d45231SKent Overstreet
52365d45231SKent Overstreet mutex_unlock(&b->c->bucket_lock);
52465d45231SKent Overstreet }
52565d45231SKent Overstreet
52665d45231SKent Overstreet return false;
52765d45231SKent Overstreet err:
52865d45231SKent Overstreet mutex_unlock(&b->c->bucket_lock);
529dc9d98d6SKent Overstreet bch_extent_to_text(buf, sizeof(buf), k);
53065d45231SKent Overstreet btree_bug(b,
5313a2fd9d5SKent Overstreet "inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu",
53265d45231SKent Overstreet buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
5333a2fd9d5SKent Overstreet g->prio, g->gen, g->last_gc, GC_MARK(g));
53465d45231SKent Overstreet return true;
53565d45231SKent Overstreet }
53665d45231SKent Overstreet
bch_extent_bad(struct btree_keys * bk,const struct bkey * k)537a85e968eSKent Overstreet static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
53865d45231SKent Overstreet {
539a85e968eSKent Overstreet struct btree *b = container_of(bk, struct btree, keys);
5406f10f7d1SColy Li unsigned int i, stale;
54158ac3230STang Junhui char buf[80];
54265d45231SKent Overstreet
54365d45231SKent Overstreet if (!KEY_PTRS(k) ||
544a85e968eSKent Overstreet bch_extent_invalid(bk, k))
54565d45231SKent Overstreet return true;
54665d45231SKent Overstreet
54765d45231SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++)
54865d45231SKent Overstreet if (!ptr_available(b->c, k, i))
54965d45231SKent Overstreet return true;
55065d45231SKent Overstreet
55165d45231SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++) {
55265d45231SKent Overstreet stale = ptr_stale(b->c, k, i);
55365d45231SKent Overstreet
55458ac3230STang Junhui if (stale && KEY_DIRTY(k)) {
55558ac3230STang Junhui bch_extent_to_text(buf, sizeof(buf), k);
55646f5aa88SJoe Perches pr_info("stale dirty pointer, stale %u, key: %s\n",
55758ac3230STang Junhui stale, buf);
55858ac3230STang Junhui }
55958ac3230STang Junhui
560149d0efaSColy Li btree_bug_on(stale > BUCKET_GC_GEN_MAX, b,
56165d45231SKent Overstreet "key too stale: %i, need_gc %u",
56265d45231SKent Overstreet stale, b->c->need_gc);
56365d45231SKent Overstreet
56465d45231SKent Overstreet if (stale)
56565d45231SKent Overstreet return true;
56665d45231SKent Overstreet
56765d45231SKent Overstreet if (expensive_debug_checks(b->c) &&
56865d45231SKent Overstreet bch_extent_bad_expensive(b, k, i))
56965d45231SKent Overstreet return true;
57065d45231SKent Overstreet }
57165d45231SKent Overstreet
57265d45231SKent Overstreet return false;
57365d45231SKent Overstreet }
57465d45231SKent Overstreet
merge_chksums(struct bkey * l,struct bkey * r)57565d45231SKent Overstreet static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
57665d45231SKent Overstreet {
57765d45231SKent Overstreet return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
57865d45231SKent Overstreet ~((uint64_t)1 << 63);
57965d45231SKent Overstreet }
58065d45231SKent Overstreet
bch_extent_merge(struct btree_keys * bk,struct bkey * l,struct bkey * r)581b0d30981SColy Li static bool bch_extent_merge(struct btree_keys *bk,
582b0d30981SColy Li struct bkey *l,
583b0d30981SColy Li struct bkey *r)
58465d45231SKent Overstreet {
585a85e968eSKent Overstreet struct btree *b = container_of(bk, struct btree, keys);
5866f10f7d1SColy Li unsigned int i;
58765d45231SKent Overstreet
58865d45231SKent Overstreet if (key_merging_disabled(b->c))
58965d45231SKent Overstreet return false;
59065d45231SKent Overstreet
59165d45231SKent Overstreet for (i = 0; i < KEY_PTRS(l); i++)
592cf33c1eeSHuacai Chen if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
59365d45231SKent Overstreet PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
59465d45231SKent Overstreet return false;
59565d45231SKent Overstreet
59665d45231SKent Overstreet /* Keys with no pointers aren't restricted to one bucket and could
59765d45231SKent Overstreet * overflow KEY_SIZE
59865d45231SKent Overstreet */
59965d45231SKent Overstreet if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
60065d45231SKent Overstreet SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
60165d45231SKent Overstreet SET_KEY_SIZE(l, USHRT_MAX);
60265d45231SKent Overstreet
60365d45231SKent Overstreet bch_cut_front(l, r);
60465d45231SKent Overstreet return false;
60565d45231SKent Overstreet }
60665d45231SKent Overstreet
60765d45231SKent Overstreet if (KEY_CSUM(l)) {
60865d45231SKent Overstreet if (KEY_CSUM(r))
60965d45231SKent Overstreet l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
61065d45231SKent Overstreet else
61165d45231SKent Overstreet SET_KEY_CSUM(l, 0);
61265d45231SKent Overstreet }
61365d45231SKent Overstreet
61465d45231SKent Overstreet SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
61565d45231SKent Overstreet SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
61665d45231SKent Overstreet
61765d45231SKent Overstreet return true;
61865d45231SKent Overstreet }
61965d45231SKent Overstreet
62065d45231SKent Overstreet const struct btree_keys_ops bch_extent_keys_ops = {
62165d45231SKent Overstreet .sort_cmp = bch_extent_sort_cmp,
62265d45231SKent Overstreet .sort_fixup = bch_extent_sort_fixup,
623829a60b9SKent Overstreet .insert_fixup = bch_extent_insert_fixup,
62465d45231SKent Overstreet .key_invalid = bch_extent_invalid,
62565d45231SKent Overstreet .key_bad = bch_extent_bad,
62665d45231SKent Overstreet .key_merge = bch_extent_merge,
627dc9d98d6SKent Overstreet .key_to_text = bch_extent_to_text,
628dc9d98d6SKent Overstreet .key_dump = bch_bkey_dump,
62965d45231SKent Overstreet .is_extents = true,
63065d45231SKent Overstreet };
631