xref: /openbmc/linux/drivers/md/bcache/extents.c (revision 11e9560e)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
 *
 * Uses a block device as cache for other block devices; optimized for SSDs.
 * All allocation is done in buckets, which should match the erase block size
 * of the device.
 *
 * Buckets containing cached data are kept on a heap sorted by priority;
 * bucket priority is increased on cache hit, and periodically all the buckets
 * on the heap have their priority scaled down. This is currently just used as
 * an LRU, but in the future it should allow for more intelligent heuristics.
 *
 * Buckets have an 8 bit counter; freeing is accomplished by incrementing the
 * counter. Garbage collection is used to remove stale pointers.
 *
 * Indexing is done via a btree; nodes are not necessarily fully sorted; rather,
 * as keys are inserted we only sort the pages that have not yet been written.
 * When garbage collection is run, we resort the entire node.
 *
 * All configuration is done via sysfs; see Documentation/admin-guide/bcache.rst.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "extents.h"
#include "writeback.h"

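/*
 * Illustrative sketch, not used by the driver: a minimal model of the
 * 8 bit bucket generation counter described in the header comment. A
 * pointer records the bucket's gen at the time it was created; once the
 * bucket is reused its gen moves on and the pointer becomes stale.
 * Doing the comparison as a signed 8 bit subtraction keeps it correct
 * across wraparound. The helper below is hypothetical and only shows
 * the idea behind the ptr_stale() checks used throughout this file.
 */
static bool __maybe_unused example_gen_is_stale(u8 bucket_gen, u8 ptr_gen)
{
	/* positive difference: the bucket was reused after the pointer was made */
	return (s8)(bucket_gen - ptr_gen) > 0;
}
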
static void sort_key_next(struct btree_iter *iter,
			  struct btree_iter_set *i)
{
	i->k = bkey_next(i->k);

	if (i->k == i->end)
		*i = iter->data[--iter->used];
}

static bool bch_key_sort_cmp(struct btree_iter_set l,
			     struct btree_iter_set r)
{
	int64_t c = bkey_cmp(l.k, r.k);

	return c ? c > 0 : l.k < r.k;
}

static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
{
	unsigned int i;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (ptr_available(c, k, i)) {
			struct cache *ca = c->cache;
			size_t bucket = PTR_BUCKET_NR(c, k, i);
			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));

			if (KEY_SIZE(k) + r > c->cache->sb.bucket_size ||
			    bucket <  ca->sb.first_bucket ||
			    bucket >= ca->sb.nbuckets)
				return true;
		}

	return false;
}

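/*
 * Illustrative sketch, hypothetical helper: the bounds check from
 * __ptr_invalid() above restated on plain integers, to spell out the
 * three failure cases. A pointer is only valid if the extent fits
 * entirely inside its bucket and the bucket lies inside the usable
 * range [first_bucket, nbuckets) of the cache device.
 */
static bool __maybe_unused example_ptr_out_of_range(size_t key_sectors,
						    size_t offset_in_bucket,
						    size_t bucket,
						    size_t bucket_size,
						    size_t first_bucket,
						    size_t nbuckets)
{
	return key_sectors + offset_in_bucket > bucket_size ||
	       bucket < first_bucket ||
	       bucket >= nbuckets;
}
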
/* Common among btree and extent ptrs */

static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
{
	unsigned int i;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (ptr_available(c, k, i)) {
			struct cache *ca = c->cache;
			size_t bucket = PTR_BUCKET_NR(c, k, i);
			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));

			if (KEY_SIZE(k) + r > c->cache->sb.bucket_size)
				return "bad, length too big";
			if (bucket <  ca->sb.first_bucket)
				return "bad, short offset";
			if (bucket >= ca->sb.nbuckets)
				return "bad, offset past end of device";
			if (ptr_stale(c, k, i))
				return "stale";
		}

	if (!bkey_cmp(k, &ZERO_KEY))
		return "bad, null key";
	if (!KEY_PTRS(k))
		return "bad, no pointers";
	if (!KEY_SIZE(k))
		return "zeroed key";
	return "";
}

void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
{
	unsigned int i = 0;
	char *out = buf, *end = buf + size;

#define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))

	p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_START(k), KEY_SIZE(k));

	for (i = 0; i < KEY_PTRS(k); i++) {
		if (i)
			p(", ");

		if (PTR_DEV(k, i) == PTR_CHECK_DEV)
			p("check dev");
		else
			p("%llu:%llu gen %llu", PTR_DEV(k, i),
			  PTR_OFFSET(k, i), PTR_GEN(k, i));
	}

	p("]");

	if (KEY_DIRTY(k))
		p(" dirty");
	if (KEY_CSUM(k))
		p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
#undef p
}

static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
{
	struct btree *b = container_of(keys, struct btree, keys);
	unsigned int j;
	char buf[80];

	bch_extent_to_text(buf, sizeof(buf), k);
	pr_cont(" %s", buf);

	for (j = 0; j < KEY_PTRS(k); j++) {
		size_t n = PTR_BUCKET_NR(b->c, k, j);

		pr_cont(" bucket %zu", n);
		if (n >= b->c->cache->sb.first_bucket && n < b->c->cache->sb.nbuckets)
			pr_cont(" prio %i",
				PTR_BUCKET(b->c, k, j)->prio);
	}

	pr_cont(" %s\n", bch_ptr_status(b->c, k));
}

/* Btree ptrs */

bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
{
	char buf[80];

	if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
		goto bad;

	if (__ptr_invalid(c, k))
		goto bad;

	return false;
bad:
	bch_extent_to_text(buf, sizeof(buf), k);
	cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
	return true;
}

static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);

	return __bch_btree_ptr_invalid(b->c, k);
}

static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
{
	unsigned int i;
	char buf[80];
	struct bucket *g;

	if (mutex_trylock(&b->c->bucket_lock)) {
		for (i = 0; i < KEY_PTRS(k); i++)
			if (ptr_available(b->c, k, i)) {
				g = PTR_BUCKET(b->c, k, i);

				if (KEY_DIRTY(k) ||
				    g->prio != BTREE_PRIO ||
				    (b->c->gc_mark_valid &&
				     GC_MARK(g) != GC_MARK_METADATA))
					goto err;
			}

		mutex_unlock(&b->c->bucket_lock);
	}

	return false;
err:
	mutex_unlock(&b->c->bucket_lock);
	bch_extent_to_text(buf, sizeof(buf), k);
	btree_bug(b,
"inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu",
		  buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
		  g->prio, g->gen, g->last_gc, GC_MARK(g));
	return true;
}

static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i;

	if (!bkey_cmp(k, &ZERO_KEY) ||
	    !KEY_PTRS(k) ||
	    bch_ptr_invalid(bk, k))
		return true;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (!ptr_available(b->c, k, i) ||
		    ptr_stale(b->c, k, i))
			return true;

	if (expensive_debug_checks(b->c) &&
	    btree_ptr_bad_expensive(b, k))
		return true;

	return false;
}

static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk,
				       struct bkey *insert,
				       struct btree_iter *iter,
				       struct bkey *replace_key)
{
	struct btree *b = container_of(bk, struct btree, keys);

	if (!KEY_OFFSET(insert))
		btree_current_write(b)->prio_blocked++;

	return false;
}

const struct btree_keys_ops bch_btree_keys_ops = {
	.sort_cmp	= bch_key_sort_cmp,
	.insert_fixup	= bch_btree_ptr_insert_fixup,
	.key_invalid	= bch_btree_ptr_invalid,
	.key_bad	= bch_btree_ptr_bad,
	.key_to_text	= bch_extent_to_text,
	.key_dump	= bch_bkey_dump,
};

/* Extents */

/*
 * Returns true if l > r - unless l == r, in which case returns true if l is
 * older than r.
 *
 * Necessary for btree_sort_fixup() - if there are multiple keys that compare
 * equal in different sets, we have to process them newest to oldest.
 */
static bool bch_extent_sort_cmp(struct btree_iter_set l,
				struct btree_iter_set r)
{
	int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));

	return c ? c > 0 : l.k < r.k;
}

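/*
 * Illustrative sketch, hypothetical types: the tie-break from
 * bch_extent_sort_cmp() in isolation. Keys compare by start offset
 * first; when keys from different sets have the same start, the
 * comparator falls back to the keys' addresses within the node, so
 * that (per the comment above) it reports "l ranks after r" when l is
 * the older of two equal keys and the merge visits them newest first.
 */
struct example_iter_pos {
	uint64_t	start;	/* stand-in for START_KEY()		*/
	const void	*key;	/* address of the key inside the node	*/
};

static bool __maybe_unused example_extent_cmp(struct example_iter_pos l,
					      struct example_iter_pos r)
{
	if (l.start != r.start)
		return l.start > r.start;

	/* keys from older sets sit at lower addresses within the node */
	return l.key < r.key;
}
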
static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
					  struct bkey *tmp)
{
	while (iter->used > 1) {
		struct btree_iter_set *top = iter->data, *i = top + 1;

		if (iter->used > 2 &&
		    bch_extent_sort_cmp(i[0], i[1]))
			i++;

		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
			break;

		if (!KEY_SIZE(i->k)) {
			sort_key_next(iter, i);
			heap_sift(iter, i - top, bch_extent_sort_cmp);
			continue;
		}

		if (top->k > i->k) {
			if (bkey_cmp(top->k, i->k) >= 0)
				sort_key_next(iter, i);
			else
				bch_cut_front(top->k, i->k);

			heap_sift(iter, i - top, bch_extent_sort_cmp);
		} else {
			/* can't happen because of comparison func */
			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));

			if (bkey_cmp(i->k, top->k) < 0) {
				bkey_copy(tmp, top->k);

				bch_cut_back(&START_KEY(i->k), tmp);
				bch_cut_front(i->k, top->k);
				heap_sift(iter, 0, bch_extent_sort_cmp);

				return tmp;
			} else {
				bch_cut_back(&START_KEY(i->k), top->k);
			}
		}
	}

	return NULL;
}

static void bch_subtract_dirty(struct bkey *k,
			   struct cache_set *c,
			   uint64_t offset,
			   int sectors)
{
	if (KEY_DIRTY(k))
		bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
					     offset, -sectors);
}

static bool bch_extent_insert_fixup(struct btree_keys *b,
				    struct bkey *insert,
				    struct btree_iter *iter,
				    struct bkey *replace_key)
{
	struct cache_set *c = container_of(b, struct btree, keys)->c;

	uint64_t old_offset;
	unsigned int old_size, sectors_found = 0;

	BUG_ON(!KEY_OFFSET(insert));
	BUG_ON(!KEY_SIZE(insert));

	while (1) {
		struct bkey *k = bch_btree_iter_next(iter);

		if (!k)
			break;

		if (bkey_cmp(&START_KEY(k), insert) >= 0) {
			if (KEY_SIZE(k))
				break;
			else
				continue;
		}

		if (bkey_cmp(k, &START_KEY(insert)) <= 0)
			continue;

		old_offset = KEY_START(k);
		old_size = KEY_SIZE(k);

		/*
		 * We might overlap with 0 size extents; we can't skip these
		 * because if they're in the set we're inserting to we have to
		 * adjust them so they don't overlap with the key we're
		 * inserting. But we don't want to check them for replace
		 * operations.
		 */

		if (replace_key && KEY_SIZE(k)) {
			/*
			 * k might have been split since we inserted/found the
			 * key we're replacing
			 */
			unsigned int i;
			uint64_t offset = KEY_START(k) -
				KEY_START(replace_key);

			/* But it must be a subset of the replace key */
			if (KEY_START(k) < KEY_START(replace_key) ||
			    KEY_OFFSET(k) > KEY_OFFSET(replace_key))
				goto check_failed;

			/* We didn't find a key that we were supposed to */
			if (KEY_START(k) > KEY_START(insert) + sectors_found)
				goto check_failed;

			if (!bch_bkey_equal_header(k, replace_key))
				goto check_failed;

			/* skip past gen */
			offset <<= 8;

			BUG_ON(!KEY_PTRS(replace_key));

			for (i = 0; i < KEY_PTRS(replace_key); i++)
				if (k->ptr[i] != replace_key->ptr[i] + offset)
					goto check_failed;

			sectors_found = KEY_OFFSET(k) - KEY_START(insert);
		}

		if (bkey_cmp(insert, k) < 0 &&
		    bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) {
			/*
			 * We overlapped in the middle of an existing key: that
			 * means we have to split the old key. But we have to do
			 * slightly different things depending on whether the
			 * old key has been written out yet.
			 */

			struct bkey *top;

			bch_subtract_dirty(k, c, KEY_START(insert),
				       KEY_SIZE(insert));

			if (bkey_written(b, k)) {
				/*
				 * We insert a new key to cover the top of the
				 * old key, and the old key is modified in place
				 * to represent the bottom split.
				 *
				 * It's completely arbitrary whether the new key
				 * is the top or the bottom, but it has to match
				 * up with what btree_sort_fixup() does - it
				 * doesn't check for this kind of overlap, it
				 * depends on us inserting a new key for the top
				 * here.
				 */
				top = bch_bset_search(b, bset_tree_last(b),
						      insert);
				bch_bset_insert(b, top, k);
			} else {
				BKEY_PADDED(key) temp;
				bkey_copy(&temp.key, k);
				bch_bset_insert(b, k, &temp.key);
				top = bkey_next(k);
			}

			bch_cut_front(insert, top);
			bch_cut_back(&START_KEY(insert), k);
			bch_bset_fix_invalidated_key(b, k);
			goto out;
		}

		if (bkey_cmp(insert, k) < 0) {
			bch_cut_front(insert, k);
		} else {
			if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0)
				old_offset = KEY_START(insert);

			if (bkey_written(b, k) &&
			    bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) {
				/*
				 * Completely overwrote, so we don't have to
				 * invalidate the binary search tree
				 */
				bch_cut_front(k, k);
			} else {
				__bch_cut_back(&START_KEY(insert), k);
				bch_bset_fix_invalidated_key(b, k);
			}
		}

		bch_subtract_dirty(k, c, old_offset, old_size - KEY_SIZE(k));
	}

check_failed:
	if (replace_key) {
		if (!sectors_found) {
			return true;
		} else if (sectors_found < KEY_SIZE(insert)) {
			SET_KEY_OFFSET(insert, KEY_OFFSET(insert) -
				       (KEY_SIZE(insert) - sectors_found));
			SET_KEY_SIZE(insert, sectors_found);
		}
	}
out:
	if (KEY_DIRTY(insert))
		bcache_dev_sectors_dirty_add(c, KEY_INODE(insert),
					     KEY_START(insert),
					     KEY_SIZE(insert));

	return false;
}

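/*
 * Illustrative sketch, hypothetical struct: the interval arithmetic
 * behind the "overlapped in the middle of an existing key" case in
 * bch_extent_insert_fixup(). When the inserted extent lands strictly
 * inside an existing one, the existing extent is split into a bottom
 * piece that ends where the insert starts and a top piece that begins
 * where the insert ends - the same result the bch_cut_back() and
 * bch_cut_front() calls produce on real bkeys.
 */
struct example_range {
	uint64_t start;		/* first sector covered		*/
	uint64_t end;		/* one past the last sector	*/
};

static void __maybe_unused
example_split_around(struct example_range old, struct example_range ins,
		     struct example_range *bottom, struct example_range *top)
{
	/* caller guarantees old.start < ins.start and ins.end < old.end */
	bottom->start	= old.start;
	bottom->end	= ins.start;
	top->start	= ins.end;
	top->end	= old.end;
}
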
bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
{
	char buf[80];

	if (!KEY_SIZE(k))
		return true;

	if (KEY_SIZE(k) > KEY_OFFSET(k))
		goto bad;

	if (__ptr_invalid(c, k))
		goto bad;

	return false;
bad:
	bch_extent_to_text(buf, sizeof(buf), k);
	cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
	return true;
}

static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);

	return __bch_extent_invalid(b->c, k);
}

static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
				     unsigned int ptr)
{
	struct bucket *g = PTR_BUCKET(b->c, k, ptr);
	char buf[80];

	if (mutex_trylock(&b->c->bucket_lock)) {
		if (b->c->gc_mark_valid &&
		    (!GC_MARK(g) ||
		     GC_MARK(g) == GC_MARK_METADATA ||
		     (GC_MARK(g) != GC_MARK_DIRTY && KEY_DIRTY(k))))
			goto err;

		if (g->prio == BTREE_PRIO)
			goto err;

		mutex_unlock(&b->c->bucket_lock);
	}

	return false;
err:
	mutex_unlock(&b->c->bucket_lock);
	bch_extent_to_text(buf, sizeof(buf), k);
	btree_bug(b,
"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu",
		  buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
		  g->prio, g->gen, g->last_gc, GC_MARK(g));
	return true;
}

static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i, stale;
	char buf[80];

	if (!KEY_PTRS(k) ||
	    bch_extent_invalid(bk, k))
		return true;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (!ptr_available(b->c, k, i))
			return true;

	for (i = 0; i < KEY_PTRS(k); i++) {
		stale = ptr_stale(b->c, k, i);

		if (stale && KEY_DIRTY(k)) {
			bch_extent_to_text(buf, sizeof(buf), k);
			pr_info("stale dirty pointer, stale %u, key: %s\n",
				stale, buf);
		}

		btree_bug_on(stale > BUCKET_GC_GEN_MAX, b,
			     "key too stale: %i, need_gc %u",
			     stale, b->c->need_gc);

		if (stale)
			return true;

		if (expensive_debug_checks(b->c) &&
		    bch_extent_bad_expensive(b, k, i))
			return true;
	}

	return false;
}

static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
{
	return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
		~((uint64_t)1 << 63);
}

static bool bch_extent_merge(struct btree_keys *bk,
			     struct bkey *l,
			     struct bkey *r)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i;

	if (key_merging_disabled(b->c))
		return false;

	for (i = 0; i < KEY_PTRS(l); i++)
		if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
			return false;

	/*
	 * Keys with no pointers aren't restricted to one bucket and could
	 * overflow KEY_SIZE
	 */
	if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
		SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
		SET_KEY_SIZE(l, USHRT_MAX);

		bch_cut_front(l, r);
		return false;
	}

	if (KEY_CSUM(l)) {
		if (KEY_CSUM(r))
			l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
		else
			SET_KEY_CSUM(l, 0);
	}

	SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
	SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));

	return true;
}

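/*
 * Illustrative sketch, hypothetical struct: the offset/size arithmetic
 * performed by bch_extent_merge() above. A bkey records the sector one
 * past the end of the extent (KEY_OFFSET) together with its length
 * (KEY_SIZE), so merging an adjacent right-hand neighbour just grows
 * both fields. The real code additionally requires the data pointers to
 * be contiguous on disk and, on a 16 bit size overflow, keeps as much
 * of r as fits rather than refusing outright.
 */
struct example_bkey {
	uint64_t	end;	/* stand-in for KEY_OFFSET(): one past the end	*/
	unsigned int	size;	/* stand-in for KEY_SIZE(), 16 bits in reality	*/
};

static bool __maybe_unused
example_merge(struct example_bkey *l, const struct example_bkey *r)
{
	/* only adjacent extents merge: l must end exactly where r starts */
	if (l->end != r->end - r->size)
		return false;

	if (l->size + r->size > USHRT_MAX)
		return false;

	l->end += r->size;
	l->size += r->size;
	return true;
}
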
const struct btree_keys_ops bch_extent_keys_ops = {
	.sort_cmp	= bch_extent_sort_cmp,
	.sort_fixup	= bch_extent_sort_fixup,
	.insert_fixup	= bch_extent_insert_fixup,
	.key_invalid	= bch_extent_invalid,
	.key_bad	= bch_extent_bad,
	.key_merge	= bch_extent_merge,
	.key_to_text	= bch_extent_to_text,
	.key_dump	= bch_bkey_dump,
	.is_extents	= true,
};