btree.c: 400ffaa2acd72274e2c7293a9724382383bebf3e → 2452cc89063a2a6890368f185c4b6d7d8802179e
/*
 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
 *
 * Uses a block device as cache for other block devices; optimized for SSDs.
 * All allocation is done in buckets, which should match the erase block size
 * of the device.
 *
 * Buckets containing cached data are kept on a heap sorted by priority;

--- 103 unchanged lines hidden ---

 * @key: key to recurse on
 * @b: parent btree node
 * @op: pointer to struct btree_op
 */
#define btree(fn, key, b, op, ...) \
({ \
	int _r, l = (b)->level - 1; \
	bool _w = l <= (op)->lock; \
-	struct btree *_child = bch_btree_node_get((b)->c, op, key, l, _w);\
+	struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
+						  _w, b); \
	if (!IS_ERR(_child)) { \
-		_child->parent = (b); \
		_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
		rw_unlock(_w, _child); \
	} else \
		_r = PTR_ERR(_child); \
	_r; \
})
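
[Note: callers invoke this macro with the suffix of a bch_btree_* function,
as in the ret = btree(check_recurse, p, b, op) call further down in this
diff. A minimal self-contained sketch of the same token-pasting recursion
pattern follows; every name in it is illustrative, not bcache API.]

/* example only: statement expression + token pasting, as in btree() above */
#include <stdio.h>

struct node {
	int level;
	struct node *child;
};

static int do_count(struct node *n)
{
	return n->level + 1;
}

#define visit(fn, b) \
({ \
	struct node *_child = (b)->child; \
	int _r = _child ? do_ ## fn(_child) : -1; \
	_r; \
})

int main(void)
{
	struct node leaf = { .level = 0, .child = NULL };
	struct node root = { .level = 1, .child = &leaf };

	printf("%d\n", visit(count, &root));	/* expands to do_count(&leaf) */
	return 0;
}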

/**

--- 6 unchanged lines hidden ---

({ \
	int _r = -EINTR; \
	do { \
		struct btree *_b = (c)->root; \
		bool _w = insert_lock(op, _b); \
		rw_lock(_w, _b, _b->level); \
		if (_b == (c)->root && \
		    _w == insert_lock(op, _b)) { \
+			_b->parent = NULL; \
			_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
		} \
		rw_unlock(_w, _b); \
		bch_cannibalize_unlock(c); \
		if (_r == -EINTR) \
			schedule(); \
	} while (_r == -EINTR); \
	\

--- 808 unchanged lines hidden ---

 * in from disk if necessary.
 *
 * If IO is necessary and running under generic_make_request, returns -EAGAIN.
 *
 * The btree node will have either a read or a write lock held, depending on
 * level and op->lock.
 */
struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
-				 struct bkey *k, int level, bool write)
+				 struct bkey *k, int level, bool write,
+				 struct btree *parent)
{
	int i = 0;
	struct btree *b;

	BUG_ON(level < 0);
retry:
	b = mca_find(c, k);

--- 18 unchanged lines hidden ---

		rw_lock(write, b, level);
		if (PTR_HASH(c, &b->key) != PTR_HASH(c, k)) {
			rw_unlock(write, b);
			goto retry;
		}
		BUG_ON(b->level != level);
	}

+	b->parent = parent;
	b->accessed = 1;

	for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
		prefetch(b->keys.set[i].tree);
		prefetch(b->keys.set[i].data);
	}

	for (; i <= b->keys.nsets; i++)

--- 4 unchanged lines hidden ---

		return ERR_PTR(-EIO);
	}

	BUG_ON(!b->written);

	return b;
}

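[Note: failure is reported here by encoding a negative errno in the returned
pointer (ERR_PTR(-EIO) above); callers such as the btree() macro unpack it
with IS_ERR()/PTR_ERR(). A simplified userspace re-implementation of the
kernel convention from include/linux/err.h, shown only to make the encoding
concrete:]

/* example only: the ERR_PTR pointer/errno encoding */
#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095	/* errnos fit in the top page of the address space */

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *p = ERR_PTR(-EIO);

	if (IS_ERR(p))
		printf("pointer carries errno %ld\n", PTR_ERR(p));
	return 0;
}
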
-static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
+static void btree_node_prefetch(struct btree *parent, struct bkey *k)
{
	struct btree *b;

-	mutex_lock(&c->bucket_lock);
-	b = mca_alloc(c, NULL, k, level);
-	mutex_unlock(&c->bucket_lock);
+	mutex_lock(&parent->c->bucket_lock);
+	b = mca_alloc(parent->c, NULL, k, parent->level - 1);
+	mutex_unlock(&parent->c->bucket_lock);

	if (!IS_ERR_OR_NULL(b)) {
+		b->parent = parent;
		bch_btree_node_read(b);
		rw_unlock(true, b);
	}
}

/* Btree alloc */

static void btree_node_free(struct btree *b)

--- 14 unchanged lines hidden ---

	mutex_lock(&b->c->bucket_lock);
	bch_bucket_free(b->c, &b->key);
	mca_bucket_free(b);
	mutex_unlock(&b->c->bucket_lock);
}

struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
-				     int level, bool wait)
+				     int level, bool wait,
+				     struct btree *parent)
{
	BKEY_PADDED(key) k;
	struct btree *b = ERR_PTR(-EAGAIN);

	mutex_lock(&c->bucket_lock);
retry:
	if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
		goto err;

--- 7 unchanged lines hidden ---

	if (!b) {
		cache_bug(c,
			"Tried to allocate bucket that was in btree cache");
		goto retry;
	}

	b->accessed = 1;
+	b->parent = parent;
	bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));

	mutex_unlock(&c->bucket_lock);

	trace_bcache_btree_node_alloc(b);
	return b;
err_free:
	bch_bucket_free(c, &k.key);
err:
	mutex_unlock(&c->bucket_lock);

	trace_bcache_btree_node_alloc_fail(c);
	return b;
}

static struct btree *bch_btree_node_alloc(struct cache_set *c,
-					  struct btree_op *op, int level)
+					  struct btree_op *op, int level,
+					  struct btree *parent)
{
-	return __bch_btree_node_alloc(c, op, level, op != NULL);
+	return __bch_btree_node_alloc(c, op, level, op != NULL, parent);
}

static struct btree *btree_node_alloc_replacement(struct btree *b,
						  struct btree_op *op)
{
-	struct btree *n = bch_btree_node_alloc(b->c, op, b->level);
+	struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
	if (!IS_ERR_OR_NULL(n)) {
		mutex_lock(&n->write_lock);
		bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
		bkey_copy_key(&n->key, &b->key);
		mutex_unlock(&n->write_lock);
	}

	return n;
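
[Note: the pattern running through this diff is that the node constructors
(bch_btree_node_get(), __bch_btree_node_alloc() and their wrappers) now take
the parent and set b->parent before the node is handed back, instead of each
caller patching it up afterwards. A toy illustration of that refactor; the
names are illustrative, not bcache API.]

/* example only: establish the parent link inside the constructor */
#include <stdlib.h>

struct node {
	struct node *parent;
	int level;
};

static struct node *node_alloc(int level, struct node *parent)
{
	struct node *n = malloc(sizeof(*n));

	if (n) {
		n->level = level;
		n->parent = parent;	/* previously every caller's job */
	}
	return n;
}

int main(void)
{
	struct node *root = node_alloc(1, NULL);
	struct node *child = root ? node_alloc(0, root) : NULL;

	free(child);
	free(root);
	return 0;
}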

--- 397 unchanged lines hidden ---

	for (i = r; i < r + ARRAY_SIZE(r); i++)
		i->b = ERR_PTR(-EINTR);

	while (1) {
		k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
		if (k) {
			r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
-						  true);
+						  true, b);
			if (IS_ERR(r->b)) {
				ret = PTR_ERR(r->b);
				break;
			}

			r->keys = btree_gc_count_keys(r->b);

			ret = btree_gc_coalesce(b, op, gc, r);

--- 278 unchanged lines hidden ---

	if (b->level) {
		bch_btree_iter_init(&b->keys, &iter, NULL);

		do {
			k = bch_btree_iter_next_filter(&iter, &b->keys,
						       bch_ptr_bad);
			if (k)
-				btree_node_prefetch(b->c, k, b->level - 1);
+				btree_node_prefetch(b, k);

			if (p)
				ret = btree(check_recurse, p, b, op);

			p = k;
		} while (p && !ret);
	}
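
[Note: this loop runs deliberately one key behind: it issues the prefetch for
the current key k, then recurses into the previous key p, so the next child's
read is in flight while the current child is being checked. The same
software-pipelining shape in a self-contained sketch; the helpers are
illustrative stand-ins, not bcache API.]

/* example only: prefetch item i while processing item i - 1 */
#include <stdio.h>

static void start_async_read(int id)	/* stands in for btree_node_prefetch() */
{
	printf("prefetch %d\n", id);
}

static void process(int id)		/* stands in for btree(check_recurse, ...) */
{
	printf("process  %d\n", id);
}

int main(void)
{
	int keys[] = { 1, 2, 3 };
	int i = 0, n = 3;
	int *p = NULL, *k;

	do {
		k = i < n ? &keys[i++] : NULL;
		if (k)
			start_async_read(*k);	/* next child: IO in flight */
		if (p)
			process(*p);		/* previous child: now ready */
		p = k;
	} while (p);
	return 0;
}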

--- 148 unchanged lines hidden ---

	split = set_blocks(btree_bset_first(n1),
			   block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;

	if (split) {
		unsigned keys = 0;

		trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);

-		n2 = bch_btree_node_alloc(b->c, op, b->level);
+		n2 = bch_btree_node_alloc(b->c, op, b->level, b->parent);
		if (IS_ERR(n2))
			goto err_free1;

		if (!b->parent) {
-			n3 = bch_btree_node_alloc(b->c, op, b->level + 1);
+			n3 = bch_btree_node_alloc(b->c, op, b->level + 1, NULL);
			if (IS_ERR(n3))
				goto err_free2;
		}

		mutex_lock(&n1->write_lock);
		mutex_lock(&n2->write_lock);

		bch_btree_insert_keys(n1, op, insert_keys, replace_key);

--- 526 unchanged lines hidden ---
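
[Note on the split condition above: a node is split when its first bset
occupies more than 4/5 of the node's blocks, in integer arithmetic. A tiny
worked example with an assumed node size of 64 blocks (illustrative only):
(64 * 4) / 5 = 51, so a 52-block bset splits and a 51-block one does not.]

/* example only: the 4/5 fill threshold with assumed sizes */
#include <stdio.h>

int main(void)
{
	unsigned btree_blocks = 64;	/* assumed node size in blocks */

	for (unsigned bset = 50; bset <= 53; bset++)
		printf("bset %u blocks -> %s\n", bset,
		       bset > (btree_blocks * 4) / 5 ? "split" : "no split");
	return 0;
}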