1cafe5635SKent Overstreet /* 2cafe5635SKent Overstreet * Main bcache entry point - handle a read or a write request and decide what to 3cafe5635SKent Overstreet * do with it; the make_request functions are called by the block layer. 4cafe5635SKent Overstreet * 5cafe5635SKent Overstreet * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> 6cafe5635SKent Overstreet * Copyright 2012 Google, Inc. 7cafe5635SKent Overstreet */ 8cafe5635SKent Overstreet 9cafe5635SKent Overstreet #include "bcache.h" 10cafe5635SKent Overstreet #include "btree.h" 11cafe5635SKent Overstreet #include "debug.h" 12cafe5635SKent Overstreet #include "request.h" 13cafe5635SKent Overstreet 14cafe5635SKent Overstreet #include <linux/cgroup.h> 15cafe5635SKent Overstreet #include <linux/module.h> 16cafe5635SKent Overstreet #include <linux/hash.h> 17cafe5635SKent Overstreet #include <linux/random.h> 18cafe5635SKent Overstreet #include "blk-cgroup.h" 19cafe5635SKent Overstreet 20cafe5635SKent Overstreet #include <trace/events/bcache.h> 21cafe5635SKent Overstreet 22cafe5635SKent Overstreet #define CUTOFF_CACHE_ADD 95 23cafe5635SKent Overstreet #define CUTOFF_CACHE_READA 90 24cafe5635SKent Overstreet #define CUTOFF_WRITEBACK 50 25cafe5635SKent Overstreet #define CUTOFF_WRITEBACK_SYNC 75 26cafe5635SKent Overstreet 27cafe5635SKent Overstreet struct kmem_cache *bch_search_cache; 28cafe5635SKent Overstreet 29cafe5635SKent Overstreet static void check_should_skip(struct cached_dev *, struct search *); 30cafe5635SKent Overstreet 31cafe5635SKent Overstreet /* Cgroup interface */ 32cafe5635SKent Overstreet 33cafe5635SKent Overstreet #ifdef CONFIG_CGROUP_BCACHE 34cafe5635SKent Overstreet static struct bch_cgroup bcache_default_cgroup = { .cache_mode = -1 }; 35cafe5635SKent Overstreet 36cafe5635SKent Overstreet static struct bch_cgroup *cgroup_to_bcache(struct cgroup *cgroup) 37cafe5635SKent Overstreet { 38cafe5635SKent Overstreet struct cgroup_subsys_state *css; 39cafe5635SKent Overstreet return 
cgroup && 40cafe5635SKent Overstreet (css = cgroup_subsys_state(cgroup, bcache_subsys_id)) 41cafe5635SKent Overstreet ? container_of(css, struct bch_cgroup, css) 42cafe5635SKent Overstreet : &bcache_default_cgroup; 43cafe5635SKent Overstreet } 44cafe5635SKent Overstreet 45cafe5635SKent Overstreet struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio) 46cafe5635SKent Overstreet { 47cafe5635SKent Overstreet struct cgroup_subsys_state *css = bio->bi_css 48cafe5635SKent Overstreet ? cgroup_subsys_state(bio->bi_css->cgroup, bcache_subsys_id) 49cafe5635SKent Overstreet : task_subsys_state(current, bcache_subsys_id); 50cafe5635SKent Overstreet 51cafe5635SKent Overstreet return css 52cafe5635SKent Overstreet ? container_of(css, struct bch_cgroup, css) 53cafe5635SKent Overstreet : &bcache_default_cgroup; 54cafe5635SKent Overstreet } 55cafe5635SKent Overstreet 56cafe5635SKent Overstreet static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft, 57cafe5635SKent Overstreet struct file *file, 58cafe5635SKent Overstreet char __user *buf, size_t nbytes, loff_t *ppos) 59cafe5635SKent Overstreet { 60cafe5635SKent Overstreet char tmp[1024]; 61cafe5635SKent Overstreet int len = snprint_string_list(tmp, PAGE_SIZE, bch_cache_modes, 62cafe5635SKent Overstreet cgroup_to_bcache(cgrp)->cache_mode + 1); 63cafe5635SKent Overstreet 64cafe5635SKent Overstreet if (len < 0) 65cafe5635SKent Overstreet return len; 66cafe5635SKent Overstreet 67cafe5635SKent Overstreet return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); 68cafe5635SKent Overstreet } 69cafe5635SKent Overstreet 70cafe5635SKent Overstreet static int cache_mode_write(struct cgroup *cgrp, struct cftype *cft, 71cafe5635SKent Overstreet const char *buf) 72cafe5635SKent Overstreet { 73cafe5635SKent Overstreet int v = read_string_list(buf, bch_cache_modes); 74cafe5635SKent Overstreet if (v < 0) 75cafe5635SKent Overstreet return v; 76cafe5635SKent Overstreet 77cafe5635SKent Overstreet cgroup_to_bcache(cgrp)->cache_mode = v 
- 1; 78cafe5635SKent Overstreet return 0; 79cafe5635SKent Overstreet } 80cafe5635SKent Overstreet 81cafe5635SKent Overstreet static u64 bch_verify_read(struct cgroup *cgrp, struct cftype *cft) 82cafe5635SKent Overstreet { 83cafe5635SKent Overstreet return cgroup_to_bcache(cgrp)->verify; 84cafe5635SKent Overstreet } 85cafe5635SKent Overstreet 86cafe5635SKent Overstreet static int bch_verify_write(struct cgroup *cgrp, struct cftype *cft, u64 val) 87cafe5635SKent Overstreet { 88cafe5635SKent Overstreet cgroup_to_bcache(cgrp)->verify = val; 89cafe5635SKent Overstreet return 0; 90cafe5635SKent Overstreet } 91cafe5635SKent Overstreet 92cafe5635SKent Overstreet static u64 bch_cache_hits_read(struct cgroup *cgrp, struct cftype *cft) 93cafe5635SKent Overstreet { 94cafe5635SKent Overstreet struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); 95cafe5635SKent Overstreet return atomic_read(&bcachecg->stats.cache_hits); 96cafe5635SKent Overstreet } 97cafe5635SKent Overstreet 98cafe5635SKent Overstreet static u64 bch_cache_misses_read(struct cgroup *cgrp, struct cftype *cft) 99cafe5635SKent Overstreet { 100cafe5635SKent Overstreet struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); 101cafe5635SKent Overstreet return atomic_read(&bcachecg->stats.cache_misses); 102cafe5635SKent Overstreet } 103cafe5635SKent Overstreet 104cafe5635SKent Overstreet static u64 bch_cache_bypass_hits_read(struct cgroup *cgrp, 105cafe5635SKent Overstreet struct cftype *cft) 106cafe5635SKent Overstreet { 107cafe5635SKent Overstreet struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); 108cafe5635SKent Overstreet return atomic_read(&bcachecg->stats.cache_bypass_hits); 109cafe5635SKent Overstreet } 110cafe5635SKent Overstreet 111cafe5635SKent Overstreet static u64 bch_cache_bypass_misses_read(struct cgroup *cgrp, 112cafe5635SKent Overstreet struct cftype *cft) 113cafe5635SKent Overstreet { 114cafe5635SKent Overstreet struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); 115cafe5635SKent Overstreet return 
atomic_read(&bcachecg->stats.cache_bypass_misses); 116cafe5635SKent Overstreet } 117cafe5635SKent Overstreet 118cafe5635SKent Overstreet static struct cftype bch_files[] = { 119cafe5635SKent Overstreet { 120cafe5635SKent Overstreet .name = "cache_mode", 121cafe5635SKent Overstreet .read = cache_mode_read, 122cafe5635SKent Overstreet .write_string = cache_mode_write, 123cafe5635SKent Overstreet }, 124cafe5635SKent Overstreet { 125cafe5635SKent Overstreet .name = "verify", 126cafe5635SKent Overstreet .read_u64 = bch_verify_read, 127cafe5635SKent Overstreet .write_u64 = bch_verify_write, 128cafe5635SKent Overstreet }, 129cafe5635SKent Overstreet { 130cafe5635SKent Overstreet .name = "cache_hits", 131cafe5635SKent Overstreet .read_u64 = bch_cache_hits_read, 132cafe5635SKent Overstreet }, 133cafe5635SKent Overstreet { 134cafe5635SKent Overstreet .name = "cache_misses", 135cafe5635SKent Overstreet .read_u64 = bch_cache_misses_read, 136cafe5635SKent Overstreet }, 137cafe5635SKent Overstreet { 138cafe5635SKent Overstreet .name = "cache_bypass_hits", 139cafe5635SKent Overstreet .read_u64 = bch_cache_bypass_hits_read, 140cafe5635SKent Overstreet }, 141cafe5635SKent Overstreet { 142cafe5635SKent Overstreet .name = "cache_bypass_misses", 143cafe5635SKent Overstreet .read_u64 = bch_cache_bypass_misses_read, 144cafe5635SKent Overstreet }, 145cafe5635SKent Overstreet { } /* terminate */ 146cafe5635SKent Overstreet }; 147cafe5635SKent Overstreet 148cafe5635SKent Overstreet static void init_bch_cgroup(struct bch_cgroup *cg) 149cafe5635SKent Overstreet { 150cafe5635SKent Overstreet cg->cache_mode = -1; 151cafe5635SKent Overstreet } 152cafe5635SKent Overstreet 153cafe5635SKent Overstreet static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup) 154cafe5635SKent Overstreet { 155cafe5635SKent Overstreet struct bch_cgroup *cg; 156cafe5635SKent Overstreet 157cafe5635SKent Overstreet cg = kzalloc(sizeof(*cg), GFP_KERNEL); 158cafe5635SKent Overstreet if (!cg) 
159cafe5635SKent Overstreet return ERR_PTR(-ENOMEM); 160cafe5635SKent Overstreet init_bch_cgroup(cg); 161cafe5635SKent Overstreet return &cg->css; 162cafe5635SKent Overstreet } 163cafe5635SKent Overstreet 164cafe5635SKent Overstreet static void bcachecg_destroy(struct cgroup *cgroup) 165cafe5635SKent Overstreet { 166cafe5635SKent Overstreet struct bch_cgroup *cg = cgroup_to_bcache(cgroup); 167cafe5635SKent Overstreet free_css_id(&bcache_subsys, &cg->css); 168cafe5635SKent Overstreet kfree(cg); 169cafe5635SKent Overstreet } 170cafe5635SKent Overstreet 171cafe5635SKent Overstreet struct cgroup_subsys bcache_subsys = { 172cafe5635SKent Overstreet .create = bcachecg_create, 173cafe5635SKent Overstreet .destroy = bcachecg_destroy, 174cafe5635SKent Overstreet .subsys_id = bcache_subsys_id, 175cafe5635SKent Overstreet .name = "bcache", 176cafe5635SKent Overstreet .module = THIS_MODULE, 177cafe5635SKent Overstreet }; 178cafe5635SKent Overstreet EXPORT_SYMBOL_GPL(bcache_subsys); 179cafe5635SKent Overstreet #endif 180cafe5635SKent Overstreet 181cafe5635SKent Overstreet static unsigned cache_mode(struct cached_dev *dc, struct bio *bio) 182cafe5635SKent Overstreet { 183cafe5635SKent Overstreet #ifdef CONFIG_CGROUP_BCACHE 184cafe5635SKent Overstreet int r = bch_bio_to_cgroup(bio)->cache_mode; 185cafe5635SKent Overstreet if (r >= 0) 186cafe5635SKent Overstreet return r; 187cafe5635SKent Overstreet #endif 188cafe5635SKent Overstreet return BDEV_CACHE_MODE(&dc->sb); 189cafe5635SKent Overstreet } 190cafe5635SKent Overstreet 191cafe5635SKent Overstreet static bool verify(struct cached_dev *dc, struct bio *bio) 192cafe5635SKent Overstreet { 193cafe5635SKent Overstreet #ifdef CONFIG_CGROUP_BCACHE 194cafe5635SKent Overstreet if (bch_bio_to_cgroup(bio)->verify) 195cafe5635SKent Overstreet return true; 196cafe5635SKent Overstreet #endif 197cafe5635SKent Overstreet return dc->verify; 198cafe5635SKent Overstreet } 199cafe5635SKent Overstreet 200cafe5635SKent Overstreet static void 
bio_csum(struct bio *bio, struct bkey *k) 201cafe5635SKent Overstreet { 202cafe5635SKent Overstreet struct bio_vec *bv; 203cafe5635SKent Overstreet uint64_t csum = 0; 204cafe5635SKent Overstreet int i; 205cafe5635SKent Overstreet 206cafe5635SKent Overstreet bio_for_each_segment(bv, bio, i) { 207cafe5635SKent Overstreet void *d = kmap(bv->bv_page) + bv->bv_offset; 208cafe5635SKent Overstreet csum = crc64_update(csum, d, bv->bv_len); 209cafe5635SKent Overstreet kunmap(bv->bv_page); 210cafe5635SKent Overstreet } 211cafe5635SKent Overstreet 212cafe5635SKent Overstreet k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1); 213cafe5635SKent Overstreet } 214cafe5635SKent Overstreet 215cafe5635SKent Overstreet /* Insert data into cache */ 216cafe5635SKent Overstreet 217cafe5635SKent Overstreet static void bio_invalidate(struct closure *cl) 218cafe5635SKent Overstreet { 219cafe5635SKent Overstreet struct btree_op *op = container_of(cl, struct btree_op, cl); 220cafe5635SKent Overstreet struct bio *bio = op->cache_bio; 221cafe5635SKent Overstreet 222cafe5635SKent Overstreet pr_debug("invalidating %i sectors from %llu", 223cafe5635SKent Overstreet bio_sectors(bio), (uint64_t) bio->bi_sector); 224cafe5635SKent Overstreet 225cafe5635SKent Overstreet while (bio_sectors(bio)) { 226cafe5635SKent Overstreet unsigned len = min(bio_sectors(bio), 1U << 14); 227cafe5635SKent Overstreet 228cafe5635SKent Overstreet if (bch_keylist_realloc(&op->keys, 0, op->c)) 229cafe5635SKent Overstreet goto out; 230cafe5635SKent Overstreet 231cafe5635SKent Overstreet bio->bi_sector += len; 232cafe5635SKent Overstreet bio->bi_size -= len << 9; 233cafe5635SKent Overstreet 234cafe5635SKent Overstreet bch_keylist_add(&op->keys, 235cafe5635SKent Overstreet &KEY(op->inode, bio->bi_sector, len)); 236cafe5635SKent Overstreet } 237cafe5635SKent Overstreet 238cafe5635SKent Overstreet op->insert_data_done = true; 239cafe5635SKent Overstreet bio_put(bio); 240cafe5635SKent Overstreet out: 241cafe5635SKent Overstreet 
continue_at(cl, bch_journal, bcache_wq); 242cafe5635SKent Overstreet } 243cafe5635SKent Overstreet 244cafe5635SKent Overstreet struct open_bucket { 245cafe5635SKent Overstreet struct list_head list; 246cafe5635SKent Overstreet struct task_struct *last; 247cafe5635SKent Overstreet unsigned sectors_free; 248cafe5635SKent Overstreet BKEY_PADDED(key); 249cafe5635SKent Overstreet }; 250cafe5635SKent Overstreet 251cafe5635SKent Overstreet void bch_open_buckets_free(struct cache_set *c) 252cafe5635SKent Overstreet { 253cafe5635SKent Overstreet struct open_bucket *b; 254cafe5635SKent Overstreet 255cafe5635SKent Overstreet while (!list_empty(&c->data_buckets)) { 256cafe5635SKent Overstreet b = list_first_entry(&c->data_buckets, 257cafe5635SKent Overstreet struct open_bucket, list); 258cafe5635SKent Overstreet list_del(&b->list); 259cafe5635SKent Overstreet kfree(b); 260cafe5635SKent Overstreet } 261cafe5635SKent Overstreet } 262cafe5635SKent Overstreet 263cafe5635SKent Overstreet int bch_open_buckets_alloc(struct cache_set *c) 264cafe5635SKent Overstreet { 265cafe5635SKent Overstreet int i; 266cafe5635SKent Overstreet 267cafe5635SKent Overstreet spin_lock_init(&c->data_bucket_lock); 268cafe5635SKent Overstreet 269cafe5635SKent Overstreet for (i = 0; i < 6; i++) { 270cafe5635SKent Overstreet struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL); 271cafe5635SKent Overstreet if (!b) 272cafe5635SKent Overstreet return -ENOMEM; 273cafe5635SKent Overstreet 274cafe5635SKent Overstreet list_add(&b->list, &c->data_buckets); 275cafe5635SKent Overstreet } 276cafe5635SKent Overstreet 277cafe5635SKent Overstreet return 0; 278cafe5635SKent Overstreet } 279cafe5635SKent Overstreet 280cafe5635SKent Overstreet /* 281cafe5635SKent Overstreet * We keep multiple buckets open for writes, and try to segregate different 282cafe5635SKent Overstreet * write streams for better cache utilization: first we look for a bucket where 283cafe5635SKent Overstreet * the last write to it was sequential with 
the current write, and failing that 284cafe5635SKent Overstreet * we look for a bucket that was last used by the same task. 285cafe5635SKent Overstreet * 286cafe5635SKent Overstreet * The ideas is if you've got multiple tasks pulling data into the cache at the 287cafe5635SKent Overstreet * same time, you'll get better cache utilization if you try to segregate their 288cafe5635SKent Overstreet * data and preserve locality. 289cafe5635SKent Overstreet * 290cafe5635SKent Overstreet * For example, say you've starting Firefox at the same time you're copying a 291cafe5635SKent Overstreet * bunch of files. Firefox will likely end up being fairly hot and stay in the 292cafe5635SKent Overstreet * cache awhile, but the data you copied might not be; if you wrote all that 293cafe5635SKent Overstreet * data to the same buckets it'd get invalidated at the same time. 294cafe5635SKent Overstreet * 295cafe5635SKent Overstreet * Both of those tasks will be doing fairly random IO so we can't rely on 296cafe5635SKent Overstreet * detecting sequential IO to segregate their data, but going off of the task 297cafe5635SKent Overstreet * should be a sane heuristic. 
298cafe5635SKent Overstreet */ 299cafe5635SKent Overstreet static struct open_bucket *pick_data_bucket(struct cache_set *c, 300cafe5635SKent Overstreet const struct bkey *search, 301cafe5635SKent Overstreet struct task_struct *task, 302cafe5635SKent Overstreet struct bkey *alloc) 303cafe5635SKent Overstreet { 304cafe5635SKent Overstreet struct open_bucket *ret, *ret_task = NULL; 305cafe5635SKent Overstreet 306cafe5635SKent Overstreet list_for_each_entry_reverse(ret, &c->data_buckets, list) 307cafe5635SKent Overstreet if (!bkey_cmp(&ret->key, search)) 308cafe5635SKent Overstreet goto found; 309cafe5635SKent Overstreet else if (ret->last == task) 310cafe5635SKent Overstreet ret_task = ret; 311cafe5635SKent Overstreet 312cafe5635SKent Overstreet ret = ret_task ?: list_first_entry(&c->data_buckets, 313cafe5635SKent Overstreet struct open_bucket, list); 314cafe5635SKent Overstreet found: 315cafe5635SKent Overstreet if (!ret->sectors_free && KEY_PTRS(alloc)) { 316cafe5635SKent Overstreet ret->sectors_free = c->sb.bucket_size; 317cafe5635SKent Overstreet bkey_copy(&ret->key, alloc); 318cafe5635SKent Overstreet bkey_init(alloc); 319cafe5635SKent Overstreet } 320cafe5635SKent Overstreet 321cafe5635SKent Overstreet if (!ret->sectors_free) 322cafe5635SKent Overstreet ret = NULL; 323cafe5635SKent Overstreet 324cafe5635SKent Overstreet return ret; 325cafe5635SKent Overstreet } 326cafe5635SKent Overstreet 327cafe5635SKent Overstreet /* 328cafe5635SKent Overstreet * Allocates some space in the cache to write to, and k to point to the newly 329cafe5635SKent Overstreet * allocated space, and updates KEY_SIZE(k) and KEY_OFFSET(k) (to point to the 330cafe5635SKent Overstreet * end of the newly allocated space). 331cafe5635SKent Overstreet * 332cafe5635SKent Overstreet * May allocate fewer sectors than @sectors, KEY_SIZE(k) indicates how many 333cafe5635SKent Overstreet * sectors were actually allocated. 
334cafe5635SKent Overstreet * 335cafe5635SKent Overstreet * If s->writeback is true, will not fail. 336cafe5635SKent Overstreet */ 337cafe5635SKent Overstreet static bool bch_alloc_sectors(struct bkey *k, unsigned sectors, 338cafe5635SKent Overstreet struct search *s) 339cafe5635SKent Overstreet { 340cafe5635SKent Overstreet struct cache_set *c = s->op.c; 341cafe5635SKent Overstreet struct open_bucket *b; 342cafe5635SKent Overstreet BKEY_PADDED(key) alloc; 343cafe5635SKent Overstreet struct closure cl, *w = NULL; 344cafe5635SKent Overstreet unsigned i; 345cafe5635SKent Overstreet 346cafe5635SKent Overstreet if (s->writeback) { 347cafe5635SKent Overstreet closure_init_stack(&cl); 348cafe5635SKent Overstreet w = &cl; 349cafe5635SKent Overstreet } 350cafe5635SKent Overstreet 351cafe5635SKent Overstreet /* 352cafe5635SKent Overstreet * We might have to allocate a new bucket, which we can't do with a 353cafe5635SKent Overstreet * spinlock held. So if we have to allocate, we drop the lock, allocate 354cafe5635SKent Overstreet * and then retry. KEY_PTRS() indicates whether alloc points to 355cafe5635SKent Overstreet * allocated bucket(s). 356cafe5635SKent Overstreet */ 357cafe5635SKent Overstreet 358cafe5635SKent Overstreet bkey_init(&alloc.key); 359cafe5635SKent Overstreet spin_lock(&c->data_bucket_lock); 360cafe5635SKent Overstreet 361cafe5635SKent Overstreet while (!(b = pick_data_bucket(c, k, s->task, &alloc.key))) { 362cafe5635SKent Overstreet unsigned watermark = s->op.write_prio 363cafe5635SKent Overstreet ? 
WATERMARK_MOVINGGC 364cafe5635SKent Overstreet : WATERMARK_NONE; 365cafe5635SKent Overstreet 366cafe5635SKent Overstreet spin_unlock(&c->data_bucket_lock); 367cafe5635SKent Overstreet 368cafe5635SKent Overstreet if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, w)) 369cafe5635SKent Overstreet return false; 370cafe5635SKent Overstreet 371cafe5635SKent Overstreet spin_lock(&c->data_bucket_lock); 372cafe5635SKent Overstreet } 373cafe5635SKent Overstreet 374cafe5635SKent Overstreet /* 375cafe5635SKent Overstreet * If we had to allocate, we might race and not need to allocate the 376cafe5635SKent Overstreet * second time we call find_data_bucket(). If we allocated a bucket but 377cafe5635SKent Overstreet * didn't use it, drop the refcount bch_bucket_alloc_set() took: 378cafe5635SKent Overstreet */ 379cafe5635SKent Overstreet if (KEY_PTRS(&alloc.key)) 380cafe5635SKent Overstreet __bkey_put(c, &alloc.key); 381cafe5635SKent Overstreet 382cafe5635SKent Overstreet for (i = 0; i < KEY_PTRS(&b->key); i++) 383cafe5635SKent Overstreet EBUG_ON(ptr_stale(c, &b->key, i)); 384cafe5635SKent Overstreet 385cafe5635SKent Overstreet /* Set up the pointer to the space we're allocating: */ 386cafe5635SKent Overstreet 387cafe5635SKent Overstreet for (i = 0; i < KEY_PTRS(&b->key); i++) 388cafe5635SKent Overstreet k->ptr[i] = b->key.ptr[i]; 389cafe5635SKent Overstreet 390cafe5635SKent Overstreet sectors = min(sectors, b->sectors_free); 391cafe5635SKent Overstreet 392cafe5635SKent Overstreet SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors); 393cafe5635SKent Overstreet SET_KEY_SIZE(k, sectors); 394cafe5635SKent Overstreet SET_KEY_PTRS(k, KEY_PTRS(&b->key)); 395cafe5635SKent Overstreet 396cafe5635SKent Overstreet /* 397cafe5635SKent Overstreet * Move b to the end of the lru, and keep track of what this bucket was 398cafe5635SKent Overstreet * last used for: 399cafe5635SKent Overstreet */ 400cafe5635SKent Overstreet list_move_tail(&b->list, &c->data_buckets); 401cafe5635SKent Overstreet 
bkey_copy_key(&b->key, k); 402cafe5635SKent Overstreet b->last = s->task; 403cafe5635SKent Overstreet 404cafe5635SKent Overstreet b->sectors_free -= sectors; 405cafe5635SKent Overstreet 406cafe5635SKent Overstreet for (i = 0; i < KEY_PTRS(&b->key); i++) { 407cafe5635SKent Overstreet SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors); 408cafe5635SKent Overstreet 409cafe5635SKent Overstreet atomic_long_add(sectors, 410cafe5635SKent Overstreet &PTR_CACHE(c, &b->key, i)->sectors_written); 411cafe5635SKent Overstreet } 412cafe5635SKent Overstreet 413cafe5635SKent Overstreet if (b->sectors_free < c->sb.block_size) 414cafe5635SKent Overstreet b->sectors_free = 0; 415cafe5635SKent Overstreet 416cafe5635SKent Overstreet /* 417cafe5635SKent Overstreet * k takes refcounts on the buckets it points to until it's inserted 418cafe5635SKent Overstreet * into the btree, but if we're done with this bucket we just transfer 419cafe5635SKent Overstreet * get_data_bucket()'s refcount. 420cafe5635SKent Overstreet */ 421cafe5635SKent Overstreet if (b->sectors_free) 422cafe5635SKent Overstreet for (i = 0; i < KEY_PTRS(&b->key); i++) 423cafe5635SKent Overstreet atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin); 424cafe5635SKent Overstreet 425cafe5635SKent Overstreet spin_unlock(&c->data_bucket_lock); 426cafe5635SKent Overstreet return true; 427cafe5635SKent Overstreet } 428cafe5635SKent Overstreet 429cafe5635SKent Overstreet static void bch_insert_data_error(struct closure *cl) 430cafe5635SKent Overstreet { 431cafe5635SKent Overstreet struct btree_op *op = container_of(cl, struct btree_op, cl); 432cafe5635SKent Overstreet 433cafe5635SKent Overstreet /* 434cafe5635SKent Overstreet * Our data write just errored, which means we've got a bunch of keys to 435cafe5635SKent Overstreet * insert that point to data that wasn't succesfully written. 
436cafe5635SKent Overstreet * 437cafe5635SKent Overstreet * We don't have to insert those keys but we still have to invalidate 438cafe5635SKent Overstreet * that region of the cache - so, if we just strip off all the pointers 439cafe5635SKent Overstreet * from the keys we'll accomplish just that. 440cafe5635SKent Overstreet */ 441cafe5635SKent Overstreet 442cafe5635SKent Overstreet struct bkey *src = op->keys.bottom, *dst = op->keys.bottom; 443cafe5635SKent Overstreet 444cafe5635SKent Overstreet while (src != op->keys.top) { 445cafe5635SKent Overstreet struct bkey *n = bkey_next(src); 446cafe5635SKent Overstreet 447cafe5635SKent Overstreet SET_KEY_PTRS(src, 0); 448cafe5635SKent Overstreet bkey_copy(dst, src); 449cafe5635SKent Overstreet 450cafe5635SKent Overstreet dst = bkey_next(dst); 451cafe5635SKent Overstreet src = n; 452cafe5635SKent Overstreet } 453cafe5635SKent Overstreet 454cafe5635SKent Overstreet op->keys.top = dst; 455cafe5635SKent Overstreet 456cafe5635SKent Overstreet bch_journal(cl); 457cafe5635SKent Overstreet } 458cafe5635SKent Overstreet 459cafe5635SKent Overstreet static void bch_insert_data_endio(struct bio *bio, int error) 460cafe5635SKent Overstreet { 461cafe5635SKent Overstreet struct closure *cl = bio->bi_private; 462cafe5635SKent Overstreet struct btree_op *op = container_of(cl, struct btree_op, cl); 463cafe5635SKent Overstreet struct search *s = container_of(op, struct search, op); 464cafe5635SKent Overstreet 465cafe5635SKent Overstreet if (error) { 466cafe5635SKent Overstreet /* TODO: We could try to recover from this. 
*/ 467cafe5635SKent Overstreet if (s->writeback) 468cafe5635SKent Overstreet s->error = error; 469cafe5635SKent Overstreet else if (s->write) 470cafe5635SKent Overstreet set_closure_fn(cl, bch_insert_data_error, bcache_wq); 471cafe5635SKent Overstreet else 472cafe5635SKent Overstreet set_closure_fn(cl, NULL, NULL); 473cafe5635SKent Overstreet } 474cafe5635SKent Overstreet 475cafe5635SKent Overstreet bch_bbio_endio(op->c, bio, error, "writing data to cache"); 476cafe5635SKent Overstreet } 477cafe5635SKent Overstreet 478cafe5635SKent Overstreet static void bch_insert_data_loop(struct closure *cl) 479cafe5635SKent Overstreet { 480cafe5635SKent Overstreet struct btree_op *op = container_of(cl, struct btree_op, cl); 481cafe5635SKent Overstreet struct search *s = container_of(op, struct search, op); 482cafe5635SKent Overstreet struct bio *bio = op->cache_bio, *n; 483cafe5635SKent Overstreet 484cafe5635SKent Overstreet if (op->skip) 485cafe5635SKent Overstreet return bio_invalidate(cl); 486cafe5635SKent Overstreet 487cafe5635SKent Overstreet if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) { 488cafe5635SKent Overstreet set_gc_sectors(op->c); 489cafe5635SKent Overstreet bch_queue_gc(op->c); 490cafe5635SKent Overstreet } 491cafe5635SKent Overstreet 492cafe5635SKent Overstreet do { 493cafe5635SKent Overstreet unsigned i; 494cafe5635SKent Overstreet struct bkey *k; 495cafe5635SKent Overstreet struct bio_set *split = s->d 496cafe5635SKent Overstreet ? s->d->bio_split : op->c->bio_split; 497cafe5635SKent Overstreet 498cafe5635SKent Overstreet /* 1 for the device pointer and 1 for the chksum */ 499cafe5635SKent Overstreet if (bch_keylist_realloc(&op->keys, 500cafe5635SKent Overstreet 1 + (op->csum ? 
1 : 0), 501cafe5635SKent Overstreet op->c)) 502cafe5635SKent Overstreet continue_at(cl, bch_journal, bcache_wq); 503cafe5635SKent Overstreet 504cafe5635SKent Overstreet k = op->keys.top; 505cafe5635SKent Overstreet bkey_init(k); 506cafe5635SKent Overstreet SET_KEY_INODE(k, op->inode); 507cafe5635SKent Overstreet SET_KEY_OFFSET(k, bio->bi_sector); 508cafe5635SKent Overstreet 509cafe5635SKent Overstreet if (!bch_alloc_sectors(k, bio_sectors(bio), s)) 510cafe5635SKent Overstreet goto err; 511cafe5635SKent Overstreet 512cafe5635SKent Overstreet n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split); 513cafe5635SKent Overstreet if (!n) { 514cafe5635SKent Overstreet __bkey_put(op->c, k); 515cafe5635SKent Overstreet continue_at(cl, bch_insert_data_loop, bcache_wq); 516cafe5635SKent Overstreet } 517cafe5635SKent Overstreet 518cafe5635SKent Overstreet n->bi_end_io = bch_insert_data_endio; 519cafe5635SKent Overstreet n->bi_private = cl; 520cafe5635SKent Overstreet 521cafe5635SKent Overstreet if (s->writeback) { 522cafe5635SKent Overstreet SET_KEY_DIRTY(k, true); 523cafe5635SKent Overstreet 524cafe5635SKent Overstreet for (i = 0; i < KEY_PTRS(k); i++) 525cafe5635SKent Overstreet SET_GC_MARK(PTR_BUCKET(op->c, k, i), 526cafe5635SKent Overstreet GC_MARK_DIRTY); 527cafe5635SKent Overstreet } 528cafe5635SKent Overstreet 529cafe5635SKent Overstreet SET_KEY_CSUM(k, op->csum); 530cafe5635SKent Overstreet if (KEY_CSUM(k)) 531cafe5635SKent Overstreet bio_csum(n, k); 532cafe5635SKent Overstreet 533cafe5635SKent Overstreet pr_debug("%s", pkey(k)); 534cafe5635SKent Overstreet bch_keylist_push(&op->keys); 535cafe5635SKent Overstreet 536cafe5635SKent Overstreet trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev); 537cafe5635SKent Overstreet n->bi_rw |= REQ_WRITE; 538cafe5635SKent Overstreet bch_submit_bbio(n, op->c, k, 0); 539cafe5635SKent Overstreet } while (n != bio); 540cafe5635SKent Overstreet 541cafe5635SKent Overstreet op->insert_data_done = true; 542cafe5635SKent Overstreet 
continue_at(cl, bch_journal, bcache_wq); 543cafe5635SKent Overstreet err: 544cafe5635SKent Overstreet /* bch_alloc_sectors() blocks if s->writeback = true */ 545cafe5635SKent Overstreet BUG_ON(s->writeback); 546cafe5635SKent Overstreet 547cafe5635SKent Overstreet /* 548cafe5635SKent Overstreet * But if it's not a writeback write we'd rather just bail out if 549cafe5635SKent Overstreet * there aren't any buckets ready to write to - it might take awhile and 550cafe5635SKent Overstreet * we might be starving btree writes for gc or something. 551cafe5635SKent Overstreet */ 552cafe5635SKent Overstreet 553cafe5635SKent Overstreet if (s->write) { 554cafe5635SKent Overstreet /* 555cafe5635SKent Overstreet * Writethrough write: We can't complete the write until we've 556cafe5635SKent Overstreet * updated the index. But we don't want to delay the write while 557cafe5635SKent Overstreet * we wait for buckets to be freed up, so just invalidate the 558cafe5635SKent Overstreet * rest of the write. 559cafe5635SKent Overstreet */ 560cafe5635SKent Overstreet op->skip = true; 561cafe5635SKent Overstreet return bio_invalidate(cl); 562cafe5635SKent Overstreet } else { 563cafe5635SKent Overstreet /* 564cafe5635SKent Overstreet * From a cache miss, we can just insert the keys for the data 565cafe5635SKent Overstreet * we have written or bail out if we didn't do anything. 
566cafe5635SKent Overstreet */ 567cafe5635SKent Overstreet op->insert_data_done = true; 568cafe5635SKent Overstreet bio_put(bio); 569cafe5635SKent Overstreet 570cafe5635SKent Overstreet if (!bch_keylist_empty(&op->keys)) 571cafe5635SKent Overstreet continue_at(cl, bch_journal, bcache_wq); 572cafe5635SKent Overstreet else 573cafe5635SKent Overstreet closure_return(cl); 574cafe5635SKent Overstreet } 575cafe5635SKent Overstreet } 576cafe5635SKent Overstreet 577cafe5635SKent Overstreet /** 578cafe5635SKent Overstreet * bch_insert_data - stick some data in the cache 579cafe5635SKent Overstreet * 580cafe5635SKent Overstreet * This is the starting point for any data to end up in a cache device; it could 581cafe5635SKent Overstreet * be from a normal write, or a writeback write, or a write to a flash only 582cafe5635SKent Overstreet * volume - it's also used by the moving garbage collector to compact data in 583cafe5635SKent Overstreet * mostly empty buckets. 584cafe5635SKent Overstreet * 585cafe5635SKent Overstreet * It first writes the data to the cache, creating a list of keys to be inserted 586cafe5635SKent Overstreet * (if the data had to be fragmented there will be multiple keys); after the 587cafe5635SKent Overstreet * data is written it calls bch_journal, and after the keys have been added to 588cafe5635SKent Overstreet * the next journal write they're inserted into the btree. 589cafe5635SKent Overstreet * 590cafe5635SKent Overstreet * It inserts the data in op->cache_bio; bi_sector is used for the key offset, 591cafe5635SKent Overstreet * and op->inode is used for the key inode. 592cafe5635SKent Overstreet * 593cafe5635SKent Overstreet * If op->skip is true, instead of inserting the data it invalidates the region 594cafe5635SKent Overstreet * of the cache represented by op->cache_bio and op->inode. 
595cafe5635SKent Overstreet */ 596cafe5635SKent Overstreet void bch_insert_data(struct closure *cl) 597cafe5635SKent Overstreet { 598cafe5635SKent Overstreet struct btree_op *op = container_of(cl, struct btree_op, cl); 599cafe5635SKent Overstreet 600cafe5635SKent Overstreet bch_keylist_init(&op->keys); 601cafe5635SKent Overstreet bio_get(op->cache_bio); 602cafe5635SKent Overstreet bch_insert_data_loop(cl); 603cafe5635SKent Overstreet } 604cafe5635SKent Overstreet 605cafe5635SKent Overstreet void bch_btree_insert_async(struct closure *cl) 606cafe5635SKent Overstreet { 607cafe5635SKent Overstreet struct btree_op *op = container_of(cl, struct btree_op, cl); 608cafe5635SKent Overstreet struct search *s = container_of(op, struct search, op); 609cafe5635SKent Overstreet 610cafe5635SKent Overstreet if (bch_btree_insert(op, op->c)) { 611cafe5635SKent Overstreet s->error = -ENOMEM; 612cafe5635SKent Overstreet op->insert_data_done = true; 613cafe5635SKent Overstreet } 614cafe5635SKent Overstreet 615cafe5635SKent Overstreet if (op->insert_data_done) { 616cafe5635SKent Overstreet bch_keylist_free(&op->keys); 617cafe5635SKent Overstreet closure_return(cl); 618cafe5635SKent Overstreet } else 619cafe5635SKent Overstreet continue_at(cl, bch_insert_data_loop, bcache_wq); 620cafe5635SKent Overstreet } 621cafe5635SKent Overstreet 622cafe5635SKent Overstreet /* Common code for the make_request functions */ 623cafe5635SKent Overstreet 624cafe5635SKent Overstreet static void request_endio(struct bio *bio, int error) 625cafe5635SKent Overstreet { 626cafe5635SKent Overstreet struct closure *cl = bio->bi_private; 627cafe5635SKent Overstreet 628cafe5635SKent Overstreet if (error) { 629cafe5635SKent Overstreet struct search *s = container_of(cl, struct search, cl); 630cafe5635SKent Overstreet s->error = error; 631cafe5635SKent Overstreet /* Only cache read errors are recoverable */ 632cafe5635SKent Overstreet s->recoverable = false; 633cafe5635SKent Overstreet } 634cafe5635SKent Overstreet 
	bio_put(bio);
	closure_put(cl);
}

/*
 * Completion for reads from the cache device; detects the case where the
 * bucket we read from was reused while the bio was in flight.
 */
void bch_cache_read_endio(struct bio *bio, int error)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	struct closure *cl = bio->bi_private;
	struct search *s = container_of(cl, struct search, cl);

	/*
	 * If the bucket was reused while our bio was in flight, we might have
	 * read the wrong data. Set s->error but not error so it doesn't get
	 * counted against the cache device, but we'll still reread the data
	 * from the backing device.
	 */

	if (error)
		s->error = error;
	else if (ptr_stale(s->op.c, &b->key, 0)) {
		/* Stale pointer: count the race, force a retry with -EINTR */
		atomic_long_inc(&s->op.c->cache_read_races);
		s->error = -EINTR;
	}

	bch_bbio_endio(s->op.c, bio, error, "reading from cache");
}

/*
 * Finish s->orig_bio: account the request duration in the generic disk
 * stats, trace the completion, and end the bio with the search's error.
 * Idempotent - orig_bio is cleared the first time through.
 */
static void bio_complete(struct search *s)
{
	if (s->orig_bio) {
		int cpu, rw = bio_data_dir(s->orig_bio);
		unsigned long duration = jiffies - s->start_time;

		cpu = part_stat_lock();
		part_round_stats(cpu, &s->d->disk->part0);
		part_stat_add(cpu, &s->d->disk->part0, ticks[rw], duration);
		part_stat_unlock();

		trace_bcache_request_end(s, s->orig_bio);
		bio_endio(s->orig_bio, s->error);
		s->orig_bio = NULL;
	}
}

/*
 * Initialize s->bio as a copy of s->orig_bio, with completion redirected
 * to request_endio() / the search's closure.
 */
static void do_bio_hook(struct search *s)
{
	struct bio *bio = &s->bio.bio;
	memcpy(bio, s->orig_bio, sizeof(struct bio));

	bio->bi_end_io		= request_endio;
	bio->bi_private		= &s->cl;
	/*
	 * NOTE(review): bi_cnt of 3 presumably matches the number of puts on
	 * the submission/retry paths - confirm against the submit sites.
	 */
	atomic_set(&bio->bi_cnt, 3);
}

/*
 * Final teardown of a search: complete the original bio (if not already
 * done), drop the cache_bio ref and any private bvec array, and return the
 * search to its mempool.
 */
static void search_free(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	bio_complete(s);

	if (s->op.cache_bio)
		bio_put(s->op.cache_bio);

	if (s->unaligned_bvec)
		mempool_free(s->bio.bio.bi_io_vec, s->d->unaligned_bvec);

	closure_debug_destroy(cl);
	mempool_free(s, s->d->c->search);
}

/*
 * Allocate and initialize a search for @bio against device @d.  Everything
 * up to op.keys is zeroed here; the remaining fields are set one by one.
 */
static struct search *search_alloc(struct bio *bio, struct bcache_device *d)
{
	struct bio_vec *bv;
	struct search *s = mempool_alloc(d->c->search, GFP_NOIO);
	memset(s, 0, offsetof(struct search, op.keys));

	__closure_init(&s->cl, NULL);

	s->op.inode		= d->id;
	s->op.c			= d->c;
	s->d			= d;
	s->op.lock		= -1;
	s->task			= current;
	s->orig_bio		= bio;
	s->write		= (bio->bi_rw & REQ_WRITE) != 0;
	s->op.flush_journal	= (bio->bi_rw & REQ_FLUSH) != 0;
	s->op.skip		= (bio->bi_rw & REQ_DISCARD) != 0;
	s->recoverable		= 1;
	s->start_time		= jiffies;
	do_bio_hook(s);

	/*
	 * If the bio's segments aren't all full pages, keep a private copy of
	 * the bvec array so a retry can restore it (see request_read_error()).
	 */
	if (bio->bi_size != bio_segments(bio) * PAGE_SIZE) {
		bv = mempool_alloc(d->unaligned_bvec, GFP_NOIO);
		memcpy(bv, bio_iovec(bio),
		       sizeof(struct bio_vec) * bio_segments(bio));

		s->bio.bio.bi_io_vec	= bv;
		s->unaligned_bvec	= 1;
	}

	return s;
}

/*
 * Walk the btree looking up the data this search wants; reschedules itself
 * on bcache_wq for as long as the lookup returns -EAGAIN.
 */
static void btree_read_async(struct closure *cl)
{
	struct btree_op *op = container_of(cl, struct btree_op, cl);

	int ret = btree_root(search_recurse, op->c, op);

	if (ret == -EAGAIN)
		continue_at(cl, btree_read_async, bcache_wq);

	closure_return(cl);
}

/* Cached devices */

/* Free the search, then drop the ref on the cached device */
static void cached_dev_bio_complete(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	search_free(cl);
	cached_dev_put(dc);
}

/* Process reads */

/*
 * Last stage of a cached device read: account insert collisions, free the
 * bounce pages allocated for a cache miss, and finish the request.
 */
static void cached_dev_read_complete(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);

	if (s->op.insert_collision)
		bch_mark_cache_miss_collision(s);

	if (s->op.cache_bio) {
		int i;
		struct bio_vec *bv;

		__bio_for_each_segment(bv, s->op.cache_bio, i, 0)
			__free_page(bv->bv_page);
	}

	cached_dev_bio_complete(cl);
}

/*
 * A read from the cache failed: if the search is still recoverable, reissue
 * the whole request against the backing device.
 */
static void request_read_error(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct bio_vec *bv;
	int i;

	if (s->recoverable) {
		/* The cache read failed, but we can retry from the backing
		 * device.
		 */
		pr_debug("recovering at sector %llu",
			 (uint64_t) s->orig_bio->bi_sector);

		s->error = 0;
		/* Re-init s->bio from orig_bio, preserving our bvec array */
		bv = s->bio.bio.bi_io_vec;
		do_bio_hook(s);
		s->bio.bio.bi_io_vec = bv;

		if (!s->unaligned_bvec)
			bio_for_each_segment(bv, s->orig_bio, i)
				bv->bv_offset = 0, bv->bv_len = PAGE_SIZE;
		else
			/* Restore the saved copy of the original bvecs */
			memcpy(s->bio.bio.bi_io_vec,
			       bio_iovec(s->orig_bio),
			       sizeof(struct bio_vec) *
			       bio_segments(s->orig_bio));

		/* XXX: invalidate cache */

		trace_bcache_read_retry(&s->bio.bio);
		closure_bio_submit(&s->bio.bio, &s->cl, s->d);
	}

	continue_at(cl, cached_dev_read_complete, NULL);
}

/*
 * A read finished successfully.  If it was a cache miss, cache_bio holds
 * the data just read from the backing device: copy it out of the bounce
 * buffers into the original bio, complete the request, then insert the
 * data into the cache.
 */
static void request_read_done(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	/*
	 * s->cache_bio != NULL implies that we had a cache miss; cache_bio now
	 * contains data ready to be inserted into the cache.
	 *
	 * First, we copy the data we just read from cache_bio's bounce buffers
	 * to the buffers the original bio pointed to:
	 */

	if (s->op.cache_bio) {
		struct bio_vec *src, *dst;
		unsigned src_offset, dst_offset, bytes;
		void *dst_ptr;

		/* Rebuild cache_bio to span the full miss + readahead range */
		bio_reset(s->op.cache_bio);
		s->op.cache_bio->bi_sector	= s->cache_miss->bi_sector;
		s->op.cache_bio->bi_bdev	= s->cache_miss->bi_bdev;
		s->op.cache_bio->bi_size	= s->cache_bio_sectors << 9;
		bio_map(s->op.cache_bio, NULL);

		src = bio_iovec(s->op.cache_bio);
		dst = bio_iovec(s->cache_miss);
		src_offset = src->bv_offset;
		dst_offset = dst->bv_offset;
		dst_ptr = kmap(dst->bv_page);

		/*
		 * Walk both bvec arrays in lockstep; the copy ends when the
		 * destination (the original request) runs out - cache_bio may
		 * be larger because of readahead.
		 */
		while (1) {
			if (dst_offset == dst->bv_offset + dst->bv_len) {
				kunmap(dst->bv_page);
				dst++;
				if (dst == bio_iovec_idx(s->cache_miss,
						s->cache_miss->bi_vcnt))
					break;

				dst_offset = dst->bv_offset;
				dst_ptr = kmap(dst->bv_page);
			}

			if (src_offset == src->bv_offset + src->bv_len) {
				src++;
				/* Running out of source data first is a bug */
				if (src == bio_iovec_idx(s->op.cache_bio,
						s->op.cache_bio->bi_vcnt))
					BUG();

				src_offset = src->bv_offset;
			}

			bytes = min(dst->bv_offset + dst->bv_len - dst_offset,
				    src->bv_offset + src->bv_len - src_offset);

			memcpy(dst_ptr + dst_offset,
			       page_address(src->bv_page) + src_offset,
			       bytes);

			src_offset	+= bytes;
			dst_offset	+= bytes;
		}

		bio_put(s->cache_miss);
		s->cache_miss = NULL;
	}

	if (verify(dc, &s->bio.bio) && s->recoverable)
		bch_data_verify(s);

	bio_complete(s);

	/* Now stick the data we just read into the cache */
	if (s->op.cache_bio &&
	    !test_bit(CACHE_SET_STOPPING, &s->op.c->flags)) {
		s->op.type = BTREE_REPLACE;
		closure_call(&s->op.cl, bch_insert_data, NULL, cl);
	}

	continue_at(cl, cached_dev_read_complete, NULL);
}

/*
 * Bottom-half completion for cached device reads: account the hit/miss,
 * then punt the heavier work (error recovery, miss handling, verify) to a
 * workqueue.
 */
static void request_read_done_bh(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip);

	if (s->error)
		continue_at_nobarrier(cl, request_read_error, bcache_wq);
	else if (s->op.cache_bio || verify(dc, &s->bio.bio))
		continue_at_nobarrier(cl, request_read_done, bcache_wq);
	else
		continue_at_nobarrier(cl, cached_dev_read_complete, NULL);
}

/*
 * Called from the btree lookup when some of the requested range isn't in
 * the cache: read @sectors from the backing device and, unless the IO is
 * being bypassed, also allocate a bounce bio (possibly with readahead
 * tacked on) so the data can be inserted into the cache afterwards.
 *
 * Returns 0 to continue the lookup, -EAGAIN if the bio couldn't be split,
 * or -EINTR once a check key has been inserted (the btree iterator is
 * invalid from then on).
 */
static int cached_dev_cache_miss(struct btree *b, struct search *s,
				 struct bio *bio, unsigned sectors)
{
	int ret = 0;
	unsigned reada;
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
	struct bio *miss;

	miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
	if (!miss)
		return -EAGAIN;

	/* The split consumed the whole bio: nothing left to look up */
	if (miss == bio)
		s->op.lookup_done = true;

	miss->bi_end_io		= request_endio;
	miss->bi_private	= &s->cl;

	/* Already handling one miss, or bypassing the cache entirely */
	if (s->cache_miss || s->op.skip)
		goto out_submit;

	/* No readahead for partial misses, REQ_RAHEAD/REQ_META, or a full cache */
	if (miss != bio ||
	    (bio->bi_rw & REQ_RAHEAD) ||
	    (bio->bi_rw & REQ_META) ||
	    s->op.c->gc_stats.in_use >= CUTOFF_CACHE_READA)
		reada = 0;
	else {
		reada = min(dc->readahead >> 9,
			    sectors - bio_sectors(miss));

		/* Don't read past the end of the backing device */
		if (bio_end(miss) + reada > bdev_sectors(miss->bi_bdev))
			reada = bdev_sectors(miss->bi_bdev) - bio_end(miss);
	}

	s->cache_bio_sectors = bio_sectors(miss) + reada;
	s->op.cache_bio = bio_alloc_bioset(GFP_NOWAIT,
			DIV_ROUND_UP(s->cache_bio_sectors, PAGE_SECTORS),
			dc->disk.bio_split);

	if (!s->op.cache_bio)
		goto out_submit;

	s->op.cache_bio->bi_sector	= miss->bi_sector;
	s->op.cache_bio->bi_bdev	= miss->bi_bdev;
	s->op.cache_bio->bi_size	= s->cache_bio_sectors << 9;

	s->op.cache_bio->bi_end_io	= request_endio;
	s->op.cache_bio->bi_private	= &s->cl;

	/* btree_search_recurse()'s btree iterator is no good anymore */
	ret = -EINTR;
	if (!bch_btree_insert_check_key(b, &s->op, s->op.cache_bio))
		goto out_put;

	bio_map(s->op.cache_bio, NULL);
	if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO))
		goto out_put;

	s->cache_miss = miss;
	bio_get(s->op.cache_bio);

	/* Read into the bounce buffers; copied out in request_read_done() */
	trace_bcache_cache_miss(s->orig_bio);
	closure_bio_submit(s->op.cache_bio, &s->cl, s->d);

	return ret;
out_put:
	bio_put(s->op.cache_bio);
	s->op.cache_bio = NULL;
out_submit:
	closure_bio_submit(miss, &s->cl, s->d);
	return ret;
}

/* Entry point for reads to a cached device: btree lookup, then completion */
static void request_read(struct cached_dev *dc, struct search *s)
{
	struct closure *cl = &s->cl;

	check_should_skip(dc, s);
	closure_call(&s->op.cl, btree_read_async, NULL, cl);

	continue_at(cl, request_read_done_bh, NULL);
}

/* Process writes */

static void cached_dev_write_complete(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	/* Taken in request_write() */
	up_read_non_owner(&dc->writeback_lock);
	cached_dev_bio_complete(cl);
}

/*
 * Writeback is allowed only in writeback mode, while the device isn't
 * detaching, and while cache utilization is below the cutoff (which is
 * more permissive for REQ_SYNC writes).
 */
static bool should_writeback(struct cached_dev *dc, struct bio *bio)
{
	unsigned threshold = (bio->bi_rw & REQ_SYNC)
		? CUTOFF_WRITEBACK_SYNC
		: CUTOFF_WRITEBACK;

	return !atomic_read(&dc->disk.detaching) &&
		cache_mode(dc, bio) == CACHE_MODE_WRITEBACK &&
		dc->disk.c->gc_stats.in_use < threshold;
}

/*
 * Entry point for writes to a cached device.  Picks between writeback
 * (data goes to the cache only), writethrough (cache and backing device),
 * and bypass (invalidate the range in the cache, write the backing device).
 */
static void request_write(struct cached_dev *dc, struct search *s)
{
	struct closure *cl = &s->cl;
	struct bio *bio = &s->bio.bio;
	struct bkey start, end;
	start = KEY(dc->disk.id, bio->bi_sector, 0);
	end = KEY(dc->disk.id, bio_end(bio), 0);

	bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, &start, &end);

	check_should_skip(dc, s);
	down_read_non_owner(&dc->writeback_lock);

	/*
	 * If the range overlaps keys currently being written back, force
	 * writeback so the cache remains the authoritative copy.
	 */
	if (bch_keybuf_check_overlapping(&dc->writeback_keys, &start, &end)) {
		s->op.skip = false;
		s->writeback = true;
	}

	if (bio->bi_rw & REQ_DISCARD)
		goto skip;

	if (s->op.skip)
		goto skip;

	if (should_writeback(dc, s->orig_bio))
		s->writeback = true;

	if (!s->writeback) {
		/* Writethrough: insert a clone; the original is submitted below */
		s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
						   dc->disk.bio_split);

		trace_bcache_writethrough(s->orig_bio);
		closure_bio_submit(bio, cl, s->d);
	} else {
		/* Writeback: the data goes to the cache only */
		s->op.cache_bio = bio;
		trace_bcache_writeback(s->orig_bio);
		bch_writeback_add(dc, bio_sectors(bio));
	}
out:
	closure_call(&s->op.cl, bch_insert_data, NULL, cl);
	continue_at(cl, cached_dev_write_complete, NULL);
skip:
	/* Bypass: op.skip makes bch_insert_data invalidate this range */
	s->op.skip = true;
	s->op.cache_bio = s->orig_bio;
	bio_get(s->op.cache_bio);
	trace_bcache_write_skip(s->orig_bio);

	/* Discards the backing device can't handle are done at this point */
	if ((bio->bi_rw & REQ_DISCARD) &&
	    !blk_queue_discard(bdev_get_queue(dc->bdev)))
		goto out;

	closure_bio_submit(bio, cl, s->d);
	goto out;
}

/*
 * Writes with no data: discards are handled as writes; flushes trigger a
 * journal flush before the bio is passed through to the backing device.
 */
static void request_nodata(struct cached_dev *dc, struct search *s)
{
	struct closure *cl = &s->cl;
	struct bio *bio = &s->bio.bio;

	if (bio->bi_rw & REQ_DISCARD) {
		request_write(dc, s);
		return;
	}

	if (s->op.flush_journal)
		bch_journal_meta(s->op.c, cl);

	closure_bio_submit(bio, cl, s->d);

	continue_at(cl, cached_dev_bio_complete, NULL);
}

/* Cached devices - read & write stuff */

/*
 * Returns 0 when the cache isn't congested, otherwise a positive weight
 * derived from the congestion counter; the weight shrinks as time passes
 * since the last congestion event.
 */
int bch_get_congested(struct cache_set *c)
{
	int i;

	if (!c->congested_read_threshold_us &&
	    !c->congested_write_threshold_us)
		return 0;

	i = (local_clock_us() - c->congested_last_us) / 1024;
	if (i < 0)
		return 0;

	i += atomic_read(&c->congested);
	if (i >= 0)
		return 0;

	i += CONGESTED_MAX;

	return i <= 0 ? 1 : fract_exp_two(i, 6);
}

/* Fold the task's last sequential IO size into its moving average */
static void add_sequential(struct task_struct *t)
{
	ewma_add(t->sequential_io_avg,
		 t->sequential_io, 8, 0);

	t->sequential_io = 0;
}

/* Hash bucket in the recent-IO table for sector @k */
static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
{
	return &dc->io_hash[hash_64(k, RECENT_IO_BITS)];
}

/*
 * Decide whether this request should bypass the cache (sets op.skip).
 * Reasons to skip: device detaching, cache nearly full, discards,
 * unaligned IO, wrong cache mode, sequential IO beyond the cutoff, or
 * randomized backoff when the cache is congested.
 */
static void check_should_skip(struct cached_dev *dc, struct search *s)
{
	struct cache_set *c = s->op.c;
	struct bio *bio = &s->bio.bio;

	long rand;
	int cutoff = bch_get_congested(c);
	unsigned mode = cache_mode(dc, bio);

	if (atomic_read(&dc->disk.detaching) ||
	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
	    (bio->bi_rw & REQ_DISCARD))
		goto skip;

	if (mode == CACHE_MODE_NONE ||
	    (mode == CACHE_MODE_WRITEAROUND &&
	     (bio->bi_rw & REQ_WRITE)))
		goto skip;

	/* Can't cache IO that isn't aligned to the cache's block size */
	if (bio->bi_sector & (c->sb.block_size - 1) ||
	    bio_sectors(bio) & (c->sb.block_size - 1)) {
		pr_debug("skipping unaligned io");
		goto skip;
	}

	if (!cutoff) {
		cutoff = dc->sequential_cutoff >> 9;

		if (!cutoff)
			goto rescale;

		/* Sync writes in writeback mode always go to the cache */
		if (mode == CACHE_MODE_WRITEBACK &&
		    (bio->bi_rw & REQ_WRITE) &&
		    (bio->bi_rw & REQ_SYNC))
			goto rescale;
	}

	if (dc->sequential_merge) {
		struct io *i;

		spin_lock(&dc->io_lock);

		/* Does this bio continue an IO stream we saw recently? */
		hlist_for_each_entry(i, iohash(dc, bio->bi_sector), hash)
			if (i->last == bio->bi_sector &&
			    time_before(jiffies, i->jiffies))
				goto found;

		/* No match: recycle the least recently used entry */
		i = list_first_entry(&dc->io_lru, struct io, lru);

		add_sequential(s->task);
		i->sequential = 0;
found:
		/* Only accumulate if the counter won't wrap */
		if (i->sequential + bio->bi_size > i->sequential)
			i->sequential += bio->bi_size;

		i->last = bio_end(bio);
		i->jiffies = jiffies + msecs_to_jiffies(5000);
		s->task->sequential_io = i->sequential;

		/* Rehash under the new end sector, refresh LRU position */
		hlist_del(&i->hash);
		hlist_add_head(&i->hash, iohash(dc, i->last));
		list_move_tail(&i->lru, &dc->io_lru);

		spin_unlock(&dc->io_lock);
	} else {
		s->task->sequential_io = bio->bi_size;

		add_sequential(s->task);
	}

	/* Randomized backoff: congestion lowers the effective cutoff */
	rand = get_random_int();
	cutoff -= bitmap_weight(&rand, BITS_PER_LONG);

	if (cutoff <= (int) (max(s->task->sequential_io,
				 s->task->sequential_io_avg) >> 9))
		goto skip;

rescale:
	bch_rescale_priorities(c, bio_sectors(bio));
	return;
skip:
	bch_mark_sectors_bypassed(s, bio_sectors(bio));
	s->op.skip = true;
}

/*
 * make_request entry point for cached devices: account the IO, remap the
 * bio onto the backing device, then dispatch by request type.
 */
static void cached_dev_make_request(struct request_queue *q, struct bio *bio)
{
	struct search *s;
	struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
	int cpu, rw = bio_data_dir(bio);

	cpu = part_stat_lock();
	part_stat_inc(cpu, &d->disk->part0, ios[rw]);
	part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio));
	part_stat_unlock();

	bio->bi_bdev = dc->bdev;
	bio->bi_sector += BDEV_DATA_START;

	if (cached_dev_get(dc)) {
		s = search_alloc(bio, d);
		trace_bcache_request_start(s, bio);

		if (!bio_has_data(bio))
			request_nodata(dc, s);
		else if (rw)
			request_write(dc, s);
		else
			request_read(dc, s);
	} else {
		/* Couldn't get a ref on the device: pass straight through */
		if ((bio->bi_rw & REQ_DISCARD) &&
		    !blk_queue_discard(bdev_get_queue(dc->bdev)))
			bio_endio(bio, 0);
		else
			bch_generic_make_request(bio, &d->bio_split_hook);
	}
}

/* ioctls on a cached device are forwarded to the backing device */
static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
			    unsigned int cmd, unsigned long arg)
{
	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
	return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
}

/*
 * Congested if the backing device is congested, or (while attached to a
 * cache set) if any cache device is.
 */
static int cached_dev_congested(void *data, int bits)
{
	struct bcache_device *d = data;
	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
	struct request_queue *q = bdev_get_queue(dc->bdev);
	int ret = 0;

	if (bdi_congested(&q->backing_dev_info, bits))
		return 1;

	if (cached_dev_get(dc)) {
		unsigned i;
		struct cache *ca;

		for_each_cache(ca, d->c, i) {
			q = bdev_get_queue(ca->bdev);
			ret |= bdi_congested(&q->backing_dev_info, bits);
		}

		cached_dev_put(dc);
	}

	return ret;
}

/* Wire up the request/congested/cache-miss/ioctl hooks for a cached device */
void bch_cached_dev_request_init(struct cached_dev *dc)
{
	struct gendisk *g = dc->disk.disk;

	g->queue->make_request_fn		= cached_dev_make_request;
	g->queue->backing_dev_info.congested_fn	= cached_dev_congested;
	dc->disk.cache_miss			= cached_dev_cache_miss;
	dc->disk.ioctl				= cached_dev_ioctl;
}

/* Flash backed devices */

/*
 * Flash-only volumes have no backing device to fall back on; a cache miss
 * just means the data doesn't exist, so zero fill the missed range.
 */
static int flash_dev_cache_miss(struct btree *b, struct search *s,
				struct bio *bio, unsigned sectors)
{
	/* Zero fill bio */

	while (bio->bi_idx != bio->bi_vcnt) {
		struct bio_vec *bv = bio_iovec(bio);
		unsigned j = min(bv->bv_len >> 9, sectors);

		void *p = kmap(bv->bv_page);
		memset(p + bv->bv_offset, 0, j << 9);
		kunmap(bv->bv_page);

		bv->bv_len	-= j << 9;
		bv->bv_offset	+= j << 9;

		/* Ran out of @sectors mid-vec: more of the bio is still wanted */
		if (bv->bv_len)
			return 0;

		bio->bi_sector	+= j;
		bio->bi_size	-= j << 9;

		bio->bi_idx++;
		sectors -= j;
	}

	s->op.lookup_done = true;

	return 0;
}

/* make_request entry point for flash-only volumes */
static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
{
	struct search *s;
	struct closure *cl;
	struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
	int cpu, rw = bio_data_dir(bio);

	cpu = part_stat_lock();
	part_stat_inc(cpu, &d->disk->part0, ios[rw]);
	part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio));
	part_stat_unlock();

	s = search_alloc(bio, d);
	cl = &s->cl;
	bio = &s->bio.bio;

	trace_bcache_request_start(s, bio);

	if (bio_has_data(bio) && !rw) {
		closure_call(&s->op.cl, btree_read_async, NULL, cl);
	} else if (bio_has_data(bio) || s->op.skip) {
		bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys,
					     &KEY(d->id, bio->bi_sector, 0),
					     &KEY(d->id, bio_end(bio), 0));
Overstreet s->writeback = true; 1345cafe5635SKent Overstreet s->op.cache_bio = bio; 1346cafe5635SKent Overstreet 1347cafe5635SKent Overstreet closure_call(&s->op.cl, bch_insert_data, NULL, cl); 1348cafe5635SKent Overstreet } else { 1349cafe5635SKent Overstreet /* No data - probably a cache flush */ 1350cafe5635SKent Overstreet if (s->op.flush_journal) 1351cafe5635SKent Overstreet bch_journal_meta(s->op.c, cl); 1352cafe5635SKent Overstreet } 1353cafe5635SKent Overstreet 1354cafe5635SKent Overstreet continue_at(cl, search_free, NULL); 1355cafe5635SKent Overstreet } 1356cafe5635SKent Overstreet 1357cafe5635SKent Overstreet static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode, 1358cafe5635SKent Overstreet unsigned int cmd, unsigned long arg) 1359cafe5635SKent Overstreet { 1360cafe5635SKent Overstreet return -ENOTTY; 1361cafe5635SKent Overstreet } 1362cafe5635SKent Overstreet 1363cafe5635SKent Overstreet static int flash_dev_congested(void *data, int bits) 1364cafe5635SKent Overstreet { 1365cafe5635SKent Overstreet struct bcache_device *d = data; 1366cafe5635SKent Overstreet struct request_queue *q; 1367cafe5635SKent Overstreet struct cache *ca; 1368cafe5635SKent Overstreet unsigned i; 1369cafe5635SKent Overstreet int ret = 0; 1370cafe5635SKent Overstreet 1371cafe5635SKent Overstreet for_each_cache(ca, d->c, i) { 1372cafe5635SKent Overstreet q = bdev_get_queue(ca->bdev); 1373cafe5635SKent Overstreet ret |= bdi_congested(&q->backing_dev_info, bits); 1374cafe5635SKent Overstreet } 1375cafe5635SKent Overstreet 1376cafe5635SKent Overstreet return ret; 1377cafe5635SKent Overstreet } 1378cafe5635SKent Overstreet 1379cafe5635SKent Overstreet void bch_flash_dev_request_init(struct bcache_device *d) 1380cafe5635SKent Overstreet { 1381cafe5635SKent Overstreet struct gendisk *g = d->disk; 1382cafe5635SKent Overstreet 1383cafe5635SKent Overstreet g->queue->make_request_fn = flash_dev_make_request; 1384cafe5635SKent Overstreet g->queue->backing_dev_info.congested_fn = 
flash_dev_congested; 1385cafe5635SKent Overstreet d->cache_miss = flash_dev_cache_miss; 1386cafe5635SKent Overstreet d->ioctl = flash_dev_ioctl; 1387cafe5635SKent Overstreet } 1388cafe5635SKent Overstreet 1389cafe5635SKent Overstreet void bch_request_exit(void) 1390cafe5635SKent Overstreet { 1391cafe5635SKent Overstreet #ifdef CONFIG_CGROUP_BCACHE 1392cafe5635SKent Overstreet cgroup_unload_subsys(&bcache_subsys); 1393cafe5635SKent Overstreet #endif 1394cafe5635SKent Overstreet if (bch_search_cache) 1395cafe5635SKent Overstreet kmem_cache_destroy(bch_search_cache); 1396cafe5635SKent Overstreet } 1397cafe5635SKent Overstreet 1398cafe5635SKent Overstreet int __init bch_request_init(void) 1399cafe5635SKent Overstreet { 1400cafe5635SKent Overstreet bch_search_cache = KMEM_CACHE(search, 0); 1401cafe5635SKent Overstreet if (!bch_search_cache) 1402cafe5635SKent Overstreet return -ENOMEM; 1403cafe5635SKent Overstreet 1404cafe5635SKent Overstreet #ifdef CONFIG_CGROUP_BCACHE 1405cafe5635SKent Overstreet cgroup_load_subsys(&bcache_subsys); 1406cafe5635SKent Overstreet init_bch_cgroup(&bcache_default_cgroup); 1407cafe5635SKent Overstreet 1408cafe5635SKent Overstreet cgroup_add_cftypes(&bcache_subsys, bch_files); 1409cafe5635SKent Overstreet #endif 1410cafe5635SKent Overstreet return 0; 1411cafe5635SKent Overstreet } 1412