/*
 * Some low level IO code, and hacks for various block layer limitations
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "bset.h"
#include "debug.h"

#include <linux/blkdev.h>

static void bch_bi_idx_hack_endio(struct bio *bio, int error)
{
	struct bio *p = bio->bi_private;

	bio_endio(p, error);
	bio_put(bio);
}

static void bch_generic_make_request_hack(struct bio *bio)
{
	if (bio->bi_idx) {
		struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio));

		memcpy(clone->bi_io_vec,
		       bio_iovec(bio),
		       bio_segments(bio) * sizeof(struct bio_vec));

		clone->bi_sector = bio->bi_sector;
		clone->bi_bdev = bio->bi_bdev;
		clone->bi_rw = bio->bi_rw;
		clone->bi_vcnt = bio_segments(bio);
		clone->bi_size = bio->bi_size;

		clone->bi_private = bio;
		clone->bi_end_io = bch_bi_idx_hack_endio;

		bio = clone;
	}

	/*
	 * Hack, since drivers that clone bios clone up to bi_max_vecs, but our
	 * bios might have had more than that (before we split them per device
	 * limitations).
	 *
	 * To be taken out once immutable bvec stuff is in.
	 */
	bio->bi_max_vecs = bio->bi_vcnt;

	generic_make_request(bio);
}
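
/*
 * Illustrative sketch of the bi_idx hack above (made-up numbers): if a bio
 * arrives with bi_idx == 2 and bi_vcnt == 6, bio_segments() is 4 and
 * bio_iovec() points at bi_io_vec[2].  The clone built above copies those 4
 * remaining bvecs to index 0, so the request is re-submitted as a fresh bio
 * with bi_idx == 0; the original bio is then completed from
 * bch_bi_idx_hack_endio() when the clone finishes.
 */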

/**
 * bch_bio_split - split a bio
 * @bio: bio to split
 * @sectors: number of sectors to split from the front of @bio
 * @gfp: gfp mask
 * @bs: bio set to allocate from
 *
 * Allocates and returns a new bio which represents @sectors from the start of
 * @bio, and updates @bio to represent the remaining sectors.
 *
 * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio
 * unchanged.
 *
 * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
 * bvec boundary; it is the caller's responsibility to ensure that @bio is not
 * freed before the split.
 */
struct bio *bch_bio_split(struct bio *bio, int sectors,
			  gfp_t gfp, struct bio_set *bs)
{
	unsigned idx = bio->bi_idx, vcnt = 0, nbytes = sectors << 9;
	struct bio_vec *bv;
	struct bio *ret = NULL;

	BUG_ON(sectors <= 0);

	if (sectors >= bio_sectors(bio))
		return bio;

	if (bio->bi_rw & REQ_DISCARD) {
		ret = bio_alloc_bioset(gfp, 1, bs);
		if (!ret)
			return NULL;
		idx = 0;
		goto out;
	}

	bio_for_each_segment(bv, bio, idx) {
		vcnt = idx - bio->bi_idx;

		if (!nbytes) {
			ret = bio_alloc_bioset(gfp, vcnt, bs);
			if (!ret)
				return NULL;

			memcpy(ret->bi_io_vec, bio_iovec(bio),
			       sizeof(struct bio_vec) * vcnt);

			break;
		} else if (nbytes < bv->bv_len) {
			ret = bio_alloc_bioset(gfp, ++vcnt, bs);
			if (!ret)
				return NULL;

			memcpy(ret->bi_io_vec, bio_iovec(bio),
			       sizeof(struct bio_vec) * vcnt);

			ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
			bv->bv_offset += nbytes;
			bv->bv_len -= nbytes;
			break;
		}

		nbytes -= bv->bv_len;
	}
out:
	ret->bi_bdev = bio->bi_bdev;
	ret->bi_sector = bio->bi_sector;
	ret->bi_size = sectors << 9;
	ret->bi_rw = bio->bi_rw;
	ret->bi_vcnt = vcnt;
	ret->bi_max_vecs = vcnt;

	bio->bi_sector += sectors;
	bio->bi_size -= sectors << 9;
	bio->bi_idx = idx;

	if (bio_integrity(bio)) {
		if (bio_integrity_clone(ret, bio, gfp)) {
			bio_put(ret);
			return NULL;
		}

		bio_integrity_trim(ret, 0, bio_sectors(ret));
		bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
	}

	return ret;
}
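
/*
 * Illustrative usage sketch of bch_bio_split() above (hypothetical caller;
 * "parent" and "split_pool" are assumed to be set up elsewhere):
 *
 *	struct bio *front = bch_bio_split(parent, 16, GFP_NOIO, split_pool);
 *
 * If front == parent, parent was already <= 16 sectors and can be submitted
 * as-is.  Otherwise front covers the first 16 sectors and parent the rest;
 * parent must not be freed until front has completed, since front may
 * reference parent's bvecs.  A NULL return means allocation failed.
 *
 * bch_generic_make_request() below uses this in a loop, splitting off
 * bch_bio_max_sectors() worth at a time until the remainder fits.
 */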

/*
 * How many sectors, from the front of @bio, the underlying device can accept
 * in a single request given its segment, merge_bvec_fn and max_sectors
 * limits.  Always returns at least the length of the first bvec.
 */
static unsigned bch_bio_max_sectors(struct bio *bio)
{
	unsigned ret = bio_sectors(bio);
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES,
				      queue_max_segments(q));

	if (bio->bi_rw & REQ_DISCARD)
		return min(ret, q->limits.max_discard_sectors);

	if (bio_segments(bio) > max_segments ||
	    q->merge_bvec_fn) {
		struct bio_vec *bv;
		int i, seg = 0;

		ret = 0;

		bio_for_each_segment(bv, bio, i) {
			struct bvec_merge_data bvm = {
				.bi_bdev	= bio->bi_bdev,
				.bi_sector	= bio->bi_sector,
				.bi_size	= ret << 9,
				.bi_rw		= bio->bi_rw,
			};

			if (seg == max_segments)
				break;

			if (q->merge_bvec_fn &&
			    q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len)
				break;

			seg++;
			ret += bv->bv_len >> 9;
		}
	}

	ret = min(ret, queue_max_sectors(q));

	WARN_ON(!ret);
	ret = max_t(int, ret, bio_iovec(bio)->bv_len >> 9);

	return ret;
}

static void bch_bio_submit_split_done(struct closure *cl)
{
	struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);

	s->bio->bi_end_io = s->bi_end_io;
	s->bio->bi_private = s->bi_private;
	bio_endio(s->bio, 0);

	closure_debug_destroy(&s->cl);
	mempool_free(s, s->p->bio_split_hook);
}

static void bch_bio_submit_split_endio(struct bio *bio, int error)
{
	struct closure *cl = bio->bi_private;
	struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);

	if (error)
		clear_bit(BIO_UPTODATE, &s->bio->bi_flags);

	bio_put(bio);
	closure_put(cl);
}

/*
 * Submit @bio, splitting it if it exceeds what bch_bio_max_sectors() says the
 * device can handle.  The split hook restores the original bi_end_io and
 * bi_private and completes @bio once all the pieces have finished.
 */
void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
{
	struct bio_split_hook *s;
	struct bio *n;

	if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD))
		goto submit;

	if (bio_sectors(bio) <= bch_bio_max_sectors(bio))
		goto submit;

	s = mempool_alloc(p->bio_split_hook, GFP_NOIO);
	closure_init(&s->cl, NULL);

	s->bio = bio;
	s->p = p;
	s->bi_end_io = bio->bi_end_io;
	s->bi_private = bio->bi_private;
	bio_get(bio);

	do {
		n = bch_bio_split(bio, bch_bio_max_sectors(bio),
				  GFP_NOIO, s->p->bio_split);

		n->bi_end_io = bch_bio_submit_split_endio;
		n->bi_private = &s->cl;

		closure_get(&s->cl);
		bch_generic_make_request_hack(n);
	} while (n != bio);

	continue_at(&s->cl, bch_bio_submit_split_done, NULL);
submit:
	bch_generic_make_request_hack(bio);
}

/* Bios with headers */

void bch_bbio_free(struct bio *bio, struct cache_set *c)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	mempool_free(b, c->bio_meta);
}

struct bio *bch_bbio_alloc(struct cache_set *c)
{
	struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO);
	struct bio *bio = &b->bio;

	bio_init(bio);
	bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
	bio->bi_max_vecs = bucket_pages(c);
	bio->bi_io_vec = bio->bi_inline_vecs;

	return bio;
}

void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
{
	struct bbio *b = container_of(bio, struct bbio, bio);

	bio->bi_sector = PTR_OFFSET(&b->key, 0);
	bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev;

	b->submit_time_us = local_clock_us();
	closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0));
}

void bch_submit_bbio(struct bio *bio, struct cache_set *c,
		     struct bkey *k, unsigned ptr)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	bch_bkey_copy_single_ptr(&b->key, k, ptr);
	__bch_submit_bbio(bio, c);
}

/* IO errors */
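
/*
 * Worked example of the error decay in bch_count_io_errors() below
 * (illustrative numbers): with error_decay = 256, each time another 256 IOs
 * complete the error count is rescaled by 127/128.  Since (127/128)^88 is
 * roughly 1/2, an error's weight halves after about 88 * 256 IOs, which is
 * where the "~= 88 * refresh" figure in the comment below comes from.
 */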

void bch_count_io_errors(struct cache *ca, int error, const char *m)
{
	/*
	 * The halflife of an error is:
	 * log2(1/2)/log2(127/128) * refresh ~= 88 * refresh
	 */

	if (ca->set->error_decay) {
		unsigned count = atomic_inc_return(&ca->io_count);

		while (count > ca->set->error_decay) {
			unsigned errors;
			unsigned old = count;
			unsigned new = count - ca->set->error_decay;

			/*
			 * First we subtract refresh from count; each time we
			 * successfully do so, we rescale the errors once:
			 */

			count = atomic_cmpxchg(&ca->io_count, old, new);

			if (count == old) {
				count = new;

				errors = atomic_read(&ca->io_errors);
				do {
					old = errors;
					new = ((uint64_t) errors * 127) / 128;
					errors = atomic_cmpxchg(&ca->io_errors,
								old, new);
				} while (old != errors);
			}
		}
	}

	if (error) {
		char buf[BDEVNAME_SIZE];
		unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
						    &ca->io_errors);
		errors >>= IO_ERROR_SHIFT;

		if (errors < ca->set->error_limit)
			pr_err("%s: IO error on %s, recovering",
			       bdevname(ca->bdev, buf), m);
		else
			bch_cache_set_error(ca->set,
					    "%s: too many IO errors %s",
					    bdevname(ca->bdev, buf), m);
	}
}

void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
			      int error, const char *m)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	struct cache *ca = PTR_CACHE(c, &b->key, 0);

	unsigned threshold = bio->bi_rw & REQ_WRITE
		? c->congested_write_threshold_us
		: c->congested_read_threshold_us;

	if (threshold) {
		unsigned t = local_clock_us();

		int us = t - b->submit_time_us;
		int congested = atomic_read(&c->congested);

		if (us > (int) threshold) {
			int ms = us / 1024;
			c->congested_last_us = t;

			ms = min(ms, CONGESTED_MAX + congested);
			atomic_sub(ms, &c->congested);
		} else if (congested < 0)
			atomic_inc(&c->congested);
	}

	bch_count_io_errors(ca, error, m);
}

void bch_bbio_endio(struct cache_set *c, struct bio *bio,
		    int error, const char *m)
{
	struct closure *cl = bio->bi_private;

	bch_bbio_count_io_errors(c, bio, error, m);
	bio_put(bio);
	closure_put(cl);
}
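
/*
 * Worked example for the congestion tracking in bch_bbio_count_io_errors()
 * above (illustrative numbers): with congested_read_threshold_us = 2000, a
 * read that took 5120 us is over the threshold, so 5120 / 1024 = 5 is
 * subtracted from c->congested (clamped so it never goes below
 * -CONGESTED_MAX); reads that finish under the threshold increment it back
 * toward zero, one IO at a time.
 */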