/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"
#include "../dm-bufio.h"

#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/

/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once.  This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and return EINVAL.  A stack
 * trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
#define MAX_HOLDERS 4
#define MAX_STACK 10

typedef unsigned long stack_entries[MAX_STACK];

struct block_lock {
	spinlock_t lock;
	__s32 count;
	struct list_head waiters;
	struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace traces[MAX_HOLDERS];
	stack_entries entries[MAX_HOLDERS];
#endif
};

struct waiter {
	struct list_head list;
	struct task_struct *task;
	int wants_write;
};

static unsigned __find_holder(struct block_lock *lock,
			      struct task_struct *task)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++)
		if (lock->holders[i] == task)
			break;

	BUG_ON(i == MAX_HOLDERS);
	return i;
}

/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace *t;
#endif

	get_task_struct(task);
	lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	t = lock->traces + h;
	t->nr_entries = 0;
	t->max_entries = MAX_STACK;
	t->entries = lock->entries[h];
	t->skip = 2;
	save_stack_trace(t);
#endif
}

/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, task);
	lock->holders[h] = NULL;
	put_task_struct(task);
}

static int __check_holder(struct block_lock *lock)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++) {
		if (lock->holders[i] == current) {
			DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			DMERR("previously held here:");
			print_stack_trace(lock->traces + i, 4);

			DMERR("subsequent acquisition attempted here:");
			dump_stack();
#endif
			return -EINVAL;
		}
	}

	return 0;
}

static void __wait(struct waiter *w)
{
	for (;;) {
		set_task_state(current, TASK_UNINTERRUPTIBLE);

		if (!w->task)
			break;

		schedule();
	}

	set_task_state(current, TASK_RUNNING);
}

static void __wake_waiter(struct waiter *w)
{
	struct task_struct *task;

	list_del(&w->list);
	task = w->task;
	smp_mb();
	w->task = NULL;
	wake_up_process(task);
}
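
/*
 * How the lock operations below pair these helpers with the count
 * updates, in the order required by the comments above (an illustrative
 * excerpt of bl_down_read()/bl_up_read(), not additional logic):
 *
 *	lock->count++;			take a reader slot first ...
 *	__add_holder(lock, current);	... then record who holds it
 *	...
 *	__del_holder(lock, current);	forget the holder first ...
 *	--lock->count;			... then give the slot back
 */
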
/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
	struct waiter *w, *tmp;

	BUG_ON(lock->count < 0);
	list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
		if (lock->count >= MAX_HOLDERS)
			return;

		if (w->wants_write) {
			if (lock->count > 0)
				return; /* still read locked */

			lock->count = -1;
			__add_holder(lock, w->task);
			__wake_waiter(w);
			return;
		}

		lock->count++;
		__add_holder(lock, w->task);
		__wake_waiter(w);
	}
}

static void bl_init(struct block_lock *lock)
{
	int i;

	spin_lock_init(&lock->lock);
	lock->count = 0;
	INIT_LIST_HEAD(&lock->waiters);
	for (i = 0; i < MAX_HOLDERS; i++)
		lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
	return lock->count >= 0 &&
		lock->count < MAX_HOLDERS &&
		list_empty(&lock->waiters);
}

static int bl_down_read(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);

	w.task = current;
	w.wants_write = 0;
	list_add_tail(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);
	return 0;
}

static int bl_down_read_nonblock(struct block_lock *lock)
{
	int r;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r)
		goto out;

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		r = 0;
	} else
		r = -EWOULDBLOCK;

out:
	spin_unlock(&lock->lock);
	return r;
}

static void bl_up_read(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	BUG_ON(lock->count <= 0);
	__del_holder(lock, current);
	--lock->count;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static int bl_down_write(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (lock->count == 0 && list_empty(&lock->waiters)) {
		lock->count = -1;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);
	w.task = current;
	w.wants_write = 1;

	/*
	 * Writers are given priority.  We know there's only one mutator
	 * in the system, so we can ignore the ordering reversal.
	 */
	list_add(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}

static void bl_up_write(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	__del_holder(lock, current);
	lock->count = 0;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
	if (r == -EINVAL)
		DMERR("recursive acquisition of block %llu requested.",
		      (unsigned long long) b);
}

/*----------------------------------------------------------------*/
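
/*
 * A minimal sketch of the semantics described at the top of this file,
 * in terms of the bl_* primitives (illustrative only; real callers go
 * through the dm_bm_*_lock() interface further down):
 *
 *	struct block_lock lk;
 *	int r;
 *
 *	bl_init(&lk);
 *
 *	r = bl_down_read(&lk);		r == 0, first reader
 *	r = bl_down_read_nonblock(&lk);	r == -EINVAL, a recursive attempt
 *					by the same task is rejected; with
 *					CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
 *					the first acquisition's stack trace
 *					is printed
 *	bl_up_read(&lk);
 *
 * A writer that has to wait is added to the head of the waiters list
 * (see bl_down_write()), so it is woken ahead of queued readers once
 * the read count drops to zero.
 */
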
/*
 * The block manager is currently implemented using dm-bufio.  struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface.  I want to retain the freedom to move
 * away from bufio in the future.  So these structs are just cast within
 * this .c file, rather than being exposed through the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
	return (struct dm_buffer *) b;
}

dm_block_t dm_block_location(struct dm_block *b)
{
	return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);

struct buffer_aux {
	struct dm_block_validator *validator;
	struct block_lock lock;
	int write_locked;
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
	aux->validator = NULL;
	bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
	if (aux->validator) {
		aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
			dm_bufio_get_block_size(dm_bufio_get_client(buf)));
	}
}

/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
struct dm_block_manager {
	struct dm_bufio_client *bufio;
	bool read_only:1;
};

struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned block_size,
						 unsigned cache_size,
						 unsigned max_held_per_thread)
{
	int r;
	struct dm_block_manager *bm;

	bm = kmalloc(sizeof(*bm), GFP_KERNEL);
	if (!bm) {
		r = -ENOMEM;
		goto bad;
	}

	bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
					   sizeof(struct buffer_aux),
					   dm_block_manager_alloc_callback,
					   dm_block_manager_write_callback);
	if (IS_ERR(bm->bufio)) {
		r = PTR_ERR(bm->bufio);
		kfree(bm);
		goto bad;
	}

	bm->read_only = false;

	return bm;

bad:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	dm_bufio_client_destroy(bm->bufio);
	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
	return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return dm_bufio_get_device_size(bm->bufio);
}
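
/*
 * Validators are supplied by the callers of the dm_bm_*_lock() functions
 * below.  A minimal sketch of one, assuming a hypothetical on-disk header
 * of the form { __le32 csum; __le32 flags; __le64 blocknr; }; the struct,
 * function names and EXAMPLE_CSUM_XOR seed are illustrative and not part
 * of this interface:
 *
 *	static void example_prepare_for_write(struct dm_block_validator *v,
 *					      struct dm_block *b,
 *					      size_t block_size)
 *	{
 *		struct example_header *h = dm_block_data(b);
 *
 *		h->blocknr = cpu_to_le64(dm_block_location(b));
 *		h->csum = cpu_to_le32(dm_bm_checksum(&h->flags,
 *						     block_size - sizeof(__le32),
 *						     EXAMPLE_CSUM_XOR));
 *	}
 *
 *	static int example_check(struct dm_block_validator *v,
 *				 struct dm_block *b, size_t block_size)
 *	{
 *		struct example_header *h = dm_block_data(b);
 *		__le32 csum = cpu_to_le32(dm_bm_checksum(&h->flags,
 *						block_size - sizeof(__le32),
 *						EXAMPLE_CSUM_XOR));
 *
 *		if (le64_to_cpu(h->blocknr) != dm_block_location(b))
 *			return -ENOTBLK;
 *
 *		return csum == h->csum ? 0 : -EILSEQ;
 *	}
 *
 *	static struct dm_block_validator example_validator = {
 *		.name = "example",
 *		.prepare_for_write = example_prepare_for_write,
 *		.check = example_check
 *	};
 */
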
static int dm_bm_validate_buffer(struct dm_block_manager *bm,
				 struct dm_buffer *buf,
				 struct buffer_aux *aux,
				 struct dm_block_validator *v)
{
	if (unlikely(!aux->validator)) {
		int r;
		if (!v)
			return 0;
		r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
		if (unlikely(r)) {
			DMERR_LIMIT("%s validator check failed for block %llu", v->name,
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return r;
		}
		aux->validator = v;
	} else {
		if (unlikely(aux->validator != v)) {
			DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
				    aux->validator->name, v ? v->name : "NULL",
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return -EINVAL;
		}
	}

	return 0;
}

int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read(&aux->lock);
	if (unlikely(r)) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 1;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_write(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);

int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);
	if (unlikely(!p))
		return -EWOULDBLOCK;

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read_nonblock(&aux->lock);
	if (r < 0) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}
	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r;
	struct buffer_aux *aux;
	void *p;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	memset(p, 0, dm_bm_block_size(bm));

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	aux->write_locked = 1;
	aux->validator = v;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);

void dm_bm_unlock(struct dm_block *b)
{
	struct buffer_aux *aux;
	aux = dm_bufio_get_aux_data(to_buffer(b));

	if (aux->write_locked) {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	} else
		bl_up_read(&aux->lock);

	dm_bufio_release(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);
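
/*
 * Typical use of the locking calls above, shown as a sketch; "bm",
 * "block", "new_contents" and "example_validator" stand in for whatever
 * the caller has:
 *
 *	struct dm_block *b;
 *	int r;
 *
 *	r = dm_bm_write_lock(bm, block, &example_validator, &b);
 *	if (r)
 *		return r;
 *
 *	memcpy(dm_block_data(b), new_contents, dm_bm_block_size(bm));
 *	dm_bm_unlock(b);	a write-locked block is marked dirty here
 *
 *	r = dm_bm_flush(bm);	then written back by dm-bufio on flush
 */
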
int dm_bm_flush(struct dm_block_manager *bm)
{
	if (bm->read_only)
		return -EPERM;

	return dm_bufio_write_dirty_buffers(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_flush);

void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
	dm_bufio_prefetch(bm->bufio, b, 1);
}

bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
	return bm->read_only;
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);

void dm_bm_set_read_only(struct dm_block_manager *bm)
{
	bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);

void dm_bm_set_read_write(struct dm_block_manager *bm)
{
	bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);

u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
	return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);

/*----------------------------------------------------------------*/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");

/*----------------------------------------------------------------*/