1 /* 2 * Copyright (C) 2011 Red Hat, Inc. 3 * 4 * This file is released under the GPL. 5 */ 6 #include "dm-block-manager.h" 7 #include "dm-persistent-data-internal.h" 8 9 #include <linux/dm-bufio.h> 10 #include <linux/crc32c.h> 11 #include <linux/module.h> 12 #include <linux/slab.h> 13 #include <linux/rwsem.h> 14 #include <linux/device-mapper.h> 15 #include <linux/stacktrace.h> 16 #include <linux/sched/task.h> 17 18 #define DM_MSG_PREFIX "block manager" 19 20 /*----------------------------------------------------------------*/ 21 22 #ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING 23 24 /* 25 * This is a read/write semaphore with a couple of differences. 26 * 27 * i) There is a restriction on the number of concurrent read locks that 28 * may be held at once. This is just an implementation detail. 29 * 30 * ii) Recursive locking attempts are detected and return EINVAL. A stack 31 * trace is also emitted for the previous lock acquisition. 32 * 33 * iii) Priority is given to write locks. 34 */ 35 #define MAX_HOLDERS 4 36 #define MAX_STACK 10 37 38 typedef unsigned long stack_entries[MAX_STACK]; 39 40 struct block_lock { 41 spinlock_t lock; 42 __s32 count; 43 struct list_head waiters; 44 struct task_struct *holders[MAX_HOLDERS]; 45 46 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 47 struct stack_trace traces[MAX_HOLDERS]; 48 stack_entries entries[MAX_HOLDERS]; 49 #endif 50 }; 51 52 struct waiter { 53 struct list_head list; 54 struct task_struct *task; 55 int wants_write; 56 }; 57 58 static unsigned __find_holder(struct block_lock *lock, 59 struct task_struct *task) 60 { 61 unsigned i; 62 63 for (i = 0; i < MAX_HOLDERS; i++) 64 if (lock->holders[i] == task) 65 break; 66 67 BUG_ON(i == MAX_HOLDERS); 68 return i; 69 } 70 71 /* call this *after* you increment lock->count */ 72 static void __add_holder(struct block_lock *lock, struct task_struct *task) 73 { 74 unsigned h = __find_holder(lock, NULL); 75 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 76 struct stack_trace *t; 77 #endif 78 79 get_task_struct(task); 80 lock->holders[h] = task; 81 82 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 83 t = lock->traces + h; 84 t->nr_entries = 0; 85 t->max_entries = MAX_STACK; 86 t->entries = lock->entries[h]; 87 t->skip = 2; 88 save_stack_trace(t); 89 #endif 90 } 91 92 /* call this *before* you decrement lock->count */ 93 static void __del_holder(struct block_lock *lock, struct task_struct *task) 94 { 95 unsigned h = __find_holder(lock, task); 96 lock->holders[h] = NULL; 97 put_task_struct(task); 98 } 99 100 static int __check_holder(struct block_lock *lock) 101 { 102 unsigned i; 103 104 for (i = 0; i < MAX_HOLDERS; i++) { 105 if (lock->holders[i] == current) { 106 DMERR("recursive lock detected in metadata"); 107 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING 108 DMERR("previously held here:"); 109 print_stack_trace(lock->traces + i, 4); 110 111 DMERR("subsequent acquisition attempted here:"); 112 dump_stack(); 113 #endif 114 return -EINVAL; 115 } 116 } 117 118 return 0; 119 } 120 121 static void __wait(struct waiter *w) 122 { 123 for (;;) { 124 set_current_state(TASK_UNINTERRUPTIBLE); 125 126 if (!w->task) 127 break; 128 129 schedule(); 130 } 131 132 set_current_state(TASK_RUNNING); 133 } 134 135 static void __wake_waiter(struct waiter *w) 136 { 137 struct task_struct *task; 138 139 list_del(&w->list); 140 task = w->task; 141 smp_mb(); 142 w->task = NULL; 143 wake_up_process(task); 144 } 145 146 /* 147 * We either wake a few readers or a single writer. 148 */ 149 static void __wake_many(struct block_lock *lock) 150 { 151 struct waiter *w, *tmp; 152 153 BUG_ON(lock->count < 0); 154 list_for_each_entry_safe(w, tmp, &lock->waiters, list) { 155 if (lock->count >= MAX_HOLDERS) 156 return; 157 158 if (w->wants_write) { 159 if (lock->count > 0) 160 return; /* still read locked */ 161 162 lock->count = -1; 163 __add_holder(lock, w->task); 164 __wake_waiter(w); 165 return; 166 } 167 168 lock->count++; 169 __add_holder(lock, w->task); 170 __wake_waiter(w); 171 } 172 } 173 174 static void bl_init(struct block_lock *lock) 175 { 176 int i; 177 178 spin_lock_init(&lock->lock); 179 lock->count = 0; 180 INIT_LIST_HEAD(&lock->waiters); 181 for (i = 0; i < MAX_HOLDERS; i++) 182 lock->holders[i] = NULL; 183 } 184 185 static int __available_for_read(struct block_lock *lock) 186 { 187 return lock->count >= 0 && 188 lock->count < MAX_HOLDERS && 189 list_empty(&lock->waiters); 190 } 191 192 static int bl_down_read(struct block_lock *lock) 193 { 194 int r; 195 struct waiter w; 196 197 spin_lock(&lock->lock); 198 r = __check_holder(lock); 199 if (r) { 200 spin_unlock(&lock->lock); 201 return r; 202 } 203 204 if (__available_for_read(lock)) { 205 lock->count++; 206 __add_holder(lock, current); 207 spin_unlock(&lock->lock); 208 return 0; 209 } 210 211 get_task_struct(current); 212 213 w.task = current; 214 w.wants_write = 0; 215 list_add_tail(&w.list, &lock->waiters); 216 spin_unlock(&lock->lock); 217 218 __wait(&w); 219 put_task_struct(current); 220 return 0; 221 } 222 223 static int bl_down_read_nonblock(struct block_lock *lock) 224 { 225 int r; 226 227 spin_lock(&lock->lock); 228 r = __check_holder(lock); 229 if (r) 230 goto out; 231 232 if (__available_for_read(lock)) { 233 lock->count++; 234 __add_holder(lock, current); 235 r = 0; 236 } else 237 r = -EWOULDBLOCK; 238 239 out: 240 spin_unlock(&lock->lock); 241 return r; 242 } 243 244 static void bl_up_read(struct block_lock *lock) 245 { 246 spin_lock(&lock->lock); 247 BUG_ON(lock->count <= 0); 248 __del_holder(lock, current); 249 --lock->count; 250 if (!list_empty(&lock->waiters)) 251 __wake_many(lock); 252 spin_unlock(&lock->lock); 253 } 254 255 static int bl_down_write(struct block_lock *lock) 256 { 257 int r; 258 struct waiter w; 259 260 spin_lock(&lock->lock); 261 r = __check_holder(lock); 262 if (r) { 263 spin_unlock(&lock->lock); 264 return r; 265 } 266 267 if (lock->count == 0 && list_empty(&lock->waiters)) { 268 lock->count = -1; 269 __add_holder(lock, current); 270 spin_unlock(&lock->lock); 271 return 0; 272 } 273 274 get_task_struct(current); 275 w.task = current; 276 w.wants_write = 1; 277 278 /* 279 * Writers given priority. We know there's only one mutator in the 280 * system, so ignoring the ordering reversal. 281 */ 282 list_add(&w.list, &lock->waiters); 283 spin_unlock(&lock->lock); 284 285 __wait(&w); 286 put_task_struct(current); 287 288 return 0; 289 } 290 291 static void bl_up_write(struct block_lock *lock) 292 { 293 spin_lock(&lock->lock); 294 __del_holder(lock, current); 295 lock->count = 0; 296 if (!list_empty(&lock->waiters)) 297 __wake_many(lock); 298 spin_unlock(&lock->lock); 299 } 300 301 static void report_recursive_bug(dm_block_t b, int r) 302 { 303 if (r == -EINVAL) 304 DMERR("recursive acquisition of block %llu requested.", 305 (unsigned long long) b); 306 } 307 308 #else /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */ 309 310 #define bl_init(x) do { } while (0) 311 #define bl_down_read(x) 0 312 #define bl_down_read_nonblock(x) 0 313 #define bl_up_read(x) do { } while (0) 314 #define bl_down_write(x) 0 315 #define bl_up_write(x) do { } while (0) 316 #define report_recursive_bug(x, y) do { } while (0) 317 318 #endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */ 319 320 /*----------------------------------------------------------------*/ 321 322 /* 323 * Block manager is currently implemented using dm-bufio. struct 324 * dm_block_manager and struct dm_block map directly onto a couple of 325 * structs in the bufio interface. I want to retain the freedom to move 326 * away from bufio in the future. So these structs are just cast within 327 * this .c file, rather than making it through to the public interface. 328 */ 329 static struct dm_buffer *to_buffer(struct dm_block *b) 330 { 331 return (struct dm_buffer *) b; 332 } 333 334 dm_block_t dm_block_location(struct dm_block *b) 335 { 336 return dm_bufio_get_block_number(to_buffer(b)); 337 } 338 EXPORT_SYMBOL_GPL(dm_block_location); 339 340 void *dm_block_data(struct dm_block *b) 341 { 342 return dm_bufio_get_block_data(to_buffer(b)); 343 } 344 EXPORT_SYMBOL_GPL(dm_block_data); 345 346 struct buffer_aux { 347 struct dm_block_validator *validator; 348 int write_locked; 349 350 #ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING 351 struct block_lock lock; 352 #endif 353 }; 354 355 static void dm_block_manager_alloc_callback(struct dm_buffer *buf) 356 { 357 struct buffer_aux *aux = dm_bufio_get_aux_data(buf); 358 aux->validator = NULL; 359 bl_init(&aux->lock); 360 } 361 362 static void dm_block_manager_write_callback(struct dm_buffer *buf) 363 { 364 struct buffer_aux *aux = dm_bufio_get_aux_data(buf); 365 if (aux->validator) { 366 aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf, 367 dm_bufio_get_block_size(dm_bufio_get_client(buf))); 368 } 369 } 370 371 /*---------------------------------------------------------------- 372 * Public interface 373 *--------------------------------------------------------------*/ 374 struct dm_block_manager { 375 struct dm_bufio_client *bufio; 376 bool read_only:1; 377 }; 378 379 struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, 380 unsigned block_size, 381 unsigned max_held_per_thread) 382 { 383 int r; 384 struct dm_block_manager *bm; 385 386 bm = kmalloc(sizeof(*bm), GFP_KERNEL); 387 if (!bm) { 388 r = -ENOMEM; 389 goto bad; 390 } 391 392 bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread, 393 sizeof(struct buffer_aux), 394 dm_block_manager_alloc_callback, 395 dm_block_manager_write_callback); 396 if (IS_ERR(bm->bufio)) { 397 r = PTR_ERR(bm->bufio); 398 kfree(bm); 399 goto bad; 400 } 401 402 bm->read_only = false; 403 404 return bm; 405 406 bad: 407 return ERR_PTR(r); 408 } 409 EXPORT_SYMBOL_GPL(dm_block_manager_create); 410 411 void dm_block_manager_destroy(struct dm_block_manager *bm) 412 { 413 dm_bufio_client_destroy(bm->bufio); 414 kfree(bm); 415 } 416 EXPORT_SYMBOL_GPL(dm_block_manager_destroy); 417 418 unsigned dm_bm_block_size(struct dm_block_manager *bm) 419 { 420 return dm_bufio_get_block_size(bm->bufio); 421 } 422 EXPORT_SYMBOL_GPL(dm_bm_block_size); 423 424 dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm) 425 { 426 return dm_bufio_get_device_size(bm->bufio); 427 } 428 429 static int dm_bm_validate_buffer(struct dm_block_manager *bm, 430 struct dm_buffer *buf, 431 struct buffer_aux *aux, 432 struct dm_block_validator *v) 433 { 434 if (unlikely(!aux->validator)) { 435 int r; 436 if (!v) 437 return 0; 438 r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); 439 if (unlikely(r)) { 440 DMERR_LIMIT("%s validator check failed for block %llu", v->name, 441 (unsigned long long) dm_bufio_get_block_number(buf)); 442 return r; 443 } 444 aux->validator = v; 445 } else { 446 if (unlikely(aux->validator != v)) { 447 DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu", 448 aux->validator->name, v ? v->name : "NULL", 449 (unsigned long long) dm_bufio_get_block_number(buf)); 450 return -EINVAL; 451 } 452 } 453 454 return 0; 455 } 456 int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, 457 struct dm_block_validator *v, 458 struct dm_block **result) 459 { 460 struct buffer_aux *aux; 461 void *p; 462 int r; 463 464 p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); 465 if (IS_ERR(p)) 466 return PTR_ERR(p); 467 468 aux = dm_bufio_get_aux_data(to_buffer(*result)); 469 r = bl_down_read(&aux->lock); 470 if (unlikely(r)) { 471 dm_bufio_release(to_buffer(*result)); 472 report_recursive_bug(b, r); 473 return r; 474 } 475 476 aux->write_locked = 0; 477 478 r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); 479 if (unlikely(r)) { 480 bl_up_read(&aux->lock); 481 dm_bufio_release(to_buffer(*result)); 482 return r; 483 } 484 485 return 0; 486 } 487 EXPORT_SYMBOL_GPL(dm_bm_read_lock); 488 489 int dm_bm_write_lock(struct dm_block_manager *bm, 490 dm_block_t b, struct dm_block_validator *v, 491 struct dm_block **result) 492 { 493 struct buffer_aux *aux; 494 void *p; 495 int r; 496 497 if (bm->read_only) 498 return -EPERM; 499 500 p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); 501 if (IS_ERR(p)) 502 return PTR_ERR(p); 503 504 aux = dm_bufio_get_aux_data(to_buffer(*result)); 505 r = bl_down_write(&aux->lock); 506 if (r) { 507 dm_bufio_release(to_buffer(*result)); 508 report_recursive_bug(b, r); 509 return r; 510 } 511 512 aux->write_locked = 1; 513 514 r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); 515 if (unlikely(r)) { 516 bl_up_write(&aux->lock); 517 dm_bufio_release(to_buffer(*result)); 518 return r; 519 } 520 521 return 0; 522 } 523 EXPORT_SYMBOL_GPL(dm_bm_write_lock); 524 525 int dm_bm_read_try_lock(struct dm_block_manager *bm, 526 dm_block_t b, struct dm_block_validator *v, 527 struct dm_block **result) 528 { 529 struct buffer_aux *aux; 530 void *p; 531 int r; 532 533 p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); 534 if (IS_ERR(p)) 535 return PTR_ERR(p); 536 if (unlikely(!p)) 537 return -EWOULDBLOCK; 538 539 aux = dm_bufio_get_aux_data(to_buffer(*result)); 540 r = bl_down_read_nonblock(&aux->lock); 541 if (r < 0) { 542 dm_bufio_release(to_buffer(*result)); 543 report_recursive_bug(b, r); 544 return r; 545 } 546 aux->write_locked = 0; 547 548 r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); 549 if (unlikely(r)) { 550 bl_up_read(&aux->lock); 551 dm_bufio_release(to_buffer(*result)); 552 return r; 553 } 554 555 return 0; 556 } 557 558 int dm_bm_write_lock_zero(struct dm_block_manager *bm, 559 dm_block_t b, struct dm_block_validator *v, 560 struct dm_block **result) 561 { 562 int r; 563 struct buffer_aux *aux; 564 void *p; 565 566 if (bm->read_only) 567 return -EPERM; 568 569 p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); 570 if (IS_ERR(p)) 571 return PTR_ERR(p); 572 573 memset(p, 0, dm_bm_block_size(bm)); 574 575 aux = dm_bufio_get_aux_data(to_buffer(*result)); 576 r = bl_down_write(&aux->lock); 577 if (r) { 578 dm_bufio_release(to_buffer(*result)); 579 return r; 580 } 581 582 aux->write_locked = 1; 583 aux->validator = v; 584 585 return 0; 586 } 587 EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero); 588 589 void dm_bm_unlock(struct dm_block *b) 590 { 591 struct buffer_aux *aux; 592 aux = dm_bufio_get_aux_data(to_buffer(b)); 593 594 if (aux->write_locked) { 595 dm_bufio_mark_buffer_dirty(to_buffer(b)); 596 bl_up_write(&aux->lock); 597 } else 598 bl_up_read(&aux->lock); 599 600 dm_bufio_release(to_buffer(b)); 601 } 602 EXPORT_SYMBOL_GPL(dm_bm_unlock); 603 604 int dm_bm_flush(struct dm_block_manager *bm) 605 { 606 if (bm->read_only) 607 return -EPERM; 608 609 return dm_bufio_write_dirty_buffers(bm->bufio); 610 } 611 EXPORT_SYMBOL_GPL(dm_bm_flush); 612 613 void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b) 614 { 615 dm_bufio_prefetch(bm->bufio, b, 1); 616 } 617 618 bool dm_bm_is_read_only(struct dm_block_manager *bm) 619 { 620 return bm->read_only; 621 } 622 EXPORT_SYMBOL_GPL(dm_bm_is_read_only); 623 624 void dm_bm_set_read_only(struct dm_block_manager *bm) 625 { 626 bm->read_only = true; 627 } 628 EXPORT_SYMBOL_GPL(dm_bm_set_read_only); 629 630 void dm_bm_set_read_write(struct dm_block_manager *bm) 631 { 632 bm->read_only = false; 633 } 634 EXPORT_SYMBOL_GPL(dm_bm_set_read_write); 635 636 u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) 637 { 638 return crc32c(~(u32) 0, data, len) ^ init_xor; 639 } 640 EXPORT_SYMBOL_GPL(dm_bm_checksum); 641 642 /*----------------------------------------------------------------*/ 643 644 MODULE_LICENSE("GPL"); 645 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); 646 MODULE_DESCRIPTION("Immutable metadata library for dm"); 647 648 /*----------------------------------------------------------------*/ 649