/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"

#include <linux/dm-bufio.h>
#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>
#include <linux/sched/task.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING

/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once.  This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and return EINVAL.  A stack
 * trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
#define MAX_HOLDERS 4
#define MAX_STACK 10

struct stack_store {
	unsigned int nr_entries;
	unsigned long entries[MAX_STACK];
};

struct block_lock {
	spinlock_t lock;
	__s32 count;
	struct list_head waiters;
	struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_store traces[MAX_HOLDERS];
#endif
};

struct waiter {
	struct list_head list;
	struct task_struct *task;
	int wants_write;
};

static unsigned __find_holder(struct block_lock *lock,
			      struct task_struct *task)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++)
		if (lock->holders[i] == task)
			break;

	BUG_ON(i == MAX_HOLDERS);
	return i;
}

/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_store *t;
#endif

	get_task_struct(task);
	lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	t = lock->traces + h;
	t->nr_entries = stack_trace_save(t->entries, MAX_STACK, 2);
#endif
}

/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, task);
	lock->holders[h] = NULL;
	put_task_struct(task);
}

static int __check_holder(struct block_lock *lock)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++) {
		if (lock->holders[i] == current) {
			DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			DMERR("previously held here:");
			stack_trace_print(lock->traces[i].entries,
					  lock->traces[i].nr_entries, 4);

			DMERR("subsequent acquisition attempted here:");
			dump_stack();
#endif
			return -EINVAL;
		}
	}

	return 0;
}

static void __wait(struct waiter *w)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!w->task)
			break;

		schedule();
	}

	set_current_state(TASK_RUNNING);
}

static void __wake_waiter(struct waiter *w)
{
	struct task_struct *task;

	list_del(&w->list);
	task = w->task;
	smp_mb();
	w->task = NULL;
	wake_up_process(task);
}
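
/*
 * For orientation (this only summarises the invariants used below, it is
 * not new behaviour): lock->count == -1 means write locked, 0 means
 * unlocked, and 1..MAX_HOLDERS means read locked by that many holders.
 */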

/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
	struct waiter *w, *tmp;

	BUG_ON(lock->count < 0);
	list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
		if (lock->count >= MAX_HOLDERS)
			return;

		if (w->wants_write) {
			if (lock->count > 0)
				return; /* still read locked */

			lock->count = -1;
			__add_holder(lock, w->task);
			__wake_waiter(w);
			return;
		}

		lock->count++;
		__add_holder(lock, w->task);
		__wake_waiter(w);
	}
}

static void bl_init(struct block_lock *lock)
{
	int i;

	spin_lock_init(&lock->lock);
	lock->count = 0;
	INIT_LIST_HEAD(&lock->waiters);
	for (i = 0; i < MAX_HOLDERS; i++)
		lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
	return lock->count >= 0 &&
		lock->count < MAX_HOLDERS &&
		list_empty(&lock->waiters);
}

static int bl_down_read(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);

	w.task = current;
	w.wants_write = 0;
	list_add_tail(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);
	return 0;
}

static int bl_down_read_nonblock(struct block_lock *lock)
{
	int r;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r)
		goto out;

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		r = 0;
	} else
		r = -EWOULDBLOCK;

out:
	spin_unlock(&lock->lock);
	return r;
}

static void bl_up_read(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	BUG_ON(lock->count <= 0);
	__del_holder(lock, current);
	--lock->count;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}
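
/*
 * A note on the two read-side entry points above (summary only):
 * bl_down_read() may sleep until __wake_many() hands it the lock, whereas
 * bl_down_read_nonblock() never sleeps and reports contention with
 * -EWOULDBLOCK.  dm_bm_read_lock() and dm_bm_read_try_lock() below map
 * onto them respectively.
 */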

static int bl_down_write(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (lock->count == 0 && list_empty(&lock->waiters)) {
		lock->count = -1;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);
	w.task = current;
	w.wants_write = 1;

	/*
	 * Writers given priority. We know there's only one mutator in the
	 * system, so ignoring the ordering reversal.
	 */
	list_add(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}

static void bl_up_write(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	__del_holder(lock, current);
	lock->count = 0;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
	if (r == -EINVAL)
		DMERR("recursive acquisition of block %llu requested.",
		      (unsigned long long) b);
}

#else  /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

#define bl_init(x) do { } while (0)
#define bl_down_read(x) 0
#define bl_down_read_nonblock(x) 0
#define bl_up_read(x) do { } while (0)
#define bl_down_write(x) 0
#define bl_up_write(x) do { } while (0)
#define report_recursive_bug(x, y) do { } while (0)

#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

/*----------------------------------------------------------------*/

/*
 * Block manager is currently implemented using dm-bufio.  struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface.  I want to retain the freedom to move
 * away from bufio in the future.  So these structs are just cast within
 * this .c file, rather than making it through to the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
	return (struct dm_buffer *) b;
}

dm_block_t dm_block_location(struct dm_block *b)
{
	return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);

struct buffer_aux {
	struct dm_block_validator *validator;
	int write_locked;

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
	struct block_lock lock;
#endif
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
	aux->validator = NULL;
	bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
	if (aux->validator) {
		aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
			dm_bufio_get_block_size(dm_bufio_get_client(buf)));
	}
}

/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
struct dm_block_manager {
	struct dm_bufio_client *bufio;
	bool read_only:1;
};

struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned block_size,
						 unsigned max_held_per_thread)
{
	int r;
	struct dm_block_manager *bm;

	bm = kmalloc(sizeof(*bm), GFP_KERNEL);
	if (!bm) {
		r = -ENOMEM;
		goto bad;
	}

	bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
					   sizeof(struct buffer_aux),
					   dm_block_manager_alloc_callback,
					   dm_block_manager_write_callback,
					   0);
	if (IS_ERR(bm->bufio)) {
		r = PTR_ERR(bm->bufio);
		kfree(bm);
		goto bad;
	}

	bm->read_only = false;

	return bm;

bad:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);
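
/*
 * Illustrative sketch only (kept out of the build with #if 0): how a
 * metadata client might create and tear down a block manager.  The 4096
 * byte block size and the limit of one held block per thread are
 * assumptions for the example, not requirements of this interface.
 */
#if 0
static struct dm_block_manager *example_open_bm(struct block_device *bdev)
{
	/* ERR_PTR is returned on failure, never NULL. */
	return dm_block_manager_create(bdev, 4096, 1);
}

static void example_close_bm(struct dm_block_manager *bm)
{
	if (!IS_ERR_OR_NULL(bm))
		dm_block_manager_destroy(bm);
}
#endif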

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	dm_bufio_client_destroy(bm->bufio);
	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
	return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return dm_bufio_get_device_size(bm->bufio);
}

static int dm_bm_validate_buffer(struct dm_block_manager *bm,
				 struct dm_buffer *buf,
				 struct buffer_aux *aux,
				 struct dm_block_validator *v)
{
	if (unlikely(!aux->validator)) {
		int r;
		if (!v)
			return 0;
		r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
		if (unlikely(r)) {
			DMERR_LIMIT("%s validator check failed for block %llu", v->name,
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return r;
		}
		aux->validator = v;
	} else {
		if (unlikely(aux->validator != v)) {
			DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
				    aux->validator->name, v ? v->name : "NULL",
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return -EINVAL;
		}
	}

	return 0;
}

int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read(&aux->lock);
	if (unlikely(r)) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	if (dm_bm_is_read_only(bm))
		return -EPERM;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 1;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_write(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);
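
/*
 * Illustrative sketch only (kept out of the build with #if 0): the usual
 * read-lock/unlock pattern for this interface.  'my_validator' and the
 * helper name are placeholders, not part of the real API.
 */
#if 0
static int example_read_block(struct dm_block_manager *bm, dm_block_t b,
			      struct dm_block_validator *my_validator)
{
	struct dm_block *blk;
	int r;

	r = dm_bm_read_lock(bm, b, my_validator, &blk);
	if (r)
		return r;	/* e.g. -EINVAL on a recursive acquisition */

	/* ... inspect dm_block_data(blk) ... */

	dm_bm_unlock(blk);	/* drops the lock and releases the buffer */
	return 0;
}
#endif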

int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);
	if (unlikely(!p))
		return -EWOULDBLOCK;

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read_nonblock(&aux->lock);
	if (r < 0) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}
	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r;
	struct buffer_aux *aux;
	void *p;

	if (dm_bm_is_read_only(bm))
		return -EPERM;

	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	memset(p, 0, dm_bm_block_size(bm));

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	aux->write_locked = 1;
	aux->validator = v;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);

void dm_bm_unlock(struct dm_block *b)
{
	struct buffer_aux *aux;
	aux = dm_bufio_get_aux_data(to_buffer(b));

	if (aux->write_locked) {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	} else
		bl_up_read(&aux->lock);

	dm_bufio_release(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);

int dm_bm_flush(struct dm_block_manager *bm)
{
	if (dm_bm_is_read_only(bm))
		return -EPERM;

	return dm_bufio_write_dirty_buffers(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_flush);

void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
	dm_bufio_prefetch(bm->bufio, b, 1);
}

bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
	return (bm ? bm->read_only : true);
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);

void dm_bm_set_read_only(struct dm_block_manager *bm)
{
	if (bm)
		bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);

void dm_bm_set_read_write(struct dm_block_manager *bm)
{
	if (bm)
		bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);

u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
	return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);

/*----------------------------------------------------------------*/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");

/*----------------------------------------------------------------*/