// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"

#include <linux/dm-bufio.h>
#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>
#include <linux/sched/task.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING

/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once. This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and return EINVAL. A stack
 * trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
#define MAX_HOLDERS 4
#define MAX_STACK 10

struct stack_store {
	unsigned int nr_entries;
	unsigned long entries[MAX_STACK];
};

struct block_lock {
	spinlock_t lock;
	__s32 count;
	struct list_head waiters;
	struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_store traces[MAX_HOLDERS];
#endif
};

struct waiter {
	struct list_head list;
	struct task_struct *task;
	int wants_write;
};

static unsigned int __find_holder(struct block_lock *lock,
				  struct task_struct *task)
{
	unsigned int i;

	for (i = 0; i < MAX_HOLDERS; i++)
		if (lock->holders[i] == task)
			break;

	BUG_ON(i == MAX_HOLDERS);
	return i;
}

/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned int h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_store *t;
#endif

	get_task_struct(task);
	lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	t = lock->traces + h;
	t->nr_entries = stack_trace_save(t->entries, MAX_STACK, 2);
#endif
}

/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned int h = __find_holder(lock, task);

	lock->holders[h] = NULL;
	put_task_struct(task);
}

static int __check_holder(struct block_lock *lock)
{
	unsigned int i;

	for (i = 0; i < MAX_HOLDERS; i++) {
		if (lock->holders[i] == current) {
			DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			DMERR("previously held here:");
			stack_trace_print(lock->traces[i].entries,
					  lock->traces[i].nr_entries, 4);

			DMERR("subsequent acquisition attempted here:");
			dump_stack();
#endif
			return -EINVAL;
		}
	}

	return 0;
}

static void __wait(struct waiter *w)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!w->task)
			break;

		schedule();
	}

	set_current_state(TASK_RUNNING);
}

static void __wake_waiter(struct waiter *w)
{
	struct task_struct *task;

	list_del(&w->list);
	task = w->task;
	smp_mb();
	w->task = NULL;
	wake_up_process(task);
}
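
/*
 * Note on lock->count, as used below: -1 means write locked, 0 means
 * unlocked, and a positive value is the number of concurrent read
 * holders (at most MAX_HOLDERS).
 */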

/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
	struct waiter *w, *tmp;

	BUG_ON(lock->count < 0);
	list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
		if (lock->count >= MAX_HOLDERS)
			return;

		if (w->wants_write) {
			if (lock->count > 0)
				return; /* still read locked */

			lock->count = -1;
			__add_holder(lock, w->task);
			__wake_waiter(w);
			return;
		}

		lock->count++;
		__add_holder(lock, w->task);
		__wake_waiter(w);
	}
}

static void bl_init(struct block_lock *lock)
{
	int i;

	spin_lock_init(&lock->lock);
	lock->count = 0;
	INIT_LIST_HEAD(&lock->waiters);
	for (i = 0; i < MAX_HOLDERS; i++)
		lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
	return lock->count >= 0 &&
		lock->count < MAX_HOLDERS &&
		list_empty(&lock->waiters);
}

static int bl_down_read(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);

	w.task = current;
	w.wants_write = 0;
	list_add_tail(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);
	return 0;
}

static int bl_down_read_nonblock(struct block_lock *lock)
{
	int r;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r)
		goto out;

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		r = 0;
	} else
		r = -EWOULDBLOCK;

out:
	spin_unlock(&lock->lock);
	return r;
}

static void bl_up_read(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	BUG_ON(lock->count <= 0);
	__del_holder(lock, current);
	--lock->count;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static int bl_down_write(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (lock->count == 0 && list_empty(&lock->waiters)) {
		lock->count = -1;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);
	w.task = current;
	w.wants_write = 1;

	/*
	 * Writers given priority. We know there's only one mutator in the
	 * system, so ignoring the ordering reversal.
	 */
	list_add(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}

static void bl_up_write(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	__del_holder(lock, current);
	lock->count = 0;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
	if (r == -EINVAL)
		DMERR("recursive acquisition of block %llu requested.",
		      (unsigned long long) b);
}
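
/*
 * Illustrative sketch only (not from a real caller): the case that
 * __check_holder() and report_recursive_bug() catch is the same task
 * locking the same block twice, e.g.
 *
 *	dm_bm_read_lock(bm, b, v, &blk);
 *	dm_bm_read_lock(bm, b, v, &blk2);	(fails with -EINVAL)
 *
 * With CONFIG_DM_DEBUG_BLOCK_STACK_TRACING enabled, the stack trace of
 * the first acquisition is also printed.
 */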

#else /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

#define bl_init(x) do { } while (0)
#define bl_down_read(x) 0
#define bl_down_read_nonblock(x) 0
#define bl_up_read(x) do { } while (0)
#define bl_down_write(x) 0
#define bl_up_write(x) do { } while (0)
#define report_recursive_bug(x, y) do { } while (0)

#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

/*----------------------------------------------------------------*/

/*
 * Block manager is currently implemented using dm-bufio. struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface. I want to retain the freedom to move
 * away from bufio in the future. So these structs are just cast within
 * this .c file, rather than making it through to the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
	return (struct dm_buffer *) b;
}

dm_block_t dm_block_location(struct dm_block *b)
{
	return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);

struct buffer_aux {
	struct dm_block_validator *validator;
	int write_locked;

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
	struct block_lock lock;
#endif
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	aux->validator = NULL;
	bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	if (aux->validator) {
		aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
						  dm_bufio_get_block_size(dm_bufio_get_client(buf)));
	}
}

/*
 * -------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------
 */
struct dm_block_manager {
	struct dm_bufio_client *bufio;
	bool read_only:1;
};

struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned int block_size,
						 unsigned int max_held_per_thread)
{
	int r;
	struct dm_block_manager *bm;

	bm = kmalloc(sizeof(*bm), GFP_KERNEL);
	if (!bm) {
		r = -ENOMEM;
		goto bad;
	}

	bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
					   sizeof(struct buffer_aux),
					   dm_block_manager_alloc_callback,
					   dm_block_manager_write_callback,
					   0);
	if (IS_ERR(bm->bufio)) {
		r = PTR_ERR(bm->bufio);
		kfree(bm);
		goto bad;
	}

	bm->read_only = false;

	return bm;

bad:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);
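
/*
 * Illustrative sketch of a typical client sequence (bdev, validator and
 * data are placeholders; error handling is elided):
 *
 *	struct dm_block_manager *bm;
 *	struct dm_block *blk;
 *
 *	bm = dm_block_manager_create(bdev, 4096, 1);
 *	dm_bm_write_lock_zero(bm, 0, validator, &blk);
 *	memcpy(dm_block_data(blk), data, dm_bm_block_size(bm));
 *	dm_bm_unlock(blk);	(marks the buffer dirty)
 *	dm_bm_flush(bm);	(writes dirty buffers to the device)
 *	dm_block_manager_destroy(bm);
 */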

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	dm_bufio_client_destroy(bm->bufio);
	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

unsigned int dm_bm_block_size(struct dm_block_manager *bm)
{
	return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return dm_bufio_get_device_size(bm->bufio);
}
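
/*
 * A validator is attached to a buffer on its first validated access, once
 * v->check() has passed; every later access to that buffer must pass the
 * same validator, otherwise the access fails with -EINVAL.
 */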
static int dm_bm_validate_buffer(struct dm_block_manager *bm,
				 struct dm_buffer *buf,
				 struct buffer_aux *aux,
				 struct dm_block_validator *v)
{
	if (unlikely(!aux->validator)) {
		int r;

		if (!v)
			return 0;
		r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
		if (unlikely(r)) {
			DMERR_LIMIT("%s validator check failed for block %llu", v->name,
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return r;
		}
		aux->validator = v;
	} else {
		if (unlikely(aux->validator != v)) {
			DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
				    aux->validator->name, v ? v->name : "NULL",
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return -EINVAL;
		}
	}

	return 0;
}

int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read(&aux->lock);
	if (unlikely(r)) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	if (dm_bm_is_read_only(bm))
		return -EPERM;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 1;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_write(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);

int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);
	if (unlikely(!p))
		return -EWOULDBLOCK;

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read_nonblock(&aux->lock);
	if (r < 0) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}
	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r;
	struct buffer_aux *aux;
	void *p;

	if (dm_bm_is_read_only(bm))
		return -EPERM;

	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	memset(p, 0, dm_bm_block_size(bm));

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	aux->write_locked = 1;
	aux->validator = v;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);

void dm_bm_unlock(struct dm_block *b)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(to_buffer(b));

	if (aux->write_locked) {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	} else
		bl_up_read(&aux->lock);

	dm_bufio_release(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);

int dm_bm_flush(struct dm_block_manager *bm)
{
	if (dm_bm_is_read_only(bm))
		return -EPERM;

	return dm_bufio_write_dirty_buffers(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_flush);

void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
	dm_bufio_prefetch(bm->bufio, b, 1);
}

bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
	return bm ? bm->read_only : true;
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);

void dm_bm_set_read_only(struct dm_block_manager *bm)
{
	if (bm)
		bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);

void dm_bm_set_read_write(struct dm_block_manager *bm)
{
	if (bm)
		bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);

u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
	return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);

/*----------------------------------------------------------------*/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");

/*----------------------------------------------------------------*/