/*
 * Copyright (C) 2001 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"

#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
#include <linux/mount.h>
#include <linux/dax.h>

#define DM_MSG_PREFIX "table"

#define NODE_SIZE L1_CACHE_BYTES
#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)

/*
 * Similar to ceiling(log_size(n))
 */
static unsigned int int_log(unsigned int n, unsigned int base)
{
	int result = 0;

	while (n > 1) {
		n = dm_div_up(n, base);
		result++;
	}

	return result;
}

/*
 * Calculate the index of the child node of the n'th node's k'th key.
 */
static inline unsigned int get_child(unsigned int n, unsigned int k)
{
	return (n * CHILDREN_PER_NODE) + k;
}

/*
 * Return the n'th node of level l from table t.
 */
static inline sector_t *get_node(struct dm_table *t,
				 unsigned int l, unsigned int n)
{
	return t->index[l] + (n * KEYS_PER_NODE);
}

/*
 * Return the highest key that you could look up from the n'th
 * node on level l of the btree.
 */
static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
{
	for (; l < t->depth - 1; l++)
		n = get_child(n, CHILDREN_PER_NODE - 1);

	if (n >= t->counts[l])
		return (sector_t) - 1;

	return get_node(t, l, n)[KEYS_PER_NODE - 1];
}

/*
 * Fills in a level of the btree based on the highs of the level
 * below it.
 */
static int setup_btree_index(unsigned int l, struct dm_table *t)
{
	unsigned int n, k;
	sector_t *node;

	for (n = 0U; n < t->counts[l]; n++) {
		node = get_node(t, l, n);

		for (k = 0U; k < KEYS_PER_NODE; k++)
			node[k] = high(t, l + 1, get_child(n, k));
	}

	return 0;
}

void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
{
	unsigned long size;
	void *addr;

	/*
	 * Check that we're not going to overflow.
	 */
	if (nmemb > (ULONG_MAX / elem_size))
		return NULL;

	size = nmemb * elem_size;
	addr = vzalloc(size);

	return addr;
}
EXPORT_SYMBOL(dm_vcalloc);

/*
 * highs and targets are managed as dynamic arrays during a
 * table load.
 */
static int alloc_targets(struct dm_table *t, unsigned int num)
{
	sector_t *n_highs;
	struct dm_target *n_targets;

	/*
	 * Allocate both the target array and offset array at once.
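	 * The single allocation is laid out as 'num' sector_t high keys,
	 * immediately followed by 'num' struct dm_target entries; n_targets
	 * below simply points just past the end of the n_highs array.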
	 */
	n_highs = (sector_t *) dm_vcalloc(num, sizeof(struct dm_target) +
					  sizeof(sector_t));
	if (!n_highs)
		return -ENOMEM;

	n_targets = (struct dm_target *) (n_highs + num);

	memset(n_highs, -1, sizeof(*n_highs) * num);
	vfree(t->highs);

	t->num_allocated = num;
	t->highs = n_highs;
	t->targets = n_targets;

	return 0;
}

int dm_table_create(struct dm_table **result, fmode_t mode,
		    unsigned num_targets, struct mapped_device *md)
{
	struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);

	if (!t)
		return -ENOMEM;

	INIT_LIST_HEAD(&t->devices);

	if (!num_targets)
		num_targets = KEYS_PER_NODE;

	num_targets = dm_round_up(num_targets, KEYS_PER_NODE);

	if (!num_targets) {
		kfree(t);
		return -ENOMEM;
	}

	if (alloc_targets(t, num_targets)) {
		kfree(t);
		return -ENOMEM;
	}

	t->type = DM_TYPE_NONE;
	t->mode = mode;
	t->md = md;
	*result = t;
	return 0;
}

static void free_devices(struct list_head *devices, struct mapped_device *md)
{
	struct list_head *tmp, *next;

	list_for_each_safe(tmp, next, devices) {
		struct dm_dev_internal *dd =
		    list_entry(tmp, struct dm_dev_internal, list);
		DMWARN("%s: dm_table_destroy: dm_put_device call missing for %s",
		       dm_device_name(md), dd->dm_dev->name);
		dm_put_table_device(md, dd->dm_dev);
		kfree(dd);
	}
}

static void dm_table_destroy_keyslot_manager(struct dm_table *t);

void dm_table_destroy(struct dm_table *t)
{
	unsigned int i;

	if (!t)
		return;

	/* free the indexes */
	if (t->depth >= 2)
		vfree(t->index[t->depth - 2]);

	/* free the targets */
	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *tgt = t->targets + i;

		if (tgt->type->dtr)
			tgt->type->dtr(tgt);

		dm_put_target_type(tgt->type);
	}

	vfree(t->highs);

	/* free the device list */
	free_devices(&t->devices, t->md);

	dm_free_md_mempools(t->mempools);

	dm_table_destroy_keyslot_manager(t);

	kfree(t);
}

/*
 * See if we've already got a device in the list.
 */
static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
{
	struct dm_dev_internal *dd;

	list_for_each_entry (dd, l, list)
		if (dd->dm_dev->bdev->bd_dev == dev)
			return dd;

	return NULL;
}

/*
 * If possible, this checks whether an area of a destination device is invalid.
 */
static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	struct queue_limits *limits = data;
	struct block_device *bdev = dev->bdev;
	sector_t dev_size =
		i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
	unsigned short logical_block_size_sectors =
		limits->logical_block_size >> SECTOR_SHIFT;
	char b[BDEVNAME_SIZE];

	if (!dev_size)
		return 0;

	if ((start >= dev_size) || (start + len > dev_size)) {
		DMWARN("%s: %s too small for target: "
		       "start=%llu, len=%llu, dev_size=%llu",
		       dm_device_name(ti->table->md), bdevname(bdev, b),
		       (unsigned long long)start,
		       (unsigned long long)len,
		       (unsigned long long)dev_size);
		return 1;
	}

	/*
	 * If the target is mapped to zoned block device(s), check
	 * that the zones are not partially mapped.
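	 * Both the start offset and the length must therefore be multiples
	 * of the device's zone size (a power of two, so the masks below can
	 * be used to check alignment).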
	 */
	if (bdev_zoned_model(bdev) != BLK_ZONED_NONE) {
		unsigned int zone_sectors = bdev_zone_sectors(bdev);

		if (start & (zone_sectors - 1)) {
			DMWARN("%s: start=%llu not aligned to h/w zone size %u of %s",
			       dm_device_name(ti->table->md),
			       (unsigned long long)start,
			       zone_sectors, bdevname(bdev, b));
			return 1;
		}

		/*
		 * Note: The last zone of a zoned block device may be smaller
		 * than other zones. So for a target mapping the end of a
		 * zoned block device with such a zone, len would not be zone
		 * aligned. We do not allow such a last smaller zone to be part
		 * of the mapping here to ensure that mappings with multiple
		 * devices do not end up with a smaller zone in the middle of
		 * the sector range.
		 */
		if (len & (zone_sectors - 1)) {
			DMWARN("%s: len=%llu not aligned to h/w zone size %u of %s",
			       dm_device_name(ti->table->md),
			       (unsigned long long)len,
			       zone_sectors, bdevname(bdev, b));
			return 1;
		}
	}

	if (logical_block_size_sectors <= 1)
		return 0;

	if (start & (logical_block_size_sectors - 1)) {
		DMWARN("%s: start=%llu not aligned to h/w "
		       "logical block size %u of %s",
		       dm_device_name(ti->table->md),
		       (unsigned long long)start,
		       limits->logical_block_size, bdevname(bdev, b));
		return 1;
	}

	if (len & (logical_block_size_sectors - 1)) {
		DMWARN("%s: len=%llu not aligned to h/w "
		       "logical block size %u of %s",
		       dm_device_name(ti->table->md),
		       (unsigned long long)len,
		       limits->logical_block_size, bdevname(bdev, b));
		return 1;
	}

	return 0;
}

/*
 * This upgrades the mode on an already open dm_dev, being
 * careful to leave things as they were if we fail to reopen the
 * device and not to touch the existing bdev field in case
 * it is accessed concurrently.
 */
static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
			struct mapped_device *md)
{
	int r;
	struct dm_dev *old_dev, *new_dev;

	old_dev = dd->dm_dev;

	r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
				dd->dm_dev->mode | new_mode, &new_dev);
	if (r)
		return r;

	dd->dm_dev = new_dev;
	dm_put_table_device(md, old_dev);

	return 0;
}

/*
 * Convert the path to a device
 */
dev_t dm_get_dev_t(const char *path)
{
	dev_t dev;

	if (lookup_bdev(path, &dev))
		dev = name_to_dev_t(path);
	return dev;
}
EXPORT_SYMBOL_GPL(dm_get_dev_t);

/*
 * Add a device to the list, or just increment the usage count if
 * it's already present.
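 * The path may be given either as "<major>:<minor>" or as a path name
 * that dm_get_dev_t() resolves to a device number.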
 */
int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
		  struct dm_dev **result)
{
	int r;
	dev_t dev;
	unsigned int major, minor;
	char dummy;
	struct dm_dev_internal *dd;
	struct dm_table *t = ti->table;

	BUG_ON(!t);

	if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) {
		/* Extract the major/minor numbers */
		dev = MKDEV(major, minor);
		if (MAJOR(dev) != major || MINOR(dev) != minor)
			return -EOVERFLOW;
	} else {
		dev = dm_get_dev_t(path);
		if (!dev)
			return -ENODEV;
	}

	dd = find_device(&t->devices, dev);
	if (!dd) {
		dd = kmalloc(sizeof(*dd), GFP_KERNEL);
		if (!dd)
			return -ENOMEM;

		if ((r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev))) {
			kfree(dd);
			return r;
		}

		refcount_set(&dd->count, 1);
		list_add(&dd->list, &t->devices);
		goto out;

	} else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
		r = upgrade_mode(dd, mode, t->md);
		if (r)
			return r;
	}
	refcount_inc(&dd->count);
out:
	*result = dd->dm_dev;
	return 0;
}
EXPORT_SYMBOL(dm_get_device);

static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	struct queue_limits *limits = data;
	struct block_device *bdev = dev->bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	char b[BDEVNAME_SIZE];

	if (unlikely(!q)) {
		DMWARN("%s: Cannot set limits for nonexistent device %s",
		       dm_device_name(ti->table->md), bdevname(bdev, b));
		return 0;
	}

	if (blk_stack_limits(limits, &q->limits,
			get_start_sect(bdev) + start) < 0)
		DMWARN("%s: adding target device %s caused an alignment inconsistency: "
		       "physical_block_size=%u, logical_block_size=%u, "
		       "alignment_offset=%u, start=%llu",
		       dm_device_name(ti->table->md), bdevname(bdev, b),
		       q->limits.physical_block_size,
		       q->limits.logical_block_size,
		       q->limits.alignment_offset,
		       (unsigned long long) start << SECTOR_SHIFT);
	return 0;
}

/*
 * Decrement a device's use count and remove it if necessary.
 */
void dm_put_device(struct dm_target *ti, struct dm_dev *d)
{
	int found = 0;
	struct list_head *devices = &ti->table->devices;
	struct dm_dev_internal *dd;

	list_for_each_entry(dd, devices, list) {
		if (dd->dm_dev == d) {
			found = 1;
			break;
		}
	}
	if (!found) {
		DMWARN("%s: device %s not in table devices list",
		       dm_device_name(ti->table->md), d->name);
		return;
	}
	if (refcount_dec_and_test(&dd->count)) {
		dm_put_table_device(ti->table->md, d);
		list_del(&dd->list);
		kfree(dd);
	}
}
EXPORT_SYMBOL(dm_put_device);

/*
 * Checks to see if the target joins onto the end of the table.
 */
static int adjoin(struct dm_table *table, struct dm_target *ti)
{
	struct dm_target *prev;

	if (!table->num_targets)
		return !ti->begin;

	prev = &table->targets[table->num_targets - 1];
	return (ti->begin == (prev->begin + prev->len));
}

/*
 * Used to dynamically allocate the arg array.
 *
 * We do the first allocation with GFP_NOIO because dm-mpath and dm-thin must
 * process messages even if some device is suspended. These messages have a
 * small fixed number of arguments.
 *
 * On the other hand, dm-switch needs to process bulk data using messages and
 * excessive use of GFP_NOIO could cause trouble.
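 *
 * The compromise implemented below: the initial, fixed-size allocation of
 * 8 pointers uses GFP_NOIO, while each subsequent doubling of the array
 * uses GFP_KERNEL.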
 */
static char **realloc_argv(unsigned *size, char **old_argv)
{
	char **argv;
	unsigned new_size;
	gfp_t gfp;

	if (*size) {
		new_size = *size * 2;
		gfp = GFP_KERNEL;
	} else {
		new_size = 8;
		gfp = GFP_NOIO;
	}
	argv = kmalloc_array(new_size, sizeof(*argv), gfp);
	if (argv && old_argv) {
		memcpy(argv, old_argv, *size * sizeof(*argv));
		*size = new_size;
	}

	kfree(old_argv);
	return argv;
}

/*
 * Destructively splits up the argument list to pass to ctr.
 */
int dm_split_args(int *argc, char ***argvp, char *input)
{
	char *start, *end = input, *out, **argv = NULL;
	unsigned array_size = 0;

	*argc = 0;

	if (!input) {
		*argvp = NULL;
		return 0;
	}

	argv = realloc_argv(&array_size, argv);
	if (!argv)
		return -ENOMEM;

	while (1) {
		/* Skip whitespace */
		start = skip_spaces(end);

		if (!*start)
			break;	/* success, we hit the end */

		/* 'out' is used to strip the backslash quoting */
		end = out = start;
		while (*end) {
			/* Everything apart from '\0' can be quoted */
			if (*end == '\\' && *(end + 1)) {
				*out++ = *(end + 1);
				end += 2;
				continue;
			}

			if (isspace(*end))
				break;	/* end of token */

			*out++ = *end++;
		}

		/* have we already filled the array ? */
		if ((*argc + 1) > array_size) {
			argv = realloc_argv(&array_size, argv);
			if (!argv)
				return -ENOMEM;
		}

		/* we know this is whitespace */
		if (*end)
			end++;

		/* terminate the string and put it in the array */
		*out = '\0';
		argv[*argc] = start;
		(*argc)++;
	}

	*argvp = argv;
	return 0;
}

/*
 * Impose necessary and sufficient conditions on a device's table such
 * that any incoming bio which respects its logical_block_size can be
 * processed successfully.  If it falls across the boundary between
 * two or more targets, the size of each piece it gets split into must
 * be compatible with the logical_block_size of the target processing it.
 */
static int validate_hardware_logical_block_alignment(struct dm_table *table,
						     struct queue_limits *limits)
{
	/*
	 * This function uses arithmetic modulo the logical_block_size
	 * (in units of 512-byte sectors).
	 */
	unsigned short device_logical_block_size_sects =
		limits->logical_block_size >> SECTOR_SHIFT;

	/*
	 * Offset of the start of the next table entry, mod logical_block_size.
	 */
	unsigned short next_target_start = 0;

	/*
	 * Given an aligned bio that extends beyond the end of a
	 * target, how many sectors must the next target handle?
	 */
	unsigned short remaining = 0;

	struct dm_target *ti;
	struct queue_limits ti_limits;
	unsigned i;

	/*
	 * Check each entry in the table in turn.
	 */
	for (i = 0; i < dm_table_get_num_targets(table); i++) {
		ti = dm_table_get_target(table, i);

		blk_set_stacking_limits(&ti_limits);

		/* combine all target devices' limits */
		if (ti->type->iterate_devices)
			ti->type->iterate_devices(ti, dm_set_device_limits,
						  &ti_limits);

		/*
		 * If the remaining sectors fall entirely within this
		 * table entry, are they compatible with its logical_block_size?
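		 * Worked example: with a 4096-byte logical_block_size (8
		 * sectors), targets covering 12 sectors so far leave
		 * remaining = 4 sectors of the block that straddles the
		 * boundary; this target can only accept that if 4 is a
		 * multiple of its own logical block size in sectors, so a
		 * 512..2048-byte block size is fine and 4096 is rejected.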
		 */
		if (remaining < ti->len &&
		    remaining & ((ti_limits.logical_block_size >>
				  SECTOR_SHIFT) - 1))
			break;	/* Error */

		next_target_start =
		    (unsigned short) ((next_target_start + ti->len) &
				      (device_logical_block_size_sects - 1));
		remaining = next_target_start ?
		    device_logical_block_size_sects - next_target_start : 0;
	}

	if (remaining) {
		DMWARN("%s: table line %u (start sect %llu len %llu) "
		       "not aligned to h/w logical block size %u",
		       dm_device_name(table->md), i,
		       (unsigned long long) ti->begin,
		       (unsigned long long) ti->len,
		       limits->logical_block_size);
		return -EINVAL;
	}

	return 0;
}

int dm_table_add_target(struct dm_table *t, const char *type,
			sector_t start, sector_t len, char *params)
{
	int r = -EINVAL, argc;
	char **argv;
	struct dm_target *tgt;

	if (t->singleton) {
		DMERR("%s: target type %s must appear alone in table",
		      dm_device_name(t->md), t->targets->type->name);
		return -EINVAL;
	}

	BUG_ON(t->num_targets >= t->num_allocated);

	tgt = t->targets + t->num_targets;
	memset(tgt, 0, sizeof(*tgt));

	if (!len) {
		DMERR("%s: zero-length target", dm_device_name(t->md));
		return -EINVAL;
	}

	tgt->type = dm_get_target_type(type);
	if (!tgt->type) {
		DMERR("%s: %s: unknown target type", dm_device_name(t->md), type);
		return -EINVAL;
	}

	if (dm_target_needs_singleton(tgt->type)) {
		if (t->num_targets) {
			tgt->error = "singleton target type must appear alone in table";
			goto bad;
		}
		t->singleton = true;
	}

	if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) {
		tgt->error = "target type may not be included in a read-only table";
		goto bad;
	}

	if (t->immutable_target_type) {
		if (t->immutable_target_type != tgt->type) {
			tgt->error = "immutable target type cannot be mixed with other target types";
			goto bad;
		}
	} else if (dm_target_is_immutable(tgt->type)) {
		if (t->num_targets) {
			tgt->error = "immutable target type cannot be mixed with other target types";
			goto bad;
		}
		t->immutable_target_type = tgt->type;
	}

	if (dm_target_has_integrity(tgt->type))
		t->integrity_added = 1;

	tgt->table = t;
	tgt->begin = start;
	tgt->len = len;
	tgt->error = "Unknown error";

	/*
	 * Does this target adjoin the previous one ?
	 */
	if (!adjoin(t, tgt)) {
		tgt->error = "Gap in table";
		goto bad;
	}

	r = dm_split_args(&argc, &argv, params);
	if (r) {
		tgt->error = "couldn't split parameters (insufficient memory)";
		goto bad;
	}

	r = tgt->type->ctr(tgt, argc, argv);
	kfree(argv);
	if (r)
		goto bad;

	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;

	if (!tgt->num_discard_bios && tgt->discards_supported)
		DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
		       dm_device_name(t->md), type);

	return 0;

 bad:
	DMERR("%s: %s: %s", dm_device_name(t->md), type, tgt->error);
	dm_put_target_type(tgt->type);
	return r;
}

/*
 * Target argument parsing helpers.
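 *
 * Typical (illustrative) use from a target constructor: declare a
 * struct dm_arg giving the permitted range and an error string, then
 * read the value from the dm_arg_set built from argc/argv, e.g.:
 *
 *	static const struct dm_arg _args[] = {
 *		{ 0, 4, "Invalid number of feature arguments" },
 *	};
 *	unsigned int num_features;
 *
 *	r = dm_read_arg_group(_args, &as, &num_features, &ti->error);
 *	if (r)
 *		return r;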
 */
static int validate_next_arg(const struct dm_arg *arg,
			     struct dm_arg_set *arg_set,
			     unsigned *value, char **error, unsigned grouped)
{
	const char *arg_str = dm_shift_arg(arg_set);
	char dummy;

	if (!arg_str ||
	    (sscanf(arg_str, "%u%c", value, &dummy) != 1) ||
	    (*value < arg->min) ||
	    (*value > arg->max) ||
	    (grouped && arg_set->argc < *value)) {
		*error = arg->error;
		return -EINVAL;
	}

	return 0;
}

int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set,
		unsigned *value, char **error)
{
	return validate_next_arg(arg, arg_set, value, error, 0);
}
EXPORT_SYMBOL(dm_read_arg);

int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set,
		      unsigned *value, char **error)
{
	return validate_next_arg(arg, arg_set, value, error, 1);
}
EXPORT_SYMBOL(dm_read_arg_group);

const char *dm_shift_arg(struct dm_arg_set *as)
{
	char *r;

	if (as->argc) {
		as->argc--;
		r = *as->argv;
		as->argv++;
		return r;
	}

	return NULL;
}
EXPORT_SYMBOL(dm_shift_arg);

void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
{
	BUG_ON(as->argc < num_args);
	as->argc -= num_args;
	as->argv += num_args;
}
EXPORT_SYMBOL(dm_consume_args);

static bool __table_type_bio_based(enum dm_queue_mode table_type)
{
	return (table_type == DM_TYPE_BIO_BASED ||
		table_type == DM_TYPE_DAX_BIO_BASED);
}

static bool __table_type_request_based(enum dm_queue_mode table_type)
{
	return table_type == DM_TYPE_REQUEST_BASED;
}

void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
{
	t->type = type;
}
EXPORT_SYMBOL_GPL(dm_table_set_type);

/* validate the dax capability of the target device span */
int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
			   sector_t start, sector_t len, void *data)
{
	int blocksize = *(int *) data, id;
	bool rc;

	id = dax_read_lock();
	rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
	dax_read_unlock(id);

	return rc;
}

/* Check devices support synchronous DAX */
static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_dev *dev,
					      sector_t start, sector_t len, void *data)
{
	return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
}

bool dm_table_supports_dax(struct dm_table *t,
			   iterate_devices_callout_fn iterate_fn, int *blocksize)
{
	struct dm_target *ti;
	unsigned i;

	/* Ensure that all targets support DAX. */
	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->type->direct_access)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, iterate_fn, blocksize))
			return false;
	}

	return true;
}

static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	struct block_device *bdev = dev->bdev;
	struct request_queue *q = bdev_get_queue(bdev);

	/* request-based cannot stack on partitions! */
	if (bdev_is_partition(bdev))
		return false;

	return queue_is_mq(q);
}

static int dm_table_determine_type(struct dm_table *t)
{
	unsigned i;
	unsigned bio_based = 0, request_based = 0, hybrid = 0;
	struct dm_target *tgt;
	struct list_head *devices = dm_table_get_devices(t);
	enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
	int page_size = PAGE_SIZE;

	if (t->type != DM_TYPE_NONE) {
		/* target already set the table's type */
		if (t->type == DM_TYPE_BIO_BASED) {
			/* possibly upgrade to a variant of bio-based */
			goto verify_bio_based;
		}
		BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
		goto verify_rq_based;
	}

	for (i = 0; i < t->num_targets; i++) {
		tgt = t->targets + i;
		if (dm_target_hybrid(tgt))
			hybrid = 1;
		else if (dm_target_request_based(tgt))
			request_based = 1;
		else
			bio_based = 1;

		if (bio_based && request_based) {
			DMERR("Inconsistent table: different target types"
			      " can't be mixed up");
			return -EINVAL;
		}
	}

	if (hybrid && !bio_based && !request_based) {
		/*
		 * The targets can work either way.
		 * Determine the type from the live device.
		 * Default to bio-based if device is new.
		 */
		if (__table_type_request_based(live_md_type))
			request_based = 1;
		else
			bio_based = 1;
	}

	if (bio_based) {
verify_bio_based:
		/* We must use this table as bio-based */
		t->type = DM_TYPE_BIO_BASED;
		if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) ||
		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
			t->type = DM_TYPE_DAX_BIO_BASED;
		}
		return 0;
	}

	BUG_ON(!request_based); /* No targets in this table */

	t->type = DM_TYPE_REQUEST_BASED;

verify_rq_based:
	/*
	 * Request-based dm supports only tables that have a single target now.
	 * To support multiple targets, request splitting support is needed,
	 * and that needs lots of changes in the block-layer.
	 * (e.g. request completion process for partial completion.)
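	 * The checks below therefore insist on a single, immutable target
	 * that does not split I/O (max_io_len == 0) and whose underlying
	 * devices are all whole blk-mq devices (no partitions).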
	 */
	if (t->num_targets > 1) {
		DMERR("request-based DM doesn't support multiple targets");
		return -EINVAL;
	}

	if (list_empty(devices)) {
		int srcu_idx;
		struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);

		/* inherit live table's type */
		if (live_table)
			t->type = live_table->type;
		dm_put_live_table(t->md, srcu_idx);
		return 0;
	}

	tgt = dm_table_get_immutable_target(t);
	if (!tgt) {
		DMERR("table load rejected: immutable target is required");
		return -EINVAL;
	} else if (tgt->max_io_len) {
		DMERR("table load rejected: immutable target that splits IO is not supported");
		return -EINVAL;
	}

	/* Non-request-stackable devices can't be used for request-based dm */
	if (!tgt->type->iterate_devices ||
	    !tgt->type->iterate_devices(tgt, device_is_rq_stackable, NULL)) {
		DMERR("table load rejected: including non-request-stackable devices");
		return -EINVAL;
	}

	return 0;
}

enum dm_queue_mode dm_table_get_type(struct dm_table *t)
{
	return t->type;
}

struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
{
	return t->immutable_target_type;
}

struct dm_target *dm_table_get_immutable_target(struct dm_table *t)
{
	/* Immutable target is implicitly a singleton */
	if (t->num_targets > 1 ||
	    !dm_target_is_immutable(t->targets[0].type))
		return NULL;

	return t->targets;
}

struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);
		if (dm_target_is_wildcard(ti->type))
			return ti;
	}

	return NULL;
}

bool dm_table_bio_based(struct dm_table *t)
{
	return __table_type_bio_based(dm_table_get_type(t));
}

bool dm_table_request_based(struct dm_table *t)
{
	return __table_type_request_based(dm_table_get_type(t));
}

static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
	enum dm_queue_mode type = dm_table_get_type(t);
	unsigned per_io_data_size = 0;
	unsigned min_pool_size = 0;
	struct dm_target *ti;
	unsigned i;

	if (unlikely(type == DM_TYPE_NONE)) {
		DMWARN("no table type is set, can't allocate mempools");
		return -EINVAL;
	}

	if (__table_type_bio_based(type))
		for (i = 0; i < t->num_targets; i++) {
			ti = t->targets + i;
			per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
			min_pool_size = max(min_pool_size, ti->num_flush_bios);
		}

	t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported,
					   per_io_data_size, min_pool_size);
	if (!t->mempools)
		return -ENOMEM;

	return 0;
}

void dm_table_free_md_mempools(struct dm_table *t)
{
	dm_free_md_mempools(t->mempools);
	t->mempools = NULL;
}

struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
{
	return t->mempools;
}

static int setup_indexes(struct dm_table *t)
{
	int i;
	unsigned int total = 0;
	sector_t *indexes;

	/* allocate the space for *all* the indexes */
	for (i = t->depth - 2; i >= 0; i--) {
		t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
		total += t->counts[i];
	}

	indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE);
	if (!indexes)
		return -ENOMEM;

	/* set up internal nodes, bottom-up */
	for (i = t->depth - 2; i >= 0; i--) {
		t->index[i] = indexes;
		indexes += (KEYS_PER_NODE * t->counts[i]);
		setup_btree_index(i, t);
	}

	return 0;
}

/*
 * Builds the btree to index the map.
 */
static int dm_table_build_index(struct dm_table *t)
{
	int r = 0;
	unsigned int leaf_nodes;

	/* how many indexes will the btree have ? */
	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);

	/* leaf layer has already been set up */
	t->counts[t->depth - 1] = leaf_nodes;
	t->index[t->depth - 1] = t->highs;

	if (t->depth >= 2)
		r = setup_indexes(t);

	return r;
}

static bool integrity_profile_exists(struct gendisk *disk)
{
	return !!blk_get_integrity(disk);
}

/*
 * Get a disk whose integrity profile reflects the table's profile.
 * Returns NULL if integrity support was inconsistent or unavailable.
 */
static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t)
{
	struct list_head *devices = dm_table_get_devices(t);
	struct dm_dev_internal *dd = NULL;
	struct gendisk *prev_disk = NULL, *template_disk = NULL;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		struct dm_target *ti = dm_table_get_target(t, i);
		if (!dm_target_passes_integrity(ti->type))
			goto no_integrity;
	}

	list_for_each_entry(dd, devices, list) {
		template_disk = dd->dm_dev->bdev->bd_disk;
		if (!integrity_profile_exists(template_disk))
			goto no_integrity;
		else if (prev_disk &&
			 blk_integrity_compare(prev_disk, template_disk) < 0)
			goto no_integrity;
		prev_disk = template_disk;
	}

	return template_disk;

no_integrity:
	if (prev_disk)
		DMWARN("%s: integrity not set: %s and %s profile mismatch",
		       dm_device_name(t->md),
		       prev_disk->disk_name,
		       template_disk->disk_name);
	return NULL;
}

/*
 * Register the mapped device for blk_integrity support if the
 * underlying devices have an integrity profile.  But all devices may
 * not have matching profiles (checking all devices isn't reliable
 * during table load because this table may use other DM device(s) which
 * must be resumed before they will have an initialized integrity
 * profile).  Consequently, stacked DM devices force a 2 stage integrity
 * profile validation: First pass during table load, final pass during
 * resume.
 */
static int dm_table_register_integrity(struct dm_table *t)
{
	struct mapped_device *md = t->md;
	struct gendisk *template_disk = NULL;

	/* If target handles integrity itself do not register it here. */
	if (t->integrity_added)
		return 0;

	template_disk = dm_table_get_integrity_disk(t);
	if (!template_disk)
		return 0;

	if (!integrity_profile_exists(dm_disk(md))) {
		t->integrity_supported = true;
		/*
		 * Register integrity profile during table load; we can do
		 * this because the final profile must match during resume.
		 */
		blk_integrity_register(dm_disk(md),
				       blk_get_integrity(template_disk));
		return 0;
	}

	/*
	 * If DM device already has an initialized integrity
	 * profile the new profile should not conflict.
	 */
	if (blk_integrity_compare(dm_disk(md), template_disk) < 0) {
		DMWARN("%s: conflict with existing integrity profile: "
		       "%s profile mismatch",
		       dm_device_name(t->md),
		       template_disk->disk_name);
		return 1;
	}

	/* Preserve existing integrity profile */
	t->integrity_supported = true;
	return 0;
}

#ifdef CONFIG_BLK_INLINE_ENCRYPTION

struct dm_keyslot_manager {
	struct blk_keyslot_manager ksm;
	struct mapped_device *md;
};

struct dm_keyslot_evict_args {
	const struct blk_crypto_key *key;
	int err;
};

static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
				     sector_t start, sector_t len, void *data)
{
	struct dm_keyslot_evict_args *args = data;
	int err;

	err = blk_crypto_evict_key(bdev_get_queue(dev->bdev), args->key);
	if (!args->err)
		args->err = err;
	/* Always try to evict the key from all devices. */
	return 0;
}

/*
 * When an inline encryption key is evicted from a device-mapper device, evict
 * it from all the underlying devices.
 */
static int dm_keyslot_evict(struct blk_keyslot_manager *ksm,
			    const struct blk_crypto_key *key, unsigned int slot)
{
	struct dm_keyslot_manager *dksm = container_of(ksm,
						       struct dm_keyslot_manager,
						       ksm);
	struct mapped_device *md = dksm->md;
	struct dm_keyslot_evict_args args = { key };
	struct dm_table *t;
	int srcu_idx;
	int i;
	struct dm_target *ti;

	t = dm_get_live_table(md, &srcu_idx);
	if (!t)
		return 0;
	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);
		if (!ti->type->iterate_devices)
			continue;
		ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args);
	}
	dm_put_live_table(md, srcu_idx);
	return args.err;
}

static struct blk_ksm_ll_ops dm_ksm_ll_ops = {
	.keyslot_evict = dm_keyslot_evict,
};

static int device_intersect_crypto_modes(struct dm_target *ti,
					 struct dm_dev *dev, sector_t start,
					 sector_t len, void *data)
{
	struct blk_keyslot_manager *parent = data;
	struct blk_keyslot_manager *child = bdev_get_queue(dev->bdev)->ksm;

	blk_ksm_intersect_modes(parent, child);
	return 0;
}

void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm)
{
	struct dm_keyslot_manager *dksm = container_of(ksm,
						       struct dm_keyslot_manager,
						       ksm);

	if (!ksm)
		return;

	blk_ksm_destroy(ksm);
	kfree(dksm);
}

static void dm_table_destroy_keyslot_manager(struct dm_table *t)
{
	dm_destroy_keyslot_manager(t->ksm);
	t->ksm = NULL;
}

/*
 * Constructs and initializes t->ksm with a keyslot manager that
 * represents the common set of crypto capabilities of the devices
 * described by the dm_table. However, if the constructed keyslot
 * manager does not support a superset of the crypto capabilities
 * supported by the current keyslot manager of the mapped_device,
 * it returns an error instead, since we don't support restricting
 * crypto capabilities on table changes. Finally, if the constructed
 * keyslot manager doesn't actually support any crypto modes at all,
 * it just returns NULL.
 */
static int dm_table_construct_keyslot_manager(struct dm_table *t)
{
	struct dm_keyslot_manager *dksm;
	struct blk_keyslot_manager *ksm;
	struct dm_target *ti;
	unsigned int i;
	bool ksm_is_empty = true;

	dksm = kmalloc(sizeof(*dksm), GFP_KERNEL);
	if (!dksm)
		return -ENOMEM;
	dksm->md = t->md;

	ksm = &dksm->ksm;
	blk_ksm_init_passthrough(ksm);
	ksm->ksm_ll_ops = dm_ksm_ll_ops;
	ksm->max_dun_bytes_supported = UINT_MAX;
	memset(ksm->crypto_modes_supported, 0xFF,
	       sizeof(ksm->crypto_modes_supported));

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!dm_target_passes_crypto(ti->type)) {
			blk_ksm_intersect_modes(ksm, NULL);
			break;
		}
		if (!ti->type->iterate_devices)
			continue;
		ti->type->iterate_devices(ti, device_intersect_crypto_modes,
					  ksm);
	}

	if (t->md->queue && !blk_ksm_is_superset(ksm, t->md->queue->ksm)) {
		DMWARN("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!");
		dm_destroy_keyslot_manager(ksm);
		return -EINVAL;
	}

	/*
	 * If the new KSM doesn't actually support any crypto modes, we may as
	 * well represent it with a NULL ksm.
	 */
	ksm_is_empty = true;
	for (i = 0; i < ARRAY_SIZE(ksm->crypto_modes_supported); i++) {
		if (ksm->crypto_modes_supported[i]) {
			ksm_is_empty = false;
			break;
		}
	}

	if (ksm_is_empty) {
		dm_destroy_keyslot_manager(ksm);
		ksm = NULL;
	}

	/*
	 * t->ksm is only set temporarily while the table is being set
	 * up, and it gets set to NULL after the capabilities have
	 * been transferred to the request_queue.
	 */
	t->ksm = ksm;

	return 0;
}

static void dm_update_keyslot_manager(struct request_queue *q,
				      struct dm_table *t)
{
	if (!t->ksm)
		return;

	/* Make the ksm less restrictive */
	if (!q->ksm) {
		blk_ksm_register(t->ksm, q);
	} else {
		blk_ksm_update_capabilities(q->ksm, t->ksm);
		dm_destroy_keyslot_manager(t->ksm);
	}
	t->ksm = NULL;
}

#else /* CONFIG_BLK_INLINE_ENCRYPTION */

static int dm_table_construct_keyslot_manager(struct dm_table *t)
{
	return 0;
}

void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm)
{
}

static void dm_table_destroy_keyslot_manager(struct dm_table *t)
{
}

static void dm_update_keyslot_manager(struct request_queue *q,
				      struct dm_table *t)
{
}

#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */

/*
 * Prepares the table for use by building the indices,
 * setting the type, and allocating mempools.
 */
int dm_table_complete(struct dm_table *t)
{
	int r;

	r = dm_table_determine_type(t);
	if (r) {
		DMERR("unable to determine table type");
		return r;
	}

	r = dm_table_build_index(t);
	if (r) {
		DMERR("unable to build btrees");
		return r;
	}

	r = dm_table_register_integrity(t);
	if (r) {
		DMERR("could not register integrity profile.");
		return r;
	}

	r = dm_table_construct_keyslot_manager(t);
	if (r) {
		DMERR("could not construct keyslot manager.");
		return r;
	}

	r = dm_table_alloc_md_mempools(t, t->md);
	if (r)
		DMERR("unable to allocate mempools");

	return r;
}

static DEFINE_MUTEX(_event_lock);
void dm_table_event_callback(struct dm_table *t,
			     void (*fn)(void *), void *context)
{
	mutex_lock(&_event_lock);
	t->event_fn = fn;
	t->event_context = context;
	mutex_unlock(&_event_lock);
}

void dm_table_event(struct dm_table *t)
{
	mutex_lock(&_event_lock);
	if (t->event_fn)
		t->event_fn(t->event_context);
	mutex_unlock(&_event_lock);
}
EXPORT_SYMBOL(dm_table_event);

inline sector_t dm_table_get_size(struct dm_table *t)
{
	return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
}
EXPORT_SYMBOL(dm_table_get_size);

struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
{
	if (index >= t->num_targets)
		return NULL;

	return t->targets + index;
}

/*
 * Search the btree for the correct target.
 *
 * Caller should check returned pointer for NULL
 * to trap I/O beyond end of device.
 */
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
{
	unsigned int l, n = 0, k = 0;
	sector_t *node;

	if (unlikely(sector >= dm_table_get_size(t)))
		return NULL;

	for (l = 0; l < t->depth; l++) {
		n = get_child(n, k);
		node = get_node(t, l, n);

		for (k = 0; k < KEYS_PER_NODE; k++)
			if (node[k] >= sector)
				break;
	}

	return &t->targets[(KEYS_PER_NODE * n) + k];
}

/*
 * type->iterate_devices() should be called when the sanity check needs to
 * iterate and check all underlying data devices. iterate_devices() will
 * iterate all underlying data devices until it encounters a non-zero return
 * code, returned either by the supplied iterate_devices_callout_fn or by
 * iterate_devices() itself internally.
 *
 * For some target types (e.g. dm-stripe), one call of iterate_devices() may
 * iterate multiple underlying devices internally, in which case a non-zero
 * return code returned by iterate_devices_callout_fn will stop the iteration
 * in advance.
 *
 * Cases requiring _any_ underlying device supporting some kind of attribute
 * should use the iteration structure like dm_table_any_dev_attr(), or call
 * it directly. @func should handle semantics of positive examples, e.g.
 * capable of something.
 *
 * Cases requiring _all_ underlying devices supporting some kind of attribute
 * should use the iteration structure like dm_table_supports_nowait() or
 * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that
 * uses an @anti_func that handles semantics of counterexamples, e.g. not
 * capable of something. So: return !dm_table_any_dev_attr(t, anti_func, data);
 */
static bool dm_table_any_dev_attr(struct dm_table *t,
				  iterate_devices_callout_fn func, void *data)
{
	struct dm_target *ti;
	unsigned int i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (ti->type->iterate_devices &&
		    ti->type->iterate_devices(ti, func, data))
			return true;
	}

	return false;
}

static int count_device(struct dm_target *ti, struct dm_dev *dev,
			sector_t start, sector_t len, void *data)
{
	unsigned *num_devices = data;

	(*num_devices)++;

	return 0;
}

/*
 * Check whether a table has no data devices attached using each
 * target's iterate_devices method.
 * Returns false if the result is unknown because a target doesn't
 * support iterate_devices.
 */
bool dm_table_has_no_data_devices(struct dm_table *table)
{
	struct dm_target *ti;
	unsigned i, num_devices;

	for (i = 0; i < dm_table_get_num_targets(table); i++) {
		ti = dm_table_get_target(table, i);

		if (!ti->type->iterate_devices)
			return false;

		num_devices = 0;
		ti->type->iterate_devices(ti, count_device, &num_devices);
		if (num_devices)
			return false;
	}

	return true;
}

static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	enum blk_zoned_model *zoned_model = data;

	return blk_queue_zoned_model(q) != *zoned_model;
}

static bool dm_table_supports_zoned_model(struct dm_table *t,
					  enum blk_zoned_model zoned_model)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (zoned_model == BLK_ZONED_HM &&
		    !dm_target_supports_zoned_hm(ti->type))
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_zoned_model, &zoned_model))
			return false;
	}

	return true;
}

static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
					   sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	unsigned int *zone_sectors = data;

	return blk_queue_zone_sectors(q) != *zone_sectors;
}

static int validate_hardware_zoned_model(struct dm_table *table,
					 enum blk_zoned_model zoned_model,
					 unsigned int zone_sectors)
{
	if (zoned_model == BLK_ZONED_NONE)
		return 0;

	if (!dm_table_supports_zoned_model(table, zoned_model)) {
		DMERR("%s: zoned model is not consistent across all devices",
		      dm_device_name(table->md));
		return -EINVAL;
	}

	/* Check zone size validity and compatibility */
	if (!zone_sectors || !is_power_of_2(zone_sectors))
		return -EINVAL;

	if (dm_table_any_dev_attr(table, device_not_matches_zone_sectors, &zone_sectors)) {
		DMERR("%s: zone sectors is not consistent across all devices",
		      dm_device_name(table->md));
		return -EINVAL;
	}

	return 0;
}

/*
 * Establish the new table's queue_limits and validate them.
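 *
 * For each target: start from the stacking defaults, fold in the limits
 * of every underlying device via dm_set_device_limits(), let the target
 * adjust the result through its .io_hints hook, check every device area
 * against those limits, and finally stack the per-target limits into the
 * table-wide *limits.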
 */
int dm_calculate_queue_limits(struct dm_table *table,
			      struct queue_limits *limits)
{
	struct dm_target *ti;
	struct queue_limits ti_limits;
	unsigned i;
	enum blk_zoned_model zoned_model = BLK_ZONED_NONE;
	unsigned int zone_sectors = 0;

	blk_set_stacking_limits(limits);

	for (i = 0; i < dm_table_get_num_targets(table); i++) {
		blk_set_stacking_limits(&ti_limits);

		ti = dm_table_get_target(table, i);

		if (!ti->type->iterate_devices)
			goto combine_limits;

		/*
		 * Combine queue limits of all the devices this target uses.
		 */
		ti->type->iterate_devices(ti, dm_set_device_limits,
					  &ti_limits);

		if (zoned_model == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) {
			/*
			 * After stacking all limits, validate that all devices
			 * in the table support this zoned model and zone sectors.
			 */
			zoned_model = ti_limits.zoned;
			zone_sectors = ti_limits.chunk_sectors;
		}

		/* Set I/O hints portion of queue limits */
		if (ti->type->io_hints)
			ti->type->io_hints(ti, &ti_limits);

		/*
		 * Check each device area is consistent with the target's
		 * overall queue limits.
		 */
		if (ti->type->iterate_devices(ti, device_area_is_invalid,
					      &ti_limits))
			return -EINVAL;

combine_limits:
		/*
		 * Merge this target's queue limits into the overall limits
		 * for the table.
		 */
		if (blk_stack_limits(limits, &ti_limits, 0) < 0)
			DMWARN("%s: adding target device "
			       "(start sect %llu len %llu) "
			       "caused an alignment inconsistency",
			       dm_device_name(table->md),
			       (unsigned long long) ti->begin,
			       (unsigned long long) ti->len);
	}

	/*
	 * Verify that the zoned model and zone sectors, as determined before
	 * any .io_hints override, are the same across all devices in the table.
	 * - this is especially relevant if .io_hints is emulating a disk-managed
	 *   zoned model (aka BLK_ZONED_NONE) on host-managed zoned block devices.
	 *   BUT...
	 */
	if (limits->zoned != BLK_ZONED_NONE) {
		/*
		 * ...IF the above limits stacking determined a zoned model,
		 * validate that all of the table's devices conform to it.
		 */
		zoned_model = limits->zoned;
		zone_sectors = limits->chunk_sectors;
	}
	if (validate_hardware_zoned_model(table, zoned_model, zone_sectors))
		return -EINVAL;

	return validate_hardware_logical_block_alignment(table, limits);
}

/*
 * Verify that all devices have an integrity profile that matches the
 * DM device's registered integrity profile.  If the profiles don't
 * match then unregister the DM device's integrity profile.
 */
static void dm_table_verify_integrity(struct dm_table *t)
{
	struct gendisk *template_disk = NULL;

	if (t->integrity_added)
		return;

	if (t->integrity_supported) {
		/*
		 * Verify that the original integrity profile
		 * matches all the devices in this table.
		 */
		template_disk = dm_table_get_integrity_disk(t);
		if (template_disk &&
		    blk_integrity_compare(dm_disk(t->md), template_disk) >= 0)
			return;
	}

	if (integrity_profile_exists(dm_disk(t->md))) {
		DMWARN("%s: unable to establish an integrity profile",
		       dm_device_name(t->md));
		blk_integrity_unregister(dm_disk(t->md));
	}
}

static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	unsigned long flush = (unsigned long) data;
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return (q->queue_flags & flush);
}

static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
{
	struct dm_target *ti;
	unsigned i;

	/*
	 * Require at least one underlying device to support flushes.
	 * t->devices includes internal dm devices such as mirror logs
	 * so we need to use iterate_devices here, which targets
	 * supporting flushes must provide.
	 */
	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->num_flush_bios)
			continue;

		if (ti->flush_supported)
			return true;

		if (ti->type->iterate_devices &&
		    ti->type->iterate_devices(ti, device_flush_capable, (void *) flush))
			return true;
	}

	return false;
}

static int device_dax_write_cache_enabled(struct dm_target *ti,
					  struct dm_dev *dev, sector_t start,
					  sector_t len, void *data)
{
	struct dax_device *dax_dev = dev->dax_dev;

	if (!dax_dev)
		return false;

	if (dax_write_cache_enabled(dax_dev))
		return true;
	return false;
}

static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return !blk_queue_nonrot(q);
}

static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return !blk_queue_add_random(q);
}

static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev,
					 sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return !q->limits.max_write_same_sectors;
}

static bool dm_table_supports_write_same(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->num_write_same_bios)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_write_same_capable, NULL))
			return false;
	}

	return true;
}

static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
					   sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return !q->limits.max_write_zeroes_sectors;
}

static bool dm_table_supports_write_zeroes(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i = 0;

	while (i < dm_table_get_num_targets(t)) {
		ti = dm_table_get_target(t, i++);

		if (!ti->num_write_zeroes_bios)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL))
			return false;
	}

	return true;
}

static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev,
				     sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return !blk_queue_nowait(q);
}

static bool dm_table_supports_nowait(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i = 0;

	while (i < dm_table_get_num_targets(t)) {
		ti = dm_table_get_target(t, i++);

		if (!dm_target_supports_nowait(ti->type))
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_nowait_capable, NULL))
			return false;
	}

	return true;
}

static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
				      sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return !blk_queue_discard(q);
}

static bool dm_table_supports_discards(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->num_discard_bios)
			return false;

		/*
		 * Either the target provides discard support (as implied by setting
		 * 'discards_supported') or it relies on _all_ data devices having
		 * discard support.
		 */
		if (!ti->discards_supported &&
		    (!ti->type->iterate_devices ||
		     ti->type->iterate_devices(ti, device_not_discard_capable, NULL)))
			return false;
	}

	return true;
}

static int device_not_secure_erase_capable(struct dm_target *ti,
					   struct dm_dev *dev, sector_t start,
					   sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return !blk_queue_secure_erase(q);
}

static bool dm_table_supports_secure_erase(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned int i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->num_secure_erase_bios)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_secure_erase_capable, NULL))
			return false;
	}

	return true;
}

static int device_requires_stable_pages(struct dm_target *ti,
					struct dm_dev *dev, sector_t start,
					sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return blk_queue_stable_writes(q);
}

void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
			       struct queue_limits *limits)
{
	bool wc = false, fua = false;
	int page_size = PAGE_SIZE;

	/*
	 * Copy table's limits to the DM device's request_queue
	 */
	q->limits = *limits;

	if (dm_table_supports_nowait(t))
		blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);

	if (!dm_table_supports_discards(t)) {
		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
		/* Must also clear discard limits... */
		q->limits.max_discard_sectors = 0;
		q->limits.max_hw_discard_sectors = 0;
		q->limits.discard_granularity = 0;
		q->limits.discard_alignment = 0;
		q->limits.discard_misaligned = 0;
	} else
		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);

	if (dm_table_supports_secure_erase(t))
		blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);

	if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
		wc = true;
		if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA)))
			fua = true;
	}
	blk_queue_write_cache(q, wc, fua);

	if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) {
		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
		if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL))
			set_dax_synchronous(t->md->dax_dev);
	}
	else
		blk_queue_flag_clear(QUEUE_FLAG_DAX, q);

	if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
		dax_write_cache(t->md->dax_dev, true);

	/* Ensure that all underlying devices are non-rotational. */
	if (dm_table_any_dev_attr(t, device_is_rotational, NULL))
		blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
	else
		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);

	if (!dm_table_supports_write_same(t))
		q->limits.max_write_same_sectors = 0;
	if (!dm_table_supports_write_zeroes(t))
		q->limits.max_write_zeroes_sectors = 0;

	dm_table_verify_integrity(t);

	/*
	 * Some devices don't use blk_integrity but still want stable pages
	 * because they do their own checksumming.
	 * If any underlying device requires stable pages, a table must require
	 * them as well.  Only targets that support iterate_devices are considered:
	 * we don't want error, zero, etc. to require stable pages.
	 */
	if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL))
		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);

	/*
	 * Determine whether or not this queue's I/O timings contribute
	 * to the entropy pool.  Only request-based targets use this.
	 * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
	 * have it set.
	 */
	if (blk_queue_add_random(q) &&
	    dm_table_any_dev_attr(t, device_is_not_random, NULL))
		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);

	/*
	 * For a zoned target, the number of zones should be updated for the
	 * correct value to be exposed in sysfs queue/nr_zones. For a BIO based
	 * target, this is all that is needed.
	 */
#ifdef CONFIG_BLK_DEV_ZONED
	if (blk_queue_is_zoned(q)) {
		WARN_ON_ONCE(queue_is_mq(q));
		q->nr_zones = blkdev_nr_zones(t->md->disk);
	}
#endif

	dm_update_keyslot_manager(q, t);
	blk_queue_update_readahead(q);
}

unsigned int dm_table_get_num_targets(struct dm_table *t)
{
	return t->num_targets;
}

struct list_head *dm_table_get_devices(struct dm_table *t)
{
	return &t->devices;
}

fmode_t dm_table_get_mode(struct dm_table *t)
{
	return t->mode;
}
EXPORT_SYMBOL(dm_table_get_mode);

enum suspend_mode {
	PRESUSPEND,
	PRESUSPEND_UNDO,
	POSTSUSPEND,
};

static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
{
	int i = t->num_targets;
	struct dm_target *ti = t->targets;

	lockdep_assert_held(&t->md->suspend_lock);

	while (i--) {
		switch (mode) {
		case PRESUSPEND:
			if (ti->type->presuspend)
				ti->type->presuspend(ti);
			break;
		case PRESUSPEND_UNDO:
			if (ti->type->presuspend_undo)
				ti->type->presuspend_undo(ti);
			break;
		case POSTSUSPEND:
			if (ti->type->postsuspend)
				ti->type->postsuspend(ti);
			break;
		}
		ti++;
	}
}

void dm_table_presuspend_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, PRESUSPEND);
}

void dm_table_presuspend_undo_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, PRESUSPEND_UNDO);
}

void dm_table_postsuspend_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, POSTSUSPEND);
}

int dm_table_resume_targets(struct dm_table *t)
{
	int i, r = 0;

	lockdep_assert_held(&t->md->suspend_lock);

	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = t->targets + i;

		if (!ti->type->preresume)
			continue;

		r = ti->type->preresume(ti);
		if (r) {
			DMERR("%s: %s: preresume failed, error = %d",
			      dm_device_name(t->md), ti->type->name, r);
			return r;
		}
	}

	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = t->targets + i;

		if (ti->type->resume)
			ti->type->resume(ti);
	}

	return 0;
}

struct mapped_device *dm_table_get_md(struct dm_table *t)
{
	return t->md;
}
EXPORT_SYMBOL(dm_table_get_md);

const char *dm_table_device_name(struct dm_table *t)
{
	return dm_device_name(t->md);
}
EXPORT_SYMBOL_GPL(dm_table_device_name);

void dm_table_run_md_queue_async(struct dm_table *t)
{
	if (!dm_table_request_based(t))
		return;

	if (t->md->queue)
		blk_mq_run_hw_queues(t->md->queue, true);
}
EXPORT_SYMBOL(dm_table_run_md_queue_async);