/*
 * Copyright (C) 2001 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"

#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
#include <linux/mount.h>
#include <linux/dax.h>

#define DM_MSG_PREFIX "table"

#define NODE_SIZE L1_CACHE_BYTES
#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)

/*
 * Similar to ceiling(log_base(n)).
 */
static unsigned int int_log(unsigned int n, unsigned int base)
{
	int result = 0;

	while (n > 1) {
		n = dm_div_up(n, base);
		result++;
	}

	return result;
}

/*
 * Calculate the index of the child node of the n'th node's k'th key.
 */
static inline unsigned int get_child(unsigned int n, unsigned int k)
{
	return (n * CHILDREN_PER_NODE) + k;
}

/*
 * Return the n'th node of level l from table t.
 */
static inline sector_t *get_node(struct dm_table *t,
				 unsigned int l, unsigned int n)
{
	return t->index[l] + (n * KEYS_PER_NODE);
}

/*
 * Return the highest key that you could lookup from the n'th
 * node on level l of the btree.
 */
static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
{
	for (; l < t->depth - 1; l++)
		n = get_child(n, CHILDREN_PER_NODE - 1);

	if (n >= t->counts[l])
		return (sector_t) - 1;

	return get_node(t, l, n)[KEYS_PER_NODE - 1];
}

/*
 * Fills in a level of the btree based on the highs of the level
 * below it.
 */
static int setup_btree_index(unsigned int l, struct dm_table *t)
{
	unsigned int n, k;
	sector_t *node;

	for (n = 0U; n < t->counts[l]; n++) {
		node = get_node(t, l, n);

		for (k = 0U; k < KEYS_PER_NODE; k++)
			node[k] = high(t, l + 1, get_child(n, k));
	}

	return 0;
}

void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
{
	unsigned long size;
	void *addr;

	/*
	 * Check that we're not going to overflow.
	 */
	if (nmemb > (ULONG_MAX / elem_size))
		return NULL;

	size = nmemb * elem_size;
	addr = vzalloc(size);

	return addr;
}
EXPORT_SYMBOL(dm_vcalloc);
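/*
 * Illustrative note (not part of the original source): the division-based
 * check in dm_vcalloc() rejects any nmemb/elem_size pair whose product would
 * wrap an unsigned long.  For example, on a 64-bit build
 * dm_vcalloc(1UL << 60, 32) returns NULL rather than silently allocating a
 * truncated buffer.
 */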

/*
 * highs and targets are managed as dynamic arrays during a
 * table load.
 */
static int alloc_targets(struct dm_table *t, unsigned int num)
{
	sector_t *n_highs;
	struct dm_target *n_targets;

	/*
	 * Allocate both the target array and offset array at once.
	 */
	n_highs = (sector_t *) dm_vcalloc(num, sizeof(struct dm_target) +
					  sizeof(sector_t));
	if (!n_highs)
		return -ENOMEM;

	n_targets = (struct dm_target *) (n_highs + num);

	memset(n_highs, -1, sizeof(*n_highs) * num);
	vfree(t->highs);

	t->num_allocated = num;
	t->highs = n_highs;
	t->targets = n_targets;

	return 0;
}

int dm_table_create(struct dm_table **result, fmode_t mode,
		    unsigned num_targets, struct mapped_device *md)
{
	struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);

	if (!t)
		return -ENOMEM;

	INIT_LIST_HEAD(&t->devices);

	if (!num_targets)
		num_targets = KEYS_PER_NODE;

	num_targets = dm_round_up(num_targets, KEYS_PER_NODE);

	if (!num_targets) {
		kfree(t);
		return -ENOMEM;
	}

	if (alloc_targets(t, num_targets)) {
		kfree(t);
		return -ENOMEM;
	}

	t->type = DM_TYPE_NONE;
	t->mode = mode;
	t->md = md;
	*result = t;
	return 0;
}

static void free_devices(struct list_head *devices, struct mapped_device *md)
{
	struct list_head *tmp, *next;

	list_for_each_safe(tmp, next, devices) {
		struct dm_dev_internal *dd =
		    list_entry(tmp, struct dm_dev_internal, list);
		DMWARN("%s: dm_table_destroy: dm_put_device call missing for %s",
		       dm_device_name(md), dd->dm_dev->name);
		dm_put_table_device(md, dd->dm_dev);
		kfree(dd);
	}
}

void dm_table_destroy(struct dm_table *t)
{
	unsigned int i;

	if (!t)
		return;

	/* free the indexes */
	if (t->depth >= 2)
		vfree(t->index[t->depth - 2]);

	/* free the targets */
	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *tgt = t->targets + i;

		if (tgt->type->dtr)
			tgt->type->dtr(tgt);

		dm_put_target_type(tgt->type);
	}

	vfree(t->highs);

	/* free the device list */
	free_devices(&t->devices, t->md);

	dm_free_md_mempools(t->mempools);

	kfree(t);
}

/*
 * See if we've already got a device in the list.
 */
static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
{
	struct dm_dev_internal *dd;

	list_for_each_entry (dd, l, list)
		if (dd->dm_dev->bdev->bd_dev == dev)
			return dd;

	return NULL;
}

/*
 * If possible, this checks whether an area of a destination device is invalid.
 */
static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	struct queue_limits *limits = data;
	struct block_device *bdev = dev->bdev;
	sector_t dev_size =
		i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
	unsigned short logical_block_size_sectors =
		limits->logical_block_size >> SECTOR_SHIFT;
	char b[BDEVNAME_SIZE];

	if (!dev_size)
		return 0;

	if ((start >= dev_size) || (start + len > dev_size)) {
		DMWARN("%s: %s too small for target: "
		       "start=%llu, len=%llu, dev_size=%llu",
		       dm_device_name(ti->table->md), bdevname(bdev, b),
		       (unsigned long long)start,
		       (unsigned long long)len,
		       (unsigned long long)dev_size);
		return 1;
	}

	/*
	 * If the target is mapped to zoned block device(s), check
	 * that the zones are not partially mapped.
	 */
	if (bdev_zoned_model(bdev) != BLK_ZONED_NONE) {
		unsigned int zone_sectors = bdev_zone_sectors(bdev);

		if (start & (zone_sectors - 1)) {
			DMWARN("%s: start=%llu not aligned to h/w zone size %u of %s",
			       dm_device_name(ti->table->md),
			       (unsigned long long)start,
			       zone_sectors, bdevname(bdev, b));
			return 1;
		}

		/*
		 * Note: The last zone of a zoned block device may be smaller
		 * than other zones. So for a target mapping the end of a
		 * zoned block device with such a zone, len would not be zone
		 * aligned. We do not allow such last smaller zone to be part
		 * of the mapping here to ensure that mappings with multiple
		 * devices do not end up with a smaller zone in the middle of
		 * the sector range.
		 */
		if (len & (zone_sectors - 1)) {
			DMWARN("%s: len=%llu not aligned to h/w zone size %u of %s",
			       dm_device_name(ti->table->md),
			       (unsigned long long)len,
			       zone_sectors, bdevname(bdev, b));
			return 1;
		}
	}

	if (logical_block_size_sectors <= 1)
		return 0;

	if (start & (logical_block_size_sectors - 1)) {
		DMWARN("%s: start=%llu not aligned to h/w "
		       "logical block size %u of %s",
		       dm_device_name(ti->table->md),
		       (unsigned long long)start,
		       limits->logical_block_size, bdevname(bdev, b));
		return 1;
	}

	if (len & (logical_block_size_sectors - 1)) {
		DMWARN("%s: len=%llu not aligned to h/w "
		       "logical block size %u of %s",
		       dm_device_name(ti->table->md),
		       (unsigned long long)len,
		       limits->logical_block_size, bdevname(bdev, b));
		return 1;
	}

	return 0;
}

/*
 * This upgrades the mode on an already open dm_dev, being
 * careful to leave things as they were if we fail to reopen the
 * device and not to touch the existing bdev field in case
 * it is accessed concurrently.
 */
static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
			struct mapped_device *md)
{
	int r;
	struct dm_dev *old_dev, *new_dev;

	old_dev = dd->dm_dev;

	r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
				dd->dm_dev->mode | new_mode, &new_dev);
	if (r)
		return r;

	dd->dm_dev = new_dev;
	dm_put_table_device(md, old_dev);

	return 0;
}

/*
 * Convert the path to a device
 */
dev_t dm_get_dev_t(const char *path)
{
	dev_t dev;
	struct block_device *bdev;

	bdev = lookup_bdev(path);
	if (IS_ERR(bdev))
		dev = name_to_dev_t(path);
	else {
		dev = bdev->bd_dev;
		bdput(bdev);
	}

	return dev;
}
EXPORT_SYMBOL_GPL(dm_get_dev_t);
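/*
 * Illustrative note (not part of the original source): callers may pass
 * either a filesystem path such as "/dev/sdb1", which lookup_bdev() resolves,
 * or a "major:minor" string such as "8:17", which fails lookup_bdev() and is
 * instead parsed by name_to_dev_t().
 */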

/*
 * Add a device to the list, or just increment the usage count if
 * it's already present.
 */
int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
		  struct dm_dev **result)
{
	int r;
	dev_t dev;
	struct dm_dev_internal *dd;
	struct dm_table *t = ti->table;

	BUG_ON(!t);

	dev = dm_get_dev_t(path);
	if (!dev)
		return -ENODEV;

	dd = find_device(&t->devices, dev);
	if (!dd) {
		dd = kmalloc(sizeof(*dd), GFP_KERNEL);
		if (!dd)
			return -ENOMEM;

		if ((r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev))) {
			kfree(dd);
			return r;
		}

		refcount_set(&dd->count, 1);
		list_add(&dd->list, &t->devices);
		goto out;

	} else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
		r = upgrade_mode(dd, mode, t->md);
		if (r)
			return r;
	}
	refcount_inc(&dd->count);
out:
	*result = dd->dm_dev;
	return 0;
}
EXPORT_SYMBOL(dm_get_device);

static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	struct queue_limits *limits = data;
	struct block_device *bdev = dev->bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	char b[BDEVNAME_SIZE];

	if (unlikely(!q)) {
		DMWARN("%s: Cannot set limits for nonexistent device %s",
		       dm_device_name(ti->table->md), bdevname(bdev, b));
		return 0;
	}

	if (blk_stack_limits(limits, &q->limits,
			get_start_sect(bdev) + start) < 0)
		DMWARN("%s: adding target device %s caused an alignment inconsistency: "
		       "physical_block_size=%u, logical_block_size=%u, "
		       "alignment_offset=%u, start=%llu",
		       dm_device_name(ti->table->md), bdevname(bdev, b),
		       q->limits.physical_block_size,
		       q->limits.logical_block_size,
		       q->limits.alignment_offset,
		       (unsigned long long) start << SECTOR_SHIFT);
	return 0;
}

/*
 * Decrement a device's use count and remove it if necessary.
 */
void dm_put_device(struct dm_target *ti, struct dm_dev *d)
{
	int found = 0;
	struct list_head *devices = &ti->table->devices;
	struct dm_dev_internal *dd;

	list_for_each_entry(dd, devices, list) {
		if (dd->dm_dev == d) {
			found = 1;
			break;
		}
	}
	if (!found) {
		DMWARN("%s: device %s not in table devices list",
		       dm_device_name(ti->table->md), d->name);
		return;
	}
	if (refcount_dec_and_test(&dd->count)) {
		dm_put_table_device(ti->table->md, d);
		list_del(&dd->list);
		kfree(dd);
	}
}
EXPORT_SYMBOL(dm_put_device);
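/*
 * Illustrative usage sketch (hypothetical target, not part of the original
 * source): a target constructor typically acquires its backing device with
 * dm_get_device() and releases it again in the destructor:
 *
 *	struct example_ctx { struct dm_dev *dev; };
 *
 *	static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
 *	{
 *		struct example_ctx *ec = kzalloc(sizeof(*ec), GFP_KERNEL);
 *
 *		if (!ec)
 *			return -ENOMEM;
 *		if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
 *				  &ec->dev)) {
 *			kfree(ec);
 *			ti->error = "Device lookup failed";
 *			return -EINVAL;
 *		}
 *		ti->private = ec;
 *		return 0;
 *	}
 *
 *	static void example_dtr(struct dm_target *ti)
 *	{
 *		struct example_ctx *ec = ti->private;
 *
 *		dm_put_device(ti, ec->dev);
 *		kfree(ec);
 *	}
 */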

/*
 * Checks to see if the target joins onto the end of the table.
 */
static int adjoin(struct dm_table *table, struct dm_target *ti)
{
	struct dm_target *prev;

	if (!table->num_targets)
		return !ti->begin;

	prev = &table->targets[table->num_targets - 1];
	return (ti->begin == (prev->begin + prev->len));
}

/*
 * Used to dynamically allocate the arg array.
 *
 * We do first allocation with GFP_NOIO because dm-mpath and dm-thin must
 * process messages even if some device is suspended. These messages have a
 * small fixed number of arguments.
 *
 * On the other hand, dm-switch needs to process bulk data using messages and
 * excessive use of GFP_NOIO could cause trouble.
 */
static char **realloc_argv(unsigned *size, char **old_argv)
{
	char **argv;
	unsigned new_size;
	gfp_t gfp;

	if (*size) {
		new_size = *size * 2;
		gfp = GFP_KERNEL;
	} else {
		new_size = 8;
		gfp = GFP_NOIO;
	}
	argv = kmalloc_array(new_size, sizeof(*argv), gfp);
	if (argv && old_argv) {
		memcpy(argv, old_argv, *size * sizeof(*argv));
		*size = new_size;
	}

	kfree(old_argv);
	return argv;
}

/*
 * Destructively splits up the argument list to pass to ctr.
 */
int dm_split_args(int *argc, char ***argvp, char *input)
{
	char *start, *end = input, *out, **argv = NULL;
	unsigned array_size = 0;

	*argc = 0;

	if (!input) {
		*argvp = NULL;
		return 0;
	}

	argv = realloc_argv(&array_size, argv);
	if (!argv)
		return -ENOMEM;

	while (1) {
		/* Skip whitespace */
		start = skip_spaces(end);

		if (!*start)
			break;	/* success, we hit the end */

		/* 'out' is used to remove any back-quotes */
		end = out = start;
		while (*end) {
			/* Everything apart from '\0' can be quoted */
			if (*end == '\\' && *(end + 1)) {
				*out++ = *(end + 1);
				end += 2;
				continue;
			}

			if (isspace(*end))
				break;	/* end of token */

			*out++ = *end++;
		}

		/* have we already filled the array ? */
		if ((*argc + 1) > array_size) {
			argv = realloc_argv(&array_size, argv);
			if (!argv)
				return -ENOMEM;
		}

		/* we know this is whitespace */
		if (*end)
			end++;

		/* terminate the string and put it in the array */
		*out = '\0';
		argv[*argc] = start;
		(*argc)++;
	}

	*argvp = argv;
	return 0;
}

/*
 * Impose necessary and sufficient conditions on a device's table such
 * that any incoming bio which respects its logical_block_size can be
 * processed successfully. If it falls across the boundary between
 * two or more targets, the size of each piece it gets split into must
 * be compatible with the logical_block_size of the target processing it.
 */
static int validate_hardware_logical_block_alignment(struct dm_table *table,
						     struct queue_limits *limits)
{
	/*
	 * This function uses arithmetic modulo the logical_block_size
	 * (in units of 512-byte sectors).
	 */
	unsigned short device_logical_block_size_sects =
		limits->logical_block_size >> SECTOR_SHIFT;

	/*
	 * Offset of the start of the next table entry, mod logical_block_size.
	 */
	unsigned short next_target_start = 0;

	/*
	 * Given an aligned bio that extends beyond the end of a
	 * target, how many sectors must the next target handle?
	 */
	unsigned short remaining = 0;

	struct dm_target *ti;
	struct queue_limits ti_limits;
	unsigned i;

	/*
	 * Check each entry in the table in turn.
	 */
	for (i = 0; i < dm_table_get_num_targets(table); i++) {
		ti = dm_table_get_target(table, i);

		blk_set_stacking_limits(&ti_limits);

		/* combine all target devices' limits */
		if (ti->type->iterate_devices)
			ti->type->iterate_devices(ti, dm_set_device_limits,
						  &ti_limits);

		/*
		 * If the remaining sectors fall entirely within this
		 * table entry are they compatible with its logical_block_size?
		 */
		if (remaining < ti->len &&
		    remaining & ((ti_limits.logical_block_size >>
				  SECTOR_SHIFT) - 1))
			break;	/* Error */

		next_target_start =
		    (unsigned short) ((next_target_start + ti->len) &
				      (device_logical_block_size_sects - 1));
		remaining = next_target_start ?
		    device_logical_block_size_sects - next_target_start : 0;
	}

	if (remaining) {
		DMWARN("%s: table line %u (start sect %llu len %llu) "
		       "not aligned to h/w logical block size %u",
		       dm_device_name(table->md), i,
		       (unsigned long long) ti->begin,
		       (unsigned long long) ti->len,
		       limits->logical_block_size);
		return -EINVAL;
	}

	return 0;
}

int dm_table_add_target(struct dm_table *t, const char *type,
			sector_t start, sector_t len, char *params)
{
	int r = -EINVAL, argc;
	char **argv;
	struct dm_target *tgt;

	if (t->singleton) {
		DMERR("%s: target type %s must appear alone in table",
		      dm_device_name(t->md), t->targets->type->name);
		return -EINVAL;
	}

	BUG_ON(t->num_targets >= t->num_allocated);

	tgt = t->targets + t->num_targets;
	memset(tgt, 0, sizeof(*tgt));

	if (!len) {
		DMERR("%s: zero-length target", dm_device_name(t->md));
		return -EINVAL;
	}

	tgt->type = dm_get_target_type(type);
	if (!tgt->type) {
		DMERR("%s: %s: unknown target type", dm_device_name(t->md), type);
		return -EINVAL;
	}

	if (dm_target_needs_singleton(tgt->type)) {
		if (t->num_targets) {
			tgt->error = "singleton target type must appear alone in table";
			goto bad;
		}
		t->singleton = true;
	}

	if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) {
		tgt->error = "target type may not be included in a read-only table";
		goto bad;
	}

	if (t->immutable_target_type) {
		if (t->immutable_target_type != tgt->type) {
			tgt->error = "immutable target type cannot be mixed with other target types";
			goto bad;
		}
	} else if (dm_target_is_immutable(tgt->type)) {
		if (t->num_targets) {
			tgt->error = "immutable target type cannot be mixed with other target types";
			goto bad;
		}
		t->immutable_target_type = tgt->type;
	}

	if (dm_target_has_integrity(tgt->type))
		t->integrity_added = 1;

	tgt->table = t;
	tgt->begin = start;
	tgt->len = len;
	tgt->error = "Unknown error";

	/*
	 * Does this target adjoin the previous one ?
	 */
	if (!adjoin(t, tgt)) {
		tgt->error = "Gap in table";
		goto bad;
	}

	r = dm_split_args(&argc, &argv, params);
	if (r) {
		tgt->error = "couldn't split parameters (insufficient memory)";
		goto bad;
	}

	r = tgt->type->ctr(tgt, argc, argv);
	kfree(argv);
	if (r)
		goto bad;

	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;

	if (!tgt->num_discard_bios && tgt->discards_supported)
		DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
		       dm_device_name(t->md), type);

	return 0;

 bad:
	DMERR("%s: %s: %s", dm_device_name(t->md), type, tgt->error);
	dm_put_target_type(tgt->type);
	return r;
}
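/*
 * Illustrative example (not part of the original source): a table line such
 * as
 *
 *	0 409600 linear /dev/sdb 2048
 *
 * arrives here as dm_table_add_target(t, "linear", 0, 409600,
 * "/dev/sdb 2048").  The params string is split by dm_split_args() and the
 * linear target's ctr then sees argc == 2, argv == { "/dev/sdb", "2048" }.
 */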

/*
 * Target argument parsing helpers.
 */
static int validate_next_arg(const struct dm_arg *arg,
			     struct dm_arg_set *arg_set,
			     unsigned *value, char **error, unsigned grouped)
{
	const char *arg_str = dm_shift_arg(arg_set);
	char dummy;

	if (!arg_str ||
	    (sscanf(arg_str, "%u%c", value, &dummy) != 1) ||
	    (*value < arg->min) ||
	    (*value > arg->max) ||
	    (grouped && arg_set->argc < *value)) {
		*error = arg->error;
		return -EINVAL;
	}

	return 0;
}

int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set,
		unsigned *value, char **error)
{
	return validate_next_arg(arg, arg_set, value, error, 0);
}
EXPORT_SYMBOL(dm_read_arg);

int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set,
		      unsigned *value, char **error)
{
	return validate_next_arg(arg, arg_set, value, error, 1);
}
EXPORT_SYMBOL(dm_read_arg_group);

const char *dm_shift_arg(struct dm_arg_set *as)
{
	char *r;

	if (as->argc) {
		as->argc--;
		r = *as->argv;
		as->argv++;
		return r;
	}

	return NULL;
}
EXPORT_SYMBOL(dm_shift_arg);

void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
{
	BUG_ON(as->argc < num_args);
	as->argc -= num_args;
	as->argv += num_args;
}
EXPORT_SYMBOL(dm_consume_args);

static bool __table_type_bio_based(enum dm_queue_mode table_type)
{
	return (table_type == DM_TYPE_BIO_BASED ||
		table_type == DM_TYPE_DAX_BIO_BASED);
}

static bool __table_type_request_based(enum dm_queue_mode table_type)
{
	return table_type == DM_TYPE_REQUEST_BASED;
}

void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
{
	t->type = type;
}
EXPORT_SYMBOL_GPL(dm_table_set_type);

/* validate the dax capability of the target device span */
int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
			sector_t start, sector_t len, void *data)
{
	int blocksize = *(int *) data, id;
	bool rc;

	id = dax_read_lock();
	rc = dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
	dax_read_unlock(id);

	return rc;
}

/* Check devices support synchronous DAX */
static int device_dax_synchronous(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	return dev->dax_dev && dax_synchronous(dev->dax_dev);
}

bool dm_table_supports_dax(struct dm_table *t,
			   iterate_devices_callout_fn iterate_fn, int *blocksize)
{
	struct dm_target *ti;
	unsigned i;

	/* Ensure that all targets support DAX. */
	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->type->direct_access)
			return false;

		if (!ti->type->iterate_devices ||
		    !ti->type->iterate_devices(ti, iterate_fn, blocksize))
			return false;
	}

	return true;
}

static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
{
	struct block_device *bdev = dev->bdev;
	struct request_queue *q = bdev_get_queue(bdev);

	/* request-based cannot stack on partitions! */
	if (bdev_is_partition(bdev))
		return false;

	return queue_is_mq(q);
}

static int dm_table_determine_type(struct dm_table *t)
{
	unsigned i;
	unsigned bio_based = 0, request_based = 0, hybrid = 0;
	struct dm_target *tgt;
	struct list_head *devices = dm_table_get_devices(t);
	enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
	int page_size = PAGE_SIZE;

	if (t->type != DM_TYPE_NONE) {
		/* target already set the table's type */
		if (t->type == DM_TYPE_BIO_BASED) {
			/* possibly upgrade to a variant of bio-based */
			goto verify_bio_based;
		}
		BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
		goto verify_rq_based;
	}

	for (i = 0; i < t->num_targets; i++) {
		tgt = t->targets + i;
		if (dm_target_hybrid(tgt))
			hybrid = 1;
		else if (dm_target_request_based(tgt))
			request_based = 1;
		else
			bio_based = 1;

		if (bio_based && request_based) {
			DMERR("Inconsistent table: different target types"
			      " can't be mixed up");
			return -EINVAL;
		}
	}

	if (hybrid && !bio_based && !request_based) {
		/*
		 * The targets can work either way.
		 * Determine the type from the live device.
		 * Default to bio-based if device is new.
		 */
		if (__table_type_request_based(live_md_type))
			request_based = 1;
		else
			bio_based = 1;
	}

	if (bio_based) {
verify_bio_based:
		/* We must use this table as bio-based */
		t->type = DM_TYPE_BIO_BASED;
		if (dm_table_supports_dax(t, device_supports_dax, &page_size) ||
		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
			t->type = DM_TYPE_DAX_BIO_BASED;
		}
		return 0;
	}

	BUG_ON(!request_based); /* No targets in this table */

	t->type = DM_TYPE_REQUEST_BASED;

verify_rq_based:
	/*
	 * Request-based dm supports only tables that have a single target now.
	 * To support multiple targets, request splitting support is needed,
	 * and that needs lots of changes in the block-layer.
	 * (e.g. request completion process for partial completion.)
	 */
	if (t->num_targets > 1) {
		DMERR("request-based DM doesn't support multiple targets");
		return -EINVAL;
	}

	if (list_empty(devices)) {
		int srcu_idx;
		struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);

		/* inherit live table's type */
		if (live_table)
			t->type = live_table->type;
		dm_put_live_table(t->md, srcu_idx);
		return 0;
	}

	tgt = dm_table_get_immutable_target(t);
	if (!tgt) {
		DMERR("table load rejected: immutable target is required");
		return -EINVAL;
	} else if (tgt->max_io_len) {
		DMERR("table load rejected: immutable target that splits IO is not supported");
		return -EINVAL;
	}

	/* Non-request-stackable devices can't be used for request-based dm */
	if (!tgt->type->iterate_devices ||
	    !tgt->type->iterate_devices(tgt, device_is_rq_stackable, NULL)) {
		DMERR("table load rejected: including non-request-stackable devices");
		return -EINVAL;
	}

	return 0;
}

enum dm_queue_mode dm_table_get_type(struct dm_table *t)
{
	return t->type;
}

struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
{
	return t->immutable_target_type;
}

struct dm_target *dm_table_get_immutable_target(struct dm_table *t)
{
	/* Immutable target is implicitly a singleton */
	if (t->num_targets > 1 ||
	    !dm_target_is_immutable(t->targets[0].type))
		return NULL;

	return t->targets;
}

struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);
		if (dm_target_is_wildcard(ti->type))
			return ti;
	}

	return NULL;
}

bool dm_table_bio_based(struct dm_table *t)
{
	return __table_type_bio_based(dm_table_get_type(t));
}

bool dm_table_request_based(struct dm_table *t)
{
	return __table_type_request_based(dm_table_get_type(t));
}

static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
	enum dm_queue_mode type = dm_table_get_type(t);
	unsigned per_io_data_size = 0;
	unsigned min_pool_size = 0;
	struct dm_target *ti;
	unsigned i;

	if (unlikely(type == DM_TYPE_NONE)) {
		DMWARN("no table type is set, can't allocate mempools");
		return -EINVAL;
	}

	if (__table_type_bio_based(type))
		for (i = 0; i < t->num_targets; i++) {
			ti = t->targets + i;
			per_io_data_size = max(per_io_data_size, ti->per_io_data_size);
			min_pool_size = max(min_pool_size, ti->num_flush_bios);
		}

	t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported,
					   per_io_data_size, min_pool_size);
	if (!t->mempools)
		return -ENOMEM;

	return 0;
}

void dm_table_free_md_mempools(struct dm_table *t)
{
	dm_free_md_mempools(t->mempools);
	t->mempools = NULL;
}

struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
{
	return t->mempools;
}
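/*
 * Illustrative note (not part of the original source): with 64-byte cache
 * lines and an 8-byte sector_t, KEYS_PER_NODE is 8 and CHILDREN_PER_NODE is
 * 9.  A table with 1000 targets then needs 125 leaf nodes, and the index
 * built below has counts[] = { 1, 2, 14, 125 } with depth 4, so a lookup in
 * dm_table_find_target() touches at most four cache-line-sized nodes.
 */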

static int setup_indexes(struct dm_table *t)
{
	int i;
	unsigned int total = 0;
	sector_t *indexes;

	/* allocate the space for *all* the indexes */
	for (i = t->depth - 2; i >= 0; i--) {
		t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
		total += t->counts[i];
	}

	indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE);
	if (!indexes)
		return -ENOMEM;

	/* set up internal nodes, bottom-up */
	for (i = t->depth - 2; i >= 0; i--) {
		t->index[i] = indexes;
		indexes += (KEYS_PER_NODE * t->counts[i]);
		setup_btree_index(i, t);
	}

	return 0;
}

/*
 * Builds the btree to index the map.
 */
static int dm_table_build_index(struct dm_table *t)
{
	int r = 0;
	unsigned int leaf_nodes;

	/* how many indexes will the btree have ? */
	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);

	/* leaf layer has already been set up */
	t->counts[t->depth - 1] = leaf_nodes;
	t->index[t->depth - 1] = t->highs;

	if (t->depth >= 2)
		r = setup_indexes(t);

	return r;
}

static bool integrity_profile_exists(struct gendisk *disk)
{
	return !!blk_get_integrity(disk);
}

/*
 * Get a disk whose integrity profile reflects the table's profile.
 * Returns NULL if integrity support was inconsistent or unavailable.
 */
static struct gendisk *dm_table_get_integrity_disk(struct dm_table *t)
{
	struct list_head *devices = dm_table_get_devices(t);
	struct dm_dev_internal *dd = NULL;
	struct gendisk *prev_disk = NULL, *template_disk = NULL;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		struct dm_target *ti = dm_table_get_target(t, i);
		if (!dm_target_passes_integrity(ti->type))
			goto no_integrity;
	}

	list_for_each_entry(dd, devices, list) {
		template_disk = dd->dm_dev->bdev->bd_disk;
		if (!integrity_profile_exists(template_disk))
			goto no_integrity;
		else if (prev_disk &&
			 blk_integrity_compare(prev_disk, template_disk) < 0)
			goto no_integrity;
		prev_disk = template_disk;
	}

	return template_disk;

no_integrity:
	if (prev_disk)
		DMWARN("%s: integrity not set: %s and %s profile mismatch",
		       dm_device_name(t->md),
		       prev_disk->disk_name,
		       template_disk->disk_name);
	return NULL;
}

/*
 * Register the mapped device for blk_integrity support if the
 * underlying devices have an integrity profile. But all devices may
 * not have matching profiles (checking all devices isn't reliable
 * during table load because this table may use other DM device(s) which
 * must be resumed before they will have an initialized integrity
 * profile). Consequently, stacked DM devices force a 2 stage integrity
 * profile validation: First pass during table load, final pass during
 * resume.
 */
static int dm_table_register_integrity(struct dm_table *t)
{
	struct mapped_device *md = t->md;
	struct gendisk *template_disk = NULL;

	/* If target handles integrity itself do not register it here. */
	if (t->integrity_added)
		return 0;

	template_disk = dm_table_get_integrity_disk(t);
	if (!template_disk)
		return 0;

	if (!integrity_profile_exists(dm_disk(md))) {
		t->integrity_supported = true;
		/*
		 * Register integrity profile during table load; we can do
		 * this because the final profile must match during resume.
		 */
		blk_integrity_register(dm_disk(md),
				       blk_get_integrity(template_disk));
		return 0;
	}

	/*
	 * If DM device already has an initialized integrity
	 * profile the new profile should not conflict.
	 */
	if (blk_integrity_compare(dm_disk(md), template_disk) < 0) {
		DMWARN("%s: conflict with existing integrity profile: "
		       "%s profile mismatch",
		       dm_device_name(t->md),
		       template_disk->disk_name);
		return 1;
	}

	/* Preserve existing integrity profile */
	t->integrity_supported = true;
	return 0;
}

/*
 * Prepares the table for use by building the indices,
 * setting the type, and allocating mempools.
 */
int dm_table_complete(struct dm_table *t)
{
	int r;

	r = dm_table_determine_type(t);
	if (r) {
		DMERR("unable to determine table type");
		return r;
	}

	r = dm_table_build_index(t);
	if (r) {
		DMERR("unable to build btrees");
		return r;
	}

	r = dm_table_register_integrity(t);
	if (r) {
		DMERR("could not register integrity profile.");
		return r;
	}

	r = dm_table_alloc_md_mempools(t, t->md);
	if (r)
		DMERR("unable to allocate mempools");

	return r;
}

static DEFINE_MUTEX(_event_lock);
void dm_table_event_callback(struct dm_table *t,
			     void (*fn)(void *), void *context)
{
	mutex_lock(&_event_lock);
	t->event_fn = fn;
	t->event_context = context;
	mutex_unlock(&_event_lock);
}

void dm_table_event(struct dm_table *t)
{
	mutex_lock(&_event_lock);
	if (t->event_fn)
		t->event_fn(t->event_context);
	mutex_unlock(&_event_lock);
}
EXPORT_SYMBOL(dm_table_event);

inline sector_t dm_table_get_size(struct dm_table *t)
{
	return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
}
EXPORT_SYMBOL(dm_table_get_size);

struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
{
	if (index >= t->num_targets)
		return NULL;

	return t->targets + index;
}

/*
 * Search the btree for the correct target.
 *
 * Caller should check returned pointer for NULL
 * to trap I/O beyond end of device.
 */
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
{
	unsigned int l, n = 0, k = 0;
	sector_t *node;

	if (unlikely(sector >= dm_table_get_size(t)))
		return NULL;

	for (l = 0; l < t->depth; l++) {
		n = get_child(n, k);
		node = get_node(t, l, n);

		for (k = 0; k < KEYS_PER_NODE; k++)
			if (node[k] >= sector)
				break;
	}

	return &t->targets[(KEYS_PER_NODE * n) + k];
}

static int count_device(struct dm_target *ti, struct dm_dev *dev,
			sector_t start, sector_t len, void *data)
{
	unsigned *num_devices = data;

	(*num_devices)++;

	return 0;
}
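/*
 * Illustrative note (not part of the original source): count_device() above
 * is the simplest instance of the iterate_devices_callout_fn pattern used by
 * the dm_table_*() helpers below.  A helper passes a per-device callback plus
 * an opaque data pointer to each target's iterate_devices method, which
 * invokes the callback once for every underlying device the target uses,
 * e.g.:
 *
 *	unsigned num_devices = 0;
 *
 *	ti->type->iterate_devices(ti, count_device, &num_devices);
 */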

/*
 * Check whether a table has no data devices attached using each
 * target's iterate_devices method.
 * Returns false if the result is unknown because a target doesn't
 * support iterate_devices.
 */
bool dm_table_has_no_data_devices(struct dm_table *table)
{
	struct dm_target *ti;
	unsigned i, num_devices;

	for (i = 0; i < dm_table_get_num_targets(table); i++) {
		ti = dm_table_get_target(table, i);

		if (!ti->type->iterate_devices)
			return false;

		num_devices = 0;
		ti->type->iterate_devices(ti, count_device, &num_devices);
		if (num_devices)
			return false;
	}

	return true;
}

static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev,
				 sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	enum blk_zoned_model *zoned_model = data;

	return q && blk_queue_zoned_model(q) == *zoned_model;
}

static bool dm_table_supports_zoned_model(struct dm_table *t,
					  enum blk_zoned_model zoned_model)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (zoned_model == BLK_ZONED_HM &&
		    !dm_target_supports_zoned_hm(ti->type))
			return false;

		if (!ti->type->iterate_devices ||
		    !ti->type->iterate_devices(ti, device_is_zoned_model, &zoned_model))
			return false;
	}

	return true;
}

static int device_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev,
				       sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	unsigned int *zone_sectors = data;

	return q && blk_queue_zone_sectors(q) == *zone_sectors;
}

static bool dm_table_matches_zone_sectors(struct dm_table *t,
					  unsigned int zone_sectors)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->type->iterate_devices ||
		    !ti->type->iterate_devices(ti, device_matches_zone_sectors, &zone_sectors))
			return false;
	}

	return true;
}

static int validate_hardware_zoned_model(struct dm_table *table,
					 enum blk_zoned_model zoned_model,
					 unsigned int zone_sectors)
{
	if (zoned_model == BLK_ZONED_NONE)
		return 0;

	if (!dm_table_supports_zoned_model(table, zoned_model)) {
		DMERR("%s: zoned model is not consistent across all devices",
		      dm_device_name(table->md));
		return -EINVAL;
	}

	/* Check zone size validity and compatibility */
	if (!zone_sectors || !is_power_of_2(zone_sectors))
		return -EINVAL;

	if (!dm_table_matches_zone_sectors(table, zone_sectors)) {
		DMERR("%s: zone sectors is not consistent across all devices",
		      dm_device_name(table->md));
		return -EINVAL;
	}

	return 0;
}

/*
 * Establish the new table's queue_limits and validate them.
 */
int dm_calculate_queue_limits(struct dm_table *table,
			      struct queue_limits *limits)
{
	struct dm_target *ti;
	struct queue_limits ti_limits;
	unsigned i;
	enum blk_zoned_model zoned_model = BLK_ZONED_NONE;
	unsigned int zone_sectors = 0;

	blk_set_stacking_limits(limits);

	for (i = 0; i < dm_table_get_num_targets(table); i++) {
		blk_set_stacking_limits(&ti_limits);

		ti = dm_table_get_target(table, i);

		if (!ti->type->iterate_devices)
			goto combine_limits;

		/*
		 * Combine queue limits of all the devices this target uses.
		 */
		ti->type->iterate_devices(ti, dm_set_device_limits,
					  &ti_limits);

		if (zoned_model == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) {
			/*
			 * After stacking all limits, validate all devices
			 * in table support this zoned model and zone sectors.
			 */
			zoned_model = ti_limits.zoned;
			zone_sectors = ti_limits.chunk_sectors;
		}

		/* Set I/O hints portion of queue limits */
		if (ti->type->io_hints)
			ti->type->io_hints(ti, &ti_limits);

		/*
		 * Check each device area is consistent with the target's
		 * overall queue limits.
		 */
		if (ti->type->iterate_devices(ti, device_area_is_invalid,
					      &ti_limits))
			return -EINVAL;

combine_limits:
		/*
		 * Merge this target's queue limits into the overall limits
		 * for the table.
		 */
		if (blk_stack_limits(limits, &ti_limits, 0) < 0)
			DMWARN("%s: adding target device "
			       "(start sect %llu len %llu) "
			       "caused an alignment inconsistency",
			       dm_device_name(table->md),
			       (unsigned long long) ti->begin,
			       (unsigned long long) ti->len);
	}

	/*
	 * Verify that the zoned model and zone sectors, as determined before
	 * any .io_hints override, are the same across all devices in the table.
	 * - this is especially relevant if .io_hints is emulating a disk-managed
	 *   zoned model (aka BLK_ZONED_NONE) on host-managed zoned block devices.
	 *   BUT...
	 */
	if (limits->zoned != BLK_ZONED_NONE) {
		/*
		 * ...IF the above limits stacking determined a zoned model
		 * validate that all of the table's devices conform to it.
		 */
		zoned_model = limits->zoned;
		zone_sectors = limits->chunk_sectors;
	}
	if (validate_hardware_zoned_model(table, zoned_model, zone_sectors))
		return -EINVAL;

	return validate_hardware_logical_block_alignment(table, limits);
}

/*
 * Verify that all devices have an integrity profile that matches the
 * DM device's registered integrity profile. If the profiles don't
 * match then unregister the DM device's integrity profile.
 */
static void dm_table_verify_integrity(struct dm_table *t)
{
	struct gendisk *template_disk = NULL;

	if (t->integrity_added)
		return;

	if (t->integrity_supported) {
		/*
		 * Verify that the original integrity profile
		 * matches all the devices in this table.
		 */
		template_disk = dm_table_get_integrity_disk(t);
		if (template_disk &&
		    blk_integrity_compare(dm_disk(t->md), template_disk) >= 0)
			return;
	}

	if (integrity_profile_exists(dm_disk(t->md))) {
		DMWARN("%s: unable to establish an integrity profile",
		       dm_device_name(t->md));
		blk_integrity_unregister(dm_disk(t->md));
	}
}

static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	unsigned long flush = (unsigned long) data;
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && (q->queue_flags & flush);
}

static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
{
	struct dm_target *ti;
	unsigned i;

	/*
	 * Require at least one underlying device to support flushes.
	 * t->devices includes internal dm devices such as mirror logs
	 * so we need to use iterate_devices here, which targets
	 * supporting flushes must provide.
	 */
	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->num_flush_bios)
			continue;

		if (ti->flush_supported)
			return true;

		if (ti->type->iterate_devices &&
		    ti->type->iterate_devices(ti, device_flush_capable, (void *) flush))
			return true;
	}

	return false;
}

static int device_dax_write_cache_enabled(struct dm_target *ti,
					  struct dm_dev *dev, sector_t start,
					  sector_t len, void *data)
{
	struct dax_device *dax_dev = dev->dax_dev;

	if (!dax_dev)
		return false;

	if (dax_write_cache_enabled(dax_dev))
		return true;
	return false;
}

static int dm_table_supports_dax_write_cache(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (ti->type->iterate_devices &&
		    ti->type->iterate_devices(ti,
				device_dax_write_cache_enabled, NULL))
			return true;
	}

	return false;
}

static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
			    sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && blk_queue_nonrot(q);
}

static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
				sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && !blk_queue_add_random(q);
}

static bool dm_table_all_devices_attribute(struct dm_table *t,
					   iterate_devices_callout_fn func)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->type->iterate_devices ||
		    !ti->type->iterate_devices(ti, func, NULL))
			return false;
	}

	return true;
}

static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev,
					 sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && !q->limits.max_write_same_sectors;
}

static bool dm_table_supports_write_same(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->num_write_same_bios)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_write_same_capable, NULL))
			return false;
	}

	return true;
}

static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
					   sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && !q->limits.max_write_zeroes_sectors;
}

static bool dm_table_supports_write_zeroes(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i = 0;

	while (i < dm_table_get_num_targets(t)) {
		ti = dm_table_get_target(t, i++);

		if (!ti->num_write_zeroes_bios)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_write_zeroes_capable, NULL))
			return false;
	}

	return true;
}
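/*
 * Illustrative note (not part of the original source): several callbacks in
 * this file are deliberately phrased in the negative (device_not_*_capable).
 * Target implementations of iterate_devices() conventionally stop at, and
 * return, the first nonzero callback result, so to require that *all*
 * devices are capable a helper asks the negated question and treats a true
 * result as "at least one device lacks the capability", as
 * dm_table_supports_write_same() does above.
 */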

static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev,
				     sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && !blk_queue_nowait(q);
}

static bool dm_table_supports_nowait(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i = 0;

	while (i < dm_table_get_num_targets(t)) {
		ti = dm_table_get_target(t, i++);

		if (!dm_target_supports_nowait(ti->type))
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_nowait_capable, NULL))
			return false;
	}

	return true;
}

static int device_not_discard_capable(struct dm_target *ti, struct dm_dev *dev,
				      sector_t start, sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && !blk_queue_discard(q);
}

static bool dm_table_supports_discards(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->num_discard_bios)
			return false;

		/*
		 * Either the target provides discard support (as implied by setting
		 * 'discards_supported') or it relies on _all_ data devices having
		 * discard support.
		 */
		if (!ti->discards_supported &&
		    (!ti->type->iterate_devices ||
		     ti->type->iterate_devices(ti, device_not_discard_capable, NULL)))
			return false;
	}

	return true;
}

static int device_not_secure_erase_capable(struct dm_target *ti,
					   struct dm_dev *dev, sector_t start,
					   sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && !blk_queue_secure_erase(q);
}

static bool dm_table_supports_secure_erase(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned int i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (!ti->num_secure_erase_bios)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_secure_erase_capable, NULL))
			return false;
	}

	return true;
}

static int device_requires_stable_pages(struct dm_target *ti,
					struct dm_dev *dev, sector_t start,
					sector_t len, void *data)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return q && blk_queue_stable_writes(q);
}

/*
 * If any underlying device requires stable pages, a table must require
 * them as well. Only targets that support iterate_devices are considered:
 * don't want error, zero, etc to require stable pages.
 */
static bool dm_table_requires_stable_pages(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (ti->type->iterate_devices &&
		    ti->type->iterate_devices(ti, device_requires_stable_pages, NULL))
			return true;
	}

	return false;
}

void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
			       struct queue_limits *limits)
{
	bool wc = false, fua = false;
	int page_size = PAGE_SIZE;

	/*
	 * Copy table's limits to the DM device's request_queue
	 */
	q->limits = *limits;

	if (dm_table_supports_nowait(t))
		blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);

	if (!dm_table_supports_discards(t)) {
		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
		/* Must also clear discard limits... */
		q->limits.max_discard_sectors = 0;
		q->limits.max_hw_discard_sectors = 0;
		q->limits.discard_granularity = 0;
		q->limits.discard_alignment = 0;
		q->limits.discard_misaligned = 0;
	} else
		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);

	if (dm_table_supports_secure_erase(t))
		blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);

	if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
		wc = true;
		if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA)))
			fua = true;
	}
	blk_queue_write_cache(q, wc, fua);

	if (dm_table_supports_dax(t, device_supports_dax, &page_size)) {
		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
		if (dm_table_supports_dax(t, device_dax_synchronous, NULL))
			set_dax_synchronous(t->md->dax_dev);
	} else
		blk_queue_flag_clear(QUEUE_FLAG_DAX, q);

	if (dm_table_supports_dax_write_cache(t))
		dax_write_cache(t->md->dax_dev, true);

	/* Ensure that all underlying devices are non-rotational. */
	if (dm_table_all_devices_attribute(t, device_is_nonrot))
		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);

	if (!dm_table_supports_write_same(t))
		q->limits.max_write_same_sectors = 0;
	if (!dm_table_supports_write_zeroes(t))
		q->limits.max_write_zeroes_sectors = 0;

	dm_table_verify_integrity(t);

	/*
	 * Some devices don't use blk_integrity but still want stable pages
	 * because they do their own checksumming.
	 */
	if (dm_table_requires_stable_pages(t))
		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
	else
		blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);

	/*
	 * Determine whether or not this queue's I/O timings contribute
	 * to the entropy pool. Only request-based targets use this.
	 * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
	 * have it set.
	 */
	if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);

	/*
	 * For a zoned target, the number of zones should be updated for the
	 * correct value to be exposed in sysfs queue/nr_zones. For a BIO based
	 * target, this is all that is needed.
	 */
#ifdef CONFIG_BLK_DEV_ZONED
	if (blk_queue_is_zoned(q)) {
		WARN_ON_ONCE(queue_is_mq(q));
		q->nr_zones = blkdev_nr_zones(t->md->disk);
	}
#endif

	blk_queue_update_readahead(q);
}

unsigned int dm_table_get_num_targets(struct dm_table *t)
{
	return t->num_targets;
}

struct list_head *dm_table_get_devices(struct dm_table *t)
{
	return &t->devices;
}

fmode_t dm_table_get_mode(struct dm_table *t)
{
	return t->mode;
}
EXPORT_SYMBOL(dm_table_get_mode);

enum suspend_mode {
	PRESUSPEND,
	PRESUSPEND_UNDO,
	POSTSUSPEND,
};

static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
{
	int i = t->num_targets;
	struct dm_target *ti = t->targets;

	lockdep_assert_held(&t->md->suspend_lock);

	while (i--) {
		switch (mode) {
		case PRESUSPEND:
			if (ti->type->presuspend)
				ti->type->presuspend(ti);
			break;
		case PRESUSPEND_UNDO:
			if (ti->type->presuspend_undo)
				ti->type->presuspend_undo(ti);
			break;
		case POSTSUSPEND:
			if (ti->type->postsuspend)
				ti->type->postsuspend(ti);
			break;
		}
		ti++;
	}
}

void dm_table_presuspend_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, PRESUSPEND);
}

void dm_table_presuspend_undo_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, PRESUSPEND_UNDO);
}

void dm_table_postsuspend_targets(struct dm_table *t)
{
	if (!t)
		return;

	suspend_targets(t, POSTSUSPEND);
}

int dm_table_resume_targets(struct dm_table *t)
{
	int i, r = 0;

	lockdep_assert_held(&t->md->suspend_lock);

	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = t->targets + i;

		if (!ti->type->preresume)
			continue;

		r = ti->type->preresume(ti);
		if (r) {
			DMERR("%s: %s: preresume failed, error = %d",
			      dm_device_name(t->md), ti->type->name, r);
			return r;
		}
	}

	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = t->targets + i;

		if (ti->type->resume)
			ti->type->resume(ti);
	}

	return 0;
}

struct mapped_device *dm_table_get_md(struct dm_table *t)
{
	return t->md;
}
EXPORT_SYMBOL(dm_table_get_md);

const char *dm_table_device_name(struct dm_table *t)
{
	return dm_device_name(t->md);
}
EXPORT_SYMBOL_GPL(dm_table_device_name);

void dm_table_run_md_queue_async(struct dm_table *t)
{
	if (!dm_table_request_based(t))
		return;

	if (t->md->queue)
		blk_mq_run_hw_queues(t->md->queue, true);
}
EXPORT_SYMBOL(dm_table_run_md_queue_async);
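/*
 * Illustrative summary (not part of the original source): the typical table
 * lifecycle, driven by the ioctl layer and core dm code, is
 *
 *	dm_table_create()
 *	dm_table_add_target()		(once per table line)
 *	dm_table_complete()		(determine type, build index, mempools)
 *	dm_calculate_queue_limits() and dm_table_set_restrictions()  (on resume)
 *	dm_table_destroy()		(when the table is replaced or removed)
 */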