1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * gendisk handling 4 * 5 * Portions Copyright (C) 2020 Christoph Hellwig 6 */ 7 8 #include <linux/module.h> 9 #include <linux/ctype.h> 10 #include <linux/fs.h> 11 #include <linux/genhd.h> 12 #include <linux/kdev_t.h> 13 #include <linux/kernel.h> 14 #include <linux/blkdev.h> 15 #include <linux/backing-dev.h> 16 #include <linux/init.h> 17 #include <linux/spinlock.h> 18 #include <linux/proc_fs.h> 19 #include <linux/seq_file.h> 20 #include <linux/slab.h> 21 #include <linux/kmod.h> 22 #include <linux/mutex.h> 23 #include <linux/idr.h> 24 #include <linux/log2.h> 25 #include <linux/pm_runtime.h> 26 #include <linux/badblocks.h> 27 28 #include "blk.h" 29 30 static struct kobject *block_depr; 31 32 DECLARE_RWSEM(bdev_lookup_sem); 33 34 /* for extended dynamic devt allocation, currently only one major is used */ 35 #define NR_EXT_DEVT (1 << MINORBITS) 36 static DEFINE_IDA(ext_devt_ida); 37 38 static void disk_check_events(struct disk_events *ev, 39 unsigned int *clearing_ptr); 40 static void disk_alloc_events(struct gendisk *disk); 41 static void disk_add_events(struct gendisk *disk); 42 static void disk_del_events(struct gendisk *disk); 43 static void disk_release_events(struct gendisk *disk); 44 45 void set_capacity(struct gendisk *disk, sector_t sectors) 46 { 47 struct block_device *bdev = disk->part0; 48 unsigned long flags; 49 50 spin_lock_irqsave(&bdev->bd_size_lock, flags); 51 i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); 52 spin_unlock_irqrestore(&bdev->bd_size_lock, flags); 53 } 54 EXPORT_SYMBOL(set_capacity); 55 56 /* 57 * Set disk capacity and notify if the size is not currently zero and will not 58 * be set to zero. Returns true if a uevent was sent, otherwise false. 
59 */ 60 bool set_capacity_and_notify(struct gendisk *disk, sector_t size) 61 { 62 sector_t capacity = get_capacity(disk); 63 char *envp[] = { "RESIZE=1", NULL }; 64 65 set_capacity(disk, size); 66 67 /* 68 * Only print a message and send a uevent if the gendisk is user visible 69 * and alive. This avoids spamming the log and udev when setting the 70 * initial capacity during probing. 71 */ 72 if (size == capacity || 73 (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP) 74 return false; 75 76 pr_info("%s: detected capacity change from %lld to %lld\n", 77 disk->disk_name, size, capacity); 78 79 /* 80 * Historically we did not send a uevent for changes to/from an empty 81 * device. 82 */ 83 if (!capacity || !size) 84 return false; 85 kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); 86 return true; 87 } 88 EXPORT_SYMBOL_GPL(set_capacity_and_notify); 89 90 /* 91 * Format the device name of the indicated disk into the supplied buffer and 92 * return a pointer to that same buffer for convenience. 
93 */ 94 char *disk_name(struct gendisk *hd, int partno, char *buf) 95 { 96 if (!partno) 97 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); 98 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) 99 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); 100 else 101 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); 102 103 return buf; 104 } 105 106 const char *bdevname(struct block_device *bdev, char *buf) 107 { 108 return disk_name(bdev->bd_disk, bdev->bd_partno, buf); 109 } 110 EXPORT_SYMBOL(bdevname); 111 112 static void part_stat_read_all(struct block_device *part, 113 struct disk_stats *stat) 114 { 115 int cpu; 116 117 memset(stat, 0, sizeof(struct disk_stats)); 118 for_each_possible_cpu(cpu) { 119 struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu); 120 int group; 121 122 for (group = 0; group < NR_STAT_GROUPS; group++) { 123 stat->nsecs[group] += ptr->nsecs[group]; 124 stat->sectors[group] += ptr->sectors[group]; 125 stat->ios[group] += ptr->ios[group]; 126 stat->merges[group] += ptr->merges[group]; 127 } 128 129 stat->io_ticks += ptr->io_ticks; 130 } 131 } 132 133 static unsigned int part_in_flight(struct block_device *part) 134 { 135 unsigned int inflight = 0; 136 int cpu; 137 138 for_each_possible_cpu(cpu) { 139 inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) + 140 part_stat_local_read_cpu(part, in_flight[1], cpu); 141 } 142 if ((int)inflight < 0) 143 inflight = 0; 144 145 return inflight; 146 } 147 148 static void part_in_flight_rw(struct block_device *part, 149 unsigned int inflight[2]) 150 { 151 int cpu; 152 153 inflight[0] = 0; 154 inflight[1] = 0; 155 for_each_possible_cpu(cpu) { 156 inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu); 157 inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu); 158 } 159 if ((int)inflight[0] < 0) 160 inflight[0] = 0; 161 if ((int)inflight[1] < 0) 162 inflight[1] = 0; 163 } 164 165 struct block_device *__disk_get_part(struct gendisk *disk, int 
partno) 166 { 167 struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl); 168 169 if (unlikely(partno < 0 || partno >= ptbl->len)) 170 return NULL; 171 return rcu_dereference(ptbl->part[partno]); 172 } 173 174 /** 175 * disk_part_iter_init - initialize partition iterator 176 * @piter: iterator to initialize 177 * @disk: disk to iterate over 178 * @flags: DISK_PITER_* flags 179 * 180 * Initialize @piter so that it iterates over partitions of @disk. 181 * 182 * CONTEXT: 183 * Don't care. 184 */ 185 void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, 186 unsigned int flags) 187 { 188 struct disk_part_tbl *ptbl; 189 190 rcu_read_lock(); 191 ptbl = rcu_dereference(disk->part_tbl); 192 193 piter->disk = disk; 194 piter->part = NULL; 195 196 if (flags & DISK_PITER_REVERSE) 197 piter->idx = ptbl->len - 1; 198 else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0)) 199 piter->idx = 0; 200 else 201 piter->idx = 1; 202 203 piter->flags = flags; 204 205 rcu_read_unlock(); 206 } 207 EXPORT_SYMBOL_GPL(disk_part_iter_init); 208 209 /** 210 * disk_part_iter_next - proceed iterator to the next partition and return it 211 * @piter: iterator of interest 212 * 213 * Proceed @piter to the next partition and return it. 214 * 215 * CONTEXT: 216 * Don't care. 
217 */ 218 struct block_device *disk_part_iter_next(struct disk_part_iter *piter) 219 { 220 struct disk_part_tbl *ptbl; 221 int inc, end; 222 223 /* put the last partition */ 224 disk_part_iter_exit(piter); 225 226 /* get part_tbl */ 227 rcu_read_lock(); 228 ptbl = rcu_dereference(piter->disk->part_tbl); 229 230 /* determine iteration parameters */ 231 if (piter->flags & DISK_PITER_REVERSE) { 232 inc = -1; 233 if (piter->flags & (DISK_PITER_INCL_PART0 | 234 DISK_PITER_INCL_EMPTY_PART0)) 235 end = -1; 236 else 237 end = 0; 238 } else { 239 inc = 1; 240 end = ptbl->len; 241 } 242 243 /* iterate to the next partition */ 244 for (; piter->idx != end; piter->idx += inc) { 245 struct block_device *part; 246 247 part = rcu_dereference(ptbl->part[piter->idx]); 248 if (!part) 249 continue; 250 piter->part = bdgrab(part); 251 if (!piter->part) 252 continue; 253 if (!bdev_nr_sectors(part) && 254 !(piter->flags & DISK_PITER_INCL_EMPTY) && 255 !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && 256 piter->idx == 0)) { 257 bdput(piter->part); 258 piter->part = NULL; 259 continue; 260 } 261 262 piter->idx += inc; 263 break; 264 } 265 266 rcu_read_unlock(); 267 268 return piter->part; 269 } 270 EXPORT_SYMBOL_GPL(disk_part_iter_next); 271 272 /** 273 * disk_part_iter_exit - finish up partition iteration 274 * @piter: iter of interest 275 * 276 * Called when iteration is over. Cleans up @piter. 277 * 278 * CONTEXT: 279 * Don't care. 
280 */ 281 void disk_part_iter_exit(struct disk_part_iter *piter) 282 { 283 if (piter->part) 284 bdput(piter->part); 285 piter->part = NULL; 286 } 287 EXPORT_SYMBOL_GPL(disk_part_iter_exit); 288 289 static inline int sector_in_part(struct block_device *part, sector_t sector) 290 { 291 return part->bd_start_sect <= sector && 292 sector < part->bd_start_sect + bdev_nr_sectors(part); 293 } 294 295 /** 296 * disk_map_sector_rcu - map sector to partition 297 * @disk: gendisk of interest 298 * @sector: sector to map 299 * 300 * Find out which partition @sector maps to on @disk. This is 301 * primarily used for stats accounting. 302 * 303 * CONTEXT: 304 * RCU read locked. 305 * 306 * RETURNS: 307 * Found partition on success, part0 is returned if no partition matches 308 * or the matched partition is being deleted. 309 */ 310 struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) 311 { 312 struct disk_part_tbl *ptbl; 313 struct block_device *part; 314 int i; 315 316 rcu_read_lock(); 317 ptbl = rcu_dereference(disk->part_tbl); 318 319 part = rcu_dereference(ptbl->last_lookup); 320 if (part && sector_in_part(part, sector)) 321 goto out_unlock; 322 323 for (i = 1; i < ptbl->len; i++) { 324 part = rcu_dereference(ptbl->part[i]); 325 if (part && sector_in_part(part, sector)) { 326 rcu_assign_pointer(ptbl->last_lookup, part); 327 goto out_unlock; 328 } 329 } 330 331 part = disk->part0; 332 out_unlock: 333 rcu_read_unlock(); 334 return part; 335 } 336 337 /** 338 * disk_has_partitions 339 * @disk: gendisk of interest 340 * 341 * Walk through the partition table and check if valid partition exists. 342 * 343 * CONTEXT: 344 * Don't care. 345 * 346 * RETURNS: 347 * True if the gendisk has at least one valid non-zero size partition. 348 * Otherwise false. 
349 */ 350 bool disk_has_partitions(struct gendisk *disk) 351 { 352 struct disk_part_tbl *ptbl; 353 int i; 354 bool ret = false; 355 356 rcu_read_lock(); 357 ptbl = rcu_dereference(disk->part_tbl); 358 359 /* Iterate partitions skipping the whole device at index 0 */ 360 for (i = 1; i < ptbl->len; i++) { 361 if (rcu_dereference(ptbl->part[i])) { 362 ret = true; 363 break; 364 } 365 } 366 367 rcu_read_unlock(); 368 369 return ret; 370 } 371 EXPORT_SYMBOL_GPL(disk_has_partitions); 372 373 /* 374 * Can be deleted altogether. Later. 375 * 376 */ 377 #define BLKDEV_MAJOR_HASH_SIZE 255 378 static struct blk_major_name { 379 struct blk_major_name *next; 380 int major; 381 char name[16]; 382 void (*probe)(dev_t devt); 383 } *major_names[BLKDEV_MAJOR_HASH_SIZE]; 384 static DEFINE_MUTEX(major_names_lock); 385 386 /* index in the above - for now: assume no multimajor ranges */ 387 static inline int major_to_index(unsigned major) 388 { 389 return major % BLKDEV_MAJOR_HASH_SIZE; 390 } 391 392 #ifdef CONFIG_PROC_FS 393 void blkdev_show(struct seq_file *seqf, off_t offset) 394 { 395 struct blk_major_name *dp; 396 397 mutex_lock(&major_names_lock); 398 for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next) 399 if (dp->major == offset) 400 seq_printf(seqf, "%3d %s\n", dp->major, dp->name); 401 mutex_unlock(&major_names_lock); 402 } 403 #endif /* CONFIG_PROC_FS */ 404 405 /** 406 * __register_blkdev - register a new block device 407 * 408 * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If 409 * @major = 0, try to allocate any unused major number. 410 * @name: the name of the new block device as a zero terminated string 411 * @probe: allback that is called on access to any minor number of @major 412 * 413 * The @name must be unique within the system. 
414 * 415 * The return value depends on the @major input parameter: 416 * 417 * - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1] 418 * then the function returns zero on success, or a negative error code 419 * - if any unused major number was requested with @major = 0 parameter 420 * then the return value is the allocated major number in range 421 * [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise 422 * 423 * See Documentation/admin-guide/devices.txt for the list of allocated 424 * major numbers. 425 * 426 * Use register_blkdev instead for any new code. 427 */ 428 int __register_blkdev(unsigned int major, const char *name, 429 void (*probe)(dev_t devt)) 430 { 431 struct blk_major_name **n, *p; 432 int index, ret = 0; 433 434 mutex_lock(&major_names_lock); 435 436 /* temporary */ 437 if (major == 0) { 438 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) { 439 if (major_names[index] == NULL) 440 break; 441 } 442 443 if (index == 0) { 444 printk("%s: failed to get major for %s\n", 445 __func__, name); 446 ret = -EBUSY; 447 goto out; 448 } 449 major = index; 450 ret = major; 451 } 452 453 if (major >= BLKDEV_MAJOR_MAX) { 454 pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n", 455 __func__, major, BLKDEV_MAJOR_MAX-1, name); 456 457 ret = -EINVAL; 458 goto out; 459 } 460 461 p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL); 462 if (p == NULL) { 463 ret = -ENOMEM; 464 goto out; 465 } 466 467 p->major = major; 468 p->probe = probe; 469 strlcpy(p->name, name, sizeof(p->name)); 470 p->next = NULL; 471 index = major_to_index(major); 472 473 for (n = &major_names[index]; *n; n = &(*n)->next) { 474 if ((*n)->major == major) 475 break; 476 } 477 if (!*n) 478 *n = p; 479 else 480 ret = -EBUSY; 481 482 if (ret < 0) { 483 printk("register_blkdev: cannot get major %u for %s\n", 484 major, name); 485 kfree(p); 486 } 487 out: 488 mutex_unlock(&major_names_lock); 489 return ret; 490 } 491 
EXPORT_SYMBOL(__register_blkdev); 492 493 void unregister_blkdev(unsigned int major, const char *name) 494 { 495 struct blk_major_name **n; 496 struct blk_major_name *p = NULL; 497 int index = major_to_index(major); 498 499 mutex_lock(&major_names_lock); 500 for (n = &major_names[index]; *n; n = &(*n)->next) 501 if ((*n)->major == major) 502 break; 503 if (!*n || strcmp((*n)->name, name)) { 504 WARN_ON(1); 505 } else { 506 p = *n; 507 *n = p->next; 508 } 509 mutex_unlock(&major_names_lock); 510 kfree(p); 511 } 512 513 EXPORT_SYMBOL(unregister_blkdev); 514 515 /** 516 * blk_mangle_minor - scatter minor numbers apart 517 * @minor: minor number to mangle 518 * 519 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT 520 * is enabled. Mangling twice gives the original value. 521 * 522 * RETURNS: 523 * Mangled value. 524 * 525 * CONTEXT: 526 * Don't care. 527 */ 528 static int blk_mangle_minor(int minor) 529 { 530 #ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT 531 int i; 532 533 for (i = 0; i < MINORBITS / 2; i++) { 534 int low = minor & (1 << i); 535 int high = minor & (1 << (MINORBITS - 1 - i)); 536 int distance = MINORBITS - 1 - 2 * i; 537 538 minor ^= low | high; /* clear both bits */ 539 low <<= distance; /* swap the positions */ 540 high >>= distance; 541 minor |= low | high; /* and set */ 542 } 543 #endif 544 return minor; 545 } 546 547 /** 548 * blk_alloc_devt - allocate a dev_t for a block device 549 * @bdev: block device to allocate dev_t for 550 * @devt: out parameter for resulting dev_t 551 * 552 * Allocate a dev_t for block device. 553 * 554 * RETURNS: 555 * 0 on success, allocated dev_t is returned in *@devt. -errno on 556 * failure. 557 * 558 * CONTEXT: 559 * Might sleep. 560 */ 561 int blk_alloc_devt(struct block_device *bdev, dev_t *devt) 562 { 563 struct gendisk *disk = bdev->bd_disk; 564 int idx; 565 566 /* in consecutive minor range? 
*/ 567 if (bdev->bd_partno < disk->minors) { 568 *devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno); 569 return 0; 570 } 571 572 idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL); 573 if (idx < 0) 574 return idx == -ENOSPC ? -EBUSY : idx; 575 576 *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx)); 577 return 0; 578 } 579 580 /** 581 * blk_free_devt - free a dev_t 582 * @devt: dev_t to free 583 * 584 * Free @devt which was allocated using blk_alloc_devt(). 585 * 586 * CONTEXT: 587 * Might sleep. 588 */ 589 void blk_free_devt(dev_t devt) 590 { 591 if (MAJOR(devt) == BLOCK_EXT_MAJOR) 592 ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt))); 593 } 594 595 static char *bdevt_str(dev_t devt, char *buf) 596 { 597 if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { 598 char tbuf[BDEVT_SIZE]; 599 snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt)); 600 snprintf(buf, BDEVT_SIZE, "%-9s", tbuf); 601 } else 602 snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); 603 604 return buf; 605 } 606 607 static void disk_scan_partitions(struct gendisk *disk) 608 { 609 struct block_device *bdev; 610 611 if (!get_capacity(disk) || !disk_part_scan_enabled(disk)) 612 return; 613 614 set_bit(GD_NEED_PART_SCAN, &disk->state); 615 bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL); 616 if (!IS_ERR(bdev)) 617 blkdev_put(bdev, FMODE_READ); 618 } 619 620 static void register_disk(struct device *parent, struct gendisk *disk, 621 const struct attribute_group **groups) 622 { 623 struct device *ddev = disk_to_dev(disk); 624 struct disk_part_iter piter; 625 struct block_device *part; 626 int err; 627 628 ddev->parent = parent; 629 630 dev_set_name(ddev, "%s", disk->disk_name); 631 632 /* delay uevents, until we scanned partition table */ 633 dev_set_uevent_suppress(ddev, 1); 634 635 if (groups) { 636 WARN_ON(ddev->groups); 637 ddev->groups = groups; 638 } 639 if (device_add(ddev)) 640 return; 641 if (!sysfs_deprecated) { 642 err = 
sysfs_create_link(block_depr, &ddev->kobj, 643 kobject_name(&ddev->kobj)); 644 if (err) { 645 device_del(ddev); 646 return; 647 } 648 } 649 650 /* 651 * avoid probable deadlock caused by allocating memory with 652 * GFP_KERNEL in runtime_resume callback of its all ancestor 653 * devices 654 */ 655 pm_runtime_set_memalloc_noio(ddev, true); 656 657 disk->part0->bd_holder_dir = 658 kobject_create_and_add("holders", &ddev->kobj); 659 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); 660 661 if (disk->flags & GENHD_FL_HIDDEN) { 662 dev_set_uevent_suppress(ddev, 0); 663 return; 664 } 665 666 disk_scan_partitions(disk); 667 668 /* announce disk after possible partitions are created */ 669 dev_set_uevent_suppress(ddev, 0); 670 kobject_uevent(&ddev->kobj, KOBJ_ADD); 671 672 /* announce possible partitions */ 673 disk_part_iter_init(&piter, disk, 0); 674 while ((part = disk_part_iter_next(&piter))) 675 kobject_uevent(bdev_kobj(part), KOBJ_ADD); 676 disk_part_iter_exit(&piter); 677 678 if (disk->queue->backing_dev_info->dev) { 679 err = sysfs_create_link(&ddev->kobj, 680 &disk->queue->backing_dev_info->dev->kobj, 681 "bdi"); 682 WARN_ON(err); 683 } 684 } 685 686 /** 687 * __device_add_disk - add disk information to kernel list 688 * @parent: parent device for the disk 689 * @disk: per-device partitioning information 690 * @groups: Additional per-device sysfs groups 691 * @register_queue: register the queue if set to true 692 * 693 * This function registers the partitioning information in @disk 694 * with the kernel. 
*
 * FIXME: error handling
 */
static void __device_add_disk(struct device *parent, struct gendisk *disk,
			      const struct attribute_group **groups,
			      bool register_queue)
{
	dev_t devt;
	int retval;

	/*
	 * The disk queue should now be all set with enough information about
	 * the device for the elevator code to pick an adequate default
	 * elevator if one is needed, that is, for devices requesting queue
	 * registration.
	 */
	if (register_queue)
		elevator_init_mq(disk->queue);

	/* minors == 0 indicates to use ext devt from part0 and should
	 * be accompanied with EXT_DEVT flag.  Make sure all
	 * parameters make sense.
	 */
	WARN_ON(disk->minors && !(disk->major || disk->first_minor));
	WARN_ON(!disk->minors &&
		!(disk->flags & (GENHD_FL_EXT_DEVT | GENHD_FL_HIDDEN)));

	disk->flags |= GENHD_FL_UP;

	/* on failure only a warning is raised; the disk is left unregistered */
	retval = blk_alloc_devt(disk->part0, &devt);
	if (retval) {
		WARN_ON(1);
		return;
	}
	/* remember the dev_t actually assigned (may be an ext devt) */
	disk->major = MAJOR(devt);
	disk->first_minor = MINOR(devt);

	disk_alloc_events(disk);

	if (disk->flags & GENHD_FL_HIDDEN) {
		/*
		 * Don't let hidden disks show up in /proc/partitions,
		 * and don't bother scanning for partitions either.
		 */
		disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
		disk->flags |= GENHD_FL_NO_PART_SCAN;
	} else {
		struct backing_dev_info *bdi = disk->queue->backing_dev_info;
		struct device *dev = disk_to_dev(disk);
		int ret;

		/* Register BDI before referencing it from bdev */
		dev->devt = devt;
		ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
		WARN_ON(ret);
		bdi_set_owner(bdi, dev);
		bdev_add(disk->part0, devt);
	}
	register_disk(parent, disk, groups);
	if (register_queue)
		blk_register_queue(disk);

	/*
	 * Take an extra ref on queue which will be put on disk_release()
	 * so that it sticks around as long as @disk is there.
	 */
	WARN_ON_ONCE(!blk_get_queue(disk->queue));

	disk_add_events(disk);
	blk_integrity_add(disk);
}

/*
 * Register @disk under @parent with optional extra sysfs @groups; the
 * request queue is registered as well.
 */
void device_add_disk(struct device *parent, struct gendisk *disk,
		     const struct attribute_group **groups)

{
	__device_add_disk(parent, disk, groups, true);
}
EXPORT_SYMBOL(device_add_disk);

/*
 * Register @disk under @parent without extra sysfs groups and without
 * registering (or picking an elevator for) its request queue.
 */
void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
{
	__device_add_disk(parent, disk, NULL, false);
}
EXPORT_SYMBOL(device_add_disk_no_queue_reg);

/*
 * Sync @bdev, invalidate it via __invalidate_device() and unhash its inode.
 */
static void invalidate_partition(struct block_device *bdev)
{
	fsync_bdev(bdev);
	__invalidate_device(bdev, true);

	/*
	 * Unhash the bdev inode for this device so that it can't be looked
	 * up any more even if openers still hold references to it.
	 */
	remove_inode_hash(bdev->bd_inode);
}

/**
 * del_gendisk - remove the gendisk
 * @disk: the struct gendisk to remove
 *
 * Removes the gendisk and all its associated resources. This deletes the
 * partitions associated with the gendisk, and unregisters the associated
 * request_queue.
 *
 * This is the counter to the respective __device_add_disk() call.
 *
 * The final removal of the struct gendisk happens when its refcount reaches 0
 * with put_disk(), which should be called after del_gendisk(), if
 * __device_add_disk() was used.
 *
 * Drivers exist which depend on the release of the gendisk to be synchronous,
 * it should not be deferred.
 *
 * Context: can sleep
 */
void del_gendisk(struct gendisk *disk)
{
	struct disk_part_iter piter;
	struct block_device *part;

	might_sleep();

	/* nothing to tear down (and a warning) if the disk has no queue */
	if (WARN_ON_ONCE(!disk->queue))
		return;

	blk_integrity_del(disk);
	disk_del_events(disk);

	/*
	 * Block lookups of the disk until all bdevs are unhashed and the
	 * disk is marked as dead (GENHD_FL_UP cleared).
	 */
	down_write(&bdev_lookup_sem);

	/* invalidate stuff */
	/* partitions are walked in reverse, including zero-sized ones */
	disk_part_iter_init(&piter, disk,
			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
	while ((part = disk_part_iter_next(&piter))) {
		invalidate_partition(part);
		delete_partition(part);
	}
	disk_part_iter_exit(&piter);

	/* the iteration above excluded part0; handle the whole disk last */
	invalidate_partition(disk->part0);
	set_capacity(disk, 0);
	disk->flags &= ~GENHD_FL_UP;
	up_write(&bdev_lookup_sem);

	if (!(disk->flags & GENHD_FL_HIDDEN)) {
		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");

		/*
		 * Unregister bdi before releasing device numbers (as they can
		 * get reused and we'd get clashes in sysfs).
		 */
		bdi_unregister(disk->queue->backing_dev_info);
	}

	blk_unregister_queue(disk);

	/* drop the "holders" and "slaves" kobjects created by register_disk() */
	kobject_put(disk->part0->bd_holder_dir);
	kobject_put(disk->slave_dir);

	/* reset the whole-disk statistics */
	part_stat_set_all(disk->part0, 0);
	disk->part0->bd_stamp = 0;
	if (!sysfs_deprecated)
		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
	device_del(disk_to_dev(disk));
}
EXPORT_SYMBOL(del_gendisk);

/* sysfs access to bad-blocks list.
*/ 870 static ssize_t disk_badblocks_show(struct device *dev, 871 struct device_attribute *attr, 872 char *page) 873 { 874 struct gendisk *disk = dev_to_disk(dev); 875 876 if (!disk->bb) 877 return sprintf(page, "\n"); 878 879 return badblocks_show(disk->bb, page, 0); 880 } 881 882 static ssize_t disk_badblocks_store(struct device *dev, 883 struct device_attribute *attr, 884 const char *page, size_t len) 885 { 886 struct gendisk *disk = dev_to_disk(dev); 887 888 if (!disk->bb) 889 return -ENXIO; 890 891 return badblocks_store(disk->bb, page, len, 0); 892 } 893 894 void blk_request_module(dev_t devt) 895 { 896 unsigned int major = MAJOR(devt); 897 struct blk_major_name **n; 898 899 mutex_lock(&major_names_lock); 900 for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) { 901 if ((*n)->major == major && (*n)->probe) { 902 (*n)->probe(devt); 903 mutex_unlock(&major_names_lock); 904 return; 905 } 906 } 907 mutex_unlock(&major_names_lock); 908 909 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) 910 /* Make old-style 2.4 aliases work */ 911 request_module("block-major-%d", MAJOR(devt)); 912 } 913 914 /** 915 * bdget_disk - do bdget() by gendisk and partition number 916 * @disk: gendisk of interest 917 * @partno: partition number 918 * 919 * Find partition @partno from @disk, do bdget() on it. 920 * 921 * CONTEXT: 922 * Don't care. 923 * 924 * RETURNS: 925 * Resulting block_device on success, NULL on failure. 
*/
struct block_device *bdget_disk(struct gendisk *disk, int partno)
{
	struct block_device *bdev = NULL;

	rcu_read_lock();
	bdev = __disk_get_part(disk, partno);
	/* a partition whose reference cannot be grabbed counts as not found */
	if (bdev && !bdgrab(bdev))
		bdev = NULL;
	rcu_read_unlock();

	return bdev;
}

/*
 * print a full list of all partitions - intended for places where the root
 * filesystem can't be mounted and thus to give the victim some idea of what
 * went wrong
 */
void __init printk_all_partitions(void)
{
	struct class_dev_iter iter;
	struct device *dev;

	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
		struct gendisk *disk = dev_to_disk(dev);
		struct disk_part_iter piter;
		struct block_device *part;
		char name_buf[BDEVNAME_SIZE];
		char devt_buf[BDEVT_SIZE];

		/*
		 * Don't show empty devices or things that have been
		 * suppressed
		 */
		if (get_capacity(disk) == 0 ||
		    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
			continue;

		/*
		 * Note, unlike /proc/partitions, I am showing the
		 * numbers in hex - the same format as the root=
		 * option takes.
		 */
		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
		while ((part = disk_part_iter_next(&piter))) {
			bool is_part0 = part == disk->part0;

			/* partitions are indented relative to the whole disk */
			printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
			       bdevt_str(part->bd_dev, devt_buf),
			       bdev_nr_sectors(part) >> 1,
			       disk_name(disk, part->bd_partno, name_buf),
			       part->bd_meta_info ?
					part->bd_meta_info->uuid : "");
			/* the whole-disk line also names the parent's driver */
			if (is_part0) {
				if (dev->parent && dev->parent->driver)
					printk(" driver: %s\n",
					      dev->parent->driver->name);
				else
					printk(" (driver?)\n");
			} else
				printk("\n");
		}
		disk_part_iter_exit(&piter);
	}
	class_dev_iter_exit(&iter);
}

#ifdef CONFIG_PROC_FS
/* iterator */
/*
 * seq_file ->start: allocate a class iterator over all disks, stash it in
 * seqf->private and skip forward to position *@pos.  Returns the gendisk at
 * that position, NULL past the end, or ERR_PTR(-ENOMEM).
 */
static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
{
	loff_t skip = *pos;
	struct class_dev_iter *iter;
	struct device *dev;

	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return ERR_PTR(-ENOMEM);

	seqf->private = iter;
	class_dev_iter_init(iter, &block_class, NULL, &disk_type);
	/* consume skip + 1 entries; the last one fetched is the result */
	do {
		dev = class_dev_iter_next(iter);
		if (!dev)
			return NULL;
	} while (skip--);

	return dev_to_disk(dev);
}

/* seq_file ->next: advance *@pos and return the next gendisk, or NULL. */
static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
{
	struct device *dev;

	(*pos)++;
	dev = class_dev_iter_next(seqf->private);
	if (dev)
		return dev_to_disk(dev);

	return NULL;
}

/* seq_file ->stop: release the iterator allocated by disk_seqf_start(). */
static void disk_seqf_stop(struct seq_file *seqf, void *v)
{
	struct class_dev_iter *iter = seqf->private;

	/* stop is called even after start failed :-( */
	if (iter) {
		class_dev_iter_exit(iter);
		kfree(iter);
		seqf->private = NULL;
	}
}

/*
 * ->start for the partitions listing: same as disk_seqf_start(), but also
 * prints the column header when starting from position 0 with a valid disk.
 */
static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
{
	void *p;

	p = disk_seqf_start(seqf, pos);
	if (!IS_ERR_OR_NULL(p) && !*pos)
		seq_puts(seqf, "major minor #blocks name\n\n");
	return p;
}

/*
 * ->show for the partitions listing: print one line per partition of the
 * gendisk @v (including part0), unless the disk is filtered out below.
 */
static int show_partition(struct seq_file *seqf, void *v)
{
	struct gendisk *sgp = v;
	struct disk_part_iter piter;
	struct block_device *part;
	char buf[BDEVNAME_SIZE];

	/* Don't show non-partitionable removable devices or empty devices */
	if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
				   (sgp->flags & GENHD_FL_REMOVABLE)))
		return 0;
	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
		return 0;

	/* show the full disk and all non-0 size partitions of it */
	disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
	while ((part = disk_part_iter_next(&piter)))
		/* NOTE(review): ">> 1" presumably converts 512-byte sectors to 1 KiB blocks -- confirm */
		seq_printf(seqf, "%4d %7d %10llu %s\n",
			   MAJOR(part->bd_dev), MINOR(part->bd_dev),
			   bdev_nr_sectors(part) >> 1,
			   disk_name(sgp, part->bd_partno, buf));
	disk_part_iter_exit(&piter);

	return 0;
}

static const struct seq_operations partitions_op = {
	.start	= show_partition_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
	.show	= show_partition
};
#endif

/*
 * Subsystem init: register the block class, run blk_dev_init(), register
 * the "blkext" major and, unless sysfs_deprecated is set, create the
 * top-level "block" kobject used for the per-disk links.
 */
static int __init genhd_device_init(void)
{
	int error;

	block_class.dev_kobj = sysfs_dev_block_kobj;
	error = class_register(&block_class);
	if (unlikely(error))
		return error;
	blk_dev_init();

	/* the return value of this registration is not checked */
	register_blkdev(BLOCK_EXT_MAJOR, "blkext");

	/* create top-level block dir */
	if (!sysfs_deprecated)
		block_depr = kobject_create_and_add("block", NULL);
	return 0;
}

subsys_initcall(genhd_device_init);

/* sysfs "range": the disk's minors value. */
static ssize_t disk_range_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n", disk->minors);
}

/* sysfs "ext_range": the value of disk_max_parts() for the disk. */
static ssize_t disk_ext_range_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n", disk_max_parts(disk));
}

/* sysfs "removable": 1 if GENHD_FL_REMOVABLE is set, else 0. */
static ssize_t disk_removable_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n",
		       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
}

/* sysfs "hidden": 1 if GENHD_FL_HIDDEN is set, else 0. */
static ssize_t disk_hidden_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n",
		       (disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
}

/* sysfs "ro": 1 if get_disk_ro() reports the disk read-only, else 0. */
static ssize_t disk_ro_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
}

/* sysfs "size": size of the block device in sectors. */
ssize_t part_size_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
}

/*
 * sysfs "stat": one line of I/O statistics.  The fields appear in the order
 * of the argument list below: read, write, in-flight / io_ticks / total
 * time, discard, flush.  Times are converted from nanoseconds to
 * milliseconds.
 */
ssize_t part_stat_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev->bd_disk->queue;
	struct disk_stats stat;
	unsigned int inflight;

	part_stat_read_all(bdev, &stat);
	/* blk-mq queues count in-flight requests themselves */
	if (queue_is_mq(q))
		inflight = blk_mq_in_flight(q, bdev);
	else
		inflight = part_in_flight(bdev);

	return sprintf(buf,
		"%8lu %8lu %8llu %8u "
		"%8lu %8lu %8llu %8u "
		"%8u %8u %8u "
		"%8lu %8lu %8llu %8u "
		"%8lu %8u"
		"\n",
		stat.ios[STAT_READ],
		stat.merges[STAT_READ],
		(unsigned long long)stat.sectors[STAT_READ],
		(unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
		stat.ios[STAT_WRITE],
		stat.merges[STAT_WRITE],
		(unsigned long long)stat.sectors[STAT_WRITE],
		(unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
		inflight,
		jiffies_to_msecs(stat.io_ticks),
		(unsigned int)div_u64(stat.nsecs[STAT_READ] +
				      stat.nsecs[STAT_WRITE] +
				      stat.nsecs[STAT_DISCARD] +
				      stat.nsecs[STAT_FLUSH],
						NSEC_PER_MSEC),
		stat.ios[STAT_DISCARD],
		stat.merges[STAT_DISCARD],
		(unsigned long long)stat.sectors[STAT_DISCARD],
		(unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
		stat.ios[STAT_FLUSH],
		(unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
}

/* sysfs "inflight": the two in-flight counters, index 0 then index 1. */
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev->bd_disk->queue;
	unsigned int inflight[2];

	if (queue_is_mq(q))
		blk_mq_in_flight_rw(q, bdev, inflight);
	else
		part_in_flight_rw(bdev, inflight);

	return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
}

/* sysfs "capability": the raw disk->flags value in hex. */
static ssize_t disk_capability_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%x\n", disk->flags);
}

/* sysfs "alignment_offset": queue_alignment_offset() of the disk's queue. */
static ssize_t disk_alignment_offset_show(struct device *dev,
					  struct device_attribute *attr,
					  char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
}

/* sysfs "discard_alignment": queue_discard_alignment() of the disk's queue. */
static ssize_t disk_discard_alignment_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
}

/* read-only attributes; only "badblocks" below is also writable */
static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);

#ifdef CONFIG_FAIL_MAKE_REQUEST
/* sysfs "make-it-fail" show: current bd_make_it_fail value. */
ssize_t part_fail_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
}

/*
 * sysfs "make-it-fail" store: parse a decimal integer from @buf into
 * bd_make_it_fail.  Unparsable input is ignored; @count is always returned.
 */
ssize_t part_fail_store(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int i;

	if (count > 0 && sscanf(buf, "%d", &i) > 0)
		dev_to_bdev(dev)->bd_make_it_fail = i;

	return count;
}

static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
#endif /* CONFIG_FAIL_MAKE_REQUEST */

#ifdef CONFIG_FAIL_IO_TIMEOUT
static struct device_attribute dev_attr_fail_timeout =
	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
#endif

/* NULL-terminated list of the per-disk sysfs attributes defined above */
static struct attribute *disk_attrs[] = {
	&dev_attr_range.attr,
	&dev_attr_ext_range.attr,
	&dev_attr_removable.attr,
	&dev_attr_hidden.attr,
	&dev_attr_ro.attr,
	&dev_attr_size.attr,
	&dev_attr_alignment_offset.attr,
	&dev_attr_discard_alignment.attr,
	&dev_attr_capability.attr,
	&dev_attr_stat.attr,
	&dev_attr_inflight.attr,
	&dev_attr_badblocks.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
	&dev_attr_fail.attr,
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
	&dev_attr_fail_timeout.attr,
#endif
	NULL
};

/*
 * Visibility callback for the disk attributes: "badblocks" gets mode 0
 * when the disk has no bad-block table; every other attribute keeps its
 * declared mode.
 */
static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, typeof(*dev), kobj);
	struct gendisk *disk = dev_to_disk(dev);

	if (a == &dev_attr_badblocks.attr && !disk->bb)
		return 0;
	return a->mode;
}

static struct attribute_group disk_attr_group
= { 1313 .attrs = disk_attrs, 1314 .is_visible = disk_visible, 1315 }; 1316 1317 static const struct attribute_group *disk_attr_groups[] = { 1318 &disk_attr_group, 1319 NULL 1320 }; 1321 1322 /** 1323 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way 1324 * @disk: disk to replace part_tbl for 1325 * @new_ptbl: new part_tbl to install 1326 * 1327 * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The 1328 * original ptbl is freed using RCU callback. 1329 * 1330 * LOCKING: 1331 * Matching bd_mutex locked or the caller is the only user of @disk. 1332 */ 1333 static void disk_replace_part_tbl(struct gendisk *disk, 1334 struct disk_part_tbl *new_ptbl) 1335 { 1336 struct disk_part_tbl *old_ptbl = 1337 rcu_dereference_protected(disk->part_tbl, 1); 1338 1339 rcu_assign_pointer(disk->part_tbl, new_ptbl); 1340 1341 if (old_ptbl) { 1342 rcu_assign_pointer(old_ptbl->last_lookup, NULL); 1343 kfree_rcu(old_ptbl, rcu_head); 1344 } 1345 } 1346 1347 /** 1348 * disk_expand_part_tbl - expand disk->part_tbl 1349 * @disk: disk to expand part_tbl for 1350 * @partno: expand such that this partno can fit in 1351 * 1352 * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl 1353 * uses RCU to allow unlocked dereferencing for stats and other stuff. 1354 * 1355 * LOCKING: 1356 * Matching bd_mutex locked or the caller is the only user of @disk. 1357 * Might sleep. 1358 * 1359 * RETURNS: 1360 * 0 on success, -errno on failure. 1361 */ 1362 int disk_expand_part_tbl(struct gendisk *disk, int partno) 1363 { 1364 struct disk_part_tbl *old_ptbl = 1365 rcu_dereference_protected(disk->part_tbl, 1); 1366 struct disk_part_tbl *new_ptbl; 1367 int len = old_ptbl ? old_ptbl->len : 0; 1368 int i, target; 1369 1370 /* 1371 * check for int overflow, since we can get here from blkpg_ioctl() 1372 * with a user passed 'partno'. 
1373 */ 1374 target = partno + 1; 1375 if (target < 0) 1376 return -EINVAL; 1377 1378 /* disk_max_parts() is zero during initialization, ignore if so */ 1379 if (disk_max_parts(disk) && target > disk_max_parts(disk)) 1380 return -EINVAL; 1381 1382 if (target <= len) 1383 return 0; 1384 1385 new_ptbl = kzalloc_node(struct_size(new_ptbl, part, target), GFP_KERNEL, 1386 disk->node_id); 1387 if (!new_ptbl) 1388 return -ENOMEM; 1389 1390 new_ptbl->len = target; 1391 1392 for (i = 0; i < len; i++) 1393 rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]); 1394 1395 disk_replace_part_tbl(disk, new_ptbl); 1396 return 0; 1397 } 1398 1399 /** 1400 * disk_release - releases all allocated resources of the gendisk 1401 * @dev: the device representing this disk 1402 * 1403 * This function releases all allocated resources of the gendisk. 1404 * 1405 * Drivers which used __device_add_disk() have a gendisk with a request_queue 1406 * assigned. Since the request_queue sits on top of the gendisk for these 1407 * drivers we also call blk_put_queue() for them, and we expect the 1408 * request_queue refcount to reach 0 at this point, and so the request_queue 1409 * will also be freed prior to the disk. 
1410 * 1411 * Context: can sleep 1412 */ 1413 static void disk_release(struct device *dev) 1414 { 1415 struct gendisk *disk = dev_to_disk(dev); 1416 1417 might_sleep(); 1418 1419 blk_free_devt(dev->devt); 1420 disk_release_events(disk); 1421 kfree(disk->random); 1422 disk_replace_part_tbl(disk, NULL); 1423 bdput(disk->part0); 1424 if (disk->queue) 1425 blk_put_queue(disk->queue); 1426 kfree(disk); 1427 } 1428 struct class block_class = { 1429 .name = "block", 1430 }; 1431 1432 static char *block_devnode(struct device *dev, umode_t *mode, 1433 kuid_t *uid, kgid_t *gid) 1434 { 1435 struct gendisk *disk = dev_to_disk(dev); 1436 1437 if (disk->fops->devnode) 1438 return disk->fops->devnode(disk, mode); 1439 return NULL; 1440 } 1441 1442 const struct device_type disk_type = { 1443 .name = "disk", 1444 .groups = disk_attr_groups, 1445 .release = disk_release, 1446 .devnode = block_devnode, 1447 }; 1448 1449 #ifdef CONFIG_PROC_FS 1450 /* 1451 * aggregate disk stat collector. Uses the same stats that the sysfs 1452 * entries do, above, but makes them available through one seq_file. 1453 * 1454 * The output looks suspiciously like /proc/partitions with a bunch of 1455 * extra fields. 
1456 */ 1457 static int diskstats_show(struct seq_file *seqf, void *v) 1458 { 1459 struct gendisk *gp = v; 1460 struct disk_part_iter piter; 1461 struct block_device *hd; 1462 char buf[BDEVNAME_SIZE]; 1463 unsigned int inflight; 1464 struct disk_stats stat; 1465 1466 /* 1467 if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) 1468 seq_puts(seqf, "major minor name" 1469 " rio rmerge rsect ruse wio wmerge " 1470 "wsect wuse running use aveq" 1471 "\n\n"); 1472 */ 1473 1474 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); 1475 while ((hd = disk_part_iter_next(&piter))) { 1476 part_stat_read_all(hd, &stat); 1477 if (queue_is_mq(gp->queue)) 1478 inflight = blk_mq_in_flight(gp->queue, hd); 1479 else 1480 inflight = part_in_flight(hd); 1481 1482 seq_printf(seqf, "%4d %7d %s " 1483 "%lu %lu %lu %u " 1484 "%lu %lu %lu %u " 1485 "%u %u %u " 1486 "%lu %lu %lu %u " 1487 "%lu %u" 1488 "\n", 1489 MAJOR(hd->bd_dev), MINOR(hd->bd_dev), 1490 disk_name(gp, hd->bd_partno, buf), 1491 stat.ios[STAT_READ], 1492 stat.merges[STAT_READ], 1493 stat.sectors[STAT_READ], 1494 (unsigned int)div_u64(stat.nsecs[STAT_READ], 1495 NSEC_PER_MSEC), 1496 stat.ios[STAT_WRITE], 1497 stat.merges[STAT_WRITE], 1498 stat.sectors[STAT_WRITE], 1499 (unsigned int)div_u64(stat.nsecs[STAT_WRITE], 1500 NSEC_PER_MSEC), 1501 inflight, 1502 jiffies_to_msecs(stat.io_ticks), 1503 (unsigned int)div_u64(stat.nsecs[STAT_READ] + 1504 stat.nsecs[STAT_WRITE] + 1505 stat.nsecs[STAT_DISCARD] + 1506 stat.nsecs[STAT_FLUSH], 1507 NSEC_PER_MSEC), 1508 stat.ios[STAT_DISCARD], 1509 stat.merges[STAT_DISCARD], 1510 stat.sectors[STAT_DISCARD], 1511 (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], 1512 NSEC_PER_MSEC), 1513 stat.ios[STAT_FLUSH], 1514 (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], 1515 NSEC_PER_MSEC) 1516 ); 1517 } 1518 disk_part_iter_exit(&piter); 1519 1520 return 0; 1521 } 1522 1523 static const struct seq_operations diskstats_op = { 1524 .start = disk_seqf_start, 1525 .next = disk_seqf_next, 1526 
.stop = disk_seqf_stop, 1527 .show = diskstats_show 1528 }; 1529 1530 static int __init proc_genhd_init(void) 1531 { 1532 proc_create_seq("diskstats", 0, NULL, &diskstats_op); 1533 proc_create_seq("partitions", 0, NULL, &partitions_op); 1534 return 0; 1535 } 1536 module_init(proc_genhd_init); 1537 #endif /* CONFIG_PROC_FS */ 1538 1539 dev_t blk_lookup_devt(const char *name, int partno) 1540 { 1541 dev_t devt = MKDEV(0, 0); 1542 struct class_dev_iter iter; 1543 struct device *dev; 1544 1545 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 1546 while ((dev = class_dev_iter_next(&iter))) { 1547 struct gendisk *disk = dev_to_disk(dev); 1548 struct block_device *part; 1549 1550 if (strcmp(dev_name(dev), name)) 1551 continue; 1552 1553 if (partno < disk->minors) { 1554 /* We need to return the right devno, even 1555 * if the partition doesn't exist yet. 1556 */ 1557 devt = MKDEV(MAJOR(dev->devt), 1558 MINOR(dev->devt) + partno); 1559 break; 1560 } 1561 part = bdget_disk(disk, partno); 1562 if (part) { 1563 devt = part->bd_dev; 1564 bdput(part); 1565 break; 1566 } 1567 } 1568 class_dev_iter_exit(&iter); 1569 return devt; 1570 } 1571 1572 struct gendisk *__alloc_disk_node(int minors, int node_id) 1573 { 1574 struct gendisk *disk; 1575 struct disk_part_tbl *ptbl; 1576 1577 if (minors > DISK_MAX_PARTS) { 1578 printk(KERN_ERR 1579 "block: can't allocate more than %d partitions\n", 1580 DISK_MAX_PARTS); 1581 minors = DISK_MAX_PARTS; 1582 } 1583 1584 disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); 1585 if (!disk) 1586 return NULL; 1587 1588 disk->part0 = bdev_alloc(disk, 0); 1589 if (!disk->part0) 1590 goto out_free_disk; 1591 1592 disk->node_id = node_id; 1593 if (disk_expand_part_tbl(disk, 0)) 1594 goto out_bdput; 1595 1596 ptbl = rcu_dereference_protected(disk->part_tbl, 1); 1597 rcu_assign_pointer(ptbl->part[0], disk->part0); 1598 1599 disk->minors = minors; 1600 rand_initialize_disk(disk); 1601 disk_to_dev(disk)->class = &block_class; 1602 
disk_to_dev(disk)->type = &disk_type; 1603 device_initialize(disk_to_dev(disk)); 1604 return disk; 1605 1606 out_bdput: 1607 bdput(disk->part0); 1608 out_free_disk: 1609 kfree(disk); 1610 return NULL; 1611 } 1612 EXPORT_SYMBOL(__alloc_disk_node); 1613 1614 /** 1615 * put_disk - decrements the gendisk refcount 1616 * @disk: the struct gendisk to decrement the refcount for 1617 * 1618 * This decrements the refcount for the struct gendisk. When this reaches 0 1619 * we'll have disk_release() called. 1620 * 1621 * Context: Any context, but the last reference must not be dropped from 1622 * atomic context. 1623 */ 1624 void put_disk(struct gendisk *disk) 1625 { 1626 if (disk) 1627 put_device(disk_to_dev(disk)); 1628 } 1629 EXPORT_SYMBOL(put_disk); 1630 1631 static void set_disk_ro_uevent(struct gendisk *gd, int ro) 1632 { 1633 char event[] = "DISK_RO=1"; 1634 char *envp[] = { event, NULL }; 1635 1636 if (!ro) 1637 event[8] = '0'; 1638 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); 1639 } 1640 1641 void set_disk_ro(struct gendisk *disk, int flag) 1642 { 1643 struct disk_part_iter piter; 1644 struct block_device *part; 1645 1646 if (disk->part0->bd_read_only != flag) { 1647 set_disk_ro_uevent(disk, flag); 1648 disk->part0->bd_read_only = flag; 1649 } 1650 1651 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 1652 while ((part = disk_part_iter_next(&piter))) 1653 part->bd_read_only = flag; 1654 disk_part_iter_exit(&piter); 1655 } 1656 1657 EXPORT_SYMBOL(set_disk_ro); 1658 1659 int bdev_read_only(struct block_device *bdev) 1660 { 1661 if (!bdev) 1662 return 0; 1663 return bdev->bd_read_only; 1664 } 1665 1666 EXPORT_SYMBOL(bdev_read_only); 1667 1668 /* 1669 * Disk events - monitor disk events like media change and eject request. 
1670 */ 1671 struct disk_events { 1672 struct list_head node; /* all disk_event's */ 1673 struct gendisk *disk; /* the associated disk */ 1674 spinlock_t lock; 1675 1676 struct mutex block_mutex; /* protects blocking */ 1677 int block; /* event blocking depth */ 1678 unsigned int pending; /* events already sent out */ 1679 unsigned int clearing; /* events being cleared */ 1680 1681 long poll_msecs; /* interval, -1 for default */ 1682 struct delayed_work dwork; 1683 }; 1684 1685 static const char *disk_events_strs[] = { 1686 [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change", 1687 [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request", 1688 }; 1689 1690 static char *disk_uevents[] = { 1691 [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1", 1692 [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1", 1693 }; 1694 1695 /* list of all disk_events */ 1696 static DEFINE_MUTEX(disk_events_mutex); 1697 static LIST_HEAD(disk_events); 1698 1699 /* disable in-kernel polling by default */ 1700 static unsigned long disk_events_dfl_poll_msecs; 1701 1702 static unsigned long disk_events_poll_jiffies(struct gendisk *disk) 1703 { 1704 struct disk_events *ev = disk->ev; 1705 long intv_msecs = 0; 1706 1707 /* 1708 * If device-specific poll interval is set, always use it. If 1709 * the default is being used, poll if the POLL flag is set. 1710 */ 1711 if (ev->poll_msecs >= 0) 1712 intv_msecs = ev->poll_msecs; 1713 else if (disk->event_flags & DISK_EVENT_FLAG_POLL) 1714 intv_msecs = disk_events_dfl_poll_msecs; 1715 1716 return msecs_to_jiffies(intv_msecs); 1717 } 1718 1719 /** 1720 * disk_block_events - block and flush disk event checking 1721 * @disk: disk to block events for 1722 * 1723 * On return from this function, it is guaranteed that event checking 1724 * isn't in progress and won't happen until unblocked by 1725 * disk_unblock_events(). Events blocking is counted and the actual 1726 * unblocking happens after the matching number of unblocks are done. 
1727 * 1728 * Note that this intentionally does not block event checking from 1729 * disk_clear_events(). 1730 * 1731 * CONTEXT: 1732 * Might sleep. 1733 */ 1734 void disk_block_events(struct gendisk *disk) 1735 { 1736 struct disk_events *ev = disk->ev; 1737 unsigned long flags; 1738 bool cancel; 1739 1740 if (!ev) 1741 return; 1742 1743 /* 1744 * Outer mutex ensures that the first blocker completes canceling 1745 * the event work before further blockers are allowed to finish. 1746 */ 1747 mutex_lock(&ev->block_mutex); 1748 1749 spin_lock_irqsave(&ev->lock, flags); 1750 cancel = !ev->block++; 1751 spin_unlock_irqrestore(&ev->lock, flags); 1752 1753 if (cancel) 1754 cancel_delayed_work_sync(&disk->ev->dwork); 1755 1756 mutex_unlock(&ev->block_mutex); 1757 } 1758 1759 static void __disk_unblock_events(struct gendisk *disk, bool check_now) 1760 { 1761 struct disk_events *ev = disk->ev; 1762 unsigned long intv; 1763 unsigned long flags; 1764 1765 spin_lock_irqsave(&ev->lock, flags); 1766 1767 if (WARN_ON_ONCE(ev->block <= 0)) 1768 goto out_unlock; 1769 1770 if (--ev->block) 1771 goto out_unlock; 1772 1773 intv = disk_events_poll_jiffies(disk); 1774 if (check_now) 1775 queue_delayed_work(system_freezable_power_efficient_wq, 1776 &ev->dwork, 0); 1777 else if (intv) 1778 queue_delayed_work(system_freezable_power_efficient_wq, 1779 &ev->dwork, intv); 1780 out_unlock: 1781 spin_unlock_irqrestore(&ev->lock, flags); 1782 } 1783 1784 /** 1785 * disk_unblock_events - unblock disk event checking 1786 * @disk: disk to unblock events for 1787 * 1788 * Undo disk_block_events(). When the block count reaches zero, it 1789 * starts events polling if configured. 1790 * 1791 * CONTEXT: 1792 * Don't care. Safe to call from irq context. 
1793 */ 1794 void disk_unblock_events(struct gendisk *disk) 1795 { 1796 if (disk->ev) 1797 __disk_unblock_events(disk, false); 1798 } 1799 1800 /** 1801 * disk_flush_events - schedule immediate event checking and flushing 1802 * @disk: disk to check and flush events for 1803 * @mask: events to flush 1804 * 1805 * Schedule immediate event checking on @disk if not blocked. Events in 1806 * @mask are scheduled to be cleared from the driver. Note that this 1807 * doesn't clear the events from @disk->ev. 1808 * 1809 * CONTEXT: 1810 * If @mask is non-zero must be called with bdev->bd_mutex held. 1811 */ 1812 void disk_flush_events(struct gendisk *disk, unsigned int mask) 1813 { 1814 struct disk_events *ev = disk->ev; 1815 1816 if (!ev) 1817 return; 1818 1819 spin_lock_irq(&ev->lock); 1820 ev->clearing |= mask; 1821 if (!ev->block) 1822 mod_delayed_work(system_freezable_power_efficient_wq, 1823 &ev->dwork, 0); 1824 spin_unlock_irq(&ev->lock); 1825 } 1826 1827 /** 1828 * disk_clear_events - synchronously check, clear and return pending events 1829 * @disk: disk to fetch and clear events from 1830 * @mask: mask of events to be fetched and cleared 1831 * 1832 * Disk events are synchronously checked and pending events in @mask 1833 * are cleared and returned. This ignores the block count. 1834 * 1835 * CONTEXT: 1836 * Might sleep. 1837 */ 1838 static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) 1839 { 1840 struct disk_events *ev = disk->ev; 1841 unsigned int pending; 1842 unsigned int clearing = mask; 1843 1844 if (!ev) 1845 return 0; 1846 1847 disk_block_events(disk); 1848 1849 /* 1850 * store the union of mask and ev->clearing on the stack so that the 1851 * race with disk_flush_events does not cause ambiguity (ev->clearing 1852 * can still be modified even if events are blocked). 
1853 */ 1854 spin_lock_irq(&ev->lock); 1855 clearing |= ev->clearing; 1856 ev->clearing = 0; 1857 spin_unlock_irq(&ev->lock); 1858 1859 disk_check_events(ev, &clearing); 1860 /* 1861 * if ev->clearing is not 0, the disk_flush_events got called in the 1862 * middle of this function, so we want to run the workfn without delay. 1863 */ 1864 __disk_unblock_events(disk, ev->clearing ? true : false); 1865 1866 /* then, fetch and clear pending events */ 1867 spin_lock_irq(&ev->lock); 1868 pending = ev->pending & mask; 1869 ev->pending &= ~mask; 1870 spin_unlock_irq(&ev->lock); 1871 WARN_ON_ONCE(clearing & mask); 1872 1873 return pending; 1874 } 1875 1876 /** 1877 * bdev_check_media_change - check if a removable media has been changed 1878 * @bdev: block device to check 1879 * 1880 * Check whether a removable media has been changed, and attempt to free all 1881 * dentries and inodes and invalidates all block device page cache entries in 1882 * that case. 1883 * 1884 * Returns %true if the block device changed, or %false if not. 1885 */ 1886 bool bdev_check_media_change(struct block_device *bdev) 1887 { 1888 unsigned int events; 1889 1890 events = disk_clear_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE | 1891 DISK_EVENT_EJECT_REQUEST); 1892 if (!(events & DISK_EVENT_MEDIA_CHANGE)) 1893 return false; 1894 1895 if (__invalidate_device(bdev, true)) 1896 pr_warn("VFS: busy inodes on changed media %s\n", 1897 bdev->bd_disk->disk_name); 1898 set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); 1899 return true; 1900 } 1901 EXPORT_SYMBOL(bdev_check_media_change); 1902 1903 /* 1904 * Separate this part out so that a different pointer for clearing_ptr can be 1905 * passed in for disk_clear_events. 
1906 */ 1907 static void disk_events_workfn(struct work_struct *work) 1908 { 1909 struct delayed_work *dwork = to_delayed_work(work); 1910 struct disk_events *ev = container_of(dwork, struct disk_events, dwork); 1911 1912 disk_check_events(ev, &ev->clearing); 1913 } 1914 1915 static void disk_check_events(struct disk_events *ev, 1916 unsigned int *clearing_ptr) 1917 { 1918 struct gendisk *disk = ev->disk; 1919 char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; 1920 unsigned int clearing = *clearing_ptr; 1921 unsigned int events; 1922 unsigned long intv; 1923 int nr_events = 0, i; 1924 1925 /* check events */ 1926 events = disk->fops->check_events(disk, clearing); 1927 1928 /* accumulate pending events and schedule next poll if necessary */ 1929 spin_lock_irq(&ev->lock); 1930 1931 events &= ~ev->pending; 1932 ev->pending |= events; 1933 *clearing_ptr &= ~clearing; 1934 1935 intv = disk_events_poll_jiffies(disk); 1936 if (!ev->block && intv) 1937 queue_delayed_work(system_freezable_power_efficient_wq, 1938 &ev->dwork, intv); 1939 1940 spin_unlock_irq(&ev->lock); 1941 1942 /* 1943 * Tell userland about new events. Only the events listed in 1944 * @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT 1945 * is set. Otherwise, events are processed internally but never 1946 * get reported to userland. 1947 */ 1948 for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) 1949 if ((events & disk->events & (1 << i)) && 1950 (disk->event_flags & DISK_EVENT_FLAG_UEVENT)) 1951 envp[nr_events++] = disk_uevents[i]; 1952 1953 if (nr_events) 1954 kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); 1955 } 1956 1957 /* 1958 * A disk events enabled device has the following sysfs nodes under 1959 * its /sys/block/X/ directory. 
1960 * 1961 * events : list of all supported events 1962 * events_async : list of events which can be detected w/o polling 1963 * (always empty, only for backwards compatibility) 1964 * events_poll_msecs : polling interval, 0: disable, -1: system default 1965 */ 1966 static ssize_t __disk_events_show(unsigned int events, char *buf) 1967 { 1968 const char *delim = ""; 1969 ssize_t pos = 0; 1970 int i; 1971 1972 for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++) 1973 if (events & (1 << i)) { 1974 pos += sprintf(buf + pos, "%s%s", 1975 delim, disk_events_strs[i]); 1976 delim = " "; 1977 } 1978 if (pos) 1979 pos += sprintf(buf + pos, "\n"); 1980 return pos; 1981 } 1982 1983 static ssize_t disk_events_show(struct device *dev, 1984 struct device_attribute *attr, char *buf) 1985 { 1986 struct gendisk *disk = dev_to_disk(dev); 1987 1988 if (!(disk->event_flags & DISK_EVENT_FLAG_UEVENT)) 1989 return 0; 1990 1991 return __disk_events_show(disk->events, buf); 1992 } 1993 1994 static ssize_t disk_events_async_show(struct device *dev, 1995 struct device_attribute *attr, char *buf) 1996 { 1997 return 0; 1998 } 1999 2000 static ssize_t disk_events_poll_msecs_show(struct device *dev, 2001 struct device_attribute *attr, 2002 char *buf) 2003 { 2004 struct gendisk *disk = dev_to_disk(dev); 2005 2006 if (!disk->ev) 2007 return sprintf(buf, "-1\n"); 2008 2009 return sprintf(buf, "%ld\n", disk->ev->poll_msecs); 2010 } 2011 2012 static ssize_t disk_events_poll_msecs_store(struct device *dev, 2013 struct device_attribute *attr, 2014 const char *buf, size_t count) 2015 { 2016 struct gendisk *disk = dev_to_disk(dev); 2017 long intv; 2018 2019 if (!count || !sscanf(buf, "%ld", &intv)) 2020 return -EINVAL; 2021 2022 if (intv < 0 && intv != -1) 2023 return -EINVAL; 2024 2025 if (!disk->ev) 2026 return -ENODEV; 2027 2028 disk_block_events(disk); 2029 disk->ev->poll_msecs = intv; 2030 __disk_unblock_events(disk, true); 2031 2032 return count; 2033 } 2034 2035 static const DEVICE_ATTR(events, 
0444, disk_events_show, NULL); 2036 static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL); 2037 static const DEVICE_ATTR(events_poll_msecs, 0644, 2038 disk_events_poll_msecs_show, 2039 disk_events_poll_msecs_store); 2040 2041 static const struct attribute *disk_events_attrs[] = { 2042 &dev_attr_events.attr, 2043 &dev_attr_events_async.attr, 2044 &dev_attr_events_poll_msecs.attr, 2045 NULL, 2046 }; 2047 2048 /* 2049 * The default polling interval can be specified by the kernel 2050 * parameter block.events_dfl_poll_msecs which defaults to 0 2051 * (disable). This can also be modified runtime by writing to 2052 * /sys/module/block/parameters/events_dfl_poll_msecs. 2053 */ 2054 static int disk_events_set_dfl_poll_msecs(const char *val, 2055 const struct kernel_param *kp) 2056 { 2057 struct disk_events *ev; 2058 int ret; 2059 2060 ret = param_set_ulong(val, kp); 2061 if (ret < 0) 2062 return ret; 2063 2064 mutex_lock(&disk_events_mutex); 2065 2066 list_for_each_entry(ev, &disk_events, node) 2067 disk_flush_events(ev->disk, 0); 2068 2069 mutex_unlock(&disk_events_mutex); 2070 2071 return 0; 2072 } 2073 2074 static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = { 2075 .set = disk_events_set_dfl_poll_msecs, 2076 .get = param_get_ulong, 2077 }; 2078 2079 #undef MODULE_PARAM_PREFIX 2080 #define MODULE_PARAM_PREFIX "block." 2081 2082 module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops, 2083 &disk_events_dfl_poll_msecs, 0644); 2084 2085 /* 2086 * disk_{alloc|add|del|release}_events - initialize and destroy disk_events. 
2087 */ 2088 static void disk_alloc_events(struct gendisk *disk) 2089 { 2090 struct disk_events *ev; 2091 2092 if (!disk->fops->check_events || !disk->events) 2093 return; 2094 2095 ev = kzalloc(sizeof(*ev), GFP_KERNEL); 2096 if (!ev) { 2097 pr_warn("%s: failed to initialize events\n", disk->disk_name); 2098 return; 2099 } 2100 2101 INIT_LIST_HEAD(&ev->node); 2102 ev->disk = disk; 2103 spin_lock_init(&ev->lock); 2104 mutex_init(&ev->block_mutex); 2105 ev->block = 1; 2106 ev->poll_msecs = -1; 2107 INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); 2108 2109 disk->ev = ev; 2110 } 2111 2112 static void disk_add_events(struct gendisk *disk) 2113 { 2114 /* FIXME: error handling */ 2115 if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0) 2116 pr_warn("%s: failed to create sysfs files for events\n", 2117 disk->disk_name); 2118 2119 if (!disk->ev) 2120 return; 2121 2122 mutex_lock(&disk_events_mutex); 2123 list_add_tail(&disk->ev->node, &disk_events); 2124 mutex_unlock(&disk_events_mutex); 2125 2126 /* 2127 * Block count is initialized to 1 and the following initial 2128 * unblock kicks it into action. 2129 */ 2130 __disk_unblock_events(disk, true); 2131 } 2132 2133 static void disk_del_events(struct gendisk *disk) 2134 { 2135 if (disk->ev) { 2136 disk_block_events(disk); 2137 2138 mutex_lock(&disk_events_mutex); 2139 list_del_init(&disk->ev->node); 2140 mutex_unlock(&disk_events_mutex); 2141 } 2142 2143 sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs); 2144 } 2145 2146 static void disk_release_events(struct gendisk *disk) 2147 { 2148 /* the block count should be 1 from disk_del_events() */ 2149 WARN_ON_ONCE(disk->ev && disk->ev->block != 1); 2150 kfree(disk->ev); 2151 } 2152