1 /* 2 * gendisk handling 3 */ 4 5 #include <linux/module.h> 6 #include <linux/fs.h> 7 #include <linux/genhd.h> 8 #include <linux/kdev_t.h> 9 #include <linux/kernel.h> 10 #include <linux/blkdev.h> 11 #include <linux/init.h> 12 #include <linux/spinlock.h> 13 #include <linux/seq_file.h> 14 #include <linux/slab.h> 15 #include <linux/kmod.h> 16 #include <linux/kobj_map.h> 17 #include <linux/buffer_head.h> 18 #include <linux/mutex.h> 19 20 static DEFINE_MUTEX(block_class_lock); 21 #ifndef CONFIG_SYSFS_DEPRECATED 22 struct kobject *block_depr; 23 #endif 24 25 /* 26 * Can be deleted altogether. Later. 27 * 28 */ 29 static struct blk_major_name { 30 struct blk_major_name *next; 31 int major; 32 char name[16]; 33 } *major_names[BLKDEV_MAJOR_HASH_SIZE]; 34 35 /* index in the above - for now: assume no multimajor ranges */ 36 static inline int major_to_index(int major) 37 { 38 return major % BLKDEV_MAJOR_HASH_SIZE; 39 } 40 41 #ifdef CONFIG_PROC_FS 42 void blkdev_show(struct seq_file *f, off_t offset) 43 { 44 struct blk_major_name *dp; 45 46 if (offset < BLKDEV_MAJOR_HASH_SIZE) { 47 mutex_lock(&block_class_lock); 48 for (dp = major_names[offset]; dp; dp = dp->next) 49 seq_printf(f, "%3d %s\n", dp->major, dp->name); 50 mutex_unlock(&block_class_lock); 51 } 52 } 53 #endif /* CONFIG_PROC_FS */ 54 55 int register_blkdev(unsigned int major, const char *name) 56 { 57 struct blk_major_name **n, *p; 58 int index, ret = 0; 59 60 mutex_lock(&block_class_lock); 61 62 /* temporary */ 63 if (major == 0) { 64 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) { 65 if (major_names[index] == NULL) 66 break; 67 } 68 69 if (index == 0) { 70 printk("register_blkdev: failed to get major for %s\n", 71 name); 72 ret = -EBUSY; 73 goto out; 74 } 75 major = index; 76 ret = major; 77 } 78 79 p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL); 80 if (p == NULL) { 81 ret = -ENOMEM; 82 goto out; 83 } 84 85 p->major = major; 86 strlcpy(p->name, name, sizeof(p->name)); 87 p->next = NULL; 88 index = major_to_index(major); 89 90 for (n = &major_names[index]; *n; n = &(*n)->next) { 91 if ((*n)->major == major) 92 break; 93 } 94 if (!*n) 95 *n = p; 96 else 97 ret = -EBUSY; 98 99 if (ret < 0) { 100 printk("register_blkdev: cannot get major %d for %s\n", 101 major, name); 102 kfree(p); 103 } 104 out: 105 mutex_unlock(&block_class_lock); 106 return ret; 107 } 108 109 EXPORT_SYMBOL(register_blkdev); 110 111 void unregister_blkdev(unsigned int major, const char *name) 112 { 113 struct blk_major_name **n; 114 struct blk_major_name *p = NULL; 115 int index = major_to_index(major); 116 117 mutex_lock(&block_class_lock); 118 for (n = &major_names[index]; *n; n = &(*n)->next) 119 if ((*n)->major == major) 120 break; 121 if (!*n || strcmp((*n)->name, name)) { 122 WARN_ON(1); 123 } else { 124 p = *n; 125 *n = p->next; 126 } 127 mutex_unlock(&block_class_lock); 128 kfree(p); 129 } 130 131 EXPORT_SYMBOL(unregister_blkdev); 132 133 static struct kobj_map *bdev_map; 134 135 /* 136 * Register device numbers dev..(dev+range-1) 137 * range must be nonzero 138 * The hash chain is sorted on range, so that subranges can override. 139 */ 140 void blk_register_region(dev_t devt, unsigned long range, struct module *module, 141 struct kobject *(*probe)(dev_t, int *, void *), 142 int (*lock)(dev_t, void *), void *data) 143 { 144 kobj_map(bdev_map, devt, range, module, probe, lock, data); 145 } 146 147 EXPORT_SYMBOL(blk_register_region); 148 149 void blk_unregister_region(dev_t devt, unsigned long range) 150 { 151 kobj_unmap(bdev_map, devt, range); 152 } 153 154 EXPORT_SYMBOL(blk_unregister_region); 155 156 static struct kobject *exact_match(dev_t devt, int *part, void *data) 157 { 158 struct gendisk *p = data; 159 160 return &p->dev.kobj; 161 } 162 163 static int exact_lock(dev_t devt, void *data) 164 { 165 struct gendisk *p = data; 166 167 if (!get_disk(p)) 168 return -1; 169 return 0; 170 } 171 172 /** 173 * add_disk - add partitioning information to kernel list 174 * @disk: per-device partitioning information 175 * 176 * This function registers the partitioning information in @disk 177 * with the kernel. 178 */ 179 void add_disk(struct gendisk *disk) 180 { 181 disk->flags |= GENHD_FL_UP; 182 blk_register_region(MKDEV(disk->major, disk->first_minor), 183 disk->minors, NULL, exact_match, exact_lock, disk); 184 register_disk(disk); 185 blk_register_queue(disk); 186 } 187 188 EXPORT_SYMBOL(add_disk); 189 EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ 190 191 void unlink_gendisk(struct gendisk *disk) 192 { 193 blk_unregister_queue(disk); 194 blk_unregister_region(MKDEV(disk->major, disk->first_minor), 195 disk->minors); 196 } 197 198 /** 199 * get_gendisk - get partitioning information for a given device 200 * @dev: device to get partitioning information for 201 * 202 * This function gets the structure containing partitioning 203 * information for the given device @dev. 204 */ 205 struct gendisk *get_gendisk(dev_t devt, int *part) 206 { 207 struct kobject *kobj = kobj_lookup(bdev_map, devt, part); 208 struct device *dev = kobj_to_dev(kobj); 209 210 return kobj ? dev_to_disk(dev) : NULL; 211 } 212 213 /* 214 * print a full list of all partitions - intended for places where the root 215 * filesystem can't be mounted and thus to give the victim some idea of what 216 * went wrong 217 */ 218 void __init printk_all_partitions(void) 219 { 220 struct device *dev; 221 struct gendisk *sgp; 222 char buf[BDEVNAME_SIZE]; 223 int n; 224 225 mutex_lock(&block_class_lock); 226 /* For each block device... */ 227 list_for_each_entry(dev, &block_class.devices, node) { 228 if (dev->type != &disk_type) 229 continue; 230 sgp = dev_to_disk(dev); 231 /* 232 * Don't show empty devices or things that have been surpressed 233 */ 234 if (get_capacity(sgp) == 0 || 235 (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) 236 continue; 237 238 /* 239 * Note, unlike /proc/partitions, I am showing the numbers in 240 * hex - the same format as the root= option takes. 241 */ 242 printk("%02x%02x %10llu %s", 243 sgp->major, sgp->first_minor, 244 (unsigned long long)get_capacity(sgp) >> 1, 245 disk_name(sgp, 0, buf)); 246 if (sgp->driverfs_dev != NULL && 247 sgp->driverfs_dev->driver != NULL) 248 printk(" driver: %s\n", 249 sgp->driverfs_dev->driver->name); 250 else 251 printk(" (driver?)\n"); 252 253 /* now show the partitions */ 254 for (n = 0; n < sgp->minors - 1; ++n) { 255 if (sgp->part[n] == NULL) 256 continue; 257 if (sgp->part[n]->nr_sects == 0) 258 continue; 259 printk(" %02x%02x %10llu %s\n", 260 sgp->major, n + 1 + sgp->first_minor, 261 (unsigned long long)sgp->part[n]->nr_sects >> 1, 262 disk_name(sgp, n + 1, buf)); 263 } 264 } 265 266 mutex_unlock(&block_class_lock); 267 } 268 269 #ifdef CONFIG_PROC_FS 270 /* iterator */ 271 static void *part_start(struct seq_file *part, loff_t *pos) 272 { 273 loff_t k = *pos; 274 struct device *dev; 275 276 mutex_lock(&block_class_lock); 277 list_for_each_entry(dev, &block_class.devices, node) { 278 if (dev->type != &disk_type) 279 continue; 280 if (!k--) 281 return dev_to_disk(dev); 282 } 283 return NULL; 284 } 285 286 static void *part_next(struct seq_file *part, void *v, loff_t *pos) 287 { 288 struct gendisk *gp = v; 289 struct device *dev; 290 ++*pos; 291 list_for_each_entry(dev, &gp->dev.node, node) { 292 if (&dev->node == &block_class.devices) 293 return NULL; 294 if (dev->type == &disk_type) 295 return dev_to_disk(dev); 296 } 297 return NULL; 298 } 299 300 static void part_stop(struct seq_file *part, void *v) 301 { 302 mutex_unlock(&block_class_lock); 303 } 304 305 static int show_partition(struct seq_file *part, void *v) 306 { 307 struct gendisk *sgp = v; 308 int n; 309 char buf[BDEVNAME_SIZE]; 310 311 if (&sgp->dev.node == block_class.devices.next) 312 seq_puts(part, "major minor #blocks name\n\n"); 313 314 /* Don't show non-partitionable removeable devices or empty devices */ 315 if (!get_capacity(sgp) || 316 (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE))) 317 return 0; 318 if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) 319 return 0; 320 321 /* show the full disk and all non-0 size partitions of it */ 322 seq_printf(part, "%4d %4d %10llu %s\n", 323 sgp->major, sgp->first_minor, 324 (unsigned long long)get_capacity(sgp) >> 1, 325 disk_name(sgp, 0, buf)); 326 for (n = 0; n < sgp->minors - 1; n++) { 327 if (!sgp->part[n]) 328 continue; 329 if (sgp->part[n]->nr_sects == 0) 330 continue; 331 seq_printf(part, "%4d %4d %10llu %s\n", 332 sgp->major, n + 1 + sgp->first_minor, 333 (unsigned long long)sgp->part[n]->nr_sects >> 1 , 334 disk_name(sgp, n + 1, buf)); 335 } 336 337 return 0; 338 } 339 340 const struct seq_operations partitions_op = { 341 .start = part_start, 342 .next = part_next, 343 .stop = part_stop, 344 .show = show_partition 345 }; 346 #endif 347 348 349 extern int blk_dev_init(void); 350 351 static struct kobject *base_probe(dev_t devt, int *part, void *data) 352 { 353 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) 354 /* Make old-style 2.4 aliases work */ 355 request_module("block-major-%d", MAJOR(devt)); 356 return NULL; 357 } 358 359 static int __init genhd_device_init(void) 360 { 361 class_register(&block_class); 362 bdev_map = kobj_map_init(base_probe, &block_class_lock); 363 blk_dev_init(); 364 365 #ifndef CONFIG_SYSFS_DEPRECATED 366 /* create top-level block dir */ 367 block_depr = kobject_create_and_add("block", NULL); 368 #endif 369 return 0; 370 } 371 372 subsys_initcall(genhd_device_init); 373 374 static ssize_t disk_range_show(struct device *dev, 375 struct device_attribute *attr, char *buf) 376 { 377 struct gendisk *disk = dev_to_disk(dev); 378 379 return sprintf(buf, "%d\n", disk->minors); 380 } 381 382 static ssize_t disk_removable_show(struct device *dev, 383 struct device_attribute *attr, char *buf) 384 { 385 struct gendisk *disk = dev_to_disk(dev); 386 387 return sprintf(buf, "%d\n", 388 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); 389 } 390 391 static ssize_t disk_size_show(struct device *dev, 392 struct device_attribute *attr, char *buf) 393 { 394 struct gendisk *disk = dev_to_disk(dev); 395 396 return sprintf(buf, "%llu\n", (unsigned long long)get_capacity(disk)); 397 } 398 399 static ssize_t disk_capability_show(struct device *dev, 400 struct device_attribute *attr, char *buf) 401 { 402 struct gendisk *disk = dev_to_disk(dev); 403 404 return sprintf(buf, "%x\n", disk->flags); 405 } 406 407 static ssize_t disk_stat_show(struct device *dev, 408 struct device_attribute *attr, char *buf) 409 { 410 struct gendisk *disk = dev_to_disk(dev); 411 412 preempt_disable(); 413 disk_round_stats(disk); 414 preempt_enable(); 415 return sprintf(buf, 416 "%8lu %8lu %8llu %8u " 417 "%8lu %8lu %8llu %8u " 418 "%8u %8u %8u" 419 "\n", 420 disk_stat_read(disk, ios[READ]), 421 disk_stat_read(disk, merges[READ]), 422 (unsigned long long)disk_stat_read(disk, sectors[READ]), 423 jiffies_to_msecs(disk_stat_read(disk, ticks[READ])), 424 disk_stat_read(disk, ios[WRITE]), 425 disk_stat_read(disk, merges[WRITE]), 426 (unsigned long long)disk_stat_read(disk, sectors[WRITE]), 427 jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])), 428 disk->in_flight, 429 jiffies_to_msecs(disk_stat_read(disk, io_ticks)), 430 jiffies_to_msecs(disk_stat_read(disk, time_in_queue))); 431 } 432 433 #ifdef CONFIG_FAIL_MAKE_REQUEST 434 static ssize_t disk_fail_show(struct device *dev, 435 struct device_attribute *attr, char *buf) 436 { 437 struct gendisk *disk = dev_to_disk(dev); 438 439 return sprintf(buf, "%d\n", disk->flags & GENHD_FL_FAIL ? 1 : 0); 440 } 441 442 static ssize_t disk_fail_store(struct device *dev, 443 struct device_attribute *attr, 444 const char *buf, size_t count) 445 { 446 struct gendisk *disk = dev_to_disk(dev); 447 int i; 448 449 if (count > 0 && sscanf(buf, "%d", &i) > 0) { 450 if (i == 0) 451 disk->flags &= ~GENHD_FL_FAIL; 452 else 453 disk->flags |= GENHD_FL_FAIL; 454 } 455 456 return count; 457 } 458 459 #endif 460 461 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 462 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 463 static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL); 464 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 465 static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL); 466 #ifdef CONFIG_FAIL_MAKE_REQUEST 467 static struct device_attribute dev_attr_fail = 468 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, disk_fail_show, disk_fail_store); 469 #endif 470 471 static struct attribute *disk_attrs[] = { 472 &dev_attr_range.attr, 473 &dev_attr_removable.attr, 474 &dev_attr_size.attr, 475 &dev_attr_capability.attr, 476 &dev_attr_stat.attr, 477 #ifdef CONFIG_FAIL_MAKE_REQUEST 478 &dev_attr_fail.attr, 479 #endif 480 NULL 481 }; 482 483 static struct attribute_group disk_attr_group = { 484 .attrs = disk_attrs, 485 }; 486 487 static struct attribute_group *disk_attr_groups[] = { 488 &disk_attr_group, 489 NULL 490 }; 491 492 static void disk_release(struct device *dev) 493 { 494 struct gendisk *disk = dev_to_disk(dev); 495 496 kfree(disk->random); 497 kfree(disk->part); 498 free_disk_stats(disk); 499 kfree(disk); 500 } 501 struct class block_class = { 502 .name = "block", 503 }; 504 505 struct device_type disk_type = { 506 .name = "disk", 507 .groups = disk_attr_groups, 508 .release = disk_release, 509 }; 510 511 /* 512 * aggregate disk stat collector. Uses the same stats that the sysfs 513 * entries do, above, but makes them available through one seq_file. 514 * 515 * The output looks suspiciously like /proc/partitions with a bunch of 516 * extra fields. 517 */ 518 519 static void *diskstats_start(struct seq_file *part, loff_t *pos) 520 { 521 loff_t k = *pos; 522 struct device *dev; 523 524 mutex_lock(&block_class_lock); 525 list_for_each_entry(dev, &block_class.devices, node) { 526 if (dev->type != &disk_type) 527 continue; 528 if (!k--) 529 return dev_to_disk(dev); 530 } 531 return NULL; 532 } 533 534 static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos) 535 { 536 struct gendisk *gp = v; 537 struct device *dev; 538 539 ++*pos; 540 list_for_each_entry(dev, &gp->dev.node, node) { 541 if (&dev->node == &block_class.devices) 542 return NULL; 543 if (dev->type == &disk_type) 544 return dev_to_disk(dev); 545 } 546 return NULL; 547 } 548 549 static void diskstats_stop(struct seq_file *part, void *v) 550 { 551 mutex_unlock(&block_class_lock); 552 } 553 554 static int diskstats_show(struct seq_file *s, void *v) 555 { 556 struct gendisk *gp = v; 557 char buf[BDEVNAME_SIZE]; 558 int n = 0; 559 560 /* 561 if (&gp->dev.kobj.entry == block_class.devices.next) 562 seq_puts(s, "major minor name" 563 " rio rmerge rsect ruse wio wmerge " 564 "wsect wuse running use aveq" 565 "\n\n"); 566 */ 567 568 preempt_disable(); 569 disk_round_stats(gp); 570 preempt_enable(); 571 seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", 572 gp->major, n + gp->first_minor, disk_name(gp, n, buf), 573 disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), 574 (unsigned long long)disk_stat_read(gp, sectors[0]), 575 jiffies_to_msecs(disk_stat_read(gp, ticks[0])), 576 disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]), 577 (unsigned long long)disk_stat_read(gp, sectors[1]), 578 jiffies_to_msecs(disk_stat_read(gp, ticks[1])), 579 gp->in_flight, 580 jiffies_to_msecs(disk_stat_read(gp, io_ticks)), 581 jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); 582 583 /* now show all non-0 size partitions of it */ 584 for (n = 0; n < gp->minors - 1; n++) { 585 struct hd_struct *hd = gp->part[n]; 586 587 if (hd && hd->nr_sects) 588 seq_printf(s, "%4d %4d %s %u %u %u %u\n", 589 gp->major, n + gp->first_minor + 1, 590 disk_name(gp, n + 1, buf), 591 hd->ios[0], hd->sectors[0], 592 hd->ios[1], hd->sectors[1]); 593 } 594 595 return 0; 596 } 597 598 const struct seq_operations diskstats_op = { 599 .start = diskstats_start, 600 .next = diskstats_next, 601 .stop = diskstats_stop, 602 .show = diskstats_show 603 }; 604 605 static void media_change_notify_thread(struct work_struct *work) 606 { 607 struct gendisk *gd = container_of(work, struct gendisk, async_notify); 608 char event[] = "MEDIA_CHANGE=1"; 609 char *envp[] = { event, NULL }; 610 611 /* 612 * set enviroment vars to indicate which event this is for 613 * so that user space will know to go check the media status. 614 */ 615 kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp); 616 put_device(gd->driverfs_dev); 617 } 618 619 void genhd_media_change_notify(struct gendisk *disk) 620 { 621 get_device(disk->driverfs_dev); 622 schedule_work(&disk->async_notify); 623 } 624 EXPORT_SYMBOL_GPL(genhd_media_change_notify); 625 626 dev_t blk_lookup_devt(const char *name) 627 { 628 struct device *dev; 629 dev_t devt = MKDEV(0, 0); 630 631 mutex_lock(&block_class_lock); 632 list_for_each_entry(dev, &block_class.devices, node) { 633 if (strcmp(dev->bus_id, name) == 0) { 634 devt = dev->devt; 635 break; 636 } 637 } 638 mutex_unlock(&block_class_lock); 639 640 return devt; 641 } 642 643 EXPORT_SYMBOL(blk_lookup_devt); 644 645 struct gendisk *alloc_disk(int minors) 646 { 647 return alloc_disk_node(minors, -1); 648 } 649 650 struct gendisk *alloc_disk_node(int minors, int node_id) 651 { 652 struct gendisk *disk; 653 654 disk = kmalloc_node(sizeof(struct gendisk), 655 GFP_KERNEL | __GFP_ZERO, node_id); 656 if (disk) { 657 if (!init_disk_stats(disk)) { 658 kfree(disk); 659 return NULL; 660 } 661 if (minors > 1) { 662 int size = (minors - 1) * sizeof(struct hd_struct *); 663 disk->part = kmalloc_node(size, 664 GFP_KERNEL | __GFP_ZERO, node_id); 665 if (!disk->part) { 666 free_disk_stats(disk); 667 kfree(disk); 668 return NULL; 669 } 670 } 671 disk->minors = minors; 672 rand_initialize_disk(disk); 673 disk->dev.class = &block_class; 674 disk->dev.type = &disk_type; 675 device_initialize(&disk->dev); 676 INIT_WORK(&disk->async_notify, 677 media_change_notify_thread); 678 } 679 return disk; 680 } 681 682 EXPORT_SYMBOL(alloc_disk); 683 EXPORT_SYMBOL(alloc_disk_node); 684 685 struct kobject *get_disk(struct gendisk *disk) 686 { 687 struct module *owner; 688 struct kobject *kobj; 689 690 if (!disk->fops) 691 return NULL; 692 owner = disk->fops->owner; 693 if (owner && !try_module_get(owner)) 694 return NULL; 695 kobj = kobject_get(&disk->dev.kobj); 696 if (kobj == NULL) { 697 module_put(owner); 698 return NULL; 699 } 700 return kobj; 701 702 } 703 704 EXPORT_SYMBOL(get_disk); 705 706 void put_disk(struct gendisk *disk) 707 { 708 if (disk) 709 kobject_put(&disk->dev.kobj); 710 } 711 712 EXPORT_SYMBOL(put_disk); 713 714 void set_device_ro(struct block_device *bdev, int flag) 715 { 716 if (bdev->bd_contains != bdev) 717 bdev->bd_part->policy = flag; 718 else 719 bdev->bd_disk->policy = flag; 720 } 721 722 EXPORT_SYMBOL(set_device_ro); 723 724 void set_disk_ro(struct gendisk *disk, int flag) 725 { 726 int i; 727 disk->policy = flag; 728 for (i = 0; i < disk->minors - 1; i++) 729 if (disk->part[i]) disk->part[i]->policy = flag; 730 } 731 732 EXPORT_SYMBOL(set_disk_ro); 733 734 int bdev_read_only(struct block_device *bdev) 735 { 736 if (!bdev) 737 return 0; 738 else if (bdev->bd_contains != bdev) 739 return bdev->bd_part->policy; 740 else 741 return bdev->bd_disk->policy; 742 } 743 744 EXPORT_SYMBOL(bdev_read_only); 745 746 int invalidate_partition(struct gendisk *disk, int index) 747 { 748 int res = 0; 749 struct block_device *bdev = bdget_disk(disk, index); 750 if (bdev) { 751 fsync_bdev(bdev); 752 res = __invalidate_device(bdev); 753 bdput(bdev); 754 } 755 return res; 756 } 757 758 EXPORT_SYMBOL(invalidate_partition); 759