1 /* 2 * gendisk handling 3 */ 4 5 #include <linux/module.h> 6 #include <linux/fs.h> 7 #include <linux/genhd.h> 8 #include <linux/kdev_t.h> 9 #include <linux/kernel.h> 10 #include <linux/blkdev.h> 11 #include <linux/init.h> 12 #include <linux/spinlock.h> 13 #include <linux/proc_fs.h> 14 #include <linux/seq_file.h> 15 #include <linux/slab.h> 16 #include <linux/kmod.h> 17 #include <linux/kobj_map.h> 18 #include <linux/buffer_head.h> 19 #include <linux/mutex.h> 20 #include <linux/idr.h> 21 22 #include "blk.h" 23 24 static DEFINE_MUTEX(block_class_lock); 25 #ifndef CONFIG_SYSFS_DEPRECATED 26 struct kobject *block_depr; 27 #endif 28 29 /* for extended dynamic devt allocation, currently only one major is used */ 30 #define MAX_EXT_DEVT (1 << MINORBITS) 31 32 /* For extended devt allocation. ext_devt_mutex prevents look up 33 * results from going away underneath its user. 34 */ 35 static DEFINE_MUTEX(ext_devt_mutex); 36 static DEFINE_IDR(ext_devt_idr); 37 38 static struct device_type disk_type; 39 40 /** 41 * disk_get_part - get partition 42 * @disk: disk to look partition from 43 * @partno: partition number 44 * 45 * Look for partition @partno from @disk. If found, increment 46 * reference count and return it. 47 * 48 * CONTEXT: 49 * Don't care. 50 * 51 * RETURNS: 52 * Pointer to the found partition on success, NULL if not found. 53 */ 54 struct hd_struct *disk_get_part(struct gendisk *disk, int partno) 55 { 56 struct hd_struct *part = NULL; 57 struct disk_part_tbl *ptbl; 58 59 if (unlikely(partno < 0)) 60 return NULL; 61 62 rcu_read_lock(); 63 64 ptbl = rcu_dereference(disk->part_tbl); 65 if (likely(partno < ptbl->len)) { 66 part = rcu_dereference(ptbl->part[partno]); 67 if (part) 68 get_device(part_to_dev(part)); 69 } 70 71 rcu_read_unlock(); 72 73 return part; 74 } 75 EXPORT_SYMBOL_GPL(disk_get_part); 76 77 /** 78 * disk_part_iter_init - initialize partition iterator 79 * @piter: iterator to initialize 80 * @disk: disk to iterate over 81 * @flags: DISK_PITER_* flags 82 * 83 * Initialize @piter so that it iterates over partitions of @disk. 84 * 85 * CONTEXT: 86 * Don't care. 87 */ 88 void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, 89 unsigned int flags) 90 { 91 struct disk_part_tbl *ptbl; 92 93 rcu_read_lock(); 94 ptbl = rcu_dereference(disk->part_tbl); 95 96 piter->disk = disk; 97 piter->part = NULL; 98 99 if (flags & DISK_PITER_REVERSE) 100 piter->idx = ptbl->len - 1; 101 else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0)) 102 piter->idx = 0; 103 else 104 piter->idx = 1; 105 106 piter->flags = flags; 107 108 rcu_read_unlock(); 109 } 110 EXPORT_SYMBOL_GPL(disk_part_iter_init); 111 112 /** 113 * disk_part_iter_next - proceed iterator to the next partition and return it 114 * @piter: iterator of interest 115 * 116 * Proceed @piter to the next partition and return it. 117 * 118 * CONTEXT: 119 * Don't care. 120 */ 121 struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) 122 { 123 struct disk_part_tbl *ptbl; 124 int inc, end; 125 126 /* put the last partition */ 127 disk_put_part(piter->part); 128 piter->part = NULL; 129 130 /* get part_tbl */ 131 rcu_read_lock(); 132 ptbl = rcu_dereference(piter->disk->part_tbl); 133 134 /* determine iteration parameters */ 135 if (piter->flags & DISK_PITER_REVERSE) { 136 inc = -1; 137 if (piter->flags & (DISK_PITER_INCL_PART0 | 138 DISK_PITER_INCL_EMPTY_PART0)) 139 end = -1; 140 else 141 end = 0; 142 } else { 143 inc = 1; 144 end = ptbl->len; 145 } 146 147 /* iterate to the next partition */ 148 for (; piter->idx != end; piter->idx += inc) { 149 struct hd_struct *part; 150 151 part = rcu_dereference(ptbl->part[piter->idx]); 152 if (!part) 153 continue; 154 if (!part->nr_sects && 155 !(piter->flags & DISK_PITER_INCL_EMPTY) && 156 !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && 157 piter->idx == 0)) 158 continue; 159 160 get_device(part_to_dev(part)); 161 piter->part = part; 162 piter->idx += inc; 163 break; 164 } 165 166 rcu_read_unlock(); 167 168 return piter->part; 169 } 170 EXPORT_SYMBOL_GPL(disk_part_iter_next); 171 172 /** 173 * disk_part_iter_exit - finish up partition iteration 174 * @piter: iter of interest 175 * 176 * Called when iteration is over. Cleans up @piter. 177 * 178 * CONTEXT: 179 * Don't care. 180 */ 181 void disk_part_iter_exit(struct disk_part_iter *piter) 182 { 183 disk_put_part(piter->part); 184 piter->part = NULL; 185 } 186 EXPORT_SYMBOL_GPL(disk_part_iter_exit); 187 188 static inline int sector_in_part(struct hd_struct *part, sector_t sector) 189 { 190 return part->start_sect <= sector && 191 sector < part->start_sect + part->nr_sects; 192 } 193 194 /** 195 * disk_map_sector_rcu - map sector to partition 196 * @disk: gendisk of interest 197 * @sector: sector to map 198 * 199 * Find out which partition @sector maps to on @disk. This is 200 * primarily used for stats accounting. 201 * 202 * CONTEXT: 203 * RCU read locked. The returned partition pointer is valid only 204 * while preemption is disabled. 205 * 206 * RETURNS: 207 * Found partition on success, part0 is returned if no partition matches 208 */ 209 struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) 210 { 211 struct disk_part_tbl *ptbl; 212 struct hd_struct *part; 213 int i; 214 215 ptbl = rcu_dereference(disk->part_tbl); 216 217 part = rcu_dereference(ptbl->last_lookup); 218 if (part && sector_in_part(part, sector)) 219 return part; 220 221 for (i = 1; i < ptbl->len; i++) { 222 part = rcu_dereference(ptbl->part[i]); 223 224 if (part && sector_in_part(part, sector)) { 225 rcu_assign_pointer(ptbl->last_lookup, part); 226 return part; 227 } 228 } 229 return &disk->part0; 230 } 231 EXPORT_SYMBOL_GPL(disk_map_sector_rcu); 232 233 /* 234 * Can be deleted altogether. Later. 235 * 236 */ 237 static struct blk_major_name { 238 struct blk_major_name *next; 239 int major; 240 char name[16]; 241 } *major_names[BLKDEV_MAJOR_HASH_SIZE]; 242 243 /* index in the above - for now: assume no multimajor ranges */ 244 static inline int major_to_index(int major) 245 { 246 return major % BLKDEV_MAJOR_HASH_SIZE; 247 } 248 249 #ifdef CONFIG_PROC_FS 250 void blkdev_show(struct seq_file *seqf, off_t offset) 251 { 252 struct blk_major_name *dp; 253 254 if (offset < BLKDEV_MAJOR_HASH_SIZE) { 255 mutex_lock(&block_class_lock); 256 for (dp = major_names[offset]; dp; dp = dp->next) 257 seq_printf(seqf, "%3d %s\n", dp->major, dp->name); 258 mutex_unlock(&block_class_lock); 259 } 260 } 261 #endif /* CONFIG_PROC_FS */ 262 263 /** 264 * register_blkdev - register a new block device 265 * 266 * @major: the requested major device number [1..255]. If @major=0, try to 267 * allocate any unused major number. 268 * @name: the name of the new block device as a zero terminated string 269 * 270 * The @name must be unique within the system. 271 * 272 * The return value depends on the @major input parameter. 273 * - if a major device number was requested in range [1..255] then the 274 * function returns zero on success, or a negative error code 275 * - if any unused major number was requested with @major=0 parameter 276 * then the return value is the allocated major number in range 277 * [1..255] or a negative error code otherwise 278 */ 279 int register_blkdev(unsigned int major, const char *name) 280 { 281 struct blk_major_name **n, *p; 282 int index, ret = 0; 283 284 mutex_lock(&block_class_lock); 285 286 /* temporary */ 287 if (major == 0) { 288 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) { 289 if (major_names[index] == NULL) 290 break; 291 } 292 293 if (index == 0) { 294 printk("register_blkdev: failed to get major for %s\n", 295 name); 296 ret = -EBUSY; 297 goto out; 298 } 299 major = index; 300 ret = major; 301 } 302 303 p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL); 304 if (p == NULL) { 305 ret = -ENOMEM; 306 goto out; 307 } 308 309 p->major = major; 310 strlcpy(p->name, name, sizeof(p->name)); 311 p->next = NULL; 312 index = major_to_index(major); 313 314 for (n = &major_names[index]; *n; n = &(*n)->next) { 315 if ((*n)->major == major) 316 break; 317 } 318 if (!*n) 319 *n = p; 320 else 321 ret = -EBUSY; 322 323 if (ret < 0) { 324 printk("register_blkdev: cannot get major %d for %s\n", 325 major, name); 326 kfree(p); 327 } 328 out: 329 mutex_unlock(&block_class_lock); 330 return ret; 331 } 332 333 EXPORT_SYMBOL(register_blkdev); 334 335 void unregister_blkdev(unsigned int major, const char *name) 336 { 337 struct blk_major_name **n; 338 struct blk_major_name *p = NULL; 339 int index = major_to_index(major); 340 341 mutex_lock(&block_class_lock); 342 for (n = &major_names[index]; *n; n = &(*n)->next) 343 if ((*n)->major == major) 344 break; 345 if (!*n || strcmp((*n)->name, name)) { 346 WARN_ON(1); 347 } else { 348 p = *n; 349 *n = p->next; 350 } 351 mutex_unlock(&block_class_lock); 352 kfree(p); 353 } 354 355 EXPORT_SYMBOL(unregister_blkdev); 356 357 static struct kobj_map *bdev_map; 358 359 /** 360 * blk_mangle_minor - scatter minor numbers apart 361 * @minor: minor number to mangle 362 * 363 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT 364 * is enabled. Mangling twice gives the original value. 365 * 366 * RETURNS: 367 * Mangled value. 368 * 369 * CONTEXT: 370 * Don't care. 371 */ 372 static int blk_mangle_minor(int minor) 373 { 374 #ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT 375 int i; 376 377 for (i = 0; i < MINORBITS / 2; i++) { 378 int low = minor & (1 << i); 379 int high = minor & (1 << (MINORBITS - 1 - i)); 380 int distance = MINORBITS - 1 - 2 * i; 381 382 minor ^= low | high; /* clear both bits */ 383 low <<= distance; /* swap the positions */ 384 high >>= distance; 385 minor |= low | high; /* and set */ 386 } 387 #endif 388 return minor; 389 } 390 391 /** 392 * blk_alloc_devt - allocate a dev_t for a partition 393 * @part: partition to allocate dev_t for 394 * @devt: out parameter for resulting dev_t 395 * 396 * Allocate a dev_t for block device. 397 * 398 * RETURNS: 399 * 0 on success, allocated dev_t is returned in *@devt. -errno on 400 * failure. 401 * 402 * CONTEXT: 403 * Might sleep. 404 */ 405 int blk_alloc_devt(struct hd_struct *part, dev_t *devt) 406 { 407 struct gendisk *disk = part_to_disk(part); 408 int idx, rc; 409 410 /* in consecutive minor range? */ 411 if (part->partno < disk->minors) { 412 *devt = MKDEV(disk->major, disk->first_minor + part->partno); 413 return 0; 414 } 415 416 /* allocate ext devt */ 417 do { 418 if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL)) 419 return -ENOMEM; 420 rc = idr_get_new(&ext_devt_idr, part, &idx); 421 } while (rc == -EAGAIN); 422 423 if (rc) 424 return rc; 425 426 if (idx > MAX_EXT_DEVT) { 427 idr_remove(&ext_devt_idr, idx); 428 return -EBUSY; 429 } 430 431 *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx)); 432 return 0; 433 } 434 435 /** 436 * blk_free_devt - free a dev_t 437 * @devt: dev_t to free 438 * 439 * Free @devt which was allocated using blk_alloc_devt(). 440 * 441 * CONTEXT: 442 * Might sleep. 443 */ 444 void blk_free_devt(dev_t devt) 445 { 446 might_sleep(); 447 448 if (devt == MKDEV(0, 0)) 449 return; 450 451 if (MAJOR(devt) == BLOCK_EXT_MAJOR) { 452 mutex_lock(&ext_devt_mutex); 453 idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 454 mutex_unlock(&ext_devt_mutex); 455 } 456 } 457 458 static char *bdevt_str(dev_t devt, char *buf) 459 { 460 if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { 461 char tbuf[BDEVT_SIZE]; 462 snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt)); 463 snprintf(buf, BDEVT_SIZE, "%-9s", tbuf); 464 } else 465 snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); 466 467 return buf; 468 } 469 470 /* 471 * Register device numbers dev..(dev+range-1) 472 * range must be nonzero 473 * The hash chain is sorted on range, so that subranges can override. 474 */ 475 void blk_register_region(dev_t devt, unsigned long range, struct module *module, 476 struct kobject *(*probe)(dev_t, int *, void *), 477 int (*lock)(dev_t, void *), void *data) 478 { 479 kobj_map(bdev_map, devt, range, module, probe, lock, data); 480 } 481 482 EXPORT_SYMBOL(blk_register_region); 483 484 void blk_unregister_region(dev_t devt, unsigned long range) 485 { 486 kobj_unmap(bdev_map, devt, range); 487 } 488 489 EXPORT_SYMBOL(blk_unregister_region); 490 491 static struct kobject *exact_match(dev_t devt, int *partno, void *data) 492 { 493 struct gendisk *p = data; 494 495 return &disk_to_dev(p)->kobj; 496 } 497 498 static int exact_lock(dev_t devt, void *data) 499 { 500 struct gendisk *p = data; 501 502 if (!get_disk(p)) 503 return -1; 504 return 0; 505 } 506 507 /** 508 * add_disk - add partitioning information to kernel list 509 * @disk: per-device partitioning information 510 * 511 * This function registers the partitioning information in @disk 512 * with the kernel. 513 * 514 * FIXME: error handling 515 */ 516 void add_disk(struct gendisk *disk) 517 { 518 struct backing_dev_info *bdi; 519 dev_t devt; 520 int retval; 521 522 /* minors == 0 indicates to use ext devt from part0 and should 523 * be accompanied with EXT_DEVT flag. Make sure all 524 * parameters make sense. 525 */ 526 WARN_ON(disk->minors && !(disk->major || disk->first_minor)); 527 WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT)); 528 529 disk->flags |= GENHD_FL_UP; 530 531 retval = blk_alloc_devt(&disk->part0, &devt); 532 if (retval) { 533 WARN_ON(1); 534 return; 535 } 536 disk_to_dev(disk)->devt = devt; 537 538 /* ->major and ->first_minor aren't supposed to be 539 * dereferenced from here on, but set them just in case. 540 */ 541 disk->major = MAJOR(devt); 542 disk->first_minor = MINOR(devt); 543 544 blk_register_region(disk_devt(disk), disk->minors, NULL, 545 exact_match, exact_lock, disk); 546 register_disk(disk); 547 blk_register_queue(disk); 548 549 bdi = &disk->queue->backing_dev_info; 550 bdi_register_dev(bdi, disk_devt(disk)); 551 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, 552 "bdi"); 553 WARN_ON(retval); 554 } 555 556 EXPORT_SYMBOL(add_disk); 557 EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ 558 559 void unlink_gendisk(struct gendisk *disk) 560 { 561 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 562 bdi_unregister(&disk->queue->backing_dev_info); 563 blk_unregister_queue(disk); 564 blk_unregister_region(disk_devt(disk), disk->minors); 565 } 566 567 /** 568 * get_gendisk - get partitioning information for a given device 569 * @devt: device to get partitioning information for 570 * @partno: returned partition index 571 * 572 * This function gets the structure containing partitioning 573 * information for the given device @devt. 574 */ 575 struct gendisk *get_gendisk(dev_t devt, int *partno) 576 { 577 struct gendisk *disk = NULL; 578 579 if (MAJOR(devt) != BLOCK_EXT_MAJOR) { 580 struct kobject *kobj; 581 582 kobj = kobj_lookup(bdev_map, devt, partno); 583 if (kobj) 584 disk = dev_to_disk(kobj_to_dev(kobj)); 585 } else { 586 struct hd_struct *part; 587 588 mutex_lock(&ext_devt_mutex); 589 part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 590 if (part && get_disk(part_to_disk(part))) { 591 *partno = part->partno; 592 disk = part_to_disk(part); 593 } 594 mutex_unlock(&ext_devt_mutex); 595 } 596 597 return disk; 598 } 599 600 /** 601 * bdget_disk - do bdget() by gendisk and partition number 602 * @disk: gendisk of interest 603 * @partno: partition number 604 * 605 * Find partition @partno from @disk, do bdget() on it. 606 * 607 * CONTEXT: 608 * Don't care. 609 * 610 * RETURNS: 611 * Resulting block_device on success, NULL on failure. 612 */ 613 struct block_device *bdget_disk(struct gendisk *disk, int partno) 614 { 615 struct hd_struct *part; 616 struct block_device *bdev = NULL; 617 618 part = disk_get_part(disk, partno); 619 if (part) 620 bdev = bdget(part_devt(part)); 621 disk_put_part(part); 622 623 return bdev; 624 } 625 EXPORT_SYMBOL(bdget_disk); 626 627 /* 628 * print a full list of all partitions - intended for places where the root 629 * filesystem can't be mounted and thus to give the victim some idea of what 630 * went wrong 631 */ 632 void __init printk_all_partitions(void) 633 { 634 struct class_dev_iter iter; 635 struct device *dev; 636 637 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 638 while ((dev = class_dev_iter_next(&iter))) { 639 struct gendisk *disk = dev_to_disk(dev); 640 struct disk_part_iter piter; 641 struct hd_struct *part; 642 char name_buf[BDEVNAME_SIZE]; 643 char devt_buf[BDEVT_SIZE]; 644 645 /* 646 * Don't show empty devices or things that have been 647 * surpressed 648 */ 649 if (get_capacity(disk) == 0 || 650 (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) 651 continue; 652 653 /* 654 * Note, unlike /proc/partitions, I am showing the 655 * numbers in hex - the same format as the root= 656 * option takes. 657 */ 658 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 659 while ((part = disk_part_iter_next(&piter))) { 660 bool is_part0 = part == &disk->part0; 661 662 printk("%s%s %10llu %s", is_part0 ? "" : " ", 663 bdevt_str(part_devt(part), devt_buf), 664 (unsigned long long)part->nr_sects >> 1, 665 disk_name(disk, part->partno, name_buf)); 666 if (is_part0) { 667 if (disk->driverfs_dev != NULL && 668 disk->driverfs_dev->driver != NULL) 669 printk(" driver: %s\n", 670 disk->driverfs_dev->driver->name); 671 else 672 printk(" (driver?)\n"); 673 } else 674 printk("\n"); 675 } 676 disk_part_iter_exit(&piter); 677 } 678 class_dev_iter_exit(&iter); 679 } 680 681 #ifdef CONFIG_PROC_FS 682 /* iterator */ 683 static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos) 684 { 685 loff_t skip = *pos; 686 struct class_dev_iter *iter; 687 struct device *dev; 688 689 iter = kmalloc(sizeof(*iter), GFP_KERNEL); 690 if (!iter) 691 return ERR_PTR(-ENOMEM); 692 693 seqf->private = iter; 694 class_dev_iter_init(iter, &block_class, NULL, &disk_type); 695 do { 696 dev = class_dev_iter_next(iter); 697 if (!dev) 698 return NULL; 699 } while (skip--); 700 701 return dev_to_disk(dev); 702 } 703 704 static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos) 705 { 706 struct device *dev; 707 708 (*pos)++; 709 dev = class_dev_iter_next(seqf->private); 710 if (dev) 711 return dev_to_disk(dev); 712 713 return NULL; 714 } 715 716 static void disk_seqf_stop(struct seq_file *seqf, void *v) 717 { 718 struct class_dev_iter *iter = seqf->private; 719 720 /* stop is called even after start failed :-( */ 721 if (iter) { 722 class_dev_iter_exit(iter); 723 kfree(iter); 724 } 725 } 726 727 static void *show_partition_start(struct seq_file *seqf, loff_t *pos) 728 { 729 static void *p; 730 731 p = disk_seqf_start(seqf, pos); 732 if (!IS_ERR(p) && p && !*pos) 733 seq_puts(seqf, "major minor #blocks name\n\n"); 734 return p; 735 } 736 737 static int show_partition(struct seq_file *seqf, void *v) 738 { 739 struct gendisk *sgp = v; 740 struct disk_part_iter piter; 741 struct hd_struct *part; 742 char buf[BDEVNAME_SIZE]; 743 744 /* Don't show non-partitionable removeable devices or empty devices */ 745 if (!get_capacity(sgp) || (!disk_partitionable(sgp) && 746 (sgp->flags & GENHD_FL_REMOVABLE))) 747 return 0; 748 if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) 749 return 0; 750 751 /* show the full disk and all non-0 size partitions of it */ 752 disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); 753 while ((part = disk_part_iter_next(&piter))) 754 seq_printf(seqf, "%4d %7d %10llu %s\n", 755 MAJOR(part_devt(part)), MINOR(part_devt(part)), 756 (unsigned long long)part->nr_sects >> 1, 757 disk_name(sgp, part->partno, buf)); 758 disk_part_iter_exit(&piter); 759 760 return 0; 761 } 762 763 static const struct seq_operations partitions_op = { 764 .start = show_partition_start, 765 .next = disk_seqf_next, 766 .stop = disk_seqf_stop, 767 .show = show_partition 768 }; 769 770 static int partitions_open(struct inode *inode, struct file *file) 771 { 772 return seq_open(file, &partitions_op); 773 } 774 775 static const struct file_operations proc_partitions_operations = { 776 .open = partitions_open, 777 .read = seq_read, 778 .llseek = seq_lseek, 779 .release = seq_release, 780 }; 781 #endif 782 783 784 static struct kobject *base_probe(dev_t devt, int *partno, void *data) 785 { 786 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) 787 /* Make old-style 2.4 aliases work */ 788 request_module("block-major-%d", MAJOR(devt)); 789 return NULL; 790 } 791 792 static int __init genhd_device_init(void) 793 { 794 int error; 795 796 block_class.dev_kobj = sysfs_dev_block_kobj; 797 error = class_register(&block_class); 798 if (unlikely(error)) 799 return error; 800 bdev_map = kobj_map_init(base_probe, &block_class_lock); 801 blk_dev_init(); 802 803 register_blkdev(BLOCK_EXT_MAJOR, "blkext"); 804 805 #ifndef CONFIG_SYSFS_DEPRECATED 806 /* create top-level block dir */ 807 block_depr = kobject_create_and_add("block", NULL); 808 #endif 809 return 0; 810 } 811 812 subsys_initcall(genhd_device_init); 813 814 static ssize_t disk_range_show(struct device *dev, 815 struct device_attribute *attr, char *buf) 816 { 817 struct gendisk *disk = dev_to_disk(dev); 818 819 return sprintf(buf, "%d\n", disk->minors); 820 } 821 822 static ssize_t disk_ext_range_show(struct device *dev, 823 struct device_attribute *attr, char *buf) 824 { 825 struct gendisk *disk = dev_to_disk(dev); 826 827 return sprintf(buf, "%d\n", disk_max_parts(disk)); 828 } 829 830 static ssize_t disk_removable_show(struct device *dev, 831 struct device_attribute *attr, char *buf) 832 { 833 struct gendisk *disk = dev_to_disk(dev); 834 835 return sprintf(buf, "%d\n", 836 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); 837 } 838 839 static ssize_t disk_ro_show(struct device *dev, 840 struct device_attribute *attr, char *buf) 841 { 842 struct gendisk *disk = dev_to_disk(dev); 843 844 return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); 845 } 846 847 static ssize_t disk_capability_show(struct device *dev, 848 struct device_attribute *attr, char *buf) 849 { 850 struct gendisk *disk = dev_to_disk(dev); 851 852 return sprintf(buf, "%x\n", disk->flags); 853 } 854 855 static ssize_t disk_alignment_offset_show(struct device *dev, 856 struct device_attribute *attr, 857 char *buf) 858 { 859 struct gendisk *disk = dev_to_disk(dev); 860 861 return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue)); 862 } 863 864 static ssize_t disk_discard_alignment_show(struct device *dev, 865 struct device_attribute *attr, 866 char *buf) 867 { 868 struct gendisk *disk = dev_to_disk(dev); 869 870 return sprintf(buf, "%u\n", queue_discard_alignment(disk->queue)); 871 } 872 873 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 874 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); 875 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 876 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); 877 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 878 static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); 879 static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show, 880 NULL); 881 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 882 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 883 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); 884 #ifdef CONFIG_FAIL_MAKE_REQUEST 885 static struct device_attribute dev_attr_fail = 886 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 887 #endif 888 #ifdef CONFIG_FAIL_IO_TIMEOUT 889 static struct device_attribute dev_attr_fail_timeout = 890 __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show, 891 part_timeout_store); 892 #endif 893 894 static struct attribute *disk_attrs[] = { 895 &dev_attr_range.attr, 896 &dev_attr_ext_range.attr, 897 &dev_attr_removable.attr, 898 &dev_attr_ro.attr, 899 &dev_attr_size.attr, 900 &dev_attr_alignment_offset.attr, 901 &dev_attr_discard_alignment.attr, 902 &dev_attr_capability.attr, 903 &dev_attr_stat.attr, 904 &dev_attr_inflight.attr, 905 #ifdef CONFIG_FAIL_MAKE_REQUEST 906 &dev_attr_fail.attr, 907 #endif 908 #ifdef CONFIG_FAIL_IO_TIMEOUT 909 &dev_attr_fail_timeout.attr, 910 #endif 911 NULL 912 }; 913 914 static struct attribute_group disk_attr_group = { 915 .attrs = disk_attrs, 916 }; 917 918 static const struct attribute_group *disk_attr_groups[] = { 919 &disk_attr_group, 920 NULL 921 }; 922 923 static void disk_free_ptbl_rcu_cb(struct rcu_head *head) 924 { 925 struct disk_part_tbl *ptbl = 926 container_of(head, struct disk_part_tbl, rcu_head); 927 928 kfree(ptbl); 929 } 930 931 /** 932 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way 933 * @disk: disk to replace part_tbl for 934 * @new_ptbl: new part_tbl to install 935 * 936 * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The 937 * original ptbl is freed using RCU callback. 938 * 939 * LOCKING: 940 * Matching bd_mutx locked. 941 */ 942 static void disk_replace_part_tbl(struct gendisk *disk, 943 struct disk_part_tbl *new_ptbl) 944 { 945 struct disk_part_tbl *old_ptbl = disk->part_tbl; 946 947 rcu_assign_pointer(disk->part_tbl, new_ptbl); 948 949 if (old_ptbl) { 950 rcu_assign_pointer(old_ptbl->last_lookup, NULL); 951 call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb); 952 } 953 } 954 955 /** 956 * disk_expand_part_tbl - expand disk->part_tbl 957 * @disk: disk to expand part_tbl for 958 * @partno: expand such that this partno can fit in 959 * 960 * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl 961 * uses RCU to allow unlocked dereferencing for stats and other stuff. 962 * 963 * LOCKING: 964 * Matching bd_mutex locked, might sleep. 965 * 966 * RETURNS: 967 * 0 on success, -errno on failure. 968 */ 969 int disk_expand_part_tbl(struct gendisk *disk, int partno) 970 { 971 struct disk_part_tbl *old_ptbl = disk->part_tbl; 972 struct disk_part_tbl *new_ptbl; 973 int len = old_ptbl ? old_ptbl->len : 0; 974 int target = partno + 1; 975 size_t size; 976 int i; 977 978 /* disk_max_parts() is zero during initialization, ignore if so */ 979 if (disk_max_parts(disk) && target > disk_max_parts(disk)) 980 return -EINVAL; 981 982 if (target <= len) 983 return 0; 984 985 size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]); 986 new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id); 987 if (!new_ptbl) 988 return -ENOMEM; 989 990 INIT_RCU_HEAD(&new_ptbl->rcu_head); 991 new_ptbl->len = target; 992 993 for (i = 0; i < len; i++) 994 rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]); 995 996 disk_replace_part_tbl(disk, new_ptbl); 997 return 0; 998 } 999 1000 static void disk_release(struct device *dev) 1001 { 1002 struct gendisk *disk = dev_to_disk(dev); 1003 1004 kfree(disk->random); 1005 disk_replace_part_tbl(disk, NULL); 1006 free_part_stats(&disk->part0); 1007 kfree(disk); 1008 } 1009 struct class block_class = { 1010 .name = "block", 1011 }; 1012 1013 static char *block_devnode(struct device *dev, mode_t *mode) 1014 { 1015 struct gendisk *disk = dev_to_disk(dev); 1016 1017 if (disk->devnode) 1018 return disk->devnode(disk, mode); 1019 return NULL; 1020 } 1021 1022 static struct device_type disk_type = { 1023 .name = "disk", 1024 .groups = disk_attr_groups, 1025 .release = disk_release, 1026 .devnode = block_devnode, 1027 }; 1028 1029 #ifdef CONFIG_PROC_FS 1030 /* 1031 * aggregate disk stat collector. Uses the same stats that the sysfs 1032 * entries do, above, but makes them available through one seq_file. 1033 * 1034 * The output looks suspiciously like /proc/partitions with a bunch of 1035 * extra fields. 1036 */ 1037 static int diskstats_show(struct seq_file *seqf, void *v) 1038 { 1039 struct gendisk *gp = v; 1040 struct disk_part_iter piter; 1041 struct hd_struct *hd; 1042 char buf[BDEVNAME_SIZE]; 1043 int cpu; 1044 1045 /* 1046 if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) 1047 seq_puts(seqf, "major minor name" 1048 " rio rmerge rsect ruse wio wmerge " 1049 "wsect wuse running use aveq" 1050 "\n\n"); 1051 */ 1052 1053 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); 1054 while ((hd = disk_part_iter_next(&piter))) { 1055 cpu = part_stat_lock(); 1056 part_round_stats(cpu, hd); 1057 part_stat_unlock(); 1058 seq_printf(seqf, "%4d %7d %s %lu %lu %llu " 1059 "%u %lu %lu %llu %u %u %u %u\n", 1060 MAJOR(part_devt(hd)), MINOR(part_devt(hd)), 1061 disk_name(gp, hd->partno, buf), 1062 part_stat_read(hd, ios[0]), 1063 part_stat_read(hd, merges[0]), 1064 (unsigned long long)part_stat_read(hd, sectors[0]), 1065 jiffies_to_msecs(part_stat_read(hd, ticks[0])), 1066 part_stat_read(hd, ios[1]), 1067 part_stat_read(hd, merges[1]), 1068 (unsigned long long)part_stat_read(hd, sectors[1]), 1069 jiffies_to_msecs(part_stat_read(hd, ticks[1])), 1070 part_in_flight(hd), 1071 jiffies_to_msecs(part_stat_read(hd, io_ticks)), 1072 jiffies_to_msecs(part_stat_read(hd, time_in_queue)) 1073 ); 1074 } 1075 disk_part_iter_exit(&piter); 1076 1077 return 0; 1078 } 1079 1080 static const struct seq_operations diskstats_op = { 1081 .start = disk_seqf_start, 1082 .next = disk_seqf_next, 1083 .stop = disk_seqf_stop, 1084 .show = diskstats_show 1085 }; 1086 1087 static int diskstats_open(struct inode *inode, struct file *file) 1088 { 1089 return seq_open(file, &diskstats_op); 1090 } 1091 1092 static const struct file_operations proc_diskstats_operations = { 1093 .open = diskstats_open, 1094 .read = seq_read, 1095 .llseek = seq_lseek, 1096 .release = seq_release, 1097 }; 1098 1099 static int __init proc_genhd_init(void) 1100 { 1101 proc_create("diskstats", 0, NULL, &proc_diskstats_operations); 1102 proc_create("partitions", 0, NULL, &proc_partitions_operations); 1103 return 0; 1104 } 1105 module_init(proc_genhd_init); 1106 #endif /* CONFIG_PROC_FS */ 1107 1108 static void media_change_notify_thread(struct work_struct *work) 1109 { 1110 struct gendisk *gd = container_of(work, struct gendisk, async_notify); 1111 char event[] = "MEDIA_CHANGE=1"; 1112 char *envp[] = { event, NULL }; 1113 1114 /* 1115 * set enviroment vars to indicate which event this is for 1116 * so that user space will know to go check the media status. 1117 */ 1118 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); 1119 put_device(gd->driverfs_dev); 1120 } 1121 1122 #if 0 1123 void genhd_media_change_notify(struct gendisk *disk) 1124 { 1125 get_device(disk->driverfs_dev); 1126 schedule_work(&disk->async_notify); 1127 } 1128 EXPORT_SYMBOL_GPL(genhd_media_change_notify); 1129 #endif /* 0 */ 1130 1131 dev_t blk_lookup_devt(const char *name, int partno) 1132 { 1133 dev_t devt = MKDEV(0, 0); 1134 struct class_dev_iter iter; 1135 struct device *dev; 1136 1137 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 1138 while ((dev = class_dev_iter_next(&iter))) { 1139 struct gendisk *disk = dev_to_disk(dev); 1140 struct hd_struct *part; 1141 1142 if (strcmp(dev_name(dev), name)) 1143 continue; 1144 1145 if (partno < disk->minors) { 1146 /* We need to return the right devno, even 1147 * if the partition doesn't exist yet. 1148 */ 1149 devt = MKDEV(MAJOR(dev->devt), 1150 MINOR(dev->devt) + partno); 1151 break; 1152 } 1153 part = disk_get_part(disk, partno); 1154 if (part) { 1155 devt = part_devt(part); 1156 disk_put_part(part); 1157 break; 1158 } 1159 disk_put_part(part); 1160 } 1161 class_dev_iter_exit(&iter); 1162 return devt; 1163 } 1164 EXPORT_SYMBOL(blk_lookup_devt); 1165 1166 struct gendisk *alloc_disk(int minors) 1167 { 1168 return alloc_disk_node(minors, -1); 1169 } 1170 EXPORT_SYMBOL(alloc_disk); 1171 1172 struct gendisk *alloc_disk_node(int minors, int node_id) 1173 { 1174 struct gendisk *disk; 1175 1176 disk = kmalloc_node(sizeof(struct gendisk), 1177 GFP_KERNEL | __GFP_ZERO, node_id); 1178 if (disk) { 1179 if (!init_part_stats(&disk->part0)) { 1180 kfree(disk); 1181 return NULL; 1182 } 1183 disk->node_id = node_id; 1184 if (disk_expand_part_tbl(disk, 0)) { 1185 free_part_stats(&disk->part0); 1186 kfree(disk); 1187 return NULL; 1188 } 1189 disk->part_tbl->part[0] = &disk->part0; 1190 1191 disk->minors = minors; 1192 rand_initialize_disk(disk); 1193 disk_to_dev(disk)->class = &block_class; 1194 disk_to_dev(disk)->type = &disk_type; 1195 device_initialize(disk_to_dev(disk)); 1196 INIT_WORK(&disk->async_notify, 1197 media_change_notify_thread); 1198 } 1199 return disk; 1200 } 1201 EXPORT_SYMBOL(alloc_disk_node); 1202 1203 struct kobject *get_disk(struct gendisk *disk) 1204 { 1205 struct module *owner; 1206 struct kobject *kobj; 1207 1208 if (!disk->fops) 1209 return NULL; 1210 owner = disk->fops->owner; 1211 if (owner && !try_module_get(owner)) 1212 return NULL; 1213 kobj = kobject_get(&disk_to_dev(disk)->kobj); 1214 if (kobj == NULL) { 1215 module_put(owner); 1216 return NULL; 1217 } 1218 return kobj; 1219 1220 } 1221 1222 EXPORT_SYMBOL(get_disk); 1223 1224 void put_disk(struct gendisk *disk) 1225 { 1226 if (disk) 1227 kobject_put(&disk_to_dev(disk)->kobj); 1228 } 1229 1230 EXPORT_SYMBOL(put_disk); 1231 1232 static void set_disk_ro_uevent(struct gendisk *gd, int ro) 1233 { 1234 char event[] = "DISK_RO=1"; 1235 char *envp[] = { event, NULL }; 1236 1237 if (!ro) 1238 event[8] = '0'; 1239 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); 1240 } 1241 1242 void set_device_ro(struct block_device *bdev, int flag) 1243 { 1244 bdev->bd_part->policy = flag; 1245 } 1246 1247 EXPORT_SYMBOL(set_device_ro); 1248 1249 void set_disk_ro(struct gendisk *disk, int flag) 1250 { 1251 struct disk_part_iter piter; 1252 struct hd_struct *part; 1253 1254 if (disk->part0.policy != flag) { 1255 set_disk_ro_uevent(disk, flag); 1256 disk->part0.policy = flag; 1257 } 1258 1259 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 1260 while ((part = disk_part_iter_next(&piter))) 1261 part->policy = flag; 1262 disk_part_iter_exit(&piter); 1263 } 1264 1265 EXPORT_SYMBOL(set_disk_ro); 1266 1267 int bdev_read_only(struct block_device *bdev) 1268 { 1269 if (!bdev) 1270 return 0; 1271 return bdev->bd_part->policy; 1272 } 1273 1274 EXPORT_SYMBOL(bdev_read_only); 1275 1276 int invalidate_partition(struct gendisk *disk, int partno) 1277 { 1278 int res = 0; 1279 struct block_device *bdev = bdget_disk(disk, partno); 1280 if (bdev) { 1281 fsync_bdev(bdev); 1282 res = __invalidate_device(bdev); 1283 bdput(bdev); 1284 } 1285 return res; 1286 } 1287 1288 EXPORT_SYMBOL(invalidate_partition); 1289