/*
 *  gendisk handling
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/kobj_map.h>
#include <linux/buffer_head.h>
#include <linux/mutex.h>
#include <linux/idr.h>

#include "blk.h"

/*
 * Serializes access to the major_names[] hash (blkdev_show(),
 * register_blkdev(), unregister_blkdev()) and is also the lock handed
 * to kobj_map_init() for bdev_map.
 */
static DEFINE_MUTEX(block_class_lock);
#ifndef CONFIG_SYSFS_DEPRECATED
/* top-level "block" kobject, created in genhd_device_init() */
struct kobject *block_depr;
#endif

/* for extended dynamic devt allocation, currently only one major is used */
#define MAX_EXT_DEVT		(1 << MINORBITS)

/* For extended devt allocation.  ext_devt_mutex prevents look up
 * results from going away underneath its user.
 */
static DEFINE_MUTEX(ext_devt_mutex);
/* maps an extended minor index to the hd_struct it was allocated for */
static DEFINE_IDR(ext_devt_idr);

/* forward declaration; the definition follows the sysfs attributes below */
static struct device_type disk_type;

/**
 * disk_get_part - get partition
 * @disk: disk to look partition from
 * @partno: partition number
 *
 * Look for partition @partno from @disk.  If found, increment
 * reference count and return it.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * Pointer to the found partition on success, NULL if not found.
53 */ 54 struct hd_struct *disk_get_part(struct gendisk *disk, int partno) 55 { 56 struct hd_struct *part = NULL; 57 struct disk_part_tbl *ptbl; 58 59 if (unlikely(partno < 0)) 60 return NULL; 61 62 rcu_read_lock(); 63 64 ptbl = rcu_dereference(disk->part_tbl); 65 if (likely(partno < ptbl->len)) { 66 part = rcu_dereference(ptbl->part[partno]); 67 if (part) 68 get_device(part_to_dev(part)); 69 } 70 71 rcu_read_unlock(); 72 73 return part; 74 } 75 EXPORT_SYMBOL_GPL(disk_get_part); 76 77 /** 78 * disk_part_iter_init - initialize partition iterator 79 * @piter: iterator to initialize 80 * @disk: disk to iterate over 81 * @flags: DISK_PITER_* flags 82 * 83 * Initialize @piter so that it iterates over partitions of @disk. 84 * 85 * CONTEXT: 86 * Don't care. 87 */ 88 void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, 89 unsigned int flags) 90 { 91 struct disk_part_tbl *ptbl; 92 93 rcu_read_lock(); 94 ptbl = rcu_dereference(disk->part_tbl); 95 96 piter->disk = disk; 97 piter->part = NULL; 98 99 if (flags & DISK_PITER_REVERSE) 100 piter->idx = ptbl->len - 1; 101 else if (flags & DISK_PITER_INCL_PART0) 102 piter->idx = 0; 103 else 104 piter->idx = 1; 105 106 piter->flags = flags; 107 108 rcu_read_unlock(); 109 } 110 EXPORT_SYMBOL_GPL(disk_part_iter_init); 111 112 /** 113 * disk_part_iter_next - proceed iterator to the next partition and return it 114 * @piter: iterator of interest 115 * 116 * Proceed @piter to the next partition and return it. 117 * 118 * CONTEXT: 119 * Don't care. 
120 */ 121 struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) 122 { 123 struct disk_part_tbl *ptbl; 124 int inc, end; 125 126 /* put the last partition */ 127 disk_put_part(piter->part); 128 piter->part = NULL; 129 130 /* get part_tbl */ 131 rcu_read_lock(); 132 ptbl = rcu_dereference(piter->disk->part_tbl); 133 134 /* determine iteration parameters */ 135 if (piter->flags & DISK_PITER_REVERSE) { 136 inc = -1; 137 if (piter->flags & DISK_PITER_INCL_PART0) 138 end = -1; 139 else 140 end = 0; 141 } else { 142 inc = 1; 143 end = ptbl->len; 144 } 145 146 /* iterate to the next partition */ 147 for (; piter->idx != end; piter->idx += inc) { 148 struct hd_struct *part; 149 150 part = rcu_dereference(ptbl->part[piter->idx]); 151 if (!part) 152 continue; 153 if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) 154 continue; 155 156 get_device(part_to_dev(part)); 157 piter->part = part; 158 piter->idx += inc; 159 break; 160 } 161 162 rcu_read_unlock(); 163 164 return piter->part; 165 } 166 EXPORT_SYMBOL_GPL(disk_part_iter_next); 167 168 /** 169 * disk_part_iter_exit - finish up partition iteration 170 * @piter: iter of interest 171 * 172 * Called when iteration is over. Cleans up @piter. 173 * 174 * CONTEXT: 175 * Don't care. 176 */ 177 void disk_part_iter_exit(struct disk_part_iter *piter) 178 { 179 disk_put_part(piter->part); 180 piter->part = NULL; 181 } 182 EXPORT_SYMBOL_GPL(disk_part_iter_exit); 183 184 static inline int sector_in_part(struct hd_struct *part, sector_t sector) 185 { 186 return part->start_sect <= sector && 187 sector < part->start_sect + part->nr_sects; 188 } 189 190 /** 191 * disk_map_sector_rcu - map sector to partition 192 * @disk: gendisk of interest 193 * @sector: sector to map 194 * 195 * Find out which partition @sector maps to on @disk. This is 196 * primarily used for stats accounting. 197 * 198 * CONTEXT: 199 * RCU read locked. The returned partition pointer is valid only 200 * while preemption is disabled. 
201 * 202 * RETURNS: 203 * Found partition on success, part0 is returned if no partition matches 204 */ 205 struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) 206 { 207 struct disk_part_tbl *ptbl; 208 struct hd_struct *part; 209 int i; 210 211 ptbl = rcu_dereference(disk->part_tbl); 212 213 part = rcu_dereference(ptbl->last_lookup); 214 if (part && sector_in_part(part, sector)) 215 return part; 216 217 for (i = 1; i < ptbl->len; i++) { 218 part = rcu_dereference(ptbl->part[i]); 219 220 if (part && sector_in_part(part, sector)) { 221 rcu_assign_pointer(ptbl->last_lookup, part); 222 return part; 223 } 224 } 225 return &disk->part0; 226 } 227 EXPORT_SYMBOL_GPL(disk_map_sector_rcu); 228 229 /* 230 * Can be deleted altogether. Later. 231 * 232 */ 233 static struct blk_major_name { 234 struct blk_major_name *next; 235 int major; 236 char name[16]; 237 } *major_names[BLKDEV_MAJOR_HASH_SIZE]; 238 239 /* index in the above - for now: assume no multimajor ranges */ 240 static inline int major_to_index(int major) 241 { 242 return major % BLKDEV_MAJOR_HASH_SIZE; 243 } 244 245 #ifdef CONFIG_PROC_FS 246 void blkdev_show(struct seq_file *seqf, off_t offset) 247 { 248 struct blk_major_name *dp; 249 250 if (offset < BLKDEV_MAJOR_HASH_SIZE) { 251 mutex_lock(&block_class_lock); 252 for (dp = major_names[offset]; dp; dp = dp->next) 253 seq_printf(seqf, "%3d %s\n", dp->major, dp->name); 254 mutex_unlock(&block_class_lock); 255 } 256 } 257 #endif /* CONFIG_PROC_FS */ 258 259 /** 260 * register_blkdev - register a new block device 261 * 262 * @major: the requested major device number [1..255]. If @major=0, try to 263 * allocate any unused major number. 264 * @name: the name of the new block device as a zero terminated string 265 * 266 * The @name must be unique within the system. 267 * 268 * The return value depends on the @major input parameter. 
269 * - if a major device number was requested in range [1..255] then the 270 * function returns zero on success, or a negative error code 271 * - if any unused major number was requested with @major=0 parameter 272 * then the return value is the allocated major number in range 273 * [1..255] or a negative error code otherwise 274 */ 275 int register_blkdev(unsigned int major, const char *name) 276 { 277 struct blk_major_name **n, *p; 278 int index, ret = 0; 279 280 mutex_lock(&block_class_lock); 281 282 /* temporary */ 283 if (major == 0) { 284 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) { 285 if (major_names[index] == NULL) 286 break; 287 } 288 289 if (index == 0) { 290 printk("register_blkdev: failed to get major for %s\n", 291 name); 292 ret = -EBUSY; 293 goto out; 294 } 295 major = index; 296 ret = major; 297 } 298 299 p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL); 300 if (p == NULL) { 301 ret = -ENOMEM; 302 goto out; 303 } 304 305 p->major = major; 306 strlcpy(p->name, name, sizeof(p->name)); 307 p->next = NULL; 308 index = major_to_index(major); 309 310 for (n = &major_names[index]; *n; n = &(*n)->next) { 311 if ((*n)->major == major) 312 break; 313 } 314 if (!*n) 315 *n = p; 316 else 317 ret = -EBUSY; 318 319 if (ret < 0) { 320 printk("register_blkdev: cannot get major %d for %s\n", 321 major, name); 322 kfree(p); 323 } 324 out: 325 mutex_unlock(&block_class_lock); 326 return ret; 327 } 328 329 EXPORT_SYMBOL(register_blkdev); 330 331 void unregister_blkdev(unsigned int major, const char *name) 332 { 333 struct blk_major_name **n; 334 struct blk_major_name *p = NULL; 335 int index = major_to_index(major); 336 337 mutex_lock(&block_class_lock); 338 for (n = &major_names[index]; *n; n = &(*n)->next) 339 if ((*n)->major == major) 340 break; 341 if (!*n || strcmp((*n)->name, name)) { 342 WARN_ON(1); 343 } else { 344 p = *n; 345 *n = p->next; 346 } 347 mutex_unlock(&block_class_lock); 348 kfree(p); 349 } 350 351 
EXPORT_SYMBOL(unregister_blkdev); 352 353 static struct kobj_map *bdev_map; 354 355 /** 356 * blk_mangle_minor - scatter minor numbers apart 357 * @minor: minor number to mangle 358 * 359 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT 360 * is enabled. Mangling twice gives the original value. 361 * 362 * RETURNS: 363 * Mangled value. 364 * 365 * CONTEXT: 366 * Don't care. 367 */ 368 static int blk_mangle_minor(int minor) 369 { 370 #ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT 371 int i; 372 373 for (i = 0; i < MINORBITS / 2; i++) { 374 int low = minor & (1 << i); 375 int high = minor & (1 << (MINORBITS - 1 - i)); 376 int distance = MINORBITS - 1 - 2 * i; 377 378 minor ^= low | high; /* clear both bits */ 379 low <<= distance; /* swap the positions */ 380 high >>= distance; 381 minor |= low | high; /* and set */ 382 } 383 #endif 384 return minor; 385 } 386 387 /** 388 * blk_alloc_devt - allocate a dev_t for a partition 389 * @part: partition to allocate dev_t for 390 * @devt: out parameter for resulting dev_t 391 * 392 * Allocate a dev_t for block device. 393 * 394 * RETURNS: 395 * 0 on success, allocated dev_t is returned in *@devt. -errno on 396 * failure. 397 * 398 * CONTEXT: 399 * Might sleep. 400 */ 401 int blk_alloc_devt(struct hd_struct *part, dev_t *devt) 402 { 403 struct gendisk *disk = part_to_disk(part); 404 int idx, rc; 405 406 /* in consecutive minor range? 
*/ 407 if (part->partno < disk->minors) { 408 *devt = MKDEV(disk->major, disk->first_minor + part->partno); 409 return 0; 410 } 411 412 /* allocate ext devt */ 413 do { 414 if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL)) 415 return -ENOMEM; 416 rc = idr_get_new(&ext_devt_idr, part, &idx); 417 } while (rc == -EAGAIN); 418 419 if (rc) 420 return rc; 421 422 if (idx > MAX_EXT_DEVT) { 423 idr_remove(&ext_devt_idr, idx); 424 return -EBUSY; 425 } 426 427 *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx)); 428 return 0; 429 } 430 431 /** 432 * blk_free_devt - free a dev_t 433 * @devt: dev_t to free 434 * 435 * Free @devt which was allocated using blk_alloc_devt(). 436 * 437 * CONTEXT: 438 * Might sleep. 439 */ 440 void blk_free_devt(dev_t devt) 441 { 442 might_sleep(); 443 444 if (devt == MKDEV(0, 0)) 445 return; 446 447 if (MAJOR(devt) == BLOCK_EXT_MAJOR) { 448 mutex_lock(&ext_devt_mutex); 449 idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 450 mutex_unlock(&ext_devt_mutex); 451 } 452 } 453 454 static char *bdevt_str(dev_t devt, char *buf) 455 { 456 if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { 457 char tbuf[BDEVT_SIZE]; 458 snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt)); 459 snprintf(buf, BDEVT_SIZE, "%-9s", tbuf); 460 } else 461 snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); 462 463 return buf; 464 } 465 466 /* 467 * Register device numbers dev..(dev+range-1) 468 * range must be nonzero 469 * The hash chain is sorted on range, so that subranges can override. 
470 */ 471 void blk_register_region(dev_t devt, unsigned long range, struct module *module, 472 struct kobject *(*probe)(dev_t, int *, void *), 473 int (*lock)(dev_t, void *), void *data) 474 { 475 kobj_map(bdev_map, devt, range, module, probe, lock, data); 476 } 477 478 EXPORT_SYMBOL(blk_register_region); 479 480 void blk_unregister_region(dev_t devt, unsigned long range) 481 { 482 kobj_unmap(bdev_map, devt, range); 483 } 484 485 EXPORT_SYMBOL(blk_unregister_region); 486 487 static struct kobject *exact_match(dev_t devt, int *partno, void *data) 488 { 489 struct gendisk *p = data; 490 491 return &disk_to_dev(p)->kobj; 492 } 493 494 static int exact_lock(dev_t devt, void *data) 495 { 496 struct gendisk *p = data; 497 498 if (!get_disk(p)) 499 return -1; 500 return 0; 501 } 502 503 /** 504 * add_disk - add partitioning information to kernel list 505 * @disk: per-device partitioning information 506 * 507 * This function registers the partitioning information in @disk 508 * with the kernel. 509 * 510 * FIXME: error handling 511 */ 512 void add_disk(struct gendisk *disk) 513 { 514 struct backing_dev_info *bdi; 515 dev_t devt; 516 int retval; 517 518 /* minors == 0 indicates to use ext devt from part0 and should 519 * be accompanied with EXT_DEVT flag. Make sure all 520 * parameters make sense. 521 */ 522 WARN_ON(disk->minors && !(disk->major || disk->first_minor)); 523 WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT)); 524 525 disk->flags |= GENHD_FL_UP; 526 527 retval = blk_alloc_devt(&disk->part0, &devt); 528 if (retval) { 529 WARN_ON(1); 530 return; 531 } 532 disk_to_dev(disk)->devt = devt; 533 534 /* ->major and ->first_minor aren't supposed to be 535 * dereferenced from here on, but set them just in case. 
536 */ 537 disk->major = MAJOR(devt); 538 disk->first_minor = MINOR(devt); 539 540 blk_register_region(disk_devt(disk), disk->minors, NULL, 541 exact_match, exact_lock, disk); 542 register_disk(disk); 543 blk_register_queue(disk); 544 545 bdi = &disk->queue->backing_dev_info; 546 bdi_register_dev(bdi, disk_devt(disk)); 547 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, 548 "bdi"); 549 WARN_ON(retval); 550 } 551 552 EXPORT_SYMBOL(add_disk); 553 EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ 554 555 void unlink_gendisk(struct gendisk *disk) 556 { 557 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 558 bdi_unregister(&disk->queue->backing_dev_info); 559 blk_unregister_queue(disk); 560 blk_unregister_region(disk_devt(disk), disk->minors); 561 } 562 563 /** 564 * get_gendisk - get partitioning information for a given device 565 * @devt: device to get partitioning information for 566 * @partno: returned partition index 567 * 568 * This function gets the structure containing partitioning 569 * information for the given device @devt. 570 */ 571 struct gendisk *get_gendisk(dev_t devt, int *partno) 572 { 573 struct gendisk *disk = NULL; 574 575 if (MAJOR(devt) != BLOCK_EXT_MAJOR) { 576 struct kobject *kobj; 577 578 kobj = kobj_lookup(bdev_map, devt, partno); 579 if (kobj) 580 disk = dev_to_disk(kobj_to_dev(kobj)); 581 } else { 582 struct hd_struct *part; 583 584 mutex_lock(&ext_devt_mutex); 585 part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 586 if (part && get_disk(part_to_disk(part))) { 587 *partno = part->partno; 588 disk = part_to_disk(part); 589 } 590 mutex_unlock(&ext_devt_mutex); 591 } 592 593 return disk; 594 } 595 596 /** 597 * bdget_disk - do bdget() by gendisk and partition number 598 * @disk: gendisk of interest 599 * @partno: partition number 600 * 601 * Find partition @partno from @disk, do bdget() on it. 602 * 603 * CONTEXT: 604 * Don't care. 
605 * 606 * RETURNS: 607 * Resulting block_device on success, NULL on failure. 608 */ 609 struct block_device *bdget_disk(struct gendisk *disk, int partno) 610 { 611 struct hd_struct *part; 612 struct block_device *bdev = NULL; 613 614 part = disk_get_part(disk, partno); 615 if (part) 616 bdev = bdget(part_devt(part)); 617 disk_put_part(part); 618 619 return bdev; 620 } 621 EXPORT_SYMBOL(bdget_disk); 622 623 /* 624 * print a full list of all partitions - intended for places where the root 625 * filesystem can't be mounted and thus to give the victim some idea of what 626 * went wrong 627 */ 628 void __init printk_all_partitions(void) 629 { 630 struct class_dev_iter iter; 631 struct device *dev; 632 633 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 634 while ((dev = class_dev_iter_next(&iter))) { 635 struct gendisk *disk = dev_to_disk(dev); 636 struct disk_part_iter piter; 637 struct hd_struct *part; 638 char name_buf[BDEVNAME_SIZE]; 639 char devt_buf[BDEVT_SIZE]; 640 641 /* 642 * Don't show empty devices or things that have been 643 * surpressed 644 */ 645 if (get_capacity(disk) == 0 || 646 (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) 647 continue; 648 649 /* 650 * Note, unlike /proc/partitions, I am showing the 651 * numbers in hex - the same format as the root= 652 * option takes. 653 */ 654 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 655 while ((part = disk_part_iter_next(&piter))) { 656 bool is_part0 = part == &disk->part0; 657 658 printk("%s%s %10llu %s", is_part0 ? 
"" : " ", 659 bdevt_str(part_devt(part), devt_buf), 660 (unsigned long long)part->nr_sects >> 1, 661 disk_name(disk, part->partno, name_buf)); 662 if (is_part0) { 663 if (disk->driverfs_dev != NULL && 664 disk->driverfs_dev->driver != NULL) 665 printk(" driver: %s\n", 666 disk->driverfs_dev->driver->name); 667 else 668 printk(" (driver?)\n"); 669 } else 670 printk("\n"); 671 } 672 disk_part_iter_exit(&piter); 673 } 674 class_dev_iter_exit(&iter); 675 } 676 677 #ifdef CONFIG_PROC_FS 678 /* iterator */ 679 static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos) 680 { 681 loff_t skip = *pos; 682 struct class_dev_iter *iter; 683 struct device *dev; 684 685 iter = kmalloc(sizeof(*iter), GFP_KERNEL); 686 if (!iter) 687 return ERR_PTR(-ENOMEM); 688 689 seqf->private = iter; 690 class_dev_iter_init(iter, &block_class, NULL, &disk_type); 691 do { 692 dev = class_dev_iter_next(iter); 693 if (!dev) 694 return NULL; 695 } while (skip--); 696 697 return dev_to_disk(dev); 698 } 699 700 static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos) 701 { 702 struct device *dev; 703 704 (*pos)++; 705 dev = class_dev_iter_next(seqf->private); 706 if (dev) 707 return dev_to_disk(dev); 708 709 return NULL; 710 } 711 712 static void disk_seqf_stop(struct seq_file *seqf, void *v) 713 { 714 struct class_dev_iter *iter = seqf->private; 715 716 /* stop is called even after start failed :-( */ 717 if (iter) { 718 class_dev_iter_exit(iter); 719 kfree(iter); 720 } 721 } 722 723 static void *show_partition_start(struct seq_file *seqf, loff_t *pos) 724 { 725 static void *p; 726 727 p = disk_seqf_start(seqf, pos); 728 if (!IS_ERR(p) && p && !*pos) 729 seq_puts(seqf, "major minor #blocks name\n\n"); 730 return p; 731 } 732 733 static int show_partition(struct seq_file *seqf, void *v) 734 { 735 struct gendisk *sgp = v; 736 struct disk_part_iter piter; 737 struct hd_struct *part; 738 char buf[BDEVNAME_SIZE]; 739 740 /* Don't show non-partitionable removeable devices or empty 
devices */ 741 if (!get_capacity(sgp) || (!disk_partitionable(sgp) && 742 (sgp->flags & GENHD_FL_REMOVABLE))) 743 return 0; 744 if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) 745 return 0; 746 747 /* show the full disk and all non-0 size partitions of it */ 748 disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); 749 while ((part = disk_part_iter_next(&piter))) 750 seq_printf(seqf, "%4d %7d %10llu %s\n", 751 MAJOR(part_devt(part)), MINOR(part_devt(part)), 752 (unsigned long long)part->nr_sects >> 1, 753 disk_name(sgp, part->partno, buf)); 754 disk_part_iter_exit(&piter); 755 756 return 0; 757 } 758 759 static const struct seq_operations partitions_op = { 760 .start = show_partition_start, 761 .next = disk_seqf_next, 762 .stop = disk_seqf_stop, 763 .show = show_partition 764 }; 765 766 static int partitions_open(struct inode *inode, struct file *file) 767 { 768 return seq_open(file, &partitions_op); 769 } 770 771 static const struct file_operations proc_partitions_operations = { 772 .open = partitions_open, 773 .read = seq_read, 774 .llseek = seq_lseek, 775 .release = seq_release, 776 }; 777 #endif 778 779 780 static struct kobject *base_probe(dev_t devt, int *partno, void *data) 781 { 782 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) 783 /* Make old-style 2.4 aliases work */ 784 request_module("block-major-%d", MAJOR(devt)); 785 return NULL; 786 } 787 788 static int __init genhd_device_init(void) 789 { 790 int error; 791 792 block_class.dev_kobj = sysfs_dev_block_kobj; 793 error = class_register(&block_class); 794 if (unlikely(error)) 795 return error; 796 bdev_map = kobj_map_init(base_probe, &block_class_lock); 797 blk_dev_init(); 798 799 register_blkdev(BLOCK_EXT_MAJOR, "blkext"); 800 801 #ifndef CONFIG_SYSFS_DEPRECATED 802 /* create top-level block dir */ 803 block_depr = kobject_create_and_add("block", NULL); 804 #endif 805 return 0; 806 } 807 808 subsys_initcall(genhd_device_init); 809 810 static ssize_t disk_range_show(struct 
device *dev, 811 struct device_attribute *attr, char *buf) 812 { 813 struct gendisk *disk = dev_to_disk(dev); 814 815 return sprintf(buf, "%d\n", disk->minors); 816 } 817 818 static ssize_t disk_ext_range_show(struct device *dev, 819 struct device_attribute *attr, char *buf) 820 { 821 struct gendisk *disk = dev_to_disk(dev); 822 823 return sprintf(buf, "%d\n", disk_max_parts(disk)); 824 } 825 826 static ssize_t disk_removable_show(struct device *dev, 827 struct device_attribute *attr, char *buf) 828 { 829 struct gendisk *disk = dev_to_disk(dev); 830 831 return sprintf(buf, "%d\n", 832 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); 833 } 834 835 static ssize_t disk_ro_show(struct device *dev, 836 struct device_attribute *attr, char *buf) 837 { 838 struct gendisk *disk = dev_to_disk(dev); 839 840 return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); 841 } 842 843 static ssize_t disk_capability_show(struct device *dev, 844 struct device_attribute *attr, char *buf) 845 { 846 struct gendisk *disk = dev_to_disk(dev); 847 848 return sprintf(buf, "%x\n", disk->flags); 849 } 850 851 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 852 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); 853 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 854 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); 855 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 856 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 857 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 858 #ifdef CONFIG_FAIL_MAKE_REQUEST 859 static struct device_attribute dev_attr_fail = 860 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 861 #endif 862 #ifdef CONFIG_FAIL_IO_TIMEOUT 863 static struct device_attribute dev_attr_fail_timeout = 864 __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show, 865 part_timeout_store); 866 #endif 867 868 static struct attribute *disk_attrs[] = { 869 &dev_attr_range.attr, 870 
&dev_attr_ext_range.attr, 871 &dev_attr_removable.attr, 872 &dev_attr_ro.attr, 873 &dev_attr_size.attr, 874 &dev_attr_capability.attr, 875 &dev_attr_stat.attr, 876 #ifdef CONFIG_FAIL_MAKE_REQUEST 877 &dev_attr_fail.attr, 878 #endif 879 #ifdef CONFIG_FAIL_IO_TIMEOUT 880 &dev_attr_fail_timeout.attr, 881 #endif 882 NULL 883 }; 884 885 static struct attribute_group disk_attr_group = { 886 .attrs = disk_attrs, 887 }; 888 889 static struct attribute_group *disk_attr_groups[] = { 890 &disk_attr_group, 891 NULL 892 }; 893 894 static void disk_free_ptbl_rcu_cb(struct rcu_head *head) 895 { 896 struct disk_part_tbl *ptbl = 897 container_of(head, struct disk_part_tbl, rcu_head); 898 899 kfree(ptbl); 900 } 901 902 /** 903 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way 904 * @disk: disk to replace part_tbl for 905 * @new_ptbl: new part_tbl to install 906 * 907 * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The 908 * original ptbl is freed using RCU callback. 909 * 910 * LOCKING: 911 * Matching bd_mutx locked. 912 */ 913 static void disk_replace_part_tbl(struct gendisk *disk, 914 struct disk_part_tbl *new_ptbl) 915 { 916 struct disk_part_tbl *old_ptbl = disk->part_tbl; 917 918 rcu_assign_pointer(disk->part_tbl, new_ptbl); 919 920 if (old_ptbl) { 921 rcu_assign_pointer(old_ptbl->last_lookup, NULL); 922 call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb); 923 } 924 } 925 926 /** 927 * disk_expand_part_tbl - expand disk->part_tbl 928 * @disk: disk to expand part_tbl for 929 * @partno: expand such that this partno can fit in 930 * 931 * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl 932 * uses RCU to allow unlocked dereferencing for stats and other stuff. 933 * 934 * LOCKING: 935 * Matching bd_mutex locked, might sleep. 936 * 937 * RETURNS: 938 * 0 on success, -errno on failure. 
939 */ 940 int disk_expand_part_tbl(struct gendisk *disk, int partno) 941 { 942 struct disk_part_tbl *old_ptbl = disk->part_tbl; 943 struct disk_part_tbl *new_ptbl; 944 int len = old_ptbl ? old_ptbl->len : 0; 945 int target = partno + 1; 946 size_t size; 947 int i; 948 949 /* disk_max_parts() is zero during initialization, ignore if so */ 950 if (disk_max_parts(disk) && target > disk_max_parts(disk)) 951 return -EINVAL; 952 953 if (target <= len) 954 return 0; 955 956 size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]); 957 new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id); 958 if (!new_ptbl) 959 return -ENOMEM; 960 961 INIT_RCU_HEAD(&new_ptbl->rcu_head); 962 new_ptbl->len = target; 963 964 for (i = 0; i < len; i++) 965 rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]); 966 967 disk_replace_part_tbl(disk, new_ptbl); 968 return 0; 969 } 970 971 static void disk_release(struct device *dev) 972 { 973 struct gendisk *disk = dev_to_disk(dev); 974 975 kfree(disk->random); 976 disk_replace_part_tbl(disk, NULL); 977 free_part_stats(&disk->part0); 978 kfree(disk); 979 } 980 struct class block_class = { 981 .name = "block", 982 }; 983 984 static struct device_type disk_type = { 985 .name = "disk", 986 .groups = disk_attr_groups, 987 .release = disk_release, 988 }; 989 990 #ifdef CONFIG_PROC_FS 991 /* 992 * aggregate disk stat collector. Uses the same stats that the sysfs 993 * entries do, above, but makes them available through one seq_file. 994 * 995 * The output looks suspiciously like /proc/partitions with a bunch of 996 * extra fields. 
997 */ 998 static int diskstats_show(struct seq_file *seqf, void *v) 999 { 1000 struct gendisk *gp = v; 1001 struct disk_part_iter piter; 1002 struct hd_struct *hd; 1003 char buf[BDEVNAME_SIZE]; 1004 int cpu; 1005 1006 /* 1007 if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) 1008 seq_puts(seqf, "major minor name" 1009 " rio rmerge rsect ruse wio wmerge " 1010 "wsect wuse running use aveq" 1011 "\n\n"); 1012 */ 1013 1014 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0); 1015 while ((hd = disk_part_iter_next(&piter))) { 1016 cpu = part_stat_lock(); 1017 part_round_stats(cpu, hd); 1018 part_stat_unlock(); 1019 seq_printf(seqf, "%4d %7d %s %lu %lu %llu " 1020 "%u %lu %lu %llu %u %u %u %u\n", 1021 MAJOR(part_devt(hd)), MINOR(part_devt(hd)), 1022 disk_name(gp, hd->partno, buf), 1023 part_stat_read(hd, ios[0]), 1024 part_stat_read(hd, merges[0]), 1025 (unsigned long long)part_stat_read(hd, sectors[0]), 1026 jiffies_to_msecs(part_stat_read(hd, ticks[0])), 1027 part_stat_read(hd, ios[1]), 1028 part_stat_read(hd, merges[1]), 1029 (unsigned long long)part_stat_read(hd, sectors[1]), 1030 jiffies_to_msecs(part_stat_read(hd, ticks[1])), 1031 hd->in_flight, 1032 jiffies_to_msecs(part_stat_read(hd, io_ticks)), 1033 jiffies_to_msecs(part_stat_read(hd, time_in_queue)) 1034 ); 1035 } 1036 disk_part_iter_exit(&piter); 1037 1038 return 0; 1039 } 1040 1041 static const struct seq_operations diskstats_op = { 1042 .start = disk_seqf_start, 1043 .next = disk_seqf_next, 1044 .stop = disk_seqf_stop, 1045 .show = diskstats_show 1046 }; 1047 1048 static int diskstats_open(struct inode *inode, struct file *file) 1049 { 1050 return seq_open(file, &diskstats_op); 1051 } 1052 1053 static const struct file_operations proc_diskstats_operations = { 1054 .open = diskstats_open, 1055 .read = seq_read, 1056 .llseek = seq_lseek, 1057 .release = seq_release, 1058 }; 1059 1060 static int __init proc_genhd_init(void) 1061 { 1062 proc_create("diskstats", 0, NULL, 
&proc_diskstats_operations); 1063 proc_create("partitions", 0, NULL, &proc_partitions_operations); 1064 return 0; 1065 } 1066 module_init(proc_genhd_init); 1067 #endif /* CONFIG_PROC_FS */ 1068 1069 static void media_change_notify_thread(struct work_struct *work) 1070 { 1071 struct gendisk *gd = container_of(work, struct gendisk, async_notify); 1072 char event[] = "MEDIA_CHANGE=1"; 1073 char *envp[] = { event, NULL }; 1074 1075 /* 1076 * set enviroment vars to indicate which event this is for 1077 * so that user space will know to go check the media status. 1078 */ 1079 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); 1080 put_device(gd->driverfs_dev); 1081 } 1082 1083 #if 0 1084 void genhd_media_change_notify(struct gendisk *disk) 1085 { 1086 get_device(disk->driverfs_dev); 1087 schedule_work(&disk->async_notify); 1088 } 1089 EXPORT_SYMBOL_GPL(genhd_media_change_notify); 1090 #endif /* 0 */ 1091 1092 dev_t blk_lookup_devt(const char *name, int partno) 1093 { 1094 dev_t devt = MKDEV(0, 0); 1095 struct class_dev_iter iter; 1096 struct device *dev; 1097 1098 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 1099 while ((dev = class_dev_iter_next(&iter))) { 1100 struct gendisk *disk = dev_to_disk(dev); 1101 struct hd_struct *part; 1102 1103 if (strcmp(dev_name(dev), name)) 1104 continue; 1105 1106 if (partno < disk->minors) { 1107 /* We need to return the right devno, even 1108 * if the partition doesn't exist yet. 
1109 */ 1110 devt = MKDEV(MAJOR(dev->devt), 1111 MINOR(dev->devt) + partno); 1112 break; 1113 } 1114 part = disk_get_part(disk, partno); 1115 if (part) { 1116 devt = part_devt(part); 1117 disk_put_part(part); 1118 break; 1119 } 1120 disk_put_part(part); 1121 } 1122 class_dev_iter_exit(&iter); 1123 return devt; 1124 } 1125 EXPORT_SYMBOL(blk_lookup_devt); 1126 1127 struct gendisk *alloc_disk(int minors) 1128 { 1129 return alloc_disk_node(minors, -1); 1130 } 1131 EXPORT_SYMBOL(alloc_disk); 1132 1133 struct gendisk *alloc_disk_node(int minors, int node_id) 1134 { 1135 struct gendisk *disk; 1136 1137 disk = kmalloc_node(sizeof(struct gendisk), 1138 GFP_KERNEL | __GFP_ZERO, node_id); 1139 if (disk) { 1140 if (!init_part_stats(&disk->part0)) { 1141 kfree(disk); 1142 return NULL; 1143 } 1144 disk->node_id = node_id; 1145 if (disk_expand_part_tbl(disk, 0)) { 1146 free_part_stats(&disk->part0); 1147 kfree(disk); 1148 return NULL; 1149 } 1150 disk->part_tbl->part[0] = &disk->part0; 1151 1152 disk->minors = minors; 1153 rand_initialize_disk(disk); 1154 disk_to_dev(disk)->class = &block_class; 1155 disk_to_dev(disk)->type = &disk_type; 1156 device_initialize(disk_to_dev(disk)); 1157 INIT_WORK(&disk->async_notify, 1158 media_change_notify_thread); 1159 } 1160 return disk; 1161 } 1162 EXPORT_SYMBOL(alloc_disk_node); 1163 1164 struct kobject *get_disk(struct gendisk *disk) 1165 { 1166 struct module *owner; 1167 struct kobject *kobj; 1168 1169 if (!disk->fops) 1170 return NULL; 1171 owner = disk->fops->owner; 1172 if (owner && !try_module_get(owner)) 1173 return NULL; 1174 kobj = kobject_get(&disk_to_dev(disk)->kobj); 1175 if (kobj == NULL) { 1176 module_put(owner); 1177 return NULL; 1178 } 1179 return kobj; 1180 1181 } 1182 1183 EXPORT_SYMBOL(get_disk); 1184 1185 void put_disk(struct gendisk *disk) 1186 { 1187 if (disk) 1188 kobject_put(&disk_to_dev(disk)->kobj); 1189 } 1190 1191 EXPORT_SYMBOL(put_disk); 1192 1193 void set_device_ro(struct block_device *bdev, int flag) 1194 { 1195 
bdev->bd_part->policy = flag; 1196 } 1197 1198 EXPORT_SYMBOL(set_device_ro); 1199 1200 void set_disk_ro(struct gendisk *disk, int flag) 1201 { 1202 struct disk_part_iter piter; 1203 struct hd_struct *part; 1204 1205 disk_part_iter_init(&piter, disk, 1206 DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0); 1207 while ((part = disk_part_iter_next(&piter))) 1208 part->policy = flag; 1209 disk_part_iter_exit(&piter); 1210 } 1211 1212 EXPORT_SYMBOL(set_disk_ro); 1213 1214 int bdev_read_only(struct block_device *bdev) 1215 { 1216 if (!bdev) 1217 return 0; 1218 return bdev->bd_part->policy; 1219 } 1220 1221 EXPORT_SYMBOL(bdev_read_only); 1222 1223 int invalidate_partition(struct gendisk *disk, int partno) 1224 { 1225 int res = 0; 1226 struct block_device *bdev = bdget_disk(disk, partno); 1227 if (bdev) { 1228 fsync_bdev(bdev); 1229 res = __invalidate_device(bdev); 1230 bdput(bdev); 1231 } 1232 return res; 1233 } 1234 1235 EXPORT_SYMBOL(invalidate_partition); 1236