1 /* 2 * gendisk handling 3 */ 4 5 #include <linux/module.h> 6 #include <linux/fs.h> 7 #include <linux/genhd.h> 8 #include <linux/kdev_t.h> 9 #include <linux/kernel.h> 10 #include <linux/blkdev.h> 11 #include <linux/init.h> 12 #include <linux/spinlock.h> 13 #include <linux/proc_fs.h> 14 #include <linux/seq_file.h> 15 #include <linux/slab.h> 16 #include <linux/kmod.h> 17 #include <linux/kobj_map.h> 18 #include <linux/buffer_head.h> 19 #include <linux/mutex.h> 20 #include <linux/idr.h> 21 22 #include "blk.h" 23 24 static DEFINE_MUTEX(block_class_lock); 25 struct kobject *block_depr; 26 27 /* for extended dynamic devt allocation, currently only one major is used */ 28 #define MAX_EXT_DEVT (1 << MINORBITS) 29 30 /* For extended devt allocation. ext_devt_mutex prevents look up 31 * results from going away underneath its user. 32 */ 33 static DEFINE_MUTEX(ext_devt_mutex); 34 static DEFINE_IDR(ext_devt_idr); 35 36 static struct device_type disk_type; 37 38 /** 39 * disk_get_part - get partition 40 * @disk: disk to look partition from 41 * @partno: partition number 42 * 43 * Look for partition @partno from @disk. If found, increment 44 * reference count and return it. 45 * 46 * CONTEXT: 47 * Don't care. 48 * 49 * RETURNS: 50 * Pointer to the found partition on success, NULL if not found. 51 */ 52 struct hd_struct *disk_get_part(struct gendisk *disk, int partno) 53 { 54 struct hd_struct *part = NULL; 55 struct disk_part_tbl *ptbl; 56 57 if (unlikely(partno < 0)) 58 return NULL; 59 60 rcu_read_lock(); 61 62 ptbl = rcu_dereference(disk->part_tbl); 63 if (likely(partno < ptbl->len)) { 64 part = rcu_dereference(ptbl->part[partno]); 65 if (part) 66 get_device(part_to_dev(part)); 67 } 68 69 rcu_read_unlock(); 70 71 return part; 72 } 73 EXPORT_SYMBOL_GPL(disk_get_part); 74 75 /** 76 * disk_part_iter_init - initialize partition iterator 77 * @piter: iterator to initialize 78 * @disk: disk to iterate over 79 * @flags: DISK_PITER_* flags 80 * 81 * Initialize @piter so that it iterates over partitions of @disk. 82 * 83 * CONTEXT: 84 * Don't care. 85 */ 86 void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, 87 unsigned int flags) 88 { 89 struct disk_part_tbl *ptbl; 90 91 rcu_read_lock(); 92 ptbl = rcu_dereference(disk->part_tbl); 93 94 piter->disk = disk; 95 piter->part = NULL; 96 97 if (flags & DISK_PITER_REVERSE) 98 piter->idx = ptbl->len - 1; 99 else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0)) 100 piter->idx = 0; 101 else 102 piter->idx = 1; 103 104 piter->flags = flags; 105 106 rcu_read_unlock(); 107 } 108 EXPORT_SYMBOL_GPL(disk_part_iter_init); 109 110 /** 111 * disk_part_iter_next - proceed iterator to the next partition and return it 112 * @piter: iterator of interest 113 * 114 * Proceed @piter to the next partition and return it. 115 * 116 * CONTEXT: 117 * Don't care. 118 */ 119 struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) 120 { 121 struct disk_part_tbl *ptbl; 122 int inc, end; 123 124 /* put the last partition */ 125 disk_put_part(piter->part); 126 piter->part = NULL; 127 128 /* get part_tbl */ 129 rcu_read_lock(); 130 ptbl = rcu_dereference(piter->disk->part_tbl); 131 132 /* determine iteration parameters */ 133 if (piter->flags & DISK_PITER_REVERSE) { 134 inc = -1; 135 if (piter->flags & (DISK_PITER_INCL_PART0 | 136 DISK_PITER_INCL_EMPTY_PART0)) 137 end = -1; 138 else 139 end = 0; 140 } else { 141 inc = 1; 142 end = ptbl->len; 143 } 144 145 /* iterate to the next partition */ 146 for (; piter->idx != end; piter->idx += inc) { 147 struct hd_struct *part; 148 149 part = rcu_dereference(ptbl->part[piter->idx]); 150 if (!part) 151 continue; 152 if (!part->nr_sects && 153 !(piter->flags & DISK_PITER_INCL_EMPTY) && 154 !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && 155 piter->idx == 0)) 156 continue; 157 158 get_device(part_to_dev(part)); 159 piter->part = part; 160 piter->idx += inc; 161 break; 162 } 163 164 rcu_read_unlock(); 165 166 return piter->part; 167 } 168 EXPORT_SYMBOL_GPL(disk_part_iter_next); 169 170 /** 171 * disk_part_iter_exit - finish up partition iteration 172 * @piter: iter of interest 173 * 174 * Called when iteration is over. Cleans up @piter. 175 * 176 * CONTEXT: 177 * Don't care. 178 */ 179 void disk_part_iter_exit(struct disk_part_iter *piter) 180 { 181 disk_put_part(piter->part); 182 piter->part = NULL; 183 } 184 EXPORT_SYMBOL_GPL(disk_part_iter_exit); 185 186 static inline int sector_in_part(struct hd_struct *part, sector_t sector) 187 { 188 return part->start_sect <= sector && 189 sector < part->start_sect + part->nr_sects; 190 } 191 192 /** 193 * disk_map_sector_rcu - map sector to partition 194 * @disk: gendisk of interest 195 * @sector: sector to map 196 * 197 * Find out which partition @sector maps to on @disk. This is 198 * primarily used for stats accounting. 199 * 200 * CONTEXT: 201 * RCU read locked. The returned partition pointer is valid only 202 * while preemption is disabled. 203 * 204 * RETURNS: 205 * Found partition on success, part0 is returned if no partition matches 206 */ 207 struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) 208 { 209 struct disk_part_tbl *ptbl; 210 struct hd_struct *part; 211 int i; 212 213 ptbl = rcu_dereference(disk->part_tbl); 214 215 part = rcu_dereference(ptbl->last_lookup); 216 if (part && sector_in_part(part, sector)) 217 return part; 218 219 for (i = 1; i < ptbl->len; i++) { 220 part = rcu_dereference(ptbl->part[i]); 221 222 if (part && sector_in_part(part, sector)) { 223 rcu_assign_pointer(ptbl->last_lookup, part); 224 return part; 225 } 226 } 227 return &disk->part0; 228 } 229 EXPORT_SYMBOL_GPL(disk_map_sector_rcu); 230 231 /* 232 * Can be deleted altogether. Later. 233 * 234 */ 235 static struct blk_major_name { 236 struct blk_major_name *next; 237 int major; 238 char name[16]; 239 } *major_names[BLKDEV_MAJOR_HASH_SIZE]; 240 241 /* index in the above - for now: assume no multimajor ranges */ 242 static inline int major_to_index(int major) 243 { 244 return major % BLKDEV_MAJOR_HASH_SIZE; 245 } 246 247 #ifdef CONFIG_PROC_FS 248 void blkdev_show(struct seq_file *seqf, off_t offset) 249 { 250 struct blk_major_name *dp; 251 252 if (offset < BLKDEV_MAJOR_HASH_SIZE) { 253 mutex_lock(&block_class_lock); 254 for (dp = major_names[offset]; dp; dp = dp->next) 255 seq_printf(seqf, "%3d %s\n", dp->major, dp->name); 256 mutex_unlock(&block_class_lock); 257 } 258 } 259 #endif /* CONFIG_PROC_FS */ 260 261 /** 262 * register_blkdev - register a new block device 263 * 264 * @major: the requested major device number [1..255]. If @major=0, try to 265 * allocate any unused major number. 266 * @name: the name of the new block device as a zero terminated string 267 * 268 * The @name must be unique within the system. 269 * 270 * The return value depends on the @major input parameter. 271 * - if a major device number was requested in range [1..255] then the 272 * function returns zero on success, or a negative error code 273 * - if any unused major number was requested with @major=0 parameter 274 * then the return value is the allocated major number in range 275 * [1..255] or a negative error code otherwise 276 */ 277 int register_blkdev(unsigned int major, const char *name) 278 { 279 struct blk_major_name **n, *p; 280 int index, ret = 0; 281 282 mutex_lock(&block_class_lock); 283 284 /* temporary */ 285 if (major == 0) { 286 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) { 287 if (major_names[index] == NULL) 288 break; 289 } 290 291 if (index == 0) { 292 printk("register_blkdev: failed to get major for %s\n", 293 name); 294 ret = -EBUSY; 295 goto out; 296 } 297 major = index; 298 ret = major; 299 } 300 301 p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL); 302 if (p == NULL) { 303 ret = -ENOMEM; 304 goto out; 305 } 306 307 p->major = major; 308 strlcpy(p->name, name, sizeof(p->name)); 309 p->next = NULL; 310 index = major_to_index(major); 311 312 for (n = &major_names[index]; *n; n = &(*n)->next) { 313 if ((*n)->major == major) 314 break; 315 } 316 if (!*n) 317 *n = p; 318 else 319 ret = -EBUSY; 320 321 if (ret < 0) { 322 printk("register_blkdev: cannot get major %d for %s\n", 323 major, name); 324 kfree(p); 325 } 326 out: 327 mutex_unlock(&block_class_lock); 328 return ret; 329 } 330 331 EXPORT_SYMBOL(register_blkdev); 332 333 void unregister_blkdev(unsigned int major, const char *name) 334 { 335 struct blk_major_name **n; 336 struct blk_major_name *p = NULL; 337 int index = major_to_index(major); 338 339 mutex_lock(&block_class_lock); 340 for (n = &major_names[index]; *n; n = &(*n)->next) 341 if ((*n)->major == major) 342 break; 343 if (!*n || strcmp((*n)->name, name)) { 344 WARN_ON(1); 345 } else { 346 p = *n; 347 *n = p->next; 348 } 349 mutex_unlock(&block_class_lock); 350 kfree(p); 351 } 352 353 EXPORT_SYMBOL(unregister_blkdev); 354 355 static struct kobj_map *bdev_map; 356 357 /** 358 * blk_mangle_minor - scatter minor numbers apart 359 * @minor: minor number to mangle 360 * 361 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT 362 * is enabled. Mangling twice gives the original value. 363 * 364 * RETURNS: 365 * Mangled value. 366 * 367 * CONTEXT: 368 * Don't care. 369 */ 370 static int blk_mangle_minor(int minor) 371 { 372 #ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT 373 int i; 374 375 for (i = 0; i < MINORBITS / 2; i++) { 376 int low = minor & (1 << i); 377 int high = minor & (1 << (MINORBITS - 1 - i)); 378 int distance = MINORBITS - 1 - 2 * i; 379 380 minor ^= low | high; /* clear both bits */ 381 low <<= distance; /* swap the positions */ 382 high >>= distance; 383 minor |= low | high; /* and set */ 384 } 385 #endif 386 return minor; 387 } 388 389 /** 390 * blk_alloc_devt - allocate a dev_t for a partition 391 * @part: partition to allocate dev_t for 392 * @devt: out parameter for resulting dev_t 393 * 394 * Allocate a dev_t for block device. 395 * 396 * RETURNS: 397 * 0 on success, allocated dev_t is returned in *@devt. -errno on 398 * failure. 399 * 400 * CONTEXT: 401 * Might sleep. 402 */ 403 int blk_alloc_devt(struct hd_struct *part, dev_t *devt) 404 { 405 struct gendisk *disk = part_to_disk(part); 406 int idx, rc; 407 408 /* in consecutive minor range? */ 409 if (part->partno < disk->minors) { 410 *devt = MKDEV(disk->major, disk->first_minor + part->partno); 411 return 0; 412 } 413 414 /* allocate ext devt */ 415 do { 416 if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL)) 417 return -ENOMEM; 418 rc = idr_get_new(&ext_devt_idr, part, &idx); 419 } while (rc == -EAGAIN); 420 421 if (rc) 422 return rc; 423 424 if (idx > MAX_EXT_DEVT) { 425 idr_remove(&ext_devt_idr, idx); 426 return -EBUSY; 427 } 428 429 *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx)); 430 return 0; 431 } 432 433 /** 434 * blk_free_devt - free a dev_t 435 * @devt: dev_t to free 436 * 437 * Free @devt which was allocated using blk_alloc_devt(). 438 * 439 * CONTEXT: 440 * Might sleep. 441 */ 442 void blk_free_devt(dev_t devt) 443 { 444 might_sleep(); 445 446 if (devt == MKDEV(0, 0)) 447 return; 448 449 if (MAJOR(devt) == BLOCK_EXT_MAJOR) { 450 mutex_lock(&ext_devt_mutex); 451 idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 452 mutex_unlock(&ext_devt_mutex); 453 } 454 } 455 456 static char *bdevt_str(dev_t devt, char *buf) 457 { 458 if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { 459 char tbuf[BDEVT_SIZE]; 460 snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt)); 461 snprintf(buf, BDEVT_SIZE, "%-9s", tbuf); 462 } else 463 snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); 464 465 return buf; 466 } 467 468 /* 469 * Register device numbers dev..(dev+range-1) 470 * range must be nonzero 471 * The hash chain is sorted on range, so that subranges can override. 472 */ 473 void blk_register_region(dev_t devt, unsigned long range, struct module *module, 474 struct kobject *(*probe)(dev_t, int *, void *), 475 int (*lock)(dev_t, void *), void *data) 476 { 477 kobj_map(bdev_map, devt, range, module, probe, lock, data); 478 } 479 480 EXPORT_SYMBOL(blk_register_region); 481 482 void blk_unregister_region(dev_t devt, unsigned long range) 483 { 484 kobj_unmap(bdev_map, devt, range); 485 } 486 487 EXPORT_SYMBOL(blk_unregister_region); 488 489 static struct kobject *exact_match(dev_t devt, int *partno, void *data) 490 { 491 struct gendisk *p = data; 492 493 return &disk_to_dev(p)->kobj; 494 } 495 496 static int exact_lock(dev_t devt, void *data) 497 { 498 struct gendisk *p = data; 499 500 if (!get_disk(p)) 501 return -1; 502 return 0; 503 } 504 505 /** 506 * add_disk - add partitioning information to kernel list 507 * @disk: per-device partitioning information 508 * 509 * This function registers the partitioning information in @disk 510 * with the kernel. 511 * 512 * FIXME: error handling 513 */ 514 void add_disk(struct gendisk *disk) 515 { 516 struct backing_dev_info *bdi; 517 dev_t devt; 518 int retval; 519 520 /* minors == 0 indicates to use ext devt from part0 and should 521 * be accompanied with EXT_DEVT flag. Make sure all 522 * parameters make sense. 523 */ 524 WARN_ON(disk->minors && !(disk->major || disk->first_minor)); 525 WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT)); 526 527 disk->flags |= GENHD_FL_UP; 528 529 retval = blk_alloc_devt(&disk->part0, &devt); 530 if (retval) { 531 WARN_ON(1); 532 return; 533 } 534 disk_to_dev(disk)->devt = devt; 535 536 /* ->major and ->first_minor aren't supposed to be 537 * dereferenced from here on, but set them just in case. 538 */ 539 disk->major = MAJOR(devt); 540 disk->first_minor = MINOR(devt); 541 542 /* Register BDI before referencing it from bdev */ 543 bdi = &disk->queue->backing_dev_info; 544 bdi_register_dev(bdi, disk_devt(disk)); 545 546 blk_register_region(disk_devt(disk), disk->minors, NULL, 547 exact_match, exact_lock, disk); 548 register_disk(disk); 549 blk_register_queue(disk); 550 551 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, 552 "bdi"); 553 WARN_ON(retval); 554 } 555 556 EXPORT_SYMBOL(add_disk); 557 EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ 558 559 void unlink_gendisk(struct gendisk *disk) 560 { 561 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 562 bdi_unregister(&disk->queue->backing_dev_info); 563 blk_unregister_queue(disk); 564 blk_unregister_region(disk_devt(disk), disk->minors); 565 } 566 567 /** 568 * get_gendisk - get partitioning information for a given device 569 * @devt: device to get partitioning information for 570 * @partno: returned partition index 571 * 572 * This function gets the structure containing partitioning 573 * information for the given device @devt. 574 */ 575 struct gendisk *get_gendisk(dev_t devt, int *partno) 576 { 577 struct gendisk *disk = NULL; 578 579 if (MAJOR(devt) != BLOCK_EXT_MAJOR) { 580 struct kobject *kobj; 581 582 kobj = kobj_lookup(bdev_map, devt, partno); 583 if (kobj) 584 disk = dev_to_disk(kobj_to_dev(kobj)); 585 } else { 586 struct hd_struct *part; 587 588 mutex_lock(&ext_devt_mutex); 589 part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 590 if (part && get_disk(part_to_disk(part))) { 591 *partno = part->partno; 592 disk = part_to_disk(part); 593 } 594 mutex_unlock(&ext_devt_mutex); 595 } 596 597 return disk; 598 } 599 EXPORT_SYMBOL(get_gendisk); 600 601 /** 602 * bdget_disk - do bdget() by gendisk and partition number 603 * @disk: gendisk of interest 604 * @partno: partition number 605 * 606 * Find partition @partno from @disk, do bdget() on it. 607 * 608 * CONTEXT: 609 * Don't care. 610 * 611 * RETURNS: 612 * Resulting block_device on success, NULL on failure. 613 */ 614 struct block_device *bdget_disk(struct gendisk *disk, int partno) 615 { 616 struct hd_struct *part; 617 struct block_device *bdev = NULL; 618 619 part = disk_get_part(disk, partno); 620 if (part) 621 bdev = bdget(part_devt(part)); 622 disk_put_part(part); 623 624 return bdev; 625 } 626 EXPORT_SYMBOL(bdget_disk); 627 628 /* 629 * print a full list of all partitions - intended for places where the root 630 * filesystem can't be mounted and thus to give the victim some idea of what 631 * went wrong 632 */ 633 void __init printk_all_partitions(void) 634 { 635 struct class_dev_iter iter; 636 struct device *dev; 637 638 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 639 while ((dev = class_dev_iter_next(&iter))) { 640 struct gendisk *disk = dev_to_disk(dev); 641 struct disk_part_iter piter; 642 struct hd_struct *part; 643 char name_buf[BDEVNAME_SIZE]; 644 char devt_buf[BDEVT_SIZE]; 645 u8 uuid[PARTITION_META_INFO_UUIDLTH * 2 + 1]; 646 647 /* 648 * Don't show empty devices or things that have been 649 * surpressed 650 */ 651 if (get_capacity(disk) == 0 || 652 (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) 653 continue; 654 655 /* 656 * Note, unlike /proc/partitions, I am showing the 657 * numbers in hex - the same format as the root= 658 * option takes. 659 */ 660 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 661 while ((part = disk_part_iter_next(&piter))) { 662 bool is_part0 = part == &disk->part0; 663 664 uuid[0] = 0; 665 if (part->info) 666 part_unpack_uuid(part->info->uuid, uuid); 667 668 printk("%s%s %10llu %s %s", is_part0 ? "" : " ", 669 bdevt_str(part_devt(part), devt_buf), 670 (unsigned long long)part->nr_sects >> 1, 671 disk_name(disk, part->partno, name_buf), uuid); 672 if (is_part0) { 673 if (disk->driverfs_dev != NULL && 674 disk->driverfs_dev->driver != NULL) 675 printk(" driver: %s\n", 676 disk->driverfs_dev->driver->name); 677 else 678 printk(" (driver?)\n"); 679 } else 680 printk("\n"); 681 } 682 disk_part_iter_exit(&piter); 683 } 684 class_dev_iter_exit(&iter); 685 } 686 687 #ifdef CONFIG_PROC_FS 688 /* iterator */ 689 static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos) 690 { 691 loff_t skip = *pos; 692 struct class_dev_iter *iter; 693 struct device *dev; 694 695 iter = kmalloc(sizeof(*iter), GFP_KERNEL); 696 if (!iter) 697 return ERR_PTR(-ENOMEM); 698 699 seqf->private = iter; 700 class_dev_iter_init(iter, &block_class, NULL, &disk_type); 701 do { 702 dev = class_dev_iter_next(iter); 703 if (!dev) 704 return NULL; 705 } while (skip--); 706 707 return dev_to_disk(dev); 708 } 709 710 static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos) 711 { 712 struct device *dev; 713 714 (*pos)++; 715 dev = class_dev_iter_next(seqf->private); 716 if (dev) 717 return dev_to_disk(dev); 718 719 return NULL; 720 } 721 722 static void disk_seqf_stop(struct seq_file *seqf, void *v) 723 { 724 struct class_dev_iter *iter = seqf->private; 725 726 /* stop is called even after start failed :-( */ 727 if (iter) { 728 class_dev_iter_exit(iter); 729 kfree(iter); 730 } 731 } 732 733 static void *show_partition_start(struct seq_file *seqf, loff_t *pos) 734 { 735 static void *p; 736 737 p = disk_seqf_start(seqf, pos); 738 if (!IS_ERR(p) && p && !*pos) 739 seq_puts(seqf, "major minor #blocks name\n\n"); 740 return p; 741 } 742 743 static int show_partition(struct seq_file *seqf, void *v) 744 { 745 struct gendisk *sgp = v; 746 struct disk_part_iter piter; 747 struct hd_struct *part; 748 char buf[BDEVNAME_SIZE]; 749 750 /* Don't show non-partitionable removeable devices or empty devices */ 751 if (!get_capacity(sgp) || (!disk_partitionable(sgp) && 752 (sgp->flags & GENHD_FL_REMOVABLE))) 753 return 0; 754 if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) 755 return 0; 756 757 /* show the full disk and all non-0 size partitions of it */ 758 disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); 759 while ((part = disk_part_iter_next(&piter))) 760 seq_printf(seqf, "%4d %7d %10llu %s\n", 761 MAJOR(part_devt(part)), MINOR(part_devt(part)), 762 (unsigned long long)part->nr_sects >> 1, 763 disk_name(sgp, part->partno, buf)); 764 disk_part_iter_exit(&piter); 765 766 return 0; 767 } 768 769 static const struct seq_operations partitions_op = { 770 .start = show_partition_start, 771 .next = disk_seqf_next, 772 .stop = disk_seqf_stop, 773 .show = show_partition 774 }; 775 776 static int partitions_open(struct inode *inode, struct file *file) 777 { 778 return seq_open(file, &partitions_op); 779 } 780 781 static const struct file_operations proc_partitions_operations = { 782 .open = partitions_open, 783 .read = seq_read, 784 .llseek = seq_lseek, 785 .release = seq_release, 786 }; 787 #endif 788 789 790 static struct kobject *base_probe(dev_t devt, int *partno, void *data) 791 { 792 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) 793 /* Make old-style 2.4 aliases work */ 794 request_module("block-major-%d", MAJOR(devt)); 795 return NULL; 796 } 797 798 static int __init genhd_device_init(void) 799 { 800 int error; 801 802 block_class.dev_kobj = sysfs_dev_block_kobj; 803 error = class_register(&block_class); 804 if (unlikely(error)) 805 return error; 806 bdev_map = kobj_map_init(base_probe, &block_class_lock); 807 blk_dev_init(); 808 809 register_blkdev(BLOCK_EXT_MAJOR, "blkext"); 810 811 /* create top-level block dir */ 812 if (!sysfs_deprecated) 813 block_depr = kobject_create_and_add("block", NULL); 814 return 0; 815 } 816 817 subsys_initcall(genhd_device_init); 818 819 static ssize_t disk_range_show(struct device *dev, 820 struct device_attribute *attr, char *buf) 821 { 822 struct gendisk *disk = dev_to_disk(dev); 823 824 return sprintf(buf, "%d\n", disk->minors); 825 } 826 827 static ssize_t disk_ext_range_show(struct device *dev, 828 struct device_attribute *attr, char *buf) 829 { 830 struct gendisk *disk = dev_to_disk(dev); 831 832 return sprintf(buf, "%d\n", disk_max_parts(disk)); 833 } 834 835 static ssize_t disk_removable_show(struct device *dev, 836 struct device_attribute *attr, char *buf) 837 { 838 struct gendisk *disk = dev_to_disk(dev); 839 840 return sprintf(buf, "%d\n", 841 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); 842 } 843 844 static ssize_t disk_ro_show(struct device *dev, 845 struct device_attribute *attr, char *buf) 846 { 847 struct gendisk *disk = dev_to_disk(dev); 848 849 return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); 850 } 851 852 static ssize_t disk_capability_show(struct device *dev, 853 struct device_attribute *attr, char *buf) 854 { 855 struct gendisk *disk = dev_to_disk(dev); 856 857 return sprintf(buf, "%x\n", disk->flags); 858 } 859 860 static ssize_t disk_alignment_offset_show(struct device *dev, 861 struct device_attribute *attr, 862 char *buf) 863 { 864 struct gendisk *disk = dev_to_disk(dev); 865 866 return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue)); 867 } 868 869 static ssize_t disk_discard_alignment_show(struct device *dev, 870 struct device_attribute *attr, 871 char *buf) 872 { 873 struct gendisk *disk = dev_to_disk(dev); 874 875 return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue)); 876 } 877 878 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 879 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); 880 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 881 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); 882 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 883 static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); 884 static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show, 885 NULL); 886 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 887 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 888 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); 889 #ifdef CONFIG_FAIL_MAKE_REQUEST 890 static struct device_attribute dev_attr_fail = 891 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 892 #endif 893 #ifdef CONFIG_FAIL_IO_TIMEOUT 894 static struct device_attribute dev_attr_fail_timeout = 895 __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show, 896 part_timeout_store); 897 #endif 898 899 static struct attribute *disk_attrs[] = { 900 &dev_attr_range.attr, 901 &dev_attr_ext_range.attr, 902 &dev_attr_removable.attr, 903 &dev_attr_ro.attr, 904 &dev_attr_size.attr, 905 &dev_attr_alignment_offset.attr, 906 &dev_attr_discard_alignment.attr, 907 &dev_attr_capability.attr, 908 &dev_attr_stat.attr, 909 &dev_attr_inflight.attr, 910 #ifdef CONFIG_FAIL_MAKE_REQUEST 911 &dev_attr_fail.attr, 912 #endif 913 #ifdef CONFIG_FAIL_IO_TIMEOUT 914 &dev_attr_fail_timeout.attr, 915 #endif 916 NULL 917 }; 918 919 static struct attribute_group disk_attr_group = { 920 .attrs = disk_attrs, 921 }; 922 923 static const struct attribute_group *disk_attr_groups[] = { 924 &disk_attr_group, 925 NULL 926 }; 927 928 static void disk_free_ptbl_rcu_cb(struct rcu_head *head) 929 { 930 struct disk_part_tbl *ptbl = 931 container_of(head, struct disk_part_tbl, rcu_head); 932 933 kfree(ptbl); 934 } 935 936 /** 937 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way 938 * @disk: disk to replace part_tbl for 939 * @new_ptbl: new part_tbl to install 940 * 941 * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The 942 * original ptbl is freed using RCU callback. 943 * 944 * LOCKING: 945 * Matching bd_mutx locked. 946 */ 947 static void disk_replace_part_tbl(struct gendisk *disk, 948 struct disk_part_tbl *new_ptbl) 949 { 950 struct disk_part_tbl *old_ptbl = disk->part_tbl; 951 952 rcu_assign_pointer(disk->part_tbl, new_ptbl); 953 954 if (old_ptbl) { 955 rcu_assign_pointer(old_ptbl->last_lookup, NULL); 956 call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb); 957 } 958 } 959 960 /** 961 * disk_expand_part_tbl - expand disk->part_tbl 962 * @disk: disk to expand part_tbl for 963 * @partno: expand such that this partno can fit in 964 * 965 * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl 966 * uses RCU to allow unlocked dereferencing for stats and other stuff. 967 * 968 * LOCKING: 969 * Matching bd_mutex locked, might sleep. 970 * 971 * RETURNS: 972 * 0 on success, -errno on failure. 973 */ 974 int disk_expand_part_tbl(struct gendisk *disk, int partno) 975 { 976 struct disk_part_tbl *old_ptbl = disk->part_tbl; 977 struct disk_part_tbl *new_ptbl; 978 int len = old_ptbl ? old_ptbl->len : 0; 979 int target = partno + 1; 980 size_t size; 981 int i; 982 983 /* disk_max_parts() is zero during initialization, ignore if so */ 984 if (disk_max_parts(disk) && target > disk_max_parts(disk)) 985 return -EINVAL; 986 987 if (target <= len) 988 return 0; 989 990 size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]); 991 new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id); 992 if (!new_ptbl) 993 return -ENOMEM; 994 995 new_ptbl->len = target; 996 997 for (i = 0; i < len; i++) 998 rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]); 999 1000 disk_replace_part_tbl(disk, new_ptbl); 1001 return 0; 1002 } 1003 1004 static void disk_release(struct device *dev) 1005 { 1006 struct gendisk *disk = dev_to_disk(dev); 1007 1008 kfree(disk->random); 1009 disk_replace_part_tbl(disk, NULL); 1010 free_part_stats(&disk->part0); 1011 free_part_info(&disk->part0); 1012 kfree(disk); 1013 } 1014 struct class block_class = { 1015 .name = "block", 1016 }; 1017 1018 static char *block_devnode(struct device *dev, mode_t *mode) 1019 { 1020 struct gendisk *disk = dev_to_disk(dev); 1021 1022 if (disk->devnode) 1023 return disk->devnode(disk, mode); 1024 return NULL; 1025 } 1026 1027 static struct device_type disk_type = { 1028 .name = "disk", 1029 .groups = disk_attr_groups, 1030 .release = disk_release, 1031 .devnode = block_devnode, 1032 }; 1033 1034 #ifdef CONFIG_PROC_FS 1035 /* 1036 * aggregate disk stat collector. Uses the same stats that the sysfs 1037 * entries do, above, but makes them available through one seq_file. 1038 * 1039 * The output looks suspiciously like /proc/partitions with a bunch of 1040 * extra fields. 1041 */ 1042 static int diskstats_show(struct seq_file *seqf, void *v) 1043 { 1044 struct gendisk *gp = v; 1045 struct disk_part_iter piter; 1046 struct hd_struct *hd; 1047 char buf[BDEVNAME_SIZE]; 1048 int cpu; 1049 1050 /* 1051 if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) 1052 seq_puts(seqf, "major minor name" 1053 " rio rmerge rsect ruse wio wmerge " 1054 "wsect wuse running use aveq" 1055 "\n\n"); 1056 */ 1057 1058 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); 1059 while ((hd = disk_part_iter_next(&piter))) { 1060 cpu = part_stat_lock(); 1061 part_round_stats(cpu, hd); 1062 part_stat_unlock(); 1063 seq_printf(seqf, "%4d %7d %s %lu %lu %llu " 1064 "%u %lu %lu %llu %u %u %u %u\n", 1065 MAJOR(part_devt(hd)), MINOR(part_devt(hd)), 1066 disk_name(gp, hd->partno, buf), 1067 part_stat_read(hd, ios[0]), 1068 part_stat_read(hd, merges[0]), 1069 (unsigned long long)part_stat_read(hd, sectors[0]), 1070 jiffies_to_msecs(part_stat_read(hd, ticks[0])), 1071 part_stat_read(hd, ios[1]), 1072 part_stat_read(hd, merges[1]), 1073 (unsigned long long)part_stat_read(hd, sectors[1]), 1074 jiffies_to_msecs(part_stat_read(hd, ticks[1])), 1075 part_in_flight(hd), 1076 jiffies_to_msecs(part_stat_read(hd, io_ticks)), 1077 jiffies_to_msecs(part_stat_read(hd, time_in_queue)) 1078 ); 1079 } 1080 disk_part_iter_exit(&piter); 1081 1082 return 0; 1083 } 1084 1085 static const struct seq_operations diskstats_op = { 1086 .start = disk_seqf_start, 1087 .next = disk_seqf_next, 1088 .stop = disk_seqf_stop, 1089 .show = diskstats_show 1090 }; 1091 1092 static int diskstats_open(struct inode *inode, struct file *file) 1093 { 1094 return seq_open(file, &diskstats_op); 1095 } 1096 1097 static const struct file_operations proc_diskstats_operations = { 1098 .open = diskstats_open, 1099 .read = seq_read, 1100 .llseek = seq_lseek, 1101 .release = seq_release, 1102 }; 1103 1104 static int __init proc_genhd_init(void) 1105 { 1106 proc_create("diskstats", 0, NULL, &proc_diskstats_operations); 1107 proc_create("partitions", 0, NULL, &proc_partitions_operations); 1108 return 0; 1109 } 1110 module_init(proc_genhd_init); 1111 #endif /* CONFIG_PROC_FS */ 1112 1113 static void media_change_notify_thread(struct work_struct *work) 1114 { 1115 struct gendisk *gd = container_of(work, struct gendisk, async_notify); 1116 char event[] = "MEDIA_CHANGE=1"; 1117 char *envp[] = { event, NULL }; 1118 1119 /* 1120 * set enviroment vars to indicate which event this is for 1121 * so that user space will know to go check the media status. 1122 */ 1123 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); 1124 put_device(gd->driverfs_dev); 1125 } 1126 1127 #if 0 1128 void genhd_media_change_notify(struct gendisk *disk) 1129 { 1130 get_device(disk->driverfs_dev); 1131 schedule_work(&disk->async_notify); 1132 } 1133 EXPORT_SYMBOL_GPL(genhd_media_change_notify); 1134 #endif /* 0 */ 1135 1136 dev_t blk_lookup_devt(const char *name, int partno) 1137 { 1138 dev_t devt = MKDEV(0, 0); 1139 struct class_dev_iter iter; 1140 struct device *dev; 1141 1142 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 1143 while ((dev = class_dev_iter_next(&iter))) { 1144 struct gendisk *disk = dev_to_disk(dev); 1145 struct hd_struct *part; 1146 1147 if (strcmp(dev_name(dev), name)) 1148 continue; 1149 1150 if (partno < disk->minors) { 1151 /* We need to return the right devno, even 1152 * if the partition doesn't exist yet. 1153 */ 1154 devt = MKDEV(MAJOR(dev->devt), 1155 MINOR(dev->devt) + partno); 1156 break; 1157 } 1158 part = disk_get_part(disk, partno); 1159 if (part) { 1160 devt = part_devt(part); 1161 disk_put_part(part); 1162 break; 1163 } 1164 disk_put_part(part); 1165 } 1166 class_dev_iter_exit(&iter); 1167 return devt; 1168 } 1169 EXPORT_SYMBOL(blk_lookup_devt); 1170 1171 struct gendisk *alloc_disk(int minors) 1172 { 1173 return alloc_disk_node(minors, -1); 1174 } 1175 EXPORT_SYMBOL(alloc_disk); 1176 1177 struct gendisk *alloc_disk_node(int minors, int node_id) 1178 { 1179 struct gendisk *disk; 1180 1181 disk = kmalloc_node(sizeof(struct gendisk), 1182 GFP_KERNEL | __GFP_ZERO, node_id); 1183 if (disk) { 1184 if (!init_part_stats(&disk->part0)) { 1185 kfree(disk); 1186 return NULL; 1187 } 1188 disk->node_id = node_id; 1189 if (disk_expand_part_tbl(disk, 0)) { 1190 free_part_stats(&disk->part0); 1191 kfree(disk); 1192 return NULL; 1193 } 1194 disk->part_tbl->part[0] = &disk->part0; 1195 1196 disk->minors = minors; 1197 rand_initialize_disk(disk); 1198 disk_to_dev(disk)->class = &block_class; 1199 disk_to_dev(disk)->type = &disk_type; 1200 device_initialize(disk_to_dev(disk)); 1201 INIT_WORK(&disk->async_notify, 1202 media_change_notify_thread); 1203 } 1204 return disk; 1205 } 1206 EXPORT_SYMBOL(alloc_disk_node); 1207 1208 struct kobject *get_disk(struct gendisk *disk) 1209 { 1210 struct module *owner; 1211 struct kobject *kobj; 1212 1213 if (!disk->fops) 1214 return NULL; 1215 owner = disk->fops->owner; 1216 if (owner && !try_module_get(owner)) 1217 return NULL; 1218 kobj = kobject_get(&disk_to_dev(disk)->kobj); 1219 if (kobj == NULL) { 1220 module_put(owner); 1221 return NULL; 1222 } 1223 return kobj; 1224 1225 } 1226 1227 EXPORT_SYMBOL(get_disk); 1228 1229 void put_disk(struct gendisk *disk) 1230 { 1231 if (disk) 1232 kobject_put(&disk_to_dev(disk)->kobj); 1233 } 1234 1235 EXPORT_SYMBOL(put_disk); 1236 1237 static void set_disk_ro_uevent(struct gendisk *gd, int ro) 1238 { 1239 char event[] = "DISK_RO=1"; 1240 char *envp[] = { event, NULL }; 1241 1242 if (!ro) 1243 event[8] = '0'; 1244 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); 1245 } 1246 1247 void set_device_ro(struct block_device *bdev, int flag) 1248 { 1249 bdev->bd_part->policy = flag; 1250 } 1251 1252 EXPORT_SYMBOL(set_device_ro); 1253 1254 void set_disk_ro(struct gendisk *disk, int flag) 1255 { 1256 struct disk_part_iter piter; 1257 struct hd_struct *part; 1258 1259 if (disk->part0.policy != flag) { 1260 set_disk_ro_uevent(disk, flag); 1261 disk->part0.policy = flag; 1262 } 1263 1264 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 1265 while ((part = disk_part_iter_next(&piter))) 1266 part->policy = flag; 1267 disk_part_iter_exit(&piter); 1268 } 1269 1270 EXPORT_SYMBOL(set_disk_ro); 1271 1272 int bdev_read_only(struct block_device *bdev) 1273 { 1274 if (!bdev) 1275 return 0; 1276 return bdev->bd_part->policy; 1277 } 1278 1279 EXPORT_SYMBOL(bdev_read_only); 1280 1281 int invalidate_partition(struct gendisk *disk, int partno) 1282 { 1283 int res = 0; 1284 struct block_device *bdev = bdget_disk(disk, partno); 1285 if (bdev) { 1286 fsync_bdev(bdev); 1287 res = __invalidate_device(bdev); 1288 bdput(bdev); 1289 } 1290 return res; 1291 } 1292 1293 EXPORT_SYMBOL(invalidate_partition); 1294