xref: /openbmc/linux/block/genhd.c (revision 322cbb50)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  gendisk handling
4  *
5  * Portions Copyright (C) 2020 Christoph Hellwig
6  */
7 
8 #include <linux/module.h>
9 #include <linux/ctype.h>
10 #include <linux/fs.h>
11 #include <linux/kdev_t.h>
12 #include <linux/kernel.h>
13 #include <linux/blkdev.h>
14 #include <linux/backing-dev.h>
15 #include <linux/init.h>
16 #include <linux/spinlock.h>
17 #include <linux/proc_fs.h>
18 #include <linux/seq_file.h>
19 #include <linux/slab.h>
20 #include <linux/kmod.h>
21 #include <linux/major.h>
22 #include <linux/mutex.h>
23 #include <linux/idr.h>
24 #include <linux/log2.h>
25 #include <linux/pm_runtime.h>
26 #include <linux/badblocks.h>
27 #include <linux/part_stat.h>
28 
29 #include "blk.h"
30 #include "blk-mq-sched.h"
31 #include "blk-rq-qos.h"
32 
/*
 * Kobject of the top-level "block" sysfs directory.  It is created by
 * genhd_device_init() when sysfs_deprecated is not set, and device_add_disk()
 * places a per-disk symlink in it.
 */
static struct kobject *block_depr;
34 
35 /*
36  * Unique, monotonically increasing sequential number associated with block
37  * devices instances (i.e. incremented each time a device is attached).
38  * Associating uevents with block devices in userspace is difficult and racy:
39  * the uevent netlink socket is lossy, and on slow and overloaded systems has
40  * a very high latency.
41  * Block devices do not have exclusive owners in userspace, any process can set
42  * one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
43  * can be reused again and again).
44  * A userspace process setting up a block device and watching for its events
45  * cannot thus reliably tell whether an event relates to the device it just set
46  * up or another earlier instance with the same name.
47  * This sequential number allows userspace processes to solve this problem, and
48  * uniquely associate an uevent to the lifetime to a device.
49  */
50 static atomic64_t diskseq;
51 
/*
 * Extended (dynamic) dev_t allocation: minors are handed out from
 * ext_devt_ida.  Only one major is used for this at present, so the number
 * of available minors is bounded by NR_EXT_DEVT.
 */
#define NR_EXT_DEVT		(1 << MINORBITS)
static DEFINE_IDA(ext_devt_ida);
55 
56 void set_capacity(struct gendisk *disk, sector_t sectors)
57 {
58 	struct block_device *bdev = disk->part0;
59 
60 	spin_lock(&bdev->bd_size_lock);
61 	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
62 	bdev->bd_nr_sectors = sectors;
63 	spin_unlock(&bdev->bd_size_lock);
64 }
65 EXPORT_SYMBOL(set_capacity);
66 
67 /*
68  * Set disk capacity and notify if the size is not currently zero and will not
69  * be set to zero.  Returns true if a uevent was sent, otherwise false.
70  */
71 bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
72 {
73 	sector_t capacity = get_capacity(disk);
74 	char *envp[] = { "RESIZE=1", NULL };
75 
76 	set_capacity(disk, size);
77 
78 	/*
79 	 * Only print a message and send a uevent if the gendisk is user visible
80 	 * and alive.  This avoids spamming the log and udev when setting the
81 	 * initial capacity during probing.
82 	 */
83 	if (size == capacity ||
84 	    !disk_live(disk) ||
85 	    (disk->flags & GENHD_FL_HIDDEN))
86 		return false;
87 
88 	pr_info("%s: detected capacity change from %lld to %lld\n",
89 		disk->disk_name, capacity, size);
90 
91 	/*
92 	 * Historically we did not send a uevent for changes to/from an empty
93 	 * device.
94 	 */
95 	if (!capacity || !size)
96 		return false;
97 	kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
98 	return true;
99 }
100 EXPORT_SYMBOL_GPL(set_capacity_and_notify);
101 
102 /*
103  * Format the device name of the indicated block device into the supplied buffer
104  * and return a pointer to that same buffer for convenience.
105  *
106  * Note: do not use this in new code, use the %pg specifier to sprintf and
107  * printk insted.
108  */
109 const char *bdevname(struct block_device *bdev, char *buf)
110 {
111 	struct gendisk *hd = bdev->bd_disk;
112 	int partno = bdev->bd_partno;
113 
114 	if (!partno)
115 		snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
116 	else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
117 		snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
118 	else
119 		snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
120 
121 	return buf;
122 }
123 EXPORT_SYMBOL(bdevname);
124 
125 static void part_stat_read_all(struct block_device *part,
126 		struct disk_stats *stat)
127 {
128 	int cpu;
129 
130 	memset(stat, 0, sizeof(struct disk_stats));
131 	for_each_possible_cpu(cpu) {
132 		struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
133 		int group;
134 
135 		for (group = 0; group < NR_STAT_GROUPS; group++) {
136 			stat->nsecs[group] += ptr->nsecs[group];
137 			stat->sectors[group] += ptr->sectors[group];
138 			stat->ios[group] += ptr->ios[group];
139 			stat->merges[group] += ptr->merges[group];
140 		}
141 
142 		stat->io_ticks += ptr->io_ticks;
143 	}
144 }
145 
146 static unsigned int part_in_flight(struct block_device *part)
147 {
148 	unsigned int inflight = 0;
149 	int cpu;
150 
151 	for_each_possible_cpu(cpu) {
152 		inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
153 			    part_stat_local_read_cpu(part, in_flight[1], cpu);
154 	}
155 	if ((int)inflight < 0)
156 		inflight = 0;
157 
158 	return inflight;
159 }
160 
161 static void part_in_flight_rw(struct block_device *part,
162 		unsigned int inflight[2])
163 {
164 	int cpu;
165 
166 	inflight[0] = 0;
167 	inflight[1] = 0;
168 	for_each_possible_cpu(cpu) {
169 		inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
170 		inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
171 	}
172 	if ((int)inflight[0] < 0)
173 		inflight[0] = 0;
174 	if ((int)inflight[1] < 0)
175 		inflight[1] = 0;
176 }
177 
/*
 * Registry of block device majors.
 *
 * Can be deleted altogether. Later.
 */
#define BLKDEV_MAJOR_HASH_SIZE 255

/*
 * One registered major.  Entries whose major maps to the same slot of
 * major_names[] are chained through @next.
 */
struct blk_major_name {
	struct blk_major_name *next;	/* next entry in the same slot */
	int major;			/* registered major number */
	char name[16];			/* driver-supplied name, NUL terminated */
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
	void (*probe)(dev_t devt);	/* optional callback, see __register_blkdev() */
#endif
};

static struct blk_major_name *major_names[BLKDEV_MAJOR_HASH_SIZE];
/*
 * major_names_lock is held across registration, unregistration and the
 * probe lookup in blk_request_module().  major_names_spinlock is taken
 * around the list walk in blkdev_show() and, nested inside the mutex,
 * around the list modifications in __register_blkdev() and
 * unregister_blkdev().
 */
static DEFINE_MUTEX(major_names_lock);
static DEFINE_SPINLOCK(major_names_spinlock);
193 
194 /* index in the above - for now: assume no multimajor ranges */
195 static inline int major_to_index(unsigned major)
196 {
197 	return major % BLKDEV_MAJOR_HASH_SIZE;
198 }
199 
200 #ifdef CONFIG_PROC_FS
201 void blkdev_show(struct seq_file *seqf, off_t offset)
202 {
203 	struct blk_major_name *dp;
204 
205 	spin_lock(&major_names_spinlock);
206 	for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
207 		if (dp->major == offset)
208 			seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
209 	spin_unlock(&major_names_spinlock);
210 }
211 #endif /* CONFIG_PROC_FS */
212 
213 /**
214  * __register_blkdev - register a new block device
215  *
216  * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
217  *         @major = 0, try to allocate any unused major number.
218  * @name: the name of the new block device as a zero terminated string
219  * @probe: pre-devtmpfs / pre-udev callback used to create disks when their
220  *	   pre-created device node is accessed. When a probe call uses
221  *	   add_disk() and it fails the driver must cleanup resources. This
222  *	   interface may soon be removed.
223  *
224  * The @name must be unique within the system.
225  *
226  * The return value depends on the @major input parameter:
227  *
228  *  - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
229  *    then the function returns zero on success, or a negative error code
230  *  - if any unused major number was requested with @major = 0 parameter
231  *    then the return value is the allocated major number in range
232  *    [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
233  *
234  * See Documentation/admin-guide/devices.txt for the list of allocated
235  * major numbers.
236  *
237  * Use register_blkdev instead for any new code.
238  */
239 int __register_blkdev(unsigned int major, const char *name,
240 		void (*probe)(dev_t devt))
241 {
242 	struct blk_major_name **n, *p;
243 	int index, ret = 0;
244 
245 	mutex_lock(&major_names_lock);
246 
247 	/* temporary */
248 	if (major == 0) {
249 		for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
250 			if (major_names[index] == NULL)
251 				break;
252 		}
253 
254 		if (index == 0) {
255 			printk("%s: failed to get major for %s\n",
256 			       __func__, name);
257 			ret = -EBUSY;
258 			goto out;
259 		}
260 		major = index;
261 		ret = major;
262 	}
263 
264 	if (major >= BLKDEV_MAJOR_MAX) {
265 		pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
266 		       __func__, major, BLKDEV_MAJOR_MAX-1, name);
267 
268 		ret = -EINVAL;
269 		goto out;
270 	}
271 
272 	p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
273 	if (p == NULL) {
274 		ret = -ENOMEM;
275 		goto out;
276 	}
277 
278 	p->major = major;
279 #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
280 	p->probe = probe;
281 #endif
282 	strlcpy(p->name, name, sizeof(p->name));
283 	p->next = NULL;
284 	index = major_to_index(major);
285 
286 	spin_lock(&major_names_spinlock);
287 	for (n = &major_names[index]; *n; n = &(*n)->next) {
288 		if ((*n)->major == major)
289 			break;
290 	}
291 	if (!*n)
292 		*n = p;
293 	else
294 		ret = -EBUSY;
295 	spin_unlock(&major_names_spinlock);
296 
297 	if (ret < 0) {
298 		printk("register_blkdev: cannot get major %u for %s\n",
299 		       major, name);
300 		kfree(p);
301 	}
302 out:
303 	mutex_unlock(&major_names_lock);
304 	return ret;
305 }
306 EXPORT_SYMBOL(__register_blkdev);
307 
308 void unregister_blkdev(unsigned int major, const char *name)
309 {
310 	struct blk_major_name **n;
311 	struct blk_major_name *p = NULL;
312 	int index = major_to_index(major);
313 
314 	mutex_lock(&major_names_lock);
315 	spin_lock(&major_names_spinlock);
316 	for (n = &major_names[index]; *n; n = &(*n)->next)
317 		if ((*n)->major == major)
318 			break;
319 	if (!*n || strcmp((*n)->name, name)) {
320 		WARN_ON(1);
321 	} else {
322 		p = *n;
323 		*n = p->next;
324 	}
325 	spin_unlock(&major_names_spinlock);
326 	mutex_unlock(&major_names_lock);
327 	kfree(p);
328 }
329 
330 EXPORT_SYMBOL(unregister_blkdev);
331 
332 int blk_alloc_ext_minor(void)
333 {
334 	int idx;
335 
336 	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
337 	if (idx == -ENOSPC)
338 		return -EBUSY;
339 	return idx;
340 }
341 
342 void blk_free_ext_minor(unsigned int minor)
343 {
344 	ida_free(&ext_devt_ida, minor);
345 }
346 
347 static char *bdevt_str(dev_t devt, char *buf)
348 {
349 	if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
350 		char tbuf[BDEVT_SIZE];
351 		snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
352 		snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
353 	} else
354 		snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
355 
356 	return buf;
357 }
358 
359 void disk_uevent(struct gendisk *disk, enum kobject_action action)
360 {
361 	struct block_device *part;
362 	unsigned long idx;
363 
364 	rcu_read_lock();
365 	xa_for_each(&disk->part_tbl, idx, part) {
366 		if (bdev_is_partition(part) && !bdev_nr_sectors(part))
367 			continue;
368 		if (!kobject_get_unless_zero(&part->bd_device.kobj))
369 			continue;
370 
371 		rcu_read_unlock();
372 		kobject_uevent(bdev_kobj(part), action);
373 		put_device(&part->bd_device);
374 		rcu_read_lock();
375 	}
376 	rcu_read_unlock();
377 }
378 EXPORT_SYMBOL_GPL(disk_uevent);
379 
380 int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
381 {
382 	struct block_device *bdev;
383 
384 	if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
385 		return -EINVAL;
386 	if (disk->open_partitions)
387 		return -EBUSY;
388 
389 	set_bit(GD_NEED_PART_SCAN, &disk->state);
390 	bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL);
391 	if (IS_ERR(bdev))
392 		return PTR_ERR(bdev);
393 	blkdev_put(bdev, mode);
394 	return 0;
395 }
396 
397 /**
398  * device_add_disk - add disk information to kernel list
399  * @parent: parent device for the disk
400  * @disk: per-device partitioning information
401  * @groups: Additional per-device sysfs groups
402  *
403  * This function registers the partitioning information in @disk
404  * with the kernel.
405  */
406 int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
407 				 const struct attribute_group **groups)
408 
409 {
410 	struct device *ddev = disk_to_dev(disk);
411 	int ret;
412 
413 	/*
414 	 * The disk queue should now be all set with enough information about
415 	 * the device for the elevator code to pick an adequate default
416 	 * elevator if one is needed, that is, for devices requesting queue
417 	 * registration.
418 	 */
419 	elevator_init_mq(disk->queue);
420 
421 	/*
422 	 * If the driver provides an explicit major number it also must provide
423 	 * the number of minors numbers supported, and those will be used to
424 	 * setup the gendisk.
425 	 * Otherwise just allocate the device numbers for both the whole device
426 	 * and all partitions from the extended dev_t space.
427 	 */
428 	if (disk->major) {
429 		if (WARN_ON(!disk->minors))
430 			return -EINVAL;
431 
432 		if (disk->minors > DISK_MAX_PARTS) {
433 			pr_err("block: can't allocate more than %d partitions\n",
434 				DISK_MAX_PARTS);
435 			disk->minors = DISK_MAX_PARTS;
436 		}
437 		if (disk->first_minor + disk->minors > MINORMASK + 1)
438 			return -EINVAL;
439 	} else {
440 		if (WARN_ON(disk->minors))
441 			return -EINVAL;
442 
443 		ret = blk_alloc_ext_minor();
444 		if (ret < 0)
445 			return ret;
446 		disk->major = BLOCK_EXT_MAJOR;
447 		disk->first_minor = ret;
448 	}
449 
450 	/* delay uevents, until we scanned partition table */
451 	dev_set_uevent_suppress(ddev, 1);
452 
453 	ddev->parent = parent;
454 	ddev->groups = groups;
455 	dev_set_name(ddev, "%s", disk->disk_name);
456 	if (!(disk->flags & GENHD_FL_HIDDEN))
457 		ddev->devt = MKDEV(disk->major, disk->first_minor);
458 	ret = device_add(ddev);
459 	if (ret)
460 		goto out_free_ext_minor;
461 
462 	ret = disk_alloc_events(disk);
463 	if (ret)
464 		goto out_device_del;
465 
466 	if (!sysfs_deprecated) {
467 		ret = sysfs_create_link(block_depr, &ddev->kobj,
468 					kobject_name(&ddev->kobj));
469 		if (ret)
470 			goto out_device_del;
471 	}
472 
473 	/*
474 	 * avoid probable deadlock caused by allocating memory with
475 	 * GFP_KERNEL in runtime_resume callback of its all ancestor
476 	 * devices
477 	 */
478 	pm_runtime_set_memalloc_noio(ddev, true);
479 
480 	ret = blk_integrity_add(disk);
481 	if (ret)
482 		goto out_del_block_link;
483 
484 	disk->part0->bd_holder_dir =
485 		kobject_create_and_add("holders", &ddev->kobj);
486 	if (!disk->part0->bd_holder_dir) {
487 		ret = -ENOMEM;
488 		goto out_del_integrity;
489 	}
490 	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
491 	if (!disk->slave_dir) {
492 		ret = -ENOMEM;
493 		goto out_put_holder_dir;
494 	}
495 
496 	ret = bd_register_pending_holders(disk);
497 	if (ret < 0)
498 		goto out_put_slave_dir;
499 
500 	ret = blk_register_queue(disk);
501 	if (ret)
502 		goto out_put_slave_dir;
503 
504 	if (!(disk->flags & GENHD_FL_HIDDEN)) {
505 		ret = bdi_register(disk->bdi, "%u:%u",
506 				   disk->major, disk->first_minor);
507 		if (ret)
508 			goto out_unregister_queue;
509 		bdi_set_owner(disk->bdi, ddev);
510 		ret = sysfs_create_link(&ddev->kobj,
511 					&disk->bdi->dev->kobj, "bdi");
512 		if (ret)
513 			goto out_unregister_bdi;
514 
515 		bdev_add(disk->part0, ddev->devt);
516 		if (get_capacity(disk))
517 			disk_scan_partitions(disk, FMODE_READ);
518 
519 		/*
520 		 * Announce the disk and partitions after all partitions are
521 		 * created. (for hidden disks uevents remain suppressed forever)
522 		 */
523 		dev_set_uevent_suppress(ddev, 0);
524 		disk_uevent(disk, KOBJ_ADD);
525 	}
526 
527 	disk_update_readahead(disk);
528 	disk_add_events(disk);
529 	return 0;
530 
531 out_unregister_bdi:
532 	if (!(disk->flags & GENHD_FL_HIDDEN))
533 		bdi_unregister(disk->bdi);
534 out_unregister_queue:
535 	blk_unregister_queue(disk);
536 out_put_slave_dir:
537 	kobject_put(disk->slave_dir);
538 out_put_holder_dir:
539 	kobject_put(disk->part0->bd_holder_dir);
540 out_del_integrity:
541 	blk_integrity_del(disk);
542 out_del_block_link:
543 	if (!sysfs_deprecated)
544 		sysfs_remove_link(block_depr, dev_name(ddev));
545 out_device_del:
546 	device_del(ddev);
547 out_free_ext_minor:
548 	if (disk->major == BLOCK_EXT_MAJOR)
549 		blk_free_ext_minor(disk->first_minor);
550 	return ret;
551 }
552 EXPORT_SYMBOL(device_add_disk);
553 
554 /**
555  * del_gendisk - remove the gendisk
556  * @disk: the struct gendisk to remove
557  *
558  * Removes the gendisk and all its associated resources. This deletes the
559  * partitions associated with the gendisk, and unregisters the associated
560  * request_queue.
561  *
562  * This is the counter to the respective __device_add_disk() call.
563  *
564  * The final removal of the struct gendisk happens when its refcount reaches 0
565  * with put_disk(), which should be called after del_gendisk(), if
566  * __device_add_disk() was used.
567  *
568  * Drivers exist which depend on the release of the gendisk to be synchronous,
569  * it should not be deferred.
570  *
571  * Context: can sleep
572  */
573 void del_gendisk(struct gendisk *disk)
574 {
575 	struct request_queue *q = disk->queue;
576 
577 	might_sleep();
578 
579 	if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
580 		return;
581 
582 	blk_integrity_del(disk);
583 	disk_del_events(disk);
584 
585 	mutex_lock(&disk->open_mutex);
586 	remove_inode_hash(disk->part0->bd_inode);
587 	blk_drop_partitions(disk);
588 	mutex_unlock(&disk->open_mutex);
589 
590 	fsync_bdev(disk->part0);
591 	__invalidate_device(disk->part0, true);
592 
593 	/*
594 	 * Fail any new I/O.
595 	 */
596 	set_bit(GD_DEAD, &disk->state);
597 	set_capacity(disk, 0);
598 
599 	/*
600 	 * Prevent new I/O from crossing bio_queue_enter().
601 	 */
602 	blk_queue_start_drain(q);
603 
604 	if (!(disk->flags & GENHD_FL_HIDDEN)) {
605 		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
606 
607 		/*
608 		 * Unregister bdi before releasing device numbers (as they can
609 		 * get reused and we'd get clashes in sysfs).
610 		 */
611 		bdi_unregister(disk->bdi);
612 	}
613 
614 	blk_unregister_queue(disk);
615 
616 	kobject_put(disk->part0->bd_holder_dir);
617 	kobject_put(disk->slave_dir);
618 
619 	part_stat_set_all(disk->part0, 0);
620 	disk->part0->bd_stamp = 0;
621 	if (!sysfs_deprecated)
622 		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
623 	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
624 	device_del(disk_to_dev(disk));
625 
626 	blk_mq_freeze_queue_wait(q);
627 
628 	rq_qos_exit(q);
629 	blk_sync_queue(q);
630 	blk_flush_integrity();
631 	/*
632 	 * Allow using passthrough request again after the queue is torn down.
633 	 */
634 	blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
635 	__blk_mq_unfreeze_queue(q, true);
636 
637 }
638 EXPORT_SYMBOL(del_gendisk);
639 
640 /**
641  * invalidate_disk - invalidate the disk
642  * @disk: the struct gendisk to invalidate
643  *
644  * A helper to invalidates the disk. It will clean the disk's associated
645  * buffer/page caches and reset its internal states so that the disk
646  * can be reused by the drivers.
647  *
648  * Context: can sleep
649  */
650 void invalidate_disk(struct gendisk *disk)
651 {
652 	struct block_device *bdev = disk->part0;
653 
654 	invalidate_bdev(bdev);
655 	bdev->bd_inode->i_mapping->wb_err = 0;
656 	set_capacity(disk, 0);
657 }
658 EXPORT_SYMBOL(invalidate_disk);
659 
660 /* sysfs access to bad-blocks list. */
661 static ssize_t disk_badblocks_show(struct device *dev,
662 					struct device_attribute *attr,
663 					char *page)
664 {
665 	struct gendisk *disk = dev_to_disk(dev);
666 
667 	if (!disk->bb)
668 		return sprintf(page, "\n");
669 
670 	return badblocks_show(disk->bb, page, 0);
671 }
672 
673 static ssize_t disk_badblocks_store(struct device *dev,
674 					struct device_attribute *attr,
675 					const char *page, size_t len)
676 {
677 	struct gendisk *disk = dev_to_disk(dev);
678 
679 	if (!disk->bb)
680 		return -ENXIO;
681 
682 	return badblocks_store(disk->bb, page, len, 0);
683 }
684 
685 #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
686 void blk_request_module(dev_t devt)
687 {
688 	unsigned int major = MAJOR(devt);
689 	struct blk_major_name **n;
690 
691 	mutex_lock(&major_names_lock);
692 	for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) {
693 		if ((*n)->major == major && (*n)->probe) {
694 			(*n)->probe(devt);
695 			mutex_unlock(&major_names_lock);
696 			return;
697 		}
698 	}
699 	mutex_unlock(&major_names_lock);
700 
701 	if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
702 		/* Make old-style 2.4 aliases work */
703 		request_module("block-major-%d", MAJOR(devt));
704 }
705 #endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */
706 
707 /*
708  * print a full list of all partitions - intended for places where the root
709  * filesystem can't be mounted and thus to give the victim some idea of what
710  * went wrong
711  */
712 void __init printk_all_partitions(void)
713 {
714 	struct class_dev_iter iter;
715 	struct device *dev;
716 
717 	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
718 	while ((dev = class_dev_iter_next(&iter))) {
719 		struct gendisk *disk = dev_to_disk(dev);
720 		struct block_device *part;
721 		char devt_buf[BDEVT_SIZE];
722 		unsigned long idx;
723 
724 		/*
725 		 * Don't show empty devices or things that have been
726 		 * suppressed
727 		 */
728 		if (get_capacity(disk) == 0 || (disk->flags & GENHD_FL_HIDDEN))
729 			continue;
730 
731 		/*
732 		 * Note, unlike /proc/partitions, I am showing the numbers in
733 		 * hex - the same format as the root= option takes.
734 		 */
735 		rcu_read_lock();
736 		xa_for_each(&disk->part_tbl, idx, part) {
737 			if (!bdev_nr_sectors(part))
738 				continue;
739 			printk("%s%s %10llu %pg %s",
740 			       bdev_is_partition(part) ? "  " : "",
741 			       bdevt_str(part->bd_dev, devt_buf),
742 			       bdev_nr_sectors(part) >> 1, part,
743 			       part->bd_meta_info ?
744 					part->bd_meta_info->uuid : "");
745 			if (bdev_is_partition(part))
746 				printk("\n");
747 			else if (dev->parent && dev->parent->driver)
748 				printk(" driver: %s\n",
749 					dev->parent->driver->name);
750 			else
751 				printk(" (driver?)\n");
752 		}
753 		rcu_read_unlock();
754 	}
755 	class_dev_iter_exit(&iter);
756 }
757 
758 #ifdef CONFIG_PROC_FS
759 /* iterator */
760 static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
761 {
762 	loff_t skip = *pos;
763 	struct class_dev_iter *iter;
764 	struct device *dev;
765 
766 	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
767 	if (!iter)
768 		return ERR_PTR(-ENOMEM);
769 
770 	seqf->private = iter;
771 	class_dev_iter_init(iter, &block_class, NULL, &disk_type);
772 	do {
773 		dev = class_dev_iter_next(iter);
774 		if (!dev)
775 			return NULL;
776 	} while (skip--);
777 
778 	return dev_to_disk(dev);
779 }
780 
781 static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
782 {
783 	struct device *dev;
784 
785 	(*pos)++;
786 	dev = class_dev_iter_next(seqf->private);
787 	if (dev)
788 		return dev_to_disk(dev);
789 
790 	return NULL;
791 }
792 
793 static void disk_seqf_stop(struct seq_file *seqf, void *v)
794 {
795 	struct class_dev_iter *iter = seqf->private;
796 
797 	/* stop is called even after start failed :-( */
798 	if (iter) {
799 		class_dev_iter_exit(iter);
800 		kfree(iter);
801 		seqf->private = NULL;
802 	}
803 }
804 
805 static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
806 {
807 	void *p;
808 
809 	p = disk_seqf_start(seqf, pos);
810 	if (!IS_ERR_OR_NULL(p) && !*pos)
811 		seq_puts(seqf, "major minor  #blocks  name\n\n");
812 	return p;
813 }
814 
815 static int show_partition(struct seq_file *seqf, void *v)
816 {
817 	struct gendisk *sgp = v;
818 	struct block_device *part;
819 	unsigned long idx;
820 
821 	if (!get_capacity(sgp) || (sgp->flags & GENHD_FL_HIDDEN))
822 		return 0;
823 
824 	rcu_read_lock();
825 	xa_for_each(&sgp->part_tbl, idx, part) {
826 		if (!bdev_nr_sectors(part))
827 			continue;
828 		seq_printf(seqf, "%4d  %7d %10llu %pg\n",
829 			   MAJOR(part->bd_dev), MINOR(part->bd_dev),
830 			   bdev_nr_sectors(part) >> 1, part);
831 	}
832 	rcu_read_unlock();
833 	return 0;
834 }
835 
836 static const struct seq_operations partitions_op = {
837 	.start	= show_partition_start,
838 	.next	= disk_seqf_next,
839 	.stop	= disk_seqf_stop,
840 	.show	= show_partition
841 };
842 #endif
843 
844 static int __init genhd_device_init(void)
845 {
846 	int error;
847 
848 	block_class.dev_kobj = sysfs_dev_block_kobj;
849 	error = class_register(&block_class);
850 	if (unlikely(error))
851 		return error;
852 	blk_dev_init();
853 
854 	register_blkdev(BLOCK_EXT_MAJOR, "blkext");
855 
856 	/* create top-level block dir */
857 	if (!sysfs_deprecated)
858 		block_depr = kobject_create_and_add("block", NULL);
859 	return 0;
860 }
861 
862 subsys_initcall(genhd_device_init);
863 
864 static ssize_t disk_range_show(struct device *dev,
865 			       struct device_attribute *attr, char *buf)
866 {
867 	struct gendisk *disk = dev_to_disk(dev);
868 
869 	return sprintf(buf, "%d\n", disk->minors);
870 }
871 
872 static ssize_t disk_ext_range_show(struct device *dev,
873 				   struct device_attribute *attr, char *buf)
874 {
875 	struct gendisk *disk = dev_to_disk(dev);
876 
877 	return sprintf(buf, "%d\n",
878 		(disk->flags & GENHD_FL_NO_PART) ? 1 : DISK_MAX_PARTS);
879 }
880 
881 static ssize_t disk_removable_show(struct device *dev,
882 				   struct device_attribute *attr, char *buf)
883 {
884 	struct gendisk *disk = dev_to_disk(dev);
885 
886 	return sprintf(buf, "%d\n",
887 		       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
888 }
889 
890 static ssize_t disk_hidden_show(struct device *dev,
891 				   struct device_attribute *attr, char *buf)
892 {
893 	struct gendisk *disk = dev_to_disk(dev);
894 
895 	return sprintf(buf, "%d\n",
896 		       (disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
897 }
898 
899 static ssize_t disk_ro_show(struct device *dev,
900 				   struct device_attribute *attr, char *buf)
901 {
902 	struct gendisk *disk = dev_to_disk(dev);
903 
904 	return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
905 }
906 
907 ssize_t part_size_show(struct device *dev,
908 		       struct device_attribute *attr, char *buf)
909 {
910 	return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
911 }
912 
913 ssize_t part_stat_show(struct device *dev,
914 		       struct device_attribute *attr, char *buf)
915 {
916 	struct block_device *bdev = dev_to_bdev(dev);
917 	struct request_queue *q = bdev_get_queue(bdev);
918 	struct disk_stats stat;
919 	unsigned int inflight;
920 
921 	part_stat_read_all(bdev, &stat);
922 	if (queue_is_mq(q))
923 		inflight = blk_mq_in_flight(q, bdev);
924 	else
925 		inflight = part_in_flight(bdev);
926 
927 	return sprintf(buf,
928 		"%8lu %8lu %8llu %8u "
929 		"%8lu %8lu %8llu %8u "
930 		"%8u %8u %8u "
931 		"%8lu %8lu %8llu %8u "
932 		"%8lu %8u"
933 		"\n",
934 		stat.ios[STAT_READ],
935 		stat.merges[STAT_READ],
936 		(unsigned long long)stat.sectors[STAT_READ],
937 		(unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
938 		stat.ios[STAT_WRITE],
939 		stat.merges[STAT_WRITE],
940 		(unsigned long long)stat.sectors[STAT_WRITE],
941 		(unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
942 		inflight,
943 		jiffies_to_msecs(stat.io_ticks),
944 		(unsigned int)div_u64(stat.nsecs[STAT_READ] +
945 				      stat.nsecs[STAT_WRITE] +
946 				      stat.nsecs[STAT_DISCARD] +
947 				      stat.nsecs[STAT_FLUSH],
948 						NSEC_PER_MSEC),
949 		stat.ios[STAT_DISCARD],
950 		stat.merges[STAT_DISCARD],
951 		(unsigned long long)stat.sectors[STAT_DISCARD],
952 		(unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
953 		stat.ios[STAT_FLUSH],
954 		(unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
955 }
956 
957 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
958 			   char *buf)
959 {
960 	struct block_device *bdev = dev_to_bdev(dev);
961 	struct request_queue *q = bdev_get_queue(bdev);
962 	unsigned int inflight[2];
963 
964 	if (queue_is_mq(q))
965 		blk_mq_in_flight_rw(q, bdev, inflight);
966 	else
967 		part_in_flight_rw(bdev, inflight);
968 
969 	return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
970 }
971 
972 static ssize_t disk_capability_show(struct device *dev,
973 				    struct device_attribute *attr, char *buf)
974 {
975 	struct gendisk *disk = dev_to_disk(dev);
976 
977 	return sprintf(buf, "%x\n", disk->flags);
978 }
979 
980 static ssize_t disk_alignment_offset_show(struct device *dev,
981 					  struct device_attribute *attr,
982 					  char *buf)
983 {
984 	struct gendisk *disk = dev_to_disk(dev);
985 
986 	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
987 }
988 
989 static ssize_t disk_discard_alignment_show(struct device *dev,
990 					   struct device_attribute *attr,
991 					   char *buf)
992 {
993 	struct gendisk *disk = dev_to_disk(dev);
994 
995 	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
996 }
997 
998 static ssize_t diskseq_show(struct device *dev,
999 			    struct device_attribute *attr, char *buf)
1000 {
1001 	struct gendisk *disk = dev_to_disk(dev);
1002 
1003 	return sprintf(buf, "%llu\n", disk->diskseq);
1004 }
1005 
1006 static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
1007 static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
1008 static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
1009 static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
1010 static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
1011 static DEVICE_ATTR(size, 0444, part_size_show, NULL);
1012 static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
1013 static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
1014 static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
1015 static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
1016 static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
1017 static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
1018 static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
1019 
1020 #ifdef CONFIG_FAIL_MAKE_REQUEST
1021 ssize_t part_fail_show(struct device *dev,
1022 		       struct device_attribute *attr, char *buf)
1023 {
1024 	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
1025 }
1026 
1027 ssize_t part_fail_store(struct device *dev,
1028 			struct device_attribute *attr,
1029 			const char *buf, size_t count)
1030 {
1031 	int i;
1032 
1033 	if (count > 0 && sscanf(buf, "%d", &i) > 0)
1034 		dev_to_bdev(dev)->bd_make_it_fail = i;
1035 
1036 	return count;
1037 }
1038 
1039 static struct device_attribute dev_attr_fail =
1040 	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
1041 #endif /* CONFIG_FAIL_MAKE_REQUEST */
1042 
1043 #ifdef CONFIG_FAIL_IO_TIMEOUT
1044 static struct device_attribute dev_attr_fail_timeout =
1045 	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
1046 #endif
1047 
/*
 * NULL-terminated list of the attributes that make up disk_attr_group.
 * The two fault-injection entries are only compiled in when the matching
 * CONFIG_FAIL_* option is enabled.
 */
1048 static struct attribute *disk_attrs[] = {
1049 	&dev_attr_range.attr,
1050 	&dev_attr_ext_range.attr,
1051 	&dev_attr_removable.attr,
1052 	&dev_attr_hidden.attr,
1053 	&dev_attr_ro.attr,
1054 	&dev_attr_size.attr,
1055 	&dev_attr_alignment_offset.attr,
1056 	&dev_attr_discard_alignment.attr,
1057 	&dev_attr_capability.attr,
1058 	&dev_attr_stat.attr,
1059 	&dev_attr_inflight.attr,
1060 	&dev_attr_badblocks.attr,
1061 	&dev_attr_events.attr,
1062 	&dev_attr_events_async.attr,
1063 	&dev_attr_events_poll_msecs.attr,
1064 	&dev_attr_diskseq.attr,
1065 #ifdef CONFIG_FAIL_MAKE_REQUEST
1066 	&dev_attr_fail.attr,
1067 #endif
1068 #ifdef CONFIG_FAIL_IO_TIMEOUT
1069 	&dev_attr_fail_timeout.attr,
1070 #endif
1071 	NULL
1072 };
1073 
1074 static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
1075 {
1076 	struct device *dev = container_of(kobj, typeof(*dev), kobj);
1077 	struct gendisk *disk = dev_to_disk(dev);
1078 
1079 	if (a == &dev_attr_badblocks.attr && !disk->bb)
1080 		return 0;
1081 	return a->mode;
1082 }
1083 
1084 static struct attribute_group disk_attr_group = {
1085 	.attrs = disk_attrs,
1086 	.is_visible = disk_visible,
1087 };
1088 
1089 static const struct attribute_group *disk_attr_groups[] = {
1090 	&disk_attr_group,
1091 	NULL
1092 };
1093 
1094 /**
1095  * disk_release - releases all allocated resources of the gendisk
1096  * @dev: the device representing this disk
1097  *
1098  * This function releases all allocated resources of the gendisk.
1099  *
1100  * Drivers which used __device_add_disk() have a gendisk with a request_queue
1101  * assigned. Since the request_queue sits on top of the gendisk for these
1102  * drivers we also call blk_put_queue() for them, and we expect the
1103  * request_queue refcount to reach 0 at this point, and so the request_queue
1104  * will also be freed prior to the disk.
1105  *
1106  * Context: can sleep
1107  */
1108 static void disk_release(struct device *dev)
1109 {
1110 	struct gendisk *disk = dev_to_disk(dev);
1111 
1112 	might_sleep();
1113 	WARN_ON_ONCE(disk_live(disk));
1114 
1115 	blk_mq_cancel_work_sync(disk->queue);
1116 
1117 	disk_release_events(disk);
1118 	kfree(disk->random);
1119 	xa_destroy(&disk->part_tbl);
1120 	disk->queue->disk = NULL;
1121 	blk_put_queue(disk->queue);
1122 	iput(disk->part0->bd_inode);	/* frees the disk */
1123 }
1124 
1125 static int block_uevent(struct device *dev, struct kobj_uevent_env *env)
1126 {
1127 	struct gendisk *disk = dev_to_disk(dev);
1128 
1129 	return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
1130 }
1131 
1132 struct class block_class = {
1133 	.name		= "block",
1134 	.dev_uevent	= block_uevent,
1135 };
1136 
1137 static char *block_devnode(struct device *dev, umode_t *mode,
1138 			   kuid_t *uid, kgid_t *gid)
1139 {
1140 	struct gendisk *disk = dev_to_disk(dev);
1141 
1142 	if (disk->fops->devnode)
1143 		return disk->fops->devnode(disk, mode);
1144 	return NULL;
1145 }
1146 
1147 const struct device_type disk_type = {
1148 	.name		= "disk",
1149 	.groups		= disk_attr_groups,
1150 	.release	= disk_release,
1151 	.devnode	= block_devnode,
1152 };
1153 
1154 #ifdef CONFIG_PROC_FS
1155 /*
1156  * aggregate disk stat collector.  Uses the same stats that the sysfs
1157  * entries do, above, but makes them available through one seq_file.
1158  *
1159  * The output looks suspiciously like /proc/partitions with a bunch of
1160  * extra fields.
1161  */
1162 static int diskstats_show(struct seq_file *seqf, void *v)
1163 {
1164 	struct gendisk *gp = v;
1165 	struct block_device *hd;
1166 	unsigned int inflight;
1167 	struct disk_stats stat;
1168 	unsigned long idx;
1169 
1170 	/*
1171 	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1172 		seq_puts(seqf,	"major minor name"
1173 				"     rio rmerge rsect ruse wio wmerge "
1174 				"wsect wuse running use aveq"
1175 				"\n\n");
1176 	*/
1177 
1178 	rcu_read_lock();
1179 	xa_for_each(&gp->part_tbl, idx, hd) {
1180 		if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
1181 			continue;
1182 		part_stat_read_all(hd, &stat);
1183 		if (queue_is_mq(gp->queue))
1184 			inflight = blk_mq_in_flight(gp->queue, hd);
1185 		else
1186 			inflight = part_in_flight(hd);
1187 
1188 		seq_printf(seqf, "%4d %7d %pg "
1189 			   "%lu %lu %lu %u "
1190 			   "%lu %lu %lu %u "
1191 			   "%u %u %u "
1192 			   "%lu %lu %lu %u "
1193 			   "%lu %u"
1194 			   "\n",
1195 			   MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
1196 			   stat.ios[STAT_READ],
1197 			   stat.merges[STAT_READ],
1198 			   stat.sectors[STAT_READ],
1199 			   (unsigned int)div_u64(stat.nsecs[STAT_READ],
1200 							NSEC_PER_MSEC),
1201 			   stat.ios[STAT_WRITE],
1202 			   stat.merges[STAT_WRITE],
1203 			   stat.sectors[STAT_WRITE],
1204 			   (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
1205 							NSEC_PER_MSEC),
1206 			   inflight,
1207 			   jiffies_to_msecs(stat.io_ticks),
1208 			   (unsigned int)div_u64(stat.nsecs[STAT_READ] +
1209 						 stat.nsecs[STAT_WRITE] +
1210 						 stat.nsecs[STAT_DISCARD] +
1211 						 stat.nsecs[STAT_FLUSH],
1212 							NSEC_PER_MSEC),
1213 			   stat.ios[STAT_DISCARD],
1214 			   stat.merges[STAT_DISCARD],
1215 			   stat.sectors[STAT_DISCARD],
1216 			   (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
1217 						 NSEC_PER_MSEC),
1218 			   stat.ios[STAT_FLUSH],
1219 			   (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
1220 						 NSEC_PER_MSEC)
1221 			);
1222 	}
1223 	rcu_read_unlock();
1224 
1225 	return 0;
1226 }
1227 
1228 static const struct seq_operations diskstats_op = {
1229 	.start	= disk_seqf_start,
1230 	.next	= disk_seqf_next,
1231 	.stop	= disk_seqf_stop,
1232 	.show	= diskstats_show
1233 };
1234 
/*
 * Create the "diskstats" and "partitions" seq_file entries, both with mode 0
 * and a NULL parent.  The results of proc_create_seq() are not checked, so
 * this initcall always returns 0.
 */
1235 static int __init proc_genhd_init(void)
1236 {
1237 	proc_create_seq("diskstats", 0, NULL, &diskstats_op);
1238 	proc_create_seq("partitions", 0, NULL, &partitions_op);
1239 	return 0;
1240 }
1241 module_init(proc_genhd_init);
1242 #endif /* CONFIG_PROC_FS */
1243 
1244 dev_t part_devt(struct gendisk *disk, u8 partno)
1245 {
1246 	struct block_device *part;
1247 	dev_t devt = 0;
1248 
1249 	rcu_read_lock();
1250 	part = xa_load(&disk->part_tbl, partno);
1251 	if (part)
1252 		devt = part->bd_dev;
1253 	rcu_read_unlock();
1254 
1255 	return devt;
1256 }
1257 
1258 dev_t blk_lookup_devt(const char *name, int partno)
1259 {
1260 	dev_t devt = MKDEV(0, 0);
1261 	struct class_dev_iter iter;
1262 	struct device *dev;
1263 
1264 	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1265 	while ((dev = class_dev_iter_next(&iter))) {
1266 		struct gendisk *disk = dev_to_disk(dev);
1267 
1268 		if (strcmp(dev_name(dev), name))
1269 			continue;
1270 
1271 		if (partno < disk->minors) {
1272 			/* We need to return the right devno, even
1273 			 * if the partition doesn't exist yet.
1274 			 */
1275 			devt = MKDEV(MAJOR(dev->devt),
1276 				     MINOR(dev->devt) + partno);
1277 		} else {
1278 			devt = part_devt(disk, partno);
1279 			if (devt)
1280 				break;
1281 		}
1282 	}
1283 	class_dev_iter_exit(&iter);
1284 	return devt;
1285 }
1286 
1287 struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
1288 		struct lock_class_key *lkclass)
1289 {
1290 	struct gendisk *disk;
1291 
1292 	if (!blk_get_queue(q))
1293 		return NULL;
1294 
1295 	disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
1296 	if (!disk)
1297 		goto out_put_queue;
1298 
1299 	disk->bdi = bdi_alloc(node_id);
1300 	if (!disk->bdi)
1301 		goto out_free_disk;
1302 
1303 	/* bdev_alloc() might need the queue, set before the first call */
1304 	disk->queue = q;
1305 
1306 	disk->part0 = bdev_alloc(disk, 0);
1307 	if (!disk->part0)
1308 		goto out_free_bdi;
1309 
1310 	disk->node_id = node_id;
1311 	mutex_init(&disk->open_mutex);
1312 	xa_init(&disk->part_tbl);
1313 	if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
1314 		goto out_destroy_part_tbl;
1315 
1316 	rand_initialize_disk(disk);
1317 	disk_to_dev(disk)->class = &block_class;
1318 	disk_to_dev(disk)->type = &disk_type;
1319 	device_initialize(disk_to_dev(disk));
1320 	inc_diskseq(disk);
1321 	q->disk = disk;
1322 	lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0);
1323 #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
1324 	INIT_LIST_HEAD(&disk->slave_bdevs);
1325 #endif
1326 	return disk;
1327 
1328 out_destroy_part_tbl:
1329 	xa_destroy(&disk->part_tbl);
1330 	disk->part0->bd_disk = NULL;
1331 	iput(disk->part0->bd_inode);
1332 out_free_bdi:
1333 	bdi_put(disk->bdi);
1334 out_free_disk:
1335 	kfree(disk);
1336 out_put_queue:
1337 	blk_put_queue(q);
1338 	return NULL;
1339 }
1340 EXPORT_SYMBOL(__alloc_disk_node);
1341 
1342 struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
1343 {
1344 	struct request_queue *q;
1345 	struct gendisk *disk;
1346 
1347 	q = blk_alloc_queue(node, false);
1348 	if (!q)
1349 		return NULL;
1350 
1351 	disk = __alloc_disk_node(q, node, lkclass);
1352 	if (!disk) {
1353 		blk_cleanup_queue(q);
1354 		return NULL;
1355 	}
1356 	return disk;
1357 }
1358 EXPORT_SYMBOL(__blk_alloc_disk);
1359 
/**
 * put_disk - drop a reference to a gendisk
 * @disk: the struct gendisk whose refcount is decremented, may be %NULL
 *
 * Puts the device embedded in @disk.  Once the count reaches 0,
 * disk_release() is called.  A %NULL @disk is ignored.
 *
 * Context: Any context, but the last reference must not be dropped from
 *          atomic context.
 */
void put_disk(struct gendisk *disk)
{
	if (!disk)
		return;

	put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);
1376 
1377 /**
1378  * blk_cleanup_disk - shutdown a gendisk allocated by blk_alloc_disk
1379  * @disk: gendisk to shutdown
1380  *
1381  * Mark the queue hanging off @disk DYING, drain all pending requests, then mark
1382  * the queue DEAD, destroy and put it and the gendisk structure.
1383  *
1384  * Context: can sleep
1385  */
1386 void blk_cleanup_disk(struct gendisk *disk)
1387 {
1388 	blk_cleanup_queue(disk->queue);
1389 	put_disk(disk);
1390 }
1391 EXPORT_SYMBOL(blk_cleanup_disk);
1392 
1393 static void set_disk_ro_uevent(struct gendisk *gd, int ro)
1394 {
1395 	char event[] = "DISK_RO=1";
1396 	char *envp[] = { event, NULL };
1397 
1398 	if (!ro)
1399 		event[8] = '0';
1400 	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1401 }
1402 
1403 /**
1404  * set_disk_ro - set a gendisk read-only
1405  * @disk:	gendisk to operate on
1406  * @read_only:	%true to set the disk read-only, %false set the disk read/write
1407  *
1408  * This function is used to indicate whether a given disk device should have its
1409  * read-only flag set. set_disk_ro() is typically used by device drivers to
1410  * indicate whether the underlying physical device is write-protected.
1411  */
1412 void set_disk_ro(struct gendisk *disk, bool read_only)
1413 {
1414 	if (read_only) {
1415 		if (test_and_set_bit(GD_READ_ONLY, &disk->state))
1416 			return;
1417 	} else {
1418 		if (!test_and_clear_bit(GD_READ_ONLY, &disk->state))
1419 			return;
1420 	}
1421 	set_disk_ro_uevent(disk, read_only);
1422 }
1423 EXPORT_SYMBOL(set_disk_ro);
1424 
/*
 * Atomically increment the file-global diskseq counter and store the new
 * value in @disk->diskseq.
 */
1425 void inc_diskseq(struct gendisk *disk)
1426 {
1427 	disk->diskseq = atomic64_inc_return(&diskseq);
1428 }
1429