blk-zoned.c (5eac3eb30c9ab9ee7fe2bd9aa9db6373cabb77f8) | blk-zoned.c (d41003513e61dd9d4974cb441d30b63650b85654) |
---|---|
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Zoned block device handling 4 * 5 * Copyright (c) 2015, Hannes Reinecke 6 * Copyright (c) 2015, SUSE Linux GmbH 7 * 8 * Copyright (c) 2016, Damien Le Moal --- 87 unchanged lines hidden (view full) --- 96 return __blkdev_nr_zones(q, get_capacity(bdev->bd_disk)); 97} 98EXPORT_SYMBOL_GPL(blkdev_nr_zones); 99 100/** 101 * blkdev_report_zones - Get zones information 102 * @bdev: Target block device 103 * @sector: Sector from which to report zones | 1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Zoned block device handling 4 * 5 * Copyright (c) 2015, Hannes Reinecke 6 * Copyright (c) 2015, SUSE Linux GmbH 7 * 8 * Copyright (c) 2016, Damien Le Moal --- 87 unchanged lines hidden (view full) --- 96 return __blkdev_nr_zones(q, get_capacity(bdev->bd_disk)); 97} 98EXPORT_SYMBOL_GPL(blkdev_nr_zones); 99 100/** 101 * blkdev_report_zones - Get zones information 102 * @bdev: Target block device 103 * @sector: Sector from which to report zones |
104 * @zones: Array of zone structures where to return the zones information 105 * @nr_zones: Number of zone structures in the zone array | 104 * @nr_zones: Maximum number of zones to report 105 * @cb: Callback function called for each reported zone 106 * @data: Private data for the callback |
106 * 107 * Description: | 107 * 108 * Description: |
108 * Get zone information starting from the zone containing @sector. 109 * The number of zone information reported may be less than the number 110 * requested by @nr_zones. The number of zones actually reported is 111 * returned in @nr_zones. 112 * The caller must use memalloc_noXX_save/restore() calls to control 113 * memory allocations done within this function (zone array and command 114 * buffer allocation by the device driver). | 109 * Get zone information starting from the zone containing @sector for at most 110 * @nr_zones, and call @cb for each zone reported by the device. 111 * To report all zones in a device starting from @sector, the BLK_ALL_ZONES 112 * constant can be passed to @nr_zones. 113 * Returns the number of zones reported by the device, or a negative errno 114 * value in case of failure. 115 * 116 * Note: The caller must use memalloc_noXX_save/restore() calls to control 117 * memory allocations done within this function. |
115 */ 116int blkdev_report_zones(struct block_device *bdev, sector_t sector, | 118 */ 119int blkdev_report_zones(struct block_device *bdev, sector_t sector, |
117 struct blk_zone *zones, unsigned int *nr_zones) | 120 unsigned int nr_zones, report_zones_cb cb, void *data) |
118{ | 121{ |
119 struct request_queue *q = bdev_get_queue(bdev); | |
120 struct gendisk *disk = bdev->bd_disk; 121 sector_t capacity = get_capacity(disk); 122 | 122 struct gendisk *disk = bdev->bd_disk; 123 sector_t capacity = get_capacity(disk); 124 |
123 if (!blk_queue_is_zoned(q)) | 125 if (!blk_queue_is_zoned(bdev_get_queue(bdev)) || 126 WARN_ON_ONCE(!disk->fops->report_zones)) |
124 return -EOPNOTSUPP; 125 | 127 return -EOPNOTSUPP; 128 |
126 /* 127 * A block device that advertized itself as zoned must have a 128 * report_zones method. If it does not have one defined, the device 129 * driver has a bug. So warn about that. 130 */ 131 if (WARN_ON_ONCE(!disk->fops->report_zones)) 132 return -EOPNOTSUPP; 133 134 if (!*nr_zones || sector >= capacity) { 135 *nr_zones = 0; | 129 if (!nr_zones || sector >= capacity) |
136 return 0; | 130 return 0; |
137 } | |
138 | 131 |
139 *nr_zones = min(*nr_zones, __blkdev_nr_zones(q, capacity - sector)); 140 141 return disk->fops->report_zones(disk, sector, zones, nr_zones); | 132 return disk->fops->report_zones(disk, sector, nr_zones, cb, data); |
142} 143EXPORT_SYMBOL_GPL(blkdev_report_zones); 144 145static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev, 146 sector_t sector, 147 sector_t nr_sectors) 148{ 149 if (!blk_queue_zone_resetall(bdev_get_queue(bdev))) --- 77 unchanged lines hidden (view full) --- 227 228 ret = submit_bio_wait(bio); 229 bio_put(bio); 230 231 return ret; 232} 233EXPORT_SYMBOL_GPL(blkdev_zone_mgmt); 234 | 133} 134EXPORT_SYMBOL_GPL(blkdev_report_zones); 135 136static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev, 137 sector_t sector, 138 sector_t nr_sectors) 139{ 140 if (!blk_queue_zone_resetall(bdev_get_queue(bdev))) --- 77 unchanged lines hidden (view full) --- 218 219 ret = submit_bio_wait(bio); 220 bio_put(bio); 221 222 return ret; 223} 224EXPORT_SYMBOL_GPL(blkdev_zone_mgmt); 225 |
226struct zone_report_args { 227 struct blk_zone __user *zones; 228}; 229 230static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx, 231 void *data) 232{ 233 struct zone_report_args *args = data; 234 235 if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone))) 236 return -EFAULT; 237 return 0; 238} 239 |
|
235/* 236 * BLKREPORTZONE ioctl processing. 237 * Called from blkdev_ioctl. 238 */ 239int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, 240 unsigned int cmd, unsigned long arg) 241{ 242 void __user *argp = (void __user *)arg; | 240/* 241 * BLKREPORTZONE ioctl processing. 242 * Called from blkdev_ioctl. 243 */ 244int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, 245 unsigned int cmd, unsigned long arg) 246{ 247 void __user *argp = (void __user *)arg; |
248 struct zone_report_args args; |
|
243 struct request_queue *q; 244 struct blk_zone_report rep; | 249 struct request_queue *q; 250 struct blk_zone_report rep; |
245 struct blk_zone *zones; | |
246 int ret; 247 248 if (!argp) 249 return -EINVAL; 250 251 q = bdev_get_queue(bdev); 252 if (!q) 253 return -ENXIO; --- 5 unchanged lines hidden (view full) --- 259 return -EACCES; 260 261 if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report))) 262 return -EFAULT; 263 264 if (!rep.nr_zones) 265 return -EINVAL; 266 | 251 int ret; 252 253 if (!argp) 254 return -EINVAL; 255 256 q = bdev_get_queue(bdev); 257 if (!q) 258 return -ENXIO; --- 5 unchanged lines hidden (view full) --- 264 return -EACCES; 265 266 if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report))) 267 return -EFAULT; 268 269 if (!rep.nr_zones) 270 return -EINVAL; 271 |
267 rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones); | 272 args.zones = argp + sizeof(struct blk_zone_report); 273 ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones, 274 blkdev_copy_zone_to_user, &args); 275 if (ret < 0) 276 return ret; |
268 | 277 |
269 zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone), 270 GFP_KERNEL | __GFP_ZERO); 271 if (!zones) 272 return -ENOMEM; 273 274 ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones); 275 if (ret) 276 goto out; 277 278 if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) { 279 ret = -EFAULT; 280 goto out; 281 } 282 283 if (rep.nr_zones) { 284 if (copy_to_user(argp + sizeof(struct blk_zone_report), zones, 285 sizeof(struct blk_zone) * rep.nr_zones)) 286 ret = -EFAULT; 287 } 288 289 out: 290 kvfree(zones); 291 292 return ret; | 278 rep.nr_zones = ret; 279 if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) 280 return -EFAULT; 281 return 0; |
293} 294 295/* 296 * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing. 297 * Called from blkdev_ioctl. 298 */ 299int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, 300 unsigned int cmd, unsigned long arg) --- 45 unchanged lines hidden (view full) --- 346 347static inline unsigned long *blk_alloc_zone_bitmap(int node, 348 unsigned int nr_zones) 349{ 350 return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long), 351 GFP_NOIO, node); 352} 353 | 282} 283 284/* 285 * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing. 286 * Called from blkdev_ioctl. 287 */ 288int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, 289 unsigned int cmd, unsigned long arg) --- 45 unchanged lines hidden (view full) --- 335 336static inline unsigned long *blk_alloc_zone_bitmap(int node, 337 unsigned int nr_zones) 338{ 339 return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long), 340 GFP_NOIO, node); 341} 342 |
354/* 355 * Allocate an array of struct blk_zone to get nr_zones zone information. 356 * The allocated array may be smaller than nr_zones. 357 */ 358static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones) 359{ 360 struct blk_zone *zones; 361 size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES); 362 363 /* 364 * GFP_KERNEL here is meaningless as the caller task context has 365 * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones() 366 * with memalloc_noio_save(). 367 */ 368 zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL); 369 if (!zones) { 370 *nr_zones = 0; 371 return NULL; 372 } 373 374 *nr_zones = nrz; 375 376 return zones; 377} 378 | |
379void blk_queue_free_zone_bitmaps(struct request_queue *q) 380{ 381 kfree(q->seq_zones_bitmap); 382 q->seq_zones_bitmap = NULL; 383 kfree(q->seq_zones_wlock); 384 q->seq_zones_wlock = NULL; 385} 386 | 343void blk_queue_free_zone_bitmaps(struct request_queue *q) 344{ 345 kfree(q->seq_zones_bitmap); 346 q->seq_zones_bitmap = NULL; 347 kfree(q->seq_zones_wlock); 348 q->seq_zones_wlock = NULL; 349} 350 |
351struct blk_revalidate_zone_args { 352 struct gendisk *disk; 353 unsigned long *seq_zones_bitmap; 354 unsigned long *seq_zones_wlock; 355 sector_t sector; 356}; 357 |
|
387/* 388 * Helper function to check the validity of zones of a zoned block device. 389 */ | 358/* 359 * Helper function to check the validity of zones of a zoned block device. 360 */ |
390static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone, 391 sector_t *sector) | 361static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, 362 void *data) |
392{ | 363{ |
364 struct blk_revalidate_zone_args *args = data; 365 struct gendisk *disk = args->disk; |
|
393 struct request_queue *q = disk->queue; 394 sector_t zone_sectors = blk_queue_zone_sectors(q); 395 sector_t capacity = get_capacity(disk); 396 397 /* 398 * All zones must have the same size, with the exception on an eventual 399 * smaller last zone. 400 */ 401 if (zone->start + zone_sectors < capacity && 402 zone->len != zone_sectors) { 403 pr_warn("%s: Invalid zoned device with non constant zone size\n", 404 disk->disk_name); 405 return false; 406 } 407 408 if (zone->start + zone->len >= capacity && 409 zone->len > zone_sectors) { 410 pr_warn("%s: Invalid zoned device with larger last zone size\n", 411 disk->disk_name); | 366 struct request_queue *q = disk->queue; 367 sector_t zone_sectors = blk_queue_zone_sectors(q); 368 sector_t capacity = get_capacity(disk); 369 370 /* 371 * All zones must have the same size, with the exception on an eventual 372 * smaller last zone. 373 */ 374 if (zone->start + zone_sectors < capacity && 375 zone->len != zone_sectors) { 376 pr_warn("%s: Invalid zoned device with non constant zone size\n", 377 disk->disk_name); 378 return false; 379 } 380 381 if (zone->start + zone->len >= capacity && 382 zone->len > zone_sectors) { 383 pr_warn("%s: Invalid zoned device with larger last zone size\n", 384 disk->disk_name); |
412 return false; | 385 return -ENODEV; |
413 } 414 415 /* Check for holes in the zone report */ | 386 } 387 388 /* Check for holes in the zone report */ |
416 if (zone->start != *sector) { | 389 if (zone->start != args->sector) { |
417 pr_warn("%s: Zone gap at sectors %llu..%llu\n", | 390 pr_warn("%s: Zone gap at sectors %llu..%llu\n", |
418 disk->disk_name, *sector, zone->start); 419 return false; | 391 disk->disk_name, args->sector, zone->start); 392 return -ENODEV; |
420 } 421 422 /* Check zone type */ 423 switch (zone->type) { 424 case BLK_ZONE_TYPE_CONVENTIONAL: 425 case BLK_ZONE_TYPE_SEQWRITE_REQ: 426 case BLK_ZONE_TYPE_SEQWRITE_PREF: 427 break; 428 default: 429 pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n", 430 disk->disk_name, (int)zone->type, zone->start); | 393 } 394 395 /* Check zone type */ 396 switch (zone->type) { 397 case BLK_ZONE_TYPE_CONVENTIONAL: 398 case BLK_ZONE_TYPE_SEQWRITE_REQ: 399 case BLK_ZONE_TYPE_SEQWRITE_PREF: 400 break; 401 default: 402 pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n", 403 disk->disk_name, (int)zone->type, zone->start); |
431 return false; | 404 return -ENODEV; |
432 } 433 | 405 } 406 |
434 *sector += zone->len; | 407 if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) 408 set_bit(idx, args->seq_zones_bitmap); |
435 | 409 |
436 return true; | 410 args->sector += zone->len; 411 return 0; |
437} 438 | 412} 413 |
414static int blk_update_zone_info(struct gendisk *disk, unsigned int nr_zones, 415 struct blk_revalidate_zone_args *args) 416{ 417 /* 418 * Ensure that all memory allocations in this context are done as 419 * if GFP_NOIO was specified. 420 */ 421 unsigned int noio_flag = memalloc_noio_save(); 422 struct request_queue *q = disk->queue; 423 int ret; 424 425 args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones); 426 if (!args->seq_zones_wlock) 427 return -ENOMEM; 428 args->seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones); 429 if (!args->seq_zones_bitmap) 430 return -ENOMEM; 431 432 ret = disk->fops->report_zones(disk, 0, nr_zones, 433 blk_revalidate_zone_cb, args); 434 memalloc_noio_restore(noio_flag); 435 return ret; 436} 437 |
|
439/** 440 * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps 441 * @disk: Target disk 442 * 443 * Helper function for low-level device drivers to (re) allocate and initialize 444 * a disk request queue zone bitmaps. This functions should normally be called 445 * within the disk ->revalidate method. For BIO based queues, no zone bitmap 446 * is allocated. 447 */ 448int blk_revalidate_disk_zones(struct gendisk *disk) 449{ 450 struct request_queue *q = disk->queue; 451 unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk)); | 438/** 439 * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps 440 * @disk: Target disk 441 * 442 * Helper function for low-level device drivers to (re) allocate and initialize 443 * a disk request queue zone bitmaps. This functions should normally be called 444 * within the disk ->revalidate method. For BIO based queues, no zone bitmap 445 * is allocated. 446 */ 447int blk_revalidate_disk_zones(struct gendisk *disk) 448{ 449 struct request_queue *q = disk->queue; 450 unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk)); |
452 unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; 453 unsigned int i, rep_nr_zones = 0, z = 0, nrz; 454 struct blk_zone *zones = NULL; 455 unsigned int noio_flag; 456 sector_t sector = 0; | 451 struct blk_revalidate_zone_args args = { .disk = disk }; |
457 int ret = 0; 458 459 if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) 460 return -EIO; 461 462 /* 463 * BIO based queues do not use a scheduler so only q->nr_zones 464 * needs to be updated so that the sysfs exposed value is correct. 465 */ 466 if (!queue_is_mq(q)) { 467 q->nr_zones = nr_zones; 468 return 0; 469 } 470 | 452 int ret = 0; 453 454 if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) 455 return -EIO; 456 457 /* 458 * BIO based queues do not use a scheduler so only q->nr_zones 459 * needs to be updated so that the sysfs exposed value is correct. 460 */ 461 if (!queue_is_mq(q)) { 462 q->nr_zones = nr_zones; 463 return 0; 464 } 465 |
471 /* 472 * Ensure that all memory allocations in this context are done as 473 * if GFP_NOIO was specified. 474 */ 475 noio_flag = memalloc_noio_save(); | 466 if (nr_zones) 467 ret = blk_update_zone_info(disk, nr_zones, &args); |
476 | 468 |
477 if (!nr_zones) 478 goto update; 479 480 /* Allocate bitmaps */ 481 ret = -ENOMEM; 482 seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones); 483 if (!seq_zones_wlock) 484 goto out; 485 seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones); 486 if (!seq_zones_bitmap) 487 goto out; 488 | |
489 /* | 469 /* |
490 * Get zone information to check the zones and initialize 491 * seq_zones_bitmap. 492 */ 493 rep_nr_zones = nr_zones; 494 zones = blk_alloc_zones(&rep_nr_zones); 495 if (!zones) 496 goto out; 497 498 while (z < nr_zones) { 499 nrz = min(nr_zones - z, rep_nr_zones); 500 ret = disk->fops->report_zones(disk, sector, zones, &nrz); 501 if (ret) 502 goto out; 503 if (!nrz) 504 break; 505 for (i = 0; i < nrz; i++) { 506 if (!blk_zone_valid(disk, &zones[i], &sector)) { 507 ret = -ENODEV; 508 goto out; 509 } 510 if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL) 511 set_bit(z, seq_zones_bitmap); 512 z++; 513 } 514 } 515 516 if (WARN_ON(z != nr_zones)) { 517 ret = -EIO; 518 goto out; 519 } 520 521update: 522 /* | |
523 * Install the new bitmaps, making sure the queue is stopped and 524 * all I/Os are completed (i.e. a scheduler is not referencing the 525 * bitmaps). 526 */ 527 blk_mq_freeze_queue(q); | 470 * Install the new bitmaps, making sure the queue is stopped and 471 * all I/Os are completed (i.e. a scheduler is not referencing the 472 * bitmaps). 473 */ 474 blk_mq_freeze_queue(q); |
528 q->nr_zones = nr_zones; 529 swap(q->seq_zones_wlock, seq_zones_wlock); 530 swap(q->seq_zones_bitmap, seq_zones_bitmap); 531 blk_mq_unfreeze_queue(q); 532 533out: 534 memalloc_noio_restore(noio_flag); 535 536 kvfree(zones); 537 kfree(seq_zones_wlock); 538 kfree(seq_zones_bitmap); 539 540 if (ret) { | 475 if (ret >= 0) { 476 q->nr_zones = nr_zones; 477 swap(q->seq_zones_wlock, args.seq_zones_wlock); 478 swap(q->seq_zones_bitmap, args.seq_zones_bitmap); 479 ret = 0; 480 } else { |
541 pr_warn("%s: failed to revalidate zones\n", disk->disk_name); | 481 pr_warn("%s: failed to revalidate zones\n", disk->disk_name); |
542 blk_mq_freeze_queue(q); | |
543 blk_queue_free_zone_bitmaps(q); | 482 blk_queue_free_zone_bitmaps(q); |
544 blk_mq_unfreeze_queue(q); | |
545 } | 483 } |
484 blk_mq_unfreeze_queue(q); |
|
546 | 485 |
486 kfree(args.seq_zones_wlock); 487 kfree(args.seq_zones_bitmap); |
|
547 return ret; 548} 549EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); 550 | 488 return ret; 489} 490EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); 491 |