// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 */

#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>

#include "dm-core.h"

#define DM_MSG_PREFIX		"zone"

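/*
 * Sentinel stored in the zwp_offset[] array of a mapped device when the
 * write pointer position of a zone is unknown, e.g. after a failed write.
 * The offset is refreshed with a zone report on the next write to the zone
 * (see dm_zone_map_bio_begin()).
 */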
#define DM_ZONE_INVALID_WP_OFST	UINT_MAX

/*
 * For internal zone reports bypassing the top BIO submission path.
 */
static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t,
				  sector_t sector, unsigned int nr_zones,
				  report_zones_cb cb, void *data)
{
	struct gendisk *disk = md->disk;
	int ret;
	struct dm_report_zones_args args = {
		.next_sector = sector,
		.orig_data = data,
		.orig_cb = cb,
	};

	do {
		struct dm_target *tgt;

		tgt = dm_table_find_target(t, args.next_sector);
		if (WARN_ON_ONCE(!tgt->type->report_zones))
			return -EIO;

		args.tgt = tgt;
		ret = tgt->type->report_zones(tgt, &args,
					      nr_zones - args.zone_idx);
		if (ret < 0)
			return ret;
	} while (args.zone_idx < nr_zones &&
		 args.next_sector < get_capacity(disk));

	return args.zone_idx;
}

/*
 * User facing block device report zone operation of a DM device. This calls
 * the report_zones operation of each target of the device table. Targets
 * generally implement this operation using dm_report_zones().
 */
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct mapped_device *md = disk->private_data;
	struct dm_table *map;
	int srcu_idx, ret;

	if (dm_suspended_md(md))
		return -EAGAIN;

	map = dm_get_live_table(md, &srcu_idx);
	if (!map)
		return -EIO;

	ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data);

	dm_put_live_table(md, srcu_idx);

	return ret;
}

static int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx,
			      void *data)
{
	struct dm_report_zones_args *args = data;
	sector_t sector_diff = args->tgt->begin - args->start;

	/*
	 * Ignore zones beyond the target range.
	 */
	if (zone->start >= args->start + args->tgt->len)
		return 0;

	/*
	 * Remap the start sector and write pointer position of the zone
	 * to match its position in the target range.
	 */
	zone->start += sector_diff;
	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
		if (zone->cond == BLK_ZONE_COND_FULL)
			zone->wp = zone->start + zone->len;
		else if (zone->cond == BLK_ZONE_COND_EMPTY)
			zone->wp = zone->start;
		else
			zone->wp += sector_diff;
	}

	args->next_sector = zone->start + zone->len;
	return args->orig_cb(zone, args->zone_idx++, args->orig_data);
}

/*
 * Helper for drivers of zoned targets to implement struct target_type
 * report_zones operation.
 */
int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector,
		    struct dm_report_zones_args *args, unsigned int nr_zones)
{
	/*
	 * Set the target mapping start sector first so that
	 * dm_report_zones_cb() can correctly remap zone information.
	 */
	args->start = start;

	return blkdev_report_zones(bdev, sector, nr_zones,
				   dm_report_zones_cb, args);
}
EXPORT_SYMBOL_GPL(dm_report_zones);

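/*
 * A target's report_zones method is typically a thin wrapper around
 * dm_report_zones(). Hedged sketch, modeled on the dm-linear target
 * (field and helper names are illustrative):
 *
 *	static int linear_report_zones(struct dm_target *ti,
 *			struct dm_report_zones_args *args,
 *			unsigned int nr_zones)
 *	{
 *		struct linear_c *lc = ti->private;
 *
 *		return dm_report_zones(lc->dev->bdev, lc->start,
 *				linear_map_sector(ti, args->next_sector),
 *				args, nr_zones);
 *	}
 */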

/*
 * Check if a BIO is a zoned device write, that is, a write or write zeroes
 * operation with data.
 */
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{
	struct request_queue *q = md->queue;

	if (!blk_queue_is_zoned(q))
		return false;

	switch (bio_op(bio)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		return !op_is_flush(bio->bi_opf) && bio_sectors(bio);
	default:
		return false;
	}
}

/*
 * Free the resources used for zone information and zone append emulation.
 */
void dm_cleanup_zoned_dev(struct mapped_device *md)
{
	struct request_queue *q = md->queue;

	if (q) {
		kfree(q->conv_zones_bitmap);
		q->conv_zones_bitmap = NULL;
		kfree(q->seq_zones_wlock);
		q->seq_zones_wlock = NULL;
	}

	kvfree(md->zwp_offset);
	md->zwp_offset = NULL;
	md->nr_zones = 0;
}

static unsigned int dm_get_zone_wp_offset(struct blk_zone *zone)
{
	switch (zone->cond) {
	case BLK_ZONE_COND_IMP_OPEN:
	case BLK_ZONE_COND_EXP_OPEN:
	case BLK_ZONE_COND_CLOSED:
		return zone->wp - zone->start;
	case BLK_ZONE_COND_FULL:
		return zone->len;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_NOT_WP:
	case BLK_ZONE_COND_OFFLINE:
	case BLK_ZONE_COND_READONLY:
	default:
		/*
		 * Conventional, offline and read-only zones do not have a
		 * valid write pointer. Use 0, as for an empty zone.
		 */
		return 0;
	}
}

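/*
 * Callback for dm_blk_do_report_zones() used by dm_revalidate_zones():
 * allocate the conventional zone bitmap, the sequential zone write lock
 * bitmap and the zone write pointer offset array when the first zone of
 * the corresponding type is seen. Conventional zones are marked in the
 * bitmap and the write pointer offset of sequential zones is saved.
 */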
static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
				 void *data)
{
	struct mapped_device *md = data;
	struct request_queue *q = md->queue;

	switch (zone->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!q->conv_zones_bitmap) {
			q->conv_zones_bitmap =
				kcalloc(BITS_TO_LONGS(q->nr_zones),
					sizeof(unsigned long), GFP_NOIO);
			if (!q->conv_zones_bitmap)
				return -ENOMEM;
		}
		set_bit(idx, q->conv_zones_bitmap);
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		if (!q->seq_zones_wlock) {
			q->seq_zones_wlock =
				kcalloc(BITS_TO_LONGS(q->nr_zones),
					sizeof(unsigned long), GFP_NOIO);
			if (!q->seq_zones_wlock)
				return -ENOMEM;
		}
		if (!md->zwp_offset) {
			md->zwp_offset =
				kvcalloc(q->nr_zones, sizeof(unsigned int),
					 GFP_KERNEL);
			if (!md->zwp_offset)
				return -ENOMEM;
		}
		md->zwp_offset[idx] = dm_get_zone_wp_offset(zone);

		break;
	default:
		DMERR("Invalid zone type 0x%x at sectors %llu",
		      (int)zone->type, zone->start);
		return -ENODEV;
	}

	return 0;
}

/*
 * Revalidate the zones of a mapped device to initialize the resources
 * necessary for zone append emulation. Note that we cannot simply use the
 * block layer blk_revalidate_disk_zones() function here as the mapped device
 * is suspended (this is called from __bind() context).
 */
static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
{
	struct request_queue *q = md->queue;
	unsigned int noio_flag;
	int ret;

	/*
	 * Check if something changed. If yes, cleanup the current resources
	 * and reallocate everything.
	 */
	if (!q->nr_zones || q->nr_zones != md->nr_zones)
		dm_cleanup_zoned_dev(md);
	if (md->nr_zones)
		return 0;

	/*
	 * Scan all zones to initialize everything. Ensure that all vmalloc
	 * operations in this context are done as if GFP_NOIO was specified.
	 */
	noio_flag = memalloc_noio_save();
	ret = dm_blk_do_report_zones(md, t, 0, q->nr_zones,
				     dm_zone_revalidate_cb, md);
	memalloc_noio_restore(noio_flag);
	if (ret < 0)
		goto err;
	if (ret != q->nr_zones) {
		ret = -EIO;
		goto err;
	}

	md->nr_zones = q->nr_zones;

	return 0;

err:
	DMERR("Revalidate zones failed %d", ret);
	dm_cleanup_zoned_dev(md);
	return ret;
}

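/*
 * Zone append is natively supported only if every target of the table maps
 * to a zoned device and none of the targets requested emulation by setting
 * ti->emulate_zone_append.
 */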
static int device_not_zone_append_capable(struct dm_target *ti,
					  struct dm_dev *dev, sector_t start,
					  sector_t len, void *data)
{
	return !bdev_is_zoned(dev->bdev);
}

static bool dm_table_supports_zone_append(struct dm_table *t)
{
	struct dm_target *ti;
	unsigned int i;

	for (i = 0; i < dm_table_get_num_targets(t); i++) {
		ti = dm_table_get_target(t, i);

		if (ti->emulate_zone_append)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_zone_append_capable, NULL))
			return false;
	}

	return true;
}

int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
{
	struct mapped_device *md = t->md;

	/*
	 * For a zoned target, the number of zones should be updated for the
	 * correct value to be exposed in sysfs queue/nr_zones.
	 */
	WARN_ON_ONCE(queue_is_mq(q));
	q->nr_zones = bdev_nr_zones(md->disk->part0);

	/* Check if zone append is natively supported */
	if (dm_table_supports_zone_append(t)) {
		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
		dm_cleanup_zoned_dev(md);
		return 0;
	}

	/*
	 * Mark the mapped device as needing zone append emulation and
	 * initialize the emulation resources once the capacity is set.
	 */
	set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
	if (!get_capacity(md->disk))
		return 0;

	return dm_revalidate_zones(md, t);
}

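/*
 * Query the device for the current write pointer offset of a single zone.
 * This is used to recover the offset of a zone that was marked
 * DM_ZONE_INVALID_WP_OFST after a failed write.
 */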
static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
				       void *data)
{
	unsigned int *wp_offset = data;

	*wp_offset = dm_get_zone_wp_offset(zone);

	return 0;
}

static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
				    unsigned int *wp_ofst)
{
	sector_t sector = zno * bdev_zone_sectors(md->disk->part0);
	unsigned int noio_flag;
	struct dm_table *t;
	int srcu_idx, ret;

	t = dm_get_live_table(md, &srcu_idx);
	if (!t)
		return -EIO;

	/*
	 * Ensure that all memory allocations in this context are done as if
	 * GFP_NOIO was specified.
	 */
	noio_flag = memalloc_noio_save();
	ret = dm_blk_do_report_zones(md, t, sector, 1,
				     dm_update_zone_wp_offset_cb, wp_ofst);
	memalloc_noio_restore(noio_flag);

	dm_put_live_table(md, srcu_idx);

	if (ret != 1)
		return -EIO;

	return 0;
}

/*
 * Snapshot of the original BIO details, taken before the target ->map()
 * method can modify or complete the clone BIO.
 */
struct orig_bio_details {
	unsigned int op;
	unsigned int nr_sectors;
};

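/*
 * Zone append emulation works in two phases around the target ->map()
 * call: dm_zone_map_bio_begin() validates the BIO against the cached zone
 * write pointer offset and rewrites zone append operations as regular
 * writes at that offset; dm_zone_map_bio_end() then advances the cached
 * offset by the number of sectors mapped. The zone write lock taken in
 * dm_zone_map_bio() serializes both phases with the BIO completion.
 *
 * For example (hypothetical numbers), with 524288-sector zones and a
 * cached write pointer offset of 1024, a zone append BIO targeting the
 * start of zone 2 (sector 1048576) is issued as a regular write at sector
 * 1048576 + 1024 = 1049600, and the cached offset is then advanced by the
 * BIO size.
 */
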
/*
 * First phase of BIO mapping for targets with zone append emulation:
 * check all BIOs that change a zone write pointer and change zone
 * append operations into regular write operations.
 */
static bool dm_zone_map_bio_begin(struct mapped_device *md,
				  unsigned int zno, struct bio *clone)
{
	sector_t zsectors = bdev_zone_sectors(md->disk->part0);
	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);

	/*
	 * If the target zone is in an error state, recover by inspecting the
	 * zone to get its current write pointer position. Note that since the
	 * target zone is already locked, a BIO issuing context should never
	 * see the zone write pointer offset while it is being updated here.
	 */
	if (zwp_offset == DM_ZONE_INVALID_WP_OFST) {
		if (dm_update_zone_wp_offset(md, zno, &zwp_offset))
			return false;
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
	}

	switch (bio_op(clone)) {
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_FINISH:
		return true;
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		/* Writes must be aligned to the zone write pointer */
		if ((clone->bi_iter.bi_sector & (zsectors - 1)) != zwp_offset)
			return false;
		break;
	case REQ_OP_ZONE_APPEND:
		/*
		 * Change zone append operations into non-mergeable regular
		 * writes directed at the current write pointer position of the
		 * target zone.
		 */
		clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
			(clone->bi_opf & (~REQ_OP_MASK));
		clone->bi_iter.bi_sector += zwp_offset;
		break;
	default:
		DMWARN_LIMIT("Invalid BIO operation");
		return false;
	}

	/* Cannot write to a full zone */
	if (zwp_offset >= zsectors)
		return false;

	return true;
}

/*
 * Second phase of BIO mapping for targets with zone append emulation:
 * update the zone write pointer offset array to account for the additional
 * data written to a zone. Note that at this point, the remapped clone BIO
 * may already have completed, so we do not touch it.
 */
static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno,
					struct orig_bio_details *orig_bio_details,
					unsigned int nr_sectors)
{
	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);

	/* The clone BIO may already have been completed and failed */
	if (zwp_offset == DM_ZONE_INVALID_WP_OFST)
		return BLK_STS_IOERR;

	/* Update the zone wp offset */
	switch (orig_bio_details->op) {
	case REQ_OP_ZONE_RESET:
		WRITE_ONCE(md->zwp_offset[zno], 0);
		return BLK_STS_OK;
	case REQ_OP_ZONE_FINISH:
		WRITE_ONCE(md->zwp_offset[zno],
			   bdev_zone_sectors(md->disk->part0));
		return BLK_STS_OK;
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
		return BLK_STS_OK;
	case REQ_OP_ZONE_APPEND:
		/*
		 * Check that the target did not truncate the write operation
		 * emulating a zone append.
		 */
		if (nr_sectors != orig_bio_details->nr_sectors) {
			DMWARN_LIMIT("Truncated write for zone append");
			return BLK_STS_IOERR;
		}
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
		return BLK_STS_OK;
	default:
		DMWARN_LIMIT("Invalid BIO operation");
		return BLK_STS_IOERR;
	}
}

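/*
 * Per-zone write locking, using the queue sequential zone write lock
 * bitmap. A zone is locked when its BIO is mapped and unlocked from
 * dm_zone_endio() once the clone BIO completes, so that at most one write
 * pointer modifying BIO is in flight per zone.
 */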
static inline void dm_zone_lock(struct request_queue *q,
				unsigned int zno, struct bio *clone)
{
	if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)))
		return;

	wait_on_bit_lock_io(q->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
	bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED);
}

static inline void dm_zone_unlock(struct request_queue *q,
				  unsigned int zno, struct bio *clone)
{
	if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
		return;

	WARN_ON_ONCE(!test_bit(zno, q->seq_zones_wlock));
	clear_bit_unlock(zno, q->seq_zones_wlock);
	smp_mb__after_atomic();
	wake_up_bit(q->seq_zones_wlock, zno);

	bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
}

static bool dm_need_zone_wp_tracking(struct bio *bio)
{
	/*
	 * Special processing is not needed for operations that do not need the
	 * zone write lock, that is, all operations that target conventional
	 * zones and all operations that do not directly modify a sequential
	 * zone write pointer.
	 */
	if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
		return false;
	switch (bio_op(bio)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_FINISH:
	case REQ_OP_ZONE_APPEND:
		return bio_zone_is_seq(bio);
	default:
		return false;
	}
}

/*
 * Special IO mapping for targets needing zone append emulation.
 */
int dm_zone_map_bio(struct dm_target_io *tio)
{
	struct dm_io *io = tio->io;
	struct dm_target *ti = tio->ti;
	struct mapped_device *md = io->md;
	struct request_queue *q = md->queue;
	struct bio *clone = &tio->clone;
	struct orig_bio_details orig_bio_details;
	unsigned int zno;
	blk_status_t sts;
	int r;

	/*
	 * IOs that do not change a zone write pointer do not need
	 * any additional special processing.
	 */
	if (!dm_need_zone_wp_tracking(clone))
		return ti->type->map(ti, clone);

	/* Lock the target zone */
	zno = bio_zone_no(clone);
	dm_zone_lock(q, zno, clone);

	orig_bio_details.nr_sectors = bio_sectors(clone);
	orig_bio_details.op = bio_op(clone);

	/*
	 * Check that the bio and the target zone write pointer offset are
	 * both valid, and if the bio is a zone append, remap it to a write.
	 */
	if (!dm_zone_map_bio_begin(md, zno, clone)) {
		dm_zone_unlock(q, zno, clone);
		return DM_MAPIO_KILL;
	}

	/* Let the target do its work */
	r = ti->type->map(ti, clone);
	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/*
		 * The target submitted the clone BIO. The target zone will
		 * be unlocked on completion of the clone.
		 */
		sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
					  *tio->len_ptr);
		break;
	case DM_MAPIO_REMAPPED:
		/*
		 * The target only remapped the clone BIO. In case of error,
		 * unlock the target zone here as the clone will not be
		 * submitted.
		 */
		sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
					  *tio->len_ptr);
		if (sts != BLK_STS_OK)
			dm_zone_unlock(q, zno, clone);
		break;
	case DM_MAPIO_REQUEUE:
	case DM_MAPIO_KILL:
	default:
		dm_zone_unlock(q, zno, clone);
		sts = BLK_STS_IOERR;
		break;
	}

	if (sts != BLK_STS_OK)
		return DM_MAPIO_KILL;

	return r;
}

/*
 * IO completion callback called from clone_endio().
 */
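/*
 * Completion handles three cases: for targets that do not emulate zone
 * append, the position written by a native zone append is folded back into
 * the original BIO; for emulating targets, a failed write pointer modifying
 * BIO invalidates the cached write pointer offset of its target zone, while
 * a successful emulated zone append reports its write position from the
 * cached offset; finally, the zone write lock is released.
 *
 * Example with hypothetical numbers for the native case: with
 * 524288-sector zones, the mask below is 0x7ffff, so a zone append clone
 * completed at sector 1048832 yields an in-zone write position of
 * 1048832 & 0x7ffff = 256 sectors, which is added to the original BIO
 * sector.
 */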
void dm_zone_endio(struct dm_io *io, struct bio *clone)
{
	struct mapped_device *md = io->md;
	struct request_queue *q = md->queue;
	struct gendisk *disk = md->disk;
	struct bio *orig_bio = io->orig_bio;
	unsigned int zwp_offset;
	unsigned int zno;

	/*
	 * For targets that do not emulate zone append, we only need to
	 * handle native zone-append bios.
	 */
	if (!dm_emulate_zone_append(md)) {
		/*
		 * Get the offset within the zone of the written sector
		 * and add that to the original bio sector position.
		 */
		if (clone->bi_status == BLK_STS_OK &&
		    bio_op(clone) == REQ_OP_ZONE_APPEND) {
			sector_t mask =
				(sector_t)bdev_zone_sectors(disk->part0) - 1;

			orig_bio->bi_iter.bi_sector +=
				clone->bi_iter.bi_sector & mask;
		}

		return;
	}

	/*
	 * For targets that do emulate zone append, if the clone BIO does not
	 * own the target zone write lock, we have nothing to do.
	 */
	if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
		return;

	zno = bio_zone_no(orig_bio);

	if (clone->bi_status != BLK_STS_OK) {
		/*
		 * BIOs that modify a zone write pointer may leave the zone
		 * in an unknown state in case of failure (e.g. the write
		 * pointer was only partially advanced). In this case, mark
		 * the target zone write pointer offset as invalid so that it
		 * is refreshed before the next write to the zone.
		 */
		WRITE_ONCE(md->zwp_offset[zno], DM_ZONE_INVALID_WP_OFST);
	} else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
		/*
		 * Get the written sector for zone append operations that were
		 * emulated using regular write operations.
		 */
		zwp_offset = READ_ONCE(md->zwp_offset[zno]);
		if (WARN_ON_ONCE(zwp_offset < bio_sectors(orig_bio)))
			WRITE_ONCE(md->zwp_offset[zno],
				   DM_ZONE_INVALID_WP_OFST);
		else
			orig_bio->bi_iter.bi_sector +=
				zwp_offset - bio_sectors(orig_bio);
	}

	dm_zone_unlock(q, zno, clone);
}