// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to generic helper functions
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
		gfp_t gfp)
{
	struct bio *new = bio_alloc(gfp, nr_pages);

	if (bio) {
		bio_chain(bio, new);
		submit_bio(bio);
	}

	return new;
}

int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, int flags,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	unsigned int granularity;
	unsigned int op;
	int alignment;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	if (flags & BLKDEV_DISCARD_SECURE) {
		if (!blk_queue_secure_erase(q))
			return -EOPNOTSUPP;
		op = REQ_OP_SECURE_ERASE;
	} else {
		if (!blk_queue_discard(q))
			return -EOPNOTSUPP;
		op = REQ_OP_DISCARD;
	}

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	/* Zero-sector (unknown) and one-sector granularities are the same. */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	while (nr_sects) {
		unsigned int req_sects;
		sector_t end_sect, tmp;

		/*
		 * Issue in chunks of the user defined max discard setting,
		 * ensuring that bi_size doesn't overflow
		 */
		req_sects = min_t(sector_t, nr_sects,
				q->limits.max_discard_sectors);
		if (!req_sects)
			goto fail;
		if (req_sects > UINT_MAX >> 9)
			req_sects = UINT_MAX >> 9;

		/*
		 * If splitting a request, and the next starting sector would be
		 * misaligned, stop the discard at the previous aligned sector.
		 */
		end_sect = sector + req_sects;
		tmp = end_sect;
		if (req_sects < nr_sects &&
		    sector_div(tmp, granularity) != alignment) {
			end_sect = end_sect - alignment;
			sector_div(end_sect, granularity);
			end_sect = end_sect * granularity + alignment;
			req_sects = end_sect - sector;
		}

		bio = next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, op, 0);

		bio->bi_iter.bi_size = req_sects << 9;
		nr_sects -= req_sects;
		sector = end_sect;

		/*
		 * We can loop for a long time in here, if someone does
		 * full device discards (like mkfs). Be nice and allow
		 * us to schedule out to avoid softlocking if preempt
		 * is disabled.
		 */
		cond_resched();
	}

	*biop = bio;
	return 0;

fail:
	if (bio) {
		submit_bio_wait(bio);
		bio_put(bio);
	}
	*biop = NULL;
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(__blkdev_issue_discard);

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_DISCARD_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
			&bio);
	if (!ret && bio) {
		ret = submit_bio_wait(bio);
		if (ret == -EOPNOTSUPP)
			ret = 0;
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
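
/*
 * Illustrative sketch only (not part of this file): the __ variant lets a
 * caller chain several discontiguous ranges onto one anchor bio and wait
 * once for the whole batch.  The function name and the ranges below are
 * made up for the example.
 */
static int __maybe_unused example_discard_two_ranges(struct block_device *bdev)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_discard(bdev, 0, 2048, GFP_KERNEL, 0, &bio);
	if (!ret)
		ret = __blkdev_issue_discard(bdev, 1 << 20, 2048, GFP_KERNEL,
				0, &bio);
	if (bio) {
		/* Wait for the whole chain even if a later range failed. */
		int err = submit_bio_wait(bio);

		if (!ret)
			ret = err;
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}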

/**
 * __blkdev_issue_write_same - generate a number of bios with the same page
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data to write
 * @biop:	pointer to anchor bio
 *
 * Description:
 *    Generate and issue a number of REQ_OP_WRITE_SAME bios with the same page.
 */
static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
				     sector_t nr_sects, gfp_t gfp_mask,
				     struct page *page, struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_write_same_sectors;
	struct bio *bio = *biop;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	if (!bdev_write_same(bdev))
		return -EOPNOTSUPP;

	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
	max_write_same_sectors = UINT_MAX >> 9;

	while (nr_sects) {
		bio = next_bio(bio, 1, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_vcnt = 1;
		bio->bi_io_vec->bv_page = page;
		bio->bi_io_vec->bv_offset = 0;
		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);

		if (nr_sects > max_write_same_sectors) {
			bio->bi_iter.bi_size = max_write_same_sectors << 9;
			nr_sects -= max_write_same_sectors;
			sector += max_write_same_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/**
 * blkdev_issue_write_same - queue a write same operation
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data
 *
 * Description:
 *    Issue a write same request for the sectors in question.
 */
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
				sector_t nr_sects, gfp_t gfp_mask,
				struct page *page)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
			&bio);
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
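
/*
 * Illustrative sketch only (not part of this file): issuing a WRITE SAME of
 * a zero-filled page across a range.  The device replicates the first
 * logical block of @page, so a real caller would typically fill that block
 * with its pattern before issuing.  The function name is made up.
 */
static int __maybe_unused example_write_same_zero(struct block_device *bdev,
		sector_t sector, sector_t nr_sects)
{
	struct page *page;
	int ret;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return -ENOMEM;

	ret = blkdev_issue_write_same(bdev, sector, nr_sects, GFP_KERNEL,
			page);
	__free_page(page);
	return ret;
}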

static int __blkdev_issue_write_zeroes(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop, unsigned flags)
{
	struct bio *bio = *biop;
	unsigned int max_write_zeroes_sectors;
	struct request_queue *q = bdev_get_queue(bdev);

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);

	if (max_write_zeroes_sectors == 0)
		return -EOPNOTSUPP;

	while (nr_sects) {
		bio = next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_opf = REQ_OP_WRITE_ZEROES;
		if (flags & BLKDEV_ZERO_NOUNMAP)
			bio->bi_opf |= REQ_NOUNMAP;

		if (nr_sects > max_write_zeroes_sectors) {
			bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
			nr_sects -= max_write_zeroes_sectors;
			sector += max_write_zeroes_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/*
 * Convert a number of 512B sectors to a number of pages.
 * The result is limited to a number of pages that can fit into a BIO.
 * Also make sure that the result is always at least 1 (page) for the cases
 * where nr_sects is lower than the number of sectors in a page.
 */
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
	sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);

	return min(pages, (sector_t)BIO_MAX_PAGES);
}

static int __blkdev_issue_zero_pages(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	int bi_size = 0;
	unsigned int sz;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	while (nr_sects != 0) {
		bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
			       gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		while (nr_sects != 0) {
			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
			nr_sects -= bi_size >> 9;
			sector += bi_size >> 9;
			if (bi_size < sz)
				break;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}
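
/*
 * Worked example for __blkdev_sectors_to_bio_pages() above (illustrative,
 * assuming 4 KiB pages and BIO_MAX_PAGES == 256): 1..8 sectors map to
 * 1 page, 9..16 sectors to 2 pages, and anything beyond 2048 sectors
 * (1 MiB) is capped at 256 pages, i.e. one full bio per loop iteration.
 */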

/**
 * __blkdev_issue_zeroout - generate a number of zero-filled write bios
 * @bdev:	blockdev to issue
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @biop:	pointer to anchor bio
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.
 *
 *  If a device is using logical block provisioning, the underlying space will
 *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
 *
 *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
 *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
 */
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
		unsigned flags)
{
	int ret;
	sector_t bs_mask;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
			biop, flags);
	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
		return ret;

	return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
					 biop);
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);

/**
 * blkdev_issue_zeroout - zero-fill a block range
 * @bdev:	blockdev to write
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.  See __blkdev_issue_zeroout() for the
 *  valid values for %flags.
 */
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
	int ret = 0;
	sector_t bs_mask;
	struct bio *bio;
	struct blk_plug plug;
	bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

retry:
	bio = NULL;
	blk_start_plug(&plug);
	if (try_write_zeroes) {
		ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
						  gfp_mask, &bio, flags);
	} else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
		ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
						gfp_mask, &bio);
	} else {
		/* No zeroing offload support */
		ret = -EOPNOTSUPP;
	}
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	if (ret && try_write_zeroes) {
		if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
			try_write_zeroes = false;
			goto retry;
		}
		if (!bdev_write_zeroes_sectors(bdev)) {
			/*
			 * Zeroing offload support was indicated, but the
			 * device reported ILLEGAL REQUEST (for some devices
			 * there is no non-destructive way to verify whether
			 * WRITE ZEROES is actually supported).
			 */
			ret = -EOPNOTSUPP;
		}
	}

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
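
/*
 * Illustrative sketch only (not part of this file): zeroing a range while
 * asking the device to keep it provisioned.  Because BLKDEV_ZERO_NOFALLBACK
 * is not set, the helper falls back to writing zero pages if the device has
 * no WRITE ZEROES support.  The function name is made up for the example.
 */
static int __maybe_unused example_zeroout_keep_provisioned(struct block_device *bdev,
		sector_t sector, sector_t nr_sects)
{
	return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
				    BLKDEV_ZERO_NOUNMAP);
}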