--- file.c (34422914dc00b291d1c47dbdabe93b154c2f2b25)
+++ file.c (aa7f243f32e1d18036ee00d71d3ccfad70ae2121)
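The new side of this diff moves per-zone state out of struct zonefs_inode_info and reads it through struct zonefs_zone and a set of small helpers (zonefs_inode_zone(), zonefs_zone_is_seq(), zonefs_zone_is_cnv(), zonefs_inode_is_seq(), zonefs_inode_zone_mgmt(), zonefs_inode_account_active()). As a reading aid, here is a minimal sketch of the zone descriptor those helpers presumably operate on; the fields are inferred purely from their uses in this file, so the real definitions in zonefs.h may differ.

/* Hypothetical sketch, inferred from usage below -- not the zonefs.h source. */
struct zonefs_zone {
	unsigned int	z_flags;	/* zone state flags, e.g. ZONEFS_ZONE_OPEN */
	sector_t	z_sector;	/* zone start sector (512B units), was zi->i_zsector */
	loff_t		z_capacity;	/* zone capacity == max file size, was zi->i_max_size */
	loff_t		z_wpoffset;	/* write pointer offset in bytes, was zi->i_wpoffset */
};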
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Simple file system for zoned block devices exposing zones as files.
  *
  * Copyright (C) 2022 Western Digital Corporation or its affiliates.
  */
 #include <linux/module.h>
 #include <linux/pagemap.h>

--- 15 unchanged lines hidden ---

 #include "trace.h"

 static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset,
				    loff_t length, unsigned int flags,
				    struct iomap *iomap, struct iomap *srcmap)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct super_block *sb = inode->i_sb;
 	loff_t isize;

 	/*
 	 * All blocks are always mapped below EOF. If reading past EOF,
 	 * act as if there is a hole up to the file maximum size.
 	 */
 	mutex_lock(&zi->i_truncate_mutex);
 	iomap->bdev = inode->i_sb->s_bdev;
 	iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
 	isize = i_size_read(inode);
 	if (iomap->offset >= isize) {
 		iomap->type = IOMAP_HOLE;
 		iomap->addr = IOMAP_NULL_ADDR;
 		iomap->length = length;
 	} else {
 		iomap->type = IOMAP_MAPPED;
-		iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
+		iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset;
 		iomap->length = isize - iomap->offset;
 	}
 	mutex_unlock(&zi->i_truncate_mutex);

 	trace_zonefs_iomap_begin(inode, iomap);

 	return 0;
 }

 static const struct iomap_ops zonefs_read_iomap_ops = {
 	.iomap_begin = zonefs_read_iomap_begin,
 };
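The hole-vs-mapped decision above is easy to model outside the kernel. The following standalone sketch (hypothetical names; plain integers stand in for the kernel types) mirrors the logic: everything below EOF maps contiguously from the zone start, and everything at or past EOF reads as a hole.

#include <stdint.h>

enum map_type { MAP_HOLE, MAP_MAPPED };

struct mapping {
	enum map_type type;
	int64_t addr;    /* device byte address, -1 for a hole */
	int64_t length;  /* bytes covered by this mapping */
};

/* Model of zonefs_read_iomap_begin() (illustrative only). */
static struct mapping read_map(int64_t zone_start, int64_t isize,
			       int64_t offset, int64_t length)
{
	struct mapping m;

	if (offset >= isize) {
		m.type = MAP_HOLE;
		m.addr = -1;
		m.length = length;            /* hole up to the requested length */
	} else {
		m.type = MAP_MAPPED;
		m.addr = zone_start + offset; /* blocks map 1:1 from zone start */
		m.length = isize - offset;
	}
	return m;
}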
 static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset,
				     loff_t length, unsigned int flags,
				     struct iomap *iomap, struct iomap *srcmap)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct super_block *sb = inode->i_sb;
 	loff_t isize;

 	/* All write I/Os should always be within the file maximum size */
-	if (WARN_ON_ONCE(offset + length > zi->i_max_size))
+	if (WARN_ON_ONCE(offset + length > z->z_capacity))
 		return -EIO;

 	/*
 	 * Sequential zones can only accept direct writes. This is already
 	 * checked when writes are issued, so warn if we see a page writeback
 	 * operation.
 	 */
-	if (WARN_ON_ONCE(zonefs_zone_is_seq(zi) && !(flags & IOMAP_DIRECT)))
+	if (WARN_ON_ONCE(zonefs_zone_is_seq(z) && !(flags & IOMAP_DIRECT)))
 		return -EIO;

 	/*
 	 * For conventional zones, all blocks are always mapped. For sequential
 	 * zones, all blocks are always mapped below the inode size (zone
 	 * write pointer) and unwritten beyond.
 	 */
 	mutex_lock(&zi->i_truncate_mutex);
 	iomap->bdev = inode->i_sb->s_bdev;
 	iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
-	iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
+	iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset;
 	isize = i_size_read(inode);
 	if (iomap->offset >= isize) {
 		iomap->type = IOMAP_UNWRITTEN;
-		iomap->length = zi->i_max_size - iomap->offset;
+		iomap->length = z->z_capacity - iomap->offset;
 	} else {
 		iomap->type = IOMAP_MAPPED;
 		iomap->length = isize - iomap->offset;
 	}
 	mutex_unlock(&zi->i_truncate_mutex);

 	trace_zonefs_iomap_begin(inode, iomap);

--- 16 unchanged lines hidden ---
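The write-side convention differs from the read side in one way: past the write pointer the range is reported as unwritten up to the zone capacity rather than as a hole. A standalone model (hypothetical names again):

#include <stdint.h>

enum wmap_type { WMAP_MAPPED, WMAP_UNWRITTEN };

struct wmapping {
	enum wmap_type type;
	int64_t addr;
	int64_t length;
};

/* Model of zonefs_write_iomap_begin()'s mapped/unwritten split. */
static struct wmapping write_map(int64_t zone_start, int64_t capacity,
				 int64_t isize, int64_t offset)
{
	struct wmapping m = { .addr = zone_start + offset };

	if (offset >= isize) {
		m.type = WMAP_UNWRITTEN;      /* between write pointer and capacity */
		m.length = capacity - offset;
	} else {
		m.type = WMAP_MAPPED;         /* already-written blocks */
		m.length = isize - offset;
	}
	return m;
}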
 /*
  * Map blocks for page writeback. This is used only on conventional zone files,
  * which implies that the page range can only be within the fixed inode size.
  */
 static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc,
				    struct inode *inode, loff_t offset)
 {
-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);

-	if (WARN_ON_ONCE(zonefs_zone_is_seq(zi)))
+	if (WARN_ON_ONCE(zonefs_zone_is_seq(z)))
 		return -EIO;
 	if (WARN_ON_ONCE(offset >= i_size_read(inode)))
 		return -EIO;

 	/* If the mapping is already OK, nothing needs to be done */
 	if (offset >= wpc->iomap.offset &&
 	    offset < wpc->iomap.offset + wpc->iomap.length)
 		return 0;

-	return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset,
+	return zonefs_write_iomap_begin(inode, offset,
+					z->z_capacity - offset,
 					IOMAP_WRITE, &wpc->iomap, NULL);
 }

 static const struct iomap_writeback_ops zonefs_writeback_ops = {
 	.map_blocks = zonefs_write_map_blocks,
 };

 static int zonefs_writepages(struct address_space *mapping,

--- 31 unchanged lines hidden ---

 	.error_remove_page	= generic_error_remove_page,
 	.direct_IO		= noop_direct_IO,
 	.swap_activate		= zonefs_swap_activate,
 };
 int zonefs_file_truncate(struct inode *inode, loff_t isize)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	loff_t old_isize;
 	enum req_op op;
 	int ret = 0;

 	/*
 	 * Only sequential zone files can be truncated and truncation is allowed
 	 * only down to a 0 size, which is equivalent to a zone reset, and to
 	 * the maximum file size, which is equivalent to a zone finish.
 	 */
-	if (!zonefs_zone_is_seq(zi))
+	if (!zonefs_zone_is_seq(z))
 		return -EPERM;

 	if (!isize)
 		op = REQ_OP_ZONE_RESET;
-	else if (isize == zi->i_max_size)
+	else if (isize == z->z_capacity)
 		op = REQ_OP_ZONE_FINISH;
 	else
 		return -EPERM;

 	inode_dio_wait(inode);

 	/* Serialize against page faults */
 	filemap_invalidate_lock(inode->i_mapping);

 	/* Serialize against zonefs_iomap_begin() */
 	mutex_lock(&zi->i_truncate_mutex);

 	old_isize = i_size_read(inode);
 	if (isize == old_isize)
 		goto unlock;

-	ret = zonefs_zone_mgmt(inode, op);
+	ret = zonefs_inode_zone_mgmt(inode, op);
 	if (ret)
 		goto unlock;

 	/*
 	 * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set,
 	 * take care of open zones.
 	 */
-	if (zi->i_flags & ZONEFS_ZONE_OPEN) {
+	if (z->z_flags & ZONEFS_ZONE_OPEN) {
 		/*
 		 * Truncating a zone to EMPTY or FULL is the equivalent of
 		 * closing the zone. For a truncation to 0, we need to
 		 * re-open the zone to ensure new writes can be processed.
 		 * For a truncation to the maximum file size, the zone is
 		 * closed and writes cannot be accepted anymore, so clear
 		 * the open flag.
 		 */
 		if (!isize)
-			ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
+			ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
 		else
-			zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+			z->z_flags &= ~ZONEFS_ZONE_OPEN;
 	}

 	zonefs_update_stats(inode, isize);
 	truncate_setsize(inode, isize);
-	zi->i_wpoffset = isize;
-	zonefs_account_active(inode);
+	z->z_wpoffset = isize;
+	zonefs_inode_account_active(inode);

 unlock:
 	mutex_unlock(&zi->i_truncate_mutex);
 	filemap_invalidate_unlock(inode->i_mapping);

 	return ret;
 }
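Truncation therefore only ever maps to two zone operations. A compact model of that decision (illustrative, hypothetical names):

#include <errno.h>
#include <stdint.h>

enum zone_op { ZONE_RESET, ZONE_FINISH };

/*
 * Model of the zonefs_file_truncate() policy: a sequential zone file may
 * only be truncated to 0 (zone reset) or to the zone capacity (zone
 * finish); any other size is rejected.
 */
static int truncate_op(int is_seq, int64_t isize, int64_t capacity,
		       enum zone_op *op)
{
	if (!is_seq)
		return -EPERM;
	if (isize == 0)
		*op = ZONE_RESET;
	else if (isize == capacity)
		*op = ZONE_FINISH;
	else
		return -EPERM;
	return 0;
}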
--- 90 unchanged lines hidden ---

 	struct inode *inode = file_inode(iocb->ki_filp);
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);

 	if (error) {
 		zonefs_io_error(inode, true);
 		return error;
 	}

-	if (size && zonefs_zone_is_seq(zi)) {
+	if (size && zonefs_inode_is_seq(inode)) {
 		/*
 		 * Note that we may be seeing completions out of order,
 		 * but that is not a problem since a write completed
 		 * successfully necessarily means that all preceding writes
 		 * were also successful. So we can safely increase the inode
 		 * size to the write end location.
 		 */
 		mutex_lock(&zi->i_truncate_mutex);

--- 9 unchanged lines hidden ---

 static const struct iomap_dio_ops zonefs_write_dio_ops = {
 	.end_io = zonefs_file_write_dio_end_io,
 };
 static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct block_device *bdev = inode->i_sb->s_bdev;
 	unsigned int max = bdev_max_zone_append_sectors(bdev);
 	struct bio *bio;
 	ssize_t size;
 	int nr_pages;
 	ssize_t ret;

 	max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
 	iov_iter_truncate(from, max);

 	nr_pages = iov_iter_npages(from, BIO_MAX_VECS);
 	if (!nr_pages)
 		return 0;

 	bio = bio_alloc(bdev, nr_pages,
 			REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS);
-	bio->bi_iter.bi_sector = zi->i_zsector;
+	bio->bi_iter.bi_sector = z->z_sector;
 	bio->bi_ioprio = iocb->ki_ioprio;
 	if (iocb_is_dsync(iocb))
 		bio->bi_opf |= REQ_FUA;

 	ret = bio_iov_iter_get_pages(bio, from);
 	if (unlikely(ret))
 		goto out_release;

--- 8 unchanged lines hidden ---

 	/*
 	 * If the file zone was written underneath the file system, the zone
 	 * write pointer may not be where we expect it to be, but the zone
 	 * append write can still succeed. So check manually that we wrote where
 	 * we intended to, that is, at zi->i_wpoffset.
 	 */
 	if (!ret) {
 		sector_t wpsector =
-			zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT);
+			z->z_sector + (z->z_wpoffset >> SECTOR_SHIFT);

 		if (bio->bi_iter.bi_sector != wpsector) {
 			zonefs_warn(inode->i_sb,
 				"Corrupted write pointer %llu for zone at %llu\n",
-				wpsector, zi->i_zsector);
+				wpsector, z->z_sector);
 			ret = -EIO;
 		}
 	}

 	zonefs_file_write_dio_end_io(iocb, size, ret, 0);
 	trace_zonefs_file_dio_append(inode, size, ret);

 out_release:

--- 11 unchanged lines hidden ---
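The post-completion check relies on the sector/byte conversion used throughout this file: a zone append reports the sector it actually landed on, which must equal the zone start plus the expected write pointer offset. Illustrative arithmetic (hypothetical standalone form):

#include <stdint.h>

#define SECTOR_SHIFT_BITS 9 /* 512-byte sectors, as in the kernel's SECTOR_SHIFT */

/*
 * Model of the zonefs_file_dio_append() verification: compare the sector
 * reported by the zone-append completion against the expected write
 * pointer position.
 */
static int append_landed_where_expected(uint64_t zone_start_sector,
					uint64_t wp_offset_bytes,
					uint64_t completed_sector)
{
	uint64_t expected = zone_start_sector +
			    (wp_offset_bytes >> SECTOR_SHIFT_BITS);

	return completed_sector == expected;
}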
 /*
  * Do not exceed the LFS limits nor the file zone size. If pos is under the
  * limit it becomes a short access. If it exceeds the limit, return -EFBIG.
  */
 static loff_t zonefs_write_check_limits(struct file *file, loff_t pos,
					 loff_t count)
 {
 	struct inode *inode = file_inode(file);
-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	loff_t limit = rlimit(RLIMIT_FSIZE);
-	loff_t max_size = zi->i_max_size;
+	loff_t max_size = z->z_capacity;

 	if (limit != RLIM_INFINITY) {
 		if (pos >= limit) {
 			send_sig(SIGXFSZ, current, 0);
 			return -EFBIG;
 		}
 		count = min(count, limit - pos);
 	}

--- 7 unchanged lines hidden ---

 	return min(count, max_size - pos);
 }
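In other words, a write is shortened to whatever fits under both the RLIMIT_FSIZE limit and the zone capacity, and fails with -EFBIG only when its start position is already past a limit. A standalone model of that clamping (hypothetical; the elided lines of the original are assumed to check pos against max_size the same way the rlimit branch does):

#include <errno.h>
#include <stdint.h>

#define NO_LIMIT (-1LL)

/*
 * Model of zonefs_write_check_limits(): clamp a write of `count` bytes at
 * `pos` to the process file-size limit and the zone capacity. Returns the
 * allowed count, or -EFBIG if pos is already past a limit.
 */
static int64_t check_limits(int64_t pos, int64_t count,
			    int64_t limit, int64_t max_size)
{
	if (limit != NO_LIMIT) {
		if (pos >= limit)
			return -EFBIG;
		if (count > limit - pos)
			count = limit - pos;
	}
	if (pos >= max_size)
		return -EFBIG;
	return count < max_size - pos ? count : max_size - pos;
}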
 static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	loff_t count;

 	if (IS_SWAPFILE(inode))
 		return -ETXTBSY;

 	if (!iov_iter_count(from))
 		return 0;

 	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
 		return -EINVAL;

 	if (iocb->ki_flags & IOCB_APPEND) {
-		if (zonefs_zone_is_cnv(zi))
+		if (zonefs_zone_is_cnv(z))
 			return -EINVAL;
 		mutex_lock(&zi->i_truncate_mutex);
-		iocb->ki_pos = zi->i_wpoffset;
+		iocb->ki_pos = z->z_wpoffset;
 		mutex_unlock(&zi->i_truncate_mutex);
 	}

 	count = zonefs_write_check_limits(file, iocb->ki_pos,
 					  iov_iter_count(from));
 	if (count < 0)
 		return count;

--- 10 unchanged lines hidden ---

  * elevator feature is being used (e.g. mq-deadline). The block layer always
  * automatically selects such an elevator for zoned block devices during the
  * device initialization.
  */
 static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct super_block *sb = inode->i_sb;
 	bool sync = is_sync_kiocb(iocb);
 	bool append = false;
 	ssize_t ret, count;

 	/*
 	 * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
 	 * as this can cause write reordering (e.g. the first aio gets EAGAIN
 	 * on the inode lock but the second goes through but is now unaligned).
 	 */
-	if (zonefs_zone_is_seq(zi) && !sync && (iocb->ki_flags & IOCB_NOWAIT))
+	if (zonefs_zone_is_seq(z) && !sync && (iocb->ki_flags & IOCB_NOWAIT))
 		return -EOPNOTSUPP;

 	if (iocb->ki_flags & IOCB_NOWAIT) {
 		if (!inode_trylock(inode))
 			return -EAGAIN;
 	} else {
 		inode_lock(inode);
 	}

--- 5 unchanged lines hidden ---

 	}

 	if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
 		ret = -EINVAL;
 		goto inode_unlock;
 	}

 	/* Enforce sequential writes (append only) in sequential zones */
-	if (zonefs_zone_is_seq(zi)) {
+	if (zonefs_zone_is_seq(z)) {
 		mutex_lock(&zi->i_truncate_mutex);
-		if (iocb->ki_pos != zi->i_wpoffset) {
+		if (iocb->ki_pos != z->z_wpoffset) {
 			mutex_unlock(&zi->i_truncate_mutex);
 			ret = -EINVAL;
 			goto inode_unlock;
 		}
 		mutex_unlock(&zi->i_truncate_mutex);
 		append = sync;
 	}

 	if (append)
 		ret = zonefs_file_dio_append(iocb, from);
 	else
 		ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
 				   &zonefs_write_dio_ops, 0, NULL, 0);
-	if (zonefs_zone_is_seq(zi) &&
+	if (zonefs_zone_is_seq(z) &&
 	    (ret > 0 || ret == -EIOCBQUEUED)) {
 		if (ret > 0)
 			count = ret;

 		/*
 		 * Update the zone write pointer offset assuming the write
 		 * operation succeeded. If it did not, the error recovery path
 		 * will correct it. Also do active seq file accounting.
 		 */
 		mutex_lock(&zi->i_truncate_mutex);
-		zi->i_wpoffset += count;
-		zonefs_account_active(inode);
+		z->z_wpoffset += count;
+		zonefs_inode_account_active(inode);
 		mutex_unlock(&zi->i_truncate_mutex);
 	}

 inode_unlock:
 	inode_unlock(inode);

 	return ret;
 }

--- 34 unchanged lines hidden ---

 	ret = generic_write_sync(iocb, ret);

 	return ret;
 }
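The sequential-zone rules enforced above reduce to: a write must start exactly at the write pointer, and on presumed success the pointer advances by the amount written. A small standalone model (hypothetical names):

#include <errno.h>
#include <stdint.h>

/* Model of the append-only rule in zonefs_file_dio_write(). */
struct seq_zone_model {
	int64_t wpoffset;  /* current write pointer offset in bytes */
	int64_t capacity;  /* zone capacity in bytes */
};

static int64_t seq_write(struct seq_zone_model *z, int64_t pos, int64_t count)
{
	if (pos != z->wpoffset)
		return -EINVAL;        /* not at the write pointer */
	if (pos + count > z->capacity)
		return -EFBIG;         /* would exceed the zone */
	z->wpoffset += count;          /* optimistic update, as in the original */
	return count;
}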
 static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);

 	if (unlikely(IS_IMMUTABLE(inode)))
 		return -EPERM;

 	if (sb_rdonly(inode->i_sb))
 		return -EROFS;

-	/* Write operations beyond the zone size are not allowed */
-	if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size)
+	/* Write operations beyond the zone capacity are not allowed */
+	if (iocb->ki_pos >= z->z_capacity)
 		return -EFBIG;

 	if (iocb->ki_flags & IOCB_DIRECT) {
 		ssize_t ret = zonefs_file_dio_write(iocb, from);

 		if (ret != -ENOTBLK)
 			return ret;
 	}

--- 15 unchanged lines hidden ---

 static const struct iomap_dio_ops zonefs_read_dio_ops = {
 	.end_io = zonefs_file_read_dio_end_io,
 };

 static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct super_block *sb = inode->i_sb;
 	loff_t isize;
 	ssize_t ret;

 	/* Offline zones cannot be read */
 	if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777)))
 		return -EPERM;

-	if (iocb->ki_pos >= zi->i_max_size)
+	if (iocb->ki_pos >= z->z_capacity)
 		return 0;

 	if (iocb->ki_flags & IOCB_NOWAIT) {
 		if (!inode_trylock_shared(inode))
 			return -EAGAIN;
 	} else {
 		inode_lock_shared(inode);
 	}

--- 44 unchanged lines hidden ---

 		return false;

 	return true;
 }

 static int zonefs_seq_file_write_open(struct inode *inode)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	int ret = 0;

 	mutex_lock(&zi->i_truncate_mutex);

 	if (!zi->i_wr_refcnt) {
 		struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
 		unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files);

 		if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {

 			if (sbi->s_max_wro_seq_files
 			    && wro > sbi->s_max_wro_seq_files) {
 				atomic_dec(&sbi->s_wro_seq_files);
 				ret = -EBUSY;
 				goto unlock;
 			}

-			if (i_size_read(inode) < zi->i_max_size) {
-				ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
+			if (i_size_read(inode) < z->z_capacity) {
+				ret = zonefs_inode_zone_mgmt(inode,
+							     REQ_OP_ZONE_OPEN);
 				if (ret) {
 					atomic_dec(&sbi->s_wro_seq_files);
 					goto unlock;
 				}
-				zi->i_flags |= ZONEFS_ZONE_OPEN;
-				zonefs_account_active(inode);
+				z->z_flags |= ZONEFS_ZONE_OPEN;
+				zonefs_inode_account_active(inode);
 			}
 		}
 	}

 	zi->i_wr_refcnt++;

 unlock:
 	mutex_unlock(&zi->i_truncate_mutex);

--- 13 unchanged lines hidden ---

 		return zonefs_seq_file_write_open(inode);

 	return 0;
 }
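With ZONEFS_MNTOPT_EXPLICIT_OPEN, the first writer of a sequential file explicitly opens the zone, and the superblock caps how many files may be write-open at once. A sketch of that admission logic (hypothetical standalone form; error paths simplified):

#include <errno.h>

/* Model of zonefs_seq_file_write_open()'s explicit-open accounting. */
struct wro_accounting {
	unsigned int wro_files;     /* files currently open for writing */
	unsigned int max_wro_files; /* 0 means "no cap" */
};

static int admit_writer(struct wro_accounting *acct)
{
	acct->wro_files++;
	if (acct->max_wro_files && acct->wro_files > acct->max_wro_files) {
		acct->wro_files--;  /* roll back, as the kernel code does */
		return -EBUSY;
	}
	return 0;                   /* caller may now issue REQ_OP_ZONE_OPEN */
}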
 static void zonefs_seq_file_write_close(struct inode *inode)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct super_block *sb = inode->i_sb;
 	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
 	int ret = 0;

 	mutex_lock(&zi->i_truncate_mutex);

 	zi->i_wr_refcnt--;
 	if (zi->i_wr_refcnt)
 		goto unlock;

 	/*
 	 * The file zone may not be open anymore (e.g. the file was truncated to
 	 * its maximum size or it was fully written). For this case, we only
 	 * need to decrement the write open count.
 	 */
-	if (zi->i_flags & ZONEFS_ZONE_OPEN) {
-		ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
+	if (z->z_flags & ZONEFS_ZONE_OPEN) {
+		ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
 		if (ret) {
 			__zonefs_io_error(inode, false);
 			/*
 			 * Leaving zones explicitly open may lead to a state
 			 * where most zones cannot be written (zone resources
 			 * exhausted). So take preventive action by remounting
 			 * read-only.
 			 */
-			if (zi->i_flags & ZONEFS_ZONE_OPEN &&
+			if (z->z_flags & ZONEFS_ZONE_OPEN &&
 			    !(sb->s_flags & SB_RDONLY)) {
 				zonefs_warn(sb,
 					"closing zone at %llu failed %d\n",
-					zi->i_zsector, ret);
+					z->z_sector, ret);
 				zonefs_warn(sb,
 					"remounting filesystem read-only\n");
 				sb->s_flags |= SB_RDONLY;
 			}
 			goto unlock;
 		}

-		zi->i_flags &= ~ZONEFS_ZONE_OPEN;
-		zonefs_account_active(inode);
+		z->z_flags &= ~ZONEFS_ZONE_OPEN;
+		zonefs_inode_account_active(inode);
 	}

 	atomic_dec(&sbi->s_wro_seq_files);

 unlock:
 	mutex_unlock(&zi->i_truncate_mutex);
 }

--- 26 unchanged lines hidden ---