--- file.c (34422914dc00b291d1c47dbdabe93b154c2f2b25)
+++ file.c (aa7f243f32e1d18036ee00d71d3ccfad70ae2121)
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Simple file system for zoned block devices exposing zones as files.
  *
  * Copyright (C) 2022 Western Digital Corporation or its affiliates.
  */
 #include <linux/module.h>
 #include <linux/pagemap.h>

[... 15 unchanged lines hidden ...]

 #include "trace.h"

 static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset,
				    loff_t length, unsigned int flags,
				    struct iomap *iomap, struct iomap *srcmap)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct super_block *sb = inode->i_sb;
 	loff_t isize;

 	/*
 	 * All blocks are always mapped below EOF. If reading past EOF,
 	 * act as if there is a hole up to the file maximum size.
 	 */
 	mutex_lock(&zi->i_truncate_mutex);
 	iomap->bdev = inode->i_sb->s_bdev;
 	iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
 	isize = i_size_read(inode);
 	if (iomap->offset >= isize) {
 		iomap->type = IOMAP_HOLE;
 		iomap->addr = IOMAP_NULL_ADDR;
 		iomap->length = length;
 	} else {
 		iomap->type = IOMAP_MAPPED;
-		iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
+		iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset;
 		iomap->length = isize - iomap->offset;
 	}
 	mutex_unlock(&zi->i_truncate_mutex);

 	trace_zonefs_iomap_begin(inode, iomap);

 	return 0;
 }

 static const struct iomap_ops zonefs_read_iomap_ops = {
 	.iomap_begin	= zonefs_read_iomap_begin,
 };

 static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset,
				     loff_t length, unsigned int flags,
				     struct iomap *iomap, struct iomap *srcmap)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct super_block *sb = inode->i_sb;
 	loff_t isize;

 	/* All write I/Os should always be within the file maximum size */
-	if (WARN_ON_ONCE(offset + length > zi->i_max_size))
+	if (WARN_ON_ONCE(offset + length > z->z_capacity))
 		return -EIO;

 	/*
 	 * Sequential zones can only accept direct writes. This is already
 	 * checked when writes are issued, so warn if we see a page writeback
 	 * operation.
 	 */
-	if (WARN_ON_ONCE(zonefs_zone_is_seq(zi) && !(flags & IOMAP_DIRECT)))
+	if (WARN_ON_ONCE(zonefs_zone_is_seq(z) && !(flags & IOMAP_DIRECT)))
 		return -EIO;

 	/*
 	 * For conventional zones, all blocks are always mapped. For sequential
 	 * zones, all blocks are always mapped below the inode size (zone
 	 * write pointer) and unwritten beyond.
 	 */
 	mutex_lock(&zi->i_truncate_mutex);
 	iomap->bdev = inode->i_sb->s_bdev;
 	iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
-	iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
+	iomap->addr = (z->z_sector << SECTOR_SHIFT) + iomap->offset;
 	isize = i_size_read(inode);
 	if (iomap->offset >= isize) {
 		iomap->type = IOMAP_UNWRITTEN;
-		iomap->length = zi->i_max_size - iomap->offset;
+		iomap->length = z->z_capacity - iomap->offset;
 	} else {
 		iomap->type = IOMAP_MAPPED;
 		iomap->length = isize - iomap->offset;
 	}
 	mutex_unlock(&zi->i_truncate_mutex);

 	trace_zonefs_iomap_begin(inode, iomap);

[... 16 unchanged lines hidden ...]

 /*
  * Map blocks for page writeback. This is used only on conventional zone files,
  * which implies that the page range can only be within the fixed inode size.
  */
 static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc,
				    struct inode *inode, loff_t offset)
 {
-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);

-	if (WARN_ON_ONCE(zonefs_zone_is_seq(zi)))
+	if (WARN_ON_ONCE(zonefs_zone_is_seq(z)))
 		return -EIO;
 	if (WARN_ON_ONCE(offset >= i_size_read(inode)))
 		return -EIO;

 	/* If the mapping is already OK, nothing needs to be done */
 	if (offset >= wpc->iomap.offset &&
 	    offset < wpc->iomap.offset + wpc->iomap.length)
 		return 0;

-	return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset,
+	return zonefs_write_iomap_begin(inode, offset,
+					z->z_capacity - offset,
 					IOMAP_WRITE, &wpc->iomap, NULL);
 }

 static const struct iomap_writeback_ops zonefs_writeback_ops = {
 	.map_blocks		= zonefs_write_map_blocks,
 };

 static int zonefs_writepages(struct address_space *mapping,

[... 31 unchanged lines hidden ...]

 	.error_remove_page	= generic_error_remove_page,
 	.direct_IO		= noop_direct_IO,
 	.swap_activate		= zonefs_swap_activate,
 };

 int zonefs_file_truncate(struct inode *inode, loff_t isize)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	loff_t old_isize;
 	enum req_op op;
 	int ret = 0;

 	/*
 	 * Only sequential zone files can be truncated and truncation is allowed
 	 * only down to a 0 size, which is equivalent to a zone reset, and to
 	 * the maximum file size, which is equivalent to a zone finish.
 	 */
-	if (!zonefs_zone_is_seq(zi))
+	if (!zonefs_zone_is_seq(z))
 		return -EPERM;

 	if (!isize)
 		op = REQ_OP_ZONE_RESET;
-	else if (isize == zi->i_max_size)
+	else if (isize == z->z_capacity)
 		op = REQ_OP_ZONE_FINISH;
 	else
 		return -EPERM;

 	inode_dio_wait(inode);

 	/* Serialize against page faults */
 	filemap_invalidate_lock(inode->i_mapping);

 	/* Serialize against zonefs_iomap_begin() */
 	mutex_lock(&zi->i_truncate_mutex);

 	old_isize = i_size_read(inode);
 	if (isize == old_isize)
 		goto unlock;

-	ret = zonefs_zone_mgmt(inode, op);
+	ret = zonefs_inode_zone_mgmt(inode, op);
 	if (ret)
 		goto unlock;

 	/*
 	 * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set,
 	 * take care of open zones.
 	 */
-	if (zi->i_flags & ZONEFS_ZONE_OPEN) {
+	if (z->z_flags & ZONEFS_ZONE_OPEN) {
 		/*
 		 * Truncating a zone to EMPTY or FULL is the equivalent of
 		 * closing the zone. For a truncation to 0, we need to
 		 * re-open the zone to ensure new writes can be processed.
 		 * For a truncation to the maximum file size, the zone is
 		 * closed and writes cannot be accepted anymore, so clear
 		 * the open flag.
 		 */
 		if (!isize)
-			ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
+			ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
 		else
-			zi->i_flags &= ~ZONEFS_ZONE_OPEN;
+			z->z_flags &= ~ZONEFS_ZONE_OPEN;
 	}

 	zonefs_update_stats(inode, isize);
 	truncate_setsize(inode, isize);
-	zi->i_wpoffset = isize;
-	zonefs_account_active(inode);
+	z->z_wpoffset = isize;
+	zonefs_inode_account_active(inode);

 unlock:
 	mutex_unlock(&zi->i_truncate_mutex);
 	filemap_invalidate_unlock(inode->i_mapping);

 	return ret;
 }
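For reference, the truncate rules enforced above are visible directly from user space: a sequential zone file accepts ftruncate() only to 0 (a zone reset) or to the zone capacity (a zone finish), and anything else fails with EPERM. A minimal user-space sketch, assuming a zonefs mount at /mnt/zonefs and a zone capacity larger than 4 KiB (the path and probe size are illustrative, not from this patch):

	/* Hedged sketch of zonefs_file_truncate() semantics as seen from
	 * user space. The mount point and probe size are assumptions. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/mnt/zonefs/seq/0", O_RDWR);

		if (fd < 0)
			return 1;

		/* Truncation to 0 maps to REQ_OP_ZONE_RESET. */
		if (ftruncate(fd, 0))
			perror("truncate to 0");

		/* Any size other than 0 or the zone capacity is refused. */
		if (ftruncate(fd, 4096))
			perror("truncate to 4096");	/* expected: EPERM */

		close(fd);
		return 0;
	}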

[... 90 unchanged lines hidden ...]

 	struct inode *inode = file_inode(iocb->ki_filp);
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);

 	if (error) {
 		zonefs_io_error(inode, true);
 		return error;
 	}

-	if (size && zonefs_zone_is_seq(zi)) {
+	if (size && zonefs_inode_is_seq(inode)) {
 		/*
 		 * Note that we may be seeing completions out of order,
 		 * but that is not a problem since a write completed
 		 * successfully necessarily means that all preceding writes
 		 * were also successful. So we can safely increase the inode
 		 * size to the write end location.
 		 */
 		mutex_lock(&zi->i_truncate_mutex);

[... 9 unchanged lines hidden ...]

 static const struct iomap_dio_ops zonefs_write_dio_ops = {
 	.end_io			= zonefs_file_write_dio_end_io,
 };

 static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct block_device *bdev = inode->i_sb->s_bdev;
 	unsigned int max = bdev_max_zone_append_sectors(bdev);
 	struct bio *bio;
 	ssize_t size;
 	int nr_pages;
 	ssize_t ret;

 	max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
 	iov_iter_truncate(from, max);

 	nr_pages = iov_iter_npages(from, BIO_MAX_VECS);
 	if (!nr_pages)
 		return 0;

 	bio = bio_alloc(bdev, nr_pages,
 			REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS);
-	bio->bi_iter.bi_sector = zi->i_zsector;
+	bio->bi_iter.bi_sector = z->z_sector;
 	bio->bi_ioprio = iocb->ki_ioprio;
 	if (iocb_is_dsync(iocb))
 		bio->bi_opf |= REQ_FUA;

 	ret = bio_iov_iter_get_pages(bio, from);
 	if (unlikely(ret))
 		goto out_release;

[... 8 unchanged lines hidden ...]

 	/*
 	 * If the file zone was written underneath the file system, the zone
 	 * write pointer may not be where we expect it to be, but the zone
 	 * append write can still succeed. So check manually that we wrote
 	 * where we intended to, that is, at the zone write pointer offset.
 	 */
 	if (!ret) {
 		sector_t wpsector =
-			zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT);
+			z->z_sector + (z->z_wpoffset >> SECTOR_SHIFT);

 		if (bio->bi_iter.bi_sector != wpsector) {
 			zonefs_warn(inode->i_sb,
 				"Corrupted write pointer %llu for zone at %llu\n",
-				wpsector, zi->i_zsector);
+				wpsector, z->z_sector);
 			ret = -EIO;
 		}
 	}

 	zonefs_file_write_dio_end_io(iocb, size, ret, 0);
 	trace_zonefs_file_dio_append(inode, size, ret);

 out_release:

[... 11 unchanged lines hidden ...]
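The post-completion check above recomputes where the zone append should have landed: the zone start sector plus the write pointer offset converted to 512-byte sectors (SECTOR_SHIFT is 9). A worked sketch of that arithmetic, with made-up values:

	/* Illustrative arithmetic for the wpsector check; values are made up. */
	#include <stdio.h>

	#define SECTOR_SHIFT 9	/* 512-byte sectors, as in the kernel */

	int main(void)
	{
		unsigned long long z_sector = 524288;	/* zone start sector */
		unsigned long long wpoffset = 1 << 20;	/* 1 MiB written */
		unsigned long long wpsector = z_sector + (wpoffset >> SECTOR_SHIFT);

		/* 524288 + 2048 = 526336: where the append must have landed */
		printf("expected write pointer sector: %llu\n", wpsector);
		return 0;
	}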

 /*
  * Do not exceed the LFS limits nor the file zone size. If pos is under the
  * limit it becomes a short access. If it exceeds the limit, return -EFBIG.
  */
 static loff_t zonefs_write_check_limits(struct file *file, loff_t pos,
					 loff_t count)
 {
 	struct inode *inode = file_inode(file);
-	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	loff_t limit = rlimit(RLIMIT_FSIZE);
-	loff_t max_size = zi->i_max_size;
+	loff_t max_size = z->z_capacity;

 	if (limit != RLIM_INFINITY) {
 		if (pos >= limit) {
 			send_sig(SIGXFSZ, current, 0);
 			return -EFBIG;
 		}
 		count = min(count, limit - pos);
 	}

[... 7 unchanged lines hidden ...]

 	return min(count, max_size - pos);
 }

 static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	loff_t count;

 	if (IS_SWAPFILE(inode))
 		return -ETXTBSY;

 	if (!iov_iter_count(from))
 		return 0;

 	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
 		return -EINVAL;

 	if (iocb->ki_flags & IOCB_APPEND) {
-		if (zonefs_zone_is_cnv(zi))
+		if (zonefs_zone_is_cnv(z))
 			return -EINVAL;
 		mutex_lock(&zi->i_truncate_mutex);
-		iocb->ki_pos = zi->i_wpoffset;
+		iocb->ki_pos = z->z_wpoffset;
 		mutex_unlock(&zi->i_truncate_mutex);
 	}

 	count = zonefs_write_check_limits(file, iocb->ki_pos,
 					  iov_iter_count(from));
 	if (count < 0)
 		return count;

[... 10 unchanged lines hidden ...]
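The IOCB_APPEND branch above positions an O_APPEND write at the zone write pointer (z->z_wpoffset) rather than at the inode size alone. A minimal user-space sketch, assuming a zonefs mount at /mnt/zonefs and a 4 KiB logical block size (both assumptions; sequential files only accept direct I/O, hence O_DIRECT and the aligned buffer):

	/* Hedged sketch: O_APPEND on a zonefs sequential file continues at
	 * the zone write pointer. Path and 4 KiB block size are assumed. */
	#define _GNU_SOURCE	/* for O_DIRECT */
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		void *buf;
		int fd = open("/mnt/zonefs/seq/0", O_WRONLY | O_DIRECT | O_APPEND);

		if (fd < 0)
			return 1;
		if (posix_memalign(&buf, 4096, 4096))
			return 1;
		memset(buf, 0xab, 4096);

		/* The kernel sets the write position to the write pointer, so
		 * back-to-back appends stay sequential even after a partial
		 * fill of the zone. */
		if (write(fd, buf, 4096) != 4096)
			perror("append write");

		free(buf);
		close(fd);
		return 0;
	}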

  * elevator feature is being used (e.g. mq-deadline). The block layer always
  * automatically selects such an elevator for zoned block devices during
  * device initialization.
  */
 static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct super_block *sb = inode->i_sb;
 	bool sync = is_sync_kiocb(iocb);
 	bool append = false;
 	ssize_t ret, count;

 	/*
 	 * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
 	 * as this can cause write reordering (e.g. the first aio gets EAGAIN
 	 * on the inode lock but the second goes through and is now unaligned).
 	 */
-	if (zonefs_zone_is_seq(zi) && !sync && (iocb->ki_flags & IOCB_NOWAIT))
+	if (zonefs_zone_is_seq(z) && !sync && (iocb->ki_flags & IOCB_NOWAIT))
 		return -EOPNOTSUPP;

 	if (iocb->ki_flags & IOCB_NOWAIT) {
 		if (!inode_trylock(inode))
 			return -EAGAIN;
 	} else {
 		inode_lock(inode);
 	}

[... 5 unchanged lines hidden ...]

 	}

 	if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
 		ret = -EINVAL;
 		goto inode_unlock;
 	}

 	/* Enforce sequential writes (append only) in sequential zones */
-	if (zonefs_zone_is_seq(zi)) {
+	if (zonefs_zone_is_seq(z)) {
 		mutex_lock(&zi->i_truncate_mutex);
-		if (iocb->ki_pos != zi->i_wpoffset) {
+		if (iocb->ki_pos != z->z_wpoffset) {
 			mutex_unlock(&zi->i_truncate_mutex);
 			ret = -EINVAL;
 			goto inode_unlock;
 		}
 		mutex_unlock(&zi->i_truncate_mutex);
 		append = sync;
 	}

 	if (append)
 		ret = zonefs_file_dio_append(iocb, from);
 	else
 		ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
 				   &zonefs_write_dio_ops, 0, NULL, 0);
-	if (zonefs_zone_is_seq(zi) &&
+	if (zonefs_zone_is_seq(z) &&
 	    (ret > 0 || ret == -EIOCBQUEUED)) {
 		if (ret > 0)
 			count = ret;

 		/*
 		 * Update the zone write pointer offset assuming the write
 		 * operation succeeded. If it did not, the error recovery path
 		 * will correct it. Also do active seq file accounting.
 		 */
 		mutex_lock(&zi->i_truncate_mutex);
-		zi->i_wpoffset += count;
-		zonefs_account_active(inode);
+		z->z_wpoffset += count;
+		zonefs_inode_account_active(inode);
 		mutex_unlock(&zi->i_truncate_mutex);
 	}

 inode_unlock:
 	inode_unlock(inode);

 	return ret;
 }
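zonefs_file_dio_write() above rejects any sequential-file write that is not block aligned or does not start exactly at the write pointer (-EINVAL). Since zonefs keeps the file size of a sequential file equal to the write pointer offset, a compliant writer can use fstat() to find the next valid write position. A hedged sketch (the path and 4 KiB alignment are assumptions):

	/* Hedged sketch: sequential O_DIRECT writes must start at the write
	 * pointer, which zonefs exposes as the file size. Assumed path/sizes. */
	#define _GNU_SOURCE	/* for O_DIRECT */
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		struct stat st;
		void *buf;
		int fd = open("/mnt/zonefs/seq/0", O_WRONLY | O_DIRECT);

		if (fd < 0 || fstat(fd, &st) < 0)
			return 1;
		if (posix_memalign(&buf, 4096, 4096))
			return 1;
		memset(buf, 0, 4096);

		/* i_size tracks the write pointer, so writing at st.st_size
		 * satisfies the ki_pos == z->z_wpoffset check; any other
		 * offset gets EINVAL. */
		if (pwrite(fd, buf, 4096, st.st_size) != 4096)
			perror("sequential write");

		free(buf);
		close(fd);
		return 0;
	}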

[... 34 unchanged lines hidden ...]

 	ret = generic_write_sync(iocb, ret);

 	return ret;
 }

 static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);

 	if (unlikely(IS_IMMUTABLE(inode)))
 		return -EPERM;

 	if (sb_rdonly(inode->i_sb))
 		return -EROFS;

-	/* Write operations beyond the zone size are not allowed */
-	if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size)
+	/* Write operations beyond the zone capacity are not allowed */
+	if (iocb->ki_pos >= z->z_capacity)
 		return -EFBIG;

 	if (iocb->ki_flags & IOCB_DIRECT) {
 		ssize_t ret = zonefs_file_dio_write(iocb, from);

 		if (ret != -ENOTBLK)
 			return ret;
 	}

[... 15 unchanged lines hidden ...]

 static const struct iomap_dio_ops zonefs_read_dio_ops = {
 	.end_io			= zonefs_file_read_dio_end_io,
 };

 static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct super_block *sb = inode->i_sb;
 	loff_t isize;
 	ssize_t ret;

 	/* Offline zones cannot be read */
 	if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777)))
 		return -EPERM;

-	if (iocb->ki_pos >= zi->i_max_size)
+	if (iocb->ki_pos >= z->z_capacity)
 		return 0;

 	if (iocb->ki_flags & IOCB_NOWAIT) {
 		if (!inode_trylock_shared(inode))
 			return -EAGAIN;
 	} else {
 		inode_lock_shared(inode);
 	}

[... 44 unchanged lines hidden ...]

 		return false;

 	return true;
 }

 static int zonefs_seq_file_write_open(struct inode *inode)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	int ret = 0;

 	mutex_lock(&zi->i_truncate_mutex);

 	if (!zi->i_wr_refcnt) {
 		struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
 		unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files);

 		if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {

 			if (sbi->s_max_wro_seq_files
 			    && wro > sbi->s_max_wro_seq_files) {
 				atomic_dec(&sbi->s_wro_seq_files);
 				ret = -EBUSY;
 				goto unlock;
 			}

-			if (i_size_read(inode) < zi->i_max_size) {
-				ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
+			if (i_size_read(inode) < z->z_capacity) {
+				ret = zonefs_inode_zone_mgmt(inode,
+							     REQ_OP_ZONE_OPEN);
 				if (ret) {
 					atomic_dec(&sbi->s_wro_seq_files);
 					goto unlock;
 				}
-				zi->i_flags |= ZONEFS_ZONE_OPEN;
-				zonefs_account_active(inode);
+				z->z_flags |= ZONEFS_ZONE_OPEN;
+				zonefs_inode_account_active(inode);
 			}
 		}
 	}

 	zi->i_wr_refcnt++;

 unlock:
 	mutex_unlock(&zi->i_truncate_mutex);

[... 13 unchanged lines hidden ...]
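With the explicit-open mount option, the accounting above charges each first writable open of a sequential file against the device's open-zone resources and fails with -EBUSY once s_max_wro_seq_files is exceeded. A hedged user-space sketch of how that limit surfaces (the mount point and the file count are illustrative):

	/* Hedged sketch: with "mount -o explicit-open", opening more
	 * sequential files for writing than the device allows open zones
	 * fails with EBUSY. Path and NR_FILES are illustrative. */
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	#define NR_FILES 1024	/* more than the device's open zone limit */

	int main(void)
	{
		char path[64];
		int i, fd;

		for (i = 0; i < NR_FILES; i++) {
			snprintf(path, sizeof(path), "/mnt/zonefs/seq/%d", i);
			/* first write open issues REQ_OP_ZONE_OPEN */
			fd = open(path, O_WRONLY);
			if (fd < 0) {
				if (errno == EBUSY)
					printf("open zone budget exhausted at %d\n", i);
				break;
			}
			/* keep fd open so the zone stays write-open */
		}
		return 0;
	}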

 		return zonefs_seq_file_write_open(inode);

 	return 0;
 }

 static void zonefs_seq_file_write_close(struct inode *inode)
 {
 	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	struct zonefs_zone *z = zonefs_inode_zone(inode);
 	struct super_block *sb = inode->i_sb;
 	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
 	int ret = 0;

 	mutex_lock(&zi->i_truncate_mutex);

 	zi->i_wr_refcnt--;
 	if (zi->i_wr_refcnt)
 		goto unlock;

 	/*
 	 * The file zone may not be open anymore (e.g. the file was truncated to
 	 * its maximum size or it was fully written). For this case, we only
 	 * need to decrement the write open count.
 	 */
-	if (zi->i_flags & ZONEFS_ZONE_OPEN) {
-		ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
+	if (z->z_flags & ZONEFS_ZONE_OPEN) {
+		ret = zonefs_inode_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
 		if (ret) {
 			__zonefs_io_error(inode, false);
 			/*
 			 * Leaving zones explicitly open may lead to a state
 			 * where most zones cannot be written (zone resources
 			 * exhausted). So take preventive action by remounting
 			 * read-only.
 			 */
-			if (zi->i_flags & ZONEFS_ZONE_OPEN &&
+			if (z->z_flags & ZONEFS_ZONE_OPEN &&
 			    !(sb->s_flags & SB_RDONLY)) {
 				zonefs_warn(sb,
 					"closing zone at %llu failed %d\n",
-					zi->i_zsector, ret);
+					z->z_sector, ret);
 				zonefs_warn(sb,
 					"remounting filesystem read-only\n");
 				sb->s_flags |= SB_RDONLY;
 			}
 			goto unlock;
 		}

-		zi->i_flags &= ~ZONEFS_ZONE_OPEN;
-		zonefs_account_active(inode);
+		z->z_flags &= ~ZONEFS_ZONE_OPEN;
+		zonefs_inode_account_active(inode);
 	}

 	atomic_dec(&sbi->s_wro_seq_files);

 unlock:
 	mutex_unlock(&zi->i_truncate_mutex);
 }

[... 26 unchanged lines hidden ...]