super.c: 34422914dc00b291d1c47dbdabe93b154c2f2b25 (old) vs. aa7f243f32e1d18036ee00d71d3ccfad70ae2121 (new)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Simple file system for zoned block devices exposing zones as files.
4 *
5 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
6 */
7#include <linux/module.h>
8#include <linux/pagemap.h>

--- 14 unchanged lines hidden ---

23#include <linux/task_io_accounting_ops.h>
24
25#include "zonefs.h"
26
27#define CREATE_TRACE_POINTS
28#include "trace.h"
29
30/*
31 * Manage the active zone count. Called with zi->i_truncate_mutex held.
31 * Get the name of a zone group directory.
32 */
32 */
33void zonefs_account_active(struct inode *inode)
33static const char *zonefs_zgroup_name(enum zonefs_ztype ztype)
34{
34{
35 struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
36 struct zonefs_inode_info *zi = ZONEFS_I(inode);
35 switch (ztype) {
36 case ZONEFS_ZTYPE_CNV:
37 return "cnv";
38 case ZONEFS_ZTYPE_SEQ:
39 return "seq";
40 default:
41 WARN_ON_ONCE(1);
42 return "???";
43 }
44}
37
45
38 lockdep_assert_held(&zi->i_truncate_mutex);
46/*
47 * Manage the active zone count.
48 */
49static void zonefs_account_active(struct super_block *sb,
50 struct zonefs_zone *z)
51{
52 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
39
53
40 if (zonefs_zone_is_cnv(zi))
54 if (zonefs_zone_is_cnv(z))
41 return;
42
43 /*
44 * For zones that transitioned to the offline or readonly condition,
45 * we only need to clear the active state.
46 */
47 if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY))
61 if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY))
48 goto out;
49
50 /*
51 * If the zone is active, that is, if it is explicitly open or
52 * partially written, check if it was already accounted as active.
53 */
54 if ((zi->i_flags & ZONEFS_ZONE_OPEN) ||
55 (zi->i_wpoffset > 0 && zi->i_wpoffset < zi->i_max_size)) {
56 if (!(zi->i_flags & ZONEFS_ZONE_ACTIVE)) {
57 zi->i_flags |= ZONEFS_ZONE_ACTIVE;
68 if ((z->z_flags & ZONEFS_ZONE_OPEN) ||
69 (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) {
70 if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) {
71 z->z_flags |= ZONEFS_ZONE_ACTIVE;
58 atomic_inc(&sbi->s_active_seq_files);
59 }
60 return;
61 }
62
63out:
64 /* The zone is not active. If it was, update the active count */
65 if (zi->i_flags & ZONEFS_ZONE_ACTIVE) {
66 zi->i_flags &= ~ZONEFS_ZONE_ACTIVE;
79 if (z->z_flags & ZONEFS_ZONE_ACTIVE) {
80 z->z_flags &= ~ZONEFS_ZONE_ACTIVE;
67 atomic_dec(&sbi->s_active_seq_files);
68 }
69}
70
71int zonefs_zone_mgmt(struct inode *inode, enum req_op op)
85/*
86 * Manage the active zone count. Called with zi->i_truncate_mutex held.
87 */
88void zonefs_inode_account_active(struct inode *inode)
72{
89{
73 struct zonefs_inode_info *zi = ZONEFS_I(inode);
74 int ret;
90 lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex);
75
91
76 lockdep_assert_held(&zi->i_truncate_mutex);
92 return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode));
93}
77
94
95/*
96 * Execute a zone management operation.
97 */
98static int zonefs_zone_mgmt(struct super_block *sb,
99 struct zonefs_zone *z, enum req_op op)
100{
101 int ret;
102
78 /*
79 * With ZNS drives, closing an explicitly open zone that has not been
80 * written will change the zone state to "closed", that is, the zone
81 * will remain active. Since this can then cause failure of explicit
82 * open operation on other zones if the drive active zone resources
83 * are exceeded, make sure that the zone does not remain active by
84 * resetting it.
85 */
86 if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset)
111 if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset)
87 op = REQ_OP_ZONE_RESET;
88
112 op = REQ_OP_ZONE_RESET;
113
89 trace_zonefs_zone_mgmt(inode, op);
90 ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector,
91 zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS);
114 trace_zonefs_zone_mgmt(sb, z, op);
115 ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector,
116 z->z_size >> SECTOR_SHIFT, GFP_NOFS);
92 if (ret) {
117 if (ret) {
93 zonefs_err(inode->i_sb,
118 zonefs_err(sb,
94 "Zone management operation %s at %llu failed %d\n",
119 "Zone management operation %s at %llu failed %d\n",
95 blk_op_str(op), zi->i_zsector, ret);
120 blk_op_str(op), z->z_sector, ret);
96 return ret;
97 }
98
99 return 0;
100}
101
127int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op)
128{
129 lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex);
130
131 return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op);
132}
133
102void zonefs_i_size_write(struct inode *inode, loff_t isize)
103{
134void zonefs_i_size_write(struct inode *inode, loff_t isize)
135{
104 struct zonefs_inode_info *zi = ZONEFS_I(inode);
136 struct zonefs_zone *z = zonefs_inode_zone(inode);
105
106 i_size_write(inode, isize);
137
138 i_size_write(inode, isize);
139
107 /*
108 * A full zone is no longer open/active and does not need
109 * explicit closing.
110 */
140 /*
141 * A full zone is no longer open/active and does not need
142 * explicit closing.
143 */
111 if (isize >= zi->i_max_size) {
144 if (isize >= z->z_capacity) {
112 struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
113
145 struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb);
146
114 if (zi->i_flags & ZONEFS_ZONE_ACTIVE)
147 if (z->z_flags & ZONEFS_ZONE_ACTIVE)
115 atomic_dec(&sbi->s_active_seq_files);
148 atomic_dec(&sbi->s_active_seq_files);
116 zi->i_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE);
149 z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE);
117 }
118}
119
120void zonefs_update_stats(struct inode *inode, loff_t new_isize)
121{
122 struct super_block *sb = inode->i_sb;
123 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
124 loff_t old_isize = i_size_read(inode);

--- 20 unchanged lines hidden ---

145 if (sbi->s_used_blocks > sbi->s_blocks)
146 sbi->s_used_blocks = sbi->s_blocks;
147 }
148
149 spin_unlock(&sbi->s_lock);
150}
151
152/*
153 * Check a zone condition and adjust its file inode access permissions for
154 * offline and readonly zones. Return the inode size corresponding to the
155 * amount of readable data in the zone.
186 * Check a zone condition. Return the amount of written (and still readable)
187 * data in the zone.
156 */
188 */
157static loff_t zonefs_check_zone_condition(struct inode *inode,
189static loff_t zonefs_check_zone_condition(struct super_block *sb,
190 struct zonefs_zone *z,
158 struct blk_zone *zone)
159{
191 struct blk_zone *zone)
192{
160 struct zonefs_inode_info *zi = ZONEFS_I(inode);
161
162 switch (zone->cond) {
163 case BLK_ZONE_COND_OFFLINE:
193 switch (zone->cond) {
194 case BLK_ZONE_COND_OFFLINE:
164 zonefs_warn(inode->i_sb, "inode %lu: offline zone\n",
165 inode->i_ino);
166 zi->i_flags |= ZONEFS_ZONE_OFFLINE;
195 zonefs_warn(sb, "Zone %llu: offline zone\n",
196 z->z_sector);
197 z->z_flags |= ZONEFS_ZONE_OFFLINE;
167 return 0;
168 case BLK_ZONE_COND_READONLY:
169 /*
170 * The write pointer of read-only zones is invalid, so we cannot
171 * determine the zone wpoffset (inode size). We thus keep the
172 * zone wpoffset as is, which leads to an empty file
173 * (wpoffset == 0) on mount. For a runtime error, this keeps
174 * the inode size as it was when last updated so that the user
175 * can recover data.
176 */
198 return 0;
199 case BLK_ZONE_COND_READONLY:
200 /*
201 * The write pointer of read-only zones is invalid, so we cannot
202 * determine the zone wpoffset (inode size). We thus keep the
203 * zone wpoffset as is, which leads to an empty file
204 * (wpoffset == 0) on mount. For a runtime error, this keeps
205 * the inode size as it was when last updated so that the user
206 * can recover data.
207 */
177 zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n",
178 inode->i_ino);
179 zi->i_flags |= ZONEFS_ZONE_READONLY;
180 if (zonefs_zone_is_cnv(zi))
181 return zi->i_max_size;
182 return zi->i_wpoffset;
208 zonefs_warn(sb, "Zone %llu: read-only zone\n",
209 z->z_sector);
210 z->z_flags |= ZONEFS_ZONE_READONLY;
211 if (zonefs_zone_is_cnv(z))
212 return z->z_capacity;
213 return z->z_wpoffset;
183 case BLK_ZONE_COND_FULL:
184 /* The write pointer of full zones is invalid. */
214 case BLK_ZONE_COND_FULL:
215 /* The write pointer of full zones is invalid. */
185 return zi->i_max_size;
216 return z->z_capacity;
186 default:
217 default:
187 if (zonefs_zone_is_cnv(zi))
188 return zi->i_max_size;
218 if (zonefs_zone_is_cnv(z))
219 return z->z_capacity;
189 return (zone->wp - zone->start) << SECTOR_SHIFT;
190 }
191}
192
193/*
194 * Check a zone condition and adjust its inode access permissions for
195 * offline and readonly zones.
196 */
197static void zonefs_inode_update_mode(struct inode *inode)
198{
199 struct zonefs_inode_info *zi = ZONEFS_I(inode);
230 struct zonefs_zone *z = zonefs_inode_zone(inode);
200
231
201 if (zi->i_flags & ZONEFS_ZONE_OFFLINE) {
232 if (z->z_flags & ZONEFS_ZONE_OFFLINE) {
202 /* Offline zones cannot be read nor written */
203 inode->i_flags |= S_IMMUTABLE;
204 inode->i_mode &= ~0777;
233 /* Offline zones cannot be read nor written */
234 inode->i_flags |= S_IMMUTABLE;
235 inode->i_mode &= ~0777;
205 } else if (zi->i_flags & ZONEFS_ZONE_READONLY) {
236 } else if (z->z_flags & ZONEFS_ZONE_READONLY) {
206 /* Readonly zones cannot be written */
207 inode->i_flags |= S_IMMUTABLE;
237 /* Readonly zones cannot be written */
238 inode->i_flags |= S_IMMUTABLE;
208 if (zi->i_flags & ZONEFS_ZONE_INIT_MODE)
239 if (z->z_flags & ZONEFS_ZONE_INIT_MODE)
209 inode->i_mode &= ~0777;
210 else
211 inode->i_mode &= ~0222;
212 }
213
214 zi->i_flags &= ~ZONEFS_ZONE_INIT_MODE;
245 z->z_flags &= ~ZONEFS_ZONE_INIT_MODE;
215}
216
217struct zonefs_ioerr_data {
218 struct inode *inode;
219 bool write;
220};
221
222static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
223 void *data)
224{
225 struct zonefs_ioerr_data *err = data;
226 struct inode *inode = err->inode;
227 struct zonefs_inode_info *zi = ZONEFS_I(inode);
258 struct zonefs_zone *z = zonefs_inode_zone(inode);
228 struct super_block *sb = inode->i_sb;
229 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
230 loff_t isize, data_size;
231
232 /*
233 * Check the zone condition: if the zone is not "bad" (offline or
234 * read-only), read errors are simply signaled to the IO issuer as long
235 * as there is no inconsistency between the inode size and the amount of
236 * data written in the zone (data_size).
237 */
238 data_size = zonefs_check_zone_condition(inode, zone);
269 data_size = zonefs_check_zone_condition(sb, z, zone);
239 isize = i_size_read(inode);
270 isize = i_size_read(inode);
240 if (!(zi->i_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
271 if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
241 !err->write && isize == data_size)
242 return 0;
243
244 /*
245 * At this point, we detected either a bad zone or an inconsistency
246 * between the inode size and the amount of data written in the zone.
247 * For the latter case, the cause may be a write IO error or an external
248 * action on the device. Two error patterns exist:

--- 6 unchanged lines hidden ---

255 * device side write cache after getting successful write IO
256 * completions. Other possibilities are (a) an external corruption,
257 * e.g. an application reset the zone directly, or (b) the device
258 * has a serious problem (e.g. firmware bug).
259 *
260 * In all cases, warn about inode size inconsistency and handle the
261 * IO error according to the zone condition and to the mount options.
262 */
263 if (zonefs_zone_is_seq(zi) && isize != data_size)
264 zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n",
294 if (zonefs_zone_is_seq(z) && isize != data_size)
295 zonefs_warn(sb,
296 "inode %lu: invalid size %lld (should be %lld)\n",
265 inode->i_ino, isize, data_size);
266
267 /*
268 * First handle bad zones signaled by hardware. The mount options
269 * errors=zone-ro and errors=zone-offline result in changing the
270 * zone condition to read-only and offline respectively, as if the
271 * condition was signaled by the hardware.
272 */
273 if ((zi->i_flags & ZONEFS_ZONE_OFFLINE) ||
305 if ((z->z_flags & ZONEFS_ZONE_OFFLINE) ||
274 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) {
275 zonefs_warn(sb, "inode %lu: read/write access disabled\n",
276 inode->i_ino);
306 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) {
307 zonefs_warn(sb, "inode %lu: read/write access disabled\n",
308 inode->i_ino);
277 if (!(zi->i_flags & ZONEFS_ZONE_OFFLINE))
278 zi->i_flags |= ZONEFS_ZONE_OFFLINE;
309 if (!(z->z_flags & ZONEFS_ZONE_OFFLINE))
310 z->z_flags |= ZONEFS_ZONE_OFFLINE;
279 zonefs_inode_update_mode(inode);
280 data_size = 0;
311 zonefs_inode_update_mode(inode);
312 data_size = 0;
281 } else if ((zi->i_flags & ZONEFS_ZONE_READONLY) ||
313 } else if ((z->z_flags & ZONEFS_ZONE_READONLY) ||
282 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) {
283 zonefs_warn(sb, "inode %lu: write access disabled\n",
284 inode->i_ino);
314 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) {
315 zonefs_warn(sb, "inode %lu: write access disabled\n",
316 inode->i_ino);
285 if (!(zi->i_flags & ZONEFS_ZONE_READONLY))
286 zi->i_flags |= ZONEFS_ZONE_READONLY;
317 if (!(z->z_flags & ZONEFS_ZONE_READONLY))
318 z->z_flags |= ZONEFS_ZONE_READONLY;
287 zonefs_inode_update_mode(inode);
288 data_size = isize;
289 } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO &&
290 data_size > isize) {
291 /* Do not expose garbage data */
292 data_size = isize;
293 }
294
295 /*
296 * If the filesystem is mounted with the explicit-open mount option, we
297 * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to
298 * the read-only or offline condition, to avoid attempting an explicit
299 * close of the zone when the inode file is closed.
300 */
301 if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) &&
302 (zi->i_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)))
303 zi->i_flags &= ~ZONEFS_ZONE_OPEN;
334 (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)))
335 z->z_flags &= ~ZONEFS_ZONE_OPEN;
304
305 /*
306 * If errors=remount-ro was specified, any error results in remounting
307 * the volume as read-only.
308 */
309 if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) {
310 zonefs_warn(sb, "remounting filesystem read-only\n");
311 sb->s_flags |= SB_RDONLY;
312 }
313
314 /*
315 * Update block usage stats and the inode size to prevent access to
316 * invalid data.
317 */
318 zonefs_update_stats(inode, data_size);
319 zonefs_i_size_write(inode, data_size);
320 zi->i_wpoffset = data_size;
321 zonefs_account_active(inode);
352 z->z_wpoffset = data_size;
353 zonefs_inode_account_active(inode);
322
323 return 0;
324}
325
326/*
327 * When a file IO error occurs, check the file zone to see if there is a change
328 * in the zone condition (e.g. offline or read-only). For a failed write to a
329 * sequential zone, the zone write pointer position must also be checked to
330 * eventually correct the file size and zonefs inode write pointer offset
331 * (which can be out of sync with the drive due to partial write failures).
332 */
333void __zonefs_io_error(struct inode *inode, bool write)
334{
335 struct zonefs_inode_info *zi = ZONEFS_I(inode);
367 struct zonefs_zone *z = zonefs_inode_zone(inode);
336 struct super_block *sb = inode->i_sb;
337 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
338 unsigned int noio_flag;
339 unsigned int nr_zones = 1;
340 struct zonefs_ioerr_data err = {
341 .inode = inode,
342 .write = write,
343 };
344 int ret;
345
346 /*
347 * The only files that have more than one zone are conventional zone
348 * files with aggregated conventional zones, for which the inode zone
349 * size is always larger than the device zone size.
350 */
351 if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev))
352 nr_zones = zi->i_zone_size >>
383 if (z->z_size > bdev_zone_sectors(sb->s_bdev))
384 nr_zones = z->z_size >>
353 (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
354
355 /*
356 * Memory allocations in blkdev_report_zones() can trigger a memory
357 * reclaim which may in turn cause a recursion into zonefs as well as
358 * struct request allocations for the same device. The former case may
359 * end up in a deadlock on the inode truncate mutex, while the latter
360 * may prevent IO forward progress. Executing the report zones under
361 * the GFP_NOIO context avoids both problems.
362 */
363 noio_flag = memalloc_noio_save();
364 ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones,
396 ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones,
365 zonefs_io_error_cb, &err);
366 if (ret != nr_zones)
367 zonefs_err(sb, "Get inode %lu zone information failed %d\n",
368 inode->i_ino, ret);
369 memalloc_noio_restore(noio_flag);
370}
371
372static struct kmem_cache *zonefs_inode_cachep;
373
374static struct inode *zonefs_alloc_inode(struct super_block *sb)
375{
376 struct zonefs_inode_info *zi;
377
378 zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL);
379 if (!zi)
380 return NULL;
381
382 inode_init_once(&zi->i_vnode);
383 mutex_init(&zi->i_truncate_mutex);
384 zi->i_wpoffset = 0;
385 zi->i_wr_refcnt = 0;
416 zi->i_wr_refcnt = 0;
386 zi->i_flags = 0;
387
388 return &zi->i_vnode;
389}
390
391static void zonefs_free_inode(struct inode *inode)
392{
393 kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode));
394}

--- 16 unchanged lines hidden ---

411 buf->f_blocks = sbi->s_blocks;
412 if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks))
413 buf->f_bfree = 0;
414 else
415 buf->f_bfree = buf->f_blocks - sbi->s_used_blocks;
416 buf->f_bavail = buf->f_bfree;
417
418 for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
419 if (sbi->s_nr_files[t])
420 buf->f_files += sbi->s_nr_files[t] + 1;
449 if (sbi->s_zgroup[t].g_nr_zones)
450 buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1;
421 }
422 buf->f_ffree = 0;
423
424 spin_unlock(&sbi->s_lock);
425
426 buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b);
427
428 return 0;

--- 123 unchanged lines hidden ---

552}
553
554static const struct inode_operations zonefs_dir_inode_operations = {
555 .lookup = simple_lookup,
556 .setattr = zonefs_inode_setattr,
557};
558
559static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
560 enum zonefs_ztype type)
590 enum zonefs_ztype ztype)
561{
562 struct super_block *sb = parent->i_sb;
563
591{
592 struct super_block *sb = parent->i_sb;
593
564 inode->i_ino = bdev_nr_zones(sb->s_bdev) + type + 1;
594 inode->i_ino = bdev_nr_zones(sb->s_bdev) + ztype + 1;
565 inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555);
566 inode->i_op = &zonefs_dir_inode_operations;
567 inode->i_fop = &simple_dir_operations;
568 set_nlink(inode, 2);
569 inc_nlink(parent);
570}
571
572static const struct inode_operations zonefs_file_inode_operations = {
573 .setattr = zonefs_inode_setattr,
574};
575
576static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
577 enum zonefs_ztype type)
606static void zonefs_init_file_inode(struct inode *inode,
607 struct zonefs_zone *z)
578{
579 struct super_block *sb = inode->i_sb;
580 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
608{
609 struct super_block *sb = inode->i_sb;
610 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
581 struct zonefs_inode_info *zi = ZONEFS_I(inode);
582 int ret = 0;
583
611
584 inode->i_ino = zone->start >> sbi->s_zone_sectors_shift;
585 inode->i_mode = S_IFREG | sbi->s_perm;
612 inode->i_private = z;
586
613
587 if (type == ZONEFS_ZTYPE_CNV)
588 zi->i_flags |= ZONEFS_ZONE_CNV;
589
590 zi->i_zsector = zone->start;
591 zi->i_zone_size = zone->len << SECTOR_SHIFT;
592 if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
593 !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
594 zonefs_err(sb,
595 "zone size %llu doesn't match device's zone sectors %llu\n",
596 zi->i_zone_size,
597 bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
598 return -EINVAL;
599 }
600
601 zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE,
602 zone->capacity << SECTOR_SHIFT);
603 zi->i_wpoffset = zonefs_check_zone_condition(inode, zone);
604
614 inode->i_ino = z->z_sector >> sbi->s_zone_sectors_shift;
615 inode->i_mode = S_IFREG | sbi->s_perm;
605 inode->i_uid = sbi->s_uid;
606 inode->i_gid = sbi->s_gid;
616 inode->i_uid = sbi->s_uid;
617 inode->i_gid = sbi->s_gid;
607 inode->i_size = zi->i_wpoffset;
608 inode->i_blocks = zi->i_max_size >> SECTOR_SHIFT;
618 inode->i_size = z->z_wpoffset;
619 inode->i_blocks = z->z_capacity >> SECTOR_SHIFT;
609
610 inode->i_op = &zonefs_file_inode_operations;
611 inode->i_fop = &zonefs_file_operations;
612 inode->i_mapping->a_ops = &zonefs_file_aops;
613
614 /* Update the inode access rights depending on the zone condition */
615 zi->i_flags |= ZONEFS_ZONE_INIT_MODE;
626 z->z_flags |= ZONEFS_ZONE_INIT_MODE;
616 zonefs_inode_update_mode(inode);
627 zonefs_inode_update_mode(inode);
617
618 sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes);
619 sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits;
620 sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits;
621
622 mutex_lock(&zi->i_truncate_mutex);
623
624 /*
625 * For sequential zones, make sure that any open zone is closed first
626 * to ensure that the initial number of open zones is 0, in sync with
627 * the open zone accounting done when the mount option
628 * ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
629 */
630 if (type == ZONEFS_ZTYPE_SEQ &&
631 (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
632 zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
633 ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE);
634 if (ret)
635 goto unlock;
636 }
637
638 zonefs_account_active(inode);
639
640unlock:
641 mutex_unlock(&zi->i_truncate_mutex);
642
643 return ret;
644}
645
646static struct dentry *zonefs_create_inode(struct dentry *parent,
628}
629
630static struct dentry *zonefs_create_inode(struct dentry *parent,
647 const char *name, struct blk_zone *zone,
648 enum zonefs_ztype type)
631 const char *name,
632 struct zonefs_zone *z,
633 enum zonefs_ztype ztype)
649{
650 struct inode *dir = d_inode(parent);
651 struct dentry *dentry;
652 struct inode *inode;
653 int ret = -ENOMEM;
654
655 dentry = d_alloc_name(parent, name);
656 if (!dentry)
657 return ERR_PTR(ret);
658
659 inode = new_inode(parent->d_sb);
660 if (!inode)
661 goto dput;
662
663 inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
664 if (zone) {
665 ret = zonefs_init_file_inode(inode, zone, type);
666 if (ret) {
667 iput(inode);
668 goto dput;
669 }
670 } else {
671 zonefs_init_dir_inode(dir, inode, type);
672 }
649 if (z)
650 zonefs_init_file_inode(inode, z);
651 else
652 zonefs_init_dir_inode(dir, inode, ztype);
673
674 d_add(dentry, inode);
675 dir->i_size++;
676
677 return dentry;
678
679dput:
680 dput(dentry);
681
682 return ERR_PTR(ret);
683}
684
685struct zonefs_zone_data {
686 struct super_block *sb;
687 unsigned int nr_zones[ZONEFS_ZTYPE_MAX];
668 sector_t cnv_zone_start;
688 struct blk_zone *zones;
689};
690
691/*
692 * Create a zone group and populate it with zone files.
673 * Create the inodes for a zone group.
693 */
674 */
694static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
695 enum zonefs_ztype type)
675static int zonefs_create_zgroup_inodes(struct super_block *sb,
676 enum zonefs_ztype ztype)
696{
677{
697 struct super_block *sb = zd->sb;
698 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
678 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
699 struct blk_zone *zone, *next, *end;
700 const char *zgroup_name;
701 char *file_name;
679 struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype];
702 struct dentry *dir, *dent;
680 struct dentry *dir, *dent;
703 unsigned int n = 0;
704 int ret;
681 char *file_name;
682 int i, ret = 0;
705
683
684 if (!zgroup)
685 return -ENOMEM;
686
706 /* If the group is empty, there is nothing to do */
687 /* If the group is empty, there is nothing to do */
707 if (!zd->nr_zones[type])
688 if (!zgroup->g_nr_zones)
708 return 0;
709
710 file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL);
711 if (!file_name)
712 return -ENOMEM;
713
689 return 0;
690
691 file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL);
692 if (!file_name)
693 return -ENOMEM;
694
714 if (type == ZONEFS_ZTYPE_CNV)
715 zgroup_name = "cnv";
716 else
717 zgroup_name = "seq";
718
719 dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type);
695 dir = zonefs_create_inode(sb->s_root, zonefs_zgroup_name(ztype),
696 NULL, ztype);
720 if (IS_ERR(dir)) {
721 ret = PTR_ERR(dir);
722 goto free;
723 }
724
725 /*
726 * The first zone contains the super block: skip it.
727 */
728 end = zd->zones + bdev_nr_zones(sb->s_bdev);
729 for (zone = &zd->zones[1]; zone < end; zone = next) {
730
731 next = zone + 1;
732 if (zonefs_zone_type(zone) != type)
733 continue;
734
735 /*
736 * For conventional zones, contiguous zones can be aggregated
737 * together to form larger files. Note that this overwrites the
738 * length of the first zone of the set of contiguous zones
739 * aggregated together. If one offline or read-only zone is
740 * found, assume that all zones aggregated have the same
741 * condition.
742 */
743 if (type == ZONEFS_ZTYPE_CNV &&
744 (sbi->s_features & ZONEFS_F_AGGRCNV)) {
745 for (; next < end; next++) {
746 if (zonefs_zone_type(next) != type)
747 break;
748 zone->len += next->len;
749 zone->capacity += next->capacity;
750 if (next->cond == BLK_ZONE_COND_READONLY &&
751 zone->cond != BLK_ZONE_COND_OFFLINE)
752 zone->cond = BLK_ZONE_COND_READONLY;
753 else if (next->cond == BLK_ZONE_COND_OFFLINE)
754 zone->cond = BLK_ZONE_COND_OFFLINE;
755 }
756 if (zone->capacity != zone->len) {
757 zonefs_err(sb, "Invalid conventional zone capacity\n");
758 ret = -EINVAL;
759 goto free;
760 }
761 }
762
763 /*
764 * Use the file number within its group as file name.
765 */
766 snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n);
767 dent = zonefs_create_inode(dir, file_name, zone, type);
702 for (i = 0; i < zgroup->g_nr_zones; i++) {
703 /* Use the zone number within its group as the file name */
704 snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", i);
705 dent = zonefs_create_inode(dir, file_name,
706 &zgroup->g_zones[i], ztype);
768 if (IS_ERR(dent)) {
769 ret = PTR_ERR(dent);
707 if (IS_ERR(dent)) {
708 ret = PTR_ERR(dent);
770 goto free;
709 break;
771 }
710 }
772
773 n++;
774 }
775
711 }
712
776 zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
777 zgroup_name, n, n > 1 ? "s" : "");
778
779 sbi->s_nr_files[type] = n;
780 ret = 0;
781
782free:
783 kfree(file_name);
784
785 return ret;
786}
787
788static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx,
789 void *data)
790{
791 struct zonefs_zone_data *zd = data;
723 struct super_block *sb = zd->sb;
724 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
792
793 /*
725
726 /*
794 * Count the number of usable zones: the first zone at index 0 contains
795 * the super block and is ignored.
727 * We do not care about the first zone: it contains the super block
728 * and is not exposed as a file.
796 */
729 */
730 if (!idx)
731 return 0;
732
733 /*
734 * Count the number of zones that will be exposed as files.
735 * For sequential zones, we always have as many files as zones.
736 * For conventional zones, the number of files depends on whether
737 * conventional zone aggregation is enabled.
738 */
797 switch (zone->type) {
798 case BLK_ZONE_TYPE_CONVENTIONAL:
739 switch (zone->type) {
740 case BLK_ZONE_TYPE_CONVENTIONAL:
799 zone->wp = zone->start + zone->len;
800 if (idx)
801 zd->nr_zones[ZONEFS_ZTYPE_CNV]++;
741 if (sbi->s_features & ZONEFS_F_AGGRCNV) {
742 /* One file per set of contiguous conventional zones */
743 if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) ||
744 zone->start != zd->cnv_zone_start)
745 sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++;
746 zd->cnv_zone_start = zone->start + zone->len;
747 } else {
748 /* One file per zone */
749 sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++;
750 }
802 break;
803 case BLK_ZONE_TYPE_SEQWRITE_REQ:
804 case BLK_ZONE_TYPE_SEQWRITE_PREF:
751 break;
752 case BLK_ZONE_TYPE_SEQWRITE_REQ:
753 case BLK_ZONE_TYPE_SEQWRITE_PREF:
805 if (idx)
806 zd->nr_zones[ZONEFS_ZTYPE_SEQ]++;
754 sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++;
807 break;
808 default:
809 zonefs_err(zd->sb, "Unsupported zone type 0x%x\n",
810 zone->type);
811 return -EIO;
812 }
813
814 memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone));

--- 23 unchanged lines hidden ---

838 zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n",
839 ret, bdev_nr_zones(bdev));
840 return -EIO;
841 }
842
843 return 0;
844}
845
846static inline void zonefs_cleanup_zone_info(struct zonefs_zone_data *zd)
794static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd)
847{
848 kvfree(zd->zones);
849}
850
851/*
795{
796 kvfree(zd->zones);
797}
798
799/*
800 * Create a zone group and populate it with zone files.
801 */
802static int zonefs_init_zgroup(struct super_block *sb,
803 struct zonefs_zone_data *zd,
804 enum zonefs_ztype ztype)
805{
806 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
807 struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype];
808 struct blk_zone *zone, *next, *end;
809 struct zonefs_zone *z;
810 unsigned int n = 0;
811 int ret;
812
813 /* Allocate the zone group. If it is empty, we have nothing to do. */
814 if (!zgroup->g_nr_zones)
815 return 0;
816
817 zgroup->g_zones = kvcalloc(zgroup->g_nr_zones,
818 sizeof(struct zonefs_zone), GFP_KERNEL);
819 if (!zgroup->g_zones)
820 return -ENOMEM;
821
822 /*
823 * Initialize the zone groups using the device zone information.
824 * We always skip the first zone as it contains the super block
825 * and is not used to back a file.
826 */
827 end = zd->zones + bdev_nr_zones(sb->s_bdev);
828 for (zone = &zd->zones[1]; zone < end; zone = next) {
829
830 next = zone + 1;
831 if (zonefs_zone_type(zone) != ztype)
832 continue;
833
834 if (WARN_ON_ONCE(n >= zgroup->g_nr_zones))
835 return -EINVAL;
836
837 /*
838 * For conventional zones, contiguous zones can be aggregated
839 * together to form larger files. Note that this overwrites the
840 * length of the first zone of the set of contiguous zones
841 * aggregated together. If one offline or read-only zone is
842 * found, assume that all zones aggregated have the same
843 * condition.
844 */
845 if (ztype == ZONEFS_ZTYPE_CNV &&
846 (sbi->s_features & ZONEFS_F_AGGRCNV)) {
847 for (; next < end; next++) {
848 if (zonefs_zone_type(next) != ztype)
849 break;
850 zone->len += next->len;
851 zone->capacity += next->capacity;
852 if (next->cond == BLK_ZONE_COND_READONLY &&
853 zone->cond != BLK_ZONE_COND_OFFLINE)
854 zone->cond = BLK_ZONE_COND_READONLY;
855 else if (next->cond == BLK_ZONE_COND_OFFLINE)
856 zone->cond = BLK_ZONE_COND_OFFLINE;
857 }
858 }
859
860 z = &zgroup->g_zones[n];
861 if (ztype == ZONEFS_ZTYPE_CNV)
862 z->z_flags |= ZONEFS_ZONE_CNV;
863 z->z_sector = zone->start;
864 z->z_size = zone->len << SECTOR_SHIFT;
865 if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
866 !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
867 zonefs_err(sb,
868 "Invalid zone size %llu (device zone sectors %llu)\n",
869 z->z_size,
870 bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
871 return -EINVAL;
872 }
873
874 z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE,
875 zone->capacity << SECTOR_SHIFT);
876 z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone);
877
878 sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes);
879 sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits;
880 sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits;
881
882 /*
883 * For sequential zones, make sure that any open zone is closed
884 * first to ensure that the initial number of open zones is 0,
885 * in sync with the open zone accounting done when the mount
886 * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
887 */
888 if (ztype == ZONEFS_ZTYPE_SEQ &&
889 (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
890 zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
891 ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE);
892 if (ret)
893 return ret;
894 }
895
896 zonefs_account_active(sb, z);
897
898 n++;
899 }
900
901 if (WARN_ON_ONCE(n != zgroup->g_nr_zones))
902 return -EINVAL;
903
904 zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
905 zonefs_zgroup_name(ztype),
906 zgroup->g_nr_zones,
907 zgroup->g_nr_zones > 1 ? "s" : "");
908
909 return 0;
910}
911
912static void zonefs_free_zgroups(struct super_block *sb)
913{
914 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
915 enum zonefs_ztype ztype;
916
917 if (!sbi)
918 return;
919
920 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
921 kvfree(sbi->s_zgroup[ztype].g_zones);
922 sbi->s_zgroup[ztype].g_zones = NULL;
923 }
924}
925
926/*
927 * Create a zone group and populate it with zone files.
928 */
929static int zonefs_init_zgroups(struct super_block *sb)
930{
931 struct zonefs_zone_data zd;
932 enum zonefs_ztype ztype;
933 int ret;
934
935 /* First get the device zone information */
936 memset(&zd, 0, sizeof(struct zonefs_zone_data));
937 zd.sb = sb;
938 ret = zonefs_get_zone_info(&zd);
939 if (ret)
940 goto cleanup;
941
942 /* Allocate and initialize the zone groups */
943 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
944 ret = zonefs_init_zgroup(sb, &zd, ztype);
945 if (ret) {
946 zonefs_info(sb,
947 "Zone group \"%s\" initialization failed\n",
948 zonefs_zgroup_name(ztype));
949 break;
950 }
951 }
952
953cleanup:
954 zonefs_free_zone_info(&zd);
955 if (ret)
956 zonefs_free_zgroups(sb);
957
958 return ret;
959}
960
961/*
852 * Read super block information from the device.
853 */
854static int zonefs_read_super(struct super_block *sb)
855{
856 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
857 struct zonefs_super *super;
858 u32 crc, stored_crc;
859 struct page *page;

--- 80 unchanged lines hidden ---

940
941/*
942 * Check that the device is zoned. If it is, get the list of zones and create
943 * sub-directories and files according to the device zone configuration and
944 * format options.
945 */
946static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
947{
948 struct zonefs_zone_data zd;
949 struct zonefs_sb_info *sbi;
950 struct inode *inode;
951 enum zonefs_ztype t;
952 int ret;
953
954 if (!bdev_is_zoned(sb->s_bdev)) {
955 zonefs_err(sb, "Not a zoned block device\n");
956 return -EINVAL;

--- 36 unchanged lines hidden ---

993 ret = zonefs_read_super(sb);
994 if (ret)
995 return ret;
996
997 ret = zonefs_parse_options(sb, data);
998 if (ret)
999 return ret;
1000
1001 memset(&zd, 0, sizeof(struct zonefs_zone_data));
1002 zd.sb = sb;
1003 ret = zonefs_get_zone_info(&zd);
1004 if (ret)
1005 goto cleanup;
1006
1007 ret = zonefs_sysfs_register(sb);
1008 if (ret)
1009 goto cleanup;
1010
1011 zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev));
1012
1013 if (!sbi->s_max_wro_seq_files &&
1014 !sbi->s_max_active_seq_files &&
1015 sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
1016 zonefs_info(sb,
1017 "No open and active zone limits. Ignoring explicit_open mount option\n");
1018 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
1019 }
1020
1120 /* Initialize the zone groups */
1121 ret = zonefs_init_zgroups(sb);
1122 if (ret)
1123 goto cleanup;
1124
1021 /* Create root directory inode */
1022 ret = -ENOMEM;
1023 inode = new_inode(sb);
1024 if (!inode)
1025 goto cleanup;
1026
1027 inode->i_ino = bdev_nr_zones(sb->s_bdev);
1028 inode->i_mode = S_IFDIR | 0555;
1029 inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode);
1030 inode->i_op = &zonefs_dir_inode_operations;
1031 inode->i_fop = &simple_dir_operations;
1032 set_nlink(inode, 2);
1033
1034 sb->s_root = d_make_root(inode);
1035 if (!sb->s_root)
1036 goto cleanup;
1037
1038 /* Create and populate files in zone groups directories */
1039 for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
1040 ret = zonefs_create_zgroup(&zd, t);
1144 ret = zonefs_create_zgroup_inodes(sb, t);
1041 if (ret)
1145 if (ret)
1042 break;
1146 goto cleanup;
1043 }
1044
1147 }
1148
1149 ret = zonefs_sysfs_register(sb);
1150 if (ret)
1151 goto cleanup;
1152
1153 return 0;
1154
1045cleanup:
1155cleanup:
1046 zonefs_cleanup_zone_info(&zd);
1156 zonefs_free_zgroups(sb);
1047
1048 return ret;
1049}
1050
1051static struct dentry *zonefs_mount(struct file_system_type *fs_type,
1052 int flags, const char *dev_name, void *data)
1053{
1054 return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super);
1055}
1056
1057static void zonefs_kill_super(struct super_block *sb)
1058{
1059 struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
1060
1061 if (sb->s_root)
1062 d_genocide(sb->s_root);
1063
1064 zonefs_sysfs_unregister(sb);
1175 zonefs_free_zgroups(sb);
1065 kill_block_super(sb);
1066 kfree(sbi);
1067}
1068
1069/*
1070 * File system definition and registration.
1071 */
1072static struct file_system_type zonefs_type = {

--- 69 unchanged lines hidden ---