1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Simple file system for zoned block devices exposing zones as files. 4 * 5 * Copyright (C) 2019 Western Digital Corporation or its affiliates. 6 */ 7 #include <linux/module.h> 8 #include <linux/pagemap.h> 9 #include <linux/magic.h> 10 #include <linux/iomap.h> 11 #include <linux/init.h> 12 #include <linux/slab.h> 13 #include <linux/blkdev.h> 14 #include <linux/statfs.h> 15 #include <linux/writeback.h> 16 #include <linux/quotaops.h> 17 #include <linux/seq_file.h> 18 #include <linux/parser.h> 19 #include <linux/uio.h> 20 #include <linux/mman.h> 21 #include <linux/sched/mm.h> 22 #include <linux/crc32.h> 23 #include <linux/task_io_accounting_ops.h> 24 25 #include "zonefs.h" 26 27 #define CREATE_TRACE_POINTS 28 #include "trace.h" 29 30 /* 31 * Get the name of a zone group directory. 32 */ 33 static const char *zonefs_zgroup_name(enum zonefs_ztype ztype) 34 { 35 switch (ztype) { 36 case ZONEFS_ZTYPE_CNV: 37 return "cnv"; 38 case ZONEFS_ZTYPE_SEQ: 39 return "seq"; 40 default: 41 WARN_ON_ONCE(1); 42 return "???"; 43 } 44 } 45 46 /* 47 * Manage the active zone count. 48 */ 49 static void zonefs_account_active(struct super_block *sb, 50 struct zonefs_zone *z) 51 { 52 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 53 54 if (zonefs_zone_is_cnv(z)) 55 return; 56 57 /* 58 * For zones that transitioned to the offline or readonly condition, 59 * we only need to clear the active state. 60 */ 61 if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) 62 goto out; 63 64 /* 65 * If the zone is active, that is, if it is explicitly open or 66 * partially written, check if it was already accounted as active. 67 */ 68 if ((z->z_flags & ZONEFS_ZONE_OPEN) || 69 (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) { 70 if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) { 71 z->z_flags |= ZONEFS_ZONE_ACTIVE; 72 atomic_inc(&sbi->s_active_seq_files); 73 } 74 return; 75 } 76 77 out: 78 /* The zone is not active. If it was, update the active count */ 79 if (z->z_flags & ZONEFS_ZONE_ACTIVE) { 80 z->z_flags &= ~ZONEFS_ZONE_ACTIVE; 81 atomic_dec(&sbi->s_active_seq_files); 82 } 83 } 84 85 /* 86 * Manage the active zone count. Called with zi->i_truncate_mutex held. 87 */ 88 void zonefs_inode_account_active(struct inode *inode) 89 { 90 lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); 91 92 return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode)); 93 } 94 95 /* 96 * Execute a zone management operation. 97 */ 98 static int zonefs_zone_mgmt(struct super_block *sb, 99 struct zonefs_zone *z, enum req_op op) 100 { 101 int ret; 102 103 /* 104 * With ZNS drives, closing an explicitly open zone that has not been 105 * written will change the zone state to "closed", that is, the zone 106 * will remain active. Since this can then cause failure of explicit 107 * open operation on other zones if the drive active zone resources 108 * are exceeded, make sure that the zone does not remain active by 109 * resetting it. 110 */ 111 if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset) 112 op = REQ_OP_ZONE_RESET; 113 114 trace_zonefs_zone_mgmt(sb, z, op); 115 ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector, 116 z->z_size >> SECTOR_SHIFT, GFP_NOFS); 117 if (ret) { 118 zonefs_err(sb, 119 "Zone management operation %s at %llu failed %d\n", 120 blk_op_str(op), z->z_sector, ret); 121 return ret; 122 } 123 124 return 0; 125 } 126 127 int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op) 128 { 129 lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); 130 131 return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op); 132 } 133 134 void zonefs_i_size_write(struct inode *inode, loff_t isize) 135 { 136 struct zonefs_zone *z = zonefs_inode_zone(inode); 137 138 i_size_write(inode, isize); 139 140 /* 141 * A full zone is no longer open/active and does not need 142 * explicit closing. 143 */ 144 if (isize >= z->z_capacity) { 145 struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); 146 147 if (z->z_flags & ZONEFS_ZONE_ACTIVE) 148 atomic_dec(&sbi->s_active_seq_files); 149 z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); 150 } 151 } 152 153 void zonefs_update_stats(struct inode *inode, loff_t new_isize) 154 { 155 struct super_block *sb = inode->i_sb; 156 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 157 loff_t old_isize = i_size_read(inode); 158 loff_t nr_blocks; 159 160 if (new_isize == old_isize) 161 return; 162 163 spin_lock(&sbi->s_lock); 164 165 /* 166 * This may be called for an update after an IO error. 167 * So beware of the values seen. 168 */ 169 if (new_isize < old_isize) { 170 nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits; 171 if (sbi->s_used_blocks > nr_blocks) 172 sbi->s_used_blocks -= nr_blocks; 173 else 174 sbi->s_used_blocks = 0; 175 } else { 176 sbi->s_used_blocks += 177 (new_isize - old_isize) >> sb->s_blocksize_bits; 178 if (sbi->s_used_blocks > sbi->s_blocks) 179 sbi->s_used_blocks = sbi->s_blocks; 180 } 181 182 spin_unlock(&sbi->s_lock); 183 } 184 185 /* 186 * Check a zone condition. Return the amount of written (and still readable) 187 * data in the zone. 188 */ 189 static loff_t zonefs_check_zone_condition(struct super_block *sb, 190 struct zonefs_zone *z, 191 struct blk_zone *zone) 192 { 193 switch (zone->cond) { 194 case BLK_ZONE_COND_OFFLINE: 195 zonefs_warn(sb, "Zone %llu: offline zone\n", 196 z->z_sector); 197 z->z_flags |= ZONEFS_ZONE_OFFLINE; 198 return 0; 199 case BLK_ZONE_COND_READONLY: 200 /* 201 * The write pointer of read-only zones is invalid, so we cannot 202 * determine the zone wpoffset (inode size). We thus keep the 203 * zone wpoffset as is, which leads to an empty file 204 * (wpoffset == 0) on mount. For a runtime error, this keeps 205 * the inode size as it was when last updated so that the user 206 * can recover data. 207 */ 208 zonefs_warn(sb, "Zone %llu: read-only zone\n", 209 z->z_sector); 210 z->z_flags |= ZONEFS_ZONE_READONLY; 211 if (zonefs_zone_is_cnv(z)) 212 return z->z_capacity; 213 return z->z_wpoffset; 214 case BLK_ZONE_COND_FULL: 215 /* The write pointer of full zones is invalid. */ 216 return z->z_capacity; 217 default: 218 if (zonefs_zone_is_cnv(z)) 219 return z->z_capacity; 220 return (zone->wp - zone->start) << SECTOR_SHIFT; 221 } 222 } 223 224 /* 225 * Check a zone condition and adjust its inode access permissions for 226 * offline and readonly zones. 227 */ 228 static void zonefs_inode_update_mode(struct inode *inode) 229 { 230 struct zonefs_zone *z = zonefs_inode_zone(inode); 231 232 if (z->z_flags & ZONEFS_ZONE_OFFLINE) { 233 /* Offline zones cannot be read nor written */ 234 inode->i_flags |= S_IMMUTABLE; 235 inode->i_mode &= ~0777; 236 } else if (z->z_flags & ZONEFS_ZONE_READONLY) { 237 /* Readonly zones cannot be written */ 238 inode->i_flags |= S_IMMUTABLE; 239 if (z->z_flags & ZONEFS_ZONE_INIT_MODE) 240 inode->i_mode &= ~0777; 241 else 242 inode->i_mode &= ~0222; 243 } 244 245 z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; 246 z->z_mode = inode->i_mode; 247 } 248 249 struct zonefs_ioerr_data { 250 struct inode *inode; 251 bool write; 252 }; 253 254 static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, 255 void *data) 256 { 257 struct zonefs_ioerr_data *err = data; 258 struct inode *inode = err->inode; 259 struct zonefs_zone *z = zonefs_inode_zone(inode); 260 struct super_block *sb = inode->i_sb; 261 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 262 loff_t isize, data_size; 263 264 /* 265 * Check the zone condition: if the zone is not "bad" (offline or 266 * read-only), read errors are simply signaled to the IO issuer as long 267 * as there is no inconsistency between the inode size and the amount of 268 * data writen in the zone (data_size). 269 */ 270 data_size = zonefs_check_zone_condition(sb, z, zone); 271 isize = i_size_read(inode); 272 if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && 273 !err->write && isize == data_size) 274 return 0; 275 276 /* 277 * At this point, we detected either a bad zone or an inconsistency 278 * between the inode size and the amount of data written in the zone. 279 * For the latter case, the cause may be a write IO error or an external 280 * action on the device. Two error patterns exist: 281 * 1) The inode size is lower than the amount of data in the zone: 282 * a write operation partially failed and data was writen at the end 283 * of the file. This can happen in the case of a large direct IO 284 * needing several BIOs and/or write requests to be processed. 285 * 2) The inode size is larger than the amount of data in the zone: 286 * this can happen with a deferred write error with the use of the 287 * device side write cache after getting successful write IO 288 * completions. Other possibilities are (a) an external corruption, 289 * e.g. an application reset the zone directly, or (b) the device 290 * has a serious problem (e.g. firmware bug). 291 * 292 * In all cases, warn about inode size inconsistency and handle the 293 * IO error according to the zone condition and to the mount options. 294 */ 295 if (zonefs_zone_is_seq(z) && isize != data_size) 296 zonefs_warn(sb, 297 "inode %lu: invalid size %lld (should be %lld)\n", 298 inode->i_ino, isize, data_size); 299 300 /* 301 * First handle bad zones signaled by hardware. The mount options 302 * errors=zone-ro and errors=zone-offline result in changing the 303 * zone condition to read-only and offline respectively, as if the 304 * condition was signaled by the hardware. 305 */ 306 if ((z->z_flags & ZONEFS_ZONE_OFFLINE) || 307 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) { 308 zonefs_warn(sb, "inode %lu: read/write access disabled\n", 309 inode->i_ino); 310 if (!(z->z_flags & ZONEFS_ZONE_OFFLINE)) 311 z->z_flags |= ZONEFS_ZONE_OFFLINE; 312 zonefs_inode_update_mode(inode); 313 data_size = 0; 314 } else if ((z->z_flags & ZONEFS_ZONE_READONLY) || 315 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) { 316 zonefs_warn(sb, "inode %lu: write access disabled\n", 317 inode->i_ino); 318 if (!(z->z_flags & ZONEFS_ZONE_READONLY)) 319 z->z_flags |= ZONEFS_ZONE_READONLY; 320 zonefs_inode_update_mode(inode); 321 data_size = isize; 322 } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && 323 data_size > isize) { 324 /* Do not expose garbage data */ 325 data_size = isize; 326 } 327 328 /* 329 * If the filesystem is mounted with the explicit-open mount option, we 330 * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to 331 * the read-only or offline condition, to avoid attempting an explicit 332 * close of the zone when the inode file is closed. 333 */ 334 if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) && 335 (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE))) 336 z->z_flags &= ~ZONEFS_ZONE_OPEN; 337 338 /* 339 * If error=remount-ro was specified, any error result in remounting 340 * the volume as read-only. 341 */ 342 if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) { 343 zonefs_warn(sb, "remounting filesystem read-only\n"); 344 sb->s_flags |= SB_RDONLY; 345 } 346 347 /* 348 * Update block usage stats and the inode size to prevent access to 349 * invalid data. 350 */ 351 zonefs_update_stats(inode, data_size); 352 zonefs_i_size_write(inode, data_size); 353 z->z_wpoffset = data_size; 354 zonefs_inode_account_active(inode); 355 356 return 0; 357 } 358 359 /* 360 * When an file IO error occurs, check the file zone to see if there is a change 361 * in the zone condition (e.g. offline or read-only). For a failed write to a 362 * sequential zone, the zone write pointer position must also be checked to 363 * eventually correct the file size and zonefs inode write pointer offset 364 * (which can be out of sync with the drive due to partial write failures). 365 */ 366 void __zonefs_io_error(struct inode *inode, bool write) 367 { 368 struct zonefs_zone *z = zonefs_inode_zone(inode); 369 struct super_block *sb = inode->i_sb; 370 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 371 unsigned int noio_flag; 372 unsigned int nr_zones = 1; 373 struct zonefs_ioerr_data err = { 374 .inode = inode, 375 .write = write, 376 }; 377 int ret; 378 379 /* 380 * The only files that have more than one zone are conventional zone 381 * files with aggregated conventional zones, for which the inode zone 382 * size is always larger than the device zone size. 383 */ 384 if (z->z_size > bdev_zone_sectors(sb->s_bdev)) 385 nr_zones = z->z_size >> 386 (sbi->s_zone_sectors_shift + SECTOR_SHIFT); 387 388 /* 389 * Memory allocations in blkdev_report_zones() can trigger a memory 390 * reclaim which may in turn cause a recursion into zonefs as well as 391 * struct request allocations for the same device. The former case may 392 * end up in a deadlock on the inode truncate mutex, while the latter 393 * may prevent IO forward progress. Executing the report zones under 394 * the GFP_NOIO context avoids both problems. 395 */ 396 noio_flag = memalloc_noio_save(); 397 ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, 398 zonefs_io_error_cb, &err); 399 if (ret != nr_zones) 400 zonefs_err(sb, "Get inode %lu zone information failed %d\n", 401 inode->i_ino, ret); 402 memalloc_noio_restore(noio_flag); 403 } 404 405 static struct kmem_cache *zonefs_inode_cachep; 406 407 static struct inode *zonefs_alloc_inode(struct super_block *sb) 408 { 409 struct zonefs_inode_info *zi; 410 411 zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL); 412 if (!zi) 413 return NULL; 414 415 inode_init_once(&zi->i_vnode); 416 mutex_init(&zi->i_truncate_mutex); 417 zi->i_wr_refcnt = 0; 418 419 return &zi->i_vnode; 420 } 421 422 static void zonefs_free_inode(struct inode *inode) 423 { 424 kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode)); 425 } 426 427 /* 428 * File system stat. 429 */ 430 static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) 431 { 432 struct super_block *sb = dentry->d_sb; 433 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 434 enum zonefs_ztype t; 435 436 buf->f_type = ZONEFS_MAGIC; 437 buf->f_bsize = sb->s_blocksize; 438 buf->f_namelen = ZONEFS_NAME_MAX; 439 440 spin_lock(&sbi->s_lock); 441 442 buf->f_blocks = sbi->s_blocks; 443 if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks)) 444 buf->f_bfree = 0; 445 else 446 buf->f_bfree = buf->f_blocks - sbi->s_used_blocks; 447 buf->f_bavail = buf->f_bfree; 448 449 for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { 450 if (sbi->s_zgroup[t].g_nr_zones) 451 buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1; 452 } 453 buf->f_ffree = 0; 454 455 spin_unlock(&sbi->s_lock); 456 457 buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b); 458 459 return 0; 460 } 461 462 enum { 463 Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair, 464 Opt_explicit_open, Opt_err, 465 }; 466 467 static const match_table_t tokens = { 468 { Opt_errors_ro, "errors=remount-ro"}, 469 { Opt_errors_zro, "errors=zone-ro"}, 470 { Opt_errors_zol, "errors=zone-offline"}, 471 { Opt_errors_repair, "errors=repair"}, 472 { Opt_explicit_open, "explicit-open" }, 473 { Opt_err, NULL} 474 }; 475 476 static int zonefs_parse_options(struct super_block *sb, char *options) 477 { 478 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 479 substring_t args[MAX_OPT_ARGS]; 480 char *p; 481 482 if (!options) 483 return 0; 484 485 while ((p = strsep(&options, ",")) != NULL) { 486 int token; 487 488 if (!*p) 489 continue; 490 491 token = match_token(p, tokens, args); 492 switch (token) { 493 case Opt_errors_ro: 494 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 495 sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO; 496 break; 497 case Opt_errors_zro: 498 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 499 sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO; 500 break; 501 case Opt_errors_zol: 502 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 503 sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL; 504 break; 505 case Opt_errors_repair: 506 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 507 sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR; 508 break; 509 case Opt_explicit_open: 510 sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN; 511 break; 512 default: 513 return -EINVAL; 514 } 515 } 516 517 return 0; 518 } 519 520 static int zonefs_show_options(struct seq_file *seq, struct dentry *root) 521 { 522 struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb); 523 524 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) 525 seq_puts(seq, ",errors=remount-ro"); 526 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) 527 seq_puts(seq, ",errors=zone-ro"); 528 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) 529 seq_puts(seq, ",errors=zone-offline"); 530 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR) 531 seq_puts(seq, ",errors=repair"); 532 533 return 0; 534 } 535 536 static int zonefs_remount(struct super_block *sb, int *flags, char *data) 537 { 538 sync_filesystem(sb); 539 540 return zonefs_parse_options(sb, data); 541 } 542 543 static int zonefs_inode_setattr(struct mnt_idmap *idmap, 544 struct dentry *dentry, struct iattr *iattr) 545 { 546 struct inode *inode = d_inode(dentry); 547 int ret; 548 549 if (unlikely(IS_IMMUTABLE(inode))) 550 return -EPERM; 551 552 ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr); 553 if (ret) 554 return ret; 555 556 /* 557 * Since files and directories cannot be created nor deleted, do not 558 * allow setting any write attributes on the sub-directories grouping 559 * files by zone type. 560 */ 561 if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && 562 (iattr->ia_mode & 0222)) 563 return -EPERM; 564 565 if (((iattr->ia_valid & ATTR_UID) && 566 !uid_eq(iattr->ia_uid, inode->i_uid)) || 567 ((iattr->ia_valid & ATTR_GID) && 568 !gid_eq(iattr->ia_gid, inode->i_gid))) { 569 ret = dquot_transfer(&nop_mnt_idmap, inode, iattr); 570 if (ret) 571 return ret; 572 } 573 574 if (iattr->ia_valid & ATTR_SIZE) { 575 ret = zonefs_file_truncate(inode, iattr->ia_size); 576 if (ret) 577 return ret; 578 } 579 580 setattr_copy(&nop_mnt_idmap, inode, iattr); 581 582 if (S_ISREG(inode->i_mode)) { 583 struct zonefs_zone *z = zonefs_inode_zone(inode); 584 585 z->z_mode = inode->i_mode; 586 z->z_uid = inode->i_uid; 587 z->z_gid = inode->i_gid; 588 } 589 590 return 0; 591 } 592 593 static const struct inode_operations zonefs_file_inode_operations = { 594 .setattr = zonefs_inode_setattr, 595 }; 596 597 static long zonefs_fname_to_fno(const struct qstr *fname) 598 { 599 const char *name = fname->name; 600 unsigned int len = fname->len; 601 long fno = 0, shift = 1; 602 const char *rname; 603 char c = *name; 604 unsigned int i; 605 606 /* 607 * File names are always a base-10 number string without any 608 * leading 0s. 609 */ 610 if (!isdigit(c)) 611 return -ENOENT; 612 613 if (len > 1 && c == '0') 614 return -ENOENT; 615 616 if (len == 1) 617 return c - '0'; 618 619 for (i = 0, rname = name + len - 1; i < len; i++, rname--) { 620 c = *rname; 621 if (!isdigit(c)) 622 return -ENOENT; 623 fno += (c - '0') * shift; 624 shift *= 10; 625 } 626 627 return fno; 628 } 629 630 static struct inode *zonefs_get_file_inode(struct inode *dir, 631 struct dentry *dentry) 632 { 633 struct zonefs_zone_group *zgroup = dir->i_private; 634 struct super_block *sb = dir->i_sb; 635 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 636 struct zonefs_zone *z; 637 struct inode *inode; 638 ino_t ino; 639 long fno; 640 641 /* Get the file number from the file name */ 642 fno = zonefs_fname_to_fno(&dentry->d_name); 643 if (fno < 0) 644 return ERR_PTR(fno); 645 646 if (!zgroup->g_nr_zones || fno >= zgroup->g_nr_zones) 647 return ERR_PTR(-ENOENT); 648 649 z = &zgroup->g_zones[fno]; 650 ino = z->z_sector >> sbi->s_zone_sectors_shift; 651 inode = iget_locked(sb, ino); 652 if (!inode) 653 return ERR_PTR(-ENOMEM); 654 if (!(inode->i_state & I_NEW)) { 655 WARN_ON_ONCE(inode->i_private != z); 656 return inode; 657 } 658 659 inode->i_ino = ino; 660 inode->i_mode = z->z_mode; 661 inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode, 662 inode_get_ctime(dir)); 663 inode->i_uid = z->z_uid; 664 inode->i_gid = z->z_gid; 665 inode->i_size = z->z_wpoffset; 666 inode->i_blocks = z->z_capacity >> SECTOR_SHIFT; 667 inode->i_private = z; 668 669 inode->i_op = &zonefs_file_inode_operations; 670 inode->i_fop = &zonefs_file_operations; 671 inode->i_mapping->a_ops = &zonefs_file_aops; 672 673 /* Update the inode access rights depending on the zone condition */ 674 zonefs_inode_update_mode(inode); 675 676 unlock_new_inode(inode); 677 678 return inode; 679 } 680 681 static struct inode *zonefs_get_zgroup_inode(struct super_block *sb, 682 enum zonefs_ztype ztype) 683 { 684 struct inode *root = d_inode(sb->s_root); 685 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 686 struct inode *inode; 687 ino_t ino = bdev_nr_zones(sb->s_bdev) + ztype + 1; 688 689 inode = iget_locked(sb, ino); 690 if (!inode) 691 return ERR_PTR(-ENOMEM); 692 if (!(inode->i_state & I_NEW)) 693 return inode; 694 695 inode->i_ino = ino; 696 inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555); 697 inode->i_size = sbi->s_zgroup[ztype].g_nr_zones; 698 inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode, 699 inode_get_ctime(root)); 700 inode->i_private = &sbi->s_zgroup[ztype]; 701 set_nlink(inode, 2); 702 703 inode->i_op = &zonefs_dir_inode_operations; 704 inode->i_fop = &zonefs_dir_operations; 705 706 unlock_new_inode(inode); 707 708 return inode; 709 } 710 711 712 static struct inode *zonefs_get_dir_inode(struct inode *dir, 713 struct dentry *dentry) 714 { 715 struct super_block *sb = dir->i_sb; 716 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 717 const char *name = dentry->d_name.name; 718 enum zonefs_ztype ztype; 719 720 /* 721 * We only need to check for the "seq" directory and 722 * the "cnv" directory if we have conventional zones. 723 */ 724 if (dentry->d_name.len != 3) 725 return ERR_PTR(-ENOENT); 726 727 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 728 if (sbi->s_zgroup[ztype].g_nr_zones && 729 memcmp(name, zonefs_zgroup_name(ztype), 3) == 0) 730 break; 731 } 732 if (ztype == ZONEFS_ZTYPE_MAX) 733 return ERR_PTR(-ENOENT); 734 735 return zonefs_get_zgroup_inode(sb, ztype); 736 } 737 738 static struct dentry *zonefs_lookup(struct inode *dir, struct dentry *dentry, 739 unsigned int flags) 740 { 741 struct inode *inode; 742 743 if (dentry->d_name.len > ZONEFS_NAME_MAX) 744 return ERR_PTR(-ENAMETOOLONG); 745 746 if (dir == d_inode(dir->i_sb->s_root)) 747 inode = zonefs_get_dir_inode(dir, dentry); 748 else 749 inode = zonefs_get_file_inode(dir, dentry); 750 if (IS_ERR(inode)) 751 return ERR_CAST(inode); 752 753 return d_splice_alias(inode, dentry); 754 } 755 756 static int zonefs_readdir_root(struct file *file, struct dir_context *ctx) 757 { 758 struct inode *inode = file_inode(file); 759 struct super_block *sb = inode->i_sb; 760 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 761 enum zonefs_ztype ztype = ZONEFS_ZTYPE_CNV; 762 ino_t base_ino = bdev_nr_zones(sb->s_bdev) + 1; 763 764 if (ctx->pos >= inode->i_size) 765 return 0; 766 767 if (!dir_emit_dots(file, ctx)) 768 return 0; 769 770 if (ctx->pos == 2) { 771 if (!sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) 772 ztype = ZONEFS_ZTYPE_SEQ; 773 774 if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3, 775 base_ino + ztype, DT_DIR)) 776 return 0; 777 ctx->pos++; 778 } 779 780 if (ctx->pos == 3 && ztype != ZONEFS_ZTYPE_SEQ) { 781 ztype = ZONEFS_ZTYPE_SEQ; 782 if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3, 783 base_ino + ztype, DT_DIR)) 784 return 0; 785 ctx->pos++; 786 } 787 788 return 0; 789 } 790 791 static int zonefs_readdir_zgroup(struct file *file, 792 struct dir_context *ctx) 793 { 794 struct inode *inode = file_inode(file); 795 struct zonefs_zone_group *zgroup = inode->i_private; 796 struct super_block *sb = inode->i_sb; 797 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 798 struct zonefs_zone *z; 799 int fname_len; 800 char *fname; 801 ino_t ino; 802 int f; 803 804 /* 805 * The size of zone group directories is equal to the number 806 * of zone files in the group and does note include the "." and 807 * ".." entries. Hence the "+ 2" here. 808 */ 809 if (ctx->pos >= inode->i_size + 2) 810 return 0; 811 812 if (!dir_emit_dots(file, ctx)) 813 return 0; 814 815 fname = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL); 816 if (!fname) 817 return -ENOMEM; 818 819 for (f = ctx->pos - 2; f < zgroup->g_nr_zones; f++) { 820 z = &zgroup->g_zones[f]; 821 ino = z->z_sector >> sbi->s_zone_sectors_shift; 822 fname_len = snprintf(fname, ZONEFS_NAME_MAX - 1, "%u", f); 823 if (!dir_emit(ctx, fname, fname_len, ino, DT_REG)) 824 break; 825 ctx->pos++; 826 } 827 828 kfree(fname); 829 830 return 0; 831 } 832 833 static int zonefs_readdir(struct file *file, struct dir_context *ctx) 834 { 835 struct inode *inode = file_inode(file); 836 837 if (inode == d_inode(inode->i_sb->s_root)) 838 return zonefs_readdir_root(file, ctx); 839 840 return zonefs_readdir_zgroup(file, ctx); 841 } 842 843 const struct inode_operations zonefs_dir_inode_operations = { 844 .lookup = zonefs_lookup, 845 .setattr = zonefs_inode_setattr, 846 }; 847 848 const struct file_operations zonefs_dir_operations = { 849 .llseek = generic_file_llseek, 850 .read = generic_read_dir, 851 .iterate_shared = zonefs_readdir, 852 }; 853 854 struct zonefs_zone_data { 855 struct super_block *sb; 856 unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; 857 sector_t cnv_zone_start; 858 struct blk_zone *zones; 859 }; 860 861 static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx, 862 void *data) 863 { 864 struct zonefs_zone_data *zd = data; 865 struct super_block *sb = zd->sb; 866 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 867 868 /* 869 * We do not care about the first zone: it contains the super block 870 * and not exposed as a file. 871 */ 872 if (!idx) 873 return 0; 874 875 /* 876 * Count the number of zones that will be exposed as files. 877 * For sequential zones, we always have as many files as zones. 878 * FOr conventional zones, the number of files depends on if we have 879 * conventional zones aggregation enabled. 880 */ 881 switch (zone->type) { 882 case BLK_ZONE_TYPE_CONVENTIONAL: 883 if (sbi->s_features & ZONEFS_F_AGGRCNV) { 884 /* One file per set of contiguous conventional zones */ 885 if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) || 886 zone->start != zd->cnv_zone_start) 887 sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; 888 zd->cnv_zone_start = zone->start + zone->len; 889 } else { 890 /* One file per zone */ 891 sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; 892 } 893 break; 894 case BLK_ZONE_TYPE_SEQWRITE_REQ: 895 case BLK_ZONE_TYPE_SEQWRITE_PREF: 896 sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++; 897 break; 898 default: 899 zonefs_err(zd->sb, "Unsupported zone type 0x%x\n", 900 zone->type); 901 return -EIO; 902 } 903 904 memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone)); 905 906 return 0; 907 } 908 909 static int zonefs_get_zone_info(struct zonefs_zone_data *zd) 910 { 911 struct block_device *bdev = zd->sb->s_bdev; 912 int ret; 913 914 zd->zones = kvcalloc(bdev_nr_zones(bdev), sizeof(struct blk_zone), 915 GFP_KERNEL); 916 if (!zd->zones) 917 return -ENOMEM; 918 919 /* Get zones information from the device */ 920 ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, 921 zonefs_get_zone_info_cb, zd); 922 if (ret < 0) { 923 zonefs_err(zd->sb, "Zone report failed %d\n", ret); 924 return ret; 925 } 926 927 if (ret != bdev_nr_zones(bdev)) { 928 zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n", 929 ret, bdev_nr_zones(bdev)); 930 return -EIO; 931 } 932 933 return 0; 934 } 935 936 static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd) 937 { 938 kvfree(zd->zones); 939 } 940 941 /* 942 * Create a zone group and populate it with zone files. 943 */ 944 static int zonefs_init_zgroup(struct super_block *sb, 945 struct zonefs_zone_data *zd, 946 enum zonefs_ztype ztype) 947 { 948 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 949 struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype]; 950 struct blk_zone *zone, *next, *end; 951 struct zonefs_zone *z; 952 unsigned int n = 0; 953 int ret; 954 955 /* Allocate the zone group. If it is empty, we have nothing to do. */ 956 if (!zgroup->g_nr_zones) 957 return 0; 958 959 zgroup->g_zones = kvcalloc(zgroup->g_nr_zones, 960 sizeof(struct zonefs_zone), GFP_KERNEL); 961 if (!zgroup->g_zones) 962 return -ENOMEM; 963 964 /* 965 * Initialize the zone groups using the device zone information. 966 * We always skip the first zone as it contains the super block 967 * and is not use to back a file. 968 */ 969 end = zd->zones + bdev_nr_zones(sb->s_bdev); 970 for (zone = &zd->zones[1]; zone < end; zone = next) { 971 972 next = zone + 1; 973 if (zonefs_zone_type(zone) != ztype) 974 continue; 975 976 if (WARN_ON_ONCE(n >= zgroup->g_nr_zones)) 977 return -EINVAL; 978 979 /* 980 * For conventional zones, contiguous zones can be aggregated 981 * together to form larger files. Note that this overwrites the 982 * length of the first zone of the set of contiguous zones 983 * aggregated together. If one offline or read-only zone is 984 * found, assume that all zones aggregated have the same 985 * condition. 986 */ 987 if (ztype == ZONEFS_ZTYPE_CNV && 988 (sbi->s_features & ZONEFS_F_AGGRCNV)) { 989 for (; next < end; next++) { 990 if (zonefs_zone_type(next) != ztype) 991 break; 992 zone->len += next->len; 993 zone->capacity += next->capacity; 994 if (next->cond == BLK_ZONE_COND_READONLY && 995 zone->cond != BLK_ZONE_COND_OFFLINE) 996 zone->cond = BLK_ZONE_COND_READONLY; 997 else if (next->cond == BLK_ZONE_COND_OFFLINE) 998 zone->cond = BLK_ZONE_COND_OFFLINE; 999 } 1000 } 1001 1002 z = &zgroup->g_zones[n]; 1003 if (ztype == ZONEFS_ZTYPE_CNV) 1004 z->z_flags |= ZONEFS_ZONE_CNV; 1005 z->z_sector = zone->start; 1006 z->z_size = zone->len << SECTOR_SHIFT; 1007 if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && 1008 !(sbi->s_features & ZONEFS_F_AGGRCNV)) { 1009 zonefs_err(sb, 1010 "Invalid zone size %llu (device zone sectors %llu)\n", 1011 z->z_size, 1012 bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); 1013 return -EINVAL; 1014 } 1015 1016 z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE, 1017 zone->capacity << SECTOR_SHIFT); 1018 z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone); 1019 1020 z->z_mode = S_IFREG | sbi->s_perm; 1021 z->z_uid = sbi->s_uid; 1022 z->z_gid = sbi->s_gid; 1023 1024 /* 1025 * Let zonefs_inode_update_mode() know that we will need 1026 * special initialization of the inode mode the first time 1027 * it is accessed. 1028 */ 1029 z->z_flags |= ZONEFS_ZONE_INIT_MODE; 1030 1031 sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes); 1032 sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits; 1033 sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits; 1034 1035 /* 1036 * For sequential zones, make sure that any open zone is closed 1037 * first to ensure that the initial number of open zones is 0, 1038 * in sync with the open zone accounting done when the mount 1039 * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used. 1040 */ 1041 if (ztype == ZONEFS_ZTYPE_SEQ && 1042 (zone->cond == BLK_ZONE_COND_IMP_OPEN || 1043 zone->cond == BLK_ZONE_COND_EXP_OPEN)) { 1044 ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE); 1045 if (ret) 1046 return ret; 1047 } 1048 1049 zonefs_account_active(sb, z); 1050 1051 n++; 1052 } 1053 1054 if (WARN_ON_ONCE(n != zgroup->g_nr_zones)) 1055 return -EINVAL; 1056 1057 zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", 1058 zonefs_zgroup_name(ztype), 1059 zgroup->g_nr_zones, 1060 zgroup->g_nr_zones > 1 ? "s" : ""); 1061 1062 return 0; 1063 } 1064 1065 static void zonefs_free_zgroups(struct super_block *sb) 1066 { 1067 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1068 enum zonefs_ztype ztype; 1069 1070 if (!sbi) 1071 return; 1072 1073 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1074 kvfree(sbi->s_zgroup[ztype].g_zones); 1075 sbi->s_zgroup[ztype].g_zones = NULL; 1076 } 1077 } 1078 1079 /* 1080 * Create a zone group and populate it with zone files. 1081 */ 1082 static int zonefs_init_zgroups(struct super_block *sb) 1083 { 1084 struct zonefs_zone_data zd; 1085 enum zonefs_ztype ztype; 1086 int ret; 1087 1088 /* First get the device zone information */ 1089 memset(&zd, 0, sizeof(struct zonefs_zone_data)); 1090 zd.sb = sb; 1091 ret = zonefs_get_zone_info(&zd); 1092 if (ret) 1093 goto cleanup; 1094 1095 /* Allocate and initialize the zone groups */ 1096 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1097 ret = zonefs_init_zgroup(sb, &zd, ztype); 1098 if (ret) { 1099 zonefs_info(sb, 1100 "Zone group \"%s\" initialization failed\n", 1101 zonefs_zgroup_name(ztype)); 1102 break; 1103 } 1104 } 1105 1106 cleanup: 1107 zonefs_free_zone_info(&zd); 1108 if (ret) 1109 zonefs_free_zgroups(sb); 1110 1111 return ret; 1112 } 1113 1114 /* 1115 * Read super block information from the device. 1116 */ 1117 static int zonefs_read_super(struct super_block *sb) 1118 { 1119 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1120 struct zonefs_super *super; 1121 u32 crc, stored_crc; 1122 struct page *page; 1123 struct bio_vec bio_vec; 1124 struct bio bio; 1125 int ret; 1126 1127 page = alloc_page(GFP_KERNEL); 1128 if (!page) 1129 return -ENOMEM; 1130 1131 bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ); 1132 bio.bi_iter.bi_sector = 0; 1133 __bio_add_page(&bio, page, PAGE_SIZE, 0); 1134 1135 ret = submit_bio_wait(&bio); 1136 if (ret) 1137 goto free_page; 1138 1139 super = page_address(page); 1140 1141 ret = -EINVAL; 1142 if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC) 1143 goto free_page; 1144 1145 stored_crc = le32_to_cpu(super->s_crc); 1146 super->s_crc = 0; 1147 crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super)); 1148 if (crc != stored_crc) { 1149 zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)", 1150 crc, stored_crc); 1151 goto free_page; 1152 } 1153 1154 sbi->s_features = le64_to_cpu(super->s_features); 1155 if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) { 1156 zonefs_err(sb, "Unknown features set 0x%llx\n", 1157 sbi->s_features); 1158 goto free_page; 1159 } 1160 1161 if (sbi->s_features & ZONEFS_F_UID) { 1162 sbi->s_uid = make_kuid(current_user_ns(), 1163 le32_to_cpu(super->s_uid)); 1164 if (!uid_valid(sbi->s_uid)) { 1165 zonefs_err(sb, "Invalid UID feature\n"); 1166 goto free_page; 1167 } 1168 } 1169 1170 if (sbi->s_features & ZONEFS_F_GID) { 1171 sbi->s_gid = make_kgid(current_user_ns(), 1172 le32_to_cpu(super->s_gid)); 1173 if (!gid_valid(sbi->s_gid)) { 1174 zonefs_err(sb, "Invalid GID feature\n"); 1175 goto free_page; 1176 } 1177 } 1178 1179 if (sbi->s_features & ZONEFS_F_PERM) 1180 sbi->s_perm = le32_to_cpu(super->s_perm); 1181 1182 if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) { 1183 zonefs_err(sb, "Reserved area is being used\n"); 1184 goto free_page; 1185 } 1186 1187 import_uuid(&sbi->s_uuid, super->s_uuid); 1188 ret = 0; 1189 1190 free_page: 1191 __free_page(page); 1192 1193 return ret; 1194 } 1195 1196 static const struct super_operations zonefs_sops = { 1197 .alloc_inode = zonefs_alloc_inode, 1198 .free_inode = zonefs_free_inode, 1199 .statfs = zonefs_statfs, 1200 .remount_fs = zonefs_remount, 1201 .show_options = zonefs_show_options, 1202 }; 1203 1204 static int zonefs_get_zgroup_inodes(struct super_block *sb) 1205 { 1206 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1207 struct inode *dir_inode; 1208 enum zonefs_ztype ztype; 1209 1210 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1211 if (!sbi->s_zgroup[ztype].g_nr_zones) 1212 continue; 1213 1214 dir_inode = zonefs_get_zgroup_inode(sb, ztype); 1215 if (IS_ERR(dir_inode)) 1216 return PTR_ERR(dir_inode); 1217 1218 sbi->s_zgroup[ztype].g_inode = dir_inode; 1219 } 1220 1221 return 0; 1222 } 1223 1224 static void zonefs_release_zgroup_inodes(struct super_block *sb) 1225 { 1226 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1227 enum zonefs_ztype ztype; 1228 1229 if (!sbi) 1230 return; 1231 1232 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1233 if (sbi->s_zgroup[ztype].g_inode) { 1234 iput(sbi->s_zgroup[ztype].g_inode); 1235 sbi->s_zgroup[ztype].g_inode = NULL; 1236 } 1237 } 1238 } 1239 1240 /* 1241 * Check that the device is zoned. If it is, get the list of zones and create 1242 * sub-directories and files according to the device zone configuration and 1243 * format options. 1244 */ 1245 static int zonefs_fill_super(struct super_block *sb, void *data, int silent) 1246 { 1247 struct zonefs_sb_info *sbi; 1248 struct inode *inode; 1249 enum zonefs_ztype ztype; 1250 int ret; 1251 1252 if (!bdev_is_zoned(sb->s_bdev)) { 1253 zonefs_err(sb, "Not a zoned block device\n"); 1254 return -EINVAL; 1255 } 1256 1257 /* 1258 * Initialize super block information: the maximum file size is updated 1259 * when the zone files are created so that the format option 1260 * ZONEFS_F_AGGRCNV which increases the maximum file size of a file 1261 * beyond the zone size is taken into account. 1262 */ 1263 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1264 if (!sbi) 1265 return -ENOMEM; 1266 1267 spin_lock_init(&sbi->s_lock); 1268 sb->s_fs_info = sbi; 1269 sb->s_magic = ZONEFS_MAGIC; 1270 sb->s_maxbytes = 0; 1271 sb->s_op = &zonefs_sops; 1272 sb->s_time_gran = 1; 1273 1274 /* 1275 * The block size is set to the device zone write granularity to ensure 1276 * that write operations are always aligned according to the device 1277 * interface constraints. 1278 */ 1279 sb_set_blocksize(sb, bdev_zone_write_granularity(sb->s_bdev)); 1280 sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev)); 1281 sbi->s_uid = GLOBAL_ROOT_UID; 1282 sbi->s_gid = GLOBAL_ROOT_GID; 1283 sbi->s_perm = 0640; 1284 sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; 1285 1286 atomic_set(&sbi->s_wro_seq_files, 0); 1287 sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev); 1288 atomic_set(&sbi->s_active_seq_files, 0); 1289 sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev); 1290 1291 ret = zonefs_read_super(sb); 1292 if (ret) 1293 return ret; 1294 1295 ret = zonefs_parse_options(sb, data); 1296 if (ret) 1297 return ret; 1298 1299 zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev)); 1300 1301 if (!sbi->s_max_wro_seq_files && 1302 !sbi->s_max_active_seq_files && 1303 sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { 1304 zonefs_info(sb, 1305 "No open and active zone limits. Ignoring explicit_open mount option\n"); 1306 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; 1307 } 1308 1309 /* Initialize the zone groups */ 1310 ret = zonefs_init_zgroups(sb); 1311 if (ret) 1312 goto cleanup; 1313 1314 /* Create the root directory inode */ 1315 ret = -ENOMEM; 1316 inode = new_inode(sb); 1317 if (!inode) 1318 goto cleanup; 1319 1320 inode->i_ino = bdev_nr_zones(sb->s_bdev); 1321 inode->i_mode = S_IFDIR | 0555; 1322 inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); 1323 inode->i_op = &zonefs_dir_inode_operations; 1324 inode->i_fop = &zonefs_dir_operations; 1325 inode->i_size = 2; 1326 set_nlink(inode, 2); 1327 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1328 if (sbi->s_zgroup[ztype].g_nr_zones) { 1329 inc_nlink(inode); 1330 inode->i_size++; 1331 } 1332 } 1333 1334 sb->s_root = d_make_root(inode); 1335 if (!sb->s_root) 1336 goto cleanup; 1337 1338 /* 1339 * Take a reference on the zone groups directory inodes 1340 * to keep them in the inode cache. 1341 */ 1342 ret = zonefs_get_zgroup_inodes(sb); 1343 if (ret) 1344 goto cleanup; 1345 1346 ret = zonefs_sysfs_register(sb); 1347 if (ret) 1348 goto cleanup; 1349 1350 return 0; 1351 1352 cleanup: 1353 zonefs_release_zgroup_inodes(sb); 1354 zonefs_free_zgroups(sb); 1355 1356 return ret; 1357 } 1358 1359 static struct dentry *zonefs_mount(struct file_system_type *fs_type, 1360 int flags, const char *dev_name, void *data) 1361 { 1362 return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super); 1363 } 1364 1365 static void zonefs_kill_super(struct super_block *sb) 1366 { 1367 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1368 1369 /* Release the reference on the zone group directory inodes */ 1370 zonefs_release_zgroup_inodes(sb); 1371 1372 kill_block_super(sb); 1373 1374 zonefs_sysfs_unregister(sb); 1375 zonefs_free_zgroups(sb); 1376 kfree(sbi); 1377 } 1378 1379 /* 1380 * File system definition and registration. 1381 */ 1382 static struct file_system_type zonefs_type = { 1383 .owner = THIS_MODULE, 1384 .name = "zonefs", 1385 .mount = zonefs_mount, 1386 .kill_sb = zonefs_kill_super, 1387 .fs_flags = FS_REQUIRES_DEV, 1388 }; 1389 1390 static int __init zonefs_init_inodecache(void) 1391 { 1392 zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache", 1393 sizeof(struct zonefs_inode_info), 0, 1394 (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), 1395 NULL); 1396 if (zonefs_inode_cachep == NULL) 1397 return -ENOMEM; 1398 return 0; 1399 } 1400 1401 static void zonefs_destroy_inodecache(void) 1402 { 1403 /* 1404 * Make sure all delayed rcu free inodes are flushed before we 1405 * destroy the inode cache. 1406 */ 1407 rcu_barrier(); 1408 kmem_cache_destroy(zonefs_inode_cachep); 1409 } 1410 1411 static int __init zonefs_init(void) 1412 { 1413 int ret; 1414 1415 BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); 1416 1417 ret = zonefs_init_inodecache(); 1418 if (ret) 1419 return ret; 1420 1421 ret = zonefs_sysfs_init(); 1422 if (ret) 1423 goto destroy_inodecache; 1424 1425 ret = register_filesystem(&zonefs_type); 1426 if (ret) 1427 goto sysfs_exit; 1428 1429 return 0; 1430 1431 sysfs_exit: 1432 zonefs_sysfs_exit(); 1433 destroy_inodecache: 1434 zonefs_destroy_inodecache(); 1435 1436 return ret; 1437 } 1438 1439 static void __exit zonefs_exit(void) 1440 { 1441 unregister_filesystem(&zonefs_type); 1442 zonefs_sysfs_exit(); 1443 zonefs_destroy_inodecache(); 1444 } 1445 1446 MODULE_AUTHOR("Damien Le Moal"); 1447 MODULE_DESCRIPTION("Zone file system for zoned block devices"); 1448 MODULE_LICENSE("GPL"); 1449 MODULE_ALIAS_FS("zonefs"); 1450 module_init(zonefs_init); 1451 module_exit(zonefs_exit); 1452