1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Simple file system for zoned block devices exposing zones as files. 4 * 5 * Copyright (C) 2019 Western Digital Corporation or its affiliates. 6 */ 7 #include <linux/module.h> 8 #include <linux/pagemap.h> 9 #include <linux/magic.h> 10 #include <linux/iomap.h> 11 #include <linux/init.h> 12 #include <linux/slab.h> 13 #include <linux/blkdev.h> 14 #include <linux/statfs.h> 15 #include <linux/writeback.h> 16 #include <linux/quotaops.h> 17 #include <linux/seq_file.h> 18 #include <linux/parser.h> 19 #include <linux/uio.h> 20 #include <linux/mman.h> 21 #include <linux/sched/mm.h> 22 #include <linux/crc32.h> 23 #include <linux/task_io_accounting_ops.h> 24 25 #include "zonefs.h" 26 27 #define CREATE_TRACE_POINTS 28 #include "trace.h" 29 30 /* 31 * Get the name of a zone group directory. 32 */ 33 static const char *zonefs_zgroup_name(enum zonefs_ztype ztype) 34 { 35 switch (ztype) { 36 case ZONEFS_ZTYPE_CNV: 37 return "cnv"; 38 case ZONEFS_ZTYPE_SEQ: 39 return "seq"; 40 default: 41 WARN_ON_ONCE(1); 42 return "???"; 43 } 44 } 45 46 /* 47 * Manage the active zone count. 48 */ 49 static void zonefs_account_active(struct super_block *sb, 50 struct zonefs_zone *z) 51 { 52 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 53 54 if (zonefs_zone_is_cnv(z)) 55 return; 56 57 /* 58 * For zones that transitioned to the offline or readonly condition, 59 * we only need to clear the active state. 60 */ 61 if (z->z_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) 62 goto out; 63 64 /* 65 * If the zone is active, that is, if it is explicitly open or 66 * partially written, check if it was already accounted as active. 67 */ 68 if ((z->z_flags & ZONEFS_ZONE_OPEN) || 69 (z->z_wpoffset > 0 && z->z_wpoffset < z->z_capacity)) { 70 if (!(z->z_flags & ZONEFS_ZONE_ACTIVE)) { 71 z->z_flags |= ZONEFS_ZONE_ACTIVE; 72 atomic_inc(&sbi->s_active_seq_files); 73 } 74 return; 75 } 76 77 out: 78 /* The zone is not active. 
If it was, update the active count */ 79 if (z->z_flags & ZONEFS_ZONE_ACTIVE) { 80 z->z_flags &= ~ZONEFS_ZONE_ACTIVE; 81 atomic_dec(&sbi->s_active_seq_files); 82 } 83 } 84 85 /* 86 * Manage the active zone count. Called with zi->i_truncate_mutex held. 87 */ 88 void zonefs_inode_account_active(struct inode *inode) 89 { 90 lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); 91 92 return zonefs_account_active(inode->i_sb, zonefs_inode_zone(inode)); 93 } 94 95 /* 96 * Execute a zone management operation. 97 */ 98 static int zonefs_zone_mgmt(struct super_block *sb, 99 struct zonefs_zone *z, enum req_op op) 100 { 101 int ret; 102 103 /* 104 * With ZNS drives, closing an explicitly open zone that has not been 105 * written will change the zone state to "closed", that is, the zone 106 * will remain active. Since this can then cause failure of explicit 107 * open operation on other zones if the drive active zone resources 108 * are exceeded, make sure that the zone does not remain active by 109 * resetting it. 110 */ 111 if (op == REQ_OP_ZONE_CLOSE && !z->z_wpoffset) 112 op = REQ_OP_ZONE_RESET; 113 114 trace_zonefs_zone_mgmt(sb, z, op); 115 ret = blkdev_zone_mgmt(sb->s_bdev, op, z->z_sector, 116 z->z_size >> SECTOR_SHIFT, GFP_NOFS); 117 if (ret) { 118 zonefs_err(sb, 119 "Zone management operation %s at %llu failed %d\n", 120 blk_op_str(op), z->z_sector, ret); 121 return ret; 122 } 123 124 return 0; 125 } 126 127 int zonefs_inode_zone_mgmt(struct inode *inode, enum req_op op) 128 { 129 lockdep_assert_held(&ZONEFS_I(inode)->i_truncate_mutex); 130 131 return zonefs_zone_mgmt(inode->i_sb, zonefs_inode_zone(inode), op); 132 } 133 134 void zonefs_i_size_write(struct inode *inode, loff_t isize) 135 { 136 struct zonefs_zone *z = zonefs_inode_zone(inode); 137 138 i_size_write(inode, isize); 139 140 /* 141 * A full zone is no longer open/active and does not need 142 * explicit closing. 
143 */ 144 if (isize >= z->z_capacity) { 145 struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); 146 147 if (z->z_flags & ZONEFS_ZONE_ACTIVE) 148 atomic_dec(&sbi->s_active_seq_files); 149 z->z_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); 150 } 151 } 152 153 void zonefs_update_stats(struct inode *inode, loff_t new_isize) 154 { 155 struct super_block *sb = inode->i_sb; 156 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 157 loff_t old_isize = i_size_read(inode); 158 loff_t nr_blocks; 159 160 if (new_isize == old_isize) 161 return; 162 163 spin_lock(&sbi->s_lock); 164 165 /* 166 * This may be called for an update after an IO error. 167 * So beware of the values seen. 168 */ 169 if (new_isize < old_isize) { 170 nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits; 171 if (sbi->s_used_blocks > nr_blocks) 172 sbi->s_used_blocks -= nr_blocks; 173 else 174 sbi->s_used_blocks = 0; 175 } else { 176 sbi->s_used_blocks += 177 (new_isize - old_isize) >> sb->s_blocksize_bits; 178 if (sbi->s_used_blocks > sbi->s_blocks) 179 sbi->s_used_blocks = sbi->s_blocks; 180 } 181 182 spin_unlock(&sbi->s_lock); 183 } 184 185 /* 186 * Check a zone condition. Return the amount of written (and still readable) 187 * data in the zone. 188 */ 189 static loff_t zonefs_check_zone_condition(struct super_block *sb, 190 struct zonefs_zone *z, 191 struct blk_zone *zone) 192 { 193 switch (zone->cond) { 194 case BLK_ZONE_COND_OFFLINE: 195 zonefs_warn(sb, "Zone %llu: offline zone\n", 196 z->z_sector); 197 z->z_flags |= ZONEFS_ZONE_OFFLINE; 198 return 0; 199 case BLK_ZONE_COND_READONLY: 200 /* 201 * The write pointer of read-only zones is invalid, so we cannot 202 * determine the zone wpoffset (inode size). We thus keep the 203 * zone wpoffset as is, which leads to an empty file 204 * (wpoffset == 0) on mount. For a runtime error, this keeps 205 * the inode size as it was when last updated so that the user 206 * can recover data. 
207 */ 208 zonefs_warn(sb, "Zone %llu: read-only zone\n", 209 z->z_sector); 210 z->z_flags |= ZONEFS_ZONE_READONLY; 211 if (zonefs_zone_is_cnv(z)) 212 return z->z_capacity; 213 return z->z_wpoffset; 214 case BLK_ZONE_COND_FULL: 215 /* The write pointer of full zones is invalid. */ 216 return z->z_capacity; 217 default: 218 if (zonefs_zone_is_cnv(z)) 219 return z->z_capacity; 220 return (zone->wp - zone->start) << SECTOR_SHIFT; 221 } 222 } 223 224 /* 225 * Check a zone condition and adjust its inode access permissions for 226 * offline and readonly zones. 227 */ 228 static void zonefs_inode_update_mode(struct inode *inode) 229 { 230 struct zonefs_zone *z = zonefs_inode_zone(inode); 231 232 if (z->z_flags & ZONEFS_ZONE_OFFLINE) { 233 /* Offline zones cannot be read nor written */ 234 inode->i_flags |= S_IMMUTABLE; 235 inode->i_mode &= ~0777; 236 } else if (z->z_flags & ZONEFS_ZONE_READONLY) { 237 /* Readonly zones cannot be written */ 238 inode->i_flags |= S_IMMUTABLE; 239 if (z->z_flags & ZONEFS_ZONE_INIT_MODE) 240 inode->i_mode &= ~0777; 241 else 242 inode->i_mode &= ~0222; 243 } 244 245 z->z_flags &= ~ZONEFS_ZONE_INIT_MODE; 246 z->z_mode = inode->i_mode; 247 } 248 249 static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, 250 void *data) 251 { 252 struct blk_zone *z = data; 253 254 *z = *zone; 255 return 0; 256 } 257 258 static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, 259 bool write) 260 { 261 struct zonefs_zone *z = zonefs_inode_zone(inode); 262 struct super_block *sb = inode->i_sb; 263 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 264 loff_t isize, data_size; 265 266 /* 267 * Check the zone condition: if the zone is not "bad" (offline or 268 * read-only), read errors are simply signaled to the IO issuer as long 269 * as there is no inconsistency between the inode size and the amount of 270 * data writen in the zone (data_size). 
271 */ 272 data_size = zonefs_check_zone_condition(sb, z, zone); 273 isize = i_size_read(inode); 274 if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && 275 !write && isize == data_size) 276 return; 277 278 /* 279 * At this point, we detected either a bad zone or an inconsistency 280 * between the inode size and the amount of data written in the zone. 281 * For the latter case, the cause may be a write IO error or an external 282 * action on the device. Two error patterns exist: 283 * 1) The inode size is lower than the amount of data in the zone: 284 * a write operation partially failed and data was writen at the end 285 * of the file. This can happen in the case of a large direct IO 286 * needing several BIOs and/or write requests to be processed. 287 * 2) The inode size is larger than the amount of data in the zone: 288 * this can happen with a deferred write error with the use of the 289 * device side write cache after getting successful write IO 290 * completions. Other possibilities are (a) an external corruption, 291 * e.g. an application reset the zone directly, or (b) the device 292 * has a serious problem (e.g. firmware bug). 293 * 294 * In all cases, warn about inode size inconsistency and handle the 295 * IO error according to the zone condition and to the mount options. 296 */ 297 if (isize != data_size) 298 zonefs_warn(sb, 299 "inode %lu: invalid size %lld (should be %lld)\n", 300 inode->i_ino, isize, data_size); 301 302 /* 303 * First handle bad zones signaled by hardware. The mount options 304 * errors=zone-ro and errors=zone-offline result in changing the 305 * zone condition to read-only and offline respectively, as if the 306 * condition was signaled by the hardware. 
307 */ 308 if ((z->z_flags & ZONEFS_ZONE_OFFLINE) || 309 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)) { 310 zonefs_warn(sb, "inode %lu: read/write access disabled\n", 311 inode->i_ino); 312 if (!(z->z_flags & ZONEFS_ZONE_OFFLINE)) 313 z->z_flags |= ZONEFS_ZONE_OFFLINE; 314 zonefs_inode_update_mode(inode); 315 data_size = 0; 316 } else if ((z->z_flags & ZONEFS_ZONE_READONLY) || 317 (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)) { 318 zonefs_warn(sb, "inode %lu: write access disabled\n", 319 inode->i_ino); 320 if (!(z->z_flags & ZONEFS_ZONE_READONLY)) 321 z->z_flags |= ZONEFS_ZONE_READONLY; 322 zonefs_inode_update_mode(inode); 323 data_size = isize; 324 } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && 325 data_size > isize) { 326 /* Do not expose garbage data */ 327 data_size = isize; 328 } 329 330 /* 331 * If the filesystem is mounted with the explicit-open mount option, we 332 * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to 333 * the read-only or offline condition, to avoid attempting an explicit 334 * close of the zone when the inode file is closed. 335 */ 336 if ((sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) && 337 (z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE))) 338 z->z_flags &= ~ZONEFS_ZONE_OPEN; 339 340 /* 341 * If error=remount-ro was specified, any error result in remounting 342 * the volume as read-only. 343 */ 344 if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) { 345 zonefs_warn(sb, "remounting filesystem read-only\n"); 346 sb->s_flags |= SB_RDONLY; 347 } 348 349 /* 350 * Update block usage stats and the inode size to prevent access to 351 * invalid data. 352 */ 353 zonefs_update_stats(inode, data_size); 354 zonefs_i_size_write(inode, data_size); 355 z->z_wpoffset = data_size; 356 zonefs_inode_account_active(inode); 357 } 358 359 /* 360 * When an file IO error occurs, check the file zone to see if there is a change 361 * in the zone condition (e.g. offline or read-only). 
For a failed write to a 362 * sequential zone, the zone write pointer position must also be checked to 363 * eventually correct the file size and zonefs inode write pointer offset 364 * (which can be out of sync with the drive due to partial write failures). 365 */ 366 void __zonefs_io_error(struct inode *inode, bool write) 367 { 368 struct zonefs_zone *z = zonefs_inode_zone(inode); 369 struct super_block *sb = inode->i_sb; 370 unsigned int noio_flag; 371 struct blk_zone zone; 372 int ret; 373 374 /* 375 * Conventional zone have no write pointer and cannot become read-only 376 * or offline. So simply fake a report for a single or aggregated zone 377 * and let zonefs_handle_io_error() correct the zone inode information 378 * according to the mount options. 379 */ 380 if (!zonefs_zone_is_seq(z)) { 381 zone.start = z->z_sector; 382 zone.len = z->z_size >> SECTOR_SHIFT; 383 zone.wp = zone.start + zone.len; 384 zone.type = BLK_ZONE_TYPE_CONVENTIONAL; 385 zone.cond = BLK_ZONE_COND_NOT_WP; 386 zone.capacity = zone.len; 387 goto handle_io_error; 388 } 389 390 /* 391 * Memory allocations in blkdev_report_zones() can trigger a memory 392 * reclaim which may in turn cause a recursion into zonefs as well as 393 * struct request allocations for the same device. The former case may 394 * end up in a deadlock on the inode truncate mutex, while the latter 395 * may prevent IO forward progress. Executing the report zones under 396 * the GFP_NOIO context avoids both problems. 
397 */ 398 noio_flag = memalloc_noio_save(); 399 ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1, 400 zonefs_io_error_cb, &zone); 401 memalloc_noio_restore(noio_flag); 402 403 if (ret != 1) { 404 zonefs_err(sb, "Get inode %lu zone information failed %d\n", 405 inode->i_ino, ret); 406 zonefs_warn(sb, "remounting filesystem read-only\n"); 407 sb->s_flags |= SB_RDONLY; 408 return; 409 } 410 411 handle_io_error: 412 zonefs_handle_io_error(inode, &zone, write); 413 } 414 415 static struct kmem_cache *zonefs_inode_cachep; 416 417 static struct inode *zonefs_alloc_inode(struct super_block *sb) 418 { 419 struct zonefs_inode_info *zi; 420 421 zi = alloc_inode_sb(sb, zonefs_inode_cachep, GFP_KERNEL); 422 if (!zi) 423 return NULL; 424 425 inode_init_once(&zi->i_vnode); 426 mutex_init(&zi->i_truncate_mutex); 427 zi->i_wr_refcnt = 0; 428 429 return &zi->i_vnode; 430 } 431 432 static void zonefs_free_inode(struct inode *inode) 433 { 434 kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode)); 435 } 436 437 /* 438 * File system stat. 
439 */ 440 static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) 441 { 442 struct super_block *sb = dentry->d_sb; 443 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 444 enum zonefs_ztype t; 445 446 buf->f_type = ZONEFS_MAGIC; 447 buf->f_bsize = sb->s_blocksize; 448 buf->f_namelen = ZONEFS_NAME_MAX; 449 450 spin_lock(&sbi->s_lock); 451 452 buf->f_blocks = sbi->s_blocks; 453 if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks)) 454 buf->f_bfree = 0; 455 else 456 buf->f_bfree = buf->f_blocks - sbi->s_used_blocks; 457 buf->f_bavail = buf->f_bfree; 458 459 for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { 460 if (sbi->s_zgroup[t].g_nr_zones) 461 buf->f_files += sbi->s_zgroup[t].g_nr_zones + 1; 462 } 463 buf->f_ffree = 0; 464 465 spin_unlock(&sbi->s_lock); 466 467 buf->f_fsid = uuid_to_fsid(sbi->s_uuid.b); 468 469 return 0; 470 } 471 472 enum { 473 Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair, 474 Opt_explicit_open, Opt_err, 475 }; 476 477 static const match_table_t tokens = { 478 { Opt_errors_ro, "errors=remount-ro"}, 479 { Opt_errors_zro, "errors=zone-ro"}, 480 { Opt_errors_zol, "errors=zone-offline"}, 481 { Opt_errors_repair, "errors=repair"}, 482 { Opt_explicit_open, "explicit-open" }, 483 { Opt_err, NULL} 484 }; 485 486 static int zonefs_parse_options(struct super_block *sb, char *options) 487 { 488 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 489 substring_t args[MAX_OPT_ARGS]; 490 char *p; 491 492 if (!options) 493 return 0; 494 495 while ((p = strsep(&options, ",")) != NULL) { 496 int token; 497 498 if (!*p) 499 continue; 500 501 token = match_token(p, tokens, args); 502 switch (token) { 503 case Opt_errors_ro: 504 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 505 sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO; 506 break; 507 case Opt_errors_zro: 508 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 509 sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO; 510 break; 511 case Opt_errors_zol: 512 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 513 
sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL; 514 break; 515 case Opt_errors_repair: 516 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; 517 sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR; 518 break; 519 case Opt_explicit_open: 520 sbi->s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN; 521 break; 522 default: 523 return -EINVAL; 524 } 525 } 526 527 return 0; 528 } 529 530 static int zonefs_show_options(struct seq_file *seq, struct dentry *root) 531 { 532 struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb); 533 534 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) 535 seq_puts(seq, ",errors=remount-ro"); 536 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) 537 seq_puts(seq, ",errors=zone-ro"); 538 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) 539 seq_puts(seq, ",errors=zone-offline"); 540 if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR) 541 seq_puts(seq, ",errors=repair"); 542 543 return 0; 544 } 545 546 static int zonefs_remount(struct super_block *sb, int *flags, char *data) 547 { 548 sync_filesystem(sb); 549 550 return zonefs_parse_options(sb, data); 551 } 552 553 static int zonefs_inode_setattr(struct mnt_idmap *idmap, 554 struct dentry *dentry, struct iattr *iattr) 555 { 556 struct inode *inode = d_inode(dentry); 557 int ret; 558 559 if (unlikely(IS_IMMUTABLE(inode))) 560 return -EPERM; 561 562 ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr); 563 if (ret) 564 return ret; 565 566 /* 567 * Since files and directories cannot be created nor deleted, do not 568 * allow setting any write attributes on the sub-directories grouping 569 * files by zone type. 
570 */ 571 if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && 572 (iattr->ia_mode & 0222)) 573 return -EPERM; 574 575 if (((iattr->ia_valid & ATTR_UID) && 576 !uid_eq(iattr->ia_uid, inode->i_uid)) || 577 ((iattr->ia_valid & ATTR_GID) && 578 !gid_eq(iattr->ia_gid, inode->i_gid))) { 579 ret = dquot_transfer(&nop_mnt_idmap, inode, iattr); 580 if (ret) 581 return ret; 582 } 583 584 if (iattr->ia_valid & ATTR_SIZE) { 585 ret = zonefs_file_truncate(inode, iattr->ia_size); 586 if (ret) 587 return ret; 588 } 589 590 setattr_copy(&nop_mnt_idmap, inode, iattr); 591 592 if (S_ISREG(inode->i_mode)) { 593 struct zonefs_zone *z = zonefs_inode_zone(inode); 594 595 z->z_mode = inode->i_mode; 596 z->z_uid = inode->i_uid; 597 z->z_gid = inode->i_gid; 598 } 599 600 return 0; 601 } 602 603 static const struct inode_operations zonefs_file_inode_operations = { 604 .setattr = zonefs_inode_setattr, 605 }; 606 607 static long zonefs_fname_to_fno(const struct qstr *fname) 608 { 609 const char *name = fname->name; 610 unsigned int len = fname->len; 611 long fno = 0, shift = 1; 612 const char *rname; 613 char c = *name; 614 unsigned int i; 615 616 /* 617 * File names are always a base-10 number string without any 618 * leading 0s. 
619 */ 620 if (!isdigit(c)) 621 return -ENOENT; 622 623 if (len > 1 && c == '0') 624 return -ENOENT; 625 626 if (len == 1) 627 return c - '0'; 628 629 for (i = 0, rname = name + len - 1; i < len; i++, rname--) { 630 c = *rname; 631 if (!isdigit(c)) 632 return -ENOENT; 633 fno += (c - '0') * shift; 634 shift *= 10; 635 } 636 637 return fno; 638 } 639 640 static struct inode *zonefs_get_file_inode(struct inode *dir, 641 struct dentry *dentry) 642 { 643 struct zonefs_zone_group *zgroup = dir->i_private; 644 struct super_block *sb = dir->i_sb; 645 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 646 struct zonefs_zone *z; 647 struct inode *inode; 648 ino_t ino; 649 long fno; 650 651 /* Get the file number from the file name */ 652 fno = zonefs_fname_to_fno(&dentry->d_name); 653 if (fno < 0) 654 return ERR_PTR(fno); 655 656 if (!zgroup->g_nr_zones || fno >= zgroup->g_nr_zones) 657 return ERR_PTR(-ENOENT); 658 659 z = &zgroup->g_zones[fno]; 660 ino = z->z_sector >> sbi->s_zone_sectors_shift; 661 inode = iget_locked(sb, ino); 662 if (!inode) 663 return ERR_PTR(-ENOMEM); 664 if (!(inode->i_state & I_NEW)) { 665 WARN_ON_ONCE(inode->i_private != z); 666 return inode; 667 } 668 669 inode->i_ino = ino; 670 inode->i_mode = z->z_mode; 671 inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode, 672 inode_get_ctime(dir)); 673 inode->i_uid = z->z_uid; 674 inode->i_gid = z->z_gid; 675 inode->i_size = z->z_wpoffset; 676 inode->i_blocks = z->z_capacity >> SECTOR_SHIFT; 677 inode->i_private = z; 678 679 inode->i_op = &zonefs_file_inode_operations; 680 inode->i_fop = &zonefs_file_operations; 681 inode->i_mapping->a_ops = &zonefs_file_aops; 682 683 /* Update the inode access rights depending on the zone condition */ 684 zonefs_inode_update_mode(inode); 685 686 unlock_new_inode(inode); 687 688 return inode; 689 } 690 691 static struct inode *zonefs_get_zgroup_inode(struct super_block *sb, 692 enum zonefs_ztype ztype) 693 { 694 struct inode *root = d_inode(sb->s_root); 695 struct 
zonefs_sb_info *sbi = ZONEFS_SB(sb); 696 struct inode *inode; 697 ino_t ino = bdev_nr_zones(sb->s_bdev) + ztype + 1; 698 699 inode = iget_locked(sb, ino); 700 if (!inode) 701 return ERR_PTR(-ENOMEM); 702 if (!(inode->i_state & I_NEW)) 703 return inode; 704 705 inode->i_ino = ino; 706 inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555); 707 inode->i_size = sbi->s_zgroup[ztype].g_nr_zones; 708 inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode, 709 inode_get_ctime(root)); 710 inode->i_private = &sbi->s_zgroup[ztype]; 711 set_nlink(inode, 2); 712 713 inode->i_op = &zonefs_dir_inode_operations; 714 inode->i_fop = &zonefs_dir_operations; 715 716 unlock_new_inode(inode); 717 718 return inode; 719 } 720 721 722 static struct inode *zonefs_get_dir_inode(struct inode *dir, 723 struct dentry *dentry) 724 { 725 struct super_block *sb = dir->i_sb; 726 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 727 const char *name = dentry->d_name.name; 728 enum zonefs_ztype ztype; 729 730 /* 731 * We only need to check for the "seq" directory and 732 * the "cnv" directory if we have conventional zones. 
733 */ 734 if (dentry->d_name.len != 3) 735 return ERR_PTR(-ENOENT); 736 737 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 738 if (sbi->s_zgroup[ztype].g_nr_zones && 739 memcmp(name, zonefs_zgroup_name(ztype), 3) == 0) 740 break; 741 } 742 if (ztype == ZONEFS_ZTYPE_MAX) 743 return ERR_PTR(-ENOENT); 744 745 return zonefs_get_zgroup_inode(sb, ztype); 746 } 747 748 static struct dentry *zonefs_lookup(struct inode *dir, struct dentry *dentry, 749 unsigned int flags) 750 { 751 struct inode *inode; 752 753 if (dentry->d_name.len > ZONEFS_NAME_MAX) 754 return ERR_PTR(-ENAMETOOLONG); 755 756 if (dir == d_inode(dir->i_sb->s_root)) 757 inode = zonefs_get_dir_inode(dir, dentry); 758 else 759 inode = zonefs_get_file_inode(dir, dentry); 760 if (IS_ERR(inode)) 761 return ERR_CAST(inode); 762 763 return d_splice_alias(inode, dentry); 764 } 765 766 static int zonefs_readdir_root(struct file *file, struct dir_context *ctx) 767 { 768 struct inode *inode = file_inode(file); 769 struct super_block *sb = inode->i_sb; 770 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 771 enum zonefs_ztype ztype = ZONEFS_ZTYPE_CNV; 772 ino_t base_ino = bdev_nr_zones(sb->s_bdev) + 1; 773 774 if (ctx->pos >= inode->i_size) 775 return 0; 776 777 if (!dir_emit_dots(file, ctx)) 778 return 0; 779 780 if (ctx->pos == 2) { 781 if (!sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) 782 ztype = ZONEFS_ZTYPE_SEQ; 783 784 if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3, 785 base_ino + ztype, DT_DIR)) 786 return 0; 787 ctx->pos++; 788 } 789 790 if (ctx->pos == 3 && ztype != ZONEFS_ZTYPE_SEQ) { 791 ztype = ZONEFS_ZTYPE_SEQ; 792 if (!dir_emit(ctx, zonefs_zgroup_name(ztype), 3, 793 base_ino + ztype, DT_DIR)) 794 return 0; 795 ctx->pos++; 796 } 797 798 return 0; 799 } 800 801 static int zonefs_readdir_zgroup(struct file *file, 802 struct dir_context *ctx) 803 { 804 struct inode *inode = file_inode(file); 805 struct zonefs_zone_group *zgroup = inode->i_private; 806 struct super_block *sb = inode->i_sb; 807 struct 
zonefs_sb_info *sbi = ZONEFS_SB(sb); 808 struct zonefs_zone *z; 809 int fname_len; 810 char *fname; 811 ino_t ino; 812 int f; 813 814 /* 815 * The size of zone group directories is equal to the number 816 * of zone files in the group and does note include the "." and 817 * ".." entries. Hence the "+ 2" here. 818 */ 819 if (ctx->pos >= inode->i_size + 2) 820 return 0; 821 822 if (!dir_emit_dots(file, ctx)) 823 return 0; 824 825 fname = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL); 826 if (!fname) 827 return -ENOMEM; 828 829 for (f = ctx->pos - 2; f < zgroup->g_nr_zones; f++) { 830 z = &zgroup->g_zones[f]; 831 ino = z->z_sector >> sbi->s_zone_sectors_shift; 832 fname_len = snprintf(fname, ZONEFS_NAME_MAX - 1, "%u", f); 833 if (!dir_emit(ctx, fname, fname_len, ino, DT_REG)) 834 break; 835 ctx->pos++; 836 } 837 838 kfree(fname); 839 840 return 0; 841 } 842 843 static int zonefs_readdir(struct file *file, struct dir_context *ctx) 844 { 845 struct inode *inode = file_inode(file); 846 847 if (inode == d_inode(inode->i_sb->s_root)) 848 return zonefs_readdir_root(file, ctx); 849 850 return zonefs_readdir_zgroup(file, ctx); 851 } 852 853 const struct inode_operations zonefs_dir_inode_operations = { 854 .lookup = zonefs_lookup, 855 .setattr = zonefs_inode_setattr, 856 }; 857 858 const struct file_operations zonefs_dir_operations = { 859 .llseek = generic_file_llseek, 860 .read = generic_read_dir, 861 .iterate_shared = zonefs_readdir, 862 }; 863 864 struct zonefs_zone_data { 865 struct super_block *sb; 866 unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; 867 sector_t cnv_zone_start; 868 struct blk_zone *zones; 869 }; 870 871 static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx, 872 void *data) 873 { 874 struct zonefs_zone_data *zd = data; 875 struct super_block *sb = zd->sb; 876 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 877 878 /* 879 * We do not care about the first zone: it contains the super block 880 * and not exposed as a file. 
881 */ 882 if (!idx) 883 return 0; 884 885 /* 886 * Count the number of zones that will be exposed as files. 887 * For sequential zones, we always have as many files as zones. 888 * FOr conventional zones, the number of files depends on if we have 889 * conventional zones aggregation enabled. 890 */ 891 switch (zone->type) { 892 case BLK_ZONE_TYPE_CONVENTIONAL: 893 if (sbi->s_features & ZONEFS_F_AGGRCNV) { 894 /* One file per set of contiguous conventional zones */ 895 if (!(sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones) || 896 zone->start != zd->cnv_zone_start) 897 sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; 898 zd->cnv_zone_start = zone->start + zone->len; 899 } else { 900 /* One file per zone */ 901 sbi->s_zgroup[ZONEFS_ZTYPE_CNV].g_nr_zones++; 902 } 903 break; 904 case BLK_ZONE_TYPE_SEQWRITE_REQ: 905 case BLK_ZONE_TYPE_SEQWRITE_PREF: 906 sbi->s_zgroup[ZONEFS_ZTYPE_SEQ].g_nr_zones++; 907 break; 908 default: 909 zonefs_err(zd->sb, "Unsupported zone type 0x%x\n", 910 zone->type); 911 return -EIO; 912 } 913 914 memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone)); 915 916 return 0; 917 } 918 919 static int zonefs_get_zone_info(struct zonefs_zone_data *zd) 920 { 921 struct block_device *bdev = zd->sb->s_bdev; 922 int ret; 923 924 zd->zones = kvcalloc(bdev_nr_zones(bdev), sizeof(struct blk_zone), 925 GFP_KERNEL); 926 if (!zd->zones) 927 return -ENOMEM; 928 929 /* Get zones information from the device */ 930 ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, 931 zonefs_get_zone_info_cb, zd); 932 if (ret < 0) { 933 zonefs_err(zd->sb, "Zone report failed %d\n", ret); 934 return ret; 935 } 936 937 if (ret != bdev_nr_zones(bdev)) { 938 zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n", 939 ret, bdev_nr_zones(bdev)); 940 return -EIO; 941 } 942 943 return 0; 944 } 945 946 static inline void zonefs_free_zone_info(struct zonefs_zone_data *zd) 947 { 948 kvfree(zd->zones); 949 } 950 951 /* 952 * Create a zone group and populate it with zone files. 
*/
static int zonefs_init_zgroup(struct super_block *sb,
			      struct zonefs_zone_data *zd,
			      enum zonefs_ztype ztype)
{
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	struct zonefs_zone_group *zgroup = &sbi->s_zgroup[ztype];
	struct blk_zone *zone, *next, *end;
	struct zonefs_zone *z;
	unsigned int n = 0;
	int ret;

	/* Allocate the zone group. If it is empty, we have nothing to do. */
	if (!zgroup->g_nr_zones)
		return 0;

	/*
	 * The array is not freed here on error: zonefs_init_zgroups() calls
	 * zonefs_free_zgroups() when this function fails.
	 */
	zgroup->g_zones = kvcalloc(zgroup->g_nr_zones,
				   sizeof(struct zonefs_zone), GFP_KERNEL);
	if (!zgroup->g_zones)
		return -ENOMEM;

	/*
	 * Initialize the zone groups using the device zone information.
	 * We always skip the first zone as it contains the super block
	 * and is not used to back a file.
	 */
	end = zd->zones + bdev_nr_zones(sb->s_bdev);
	for (zone = &zd->zones[1]; zone < end; zone = next) {

		/* next may be advanced further below by zone aggregation */
		next = zone + 1;
		if (zonefs_zone_type(zone) != ztype)
			continue;

		/* More zones of this type than counted by the zone report */
		if (WARN_ON_ONCE(n >= zgroup->g_nr_zones))
			return -EINVAL;

		/*
		 * For conventional zones, contiguous zones can be aggregated
		 * together to form larger files. Note that this overwrites the
		 * length of the first zone of the set of contiguous zones
		 * aggregated together. If one offline or read-only zone is
		 * found, assume that all zones aggregated have the same
		 * condition.
		 */
		if (ztype == ZONEFS_ZTYPE_CNV &&
		    (sbi->s_features & ZONEFS_F_AGGRCNV)) {
			for (; next < end; next++) {
				if (zonefs_zone_type(next) != ztype)
					break;
				zone->len += next->len;
				zone->capacity += next->capacity;
				/* Offline takes precedence over read-only */
				if (next->cond == BLK_ZONE_COND_READONLY &&
				    zone->cond != BLK_ZONE_COND_OFFLINE)
					zone->cond = BLK_ZONE_COND_READONLY;
				else if (next->cond == BLK_ZONE_COND_OFFLINE)
					zone->cond = BLK_ZONE_COND_OFFLINE;
			}
		}

		z = &zgroup->g_zones[n];
		if (ztype == ZONEFS_ZTYPE_CNV)
			z->z_flags |= ZONEFS_ZONE_CNV;
		z->z_sector = zone->start;
		z->z_size = zone->len << SECTOR_SHIFT;
		/* Only aggregated zones can exceed the device zone size */
		if (z->z_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT &&
		    !(sbi->s_features & ZONEFS_F_AGGRCNV)) {
			zonefs_err(sb,
				"Invalid zone size %llu (device zone sectors %llu)\n",
				z->z_size,
				bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT);
			return -EINVAL;
		}

		/*
		 * The zone type flag and capacity must be set before checking
		 * the zone condition as zonefs_check_zone_condition() uses
		 * them to determine the zone write pointer offset.
		 */
		z->z_capacity = min_t(loff_t, MAX_LFS_FILESIZE,
				      zone->capacity << SECTOR_SHIFT);
		z->z_wpoffset = zonefs_check_zone_condition(sb, z, zone);

		z->z_mode = S_IFREG | sbi->s_perm;
		z->z_uid = sbi->s_uid;
		z->z_gid = sbi->s_gid;

		/*
		 * Let zonefs_inode_update_mode() know that we will need
		 * special initialization of the inode mode the first time
		 * it is accessed.
		 */
		z->z_flags |= ZONEFS_ZONE_INIT_MODE;

		/* Account this zone in the file system size and usage */
		sb->s_maxbytes = max(z->z_capacity, sb->s_maxbytes);
		sbi->s_blocks += z->z_capacity >> sb->s_blocksize_bits;
		sbi->s_used_blocks += z->z_wpoffset >> sb->s_blocksize_bits;

		/*
		 * For sequential zones, make sure that any open zone is closed
		 * first to ensure that the initial number of open zones is 0,
		 * in sync with the open zone accounting done when the mount
		 * option ZONEFS_MNTOPT_EXPLICIT_OPEN is used.
		 */
		if (ztype == ZONEFS_ZTYPE_SEQ &&
		    (zone->cond == BLK_ZONE_COND_IMP_OPEN ||
		     zone->cond == BLK_ZONE_COND_EXP_OPEN)) {
			ret = zonefs_zone_mgmt(sb, z, REQ_OP_ZONE_CLOSE);
			if (ret)
				return ret;
		}

		zonefs_account_active(sb, z);

		n++;
	}

	/* Fewer zones of this type than counted by the zone report */
	if (WARN_ON_ONCE(n != zgroup->g_nr_zones))
		return -EINVAL;

	zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
		    zonefs_zgroup_name(ztype),
		    zgroup->g_nr_zones,
		    zgroup->g_nr_zones > 1 ? "s" : "");

	return 0;
}

/*
 * Free the zone arrays of all zone groups. This does nothing if the super
 * block has no zonefs information attached. The array pointers are reset to
 * NULL so that calling this function again is harmless.
 */
static void zonefs_free_zgroups(struct super_block *sb)
{
	struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
	enum zonefs_ztype ztype;

	if (!sbi)
		return;

	for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
		kvfree(sbi->s_zgroup[ztype].g_zones);
		sbi->s_zgroup[ztype].g_zones = NULL;
	}
}

/*
 * Get the device zone information and use it to create and populate all zone
 * groups. The device zone information is always freed before returning. On
 * error, the zone groups that were allocated are freed too.
 */
static int zonefs_init_zgroups(struct super_block *sb)
{
	struct zonefs_zone_data zd;
	enum zonefs_ztype ztype;
	int ret;

	/* First get the device zone information */
	memset(&zd, 0, sizeof(struct zonefs_zone_data));
	zd.sb = sb;
	ret = zonefs_get_zone_info(&zd);
	if (ret)
		goto cleanup;

	/* Allocate and initialize the zone groups */
	for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) {
		ret = zonefs_init_zgroup(sb, &zd, ztype);
		if (ret) {
			zonefs_info(sb,
				    "Zone group \"%s\" initialization failed\n",
				    zonefs_zgroup_name(ztype));
			break;
		}
	}

cleanup:
	zonefs_free_zone_info(&zd);
	if (ret)
		zonefs_free_zgroups(sb);

	return ret;
}

/*
 * Read super block information from the device.
1126 */ 1127 static int zonefs_read_super(struct super_block *sb) 1128 { 1129 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1130 struct zonefs_super *super; 1131 u32 crc, stored_crc; 1132 struct page *page; 1133 struct bio_vec bio_vec; 1134 struct bio bio; 1135 int ret; 1136 1137 page = alloc_page(GFP_KERNEL); 1138 if (!page) 1139 return -ENOMEM; 1140 1141 bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ); 1142 bio.bi_iter.bi_sector = 0; 1143 __bio_add_page(&bio, page, PAGE_SIZE, 0); 1144 1145 ret = submit_bio_wait(&bio); 1146 if (ret) 1147 goto free_page; 1148 1149 super = page_address(page); 1150 1151 ret = -EINVAL; 1152 if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC) 1153 goto free_page; 1154 1155 stored_crc = le32_to_cpu(super->s_crc); 1156 super->s_crc = 0; 1157 crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super)); 1158 if (crc != stored_crc) { 1159 zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)", 1160 crc, stored_crc); 1161 goto free_page; 1162 } 1163 1164 sbi->s_features = le64_to_cpu(super->s_features); 1165 if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) { 1166 zonefs_err(sb, "Unknown features set 0x%llx\n", 1167 sbi->s_features); 1168 goto free_page; 1169 } 1170 1171 if (sbi->s_features & ZONEFS_F_UID) { 1172 sbi->s_uid = make_kuid(current_user_ns(), 1173 le32_to_cpu(super->s_uid)); 1174 if (!uid_valid(sbi->s_uid)) { 1175 zonefs_err(sb, "Invalid UID feature\n"); 1176 goto free_page; 1177 } 1178 } 1179 1180 if (sbi->s_features & ZONEFS_F_GID) { 1181 sbi->s_gid = make_kgid(current_user_ns(), 1182 le32_to_cpu(super->s_gid)); 1183 if (!gid_valid(sbi->s_gid)) { 1184 zonefs_err(sb, "Invalid GID feature\n"); 1185 goto free_page; 1186 } 1187 } 1188 1189 if (sbi->s_features & ZONEFS_F_PERM) 1190 sbi->s_perm = le32_to_cpu(super->s_perm); 1191 1192 if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) { 1193 zonefs_err(sb, "Reserved area is being used\n"); 1194 goto free_page; 1195 } 1196 1197 import_uuid(&sbi->s_uuid, 
super->s_uuid); 1198 ret = 0; 1199 1200 free_page: 1201 __free_page(page); 1202 1203 return ret; 1204 } 1205 1206 static const struct super_operations zonefs_sops = { 1207 .alloc_inode = zonefs_alloc_inode, 1208 .free_inode = zonefs_free_inode, 1209 .statfs = zonefs_statfs, 1210 .remount_fs = zonefs_remount, 1211 .show_options = zonefs_show_options, 1212 }; 1213 1214 static int zonefs_get_zgroup_inodes(struct super_block *sb) 1215 { 1216 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1217 struct inode *dir_inode; 1218 enum zonefs_ztype ztype; 1219 1220 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1221 if (!sbi->s_zgroup[ztype].g_nr_zones) 1222 continue; 1223 1224 dir_inode = zonefs_get_zgroup_inode(sb, ztype); 1225 if (IS_ERR(dir_inode)) 1226 return PTR_ERR(dir_inode); 1227 1228 sbi->s_zgroup[ztype].g_inode = dir_inode; 1229 } 1230 1231 return 0; 1232 } 1233 1234 static void zonefs_release_zgroup_inodes(struct super_block *sb) 1235 { 1236 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1237 enum zonefs_ztype ztype; 1238 1239 if (!sbi) 1240 return; 1241 1242 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1243 if (sbi->s_zgroup[ztype].g_inode) { 1244 iput(sbi->s_zgroup[ztype].g_inode); 1245 sbi->s_zgroup[ztype].g_inode = NULL; 1246 } 1247 } 1248 } 1249 1250 /* 1251 * Check that the device is zoned. If it is, get the list of zones and create 1252 * sub-directories and files according to the device zone configuration and 1253 * format options. 
1254 */ 1255 static int zonefs_fill_super(struct super_block *sb, void *data, int silent) 1256 { 1257 struct zonefs_sb_info *sbi; 1258 struct inode *inode; 1259 enum zonefs_ztype ztype; 1260 int ret; 1261 1262 if (!bdev_is_zoned(sb->s_bdev)) { 1263 zonefs_err(sb, "Not a zoned block device\n"); 1264 return -EINVAL; 1265 } 1266 1267 /* 1268 * Initialize super block information: the maximum file size is updated 1269 * when the zone files are created so that the format option 1270 * ZONEFS_F_AGGRCNV which increases the maximum file size of a file 1271 * beyond the zone size is taken into account. 1272 */ 1273 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 1274 if (!sbi) 1275 return -ENOMEM; 1276 1277 spin_lock_init(&sbi->s_lock); 1278 sb->s_fs_info = sbi; 1279 sb->s_magic = ZONEFS_MAGIC; 1280 sb->s_maxbytes = 0; 1281 sb->s_op = &zonefs_sops; 1282 sb->s_time_gran = 1; 1283 1284 /* 1285 * The block size is set to the device zone write granularity to ensure 1286 * that write operations are always aligned according to the device 1287 * interface constraints. 
1288 */ 1289 sb_set_blocksize(sb, bdev_zone_write_granularity(sb->s_bdev)); 1290 sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev)); 1291 sbi->s_uid = GLOBAL_ROOT_UID; 1292 sbi->s_gid = GLOBAL_ROOT_GID; 1293 sbi->s_perm = 0640; 1294 sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; 1295 1296 atomic_set(&sbi->s_wro_seq_files, 0); 1297 sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev); 1298 atomic_set(&sbi->s_active_seq_files, 0); 1299 sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev); 1300 1301 ret = zonefs_read_super(sb); 1302 if (ret) 1303 return ret; 1304 1305 ret = zonefs_parse_options(sb, data); 1306 if (ret) 1307 return ret; 1308 1309 zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev)); 1310 1311 if (!sbi->s_max_wro_seq_files && 1312 !sbi->s_max_active_seq_files && 1313 sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { 1314 zonefs_info(sb, 1315 "No open and active zone limits. Ignoring explicit_open mount option\n"); 1316 sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; 1317 } 1318 1319 /* Initialize the zone groups */ 1320 ret = zonefs_init_zgroups(sb); 1321 if (ret) 1322 goto cleanup; 1323 1324 /* Create the root directory inode */ 1325 ret = -ENOMEM; 1326 inode = new_inode(sb); 1327 if (!inode) 1328 goto cleanup; 1329 1330 inode->i_ino = bdev_nr_zones(sb->s_bdev); 1331 inode->i_mode = S_IFDIR | 0555; 1332 inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); 1333 inode->i_op = &zonefs_dir_inode_operations; 1334 inode->i_fop = &zonefs_dir_operations; 1335 inode->i_size = 2; 1336 set_nlink(inode, 2); 1337 for (ztype = 0; ztype < ZONEFS_ZTYPE_MAX; ztype++) { 1338 if (sbi->s_zgroup[ztype].g_nr_zones) { 1339 inc_nlink(inode); 1340 inode->i_size++; 1341 } 1342 } 1343 1344 sb->s_root = d_make_root(inode); 1345 if (!sb->s_root) 1346 goto cleanup; 1347 1348 /* 1349 * Take a reference on the zone groups directory inodes 1350 * to keep them in the inode cache. 
1351 */ 1352 ret = zonefs_get_zgroup_inodes(sb); 1353 if (ret) 1354 goto cleanup; 1355 1356 ret = zonefs_sysfs_register(sb); 1357 if (ret) 1358 goto cleanup; 1359 1360 return 0; 1361 1362 cleanup: 1363 zonefs_release_zgroup_inodes(sb); 1364 zonefs_free_zgroups(sb); 1365 1366 return ret; 1367 } 1368 1369 static struct dentry *zonefs_mount(struct file_system_type *fs_type, 1370 int flags, const char *dev_name, void *data) 1371 { 1372 return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super); 1373 } 1374 1375 static void zonefs_kill_super(struct super_block *sb) 1376 { 1377 struct zonefs_sb_info *sbi = ZONEFS_SB(sb); 1378 1379 /* Release the reference on the zone group directory inodes */ 1380 zonefs_release_zgroup_inodes(sb); 1381 1382 kill_block_super(sb); 1383 1384 zonefs_sysfs_unregister(sb); 1385 zonefs_free_zgroups(sb); 1386 kfree(sbi); 1387 } 1388 1389 /* 1390 * File system definition and registration. 1391 */ 1392 static struct file_system_type zonefs_type = { 1393 .owner = THIS_MODULE, 1394 .name = "zonefs", 1395 .mount = zonefs_mount, 1396 .kill_sb = zonefs_kill_super, 1397 .fs_flags = FS_REQUIRES_DEV, 1398 }; 1399 1400 static int __init zonefs_init_inodecache(void) 1401 { 1402 zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache", 1403 sizeof(struct zonefs_inode_info), 0, 1404 (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), 1405 NULL); 1406 if (zonefs_inode_cachep == NULL) 1407 return -ENOMEM; 1408 return 0; 1409 } 1410 1411 static void zonefs_destroy_inodecache(void) 1412 { 1413 /* 1414 * Make sure all delayed rcu free inodes are flushed before we 1415 * destroy the inode cache. 
1416 */ 1417 rcu_barrier(); 1418 kmem_cache_destroy(zonefs_inode_cachep); 1419 } 1420 1421 static int __init zonefs_init(void) 1422 { 1423 int ret; 1424 1425 BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); 1426 1427 ret = zonefs_init_inodecache(); 1428 if (ret) 1429 return ret; 1430 1431 ret = zonefs_sysfs_init(); 1432 if (ret) 1433 goto destroy_inodecache; 1434 1435 ret = register_filesystem(&zonefs_type); 1436 if (ret) 1437 goto sysfs_exit; 1438 1439 return 0; 1440 1441 sysfs_exit: 1442 zonefs_sysfs_exit(); 1443 destroy_inodecache: 1444 zonefs_destroy_inodecache(); 1445 1446 return ret; 1447 } 1448 1449 static void __exit zonefs_exit(void) 1450 { 1451 unregister_filesystem(&zonefs_type); 1452 zonefs_sysfs_exit(); 1453 zonefs_destroy_inodecache(); 1454 } 1455 1456 MODULE_AUTHOR("Damien Le Moal"); 1457 MODULE_DESCRIPTION("Zone file system for zoned block devices"); 1458 MODULE_LICENSE("GPL"); 1459 MODULE_ALIAS_FS("zonefs"); 1460 module_init(zonefs_init); 1461 module_exit(zonefs_exit); 1462