/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/posix_acl.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "util.h"
#include "trans.h"
#include "dir.h"

static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
{
	fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
	       bh, (unsigned long long)bh->b_blocknr, bh->b_state,
	       bh->b_page->mapping, bh->b_page->flags);
	fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n",
	       gl->gl_name.ln_type, gl->gl_name.ln_number,
	       gfs2_glock2aspace(gl));
	gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n");
}

/**
 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
 * @gl: the glock
 * @fsync: set when called from fsync (not all buffers will be clean)
 * @nr_revokes: the maximum number of buffers to revoke in this pass
 *
 * None of the buffers should be dirty, locked, or pinned.
 */

static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
			     unsigned int nr_revokes)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct list_head *head = &gl->gl_ail_list;
	struct gfs2_bufdata *bd, *tmp;
	struct buffer_head *bh;
	const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock);

	gfs2_log_lock(sdp);
	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
		if (nr_revokes == 0)
			break;
		bh = bd->bd_bh;
		if (bh->b_state & b_state) {
			if (fsync)
				continue;
			gfs2_ail_error(gl, bh);
		}
		gfs2_trans_add_revoke(sdp, bd);
		nr_revokes--;
	}
	GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
	spin_unlock(&sdp->sd_ail_lock);
	gfs2_log_unlock(sdp);
}


static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_trans tr;

	memset(&tr, 0, sizeof(tr));
	tr.tr_revokes = atomic_read(&gl->gl_ail_count);

	if (!tr.tr_revokes)
		return;

	/* A shortened, inline version of gfs2_trans_begin() */
	tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
	tr.tr_ip = (unsigned long)__builtin_return_address(0);
	sb_start_intwrite(sdp->sd_vfs);
	gfs2_log_reserve(sdp, tr.tr_reserved);
	WARN_ON_ONCE(current->journal_info);
	current->journal_info = &tr;

	__gfs2_ail_flush(gl, false, tr.tr_revokes);

	gfs2_trans_end(sdp);
	gfs2_log_flush(sdp, NULL);
}
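/*
 * A worked example of the reservation arithmetic used by gfs2_ail_flush()
 * below (a sketch only; the exact counts depend on the on-disk struct
 * sizes in gfs2_ondisk.h). Revokes are u64 block numbers packed into log
 * blocks: the first block carries a struct gfs2_log_descriptor header and
 * holds
 *
 *	(sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64)
 *
 * entries, while each continuation block carries only a struct
 * gfs2_meta_header and holds
 *
 *	(sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64)
 *
 * more. With 4KiB blocks that works out to roughly 500 revokes per block,
 * so the while loop below grows max_revokes one block's worth at a time
 * until the reservation covers every buffer currently on the AIL list.
 */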
void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	unsigned int revokes = atomic_read(&gl->gl_ail_count);
	unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
	int ret;

	if (!revokes)
		return;

	while (revokes > max_revokes)
		max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);

	ret = gfs2_trans_begin(sdp, 0, max_revokes);
	if (ret)
		return;
	__gfs2_ail_flush(gl, fsync, max_revokes);
	gfs2_trans_end(sdp);
	gfs2_log_flush(sdp, NULL);
}

/**
 * rgrp_go_sync - sync out the metadata for this glock
 * @gl: the glock
 *
 * Called when demoting or unlocking an EX glock. We must flush
 * to disk all dirty buffers/pages relating to this glock, and must not
 * return to the caller to demote/unlock the glock until the I/O is
 * complete.
 */

static void rgrp_go_sync(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct address_space *mapping = &sdp->sd_aspace;
	struct gfs2_rgrpd *rgd;
	int error;

	if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
		return;
	GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);

	gfs2_log_flush(sdp, gl);
	filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
	error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
	mapping_set_error(mapping, error);
	gfs2_ail_empty_gl(gl);

	spin_lock(&gl->gl_spin);
	rgd = gl->gl_object;
	if (rgd)
		gfs2_free_clones(rgd);
	spin_unlock(&gl->gl_spin);
}

/**
 * rgrp_go_inval - invalidate the metadata for this glock
 * @gl: the glock
 * @flags: DIO_* invalidation flags (DIO_METADATA is expected here)
 *
 * We never use LM_ST_DEFERRED with resource groups, so we should
 * always see the metadata flag set here.
 *
 */

static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct address_space *mapping = &sdp->sd_aspace;

	WARN_ON_ONCE(!(flags & DIO_METADATA));
	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
	truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end);

	if (gl->gl_object) {
		struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
		rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
	}
}
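/*
 * Note on the two rgrp callbacks above: resource group metadata lives in
 * the single per-superblock address space (sd_aspace) rather than in a
 * per-glock mapping, so both sync and invalidate are careful to operate
 * only on the [gl_vm.start, gl_vm.end] byte range that belongs to this
 * particular resource group.
 */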
/**
 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
 * @gl: the glock protecting the inode
 *
 */

static void inode_go_sync(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip = gl->gl_object;
	struct address_space *metamapping = gfs2_glock2aspace(gl);
	int error;

	if (ip && !S_ISREG(ip->i_inode.i_mode))
		ip = NULL;
	if (ip) {
		if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
			unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
		inode_dio_wait(&ip->i_inode);
	}
	if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
		return;

	GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);

	gfs2_log_flush(gl->gl_sbd, gl);
	filemap_fdatawrite(metamapping);
	if (ip) {
		struct address_space *mapping = ip->i_inode.i_mapping;
		filemap_fdatawrite(mapping);
		error = filemap_fdatawait(mapping);
		mapping_set_error(mapping, error);
	}
	error = filemap_fdatawait(metamapping);
	mapping_set_error(metamapping, error);
	gfs2_ail_empty_gl(gl);
	/*
	 * Writeback of the data mapping may cause the dirty flag to be set
	 * so we have to clear it again here.
	 */
	smp_mb__before_clear_bit();
	clear_bit(GLF_DIRTY, &gl->gl_flags);
}

/**
 * inode_go_inval - prepare an inode glock to be released
 * @gl: the glock
 * @flags: DIO_* invalidation flags
 *
 * Normally we invalidate everything, but if we are moving into
 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
 * can keep hold of the metadata, since it won't have changed.
 *
 */

static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
	struct gfs2_inode *ip = gl->gl_object;

	gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));

	if (flags & DIO_METADATA) {
		struct address_space *mapping = gfs2_glock2aspace(gl);
		truncate_inode_pages(mapping, 0);
		if (ip) {
			set_bit(GIF_INVALID, &ip->i_flags);
			forget_all_cached_acls(&ip->i_inode);
			gfs2_dir_hash_inval(ip);
		}
	}

	if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) {
		gfs2_log_flush(gl->gl_sbd, NULL);
		gl->gl_sbd->sd_rindex_uptodate = 0;
	}
	if (ip && S_ISREG(ip->i_inode.i_mode))
		truncate_inode_pages(ip->i_inode.i_mapping, 0);
}

/**
 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
 * @gl: the glock
 *
 * Returns: 1 if it's ok
 */

static int inode_go_demote_ok(const struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_holder *gh;

	if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
		return 0;

	/* Never demote while more than one holder is queued */
	if (!list_empty(&gl->gl_holders)) {
		gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
		if (gh->gh_list.next != &gl->gl_holders)
			return 0;
	}

	return 1;
}

/**
 * gfs2_set_nlink - Set the inode's link count based on on-disk info
 * @inode: The inode in question
 * @nlink: The link count
 *
 * If the link count has hit zero, it must never be raised, whatever the
 * on-disk inode might say. When new struct inodes are created the link
 * count is set to 1, so that we can safely use this test even when reading
 * in on disk information for the first time.
 */

static void gfs2_set_nlink(struct inode *inode, u32 nlink)
{
	/*
	 * We will need to review setting the nlink count here in the
	 * light of the forthcoming ro bind mount work. This is a reminder
	 * to do that.
	 */
	if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
		if (nlink == 0)
			clear_nlink(inode);
		else
			set_nlink(inode, nlink);
	}
}
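/*
 * gfs2_dinode_in() below fills the incore inode from an on-disk dinode.
 * Every on-disk field is big-endian, hence the be*_to_cpu() conversions,
 * and the height and depth fields are range-checked before being
 * truncated to u8, so a corrupt dinode triggers a clean withdraw via
 * gfs2_consist_inode() rather than undefined behaviour later on.
 */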
static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
{
	const struct gfs2_dinode *str = buf;
	struct timespec atime;
	u16 height, depth;

	if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
		goto corrupt;
	ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
	ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
	ip->i_inode.i_rdev = 0;
	switch (ip->i_inode.i_mode & S_IFMT) {
	case S_IFBLK:
	case S_IFCHR:
		ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
					   be32_to_cpu(str->di_minor));
		break;
	}

	i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
	i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
	gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
	i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
	gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
	atime.tv_sec = be64_to_cpu(str->di_atime);
	atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
	if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
		ip->i_inode.i_atime = atime;
	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
	ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
	ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);

	ip->i_goal = be64_to_cpu(str->di_goal_meta);
	ip->i_generation = be64_to_cpu(str->di_generation);

	ip->i_diskflags = be32_to_cpu(str->di_flags);
	ip->i_eattr = be64_to_cpu(str->di_eattr);
	/* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
	gfs2_set_inode_flags(&ip->i_inode);
	height = be16_to_cpu(str->di_height);
	if (unlikely(height > GFS2_MAX_META_HEIGHT))
		goto corrupt;
	ip->i_height = (u8)height;

	depth = be16_to_cpu(str->di_depth);
	if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
		goto corrupt;
	ip->i_depth = (u8)depth;
	ip->i_entries = be32_to_cpu(str->di_entries);

	if (S_ISREG(ip->i_inode.i_mode))
		gfs2_set_aops(&ip->i_inode);

	return 0;
corrupt:
	gfs2_consist_inode(ip);
	return -EIO;
}

/**
 * gfs2_inode_refresh - Refresh the incore copy of the dinode
 * @ip: The GFS2 inode
 *
 * Returns: errno
 */

int gfs2_inode_refresh(struct gfs2_inode *ip)
{
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	error = gfs2_dinode_in(ip, dibh->b_data);
	brelse(dibh);
	clear_bit(GIF_INVALID, &ip->i_flags);

	return error;
}

/**
 * inode_go_lock - operation done after an inode lock is locked by a process
 * @gh: the holder for the glock being locked
 *
 * Returns: errno
 */

static int inode_go_lock(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_inode *ip = gl->gl_object;
	int error = 0;

	if (!ip || (gh->gh_flags & GL_SKIP))
		return 0;

	if (test_bit(GIF_INVALID, &ip->i_flags)) {
		error = gfs2_inode_refresh(ip);
		if (error)
			return error;
	}

	if (gh->gh_state != LM_ST_DEFERRED)
		inode_dio_wait(&ip->i_inode);

	if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
	    (gl->gl_state == LM_ST_EXCLUSIVE) &&
	    (gh->gh_state == LM_ST_EXCLUSIVE)) {
		spin_lock(&sdp->sd_trunc_lock);
		/* list_add() takes the new entry first, then the list head */
		if (list_empty(&ip->i_trunc_list))
			list_add(&ip->i_trunc_list, &sdp->sd_trunc_list);
		spin_unlock(&sdp->sd_trunc_lock);
		wake_up(&sdp->sd_quota_wait);
		return 1;
	}

	return error;
}
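/*
 * Note on the unusual return value above: when inode_go_lock() finds a
 * truncate that was interrupted (GFS2_DIF_TRUNC_IN_PROG), it queues the
 * inode on sd_trunc_list and wakes the quota daemon, which completes the
 * truncate in the background. The value 1 is deliberately neither 0 nor
 * an errno; the glock core special-cases it rather than reporting it to
 * the waiting holder as an error.
 */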
/**
 * inode_go_dump - print information about an inode
 * @seq: The iterator
 * @gl: The glock protecting the inode
 *
 */

static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
{
	const struct gfs2_inode *ip = gl->gl_object;
	if (ip == NULL)
		return;
	gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n",
		       (unsigned long long)ip->i_no_formal_ino,
		       (unsigned long long)ip->i_no_addr,
		       IF2DT(ip->i_inode.i_mode), ip->i_flags,
		       (unsigned int)ip->i_diskflags,
		       (unsigned long long)i_size_read(&ip->i_inode));
}

/**
 * trans_go_sync - promote/demote the transaction glock
 * @gl: the glock
 *
 */

static void trans_go_sync(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;

	if (gl->gl_state != LM_ST_UNLOCKED &&
	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
		gfs2_meta_syncfs(sdp);
		gfs2_log_shutdown(sdp);
	}
}

/**
 * trans_go_xmote_bh - After promoting/demoting the transaction glock
 * @gl: the glock
 * @gh: the holder
 *
 */

static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
	struct gfs2_glock *j_gl = ip->i_gl;
	struct gfs2_log_header_host head;
	int error;

	if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
		j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);

		error = gfs2_find_jhead(sdp->sd_jdesc, &head);
		if (error)
			gfs2_consist(sdp);
		if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
			gfs2_consist(sdp);

		/* Initialize the head-of-log pointers */
		if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
			sdp->sd_log_sequence = head.lh_sequence + 1;
			gfs2_log_pointers_init(sdp, head.lh_blkno);
		}
	}
	return 0;
}

/**
 * trans_go_demote_ok
 * @gl: the glock
 *
 * Always returns 0
 */

static int trans_go_demote_ok(const struct gfs2_glock *gl)
{
	return 0;
}

/**
 * iopen_go_callback - schedule the dcache entry for the inode to be deleted
 * @gl: the glock
 * @remote: true if this came from a different cluster node
 *
 * gl_spin lock is held while calling this
 */
static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
{
	struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
	struct gfs2_sbd *sdp = gl->gl_sbd;

	if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY))
		return;

	if (gl->gl_demote_state == LM_ST_UNLOCKED &&
	    gl->gl_state == LM_ST_SHARED && ip) {
		gl->gl_lockref.count++;
		if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
			gl->gl_lockref.count--;
	}
}
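/*
 * The operation tables below bind the callbacks defined above to each
 * glock type. Any hook left unset simply falls back to the glock core's
 * default behaviour, which is why e.g. gfs2_meta_glops only needs to
 * declare its type.
 */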
const struct gfs2_glock_operations gfs2_meta_glops = {
	.go_type = LM_TYPE_META,
};

const struct gfs2_glock_operations gfs2_inode_glops = {
	.go_sync = inode_go_sync,
	.go_inval = inode_go_inval,
	.go_demote_ok = inode_go_demote_ok,
	.go_lock = inode_go_lock,
	.go_dump = inode_go_dump,
	.go_type = LM_TYPE_INODE,
	.go_flags = GLOF_ASPACE,
};

const struct gfs2_glock_operations gfs2_rgrp_glops = {
	.go_sync = rgrp_go_sync,
	.go_inval = rgrp_go_inval,
	.go_lock = gfs2_rgrp_go_lock,
	.go_unlock = gfs2_rgrp_go_unlock,
	.go_dump = gfs2_rgrp_dump,
	.go_type = LM_TYPE_RGRP,
	.go_flags = GLOF_LVB,
};

const struct gfs2_glock_operations gfs2_trans_glops = {
	.go_sync = trans_go_sync,
	.go_xmote_bh = trans_go_xmote_bh,
	.go_demote_ok = trans_go_demote_ok,
	.go_type = LM_TYPE_NONDISK,
};

const struct gfs2_glock_operations gfs2_iopen_glops = {
	.go_type = LM_TYPE_IOPEN,
	.go_callback = iopen_go_callback,
};

const struct gfs2_glock_operations gfs2_flock_glops = {
	.go_type = LM_TYPE_FLOCK,
};

const struct gfs2_glock_operations gfs2_nondisk_glops = {
	.go_type = LM_TYPE_NONDISK,
};

const struct gfs2_glock_operations gfs2_quota_glops = {
	.go_type = LM_TYPE_QUOTA,
	.go_flags = GLOF_LVB,
};

const struct gfs2_glock_operations gfs2_journal_glops = {
	.go_type = LM_TYPE_JOURNAL,
};

const struct gfs2_glock_operations *gfs2_glops_list[] = {
	[LM_TYPE_META] = &gfs2_meta_glops,
	[LM_TYPE_INODE] = &gfs2_inode_glops,
	[LM_TYPE_RGRP] = &gfs2_rgrp_glops,
	[LM_TYPE_IOPEN] = &gfs2_iopen_glops,
	[LM_TYPE_FLOCK] = &gfs2_flock_glops,
	[LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
	[LM_TYPE_QUOTA] = &gfs2_quota_glops,
	[LM_TYPE_JOURNAL] = &gfs2_journal_glops,
};
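/*
 * A minimal dispatch sketch (illustrative only; the real call sites live
 * in the glock core, not in this file) showing how a table entry can be
 * looked up by lock type and its optional hooks invoked:
 *
 *	const struct gfs2_glock_operations *ops = gfs2_glops_list[ln_type];
 *	if (ops->go_sync)
 *		ops->go_sync(gl);
 *	if (ops->go_inval)
 *		ops->go_inval(gl, DIO_METADATA);
 */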