1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/spinlock.h> 11 #include <linux/completion.h> 12 #include <linux/buffer_head.h> 13 #include <linux/gfs2_ondisk.h> 14 #include <linux/bio.h> 15 #include <linux/posix_acl.h> 16 17 #include "gfs2.h" 18 #include "incore.h" 19 #include "bmap.h" 20 #include "glock.h" 21 #include "glops.h" 22 #include "inode.h" 23 #include "log.h" 24 #include "meta_io.h" 25 #include "recovery.h" 26 #include "rgrp.h" 27 #include "util.h" 28 #include "trans.h" 29 #include "dir.h" 30 31 static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) 32 { 33 fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n", 34 bh, (unsigned long long)bh->b_blocknr, bh->b_state, 35 bh->b_page->mapping, bh->b_page->flags); 36 fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n", 37 gl->gl_name.ln_type, gl->gl_name.ln_number, 38 gfs2_glock2aspace(gl)); 39 gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n"); 40 } 41 42 /** 43 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL 44 * @gl: the glock 45 * @fsync: set when called from fsync (not all buffers will be clean) 46 * 47 * None of the buffers should be dirty, locked, or pinned. 48 */ 49 50 static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, 51 unsigned int nr_revokes) 52 { 53 struct gfs2_sbd *sdp = gl->gl_sbd; 54 struct list_head *head = &gl->gl_ail_list; 55 struct gfs2_bufdata *bd, *tmp; 56 struct buffer_head *bh; 57 const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock); 58 59 gfs2_log_lock(sdp); 60 spin_lock(&sdp->sd_ail_lock); 61 list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) { 62 if (nr_revokes == 0) 63 break; 64 bh = bd->bd_bh; 65 if (bh->b_state & b_state) { 66 if (fsync) 67 continue; 68 gfs2_ail_error(gl, bh); 69 } 70 gfs2_trans_add_revoke(sdp, bd); 71 nr_revokes--; 72 } 73 GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); 74 spin_unlock(&sdp->sd_ail_lock); 75 gfs2_log_unlock(sdp); 76 } 77 78 79 static void gfs2_ail_empty_gl(struct gfs2_glock *gl) 80 { 81 struct gfs2_sbd *sdp = gl->gl_sbd; 82 struct gfs2_trans tr; 83 84 memset(&tr, 0, sizeof(tr)); 85 INIT_LIST_HEAD(&tr.tr_buf); 86 INIT_LIST_HEAD(&tr.tr_databuf); 87 tr.tr_revokes = atomic_read(&gl->gl_ail_count); 88 89 if (!tr.tr_revokes) 90 return; 91 92 /* A shortened, inline version of gfs2_trans_begin() 93 * tr->alloced is not set since the transaction structure is 94 * on the stack */ 95 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); 96 tr.tr_ip = _RET_IP_; 97 sb_start_intwrite(sdp->sd_vfs); 98 if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) { 99 sb_end_intwrite(sdp->sd_vfs); 100 return; 101 } 102 WARN_ON_ONCE(current->journal_info); 103 current->journal_info = &tr; 104 105 __gfs2_ail_flush(gl, 0, tr.tr_revokes); 106 107 gfs2_trans_end(sdp); 108 gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); 109 } 110 111 void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) 112 { 113 struct gfs2_sbd *sdp = gl->gl_sbd; 114 unsigned int revokes = atomic_read(&gl->gl_ail_count); 115 unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); 116 int ret; 117 118 if (!revokes) 119 return; 120 121 while (revokes > max_revokes) 122 max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); 123 124 ret = gfs2_trans_begin(sdp, 0, max_revokes); 125 if (ret) 126 return; 127 __gfs2_ail_flush(gl, fsync, max_revokes); 128 gfs2_trans_end(sdp); 129 gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); 130 } 131 132 /** 133 * rgrp_go_sync - sync out the metadata for this glock 134 * @gl: the glock 135 * 136 * Called when demoting or unlocking an EX glock. We must flush 137 * to disk all dirty buffers/pages relating to this glock, and must not 138 * not return to caller to demote/unlock the glock until I/O is complete. 139 */ 140 141 static void rgrp_go_sync(struct gfs2_glock *gl) 142 { 143 struct gfs2_sbd *sdp = gl->gl_sbd; 144 struct address_space *mapping = &sdp->sd_aspace; 145 struct gfs2_rgrpd *rgd; 146 int error; 147 148 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 149 return; 150 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 151 152 gfs2_log_flush(sdp, gl, NORMAL_FLUSH); 153 filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end); 154 error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end); 155 mapping_set_error(mapping, error); 156 gfs2_ail_empty_gl(gl); 157 158 spin_lock(&gl->gl_spin); 159 rgd = gl->gl_object; 160 if (rgd) 161 gfs2_free_clones(rgd); 162 spin_unlock(&gl->gl_spin); 163 } 164 165 /** 166 * rgrp_go_inval - invalidate the metadata for this glock 167 * @gl: the glock 168 * @flags: 169 * 170 * We never used LM_ST_DEFERRED with resource groups, so that we 171 * should always see the metadata flag set here. 172 * 173 */ 174 175 static void rgrp_go_inval(struct gfs2_glock *gl, int flags) 176 { 177 struct gfs2_sbd *sdp = gl->gl_sbd; 178 struct address_space *mapping = &sdp->sd_aspace; 179 180 WARN_ON_ONCE(!(flags & DIO_METADATA)); 181 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 182 truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end); 183 184 if (gl->gl_object) { 185 struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; 186 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 187 } 188 } 189 190 /** 191 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock 192 * @gl: the glock protecting the inode 193 * 194 */ 195 196 static void inode_go_sync(struct gfs2_glock *gl) 197 { 198 struct gfs2_inode *ip = gl->gl_object; 199 struct address_space *metamapping = gfs2_glock2aspace(gl); 200 int error; 201 202 if (ip && !S_ISREG(ip->i_inode.i_mode)) 203 ip = NULL; 204 if (ip) { 205 if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) 206 unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); 207 inode_dio_wait(&ip->i_inode); 208 } 209 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 210 return; 211 212 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); 213 214 gfs2_log_flush(gl->gl_sbd, gl, NORMAL_FLUSH); 215 filemap_fdatawrite(metamapping); 216 if (ip) { 217 struct address_space *mapping = ip->i_inode.i_mapping; 218 filemap_fdatawrite(mapping); 219 error = filemap_fdatawait(mapping); 220 mapping_set_error(mapping, error); 221 } 222 error = filemap_fdatawait(metamapping); 223 mapping_set_error(metamapping, error); 224 gfs2_ail_empty_gl(gl); 225 /* 226 * Writeback of the data mapping may cause the dirty flag to be set 227 * so we have to clear it again here. 228 */ 229 smp_mb__before_atomic(); 230 clear_bit(GLF_DIRTY, &gl->gl_flags); 231 } 232 233 /** 234 * inode_go_inval - prepare a inode glock to be released 235 * @gl: the glock 236 * @flags: 237 * 238 * Normally we invalidate everything, but if we are moving into 239 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we 240 * can keep hold of the metadata, since it won't have changed. 241 * 242 */ 243 244 static void inode_go_inval(struct gfs2_glock *gl, int flags) 245 { 246 struct gfs2_inode *ip = gl->gl_object; 247 248 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); 249 250 if (flags & DIO_METADATA) { 251 struct address_space *mapping = gfs2_glock2aspace(gl); 252 truncate_inode_pages(mapping, 0); 253 if (ip) { 254 set_bit(GIF_INVALID, &ip->i_flags); 255 forget_all_cached_acls(&ip->i_inode); 256 gfs2_dir_hash_inval(ip); 257 } 258 } 259 260 if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) { 261 gfs2_log_flush(gl->gl_sbd, NULL, NORMAL_FLUSH); 262 gl->gl_sbd->sd_rindex_uptodate = 0; 263 } 264 if (ip && S_ISREG(ip->i_inode.i_mode)) 265 truncate_inode_pages(ip->i_inode.i_mapping, 0); 266 } 267 268 /** 269 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock 270 * @gl: the glock 271 * 272 * Returns: 1 if it's ok 273 */ 274 275 static int inode_go_demote_ok(const struct gfs2_glock *gl) 276 { 277 struct gfs2_sbd *sdp = gl->gl_sbd; 278 struct gfs2_holder *gh; 279 280 if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) 281 return 0; 282 283 if (!list_empty(&gl->gl_holders)) { 284 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); 285 if (gh->gh_list.next != &gl->gl_holders) 286 return 0; 287 } 288 289 return 1; 290 } 291 292 /** 293 * gfs2_set_nlink - Set the inode's link count based on on-disk info 294 * @inode: The inode in question 295 * @nlink: The link count 296 * 297 * If the link count has hit zero, it must never be raised, whatever the 298 * on-disk inode might say. When new struct inodes are created the link 299 * count is set to 1, so that we can safely use this test even when reading 300 * in on disk information for the first time. 301 */ 302 303 static void gfs2_set_nlink(struct inode *inode, u32 nlink) 304 { 305 /* 306 * We will need to review setting the nlink count here in the 307 * light of the forthcoming ro bind mount work. This is a reminder 308 * to do that. 309 */ 310 if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) { 311 if (nlink == 0) 312 clear_nlink(inode); 313 else 314 set_nlink(inode, nlink); 315 } 316 } 317 318 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 319 { 320 const struct gfs2_dinode *str = buf; 321 struct timespec atime; 322 u16 height, depth; 323 324 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) 325 goto corrupt; 326 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); 327 ip->i_inode.i_mode = be32_to_cpu(str->di_mode); 328 ip->i_inode.i_rdev = 0; 329 switch (ip->i_inode.i_mode & S_IFMT) { 330 case S_IFBLK: 331 case S_IFCHR: 332 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), 333 be32_to_cpu(str->di_minor)); 334 break; 335 }; 336 337 i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid)); 338 i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid)); 339 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); 340 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); 341 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 342 atime.tv_sec = be64_to_cpu(str->di_atime); 343 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 344 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) 345 ip->i_inode.i_atime = atime; 346 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 347 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); 348 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 349 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); 350 351 ip->i_goal = be64_to_cpu(str->di_goal_meta); 352 ip->i_generation = be64_to_cpu(str->di_generation); 353 354 ip->i_diskflags = be32_to_cpu(str->di_flags); 355 ip->i_eattr = be64_to_cpu(str->di_eattr); 356 /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ 357 gfs2_set_inode_flags(&ip->i_inode); 358 height = be16_to_cpu(str->di_height); 359 if (unlikely(height > GFS2_MAX_META_HEIGHT)) 360 goto corrupt; 361 ip->i_height = (u8)height; 362 363 depth = be16_to_cpu(str->di_depth); 364 if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) 365 goto corrupt; 366 ip->i_depth = (u8)depth; 367 ip->i_entries = be32_to_cpu(str->di_entries); 368 369 if (S_ISREG(ip->i_inode.i_mode)) 370 gfs2_set_aops(&ip->i_inode); 371 372 return 0; 373 corrupt: 374 gfs2_consist_inode(ip); 375 return -EIO; 376 } 377 378 /** 379 * gfs2_inode_refresh - Refresh the incore copy of the dinode 380 * @ip: The GFS2 inode 381 * 382 * Returns: errno 383 */ 384 385 int gfs2_inode_refresh(struct gfs2_inode *ip) 386 { 387 struct buffer_head *dibh; 388 int error; 389 390 error = gfs2_meta_inode_buffer(ip, &dibh); 391 if (error) 392 return error; 393 394 error = gfs2_dinode_in(ip, dibh->b_data); 395 brelse(dibh); 396 clear_bit(GIF_INVALID, &ip->i_flags); 397 398 return error; 399 } 400 401 /** 402 * inode_go_lock - operation done after an inode lock is locked by a process 403 * @gl: the glock 404 * @flags: 405 * 406 * Returns: errno 407 */ 408 409 static int inode_go_lock(struct gfs2_holder *gh) 410 { 411 struct gfs2_glock *gl = gh->gh_gl; 412 struct gfs2_sbd *sdp = gl->gl_sbd; 413 struct gfs2_inode *ip = gl->gl_object; 414 int error = 0; 415 416 if (!ip || (gh->gh_flags & GL_SKIP)) 417 return 0; 418 419 if (test_bit(GIF_INVALID, &ip->i_flags)) { 420 error = gfs2_inode_refresh(ip); 421 if (error) 422 return error; 423 } 424 425 if (gh->gh_state != LM_ST_DEFERRED) 426 inode_dio_wait(&ip->i_inode); 427 428 if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && 429 (gl->gl_state == LM_ST_EXCLUSIVE) && 430 (gh->gh_state == LM_ST_EXCLUSIVE)) { 431 spin_lock(&sdp->sd_trunc_lock); 432 if (list_empty(&ip->i_trunc_list)) 433 list_add(&sdp->sd_trunc_list, &ip->i_trunc_list); 434 spin_unlock(&sdp->sd_trunc_lock); 435 wake_up(&sdp->sd_quota_wait); 436 return 1; 437 } 438 439 return error; 440 } 441 442 /** 443 * inode_go_dump - print information about an inode 444 * @seq: The iterator 445 * @ip: the inode 446 * 447 */ 448 449 static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) 450 { 451 const struct gfs2_inode *ip = gl->gl_object; 452 if (ip == NULL) 453 return; 454 gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", 455 (unsigned long long)ip->i_no_formal_ino, 456 (unsigned long long)ip->i_no_addr, 457 IF2DT(ip->i_inode.i_mode), ip->i_flags, 458 (unsigned int)ip->i_diskflags, 459 (unsigned long long)i_size_read(&ip->i_inode)); 460 } 461 462 /** 463 * freeze_go_sync - promote/demote the freeze glock 464 * @gl: the glock 465 * @state: the requested state 466 * @flags: 467 * 468 */ 469 470 static void freeze_go_sync(struct gfs2_glock *gl) 471 { 472 struct gfs2_sbd *sdp = gl->gl_sbd; 473 DEFINE_WAIT(wait); 474 475 if (gl->gl_state == LM_ST_SHARED && 476 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 477 atomic_set(&sdp->sd_log_freeze, 1); 478 wake_up(&sdp->sd_logd_waitq); 479 do { 480 prepare_to_wait(&sdp->sd_log_frozen_wait, &wait, 481 TASK_UNINTERRUPTIBLE); 482 if (atomic_read(&sdp->sd_log_freeze)) 483 io_schedule(); 484 } while(atomic_read(&sdp->sd_log_freeze)); 485 finish_wait(&sdp->sd_log_frozen_wait, &wait); 486 } 487 } 488 489 /** 490 * freeze_go_xmote_bh - After promoting/demoting the freeze glock 491 * @gl: the glock 492 * 493 */ 494 495 static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) 496 { 497 struct gfs2_sbd *sdp = gl->gl_sbd; 498 struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); 499 struct gfs2_glock *j_gl = ip->i_gl; 500 struct gfs2_log_header_host head; 501 int error; 502 503 if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 504 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); 505 506 error = gfs2_find_jhead(sdp->sd_jdesc, &head); 507 if (error) 508 gfs2_consist(sdp); 509 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) 510 gfs2_consist(sdp); 511 512 /* Initialize some head of the log stuff */ 513 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { 514 sdp->sd_log_sequence = head.lh_sequence + 1; 515 gfs2_log_pointers_init(sdp, head.lh_blkno); 516 } 517 } 518 return 0; 519 } 520 521 /** 522 * trans_go_demote_ok 523 * @gl: the glock 524 * 525 * Always returns 0 526 */ 527 528 static int freeze_go_demote_ok(const struct gfs2_glock *gl) 529 { 530 return 0; 531 } 532 533 /** 534 * iopen_go_callback - schedule the dcache entry for the inode to be deleted 535 * @gl: the glock 536 * 537 * gl_spin lock is held while calling this 538 */ 539 static void iopen_go_callback(struct gfs2_glock *gl, bool remote) 540 { 541 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; 542 struct gfs2_sbd *sdp = gl->gl_sbd; 543 544 if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY)) 545 return; 546 547 if (gl->gl_demote_state == LM_ST_UNLOCKED && 548 gl->gl_state == LM_ST_SHARED && ip) { 549 gl->gl_lockref.count++; 550 if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) 551 gl->gl_lockref.count--; 552 } 553 } 554 555 const struct gfs2_glock_operations gfs2_meta_glops = { 556 .go_type = LM_TYPE_META, 557 }; 558 559 const struct gfs2_glock_operations gfs2_inode_glops = { 560 .go_sync = inode_go_sync, 561 .go_inval = inode_go_inval, 562 .go_demote_ok = inode_go_demote_ok, 563 .go_lock = inode_go_lock, 564 .go_dump = inode_go_dump, 565 .go_type = LM_TYPE_INODE, 566 .go_flags = GLOF_ASPACE, 567 }; 568 569 const struct gfs2_glock_operations gfs2_rgrp_glops = { 570 .go_sync = rgrp_go_sync, 571 .go_inval = rgrp_go_inval, 572 .go_lock = gfs2_rgrp_go_lock, 573 .go_unlock = gfs2_rgrp_go_unlock, 574 .go_dump = gfs2_rgrp_dump, 575 .go_type = LM_TYPE_RGRP, 576 .go_flags = GLOF_LVB, 577 }; 578 579 const struct gfs2_glock_operations gfs2_freeze_glops = { 580 .go_sync = freeze_go_sync, 581 .go_xmote_bh = freeze_go_xmote_bh, 582 .go_demote_ok = freeze_go_demote_ok, 583 .go_type = LM_TYPE_NONDISK, 584 }; 585 586 const struct gfs2_glock_operations gfs2_iopen_glops = { 587 .go_type = LM_TYPE_IOPEN, 588 .go_callback = iopen_go_callback, 589 }; 590 591 const struct gfs2_glock_operations gfs2_flock_glops = { 592 .go_type = LM_TYPE_FLOCK, 593 }; 594 595 const struct gfs2_glock_operations gfs2_nondisk_glops = { 596 .go_type = LM_TYPE_NONDISK, 597 }; 598 599 const struct gfs2_glock_operations gfs2_quota_glops = { 600 .go_type = LM_TYPE_QUOTA, 601 .go_flags = GLOF_LVB, 602 }; 603 604 const struct gfs2_glock_operations gfs2_journal_glops = { 605 .go_type = LM_TYPE_JOURNAL, 606 }; 607 608 const struct gfs2_glock_operations *gfs2_glops_list[] = { 609 [LM_TYPE_META] = &gfs2_meta_glops, 610 [LM_TYPE_INODE] = &gfs2_inode_glops, 611 [LM_TYPE_RGRP] = &gfs2_rgrp_glops, 612 [LM_TYPE_IOPEN] = &gfs2_iopen_glops, 613 [LM_TYPE_FLOCK] = &gfs2_flock_glops, 614 [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, 615 [LM_TYPE_QUOTA] = &gfs2_quota_glops, 616 [LM_TYPE_JOURNAL] = &gfs2_journal_glops, 617 }; 618 619