1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 5 */ 6 7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 8 9 #include <linux/sched.h> 10 #include <linux/slab.h> 11 #include <linux/spinlock.h> 12 #include <linux/buffer_head.h> 13 #include <linux/delay.h> 14 #include <linux/sort.h> 15 #include <linux/hash.h> 16 #include <linux/jhash.h> 17 #include <linux/kallsyms.h> 18 #include <linux/gfs2_ondisk.h> 19 #include <linux/list.h> 20 #include <linux/wait.h> 21 #include <linux/module.h> 22 #include <linux/uaccess.h> 23 #include <linux/seq_file.h> 24 #include <linux/debugfs.h> 25 #include <linux/kthread.h> 26 #include <linux/freezer.h> 27 #include <linux/workqueue.h> 28 #include <linux/jiffies.h> 29 #include <linux/rcupdate.h> 30 #include <linux/rculist_bl.h> 31 #include <linux/bit_spinlock.h> 32 #include <linux/percpu.h> 33 #include <linux/list_sort.h> 34 #include <linux/lockref.h> 35 #include <linux/rhashtable.h> 36 37 #include "gfs2.h" 38 #include "incore.h" 39 #include "glock.h" 40 #include "glops.h" 41 #include "inode.h" 42 #include "lops.h" 43 #include "meta_io.h" 44 #include "quota.h" 45 #include "super.h" 46 #include "util.h" 47 #include "bmap.h" 48 #define CREATE_TRACE_POINTS 49 #include "trace_gfs2.h" 50 51 struct gfs2_glock_iter { 52 struct gfs2_sbd *sdp; /* incore superblock */ 53 struct rhashtable_iter hti; /* rhashtable iterator */ 54 struct gfs2_glock *gl; /* current glock struct */ 55 loff_t last_pos; /* last position */ 56 }; 57 58 typedef void (*glock_examiner) (struct gfs2_glock * gl); 59 60 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); 61 static void __gfs2_glock_dq(struct gfs2_holder *gh); 62 63 static struct dentry *gfs2_root; 64 static struct workqueue_struct *glock_workqueue; 65 struct workqueue_struct *gfs2_delete_workqueue; 66 static LIST_HEAD(lru_list); 67 static atomic_t lru_count = ATOMIC_INIT(0); 68 static DEFINE_SPINLOCK(lru_lock); 69 70 #define GFS2_GL_HASH_SHIFT 15 71 #define GFS2_GL_HASH_SIZE BIT(GFS2_GL_HASH_SHIFT) 72 73 static const struct rhashtable_params ht_parms = { 74 .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4, 75 .key_len = offsetofend(struct lm_lockname, ln_type), 76 .key_offset = offsetof(struct gfs2_glock, gl_name), 77 .head_offset = offsetof(struct gfs2_glock, gl_node), 78 }; 79 80 static struct rhashtable gl_hash_table; 81 82 #define GLOCK_WAIT_TABLE_BITS 12 83 #define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS) 84 static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned; 85 86 struct wait_glock_queue { 87 struct lm_lockname *name; 88 wait_queue_entry_t wait; 89 }; 90 91 static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode, 92 int sync, void *key) 93 { 94 struct wait_glock_queue *wait_glock = 95 container_of(wait, struct wait_glock_queue, wait); 96 struct lm_lockname *wait_name = wait_glock->name; 97 struct lm_lockname *wake_name = key; 98 99 if (wake_name->ln_sbd != wait_name->ln_sbd || 100 wake_name->ln_number != wait_name->ln_number || 101 wake_name->ln_type != wait_name->ln_type) 102 return 0; 103 return autoremove_wake_function(wait, mode, sync, key); 104 } 105 106 static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name) 107 { 108 u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0); 109 110 return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS); 111 } 112 113 /** 114 * wake_up_glock - Wake up waiters on a glock 115 * @gl: the glock 116 */ 117 static void wake_up_glock(struct gfs2_glock *gl) 118 { 119 wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name); 120 121 if (waitqueue_active(wq)) 122 __wake_up(wq, TASK_NORMAL, 1, &gl->gl_name); 123 } 124 125 static void gfs2_glock_dealloc(struct rcu_head *rcu) 126 { 127 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); 128 129 kfree(gl->gl_lksb.sb_lvbptr); 130 if (gl->gl_ops->go_flags & GLOF_ASPACE) { 131 struct gfs2_glock_aspace *gla = 132 container_of(gl, struct gfs2_glock_aspace, glock); 133 kmem_cache_free(gfs2_glock_aspace_cachep, gla); 134 } else 135 kmem_cache_free(gfs2_glock_cachep, gl); 136 } 137 138 /** 139 * glock_blocked_by_withdraw - determine if we can still use a glock 140 * @gl: the glock 141 * 142 * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted 143 * when we're withdrawn. For example, to maintain metadata integrity, we should 144 * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like 145 * iopen or the transaction glocks may be safely used because none of their 146 * metadata goes through the journal. So in general, we should disallow all 147 * glocks that are journaled, and allow all the others. One exception is: 148 * we need to allow our active journal to be promoted and demoted so others 149 * may recover it and we can reacquire it when they're done. 150 */ 151 static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) 152 { 153 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 154 155 if (likely(!gfs2_withdrawn(sdp))) 156 return false; 157 if (gl->gl_ops->go_flags & GLOF_NONDISK) 158 return false; 159 if (!sdp->sd_jdesc || 160 gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr) 161 return false; 162 return true; 163 } 164 165 void gfs2_glock_free(struct gfs2_glock *gl) 166 { 167 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 168 169 gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0); 170 rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); 171 smp_mb(); 172 wake_up_glock(gl); 173 call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); 174 if (atomic_dec_and_test(&sdp->sd_glock_disposal)) 175 wake_up(&sdp->sd_glock_wait); 176 } 177 178 /** 179 * gfs2_glock_hold() - increment reference count on glock 180 * @gl: The glock to hold 181 * 182 */ 183 184 void gfs2_glock_hold(struct gfs2_glock *gl) 185 { 186 GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); 187 lockref_get(&gl->gl_lockref); 188 } 189 190 /** 191 * demote_ok - Check to see if it's ok to unlock a glock 192 * @gl: the glock 193 * 194 * Returns: 1 if it's ok 195 */ 196 197 static int demote_ok(const struct gfs2_glock *gl) 198 { 199 const struct gfs2_glock_operations *glops = gl->gl_ops; 200 201 if (gl->gl_state == LM_ST_UNLOCKED) 202 return 0; 203 /* 204 * Note that demote_ok is used for the lru process of disposing of 205 * glocks. For this purpose, we don't care if the glock's holders 206 * have the HIF_MAY_DEMOTE flag set or not. If someone is using 207 * them, don't demote. 208 */ 209 if (!list_empty(&gl->gl_holders)) 210 return 0; 211 if (glops->go_demote_ok) 212 return glops->go_demote_ok(gl); 213 return 1; 214 } 215 216 217 void gfs2_glock_add_to_lru(struct gfs2_glock *gl) 218 { 219 if (!(gl->gl_ops->go_flags & GLOF_LRU)) 220 return; 221 222 spin_lock(&lru_lock); 223 224 list_move_tail(&gl->gl_lru, &lru_list); 225 226 if (!test_bit(GLF_LRU, &gl->gl_flags)) { 227 set_bit(GLF_LRU, &gl->gl_flags); 228 atomic_inc(&lru_count); 229 } 230 231 spin_unlock(&lru_lock); 232 } 233 234 static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) 235 { 236 if (!(gl->gl_ops->go_flags & GLOF_LRU)) 237 return; 238 239 spin_lock(&lru_lock); 240 if (test_bit(GLF_LRU, &gl->gl_flags)) { 241 list_del_init(&gl->gl_lru); 242 atomic_dec(&lru_count); 243 clear_bit(GLF_LRU, &gl->gl_flags); 244 } 245 spin_unlock(&lru_lock); 246 } 247 248 /* 249 * Enqueue the glock on the work queue. Passes one glock reference on to the 250 * work queue. 251 */ 252 static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { 253 if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) { 254 /* 255 * We are holding the lockref spinlock, and the work was still 256 * queued above. The queued work (glock_work_func) takes that 257 * spinlock before dropping its glock reference(s), so it 258 * cannot have dropped them in the meantime. 259 */ 260 GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2); 261 gl->gl_lockref.count--; 262 } 263 } 264 265 static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { 266 spin_lock(&gl->gl_lockref.lock); 267 __gfs2_glock_queue_work(gl, delay); 268 spin_unlock(&gl->gl_lockref.lock); 269 } 270 271 static void __gfs2_glock_put(struct gfs2_glock *gl) 272 { 273 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 274 struct address_space *mapping = gfs2_glock2aspace(gl); 275 276 lockref_mark_dead(&gl->gl_lockref); 277 278 gfs2_glock_remove_from_lru(gl); 279 spin_unlock(&gl->gl_lockref.lock); 280 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 281 if (mapping) { 282 truncate_inode_pages_final(mapping); 283 if (!gfs2_withdrawn(sdp)) 284 GLOCK_BUG_ON(gl, !mapping_empty(mapping)); 285 } 286 trace_gfs2_glock_put(gl); 287 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); 288 } 289 290 /* 291 * Cause the glock to be put in work queue context. 292 */ 293 void gfs2_glock_queue_put(struct gfs2_glock *gl) 294 { 295 gfs2_glock_queue_work(gl, 0); 296 } 297 298 /** 299 * gfs2_glock_put() - Decrement reference count on glock 300 * @gl: The glock to put 301 * 302 */ 303 304 void gfs2_glock_put(struct gfs2_glock *gl) 305 { 306 if (lockref_put_or_lock(&gl->gl_lockref)) 307 return; 308 309 __gfs2_glock_put(gl); 310 } 311 312 /** 313 * may_grant - check if it's ok to grant a new lock 314 * @gl: The glock 315 * @current_gh: One of the current holders of @gl 316 * @gh: The lock request which we wish to grant 317 * 318 * With our current compatibility rules, if a glock has one or more active 319 * holders (HIF_HOLDER flag set), any of those holders can be passed in as 320 * @current_gh; they are all the same as far as compatibility with the new @gh 321 * goes. 322 * 323 * Returns true if it's ok to grant the lock. 324 */ 325 326 static inline bool may_grant(struct gfs2_glock *gl, 327 struct gfs2_holder *current_gh, 328 struct gfs2_holder *gh) 329 { 330 if (current_gh) { 331 GLOCK_BUG_ON(gl, !test_bit(HIF_HOLDER, ¤t_gh->gh_iflags)); 332 333 switch(current_gh->gh_state) { 334 case LM_ST_EXCLUSIVE: 335 /* 336 * Here we make a special exception to grant holders 337 * who agree to share the EX lock with other holders 338 * who also have the bit set. If the original holder 339 * has the LM_FLAG_NODE_SCOPE bit set, we grant more 340 * holders with the bit set. 341 */ 342 return gh->gh_state == LM_ST_EXCLUSIVE && 343 (current_gh->gh_flags & LM_FLAG_NODE_SCOPE) && 344 (gh->gh_flags & LM_FLAG_NODE_SCOPE); 345 346 case LM_ST_SHARED: 347 case LM_ST_DEFERRED: 348 return gh->gh_state == current_gh->gh_state; 349 350 default: 351 return false; 352 } 353 } 354 355 if (gl->gl_state == gh->gh_state) 356 return true; 357 if (gh->gh_flags & GL_EXACT) 358 return false; 359 if (gl->gl_state == LM_ST_EXCLUSIVE) { 360 return gh->gh_state == LM_ST_SHARED || 361 gh->gh_state == LM_ST_DEFERRED; 362 } 363 if (gh->gh_flags & LM_FLAG_ANY) 364 return gl->gl_state != LM_ST_UNLOCKED; 365 return false; 366 } 367 368 static void gfs2_holder_wake(struct gfs2_holder *gh) 369 { 370 clear_bit(HIF_WAIT, &gh->gh_iflags); 371 smp_mb__after_atomic(); 372 wake_up_bit(&gh->gh_iflags, HIF_WAIT); 373 if (gh->gh_flags & GL_ASYNC) { 374 struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd; 375 376 wake_up(&sdp->sd_async_glock_wait); 377 } 378 } 379 380 /** 381 * do_error - Something unexpected has happened during a lock request 382 * @gl: The glock 383 * @ret: The status from the DLM 384 */ 385 386 static void do_error(struct gfs2_glock *gl, const int ret) 387 { 388 struct gfs2_holder *gh, *tmp; 389 390 list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { 391 if (!test_bit(HIF_WAIT, &gh->gh_iflags)) 392 continue; 393 if (ret & LM_OUT_ERROR) 394 gh->gh_error = -EIO; 395 else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) 396 gh->gh_error = GLR_TRYFAILED; 397 else 398 continue; 399 list_del_init(&gh->gh_list); 400 trace_gfs2_glock_queue(gh, 0); 401 gfs2_holder_wake(gh); 402 } 403 } 404 405 /** 406 * demote_incompat_holders - demote incompatible demoteable holders 407 * @gl: the glock we want to promote 408 * @current_gh: the newly promoted holder 409 * 410 * We're passing the newly promoted holder in @current_gh, but actually, any of 411 * the strong holders would do. 412 */ 413 static void demote_incompat_holders(struct gfs2_glock *gl, 414 struct gfs2_holder *current_gh) 415 { 416 struct gfs2_holder *gh, *tmp; 417 418 /* 419 * Demote incompatible holders before we make ourselves eligible. 420 * (This holder may or may not allow auto-demoting, but we don't want 421 * to demote the new holder before it's even granted.) 422 */ 423 list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { 424 /* 425 * Since holders are at the front of the list, we stop when we 426 * find the first non-holder. 427 */ 428 if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) 429 return; 430 if (gh == current_gh) 431 continue; 432 if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) && 433 !may_grant(gl, current_gh, gh)) { 434 /* 435 * We should not recurse into do_promote because 436 * __gfs2_glock_dq only calls handle_callback, 437 * gfs2_glock_add_to_lru and __gfs2_glock_queue_work. 438 */ 439 __gfs2_glock_dq(gh); 440 } 441 } 442 } 443 444 /** 445 * find_first_holder - find the first "holder" gh 446 * @gl: the glock 447 */ 448 449 static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) 450 { 451 struct gfs2_holder *gh; 452 453 if (!list_empty(&gl->gl_holders)) { 454 gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, 455 gh_list); 456 if (test_bit(HIF_HOLDER, &gh->gh_iflags)) 457 return gh; 458 } 459 return NULL; 460 } 461 462 /** 463 * find_first_strong_holder - find the first non-demoteable holder 464 * @gl: the glock 465 * 466 * Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set. 467 */ 468 static inline struct gfs2_holder * 469 find_first_strong_holder(struct gfs2_glock *gl) 470 { 471 struct gfs2_holder *gh; 472 473 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 474 if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) 475 return NULL; 476 if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags)) 477 return gh; 478 } 479 return NULL; 480 } 481 482 /* 483 * gfs2_instantiate - Call the glops instantiate function 484 * @gh: The glock holder 485 * 486 * Returns: 0 if instantiate was successful, or error. 487 */ 488 int gfs2_instantiate(struct gfs2_holder *gh) 489 { 490 struct gfs2_glock *gl = gh->gh_gl; 491 const struct gfs2_glock_operations *glops = gl->gl_ops; 492 int ret; 493 494 again: 495 if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags)) 496 goto done; 497 498 /* 499 * Since we unlock the lockref lock, we set a flag to indicate 500 * instantiate is in progress. 501 */ 502 if (test_and_set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) { 503 wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG, 504 TASK_UNINTERRUPTIBLE); 505 /* 506 * Here we just waited for a different instantiate to finish. 507 * But that may not have been successful, as when a process 508 * locks an inode glock _before_ it has an actual inode to 509 * instantiate into. So we check again. This process might 510 * have an inode to instantiate, so might be successful. 511 */ 512 goto again; 513 } 514 515 ret = glops->go_instantiate(gl); 516 if (!ret) 517 clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); 518 clear_and_wake_up_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags); 519 if (ret) 520 return ret; 521 522 done: 523 if (glops->go_held) 524 return glops->go_held(gh); 525 return 0; 526 } 527 528 /** 529 * do_promote - promote as many requests as possible on the current queue 530 * @gl: The glock 531 * 532 * Returns: 1 if there is a blocked holder at the head of the list 533 */ 534 535 static int do_promote(struct gfs2_glock *gl) 536 { 537 struct gfs2_holder *gh, *current_gh; 538 bool incompat_holders_demoted = false; 539 540 current_gh = find_first_strong_holder(gl); 541 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 542 if (test_bit(HIF_HOLDER, &gh->gh_iflags)) 543 continue; 544 if (!may_grant(gl, current_gh, gh)) { 545 /* 546 * If we get here, it means we may not grant this 547 * holder for some reason. If this holder is at the 548 * head of the list, it means we have a blocked holder 549 * at the head, so return 1. 550 */ 551 if (list_is_first(&gh->gh_list, &gl->gl_holders)) 552 return 1; 553 do_error(gl, 0); 554 break; 555 } 556 set_bit(HIF_HOLDER, &gh->gh_iflags); 557 trace_gfs2_promote(gh); 558 gfs2_holder_wake(gh); 559 if (!incompat_holders_demoted) { 560 current_gh = gh; 561 demote_incompat_holders(gl, current_gh); 562 incompat_holders_demoted = true; 563 } 564 } 565 return 0; 566 } 567 568 /** 569 * find_first_waiter - find the first gh that's waiting for the glock 570 * @gl: the glock 571 */ 572 573 static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl) 574 { 575 struct gfs2_holder *gh; 576 577 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 578 if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) 579 return gh; 580 } 581 return NULL; 582 } 583 584 /** 585 * state_change - record that the glock is now in a different state 586 * @gl: the glock 587 * @new_state: the new state 588 */ 589 590 static void state_change(struct gfs2_glock *gl, unsigned int new_state) 591 { 592 int held1, held2; 593 594 held1 = (gl->gl_state != LM_ST_UNLOCKED); 595 held2 = (new_state != LM_ST_UNLOCKED); 596 597 if (held1 != held2) { 598 GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); 599 if (held2) 600 gl->gl_lockref.count++; 601 else 602 gl->gl_lockref.count--; 603 } 604 if (new_state != gl->gl_target) 605 /* shorten our minimum hold time */ 606 gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR, 607 GL_GLOCK_MIN_HOLD); 608 gl->gl_state = new_state; 609 gl->gl_tchange = jiffies; 610 } 611 612 static void gfs2_set_demote(struct gfs2_glock *gl) 613 { 614 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 615 616 set_bit(GLF_DEMOTE, &gl->gl_flags); 617 smp_mb(); 618 wake_up(&sdp->sd_async_glock_wait); 619 } 620 621 static void gfs2_demote_wake(struct gfs2_glock *gl) 622 { 623 gl->gl_demote_state = LM_ST_EXCLUSIVE; 624 clear_bit(GLF_DEMOTE, &gl->gl_flags); 625 smp_mb__after_atomic(); 626 wake_up_bit(&gl->gl_flags, GLF_DEMOTE); 627 } 628 629 /** 630 * finish_xmote - The DLM has replied to one of our lock requests 631 * @gl: The glock 632 * @ret: The status from the DLM 633 * 634 */ 635 636 static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) 637 { 638 const struct gfs2_glock_operations *glops = gl->gl_ops; 639 struct gfs2_holder *gh; 640 unsigned state = ret & LM_OUT_ST_MASK; 641 642 spin_lock(&gl->gl_lockref.lock); 643 trace_gfs2_glock_state_change(gl, state); 644 state_change(gl, state); 645 gh = find_first_waiter(gl); 646 647 /* Demote to UN request arrived during demote to SH or DF */ 648 if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && 649 state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED) 650 gl->gl_target = LM_ST_UNLOCKED; 651 652 /* Check for state != intended state */ 653 if (unlikely(state != gl->gl_target)) { 654 if (gh && (ret & LM_OUT_CANCELED)) 655 gfs2_holder_wake(gh); 656 if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { 657 /* move to back of queue and try next entry */ 658 if (ret & LM_OUT_CANCELED) { 659 if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0) 660 list_move_tail(&gh->gh_list, &gl->gl_holders); 661 gh = find_first_waiter(gl); 662 gl->gl_target = gh->gh_state; 663 goto retry; 664 } 665 /* Some error or failed "try lock" - report it */ 666 if ((ret & LM_OUT_ERROR) || 667 (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { 668 gl->gl_target = gl->gl_state; 669 do_error(gl, ret); 670 goto out; 671 } 672 } 673 switch(state) { 674 /* Unlocked due to conversion deadlock, try again */ 675 case LM_ST_UNLOCKED: 676 retry: 677 do_xmote(gl, gh, gl->gl_target); 678 break; 679 /* Conversion fails, unlock and try again */ 680 case LM_ST_SHARED: 681 case LM_ST_DEFERRED: 682 do_xmote(gl, gh, LM_ST_UNLOCKED); 683 break; 684 default: /* Everything else */ 685 fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n", 686 gl->gl_target, state); 687 GLOCK_BUG_ON(gl, 1); 688 } 689 spin_unlock(&gl->gl_lockref.lock); 690 return; 691 } 692 693 /* Fast path - we got what we asked for */ 694 if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) 695 gfs2_demote_wake(gl); 696 if (state != LM_ST_UNLOCKED) { 697 if (glops->go_xmote_bh) { 698 int rv; 699 700 spin_unlock(&gl->gl_lockref.lock); 701 rv = glops->go_xmote_bh(gl); 702 spin_lock(&gl->gl_lockref.lock); 703 if (rv) { 704 do_error(gl, rv); 705 goto out; 706 } 707 } 708 do_promote(gl); 709 } 710 out: 711 clear_bit(GLF_LOCK, &gl->gl_flags); 712 spin_unlock(&gl->gl_lockref.lock); 713 } 714 715 static bool is_system_glock(struct gfs2_glock *gl) 716 { 717 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 718 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 719 720 if (gl == m_ip->i_gl) 721 return true; 722 return false; 723 } 724 725 /** 726 * do_xmote - Calls the DLM to change the state of a lock 727 * @gl: The lock state 728 * @gh: The holder (only for promotes) 729 * @target: The target lock state 730 * 731 */ 732 733 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) 734 __releases(&gl->gl_lockref.lock) 735 __acquires(&gl->gl_lockref.lock) 736 { 737 const struct gfs2_glock_operations *glops = gl->gl_ops; 738 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 739 unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); 740 int ret; 741 742 if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) && 743 gh && !(gh->gh_flags & LM_FLAG_NOEXP)) 744 return; 745 lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | 746 LM_FLAG_PRIORITY); 747 GLOCK_BUG_ON(gl, gl->gl_state == target); 748 GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target); 749 if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && 750 glops->go_inval) { 751 /* 752 * If another process is already doing the invalidate, let that 753 * finish first. The glock state machine will get back to this 754 * holder again later. 755 */ 756 if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS, 757 &gl->gl_flags)) 758 return; 759 do_error(gl, 0); /* Fail queued try locks */ 760 } 761 gl->gl_req = target; 762 set_bit(GLF_BLOCKING, &gl->gl_flags); 763 if ((gl->gl_req == LM_ST_UNLOCKED) || 764 (gl->gl_state == LM_ST_EXCLUSIVE) || 765 (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) 766 clear_bit(GLF_BLOCKING, &gl->gl_flags); 767 spin_unlock(&gl->gl_lockref.lock); 768 if (glops->go_sync) { 769 ret = glops->go_sync(gl); 770 /* If we had a problem syncing (due to io errors or whatever, 771 * we should not invalidate the metadata or tell dlm to 772 * release the glock to other nodes. 773 */ 774 if (ret) { 775 if (cmpxchg(&sdp->sd_log_error, 0, ret)) { 776 fs_err(sdp, "Error %d syncing glock \n", ret); 777 gfs2_dump_glock(NULL, gl, true); 778 } 779 goto skip_inval; 780 } 781 } 782 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) { 783 /* 784 * The call to go_sync should have cleared out the ail list. 785 * If there are still items, we have a problem. We ought to 786 * withdraw, but we can't because the withdraw code also uses 787 * glocks. Warn about the error, dump the glock, then fall 788 * through and wait for logd to do the withdraw for us. 789 */ 790 if ((atomic_read(&gl->gl_ail_count) != 0) && 791 (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) { 792 gfs2_glock_assert_warn(gl, 793 !atomic_read(&gl->gl_ail_count)); 794 gfs2_dump_glock(NULL, gl, true); 795 } 796 glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); 797 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); 798 } 799 800 skip_inval: 801 gfs2_glock_hold(gl); 802 /* 803 * Check for an error encountered since we called go_sync and go_inval. 804 * If so, we can't withdraw from the glock code because the withdraw 805 * code itself uses glocks (see function signal_our_withdraw) to 806 * change the mount to read-only. Most importantly, we must not call 807 * dlm to unlock the glock until the journal is in a known good state 808 * (after journal replay) otherwise other nodes may use the object 809 * (rgrp or dinode) and then later, journal replay will corrupt the 810 * file system. The best we can do here is wait for the logd daemon 811 * to see sd_log_error and withdraw, and in the meantime, requeue the 812 * work for later. 813 * 814 * We make a special exception for some system glocks, such as the 815 * system statfs inode glock, which needs to be granted before the 816 * gfs2_quotad daemon can exit, and that exit needs to finish before 817 * we can unmount the withdrawn file system. 818 * 819 * However, if we're just unlocking the lock (say, for unmount, when 820 * gfs2_gl_hash_clear calls clear_glock) and recovery is complete 821 * then it's okay to tell dlm to unlock it. 822 */ 823 if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp))) 824 gfs2_withdraw_delayed(sdp); 825 if (glock_blocked_by_withdraw(gl) && 826 (target != LM_ST_UNLOCKED || 827 test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) { 828 if (!is_system_glock(gl)) { 829 gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); 830 goto out; 831 } else { 832 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); 833 } 834 } 835 836 if (sdp->sd_lockstruct.ls_ops->lm_lock) { 837 /* lock_dlm */ 838 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); 839 if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED && 840 target == LM_ST_UNLOCKED && 841 test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) { 842 finish_xmote(gl, target); 843 gfs2_glock_queue_work(gl, 0); 844 } else if (ret) { 845 fs_err(sdp, "lm_lock ret %d\n", ret); 846 GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp)); 847 } 848 } else { /* lock_nolock */ 849 finish_xmote(gl, target); 850 gfs2_glock_queue_work(gl, 0); 851 } 852 out: 853 spin_lock(&gl->gl_lockref.lock); 854 } 855 856 /** 857 * run_queue - do all outstanding tasks related to a glock 858 * @gl: The glock in question 859 * @nonblock: True if we must not block in run_queue 860 * 861 */ 862 863 static void run_queue(struct gfs2_glock *gl, const int nonblock) 864 __releases(&gl->gl_lockref.lock) 865 __acquires(&gl->gl_lockref.lock) 866 { 867 struct gfs2_holder *gh = NULL; 868 869 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) 870 return; 871 872 GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); 873 874 if (test_bit(GLF_DEMOTE, &gl->gl_flags) && 875 gl->gl_demote_state != gl->gl_state) { 876 if (find_first_holder(gl)) 877 goto out_unlock; 878 if (nonblock) 879 goto out_sched; 880 set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); 881 GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); 882 gl->gl_target = gl->gl_demote_state; 883 } else { 884 if (test_bit(GLF_DEMOTE, &gl->gl_flags)) 885 gfs2_demote_wake(gl); 886 if (do_promote(gl) == 0) 887 goto out_unlock; 888 gh = find_first_waiter(gl); 889 gl->gl_target = gh->gh_state; 890 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) 891 do_error(gl, 0); /* Fail queued try locks */ 892 } 893 do_xmote(gl, gh, gl->gl_target); 894 return; 895 896 out_sched: 897 clear_bit(GLF_LOCK, &gl->gl_flags); 898 smp_mb__after_atomic(); 899 gl->gl_lockref.count++; 900 __gfs2_glock_queue_work(gl, 0); 901 return; 902 903 out_unlock: 904 clear_bit(GLF_LOCK, &gl->gl_flags); 905 smp_mb__after_atomic(); 906 return; 907 } 908 909 void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation) 910 { 911 struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr; 912 913 if (ri->ri_magic == 0) 914 ri->ri_magic = cpu_to_be32(GFS2_MAGIC); 915 if (ri->ri_magic == cpu_to_be32(GFS2_MAGIC)) 916 ri->ri_generation_deleted = cpu_to_be64(generation); 917 } 918 919 bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation) 920 { 921 struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr; 922 923 if (ri->ri_magic != cpu_to_be32(GFS2_MAGIC)) 924 return false; 925 return generation <= be64_to_cpu(ri->ri_generation_deleted); 926 } 927 928 static void gfs2_glock_poke(struct gfs2_glock *gl) 929 { 930 int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP; 931 struct gfs2_holder gh; 932 int error; 933 934 __gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh, _RET_IP_); 935 error = gfs2_glock_nq(&gh); 936 if (!error) 937 gfs2_glock_dq(&gh); 938 gfs2_holder_uninit(&gh); 939 } 940 941 static bool gfs2_try_evict(struct gfs2_glock *gl) 942 { 943 struct gfs2_inode *ip; 944 bool evicted = false; 945 946 /* 947 * If there is contention on the iopen glock and we have an inode, try 948 * to grab and release the inode so that it can be evicted. This will 949 * allow the remote node to go ahead and delete the inode without us 950 * having to do it, which will avoid rgrp glock thrashing. 951 * 952 * The remote node is likely still holding the corresponding inode 953 * glock, so it will run before we get to verify that the delete has 954 * happened below. 955 */ 956 spin_lock(&gl->gl_lockref.lock); 957 ip = gl->gl_object; 958 if (ip && !igrab(&ip->i_inode)) 959 ip = NULL; 960 spin_unlock(&gl->gl_lockref.lock); 961 if (ip) { 962 struct gfs2_glock *inode_gl = NULL; 963 964 gl->gl_no_formal_ino = ip->i_no_formal_ino; 965 set_bit(GIF_DEFERRED_DELETE, &ip->i_flags); 966 d_prune_aliases(&ip->i_inode); 967 iput(&ip->i_inode); 968 969 /* If the inode was evicted, gl->gl_object will now be NULL. */ 970 spin_lock(&gl->gl_lockref.lock); 971 ip = gl->gl_object; 972 if (ip) { 973 inode_gl = ip->i_gl; 974 lockref_get(&inode_gl->gl_lockref); 975 clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags); 976 } 977 spin_unlock(&gl->gl_lockref.lock); 978 if (inode_gl) { 979 gfs2_glock_poke(inode_gl); 980 gfs2_glock_put(inode_gl); 981 } 982 evicted = !ip; 983 } 984 return evicted; 985 } 986 987 static void delete_work_func(struct work_struct *work) 988 { 989 struct delayed_work *dwork = to_delayed_work(work); 990 struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete); 991 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 992 struct inode *inode; 993 u64 no_addr = gl->gl_name.ln_number; 994 995 spin_lock(&gl->gl_lockref.lock); 996 clear_bit(GLF_PENDING_DELETE, &gl->gl_flags); 997 spin_unlock(&gl->gl_lockref.lock); 998 999 if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { 1000 /* 1001 * If we can evict the inode, give the remote node trying to 1002 * delete the inode some time before verifying that the delete 1003 * has happened. Otherwise, if we cause contention on the inode glock 1004 * immediately, the remote node will think that we still have 1005 * the inode in use, and so it will give up waiting. 1006 * 1007 * If we can't evict the inode, signal to the remote node that 1008 * the inode is still in use. We'll later try to delete the 1009 * inode locally in gfs2_evict_inode. 1010 * 1011 * FIXME: We only need to verify that the remote node has 1012 * deleted the inode because nodes before this remote delete 1013 * rework won't cooperate. At a later time, when we no longer 1014 * care about compatibility with such nodes, we can skip this 1015 * step entirely. 1016 */ 1017 if (gfs2_try_evict(gl)) { 1018 if (gfs2_queue_delete_work(gl, 5 * HZ)) 1019 return; 1020 } 1021 goto out; 1022 } 1023 1024 inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, 1025 GFS2_BLKST_UNLINKED); 1026 if (!IS_ERR_OR_NULL(inode)) { 1027 d_prune_aliases(inode); 1028 iput(inode); 1029 } 1030 out: 1031 gfs2_glock_put(gl); 1032 } 1033 1034 static void glock_work_func(struct work_struct *work) 1035 { 1036 unsigned long delay = 0; 1037 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); 1038 unsigned int drop_refs = 1; 1039 1040 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { 1041 finish_xmote(gl, gl->gl_reply); 1042 drop_refs++; 1043 } 1044 spin_lock(&gl->gl_lockref.lock); 1045 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 1046 gl->gl_state != LM_ST_UNLOCKED && 1047 gl->gl_demote_state != LM_ST_EXCLUSIVE) { 1048 unsigned long holdtime, now = jiffies; 1049 1050 holdtime = gl->gl_tchange + gl->gl_hold_time; 1051 if (time_before(now, holdtime)) 1052 delay = holdtime - now; 1053 1054 if (!delay) { 1055 clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); 1056 gfs2_set_demote(gl); 1057 } 1058 } 1059 run_queue(gl, 0); 1060 if (delay) { 1061 /* Keep one glock reference for the work we requeue. */ 1062 drop_refs--; 1063 if (gl->gl_name.ln_type != LM_TYPE_INODE) 1064 delay = 0; 1065 __gfs2_glock_queue_work(gl, delay); 1066 } 1067 1068 /* 1069 * Drop the remaining glock references manually here. (Mind that 1070 * __gfs2_glock_queue_work depends on the lockref spinlock begin held 1071 * here as well.) 1072 */ 1073 gl->gl_lockref.count -= drop_refs; 1074 if (!gl->gl_lockref.count) { 1075 __gfs2_glock_put(gl); 1076 return; 1077 } 1078 spin_unlock(&gl->gl_lockref.lock); 1079 } 1080 1081 static struct gfs2_glock *find_insert_glock(struct lm_lockname *name, 1082 struct gfs2_glock *new) 1083 { 1084 struct wait_glock_queue wait; 1085 wait_queue_head_t *wq = glock_waitqueue(name); 1086 struct gfs2_glock *gl; 1087 1088 wait.name = name; 1089 init_wait(&wait.wait); 1090 wait.wait.func = glock_wake_function; 1091 1092 again: 1093 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1094 rcu_read_lock(); 1095 if (new) { 1096 gl = rhashtable_lookup_get_insert_fast(&gl_hash_table, 1097 &new->gl_node, ht_parms); 1098 if (IS_ERR(gl)) 1099 goto out; 1100 } else { 1101 gl = rhashtable_lookup_fast(&gl_hash_table, 1102 name, ht_parms); 1103 } 1104 if (gl && !lockref_get_not_dead(&gl->gl_lockref)) { 1105 rcu_read_unlock(); 1106 schedule(); 1107 goto again; 1108 } 1109 out: 1110 rcu_read_unlock(); 1111 finish_wait(wq, &wait.wait); 1112 return gl; 1113 } 1114 1115 /** 1116 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist 1117 * @sdp: The GFS2 superblock 1118 * @number: the lock number 1119 * @glops: The glock_operations to use 1120 * @create: If 0, don't create the glock if it doesn't exist 1121 * @glp: the glock is returned here 1122 * 1123 * This does not lock a glock, just finds/creates structures for one. 1124 * 1125 * Returns: errno 1126 */ 1127 1128 int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, 1129 const struct gfs2_glock_operations *glops, int create, 1130 struct gfs2_glock **glp) 1131 { 1132 struct super_block *s = sdp->sd_vfs; 1133 struct lm_lockname name = { .ln_number = number, 1134 .ln_type = glops->go_type, 1135 .ln_sbd = sdp }; 1136 struct gfs2_glock *gl, *tmp; 1137 struct address_space *mapping; 1138 int ret = 0; 1139 1140 gl = find_insert_glock(&name, NULL); 1141 if (gl) { 1142 *glp = gl; 1143 return 0; 1144 } 1145 if (!create) 1146 return -ENOENT; 1147 1148 if (glops->go_flags & GLOF_ASPACE) { 1149 struct gfs2_glock_aspace *gla = 1150 kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_NOFS); 1151 if (!gla) 1152 return -ENOMEM; 1153 gl = &gla->glock; 1154 } else { 1155 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_NOFS); 1156 if (!gl) 1157 return -ENOMEM; 1158 } 1159 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 1160 gl->gl_ops = glops; 1161 1162 if (glops->go_flags & GLOF_LVB) { 1163 gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS); 1164 if (!gl->gl_lksb.sb_lvbptr) { 1165 gfs2_glock_dealloc(&gl->gl_rcu); 1166 return -ENOMEM; 1167 } 1168 } 1169 1170 atomic_inc(&sdp->sd_glock_disposal); 1171 gl->gl_node.next = NULL; 1172 gl->gl_flags = glops->go_instantiate ? BIT(GLF_INSTANTIATE_NEEDED) : 0; 1173 gl->gl_name = name; 1174 lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); 1175 gl->gl_lockref.count = 1; 1176 gl->gl_state = LM_ST_UNLOCKED; 1177 gl->gl_target = LM_ST_UNLOCKED; 1178 gl->gl_demote_state = LM_ST_EXCLUSIVE; 1179 gl->gl_dstamp = 0; 1180 preempt_disable(); 1181 /* We use the global stats to estimate the initial per-glock stats */ 1182 gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type]; 1183 preempt_enable(); 1184 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; 1185 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; 1186 gl->gl_tchange = jiffies; 1187 gl->gl_object = NULL; 1188 gl->gl_hold_time = GL_GLOCK_DFT_HOLD; 1189 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); 1190 if (gl->gl_name.ln_type == LM_TYPE_IOPEN) 1191 INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func); 1192 1193 mapping = gfs2_glock2aspace(gl); 1194 if (mapping) { 1195 mapping->a_ops = &gfs2_meta_aops; 1196 mapping->host = s->s_bdev->bd_inode; 1197 mapping->flags = 0; 1198 mapping_set_gfp_mask(mapping, GFP_NOFS); 1199 mapping->private_data = NULL; 1200 mapping->writeback_index = 0; 1201 } 1202 1203 tmp = find_insert_glock(&name, gl); 1204 if (!tmp) { 1205 *glp = gl; 1206 goto out; 1207 } 1208 if (IS_ERR(tmp)) { 1209 ret = PTR_ERR(tmp); 1210 goto out_free; 1211 } 1212 *glp = tmp; 1213 1214 out_free: 1215 gfs2_glock_dealloc(&gl->gl_rcu); 1216 if (atomic_dec_and_test(&sdp->sd_glock_disposal)) 1217 wake_up(&sdp->sd_glock_wait); 1218 1219 out: 1220 return ret; 1221 } 1222 1223 /** 1224 * __gfs2_holder_init - initialize a struct gfs2_holder in the default way 1225 * @gl: the glock 1226 * @state: the state we're requesting 1227 * @flags: the modifier flags 1228 * @gh: the holder structure 1229 * 1230 */ 1231 1232 void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, 1233 struct gfs2_holder *gh, unsigned long ip) 1234 { 1235 INIT_LIST_HEAD(&gh->gh_list); 1236 gh->gh_gl = gl; 1237 gh->gh_ip = ip; 1238 gh->gh_owner_pid = get_pid(task_pid(current)); 1239 gh->gh_state = state; 1240 gh->gh_flags = flags; 1241 gh->gh_iflags = 0; 1242 gfs2_glock_hold(gl); 1243 } 1244 1245 /** 1246 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it 1247 * @state: the state we're requesting 1248 * @flags: the modifier flags 1249 * @gh: the holder structure 1250 * 1251 * Don't mess with the glock. 1252 * 1253 */ 1254 1255 void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh) 1256 { 1257 gh->gh_state = state; 1258 gh->gh_flags = flags; 1259 gh->gh_iflags = 0; 1260 gh->gh_ip = _RET_IP_; 1261 put_pid(gh->gh_owner_pid); 1262 gh->gh_owner_pid = get_pid(task_pid(current)); 1263 } 1264 1265 /** 1266 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference) 1267 * @gh: the holder structure 1268 * 1269 */ 1270 1271 void gfs2_holder_uninit(struct gfs2_holder *gh) 1272 { 1273 put_pid(gh->gh_owner_pid); 1274 gfs2_glock_put(gh->gh_gl); 1275 gfs2_holder_mark_uninitialized(gh); 1276 gh->gh_ip = 0; 1277 } 1278 1279 static void gfs2_glock_update_hold_time(struct gfs2_glock *gl, 1280 unsigned long start_time) 1281 { 1282 /* Have we waited longer that a second? */ 1283 if (time_after(jiffies, start_time + HZ)) { 1284 /* Lengthen the minimum hold time. */ 1285 gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR, 1286 GL_GLOCK_MAX_HOLD); 1287 } 1288 } 1289 1290 /** 1291 * gfs2_glock_holder_ready - holder is ready and its error code can be collected 1292 * @gh: the glock holder 1293 * 1294 * Called when a glock holder no longer needs to be waited for because it is 1295 * now either held (HIF_HOLDER set; gh_error == 0), or acquiring the lock has 1296 * failed (gh_error != 0). 1297 */ 1298 1299 int gfs2_glock_holder_ready(struct gfs2_holder *gh) 1300 { 1301 if (gh->gh_error || (gh->gh_flags & GL_SKIP)) 1302 return gh->gh_error; 1303 gh->gh_error = gfs2_instantiate(gh); 1304 if (gh->gh_error) 1305 gfs2_glock_dq(gh); 1306 return gh->gh_error; 1307 } 1308 1309 /** 1310 * gfs2_glock_wait - wait on a glock acquisition 1311 * @gh: the glock holder 1312 * 1313 * Returns: 0 on success 1314 */ 1315 1316 int gfs2_glock_wait(struct gfs2_holder *gh) 1317 { 1318 unsigned long start_time = jiffies; 1319 1320 might_sleep(); 1321 wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); 1322 gfs2_glock_update_hold_time(gh->gh_gl, start_time); 1323 return gfs2_glock_holder_ready(gh); 1324 } 1325 1326 static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs) 1327 { 1328 int i; 1329 1330 for (i = 0; i < num_gh; i++) 1331 if (test_bit(HIF_WAIT, &ghs[i].gh_iflags)) 1332 return 1; 1333 return 0; 1334 } 1335 1336 /** 1337 * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions 1338 * @num_gh: the number of holders in the array 1339 * @ghs: the glock holder array 1340 * 1341 * Returns: 0 on success, meaning all glocks have been granted and are held. 1342 * -ESTALE if the request timed out, meaning all glocks were released, 1343 * and the caller should retry the operation. 1344 */ 1345 1346 int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs) 1347 { 1348 struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd; 1349 int i, ret = 0, timeout = 0; 1350 unsigned long start_time = jiffies; 1351 1352 might_sleep(); 1353 /* 1354 * Total up the (minimum hold time * 2) of all glocks and use that to 1355 * determine the max amount of time we should wait. 1356 */ 1357 for (i = 0; i < num_gh; i++) 1358 timeout += ghs[i].gh_gl->gl_hold_time << 1; 1359 1360 if (!wait_event_timeout(sdp->sd_async_glock_wait, 1361 !glocks_pending(num_gh, ghs), timeout)) { 1362 ret = -ESTALE; /* request timed out. */ 1363 goto out; 1364 } 1365 1366 for (i = 0; i < num_gh; i++) { 1367 struct gfs2_holder *gh = &ghs[i]; 1368 int ret2; 1369 1370 if (test_bit(HIF_HOLDER, &gh->gh_iflags)) { 1371 gfs2_glock_update_hold_time(gh->gh_gl, 1372 start_time); 1373 } 1374 ret2 = gfs2_glock_holder_ready(gh); 1375 if (!ret) 1376 ret = ret2; 1377 } 1378 1379 out: 1380 if (ret) { 1381 for (i = 0; i < num_gh; i++) { 1382 struct gfs2_holder *gh = &ghs[i]; 1383 1384 gfs2_glock_dq(gh); 1385 } 1386 } 1387 return ret; 1388 } 1389 1390 /** 1391 * handle_callback - process a demote request 1392 * @gl: the glock 1393 * @state: the state the caller wants us to change to 1394 * @delay: zero to demote immediately; otherwise pending demote 1395 * @remote: true if this came from a different cluster node 1396 * 1397 * There are only two requests that we are going to see in actual 1398 * practise: LM_ST_SHARED and LM_ST_UNLOCKED 1399 */ 1400 1401 static void handle_callback(struct gfs2_glock *gl, unsigned int state, 1402 unsigned long delay, bool remote) 1403 { 1404 if (delay) 1405 set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); 1406 else 1407 gfs2_set_demote(gl); 1408 if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { 1409 gl->gl_demote_state = state; 1410 gl->gl_demote_time = jiffies; 1411 } else if (gl->gl_demote_state != LM_ST_UNLOCKED && 1412 gl->gl_demote_state != state) { 1413 gl->gl_demote_state = LM_ST_UNLOCKED; 1414 } 1415 if (gl->gl_ops->go_callback) 1416 gl->gl_ops->go_callback(gl, remote); 1417 trace_gfs2_demote_rq(gl, remote); 1418 } 1419 1420 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) 1421 { 1422 struct va_format vaf; 1423 va_list args; 1424 1425 va_start(args, fmt); 1426 1427 if (seq) { 1428 seq_vprintf(seq, fmt, args); 1429 } else { 1430 vaf.fmt = fmt; 1431 vaf.va = &args; 1432 1433 pr_err("%pV", &vaf); 1434 } 1435 1436 va_end(args); 1437 } 1438 1439 /** 1440 * add_to_queue - Add a holder to the wait queue (but look for recursion) 1441 * @gh: the holder structure to add 1442 * 1443 * Eventually we should move the recursive locking trap to a 1444 * debugging option or something like that. This is the fast 1445 * path and needs to have the minimum number of distractions. 1446 * 1447 */ 1448 1449 static inline void add_to_queue(struct gfs2_holder *gh) 1450 __releases(&gl->gl_lockref.lock) 1451 __acquires(&gl->gl_lockref.lock) 1452 { 1453 struct gfs2_glock *gl = gh->gh_gl; 1454 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 1455 struct list_head *insert_pt = NULL; 1456 struct gfs2_holder *gh2; 1457 int try_futile = 0; 1458 1459 GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL); 1460 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) 1461 GLOCK_BUG_ON(gl, true); 1462 1463 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { 1464 if (test_bit(GLF_LOCK, &gl->gl_flags)) { 1465 struct gfs2_holder *current_gh; 1466 1467 current_gh = find_first_strong_holder(gl); 1468 try_futile = !may_grant(gl, current_gh, gh); 1469 } 1470 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) 1471 goto fail; 1472 } 1473 1474 list_for_each_entry(gh2, &gl->gl_holders, gh_list) { 1475 if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && 1476 (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK) && 1477 !test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags))) 1478 goto trap_recursive; 1479 if (try_futile && 1480 !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { 1481 fail: 1482 gh->gh_error = GLR_TRYFAILED; 1483 gfs2_holder_wake(gh); 1484 return; 1485 } 1486 if (test_bit(HIF_HOLDER, &gh2->gh_iflags)) 1487 continue; 1488 if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) 1489 insert_pt = &gh2->gh_list; 1490 } 1491 trace_gfs2_glock_queue(gh, 1); 1492 gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT); 1493 gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT); 1494 if (likely(insert_pt == NULL)) { 1495 list_add_tail(&gh->gh_list, &gl->gl_holders); 1496 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) 1497 goto do_cancel; 1498 return; 1499 } 1500 list_add_tail(&gh->gh_list, insert_pt); 1501 do_cancel: 1502 gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list); 1503 if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { 1504 spin_unlock(&gl->gl_lockref.lock); 1505 if (sdp->sd_lockstruct.ls_ops->lm_cancel) 1506 sdp->sd_lockstruct.ls_ops->lm_cancel(gl); 1507 spin_lock(&gl->gl_lockref.lock); 1508 } 1509 return; 1510 1511 trap_recursive: 1512 fs_err(sdp, "original: %pSR\n", (void *)gh2->gh_ip); 1513 fs_err(sdp, "pid: %d\n", pid_nr(gh2->gh_owner_pid)); 1514 fs_err(sdp, "lock type: %d req lock state : %d\n", 1515 gh2->gh_gl->gl_name.ln_type, gh2->gh_state); 1516 fs_err(sdp, "new: %pSR\n", (void *)gh->gh_ip); 1517 fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid)); 1518 fs_err(sdp, "lock type: %d req lock state : %d\n", 1519 gh->gh_gl->gl_name.ln_type, gh->gh_state); 1520 gfs2_dump_glock(NULL, gl, true); 1521 BUG(); 1522 } 1523 1524 /** 1525 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock) 1526 * @gh: the holder structure 1527 * 1528 * if (gh->gh_flags & GL_ASYNC), this never returns an error 1529 * 1530 * Returns: 0, GLR_TRYFAILED, or errno on failure 1531 */ 1532 1533 int gfs2_glock_nq(struct gfs2_holder *gh) 1534 { 1535 struct gfs2_glock *gl = gh->gh_gl; 1536 int error = 0; 1537 1538 if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP)) 1539 return -EIO; 1540 1541 if (test_bit(GLF_LRU, &gl->gl_flags)) 1542 gfs2_glock_remove_from_lru(gl); 1543 1544 gh->gh_error = 0; 1545 spin_lock(&gl->gl_lockref.lock); 1546 add_to_queue(gh); 1547 if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && 1548 test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { 1549 set_bit(GLF_REPLY_PENDING, &gl->gl_flags); 1550 gl->gl_lockref.count++; 1551 __gfs2_glock_queue_work(gl, 0); 1552 } 1553 run_queue(gl, 1); 1554 spin_unlock(&gl->gl_lockref.lock); 1555 1556 if (!(gh->gh_flags & GL_ASYNC)) 1557 error = gfs2_glock_wait(gh); 1558 1559 return error; 1560 } 1561 1562 /** 1563 * gfs2_glock_poll - poll to see if an async request has been completed 1564 * @gh: the holder 1565 * 1566 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on 1567 */ 1568 1569 int gfs2_glock_poll(struct gfs2_holder *gh) 1570 { 1571 return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1; 1572 } 1573 1574 static inline bool needs_demote(struct gfs2_glock *gl) 1575 { 1576 return (test_bit(GLF_DEMOTE, &gl->gl_flags) || 1577 test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)); 1578 } 1579 1580 static void __gfs2_glock_dq(struct gfs2_holder *gh) 1581 { 1582 struct gfs2_glock *gl = gh->gh_gl; 1583 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 1584 unsigned delay = 0; 1585 int fast_path = 0; 1586 1587 /* 1588 * This while loop is similar to function demote_incompat_holders: 1589 * If the glock is due to be demoted (which may be from another node 1590 * or even if this holder is GL_NOCACHE), the weak holders are 1591 * demoted as well, allowing the glock to be demoted. 1592 */ 1593 while (gh) { 1594 /* 1595 * If we're in the process of file system withdraw, we cannot 1596 * just dequeue any glocks until our journal is recovered, lest 1597 * we introduce file system corruption. We need two exceptions 1598 * to this rule: We need to allow unlocking of nondisk glocks 1599 * and the glock for our own journal that needs recovery. 1600 */ 1601 if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && 1602 glock_blocked_by_withdraw(gl) && 1603 gh->gh_gl != sdp->sd_jinode_gl) { 1604 sdp->sd_glock_dqs_held++; 1605 spin_unlock(&gl->gl_lockref.lock); 1606 might_sleep(); 1607 wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, 1608 TASK_UNINTERRUPTIBLE); 1609 spin_lock(&gl->gl_lockref.lock); 1610 } 1611 1612 /* 1613 * This holder should not be cached, so mark it for demote. 1614 * Note: this should be done before the check for needs_demote 1615 * below. 1616 */ 1617 if (gh->gh_flags & GL_NOCACHE) 1618 handle_callback(gl, LM_ST_UNLOCKED, 0, false); 1619 1620 list_del_init(&gh->gh_list); 1621 clear_bit(HIF_HOLDER, &gh->gh_iflags); 1622 trace_gfs2_glock_queue(gh, 0); 1623 1624 /* 1625 * If there hasn't been a demote request we are done. 1626 * (Let the remaining holders, if any, keep holding it.) 1627 */ 1628 if (!needs_demote(gl)) { 1629 if (list_empty(&gl->gl_holders)) 1630 fast_path = 1; 1631 break; 1632 } 1633 /* 1634 * If we have another strong holder (we cannot auto-demote) 1635 * we are done. It keeps holding it until it is done. 1636 */ 1637 if (find_first_strong_holder(gl)) 1638 break; 1639 1640 /* 1641 * If we have a weak holder at the head of the list, it 1642 * (and all others like it) must be auto-demoted. If there 1643 * are no more weak holders, we exit the while loop. 1644 */ 1645 gh = find_first_holder(gl); 1646 } 1647 1648 if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) 1649 gfs2_glock_add_to_lru(gl); 1650 1651 if (unlikely(!fast_path)) { 1652 gl->gl_lockref.count++; 1653 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 1654 !test_bit(GLF_DEMOTE, &gl->gl_flags) && 1655 gl->gl_name.ln_type == LM_TYPE_INODE) 1656 delay = gl->gl_hold_time; 1657 __gfs2_glock_queue_work(gl, delay); 1658 } 1659 } 1660 1661 /** 1662 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock) 1663 * @gh: the glock holder 1664 * 1665 */ 1666 void gfs2_glock_dq(struct gfs2_holder *gh) 1667 { 1668 struct gfs2_glock *gl = gh->gh_gl; 1669 1670 spin_lock(&gl->gl_lockref.lock); 1671 if (list_is_first(&gh->gh_list, &gl->gl_holders) && 1672 !test_bit(HIF_HOLDER, &gh->gh_iflags)) { 1673 spin_unlock(&gl->gl_lockref.lock); 1674 gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl); 1675 wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); 1676 spin_lock(&gl->gl_lockref.lock); 1677 } 1678 1679 __gfs2_glock_dq(gh); 1680 spin_unlock(&gl->gl_lockref.lock); 1681 } 1682 1683 void gfs2_glock_dq_wait(struct gfs2_holder *gh) 1684 { 1685 struct gfs2_glock *gl = gh->gh_gl; 1686 gfs2_glock_dq(gh); 1687 might_sleep(); 1688 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE); 1689 } 1690 1691 /** 1692 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it 1693 * @gh: the holder structure 1694 * 1695 */ 1696 1697 void gfs2_glock_dq_uninit(struct gfs2_holder *gh) 1698 { 1699 gfs2_glock_dq(gh); 1700 gfs2_holder_uninit(gh); 1701 } 1702 1703 /** 1704 * gfs2_glock_nq_num - acquire a glock based on lock number 1705 * @sdp: the filesystem 1706 * @number: the lock number 1707 * @glops: the glock operations for the type of glock 1708 * @state: the state to acquire the glock in 1709 * @flags: modifier flags for the acquisition 1710 * @gh: the struct gfs2_holder 1711 * 1712 * Returns: errno 1713 */ 1714 1715 int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, 1716 const struct gfs2_glock_operations *glops, 1717 unsigned int state, u16 flags, struct gfs2_holder *gh) 1718 { 1719 struct gfs2_glock *gl; 1720 int error; 1721 1722 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl); 1723 if (!error) { 1724 error = gfs2_glock_nq_init(gl, state, flags, gh); 1725 gfs2_glock_put(gl); 1726 } 1727 1728 return error; 1729 } 1730 1731 /** 1732 * glock_compare - Compare two struct gfs2_glock structures for sorting 1733 * @arg_a: the first structure 1734 * @arg_b: the second structure 1735 * 1736 */ 1737 1738 static int glock_compare(const void *arg_a, const void *arg_b) 1739 { 1740 const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a; 1741 const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b; 1742 const struct lm_lockname *a = &gh_a->gh_gl->gl_name; 1743 const struct lm_lockname *b = &gh_b->gh_gl->gl_name; 1744 1745 if (a->ln_number > b->ln_number) 1746 return 1; 1747 if (a->ln_number < b->ln_number) 1748 return -1; 1749 BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type); 1750 return 0; 1751 } 1752 1753 /** 1754 * nq_m_sync - synchronously acquire more than one glock in deadlock free order 1755 * @num_gh: the number of structures 1756 * @ghs: an array of struct gfs2_holder structures 1757 * @p: placeholder for the holder structure to pass back 1758 * 1759 * Returns: 0 on success (all glocks acquired), 1760 * errno on failure (no glocks acquired) 1761 */ 1762 1763 static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs, 1764 struct gfs2_holder **p) 1765 { 1766 unsigned int x; 1767 int error = 0; 1768 1769 for (x = 0; x < num_gh; x++) 1770 p[x] = &ghs[x]; 1771 1772 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL); 1773 1774 for (x = 0; x < num_gh; x++) { 1775 error = gfs2_glock_nq(p[x]); 1776 if (error) { 1777 while (x--) 1778 gfs2_glock_dq(p[x]); 1779 break; 1780 } 1781 } 1782 1783 return error; 1784 } 1785 1786 /** 1787 * gfs2_glock_nq_m - acquire multiple glocks 1788 * @num_gh: the number of structures 1789 * @ghs: an array of struct gfs2_holder structures 1790 * 1791 * Returns: 0 on success (all glocks acquired), 1792 * errno on failure (no glocks acquired) 1793 */ 1794 1795 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs) 1796 { 1797 struct gfs2_holder *tmp[4]; 1798 struct gfs2_holder **pph = tmp; 1799 int error = 0; 1800 1801 switch(num_gh) { 1802 case 0: 1803 return 0; 1804 case 1: 1805 return gfs2_glock_nq(ghs); 1806 default: 1807 if (num_gh <= 4) 1808 break; 1809 pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *), 1810 GFP_NOFS); 1811 if (!pph) 1812 return -ENOMEM; 1813 } 1814 1815 error = nq_m_sync(num_gh, ghs, pph); 1816 1817 if (pph != tmp) 1818 kfree(pph); 1819 1820 return error; 1821 } 1822 1823 /** 1824 * gfs2_glock_dq_m - release multiple glocks 1825 * @num_gh: the number of structures 1826 * @ghs: an array of struct gfs2_holder structures 1827 * 1828 */ 1829 1830 void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) 1831 { 1832 while (num_gh--) 1833 gfs2_glock_dq(&ghs[num_gh]); 1834 } 1835 1836 void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) 1837 { 1838 unsigned long delay = 0; 1839 unsigned long holdtime; 1840 unsigned long now = jiffies; 1841 1842 gfs2_glock_hold(gl); 1843 spin_lock(&gl->gl_lockref.lock); 1844 holdtime = gl->gl_tchange + gl->gl_hold_time; 1845 if (!list_empty(&gl->gl_holders) && 1846 gl->gl_name.ln_type == LM_TYPE_INODE) { 1847 if (time_before(now, holdtime)) 1848 delay = holdtime - now; 1849 if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) 1850 delay = gl->gl_hold_time; 1851 } 1852 /* 1853 * Note 1: We cannot call demote_incompat_holders from handle_callback 1854 * or gfs2_set_demote due to recursion problems like: gfs2_glock_dq -> 1855 * handle_callback -> demote_incompat_holders -> gfs2_glock_dq 1856 * Plus, we only want to demote the holders if the request comes from 1857 * a remote cluster node because local holder conflicts are resolved 1858 * elsewhere. 1859 * 1860 * Note 2: if a remote node wants this glock in EX mode, lock_dlm will 1861 * request that we set our state to UNLOCKED. Here we mock up a holder 1862 * to make it look like someone wants the lock EX locally. Any SH 1863 * and DF requests should be able to share the lock without demoting. 1864 * 1865 * Note 3: We only want to demote the demoteable holders when there 1866 * are no more strong holders. The demoteable holders might as well 1867 * keep the glock until the last strong holder is done with it. 1868 */ 1869 if (!find_first_strong_holder(gl)) { 1870 struct gfs2_holder mock_gh = { 1871 .gh_gl = gl, 1872 .gh_state = (state == LM_ST_UNLOCKED) ? 1873 LM_ST_EXCLUSIVE : state, 1874 .gh_iflags = BIT(HIF_HOLDER) 1875 }; 1876 1877 demote_incompat_holders(gl, &mock_gh); 1878 } 1879 handle_callback(gl, state, delay, true); 1880 __gfs2_glock_queue_work(gl, delay); 1881 spin_unlock(&gl->gl_lockref.lock); 1882 } 1883 1884 /** 1885 * gfs2_should_freeze - Figure out if glock should be frozen 1886 * @gl: The glock in question 1887 * 1888 * Glocks are not frozen if (a) the result of the dlm operation is 1889 * an error, (b) the locking operation was an unlock operation or 1890 * (c) if there is a "noexp" flagged request anywhere in the queue 1891 * 1892 * Returns: 1 if freezing should occur, 0 otherwise 1893 */ 1894 1895 static int gfs2_should_freeze(const struct gfs2_glock *gl) 1896 { 1897 const struct gfs2_holder *gh; 1898 1899 if (gl->gl_reply & ~LM_OUT_ST_MASK) 1900 return 0; 1901 if (gl->gl_target == LM_ST_UNLOCKED) 1902 return 0; 1903 1904 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1905 if (test_bit(HIF_HOLDER, &gh->gh_iflags)) 1906 continue; 1907 if (LM_FLAG_NOEXP & gh->gh_flags) 1908 return 0; 1909 } 1910 1911 return 1; 1912 } 1913 1914 /** 1915 * gfs2_glock_complete - Callback used by locking 1916 * @gl: Pointer to the glock 1917 * @ret: The return value from the dlm 1918 * 1919 * The gl_reply field is under the gl_lockref.lock lock so that it is ok 1920 * to use a bitfield shared with other glock state fields. 1921 */ 1922 1923 void gfs2_glock_complete(struct gfs2_glock *gl, int ret) 1924 { 1925 struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; 1926 1927 spin_lock(&gl->gl_lockref.lock); 1928 gl->gl_reply = ret; 1929 1930 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { 1931 if (gfs2_should_freeze(gl)) { 1932 set_bit(GLF_FROZEN, &gl->gl_flags); 1933 spin_unlock(&gl->gl_lockref.lock); 1934 return; 1935 } 1936 } 1937 1938 gl->gl_lockref.count++; 1939 set_bit(GLF_REPLY_PENDING, &gl->gl_flags); 1940 __gfs2_glock_queue_work(gl, 0); 1941 spin_unlock(&gl->gl_lockref.lock); 1942 } 1943 1944 static int glock_cmp(void *priv, const struct list_head *a, 1945 const struct list_head *b) 1946 { 1947 struct gfs2_glock *gla, *glb; 1948 1949 gla = list_entry(a, struct gfs2_glock, gl_lru); 1950 glb = list_entry(b, struct gfs2_glock, gl_lru); 1951 1952 if (gla->gl_name.ln_number > glb->gl_name.ln_number) 1953 return 1; 1954 if (gla->gl_name.ln_number < glb->gl_name.ln_number) 1955 return -1; 1956 1957 return 0; 1958 } 1959 1960 /** 1961 * gfs2_dispose_glock_lru - Demote a list of glocks 1962 * @list: The list to dispose of 1963 * 1964 * Disposing of glocks may involve disk accesses, so that here we sort 1965 * the glocks by number (i.e. disk location of the inodes) so that if 1966 * there are any such accesses, they'll be sent in order (mostly). 1967 * 1968 * Must be called under the lru_lock, but may drop and retake this 1969 * lock. While the lru_lock is dropped, entries may vanish from the 1970 * list, but no new entries will appear on the list (since it is 1971 * private) 1972 */ 1973 1974 static void gfs2_dispose_glock_lru(struct list_head *list) 1975 __releases(&lru_lock) 1976 __acquires(&lru_lock) 1977 { 1978 struct gfs2_glock *gl; 1979 1980 list_sort(NULL, list, glock_cmp); 1981 1982 while(!list_empty(list)) { 1983 gl = list_first_entry(list, struct gfs2_glock, gl_lru); 1984 list_del_init(&gl->gl_lru); 1985 clear_bit(GLF_LRU, &gl->gl_flags); 1986 if (!spin_trylock(&gl->gl_lockref.lock)) { 1987 add_back_to_lru: 1988 list_add(&gl->gl_lru, &lru_list); 1989 set_bit(GLF_LRU, &gl->gl_flags); 1990 atomic_inc(&lru_count); 1991 continue; 1992 } 1993 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1994 spin_unlock(&gl->gl_lockref.lock); 1995 goto add_back_to_lru; 1996 } 1997 gl->gl_lockref.count++; 1998 if (demote_ok(gl)) 1999 handle_callback(gl, LM_ST_UNLOCKED, 0, false); 2000 WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); 2001 __gfs2_glock_queue_work(gl, 0); 2002 spin_unlock(&gl->gl_lockref.lock); 2003 cond_resched_lock(&lru_lock); 2004 } 2005 } 2006 2007 /** 2008 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote 2009 * @nr: The number of entries to scan 2010 * 2011 * This function selects the entries on the LRU which are able to 2012 * be demoted, and then kicks off the process by calling 2013 * gfs2_dispose_glock_lru() above. 2014 */ 2015 2016 static long gfs2_scan_glock_lru(int nr) 2017 { 2018 struct gfs2_glock *gl; 2019 LIST_HEAD(skipped); 2020 LIST_HEAD(dispose); 2021 long freed = 0; 2022 2023 spin_lock(&lru_lock); 2024 while ((nr-- >= 0) && !list_empty(&lru_list)) { 2025 gl = list_first_entry(&lru_list, struct gfs2_glock, gl_lru); 2026 2027 /* Test for being demotable */ 2028 if (!test_bit(GLF_LOCK, &gl->gl_flags)) { 2029 list_move(&gl->gl_lru, &dispose); 2030 atomic_dec(&lru_count); 2031 freed++; 2032 continue; 2033 } 2034 2035 list_move(&gl->gl_lru, &skipped); 2036 } 2037 list_splice(&skipped, &lru_list); 2038 if (!list_empty(&dispose)) 2039 gfs2_dispose_glock_lru(&dispose); 2040 spin_unlock(&lru_lock); 2041 2042 return freed; 2043 } 2044 2045 static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, 2046 struct shrink_control *sc) 2047 { 2048 if (!(sc->gfp_mask & __GFP_FS)) 2049 return SHRINK_STOP; 2050 return gfs2_scan_glock_lru(sc->nr_to_scan); 2051 } 2052 2053 static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, 2054 struct shrink_control *sc) 2055 { 2056 return vfs_pressure_ratio(atomic_read(&lru_count)); 2057 } 2058 2059 static struct shrinker glock_shrinker = { 2060 .seeks = DEFAULT_SEEKS, 2061 .count_objects = gfs2_glock_shrink_count, 2062 .scan_objects = gfs2_glock_shrink_scan, 2063 }; 2064 2065 /** 2066 * glock_hash_walk - Call a function for glock in a hash bucket 2067 * @examiner: the function 2068 * @sdp: the filesystem 2069 * 2070 * Note that the function can be called multiple times on the same 2071 * object. So the user must ensure that the function can cope with 2072 * that. 2073 */ 2074 2075 static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) 2076 { 2077 struct gfs2_glock *gl; 2078 struct rhashtable_iter iter; 2079 2080 rhashtable_walk_enter(&gl_hash_table, &iter); 2081 2082 do { 2083 rhashtable_walk_start(&iter); 2084 2085 while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) { 2086 if (gl->gl_name.ln_sbd == sdp) 2087 examiner(gl); 2088 } 2089 2090 rhashtable_walk_stop(&iter); 2091 } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); 2092 2093 rhashtable_walk_exit(&iter); 2094 } 2095 2096 bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay) 2097 { 2098 bool queued; 2099 2100 spin_lock(&gl->gl_lockref.lock); 2101 queued = queue_delayed_work(gfs2_delete_workqueue, 2102 &gl->gl_delete, delay); 2103 if (queued) 2104 set_bit(GLF_PENDING_DELETE, &gl->gl_flags); 2105 spin_unlock(&gl->gl_lockref.lock); 2106 return queued; 2107 } 2108 2109 void gfs2_cancel_delete_work(struct gfs2_glock *gl) 2110 { 2111 if (cancel_delayed_work(&gl->gl_delete)) { 2112 clear_bit(GLF_PENDING_DELETE, &gl->gl_flags); 2113 gfs2_glock_put(gl); 2114 } 2115 } 2116 2117 bool gfs2_delete_work_queued(const struct gfs2_glock *gl) 2118 { 2119 return test_bit(GLF_PENDING_DELETE, &gl->gl_flags); 2120 } 2121 2122 static void flush_delete_work(struct gfs2_glock *gl) 2123 { 2124 if (gl->gl_name.ln_type == LM_TYPE_IOPEN) { 2125 if (cancel_delayed_work(&gl->gl_delete)) { 2126 queue_delayed_work(gfs2_delete_workqueue, 2127 &gl->gl_delete, 0); 2128 } 2129 } 2130 } 2131 2132 void gfs2_flush_delete_work(struct gfs2_sbd *sdp) 2133 { 2134 glock_hash_walk(flush_delete_work, sdp); 2135 flush_workqueue(gfs2_delete_workqueue); 2136 } 2137 2138 /** 2139 * thaw_glock - thaw out a glock which has an unprocessed reply waiting 2140 * @gl: The glock to thaw 2141 * 2142 */ 2143 2144 static void thaw_glock(struct gfs2_glock *gl) 2145 { 2146 if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) 2147 return; 2148 if (!lockref_get_not_dead(&gl->gl_lockref)) 2149 return; 2150 set_bit(GLF_REPLY_PENDING, &gl->gl_flags); 2151 gfs2_glock_queue_work(gl, 0); 2152 } 2153 2154 /** 2155 * clear_glock - look at a glock and see if we can free it from glock cache 2156 * @gl: the glock to look at 2157 * 2158 */ 2159 2160 static void clear_glock(struct gfs2_glock *gl) 2161 { 2162 gfs2_glock_remove_from_lru(gl); 2163 2164 spin_lock(&gl->gl_lockref.lock); 2165 if (!__lockref_is_dead(&gl->gl_lockref)) { 2166 gl->gl_lockref.count++; 2167 if (gl->gl_state != LM_ST_UNLOCKED) 2168 handle_callback(gl, LM_ST_UNLOCKED, 0, false); 2169 __gfs2_glock_queue_work(gl, 0); 2170 } 2171 spin_unlock(&gl->gl_lockref.lock); 2172 } 2173 2174 /** 2175 * gfs2_glock_thaw - Thaw any frozen glocks 2176 * @sdp: The super block 2177 * 2178 */ 2179 2180 void gfs2_glock_thaw(struct gfs2_sbd *sdp) 2181 { 2182 glock_hash_walk(thaw_glock, sdp); 2183 } 2184 2185 static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) 2186 { 2187 spin_lock(&gl->gl_lockref.lock); 2188 gfs2_dump_glock(seq, gl, fsid); 2189 spin_unlock(&gl->gl_lockref.lock); 2190 } 2191 2192 static void dump_glock_func(struct gfs2_glock *gl) 2193 { 2194 dump_glock(NULL, gl, true); 2195 } 2196 2197 /** 2198 * gfs2_gl_hash_clear - Empty out the glock hash table 2199 * @sdp: the filesystem 2200 * 2201 * Called when unmounting the filesystem. 2202 */ 2203 2204 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 2205 { 2206 set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); 2207 flush_workqueue(glock_workqueue); 2208 glock_hash_walk(clear_glock, sdp); 2209 flush_workqueue(glock_workqueue); 2210 wait_event_timeout(sdp->sd_glock_wait, 2211 atomic_read(&sdp->sd_glock_disposal) == 0, 2212 HZ * 600); 2213 glock_hash_walk(dump_glock_func, sdp); 2214 } 2215 2216 static const char *state2str(unsigned state) 2217 { 2218 switch(state) { 2219 case LM_ST_UNLOCKED: 2220 return "UN"; 2221 case LM_ST_SHARED: 2222 return "SH"; 2223 case LM_ST_DEFERRED: 2224 return "DF"; 2225 case LM_ST_EXCLUSIVE: 2226 return "EX"; 2227 } 2228 return "??"; 2229 } 2230 2231 static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) 2232 { 2233 char *p = buf; 2234 if (flags & LM_FLAG_TRY) 2235 *p++ = 't'; 2236 if (flags & LM_FLAG_TRY_1CB) 2237 *p++ = 'T'; 2238 if (flags & LM_FLAG_NOEXP) 2239 *p++ = 'e'; 2240 if (flags & LM_FLAG_ANY) 2241 *p++ = 'A'; 2242 if (flags & LM_FLAG_PRIORITY) 2243 *p++ = 'p'; 2244 if (flags & LM_FLAG_NODE_SCOPE) 2245 *p++ = 'n'; 2246 if (flags & GL_ASYNC) 2247 *p++ = 'a'; 2248 if (flags & GL_EXACT) 2249 *p++ = 'E'; 2250 if (flags & GL_NOCACHE) 2251 *p++ = 'c'; 2252 if (test_bit(HIF_HOLDER, &iflags)) 2253 *p++ = 'H'; 2254 if (test_bit(HIF_WAIT, &iflags)) 2255 *p++ = 'W'; 2256 if (test_bit(HIF_MAY_DEMOTE, &iflags)) 2257 *p++ = 'D'; 2258 if (flags & GL_SKIP) 2259 *p++ = 's'; 2260 *p = 0; 2261 return buf; 2262 } 2263 2264 /** 2265 * dump_holder - print information about a glock holder 2266 * @seq: the seq_file struct 2267 * @gh: the glock holder 2268 * @fs_id_buf: pointer to file system id (if requested) 2269 * 2270 */ 2271 2272 static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh, 2273 const char *fs_id_buf) 2274 { 2275 struct task_struct *gh_owner = NULL; 2276 char flags_buf[32]; 2277 2278 rcu_read_lock(); 2279 if (gh->gh_owner_pid) 2280 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); 2281 gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n", 2282 fs_id_buf, state2str(gh->gh_state), 2283 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), 2284 gh->gh_error, 2285 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, 2286 gh_owner ? gh_owner->comm : "(ended)", 2287 (void *)gh->gh_ip); 2288 rcu_read_unlock(); 2289 } 2290 2291 static const char *gflags2str(char *buf, const struct gfs2_glock *gl) 2292 { 2293 const unsigned long *gflags = &gl->gl_flags; 2294 char *p = buf; 2295 2296 if (test_bit(GLF_LOCK, gflags)) 2297 *p++ = 'l'; 2298 if (test_bit(GLF_DEMOTE, gflags)) 2299 *p++ = 'D'; 2300 if (test_bit(GLF_PENDING_DEMOTE, gflags)) 2301 *p++ = 'd'; 2302 if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags)) 2303 *p++ = 'p'; 2304 if (test_bit(GLF_DIRTY, gflags)) 2305 *p++ = 'y'; 2306 if (test_bit(GLF_LFLUSH, gflags)) 2307 *p++ = 'f'; 2308 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) 2309 *p++ = 'i'; 2310 if (test_bit(GLF_REPLY_PENDING, gflags)) 2311 *p++ = 'r'; 2312 if (test_bit(GLF_INITIAL, gflags)) 2313 *p++ = 'I'; 2314 if (test_bit(GLF_FROZEN, gflags)) 2315 *p++ = 'F'; 2316 if (!list_empty(&gl->gl_holders)) 2317 *p++ = 'q'; 2318 if (test_bit(GLF_LRU, gflags)) 2319 *p++ = 'L'; 2320 if (gl->gl_object) 2321 *p++ = 'o'; 2322 if (test_bit(GLF_BLOCKING, gflags)) 2323 *p++ = 'b'; 2324 if (test_bit(GLF_PENDING_DELETE, gflags)) 2325 *p++ = 'P'; 2326 if (test_bit(GLF_FREEING, gflags)) 2327 *p++ = 'x'; 2328 if (test_bit(GLF_INSTANTIATE_NEEDED, gflags)) 2329 *p++ = 'n'; 2330 if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags)) 2331 *p++ = 'N'; 2332 *p = 0; 2333 return buf; 2334 } 2335 2336 /** 2337 * gfs2_dump_glock - print information about a glock 2338 * @seq: The seq_file struct 2339 * @gl: the glock 2340 * @fsid: If true, also dump the file system id 2341 * 2342 * The file format is as follows: 2343 * One line per object, capital letters are used to indicate objects 2344 * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented, 2345 * other objects are indented by a single space and follow the glock to 2346 * which they are related. Fields are indicated by lower case letters 2347 * followed by a colon and the field value, except for strings which are in 2348 * [] so that its possible to see if they are composed of spaces for 2349 * example. The field's are n = number (id of the object), f = flags, 2350 * t = type, s = state, r = refcount, e = error, p = pid. 2351 * 2352 */ 2353 2354 void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) 2355 { 2356 const struct gfs2_glock_operations *glops = gl->gl_ops; 2357 unsigned long long dtime; 2358 const struct gfs2_holder *gh; 2359 char gflags_buf[32]; 2360 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 2361 char fs_id_buf[sizeof(sdp->sd_fsname) + 7]; 2362 unsigned long nrpages = 0; 2363 2364 if (gl->gl_ops->go_flags & GLOF_ASPACE) { 2365 struct address_space *mapping = gfs2_glock2aspace(gl); 2366 2367 nrpages = mapping->nrpages; 2368 } 2369 memset(fs_id_buf, 0, sizeof(fs_id_buf)); 2370 if (fsid && sdp) /* safety precaution */ 2371 sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); 2372 dtime = jiffies - gl->gl_demote_time; 2373 dtime *= 1000000/HZ; /* demote time in uSec */ 2374 if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) 2375 dtime = 0; 2376 gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d " 2377 "v:%d r:%d m:%ld p:%lu\n", 2378 fs_id_buf, state2str(gl->gl_state), 2379 gl->gl_name.ln_type, 2380 (unsigned long long)gl->gl_name.ln_number, 2381 gflags2str(gflags_buf, gl), 2382 state2str(gl->gl_target), 2383 state2str(gl->gl_demote_state), dtime, 2384 atomic_read(&gl->gl_ail_count), 2385 atomic_read(&gl->gl_revokes), 2386 (int)gl->gl_lockref.count, gl->gl_hold_time, nrpages); 2387 2388 list_for_each_entry(gh, &gl->gl_holders, gh_list) 2389 dump_holder(seq, gh, fs_id_buf); 2390 2391 if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) 2392 glops->go_dump(seq, gl, fs_id_buf); 2393 } 2394 2395 static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) 2396 { 2397 struct gfs2_glock *gl = iter_ptr; 2398 2399 seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n", 2400 gl->gl_name.ln_type, 2401 (unsigned long long)gl->gl_name.ln_number, 2402 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT], 2403 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR], 2404 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB], 2405 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB], 2406 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT], 2407 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR], 2408 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT], 2409 (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]); 2410 return 0; 2411 } 2412 2413 static const char *gfs2_gltype[] = { 2414 "type", 2415 "reserved", 2416 "nondisk", 2417 "inode", 2418 "rgrp", 2419 "meta", 2420 "iopen", 2421 "flock", 2422 "plock", 2423 "quota", 2424 "journal", 2425 }; 2426 2427 static const char *gfs2_stype[] = { 2428 [GFS2_LKS_SRTT] = "srtt", 2429 [GFS2_LKS_SRTTVAR] = "srttvar", 2430 [GFS2_LKS_SRTTB] = "srttb", 2431 [GFS2_LKS_SRTTVARB] = "srttvarb", 2432 [GFS2_LKS_SIRT] = "sirt", 2433 [GFS2_LKS_SIRTVAR] = "sirtvar", 2434 [GFS2_LKS_DCOUNT] = "dlm", 2435 [GFS2_LKS_QCOUNT] = "queue", 2436 }; 2437 2438 #define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype)) 2439 2440 static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) 2441 { 2442 struct gfs2_sbd *sdp = seq->private; 2443 loff_t pos = *(loff_t *)iter_ptr; 2444 unsigned index = pos >> 3; 2445 unsigned subindex = pos & 0x07; 2446 int i; 2447 2448 if (index == 0 && subindex != 0) 2449 return 0; 2450 2451 seq_printf(seq, "%-10s %8s:", gfs2_gltype[index], 2452 (index == 0) ? "cpu": gfs2_stype[subindex]); 2453 2454 for_each_possible_cpu(i) { 2455 const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i); 2456 2457 if (index == 0) 2458 seq_printf(seq, " %15u", i); 2459 else 2460 seq_printf(seq, " %15llu", (unsigned long long)lkstats-> 2461 lkstats[index - 1].stats[subindex]); 2462 } 2463 seq_putc(seq, '\n'); 2464 return 0; 2465 } 2466 2467 int __init gfs2_glock_init(void) 2468 { 2469 int i, ret; 2470 2471 ret = rhashtable_init(&gl_hash_table, &ht_parms); 2472 if (ret < 0) 2473 return ret; 2474 2475 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | 2476 WQ_HIGHPRI | WQ_FREEZABLE, 0); 2477 if (!glock_workqueue) { 2478 rhashtable_destroy(&gl_hash_table); 2479 return -ENOMEM; 2480 } 2481 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", 2482 WQ_MEM_RECLAIM | WQ_FREEZABLE, 2483 0); 2484 if (!gfs2_delete_workqueue) { 2485 destroy_workqueue(glock_workqueue); 2486 rhashtable_destroy(&gl_hash_table); 2487 return -ENOMEM; 2488 } 2489 2490 ret = register_shrinker(&glock_shrinker, "gfs2-glock"); 2491 if (ret) { 2492 destroy_workqueue(gfs2_delete_workqueue); 2493 destroy_workqueue(glock_workqueue); 2494 rhashtable_destroy(&gl_hash_table); 2495 return ret; 2496 } 2497 2498 for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++) 2499 init_waitqueue_head(glock_wait_table + i); 2500 2501 return 0; 2502 } 2503 2504 void gfs2_glock_exit(void) 2505 { 2506 unregister_shrinker(&glock_shrinker); 2507 rhashtable_destroy(&gl_hash_table); 2508 destroy_workqueue(glock_workqueue); 2509 destroy_workqueue(gfs2_delete_workqueue); 2510 } 2511 2512 static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n) 2513 { 2514 struct gfs2_glock *gl = gi->gl; 2515 2516 if (gl) { 2517 if (n == 0) 2518 return; 2519 if (!lockref_put_not_zero(&gl->gl_lockref)) 2520 gfs2_glock_queue_put(gl); 2521 } 2522 for (;;) { 2523 gl = rhashtable_walk_next(&gi->hti); 2524 if (IS_ERR_OR_NULL(gl)) { 2525 if (gl == ERR_PTR(-EAGAIN)) { 2526 n = 1; 2527 continue; 2528 } 2529 gl = NULL; 2530 break; 2531 } 2532 if (gl->gl_name.ln_sbd != gi->sdp) 2533 continue; 2534 if (n <= 1) { 2535 if (!lockref_get_not_dead(&gl->gl_lockref)) 2536 continue; 2537 break; 2538 } else { 2539 if (__lockref_is_dead(&gl->gl_lockref)) 2540 continue; 2541 n--; 2542 } 2543 } 2544 gi->gl = gl; 2545 } 2546 2547 static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) 2548 __acquires(RCU) 2549 { 2550 struct gfs2_glock_iter *gi = seq->private; 2551 loff_t n; 2552 2553 /* 2554 * We can either stay where we are, skip to the next hash table 2555 * entry, or start from the beginning. 2556 */ 2557 if (*pos < gi->last_pos) { 2558 rhashtable_walk_exit(&gi->hti); 2559 rhashtable_walk_enter(&gl_hash_table, &gi->hti); 2560 n = *pos + 1; 2561 } else { 2562 n = *pos - gi->last_pos; 2563 } 2564 2565 rhashtable_walk_start(&gi->hti); 2566 2567 gfs2_glock_iter_next(gi, n); 2568 gi->last_pos = *pos; 2569 return gi->gl; 2570 } 2571 2572 static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, 2573 loff_t *pos) 2574 { 2575 struct gfs2_glock_iter *gi = seq->private; 2576 2577 (*pos)++; 2578 gi->last_pos = *pos; 2579 gfs2_glock_iter_next(gi, 1); 2580 return gi->gl; 2581 } 2582 2583 static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) 2584 __releases(RCU) 2585 { 2586 struct gfs2_glock_iter *gi = seq->private; 2587 2588 rhashtable_walk_stop(&gi->hti); 2589 } 2590 2591 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) 2592 { 2593 dump_glock(seq, iter_ptr, false); 2594 return 0; 2595 } 2596 2597 static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) 2598 { 2599 preempt_disable(); 2600 if (*pos >= GFS2_NR_SBSTATS) 2601 return NULL; 2602 return pos; 2603 } 2604 2605 static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr, 2606 loff_t *pos) 2607 { 2608 (*pos)++; 2609 if (*pos >= GFS2_NR_SBSTATS) 2610 return NULL; 2611 return pos; 2612 } 2613 2614 static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr) 2615 { 2616 preempt_enable(); 2617 } 2618 2619 static const struct seq_operations gfs2_glock_seq_ops = { 2620 .start = gfs2_glock_seq_start, 2621 .next = gfs2_glock_seq_next, 2622 .stop = gfs2_glock_seq_stop, 2623 .show = gfs2_glock_seq_show, 2624 }; 2625 2626 static const struct seq_operations gfs2_glstats_seq_ops = { 2627 .start = gfs2_glock_seq_start, 2628 .next = gfs2_glock_seq_next, 2629 .stop = gfs2_glock_seq_stop, 2630 .show = gfs2_glstats_seq_show, 2631 }; 2632 2633 static const struct seq_operations gfs2_sbstats_sops = { 2634 .start = gfs2_sbstats_seq_start, 2635 .next = gfs2_sbstats_seq_next, 2636 .stop = gfs2_sbstats_seq_stop, 2637 .show = gfs2_sbstats_seq_show, 2638 }; 2639 2640 #define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL) 2641 2642 static int __gfs2_glocks_open(struct inode *inode, struct file *file, 2643 const struct seq_operations *ops) 2644 { 2645 int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter)); 2646 if (ret == 0) { 2647 struct seq_file *seq = file->private_data; 2648 struct gfs2_glock_iter *gi = seq->private; 2649 2650 gi->sdp = inode->i_private; 2651 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); 2652 if (seq->buf) 2653 seq->size = GFS2_SEQ_GOODSIZE; 2654 /* 2655 * Initially, we are "before" the first hash table entry; the 2656 * first call to rhashtable_walk_next gets us the first entry. 2657 */ 2658 gi->last_pos = -1; 2659 gi->gl = NULL; 2660 rhashtable_walk_enter(&gl_hash_table, &gi->hti); 2661 } 2662 return ret; 2663 } 2664 2665 static int gfs2_glocks_open(struct inode *inode, struct file *file) 2666 { 2667 return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops); 2668 } 2669 2670 static int gfs2_glocks_release(struct inode *inode, struct file *file) 2671 { 2672 struct seq_file *seq = file->private_data; 2673 struct gfs2_glock_iter *gi = seq->private; 2674 2675 if (gi->gl) 2676 gfs2_glock_put(gi->gl); 2677 rhashtable_walk_exit(&gi->hti); 2678 return seq_release_private(inode, file); 2679 } 2680 2681 static int gfs2_glstats_open(struct inode *inode, struct file *file) 2682 { 2683 return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops); 2684 } 2685 2686 static const struct file_operations gfs2_glocks_fops = { 2687 .owner = THIS_MODULE, 2688 .open = gfs2_glocks_open, 2689 .read = seq_read, 2690 .llseek = seq_lseek, 2691 .release = gfs2_glocks_release, 2692 }; 2693 2694 static const struct file_operations gfs2_glstats_fops = { 2695 .owner = THIS_MODULE, 2696 .open = gfs2_glstats_open, 2697 .read = seq_read, 2698 .llseek = seq_lseek, 2699 .release = gfs2_glocks_release, 2700 }; 2701 2702 DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats); 2703 2704 void gfs2_create_debugfs_file(struct gfs2_sbd *sdp) 2705 { 2706 sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); 2707 2708 debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, 2709 &gfs2_glocks_fops); 2710 2711 debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, 2712 &gfs2_glstats_fops); 2713 2714 debugfs_create_file("sbstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, 2715 &gfs2_sbstats_fops); 2716 } 2717 2718 void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) 2719 { 2720 debugfs_remove_recursive(sdp->debugfs_dir); 2721 sdp->debugfs_dir = NULL; 2722 } 2723 2724 void gfs2_register_debugfs(void) 2725 { 2726 gfs2_root = debugfs_create_dir("gfs2", NULL); 2727 } 2728 2729 void gfs2_unregister_debugfs(void) 2730 { 2731 debugfs_remove(gfs2_root); 2732 gfs2_root = NULL; 2733 } 2734