1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 5 */ 6 7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 8 9 #include <linux/sched.h> 10 #include <linux/slab.h> 11 #include <linux/spinlock.h> 12 #include <linux/buffer_head.h> 13 #include <linux/delay.h> 14 #include <linux/sort.h> 15 #include <linux/hash.h> 16 #include <linux/jhash.h> 17 #include <linux/kallsyms.h> 18 #include <linux/gfs2_ondisk.h> 19 #include <linux/list.h> 20 #include <linux/wait.h> 21 #include <linux/module.h> 22 #include <linux/uaccess.h> 23 #include <linux/seq_file.h> 24 #include <linux/debugfs.h> 25 #include <linux/kthread.h> 26 #include <linux/freezer.h> 27 #include <linux/workqueue.h> 28 #include <linux/jiffies.h> 29 #include <linux/rcupdate.h> 30 #include <linux/rculist_bl.h> 31 #include <linux/bit_spinlock.h> 32 #include <linux/percpu.h> 33 #include <linux/list_sort.h> 34 #include <linux/lockref.h> 35 #include <linux/rhashtable.h> 36 37 #include "gfs2.h" 38 #include "incore.h" 39 #include "glock.h" 40 #include "glops.h" 41 #include "inode.h" 42 #include "lops.h" 43 #include "meta_io.h" 44 #include "quota.h" 45 #include "super.h" 46 #include "util.h" 47 #include "bmap.h" 48 #define CREATE_TRACE_POINTS 49 #include "trace_gfs2.h" 50 51 struct gfs2_glock_iter { 52 struct gfs2_sbd *sdp; /* incore superblock */ 53 struct rhashtable_iter hti; /* rhashtable iterator */ 54 struct gfs2_glock *gl; /* current glock struct */ 55 loff_t last_pos; /* last position */ 56 }; 57 58 typedef void (*glock_examiner) (struct gfs2_glock * gl); 59 60 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); 61 62 static struct dentry *gfs2_root; 63 static struct workqueue_struct *glock_workqueue; 64 struct workqueue_struct *gfs2_delete_workqueue; 65 static LIST_HEAD(lru_list); 66 static atomic_t lru_count = ATOMIC_INIT(0); 67 static DEFINE_SPINLOCK(lru_lock); 68 69 #define GFS2_GL_HASH_SHIFT 15 70 #define GFS2_GL_HASH_SIZE BIT(GFS2_GL_HASH_SHIFT) 71 72 static const struct rhashtable_params ht_parms = { 73 .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4, 74 .key_len = offsetofend(struct lm_lockname, ln_type), 75 .key_offset = offsetof(struct gfs2_glock, gl_name), 76 .head_offset = offsetof(struct gfs2_glock, gl_node), 77 }; 78 79 static struct rhashtable gl_hash_table; 80 81 #define GLOCK_WAIT_TABLE_BITS 12 82 #define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS) 83 static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned; 84 85 struct wait_glock_queue { 86 struct lm_lockname *name; 87 wait_queue_entry_t wait; 88 }; 89 90 static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode, 91 int sync, void *key) 92 { 93 struct wait_glock_queue *wait_glock = 94 container_of(wait, struct wait_glock_queue, wait); 95 struct lm_lockname *wait_name = wait_glock->name; 96 struct lm_lockname *wake_name = key; 97 98 if (wake_name->ln_sbd != wait_name->ln_sbd || 99 wake_name->ln_number != wait_name->ln_number || 100 wake_name->ln_type != wait_name->ln_type) 101 return 0; 102 return autoremove_wake_function(wait, mode, sync, key); 103 } 104 105 static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name) 106 { 107 u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0); 108 109 return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS); 110 } 111 112 /** 113 * wake_up_glock - Wake up waiters on a glock 114 * @gl: the glock 115 */ 116 static void 
wake_up_glock(struct gfs2_glock *gl) 117 { 118 wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name); 119 120 if (waitqueue_active(wq)) 121 __wake_up(wq, TASK_NORMAL, 1, &gl->gl_name); 122 } 123 124 static void gfs2_glock_dealloc(struct rcu_head *rcu) 125 { 126 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); 127 128 if (gl->gl_ops->go_flags & GLOF_ASPACE) { 129 kmem_cache_free(gfs2_glock_aspace_cachep, gl); 130 } else { 131 kfree(gl->gl_lksb.sb_lvbptr); 132 kmem_cache_free(gfs2_glock_cachep, gl); 133 } 134 } 135 136 /** 137 * glock_blocked_by_withdraw - determine if we can still use a glock 138 * @gl: the glock 139 * 140 * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted 141 * when we're withdrawn. For example, to maintain metadata integrity, we should 142 * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like 143 * iopen or the transaction glocks may be safely used because none of their 144 * metadata goes through the journal. So in general, we should disallow all 145 * glocks that are journaled, and allow all the others. One exception is: 146 * we need to allow our active journal to be promoted and demoted so others 147 * may recover it and we can reacquire it when they're done. 148 */ 149 static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) 150 { 151 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 152 153 if (likely(!gfs2_withdrawn(sdp))) 154 return false; 155 if (gl->gl_ops->go_flags & GLOF_NONDISK) 156 return false; 157 if (!sdp->sd_jdesc || 158 gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr) 159 return false; 160 return true; 161 } 162 163 void gfs2_glock_free(struct gfs2_glock *gl) 164 { 165 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 166 167 BUG_ON(atomic_read(&gl->gl_revokes)); 168 rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); 169 smp_mb(); 170 wake_up_glock(gl); 171 call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); 172 if (atomic_dec_and_test(&sdp->sd_glock_disposal)) 173 wake_up(&sdp->sd_glock_wait); 174 } 175 176 /** 177 * gfs2_glock_hold() - increment reference count on glock 178 * @gl: The glock to hold 179 * 180 */ 181 182 void gfs2_glock_hold(struct gfs2_glock *gl) 183 { 184 GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); 185 lockref_get(&gl->gl_lockref); 186 } 187 188 /** 189 * demote_ok - Check to see if it's ok to unlock a glock 190 * @gl: the glock 191 * 192 * Returns: 1 if it's ok 193 */ 194 195 static int demote_ok(const struct gfs2_glock *gl) 196 { 197 const struct gfs2_glock_operations *glops = gl->gl_ops; 198 199 if (gl->gl_state == LM_ST_UNLOCKED) 200 return 0; 201 if (!list_empty(&gl->gl_holders)) 202 return 0; 203 if (glops->go_demote_ok) 204 return glops->go_demote_ok(gl); 205 return 1; 206 } 207 208 209 void gfs2_glock_add_to_lru(struct gfs2_glock *gl) 210 { 211 if (!(gl->gl_ops->go_flags & GLOF_LRU)) 212 return; 213 214 spin_lock(&lru_lock); 215 216 list_del(&gl->gl_lru); 217 list_add_tail(&gl->gl_lru, &lru_list); 218 219 if (!test_bit(GLF_LRU, &gl->gl_flags)) { 220 set_bit(GLF_LRU, &gl->gl_flags); 221 atomic_inc(&lru_count); 222 } 223 224 spin_unlock(&lru_lock); 225 } 226 227 static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) 228 { 229 if (!(gl->gl_ops->go_flags & GLOF_LRU)) 230 return; 231 232 spin_lock(&lru_lock); 233 if (test_bit(GLF_LRU, &gl->gl_flags)) { 234 list_del_init(&gl->gl_lru); 235 atomic_dec(&lru_count); 236 clear_bit(GLF_LRU, &gl->gl_flags); 237 } 238 spin_unlock(&lru_lock); 239 } 240 241 /* 242 * Enqueue the glock on the work queue. 
Passes one glock reference on to the 243 * work queue. 244 */ 245 static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { 246 if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) { 247 /* 248 * We are holding the lockref spinlock, and the work was still 249 * queued above. The queued work (glock_work_func) takes that 250 * spinlock before dropping its glock reference(s), so it 251 * cannot have dropped them in the meantime. 252 */ 253 GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2); 254 gl->gl_lockref.count--; 255 } 256 } 257 258 static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { 259 spin_lock(&gl->gl_lockref.lock); 260 __gfs2_glock_queue_work(gl, delay); 261 spin_unlock(&gl->gl_lockref.lock); 262 } 263 264 static void __gfs2_glock_put(struct gfs2_glock *gl) 265 { 266 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 267 struct address_space *mapping = gfs2_glock2aspace(gl); 268 269 lockref_mark_dead(&gl->gl_lockref); 270 271 gfs2_glock_remove_from_lru(gl); 272 spin_unlock(&gl->gl_lockref.lock); 273 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 274 GLOCK_BUG_ON(gl, mapping && mapping->nrpages && !gfs2_withdrawn(sdp)); 275 trace_gfs2_glock_put(gl); 276 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); 277 } 278 279 /* 280 * Cause the glock to be put in work queue context. 281 */ 282 void gfs2_glock_queue_put(struct gfs2_glock *gl) 283 { 284 gfs2_glock_queue_work(gl, 0); 285 } 286 287 /** 288 * gfs2_glock_put() - Decrement reference count on glock 289 * @gl: The glock to put 290 * 291 */ 292 293 void gfs2_glock_put(struct gfs2_glock *gl) 294 { 295 if (lockref_put_or_lock(&gl->gl_lockref)) 296 return; 297 298 __gfs2_glock_put(gl); 299 } 300 301 /** 302 * may_grant - check if its ok to grant a new lock 303 * @gl: The glock 304 * @gh: The lock request which we wish to grant 305 * 306 * Returns: true if its ok to grant the lock 307 */ 308 309 static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh) 310 { 311 const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list); 312 if ((gh->gh_state == LM_ST_EXCLUSIVE || 313 gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head) 314 return 0; 315 if (gl->gl_state == gh->gh_state) 316 return 1; 317 if (gh->gh_flags & GL_EXACT) 318 return 0; 319 if (gl->gl_state == LM_ST_EXCLUSIVE) { 320 if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED) 321 return 1; 322 if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED) 323 return 1; 324 } 325 if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY)) 326 return 1; 327 return 0; 328 } 329 330 static void gfs2_holder_wake(struct gfs2_holder *gh) 331 { 332 clear_bit(HIF_WAIT, &gh->gh_iflags); 333 smp_mb__after_atomic(); 334 wake_up_bit(&gh->gh_iflags, HIF_WAIT); 335 if (gh->gh_flags & GL_ASYNC) { 336 struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd; 337 338 wake_up(&sdp->sd_async_glock_wait); 339 } 340 } 341 342 /** 343 * do_error - Something unexpected has happened during a lock request 344 * 345 */ 346 347 static void do_error(struct gfs2_glock *gl, const int ret) 348 { 349 struct gfs2_holder *gh, *tmp; 350 351 list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { 352 if (test_bit(HIF_HOLDER, &gh->gh_iflags)) 353 continue; 354 if (ret & LM_OUT_ERROR) 355 gh->gh_error = -EIO; 356 else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) 357 gh->gh_error = GLR_TRYFAILED; 358 else 359 continue; 360 list_del_init(&gh->gh_list); 361 
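		/* Wake up the waiter; gh->gh_error set above tells it why the request failed. */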
trace_gfs2_glock_queue(gh, 0); 362 gfs2_holder_wake(gh); 363 } 364 } 365 366 /** 367 * do_promote - promote as many requests as possible on the current queue 368 * @gl: The glock 369 * 370 * Returns: 1 if there is a blocked holder at the head of the list, or 2 371 * if a type specific operation is underway. 372 */ 373 374 static int do_promote(struct gfs2_glock *gl) 375 __releases(&gl->gl_lockref.lock) 376 __acquires(&gl->gl_lockref.lock) 377 { 378 const struct gfs2_glock_operations *glops = gl->gl_ops; 379 struct gfs2_holder *gh, *tmp; 380 int ret; 381 382 restart: 383 list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { 384 if (test_bit(HIF_HOLDER, &gh->gh_iflags)) 385 continue; 386 if (may_grant(gl, gh)) { 387 if (gh->gh_list.prev == &gl->gl_holders && 388 glops->go_lock) { 389 spin_unlock(&gl->gl_lockref.lock); 390 /* FIXME: eliminate this eventually */ 391 ret = glops->go_lock(gh); 392 spin_lock(&gl->gl_lockref.lock); 393 if (ret) { 394 if (ret == 1) 395 return 2; 396 gh->gh_error = ret; 397 list_del_init(&gh->gh_list); 398 trace_gfs2_glock_queue(gh, 0); 399 gfs2_holder_wake(gh); 400 goto restart; 401 } 402 set_bit(HIF_HOLDER, &gh->gh_iflags); 403 trace_gfs2_promote(gh, 1); 404 gfs2_holder_wake(gh); 405 goto restart; 406 } 407 set_bit(HIF_HOLDER, &gh->gh_iflags); 408 trace_gfs2_promote(gh, 0); 409 gfs2_holder_wake(gh); 410 continue; 411 } 412 if (gh->gh_list.prev == &gl->gl_holders) 413 return 1; 414 do_error(gl, 0); 415 break; 416 } 417 return 0; 418 } 419 420 /** 421 * find_first_waiter - find the first gh that's waiting for the glock 422 * @gl: the glock 423 */ 424 425 static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl) 426 { 427 struct gfs2_holder *gh; 428 429 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 430 if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) 431 return gh; 432 } 433 return NULL; 434 } 435 436 /** 437 * state_change - record that the glock is now in a different state 438 * @gl: the glock 439 * @new_state the new state 440 * 441 */ 442 443 static void state_change(struct gfs2_glock *gl, unsigned int new_state) 444 { 445 int held1, held2; 446 447 held1 = (gl->gl_state != LM_ST_UNLOCKED); 448 held2 = (new_state != LM_ST_UNLOCKED); 449 450 if (held1 != held2) { 451 GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); 452 if (held2) 453 gl->gl_lockref.count++; 454 else 455 gl->gl_lockref.count--; 456 } 457 if (held1 && held2 && list_empty(&gl->gl_holders)) 458 clear_bit(GLF_QUEUED, &gl->gl_flags); 459 460 if (new_state != gl->gl_target) 461 /* shorten our minimum hold time */ 462 gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR, 463 GL_GLOCK_MIN_HOLD); 464 gl->gl_state = new_state; 465 gl->gl_tchange = jiffies; 466 } 467 468 static void gfs2_demote_wake(struct gfs2_glock *gl) 469 { 470 gl->gl_demote_state = LM_ST_EXCLUSIVE; 471 clear_bit(GLF_DEMOTE, &gl->gl_flags); 472 smp_mb__after_atomic(); 473 wake_up_bit(&gl->gl_flags, GLF_DEMOTE); 474 } 475 476 /** 477 * finish_xmote - The DLM has replied to one of our lock requests 478 * @gl: The glock 479 * @ret: The status from the DLM 480 * 481 */ 482 483 static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) 484 { 485 const struct gfs2_glock_operations *glops = gl->gl_ops; 486 struct gfs2_holder *gh; 487 unsigned state = ret & LM_OUT_ST_MASK; 488 int rv; 489 490 spin_lock(&gl->gl_lockref.lock); 491 trace_gfs2_glock_state_change(gl, state); 492 state_change(gl, state); 493 gh = find_first_waiter(gl); 494 495 /* Demote to UN request arrived during demote to SH or DF */ 
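	/* If so, override the original target so that we end up fully unlocked. */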
496 if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && 497 state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED) 498 gl->gl_target = LM_ST_UNLOCKED; 499 500 /* Check for state != intended state */ 501 if (unlikely(state != gl->gl_target)) { 502 if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { 503 /* move to back of queue and try next entry */ 504 if (ret & LM_OUT_CANCELED) { 505 if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0) 506 list_move_tail(&gh->gh_list, &gl->gl_holders); 507 gh = find_first_waiter(gl); 508 gl->gl_target = gh->gh_state; 509 goto retry; 510 } 511 /* Some error or failed "try lock" - report it */ 512 if ((ret & LM_OUT_ERROR) || 513 (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { 514 gl->gl_target = gl->gl_state; 515 do_error(gl, ret); 516 goto out; 517 } 518 } 519 switch(state) { 520 /* Unlocked due to conversion deadlock, try again */ 521 case LM_ST_UNLOCKED: 522 retry: 523 do_xmote(gl, gh, gl->gl_target); 524 break; 525 /* Conversion fails, unlock and try again */ 526 case LM_ST_SHARED: 527 case LM_ST_DEFERRED: 528 do_xmote(gl, gh, LM_ST_UNLOCKED); 529 break; 530 default: /* Everything else */ 531 fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n", 532 gl->gl_target, state); 533 GLOCK_BUG_ON(gl, 1); 534 } 535 spin_unlock(&gl->gl_lockref.lock); 536 return; 537 } 538 539 /* Fast path - we got what we asked for */ 540 if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) 541 gfs2_demote_wake(gl); 542 if (state != LM_ST_UNLOCKED) { 543 if (glops->go_xmote_bh) { 544 spin_unlock(&gl->gl_lockref.lock); 545 rv = glops->go_xmote_bh(gl, gh); 546 spin_lock(&gl->gl_lockref.lock); 547 if (rv) { 548 do_error(gl, rv); 549 goto out; 550 } 551 } 552 rv = do_promote(gl); 553 if (rv == 2) 554 goto out_locked; 555 } 556 out: 557 clear_bit(GLF_LOCK, &gl->gl_flags); 558 out_locked: 559 spin_unlock(&gl->gl_lockref.lock); 560 } 561 562 /** 563 * do_xmote - Calls the DLM to change the state of a lock 564 * @gl: The lock state 565 * @gh: The holder (only for promotes) 566 * @target: The target lock state 567 * 568 */ 569 570 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) 571 __releases(&gl->gl_lockref.lock) 572 __acquires(&gl->gl_lockref.lock) 573 { 574 const struct gfs2_glock_operations *glops = gl->gl_ops; 575 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 576 unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); 577 int ret; 578 579 if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) && 580 gh && !(gh->gh_flags & LM_FLAG_NOEXP)) 581 return; 582 lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | 583 LM_FLAG_PRIORITY); 584 GLOCK_BUG_ON(gl, gl->gl_state == target); 585 GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target); 586 if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && 587 glops->go_inval) { 588 /* 589 * If another process is already doing the invalidate, let that 590 * finish first. The glock state machine will get back to this 591 * holder again later. 
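		 * (GLF_INVALIDATE_IN_PROGRESS is cleared again below, once the invalidate is done.)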
		 */
		if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS,
				     &gl->gl_flags))
			return;
		do_error(gl, 0); /* Fail queued try locks */
	}
	gl->gl_req = target;
	set_bit(GLF_BLOCKING, &gl->gl_flags);
	if ((gl->gl_req == LM_ST_UNLOCKED) ||
	    (gl->gl_state == LM_ST_EXCLUSIVE) ||
	    (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
		clear_bit(GLF_BLOCKING, &gl->gl_flags);
	spin_unlock(&gl->gl_lockref.lock);
	if (glops->go_sync) {
		ret = glops->go_sync(gl);
		/* If we had a problem syncing (due to io errors or whatever),
		 * we should not invalidate the metadata or tell dlm to
		 * release the glock to other nodes.
		 */
		if (ret) {
			if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
				fs_err(sdp, "Error %d syncing glock\n", ret);
				gfs2_dump_glock(NULL, gl, true);
			}
			goto skip_inval;
		}
	}
	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
		/*
		 * The call to go_sync should have cleared out the ail list.
		 * If there are still items, we have a problem. We ought to
		 * withdraw, but we can't because the withdraw code also uses
		 * glocks. Warn about the error, dump the glock, then fall
		 * through and wait for logd to do the withdraw for us.
		 */
		if ((atomic_read(&gl->gl_ail_count) != 0) &&
		    (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
			gfs2_assert_warn(sdp, !atomic_read(&gl->gl_ail_count));
			gfs2_dump_glock(NULL, gl, true);
		}
		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
		clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
	}

skip_inval:
	gfs2_glock_hold(gl);
	/*
	 * Check for an error encountered since we called go_sync and go_inval.
	 * If so, we can't withdraw from the glock code because the withdraw
	 * code itself uses glocks (see function signal_our_withdraw) to
	 * change the mount to read-only. Most importantly, we must not call
	 * dlm to unlock the glock until the journal is in a known good state
	 * (after journal replay) otherwise other nodes may use the object
	 * (rgrp or dinode) and then later, journal replay will corrupt the
	 * file system. The best we can do here is wait for the logd daemon
	 * to see sd_log_error and withdraw, and in the meantime, requeue the
	 * work for later.
	 *
	 * However, if we're just unlocking the lock (say, for unmount, when
	 * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
	 * then it's okay to tell dlm to unlock it.
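	 * (gfs2_gl_hash_clear sets SDF_SKIP_DLM_UNLOCK for that case; see the
	 * lm_lock error handling below.)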
653 */ 654 if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp))) 655 gfs2_withdraw_delayed(sdp); 656 if (glock_blocked_by_withdraw(gl)) { 657 if (target != LM_ST_UNLOCKED || 658 test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags)) { 659 gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); 660 goto out; 661 } 662 } 663 664 if (sdp->sd_lockstruct.ls_ops->lm_lock) { 665 /* lock_dlm */ 666 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); 667 if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED && 668 target == LM_ST_UNLOCKED && 669 test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) { 670 finish_xmote(gl, target); 671 gfs2_glock_queue_work(gl, 0); 672 } else if (ret) { 673 fs_err(sdp, "lm_lock ret %d\n", ret); 674 GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp)); 675 } 676 } else { /* lock_nolock */ 677 finish_xmote(gl, target); 678 gfs2_glock_queue_work(gl, 0); 679 } 680 out: 681 spin_lock(&gl->gl_lockref.lock); 682 } 683 684 /** 685 * find_first_holder - find the first "holder" gh 686 * @gl: the glock 687 */ 688 689 static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) 690 { 691 struct gfs2_holder *gh; 692 693 if (!list_empty(&gl->gl_holders)) { 694 gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list); 695 if (test_bit(HIF_HOLDER, &gh->gh_iflags)) 696 return gh; 697 } 698 return NULL; 699 } 700 701 /** 702 * run_queue - do all outstanding tasks related to a glock 703 * @gl: The glock in question 704 * @nonblock: True if we must not block in run_queue 705 * 706 */ 707 708 static void run_queue(struct gfs2_glock *gl, const int nonblock) 709 __releases(&gl->gl_lockref.lock) 710 __acquires(&gl->gl_lockref.lock) 711 { 712 struct gfs2_holder *gh = NULL; 713 int ret; 714 715 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) 716 return; 717 718 GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); 719 720 if (test_bit(GLF_DEMOTE, &gl->gl_flags) && 721 gl->gl_demote_state != gl->gl_state) { 722 if (find_first_holder(gl)) 723 goto out_unlock; 724 if (nonblock) 725 goto out_sched; 726 set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); 727 GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); 728 gl->gl_target = gl->gl_demote_state; 729 } else { 730 if (test_bit(GLF_DEMOTE, &gl->gl_flags)) 731 gfs2_demote_wake(gl); 732 ret = do_promote(gl); 733 if (ret == 0) 734 goto out_unlock; 735 if (ret == 2) 736 goto out; 737 gh = find_first_waiter(gl); 738 gl->gl_target = gh->gh_state; 739 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) 740 do_error(gl, 0); /* Fail queued try locks */ 741 } 742 do_xmote(gl, gh, gl->gl_target); 743 out: 744 return; 745 746 out_sched: 747 clear_bit(GLF_LOCK, &gl->gl_flags); 748 smp_mb__after_atomic(); 749 gl->gl_lockref.count++; 750 __gfs2_glock_queue_work(gl, 0); 751 return; 752 753 out_unlock: 754 clear_bit(GLF_LOCK, &gl->gl_flags); 755 smp_mb__after_atomic(); 756 return; 757 } 758 759 static void delete_work_func(struct work_struct *work) 760 { 761 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); 762 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 763 struct inode *inode; 764 u64 no_addr = gl->gl_name.ln_number; 765 766 /* If someone's using this glock to create a new dinode, the block must 767 have been freed by another node, then re-used, in which case our 768 iopen callback is too late after the fact. Ignore it. 
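	   (GLF_INODE_CREATING, tested below, marks that window.)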
*/ 769 if (test_bit(GLF_INODE_CREATING, &gl->gl_flags)) 770 goto out; 771 772 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); 773 if (!IS_ERR_OR_NULL(inode)) { 774 d_prune_aliases(inode); 775 iput(inode); 776 } 777 out: 778 gfs2_glock_put(gl); 779 } 780 781 static void glock_work_func(struct work_struct *work) 782 { 783 unsigned long delay = 0; 784 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); 785 unsigned int drop_refs = 1; 786 787 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { 788 finish_xmote(gl, gl->gl_reply); 789 drop_refs++; 790 } 791 spin_lock(&gl->gl_lockref.lock); 792 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 793 gl->gl_state != LM_ST_UNLOCKED && 794 gl->gl_demote_state != LM_ST_EXCLUSIVE) { 795 unsigned long holdtime, now = jiffies; 796 797 holdtime = gl->gl_tchange + gl->gl_hold_time; 798 if (time_before(now, holdtime)) 799 delay = holdtime - now; 800 801 if (!delay) { 802 clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); 803 set_bit(GLF_DEMOTE, &gl->gl_flags); 804 } 805 } 806 run_queue(gl, 0); 807 if (delay) { 808 /* Keep one glock reference for the work we requeue. */ 809 drop_refs--; 810 if (gl->gl_name.ln_type != LM_TYPE_INODE) 811 delay = 0; 812 __gfs2_glock_queue_work(gl, delay); 813 } 814 815 /* 816 * Drop the remaining glock references manually here. (Mind that 817 * __gfs2_glock_queue_work depends on the lockref spinlock begin held 818 * here as well.) 819 */ 820 gl->gl_lockref.count -= drop_refs; 821 if (!gl->gl_lockref.count) { 822 __gfs2_glock_put(gl); 823 return; 824 } 825 spin_unlock(&gl->gl_lockref.lock); 826 } 827 828 static struct gfs2_glock *find_insert_glock(struct lm_lockname *name, 829 struct gfs2_glock *new) 830 { 831 struct wait_glock_queue wait; 832 wait_queue_head_t *wq = glock_waitqueue(name); 833 struct gfs2_glock *gl; 834 835 wait.name = name; 836 init_wait(&wait.wait); 837 wait.wait.func = glock_wake_function; 838 839 again: 840 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 841 rcu_read_lock(); 842 if (new) { 843 gl = rhashtable_lookup_get_insert_fast(&gl_hash_table, 844 &new->gl_node, ht_parms); 845 if (IS_ERR(gl)) 846 goto out; 847 } else { 848 gl = rhashtable_lookup_fast(&gl_hash_table, 849 name, ht_parms); 850 } 851 if (gl && !lockref_get_not_dead(&gl->gl_lockref)) { 852 rcu_read_unlock(); 853 schedule(); 854 goto again; 855 } 856 out: 857 rcu_read_unlock(); 858 finish_wait(wq, &wait.wait); 859 return gl; 860 } 861 862 /** 863 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist 864 * @sdp: The GFS2 superblock 865 * @number: the lock number 866 * @glops: The glock_operations to use 867 * @create: If 0, don't create the glock if it doesn't exist 868 * @glp: the glock is returned here 869 * 870 * This does not lock a glock, just finds/creates structures for one. 
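 * A reference is taken on the glock returned in @glp; drop it with
 * gfs2_glock_put() when it is no longer needed. A typical usage sketch
 * (cf. gfs2_glock_nq_num() below):
 *
 *	error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
 *	if (!error) {
 *		error = gfs2_glock_nq_init(gl, state, flags, &gh);
 *		gfs2_glock_put(gl);
 *	}
 *	...
 *	gfs2_glock_dq_uninit(&gh);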
871 * 872 * Returns: errno 873 */ 874 875 int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, 876 const struct gfs2_glock_operations *glops, int create, 877 struct gfs2_glock **glp) 878 { 879 struct super_block *s = sdp->sd_vfs; 880 struct lm_lockname name = { .ln_number = number, 881 .ln_type = glops->go_type, 882 .ln_sbd = sdp }; 883 struct gfs2_glock *gl, *tmp; 884 struct address_space *mapping; 885 struct kmem_cache *cachep; 886 int ret = 0; 887 888 gl = find_insert_glock(&name, NULL); 889 if (gl) { 890 *glp = gl; 891 return 0; 892 } 893 if (!create) 894 return -ENOENT; 895 896 if (glops->go_flags & GLOF_ASPACE) 897 cachep = gfs2_glock_aspace_cachep; 898 else 899 cachep = gfs2_glock_cachep; 900 gl = kmem_cache_alloc(cachep, GFP_NOFS); 901 if (!gl) 902 return -ENOMEM; 903 904 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 905 906 if (glops->go_flags & GLOF_LVB) { 907 gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS); 908 if (!gl->gl_lksb.sb_lvbptr) { 909 kmem_cache_free(cachep, gl); 910 return -ENOMEM; 911 } 912 } 913 914 atomic_inc(&sdp->sd_glock_disposal); 915 gl->gl_node.next = NULL; 916 gl->gl_flags = 0; 917 gl->gl_name = name; 918 gl->gl_lockref.count = 1; 919 gl->gl_state = LM_ST_UNLOCKED; 920 gl->gl_target = LM_ST_UNLOCKED; 921 gl->gl_demote_state = LM_ST_EXCLUSIVE; 922 gl->gl_ops = glops; 923 gl->gl_dstamp = 0; 924 preempt_disable(); 925 /* We use the global stats to estimate the initial per-glock stats */ 926 gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type]; 927 preempt_enable(); 928 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; 929 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; 930 gl->gl_tchange = jiffies; 931 gl->gl_object = NULL; 932 gl->gl_hold_time = GL_GLOCK_DFT_HOLD; 933 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); 934 INIT_WORK(&gl->gl_delete, delete_work_func); 935 936 mapping = gfs2_glock2aspace(gl); 937 if (mapping) { 938 mapping->a_ops = &gfs2_meta_aops; 939 mapping->host = s->s_bdev->bd_inode; 940 mapping->flags = 0; 941 mapping_set_gfp_mask(mapping, GFP_NOFS); 942 mapping->private_data = NULL; 943 mapping->writeback_index = 0; 944 } 945 946 tmp = find_insert_glock(&name, gl); 947 if (!tmp) { 948 *glp = gl; 949 goto out; 950 } 951 if (IS_ERR(tmp)) { 952 ret = PTR_ERR(tmp); 953 goto out_free; 954 } 955 *glp = tmp; 956 957 out_free: 958 kfree(gl->gl_lksb.sb_lvbptr); 959 kmem_cache_free(cachep, gl); 960 atomic_dec(&sdp->sd_glock_disposal); 961 962 out: 963 return ret; 964 } 965 966 /** 967 * gfs2_holder_init - initialize a struct gfs2_holder in the default way 968 * @gl: the glock 969 * @state: the state we're requesting 970 * @flags: the modifier flags 971 * @gh: the holder structure 972 * 973 */ 974 975 void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, 976 struct gfs2_holder *gh) 977 { 978 INIT_LIST_HEAD(&gh->gh_list); 979 gh->gh_gl = gl; 980 gh->gh_ip = _RET_IP_; 981 gh->gh_owner_pid = get_pid(task_pid(current)); 982 gh->gh_state = state; 983 gh->gh_flags = flags; 984 gh->gh_error = 0; 985 gh->gh_iflags = 0; 986 gfs2_glock_hold(gl); 987 } 988 989 /** 990 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it 991 * @state: the state we're requesting 992 * @flags: the modifier flags 993 * @gh: the holder structure 994 * 995 * Don't mess with the glock. 
996 * 997 */ 998 999 void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh) 1000 { 1001 gh->gh_state = state; 1002 gh->gh_flags = flags; 1003 gh->gh_iflags = 0; 1004 gh->gh_ip = _RET_IP_; 1005 put_pid(gh->gh_owner_pid); 1006 gh->gh_owner_pid = get_pid(task_pid(current)); 1007 } 1008 1009 /** 1010 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference) 1011 * @gh: the holder structure 1012 * 1013 */ 1014 1015 void gfs2_holder_uninit(struct gfs2_holder *gh) 1016 { 1017 put_pid(gh->gh_owner_pid); 1018 gfs2_glock_put(gh->gh_gl); 1019 gfs2_holder_mark_uninitialized(gh); 1020 gh->gh_ip = 0; 1021 } 1022 1023 static void gfs2_glock_update_hold_time(struct gfs2_glock *gl, 1024 unsigned long start_time) 1025 { 1026 /* Have we waited longer that a second? */ 1027 if (time_after(jiffies, start_time + HZ)) { 1028 /* Lengthen the minimum hold time. */ 1029 gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR, 1030 GL_GLOCK_MAX_HOLD); 1031 } 1032 } 1033 1034 /** 1035 * gfs2_glock_wait - wait on a glock acquisition 1036 * @gh: the glock holder 1037 * 1038 * Returns: 0 on success 1039 */ 1040 1041 int gfs2_glock_wait(struct gfs2_holder *gh) 1042 { 1043 unsigned long start_time = jiffies; 1044 1045 might_sleep(); 1046 wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); 1047 gfs2_glock_update_hold_time(gh->gh_gl, start_time); 1048 return gh->gh_error; 1049 } 1050 1051 static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs) 1052 { 1053 int i; 1054 1055 for (i = 0; i < num_gh; i++) 1056 if (test_bit(HIF_WAIT, &ghs[i].gh_iflags)) 1057 return 1; 1058 return 0; 1059 } 1060 1061 /** 1062 * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions 1063 * @num_gh: the number of holders in the array 1064 * @ghs: the glock holder array 1065 * 1066 * Returns: 0 on success, meaning all glocks have been granted and are held. 1067 * -ESTALE if the request timed out, meaning all glocks were released, 1068 * and the caller should retry the operation. 1069 */ 1070 1071 int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs) 1072 { 1073 struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd; 1074 int i, ret = 0, timeout = 0; 1075 unsigned long start_time = jiffies; 1076 bool keep_waiting; 1077 1078 might_sleep(); 1079 /* 1080 * Total up the (minimum hold time * 2) of all glocks and use that to 1081 * determine the max amount of time we should wait. 1082 */ 1083 for (i = 0; i < num_gh; i++) 1084 timeout += ghs[i].gh_gl->gl_hold_time << 1; 1085 1086 wait_for_dlm: 1087 if (!wait_event_timeout(sdp->sd_async_glock_wait, 1088 !glocks_pending(num_gh, ghs), timeout)) 1089 ret = -ESTALE; /* request timed out. */ 1090 1091 /* 1092 * If dlm granted all our requests, we need to adjust the glock 1093 * minimum hold time values according to how long we waited. 1094 * 1095 * If our request timed out, we need to repeatedly release any held 1096 * glocks we acquired thus far to allow dlm to acquire the remaining 1097 * glocks without deadlocking. We cannot currently cancel outstanding 1098 * glock acquisitions. 1099 * 1100 * The HIF_WAIT bit tells us which requests still need a response from 1101 * dlm. 1102 * 1103 * If dlm sent us any errors, we return the first error we find. 1104 */ 1105 keep_waiting = false; 1106 for (i = 0; i < num_gh; i++) { 1107 /* Skip holders we have already dequeued below. */ 1108 if (!gfs2_holder_queued(&ghs[i])) 1109 continue; 1110 /* Skip holders with a pending DLM response. 
		 */
		if (test_bit(HIF_WAIT, &ghs[i].gh_iflags)) {
			keep_waiting = true;
			continue;
		}

		if (test_bit(HIF_HOLDER, &ghs[i].gh_iflags)) {
			if (ret == -ESTALE)
				gfs2_glock_dq(&ghs[i]);
			else
				gfs2_glock_update_hold_time(ghs[i].gh_gl,
							    start_time);
		}
		if (!ret)
			ret = ghs[i].gh_error;
	}

	if (keep_waiting)
		goto wait_for_dlm;

	/*
	 * At this point, we've either acquired all locks or released them all.
	 */
	return ret;
}

/**
 * handle_callback - process a demote request
 * @gl: the glock
 * @state: the state the caller wants us to change to
 * @delay: zero to demote immediately; otherwise mark the demote as pending
 * @remote: true if the request came from another cluster node
 *
 * There are only two requests that we are going to see in actual
 * practice: LM_ST_SHARED and LM_ST_UNLOCKED
 */

static void handle_callback(struct gfs2_glock *gl, unsigned int state,
			    unsigned long delay, bool remote)
{
	int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;

	set_bit(bit, &gl->gl_flags);
	if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
		gl->gl_demote_state = state;
		gl->gl_demote_time = jiffies;
	} else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
			gl->gl_demote_state != state) {
		gl->gl_demote_state = LM_ST_UNLOCKED;
	}
	if (gl->gl_ops->go_callback)
		gl->gl_ops->go_callback(gl, remote);
	trace_gfs2_demote_rq(gl, remote);
}

void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	if (seq) {
		seq_vprintf(seq, fmt, args);
	} else {
		vaf.fmt = fmt;
		vaf.va = &args;

		pr_err("%pV", &vaf);
	}

	va_end(args);
}

/**
 * add_to_queue - Add a holder to the wait queue (but look for recursion)
 * @gh: the holder structure to add
 *
 * Eventually we should move the recursive locking trap to a
 * debugging option or something like that. This is the fast
 * path and needs to have the minimum number of distractions.
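 * Must be called with the glock's gl_lockref.lock held; the lock may be
 * dropped and reacquired around the lm_cancel call.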
1189 * 1190 */ 1191 1192 static inline void add_to_queue(struct gfs2_holder *gh) 1193 __releases(&gl->gl_lockref.lock) 1194 __acquires(&gl->gl_lockref.lock) 1195 { 1196 struct gfs2_glock *gl = gh->gh_gl; 1197 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 1198 struct list_head *insert_pt = NULL; 1199 struct gfs2_holder *gh2; 1200 int try_futile = 0; 1201 1202 GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL); 1203 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) 1204 GLOCK_BUG_ON(gl, true); 1205 1206 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { 1207 if (test_bit(GLF_LOCK, &gl->gl_flags)) 1208 try_futile = !may_grant(gl, gh); 1209 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) 1210 goto fail; 1211 } 1212 1213 list_for_each_entry(gh2, &gl->gl_holders, gh_list) { 1214 if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && 1215 (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) 1216 goto trap_recursive; 1217 if (try_futile && 1218 !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { 1219 fail: 1220 gh->gh_error = GLR_TRYFAILED; 1221 gfs2_holder_wake(gh); 1222 return; 1223 } 1224 if (test_bit(HIF_HOLDER, &gh2->gh_iflags)) 1225 continue; 1226 if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) 1227 insert_pt = &gh2->gh_list; 1228 } 1229 set_bit(GLF_QUEUED, &gl->gl_flags); 1230 trace_gfs2_glock_queue(gh, 1); 1231 gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT); 1232 gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT); 1233 if (likely(insert_pt == NULL)) { 1234 list_add_tail(&gh->gh_list, &gl->gl_holders); 1235 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) 1236 goto do_cancel; 1237 return; 1238 } 1239 list_add_tail(&gh->gh_list, insert_pt); 1240 do_cancel: 1241 gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list); 1242 if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { 1243 spin_unlock(&gl->gl_lockref.lock); 1244 if (sdp->sd_lockstruct.ls_ops->lm_cancel) 1245 sdp->sd_lockstruct.ls_ops->lm_cancel(gl); 1246 spin_lock(&gl->gl_lockref.lock); 1247 } 1248 return; 1249 1250 trap_recursive: 1251 fs_err(sdp, "original: %pSR\n", (void *)gh2->gh_ip); 1252 fs_err(sdp, "pid: %d\n", pid_nr(gh2->gh_owner_pid)); 1253 fs_err(sdp, "lock type: %d req lock state : %d\n", 1254 gh2->gh_gl->gl_name.ln_type, gh2->gh_state); 1255 fs_err(sdp, "new: %pSR\n", (void *)gh->gh_ip); 1256 fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid)); 1257 fs_err(sdp, "lock type: %d req lock state : %d\n", 1258 gh->gh_gl->gl_name.ln_type, gh->gh_state); 1259 gfs2_dump_glock(NULL, gl, true); 1260 BUG(); 1261 } 1262 1263 /** 1264 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock) 1265 * @gh: the holder structure 1266 * 1267 * if (gh->gh_flags & GL_ASYNC), this never returns an error 1268 * 1269 * Returns: 0, GLR_TRYFAILED, or errno on failure 1270 */ 1271 1272 int gfs2_glock_nq(struct gfs2_holder *gh) 1273 { 1274 struct gfs2_glock *gl = gh->gh_gl; 1275 int error = 0; 1276 1277 if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP)) 1278 return -EIO; 1279 1280 if (test_bit(GLF_LRU, &gl->gl_flags)) 1281 gfs2_glock_remove_from_lru(gl); 1282 1283 spin_lock(&gl->gl_lockref.lock); 1284 add_to_queue(gh); 1285 if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && 1286 test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { 1287 set_bit(GLF_REPLY_PENDING, &gl->gl_flags); 1288 gl->gl_lockref.count++; 1289 __gfs2_glock_queue_work(gl, 0); 1290 } 1291 run_queue(gl, 1); 1292 spin_unlock(&gl->gl_lockref.lock); 1293 1294 if (!(gh->gh_flags & GL_ASYNC)) 1295 error = gfs2_glock_wait(gh); 1296 1297 return error; 1298 } 1299 1300 /** 
1301 * gfs2_glock_poll - poll to see if an async request has been completed 1302 * @gh: the holder 1303 * 1304 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on 1305 */ 1306 1307 int gfs2_glock_poll(struct gfs2_holder *gh) 1308 { 1309 return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1; 1310 } 1311 1312 /** 1313 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock) 1314 * @gh: the glock holder 1315 * 1316 */ 1317 1318 void gfs2_glock_dq(struct gfs2_holder *gh) 1319 { 1320 struct gfs2_glock *gl = gh->gh_gl; 1321 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 1322 unsigned delay = 0; 1323 int fast_path = 0; 1324 1325 spin_lock(&gl->gl_lockref.lock); 1326 /* 1327 * If we're in the process of file system withdraw, we cannot just 1328 * dequeue any glocks until our journal is recovered, lest we 1329 * introduce file system corruption. We need two exceptions to this 1330 * rule: We need to allow unlocking of nondisk glocks and the glock 1331 * for our own journal that needs recovery. 1332 */ 1333 if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && 1334 glock_blocked_by_withdraw(gl) && 1335 gh->gh_gl != sdp->sd_jinode_gl) { 1336 sdp->sd_glock_dqs_held++; 1337 might_sleep(); 1338 wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, 1339 TASK_UNINTERRUPTIBLE); 1340 } 1341 if (gh->gh_flags & GL_NOCACHE) 1342 handle_callback(gl, LM_ST_UNLOCKED, 0, false); 1343 1344 list_del_init(&gh->gh_list); 1345 clear_bit(HIF_HOLDER, &gh->gh_iflags); 1346 if (find_first_holder(gl) == NULL) { 1347 if (list_empty(&gl->gl_holders) && 1348 !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 1349 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1350 fast_path = 1; 1351 } 1352 if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) 1353 gfs2_glock_add_to_lru(gl); 1354 1355 trace_gfs2_glock_queue(gh, 0); 1356 if (unlikely(!fast_path)) { 1357 gl->gl_lockref.count++; 1358 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 1359 !test_bit(GLF_DEMOTE, &gl->gl_flags) && 1360 gl->gl_name.ln_type == LM_TYPE_INODE) 1361 delay = gl->gl_hold_time; 1362 __gfs2_glock_queue_work(gl, delay); 1363 } 1364 spin_unlock(&gl->gl_lockref.lock); 1365 } 1366 1367 void gfs2_glock_dq_wait(struct gfs2_holder *gh) 1368 { 1369 struct gfs2_glock *gl = gh->gh_gl; 1370 gfs2_glock_dq(gh); 1371 might_sleep(); 1372 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE); 1373 } 1374 1375 /** 1376 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it 1377 * @gh: the holder structure 1378 * 1379 */ 1380 1381 void gfs2_glock_dq_uninit(struct gfs2_holder *gh) 1382 { 1383 gfs2_glock_dq(gh); 1384 gfs2_holder_uninit(gh); 1385 } 1386 1387 /** 1388 * gfs2_glock_nq_num - acquire a glock based on lock number 1389 * @sdp: the filesystem 1390 * @number: the lock number 1391 * @glops: the glock operations for the type of glock 1392 * @state: the state to acquire the glock in 1393 * @flags: modifier flags for the acquisition 1394 * @gh: the struct gfs2_holder 1395 * 1396 * Returns: errno 1397 */ 1398 1399 int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, 1400 const struct gfs2_glock_operations *glops, 1401 unsigned int state, u16 flags, struct gfs2_holder *gh) 1402 { 1403 struct gfs2_glock *gl; 1404 int error; 1405 1406 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl); 1407 if (!error) { 1408 error = gfs2_glock_nq_init(gl, state, flags, gh); 1409 gfs2_glock_put(gl); 1410 } 1411 1412 return error; 1413 } 1414 1415 /** 1416 * glock_compare - Compare two struct gfs2_glock structures for sorting 1417 * @arg_a: the 
first structure 1418 * @arg_b: the second structure 1419 * 1420 */ 1421 1422 static int glock_compare(const void *arg_a, const void *arg_b) 1423 { 1424 const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a; 1425 const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b; 1426 const struct lm_lockname *a = &gh_a->gh_gl->gl_name; 1427 const struct lm_lockname *b = &gh_b->gh_gl->gl_name; 1428 1429 if (a->ln_number > b->ln_number) 1430 return 1; 1431 if (a->ln_number < b->ln_number) 1432 return -1; 1433 BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type); 1434 return 0; 1435 } 1436 1437 /** 1438 * nq_m_sync - synchonously acquire more than one glock in deadlock free order 1439 * @num_gh: the number of structures 1440 * @ghs: an array of struct gfs2_holder structures 1441 * 1442 * Returns: 0 on success (all glocks acquired), 1443 * errno on failure (no glocks acquired) 1444 */ 1445 1446 static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs, 1447 struct gfs2_holder **p) 1448 { 1449 unsigned int x; 1450 int error = 0; 1451 1452 for (x = 0; x < num_gh; x++) 1453 p[x] = &ghs[x]; 1454 1455 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL); 1456 1457 for (x = 0; x < num_gh; x++) { 1458 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); 1459 1460 error = gfs2_glock_nq(p[x]); 1461 if (error) { 1462 while (x--) 1463 gfs2_glock_dq(p[x]); 1464 break; 1465 } 1466 } 1467 1468 return error; 1469 } 1470 1471 /** 1472 * gfs2_glock_nq_m - acquire multiple glocks 1473 * @num_gh: the number of structures 1474 * @ghs: an array of struct gfs2_holder structures 1475 * 1476 * 1477 * Returns: 0 on success (all glocks acquired), 1478 * errno on failure (no glocks acquired) 1479 */ 1480 1481 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs) 1482 { 1483 struct gfs2_holder *tmp[4]; 1484 struct gfs2_holder **pph = tmp; 1485 int error = 0; 1486 1487 switch(num_gh) { 1488 case 0: 1489 return 0; 1490 case 1: 1491 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); 1492 return gfs2_glock_nq(ghs); 1493 default: 1494 if (num_gh <= 4) 1495 break; 1496 pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *), 1497 GFP_NOFS); 1498 if (!pph) 1499 return -ENOMEM; 1500 } 1501 1502 error = nq_m_sync(num_gh, ghs, pph); 1503 1504 if (pph != tmp) 1505 kfree(pph); 1506 1507 return error; 1508 } 1509 1510 /** 1511 * gfs2_glock_dq_m - release multiple glocks 1512 * @num_gh: the number of structures 1513 * @ghs: an array of struct gfs2_holder structures 1514 * 1515 */ 1516 1517 void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) 1518 { 1519 while (num_gh--) 1520 gfs2_glock_dq(&ghs[num_gh]); 1521 } 1522 1523 void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) 1524 { 1525 unsigned long delay = 0; 1526 unsigned long holdtime; 1527 unsigned long now = jiffies; 1528 1529 gfs2_glock_hold(gl); 1530 holdtime = gl->gl_tchange + gl->gl_hold_time; 1531 if (test_bit(GLF_QUEUED, &gl->gl_flags) && 1532 gl->gl_name.ln_type == LM_TYPE_INODE) { 1533 if (time_before(now, holdtime)) 1534 delay = holdtime - now; 1535 if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) 1536 delay = gl->gl_hold_time; 1537 } 1538 1539 spin_lock(&gl->gl_lockref.lock); 1540 handle_callback(gl, state, delay, true); 1541 __gfs2_glock_queue_work(gl, delay); 1542 spin_unlock(&gl->gl_lockref.lock); 1543 } 1544 1545 /** 1546 * gfs2_should_freeze - Figure out if glock should be frozen 1547 * @gl: The glock in question 1548 * 1549 * Glocks are not frozen if (a) the result of the dlm operation is 
1550 * an error, (b) the locking operation was an unlock operation or 1551 * (c) if there is a "noexp" flagged request anywhere in the queue 1552 * 1553 * Returns: 1 if freezing should occur, 0 otherwise 1554 */ 1555 1556 static int gfs2_should_freeze(const struct gfs2_glock *gl) 1557 { 1558 const struct gfs2_holder *gh; 1559 1560 if (gl->gl_reply & ~LM_OUT_ST_MASK) 1561 return 0; 1562 if (gl->gl_target == LM_ST_UNLOCKED) 1563 return 0; 1564 1565 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1566 if (test_bit(HIF_HOLDER, &gh->gh_iflags)) 1567 continue; 1568 if (LM_FLAG_NOEXP & gh->gh_flags) 1569 return 0; 1570 } 1571 1572 return 1; 1573 } 1574 1575 /** 1576 * gfs2_glock_complete - Callback used by locking 1577 * @gl: Pointer to the glock 1578 * @ret: The return value from the dlm 1579 * 1580 * The gl_reply field is under the gl_lockref.lock lock so that it is ok 1581 * to use a bitfield shared with other glock state fields. 1582 */ 1583 1584 void gfs2_glock_complete(struct gfs2_glock *gl, int ret) 1585 { 1586 struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; 1587 1588 spin_lock(&gl->gl_lockref.lock); 1589 gl->gl_reply = ret; 1590 1591 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { 1592 if (gfs2_should_freeze(gl)) { 1593 set_bit(GLF_FROZEN, &gl->gl_flags); 1594 spin_unlock(&gl->gl_lockref.lock); 1595 return; 1596 } 1597 } 1598 1599 gl->gl_lockref.count++; 1600 set_bit(GLF_REPLY_PENDING, &gl->gl_flags); 1601 __gfs2_glock_queue_work(gl, 0); 1602 spin_unlock(&gl->gl_lockref.lock); 1603 } 1604 1605 static int glock_cmp(void *priv, struct list_head *a, struct list_head *b) 1606 { 1607 struct gfs2_glock *gla, *glb; 1608 1609 gla = list_entry(a, struct gfs2_glock, gl_lru); 1610 glb = list_entry(b, struct gfs2_glock, gl_lru); 1611 1612 if (gla->gl_name.ln_number > glb->gl_name.ln_number) 1613 return 1; 1614 if (gla->gl_name.ln_number < glb->gl_name.ln_number) 1615 return -1; 1616 1617 return 0; 1618 } 1619 1620 /** 1621 * gfs2_dispose_glock_lru - Demote a list of glocks 1622 * @list: The list to dispose of 1623 * 1624 * Disposing of glocks may involve disk accesses, so that here we sort 1625 * the glocks by number (i.e. disk location of the inodes) so that if 1626 * there are any such accesses, they'll be sent in order (mostly). 1627 * 1628 * Must be called under the lru_lock, but may drop and retake this 1629 * lock. 
While the lru_lock is dropped, entries may vanish from the 1630 * list, but no new entries will appear on the list (since it is 1631 * private) 1632 */ 1633 1634 static void gfs2_dispose_glock_lru(struct list_head *list) 1635 __releases(&lru_lock) 1636 __acquires(&lru_lock) 1637 { 1638 struct gfs2_glock *gl; 1639 1640 list_sort(NULL, list, glock_cmp); 1641 1642 while(!list_empty(list)) { 1643 gl = list_first_entry(list, struct gfs2_glock, gl_lru); 1644 list_del_init(&gl->gl_lru); 1645 if (!spin_trylock(&gl->gl_lockref.lock)) { 1646 add_back_to_lru: 1647 list_add(&gl->gl_lru, &lru_list); 1648 set_bit(GLF_LRU, &gl->gl_flags); 1649 atomic_inc(&lru_count); 1650 continue; 1651 } 1652 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1653 spin_unlock(&gl->gl_lockref.lock); 1654 goto add_back_to_lru; 1655 } 1656 gl->gl_lockref.count++; 1657 if (demote_ok(gl)) 1658 handle_callback(gl, LM_ST_UNLOCKED, 0, false); 1659 WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); 1660 __gfs2_glock_queue_work(gl, 0); 1661 spin_unlock(&gl->gl_lockref.lock); 1662 cond_resched_lock(&lru_lock); 1663 } 1664 } 1665 1666 /** 1667 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote 1668 * @nr: The number of entries to scan 1669 * 1670 * This function selects the entries on the LRU which are able to 1671 * be demoted, and then kicks off the process by calling 1672 * gfs2_dispose_glock_lru() above. 1673 */ 1674 1675 static long gfs2_scan_glock_lru(int nr) 1676 { 1677 struct gfs2_glock *gl; 1678 LIST_HEAD(skipped); 1679 LIST_HEAD(dispose); 1680 long freed = 0; 1681 1682 spin_lock(&lru_lock); 1683 while ((nr-- >= 0) && !list_empty(&lru_list)) { 1684 gl = list_first_entry(&lru_list, struct gfs2_glock, gl_lru); 1685 1686 /* Test for being demotable */ 1687 if (!test_bit(GLF_LOCK, &gl->gl_flags)) { 1688 list_move(&gl->gl_lru, &dispose); 1689 atomic_dec(&lru_count); 1690 clear_bit(GLF_LRU, &gl->gl_flags); 1691 freed++; 1692 continue; 1693 } 1694 1695 list_move(&gl->gl_lru, &skipped); 1696 } 1697 list_splice(&skipped, &lru_list); 1698 if (!list_empty(&dispose)) 1699 gfs2_dispose_glock_lru(&dispose); 1700 spin_unlock(&lru_lock); 1701 1702 return freed; 1703 } 1704 1705 static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, 1706 struct shrink_control *sc) 1707 { 1708 if (!(sc->gfp_mask & __GFP_FS)) 1709 return SHRINK_STOP; 1710 return gfs2_scan_glock_lru(sc->nr_to_scan); 1711 } 1712 1713 static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, 1714 struct shrink_control *sc) 1715 { 1716 return vfs_pressure_ratio(atomic_read(&lru_count)); 1717 } 1718 1719 static struct shrinker glock_shrinker = { 1720 .seeks = DEFAULT_SEEKS, 1721 .count_objects = gfs2_glock_shrink_count, 1722 .scan_objects = gfs2_glock_shrink_scan, 1723 }; 1724 1725 /** 1726 * examine_bucket - Call a function for glock in a hash bucket 1727 * @examiner: the function 1728 * @sdp: the filesystem 1729 * @bucket: the bucket 1730 * 1731 * Note that the function can be called multiple times on the same 1732 * object. So the user must ensure that the function can cope with 1733 * that. 
1734 */ 1735 1736 static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) 1737 { 1738 struct gfs2_glock *gl; 1739 struct rhashtable_iter iter; 1740 1741 rhashtable_walk_enter(&gl_hash_table, &iter); 1742 1743 do { 1744 rhashtable_walk_start(&iter); 1745 1746 while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) 1747 if (gl->gl_name.ln_sbd == sdp && 1748 lockref_get_not_dead(&gl->gl_lockref)) 1749 examiner(gl); 1750 1751 rhashtable_walk_stop(&iter); 1752 } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); 1753 1754 rhashtable_walk_exit(&iter); 1755 } 1756 1757 /** 1758 * thaw_glock - thaw out a glock which has an unprocessed reply waiting 1759 * @gl: The glock to thaw 1760 * 1761 */ 1762 1763 static void thaw_glock(struct gfs2_glock *gl) 1764 { 1765 if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) { 1766 gfs2_glock_put(gl); 1767 return; 1768 } 1769 set_bit(GLF_REPLY_PENDING, &gl->gl_flags); 1770 gfs2_glock_queue_work(gl, 0); 1771 } 1772 1773 /** 1774 * clear_glock - look at a glock and see if we can free it from glock cache 1775 * @gl: the glock to look at 1776 * 1777 */ 1778 1779 static void clear_glock(struct gfs2_glock *gl) 1780 { 1781 gfs2_glock_remove_from_lru(gl); 1782 1783 spin_lock(&gl->gl_lockref.lock); 1784 if (gl->gl_state != LM_ST_UNLOCKED) 1785 handle_callback(gl, LM_ST_UNLOCKED, 0, false); 1786 __gfs2_glock_queue_work(gl, 0); 1787 spin_unlock(&gl->gl_lockref.lock); 1788 } 1789 1790 /** 1791 * gfs2_glock_thaw - Thaw any frozen glocks 1792 * @sdp: The super block 1793 * 1794 */ 1795 1796 void gfs2_glock_thaw(struct gfs2_sbd *sdp) 1797 { 1798 glock_hash_walk(thaw_glock, sdp); 1799 } 1800 1801 static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) 1802 { 1803 spin_lock(&gl->gl_lockref.lock); 1804 gfs2_dump_glock(seq, gl, fsid); 1805 spin_unlock(&gl->gl_lockref.lock); 1806 } 1807 1808 static void dump_glock_func(struct gfs2_glock *gl) 1809 { 1810 dump_glock(NULL, gl, true); 1811 } 1812 1813 /** 1814 * gfs2_gl_hash_clear - Empty out the glock hash table 1815 * @sdp: the filesystem 1816 * @wait: wait until it's all gone 1817 * 1818 * Called when unmounting the filesystem. 
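 * SDF_SKIP_DLM_UNLOCK is set first; do_xmote() checks it so that unlock
 * requests refused by the lock module during this teardown still complete.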
1819 */ 1820 1821 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 1822 { 1823 set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); 1824 flush_workqueue(glock_workqueue); 1825 glock_hash_walk(clear_glock, sdp); 1826 flush_workqueue(glock_workqueue); 1827 wait_event_timeout(sdp->sd_glock_wait, 1828 atomic_read(&sdp->sd_glock_disposal) == 0, 1829 HZ * 600); 1830 glock_hash_walk(dump_glock_func, sdp); 1831 } 1832 1833 void gfs2_glock_finish_truncate(struct gfs2_inode *ip) 1834 { 1835 struct gfs2_glock *gl = ip->i_gl; 1836 int ret; 1837 1838 ret = gfs2_truncatei_resume(ip); 1839 gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0); 1840 1841 spin_lock(&gl->gl_lockref.lock); 1842 clear_bit(GLF_LOCK, &gl->gl_flags); 1843 run_queue(gl, 1); 1844 spin_unlock(&gl->gl_lockref.lock); 1845 } 1846 1847 static const char *state2str(unsigned state) 1848 { 1849 switch(state) { 1850 case LM_ST_UNLOCKED: 1851 return "UN"; 1852 case LM_ST_SHARED: 1853 return "SH"; 1854 case LM_ST_DEFERRED: 1855 return "DF"; 1856 case LM_ST_EXCLUSIVE: 1857 return "EX"; 1858 } 1859 return "??"; 1860 } 1861 1862 static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) 1863 { 1864 char *p = buf; 1865 if (flags & LM_FLAG_TRY) 1866 *p++ = 't'; 1867 if (flags & LM_FLAG_TRY_1CB) 1868 *p++ = 'T'; 1869 if (flags & LM_FLAG_NOEXP) 1870 *p++ = 'e'; 1871 if (flags & LM_FLAG_ANY) 1872 *p++ = 'A'; 1873 if (flags & LM_FLAG_PRIORITY) 1874 *p++ = 'p'; 1875 if (flags & GL_ASYNC) 1876 *p++ = 'a'; 1877 if (flags & GL_EXACT) 1878 *p++ = 'E'; 1879 if (flags & GL_NOCACHE) 1880 *p++ = 'c'; 1881 if (test_bit(HIF_HOLDER, &iflags)) 1882 *p++ = 'H'; 1883 if (test_bit(HIF_WAIT, &iflags)) 1884 *p++ = 'W'; 1885 if (test_bit(HIF_FIRST, &iflags)) 1886 *p++ = 'F'; 1887 *p = 0; 1888 return buf; 1889 } 1890 1891 /** 1892 * dump_holder - print information about a glock holder 1893 * @seq: the seq_file struct 1894 * @gh: the glock holder 1895 * @fs_id_buf: pointer to file system id (if requested) 1896 * 1897 */ 1898 1899 static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh, 1900 const char *fs_id_buf) 1901 { 1902 struct task_struct *gh_owner = NULL; 1903 char flags_buf[32]; 1904 1905 rcu_read_lock(); 1906 if (gh->gh_owner_pid) 1907 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); 1908 gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n", 1909 fs_id_buf, state2str(gh->gh_state), 1910 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), 1911 gh->gh_error, 1912 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, 1913 gh_owner ? 
		       gh_owner->comm : "(ended)",
		       (void *)gh->gh_ip);
	rcu_read_unlock();
}

static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
{
	const unsigned long *gflags = &gl->gl_flags;
	char *p = buf;

	if (test_bit(GLF_LOCK, gflags))
		*p++ = 'l';
	if (test_bit(GLF_DEMOTE, gflags))
		*p++ = 'D';
	if (test_bit(GLF_PENDING_DEMOTE, gflags))
		*p++ = 'd';
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags))
		*p++ = 'p';
	if (test_bit(GLF_DIRTY, gflags))
		*p++ = 'y';
	if (test_bit(GLF_LFLUSH, gflags))
		*p++ = 'f';
	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
		*p++ = 'i';
	if (test_bit(GLF_REPLY_PENDING, gflags))
		*p++ = 'r';
	if (test_bit(GLF_INITIAL, gflags))
		*p++ = 'I';
	if (test_bit(GLF_FROZEN, gflags))
		*p++ = 'F';
	if (test_bit(GLF_QUEUED, gflags))
		*p++ = 'q';
	if (test_bit(GLF_LRU, gflags))
		*p++ = 'L';
	if (gl->gl_object)
		*p++ = 'o';
	if (test_bit(GLF_BLOCKING, gflags))
		*p++ = 'b';
	*p = 0;
	return buf;
}

/**
 * gfs2_dump_glock - print information about a glock
 * @seq: The seq_file struct
 * @gl: the glock
 * @fsid: If true, also dump the file system id
 *
 * The file format is as follows:
 * One line per object, capital letters are used to indicate objects
 * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
 * other objects are indented by a single space and follow the glock to
 * which they are related. Fields are indicated by lower case letters
 * followed by a colon and the field value, except for strings which are in
 * [] so that it's possible to see if they are composed of spaces for
 * example. The fields are n = number (id of the object), f = flags,
 * t = type, s = state, r = refcount, e = error, p = pid.

/**
 * gfs2_dump_glock - print information about a glock
 * @seq: The seq_file struct
 * @gl: the glock
 * @fsid: If true, also dump the file system id
 *
 * The file format is as follows:
 * One line per object, capital letters are used to indicate objects
 * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
 * other objects are indented by a single space and follow the glock to
 * which they are related. Fields are indicated by lower case letters
 * followed by a colon and the field value, except for strings which are in
 * [] so that it's possible to see if they are composed of spaces for
 * example. The fields are: n = number (id of the object), f = flags,
 * t = type, s = state, r = refcount, e = error, p = pid.
 *
 */

void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	unsigned long long dtime;
	const struct gfs2_holder *gh;
	char gflags_buf[32];
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	char fs_id_buf[sizeof(sdp->sd_fsname) + 7];

	memset(fs_id_buf, 0, sizeof(fs_id_buf));
	if (fsid && sdp) /* safety precaution */
		sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
	dtime = jiffies - gl->gl_demote_time;
	dtime *= 1000000/HZ; /* demote time in uSec */
	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
		dtime = 0;
	gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
		       "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state),
		       gl->gl_name.ln_type,
		       (unsigned long long)gl->gl_name.ln_number,
		       gflags2str(gflags_buf, gl),
		       state2str(gl->gl_target),
		       state2str(gl->gl_demote_state), dtime,
		       atomic_read(&gl->gl_ail_count),
		       atomic_read(&gl->gl_revokes),
		       (int)gl->gl_lockref.count, gl->gl_hold_time);

	list_for_each_entry(gh, &gl->gl_holders, gh_list)
		dump_holder(seq, gh, fs_id_buf);

	if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
		glops->go_dump(seq, gl, fs_id_buf);
}

static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
{
	struct gfs2_glock *gl = iter_ptr;

	seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n",
		   gl->gl_name.ln_type,
		   (unsigned long long)gl->gl_name.ln_number,
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
	return 0;
}

static const char *gfs2_gltype[] = {
	"type",
	"reserved",
	"nondisk",
	"inode",
	"rgrp",
	"meta",
	"iopen",
	"flock",
	"plock",
	"quota",
	"journal",
};

static const char *gfs2_stype[] = {
	[GFS2_LKS_SRTT] = "srtt",
	[GFS2_LKS_SRTTVAR] = "srttvar",
	[GFS2_LKS_SRTTB] = "srttb",
	[GFS2_LKS_SRTTVARB] = "srttvarb",
	[GFS2_LKS_SIRT] = "sirt",
	[GFS2_LKS_SIRTVAR] = "sirtvar",
	[GFS2_LKS_DCOUNT] = "dlm",
	[GFS2_LKS_QCOUNT] = "queue",
};

#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))
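
/*
 * Worked example (illustrative): the sbstats sequence position packs a
 * row and a column into one loff_t -- the row in gfs2_gltype[] is
 * pos >> 3 and the column in gfs2_stype[] is pos & 0x07, which works
 * because gfs2_stype[] has exactly eight entries.  For instance, pos = 27
 * gives index 3 and subindex 3, i.e. the "srttvarb" statistic for the
 * "inode" glock type, printed once per possible CPU by
 * gfs2_sbstats_seq_show() below.
 */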
"cpu": gfs2_stype[subindex]); 2065 2066 for_each_possible_cpu(i) { 2067 const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i); 2068 2069 if (index == 0) 2070 seq_printf(seq, " %15u", i); 2071 else 2072 seq_printf(seq, " %15llu", (unsigned long long)lkstats-> 2073 lkstats[index - 1].stats[subindex]); 2074 } 2075 seq_putc(seq, '\n'); 2076 return 0; 2077 } 2078 2079 int __init gfs2_glock_init(void) 2080 { 2081 int i, ret; 2082 2083 ret = rhashtable_init(&gl_hash_table, &ht_parms); 2084 if (ret < 0) 2085 return ret; 2086 2087 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | 2088 WQ_HIGHPRI | WQ_FREEZABLE, 0); 2089 if (!glock_workqueue) { 2090 rhashtable_destroy(&gl_hash_table); 2091 return -ENOMEM; 2092 } 2093 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", 2094 WQ_MEM_RECLAIM | WQ_FREEZABLE, 2095 0); 2096 if (!gfs2_delete_workqueue) { 2097 destroy_workqueue(glock_workqueue); 2098 rhashtable_destroy(&gl_hash_table); 2099 return -ENOMEM; 2100 } 2101 2102 ret = register_shrinker(&glock_shrinker); 2103 if (ret) { 2104 destroy_workqueue(gfs2_delete_workqueue); 2105 destroy_workqueue(glock_workqueue); 2106 rhashtable_destroy(&gl_hash_table); 2107 return ret; 2108 } 2109 2110 for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++) 2111 init_waitqueue_head(glock_wait_table + i); 2112 2113 return 0; 2114 } 2115 2116 void gfs2_glock_exit(void) 2117 { 2118 unregister_shrinker(&glock_shrinker); 2119 rhashtable_destroy(&gl_hash_table); 2120 destroy_workqueue(glock_workqueue); 2121 destroy_workqueue(gfs2_delete_workqueue); 2122 } 2123 2124 static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n) 2125 { 2126 struct gfs2_glock *gl = gi->gl; 2127 2128 if (gl) { 2129 if (n == 0) 2130 return; 2131 if (!lockref_put_not_zero(&gl->gl_lockref)) 2132 gfs2_glock_queue_put(gl); 2133 } 2134 for (;;) { 2135 gl = rhashtable_walk_next(&gi->hti); 2136 if (IS_ERR_OR_NULL(gl)) { 2137 if (gl == ERR_PTR(-EAGAIN)) { 2138 n = 1; 2139 continue; 2140 } 2141 gl = NULL; 2142 break; 2143 } 2144 if (gl->gl_name.ln_sbd != gi->sdp) 2145 continue; 2146 if (n <= 1) { 2147 if (!lockref_get_not_dead(&gl->gl_lockref)) 2148 continue; 2149 break; 2150 } else { 2151 if (__lockref_is_dead(&gl->gl_lockref)) 2152 continue; 2153 n--; 2154 } 2155 } 2156 gi->gl = gl; 2157 } 2158 2159 static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) 2160 __acquires(RCU) 2161 { 2162 struct gfs2_glock_iter *gi = seq->private; 2163 loff_t n; 2164 2165 /* 2166 * We can either stay where we are, skip to the next hash table 2167 * entry, or start from the beginning. 

static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct gfs2_glock_iter *gi = seq->private;
	loff_t n;

	/*
	 * We can either stay where we are, skip to the next hash table
	 * entry, or start from the beginning.
	 */
	if (*pos < gi->last_pos) {
		rhashtable_walk_exit(&gi->hti);
		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
		n = *pos + 1;
	} else {
		n = *pos - gi->last_pos;
	}

	rhashtable_walk_start(&gi->hti);

	gfs2_glock_iter_next(gi, n);
	gi->last_pos = *pos;
	return gi->gl;
}

static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
				 loff_t *pos)
{
	struct gfs2_glock_iter *gi = seq->private;

	(*pos)++;
	gi->last_pos = *pos;
	gfs2_glock_iter_next(gi, 1);
	return gi->gl;
}

static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
	__releases(RCU)
{
	struct gfs2_glock_iter *gi = seq->private;

	rhashtable_walk_stop(&gi->hti);
}

static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
{
	dump_glock(seq, iter_ptr, false);
	return 0;
}

static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
{
	preempt_disable();
	if (*pos >= GFS2_NR_SBSTATS)
		return NULL;
	return pos;
}

static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
				   loff_t *pos)
{
	(*pos)++;
	if (*pos >= GFS2_NR_SBSTATS)
		return NULL;
	return pos;
}

static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
{
	preempt_enable();
}

static const struct seq_operations gfs2_glock_seq_ops = {
	.start = gfs2_glock_seq_start,
	.next = gfs2_glock_seq_next,
	.stop = gfs2_glock_seq_stop,
	.show = gfs2_glock_seq_show,
};

static const struct seq_operations gfs2_glstats_seq_ops = {
	.start = gfs2_glock_seq_start,
	.next = gfs2_glock_seq_next,
	.stop = gfs2_glock_seq_stop,
	.show = gfs2_glstats_seq_show,
};

static const struct seq_operations gfs2_sbstats_seq_ops = {
	.start = gfs2_sbstats_seq_start,
	.next = gfs2_sbstats_seq_next,
	.stop = gfs2_sbstats_seq_stop,
	.show = gfs2_sbstats_seq_show,
};

#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)
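
/*
 * Note (descriptive, added for clarity): GFS2_SEQ_GOODSIZE pre-sizes the
 * seq_file buffer so that large glock dumps do not have to go through
 * repeated rounds of seq_read()'s buffer doubling.  On a typical system
 * with 4 KiB pages and PAGE_ALLOC_COSTLY_ORDER == 3 this works out to
 * 32 KiB, capped at 64 KiB; if the kmalloc() below fails, seq_file simply
 * falls back to its default single-page buffer.
 */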

static int __gfs2_glocks_open(struct inode *inode, struct file *file,
			      const struct seq_operations *ops)
{
	int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter));
	if (ret == 0) {
		struct seq_file *seq = file->private_data;
		struct gfs2_glock_iter *gi = seq->private;

		gi->sdp = inode->i_private;
		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
		if (seq->buf)
			seq->size = GFS2_SEQ_GOODSIZE;
		/*
		 * Initially, we are "before" the first hash table entry; the
		 * first call to rhashtable_walk_next gets us the first entry.
		 */
		gi->last_pos = -1;
		gi->gl = NULL;
		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
	}
	return ret;
}

static int gfs2_glocks_open(struct inode *inode, struct file *file)
{
	return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops);
}

static int gfs2_glocks_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct gfs2_glock_iter *gi = seq->private;

	if (gi->gl)
		gfs2_glock_put(gi->gl);
	rhashtable_walk_exit(&gi->hti);
	return seq_release_private(inode, file);
}

static int gfs2_glstats_open(struct inode *inode, struct file *file)
{
	return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops);
}

static int gfs2_sbstats_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &gfs2_sbstats_seq_ops);
	if (ret == 0) {
		struct seq_file *seq = file->private_data;
		seq->private = inode->i_private; /* sdp */
	}
	return ret;
}

static const struct file_operations gfs2_glocks_fops = {
	.owner = THIS_MODULE,
	.open = gfs2_glocks_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = gfs2_glocks_release,
};

static const struct file_operations gfs2_glstats_fops = {
	.owner = THIS_MODULE,
	.open = gfs2_glstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = gfs2_glocks_release,
};

static const struct file_operations gfs2_sbstats_fops = {
	.owner = THIS_MODULE,
	.open = gfs2_sbstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
{
	sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);

	debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
			    &gfs2_glocks_fops);

	debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
			    &gfs2_glstats_fops);

	debugfs_create_file("sbstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
			    &gfs2_sbstats_fops);
}

void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
{
	debugfs_remove_recursive(sdp->debugfs_dir);
	sdp->debugfs_dir = NULL;
}

void gfs2_register_debugfs(void)
{
	gfs2_root = debugfs_create_dir("gfs2", NULL);
}

void gfs2_unregister_debugfs(void)
{
	debugfs_remove(gfs2_root);
	gfs2_root = NULL;
}
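
/*
 * Usage note (assuming debugfs is mounted in the usual place): each mount
 * gets a directory named after its lock table under /sys/kernel/debug/gfs2/,
 * containing the "glocks", "glstats" and "sbstats" files created by
 * gfs2_create_debugfs_file() above and removed again by
 * gfs2_delete_debugfs_file().
 */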