16714d8e8SKurt Hackel /* -*- mode: c; c-basic-offset: 8; -*- 26714d8e8SKurt Hackel * vim: noexpandtab sw=8 ts=8 sts=0: 36714d8e8SKurt Hackel * 46714d8e8SKurt Hackel * dlmthread.c 56714d8e8SKurt Hackel * 66714d8e8SKurt Hackel * standalone DLM module 76714d8e8SKurt Hackel * 86714d8e8SKurt Hackel * Copyright (C) 2004 Oracle. All rights reserved. 96714d8e8SKurt Hackel * 106714d8e8SKurt Hackel * This program is free software; you can redistribute it and/or 116714d8e8SKurt Hackel * modify it under the terms of the GNU General Public 126714d8e8SKurt Hackel * License as published by the Free Software Foundation; either 136714d8e8SKurt Hackel * version 2 of the License, or (at your option) any later version. 146714d8e8SKurt Hackel * 156714d8e8SKurt Hackel * This program is distributed in the hope that it will be useful, 166714d8e8SKurt Hackel * but WITHOUT ANY WARRANTY; without even the implied warranty of 176714d8e8SKurt Hackel * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 186714d8e8SKurt Hackel * General Public License for more details. 196714d8e8SKurt Hackel * 206714d8e8SKurt Hackel * You should have received a copy of the GNU General Public 216714d8e8SKurt Hackel * License along with this program; if not, write to the 226714d8e8SKurt Hackel * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 236714d8e8SKurt Hackel * Boston, MA 021110-1307, USA. 246714d8e8SKurt Hackel * 256714d8e8SKurt Hackel */ 266714d8e8SKurt Hackel 276714d8e8SKurt Hackel 286714d8e8SKurt Hackel #include <linux/module.h> 296714d8e8SKurt Hackel #include <linux/fs.h> 306714d8e8SKurt Hackel #include <linux/types.h> 316714d8e8SKurt Hackel #include <linux/slab.h> 326714d8e8SKurt Hackel #include <linux/highmem.h> 336714d8e8SKurt Hackel #include <linux/utsname.h> 346714d8e8SKurt Hackel #include <linux/init.h> 356714d8e8SKurt Hackel #include <linux/sysctl.h> 366714d8e8SKurt Hackel #include <linux/random.h> 376714d8e8SKurt Hackel #include <linux/blkdev.h> 386714d8e8SKurt Hackel #include <linux/socket.h> 396714d8e8SKurt Hackel #include <linux/inet.h> 406714d8e8SKurt Hackel #include <linux/timer.h> 416714d8e8SKurt Hackel #include <linux/kthread.h> 428d79d088SKurt Hackel #include <linux/delay.h> 436714d8e8SKurt Hackel 446714d8e8SKurt Hackel 456714d8e8SKurt Hackel #include "cluster/heartbeat.h" 466714d8e8SKurt Hackel #include "cluster/nodemanager.h" 476714d8e8SKurt Hackel #include "cluster/tcp.h" 486714d8e8SKurt Hackel 496714d8e8SKurt Hackel #include "dlmapi.h" 506714d8e8SKurt Hackel #include "dlmcommon.h" 516714d8e8SKurt Hackel #include "dlmdomain.h" 526714d8e8SKurt Hackel 536714d8e8SKurt Hackel #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_THREAD) 546714d8e8SKurt Hackel #include "cluster/masklog.h" 556714d8e8SKurt Hackel 566714d8e8SKurt Hackel static int dlm_thread(void *data); 578b219809SKurt Hackel static void dlm_purge_lockres_now(struct dlm_ctxt *dlm, 588b219809SKurt Hackel struct dlm_lock_resource *lockres); 596714d8e8SKurt Hackel 606714d8e8SKurt Hackel static void dlm_flush_asts(struct dlm_ctxt *dlm); 616714d8e8SKurt Hackel 626714d8e8SKurt Hackel #define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num) 636714d8e8SKurt Hackel 646714d8e8SKurt Hackel /* will exit holding res->spinlock, but may drop in function */ 656714d8e8SKurt Hackel /* waits until flags are cleared on res->state */ 666714d8e8SKurt Hackel void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags) 676714d8e8SKurt Hackel { 686714d8e8SKurt Hackel DECLARE_WAITQUEUE(wait, current); 696714d8e8SKurt Hackel 706714d8e8SKurt Hackel assert_spin_locked(&res->spinlock); 716714d8e8SKurt Hackel 726714d8e8SKurt Hackel add_wait_queue(&res->wq, &wait); 736714d8e8SKurt Hackel repeat: 746714d8e8SKurt Hackel set_current_state(TASK_UNINTERRUPTIBLE); 756714d8e8SKurt Hackel if (res->state & flags) { 766714d8e8SKurt Hackel spin_unlock(&res->spinlock); 776714d8e8SKurt Hackel schedule(); 786714d8e8SKurt Hackel spin_lock(&res->spinlock); 796714d8e8SKurt Hackel goto repeat; 806714d8e8SKurt Hackel } 816714d8e8SKurt Hackel remove_wait_queue(&res->wq, &wait); 826714d8e8SKurt Hackel current->state = TASK_RUNNING; 836714d8e8SKurt Hackel } 846714d8e8SKurt Hackel 856714d8e8SKurt Hackel 8669d72b06SKurt Hackel int __dlm_lockres_unused(struct dlm_lock_resource *res) 876714d8e8SKurt Hackel { 886714d8e8SKurt Hackel if (list_empty(&res->granted) && 896714d8e8SKurt Hackel list_empty(&res->converting) && 906714d8e8SKurt Hackel list_empty(&res->blocked) && 916714d8e8SKurt Hackel list_empty(&res->dirty)) 926714d8e8SKurt Hackel return 1; 936714d8e8SKurt Hackel return 0; 946714d8e8SKurt Hackel } 956714d8e8SKurt Hackel 966714d8e8SKurt Hackel 976714d8e8SKurt Hackel /* Call whenever you may have added or deleted something from one of 986714d8e8SKurt Hackel * the lockres queue's. This will figure out whether it belongs on the 996714d8e8SKurt Hackel * unused list or not and does the appropriate thing. */ 1006714d8e8SKurt Hackel void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, 1016714d8e8SKurt Hackel struct dlm_lock_resource *res) 1026714d8e8SKurt Hackel { 1036714d8e8SKurt Hackel mlog_entry("%.*s\n", res->lockname.len, res->lockname.name); 1046714d8e8SKurt Hackel 1056714d8e8SKurt Hackel assert_spin_locked(&dlm->spinlock); 1066714d8e8SKurt Hackel assert_spin_locked(&res->spinlock); 1076714d8e8SKurt Hackel 1086714d8e8SKurt Hackel if (__dlm_lockres_unused(res)){ 1096714d8e8SKurt Hackel if (list_empty(&res->purge)) { 1106714d8e8SKurt Hackel mlog(0, "putting lockres %.*s from purge list\n", 1116714d8e8SKurt Hackel res->lockname.len, res->lockname.name); 1126714d8e8SKurt Hackel 1136714d8e8SKurt Hackel res->last_used = jiffies; 1146714d8e8SKurt Hackel list_add_tail(&res->purge, &dlm->purge_list); 1156714d8e8SKurt Hackel dlm->purge_count++; 1168b219809SKurt Hackel 1178b219809SKurt Hackel /* if this node is not the owner, there is 1188b219809SKurt Hackel * no way to keep track of who the owner could be. 1198b219809SKurt Hackel * unhash it to avoid serious problems. */ 1208b219809SKurt Hackel if (res->owner != dlm->node_num) { 1218b219809SKurt Hackel mlog(0, "%s:%.*s: doing immediate " 1228b219809SKurt Hackel "purge of lockres owned by %u\n", 1238b219809SKurt Hackel dlm->name, res->lockname.len, 1248b219809SKurt Hackel res->lockname.name, res->owner); 1258b219809SKurt Hackel 1268b219809SKurt Hackel dlm_purge_lockres_now(dlm, res); 1278b219809SKurt Hackel } 1286714d8e8SKurt Hackel } 1296714d8e8SKurt Hackel } else if (!list_empty(&res->purge)) { 1308b219809SKurt Hackel mlog(0, "removing lockres %.*s from purge list, " 1318b219809SKurt Hackel "owner=%u\n", res->lockname.len, res->lockname.name, 1328b219809SKurt Hackel res->owner); 1336714d8e8SKurt Hackel 1346714d8e8SKurt Hackel list_del_init(&res->purge); 1356714d8e8SKurt Hackel dlm->purge_count--; 1366714d8e8SKurt Hackel } 1376714d8e8SKurt Hackel } 1386714d8e8SKurt Hackel 1396714d8e8SKurt Hackel void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, 1406714d8e8SKurt Hackel struct dlm_lock_resource *res) 1416714d8e8SKurt Hackel { 1426714d8e8SKurt Hackel mlog_entry("%.*s\n", res->lockname.len, res->lockname.name); 1436714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 1446714d8e8SKurt Hackel spin_lock(&res->spinlock); 1456714d8e8SKurt Hackel 1466714d8e8SKurt Hackel __dlm_lockres_calc_usage(dlm, res); 1476714d8e8SKurt Hackel 1486714d8e8SKurt Hackel spin_unlock(&res->spinlock); 1496714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 1506714d8e8SKurt Hackel } 1516714d8e8SKurt Hackel 1526714d8e8SKurt Hackel /* TODO: Eventual API: Called with the dlm spinlock held, may drop it 1536714d8e8SKurt Hackel * to do migration, but will re-acquire before exit. */ 1546714d8e8SKurt Hackel void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *lockres) 1556714d8e8SKurt Hackel { 1566714d8e8SKurt Hackel int master; 1576714d8e8SKurt Hackel int ret; 1586714d8e8SKurt Hackel 1596714d8e8SKurt Hackel spin_lock(&lockres->spinlock); 1606714d8e8SKurt Hackel master = lockres->owner == dlm->node_num; 1616714d8e8SKurt Hackel spin_unlock(&lockres->spinlock); 1626714d8e8SKurt Hackel 1636714d8e8SKurt Hackel mlog(0, "purging lockres %.*s, master = %d\n", lockres->lockname.len, 1646714d8e8SKurt Hackel lockres->lockname.name, master); 1656714d8e8SKurt Hackel 1666714d8e8SKurt Hackel /* Non master is the easy case -- no migration required, just 1676714d8e8SKurt Hackel * quit. */ 1686714d8e8SKurt Hackel if (!master) 1696714d8e8SKurt Hackel goto finish; 1706714d8e8SKurt Hackel 1716714d8e8SKurt Hackel /* Wheee! Migrate lockres here! */ 1726714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 1736714d8e8SKurt Hackel again: 1746714d8e8SKurt Hackel 1756714d8e8SKurt Hackel ret = dlm_migrate_lockres(dlm, lockres, O2NM_MAX_NODES); 1766714d8e8SKurt Hackel if (ret == -ENOTEMPTY) { 1776714d8e8SKurt Hackel mlog(ML_ERROR, "lockres %.*s still has local locks!\n", 1786714d8e8SKurt Hackel lockres->lockname.len, lockres->lockname.name); 1796714d8e8SKurt Hackel 1806714d8e8SKurt Hackel BUG(); 1816714d8e8SKurt Hackel } else if (ret < 0) { 1826714d8e8SKurt Hackel mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n", 1836714d8e8SKurt Hackel lockres->lockname.len, lockres->lockname.name); 1848d79d088SKurt Hackel msleep(100); 1856714d8e8SKurt Hackel goto again; 1866714d8e8SKurt Hackel } 1876714d8e8SKurt Hackel 1886714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 1896714d8e8SKurt Hackel 1906714d8e8SKurt Hackel finish: 1916714d8e8SKurt Hackel if (!list_empty(&lockres->purge)) { 1926714d8e8SKurt Hackel list_del_init(&lockres->purge); 1936714d8e8SKurt Hackel dlm->purge_count--; 1946714d8e8SKurt Hackel } 1956714d8e8SKurt Hackel __dlm_unhash_lockres(lockres); 1966714d8e8SKurt Hackel } 1976714d8e8SKurt Hackel 1988b219809SKurt Hackel /* make an unused lockres go away immediately. 1998b219809SKurt Hackel * as soon as the dlm spinlock is dropped, this lockres 2008b219809SKurt Hackel * will not be found. kfree still happens on last put. */ 2018b219809SKurt Hackel static void dlm_purge_lockres_now(struct dlm_ctxt *dlm, 2028b219809SKurt Hackel struct dlm_lock_resource *lockres) 2038b219809SKurt Hackel { 2048b219809SKurt Hackel assert_spin_locked(&dlm->spinlock); 2058b219809SKurt Hackel assert_spin_locked(&lockres->spinlock); 2068b219809SKurt Hackel 2078b219809SKurt Hackel BUG_ON(!__dlm_lockres_unused(lockres)); 2088b219809SKurt Hackel 2098b219809SKurt Hackel if (!list_empty(&lockres->purge)) { 2108b219809SKurt Hackel list_del_init(&lockres->purge); 2118b219809SKurt Hackel dlm->purge_count--; 2128b219809SKurt Hackel } 2138b219809SKurt Hackel __dlm_unhash_lockres(lockres); 2148b219809SKurt Hackel } 2158b219809SKurt Hackel 2166714d8e8SKurt Hackel static void dlm_run_purge_list(struct dlm_ctxt *dlm, 2176714d8e8SKurt Hackel int purge_now) 2186714d8e8SKurt Hackel { 2196714d8e8SKurt Hackel unsigned int run_max, unused; 2206714d8e8SKurt Hackel unsigned long purge_jiffies; 2216714d8e8SKurt Hackel struct dlm_lock_resource *lockres; 2226714d8e8SKurt Hackel 2236714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 2246714d8e8SKurt Hackel run_max = dlm->purge_count; 2256714d8e8SKurt Hackel 2266714d8e8SKurt Hackel while(run_max && !list_empty(&dlm->purge_list)) { 2276714d8e8SKurt Hackel run_max--; 2286714d8e8SKurt Hackel 2296714d8e8SKurt Hackel lockres = list_entry(dlm->purge_list.next, 2306714d8e8SKurt Hackel struct dlm_lock_resource, purge); 2316714d8e8SKurt Hackel 2326714d8e8SKurt Hackel /* Status of the lockres *might* change so double 2336714d8e8SKurt Hackel * check. If the lockres is unused, holding the dlm 2346714d8e8SKurt Hackel * spinlock will prevent people from getting and more 2356714d8e8SKurt Hackel * refs on it -- there's no need to keep the lockres 2366714d8e8SKurt Hackel * spinlock. */ 2376714d8e8SKurt Hackel spin_lock(&lockres->spinlock); 2386714d8e8SKurt Hackel unused = __dlm_lockres_unused(lockres); 2396714d8e8SKurt Hackel spin_unlock(&lockres->spinlock); 2406714d8e8SKurt Hackel 2416714d8e8SKurt Hackel if (!unused) 2426714d8e8SKurt Hackel continue; 2436714d8e8SKurt Hackel 2446714d8e8SKurt Hackel purge_jiffies = lockres->last_used + 2456714d8e8SKurt Hackel msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); 2466714d8e8SKurt Hackel 2476714d8e8SKurt Hackel /* Make sure that we want to be processing this guy at 2486714d8e8SKurt Hackel * this time. */ 2496714d8e8SKurt Hackel if (!purge_now && time_after(purge_jiffies, jiffies)) { 2506714d8e8SKurt Hackel /* Since resources are added to the purge list 2516714d8e8SKurt Hackel * in tail order, we can stop at the first 2526714d8e8SKurt Hackel * unpurgable resource -- anyone added after 2536714d8e8SKurt Hackel * him will have a greater last_used value */ 2546714d8e8SKurt Hackel break; 2556714d8e8SKurt Hackel } 2566714d8e8SKurt Hackel 2576714d8e8SKurt Hackel list_del_init(&lockres->purge); 2586714d8e8SKurt Hackel dlm->purge_count--; 2596714d8e8SKurt Hackel 2606714d8e8SKurt Hackel /* This may drop and reacquire the dlm spinlock if it 2616714d8e8SKurt Hackel * has to do migration. */ 2626714d8e8SKurt Hackel mlog(0, "calling dlm_purge_lockres!\n"); 2636714d8e8SKurt Hackel dlm_purge_lockres(dlm, lockres); 2646714d8e8SKurt Hackel mlog(0, "DONE calling dlm_purge_lockres!\n"); 2656714d8e8SKurt Hackel 2666714d8e8SKurt Hackel /* Avoid adding any scheduling latencies */ 2676714d8e8SKurt Hackel cond_resched_lock(&dlm->spinlock); 2686714d8e8SKurt Hackel } 2696714d8e8SKurt Hackel 2706714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 2716714d8e8SKurt Hackel } 2726714d8e8SKurt Hackel 2736714d8e8SKurt Hackel static void dlm_shuffle_lists(struct dlm_ctxt *dlm, 2746714d8e8SKurt Hackel struct dlm_lock_resource *res) 2756714d8e8SKurt Hackel { 2766714d8e8SKurt Hackel struct dlm_lock *lock, *target; 2776714d8e8SKurt Hackel struct list_head *iter; 2786714d8e8SKurt Hackel struct list_head *head; 2796714d8e8SKurt Hackel int can_grant = 1; 2806714d8e8SKurt Hackel 2816714d8e8SKurt Hackel //mlog(0, "res->lockname.len=%d\n", res->lockname.len); 2826714d8e8SKurt Hackel //mlog(0, "res->lockname.name=%p\n", res->lockname.name); 2836714d8e8SKurt Hackel //mlog(0, "shuffle res %.*s\n", res->lockname.len, 2846714d8e8SKurt Hackel // res->lockname.name); 2856714d8e8SKurt Hackel 2866714d8e8SKurt Hackel /* because this function is called with the lockres 2876714d8e8SKurt Hackel * spinlock, and because we know that it is not migrating/ 2886714d8e8SKurt Hackel * recovering/in-progress, it is fine to reserve asts and 2896714d8e8SKurt Hackel * basts right before queueing them all throughout */ 2906714d8e8SKurt Hackel assert_spin_locked(&res->spinlock); 2916714d8e8SKurt Hackel BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING| 2926714d8e8SKurt Hackel DLM_LOCK_RES_RECOVERING| 2936714d8e8SKurt Hackel DLM_LOCK_RES_IN_PROGRESS))); 2946714d8e8SKurt Hackel 2956714d8e8SKurt Hackel converting: 2966714d8e8SKurt Hackel if (list_empty(&res->converting)) 2976714d8e8SKurt Hackel goto blocked; 2986714d8e8SKurt Hackel mlog(0, "res %.*s has locks on a convert queue\n", res->lockname.len, 2996714d8e8SKurt Hackel res->lockname.name); 3006714d8e8SKurt Hackel 3016714d8e8SKurt Hackel target = list_entry(res->converting.next, struct dlm_lock, list); 3026714d8e8SKurt Hackel if (target->ml.convert_type == LKM_IVMODE) { 3036714d8e8SKurt Hackel mlog(ML_ERROR, "%.*s: converting a lock with no " 3046714d8e8SKurt Hackel "convert_type!\n", res->lockname.len, res->lockname.name); 3056714d8e8SKurt Hackel BUG(); 3066714d8e8SKurt Hackel } 3076714d8e8SKurt Hackel head = &res->granted; 3086714d8e8SKurt Hackel list_for_each(iter, head) { 3096714d8e8SKurt Hackel lock = list_entry(iter, struct dlm_lock, list); 3106714d8e8SKurt Hackel if (lock==target) 3116714d8e8SKurt Hackel continue; 3126714d8e8SKurt Hackel if (!dlm_lock_compatible(lock->ml.type, 3136714d8e8SKurt Hackel target->ml.convert_type)) { 3146714d8e8SKurt Hackel can_grant = 0; 3156714d8e8SKurt Hackel /* queue the BAST if not already */ 3166714d8e8SKurt Hackel if (lock->ml.highest_blocked == LKM_IVMODE) { 3176714d8e8SKurt Hackel __dlm_lockres_reserve_ast(res); 3186714d8e8SKurt Hackel dlm_queue_bast(dlm, lock); 3196714d8e8SKurt Hackel } 3206714d8e8SKurt Hackel /* update the highest_blocked if needed */ 3216714d8e8SKurt Hackel if (lock->ml.highest_blocked < target->ml.convert_type) 3226714d8e8SKurt Hackel lock->ml.highest_blocked = 3236714d8e8SKurt Hackel target->ml.convert_type; 3246714d8e8SKurt Hackel } 3256714d8e8SKurt Hackel } 3266714d8e8SKurt Hackel head = &res->converting; 3276714d8e8SKurt Hackel list_for_each(iter, head) { 3286714d8e8SKurt Hackel lock = list_entry(iter, struct dlm_lock, list); 3296714d8e8SKurt Hackel if (lock==target) 3306714d8e8SKurt Hackel continue; 3316714d8e8SKurt Hackel if (!dlm_lock_compatible(lock->ml.type, 3326714d8e8SKurt Hackel target->ml.convert_type)) { 3336714d8e8SKurt Hackel can_grant = 0; 3346714d8e8SKurt Hackel if (lock->ml.highest_blocked == LKM_IVMODE) { 3356714d8e8SKurt Hackel __dlm_lockres_reserve_ast(res); 3366714d8e8SKurt Hackel dlm_queue_bast(dlm, lock); 3376714d8e8SKurt Hackel } 3386714d8e8SKurt Hackel if (lock->ml.highest_blocked < target->ml.convert_type) 3396714d8e8SKurt Hackel lock->ml.highest_blocked = 3406714d8e8SKurt Hackel target->ml.convert_type; 3416714d8e8SKurt Hackel } 3426714d8e8SKurt Hackel } 3436714d8e8SKurt Hackel 3446714d8e8SKurt Hackel /* we can convert the lock */ 3456714d8e8SKurt Hackel if (can_grant) { 3466714d8e8SKurt Hackel spin_lock(&target->spinlock); 3476714d8e8SKurt Hackel BUG_ON(target->ml.highest_blocked != LKM_IVMODE); 3486714d8e8SKurt Hackel 3496714d8e8SKurt Hackel mlog(0, "calling ast for converting lock: %.*s, have: %d, " 3506714d8e8SKurt Hackel "granting: %d, node: %u\n", res->lockname.len, 3516714d8e8SKurt Hackel res->lockname.name, target->ml.type, 3526714d8e8SKurt Hackel target->ml.convert_type, target->ml.node); 3536714d8e8SKurt Hackel 3546714d8e8SKurt Hackel target->ml.type = target->ml.convert_type; 3556714d8e8SKurt Hackel target->ml.convert_type = LKM_IVMODE; 356f116629dSAkinobu Mita list_move_tail(&target->list, &res->granted); 3576714d8e8SKurt Hackel 3586714d8e8SKurt Hackel BUG_ON(!target->lksb); 3596714d8e8SKurt Hackel target->lksb->status = DLM_NORMAL; 3606714d8e8SKurt Hackel 3616714d8e8SKurt Hackel spin_unlock(&target->spinlock); 3626714d8e8SKurt Hackel 3636714d8e8SKurt Hackel __dlm_lockres_reserve_ast(res); 3646714d8e8SKurt Hackel dlm_queue_ast(dlm, target); 3656714d8e8SKurt Hackel /* go back and check for more */ 3666714d8e8SKurt Hackel goto converting; 3676714d8e8SKurt Hackel } 3686714d8e8SKurt Hackel 3696714d8e8SKurt Hackel blocked: 3706714d8e8SKurt Hackel if (list_empty(&res->blocked)) 3716714d8e8SKurt Hackel goto leave; 3726714d8e8SKurt Hackel target = list_entry(res->blocked.next, struct dlm_lock, list); 3736714d8e8SKurt Hackel 3746714d8e8SKurt Hackel head = &res->granted; 3756714d8e8SKurt Hackel list_for_each(iter, head) { 3766714d8e8SKurt Hackel lock = list_entry(iter, struct dlm_lock, list); 3776714d8e8SKurt Hackel if (lock==target) 3786714d8e8SKurt Hackel continue; 3796714d8e8SKurt Hackel if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { 3806714d8e8SKurt Hackel can_grant = 0; 3816714d8e8SKurt Hackel if (lock->ml.highest_blocked == LKM_IVMODE) { 3826714d8e8SKurt Hackel __dlm_lockres_reserve_ast(res); 3836714d8e8SKurt Hackel dlm_queue_bast(dlm, lock); 3846714d8e8SKurt Hackel } 3856714d8e8SKurt Hackel if (lock->ml.highest_blocked < target->ml.type) 3866714d8e8SKurt Hackel lock->ml.highest_blocked = target->ml.type; 3876714d8e8SKurt Hackel } 3886714d8e8SKurt Hackel } 3896714d8e8SKurt Hackel 3906714d8e8SKurt Hackel head = &res->converting; 3916714d8e8SKurt Hackel list_for_each(iter, head) { 3926714d8e8SKurt Hackel lock = list_entry(iter, struct dlm_lock, list); 3936714d8e8SKurt Hackel if (lock==target) 3946714d8e8SKurt Hackel continue; 3956714d8e8SKurt Hackel if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { 3966714d8e8SKurt Hackel can_grant = 0; 3976714d8e8SKurt Hackel if (lock->ml.highest_blocked == LKM_IVMODE) { 3986714d8e8SKurt Hackel __dlm_lockres_reserve_ast(res); 3996714d8e8SKurt Hackel dlm_queue_bast(dlm, lock); 4006714d8e8SKurt Hackel } 4016714d8e8SKurt Hackel if (lock->ml.highest_blocked < target->ml.type) 4026714d8e8SKurt Hackel lock->ml.highest_blocked = target->ml.type; 4036714d8e8SKurt Hackel } 4046714d8e8SKurt Hackel } 4056714d8e8SKurt Hackel 4066714d8e8SKurt Hackel /* we can grant the blocked lock (only 4076714d8e8SKurt Hackel * possible if converting list empty) */ 4086714d8e8SKurt Hackel if (can_grant) { 4096714d8e8SKurt Hackel spin_lock(&target->spinlock); 4106714d8e8SKurt Hackel BUG_ON(target->ml.highest_blocked != LKM_IVMODE); 4116714d8e8SKurt Hackel 4126714d8e8SKurt Hackel mlog(0, "calling ast for blocked lock: %.*s, granting: %d, " 4136714d8e8SKurt Hackel "node: %u\n", res->lockname.len, res->lockname.name, 4146714d8e8SKurt Hackel target->ml.type, target->ml.node); 4156714d8e8SKurt Hackel 4166714d8e8SKurt Hackel // target->ml.type is already correct 417f116629dSAkinobu Mita list_move_tail(&target->list, &res->granted); 4186714d8e8SKurt Hackel 4196714d8e8SKurt Hackel BUG_ON(!target->lksb); 4206714d8e8SKurt Hackel target->lksb->status = DLM_NORMAL; 4216714d8e8SKurt Hackel 4226714d8e8SKurt Hackel spin_unlock(&target->spinlock); 4236714d8e8SKurt Hackel 4246714d8e8SKurt Hackel __dlm_lockres_reserve_ast(res); 4256714d8e8SKurt Hackel dlm_queue_ast(dlm, target); 4266714d8e8SKurt Hackel /* go back and check for more */ 4276714d8e8SKurt Hackel goto converting; 4286714d8e8SKurt Hackel } 4296714d8e8SKurt Hackel 4306714d8e8SKurt Hackel leave: 4316714d8e8SKurt Hackel return; 4326714d8e8SKurt Hackel } 4336714d8e8SKurt Hackel 4346714d8e8SKurt Hackel /* must have NO locks when calling this with res !=NULL * */ 4356714d8e8SKurt Hackel void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 4366714d8e8SKurt Hackel { 4376714d8e8SKurt Hackel mlog_entry("dlm=%p, res=%p\n", dlm, res); 4386714d8e8SKurt Hackel if (res) { 4396714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 4406714d8e8SKurt Hackel spin_lock(&res->spinlock); 4416714d8e8SKurt Hackel __dlm_dirty_lockres(dlm, res); 4426714d8e8SKurt Hackel spin_unlock(&res->spinlock); 4436714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 4446714d8e8SKurt Hackel } 4456714d8e8SKurt Hackel wake_up(&dlm->dlm_thread_wq); 4466714d8e8SKurt Hackel } 4476714d8e8SKurt Hackel 4486714d8e8SKurt Hackel void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 4496714d8e8SKurt Hackel { 4506714d8e8SKurt Hackel mlog_entry("dlm=%p, res=%p\n", dlm, res); 4516714d8e8SKurt Hackel 4526714d8e8SKurt Hackel assert_spin_locked(&dlm->spinlock); 4536714d8e8SKurt Hackel assert_spin_locked(&res->spinlock); 4546714d8e8SKurt Hackel 4556714d8e8SKurt Hackel /* don't shuffle secondary queues */ 4566714d8e8SKurt Hackel if ((res->owner == dlm->node_num) && 4576714d8e8SKurt Hackel !(res->state & DLM_LOCK_RES_DIRTY)) { 4586714d8e8SKurt Hackel list_add_tail(&res->dirty, &dlm->dirty_list); 4596714d8e8SKurt Hackel res->state |= DLM_LOCK_RES_DIRTY; 4606714d8e8SKurt Hackel } 4616714d8e8SKurt Hackel } 4626714d8e8SKurt Hackel 4636714d8e8SKurt Hackel 4646714d8e8SKurt Hackel /* Launch the NM thread for the mounted volume */ 4656714d8e8SKurt Hackel int dlm_launch_thread(struct dlm_ctxt *dlm) 4666714d8e8SKurt Hackel { 4676714d8e8SKurt Hackel mlog(0, "starting dlm thread...\n"); 4686714d8e8SKurt Hackel 4696714d8e8SKurt Hackel dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm_thread"); 4706714d8e8SKurt Hackel if (IS_ERR(dlm->dlm_thread_task)) { 4716714d8e8SKurt Hackel mlog_errno(PTR_ERR(dlm->dlm_thread_task)); 4726714d8e8SKurt Hackel dlm->dlm_thread_task = NULL; 4736714d8e8SKurt Hackel return -EINVAL; 4746714d8e8SKurt Hackel } 4756714d8e8SKurt Hackel 4766714d8e8SKurt Hackel return 0; 4776714d8e8SKurt Hackel } 4786714d8e8SKurt Hackel 4796714d8e8SKurt Hackel void dlm_complete_thread(struct dlm_ctxt *dlm) 4806714d8e8SKurt Hackel { 4816714d8e8SKurt Hackel if (dlm->dlm_thread_task) { 4826714d8e8SKurt Hackel mlog(ML_KTHREAD, "waiting for dlm thread to exit\n"); 4836714d8e8SKurt Hackel kthread_stop(dlm->dlm_thread_task); 4846714d8e8SKurt Hackel dlm->dlm_thread_task = NULL; 4856714d8e8SKurt Hackel } 4866714d8e8SKurt Hackel } 4876714d8e8SKurt Hackel 4886714d8e8SKurt Hackel static int dlm_dirty_list_empty(struct dlm_ctxt *dlm) 4896714d8e8SKurt Hackel { 4906714d8e8SKurt Hackel int empty; 4916714d8e8SKurt Hackel 4926714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 4936714d8e8SKurt Hackel empty = list_empty(&dlm->dirty_list); 4946714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 4956714d8e8SKurt Hackel 4966714d8e8SKurt Hackel return empty; 4976714d8e8SKurt Hackel } 4986714d8e8SKurt Hackel 4996714d8e8SKurt Hackel static void dlm_flush_asts(struct dlm_ctxt *dlm) 5006714d8e8SKurt Hackel { 5016714d8e8SKurt Hackel int ret; 5026714d8e8SKurt Hackel struct dlm_lock *lock; 5036714d8e8SKurt Hackel struct dlm_lock_resource *res; 5046714d8e8SKurt Hackel u8 hi; 5056714d8e8SKurt Hackel 5066714d8e8SKurt Hackel spin_lock(&dlm->ast_lock); 5076714d8e8SKurt Hackel while (!list_empty(&dlm->pending_asts)) { 5086714d8e8SKurt Hackel lock = list_entry(dlm->pending_asts.next, 5096714d8e8SKurt Hackel struct dlm_lock, ast_list); 5106714d8e8SKurt Hackel /* get an extra ref on lock */ 5116714d8e8SKurt Hackel dlm_lock_get(lock); 5126714d8e8SKurt Hackel res = lock->lockres; 5136714d8e8SKurt Hackel mlog(0, "delivering an ast for this lockres\n"); 5146714d8e8SKurt Hackel 5156714d8e8SKurt Hackel BUG_ON(!lock->ast_pending); 5166714d8e8SKurt Hackel 5176714d8e8SKurt Hackel /* remove from list (including ref) */ 5186714d8e8SKurt Hackel list_del_init(&lock->ast_list); 5196714d8e8SKurt Hackel dlm_lock_put(lock); 5206714d8e8SKurt Hackel spin_unlock(&dlm->ast_lock); 5216714d8e8SKurt Hackel 5226714d8e8SKurt Hackel if (lock->ml.node != dlm->node_num) { 5236714d8e8SKurt Hackel ret = dlm_do_remote_ast(dlm, res, lock); 5246714d8e8SKurt Hackel if (ret < 0) 5256714d8e8SKurt Hackel mlog_errno(ret); 5266714d8e8SKurt Hackel } else 5276714d8e8SKurt Hackel dlm_do_local_ast(dlm, res, lock); 5286714d8e8SKurt Hackel 5296714d8e8SKurt Hackel spin_lock(&dlm->ast_lock); 5306714d8e8SKurt Hackel 5316714d8e8SKurt Hackel /* possible that another ast was queued while 5326714d8e8SKurt Hackel * we were delivering the last one */ 5336714d8e8SKurt Hackel if (!list_empty(&lock->ast_list)) { 5346714d8e8SKurt Hackel mlog(0, "aha another ast got queued while " 5356714d8e8SKurt Hackel "we were finishing the last one. will " 5366714d8e8SKurt Hackel "keep the ast_pending flag set.\n"); 5376714d8e8SKurt Hackel } else 5386714d8e8SKurt Hackel lock->ast_pending = 0; 5396714d8e8SKurt Hackel 5406714d8e8SKurt Hackel /* drop the extra ref. 5416714d8e8SKurt Hackel * this may drop it completely. */ 5426714d8e8SKurt Hackel dlm_lock_put(lock); 5436714d8e8SKurt Hackel dlm_lockres_release_ast(dlm, res); 5446714d8e8SKurt Hackel } 5456714d8e8SKurt Hackel 5466714d8e8SKurt Hackel while (!list_empty(&dlm->pending_basts)) { 5476714d8e8SKurt Hackel lock = list_entry(dlm->pending_basts.next, 5486714d8e8SKurt Hackel struct dlm_lock, bast_list); 5496714d8e8SKurt Hackel /* get an extra ref on lock */ 5506714d8e8SKurt Hackel dlm_lock_get(lock); 5516714d8e8SKurt Hackel res = lock->lockres; 5526714d8e8SKurt Hackel 5536714d8e8SKurt Hackel BUG_ON(!lock->bast_pending); 5546714d8e8SKurt Hackel 5556714d8e8SKurt Hackel /* get the highest blocked lock, and reset */ 5566714d8e8SKurt Hackel spin_lock(&lock->spinlock); 5576714d8e8SKurt Hackel BUG_ON(lock->ml.highest_blocked <= LKM_IVMODE); 5586714d8e8SKurt Hackel hi = lock->ml.highest_blocked; 5596714d8e8SKurt Hackel lock->ml.highest_blocked = LKM_IVMODE; 5606714d8e8SKurt Hackel spin_unlock(&lock->spinlock); 5616714d8e8SKurt Hackel 5626714d8e8SKurt Hackel /* remove from list (including ref) */ 5636714d8e8SKurt Hackel list_del_init(&lock->bast_list); 5646714d8e8SKurt Hackel dlm_lock_put(lock); 5656714d8e8SKurt Hackel spin_unlock(&dlm->ast_lock); 5666714d8e8SKurt Hackel 5676714d8e8SKurt Hackel mlog(0, "delivering a bast for this lockres " 5686714d8e8SKurt Hackel "(blocked = %d\n", hi); 5696714d8e8SKurt Hackel 5706714d8e8SKurt Hackel if (lock->ml.node != dlm->node_num) { 5716714d8e8SKurt Hackel ret = dlm_send_proxy_bast(dlm, res, lock, hi); 5726714d8e8SKurt Hackel if (ret < 0) 5736714d8e8SKurt Hackel mlog_errno(ret); 5746714d8e8SKurt Hackel } else 5756714d8e8SKurt Hackel dlm_do_local_bast(dlm, res, lock, hi); 5766714d8e8SKurt Hackel 5776714d8e8SKurt Hackel spin_lock(&dlm->ast_lock); 5786714d8e8SKurt Hackel 5796714d8e8SKurt Hackel /* possible that another bast was queued while 5806714d8e8SKurt Hackel * we were delivering the last one */ 5816714d8e8SKurt Hackel if (!list_empty(&lock->bast_list)) { 5826714d8e8SKurt Hackel mlog(0, "aha another bast got queued while " 5836714d8e8SKurt Hackel "we were finishing the last one. will " 5846714d8e8SKurt Hackel "keep the bast_pending flag set.\n"); 5856714d8e8SKurt Hackel } else 5866714d8e8SKurt Hackel lock->bast_pending = 0; 5876714d8e8SKurt Hackel 5886714d8e8SKurt Hackel /* drop the extra ref. 5896714d8e8SKurt Hackel * this may drop it completely. */ 5906714d8e8SKurt Hackel dlm_lock_put(lock); 5916714d8e8SKurt Hackel dlm_lockres_release_ast(dlm, res); 5926714d8e8SKurt Hackel } 5936714d8e8SKurt Hackel wake_up(&dlm->ast_wq); 5946714d8e8SKurt Hackel spin_unlock(&dlm->ast_lock); 5956714d8e8SKurt Hackel } 5966714d8e8SKurt Hackel 5976714d8e8SKurt Hackel 5986714d8e8SKurt Hackel #define DLM_THREAD_TIMEOUT_MS (4 * 1000) 5996714d8e8SKurt Hackel #define DLM_THREAD_MAX_DIRTY 100 6006714d8e8SKurt Hackel #define DLM_THREAD_MAX_ASTS 10 6016714d8e8SKurt Hackel 6026714d8e8SKurt Hackel static int dlm_thread(void *data) 6036714d8e8SKurt Hackel { 6046714d8e8SKurt Hackel struct dlm_lock_resource *res; 6056714d8e8SKurt Hackel struct dlm_ctxt *dlm = data; 6066714d8e8SKurt Hackel unsigned long timeout = msecs_to_jiffies(DLM_THREAD_TIMEOUT_MS); 6076714d8e8SKurt Hackel 6086714d8e8SKurt Hackel mlog(0, "dlm thread running for %s...\n", dlm->name); 6096714d8e8SKurt Hackel 6106714d8e8SKurt Hackel while (!kthread_should_stop()) { 6116714d8e8SKurt Hackel int n = DLM_THREAD_MAX_DIRTY; 6126714d8e8SKurt Hackel 6136714d8e8SKurt Hackel /* dlm_shutting_down is very point-in-time, but that 6146714d8e8SKurt Hackel * doesn't matter as we'll just loop back around if we 6156714d8e8SKurt Hackel * get false on the leading edge of a state 6166714d8e8SKurt Hackel * transition. */ 6176714d8e8SKurt Hackel dlm_run_purge_list(dlm, dlm_shutting_down(dlm)); 6186714d8e8SKurt Hackel 6196714d8e8SKurt Hackel /* We really don't want to hold dlm->spinlock while 6206714d8e8SKurt Hackel * calling dlm_shuffle_lists on each lockres that 6216714d8e8SKurt Hackel * needs to have its queues adjusted and AST/BASTs 6226714d8e8SKurt Hackel * run. So let's pull each entry off the dirty_list 6236714d8e8SKurt Hackel * and drop dlm->spinlock ASAP. Once off the list, 6246714d8e8SKurt Hackel * res->spinlock needs to be taken again to protect 6256714d8e8SKurt Hackel * the queues while calling dlm_shuffle_lists. */ 6266714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 6276714d8e8SKurt Hackel while (!list_empty(&dlm->dirty_list)) { 6286714d8e8SKurt Hackel int delay = 0; 6296714d8e8SKurt Hackel res = list_entry(dlm->dirty_list.next, 6306714d8e8SKurt Hackel struct dlm_lock_resource, dirty); 6316714d8e8SKurt Hackel 6326714d8e8SKurt Hackel /* peel a lockres off, remove it from the list, 6336714d8e8SKurt Hackel * unset the dirty flag and drop the dlm lock */ 6346714d8e8SKurt Hackel BUG_ON(!res); 6356714d8e8SKurt Hackel dlm_lockres_get(res); 6366714d8e8SKurt Hackel 6376714d8e8SKurt Hackel spin_lock(&res->spinlock); 6386714d8e8SKurt Hackel res->state &= ~DLM_LOCK_RES_DIRTY; 6396714d8e8SKurt Hackel list_del_init(&res->dirty); 6406714d8e8SKurt Hackel spin_unlock(&res->spinlock); 6416714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 6426714d8e8SKurt Hackel 6436714d8e8SKurt Hackel /* lockres can be re-dirtied/re-added to the 6446714d8e8SKurt Hackel * dirty_list in this gap, but that is ok */ 6456714d8e8SKurt Hackel 6466714d8e8SKurt Hackel spin_lock(&res->spinlock); 6476714d8e8SKurt Hackel if (res->owner != dlm->node_num) { 6486714d8e8SKurt Hackel __dlm_print_one_lock_resource(res); 6496714d8e8SKurt Hackel mlog(ML_ERROR, "inprog:%s, mig:%s, reco:%s, dirty:%s\n", 6506714d8e8SKurt Hackel res->state & DLM_LOCK_RES_IN_PROGRESS ? "yes" : "no", 6516714d8e8SKurt Hackel res->state & DLM_LOCK_RES_MIGRATING ? "yes" : "no", 6526714d8e8SKurt Hackel res->state & DLM_LOCK_RES_RECOVERING ? "yes" : "no", 6536714d8e8SKurt Hackel res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no"); 6546714d8e8SKurt Hackel } 6556714d8e8SKurt Hackel BUG_ON(res->owner != dlm->node_num); 6566714d8e8SKurt Hackel 6576714d8e8SKurt Hackel /* it is now ok to move lockreses in these states 6586714d8e8SKurt Hackel * to the dirty list, assuming that they will only be 6596714d8e8SKurt Hackel * dirty for a short while. */ 6606714d8e8SKurt Hackel if (res->state & (DLM_LOCK_RES_IN_PROGRESS | 6616714d8e8SKurt Hackel DLM_LOCK_RES_MIGRATING | 6626714d8e8SKurt Hackel DLM_LOCK_RES_RECOVERING)) { 6636714d8e8SKurt Hackel /* move it to the tail and keep going */ 6646714d8e8SKurt Hackel spin_unlock(&res->spinlock); 6656714d8e8SKurt Hackel mlog(0, "delaying list shuffling for in-" 6666714d8e8SKurt Hackel "progress lockres %.*s, state=%d\n", 6676714d8e8SKurt Hackel res->lockname.len, res->lockname.name, 6686714d8e8SKurt Hackel res->state); 6696714d8e8SKurt Hackel delay = 1; 6706714d8e8SKurt Hackel goto in_progress; 6716714d8e8SKurt Hackel } 6726714d8e8SKurt Hackel 6736714d8e8SKurt Hackel /* at this point the lockres is not migrating/ 6746714d8e8SKurt Hackel * recovering/in-progress. we have the lockres 6756714d8e8SKurt Hackel * spinlock and do NOT have the dlm lock. 6766714d8e8SKurt Hackel * safe to reserve/queue asts and run the lists. */ 6776714d8e8SKurt Hackel 6788d79d088SKurt Hackel mlog(0, "calling dlm_shuffle_lists with dlm=%s, " 6798d79d088SKurt Hackel "res=%.*s\n", dlm->name, 6808d79d088SKurt Hackel res->lockname.len, res->lockname.name); 6816714d8e8SKurt Hackel 6826714d8e8SKurt Hackel /* called while holding lockres lock */ 6836714d8e8SKurt Hackel dlm_shuffle_lists(dlm, res); 6846714d8e8SKurt Hackel spin_unlock(&res->spinlock); 6856714d8e8SKurt Hackel 6866714d8e8SKurt Hackel dlm_lockres_calc_usage(dlm, res); 6876714d8e8SKurt Hackel 6886714d8e8SKurt Hackel in_progress: 6896714d8e8SKurt Hackel 6906714d8e8SKurt Hackel spin_lock(&dlm->spinlock); 6916714d8e8SKurt Hackel /* if the lock was in-progress, stick 6926714d8e8SKurt Hackel * it on the back of the list */ 6936714d8e8SKurt Hackel if (delay) { 6946714d8e8SKurt Hackel spin_lock(&res->spinlock); 6956714d8e8SKurt Hackel list_add_tail(&res->dirty, &dlm->dirty_list); 6966714d8e8SKurt Hackel res->state |= DLM_LOCK_RES_DIRTY; 6976714d8e8SKurt Hackel spin_unlock(&res->spinlock); 6986714d8e8SKurt Hackel } 6996714d8e8SKurt Hackel dlm_lockres_put(res); 7006714d8e8SKurt Hackel 7016714d8e8SKurt Hackel /* unlikely, but we may need to give time to 7026714d8e8SKurt Hackel * other tasks */ 7036714d8e8SKurt Hackel if (!--n) { 7046714d8e8SKurt Hackel mlog(0, "throttling dlm_thread\n"); 7056714d8e8SKurt Hackel break; 7066714d8e8SKurt Hackel } 7076714d8e8SKurt Hackel } 7086714d8e8SKurt Hackel 7096714d8e8SKurt Hackel spin_unlock(&dlm->spinlock); 7106714d8e8SKurt Hackel dlm_flush_asts(dlm); 7116714d8e8SKurt Hackel 7126714d8e8SKurt Hackel /* yield and continue right away if there is more work to do */ 7136714d8e8SKurt Hackel if (!n) { 7146714d8e8SKurt Hackel yield(); 7156714d8e8SKurt Hackel continue; 7166714d8e8SKurt Hackel } 7176714d8e8SKurt Hackel 7186714d8e8SKurt Hackel wait_event_interruptible_timeout(dlm->dlm_thread_wq, 7196714d8e8SKurt Hackel !dlm_dirty_list_empty(dlm) || 7206714d8e8SKurt Hackel kthread_should_stop(), 7216714d8e8SKurt Hackel timeout); 7226714d8e8SKurt Hackel } 7236714d8e8SKurt Hackel 7246714d8e8SKurt Hackel mlog(0, "quitting DLM thread\n"); 7256714d8e8SKurt Hackel return 0; 7266714d8e8SKurt Hackel } 727