/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dlmglue.c
 *
 * Code which implements an OCFS2 specific interface to our DLM.
 *
 * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/time.h>
#include <linux/quotaops.h>
#include <linux/sched/signal.h>

#define MLOG_MASK_PREFIX ML_DLM_GLUE
#include <cluster/masklog.h>

#include "ocfs2.h"
#include "ocfs2_lockingver.h"

#include "alloc.h"
#include "dcache.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "file.h"
#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
#include "stackglue.h"
#include "slot_map.h"
#include "super.h"
#include "uptodate.h"
#include "quota.h"
#include "refcounttree.h"
#include "acl.h"

#include "buffer_head_io.h"

/*
 * Per-waiter bookkeeping record.
 *
 * NOTE(review): presumably queued through mw_item on a lockres'
 * l_mask_waiters list and completed once the lockres flags masked with
 * mw_mask equal mw_goal; the code doing that is outside this section -
 * confirm before relying on it.
 *
 * mw_lock_start only exists with CONFIG_OCFS2_FS_STATS; it is written by
 * ocfs2_init_start_time() and read by ocfs2_update_lock_stats().
 */
struct ocfs2_mask_waiter {
	struct list_head	mw_item;
	int			mw_status;
	struct completion	mw_complete;
	unsigned long		mw_mask;
	unsigned long		mw_goal;
#ifdef CONFIG_OCFS2_FS_STATS
	ktime_t			mw_lock_start;
#endif
};

/* ->get_osb callbacks, referenced by the lock operation tables below. */
static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);

/*
 * Return value from ->downconvert_worker functions.
 *
 * These control the precise actions of ocfs2_unblock_lock()
 * and ocfs2_process_blocked_lock()
 *
 */
enum ocfs2_unblock_action {
	UNBLOCK_CONTINUE	= 0, /* Continue downconvert */
	UNBLOCK_CONTINUE_POST	= 1, /* Continue downconvert, fire
				      * ->post_unlock callback */
	UNBLOCK_STOP_POST	= 2, /* Do not downconvert, fire
				      * ->post_unlock() callback. */
};

/*
 * Pairs a requeue flag with an ocfs2_unblock_action.
 * NOTE(review): no user of this struct is visible in this section.
 */
struct ocfs2_unblock_ctl {
	int requeue;
	enum ocfs2_unblock_action unblock_action;
};

/*
 * Lockdep class keys: one per lock type, indexed by the
 * enum ocfs2_lock_type value (see ocfs2_lock_res_init_common()).
 */
struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];

/* Lock-type specific callbacks, wired into the lock operation tables below. */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level);
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);

static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking);

static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking);

static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres);

static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
					    int new_level);
static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
					 int blocking);

/*
 * Convenience wrapper: forwards to ocfs2_dump_meta_lvb_info(), filling in
 * the calling function's name and line number automatically.
 */
#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)

/* This aids in debugging situations where a bad LVB might be involved.
*/ 1246cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level, 1256cb129f5SAdrian Bunk const char *function, 1266cb129f5SAdrian Bunk unsigned int line, 1276cb129f5SAdrian Bunk struct ocfs2_lock_res *lockres) 1286cb129f5SAdrian Bunk { 129a641dc2aSMark Fasheh struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 1306cb129f5SAdrian Bunk 1316cb129f5SAdrian Bunk mlog(level, "LVB information for %s (called from %s:%u):\n", 1326cb129f5SAdrian Bunk lockres->l_name, function, line); 1336cb129f5SAdrian Bunk mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", 1346cb129f5SAdrian Bunk lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), 1356cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_igeneration)); 1366cb129f5SAdrian Bunk mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 1376cb129f5SAdrian Bunk (unsigned long long)be64_to_cpu(lvb->lvb_isize), 1386cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 1396cb129f5SAdrian Bunk be16_to_cpu(lvb->lvb_imode)); 1406cb129f5SAdrian Bunk mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 1416cb129f5SAdrian Bunk "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), 1426cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_iatime_packed), 1436cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_ictime_packed), 1446cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_imtime_packed), 1456cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iattr)); 1466cb129f5SAdrian Bunk } 1476cb129f5SAdrian Bunk 1486cb129f5SAdrian Bunk 149f625c979SMark Fasheh /* 150f625c979SMark Fasheh * OCFS2 Lock Resource Operations 151f625c979SMark Fasheh * 152f625c979SMark Fasheh * These fine tune the behavior of the generic dlmglue locking infrastructure. 1530d5dc6c2SMark Fasheh * 1540d5dc6c2SMark Fasheh * The most basic of lock types can point ->l_priv to their respective 1550d5dc6c2SMark Fasheh * struct ocfs2_super and allow the default actions to manage things. 
 *
 * Right now, each lock type also needs to implement an init function,
 * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
 * should be called when the lock is no longer needed (i.e., object
 * destruction time).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
	 * this callback if ->l_priv is not an ocfs2_super pointer.
	 * When it is left NULL, ocfs2_get_lockres_osb() casts ->l_priv
	 * to an ocfs2_super pointer directly.
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optionally called in the downconvert thread after a
	 * successful downconvert. The lockres will not be referenced
	 * after this callback is called, so it is safe to free
	 * memory, etc.
	 *
	 * The exact semantics of when this is called are controlled
	 * by ->downconvert_worker()
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Allow a lock type to add checks to determine whether it is
	 * safe to downconvert a lock. Return 0 to re-queue the
	 * downconvert at a later time, nonzero to continue.
	 *
	 * The int argument is the level being converted to (named
	 * new_level in the implementations declared in this file).
	 *
	 * For most locks, the default checks that there are no
	 * incompatible holders are sufficient.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Allows a lock type to populate the lock value block. This
	 * is called on downconvert, and when we drop a lock.
	 *
	 * Locks that want to use this should set LOCK_TYPE_USES_LVB
	 * in the flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Called from the downconvert thread when it is determined
	 * that a lock will be downconverted. This is called without
	 * any locks held so the function can do work that might
	 * schedule (syncing out data, etc).
	 *
	 * The int argument is named "blocking" in the implementations
	 * declared in this file.
	 *
	 * This should return any one of the ocfs2_unblock_action
	 * values, depending on what it wants the thread to do.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* flags which describe the specific requirements
	 * of a lock type. Descriptions of each individual flag follow.
	 */
	int flags;
};

/*
 * Some locks want to "refresh" potentially stale data when a
 * meaningful (PRMODE or EXMODE) lock level is first obtained.
If this 224f625c979SMark Fasheh * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the 225f625c979SMark Fasheh * individual lockres l_flags member from the ast function. It is 226f625c979SMark Fasheh * expected that the locking wrapper will clear the 227f625c979SMark Fasheh * OCFS2_LOCK_NEEDS_REFRESH flag when done. 228f625c979SMark Fasheh */ 229f625c979SMark Fasheh #define LOCK_TYPE_REQUIRES_REFRESH 0x1 230f625c979SMark Fasheh 231b80fc012SMark Fasheh /* 2325ef0d4eaSMark Fasheh * Indicate that a lock type makes use of the lock value block. The 2335ef0d4eaSMark Fasheh * ->set_lvb lock type callback must be defined. 234b80fc012SMark Fasheh */ 235b80fc012SMark Fasheh #define LOCK_TYPE_USES_LVB 0x2 236b80fc012SMark Fasheh 237ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 23854a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 239f625c979SMark Fasheh .flags = 0, 240ccd979bdSMark Fasheh }; 241ccd979bdSMark Fasheh 242e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { 24354a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 244810d5aebSMark Fasheh .check_downconvert = ocfs2_check_meta_downconvert, 245810d5aebSMark Fasheh .set_lvb = ocfs2_set_meta_lvb, 246f1f54068SMark Fasheh .downconvert_worker = ocfs2_data_convert_worker, 247b80fc012SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 248ccd979bdSMark Fasheh }; 249ccd979bdSMark Fasheh 250ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = { 251f625c979SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH, 252ccd979bdSMark Fasheh }; 253ccd979bdSMark Fasheh 254ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = { 255f625c979SMark Fasheh .flags = 0, 256ccd979bdSMark Fasheh }; 257ccd979bdSMark Fasheh 2586ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { 2596ca497a8Swengang wang .flags = 0, 2606ca497a8Swengang wang }; 2616ca497a8Swengang wang 2624882abebSGang He static 
struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = { 2634882abebSGang He .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 2644882abebSGang He }; 2654882abebSGang He 26683273932SSrinivas Eeda static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { 26783273932SSrinivas Eeda .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 26883273932SSrinivas Eeda }; 26983273932SSrinivas Eeda 270d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { 27154a7e755SMark Fasheh .get_osb = ocfs2_get_dentry_osb, 272d680efe9SMark Fasheh .post_unlock = ocfs2_dentry_post_unlock, 273cc567d89SMark Fasheh .downconvert_worker = ocfs2_dentry_convert_worker, 274f625c979SMark Fasheh .flags = 0, 275d680efe9SMark Fasheh }; 276d680efe9SMark Fasheh 27750008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 27850008630STiger Yang .get_osb = ocfs2_get_inode_osb, 27950008630STiger Yang .flags = 0, 28050008630STiger Yang }; 28150008630STiger Yang 282cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = { 283cf8e06f1SMark Fasheh .get_osb = ocfs2_get_file_osb, 284cf8e06f1SMark Fasheh .flags = 0, 285cf8e06f1SMark Fasheh }; 286cf8e06f1SMark Fasheh 2879e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { 2889e33d69fSJan Kara .set_lvb = ocfs2_set_qinfo_lvb, 2899e33d69fSJan Kara .get_osb = ocfs2_get_qinfo_osb, 2909e33d69fSJan Kara .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, 2919e33d69fSJan Kara }; 2929e33d69fSJan Kara 2938dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { 2948dec98edSTao Ma .check_downconvert = ocfs2_check_refcount_downconvert, 2958dec98edSTao Ma .downconvert_worker = ocfs2_refcount_convert_worker, 2968dec98edSTao Ma .flags = 0, 2978dec98edSTao Ma }; 2988dec98edSTao Ma 299ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 300ccd979bdSMark Fasheh { 301ccd979bdSMark Fasheh return lockres->l_type == OCFS2_LOCK_TYPE_META || 
30250008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_RW || 30350008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 304ccd979bdSMark Fasheh } 305ccd979bdSMark Fasheh 306c0e41338SJoel Becker static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) 307a796d286SJoel Becker { 308a796d286SJoel Becker return container_of(lksb, struct ocfs2_lock_res, l_lksb); 309a796d286SJoel Becker } 310a796d286SJoel Becker 311ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 312ccd979bdSMark Fasheh { 313ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_inode_lock(lockres)); 314ccd979bdSMark Fasheh 315ccd979bdSMark Fasheh return (struct inode *) lockres->l_priv; 316ccd979bdSMark Fasheh } 317ccd979bdSMark Fasheh 318d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) 319d680efe9SMark Fasheh { 320d680efe9SMark Fasheh BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); 321d680efe9SMark Fasheh 322d680efe9SMark Fasheh return (struct ocfs2_dentry_lock *)lockres->l_priv; 323d680efe9SMark Fasheh } 324d680efe9SMark Fasheh 3259e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) 3269e33d69fSJan Kara { 3279e33d69fSJan Kara BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); 3289e33d69fSJan Kara 3299e33d69fSJan Kara return (struct ocfs2_mem_dqinfo *)lockres->l_priv; 3309e33d69fSJan Kara } 3319e33d69fSJan Kara 3328dec98edSTao Ma static inline struct ocfs2_refcount_tree * 3338dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res) 3348dec98edSTao Ma { 3358dec98edSTao Ma return container_of(res, struct ocfs2_refcount_tree, rf_lockres); 3368dec98edSTao Ma } 3378dec98edSTao Ma 33854a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 33954a7e755SMark Fasheh { 34054a7e755SMark Fasheh if (lockres->l_ops->get_osb) 34154a7e755SMark Fasheh return 
lockres->l_ops->get_osb(lockres); 34254a7e755SMark Fasheh 34354a7e755SMark Fasheh return (struct ocfs2_super *)lockres->l_priv; 34454a7e755SMark Fasheh } 34554a7e755SMark Fasheh 346ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 347ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 348ccd979bdSMark Fasheh int level, 349bd3e7610SJoel Becker u32 dlm_flags); 350ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 351ccd979bdSMark Fasheh int wanted); 352cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 353ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 354cb25797dSJan Kara int level, unsigned long caller_ip); 355cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, 356cb25797dSJan Kara struct ocfs2_lock_res *lockres, 357cb25797dSJan Kara int level) 358cb25797dSJan Kara { 359cb25797dSJan Kara __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); 360cb25797dSJan Kara } 361cb25797dSJan Kara 362ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 363ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 364ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 365ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level); 366ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 367ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 368ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 369ccd979bdSMark Fasheh int convert); 3707431cd7eSJoel Becker #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ 371c74ff8bbSSunil Mushran if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \ 3727431cd7eSJoel Becker mlog(ML_ERROR, "DLM error %d while 
calling %s on resource %s\n", \ 3737431cd7eSJoel Becker _err, _func, _lockres->l_name); \ 374c74ff8bbSSunil Mushran else \ 375c74ff8bbSSunil Mushran mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \ 376c74ff8bbSSunil Mushran _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \ 377c74ff8bbSSunil Mushran (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \ 378ccd979bdSMark Fasheh } while (0) 37934d024f8SMark Fasheh static int ocfs2_downconvert_thread(void *arg); 38034d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 381ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 382e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 383ccd979bdSMark Fasheh struct buffer_head **bh); 384ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 385ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level); 386de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 387cf8e06f1SMark Fasheh int new_level); 388cf8e06f1SMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 389cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres, 390cf8e06f1SMark Fasheh int new_level, 391de551246SJoel Becker int lvb, 392de551246SJoel Becker unsigned int generation); 393cf8e06f1SMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 394cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 395cf8e06f1SMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 396cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 397cf8e06f1SMark Fasheh 398ccd979bdSMark Fasheh 399ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 400ccd979bdSMark Fasheh u64 blkno, 401ccd979bdSMark Fasheh u32 generation, 402ccd979bdSMark Fasheh char *name) 403ccd979bdSMark Fasheh { 404ccd979bdSMark Fasheh int len; 405ccd979bdSMark Fasheh 406ccd979bdSMark Fasheh BUG_ON(type 
>= OCFS2_NUM_LOCK_TYPES); 407ccd979bdSMark Fasheh 408b0697053SMark Fasheh len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 409b0697053SMark Fasheh ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, 410b0697053SMark Fasheh (long long)blkno, generation); 411ccd979bdSMark Fasheh 412ccd979bdSMark Fasheh BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 413ccd979bdSMark Fasheh 414ccd979bdSMark Fasheh mlog(0, "built lock resource with name: %s\n", name); 415ccd979bdSMark Fasheh } 416ccd979bdSMark Fasheh 41734af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 418ccd979bdSMark Fasheh 419ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 420ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug) 421ccd979bdSMark Fasheh { 422ccd979bdSMark Fasheh mlog(0, "Add tracking for lockres %s\n", res->l_name); 423ccd979bdSMark Fasheh 424ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 425ccd979bdSMark Fasheh list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); 426ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 427ccd979bdSMark Fasheh } 428ccd979bdSMark Fasheh 429ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) 430ccd979bdSMark Fasheh { 431ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 432ccd979bdSMark Fasheh if (!list_empty(&res->l_debug_list)) 433ccd979bdSMark Fasheh list_del_init(&res->l_debug_list); 434ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 435ccd979bdSMark Fasheh } 436ccd979bdSMark Fasheh 4378ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 4388ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 4398ddb7b00SSunil Mushran { 4408ddb7b00SSunil Mushran res->l_lock_refresh = 0; 4415bc970e8SSunil Mushran memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats)); 4425bc970e8SSunil Mushran memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats)); 4438ddb7b00SSunil Mushran } 
4448ddb7b00SSunil Mushran 4458ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, 4468ddb7b00SSunil Mushran struct ocfs2_mask_waiter *mw, int ret) 4478ddb7b00SSunil Mushran { 4485bc970e8SSunil Mushran u32 usec; 4495bc970e8SSunil Mushran ktime_t kt; 4505bc970e8SSunil Mushran struct ocfs2_lock_stats *stats; 4518ddb7b00SSunil Mushran 4525bc970e8SSunil Mushran if (level == LKM_PRMODE) 4535bc970e8SSunil Mushran stats = &res->l_lock_prmode; 4545bc970e8SSunil Mushran else if (level == LKM_EXMODE) 4555bc970e8SSunil Mushran stats = &res->l_lock_exmode; 4565bc970e8SSunil Mushran else 4578ddb7b00SSunil Mushran return; 4588ddb7b00SSunil Mushran 4595bc970e8SSunil Mushran kt = ktime_sub(ktime_get(), mw->mw_lock_start); 4605bc970e8SSunil Mushran usec = ktime_to_us(kt); 4615bc970e8SSunil Mushran 4625bc970e8SSunil Mushran stats->ls_gets++; 4635bc970e8SSunil Mushran stats->ls_total += ktime_to_ns(kt); 4645bc970e8SSunil Mushran /* overflow */ 46516865b7cSroel if (unlikely(stats->ls_gets == 0)) { 4665bc970e8SSunil Mushran stats->ls_gets++; 4675bc970e8SSunil Mushran stats->ls_total = ktime_to_ns(kt); 4685bc970e8SSunil Mushran } 4695bc970e8SSunil Mushran 4705bc970e8SSunil Mushran if (stats->ls_max < usec) 4715bc970e8SSunil Mushran stats->ls_max = usec; 4725bc970e8SSunil Mushran 4738ddb7b00SSunil Mushran if (ret) 4745bc970e8SSunil Mushran stats->ls_fail++; 4758ddb7b00SSunil Mushran } 4768ddb7b00SSunil Mushran 4778ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4788ddb7b00SSunil Mushran { 4798ddb7b00SSunil Mushran lockres->l_lock_refresh++; 4808ddb7b00SSunil Mushran } 4818ddb7b00SSunil Mushran 4828ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4838ddb7b00SSunil Mushran { 4845bc970e8SSunil Mushran mw->mw_lock_start = ktime_get(); 4858ddb7b00SSunil Mushran } 4868ddb7b00SSunil Mushran #else 4878ddb7b00SSunil Mushran static inline void 
ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 4888ddb7b00SSunil Mushran { 4898ddb7b00SSunil Mushran } 4908ddb7b00SSunil Mushran static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, 4918ddb7b00SSunil Mushran int level, struct ocfs2_mask_waiter *mw, int ret) 4928ddb7b00SSunil Mushran { 4938ddb7b00SSunil Mushran } 4948ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4958ddb7b00SSunil Mushran { 4968ddb7b00SSunil Mushran } 4978ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4988ddb7b00SSunil Mushran { 4998ddb7b00SSunil Mushran } 5008ddb7b00SSunil Mushran #endif 5018ddb7b00SSunil Mushran 502ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 503ccd979bdSMark Fasheh struct ocfs2_lock_res *res, 504ccd979bdSMark Fasheh enum ocfs2_lock_type type, 505ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops, 506ccd979bdSMark Fasheh void *priv) 507ccd979bdSMark Fasheh { 508ccd979bdSMark Fasheh res->l_type = type; 509ccd979bdSMark Fasheh res->l_ops = ops; 510ccd979bdSMark Fasheh res->l_priv = priv; 511ccd979bdSMark Fasheh 512bd3e7610SJoel Becker res->l_level = DLM_LOCK_IV; 513bd3e7610SJoel Becker res->l_requested = DLM_LOCK_IV; 514bd3e7610SJoel Becker res->l_blocking = DLM_LOCK_IV; 515ccd979bdSMark Fasheh res->l_action = OCFS2_AST_INVALID; 516ccd979bdSMark Fasheh res->l_unlock_action = OCFS2_UNLOCK_INVALID; 517ccd979bdSMark Fasheh 518ccd979bdSMark Fasheh res->l_flags = OCFS2_LOCK_INITIALIZED; 519ccd979bdSMark Fasheh 520ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 5218ddb7b00SSunil Mushran 5228ddb7b00SSunil Mushran ocfs2_init_lock_stats(res); 523cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 524cb25797dSJan Kara if (type != OCFS2_LOCK_TYPE_OPEN) 525cb25797dSJan Kara lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], 526cb25797dSJan Kara &lockdep_keys[type], 0); 527cb25797dSJan Kara else 
528cb25797dSJan Kara res->l_lockdep_map.key = NULL; 529cb25797dSJan Kara #endif 530ccd979bdSMark Fasheh } 531ccd979bdSMark Fasheh 532ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 533ccd979bdSMark Fasheh { 534ccd979bdSMark Fasheh /* This also clears out the lock status block */ 535ccd979bdSMark Fasheh memset(res, 0, sizeof(struct ocfs2_lock_res)); 536ccd979bdSMark Fasheh spin_lock_init(&res->l_lock); 537ccd979bdSMark Fasheh init_waitqueue_head(&res->l_event); 538ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_blocked_list); 539ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_mask_waiters); 540439a36b8SEric Ren INIT_LIST_HEAD(&res->l_holders); 541ccd979bdSMark Fasheh } 542ccd979bdSMark Fasheh 543ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 544ccd979bdSMark Fasheh enum ocfs2_lock_type type, 54524c19ef4SMark Fasheh unsigned int generation, 546ccd979bdSMark Fasheh struct inode *inode) 547ccd979bdSMark Fasheh { 548ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops; 549ccd979bdSMark Fasheh 550ccd979bdSMark Fasheh switch(type) { 551ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_RW: 552ccd979bdSMark Fasheh ops = &ocfs2_inode_rw_lops; 553ccd979bdSMark Fasheh break; 554ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_META: 555e63aecb6SMark Fasheh ops = &ocfs2_inode_inode_lops; 556ccd979bdSMark Fasheh break; 55750008630STiger Yang case OCFS2_LOCK_TYPE_OPEN: 55850008630STiger Yang ops = &ocfs2_inode_open_lops; 55950008630STiger Yang break; 560ccd979bdSMark Fasheh default: 561ccd979bdSMark Fasheh mlog_bug_on_msg(1, "type: %d\n", type); 562ccd979bdSMark Fasheh ops = NULL; /* thanks, gcc */ 563ccd979bdSMark Fasheh break; 564ccd979bdSMark Fasheh }; 565ccd979bdSMark Fasheh 566d680efe9SMark Fasheh ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 56724c19ef4SMark Fasheh generation, res->l_name); 568d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 569d680efe9SMark Fasheh } 
570d680efe9SMark Fasheh 57154a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 57254a7e755SMark Fasheh { 57354a7e755SMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 57454a7e755SMark Fasheh 57554a7e755SMark Fasheh return OCFS2_SB(inode->i_sb); 57654a7e755SMark Fasheh } 57754a7e755SMark Fasheh 5789e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) 5799e33d69fSJan Kara { 5809e33d69fSJan Kara struct ocfs2_mem_dqinfo *info = lockres->l_priv; 5819e33d69fSJan Kara 5829e33d69fSJan Kara return OCFS2_SB(info->dqi_gi.dqi_sb); 5839e33d69fSJan Kara } 5849e33d69fSJan Kara 585cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 586cf8e06f1SMark Fasheh { 587cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = lockres->l_priv; 588cf8e06f1SMark Fasheh 589cf8e06f1SMark Fasheh return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 590cf8e06f1SMark Fasheh } 591cf8e06f1SMark Fasheh 592d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 593d680efe9SMark Fasheh { 594d680efe9SMark Fasheh __be64 inode_blkno_be; 595d680efe9SMark Fasheh 596d680efe9SMark Fasheh memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 597d680efe9SMark Fasheh sizeof(__be64)); 598d680efe9SMark Fasheh 599d680efe9SMark Fasheh return be64_to_cpu(inode_blkno_be); 600d680efe9SMark Fasheh } 601d680efe9SMark Fasheh 60254a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 60354a7e755SMark Fasheh { 60454a7e755SMark Fasheh struct ocfs2_dentry_lock *dl = lockres->l_priv; 60554a7e755SMark Fasheh 60654a7e755SMark Fasheh return OCFS2_SB(dl->dl_inode->i_sb); 60754a7e755SMark Fasheh } 60854a7e755SMark Fasheh 609d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 610d680efe9SMark Fasheh u64 parent, struct inode *inode) 611d680efe9SMark Fasheh { 
612d680efe9SMark Fasheh int len; 613d680efe9SMark Fasheh u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 614d680efe9SMark Fasheh __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 615d680efe9SMark Fasheh struct ocfs2_lock_res *lockres = &dl->dl_lockres; 616d680efe9SMark Fasheh 617d680efe9SMark Fasheh ocfs2_lock_res_init_once(lockres); 618d680efe9SMark Fasheh 619d680efe9SMark Fasheh /* 620d680efe9SMark Fasheh * Unfortunately, the standard lock naming scheme won't work 621d680efe9SMark Fasheh * here because we have two 16 byte values to use. Instead, 622d680efe9SMark Fasheh * we'll stuff the inode number as a binary value. We still 623d680efe9SMark Fasheh * want error prints to show something without garbling the 624d680efe9SMark Fasheh * display, so drop a null byte in there before the inode 625d680efe9SMark Fasheh * number. A future version of OCFS2 will likely use all 626d680efe9SMark Fasheh * binary lock names. The stringified names have been a 627d680efe9SMark Fasheh * tremendous aid in debugging, but now that the debugfs 628d680efe9SMark Fasheh * interface exists, we can mangle things there if need be. 
629d680efe9SMark Fasheh * 630d680efe9SMark Fasheh * NOTE: We also drop the standard "pad" value (the total lock 631d680efe9SMark Fasheh * name size stays the same though - the last part is all 632d680efe9SMark Fasheh * zeros due to the memset in ocfs2_lock_res_init_once() 633d680efe9SMark Fasheh */ 634d680efe9SMark Fasheh len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 635d680efe9SMark Fasheh "%c%016llx", 636d680efe9SMark Fasheh ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 637d680efe9SMark Fasheh (long long)parent); 638d680efe9SMark Fasheh 639d680efe9SMark Fasheh BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 640d680efe9SMark Fasheh 641d680efe9SMark Fasheh memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 642d680efe9SMark Fasheh sizeof(__be64)); 643d680efe9SMark Fasheh 644d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 645d680efe9SMark Fasheh OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 646d680efe9SMark Fasheh dl); 647ccd979bdSMark Fasheh } 648ccd979bdSMark Fasheh 649ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 650ccd979bdSMark Fasheh struct ocfs2_super *osb) 651ccd979bdSMark Fasheh { 652ccd979bdSMark Fasheh /* Superblock lockres doesn't come from a slab so we call init 653ccd979bdSMark Fasheh * once on it manually. 
*/ 654ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 655d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 656d680efe9SMark Fasheh 0, res->l_name); 657ccd979bdSMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 658ccd979bdSMark Fasheh &ocfs2_super_lops, osb); 659ccd979bdSMark Fasheh } 660ccd979bdSMark Fasheh 661ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 662ccd979bdSMark Fasheh struct ocfs2_super *osb) 663ccd979bdSMark Fasheh { 664ccd979bdSMark Fasheh /* Rename lockres doesn't come from a slab so we call init 665ccd979bdSMark Fasheh * once on it manually. */ 666ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 667d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 668d680efe9SMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 669ccd979bdSMark Fasheh &ocfs2_rename_lops, osb); 670ccd979bdSMark Fasheh } 671ccd979bdSMark Fasheh 6726ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, 6736ca497a8Swengang wang struct ocfs2_super *osb) 6746ca497a8Swengang wang { 6756ca497a8Swengang wang /* nfs_sync lockres doesn't come from a slab so we call init 6766ca497a8Swengang wang * once on it manually. 
*/ 6776ca497a8Swengang wang ocfs2_lock_res_init_once(res); 6786ca497a8Swengang wang ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); 6796ca497a8Swengang wang ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, 6806ca497a8Swengang wang &ocfs2_nfs_sync_lops, osb); 6816ca497a8Swengang wang } 6826ca497a8Swengang wang 6834882abebSGang He void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb) 6844882abebSGang He { 6854882abebSGang He struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; 6864882abebSGang He 6874882abebSGang He ocfs2_lock_res_init_once(lockres); 6884882abebSGang He ocfs2_build_lock_name(OCFS2_LOCK_TYPE_TRIM_FS, 0, 0, lockres->l_name); 6894882abebSGang He ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_TRIM_FS, 6904882abebSGang He &ocfs2_trim_fs_lops, osb); 6914882abebSGang He } 6924882abebSGang He 6934882abebSGang He void ocfs2_trim_fs_lock_res_uninit(struct ocfs2_super *osb) 6944882abebSGang He { 6954882abebSGang He struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; 6964882abebSGang He 6974882abebSGang He ocfs2_simple_drop_lockres(osb, lockres); 6984882abebSGang He ocfs2_lock_res_free(lockres); 6994882abebSGang He } 7004882abebSGang He 70183273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 70283273932SSrinivas Eeda struct ocfs2_super *osb) 70383273932SSrinivas Eeda { 70483273932SSrinivas Eeda ocfs2_lock_res_init_once(res); 70583273932SSrinivas Eeda ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); 70683273932SSrinivas Eeda ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, 70783273932SSrinivas Eeda &ocfs2_orphan_scan_lops, osb); 70883273932SSrinivas Eeda } 70983273932SSrinivas Eeda 710cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 711cf8e06f1SMark Fasheh struct ocfs2_file_private *fp) 712cf8e06f1SMark Fasheh { 713cf8e06f1SMark Fasheh struct inode *inode = fp->fp_file->f_mapping->host; 
714cf8e06f1SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 715cf8e06f1SMark Fasheh 716cf8e06f1SMark Fasheh ocfs2_lock_res_init_once(lockres); 717cf8e06f1SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 718cf8e06f1SMark Fasheh inode->i_generation, lockres->l_name); 719cf8e06f1SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 720cf8e06f1SMark Fasheh OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 721cf8e06f1SMark Fasheh fp); 722cf8e06f1SMark Fasheh lockres->l_flags |= OCFS2_LOCK_NOCACHE; 723cf8e06f1SMark Fasheh } 724cf8e06f1SMark Fasheh 7259e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 7269e33d69fSJan Kara struct ocfs2_mem_dqinfo *info) 7279e33d69fSJan Kara { 7289e33d69fSJan Kara ocfs2_lock_res_init_once(lockres); 7299e33d69fSJan Kara ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, 7309e33d69fSJan Kara 0, lockres->l_name); 7319e33d69fSJan Kara ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, 7329e33d69fSJan Kara OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, 7339e33d69fSJan Kara info); 7349e33d69fSJan Kara } 7359e33d69fSJan Kara 7368dec98edSTao Ma void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, 7378dec98edSTao Ma struct ocfs2_super *osb, u64 ref_blkno, 7388dec98edSTao Ma unsigned int generation) 7398dec98edSTao Ma { 7408dec98edSTao Ma ocfs2_lock_res_init_once(lockres); 7418dec98edSTao Ma ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, 7428dec98edSTao Ma generation, lockres->l_name); 7438dec98edSTao Ma ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, 7448dec98edSTao Ma &ocfs2_refcount_block_lops, osb); 7458dec98edSTao Ma } 7468dec98edSTao Ma 747ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 748ccd979bdSMark Fasheh { 749ccd979bdSMark Fasheh if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 750ccd979bdSMark Fasheh return; 751ccd979bdSMark Fasheh 752ccd979bdSMark Fasheh 
ocfs2_remove_lockres_tracking(res); 753ccd979bdSMark Fasheh 754ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 755ccd979bdSMark Fasheh "Lockres %s is on the blocked list\n", 756ccd979bdSMark Fasheh res->l_name); 757ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 758ccd979bdSMark Fasheh "Lockres %s has mask waiters pending\n", 759ccd979bdSMark Fasheh res->l_name); 760ccd979bdSMark Fasheh mlog_bug_on_msg(spin_is_locked(&res->l_lock), 761ccd979bdSMark Fasheh "Lockres %s is locked\n", 762ccd979bdSMark Fasheh res->l_name); 763ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ro_holders, 764ccd979bdSMark Fasheh "Lockres %s has %u ro holders\n", 765ccd979bdSMark Fasheh res->l_name, res->l_ro_holders); 766ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ex_holders, 767ccd979bdSMark Fasheh "Lockres %s has %u ex holders\n", 768ccd979bdSMark Fasheh res->l_name, res->l_ex_holders); 769ccd979bdSMark Fasheh 770ccd979bdSMark Fasheh /* Need to clear out the lock status block for the dlm */ 771ccd979bdSMark Fasheh memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 772ccd979bdSMark Fasheh 773ccd979bdSMark Fasheh res->l_flags = 0UL; 774ccd979bdSMark Fasheh } 775ccd979bdSMark Fasheh 776439a36b8SEric Ren /* 777439a36b8SEric Ren * Keep a list of processes who have interest in a lockres. 778439a36b8SEric Ren * Note: this is now only uesed for check recursive cluster locking. 
779439a36b8SEric Ren */ 780439a36b8SEric Ren static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres, 781439a36b8SEric Ren struct ocfs2_lock_holder *oh) 782439a36b8SEric Ren { 783439a36b8SEric Ren INIT_LIST_HEAD(&oh->oh_list); 784439a36b8SEric Ren oh->oh_owner_pid = get_pid(task_pid(current)); 785439a36b8SEric Ren 786439a36b8SEric Ren spin_lock(&lockres->l_lock); 787439a36b8SEric Ren list_add_tail(&oh->oh_list, &lockres->l_holders); 788439a36b8SEric Ren spin_unlock(&lockres->l_lock); 789439a36b8SEric Ren } 790439a36b8SEric Ren 791439a36b8SEric Ren static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres, 792439a36b8SEric Ren struct ocfs2_lock_holder *oh) 793439a36b8SEric Ren { 794439a36b8SEric Ren spin_lock(&lockres->l_lock); 795439a36b8SEric Ren list_del(&oh->oh_list); 796439a36b8SEric Ren spin_unlock(&lockres->l_lock); 797439a36b8SEric Ren 798439a36b8SEric Ren put_pid(oh->oh_owner_pid); 799439a36b8SEric Ren } 800439a36b8SEric Ren 801439a36b8SEric Ren static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres) 802439a36b8SEric Ren { 803439a36b8SEric Ren struct ocfs2_lock_holder *oh; 804439a36b8SEric Ren struct pid *pid; 805439a36b8SEric Ren 806439a36b8SEric Ren /* look in the list of holders for one with the current task as owner */ 807439a36b8SEric Ren spin_lock(&lockres->l_lock); 808439a36b8SEric Ren pid = task_pid(current); 809439a36b8SEric Ren list_for_each_entry(oh, &lockres->l_holders, oh_list) { 810439a36b8SEric Ren if (oh->oh_owner_pid == pid) { 811439a36b8SEric Ren spin_unlock(&lockres->l_lock); 812439a36b8SEric Ren return 1; 813439a36b8SEric Ren } 814439a36b8SEric Ren } 815439a36b8SEric Ren spin_unlock(&lockres->l_lock); 816439a36b8SEric Ren 817439a36b8SEric Ren return 0; 818439a36b8SEric Ren } 819439a36b8SEric Ren 820ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 821ccd979bdSMark Fasheh int level) 822ccd979bdSMark Fasheh { 823ccd979bdSMark Fasheh BUG_ON(!lockres); 
824ccd979bdSMark Fasheh 825ccd979bdSMark Fasheh switch(level) { 826bd3e7610SJoel Becker case DLM_LOCK_EX: 827ccd979bdSMark Fasheh lockres->l_ex_holders++; 828ccd979bdSMark Fasheh break; 829bd3e7610SJoel Becker case DLM_LOCK_PR: 830ccd979bdSMark Fasheh lockres->l_ro_holders++; 831ccd979bdSMark Fasheh break; 832ccd979bdSMark Fasheh default: 833ccd979bdSMark Fasheh BUG(); 834ccd979bdSMark Fasheh } 835ccd979bdSMark Fasheh } 836ccd979bdSMark Fasheh 837ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 838ccd979bdSMark Fasheh int level) 839ccd979bdSMark Fasheh { 840ccd979bdSMark Fasheh BUG_ON(!lockres); 841ccd979bdSMark Fasheh 842ccd979bdSMark Fasheh switch(level) { 843bd3e7610SJoel Becker case DLM_LOCK_EX: 844ccd979bdSMark Fasheh BUG_ON(!lockres->l_ex_holders); 845ccd979bdSMark Fasheh lockres->l_ex_holders--; 846ccd979bdSMark Fasheh break; 847bd3e7610SJoel Becker case DLM_LOCK_PR: 848ccd979bdSMark Fasheh BUG_ON(!lockres->l_ro_holders); 849ccd979bdSMark Fasheh lockres->l_ro_holders--; 850ccd979bdSMark Fasheh break; 851ccd979bdSMark Fasheh default: 852ccd979bdSMark Fasheh BUG(); 853ccd979bdSMark Fasheh } 854ccd979bdSMark Fasheh } 855ccd979bdSMark Fasheh 856ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock 857ccd979bdSMark Fasheh * levels are EX, PR, and NL. It *will* have to be adjusted when more 858ccd979bdSMark Fasheh * lock types are added. 
*/ 859ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level) 860ccd979bdSMark Fasheh { 861bd3e7610SJoel Becker int new_level = DLM_LOCK_EX; 862ccd979bdSMark Fasheh 863bd3e7610SJoel Becker if (level == DLM_LOCK_EX) 864bd3e7610SJoel Becker new_level = DLM_LOCK_NL; 865bd3e7610SJoel Becker else if (level == DLM_LOCK_PR) 866bd3e7610SJoel Becker new_level = DLM_LOCK_PR; 867ccd979bdSMark Fasheh return new_level; 868ccd979bdSMark Fasheh } 869ccd979bdSMark Fasheh 870ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres, 871ccd979bdSMark Fasheh unsigned long newflags) 872ccd979bdSMark Fasheh { 873800deef3SChristoph Hellwig struct ocfs2_mask_waiter *mw, *tmp; 874ccd979bdSMark Fasheh 875ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 876ccd979bdSMark Fasheh 877ccd979bdSMark Fasheh lockres->l_flags = newflags; 878ccd979bdSMark Fasheh 879800deef3SChristoph Hellwig list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 880ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 881ccd979bdSMark Fasheh continue; 882ccd979bdSMark Fasheh 883ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 884ccd979bdSMark Fasheh mw->mw_status = 0; 885ccd979bdSMark Fasheh complete(&mw->mw_complete); 886ccd979bdSMark Fasheh } 887ccd979bdSMark Fasheh } 888ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 889ccd979bdSMark Fasheh { 890ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags | or); 891ccd979bdSMark Fasheh } 892ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 893ccd979bdSMark Fasheh unsigned long clear) 894ccd979bdSMark Fasheh { 895ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags & ~clear); 896ccd979bdSMark Fasheh } 897ccd979bdSMark Fasheh 898ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 899ccd979bdSMark Fasheh { 
900ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 901ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 902ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 903bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 904ccd979bdSMark Fasheh 905ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 906ccd979bdSMark Fasheh if (lockres->l_level <= 907ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 908bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_NL; 909ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 910ccd979bdSMark Fasheh } 911ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 912ccd979bdSMark Fasheh } 913ccd979bdSMark Fasheh 914ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 915ccd979bdSMark Fasheh { 916ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 917ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 918ccd979bdSMark Fasheh 919ccd979bdSMark Fasheh /* Convert from RO to EX doesn't really need anything as our 920ccd979bdSMark Fasheh * information is already up to data. 
Convert from NL to 921ccd979bdSMark Fasheh * *anything* however should mark ourselves as needing an 922ccd979bdSMark Fasheh * update */ 923bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_NL && 924f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 925ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 926ccd979bdSMark Fasheh 927ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 928a1912826SSunil Mushran 929a1912826SSunil Mushran /* 930a1912826SSunil Mushran * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing 931a1912826SSunil Mushran * the OCFS2_LOCK_BUSY flag to prevent the dc thread from 932a1912826SSunil Mushran * downconverting the lock before the upconvert has fully completed. 933d1e78238SXue jiufei * Do not prevent the dc thread from downconverting if NONBLOCK lock 934d1e78238SXue jiufei * had already returned. 935a1912826SSunil Mushran */ 936d1e78238SXue jiufei if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED)) 937a1912826SSunil Mushran lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 938d1e78238SXue jiufei else 939d1e78238SXue jiufei lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED); 940a1912826SSunil Mushran 941ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 942ccd979bdSMark Fasheh } 943ccd979bdSMark Fasheh 944ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 945ccd979bdSMark Fasheh { 9463cf0c507SRoel Kluin BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 947ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 948ccd979bdSMark Fasheh 949bd3e7610SJoel Becker if (lockres->l_requested > DLM_LOCK_NL && 950f625c979SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 951f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 952ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 953ccd979bdSMark Fasheh 954ccd979bdSMark Fasheh 
lockres->l_level = lockres->l_requested; 955ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 956ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 957ccd979bdSMark Fasheh } 958ccd979bdSMark Fasheh 959ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 960ccd979bdSMark Fasheh int level) 961ccd979bdSMark Fasheh { 962ccd979bdSMark Fasheh int needs_downconvert = 0; 963ccd979bdSMark Fasheh 964ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 965ccd979bdSMark Fasheh 966ccd979bdSMark Fasheh if (level > lockres->l_blocking) { 967ccd979bdSMark Fasheh /* only schedule a downconvert if we haven't already scheduled 968ccd979bdSMark Fasheh * one that goes low enough to satisfy the level we're 969ccd979bdSMark Fasheh * blocking. this also catches the case where we get 970ccd979bdSMark Fasheh * duplicate BASTs */ 971ccd979bdSMark Fasheh if (ocfs2_highest_compat_lock_level(level) < 972ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) 973ccd979bdSMark Fasheh needs_downconvert = 1; 974ccd979bdSMark Fasheh 975ccd979bdSMark Fasheh lockres->l_blocking = level; 976ccd979bdSMark Fasheh } 977ccd979bdSMark Fasheh 9789b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n", 9799b915181SSunil Mushran lockres->l_name, level, lockres->l_level, lockres->l_blocking, 9809b915181SSunil Mushran needs_downconvert); 9819b915181SSunil Mushran 9820b94a909SWengang Wang if (needs_downconvert) 9830b94a909SWengang Wang lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 984c1e8d35eSTao Ma mlog(0, "needs_downconvert = %d\n", needs_downconvert); 985ccd979bdSMark Fasheh return needs_downconvert; 986ccd979bdSMark Fasheh } 987ccd979bdSMark Fasheh 988de551246SJoel Becker /* 989de551246SJoel Becker * OCFS2_LOCK_PENDING and l_pending_gen. 990de551246SJoel Becker * 991de551246SJoel Becker * Why does OCFS2_LOCK_PENDING exist? 
To close a race between setting 992de551246SJoel Becker * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() 993de551246SJoel Becker * for more details on the race. 994de551246SJoel Becker * 995de551246SJoel Becker * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces 996de551246SJoel Becker * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock() 997de551246SJoel Becker * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear 998de551246SJoel Becker * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, 999de551246SJoel Becker * the caller is going to try to clear PENDING again. If nothing else is 1000de551246SJoel Becker * happening, __lockres_clear_pending() sees PENDING is unset and does 1001de551246SJoel Becker * nothing. 1002de551246SJoel Becker * 1003de551246SJoel Becker * But what if another path (eg downconvert thread) has just started a 1004de551246SJoel Becker * new locking action? The other path has re-set PENDING. Our path 1005de551246SJoel Becker * cannot clear PENDING, because that will re-open the original race 1006de551246SJoel Becker * window. 
1007de551246SJoel Becker * 1008de551246SJoel Becker * [Example] 1009de551246SJoel Becker * 1010de551246SJoel Becker * ocfs2_meta_lock() 1011de551246SJoel Becker * ocfs2_cluster_lock() 1012de551246SJoel Becker * set BUSY 1013de551246SJoel Becker * set PENDING 1014de551246SJoel Becker * drop l_lock 1015de551246SJoel Becker * ocfs2_dlm_lock() 1016de551246SJoel Becker * ocfs2_locking_ast() ocfs2_downconvert_thread() 1017de551246SJoel Becker * clear PENDING ocfs2_unblock_lock() 1018de551246SJoel Becker * take_l_lock 1019de551246SJoel Becker * !BUSY 1020de551246SJoel Becker * ocfs2_prepare_downconvert() 1021de551246SJoel Becker * set BUSY 1022de551246SJoel Becker * set PENDING 1023de551246SJoel Becker * drop l_lock 1024de551246SJoel Becker * take l_lock 1025de551246SJoel Becker * clear PENDING 1026de551246SJoel Becker * drop l_lock 1027de551246SJoel Becker * <window> 1028de551246SJoel Becker * ocfs2_dlm_lock() 1029de551246SJoel Becker * 1030de551246SJoel Becker * So as you can see, we now have a window where l_lock is not held, 1031de551246SJoel Becker * PENDING is not set, and ocfs2_dlm_lock() has not been called. 1032de551246SJoel Becker * 1033de551246SJoel Becker * The core problem is that ocfs2_cluster_lock() has cleared the PENDING 1034de551246SJoel Becker * set by ocfs2_prepare_downconvert(). That wasn't nice. 1035de551246SJoel Becker * 1036de551246SJoel Becker * To solve this we introduce l_pending_gen. A call to 1037de551246SJoel Becker * lockres_clear_pending() will only do so when it is passed a generation 1038de551246SJoel Becker * number that matches the lockres. lockres_set_pending() will return the 1039de551246SJoel Becker * current generation number. When ocfs2_cluster_lock() goes to clear 1040de551246SJoel Becker * PENDING, it passes the generation it got from set_pending(). In our 1041de551246SJoel Becker * example above, the generation numbers will *not* match. 
Thus, 1042de551246SJoel Becker * ocfs2_cluster_lock() will not clear the PENDING set by 1043de551246SJoel Becker * ocfs2_prepare_downconvert(). 1044de551246SJoel Becker */ 1045de551246SJoel Becker 1046de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */ 1047de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 1048de551246SJoel Becker unsigned int generation, 1049de551246SJoel Becker struct ocfs2_super *osb) 1050de551246SJoel Becker { 1051de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 1052de551246SJoel Becker 1053de551246SJoel Becker /* 1054de551246SJoel Becker * The ast and locking functions can race us here. The winner 1055de551246SJoel Becker * will clear pending, the loser will not. 1056de551246SJoel Becker */ 1057de551246SJoel Becker if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 1058de551246SJoel Becker (lockres->l_pending_gen != generation)) 1059de551246SJoel Becker return; 1060de551246SJoel Becker 1061de551246SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 1062de551246SJoel Becker lockres->l_pending_gen++; 1063de551246SJoel Becker 1064de551246SJoel Becker /* 1065de551246SJoel Becker * The downconvert thread may have skipped us because we 1066de551246SJoel Becker * were PENDING. Wake it up. 
1067de551246SJoel Becker */ 1068de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 1069de551246SJoel Becker ocfs2_wake_downconvert_thread(osb); 1070de551246SJoel Becker } 1071de551246SJoel Becker 1072de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */ 1073de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 1074de551246SJoel Becker unsigned int generation, 1075de551246SJoel Becker struct ocfs2_super *osb) 1076de551246SJoel Becker { 1077de551246SJoel Becker unsigned long flags; 1078de551246SJoel Becker 1079de551246SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 1080de551246SJoel Becker __lockres_clear_pending(lockres, generation, osb); 1081de551246SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1082de551246SJoel Becker } 1083de551246SJoel Becker 1084de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 1085de551246SJoel Becker { 1086de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 1087de551246SJoel Becker BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 1088de551246SJoel Becker 1089de551246SJoel Becker lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 1090de551246SJoel Becker 1091de551246SJoel Becker return lockres->l_pending_gen; 1092de551246SJoel Becker } 1093de551246SJoel Becker 1094c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) 1095ccd979bdSMark Fasheh { 1096a796d286SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1097aa2623adSMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1098ccd979bdSMark Fasheh int needs_downconvert; 1099ccd979bdSMark Fasheh unsigned long flags; 1100ccd979bdSMark Fasheh 1101bd3e7610SJoel Becker BUG_ON(level <= DLM_LOCK_NL); 1102ccd979bdSMark Fasheh 11039b915181SSunil Mushran mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, " 11049b915181SSunil Mushran "type %s\n", lockres->l_name, level, 
lockres->l_level, 1105aa2623adSMark Fasheh ocfs2_lock_type_string(lockres->l_type)); 1106aa2623adSMark Fasheh 1107cf8e06f1SMark Fasheh /* 1108cf8e06f1SMark Fasheh * We can skip the bast for locks which don't enable caching - 1109cf8e06f1SMark Fasheh * they'll be dropped at the earliest possible time anyway. 1110cf8e06f1SMark Fasheh */ 1111cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 1112cf8e06f1SMark Fasheh return; 1113cf8e06f1SMark Fasheh 1114ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1115ccd979bdSMark Fasheh needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 1116ccd979bdSMark Fasheh if (needs_downconvert) 1117ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 1118ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1119ccd979bdSMark Fasheh 1120d680efe9SMark Fasheh wake_up(&lockres->l_event); 1121d680efe9SMark Fasheh 112234d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 1123ccd979bdSMark Fasheh } 1124ccd979bdSMark Fasheh 1125c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb) 1126ccd979bdSMark Fasheh { 1127a796d286SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1128de551246SJoel Becker struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1129ccd979bdSMark Fasheh unsigned long flags; 11301693a5c0SDavid Teigland int status; 1131ccd979bdSMark Fasheh 1132ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1133ccd979bdSMark Fasheh 11341693a5c0SDavid Teigland status = ocfs2_dlm_lock_status(&lockres->l_lksb); 11351693a5c0SDavid Teigland 11361693a5c0SDavid Teigland if (status == -EAGAIN) { 11371693a5c0SDavid Teigland lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 11381693a5c0SDavid Teigland goto out; 11391693a5c0SDavid Teigland } 11401693a5c0SDavid Teigland 11411693a5c0SDavid Teigland if (status) { 11428f2c9c1bSJoel Becker mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", 11431693a5c0SDavid Teigland 
lockres->l_name, status); 1144ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1145ccd979bdSMark Fasheh return; 1146ccd979bdSMark Fasheh } 1147ccd979bdSMark Fasheh 11489b915181SSunil Mushran mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, " 11499b915181SSunil Mushran "level %d => %d\n", lockres->l_name, lockres->l_action, 11509b915181SSunil Mushran lockres->l_unlock_action, lockres->l_level, lockres->l_requested); 11519b915181SSunil Mushran 1152ccd979bdSMark Fasheh switch(lockres->l_action) { 1153ccd979bdSMark Fasheh case OCFS2_AST_ATTACH: 1154ccd979bdSMark Fasheh ocfs2_generic_handle_attach_action(lockres); 1155e92d57dfSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); 1156ccd979bdSMark Fasheh break; 1157ccd979bdSMark Fasheh case OCFS2_AST_CONVERT: 1158ccd979bdSMark Fasheh ocfs2_generic_handle_convert_action(lockres); 1159ccd979bdSMark Fasheh break; 1160ccd979bdSMark Fasheh case OCFS2_AST_DOWNCONVERT: 1161ccd979bdSMark Fasheh ocfs2_generic_handle_downconvert_action(lockres); 1162ccd979bdSMark Fasheh break; 1163ccd979bdSMark Fasheh default: 11649b915181SSunil Mushran mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, " 11659b915181SSunil Mushran "flags 0x%lx, unlock: %u\n", 1166e92d57dfSMark Fasheh lockres->l_name, lockres->l_action, lockres->l_flags, 1167e92d57dfSMark Fasheh lockres->l_unlock_action); 1168ccd979bdSMark Fasheh BUG(); 1169ccd979bdSMark Fasheh } 11701693a5c0SDavid Teigland out: 1171ccd979bdSMark Fasheh /* set it to something invalid so if we get called again we 1172ccd979bdSMark Fasheh * can catch it. */ 1173ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1174ccd979bdSMark Fasheh 1175de551246SJoel Becker /* Did we try to cancel this lock? 
Clear that state */ 1176de551246SJoel Becker if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) 1177de551246SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1178de551246SJoel Becker 1179de551246SJoel Becker /* 1180de551246SJoel Becker * We may have beaten the locking functions here. We certainly 1181de551246SJoel Becker * know that dlm_lock() has been called :-) 1182de551246SJoel Becker * Because we can't have two lock calls in flight at once, we 1183de551246SJoel Becker * can use lockres->l_pending_gen. 1184de551246SJoel Becker */ 1185de551246SJoel Becker __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); 1186de551246SJoel Becker 1187ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1188d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1189ccd979bdSMark Fasheh } 1190ccd979bdSMark Fasheh 1191553b5eb9SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error) 1192553b5eb9SJoel Becker { 1193553b5eb9SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1194553b5eb9SJoel Becker unsigned long flags; 1195553b5eb9SJoel Becker 11969b915181SSunil Mushran mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n", 11979b915181SSunil Mushran lockres->l_name, lockres->l_unlock_action); 1198553b5eb9SJoel Becker 1199553b5eb9SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 1200553b5eb9SJoel Becker if (error) { 1201553b5eb9SJoel Becker mlog(ML_ERROR, "Dlm passes error %d for lock %s, " 1202553b5eb9SJoel Becker "unlock_action %d\n", error, lockres->l_name, 1203553b5eb9SJoel Becker lockres->l_unlock_action); 1204553b5eb9SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1205553b5eb9SJoel Becker return; 1206553b5eb9SJoel Becker } 1207553b5eb9SJoel Becker 1208553b5eb9SJoel Becker switch(lockres->l_unlock_action) { 1209553b5eb9SJoel Becker case OCFS2_UNLOCK_CANCEL_CONVERT: 1210553b5eb9SJoel Becker mlog(0, "Cancel convert success for %s\n", lockres->l_name); 
1211553b5eb9SJoel Becker lockres->l_action = OCFS2_AST_INVALID; 1212553b5eb9SJoel Becker /* Downconvert thread may have requeued this lock, we 1213553b5eb9SJoel Becker * need to wake it. */ 1214553b5eb9SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 1215553b5eb9SJoel Becker ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); 1216553b5eb9SJoel Becker break; 1217553b5eb9SJoel Becker case OCFS2_UNLOCK_DROP_LOCK: 1218553b5eb9SJoel Becker lockres->l_level = DLM_LOCK_IV; 1219553b5eb9SJoel Becker break; 1220553b5eb9SJoel Becker default: 1221553b5eb9SJoel Becker BUG(); 1222553b5eb9SJoel Becker } 1223553b5eb9SJoel Becker 1224553b5eb9SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1225553b5eb9SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1226553b5eb9SJoel Becker wake_up(&lockres->l_event); 1227553b5eb9SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1228553b5eb9SJoel Becker } 1229553b5eb9SJoel Becker 1230553b5eb9SJoel Becker /* 1231553b5eb9SJoel Becker * This is the filesystem locking protocol. It provides the lock handling 1232553b5eb9SJoel Becker * hooks for the underlying DLM. It has a maximum version number. 1233553b5eb9SJoel Becker * The version number allows interoperability with systems running at 1234553b5eb9SJoel Becker * the same major number and an equal or smaller minor number. 1235553b5eb9SJoel Becker * 1236553b5eb9SJoel Becker * Whenever the filesystem does new things with locks (adds or removes a 1237553b5eb9SJoel Becker * lock, orders them differently, does different things underneath a lock), 1238553b5eb9SJoel Becker * the version must be changed. The protocol is negotiated when joining 1239553b5eb9SJoel Becker * the dlm domain. A node may join the domain if its major version is 1240553b5eb9SJoel Becker * identical to all other nodes and its minor version is greater than 1241553b5eb9SJoel Becker * or equal to all other nodes. 
When its minor version is greater than 1242553b5eb9SJoel Becker * the other nodes, it will run at the minor version specified by the 1243553b5eb9SJoel Becker * other nodes. 1244553b5eb9SJoel Becker * 1245553b5eb9SJoel Becker * If a locking change is made that will not be compatible with older 1246553b5eb9SJoel Becker * versions, the major number must be increased and the minor version set 1247553b5eb9SJoel Becker * to zero. If a change merely adds a behavior that can be disabled when 1248553b5eb9SJoel Becker * speaking to older versions, the minor version must be increased. If a 1249553b5eb9SJoel Becker * change adds a fully backwards compatible change (eg, LVB changes that 1250553b5eb9SJoel Becker * are just ignored by older versions), the version does not need to be 1251553b5eb9SJoel Becker * updated. 1252553b5eb9SJoel Becker */ 1253553b5eb9SJoel Becker static struct ocfs2_locking_protocol lproto = { 1254553b5eb9SJoel Becker .lp_max_version = { 1255553b5eb9SJoel Becker .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, 1256553b5eb9SJoel Becker .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, 1257553b5eb9SJoel Becker }, 1258553b5eb9SJoel Becker .lp_lock_ast = ocfs2_locking_ast, 1259553b5eb9SJoel Becker .lp_blocking_ast = ocfs2_blocking_ast, 1260553b5eb9SJoel Becker .lp_unlock_ast = ocfs2_unlock_ast, 1261553b5eb9SJoel Becker }; 1262553b5eb9SJoel Becker 1263553b5eb9SJoel Becker void ocfs2_set_locking_protocol(void) 1264553b5eb9SJoel Becker { 1265553b5eb9SJoel Becker ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version); 1266553b5eb9SJoel Becker } 1267553b5eb9SJoel Becker 1268ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 1269ccd979bdSMark Fasheh int convert) 1270ccd979bdSMark Fasheh { 1271ccd979bdSMark Fasheh unsigned long flags; 1272ccd979bdSMark Fasheh 1273ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1274ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1275a1912826SSunil Mushran 
lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1276ccd979bdSMark Fasheh if (convert) 1277ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1278ccd979bdSMark Fasheh else 1279ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1280ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1281ccd979bdSMark Fasheh 1282ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1283ccd979bdSMark Fasheh } 1284ccd979bdSMark Fasheh 1285ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e., 1286ccd979bdSMark Fasheh * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller 1287ccd979bdSMark Fasheh * to do the right thing in that case. 1288ccd979bdSMark Fasheh */ 1289ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 1290ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1291ccd979bdSMark Fasheh int level, 1292bd3e7610SJoel Becker u32 dlm_flags) 1293ccd979bdSMark Fasheh { 1294ccd979bdSMark Fasheh int ret = 0; 1295ccd979bdSMark Fasheh unsigned long flags; 1296de551246SJoel Becker unsigned int gen; 1297ccd979bdSMark Fasheh 1298bd3e7610SJoel Becker mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 1299ccd979bdSMark Fasheh dlm_flags); 1300ccd979bdSMark Fasheh 1301ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1302ccd979bdSMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || 1303ccd979bdSMark Fasheh (lockres->l_flags & OCFS2_LOCK_BUSY)) { 1304ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1305ccd979bdSMark Fasheh goto bail; 1306ccd979bdSMark Fasheh } 1307ccd979bdSMark Fasheh 1308ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1309ccd979bdSMark Fasheh lockres->l_requested = level; 1310ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1311de551246SJoel Becker gen = lockres_set_pending(lockres); 1312ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 
1313ccd979bdSMark Fasheh 13144670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1315ccd979bdSMark Fasheh level, 1316ccd979bdSMark Fasheh &lockres->l_lksb, 1317ccd979bdSMark Fasheh dlm_flags, 1318ccd979bdSMark Fasheh lockres->l_name, 1319a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 1320de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 13217431cd7eSJoel Becker if (ret) { 13227431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1323ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1324ccd979bdSMark Fasheh } 1325ccd979bdSMark Fasheh 13267431cd7eSJoel Becker mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); 1327ccd979bdSMark Fasheh 1328ccd979bdSMark Fasheh bail: 1329ccd979bdSMark Fasheh return ret; 1330ccd979bdSMark Fasheh } 1331ccd979bdSMark Fasheh 1332ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1333ccd979bdSMark Fasheh int flag) 1334ccd979bdSMark Fasheh { 1335ccd979bdSMark Fasheh unsigned long flags; 1336ccd979bdSMark Fasheh int ret; 1337ccd979bdSMark Fasheh 1338ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1339ccd979bdSMark Fasheh ret = lockres->l_flags & flag; 1340ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1341ccd979bdSMark Fasheh 1342ccd979bdSMark Fasheh return ret; 1343ccd979bdSMark Fasheh } 1344ccd979bdSMark Fasheh 1345ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) 1346ccd979bdSMark Fasheh 1347ccd979bdSMark Fasheh { 1348ccd979bdSMark Fasheh wait_event(lockres->l_event, 1349ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); 1350ccd979bdSMark Fasheh } 1351ccd979bdSMark Fasheh 1352ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) 1353ccd979bdSMark Fasheh 1354ccd979bdSMark Fasheh { 1355ccd979bdSMark Fasheh wait_event(lockres->l_event, 1356ccd979bdSMark Fasheh 
!ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); 1357ccd979bdSMark Fasheh } 1358ccd979bdSMark Fasheh 1359ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf 1360ccd979bdSMark Fasheh * of another node, and return true if the currently wanted 1361ccd979bdSMark Fasheh * level will be compatible with it. */ 1362ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 1363ccd979bdSMark Fasheh int wanted) 1364ccd979bdSMark Fasheh { 1365ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 1366ccd979bdSMark Fasheh 1367ccd979bdSMark Fasheh return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 1368ccd979bdSMark Fasheh } 1369ccd979bdSMark Fasheh 1370ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 1371ccd979bdSMark Fasheh { 1372ccd979bdSMark Fasheh INIT_LIST_HEAD(&mw->mw_item); 1373ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 13748ddb7b00SSunil Mushran ocfs2_init_start_time(mw); 1375ccd979bdSMark Fasheh } 1376ccd979bdSMark Fasheh 1377ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 1378ccd979bdSMark Fasheh { 1379ccd979bdSMark Fasheh wait_for_completion(&mw->mw_complete); 1380ccd979bdSMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 138116735d02SWolfram Sang reinit_completion(&mw->mw_complete); 1382ccd979bdSMark Fasheh return mw->mw_status; 1383ccd979bdSMark Fasheh } 1384ccd979bdSMark Fasheh 1385ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 1386ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw, 1387ccd979bdSMark Fasheh unsigned long mask, 1388ccd979bdSMark Fasheh unsigned long goal) 1389ccd979bdSMark Fasheh { 1390ccd979bdSMark Fasheh BUG_ON(!list_empty(&mw->mw_item)); 1391ccd979bdSMark Fasheh 1392ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 1393ccd979bdSMark Fasheh 1394ccd979bdSMark Fasheh 
list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 1395ccd979bdSMark Fasheh mw->mw_mask = mask; 1396ccd979bdSMark Fasheh mw->mw_goal = goal; 1397ccd979bdSMark Fasheh } 1398ccd979bdSMark Fasheh 1399ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY 1400ccd979bdSMark Fasheh * if the mask still hadn't reached its goal */ 1401d1e78238SXue jiufei static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1402ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw) 1403ccd979bdSMark Fasheh { 1404ccd979bdSMark Fasheh int ret = 0; 1405ccd979bdSMark Fasheh 1406d1e78238SXue jiufei assert_spin_locked(&lockres->l_lock); 1407ccd979bdSMark Fasheh if (!list_empty(&mw->mw_item)) { 1408ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 1409ccd979bdSMark Fasheh ret = -EBUSY; 1410ccd979bdSMark Fasheh 1411ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 1412ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 1413ccd979bdSMark Fasheh } 1414d1e78238SXue jiufei 1415d1e78238SXue jiufei return ret; 1416d1e78238SXue jiufei } 1417d1e78238SXue jiufei 1418d1e78238SXue jiufei static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1419d1e78238SXue jiufei struct ocfs2_mask_waiter *mw) 1420d1e78238SXue jiufei { 1421d1e78238SXue jiufei unsigned long flags; 1422d1e78238SXue jiufei int ret = 0; 1423d1e78238SXue jiufei 1424d1e78238SXue jiufei spin_lock_irqsave(&lockres->l_lock, flags); 1425d1e78238SXue jiufei ret = __lockres_remove_mask_waiter(lockres, mw); 1426ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1427ccd979bdSMark Fasheh 1428ccd979bdSMark Fasheh return ret; 1429ccd979bdSMark Fasheh 1430ccd979bdSMark Fasheh } 1431ccd979bdSMark Fasheh 1432cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 1433cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres) 1434cf8e06f1SMark Fasheh { 1435cf8e06f1SMark Fasheh int ret; 1436cf8e06f1SMark Fasheh 
1437cf8e06f1SMark Fasheh ret = wait_for_completion_interruptible(&mw->mw_complete); 1438cf8e06f1SMark Fasheh if (ret) 1439cf8e06f1SMark Fasheh lockres_remove_mask_waiter(lockres, mw); 1440cf8e06f1SMark Fasheh else 1441cf8e06f1SMark Fasheh ret = mw->mw_status; 1442cf8e06f1SMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 144316735d02SWolfram Sang reinit_completion(&mw->mw_complete); 1444cf8e06f1SMark Fasheh return ret; 1445cf8e06f1SMark Fasheh } 1446cf8e06f1SMark Fasheh 1447cb25797dSJan Kara static int __ocfs2_cluster_lock(struct ocfs2_super *osb, 1448ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1449ccd979bdSMark Fasheh int level, 1450bd3e7610SJoel Becker u32 lkm_flags, 1451cb25797dSJan Kara int arg_flags, 1452cb25797dSJan Kara int l_subclass, 1453cb25797dSJan Kara unsigned long caller_ip) 1454ccd979bdSMark Fasheh { 1455ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 1456ccd979bdSMark Fasheh int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1457ccd979bdSMark Fasheh int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 1458ccd979bdSMark Fasheh unsigned long flags; 1459de551246SJoel Becker unsigned int gen; 14601693a5c0SDavid Teigland int noqueue_attempted = 0; 1461d1e78238SXue jiufei int dlm_locked = 0; 1462b1b1e15eSTariq Saeed int kick_dc = 0; 1463ccd979bdSMark Fasheh 14642f2eca20Salex chen if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { 14652f2eca20Salex chen mlog_errno(-EINVAL); 14662f2eca20Salex chen return -EINVAL; 14672f2eca20Salex chen } 14682f2eca20Salex chen 1469ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 1470ccd979bdSMark Fasheh 1471b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1472bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 1473b80fc012SMark Fasheh 1474ccd979bdSMark Fasheh again: 1475ccd979bdSMark Fasheh wait = 0; 1476ccd979bdSMark Fasheh 1477a1912826SSunil Mushran spin_lock_irqsave(&lockres->l_lock, flags); 1478a1912826SSunil Mushran 
1479ccd979bdSMark Fasheh if (catch_signals && signal_pending(current)) { 1480ccd979bdSMark Fasheh ret = -ERESTARTSYS; 1481a1912826SSunil Mushran goto unlock; 1482ccd979bdSMark Fasheh } 1483ccd979bdSMark Fasheh 1484ccd979bdSMark Fasheh mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1485ccd979bdSMark Fasheh "Cluster lock called on freeing lockres %s! flags " 1486ccd979bdSMark Fasheh "0x%lx\n", lockres->l_name, lockres->l_flags); 1487ccd979bdSMark Fasheh 1488ccd979bdSMark Fasheh /* We only compare against the currently granted level 1489ccd979bdSMark Fasheh * here. If the lock is blocked waiting on a downconvert, 1490ccd979bdSMark Fasheh * we'll get caught below. */ 1491ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY && 1492ccd979bdSMark Fasheh level > lockres->l_level) { 1493ccd979bdSMark Fasheh /* is someone sitting in dlm_lock? If so, wait on 1494ccd979bdSMark Fasheh * them. */ 1495ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1496ccd979bdSMark Fasheh wait = 1; 1497ccd979bdSMark Fasheh goto unlock; 1498ccd979bdSMark Fasheh } 1499ccd979bdSMark Fasheh 1500a1912826SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { 1501a1912826SSunil Mushran /* 1502a1912826SSunil Mushran * We've upconverted. If the lock now has a level we can 1503a1912826SSunil Mushran * work with, we take it. If, however, the lock is not at the 1504a1912826SSunil Mushran * required level, we go thru the full cycle. One way this could 1505a1912826SSunil Mushran * happen is if a process requesting an upconvert to PR is 1506a1912826SSunil Mushran * closely followed by another requesting upconvert to an EX. 1507a1912826SSunil Mushran * If the process requesting EX lands here, we want it to 1508a1912826SSunil Mushran * continue attempting to upconvert and let the process 1509a1912826SSunil Mushran * requesting PR take the lock. 
1510a1912826SSunil Mushran * If multiple processes request upconvert to PR, the first one 1511a1912826SSunil Mushran * here will take the lock. The others will have to go thru the 1512a1912826SSunil Mushran * OCFS2_LOCK_BLOCKED check to ensure that there is no pending 1513a1912826SSunil Mushran * downconvert request. 1514a1912826SSunil Mushran */ 1515a1912826SSunil Mushran if (level <= lockres->l_level) 1516a1912826SSunil Mushran goto update_holders; 1517a1912826SSunil Mushran } 1518a1912826SSunil Mushran 1519ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1520ccd979bdSMark Fasheh !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1521ccd979bdSMark Fasheh /* is the lock is currently blocked on behalf of 1522ccd979bdSMark Fasheh * another node */ 1523ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); 1524ccd979bdSMark Fasheh wait = 1; 1525ccd979bdSMark Fasheh goto unlock; 1526ccd979bdSMark Fasheh } 1527ccd979bdSMark Fasheh 1528ccd979bdSMark Fasheh if (level > lockres->l_level) { 15291693a5c0SDavid Teigland if (noqueue_attempted > 0) { 15301693a5c0SDavid Teigland ret = -EAGAIN; 15311693a5c0SDavid Teigland goto unlock; 15321693a5c0SDavid Teigland } 15331693a5c0SDavid Teigland if (lkm_flags & DLM_LKF_NOQUEUE) 15341693a5c0SDavid Teigland noqueue_attempted = 1; 15351693a5c0SDavid Teigland 1536ccd979bdSMark Fasheh if (lockres->l_action != OCFS2_AST_INVALID) 1537ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres %s has action %u pending\n", 1538ccd979bdSMark Fasheh lockres->l_name, lockres->l_action); 1539ccd979bdSMark Fasheh 1540019d1b22SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1541019d1b22SMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1542bd3e7610SJoel Becker lkm_flags &= ~DLM_LKF_CONVERT; 1543019d1b22SMark Fasheh } else { 1544ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1545bd3e7610SJoel Becker lkm_flags |= DLM_LKF_CONVERT; 1546019d1b22SMark Fasheh } 1547019d1b22SMark Fasheh 
1548ccd979bdSMark Fasheh lockres->l_requested = level; 1549ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1550de551246SJoel Becker gen = lockres_set_pending(lockres); 1551ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1552ccd979bdSMark Fasheh 1553bd3e7610SJoel Becker BUG_ON(level == DLM_LOCK_IV); 1554bd3e7610SJoel Becker BUG_ON(level == DLM_LOCK_NL); 1555ccd979bdSMark Fasheh 15569b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, convert from %d to %d\n", 1557ccd979bdSMark Fasheh lockres->l_name, lockres->l_level, level); 1558ccd979bdSMark Fasheh 1559ccd979bdSMark Fasheh /* call dlm_lock to upgrade lock now */ 15604670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1561ccd979bdSMark Fasheh level, 1562ccd979bdSMark Fasheh &lockres->l_lksb, 1563019d1b22SMark Fasheh lkm_flags, 1564ccd979bdSMark Fasheh lockres->l_name, 1565a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 1566de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 15677431cd7eSJoel Becker if (ret) { 15687431cd7eSJoel Becker if (!(lkm_flags & DLM_LKF_NOQUEUE) || 15697431cd7eSJoel Becker (ret != -EAGAIN)) { 157024ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", 15717431cd7eSJoel Becker ret, lockres); 1572ccd979bdSMark Fasheh } 1573ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1574ccd979bdSMark Fasheh goto out; 1575ccd979bdSMark Fasheh } 1576d1e78238SXue jiufei dlm_locked = 1; 1577ccd979bdSMark Fasheh 157873ac36eaSColy Li mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", 1579ccd979bdSMark Fasheh lockres->l_name); 1580ccd979bdSMark Fasheh 1581ccd979bdSMark Fasheh /* At this point we've gone inside the dlm and need to 1582ccd979bdSMark Fasheh * complete our work regardless. 
*/ 1583ccd979bdSMark Fasheh catch_signals = 0; 1584ccd979bdSMark Fasheh 1585ccd979bdSMark Fasheh /* wait for busy to clear and carry on */ 1586ccd979bdSMark Fasheh goto again; 1587ccd979bdSMark Fasheh } 1588ccd979bdSMark Fasheh 1589a1912826SSunil Mushran update_holders: 1590ccd979bdSMark Fasheh /* Ok, if we get here then we're good to go. */ 1591ccd979bdSMark Fasheh ocfs2_inc_holders(lockres, level); 1592ccd979bdSMark Fasheh 1593ccd979bdSMark Fasheh ret = 0; 1594ccd979bdSMark Fasheh unlock: 1595a1912826SSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1596a1912826SSunil Mushran 1597b1b1e15eSTariq Saeed /* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */ 1598b1b1e15eSTariq Saeed kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED); 1599b1b1e15eSTariq Saeed 1600ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1601b1b1e15eSTariq Saeed if (kick_dc) 1602b1b1e15eSTariq Saeed ocfs2_wake_downconvert_thread(osb); 1603ccd979bdSMark Fasheh out: 1604ccd979bdSMark Fasheh /* 1605ccd979bdSMark Fasheh * This is helping work around a lock inversion between the page lock 1606ccd979bdSMark Fasheh * and dlm locks. One path holds the page lock while calling aops 1607ccd979bdSMark Fasheh * which block acquiring dlm locks. The voting thread holds dlm 1608ccd979bdSMark Fasheh * locks while acquiring page locks while down converting data locks. 1609ccd979bdSMark Fasheh * This block is helping an aop path notice the inversion and back 1610ccd979bdSMark Fasheh * off to unlock its page lock before trying the dlm lock again. 
1611ccd979bdSMark Fasheh */ 1612ccd979bdSMark Fasheh if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && 1613ccd979bdSMark Fasheh mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { 1614ccd979bdSMark Fasheh wait = 0; 1615d1e78238SXue jiufei spin_lock_irqsave(&lockres->l_lock, flags); 1616d1e78238SXue jiufei if (__lockres_remove_mask_waiter(lockres, &mw)) { 1617d1e78238SXue jiufei if (dlm_locked) 1618d1e78238SXue jiufei lockres_or_flags(lockres, 1619d1e78238SXue jiufei OCFS2_LOCK_NONBLOCK_FINISHED); 1620d1e78238SXue jiufei spin_unlock_irqrestore(&lockres->l_lock, flags); 1621ccd979bdSMark Fasheh ret = -EAGAIN; 1622d1e78238SXue jiufei } else { 1623d1e78238SXue jiufei spin_unlock_irqrestore(&lockres->l_lock, flags); 1624ccd979bdSMark Fasheh goto again; 1625ccd979bdSMark Fasheh } 1626d1e78238SXue jiufei } 1627ccd979bdSMark Fasheh if (wait) { 1628ccd979bdSMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1629ccd979bdSMark Fasheh if (ret == 0) 1630ccd979bdSMark Fasheh goto again; 1631ccd979bdSMark Fasheh mlog_errno(ret); 1632ccd979bdSMark Fasheh } 16338ddb7b00SSunil Mushran ocfs2_update_lock_stats(lockres, level, &mw, ret); 1634ccd979bdSMark Fasheh 1635cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 1636cb25797dSJan Kara if (!ret && lockres->l_lockdep_map.key != NULL) { 1637cb25797dSJan Kara if (level == DLM_LOCK_PR) 1638cb25797dSJan Kara rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, 1639cb25797dSJan Kara !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1640cb25797dSJan Kara caller_ip); 1641cb25797dSJan Kara else 1642cb25797dSJan Kara rwsem_acquire(&lockres->l_lockdep_map, l_subclass, 1643cb25797dSJan Kara !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1644cb25797dSJan Kara caller_ip); 1645cb25797dSJan Kara } 1646cb25797dSJan Kara #endif 1647ccd979bdSMark Fasheh return ret; 1648ccd979bdSMark Fasheh } 1649ccd979bdSMark Fasheh 1650cb25797dSJan Kara static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, 1651ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1652cb25797dSJan 
Kara int level, 1653cb25797dSJan Kara u32 lkm_flags, 1654cb25797dSJan Kara int arg_flags) 1655cb25797dSJan Kara { 1656cb25797dSJan Kara return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, 1657cb25797dSJan Kara 0, _RET_IP_); 1658cb25797dSJan Kara } 1659cb25797dSJan Kara 1660cb25797dSJan Kara 1661cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 1662cb25797dSJan Kara struct ocfs2_lock_res *lockres, 1663cb25797dSJan Kara int level, 1664cb25797dSJan Kara unsigned long caller_ip) 1665ccd979bdSMark Fasheh { 1666ccd979bdSMark Fasheh unsigned long flags; 1667ccd979bdSMark Fasheh 1668ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1669ccd979bdSMark Fasheh ocfs2_dec_holders(lockres, level); 167034d024f8SMark Fasheh ocfs2_downconvert_on_unlock(osb, lockres); 1671ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1672cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 1673cb25797dSJan Kara if (lockres->l_lockdep_map.key != NULL) 1674cb25797dSJan Kara rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); 1675cb25797dSJan Kara #endif 1676ccd979bdSMark Fasheh } 1677ccd979bdSMark Fasheh 1678da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1679d680efe9SMark Fasheh struct ocfs2_lock_res *lockres, 168024c19ef4SMark Fasheh int ex, 168124c19ef4SMark Fasheh int local) 1682ccd979bdSMark Fasheh { 1683bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1684ccd979bdSMark Fasheh unsigned long flags; 1685bd3e7610SJoel Becker u32 lkm_flags = local ? 
DLM_LKF_LOCAL : 0; 1686ccd979bdSMark Fasheh 1687ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1688ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1689ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1690ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1691ccd979bdSMark Fasheh 169224c19ef4SMark Fasheh return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1693ccd979bdSMark Fasheh } 1694ccd979bdSMark Fasheh 1695ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping 1696ccd979bdSMark Fasheh * the normal cluster directory lookup. Use this ONLY on newly created 1697ccd979bdSMark Fasheh * inodes which other nodes can't possibly see, and which haven't been 1698ccd979bdSMark Fasheh * hashed in the inode hash yet. This can give us a good performance 1699ccd979bdSMark Fasheh * increase as it'll skip the network broadcast normally associated 1700ccd979bdSMark Fasheh * with creating a new lock resource. */ 1701ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode) 1702ccd979bdSMark Fasheh { 1703ccd979bdSMark Fasheh int ret; 1704d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1705ccd979bdSMark Fasheh 1706ccd979bdSMark Fasheh BUG_ON(!ocfs2_inode_is_new(inode)); 1707ccd979bdSMark Fasheh 1708b0697053SMark Fasheh mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1709ccd979bdSMark Fasheh 1710ccd979bdSMark Fasheh /* NOTE: That we don't increment any of the holder counts, nor 1711ccd979bdSMark Fasheh * do we add anything to a journal handle. Since this is 1712ccd979bdSMark Fasheh * supposed to be a new inode which the cluster doesn't know 1713ccd979bdSMark Fasheh * about yet, there is no need to. 
As far as the LVB handling 1714ccd979bdSMark Fasheh * is concerned, this is basically like acquiring an EX lock 1715ccd979bdSMark Fasheh * on a resource which has an invalid one -- we'll set it 1716ccd979bdSMark Fasheh * valid when we release the EX. */ 1717ccd979bdSMark Fasheh 171824c19ef4SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1719ccd979bdSMark Fasheh if (ret) { 1720ccd979bdSMark Fasheh mlog_errno(ret); 1721ccd979bdSMark Fasheh goto bail; 1722ccd979bdSMark Fasheh } 1723ccd979bdSMark Fasheh 172424c19ef4SMark Fasheh /* 1725bd3e7610SJoel Becker * We don't want to use DLM_LKF_LOCAL on a meta data lock as they 172624c19ef4SMark Fasheh * don't use a generation in their lock names. 172724c19ef4SMark Fasheh */ 1728e63aecb6SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1729ccd979bdSMark Fasheh if (ret) { 1730ccd979bdSMark Fasheh mlog_errno(ret); 1731ccd979bdSMark Fasheh goto bail; 1732ccd979bdSMark Fasheh } 1733ccd979bdSMark Fasheh 173450008630STiger Yang ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 1735a8f24f1bSJoseph Qi if (ret) 173650008630STiger Yang mlog_errno(ret); 173750008630STiger Yang 1738ccd979bdSMark Fasheh bail: 1739ccd979bdSMark Fasheh return ret; 1740ccd979bdSMark Fasheh } 1741ccd979bdSMark Fasheh 1742ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write) 1743ccd979bdSMark Fasheh { 1744ccd979bdSMark Fasheh int status, level; 1745ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres; 1746c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1747ccd979bdSMark Fasheh 1748b0697053SMark Fasheh mlog(0, "inode %llu take %s RW lock\n", 1749b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1750ccd979bdSMark Fasheh write ? 
"EXMODE" : "PRMODE"); 1751ccd979bdSMark Fasheh 1752c1e8d35eSTao Ma if (ocfs2_mount_local(osb)) 1753c271c5c2SSunil Mushran return 0; 1754c271c5c2SSunil Mushran 1755ccd979bdSMark Fasheh lockres = &OCFS2_I(inode)->ip_rw_lockres; 1756ccd979bdSMark Fasheh 1757bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1758ccd979bdSMark Fasheh 1759ccd979bdSMark Fasheh status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1760ccd979bdSMark Fasheh 0); 1761ccd979bdSMark Fasheh if (status < 0) 1762ccd979bdSMark Fasheh mlog_errno(status); 1763ccd979bdSMark Fasheh 1764ccd979bdSMark Fasheh return status; 1765ccd979bdSMark Fasheh } 1766ccd979bdSMark Fasheh 176706e7f13dSGang He int ocfs2_try_rw_lock(struct inode *inode, int write) 176806e7f13dSGang He { 176906e7f13dSGang He int status, level; 177006e7f13dSGang He struct ocfs2_lock_res *lockres; 177106e7f13dSGang He struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 177206e7f13dSGang He 177306e7f13dSGang He mlog(0, "inode %llu try to take %s RW lock\n", 177406e7f13dSGang He (unsigned long long)OCFS2_I(inode)->ip_blkno, 177506e7f13dSGang He write ? "EXMODE" : "PRMODE"); 177606e7f13dSGang He 177706e7f13dSGang He if (ocfs2_mount_local(osb)) 177806e7f13dSGang He return 0; 177906e7f13dSGang He 178006e7f13dSGang He lockres = &OCFS2_I(inode)->ip_rw_lockres; 178106e7f13dSGang He 178206e7f13dSGang He level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 178306e7f13dSGang He 178406e7f13dSGang He status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0); 178506e7f13dSGang He return status; 178606e7f13dSGang He } 178706e7f13dSGang He 1788ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write) 1789ccd979bdSMark Fasheh { 1790bd3e7610SJoel Becker int level = write ? 
DLM_LOCK_EX : DLM_LOCK_PR; 1791ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1792c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1793ccd979bdSMark Fasheh 1794b0697053SMark Fasheh mlog(0, "inode %llu drop %s RW lock\n", 1795b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1796ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1797ccd979bdSMark Fasheh 1798c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1799ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1800ccd979bdSMark Fasheh } 1801ccd979bdSMark Fasheh 180250008630STiger Yang /* 180350008630STiger Yang * ocfs2_open_lock always get PR mode lock. 180450008630STiger Yang */ 180550008630STiger Yang int ocfs2_open_lock(struct inode *inode) 180650008630STiger Yang { 180750008630STiger Yang int status = 0; 180850008630STiger Yang struct ocfs2_lock_res *lockres; 180950008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 181050008630STiger Yang 181150008630STiger Yang mlog(0, "inode %llu take PRMODE open lock\n", 181250008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 181350008630STiger Yang 181403efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) 181550008630STiger Yang goto out; 181650008630STiger Yang 181750008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 181850008630STiger Yang 181950008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1820bd3e7610SJoel Becker DLM_LOCK_PR, 0, 0); 182150008630STiger Yang if (status < 0) 182250008630STiger Yang mlog_errno(status); 182350008630STiger Yang 182450008630STiger Yang out: 182550008630STiger Yang return status; 182650008630STiger Yang } 182750008630STiger Yang 182850008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write) 182950008630STiger Yang { 183050008630STiger Yang int status = 0, level; 183150008630STiger Yang struct ocfs2_lock_res *lockres; 
183250008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 183350008630STiger Yang 183450008630STiger Yang mlog(0, "inode %llu try to take %s open lock\n", 183550008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno, 183650008630STiger Yang write ? "EXMODE" : "PRMODE"); 183750008630STiger Yang 183803efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb)) { 183903efed8aSTiger Yang if (write) 184003efed8aSTiger Yang status = -EROFS; 184103efed8aSTiger Yang goto out; 184203efed8aSTiger Yang } 184303efed8aSTiger Yang 184450008630STiger Yang if (ocfs2_mount_local(osb)) 184550008630STiger Yang goto out; 184650008630STiger Yang 184750008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 184850008630STiger Yang 1849bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 185050008630STiger Yang 185150008630STiger Yang /* 185250008630STiger Yang * The file system may already holding a PRMODE/EXMODE open lock. 1853bd3e7610SJoel Becker * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 185450008630STiger Yang * other nodes and the -EAGAIN will indicate to the caller that 185550008630STiger Yang * this inode is still in use. 185650008630STiger Yang */ 185750008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1858bd3e7610SJoel Becker level, DLM_LKF_NOQUEUE, 0); 185950008630STiger Yang 186050008630STiger Yang out: 186150008630STiger Yang return status; 186250008630STiger Yang } 186350008630STiger Yang 186450008630STiger Yang /* 186550008630STiger Yang * ocfs2_open_unlock unlock PR and EX mode open locks. 
186650008630STiger Yang */ 186750008630STiger Yang void ocfs2_open_unlock(struct inode *inode) 186850008630STiger Yang { 186950008630STiger Yang struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 187050008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 187150008630STiger Yang 187250008630STiger Yang mlog(0, "inode %llu drop open lock\n", 187350008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 187450008630STiger Yang 187550008630STiger Yang if (ocfs2_mount_local(osb)) 187650008630STiger Yang goto out; 187750008630STiger Yang 187850008630STiger Yang if(lockres->l_ro_holders) 187950008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1880bd3e7610SJoel Becker DLM_LOCK_PR); 188150008630STiger Yang if(lockres->l_ex_holders) 188250008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1883bd3e7610SJoel Becker DLM_LOCK_EX); 188450008630STiger Yang 188550008630STiger Yang out: 1886c1e8d35eSTao Ma return; 188750008630STiger Yang } 188850008630STiger Yang 1889cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1890cf8e06f1SMark Fasheh int level) 1891cf8e06f1SMark Fasheh { 1892cf8e06f1SMark Fasheh int ret; 1893cf8e06f1SMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1894cf8e06f1SMark Fasheh unsigned long flags; 1895cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1896cf8e06f1SMark Fasheh 1897cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1898cf8e06f1SMark Fasheh 1899cf8e06f1SMark Fasheh retry_cancel: 1900cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1901cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1902cf8e06f1SMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 1903cf8e06f1SMark Fasheh if (ret) { 1904cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1905cf8e06f1SMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 1906cf8e06f1SMark Fasheh if (ret < 0) { 
1907cf8e06f1SMark Fasheh mlog_errno(ret); 1908cf8e06f1SMark Fasheh goto out; 1909cf8e06f1SMark Fasheh } 1910cf8e06f1SMark Fasheh goto retry_cancel; 1911cf8e06f1SMark Fasheh } 1912cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1913cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1914cf8e06f1SMark Fasheh 1915cf8e06f1SMark Fasheh ocfs2_wait_for_mask(&mw); 1916cf8e06f1SMark Fasheh goto retry_cancel; 1917cf8e06f1SMark Fasheh } 1918cf8e06f1SMark Fasheh 1919cf8e06f1SMark Fasheh ret = -ERESTARTSYS; 1920cf8e06f1SMark Fasheh /* 1921cf8e06f1SMark Fasheh * We may still have gotten the lock, in which case there's no 1922cf8e06f1SMark Fasheh * point to restarting the syscall. 1923cf8e06f1SMark Fasheh */ 1924cf8e06f1SMark Fasheh if (lockres->l_level == level) 1925cf8e06f1SMark Fasheh ret = 0; 1926cf8e06f1SMark Fasheh 1927cf8e06f1SMark Fasheh mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1928cf8e06f1SMark Fasheh lockres->l_flags, lockres->l_level, lockres->l_action); 1929cf8e06f1SMark Fasheh 1930cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1931cf8e06f1SMark Fasheh 1932cf8e06f1SMark Fasheh out: 1933cf8e06f1SMark Fasheh return ret; 1934cf8e06f1SMark Fasheh } 1935cf8e06f1SMark Fasheh 1936cf8e06f1SMark Fasheh /* 1937cf8e06f1SMark Fasheh * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1938cf8e06f1SMark Fasheh * flock() calls. The locking approach this requires is sufficiently 1939cf8e06f1SMark Fasheh * different from all other cluster lock types that we implement a 19403ad2f3fbSDaniel Mack * separate path to the "low-level" dlm calls. In particular: 1941cf8e06f1SMark Fasheh * 1942cf8e06f1SMark Fasheh * - No optimization of lock levels is done - we take at exactly 1943cf8e06f1SMark Fasheh * what's been requested. 1944cf8e06f1SMark Fasheh * 1945cf8e06f1SMark Fasheh * - No lock caching is employed. 
We immediately downconvert to 1946cf8e06f1SMark Fasheh * no-lock at unlock time. This also means flock locks never go on 1947cf8e06f1SMark Fasheh * the blocking list). 1948cf8e06f1SMark Fasheh * 1949cf8e06f1SMark Fasheh * - Since userspace can trivially deadlock itself with flock, we make 1950cf8e06f1SMark Fasheh * sure to allow cancellation of a misbehaving applications flock() 1951cf8e06f1SMark Fasheh * request. 1952cf8e06f1SMark Fasheh * 1953cf8e06f1SMark Fasheh * - Access to any flock lockres doesn't require concurrency, so we 1954cf8e06f1SMark Fasheh * can simplify the code by requiring the caller to guarantee 1955cf8e06f1SMark Fasheh * serialization of dlmglue flock calls. 1956cf8e06f1SMark Fasheh */ 1957cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock) 1958cf8e06f1SMark Fasheh { 1959e988cf1cSMark Fasheh int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1960e988cf1cSMark Fasheh unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; 1961cf8e06f1SMark Fasheh unsigned long flags; 1962cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1963cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1964cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1965cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1966cf8e06f1SMark Fasheh 1967cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1968cf8e06f1SMark Fasheh 1969cf8e06f1SMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1970bd3e7610SJoel Becker (lockres->l_level > DLM_LOCK_NL)) { 1971cf8e06f1SMark Fasheh mlog(ML_ERROR, 1972cf8e06f1SMark Fasheh "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1973cf8e06f1SMark Fasheh "level: %u\n", lockres->l_name, lockres->l_flags, 1974cf8e06f1SMark Fasheh lockres->l_level); 1975cf8e06f1SMark Fasheh return -EINVAL; 1976cf8e06f1SMark Fasheh } 1977cf8e06f1SMark Fasheh 1978cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1979cf8e06f1SMark Fasheh if 
(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1980cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1981cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1982cf8e06f1SMark Fasheh 1983cf8e06f1SMark Fasheh /* 1984cf8e06f1SMark Fasheh * Get the lock at NLMODE to start - that way we 1985cf8e06f1SMark Fasheh * can cancel the upconvert request if need be. 1986cf8e06f1SMark Fasheh */ 1987e988cf1cSMark Fasheh ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); 1988cf8e06f1SMark Fasheh if (ret < 0) { 1989cf8e06f1SMark Fasheh mlog_errno(ret); 1990cf8e06f1SMark Fasheh goto out; 1991cf8e06f1SMark Fasheh } 1992cf8e06f1SMark Fasheh 1993cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1994cf8e06f1SMark Fasheh if (ret) { 1995cf8e06f1SMark Fasheh mlog_errno(ret); 1996cf8e06f1SMark Fasheh goto out; 1997cf8e06f1SMark Fasheh } 1998cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1999cf8e06f1SMark Fasheh } 2000cf8e06f1SMark Fasheh 2001cf8e06f1SMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 2002e988cf1cSMark Fasheh lkm_flags |= DLM_LKF_CONVERT; 2003cf8e06f1SMark Fasheh lockres->l_requested = level; 2004cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 2005cf8e06f1SMark Fasheh 2006cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 2007cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2008cf8e06f1SMark Fasheh 20094670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, 2010a796d286SJoel Becker lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1); 20117431cd7eSJoel Becker if (ret) { 20127431cd7eSJoel Becker if (!trylock || (ret != -EAGAIN)) { 201324ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 2014cf8e06f1SMark Fasheh ret = -EINVAL; 2015cf8e06f1SMark Fasheh } 2016cf8e06f1SMark Fasheh 2017cf8e06f1SMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 2018cf8e06f1SMark Fasheh 
lockres_remove_mask_waiter(lockres, &mw); 2019cf8e06f1SMark Fasheh goto out; 2020cf8e06f1SMark Fasheh } 2021cf8e06f1SMark Fasheh 2022cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 2023cf8e06f1SMark Fasheh if (ret == -ERESTARTSYS) { 2024cf8e06f1SMark Fasheh /* 2025cf8e06f1SMark Fasheh * Userspace can cause deadlock itself with 2026cf8e06f1SMark Fasheh * flock(). Current behavior locally is to allow the 2027cf8e06f1SMark Fasheh * deadlock, but abort the system call if a signal is 2028cf8e06f1SMark Fasheh * received. We follow this example, otherwise a 2029cf8e06f1SMark Fasheh * poorly written program could sit in kernel until 2030cf8e06f1SMark Fasheh * reboot. 2031cf8e06f1SMark Fasheh * 2032cf8e06f1SMark Fasheh * Handling this is a bit more complicated for Ocfs2 2033cf8e06f1SMark Fasheh * though. We can't exit this function with an 2034cf8e06f1SMark Fasheh * outstanding lock request, so a cancel convert is 2035cf8e06f1SMark Fasheh * required. We intentionally overwrite 'ret' - if the 2036cf8e06f1SMark Fasheh * cancel fails and the lock was granted, it's easier 2037af901ca1SAndré Goddard Rosa * to just bubble success back up to the user. 
2038cf8e06f1SMark Fasheh */ 2039cf8e06f1SMark Fasheh ret = ocfs2_flock_handle_signal(lockres, level); 20401693a5c0SDavid Teigland } else if (!ret && (level > lockres->l_level)) { 20411693a5c0SDavid Teigland /* Trylock failed asynchronously */ 20421693a5c0SDavid Teigland BUG_ON(!trylock); 20431693a5c0SDavid Teigland ret = -EAGAIN; 2044cf8e06f1SMark Fasheh } 2045cf8e06f1SMark Fasheh 2046cf8e06f1SMark Fasheh out: 2047cf8e06f1SMark Fasheh 2048cf8e06f1SMark Fasheh mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 2049cf8e06f1SMark Fasheh lockres->l_name, ex, trylock, ret); 2050cf8e06f1SMark Fasheh return ret; 2051cf8e06f1SMark Fasheh } 2052cf8e06f1SMark Fasheh 2053cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file) 2054cf8e06f1SMark Fasheh { 2055cf8e06f1SMark Fasheh int ret; 2056de551246SJoel Becker unsigned int gen; 2057cf8e06f1SMark Fasheh unsigned long flags; 2058cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 2059cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 2060cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 2061cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 2062cf8e06f1SMark Fasheh 2063cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 2064cf8e06f1SMark Fasheh 2065cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 2066cf8e06f1SMark Fasheh return; 2067cf8e06f1SMark Fasheh 2068e988cf1cSMark Fasheh if (lockres->l_level == DLM_LOCK_NL) 2069cf8e06f1SMark Fasheh return; 2070cf8e06f1SMark Fasheh 2071cf8e06f1SMark Fasheh mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 2072cf8e06f1SMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_level, 2073cf8e06f1SMark Fasheh lockres->l_action); 2074cf8e06f1SMark Fasheh 2075cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2076cf8e06f1SMark Fasheh /* 2077cf8e06f1SMark Fasheh * Fake a blocking ast for the downconvert code. 
2078cf8e06f1SMark Fasheh */ 2079cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 2080bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_EX; 2081cf8e06f1SMark Fasheh 2082e988cf1cSMark Fasheh gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); 2083cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 2084cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2085cf8e06f1SMark Fasheh 2086e988cf1cSMark Fasheh ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); 2087cf8e06f1SMark Fasheh if (ret) { 2088cf8e06f1SMark Fasheh mlog_errno(ret); 2089cf8e06f1SMark Fasheh return; 2090cf8e06f1SMark Fasheh } 2091cf8e06f1SMark Fasheh 2092cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 2093cf8e06f1SMark Fasheh if (ret) 2094cf8e06f1SMark Fasheh mlog_errno(ret); 2095cf8e06f1SMark Fasheh } 2096cf8e06f1SMark Fasheh 209734d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 2098ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 2099ccd979bdSMark Fasheh { 2100ccd979bdSMark Fasheh int kick = 0; 2101ccd979bdSMark Fasheh 2102ccd979bdSMark Fasheh /* If we know that another node is waiting on our lock, kick 210334d024f8SMark Fasheh * the downconvert thread * pre-emptively when we reach a release 2104ccd979bdSMark Fasheh * condition. 
*/ 2105ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 2106ccd979bdSMark Fasheh switch(lockres->l_blocking) { 2107bd3e7610SJoel Becker case DLM_LOCK_EX: 2108ccd979bdSMark Fasheh if (!lockres->l_ex_holders && !lockres->l_ro_holders) 2109ccd979bdSMark Fasheh kick = 1; 2110ccd979bdSMark Fasheh break; 2111bd3e7610SJoel Becker case DLM_LOCK_PR: 2112ccd979bdSMark Fasheh if (!lockres->l_ex_holders) 2113ccd979bdSMark Fasheh kick = 1; 2114ccd979bdSMark Fasheh break; 2115ccd979bdSMark Fasheh default: 2116ccd979bdSMark Fasheh BUG(); 2117ccd979bdSMark Fasheh } 2118ccd979bdSMark Fasheh } 2119ccd979bdSMark Fasheh 2120ccd979bdSMark Fasheh if (kick) 212134d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 2122ccd979bdSMark Fasheh } 2123ccd979bdSMark Fasheh 2124ccd979bdSMark Fasheh #define OCFS2_SEC_BITS 34 2125ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT (64 - 34) 2126ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 2127ccd979bdSMark Fasheh 2128ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for 2129ccd979bdSMark Fasheh * now. */ 2130ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec) 2131ccd979bdSMark Fasheh { 2132ccd979bdSMark Fasheh u64 res; 2133ccd979bdSMark Fasheh u64 sec = spec->tv_sec; 2134ccd979bdSMark Fasheh u32 nsec = spec->tv_nsec; 2135ccd979bdSMark Fasheh 2136ccd979bdSMark Fasheh res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 2137ccd979bdSMark Fasheh 2138ccd979bdSMark Fasheh return res; 2139ccd979bdSMark Fasheh } 2140ccd979bdSMark Fasheh 2141ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't 2142ccd979bdSMark Fasheh * need ip_lock in this function as anyone who would be changing those 2143e63aecb6SMark Fasheh * values is supposed to be blocked in ocfs2_inode_lock right now. 
*/ 2144ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode) 2145ccd979bdSMark Fasheh { 2146ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2147e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2148ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 2149ccd979bdSMark Fasheh 2150a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2151ccd979bdSMark Fasheh 215224c19ef4SMark Fasheh /* 215324c19ef4SMark Fasheh * Invalidate the LVB of a deleted inode - this way other 215424c19ef4SMark Fasheh * nodes are forced to go to disk and discover the new inode 215524c19ef4SMark Fasheh * status. 215624c19ef4SMark Fasheh */ 215724c19ef4SMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 215824c19ef4SMark Fasheh lvb->lvb_version = 0; 215924c19ef4SMark Fasheh goto out; 216024c19ef4SMark Fasheh } 216124c19ef4SMark Fasheh 21624d3b83f7SMark Fasheh lvb->lvb_version = OCFS2_LVB_VERSION; 2163ccd979bdSMark Fasheh lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 2164ccd979bdSMark Fasheh lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 216503ab30f7SEric W. Biederman lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode)); 216603ab30f7SEric W. 
Biederman lvb->lvb_igid = cpu_to_be32(i_gid_read(inode)); 2167ccd979bdSMark Fasheh lvb->lvb_imode = cpu_to_be16(inode->i_mode); 2168ccd979bdSMark Fasheh lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 2169ccd979bdSMark Fasheh lvb->lvb_iatime_packed = 2170ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 2171ccd979bdSMark Fasheh lvb->lvb_ictime_packed = 2172ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 2173ccd979bdSMark Fasheh lvb->lvb_imtime_packed = 2174ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 2175ca4d147eSHerbert Poetzl lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 217615b1e36bSMark Fasheh lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 2177f9e2d82eSMark Fasheh lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 2178ccd979bdSMark Fasheh 217924c19ef4SMark Fasheh out: 2180ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 2181ccd979bdSMark Fasheh } 2182ccd979bdSMark Fasheh 2183ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec, 2184ccd979bdSMark Fasheh u64 packed_time) 2185ccd979bdSMark Fasheh { 2186ccd979bdSMark Fasheh spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 2187ccd979bdSMark Fasheh spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 2188ccd979bdSMark Fasheh } 2189ccd979bdSMark Fasheh 2190ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 2191ccd979bdSMark Fasheh { 2192ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2193e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2194ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 2195ccd979bdSMark Fasheh 2196ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 2197ccd979bdSMark Fasheh 2198a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2199ccd979bdSMark Fasheh 2200ccd979bdSMark Fasheh /* We're safe here without the lockres lock... 
*/ 2201ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2202ccd979bdSMark Fasheh oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 2203ccd979bdSMark Fasheh i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 2204ccd979bdSMark Fasheh 2205ca4d147eSHerbert Poetzl oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 220615b1e36bSMark Fasheh oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 2207ca4d147eSHerbert Poetzl ocfs2_set_inode_flags(inode); 2208ca4d147eSHerbert Poetzl 2209ccd979bdSMark Fasheh /* fast-symlinks are a special case */ 2210ccd979bdSMark Fasheh if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 2211ccd979bdSMark Fasheh inode->i_blocks = 0; 2212ccd979bdSMark Fasheh else 22138110b073SMark Fasheh inode->i_blocks = ocfs2_inode_sector_count(inode); 2214ccd979bdSMark Fasheh 221503ab30f7SEric W. Biederman i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid)); 221603ab30f7SEric W. Biederman i_gid_write(inode, be32_to_cpu(lvb->lvb_igid)); 2217ccd979bdSMark Fasheh inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2218bfe86848SMiklos Szeredi set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); 2219ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_atime, 2220ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_iatime_packed)); 2221ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_mtime, 2222ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_imtime_packed)); 2223ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_ctime, 2224ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_ictime_packed)); 2225ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2226ccd979bdSMark Fasheh } 2227ccd979bdSMark Fasheh 2228f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 2229f9e2d82eSMark Fasheh struct ocfs2_lock_res *lockres) 2230ccd979bdSMark Fasheh { 2231a641dc2aSMark Fasheh struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2232ccd979bdSMark Fasheh 22331c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) 22341c520dfbSJoel Becker && lvb->lvb_version == OCFS2_LVB_VERSION 
2235f9e2d82eSMark Fasheh && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 2236ccd979bdSMark Fasheh return 1; 2237ccd979bdSMark Fasheh return 0; 2238ccd979bdSMark Fasheh } 2239ccd979bdSMark Fasheh 2240ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and 2241ccd979bdSMark Fasheh * arbitrate who gets to refresh it. 2242ccd979bdSMark Fasheh * 2243ccd979bdSMark Fasheh * 0 means no refresh needed. 2244ccd979bdSMark Fasheh * 2245ccd979bdSMark Fasheh * > 0 means you need to refresh this and you MUST call 2246ccd979bdSMark Fasheh * ocfs2_complete_lock_res_refresh afterwards. */ 2247ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 2248ccd979bdSMark Fasheh { 2249ccd979bdSMark Fasheh unsigned long flags; 2250ccd979bdSMark Fasheh int status = 0; 2251ccd979bdSMark Fasheh 2252ccd979bdSMark Fasheh refresh_check: 2253ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2254ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 2255ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2256ccd979bdSMark Fasheh goto bail; 2257ccd979bdSMark Fasheh } 2258ccd979bdSMark Fasheh 2259ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 2260ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2261ccd979bdSMark Fasheh 2262ccd979bdSMark Fasheh ocfs2_wait_on_refreshing_lock(lockres); 2263ccd979bdSMark Fasheh goto refresh_check; 2264ccd979bdSMark Fasheh } 2265ccd979bdSMark Fasheh 2266ccd979bdSMark Fasheh /* Ok, I'll be the one to refresh this lock. 
*/ 2267ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 2268ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2269ccd979bdSMark Fasheh 2270ccd979bdSMark Fasheh status = 1; 2271ccd979bdSMark Fasheh bail: 2272c1e8d35eSTao Ma mlog(0, "status %d\n", status); 2273ccd979bdSMark Fasheh return status; 2274ccd979bdSMark Fasheh } 2275ccd979bdSMark Fasheh 2276ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh 2277ccd979bdSMark Fasheh * anymroe, but i won't clear the needs refresh flag. */ 2278ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 2279ccd979bdSMark Fasheh int status) 2280ccd979bdSMark Fasheh { 2281ccd979bdSMark Fasheh unsigned long flags; 2282ccd979bdSMark Fasheh 2283ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2284ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 2285ccd979bdSMark Fasheh if (!status) 2286ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 2287ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2288ccd979bdSMark Fasheh 2289ccd979bdSMark Fasheh wake_up(&lockres->l_event); 2290ccd979bdSMark Fasheh } 2291ccd979bdSMark Fasheh 2292ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. 
*/ 2293e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 2294ccd979bdSMark Fasheh struct buffer_head **bh) 2295ccd979bdSMark Fasheh { 2296ccd979bdSMark Fasheh int status = 0; 2297ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2298e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2299ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 2300c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2301ccd979bdSMark Fasheh 2302be9e986bSMark Fasheh if (ocfs2_mount_local(osb)) 2303be9e986bSMark Fasheh goto bail; 2304be9e986bSMark Fasheh 2305ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2306ccd979bdSMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 2307b0697053SMark Fasheh mlog(0, "Orphaned inode %llu was deleted while we " 2308ccd979bdSMark Fasheh "were waiting on a lock. ip_flags = 0x%x\n", 2309b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, oi->ip_flags); 2310ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2311ccd979bdSMark Fasheh status = -ENOENT; 2312ccd979bdSMark Fasheh goto bail; 2313ccd979bdSMark Fasheh } 2314ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2315ccd979bdSMark Fasheh 2316ccd979bdSMark Fasheh if (!ocfs2_should_refresh_lock_res(lockres)) 2317ccd979bdSMark Fasheh goto bail; 2318ccd979bdSMark Fasheh 2319ccd979bdSMark Fasheh /* This will discard any caching information we might have had 2320ccd979bdSMark Fasheh * for the inode metadata. */ 23218cb471e8SJoel Becker ocfs2_metadata_cache_purge(INODE_CACHE(inode)); 2322ccd979bdSMark Fasheh 232383418978SMark Fasheh ocfs2_extent_map_trunc(inode, 0); 232483418978SMark Fasheh 2325be9e986bSMark Fasheh if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 2326b0697053SMark Fasheh mlog(0, "Trusting LVB on inode %llu\n", 2327b0697053SMark Fasheh (unsigned long long)oi->ip_blkno); 2328ccd979bdSMark Fasheh ocfs2_refresh_inode_from_lvb(inode); 2329ccd979bdSMark Fasheh } else { 2330ccd979bdSMark Fasheh /* Boo, we have to go to disk. 
*/ 2331ccd979bdSMark Fasheh /* read bh, cast, ocfs2_refresh_inode */ 2332b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, bh); 2333ccd979bdSMark Fasheh if (status < 0) { 2334ccd979bdSMark Fasheh mlog_errno(status); 2335ccd979bdSMark Fasheh goto bail_refresh; 2336ccd979bdSMark Fasheh } 2337ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) (*bh)->b_data; 2338ccd979bdSMark Fasheh 2339ccd979bdSMark Fasheh /* This is a good chance to make sure we're not 2340b657c95cSJoel Becker * locking an invalid object. ocfs2_read_inode_block() 2341b657c95cSJoel Becker * already checked that the inode block is sane. 2342ccd979bdSMark Fasheh * 2343ccd979bdSMark Fasheh * We bug on a stale inode here because we checked 2344ccd979bdSMark Fasheh * above whether it was wiped from disk. The wiping 2345ccd979bdSMark Fasheh * node provides a guarantee that we receive that 2346ccd979bdSMark Fasheh * message and can mark the inode before dropping any 2347ccd979bdSMark Fasheh * locks associated with it. */ 2348ccd979bdSMark Fasheh mlog_bug_on_msg(inode->i_generation != 2349ccd979bdSMark Fasheh le32_to_cpu(fe->i_generation), 2350b0697053SMark Fasheh "Invalid dinode %llu disk generation: %u " 2351ccd979bdSMark Fasheh "inode->i_generation: %u\n", 2352b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2353b0697053SMark Fasheh le32_to_cpu(fe->i_generation), 2354ccd979bdSMark Fasheh inode->i_generation); 2355ccd979bdSMark Fasheh mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 2356ccd979bdSMark Fasheh !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 2357b0697053SMark Fasheh "Stale dinode %llu dtime: %llu flags: 0x%x\n", 2358b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2359b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_dtime), 2360ccd979bdSMark Fasheh le32_to_cpu(fe->i_flags)); 2361ccd979bdSMark Fasheh 2362ccd979bdSMark Fasheh ocfs2_refresh_inode(inode, fe); 23638ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2364ccd979bdSMark Fasheh } 2365ccd979bdSMark Fasheh 
2366ccd979bdSMark Fasheh status = 0; 2367ccd979bdSMark Fasheh bail_refresh: 2368ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2369ccd979bdSMark Fasheh bail: 2370ccd979bdSMark Fasheh return status; 2371ccd979bdSMark Fasheh } 2372ccd979bdSMark Fasheh 2373ccd979bdSMark Fasheh static int ocfs2_assign_bh(struct inode *inode, 2374ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2375ccd979bdSMark Fasheh struct buffer_head *passed_bh) 2376ccd979bdSMark Fasheh { 2377ccd979bdSMark Fasheh int status; 2378ccd979bdSMark Fasheh 2379ccd979bdSMark Fasheh if (passed_bh) { 2380ccd979bdSMark Fasheh /* Ok, the update went to disk for us, use the 2381ccd979bdSMark Fasheh * returned bh. */ 2382ccd979bdSMark Fasheh *ret_bh = passed_bh; 2383ccd979bdSMark Fasheh get_bh(*ret_bh); 2384ccd979bdSMark Fasheh 2385ccd979bdSMark Fasheh return 0; 2386ccd979bdSMark Fasheh } 2387ccd979bdSMark Fasheh 2388b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, ret_bh); 2389ccd979bdSMark Fasheh if (status < 0) 2390ccd979bdSMark Fasheh mlog_errno(status); 2391ccd979bdSMark Fasheh 2392ccd979bdSMark Fasheh return status; 2393ccd979bdSMark Fasheh } 2394ccd979bdSMark Fasheh 2395ccd979bdSMark Fasheh /* 2396ccd979bdSMark Fasheh * returns < 0 error if the callback will never be called, otherwise 2397ccd979bdSMark Fasheh * the result of the lock will be communicated via the callback. 
2398ccd979bdSMark Fasheh */ 2399cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode, 2400ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2401ccd979bdSMark Fasheh int ex, 2402cb25797dSJan Kara int arg_flags, 2403cb25797dSJan Kara int subclass) 2404ccd979bdSMark Fasheh { 2405bd3e7610SJoel Becker int status, level, acquired; 2406bd3e7610SJoel Becker u32 dlm_flags; 2407c271c5c2SSunil Mushran struct ocfs2_lock_res *lockres = NULL; 2408ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2409ccd979bdSMark Fasheh struct buffer_head *local_bh = NULL; 2410ccd979bdSMark Fasheh 2411b0697053SMark Fasheh mlog(0, "inode %llu, take %s META lock\n", 2412b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2413ccd979bdSMark Fasheh ex ? "EXMODE" : "PRMODE"); 2414ccd979bdSMark Fasheh 2415ccd979bdSMark Fasheh status = 0; 2416ccd979bdSMark Fasheh acquired = 0; 2417ccd979bdSMark Fasheh /* We'll allow faking a readonly metadata lock for 2418ccd979bdSMark Fasheh * rodevices. */ 2419ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) { 2420ccd979bdSMark Fasheh if (ex) 2421ccd979bdSMark Fasheh status = -EROFS; 242203efed8aSTiger Yang goto getbh; 2423ccd979bdSMark Fasheh } 2424ccd979bdSMark Fasheh 2425439a36b8SEric Ren if ((arg_flags & OCFS2_META_LOCK_GETBH) || 2426439a36b8SEric Ren ocfs2_mount_local(osb)) 2427439a36b8SEric Ren goto update; 2428c271c5c2SSunil Mushran 2429ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2430553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2431ccd979bdSMark Fasheh 2432e63aecb6SMark Fasheh lockres = &OCFS2_I(inode)->ip_inode_lockres; 2433bd3e7610SJoel Becker level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2434ccd979bdSMark Fasheh dlm_flags = 0; 2435ccd979bdSMark Fasheh if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2436bd3e7610SJoel Becker dlm_flags |= DLM_LKF_NOQUEUE; 2437ccd979bdSMark Fasheh 2438cb25797dSJan Kara status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, 2439cb25797dSJan Kara arg_flags, subclass, _RET_IP_); 2440ccd979bdSMark Fasheh if (status < 0) { 244141003a7bSZach Brown if (status != -EAGAIN) 2442ccd979bdSMark Fasheh mlog_errno(status); 2443ccd979bdSMark Fasheh goto bail; 2444ccd979bdSMark Fasheh } 2445ccd979bdSMark Fasheh 2446ccd979bdSMark Fasheh /* Notify the error cleanup path to drop the cluster lock. */ 2447ccd979bdSMark Fasheh acquired = 1; 2448ccd979bdSMark Fasheh 2449ccd979bdSMark Fasheh /* We wait twice because a node may have died while we were in 2450ccd979bdSMark Fasheh * the lower dlm layers. The second time though, we've 2451ccd979bdSMark Fasheh * committed to owning this lock so we don't allow signals to 2452ccd979bdSMark Fasheh * abort the operation. */ 2453ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2454553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2455ccd979bdSMark Fasheh 2456439a36b8SEric Ren update: 245724c19ef4SMark Fasheh /* 245824c19ef4SMark Fasheh * We only see this flag if we're being called from 245924c19ef4SMark Fasheh * ocfs2_read_locked_inode(). It means we're locking an inode 246024c19ef4SMark Fasheh * which hasn't been populated yet, so clear the refresh flag 246124c19ef4SMark Fasheh * and let the caller handle it. 246224c19ef4SMark Fasheh */ 246324c19ef4SMark Fasheh if (inode->i_state & I_NEW) { 246424c19ef4SMark Fasheh status = 0; 2465c271c5c2SSunil Mushran if (lockres) 246624c19ef4SMark Fasheh ocfs2_complete_lock_res_refresh(lockres, 0); 246724c19ef4SMark Fasheh goto bail; 246824c19ef4SMark Fasheh } 246924c19ef4SMark Fasheh 2470ccd979bdSMark Fasheh /* This is fun. The caller may want a bh back, or it may 2471e63aecb6SMark Fasheh * not. 
ocfs2_inode_lock_update definitely wants one in, but 2472ccd979bdSMark Fasheh * may or may not read one, depending on what's in the 2473ccd979bdSMark Fasheh * LVB. The result of all of this is that we've *only* gone to 2474ccd979bdSMark Fasheh * disk if we have to, so the complexity is worthwhile. */ 2475e63aecb6SMark Fasheh status = ocfs2_inode_lock_update(inode, &local_bh); 2476ccd979bdSMark Fasheh if (status < 0) { 2477ccd979bdSMark Fasheh if (status != -ENOENT) 2478ccd979bdSMark Fasheh mlog_errno(status); 2479ccd979bdSMark Fasheh goto bail; 2480ccd979bdSMark Fasheh } 248103efed8aSTiger Yang getbh: 2482ccd979bdSMark Fasheh if (ret_bh) { 2483ccd979bdSMark Fasheh status = ocfs2_assign_bh(inode, ret_bh, local_bh); 2484ccd979bdSMark Fasheh if (status < 0) { 2485ccd979bdSMark Fasheh mlog_errno(status); 2486ccd979bdSMark Fasheh goto bail; 2487ccd979bdSMark Fasheh } 2488ccd979bdSMark Fasheh } 2489ccd979bdSMark Fasheh 2490ccd979bdSMark Fasheh bail: 2491ccd979bdSMark Fasheh if (status < 0) { 2492ccd979bdSMark Fasheh if (ret_bh && (*ret_bh)) { 2493ccd979bdSMark Fasheh brelse(*ret_bh); 2494ccd979bdSMark Fasheh *ret_bh = NULL; 2495ccd979bdSMark Fasheh } 2496ccd979bdSMark Fasheh if (acquired) 2497e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2498ccd979bdSMark Fasheh } 2499ccd979bdSMark Fasheh 2500ccd979bdSMark Fasheh if (local_bh) 2501ccd979bdSMark Fasheh brelse(local_bh); 2502ccd979bdSMark Fasheh 2503ccd979bdSMark Fasheh return status; 2504ccd979bdSMark Fasheh } 2505ccd979bdSMark Fasheh 2506ccd979bdSMark Fasheh /* 250734d024f8SMark Fasheh * This is working around a lock inversion between tasks acquiring DLM 250834d024f8SMark Fasheh * locks while holding a page lock and the downconvert thread which 250934d024f8SMark Fasheh * blocks dlm lock acquiry while acquiring page locks. 
2510ccd979bdSMark Fasheh * 2511ccd979bdSMark Fasheh * ** These _with_page variantes are only intended to be called from aop 2512ccd979bdSMark Fasheh * methods that hold page locks and return a very specific *positive* error 2513ccd979bdSMark Fasheh * code that aop methods pass up to the VFS -- test for errors with != 0. ** 2514ccd979bdSMark Fasheh * 251534d024f8SMark Fasheh * The DLM is called such that it returns -EAGAIN if it would have 251634d024f8SMark Fasheh * blocked waiting for the downconvert thread. In that case we unlock 251734d024f8SMark Fasheh * our page so the downconvert thread can make progress. Once we've 251834d024f8SMark Fasheh * done this we have to return AOP_TRUNCATED_PAGE so the aop method 251934d024f8SMark Fasheh * that called us can bubble that back up into the VFS who will then 252034d024f8SMark Fasheh * immediately retry the aop call. 2521ccd979bdSMark Fasheh */ 2522e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode, 2523ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2524ccd979bdSMark Fasheh int ex, 2525ccd979bdSMark Fasheh struct page *page) 2526ccd979bdSMark Fasheh { 2527ccd979bdSMark Fasheh int ret; 2528ccd979bdSMark Fasheh 2529e63aecb6SMark Fasheh ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2530ccd979bdSMark Fasheh if (ret == -EAGAIN) { 2531ccd979bdSMark Fasheh unlock_page(page); 2532ff26cc10SGang He /* 2533ff26cc10SGang He * If we can't get inode lock immediately, we should not return 2534ff26cc10SGang He * directly here, since this will lead to a softlockup problem. 2535ff26cc10SGang He * The method is to get a blocking lock and immediately unlock 2536ff26cc10SGang He * before returning, this can avoid CPU resource waste due to 2537ff26cc10SGang He * lots of retries, and benefits fairness in getting lock. 
2538ff26cc10SGang He */ 2539ff26cc10SGang He if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) 2540ff26cc10SGang He ocfs2_inode_unlock(inode, ex); 2541ccd979bdSMark Fasheh ret = AOP_TRUNCATED_PAGE; 2542ccd979bdSMark Fasheh } 2543ccd979bdSMark Fasheh 2544ccd979bdSMark Fasheh return ret; 2545ccd979bdSMark Fasheh } 2546ccd979bdSMark Fasheh 2547e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode, 25487f1a37e3STiger Yang struct vfsmount *vfsmnt, 25497f1a37e3STiger Yang int *level) 25507f1a37e3STiger Yang { 25517f1a37e3STiger Yang int ret; 25527f1a37e3STiger Yang 2553e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, NULL, 0); 25547f1a37e3STiger Yang if (ret < 0) { 25557f1a37e3STiger Yang mlog_errno(ret); 25567f1a37e3STiger Yang return ret; 25577f1a37e3STiger Yang } 25587f1a37e3STiger Yang 25597f1a37e3STiger Yang /* 25607f1a37e3STiger Yang * If we should update atime, we will get EX lock, 25617f1a37e3STiger Yang * otherwise we just get PR lock. 25627f1a37e3STiger Yang */ 25637f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) { 25647f1a37e3STiger Yang struct buffer_head *bh = NULL; 25657f1a37e3STiger Yang 2566e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, 0); 2567e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, &bh, 1); 25687f1a37e3STiger Yang if (ret < 0) { 25697f1a37e3STiger Yang mlog_errno(ret); 25707f1a37e3STiger Yang return ret; 25717f1a37e3STiger Yang } 25727f1a37e3STiger Yang *level = 1; 25737f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) 25747f1a37e3STiger Yang ocfs2_update_inode_atime(inode, bh); 25757f1a37e3STiger Yang if (bh) 25767f1a37e3STiger Yang brelse(bh); 25777f1a37e3STiger Yang } else 25787f1a37e3STiger Yang *level = 0; 25797f1a37e3STiger Yang 25807f1a37e3STiger Yang return ret; 25817f1a37e3STiger Yang } 25827f1a37e3STiger Yang 2583e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode, 2584ccd979bdSMark Fasheh int ex) 2585ccd979bdSMark Fasheh { 2586bd3e7610SJoel Becker int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2587e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2588c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2589ccd979bdSMark Fasheh 2590b0697053SMark Fasheh mlog(0, "inode %llu drop %s META lock\n", 2591b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2592ccd979bdSMark Fasheh ex ? "EXMODE" : "PRMODE"); 2593ccd979bdSMark Fasheh 2594c271c5c2SSunil Mushran if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2595c271c5c2SSunil Mushran !ocfs2_mount_local(osb)) 2596ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2597ccd979bdSMark Fasheh } 2598ccd979bdSMark Fasheh 2599439a36b8SEric Ren /* 2600439a36b8SEric Ren * This _tracker variantes are introduced to deal with the recursive cluster 2601439a36b8SEric Ren * locking issue. The idea is to keep track of a lock holder on the stack of 2602439a36b8SEric Ren * the current process. If there's a lock holder on the stack, we know the 2603439a36b8SEric Ren * task context is already protected by cluster locking. Currently, they're 2604439a36b8SEric Ren * used in some VFS entry routines. 2605439a36b8SEric Ren * 2606439a36b8SEric Ren * return < 0 on error, return == 0 if there's no lock holder on the stack 2607439a36b8SEric Ren * before this call, return == 1 if this call would be a recursive locking. 
2608439a36b8SEric Ren */ 2609439a36b8SEric Ren int ocfs2_inode_lock_tracker(struct inode *inode, 2610439a36b8SEric Ren struct buffer_head **ret_bh, 2611439a36b8SEric Ren int ex, 2612439a36b8SEric Ren struct ocfs2_lock_holder *oh) 2613439a36b8SEric Ren { 2614439a36b8SEric Ren int status; 2615439a36b8SEric Ren int arg_flags = 0, has_locked; 2616439a36b8SEric Ren struct ocfs2_lock_res *lockres; 2617439a36b8SEric Ren 2618439a36b8SEric Ren lockres = &OCFS2_I(inode)->ip_inode_lockres; 2619439a36b8SEric Ren has_locked = ocfs2_is_locked_by_me(lockres); 2620439a36b8SEric Ren /* Just get buffer head if the cluster lock has been taken */ 2621439a36b8SEric Ren if (has_locked) 2622439a36b8SEric Ren arg_flags = OCFS2_META_LOCK_GETBH; 2623439a36b8SEric Ren 2624439a36b8SEric Ren if (likely(!has_locked || ret_bh)) { 2625439a36b8SEric Ren status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags); 2626439a36b8SEric Ren if (status < 0) { 2627439a36b8SEric Ren if (status != -ENOENT) 2628439a36b8SEric Ren mlog_errno(status); 2629439a36b8SEric Ren return status; 2630439a36b8SEric Ren } 2631439a36b8SEric Ren } 2632439a36b8SEric Ren if (!has_locked) 2633439a36b8SEric Ren ocfs2_add_holder(lockres, oh); 2634439a36b8SEric Ren 2635439a36b8SEric Ren return has_locked; 2636439a36b8SEric Ren } 2637439a36b8SEric Ren 2638439a36b8SEric Ren void ocfs2_inode_unlock_tracker(struct inode *inode, 2639439a36b8SEric Ren int ex, 2640439a36b8SEric Ren struct ocfs2_lock_holder *oh, 2641439a36b8SEric Ren int had_lock) 2642439a36b8SEric Ren { 2643439a36b8SEric Ren struct ocfs2_lock_res *lockres; 2644439a36b8SEric Ren 2645439a36b8SEric Ren lockres = &OCFS2_I(inode)->ip_inode_lockres; 26468818efaaSEric Ren /* had_lock means that the currect process already takes the cluster 26478818efaaSEric Ren * lock previously. If had_lock is 1, we have nothing to do here, and 26488818efaaSEric Ren * it will get unlocked where we got the lock. 
26498818efaaSEric Ren */ 2650439a36b8SEric Ren if (!had_lock) { 2651439a36b8SEric Ren ocfs2_remove_holder(lockres, oh); 2652439a36b8SEric Ren ocfs2_inode_unlock(inode, ex); 2653439a36b8SEric Ren } 2654439a36b8SEric Ren } 2655439a36b8SEric Ren 2656df152c24SSunil Mushran int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) 265783273932SSrinivas Eeda { 265883273932SSrinivas Eeda struct ocfs2_lock_res *lockres; 265983273932SSrinivas Eeda struct ocfs2_orphan_scan_lvb *lvb; 266083273932SSrinivas Eeda int status = 0; 266183273932SSrinivas Eeda 2662df152c24SSunil Mushran if (ocfs2_is_hard_readonly(osb)) 2663df152c24SSunil Mushran return -EROFS; 2664df152c24SSunil Mushran 2665df152c24SSunil Mushran if (ocfs2_mount_local(osb)) 2666df152c24SSunil Mushran return 0; 2667df152c24SSunil Mushran 266883273932SSrinivas Eeda lockres = &osb->osb_orphan_scan.os_lockres; 2669df152c24SSunil Mushran status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 267083273932SSrinivas Eeda if (status < 0) 267183273932SSrinivas Eeda return status; 267283273932SSrinivas Eeda 267383273932SSrinivas Eeda lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 26741c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 26751c520dfbSJoel Becker lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) 267683273932SSrinivas Eeda *seqno = be32_to_cpu(lvb->lvb_os_seqno); 26773211949fSSunil Mushran else 26783211949fSSunil Mushran *seqno = osb->osb_orphan_scan.os_seqno + 1; 26793211949fSSunil Mushran 268083273932SSrinivas Eeda return status; 268183273932SSrinivas Eeda } 268283273932SSrinivas Eeda 2683df152c24SSunil Mushran void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) 268483273932SSrinivas Eeda { 268583273932SSrinivas Eeda struct ocfs2_lock_res *lockres; 268683273932SSrinivas Eeda struct ocfs2_orphan_scan_lvb *lvb; 268783273932SSrinivas Eeda 2688df152c24SSunil Mushran if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { 268983273932SSrinivas Eeda lockres = 
&osb->osb_orphan_scan.os_lockres; 269083273932SSrinivas Eeda lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 269183273932SSrinivas Eeda lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; 269283273932SSrinivas Eeda lvb->lvb_os_seqno = cpu_to_be32(seqno); 2693df152c24SSunil Mushran ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2694df152c24SSunil Mushran } 269583273932SSrinivas Eeda } 269683273932SSrinivas Eeda 2697ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb, 2698ccd979bdSMark Fasheh int ex) 2699ccd979bdSMark Fasheh { 2700c271c5c2SSunil Mushran int status = 0; 2701bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2702ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2703ccd979bdSMark Fasheh 2704ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2705ccd979bdSMark Fasheh return -EROFS; 2706ccd979bdSMark Fasheh 2707c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2708c271c5c2SSunil Mushran goto bail; 2709c271c5c2SSunil Mushran 2710ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2711ccd979bdSMark Fasheh if (status < 0) { 2712ccd979bdSMark Fasheh mlog_errno(status); 2713ccd979bdSMark Fasheh goto bail; 2714ccd979bdSMark Fasheh } 2715ccd979bdSMark Fasheh 2716ccd979bdSMark Fasheh /* The super block lock path is really in the best position to 2717ccd979bdSMark Fasheh * know when resources covered by the lock need to be 2718ccd979bdSMark Fasheh * refreshed, so we do it here. 
Of course, making sense of 2719ccd979bdSMark Fasheh * everything is up to the caller :) */ 2720ccd979bdSMark Fasheh status = ocfs2_should_refresh_lock_res(lockres); 2721ccd979bdSMark Fasheh if (status) { 27228e8a4603SMark Fasheh status = ocfs2_refresh_slot_info(osb); 2723ccd979bdSMark Fasheh 2724ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2725ccd979bdSMark Fasheh 27263278bb74SJunxiao Bi if (status < 0) { 27273278bb74SJunxiao Bi ocfs2_cluster_unlock(osb, lockres, level); 2728ccd979bdSMark Fasheh mlog_errno(status); 27293278bb74SJunxiao Bi } 27308ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2731ccd979bdSMark Fasheh } 2732ccd979bdSMark Fasheh bail: 2733ccd979bdSMark Fasheh return status; 2734ccd979bdSMark Fasheh } 2735ccd979bdSMark Fasheh 2736ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb, 2737ccd979bdSMark Fasheh int ex) 2738ccd979bdSMark Fasheh { 2739bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2740ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2741ccd979bdSMark Fasheh 2742c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2743ccd979bdSMark Fasheh ocfs2_cluster_unlock(osb, lockres, level); 2744ccd979bdSMark Fasheh } 2745ccd979bdSMark Fasheh 2746ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb) 2747ccd979bdSMark Fasheh { 2748ccd979bdSMark Fasheh int status; 2749ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2750ccd979bdSMark Fasheh 2751ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2752ccd979bdSMark Fasheh return -EROFS; 2753ccd979bdSMark Fasheh 2754c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2755c271c5c2SSunil Mushran return 0; 2756c271c5c2SSunil Mushran 2757bd3e7610SJoel Becker status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2758ccd979bdSMark Fasheh if (status < 0) 2759ccd979bdSMark Fasheh mlog_errno(status); 2760ccd979bdSMark Fasheh 2761ccd979bdSMark Fasheh return status; 
2762ccd979bdSMark Fasheh } 2763ccd979bdSMark Fasheh 2764ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb) 2765ccd979bdSMark Fasheh { 2766ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2767ccd979bdSMark Fasheh 2768c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2769bd3e7610SJoel Becker ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2770ccd979bdSMark Fasheh } 2771ccd979bdSMark Fasheh 27726ca497a8Swengang wang int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) 27736ca497a8Swengang wang { 27746ca497a8Swengang wang int status; 27756ca497a8Swengang wang struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 27766ca497a8Swengang wang 27776ca497a8Swengang wang if (ocfs2_is_hard_readonly(osb)) 27786ca497a8Swengang wang return -EROFS; 27796ca497a8Swengang wang 27806ca497a8Swengang wang if (ocfs2_mount_local(osb)) 27816ca497a8Swengang wang return 0; 27826ca497a8Swengang wang 27836ca497a8Swengang wang status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, 27846ca497a8Swengang wang 0, 0); 27856ca497a8Swengang wang if (status < 0) 27866ca497a8Swengang wang mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); 27876ca497a8Swengang wang 27886ca497a8Swengang wang return status; 27896ca497a8Swengang wang } 27906ca497a8Swengang wang 27916ca497a8Swengang wang void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) 27926ca497a8Swengang wang { 27936ca497a8Swengang wang struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 27946ca497a8Swengang wang 27956ca497a8Swengang wang if (!ocfs2_mount_local(osb)) 27966ca497a8Swengang wang ocfs2_cluster_unlock(osb, lockres, 27976ca497a8Swengang wang ex ? 
LKM_EXMODE : LKM_PRMODE); 27986ca497a8Swengang wang } 27996ca497a8Swengang wang 28004882abebSGang He int ocfs2_trim_fs_lock(struct ocfs2_super *osb, 28014882abebSGang He struct ocfs2_trim_fs_info *info, int trylock) 28024882abebSGang He { 28034882abebSGang He int status; 28044882abebSGang He struct ocfs2_trim_fs_lvb *lvb; 28054882abebSGang He struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; 28064882abebSGang He 28074882abebSGang He if (info) 28084882abebSGang He info->tf_valid = 0; 28094882abebSGang He 28104882abebSGang He if (ocfs2_is_hard_readonly(osb)) 28114882abebSGang He return -EROFS; 28124882abebSGang He 28134882abebSGang He if (ocfs2_mount_local(osb)) 28144882abebSGang He return 0; 28154882abebSGang He 28164882abebSGang He status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 28174882abebSGang He trylock ? DLM_LKF_NOQUEUE : 0, 0); 28184882abebSGang He if (status < 0) { 28194882abebSGang He if (status != -EAGAIN) 28204882abebSGang He mlog_errno(status); 28214882abebSGang He return status; 28224882abebSGang He } 28234882abebSGang He 28244882abebSGang He if (info) { 28254882abebSGang He lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 28264882abebSGang He if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 28274882abebSGang He lvb->lvb_version == OCFS2_TRIMFS_LVB_VERSION) { 28284882abebSGang He info->tf_valid = 1; 28294882abebSGang He info->tf_success = lvb->lvb_success; 28304882abebSGang He info->tf_nodenum = be32_to_cpu(lvb->lvb_nodenum); 28314882abebSGang He info->tf_start = be64_to_cpu(lvb->lvb_start); 28324882abebSGang He info->tf_len = be64_to_cpu(lvb->lvb_len); 28334882abebSGang He info->tf_minlen = be64_to_cpu(lvb->lvb_minlen); 28344882abebSGang He info->tf_trimlen = be64_to_cpu(lvb->lvb_trimlen); 28354882abebSGang He } 28364882abebSGang He } 28374882abebSGang He 28384882abebSGang He return status; 28394882abebSGang He } 28404882abebSGang He 28414882abebSGang He void ocfs2_trim_fs_unlock(struct ocfs2_super *osb, 28424882abebSGang He struct 
ocfs2_trim_fs_info *info) 28434882abebSGang He { 28444882abebSGang He struct ocfs2_trim_fs_lvb *lvb; 28454882abebSGang He struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres; 28464882abebSGang He 28474882abebSGang He if (ocfs2_mount_local(osb)) 28484882abebSGang He return; 28494882abebSGang He 28504882abebSGang He if (info) { 28514882abebSGang He lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 28524882abebSGang He lvb->lvb_version = OCFS2_TRIMFS_LVB_VERSION; 28534882abebSGang He lvb->lvb_success = info->tf_success; 28544882abebSGang He lvb->lvb_nodenum = cpu_to_be32(info->tf_nodenum); 28554882abebSGang He lvb->lvb_start = cpu_to_be64(info->tf_start); 28564882abebSGang He lvb->lvb_len = cpu_to_be64(info->tf_len); 28574882abebSGang He lvb->lvb_minlen = cpu_to_be64(info->tf_minlen); 28584882abebSGang He lvb->lvb_trimlen = cpu_to_be64(info->tf_trimlen); 28594882abebSGang He } 28604882abebSGang He 28614882abebSGang He ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 28624882abebSGang He } 28634882abebSGang He 2864d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2865d680efe9SMark Fasheh { 2866d680efe9SMark Fasheh int ret; 2867bd3e7610SJoel Becker int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2868d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2869d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2870d680efe9SMark Fasheh 2871d680efe9SMark Fasheh BUG_ON(!dl); 2872d680efe9SMark Fasheh 287303efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb)) { 287403efed8aSTiger Yang if (ex) 2875d680efe9SMark Fasheh return -EROFS; 287603efed8aSTiger Yang return 0; 287703efed8aSTiger Yang } 2878d680efe9SMark Fasheh 2879c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2880c271c5c2SSunil Mushran return 0; 2881c271c5c2SSunil Mushran 2882d680efe9SMark Fasheh ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 2883d680efe9SMark Fasheh if (ret < 0) 2884d680efe9SMark Fasheh mlog_errno(ret); 2885d680efe9SMark Fasheh 2886d680efe9SMark Fasheh return ret; 2887d680efe9SMark Fasheh } 2888d680efe9SMark Fasheh 2889d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2890d680efe9SMark Fasheh { 2891bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2892d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2893d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2894d680efe9SMark Fasheh 289503efed8aSTiger Yang if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 2896d680efe9SMark Fasheh ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 2897d680efe9SMark Fasheh } 2898d680efe9SMark Fasheh 2899ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because 2900ccd979bdSMark Fasheh * open references on the debug inodes can live on after a mount, so 2901ccd979bdSMark Fasheh * we can't rely on the ocfs2_super to always exist. 
*/ 2902ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref) 2903ccd979bdSMark Fasheh { 2904ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2905ccd979bdSMark Fasheh 2906ccd979bdSMark Fasheh dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 2907ccd979bdSMark Fasheh 2908ccd979bdSMark Fasheh kfree(dlm_debug); 2909ccd979bdSMark Fasheh } 2910ccd979bdSMark Fasheh 2911ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 2912ccd979bdSMark Fasheh { 2913ccd979bdSMark Fasheh if (dlm_debug) 2914ccd979bdSMark Fasheh kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 2915ccd979bdSMark Fasheh } 2916ccd979bdSMark Fasheh 2917ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) 2918ccd979bdSMark Fasheh { 2919ccd979bdSMark Fasheh kref_get(&debug->d_refcnt); 2920ccd979bdSMark Fasheh } 2921ccd979bdSMark Fasheh 2922ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 2923ccd979bdSMark Fasheh { 2924ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2925ccd979bdSMark Fasheh 2926ccd979bdSMark Fasheh dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 2927ccd979bdSMark Fasheh if (!dlm_debug) { 2928ccd979bdSMark Fasheh mlog_errno(-ENOMEM); 2929ccd979bdSMark Fasheh goto out; 2930ccd979bdSMark Fasheh } 2931ccd979bdSMark Fasheh 2932ccd979bdSMark Fasheh kref_init(&dlm_debug->d_refcnt); 2933ccd979bdSMark Fasheh INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 2934ccd979bdSMark Fasheh dlm_debug->d_locking_state = NULL; 2935ccd979bdSMark Fasheh out: 2936ccd979bdSMark Fasheh return dlm_debug; 2937ccd979bdSMark Fasheh } 2938ccd979bdSMark Fasheh 2939ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. 
*/ 2940ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv { 2941ccd979bdSMark Fasheh struct ocfs2_dlm_debug *p_dlm_debug; 2942ccd979bdSMark Fasheh struct ocfs2_lock_res p_iter_res; 2943ccd979bdSMark Fasheh struct ocfs2_lock_res p_tmp_res; 2944ccd979bdSMark Fasheh }; 2945ccd979bdSMark Fasheh 2946ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, 2947ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv) 2948ccd979bdSMark Fasheh { 2949ccd979bdSMark Fasheh struct ocfs2_lock_res *iter, *ret = NULL; 2950ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; 2951ccd979bdSMark Fasheh 2952ccd979bdSMark Fasheh assert_spin_locked(&ocfs2_dlm_tracking_lock); 2953ccd979bdSMark Fasheh 2954ccd979bdSMark Fasheh list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { 2955ccd979bdSMark Fasheh /* discover the head of the list */ 2956ccd979bdSMark Fasheh if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { 2957ccd979bdSMark Fasheh mlog(0, "End of list found, %p\n", ret); 2958ccd979bdSMark Fasheh break; 2959ccd979bdSMark Fasheh } 2960ccd979bdSMark Fasheh 2961ccd979bdSMark Fasheh /* We track our "dummy" iteration lockres' by a NULL 2962ccd979bdSMark Fasheh * l_ops field. 
*/ 2963ccd979bdSMark Fasheh if (iter->l_ops != NULL) { 2964ccd979bdSMark Fasheh ret = iter; 2965ccd979bdSMark Fasheh break; 2966ccd979bdSMark Fasheh } 2967ccd979bdSMark Fasheh } 2968ccd979bdSMark Fasheh 2969ccd979bdSMark Fasheh return ret; 2970ccd979bdSMark Fasheh } 2971ccd979bdSMark Fasheh 2972ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) 2973ccd979bdSMark Fasheh { 2974ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2975ccd979bdSMark Fasheh struct ocfs2_lock_res *iter; 2976ccd979bdSMark Fasheh 2977ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2978ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); 2979ccd979bdSMark Fasheh if (iter) { 2980ccd979bdSMark Fasheh /* Since lockres' have the lifetime of their container 2981ccd979bdSMark Fasheh * (which can be inodes, ocfs2_supers, etc) we want to 2982ccd979bdSMark Fasheh * copy this out to a temporary lockres while still 2983ccd979bdSMark Fasheh * under the spinlock. Obviously after this we can't 2984ccd979bdSMark Fasheh * trust any pointers on the copy returned, but that's 2985ccd979bdSMark Fasheh * ok as the information we want isn't typically held 2986ccd979bdSMark Fasheh * in them. 
*/ 2987ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2988ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2989ccd979bdSMark Fasheh } 2990ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2991ccd979bdSMark Fasheh 2992ccd979bdSMark Fasheh return iter; 2993ccd979bdSMark Fasheh } 2994ccd979bdSMark Fasheh 2995ccd979bdSMark Fasheh static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) 2996ccd979bdSMark Fasheh { 2997ccd979bdSMark Fasheh } 2998ccd979bdSMark Fasheh 2999ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) 3000ccd979bdSMark Fasheh { 3001ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 3002ccd979bdSMark Fasheh struct ocfs2_lock_res *iter = v; 3003ccd979bdSMark Fasheh struct ocfs2_lock_res *dummy = &priv->p_iter_res; 3004ccd979bdSMark Fasheh 3005ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 3006ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(iter, priv); 3007ccd979bdSMark Fasheh list_del_init(&dummy->l_debug_list); 3008ccd979bdSMark Fasheh if (iter) { 3009ccd979bdSMark Fasheh list_add(&dummy->l_debug_list, &iter->l_debug_list); 3010ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 3011ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 3012ccd979bdSMark Fasheh } 3013ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 3014ccd979bdSMark Fasheh 3015ccd979bdSMark Fasheh return iter; 3016ccd979bdSMark Fasheh } 3017ccd979bdSMark Fasheh 30185bc970e8SSunil Mushran /* 30195bc970e8SSunil Mushran * Version is used by debugfs.ocfs2 to determine the format being used 30205bc970e8SSunil Mushran * 30215bc970e8SSunil Mushran * New in version 2 30225bc970e8SSunil Mushran * - Lock stats printed 30235bc970e8SSunil Mushran * New in version 3 30245bc970e8SSunil Mushran * - Max time in lock stats is in usecs (instead of nsecs) 30255bc970e8SSunil Mushran */ 30265bc970e8SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 3 3027ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 
3028ccd979bdSMark Fasheh { 3029ccd979bdSMark Fasheh int i; 3030ccd979bdSMark Fasheh char *lvb; 3031ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = v; 3032ccd979bdSMark Fasheh 3033ccd979bdSMark Fasheh if (!lockres) 3034ccd979bdSMark Fasheh return -EINVAL; 3035ccd979bdSMark Fasheh 3036d680efe9SMark Fasheh seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 3037d680efe9SMark Fasheh 3038d680efe9SMark Fasheh if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 3039d680efe9SMark Fasheh seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 3040d680efe9SMark Fasheh lockres->l_name, 3041d680efe9SMark Fasheh (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 3042d680efe9SMark Fasheh else 3043d680efe9SMark Fasheh seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 3044d680efe9SMark Fasheh 3045d680efe9SMark Fasheh seq_printf(m, "%d\t" 3046ccd979bdSMark Fasheh "0x%lx\t" 3047ccd979bdSMark Fasheh "0x%x\t" 3048ccd979bdSMark Fasheh "0x%x\t" 3049ccd979bdSMark Fasheh "%u\t" 3050ccd979bdSMark Fasheh "%u\t" 3051ccd979bdSMark Fasheh "%d\t" 3052ccd979bdSMark Fasheh "%d\t", 3053ccd979bdSMark Fasheh lockres->l_level, 3054ccd979bdSMark Fasheh lockres->l_flags, 3055ccd979bdSMark Fasheh lockres->l_action, 3056ccd979bdSMark Fasheh lockres->l_unlock_action, 3057ccd979bdSMark Fasheh lockres->l_ro_holders, 3058ccd979bdSMark Fasheh lockres->l_ex_holders, 3059ccd979bdSMark Fasheh lockres->l_requested, 3060ccd979bdSMark Fasheh lockres->l_blocking); 3061ccd979bdSMark Fasheh 3062ccd979bdSMark Fasheh /* Dump the raw LVB */ 30638f2c9c1bSJoel Becker lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 3064ccd979bdSMark Fasheh for(i = 0; i < DLM_LVB_LEN; i++) 3065ccd979bdSMark Fasheh seq_printf(m, "0x%x\t", lvb[i]); 3066ccd979bdSMark Fasheh 30678ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 30685bc970e8SSunil Mushran # define lock_num_prmode(_l) ((_l)->l_lock_prmode.ls_gets) 30695bc970e8SSunil Mushran # define lock_num_exmode(_l) ((_l)->l_lock_exmode.ls_gets) 30705bc970e8SSunil Mushran # 
define lock_num_prmode_failed(_l) ((_l)->l_lock_prmode.ls_fail) 30715bc970e8SSunil Mushran # define lock_num_exmode_failed(_l) ((_l)->l_lock_exmode.ls_fail) 30725bc970e8SSunil Mushran # define lock_total_prmode(_l) ((_l)->l_lock_prmode.ls_total) 30735bc970e8SSunil Mushran # define lock_total_exmode(_l) ((_l)->l_lock_exmode.ls_total) 30745bc970e8SSunil Mushran # define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max) 30755bc970e8SSunil Mushran # define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max) 30765bc970e8SSunil Mushran # define lock_refresh(_l) ((_l)->l_lock_refresh) 30778ddb7b00SSunil Mushran #else 30785bc970e8SSunil Mushran # define lock_num_prmode(_l) (0) 30795bc970e8SSunil Mushran # define lock_num_exmode(_l) (0) 30808ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l) (0) 30818ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l) (0) 3082dd25e55eSRandy Dunlap # define lock_total_prmode(_l) (0ULL) 3083dd25e55eSRandy Dunlap # define lock_total_exmode(_l) (0ULL) 30848ddb7b00SSunil Mushran # define lock_max_prmode(_l) (0) 30858ddb7b00SSunil Mushran # define lock_max_exmode(_l) (0) 30868ddb7b00SSunil Mushran # define lock_refresh(_l) (0) 30878ddb7b00SSunil Mushran #endif 30888ddb7b00SSunil Mushran /* The following seq_print was added in version 2 of this output */ 30895bc970e8SSunil Mushran seq_printf(m, "%u\t" 30905bc970e8SSunil Mushran "%u\t" 30918ddb7b00SSunil Mushran "%u\t" 30928ddb7b00SSunil Mushran "%u\t" 30938ddb7b00SSunil Mushran "%llu\t" 30948ddb7b00SSunil Mushran "%llu\t" 30958ddb7b00SSunil Mushran "%u\t" 30968ddb7b00SSunil Mushran "%u\t" 30978ddb7b00SSunil Mushran "%u\t", 30988ddb7b00SSunil Mushran lock_num_prmode(lockres), 30998ddb7b00SSunil Mushran lock_num_exmode(lockres), 31008ddb7b00SSunil Mushran lock_num_prmode_failed(lockres), 31018ddb7b00SSunil Mushran lock_num_exmode_failed(lockres), 31028ddb7b00SSunil Mushran lock_total_prmode(lockres), 31038ddb7b00SSunil Mushran lock_total_exmode(lockres), 31048ddb7b00SSunil Mushran 
lock_max_prmode(lockres), 31058ddb7b00SSunil Mushran lock_max_exmode(lockres), 31068ddb7b00SSunil Mushran lock_refresh(lockres)); 31078ddb7b00SSunil Mushran 3108ccd979bdSMark Fasheh /* End the line */ 3109ccd979bdSMark Fasheh seq_printf(m, "\n"); 3110ccd979bdSMark Fasheh return 0; 3111ccd979bdSMark Fasheh } 3112ccd979bdSMark Fasheh 311390d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = { 3114ccd979bdSMark Fasheh .start = ocfs2_dlm_seq_start, 3115ccd979bdSMark Fasheh .stop = ocfs2_dlm_seq_stop, 3116ccd979bdSMark Fasheh .next = ocfs2_dlm_seq_next, 3117ccd979bdSMark Fasheh .show = ocfs2_dlm_seq_show, 3118ccd979bdSMark Fasheh }; 3119ccd979bdSMark Fasheh 3120ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 3121ccd979bdSMark Fasheh { 312233fa1d90SJoe Perches struct seq_file *seq = file->private_data; 3123ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = seq->private; 3124ccd979bdSMark Fasheh struct ocfs2_lock_res *res = &priv->p_iter_res; 3125ccd979bdSMark Fasheh 3126ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 3127ccd979bdSMark Fasheh ocfs2_put_dlm_debug(priv->p_dlm_debug); 3128ccd979bdSMark Fasheh return seq_release_private(inode, file); 3129ccd979bdSMark Fasheh } 3130ccd979bdSMark Fasheh 3131ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 3132ccd979bdSMark Fasheh { 3133ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv; 3134ccd979bdSMark Fasheh struct ocfs2_super *osb; 3135ccd979bdSMark Fasheh 31361848cb55SRob Jones priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv)); 3137ccd979bdSMark Fasheh if (!priv) { 31381848cb55SRob Jones mlog_errno(-ENOMEM); 31391848cb55SRob Jones return -ENOMEM; 3140ccd979bdSMark Fasheh } 31411848cb55SRob Jones 31428e18e294STheodore Ts'o osb = inode->i_private; 3143ccd979bdSMark Fasheh ocfs2_get_dlm_debug(osb->osb_dlm_debug); 3144ccd979bdSMark Fasheh priv->p_dlm_debug = osb->osb_dlm_debug; 
3145ccd979bdSMark Fasheh INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 3146ccd979bdSMark Fasheh 3147ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(&priv->p_iter_res, 3148ccd979bdSMark Fasheh priv->p_dlm_debug); 3149ccd979bdSMark Fasheh 31501848cb55SRob Jones return 0; 3151ccd979bdSMark Fasheh } 3152ccd979bdSMark Fasheh 31534b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = { 3154ccd979bdSMark Fasheh .open = ocfs2_dlm_debug_open, 3155ccd979bdSMark Fasheh .release = ocfs2_dlm_debug_release, 3156ccd979bdSMark Fasheh .read = seq_read, 3157ccd979bdSMark Fasheh .llseek = seq_lseek, 3158ccd979bdSMark Fasheh }; 3159ccd979bdSMark Fasheh 3160ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 3161ccd979bdSMark Fasheh { 3162ccd979bdSMark Fasheh int ret = 0; 3163ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 3164ccd979bdSMark Fasheh 3165ccd979bdSMark Fasheh dlm_debug->d_locking_state = debugfs_create_file("locking_state", 3166ccd979bdSMark Fasheh S_IFREG|S_IRUSR, 3167ccd979bdSMark Fasheh osb->osb_debug_root, 3168ccd979bdSMark Fasheh osb, 3169ccd979bdSMark Fasheh &ocfs2_dlm_debug_fops); 31708f443e23SLinus Torvalds if (!dlm_debug->d_locking_state) { 3171ccd979bdSMark Fasheh ret = -EINVAL; 3172ccd979bdSMark Fasheh mlog(ML_ERROR, 3173ccd979bdSMark Fasheh "Unable to create locking state debugfs file.\n"); 3174ccd979bdSMark Fasheh goto out; 3175ccd979bdSMark Fasheh } 3176ccd979bdSMark Fasheh 3177ccd979bdSMark Fasheh ocfs2_get_dlm_debug(dlm_debug); 3178ccd979bdSMark Fasheh out: 3179ccd979bdSMark Fasheh return ret; 3180ccd979bdSMark Fasheh } 3181ccd979bdSMark Fasheh 3182ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 3183ccd979bdSMark Fasheh { 3184ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 3185ccd979bdSMark Fasheh 3186ccd979bdSMark Fasheh if (dlm_debug) { 3187ccd979bdSMark Fasheh debugfs_remove(dlm_debug->d_locking_state); 
3188ccd979bdSMark Fasheh ocfs2_put_dlm_debug(dlm_debug); 3189ccd979bdSMark Fasheh } 3190ccd979bdSMark Fasheh } 3191ccd979bdSMark Fasheh 3192ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb) 3193ccd979bdSMark Fasheh { 3194c271c5c2SSunil Mushran int status = 0; 31954670c46dSJoel Becker struct ocfs2_cluster_connection *conn = NULL; 3196ccd979bdSMark Fasheh 31970abd6d18SMark Fasheh if (ocfs2_mount_local(osb)) { 31980abd6d18SMark Fasheh osb->node_num = 0; 3199c271c5c2SSunil Mushran goto local; 32000abd6d18SMark Fasheh } 3201c271c5c2SSunil Mushran 3202ccd979bdSMark Fasheh status = ocfs2_dlm_init_debug(osb); 3203ccd979bdSMark Fasheh if (status < 0) { 3204ccd979bdSMark Fasheh mlog_errno(status); 3205ccd979bdSMark Fasheh goto bail; 3206ccd979bdSMark Fasheh } 3207ccd979bdSMark Fasheh 320834d024f8SMark Fasheh /* launch downconvert thread */ 32095afc44e2SJoseph Qi osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s", 32105afc44e2SJoseph Qi osb->uuid_str); 321134d024f8SMark Fasheh if (IS_ERR(osb->dc_task)) { 321234d024f8SMark Fasheh status = PTR_ERR(osb->dc_task); 321334d024f8SMark Fasheh osb->dc_task = NULL; 3214ccd979bdSMark Fasheh mlog_errno(status); 3215ccd979bdSMark Fasheh goto bail; 3216ccd979bdSMark Fasheh } 3217ccd979bdSMark Fasheh 3218ccd979bdSMark Fasheh /* for now, uuid == domain */ 32199c6c877cSJoel Becker status = ocfs2_cluster_connect(osb->osb_cluster_stack, 3220c74a3bddSGoldwyn Rodrigues osb->osb_cluster_name, 3221c74a3bddSGoldwyn Rodrigues strlen(osb->osb_cluster_name), 32229c6c877cSJoel Becker osb->uuid_str, 32234670c46dSJoel Becker strlen(osb->uuid_str), 3224553b5eb9SJoel Becker &lproto, ocfs2_do_node_down, osb, 32254670c46dSJoel Becker &conn); 32264670c46dSJoel Becker if (status) { 3227ccd979bdSMark Fasheh mlog_errno(status); 3228ccd979bdSMark Fasheh goto bail; 3229ccd979bdSMark Fasheh } 3230ccd979bdSMark Fasheh 32313e834151SGoldwyn Rodrigues status = ocfs2_cluster_this_node(conn, &osb->node_num); 32320abd6d18SMark Fasheh if 
(status < 0) { 32330abd6d18SMark Fasheh mlog_errno(status); 32340abd6d18SMark Fasheh mlog(ML_ERROR, 32350abd6d18SMark Fasheh "could not find this host's node number\n"); 3236286eaa95SJoel Becker ocfs2_cluster_disconnect(conn, 0); 32370abd6d18SMark Fasheh goto bail; 32380abd6d18SMark Fasheh } 32390abd6d18SMark Fasheh 3240c271c5c2SSunil Mushran local: 3241ccd979bdSMark Fasheh ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 3242ccd979bdSMark Fasheh ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 32436ca497a8Swengang wang ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); 324483273932SSrinivas Eeda ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); 3245ccd979bdSMark Fasheh 32464670c46dSJoel Becker osb->cconn = conn; 3247ccd979bdSMark Fasheh bail: 3248ccd979bdSMark Fasheh if (status < 0) { 3249ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 325034d024f8SMark Fasheh if (osb->dc_task) 325134d024f8SMark Fasheh kthread_stop(osb->dc_task); 3252ccd979bdSMark Fasheh } 3253ccd979bdSMark Fasheh 3254ccd979bdSMark Fasheh return status; 3255ccd979bdSMark Fasheh } 3256ccd979bdSMark Fasheh 3257286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb, 3258286eaa95SJoel Becker int hangup_pending) 3259ccd979bdSMark Fasheh { 3260ccd979bdSMark Fasheh ocfs2_drop_osb_locks(osb); 3261ccd979bdSMark Fasheh 32624670c46dSJoel Becker /* 32634670c46dSJoel Becker * Now that we have dropped all locks and ocfs2_dismount_volume() 32644670c46dSJoel Becker * has disabled recovery, the DLM won't be talking to us. It's 32654670c46dSJoel Becker * safe to tear things down before disconnecting the cluster. 
*/

	/* Stop the downconvert thread and forget it so it is stopped once. */
	if (osb->dc_task) {
		kthread_stop(osb->dc_task);
		osb->dc_task = NULL;
	}

	ocfs2_lock_res_free(&osb->osb_super_lockres);
	ocfs2_lock_res_free(&osb->osb_rename_lockres);
	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);

	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
	osb->cconn = NULL;

	ocfs2_dlm_shutdown_debug(osb);
}

/*
 * Release the DLM lock behind @lockres and wait for the unlock to finish.
 *
 * The lockres must already carry OCFS2_LOCK_FREEING (checked with
 * mlog_bug_on_msg() below).  Nothing is done for a lockres that was never
 * initialized or is not attached.  A failing ocfs2_dlm_unlock() is
 * treated as fatal: the error and lksb are logged and BUG() is hit.
 *
 * Always returns 0.
 */
static int ocfs2_drop_lock(struct ocfs2_super *osb,
			   struct ocfs2_lock_res *lockres)
{
	int ret;
	unsigned long flags;
	u32 lkm_flags = 0;

	/* We didn't get anywhere near actually using this lockres. */
	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
		goto out;

	/* Lock types that use an LVB pass DLM_LKF_VALBLK on the unlock. */
	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
		lkm_flags |= DLM_LKF_VALBLK;

	spin_lock_irqsave(&lockres->l_lock, flags);

	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
			"lockres %s, flags 0x%lx\n",
			lockres->l_name, lockres->l_flags);

	/*
	 * Wait out any operation in flight.  The spinlock is dropped around
	 * the wait and the flag re-tested after re-taking it.
	 */
	while (lockres->l_flags & OCFS2_LOCK_BUSY) {
		mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
		     "%u, unlock_action = %u\n",
		     lockres->l_name, lockres->l_flags, lockres->l_action,
		     lockres->l_unlock_action);

		spin_unlock_irqrestore(&lockres->l_lock, flags);

		/* XXX: Today we just wait on any busy
		 * locks... Perhaps we need to cancel converts in the
		 * future? */
		ocfs2_wait_on_busy_lock(lockres);

		spin_lock_irqsave(&lockres->l_lock, flags);
	}

	/*
	 * Push the LVB out one last time, but only when we hold the lock
	 * at EX, it is attached, and it does not need a refresh.
	 */
	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
		if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
		    lockres->l_level == DLM_LOCK_EX &&
		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
			lockres->l_ops->set_lvb(lockres);
	}

	if (lockres->l_flags & OCFS2_LOCK_BUSY)
		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
		     lockres->l_name);
	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);

	/* Never attached to the DLM: nothing to unlock. */
	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		goto out;
	}

	lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);

	/* make sure we never get here while waiting for an ast to
	 * fire. */
	BUG_ON(lockres->l_action != OCFS2_AST_INVALID);

	/* is this necessary? */
	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
	lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	mlog(0, "lock %s\n", lockres->l_name);

	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
	if (ret) {
		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
		ocfs2_dlm_dump_lksb(&lockres->l_lksb);
		BUG();
	}
	mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
	     lockres->l_name);

	/* BUSY was set above; wait until the unlock has been processed. */
	ocfs2_wait_on_busy_lock(lockres);
out:
	return 0;
}

static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
				       struct ocfs2_lock_res *lockres);

/* Mark the lockres as being dropped. It will no longer be
 * queued if blocking, but we still may have to wait on it
 * being dequeued from the downconvert thread before we can consider
 * it safe to drop.
 *
 * You can *not* attempt to call cluster_lock on this lockres anymore.
*/
void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres)
{
	int status;
	struct ocfs2_mask_waiter mw;
	unsigned long flags, flags2;

	ocfs2_init_mask_waiter(&mw);

	spin_lock_irqsave(&lockres->l_lock, flags);
	lockres->l_flags |= OCFS2_LOCK_FREEING;
	if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
		/*
		 * We know the downconvert is queued but not in progress
		 * because we are the downconvert thread and processing
		 * a different lock. So we can just remove the lock from the
		 * queue. This is not only an optimization but also a way
		 * to avoid the following deadlock:
		 *   ocfs2_dentry_post_unlock()
		 *     ocfs2_dentry_lock_put()
		 *       ocfs2_drop_dentry_lock()
		 *         iput()
		 *           ocfs2_evict_inode()
		 *             ocfs2_clear_inode()
		 *               ocfs2_mark_lockres_freeing()
		 *                 ... blocks waiting for OCFS2_LOCK_QUEUED
		 *                 since we are the downconvert thread which
		 *                 should clear the flag.
		 */
		spin_unlock_irqrestore(&lockres->l_lock, flags);
		/* The blocked list and its counter are guarded by dc_task_lock. */
		spin_lock_irqsave(&osb->dc_task_lock, flags2);
		list_del_init(&lockres->l_blocked_list);
		osb->blocked_lock_count--;
		spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
		/*
		 * Warn if we recurse into another post_unlock call. Strictly
		 * speaking it isn't a problem but we need to be careful if
		 * that happens (stack overflow, deadlocks, ...) so warn if
		 * ocfs2 grows a path for which this can happen.
		 */
		WARN_ON_ONCE(lockres->l_ops->post_unlock);
		/* Since the lock is freeing we don't do much in the fn below */
		ocfs2_process_blocked_lock(osb, lockres);
		return;
	}
	/*
	 * Not the downconvert thread: sleep until OCFS2_LOCK_QUEUED is
	 * cleared.  The spinlock is dropped around each wait and the flag
	 * re-tested afterwards; a wait error is only logged.
	 */
	while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		mlog(0, "Waiting on lockres %s\n", lockres->l_name);

		status = ocfs2_wait_for_mask(&mw);
		if (status)
			mlog_errno(status);

		spin_lock_irqsave(&lockres->l_lock, flags);
	}
	spin_unlock_irqrestore(&lockres->l_lock, flags);
}

/*
 * Convenience wrapper: mark @lockres as freeing, then drop its DLM lock.
 * A non-zero result from ocfs2_drop_lock() is logged, not returned.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int ret;

	ocfs2_mark_lockres_freeing(osb, lockres);
	ret = ocfs2_drop_lock(osb, lockres);
	if (ret)
		mlog_errno(ret);
}

/* Drop the osb-wide lock resources (super, rename, nfs_sync, orphan scan). */
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
{
	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 34506ca497a8Swengang wang ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); 345183273932SSrinivas Eeda ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); 3452ccd979bdSMark Fasheh } 3453ccd979bdSMark Fasheh 3454ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode) 3455ccd979bdSMark Fasheh { 3456ccd979bdSMark Fasheh int status, err; 3457ccd979bdSMark Fasheh 3458ccd979bdSMark Fasheh /* No need to call ocfs2_mark_lockres_freeing here - 3459ccd979bdSMark Fasheh * ocfs2_clear_inode has done it for us. */ 3460ccd979bdSMark Fasheh 3461ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 346250008630STiger Yang &OCFS2_I(inode)->ip_open_lockres); 3463ccd979bdSMark Fasheh if (err < 0) 3464ccd979bdSMark Fasheh mlog_errno(err); 3465ccd979bdSMark Fasheh 3466ccd979bdSMark Fasheh status = err; 3467ccd979bdSMark Fasheh 3468ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3469e63aecb6SMark Fasheh &OCFS2_I(inode)->ip_inode_lockres); 3470ccd979bdSMark Fasheh if (err < 0) 3471ccd979bdSMark Fasheh mlog_errno(err); 3472ccd979bdSMark Fasheh if (err < 0 && !status) 3473ccd979bdSMark Fasheh status = err; 3474ccd979bdSMark Fasheh 3475ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 34760d5dc6c2SMark Fasheh &OCFS2_I(inode)->ip_rw_lockres); 3477ccd979bdSMark Fasheh if (err < 0) 3478ccd979bdSMark Fasheh mlog_errno(err); 3479ccd979bdSMark Fasheh if (err < 0 && !status) 3480ccd979bdSMark Fasheh status = err; 3481ccd979bdSMark Fasheh 3482ccd979bdSMark Fasheh return status; 3483ccd979bdSMark Fasheh } 3484ccd979bdSMark Fasheh 3485de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 3486ccd979bdSMark Fasheh int new_level) 3487ccd979bdSMark Fasheh { 3488ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3489ccd979bdSMark Fasheh 3490bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 
3491ccd979bdSMark Fasheh 3492ccd979bdSMark Fasheh if (lockres->l_level <= new_level) { 34939b915181SSunil Mushran mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, " 34949b915181SSunil Mushran "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, " 34959b915181SSunil Mushran "block %d, pgen %d\n", lockres->l_name, lockres->l_level, 34969b915181SSunil Mushran new_level, list_empty(&lockres->l_blocked_list), 34979b915181SSunil Mushran list_empty(&lockres->l_mask_waiters), lockres->l_type, 34989b915181SSunil Mushran lockres->l_flags, lockres->l_ro_holders, 34999b915181SSunil Mushran lockres->l_ex_holders, lockres->l_action, 35009b915181SSunil Mushran lockres->l_unlock_action, lockres->l_requested, 35019b915181SSunil Mushran lockres->l_blocking, lockres->l_pending_gen); 3502ccd979bdSMark Fasheh BUG(); 3503ccd979bdSMark Fasheh } 3504ccd979bdSMark Fasheh 35059b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n", 35069b915181SSunil Mushran lockres->l_name, lockres->l_level, new_level, lockres->l_blocking); 3507ccd979bdSMark Fasheh 3508ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_DOWNCONVERT; 3509ccd979bdSMark Fasheh lockres->l_requested = new_level; 3510ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3511de551246SJoel Becker return lockres_set_pending(lockres); 3512ccd979bdSMark Fasheh } 3513ccd979bdSMark Fasheh 3514ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 3515ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3516ccd979bdSMark Fasheh int new_level, 3517de551246SJoel Becker int lvb, 3518de551246SJoel Becker unsigned int generation) 3519ccd979bdSMark Fasheh { 3520bd3e7610SJoel Becker int ret; 3521bd3e7610SJoel Becker u32 dlm_flags = DLM_LKF_CONVERT; 3522ccd979bdSMark Fasheh 35239b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, 35249b915181SSunil Mushran lockres->l_level, new_level); 35259b915181SSunil Mushran 3526e7ee2c08SEric Ren /* 
3527e7ee2c08SEric Ren * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always 3528e7ee2c08SEric Ren * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that 3529e7ee2c08SEric Ren * we can recover correctly from node failure. Otherwise, we may get 3530e7ee2c08SEric Ren * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. 3531e7ee2c08SEric Ren */ 3532e7ee2c08SEric Ren if (!ocfs2_is_o2cb_active() && 3533e7ee2c08SEric Ren lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 3534e7ee2c08SEric Ren lvb = 1; 3535e7ee2c08SEric Ren 3536ccd979bdSMark Fasheh if (lvb) 3537bd3e7610SJoel Becker dlm_flags |= DLM_LKF_VALBLK; 3538ccd979bdSMark Fasheh 35394670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 3540ccd979bdSMark Fasheh new_level, 3541ccd979bdSMark Fasheh &lockres->l_lksb, 3542ccd979bdSMark Fasheh dlm_flags, 3543ccd979bdSMark Fasheh lockres->l_name, 3544a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 3545de551246SJoel Becker lockres_clear_pending(lockres, generation, osb); 35467431cd7eSJoel Becker if (ret) { 35477431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 3548ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 3549ccd979bdSMark Fasheh goto bail; 3550ccd979bdSMark Fasheh } 3551ccd979bdSMark Fasheh 3552ccd979bdSMark Fasheh ret = 0; 3553ccd979bdSMark Fasheh bail: 3554ccd979bdSMark Fasheh return ret; 3555ccd979bdSMark Fasheh } 3556ccd979bdSMark Fasheh 355724ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ 3558ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 3559ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3560ccd979bdSMark Fasheh { 3561ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3562ccd979bdSMark Fasheh 3563ccd979bdSMark Fasheh if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3564ccd979bdSMark Fasheh /* If we're already trying to cancel a lock conversion 3565ccd979bdSMark Fasheh * then just drop the 
spinlock and allow the caller to 3566ccd979bdSMark Fasheh * requeue this lock. */ 35679b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name); 3568ccd979bdSMark Fasheh return 0; 3569ccd979bdSMark Fasheh } 3570ccd979bdSMark Fasheh 3571ccd979bdSMark Fasheh /* were we in a convert when we got the bast fire? */ 3572ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 3573ccd979bdSMark Fasheh lockres->l_action != OCFS2_AST_DOWNCONVERT); 3574ccd979bdSMark Fasheh /* set things up for the unlockast to know to just 3575ccd979bdSMark Fasheh * clear out the ast_action and unset busy, etc. */ 3576ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 3577ccd979bdSMark Fasheh 3578ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 3579ccd979bdSMark Fasheh "lock %s, invalid flags: 0x%lx\n", 3580ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3581ccd979bdSMark Fasheh 35829b915181SSunil Mushran mlog(ML_BASTS, "lockres %s\n", lockres->l_name); 35839b915181SSunil Mushran 3584ccd979bdSMark Fasheh return 1; 3585ccd979bdSMark Fasheh } 3586ccd979bdSMark Fasheh 3587ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 3588ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3589ccd979bdSMark Fasheh { 3590ccd979bdSMark Fasheh int ret; 3591ccd979bdSMark Fasheh 35924670c46dSJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 3593a796d286SJoel Becker DLM_LKF_CANCEL); 35947431cd7eSJoel Becker if (ret) { 35957431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3596ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 0); 3597ccd979bdSMark Fasheh } 3598ccd979bdSMark Fasheh 35999b915181SSunil Mushran mlog(ML_BASTS, "lockres %s\n", lockres->l_name); 3600ccd979bdSMark Fasheh 3601ccd979bdSMark Fasheh return ret; 3602ccd979bdSMark Fasheh } 3603ccd979bdSMark Fasheh 3604b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super 
*osb, 3605ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3606cc567d89SMark Fasheh struct ocfs2_unblock_ctl *ctl) 3607ccd979bdSMark Fasheh { 3608ccd979bdSMark Fasheh unsigned long flags; 3609ccd979bdSMark Fasheh int blocking; 3610ccd979bdSMark Fasheh int new_level; 3611079b8057SSunil Mushran int level; 3612ccd979bdSMark Fasheh int ret = 0; 36135ef0d4eaSMark Fasheh int set_lvb = 0; 3614de551246SJoel Becker unsigned int gen; 3615ccd979bdSMark Fasheh 3616ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3617ccd979bdSMark Fasheh 3618ccd979bdSMark Fasheh recheck: 3619db0f6ce6SSunil Mushran /* 3620db0f6ce6SSunil Mushran * Is it still blocking? If not, we have no more work to do. 3621db0f6ce6SSunil Mushran */ 3622db0f6ce6SSunil Mushran if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) { 3623db0f6ce6SSunil Mushran BUG_ON(lockres->l_blocking != DLM_LOCK_NL); 3624db0f6ce6SSunil Mushran spin_unlock_irqrestore(&lockres->l_lock, flags); 3625db0f6ce6SSunil Mushran ret = 0; 3626db0f6ce6SSunil Mushran goto leave; 3627db0f6ce6SSunil Mushran } 3628db0f6ce6SSunil Mushran 3629ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3630de551246SJoel Becker /* XXX 3631de551246SJoel Becker * This is a *big* race. The OCFS2_LOCK_PENDING flag 3632de551246SJoel Becker * exists entirely for one reason - another thread has set 3633de551246SJoel Becker * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3634de551246SJoel Becker * 3635de551246SJoel Becker * If we do ocfs2_cancel_convert() before the other thread 3636de551246SJoel Becker * calls dlm_lock(), our cancel will do nothing. We will 3637de551246SJoel Becker * get no ast, and we will have no way of knowing the 3638de551246SJoel Becker * cancel failed. Meanwhile, the other thread will call 3639de551246SJoel Becker * into dlm_lock() and wait...forever. 3640de551246SJoel Becker * 3641de551246SJoel Becker * Why forever? 
Because another node has asked for the 3642de551246SJoel Becker * lock first; that's why we're here in unblock_lock(). 3643de551246SJoel Becker * 3644de551246SJoel Becker * The solution is OCFS2_LOCK_PENDING. When PENDING is 3645de551246SJoel Becker * set, we just requeue the unblock. Only when the other 3646de551246SJoel Becker * thread has called dlm_lock() and cleared PENDING will 3647de551246SJoel Becker * we then cancel their request. 3648de551246SJoel Becker * 3649de551246SJoel Becker * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3650de551246SJoel Becker * at the same time they set OCFS2_DLM_BUSY. They must 3651de551246SJoel Becker * clear OCFS2_DLM_PENDING after dlm_lock() returns. 3652de551246SJoel Becker */ 36539b915181SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_PENDING) { 36549b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Pending\n", 36559b915181SSunil Mushran lockres->l_name); 3656de551246SJoel Becker goto leave_requeue; 36579b915181SSunil Mushran } 3658de551246SJoel Becker 3659d680efe9SMark Fasheh ctl->requeue = 1; 3660ccd979bdSMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 3661ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3662ccd979bdSMark Fasheh if (ret) { 3663ccd979bdSMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 3664ccd979bdSMark Fasheh if (ret < 0) 3665ccd979bdSMark Fasheh mlog_errno(ret); 3666ccd979bdSMark Fasheh } 3667ccd979bdSMark Fasheh goto leave; 3668ccd979bdSMark Fasheh } 3669ccd979bdSMark Fasheh 3670a1912826SSunil Mushran /* 3671a1912826SSunil Mushran * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is 3672a1912826SSunil Mushran * set when the ast is received for an upconvert just before the 3673a1912826SSunil Mushran * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast 3674a1912826SSunil Mushran * on the heels of the ast, we want to delay the downconvert just 3675a1912826SSunil Mushran * enough to allow the up requestor to do its task. 
Because this 3676a1912826SSunil Mushran * lock is in the blocked queue, the lock will be downconverted 3677a1912826SSunil Mushran * as soon as the requestor is done with the lock. 3678a1912826SSunil Mushran */ 3679a1912826SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) 3680a1912826SSunil Mushran goto leave_requeue; 3681a1912826SSunil Mushran 36820d74125aSSunil Mushran /* 36830d74125aSSunil Mushran * How can we block and yet be at NL? We were trying to upconvert 36840d74125aSSunil Mushran * from NL and got canceled. The code comes back here, and now 36850d74125aSSunil Mushran * we notice and clear BLOCKING. 36860d74125aSSunil Mushran */ 36870d74125aSSunil Mushran if (lockres->l_level == DLM_LOCK_NL) { 36880d74125aSSunil Mushran BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); 36899b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name); 36900d74125aSSunil Mushran lockres->l_blocking = DLM_LOCK_NL; 36910d74125aSSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 36920d74125aSSunil Mushran spin_unlock_irqrestore(&lockres->l_lock, flags); 36930d74125aSSunil Mushran goto leave; 36940d74125aSSunil Mushran } 36950d74125aSSunil Mushran 3696ccd979bdSMark Fasheh /* if we're blocking an exclusive and we have *any* holders, 3697ccd979bdSMark Fasheh * then requeue. 
*/ 3698bd3e7610SJoel Becker if ((lockres->l_blocking == DLM_LOCK_EX) 36999b915181SSunil Mushran && (lockres->l_ex_holders || lockres->l_ro_holders)) { 37009b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n", 37019b915181SSunil Mushran lockres->l_name, lockres->l_ex_holders, 37029b915181SSunil Mushran lockres->l_ro_holders); 3703f7fbfdd1SMark Fasheh goto leave_requeue; 37049b915181SSunil Mushran } 3705ccd979bdSMark Fasheh 3706ccd979bdSMark Fasheh /* If it's a PR we're blocking, then only 3707ccd979bdSMark Fasheh * requeue if we've got any EX holders */ 3708bd3e7610SJoel Becker if (lockres->l_blocking == DLM_LOCK_PR && 37099b915181SSunil Mushran lockres->l_ex_holders) { 37109b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n", 37119b915181SSunil Mushran lockres->l_name, lockres->l_ex_holders); 3712f7fbfdd1SMark Fasheh goto leave_requeue; 37139b915181SSunil Mushran } 3714f7fbfdd1SMark Fasheh 3715f7fbfdd1SMark Fasheh /* 3716f7fbfdd1SMark Fasheh * Can we get a lock in this state if the holder counts are 3717f7fbfdd1SMark Fasheh * zero? The meta data unblock code used to check this. 
3718f7fbfdd1SMark Fasheh */ 3719f7fbfdd1SMark Fasheh if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 37209b915181SSunil Mushran && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) { 37219b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n", 37229b915181SSunil Mushran lockres->l_name); 3723f7fbfdd1SMark Fasheh goto leave_requeue; 37249b915181SSunil Mushran } 3725ccd979bdSMark Fasheh 372616d5b956SMark Fasheh new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 372716d5b956SMark Fasheh 372816d5b956SMark Fasheh if (lockres->l_ops->check_downconvert 37299b915181SSunil Mushran && !lockres->l_ops->check_downconvert(lockres, new_level)) { 37309b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n", 37319b915181SSunil Mushran lockres->l_name); 373216d5b956SMark Fasheh goto leave_requeue; 37339b915181SSunil Mushran } 373416d5b956SMark Fasheh 3735ccd979bdSMark Fasheh /* If we get here, then we know that there are no more 3736ccd979bdSMark Fasheh * incompatible holders (and anyone asking for an incompatible 3737ccd979bdSMark Fasheh * lock is blocked). We can now downconvert the lock */ 3738cc567d89SMark Fasheh if (!lockres->l_ops->downconvert_worker) 3739ccd979bdSMark Fasheh goto downconvert; 3740ccd979bdSMark Fasheh 3741ccd979bdSMark Fasheh /* Some lockres types want to do a bit of work before 3742ccd979bdSMark Fasheh * downconverting a lock. Allow that here. The worker function 3743ccd979bdSMark Fasheh * may sleep, so we save off a copy of what we're blocking as 3744ccd979bdSMark Fasheh * it may change while we're not holding the spin lock. 
*/ 3745ccd979bdSMark Fasheh blocking = lockres->l_blocking; 3746079b8057SSunil Mushran level = lockres->l_level; 3747ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3748ccd979bdSMark Fasheh 3749cc567d89SMark Fasheh ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3750d680efe9SMark Fasheh 37519b915181SSunil Mushran if (ctl->unblock_action == UNBLOCK_STOP_POST) { 37529b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n", 37539b915181SSunil Mushran lockres->l_name); 3754d680efe9SMark Fasheh goto leave; 37559b915181SSunil Mushran } 3756ccd979bdSMark Fasheh 3757ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3758079b8057SSunil Mushran if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { 3759ccd979bdSMark Fasheh /* If this changed underneath us, then we can't drop 3760ccd979bdSMark Fasheh * it just yet. */ 37619b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, " 37629b915181SSunil Mushran "Recheck\n", lockres->l_name, blocking, 37639b915181SSunil Mushran lockres->l_blocking, level, lockres->l_level); 3764ccd979bdSMark Fasheh goto recheck; 3765ccd979bdSMark Fasheh } 3766ccd979bdSMark Fasheh 3767ccd979bdSMark Fasheh downconvert: 3768d680efe9SMark Fasheh ctl->requeue = 0; 3769ccd979bdSMark Fasheh 37705ef0d4eaSMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3771bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_EX) 37725ef0d4eaSMark Fasheh set_lvb = 1; 37735ef0d4eaSMark Fasheh 37745ef0d4eaSMark Fasheh /* 37755ef0d4eaSMark Fasheh * We only set the lvb if the lock has been fully 37765ef0d4eaSMark Fasheh * refreshed - otherwise we risk setting stale 37775ef0d4eaSMark Fasheh * data. Otherwise, there's no need to actually clear 37785ef0d4eaSMark Fasheh * out the lvb here as it's value is still valid. 
37795ef0d4eaSMark Fasheh */ 37805ef0d4eaSMark Fasheh if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 37815ef0d4eaSMark Fasheh lockres->l_ops->set_lvb(lockres); 37825ef0d4eaSMark Fasheh } 37835ef0d4eaSMark Fasheh 3784de551246SJoel Becker gen = ocfs2_prepare_downconvert(lockres, new_level); 3785ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3786de551246SJoel Becker ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3787de551246SJoel Becker gen); 3788de551246SJoel Becker 3789ccd979bdSMark Fasheh leave: 3790c1e8d35eSTao Ma if (ret) 3791c1e8d35eSTao Ma mlog_errno(ret); 3792ccd979bdSMark Fasheh return ret; 3793f7fbfdd1SMark Fasheh 3794f7fbfdd1SMark Fasheh leave_requeue: 3795f7fbfdd1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3796f7fbfdd1SMark Fasheh ctl->requeue = 1; 3797f7fbfdd1SMark Fasheh 3798f7fbfdd1SMark Fasheh return 0; 3799ccd979bdSMark Fasheh } 3800ccd979bdSMark Fasheh 3801d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 3802ccd979bdSMark Fasheh int blocking) 3803ccd979bdSMark Fasheh { 3804ccd979bdSMark Fasheh struct inode *inode; 3805ccd979bdSMark Fasheh struct address_space *mapping; 38065e98d492SGoldwyn Rodrigues struct ocfs2_inode_info *oi; 3807ccd979bdSMark Fasheh 3808ccd979bdSMark Fasheh inode = ocfs2_lock_res_inode(lockres); 3809ccd979bdSMark Fasheh mapping = inode->i_mapping; 3810ccd979bdSMark Fasheh 38115e98d492SGoldwyn Rodrigues if (S_ISDIR(inode->i_mode)) { 38125e98d492SGoldwyn Rodrigues oi = OCFS2_I(inode); 38135e98d492SGoldwyn Rodrigues oi->ip_dir_lock_gen++; 38145e98d492SGoldwyn Rodrigues mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); 38155e98d492SGoldwyn Rodrigues goto out; 38165e98d492SGoldwyn Rodrigues } 38175e98d492SGoldwyn Rodrigues 38181044e401SMark Fasheh if (!S_ISREG(inode->i_mode)) 3819f1f54068SMark Fasheh goto out; 3820f1f54068SMark Fasheh 38217f4a2a97SMark Fasheh /* 38227f4a2a97SMark Fasheh * We need this before the 
filemap_fdatawrite() so that it can 38237f4a2a97SMark Fasheh * transfer the dirty bit from the PTE to the 38247f4a2a97SMark Fasheh * page. Unfortunately this means that even for EX->PR 38257f4a2a97SMark Fasheh * downconverts, we'll lose our mappings and have to build 38267f4a2a97SMark Fasheh * them up again. 38277f4a2a97SMark Fasheh */ 38287f4a2a97SMark Fasheh unmap_mapping_range(mapping, 0, 0, 0); 38297f4a2a97SMark Fasheh 3830ccd979bdSMark Fasheh if (filemap_fdatawrite(mapping)) { 3831b0697053SMark Fasheh mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 3832b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 3833ccd979bdSMark Fasheh } 3834ccd979bdSMark Fasheh sync_mapping_buffers(mapping); 3835bd3e7610SJoel Becker if (blocking == DLM_LOCK_EX) { 3836ccd979bdSMark Fasheh truncate_inode_pages(mapping, 0); 3837ccd979bdSMark Fasheh } else { 3838ccd979bdSMark Fasheh /* We only need to wait on the I/O if we're not also 3839ccd979bdSMark Fasheh * truncating pages because truncate_inode_pages waits 3840ccd979bdSMark Fasheh * for us above. We don't truncate pages if we're 3841ccd979bdSMark Fasheh * blocking anything < EXMODE because we want to keep 3842ccd979bdSMark Fasheh * them around in that case. 
*/ 3843ccd979bdSMark Fasheh filemap_fdatawait(mapping); 3844ccd979bdSMark Fasheh } 3845ccd979bdSMark Fasheh 3846b8a7a3a6SAndreas Gruenbacher forget_all_cached_acls(inode); 3847b8a7a3a6SAndreas Gruenbacher 3848f1f54068SMark Fasheh out: 3849d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3850ccd979bdSMark Fasheh } 3851ccd979bdSMark Fasheh 3852a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, 3853a4338481STao Ma struct ocfs2_lock_res *lockres, 3854810d5aebSMark Fasheh int new_level) 3855810d5aebSMark Fasheh { 3856a4338481STao Ma int checkpointed = ocfs2_ci_fully_checkpointed(ci); 3857810d5aebSMark Fasheh 3858bd3e7610SJoel Becker BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3859bd3e7610SJoel Becker BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3860810d5aebSMark Fasheh 3861810d5aebSMark Fasheh if (checkpointed) 3862810d5aebSMark Fasheh return 1; 3863810d5aebSMark Fasheh 3864a4338481STao Ma ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); 3865810d5aebSMark Fasheh return 0; 3866810d5aebSMark Fasheh } 3867810d5aebSMark Fasheh 3868a4338481STao Ma static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3869a4338481STao Ma int new_level) 3870a4338481STao Ma { 3871a4338481STao Ma struct inode *inode = ocfs2_lock_res_inode(lockres); 3872a4338481STao Ma 3873a4338481STao Ma return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level); 3874a4338481STao Ma } 3875a4338481STao Ma 3876810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3877810d5aebSMark Fasheh { 3878810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 3879810d5aebSMark Fasheh 3880810d5aebSMark Fasheh __ocfs2_stuff_meta_lvb(inode); 3881810d5aebSMark Fasheh } 3882810d5aebSMark Fasheh 3883d680efe9SMark Fasheh /* 3884d680efe9SMark Fasheh * Does the final reference drop on our dentry lock. 
Right now this 388534d024f8SMark Fasheh * happens in the downconvert thread, but we could choose to simplify the 3886d680efe9SMark Fasheh * dlmglue API and push these off to the ocfs2_wq in the future. 3887d680efe9SMark Fasheh */ 3888d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 3889d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3890d680efe9SMark Fasheh { 3891d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3892d680efe9SMark Fasheh ocfs2_dentry_lock_put(osb, dl); 3893d680efe9SMark Fasheh } 3894d680efe9SMark Fasheh 3895d680efe9SMark Fasheh /* 3896d680efe9SMark Fasheh * d_delete() matching dentries before the lock downconvert. 3897d680efe9SMark Fasheh * 3898d680efe9SMark Fasheh * At this point, any process waiting to destroy the 3899d680efe9SMark Fasheh * dentry_lock due to last ref count is stopped by the 3900d680efe9SMark Fasheh * OCFS2_LOCK_QUEUED flag. 3901d680efe9SMark Fasheh * 3902d680efe9SMark Fasheh * We have two potential problems 3903d680efe9SMark Fasheh * 3904d680efe9SMark Fasheh * 1) If we do the last reference drop on our dentry_lock (via dput) 3905d680efe9SMark Fasheh * we'll wind up in ocfs2_release_dentry_lock(), waiting on 3906d680efe9SMark Fasheh * the downconvert to finish. Instead we take an elevated 3907d680efe9SMark Fasheh * reference and push the drop until after we've completed our 3908d680efe9SMark Fasheh * unblock processing. 3909d680efe9SMark Fasheh * 3910d680efe9SMark Fasheh * 2) There might be another process with a final reference, 3911d680efe9SMark Fasheh * waiting on us to finish processing. If this is the case, we 3912d680efe9SMark Fasheh * detect it and exit out - there's no more dentries anyway. 
3913d680efe9SMark Fasheh */ 3914d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 3915d680efe9SMark Fasheh int blocking) 3916d680efe9SMark Fasheh { 3917d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3918d680efe9SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); 3919d680efe9SMark Fasheh struct dentry *dentry; 3920d680efe9SMark Fasheh unsigned long flags; 3921d680efe9SMark Fasheh int extra_ref = 0; 3922d680efe9SMark Fasheh 3923d680efe9SMark Fasheh /* 3924d680efe9SMark Fasheh * This node is blocking another node from getting a read 3925d680efe9SMark Fasheh * lock. This happens when we've renamed within a 3926d680efe9SMark Fasheh * directory. We've forced the other nodes to d_delete(), but 3927d680efe9SMark Fasheh * we never actually dropped our lock because it's still 3928d680efe9SMark Fasheh * valid. The downconvert code will retain a PR for this node, 3929d680efe9SMark Fasheh * so there's no further work to do. 3930d680efe9SMark Fasheh */ 3931bd3e7610SJoel Becker if (blocking == DLM_LOCK_PR) 3932d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3933d680efe9SMark Fasheh 3934d680efe9SMark Fasheh /* 3935d680efe9SMark Fasheh * Mark this inode as potentially orphaned. The code in 3936d680efe9SMark Fasheh * ocfs2_delete_inode() will figure out whether it actually 3937d680efe9SMark Fasheh * needs to be freed or not. 3938d680efe9SMark Fasheh */ 3939d680efe9SMark Fasheh spin_lock(&oi->ip_lock); 3940d680efe9SMark Fasheh oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 3941d680efe9SMark Fasheh spin_unlock(&oi->ip_lock); 3942d680efe9SMark Fasheh 3943d680efe9SMark Fasheh /* 3944d680efe9SMark Fasheh * Yuck. We need to make sure however that the check of 3945d680efe9SMark Fasheh * OCFS2_LOCK_FREEING and the extra reference are atomic with 3946d680efe9SMark Fasheh * respect to a reference decrement or the setting of that 3947d680efe9SMark Fasheh * flag. 
3948d680efe9SMark Fasheh */ 3949d680efe9SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3950d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3951d680efe9SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_FREEING) 3952d680efe9SMark Fasheh && dl->dl_count) { 3953d680efe9SMark Fasheh dl->dl_count++; 3954d680efe9SMark Fasheh extra_ref = 1; 3955d680efe9SMark Fasheh } 3956d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3957d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3958d680efe9SMark Fasheh 3959d680efe9SMark Fasheh mlog(0, "extra_ref = %d\n", extra_ref); 3960d680efe9SMark Fasheh 3961d680efe9SMark Fasheh /* 3962d680efe9SMark Fasheh * We have a process waiting on us in ocfs2_dentry_iput(), 3963d680efe9SMark Fasheh * which means we can't have any more outstanding 3964d680efe9SMark Fasheh * aliases. There's no need to do any more work. 3965d680efe9SMark Fasheh */ 3966d680efe9SMark Fasheh if (!extra_ref) 3967d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3968d680efe9SMark Fasheh 3969d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3970d680efe9SMark Fasheh while (1) { 3971d680efe9SMark Fasheh dentry = ocfs2_find_local_alias(dl->dl_inode, 3972d680efe9SMark Fasheh dl->dl_parent_blkno, 1); 3973d680efe9SMark Fasheh if (!dentry) 3974d680efe9SMark Fasheh break; 3975d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3976d680efe9SMark Fasheh 397710ab8811Salex chen if (S_ISDIR(dl->dl_inode->i_mode)) 397810ab8811Salex chen shrink_dcache_parent(dentry); 397910ab8811Salex chen 3980a455589fSAl Viro mlog(0, "d_delete(%pd);\n", dentry); 3981d680efe9SMark Fasheh 3982d680efe9SMark Fasheh /* 3983d680efe9SMark Fasheh * The following dcache calls may do an 3984d680efe9SMark Fasheh * iput(). 
Normally we don't want that from the 3985d680efe9SMark Fasheh * downconverting thread, but in this case it's ok 3986d680efe9SMark Fasheh * because the requesting node already has an 3987d680efe9SMark Fasheh * exclusive lock on the inode, so it can't be queued 3988d680efe9SMark Fasheh * for a downconvert. 3989d680efe9SMark Fasheh */ 3990d680efe9SMark Fasheh d_delete(dentry); 3991d680efe9SMark Fasheh dput(dentry); 3992d680efe9SMark Fasheh 3993d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3994d680efe9SMark Fasheh } 3995d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3996d680efe9SMark Fasheh 3997d680efe9SMark Fasheh /* 3998d680efe9SMark Fasheh * If we are the last holder of this dentry lock, there is no 3999d680efe9SMark Fasheh * reason to downconvert so skip straight to the unlock. 4000d680efe9SMark Fasheh */ 4001d680efe9SMark Fasheh if (dl->dl_count == 1) 4002d680efe9SMark Fasheh return UNBLOCK_STOP_POST; 4003d680efe9SMark Fasheh 4004d680efe9SMark Fasheh return UNBLOCK_CONTINUE_POST; 4005d680efe9SMark Fasheh } 4006d680efe9SMark Fasheh 40078dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 40088dec98edSTao Ma int new_level) 40098dec98edSTao Ma { 40108dec98edSTao Ma struct ocfs2_refcount_tree *tree = 40118dec98edSTao Ma ocfs2_lock_res_refcount_tree(lockres); 40128dec98edSTao Ma 40138dec98edSTao Ma return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); 40148dec98edSTao Ma } 40158dec98edSTao Ma 40168dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 40178dec98edSTao Ma int blocking) 40188dec98edSTao Ma { 40198dec98edSTao Ma struct ocfs2_refcount_tree *tree = 40208dec98edSTao Ma ocfs2_lock_res_refcount_tree(lockres); 40218dec98edSTao Ma 40228dec98edSTao Ma ocfs2_metadata_cache_purge(&tree->rf_ci); 40238dec98edSTao Ma 40248dec98edSTao Ma return UNBLOCK_CONTINUE; 40258dec98edSTao Ma } 40268dec98edSTao Ma 40279e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct 
ocfs2_lock_res *lockres) 40289e33d69fSJan Kara { 40299e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb; 40309e33d69fSJan Kara struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); 40319e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 40329e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 40339e33d69fSJan Kara 4034a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 40359e33d69fSJan Kara lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; 40369e33d69fSJan Kara lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); 40379e33d69fSJan Kara lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); 40389e33d69fSJan Kara lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); 40399e33d69fSJan Kara lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); 40409e33d69fSJan Kara lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); 40419e33d69fSJan Kara lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); 40429e33d69fSJan Kara } 40439e33d69fSJan Kara 40449e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) 40459e33d69fSJan Kara { 40469e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 40479e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 40489e33d69fSJan Kara int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 40499e33d69fSJan Kara 40509e33d69fSJan Kara if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 40519e33d69fSJan Kara ocfs2_cluster_unlock(osb, lockres, level); 40529e33d69fSJan Kara } 40539e33d69fSJan Kara 40549e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) 40559e33d69fSJan Kara { 40569e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 40579e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 40589e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 40599e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 406085eb8b73SJoel Becker struct buffer_head *bh = NULL; 40619e33d69fSJan Kara struct ocfs2_global_disk_dqinfo *gdinfo; 40629e33d69fSJan Kara int status = 0; 40639e33d69fSJan Kara 40641c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 40651c520dfbSJoel Becker lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { 40669e33d69fSJan Kara info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); 40679e33d69fSJan Kara info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); 40689e33d69fSJan Kara oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); 40699e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); 40709e33d69fSJan Kara oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); 40719e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 40729e33d69fSJan Kara be32_to_cpu(lvb->lvb_free_entry); 40739e33d69fSJan Kara } else { 4074ae4f6ef1SJan Kara status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode, 4075ae4f6ef1SJan Kara oinfo->dqi_giblk, &bh); 407685eb8b73SJoel Becker if (status) { 40779e33d69fSJan Kara mlog_errno(status); 40789e33d69fSJan Kara goto bail; 40799e33d69fSJan Kara } 40809e33d69fSJan Kara gdinfo = (struct ocfs2_global_disk_dqinfo *) 40819e33d69fSJan Kara (bh->b_data + OCFS2_GLOBAL_INFO_OFF); 40829e33d69fSJan Kara info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); 40839e33d69fSJan Kara info->dqi_igrace = 
le32_to_cpu(gdinfo->dqi_igrace); 40849e33d69fSJan Kara oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); 40859e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); 40869e33d69fSJan Kara oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); 40879e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 40889e33d69fSJan Kara le32_to_cpu(gdinfo->dqi_free_entry); 40899e33d69fSJan Kara brelse(bh); 40909e33d69fSJan Kara ocfs2_track_lock_refresh(lockres); 40919e33d69fSJan Kara } 40929e33d69fSJan Kara 40939e33d69fSJan Kara bail: 40949e33d69fSJan Kara return status; 40959e33d69fSJan Kara } 40969e33d69fSJan Kara 40979e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file 40989e33d69fSJan Kara * so that we can safely refresh quota info from disk. */ 40999e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) 41009e33d69fSJan Kara { 41019e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 41029e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 41039e33d69fSJan Kara int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 41049e33d69fSJan Kara int status = 0; 41059e33d69fSJan Kara 41069e33d69fSJan Kara /* On RO devices, locking really isn't needed... 
*/ 41079e33d69fSJan Kara if (ocfs2_is_hard_readonly(osb)) { 41089e33d69fSJan Kara if (ex) 41099e33d69fSJan Kara status = -EROFS; 41109e33d69fSJan Kara goto bail; 41119e33d69fSJan Kara } 41129e33d69fSJan Kara if (ocfs2_mount_local(osb)) 41139e33d69fSJan Kara goto bail; 41149e33d69fSJan Kara 41159e33d69fSJan Kara status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 41169e33d69fSJan Kara if (status < 0) { 41179e33d69fSJan Kara mlog_errno(status); 41189e33d69fSJan Kara goto bail; 41199e33d69fSJan Kara } 41209e33d69fSJan Kara if (!ocfs2_should_refresh_lock_res(lockres)) 41219e33d69fSJan Kara goto bail; 41229e33d69fSJan Kara /* OK, we have the lock but we need to refresh the quota info */ 41239e33d69fSJan Kara status = ocfs2_refresh_qinfo(oinfo); 41249e33d69fSJan Kara if (status) 41259e33d69fSJan Kara ocfs2_qinfo_unlock(oinfo, ex); 41269e33d69fSJan Kara ocfs2_complete_lock_res_refresh(lockres, status); 41279e33d69fSJan Kara bail: 41289e33d69fSJan Kara return status; 41299e33d69fSJan Kara } 41309e33d69fSJan Kara 41318dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) 41328dec98edSTao Ma { 41338dec98edSTao Ma int status; 41348dec98edSTao Ma int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 41358dec98edSTao Ma struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 41368dec98edSTao Ma struct ocfs2_super *osb = lockres->l_priv; 41378dec98edSTao Ma 41388dec98edSTao Ma 41398dec98edSTao Ma if (ocfs2_is_hard_readonly(osb)) 41408dec98edSTao Ma return -EROFS; 41418dec98edSTao Ma 41428dec98edSTao Ma if (ocfs2_mount_local(osb)) 41438dec98edSTao Ma return 0; 41448dec98edSTao Ma 41458dec98edSTao Ma status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 41468dec98edSTao Ma if (status < 0) 41478dec98edSTao Ma mlog_errno(status); 41488dec98edSTao Ma 41498dec98edSTao Ma return status; 41508dec98edSTao Ma } 41518dec98edSTao Ma 41528dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) 41538dec98edSTao Ma { 41548dec98edSTao Ma int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 41558dec98edSTao Ma struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 41568dec98edSTao Ma struct ocfs2_super *osb = lockres->l_priv; 41578dec98edSTao Ma 41588dec98edSTao Ma if (!ocfs2_mount_local(osb)) 41598dec98edSTao Ma ocfs2_cluster_unlock(osb, lockres, level); 41608dec98edSTao Ma } 41618dec98edSTao Ma 416200600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 4163ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 4164ccd979bdSMark Fasheh { 4165ccd979bdSMark Fasheh int status; 4166d680efe9SMark Fasheh struct ocfs2_unblock_ctl ctl = {0, 0,}; 4167ccd979bdSMark Fasheh unsigned long flags; 4168ccd979bdSMark Fasheh 4169ccd979bdSMark Fasheh /* Our reference to the lockres in this function can be 4170ccd979bdSMark Fasheh * considered valid until we remove the OCFS2_LOCK_QUEUED 4171ccd979bdSMark Fasheh * flag. 
*/ 4172ccd979bdSMark Fasheh 4173ccd979bdSMark Fasheh BUG_ON(!lockres); 4174ccd979bdSMark Fasheh BUG_ON(!lockres->l_ops); 4175ccd979bdSMark Fasheh 41769b915181SSunil Mushran mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name); 4177ccd979bdSMark Fasheh 4178ccd979bdSMark Fasheh /* Detect whether a lock has been marked as going away while 417934d024f8SMark Fasheh * the downconvert thread was processing other things. A lock can 4180ccd979bdSMark Fasheh * still be marked with OCFS2_LOCK_FREEING after this check, 4181ccd979bdSMark Fasheh * but short circuiting here will still save us some 4182ccd979bdSMark Fasheh * performance. */ 4183ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 4184ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) 4185ccd979bdSMark Fasheh goto unqueue; 4186ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 4187ccd979bdSMark Fasheh 4188b5e500e2SMark Fasheh status = ocfs2_unblock_lock(osb, lockres, &ctl); 4189ccd979bdSMark Fasheh if (status < 0) 4190ccd979bdSMark Fasheh mlog_errno(status); 4191ccd979bdSMark Fasheh 4192ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 4193ccd979bdSMark Fasheh unqueue: 4194d680efe9SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { 4195ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 4196ccd979bdSMark Fasheh } else 4197ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 4198ccd979bdSMark Fasheh 41999b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name, 4200d680efe9SMark Fasheh ctl.requeue ? 
"yes" : "no"); 4201ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 4202ccd979bdSMark Fasheh 4203d680efe9SMark Fasheh if (ctl.unblock_action != UNBLOCK_CONTINUE 4204d680efe9SMark Fasheh && lockres->l_ops->post_unlock) 4205d680efe9SMark Fasheh lockres->l_ops->post_unlock(osb, lockres); 4206ccd979bdSMark Fasheh } 4207ccd979bdSMark Fasheh 4208ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 4209ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 4210ccd979bdSMark Fasheh { 4211a75e9ccaSSrinivas Eeda unsigned long flags; 4212a75e9ccaSSrinivas Eeda 4213ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 4214ccd979bdSMark Fasheh 4215ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) { 4216ccd979bdSMark Fasheh /* Do not schedule a lock for downconvert when it's on 4217ccd979bdSMark Fasheh * the way to destruction - any nodes wanting access 4218ccd979bdSMark Fasheh * to the resource will get it soon. */ 42199b915181SSunil Mushran mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n", 4220ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 4221ccd979bdSMark Fasheh return; 4222ccd979bdSMark Fasheh } 4223ccd979bdSMark Fasheh 4224ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 4225ccd979bdSMark Fasheh 4226a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 4227ccd979bdSMark Fasheh if (list_empty(&lockres->l_blocked_list)) { 4228ccd979bdSMark Fasheh list_add_tail(&lockres->l_blocked_list, 4229ccd979bdSMark Fasheh &osb->blocked_lock_list); 4230ccd979bdSMark Fasheh osb->blocked_lock_count++; 4231ccd979bdSMark Fasheh } 4232a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 4233ccd979bdSMark Fasheh } 423434d024f8SMark Fasheh 423534d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 423634d024f8SMark Fasheh { 423734d024f8SMark Fasheh unsigned long processed; 4238a75e9ccaSSrinivas Eeda unsigned long 
flags; 423934d024f8SMark Fasheh struct ocfs2_lock_res *lockres; 424034d024f8SMark Fasheh 4241a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 424234d024f8SMark Fasheh /* grab this early so we know to try again if a state change and 424334d024f8SMark Fasheh * wake happens part-way through our work */ 424434d024f8SMark Fasheh osb->dc_work_sequence = osb->dc_wake_sequence; 424534d024f8SMark Fasheh 424634d024f8SMark Fasheh processed = osb->blocked_lock_count; 4247209f7512SJoseph Qi /* 4248209f7512SJoseph Qi * blocked lock processing in this loop might call iput which can 4249209f7512SJoseph Qi * remove items off osb->blocked_lock_list. Downconvert up to 4250209f7512SJoseph Qi * 'processed' number of locks, but stop short if we had some 4251209f7512SJoseph Qi * removed in ocfs2_mark_lockres_freeing when downconverting. 4252209f7512SJoseph Qi */ 4253209f7512SJoseph Qi while (processed && !list_empty(&osb->blocked_lock_list)) { 425434d024f8SMark Fasheh lockres = list_entry(osb->blocked_lock_list.next, 425534d024f8SMark Fasheh struct ocfs2_lock_res, l_blocked_list); 425634d024f8SMark Fasheh list_del_init(&lockres->l_blocked_list); 425734d024f8SMark Fasheh osb->blocked_lock_count--; 4258a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 425934d024f8SMark Fasheh 426034d024f8SMark Fasheh BUG_ON(!processed); 426134d024f8SMark Fasheh processed--; 426234d024f8SMark Fasheh 426334d024f8SMark Fasheh ocfs2_process_blocked_lock(osb, lockres); 426434d024f8SMark Fasheh 4265a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 426634d024f8SMark Fasheh } 4267a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 426834d024f8SMark Fasheh } 426934d024f8SMark Fasheh 427034d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 427134d024f8SMark Fasheh { 427234d024f8SMark Fasheh int empty = 0; 4273a75e9ccaSSrinivas Eeda unsigned long flags; 427434d024f8SMark Fasheh 
4275a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 427634d024f8SMark Fasheh if (list_empty(&osb->blocked_lock_list)) 427734d024f8SMark Fasheh empty = 1; 427834d024f8SMark Fasheh 4279a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 428034d024f8SMark Fasheh return empty; 428134d024f8SMark Fasheh } 428234d024f8SMark Fasheh 428334d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 428434d024f8SMark Fasheh { 428534d024f8SMark Fasheh int should_wake = 0; 4286a75e9ccaSSrinivas Eeda unsigned long flags; 428734d024f8SMark Fasheh 4288a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 428934d024f8SMark Fasheh if (osb->dc_work_sequence != osb->dc_wake_sequence) 429034d024f8SMark Fasheh should_wake = 1; 4291a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 429234d024f8SMark Fasheh 429334d024f8SMark Fasheh return should_wake; 429434d024f8SMark Fasheh } 429534d024f8SMark Fasheh 4296200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg) 429734d024f8SMark Fasheh { 429834d024f8SMark Fasheh int status = 0; 429934d024f8SMark Fasheh struct ocfs2_super *osb = arg; 430034d024f8SMark Fasheh 430134d024f8SMark Fasheh /* only quit once we've been asked to stop and there is no more 430234d024f8SMark Fasheh * work available */ 430334d024f8SMark Fasheh while (!(kthread_should_stop() && 430434d024f8SMark Fasheh ocfs2_downconvert_thread_lists_empty(osb))) { 430534d024f8SMark Fasheh 430634d024f8SMark Fasheh wait_event_interruptible(osb->dc_event, 430734d024f8SMark Fasheh ocfs2_downconvert_thread_should_wake(osb) || 430834d024f8SMark Fasheh kthread_should_stop()); 430934d024f8SMark Fasheh 431034d024f8SMark Fasheh mlog(0, "downconvert_thread: awoken\n"); 431134d024f8SMark Fasheh 431234d024f8SMark Fasheh ocfs2_downconvert_thread_do_work(osb); 431334d024f8SMark Fasheh } 431434d024f8SMark Fasheh 431534d024f8SMark Fasheh osb->dc_task = NULL; 431634d024f8SMark 
Fasheh return status; 431734d024f8SMark Fasheh } 431834d024f8SMark Fasheh 431934d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 432034d024f8SMark Fasheh { 4321a75e9ccaSSrinivas Eeda unsigned long flags; 4322a75e9ccaSSrinivas Eeda 4323a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 432434d024f8SMark Fasheh /* make sure the voting thread gets a swipe at whatever changes 432534d024f8SMark Fasheh * the caller may have made to the voting state */ 432634d024f8SMark Fasheh osb->dc_wake_sequence++; 4327a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 432834d024f8SMark Fasheh wake_up(&osb->dc_event); 432934d024f8SMark Fasheh } 4330