/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dlmglue.c
 *
 * Code which implements an OCFS2 specific interface to our DLM.
 *
 * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh */ 25ccd979bdSMark Fasheh 26ccd979bdSMark Fasheh #include <linux/types.h> 27ccd979bdSMark Fasheh #include <linux/slab.h> 28ccd979bdSMark Fasheh #include <linux/highmem.h> 29ccd979bdSMark Fasheh #include <linux/mm.h> 30ccd979bdSMark Fasheh #include <linux/kthread.h> 31ccd979bdSMark Fasheh #include <linux/pagemap.h> 32ccd979bdSMark Fasheh #include <linux/debugfs.h> 33ccd979bdSMark Fasheh #include <linux/seq_file.h> 348ddb7b00SSunil Mushran #include <linux/time.h> 359e33d69fSJan Kara #include <linux/quotaops.h> 36ccd979bdSMark Fasheh 37ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE 38ccd979bdSMark Fasheh #include <cluster/masklog.h> 39ccd979bdSMark Fasheh 40ccd979bdSMark Fasheh #include "ocfs2.h" 41d24fbcdaSJoel Becker #include "ocfs2_lockingver.h" 42ccd979bdSMark Fasheh 43ccd979bdSMark Fasheh #include "alloc.h" 44d680efe9SMark Fasheh #include "dcache.h" 45ccd979bdSMark Fasheh #include "dlmglue.h" 46ccd979bdSMark Fasheh #include "extent_map.h" 477f1a37e3STiger Yang #include "file.h" 48ccd979bdSMark Fasheh #include "heartbeat.h" 49ccd979bdSMark Fasheh #include "inode.h" 50ccd979bdSMark Fasheh #include "journal.h" 5124ef1815SJoel Becker #include "stackglue.h" 52ccd979bdSMark Fasheh #include "slot_map.h" 53ccd979bdSMark Fasheh #include "super.h" 54ccd979bdSMark Fasheh #include "uptodate.h" 559e33d69fSJan Kara #include "quota.h" 56ccd979bdSMark Fasheh 57ccd979bdSMark Fasheh #include "buffer_head_io.h" 58ccd979bdSMark Fasheh 59ccd979bdSMark Fasheh struct ocfs2_mask_waiter { 60ccd979bdSMark Fasheh struct list_head mw_item; 61ccd979bdSMark Fasheh int mw_status; 62ccd979bdSMark Fasheh struct completion mw_complete; 63ccd979bdSMark Fasheh unsigned long mw_mask; 64ccd979bdSMark Fasheh unsigned long mw_goal; 658ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 668ddb7b00SSunil Mushran unsigned long long mw_lock_start; 678ddb7b00SSunil Mushran #endif 68ccd979bdSMark Fasheh }; 69ccd979bdSMark Fasheh 7054a7e755SMark Fasheh static struct 
ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 7154a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); 72cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); 739e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres); 74ccd979bdSMark Fasheh 75d680efe9SMark Fasheh /* 76cc567d89SMark Fasheh * Return value from ->downconvert_worker functions. 77d680efe9SMark Fasheh * 78b5e500e2SMark Fasheh * These control the precise actions of ocfs2_unblock_lock() 79d680efe9SMark Fasheh * and ocfs2_process_blocked_lock() 80d680efe9SMark Fasheh * 81d680efe9SMark Fasheh */ 82d680efe9SMark Fasheh enum ocfs2_unblock_action { 83d680efe9SMark Fasheh UNBLOCK_CONTINUE = 0, /* Continue downconvert */ 84d680efe9SMark Fasheh UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire 85d680efe9SMark Fasheh * ->post_unlock callback */ 86d680efe9SMark Fasheh UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire 87d680efe9SMark Fasheh * ->post_unlock() callback. 
*/ 88d680efe9SMark Fasheh }; 89d680efe9SMark Fasheh 90d680efe9SMark Fasheh struct ocfs2_unblock_ctl { 91d680efe9SMark Fasheh int requeue; 92d680efe9SMark Fasheh enum ocfs2_unblock_action unblock_action; 93d680efe9SMark Fasheh }; 94d680efe9SMark Fasheh 95810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 96810d5aebSMark Fasheh int new_level); 97810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 98810d5aebSMark Fasheh 99cc567d89SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 100cc567d89SMark Fasheh int blocking); 101cc567d89SMark Fasheh 102cc567d89SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 103cc567d89SMark Fasheh int blocking); 104d680efe9SMark Fasheh 105d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 106d680efe9SMark Fasheh struct ocfs2_lock_res *lockres); 107ccd979bdSMark Fasheh 1089e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); 1096cb129f5SAdrian Bunk 1106cb129f5SAdrian Bunk #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 1116cb129f5SAdrian Bunk 1126cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. 
*/ 1136cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level, 1146cb129f5SAdrian Bunk const char *function, 1156cb129f5SAdrian Bunk unsigned int line, 1166cb129f5SAdrian Bunk struct ocfs2_lock_res *lockres) 1176cb129f5SAdrian Bunk { 1188f2c9c1bSJoel Becker struct ocfs2_meta_lvb *lvb = 1198f2c9c1bSJoel Becker (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1206cb129f5SAdrian Bunk 1216cb129f5SAdrian Bunk mlog(level, "LVB information for %s (called from %s:%u):\n", 1226cb129f5SAdrian Bunk lockres->l_name, function, line); 1236cb129f5SAdrian Bunk mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", 1246cb129f5SAdrian Bunk lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), 1256cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_igeneration)); 1266cb129f5SAdrian Bunk mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 1276cb129f5SAdrian Bunk (unsigned long long)be64_to_cpu(lvb->lvb_isize), 1286cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 1296cb129f5SAdrian Bunk be16_to_cpu(lvb->lvb_imode)); 1306cb129f5SAdrian Bunk mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 1316cb129f5SAdrian Bunk "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), 1326cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_iatime_packed), 1336cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_ictime_packed), 1346cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_imtime_packed), 1356cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iattr)); 1366cb129f5SAdrian Bunk } 1376cb129f5SAdrian Bunk 1386cb129f5SAdrian Bunk 139f625c979SMark Fasheh /* 140f625c979SMark Fasheh * OCFS2 Lock Resource Operations 141f625c979SMark Fasheh * 142f625c979SMark Fasheh * These fine tune the behavior of the generic dlmglue locking infrastructure. 
1430d5dc6c2SMark Fasheh * 1440d5dc6c2SMark Fasheh * The most basic of lock types can point ->l_priv to their respective 1450d5dc6c2SMark Fasheh * struct ocfs2_super and allow the default actions to manage things. 1460d5dc6c2SMark Fasheh * 1470d5dc6c2SMark Fasheh * Right now, each lock type also needs to implement an init function, 1480d5dc6c2SMark Fasheh * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() 1490d5dc6c2SMark Fasheh * should be called when the lock is no longer needed (i.e., object 1500d5dc6c2SMark Fasheh * destruction time). 151f625c979SMark Fasheh */ 152ccd979bdSMark Fasheh struct ocfs2_lock_res_ops { 15354a7e755SMark Fasheh /* 15454a7e755SMark Fasheh * Translate an ocfs2_lock_res * into an ocfs2_super *. Define 15554a7e755SMark Fasheh * this callback if ->l_priv is not an ocfs2_super pointer 15654a7e755SMark Fasheh */ 15754a7e755SMark Fasheh struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); 158b5e500e2SMark Fasheh 1590d5dc6c2SMark Fasheh /* 16034d024f8SMark Fasheh * Optionally called in the downconvert thread after a 16134d024f8SMark Fasheh * successful downconvert. The lockres will not be referenced 16234d024f8SMark Fasheh * after this callback is called, so it is safe to free 16334d024f8SMark Fasheh * memory, etc. 1640d5dc6c2SMark Fasheh * 1650d5dc6c2SMark Fasheh * The exact semantics of when this is called are controlled 1660d5dc6c2SMark Fasheh * by ->downconvert_worker() 1670d5dc6c2SMark Fasheh */ 168d680efe9SMark Fasheh void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); 169f625c979SMark Fasheh 170f625c979SMark Fasheh /* 17116d5b956SMark Fasheh * Allow a lock type to add checks to determine whether it is 17216d5b956SMark Fasheh * safe to downconvert a lock. Return 0 to re-queue the 17316d5b956SMark Fasheh * downconvert at a later time, nonzero to continue. 
17416d5b956SMark Fasheh * 17516d5b956SMark Fasheh * For most locks, the default checks that there are no 17616d5b956SMark Fasheh * incompatible holders are sufficient. 17716d5b956SMark Fasheh * 17816d5b956SMark Fasheh * Called with the lockres spinlock held. 17916d5b956SMark Fasheh */ 18016d5b956SMark Fasheh int (*check_downconvert)(struct ocfs2_lock_res *, int); 18116d5b956SMark Fasheh 18216d5b956SMark Fasheh /* 1835ef0d4eaSMark Fasheh * Allows a lock type to populate the lock value block. This 1845ef0d4eaSMark Fasheh * is called on downconvert, and when we drop a lock. 1855ef0d4eaSMark Fasheh * 1865ef0d4eaSMark Fasheh * Locks that want to use this should set LOCK_TYPE_USES_LVB 1875ef0d4eaSMark Fasheh * in the flags field. 1885ef0d4eaSMark Fasheh * 1895ef0d4eaSMark Fasheh * Called with the lockres spinlock held. 1905ef0d4eaSMark Fasheh */ 1915ef0d4eaSMark Fasheh void (*set_lvb)(struct ocfs2_lock_res *); 1925ef0d4eaSMark Fasheh 1935ef0d4eaSMark Fasheh /* 194cc567d89SMark Fasheh * Called from the downconvert thread when it is determined 195cc567d89SMark Fasheh * that a lock will be downconverted. This is called without 196cc567d89SMark Fasheh * any locks held so the function can do work that might 197cc567d89SMark Fasheh * schedule (syncing out data, etc). 198cc567d89SMark Fasheh * 199cc567d89SMark Fasheh * This should return any one of the ocfs2_unblock_action 200cc567d89SMark Fasheh * values, depending on what it wants the thread to do. 201cc567d89SMark Fasheh */ 202cc567d89SMark Fasheh int (*downconvert_worker)(struct ocfs2_lock_res *, int); 203cc567d89SMark Fasheh 204cc567d89SMark Fasheh /* 205f625c979SMark Fasheh * LOCK_TYPE_* flags which describe the specific requirements 206f625c979SMark Fasheh * of a lock type. Descriptions of each individual flag follow. 
207f625c979SMark Fasheh */ 208f625c979SMark Fasheh int flags; 209ccd979bdSMark Fasheh }; 210ccd979bdSMark Fasheh 211f625c979SMark Fasheh /* 212f625c979SMark Fasheh * Some locks want to "refresh" potentially stale data when a 213f625c979SMark Fasheh * meaningful (PRMODE or EXMODE) lock level is first obtained. If this 214f625c979SMark Fasheh * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the 215f625c979SMark Fasheh * individual lockres l_flags member from the ast function. It is 216f625c979SMark Fasheh * expected that the locking wrapper will clear the 217f625c979SMark Fasheh * OCFS2_LOCK_NEEDS_REFRESH flag when done. 218f625c979SMark Fasheh */ 219f625c979SMark Fasheh #define LOCK_TYPE_REQUIRES_REFRESH 0x1 220f625c979SMark Fasheh 221b80fc012SMark Fasheh /* 2225ef0d4eaSMark Fasheh * Indicate that a lock type makes use of the lock value block. The 2235ef0d4eaSMark Fasheh * ->set_lvb lock type callback must be defined. 224b80fc012SMark Fasheh */ 225b80fc012SMark Fasheh #define LOCK_TYPE_USES_LVB 0x2 226b80fc012SMark Fasheh 227ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 22854a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 229f625c979SMark Fasheh .flags = 0, 230ccd979bdSMark Fasheh }; 231ccd979bdSMark Fasheh 232e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { 23354a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 234810d5aebSMark Fasheh .check_downconvert = ocfs2_check_meta_downconvert, 235810d5aebSMark Fasheh .set_lvb = ocfs2_set_meta_lvb, 236f1f54068SMark Fasheh .downconvert_worker = ocfs2_data_convert_worker, 237b80fc012SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 238ccd979bdSMark Fasheh }; 239ccd979bdSMark Fasheh 240ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = { 241f625c979SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH, 242ccd979bdSMark Fasheh }; 243ccd979bdSMark Fasheh 244ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops 
ocfs2_rename_lops = { 245f625c979SMark Fasheh .flags = 0, 246ccd979bdSMark Fasheh }; 247ccd979bdSMark Fasheh 248d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { 24954a7e755SMark Fasheh .get_osb = ocfs2_get_dentry_osb, 250d680efe9SMark Fasheh .post_unlock = ocfs2_dentry_post_unlock, 251cc567d89SMark Fasheh .downconvert_worker = ocfs2_dentry_convert_worker, 252f625c979SMark Fasheh .flags = 0, 253d680efe9SMark Fasheh }; 254d680efe9SMark Fasheh 25550008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 25650008630STiger Yang .get_osb = ocfs2_get_inode_osb, 25750008630STiger Yang .flags = 0, 25850008630STiger Yang }; 25950008630STiger Yang 260cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = { 261cf8e06f1SMark Fasheh .get_osb = ocfs2_get_file_osb, 262cf8e06f1SMark Fasheh .flags = 0, 263cf8e06f1SMark Fasheh }; 264cf8e06f1SMark Fasheh 2659e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { 2669e33d69fSJan Kara .set_lvb = ocfs2_set_qinfo_lvb, 2679e33d69fSJan Kara .get_osb = ocfs2_get_qinfo_osb, 2689e33d69fSJan Kara .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, 2699e33d69fSJan Kara }; 2709e33d69fSJan Kara 271ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 272ccd979bdSMark Fasheh { 273ccd979bdSMark Fasheh return lockres->l_type == OCFS2_LOCK_TYPE_META || 27450008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_RW || 27550008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 276ccd979bdSMark Fasheh } 277ccd979bdSMark Fasheh 278ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 279ccd979bdSMark Fasheh { 280ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_inode_lock(lockres)); 281ccd979bdSMark Fasheh 282ccd979bdSMark Fasheh return (struct inode *) lockres->l_priv; 283ccd979bdSMark Fasheh } 284ccd979bdSMark Fasheh 285d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock 
*ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) 286d680efe9SMark Fasheh { 287d680efe9SMark Fasheh BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); 288d680efe9SMark Fasheh 289d680efe9SMark Fasheh return (struct ocfs2_dentry_lock *)lockres->l_priv; 290d680efe9SMark Fasheh } 291d680efe9SMark Fasheh 2929e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) 2939e33d69fSJan Kara { 2949e33d69fSJan Kara BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); 2959e33d69fSJan Kara 2969e33d69fSJan Kara return (struct ocfs2_mem_dqinfo *)lockres->l_priv; 2979e33d69fSJan Kara } 2989e33d69fSJan Kara 29954a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 30054a7e755SMark Fasheh { 30154a7e755SMark Fasheh if (lockres->l_ops->get_osb) 30254a7e755SMark Fasheh return lockres->l_ops->get_osb(lockres); 30354a7e755SMark Fasheh 30454a7e755SMark Fasheh return (struct ocfs2_super *)lockres->l_priv; 30554a7e755SMark Fasheh } 30654a7e755SMark Fasheh 307ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 308ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 309ccd979bdSMark Fasheh int level, 310bd3e7610SJoel Becker u32 dlm_flags); 311ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 312ccd979bdSMark Fasheh int wanted); 313ccd979bdSMark Fasheh static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 314ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 315ccd979bdSMark Fasheh int level); 316ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 317ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 318ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 319ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res 
*lockres, int level); 320ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 321ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 322ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 323ccd979bdSMark Fasheh int convert); 3247431cd7eSJoel Becker #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ 3257431cd7eSJoel Becker mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ 3267431cd7eSJoel Becker _err, _func, _lockres->l_name); \ 327ccd979bdSMark Fasheh } while (0) 32834d024f8SMark Fasheh static int ocfs2_downconvert_thread(void *arg); 32934d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 330ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 331e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 332ccd979bdSMark Fasheh struct buffer_head **bh); 333ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 334ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level); 335de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 336cf8e06f1SMark Fasheh int new_level); 337cf8e06f1SMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 338cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres, 339cf8e06f1SMark Fasheh int new_level, 340de551246SJoel Becker int lvb, 341de551246SJoel Becker unsigned int generation); 342cf8e06f1SMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 343cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 344cf8e06f1SMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 345cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 346cf8e06f1SMark Fasheh 347ccd979bdSMark Fasheh 348ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 349ccd979bdSMark Fasheh u64 blkno, 350ccd979bdSMark Fasheh u32 generation, 
351ccd979bdSMark Fasheh char *name) 352ccd979bdSMark Fasheh { 353ccd979bdSMark Fasheh int len; 354ccd979bdSMark Fasheh 355ccd979bdSMark Fasheh mlog_entry_void(); 356ccd979bdSMark Fasheh 357ccd979bdSMark Fasheh BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); 358ccd979bdSMark Fasheh 359b0697053SMark Fasheh len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 360b0697053SMark Fasheh ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, 361b0697053SMark Fasheh (long long)blkno, generation); 362ccd979bdSMark Fasheh 363ccd979bdSMark Fasheh BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 364ccd979bdSMark Fasheh 365ccd979bdSMark Fasheh mlog(0, "built lock resource with name: %s\n", name); 366ccd979bdSMark Fasheh 367ccd979bdSMark Fasheh mlog_exit_void(); 368ccd979bdSMark Fasheh } 369ccd979bdSMark Fasheh 37034af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 371ccd979bdSMark Fasheh 372ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 373ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug) 374ccd979bdSMark Fasheh { 375ccd979bdSMark Fasheh mlog(0, "Add tracking for lockres %s\n", res->l_name); 376ccd979bdSMark Fasheh 377ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 378ccd979bdSMark Fasheh list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); 379ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 380ccd979bdSMark Fasheh } 381ccd979bdSMark Fasheh 382ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) 383ccd979bdSMark Fasheh { 384ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 385ccd979bdSMark Fasheh if (!list_empty(&res->l_debug_list)) 386ccd979bdSMark Fasheh list_del_init(&res->l_debug_list); 387ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 388ccd979bdSMark Fasheh } 389ccd979bdSMark Fasheh 3908ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 3918ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 
3928ddb7b00SSunil Mushran { 3938ddb7b00SSunil Mushran res->l_lock_num_prmode = 0; 3948ddb7b00SSunil Mushran res->l_lock_num_prmode_failed = 0; 3958ddb7b00SSunil Mushran res->l_lock_total_prmode = 0; 3968ddb7b00SSunil Mushran res->l_lock_max_prmode = 0; 3978ddb7b00SSunil Mushran res->l_lock_num_exmode = 0; 3988ddb7b00SSunil Mushran res->l_lock_num_exmode_failed = 0; 3998ddb7b00SSunil Mushran res->l_lock_total_exmode = 0; 4008ddb7b00SSunil Mushran res->l_lock_max_exmode = 0; 4018ddb7b00SSunil Mushran res->l_lock_refresh = 0; 4028ddb7b00SSunil Mushran } 4038ddb7b00SSunil Mushran 4048ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, 4058ddb7b00SSunil Mushran struct ocfs2_mask_waiter *mw, int ret) 4068ddb7b00SSunil Mushran { 4078ddb7b00SSunil Mushran unsigned long long *num, *sum; 4088ddb7b00SSunil Mushran unsigned int *max, *failed; 4098ddb7b00SSunil Mushran struct timespec ts = current_kernel_time(); 4108ddb7b00SSunil Mushran unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start; 4118ddb7b00SSunil Mushran 4128ddb7b00SSunil Mushran if (level == LKM_PRMODE) { 4138ddb7b00SSunil Mushran num = &res->l_lock_num_prmode; 4148ddb7b00SSunil Mushran sum = &res->l_lock_total_prmode; 4158ddb7b00SSunil Mushran max = &res->l_lock_max_prmode; 4168ddb7b00SSunil Mushran failed = &res->l_lock_num_prmode_failed; 4178ddb7b00SSunil Mushran } else if (level == LKM_EXMODE) { 4188ddb7b00SSunil Mushran num = &res->l_lock_num_exmode; 4198ddb7b00SSunil Mushran sum = &res->l_lock_total_exmode; 4208ddb7b00SSunil Mushran max = &res->l_lock_max_exmode; 4218ddb7b00SSunil Mushran failed = &res->l_lock_num_exmode_failed; 4228ddb7b00SSunil Mushran } else 4238ddb7b00SSunil Mushran return; 4248ddb7b00SSunil Mushran 4258ddb7b00SSunil Mushran (*num)++; 4268ddb7b00SSunil Mushran (*sum) += time; 4278ddb7b00SSunil Mushran if (time > *max) 4288ddb7b00SSunil Mushran *max = time; 4298ddb7b00SSunil Mushran if (ret) 4308ddb7b00SSunil Mushran (*failed)++; 
4318ddb7b00SSunil Mushran } 4328ddb7b00SSunil Mushran 4338ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4348ddb7b00SSunil Mushran { 4358ddb7b00SSunil Mushran lockres->l_lock_refresh++; 4368ddb7b00SSunil Mushran } 4378ddb7b00SSunil Mushran 4388ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4398ddb7b00SSunil Mushran { 4408ddb7b00SSunil Mushran struct timespec ts = current_kernel_time(); 4418ddb7b00SSunil Mushran mw->mw_lock_start = timespec_to_ns(&ts); 4428ddb7b00SSunil Mushran } 4438ddb7b00SSunil Mushran #else 4448ddb7b00SSunil Mushran static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 4458ddb7b00SSunil Mushran { 4468ddb7b00SSunil Mushran } 4478ddb7b00SSunil Mushran static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, 4488ddb7b00SSunil Mushran int level, struct ocfs2_mask_waiter *mw, int ret) 4498ddb7b00SSunil Mushran { 4508ddb7b00SSunil Mushran } 4518ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4528ddb7b00SSunil Mushran { 4538ddb7b00SSunil Mushran } 4548ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4558ddb7b00SSunil Mushran { 4568ddb7b00SSunil Mushran } 4578ddb7b00SSunil Mushran #endif 4588ddb7b00SSunil Mushran 459ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 460ccd979bdSMark Fasheh struct ocfs2_lock_res *res, 461ccd979bdSMark Fasheh enum ocfs2_lock_type type, 462ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops, 463ccd979bdSMark Fasheh void *priv) 464ccd979bdSMark Fasheh { 465ccd979bdSMark Fasheh res->l_type = type; 466ccd979bdSMark Fasheh res->l_ops = ops; 467ccd979bdSMark Fasheh res->l_priv = priv; 468ccd979bdSMark Fasheh 469bd3e7610SJoel Becker res->l_level = DLM_LOCK_IV; 470bd3e7610SJoel Becker res->l_requested = DLM_LOCK_IV; 471bd3e7610SJoel Becker res->l_blocking = DLM_LOCK_IV; 
472ccd979bdSMark Fasheh res->l_action = OCFS2_AST_INVALID; 473ccd979bdSMark Fasheh res->l_unlock_action = OCFS2_UNLOCK_INVALID; 474ccd979bdSMark Fasheh 475ccd979bdSMark Fasheh res->l_flags = OCFS2_LOCK_INITIALIZED; 476ccd979bdSMark Fasheh 477ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 4788ddb7b00SSunil Mushran 4798ddb7b00SSunil Mushran ocfs2_init_lock_stats(res); 480ccd979bdSMark Fasheh } 481ccd979bdSMark Fasheh 482ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 483ccd979bdSMark Fasheh { 484ccd979bdSMark Fasheh /* This also clears out the lock status block */ 485ccd979bdSMark Fasheh memset(res, 0, sizeof(struct ocfs2_lock_res)); 486ccd979bdSMark Fasheh spin_lock_init(&res->l_lock); 487ccd979bdSMark Fasheh init_waitqueue_head(&res->l_event); 488ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_blocked_list); 489ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_mask_waiters); 490ccd979bdSMark Fasheh } 491ccd979bdSMark Fasheh 492ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 493ccd979bdSMark Fasheh enum ocfs2_lock_type type, 49424c19ef4SMark Fasheh unsigned int generation, 495ccd979bdSMark Fasheh struct inode *inode) 496ccd979bdSMark Fasheh { 497ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops; 498ccd979bdSMark Fasheh 499ccd979bdSMark Fasheh switch(type) { 500ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_RW: 501ccd979bdSMark Fasheh ops = &ocfs2_inode_rw_lops; 502ccd979bdSMark Fasheh break; 503ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_META: 504e63aecb6SMark Fasheh ops = &ocfs2_inode_inode_lops; 505ccd979bdSMark Fasheh break; 50650008630STiger Yang case OCFS2_LOCK_TYPE_OPEN: 50750008630STiger Yang ops = &ocfs2_inode_open_lops; 50850008630STiger Yang break; 509ccd979bdSMark Fasheh default: 510ccd979bdSMark Fasheh mlog_bug_on_msg(1, "type: %d\n", type); 511ccd979bdSMark Fasheh ops = NULL; /* thanks, gcc */ 512ccd979bdSMark Fasheh break; 513ccd979bdSMark Fasheh }; 514ccd979bdSMark Fasheh 
515d680efe9SMark Fasheh ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 51624c19ef4SMark Fasheh generation, res->l_name); 517d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 518d680efe9SMark Fasheh } 519d680efe9SMark Fasheh 52054a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 52154a7e755SMark Fasheh { 52254a7e755SMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 52354a7e755SMark Fasheh 52454a7e755SMark Fasheh return OCFS2_SB(inode->i_sb); 52554a7e755SMark Fasheh } 52654a7e755SMark Fasheh 5279e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) 5289e33d69fSJan Kara { 5299e33d69fSJan Kara struct ocfs2_mem_dqinfo *info = lockres->l_priv; 5309e33d69fSJan Kara 5319e33d69fSJan Kara return OCFS2_SB(info->dqi_gi.dqi_sb); 5329e33d69fSJan Kara } 5339e33d69fSJan Kara 534cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 535cf8e06f1SMark Fasheh { 536cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = lockres->l_priv; 537cf8e06f1SMark Fasheh 538cf8e06f1SMark Fasheh return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 539cf8e06f1SMark Fasheh } 540cf8e06f1SMark Fasheh 541d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 542d680efe9SMark Fasheh { 543d680efe9SMark Fasheh __be64 inode_blkno_be; 544d680efe9SMark Fasheh 545d680efe9SMark Fasheh memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 546d680efe9SMark Fasheh sizeof(__be64)); 547d680efe9SMark Fasheh 548d680efe9SMark Fasheh return be64_to_cpu(inode_blkno_be); 549d680efe9SMark Fasheh } 550d680efe9SMark Fasheh 55154a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 55254a7e755SMark Fasheh { 55354a7e755SMark Fasheh struct ocfs2_dentry_lock *dl = lockres->l_priv; 55454a7e755SMark Fasheh 55554a7e755SMark Fasheh return 
OCFS2_SB(dl->dl_inode->i_sb); 55654a7e755SMark Fasheh } 55754a7e755SMark Fasheh 558d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 559d680efe9SMark Fasheh u64 parent, struct inode *inode) 560d680efe9SMark Fasheh { 561d680efe9SMark Fasheh int len; 562d680efe9SMark Fasheh u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 563d680efe9SMark Fasheh __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 564d680efe9SMark Fasheh struct ocfs2_lock_res *lockres = &dl->dl_lockres; 565d680efe9SMark Fasheh 566d680efe9SMark Fasheh ocfs2_lock_res_init_once(lockres); 567d680efe9SMark Fasheh 568d680efe9SMark Fasheh /* 569d680efe9SMark Fasheh * Unfortunately, the standard lock naming scheme won't work 570d680efe9SMark Fasheh * here because we have two 16 byte values to use. Instead, 571d680efe9SMark Fasheh * we'll stuff the inode number as a binary value. We still 572d680efe9SMark Fasheh * want error prints to show something without garbling the 573d680efe9SMark Fasheh * display, so drop a null byte in there before the inode 574d680efe9SMark Fasheh * number. A future version of OCFS2 will likely use all 575d680efe9SMark Fasheh * binary lock names. The stringified names have been a 576d680efe9SMark Fasheh * tremendous aid in debugging, but now that the debugfs 577d680efe9SMark Fasheh * interface exists, we can mangle things there if need be. 
578d680efe9SMark Fasheh * 579d680efe9SMark Fasheh * NOTE: We also drop the standard "pad" value (the total lock 580d680efe9SMark Fasheh * name size stays the same though - the last part is all 581d680efe9SMark Fasheh * zeros due to the memset in ocfs2_lock_res_init_once() 582d680efe9SMark Fasheh */ 583d680efe9SMark Fasheh len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 584d680efe9SMark Fasheh "%c%016llx", 585d680efe9SMark Fasheh ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 586d680efe9SMark Fasheh (long long)parent); 587d680efe9SMark Fasheh 588d680efe9SMark Fasheh BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 589d680efe9SMark Fasheh 590d680efe9SMark Fasheh memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 591d680efe9SMark Fasheh sizeof(__be64)); 592d680efe9SMark Fasheh 593d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 594d680efe9SMark Fasheh OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 595d680efe9SMark Fasheh dl); 596ccd979bdSMark Fasheh } 597ccd979bdSMark Fasheh 598ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 599ccd979bdSMark Fasheh struct ocfs2_super *osb) 600ccd979bdSMark Fasheh { 601ccd979bdSMark Fasheh /* Superblock lockres doesn't come from a slab so we call init 602ccd979bdSMark Fasheh * once on it manually. 
*/ 603ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 604d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 605d680efe9SMark Fasheh 0, res->l_name); 606ccd979bdSMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 607ccd979bdSMark Fasheh &ocfs2_super_lops, osb); 608ccd979bdSMark Fasheh } 609ccd979bdSMark Fasheh 610ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 611ccd979bdSMark Fasheh struct ocfs2_super *osb) 612ccd979bdSMark Fasheh { 613ccd979bdSMark Fasheh /* Rename lockres doesn't come from a slab so we call init 614ccd979bdSMark Fasheh * once on it manually. */ 615ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 616d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 617d680efe9SMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 618ccd979bdSMark Fasheh &ocfs2_rename_lops, osb); 619ccd979bdSMark Fasheh } 620ccd979bdSMark Fasheh 621cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 622cf8e06f1SMark Fasheh struct ocfs2_file_private *fp) 623cf8e06f1SMark Fasheh { 624cf8e06f1SMark Fasheh struct inode *inode = fp->fp_file->f_mapping->host; 625cf8e06f1SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 626cf8e06f1SMark Fasheh 627cf8e06f1SMark Fasheh ocfs2_lock_res_init_once(lockres); 628cf8e06f1SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 629cf8e06f1SMark Fasheh inode->i_generation, lockres->l_name); 630cf8e06f1SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 631cf8e06f1SMark Fasheh OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 632cf8e06f1SMark Fasheh fp); 633cf8e06f1SMark Fasheh lockres->l_flags |= OCFS2_LOCK_NOCACHE; 634cf8e06f1SMark Fasheh } 635cf8e06f1SMark Fasheh 6369e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 6379e33d69fSJan Kara struct ocfs2_mem_dqinfo *info) 6389e33d69fSJan Kara { 
6399e33d69fSJan Kara ocfs2_lock_res_init_once(lockres); 6409e33d69fSJan Kara ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, 6419e33d69fSJan Kara 0, lockres->l_name); 6429e33d69fSJan Kara ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, 6439e33d69fSJan Kara OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, 6449e33d69fSJan Kara info); 6459e33d69fSJan Kara } 6469e33d69fSJan Kara 647ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 648ccd979bdSMark Fasheh { 649ccd979bdSMark Fasheh mlog_entry_void(); 650ccd979bdSMark Fasheh 651ccd979bdSMark Fasheh if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 652ccd979bdSMark Fasheh return; 653ccd979bdSMark Fasheh 654ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 655ccd979bdSMark Fasheh 656ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 657ccd979bdSMark Fasheh "Lockres %s is on the blocked list\n", 658ccd979bdSMark Fasheh res->l_name); 659ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 660ccd979bdSMark Fasheh "Lockres %s has mask waiters pending\n", 661ccd979bdSMark Fasheh res->l_name); 662ccd979bdSMark Fasheh mlog_bug_on_msg(spin_is_locked(&res->l_lock), 663ccd979bdSMark Fasheh "Lockres %s is locked\n", 664ccd979bdSMark Fasheh res->l_name); 665ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ro_holders, 666ccd979bdSMark Fasheh "Lockres %s has %u ro holders\n", 667ccd979bdSMark Fasheh res->l_name, res->l_ro_holders); 668ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ex_holders, 669ccd979bdSMark Fasheh "Lockres %s has %u ex holders\n", 670ccd979bdSMark Fasheh res->l_name, res->l_ex_holders); 671ccd979bdSMark Fasheh 672ccd979bdSMark Fasheh /* Need to clear out the lock status block for the dlm */ 673ccd979bdSMark Fasheh memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 674ccd979bdSMark Fasheh 675ccd979bdSMark Fasheh res->l_flags = 0UL; 676ccd979bdSMark Fasheh mlog_exit_void(); 677ccd979bdSMark Fasheh } 678ccd979bdSMark Fasheh 
679ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 680ccd979bdSMark Fasheh int level) 681ccd979bdSMark Fasheh { 682ccd979bdSMark Fasheh mlog_entry_void(); 683ccd979bdSMark Fasheh 684ccd979bdSMark Fasheh BUG_ON(!lockres); 685ccd979bdSMark Fasheh 686ccd979bdSMark Fasheh switch(level) { 687bd3e7610SJoel Becker case DLM_LOCK_EX: 688ccd979bdSMark Fasheh lockres->l_ex_holders++; 689ccd979bdSMark Fasheh break; 690bd3e7610SJoel Becker case DLM_LOCK_PR: 691ccd979bdSMark Fasheh lockres->l_ro_holders++; 692ccd979bdSMark Fasheh break; 693ccd979bdSMark Fasheh default: 694ccd979bdSMark Fasheh BUG(); 695ccd979bdSMark Fasheh } 696ccd979bdSMark Fasheh 697ccd979bdSMark Fasheh mlog_exit_void(); 698ccd979bdSMark Fasheh } 699ccd979bdSMark Fasheh 700ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 701ccd979bdSMark Fasheh int level) 702ccd979bdSMark Fasheh { 703ccd979bdSMark Fasheh mlog_entry_void(); 704ccd979bdSMark Fasheh 705ccd979bdSMark Fasheh BUG_ON(!lockres); 706ccd979bdSMark Fasheh 707ccd979bdSMark Fasheh switch(level) { 708bd3e7610SJoel Becker case DLM_LOCK_EX: 709ccd979bdSMark Fasheh BUG_ON(!lockres->l_ex_holders); 710ccd979bdSMark Fasheh lockres->l_ex_holders--; 711ccd979bdSMark Fasheh break; 712bd3e7610SJoel Becker case DLM_LOCK_PR: 713ccd979bdSMark Fasheh BUG_ON(!lockres->l_ro_holders); 714ccd979bdSMark Fasheh lockres->l_ro_holders--; 715ccd979bdSMark Fasheh break; 716ccd979bdSMark Fasheh default: 717ccd979bdSMark Fasheh BUG(); 718ccd979bdSMark Fasheh } 719ccd979bdSMark Fasheh mlog_exit_void(); 720ccd979bdSMark Fasheh } 721ccd979bdSMark Fasheh 722ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock 723ccd979bdSMark Fasheh * levels are EX, PR, and NL. It *will* have to be adjusted when more 724ccd979bdSMark Fasheh * lock types are added. 
*/ 725ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level) 726ccd979bdSMark Fasheh { 727bd3e7610SJoel Becker int new_level = DLM_LOCK_EX; 728ccd979bdSMark Fasheh 729bd3e7610SJoel Becker if (level == DLM_LOCK_EX) 730bd3e7610SJoel Becker new_level = DLM_LOCK_NL; 731bd3e7610SJoel Becker else if (level == DLM_LOCK_PR) 732bd3e7610SJoel Becker new_level = DLM_LOCK_PR; 733ccd979bdSMark Fasheh return new_level; 734ccd979bdSMark Fasheh } 735ccd979bdSMark Fasheh 736ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres, 737ccd979bdSMark Fasheh unsigned long newflags) 738ccd979bdSMark Fasheh { 739800deef3SChristoph Hellwig struct ocfs2_mask_waiter *mw, *tmp; 740ccd979bdSMark Fasheh 741ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 742ccd979bdSMark Fasheh 743ccd979bdSMark Fasheh lockres->l_flags = newflags; 744ccd979bdSMark Fasheh 745800deef3SChristoph Hellwig list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 746ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 747ccd979bdSMark Fasheh continue; 748ccd979bdSMark Fasheh 749ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 750ccd979bdSMark Fasheh mw->mw_status = 0; 751ccd979bdSMark Fasheh complete(&mw->mw_complete); 752ccd979bdSMark Fasheh } 753ccd979bdSMark Fasheh } 754ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 755ccd979bdSMark Fasheh { 756ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags | or); 757ccd979bdSMark Fasheh } 758ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 759ccd979bdSMark Fasheh unsigned long clear) 760ccd979bdSMark Fasheh { 761ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags & ~clear); 762ccd979bdSMark Fasheh } 763ccd979bdSMark Fasheh 764ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 765ccd979bdSMark Fasheh { 
766ccd979bdSMark Fasheh mlog_entry_void(); 767ccd979bdSMark Fasheh 768ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 769ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 770ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 771bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 772ccd979bdSMark Fasheh 773ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 774ccd979bdSMark Fasheh if (lockres->l_level <= 775ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 776bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_NL; 777ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 778ccd979bdSMark Fasheh } 779ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 780ccd979bdSMark Fasheh 781ccd979bdSMark Fasheh mlog_exit_void(); 782ccd979bdSMark Fasheh } 783ccd979bdSMark Fasheh 784ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 785ccd979bdSMark Fasheh { 786ccd979bdSMark Fasheh mlog_entry_void(); 787ccd979bdSMark Fasheh 788ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 789ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 790ccd979bdSMark Fasheh 791ccd979bdSMark Fasheh /* Convert from RO to EX doesn't really need anything as our 792ccd979bdSMark Fasheh * information is already up to data. 
Convert from NL to 793ccd979bdSMark Fasheh * *anything* however should mark ourselves as needing an 794ccd979bdSMark Fasheh * update */ 795bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_NL && 796f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 797ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 798ccd979bdSMark Fasheh 799ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 800ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 801ccd979bdSMark Fasheh 802ccd979bdSMark Fasheh mlog_exit_void(); 803ccd979bdSMark Fasheh } 804ccd979bdSMark Fasheh 805ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 806ccd979bdSMark Fasheh { 807ccd979bdSMark Fasheh mlog_entry_void(); 808ccd979bdSMark Fasheh 8093cf0c507SRoel Kluin BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 810ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 811ccd979bdSMark Fasheh 812bd3e7610SJoel Becker if (lockres->l_requested > DLM_LOCK_NL && 813f625c979SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 814f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 815ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 816ccd979bdSMark Fasheh 817ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 818ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 819ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 820ccd979bdSMark Fasheh 821ccd979bdSMark Fasheh mlog_exit_void(); 822ccd979bdSMark Fasheh } 823ccd979bdSMark Fasheh 824ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 825ccd979bdSMark Fasheh int level) 826ccd979bdSMark Fasheh { 827ccd979bdSMark Fasheh int needs_downconvert = 0; 828ccd979bdSMark Fasheh mlog_entry_void(); 829ccd979bdSMark Fasheh 830ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 831ccd979bdSMark Fasheh 
832ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 833ccd979bdSMark Fasheh 834ccd979bdSMark Fasheh if (level > lockres->l_blocking) { 835ccd979bdSMark Fasheh /* only schedule a downconvert if we haven't already scheduled 836ccd979bdSMark Fasheh * one that goes low enough to satisfy the level we're 837ccd979bdSMark Fasheh * blocking. this also catches the case where we get 838ccd979bdSMark Fasheh * duplicate BASTs */ 839ccd979bdSMark Fasheh if (ocfs2_highest_compat_lock_level(level) < 840ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) 841ccd979bdSMark Fasheh needs_downconvert = 1; 842ccd979bdSMark Fasheh 843ccd979bdSMark Fasheh lockres->l_blocking = level; 844ccd979bdSMark Fasheh } 845ccd979bdSMark Fasheh 846ccd979bdSMark Fasheh mlog_exit(needs_downconvert); 847ccd979bdSMark Fasheh return needs_downconvert; 848ccd979bdSMark Fasheh } 849ccd979bdSMark Fasheh 850de551246SJoel Becker /* 851de551246SJoel Becker * OCFS2_LOCK_PENDING and l_pending_gen. 852de551246SJoel Becker * 853de551246SJoel Becker * Why does OCFS2_LOCK_PENDING exist? To close a race between setting 854de551246SJoel Becker * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() 855de551246SJoel Becker * for more details on the race. 856de551246SJoel Becker * 857de551246SJoel Becker * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces 858de551246SJoel Becker * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock() 859de551246SJoel Becker * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear 860de551246SJoel Becker * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, 861de551246SJoel Becker * the caller is going to try to clear PENDING again. If nothing else is 862de551246SJoel Becker * happening, __lockres_clear_pending() sees PENDING is unset and does 863de551246SJoel Becker * nothing. 
864de551246SJoel Becker * 865de551246SJoel Becker * But what if another path (eg downconvert thread) has just started a 866de551246SJoel Becker * new locking action? The other path has re-set PENDING. Our path 867de551246SJoel Becker * cannot clear PENDING, because that will re-open the original race 868de551246SJoel Becker * window. 869de551246SJoel Becker * 870de551246SJoel Becker * [Example] 871de551246SJoel Becker * 872de551246SJoel Becker * ocfs2_meta_lock() 873de551246SJoel Becker * ocfs2_cluster_lock() 874de551246SJoel Becker * set BUSY 875de551246SJoel Becker * set PENDING 876de551246SJoel Becker * drop l_lock 877de551246SJoel Becker * ocfs2_dlm_lock() 878de551246SJoel Becker * ocfs2_locking_ast() ocfs2_downconvert_thread() 879de551246SJoel Becker * clear PENDING ocfs2_unblock_lock() 880de551246SJoel Becker * take_l_lock 881de551246SJoel Becker * !BUSY 882de551246SJoel Becker * ocfs2_prepare_downconvert() 883de551246SJoel Becker * set BUSY 884de551246SJoel Becker * set PENDING 885de551246SJoel Becker * drop l_lock 886de551246SJoel Becker * take l_lock 887de551246SJoel Becker * clear PENDING 888de551246SJoel Becker * drop l_lock 889de551246SJoel Becker * <window> 890de551246SJoel Becker * ocfs2_dlm_lock() 891de551246SJoel Becker * 892de551246SJoel Becker * So as you can see, we now have a window where l_lock is not held, 893de551246SJoel Becker * PENDING is not set, and ocfs2_dlm_lock() has not been called. 894de551246SJoel Becker * 895de551246SJoel Becker * The core problem is that ocfs2_cluster_lock() has cleared the PENDING 896de551246SJoel Becker * set by ocfs2_prepare_downconvert(). That wasn't nice. 897de551246SJoel Becker * 898de551246SJoel Becker * To solve this we introduce l_pending_gen. A call to 899de551246SJoel Becker * lockres_clear_pending() will only do so when it is passed a generation 900de551246SJoel Becker * number that matches the lockres. lockres_set_pending() will return the 901de551246SJoel Becker * current generation number. 
When ocfs2_cluster_lock() goes to clear 902de551246SJoel Becker * PENDING, it passes the generation it got from set_pending(). In our 903de551246SJoel Becker * example above, the generation numbers will *not* match. Thus, 904de551246SJoel Becker * ocfs2_cluster_lock() will not clear the PENDING set by 905de551246SJoel Becker * ocfs2_prepare_downconvert(). 906de551246SJoel Becker */ 907de551246SJoel Becker 908de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */ 909de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 910de551246SJoel Becker unsigned int generation, 911de551246SJoel Becker struct ocfs2_super *osb) 912de551246SJoel Becker { 913de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 914de551246SJoel Becker 915de551246SJoel Becker /* 916de551246SJoel Becker * The ast and locking functions can race us here. The winner 917de551246SJoel Becker * will clear pending, the loser will not. 918de551246SJoel Becker */ 919de551246SJoel Becker if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 920de551246SJoel Becker (lockres->l_pending_gen != generation)) 921de551246SJoel Becker return; 922de551246SJoel Becker 923de551246SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 924de551246SJoel Becker lockres->l_pending_gen++; 925de551246SJoel Becker 926de551246SJoel Becker /* 927de551246SJoel Becker * The downconvert thread may have skipped us because we 928de551246SJoel Becker * were PENDING. Wake it up. 
929de551246SJoel Becker */ 930de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 931de551246SJoel Becker ocfs2_wake_downconvert_thread(osb); 932de551246SJoel Becker } 933de551246SJoel Becker 934de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */ 935de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 936de551246SJoel Becker unsigned int generation, 937de551246SJoel Becker struct ocfs2_super *osb) 938de551246SJoel Becker { 939de551246SJoel Becker unsigned long flags; 940de551246SJoel Becker 941de551246SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 942de551246SJoel Becker __lockres_clear_pending(lockres, generation, osb); 943de551246SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 944de551246SJoel Becker } 945de551246SJoel Becker 946de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 947de551246SJoel Becker { 948de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 949de551246SJoel Becker BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 950de551246SJoel Becker 951de551246SJoel Becker lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 952de551246SJoel Becker 953de551246SJoel Becker return lockres->l_pending_gen; 954de551246SJoel Becker } 955de551246SJoel Becker 956de551246SJoel Becker 957aa2623adSMark Fasheh static void ocfs2_blocking_ast(void *opaque, int level) 958ccd979bdSMark Fasheh { 959aa2623adSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 960aa2623adSMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 961ccd979bdSMark Fasheh int needs_downconvert; 962ccd979bdSMark Fasheh unsigned long flags; 963ccd979bdSMark Fasheh 964bd3e7610SJoel Becker BUG_ON(level <= DLM_LOCK_NL); 965ccd979bdSMark Fasheh 966aa2623adSMark Fasheh mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 967aa2623adSMark Fasheh lockres->l_name, level, lockres->l_level, 968aa2623adSMark Fasheh 
ocfs2_lock_type_string(lockres->l_type)); 969aa2623adSMark Fasheh 970cf8e06f1SMark Fasheh /* 971cf8e06f1SMark Fasheh * We can skip the bast for locks which don't enable caching - 972cf8e06f1SMark Fasheh * they'll be dropped at the earliest possible time anyway. 973cf8e06f1SMark Fasheh */ 974cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 975cf8e06f1SMark Fasheh return; 976cf8e06f1SMark Fasheh 977ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 978ccd979bdSMark Fasheh needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 979ccd979bdSMark Fasheh if (needs_downconvert) 980ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 981ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 982ccd979bdSMark Fasheh 983d680efe9SMark Fasheh wake_up(&lockres->l_event); 984d680efe9SMark Fasheh 98534d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 986ccd979bdSMark Fasheh } 987ccd979bdSMark Fasheh 988e92d57dfSMark Fasheh static void ocfs2_locking_ast(void *opaque) 989ccd979bdSMark Fasheh { 990e92d57dfSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 991de551246SJoel Becker struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 992ccd979bdSMark Fasheh unsigned long flags; 9931693a5c0SDavid Teigland int status; 994ccd979bdSMark Fasheh 995ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 996ccd979bdSMark Fasheh 9971693a5c0SDavid Teigland status = ocfs2_dlm_lock_status(&lockres->l_lksb); 9981693a5c0SDavid Teigland 9991693a5c0SDavid Teigland if (status == -EAGAIN) { 10001693a5c0SDavid Teigland lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 10011693a5c0SDavid Teigland goto out; 10021693a5c0SDavid Teigland } 10031693a5c0SDavid Teigland 10041693a5c0SDavid Teigland if (status) { 10058f2c9c1bSJoel Becker mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", 10061693a5c0SDavid Teigland lockres->l_name, status); 1007ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 
1008ccd979bdSMark Fasheh return; 1009ccd979bdSMark Fasheh } 1010ccd979bdSMark Fasheh 1011ccd979bdSMark Fasheh switch(lockres->l_action) { 1012ccd979bdSMark Fasheh case OCFS2_AST_ATTACH: 1013ccd979bdSMark Fasheh ocfs2_generic_handle_attach_action(lockres); 1014e92d57dfSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); 1015ccd979bdSMark Fasheh break; 1016ccd979bdSMark Fasheh case OCFS2_AST_CONVERT: 1017ccd979bdSMark Fasheh ocfs2_generic_handle_convert_action(lockres); 1018ccd979bdSMark Fasheh break; 1019ccd979bdSMark Fasheh case OCFS2_AST_DOWNCONVERT: 1020ccd979bdSMark Fasheh ocfs2_generic_handle_downconvert_action(lockres); 1021ccd979bdSMark Fasheh break; 1022ccd979bdSMark Fasheh default: 1023e92d57dfSMark Fasheh mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " 1024e92d57dfSMark Fasheh "lockres flags = 0x%lx, unlock action: %u\n", 1025e92d57dfSMark Fasheh lockres->l_name, lockres->l_action, lockres->l_flags, 1026e92d57dfSMark Fasheh lockres->l_unlock_action); 1027ccd979bdSMark Fasheh BUG(); 1028ccd979bdSMark Fasheh } 10291693a5c0SDavid Teigland out: 1030ccd979bdSMark Fasheh /* set it to something invalid so if we get called again we 1031ccd979bdSMark Fasheh * can catch it. */ 1032ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1033ccd979bdSMark Fasheh 1034de551246SJoel Becker /* Did we try to cancel this lock? Clear that state */ 1035de551246SJoel Becker if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) 1036de551246SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1037de551246SJoel Becker 1038de551246SJoel Becker /* 1039de551246SJoel Becker * We may have beaten the locking functions here. We certainly 1040de551246SJoel Becker * know that dlm_lock() has been called :-) 1041de551246SJoel Becker * Because we can't have two lock calls in flight at once, we 1042de551246SJoel Becker * can use lockres->l_pending_gen. 
1043de551246SJoel Becker */ 1044de551246SJoel Becker __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); 1045de551246SJoel Becker 1046ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1047d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1048ccd979bdSMark Fasheh } 1049ccd979bdSMark Fasheh 1050ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 1051ccd979bdSMark Fasheh int convert) 1052ccd979bdSMark Fasheh { 1053ccd979bdSMark Fasheh unsigned long flags; 1054ccd979bdSMark Fasheh 1055ccd979bdSMark Fasheh mlog_entry_void(); 1056ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1057ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1058ccd979bdSMark Fasheh if (convert) 1059ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1060ccd979bdSMark Fasheh else 1061ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1062ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1063ccd979bdSMark Fasheh 1064ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1065ccd979bdSMark Fasheh mlog_exit_void(); 1066ccd979bdSMark Fasheh } 1067ccd979bdSMark Fasheh 1068ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e., 1069ccd979bdSMark Fasheh * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller 1070ccd979bdSMark Fasheh * to do the right thing in that case. 
1071ccd979bdSMark Fasheh */ 1072ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 1073ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1074ccd979bdSMark Fasheh int level, 1075bd3e7610SJoel Becker u32 dlm_flags) 1076ccd979bdSMark Fasheh { 1077ccd979bdSMark Fasheh int ret = 0; 1078ccd979bdSMark Fasheh unsigned long flags; 1079de551246SJoel Becker unsigned int gen; 1080ccd979bdSMark Fasheh 1081ccd979bdSMark Fasheh mlog_entry_void(); 1082ccd979bdSMark Fasheh 1083bd3e7610SJoel Becker mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 1084ccd979bdSMark Fasheh dlm_flags); 1085ccd979bdSMark Fasheh 1086ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1087ccd979bdSMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || 1088ccd979bdSMark Fasheh (lockres->l_flags & OCFS2_LOCK_BUSY)) { 1089ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1090ccd979bdSMark Fasheh goto bail; 1091ccd979bdSMark Fasheh } 1092ccd979bdSMark Fasheh 1093ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1094ccd979bdSMark Fasheh lockres->l_requested = level; 1095ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1096de551246SJoel Becker gen = lockres_set_pending(lockres); 1097ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1098ccd979bdSMark Fasheh 10994670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1100ccd979bdSMark Fasheh level, 1101ccd979bdSMark Fasheh &lockres->l_lksb, 1102ccd979bdSMark Fasheh dlm_flags, 1103ccd979bdSMark Fasheh lockres->l_name, 1104f0681062SMark Fasheh OCFS2_LOCK_ID_MAX_LEN - 1, 110524ef1815SJoel Becker lockres); 1106de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 11077431cd7eSJoel Becker if (ret) { 11087431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1109ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1110ccd979bdSMark Fasheh } 1111ccd979bdSMark Fasheh 11127431cd7eSJoel Becker mlog(0, "lock 
%s, return from ocfs2_dlm_lock\n", lockres->l_name); 1113ccd979bdSMark Fasheh 1114ccd979bdSMark Fasheh bail: 1115ccd979bdSMark Fasheh mlog_exit(ret); 1116ccd979bdSMark Fasheh return ret; 1117ccd979bdSMark Fasheh } 1118ccd979bdSMark Fasheh 1119ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1120ccd979bdSMark Fasheh int flag) 1121ccd979bdSMark Fasheh { 1122ccd979bdSMark Fasheh unsigned long flags; 1123ccd979bdSMark Fasheh int ret; 1124ccd979bdSMark Fasheh 1125ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1126ccd979bdSMark Fasheh ret = lockres->l_flags & flag; 1127ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1128ccd979bdSMark Fasheh 1129ccd979bdSMark Fasheh return ret; 1130ccd979bdSMark Fasheh } 1131ccd979bdSMark Fasheh 1132ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) 1133ccd979bdSMark Fasheh 1134ccd979bdSMark Fasheh { 1135ccd979bdSMark Fasheh wait_event(lockres->l_event, 1136ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); 1137ccd979bdSMark Fasheh } 1138ccd979bdSMark Fasheh 1139ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) 1140ccd979bdSMark Fasheh 1141ccd979bdSMark Fasheh { 1142ccd979bdSMark Fasheh wait_event(lockres->l_event, 1143ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); 1144ccd979bdSMark Fasheh } 1145ccd979bdSMark Fasheh 1146ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf 1147ccd979bdSMark Fasheh * of another node, and return true if the currently wanted 1148ccd979bdSMark Fasheh * level will be compatible with it. 
*/ 1149ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 1150ccd979bdSMark Fasheh int wanted) 1151ccd979bdSMark Fasheh { 1152ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 1153ccd979bdSMark Fasheh 1154ccd979bdSMark Fasheh return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 1155ccd979bdSMark Fasheh } 1156ccd979bdSMark Fasheh 1157ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 1158ccd979bdSMark Fasheh { 1159ccd979bdSMark Fasheh INIT_LIST_HEAD(&mw->mw_item); 1160ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 11618ddb7b00SSunil Mushran ocfs2_init_start_time(mw); 1162ccd979bdSMark Fasheh } 1163ccd979bdSMark Fasheh 1164ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 1165ccd979bdSMark Fasheh { 1166ccd979bdSMark Fasheh wait_for_completion(&mw->mw_complete); 1167ccd979bdSMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 1168ccd979bdSMark Fasheh INIT_COMPLETION(mw->mw_complete); 1169ccd979bdSMark Fasheh return mw->mw_status; 1170ccd979bdSMark Fasheh } 1171ccd979bdSMark Fasheh 1172ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 1173ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw, 1174ccd979bdSMark Fasheh unsigned long mask, 1175ccd979bdSMark Fasheh unsigned long goal) 1176ccd979bdSMark Fasheh { 1177ccd979bdSMark Fasheh BUG_ON(!list_empty(&mw->mw_item)); 1178ccd979bdSMark Fasheh 1179ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 1180ccd979bdSMark Fasheh 1181ccd979bdSMark Fasheh list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 1182ccd979bdSMark Fasheh mw->mw_mask = mask; 1183ccd979bdSMark Fasheh mw->mw_goal = goal; 1184ccd979bdSMark Fasheh } 1185ccd979bdSMark Fasheh 1186ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY 1187ccd979bdSMark Fasheh * if the mask still hadn't 
reached its goal */ 1188ccd979bdSMark Fasheh static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1189ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw) 1190ccd979bdSMark Fasheh { 1191ccd979bdSMark Fasheh unsigned long flags; 1192ccd979bdSMark Fasheh int ret = 0; 1193ccd979bdSMark Fasheh 1194ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1195ccd979bdSMark Fasheh if (!list_empty(&mw->mw_item)) { 1196ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 1197ccd979bdSMark Fasheh ret = -EBUSY; 1198ccd979bdSMark Fasheh 1199ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 1200ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 1201ccd979bdSMark Fasheh } 1202ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1203ccd979bdSMark Fasheh 1204ccd979bdSMark Fasheh return ret; 1205ccd979bdSMark Fasheh 1206ccd979bdSMark Fasheh } 1207ccd979bdSMark Fasheh 1208cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 1209cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres) 1210cf8e06f1SMark Fasheh { 1211cf8e06f1SMark Fasheh int ret; 1212cf8e06f1SMark Fasheh 1213cf8e06f1SMark Fasheh ret = wait_for_completion_interruptible(&mw->mw_complete); 1214cf8e06f1SMark Fasheh if (ret) 1215cf8e06f1SMark Fasheh lockres_remove_mask_waiter(lockres, mw); 1216cf8e06f1SMark Fasheh else 1217cf8e06f1SMark Fasheh ret = mw->mw_status; 1218cf8e06f1SMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 1219cf8e06f1SMark Fasheh INIT_COMPLETION(mw->mw_complete); 1220cf8e06f1SMark Fasheh return ret; 1221cf8e06f1SMark Fasheh } 1222cf8e06f1SMark Fasheh 1223ccd979bdSMark Fasheh static int ocfs2_cluster_lock(struct ocfs2_super *osb, 1224ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1225ccd979bdSMark Fasheh int level, 1226bd3e7610SJoel Becker u32 lkm_flags, 1227ccd979bdSMark Fasheh int arg_flags) 1228ccd979bdSMark Fasheh { 1229ccd979bdSMark Fasheh struct ocfs2_mask_waiter 
mw; 1230ccd979bdSMark Fasheh int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1231ccd979bdSMark Fasheh int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 1232ccd979bdSMark Fasheh unsigned long flags; 1233de551246SJoel Becker unsigned int gen; 12341693a5c0SDavid Teigland int noqueue_attempted = 0; 1235ccd979bdSMark Fasheh 1236ccd979bdSMark Fasheh mlog_entry_void(); 1237ccd979bdSMark Fasheh 1238ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 1239ccd979bdSMark Fasheh 1240b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1241bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 1242b80fc012SMark Fasheh 1243ccd979bdSMark Fasheh again: 1244ccd979bdSMark Fasheh wait = 0; 1245ccd979bdSMark Fasheh 1246ccd979bdSMark Fasheh if (catch_signals && signal_pending(current)) { 1247ccd979bdSMark Fasheh ret = -ERESTARTSYS; 1248ccd979bdSMark Fasheh goto out; 1249ccd979bdSMark Fasheh } 1250ccd979bdSMark Fasheh 1251ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1252ccd979bdSMark Fasheh 1253ccd979bdSMark Fasheh mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1254ccd979bdSMark Fasheh "Cluster lock called on freeing lockres %s! flags " 1255ccd979bdSMark Fasheh "0x%lx\n", lockres->l_name, lockres->l_flags); 1256ccd979bdSMark Fasheh 1257ccd979bdSMark Fasheh /* We only compare against the currently granted level 1258ccd979bdSMark Fasheh * here. If the lock is blocked waiting on a downconvert, 1259ccd979bdSMark Fasheh * we'll get caught below. */ 1260ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY && 1261ccd979bdSMark Fasheh level > lockres->l_level) { 1262ccd979bdSMark Fasheh /* is someone sitting in dlm_lock? If so, wait on 1263ccd979bdSMark Fasheh * them. 
*/ 1264ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1265ccd979bdSMark Fasheh wait = 1; 1266ccd979bdSMark Fasheh goto unlock; 1267ccd979bdSMark Fasheh } 1268ccd979bdSMark Fasheh 1269ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1270ccd979bdSMark Fasheh !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1271ccd979bdSMark Fasheh /* is the lock is currently blocked on behalf of 1272ccd979bdSMark Fasheh * another node */ 1273ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); 1274ccd979bdSMark Fasheh wait = 1; 1275ccd979bdSMark Fasheh goto unlock; 1276ccd979bdSMark Fasheh } 1277ccd979bdSMark Fasheh 1278ccd979bdSMark Fasheh if (level > lockres->l_level) { 12791693a5c0SDavid Teigland if (noqueue_attempted > 0) { 12801693a5c0SDavid Teigland ret = -EAGAIN; 12811693a5c0SDavid Teigland goto unlock; 12821693a5c0SDavid Teigland } 12831693a5c0SDavid Teigland if (lkm_flags & DLM_LKF_NOQUEUE) 12841693a5c0SDavid Teigland noqueue_attempted = 1; 12851693a5c0SDavid Teigland 1286ccd979bdSMark Fasheh if (lockres->l_action != OCFS2_AST_INVALID) 1287ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres %s has action %u pending\n", 1288ccd979bdSMark Fasheh lockres->l_name, lockres->l_action); 1289ccd979bdSMark Fasheh 1290019d1b22SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1291019d1b22SMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1292bd3e7610SJoel Becker lkm_flags &= ~DLM_LKF_CONVERT; 1293019d1b22SMark Fasheh } else { 1294ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1295bd3e7610SJoel Becker lkm_flags |= DLM_LKF_CONVERT; 1296019d1b22SMark Fasheh } 1297019d1b22SMark Fasheh 1298ccd979bdSMark Fasheh lockres->l_requested = level; 1299ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1300de551246SJoel Becker gen = lockres_set_pending(lockres); 1301ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1302ccd979bdSMark Fasheh 1303bd3e7610SJoel Becker 
BUG_ON(level == DLM_LOCK_IV); 1304bd3e7610SJoel Becker BUG_ON(level == DLM_LOCK_NL); 1305ccd979bdSMark Fasheh 1306ccd979bdSMark Fasheh mlog(0, "lock %s, convert from %d to level = %d\n", 1307ccd979bdSMark Fasheh lockres->l_name, lockres->l_level, level); 1308ccd979bdSMark Fasheh 1309ccd979bdSMark Fasheh /* call dlm_lock to upgrade lock now */ 13104670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1311ccd979bdSMark Fasheh level, 1312ccd979bdSMark Fasheh &lockres->l_lksb, 1313019d1b22SMark Fasheh lkm_flags, 1314ccd979bdSMark Fasheh lockres->l_name, 1315f0681062SMark Fasheh OCFS2_LOCK_ID_MAX_LEN - 1, 131624ef1815SJoel Becker lockres); 1317de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 13187431cd7eSJoel Becker if (ret) { 13197431cd7eSJoel Becker if (!(lkm_flags & DLM_LKF_NOQUEUE) || 13207431cd7eSJoel Becker (ret != -EAGAIN)) { 132124ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", 13227431cd7eSJoel Becker ret, lockres); 1323ccd979bdSMark Fasheh } 1324ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1325ccd979bdSMark Fasheh goto out; 1326ccd979bdSMark Fasheh } 1327ccd979bdSMark Fasheh 132824ef1815SJoel Becker mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n", 1329ccd979bdSMark Fasheh lockres->l_name); 1330ccd979bdSMark Fasheh 1331ccd979bdSMark Fasheh /* At this point we've gone inside the dlm and need to 1332ccd979bdSMark Fasheh * complete our work regardless. */ 1333ccd979bdSMark Fasheh catch_signals = 0; 1334ccd979bdSMark Fasheh 1335ccd979bdSMark Fasheh /* wait for busy to clear and carry on */ 1336ccd979bdSMark Fasheh goto again; 1337ccd979bdSMark Fasheh } 1338ccd979bdSMark Fasheh 1339ccd979bdSMark Fasheh /* Ok, if we get here then we're good to go. 
*/ 1340ccd979bdSMark Fasheh ocfs2_inc_holders(lockres, level); 1341ccd979bdSMark Fasheh 1342ccd979bdSMark Fasheh ret = 0; 1343ccd979bdSMark Fasheh unlock: 1344ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1345ccd979bdSMark Fasheh out: 1346ccd979bdSMark Fasheh /* 1347ccd979bdSMark Fasheh * This is helping work around a lock inversion between the page lock 1348ccd979bdSMark Fasheh * and dlm locks. One path holds the page lock while calling aops 1349ccd979bdSMark Fasheh * which block acquiring dlm locks. The voting thread holds dlm 1350ccd979bdSMark Fasheh * locks while acquiring page locks while down converting data locks. 1351ccd979bdSMark Fasheh * This block is helping an aop path notice the inversion and back 1352ccd979bdSMark Fasheh * off to unlock its page lock before trying the dlm lock again. 1353ccd979bdSMark Fasheh */ 1354ccd979bdSMark Fasheh if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && 1355ccd979bdSMark Fasheh mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { 1356ccd979bdSMark Fasheh wait = 0; 1357ccd979bdSMark Fasheh if (lockres_remove_mask_waiter(lockres, &mw)) 1358ccd979bdSMark Fasheh ret = -EAGAIN; 1359ccd979bdSMark Fasheh else 1360ccd979bdSMark Fasheh goto again; 1361ccd979bdSMark Fasheh } 1362ccd979bdSMark Fasheh if (wait) { 1363ccd979bdSMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1364ccd979bdSMark Fasheh if (ret == 0) 1365ccd979bdSMark Fasheh goto again; 1366ccd979bdSMark Fasheh mlog_errno(ret); 1367ccd979bdSMark Fasheh } 13688ddb7b00SSunil Mushran ocfs2_update_lock_stats(lockres, level, &mw, ret); 1369ccd979bdSMark Fasheh 1370ccd979bdSMark Fasheh mlog_exit(ret); 1371ccd979bdSMark Fasheh return ret; 1372ccd979bdSMark Fasheh } 1373ccd979bdSMark Fasheh 1374ccd979bdSMark Fasheh static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 1375ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1376ccd979bdSMark Fasheh int level) 1377ccd979bdSMark Fasheh { 1378ccd979bdSMark Fasheh unsigned long flags; 1379ccd979bdSMark Fasheh 
1380ccd979bdSMark Fasheh mlog_entry_void(); 1381ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1382ccd979bdSMark Fasheh ocfs2_dec_holders(lockres, level); 138334d024f8SMark Fasheh ocfs2_downconvert_on_unlock(osb, lockres); 1384ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1385ccd979bdSMark Fasheh mlog_exit_void(); 1386ccd979bdSMark Fasheh } 1387ccd979bdSMark Fasheh 1388da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1389d680efe9SMark Fasheh struct ocfs2_lock_res *lockres, 139024c19ef4SMark Fasheh int ex, 139124c19ef4SMark Fasheh int local) 1392ccd979bdSMark Fasheh { 1393bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1394ccd979bdSMark Fasheh unsigned long flags; 1395bd3e7610SJoel Becker u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; 1396ccd979bdSMark Fasheh 1397ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1398ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1399ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1400ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1401ccd979bdSMark Fasheh 140224c19ef4SMark Fasheh return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1403ccd979bdSMark Fasheh } 1404ccd979bdSMark Fasheh 1405ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping 1406ccd979bdSMark Fasheh * the normal cluster directory lookup. Use this ONLY on newly created 1407ccd979bdSMark Fasheh * inodes which other nodes can't possibly see, and which haven't been 1408ccd979bdSMark Fasheh * hashed in the inode hash yet. This can give us a good performance 1409ccd979bdSMark Fasheh * increase as it'll skip the network broadcast normally associated 1410ccd979bdSMark Fasheh * with creating a new lock resource. 
*/ 1411ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode) 1412ccd979bdSMark Fasheh { 1413ccd979bdSMark Fasheh int ret; 1414d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1415ccd979bdSMark Fasheh 1416ccd979bdSMark Fasheh BUG_ON(!inode); 1417ccd979bdSMark Fasheh BUG_ON(!ocfs2_inode_is_new(inode)); 1418ccd979bdSMark Fasheh 1419ccd979bdSMark Fasheh mlog_entry_void(); 1420ccd979bdSMark Fasheh 1421b0697053SMark Fasheh mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1422ccd979bdSMark Fasheh 1423ccd979bdSMark Fasheh /* NOTE: That we don't increment any of the holder counts, nor 1424ccd979bdSMark Fasheh * do we add anything to a journal handle. Since this is 1425ccd979bdSMark Fasheh * supposed to be a new inode which the cluster doesn't know 1426ccd979bdSMark Fasheh * about yet, there is no need to. As far as the LVB handling 1427ccd979bdSMark Fasheh * is concerned, this is basically like acquiring an EX lock 1428ccd979bdSMark Fasheh * on a resource which has an invalid one -- we'll set it 1429ccd979bdSMark Fasheh * valid when we release the EX. */ 1430ccd979bdSMark Fasheh 143124c19ef4SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1432ccd979bdSMark Fasheh if (ret) { 1433ccd979bdSMark Fasheh mlog_errno(ret); 1434ccd979bdSMark Fasheh goto bail; 1435ccd979bdSMark Fasheh } 1436ccd979bdSMark Fasheh 143724c19ef4SMark Fasheh /* 1438bd3e7610SJoel Becker * We don't want to use DLM_LKF_LOCAL on a meta data lock as they 143924c19ef4SMark Fasheh * don't use a generation in their lock names. 
144024c19ef4SMark Fasheh */ 1441e63aecb6SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1442ccd979bdSMark Fasheh if (ret) { 1443ccd979bdSMark Fasheh mlog_errno(ret); 1444ccd979bdSMark Fasheh goto bail; 1445ccd979bdSMark Fasheh } 1446ccd979bdSMark Fasheh 144750008630STiger Yang ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 144850008630STiger Yang if (ret) { 144950008630STiger Yang mlog_errno(ret); 145050008630STiger Yang goto bail; 145150008630STiger Yang } 145250008630STiger Yang 1453ccd979bdSMark Fasheh bail: 1454ccd979bdSMark Fasheh mlog_exit(ret); 1455ccd979bdSMark Fasheh return ret; 1456ccd979bdSMark Fasheh } 1457ccd979bdSMark Fasheh 1458ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write) 1459ccd979bdSMark Fasheh { 1460ccd979bdSMark Fasheh int status, level; 1461ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres; 1462c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1463ccd979bdSMark Fasheh 1464ccd979bdSMark Fasheh BUG_ON(!inode); 1465ccd979bdSMark Fasheh 1466ccd979bdSMark Fasheh mlog_entry_void(); 1467ccd979bdSMark Fasheh 1468b0697053SMark Fasheh mlog(0, "inode %llu take %s RW lock\n", 1469b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1470ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1471ccd979bdSMark Fasheh 1472c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 1473c271c5c2SSunil Mushran return 0; 1474c271c5c2SSunil Mushran 1475ccd979bdSMark Fasheh lockres = &OCFS2_I(inode)->ip_rw_lockres; 1476ccd979bdSMark Fasheh 1477bd3e7610SJoel Becker level = write ? 
DLM_LOCK_EX : DLM_LOCK_PR; 1478ccd979bdSMark Fasheh 1479ccd979bdSMark Fasheh status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1480ccd979bdSMark Fasheh 0); 1481ccd979bdSMark Fasheh if (status < 0) 1482ccd979bdSMark Fasheh mlog_errno(status); 1483ccd979bdSMark Fasheh 1484ccd979bdSMark Fasheh mlog_exit(status); 1485ccd979bdSMark Fasheh return status; 1486ccd979bdSMark Fasheh } 1487ccd979bdSMark Fasheh 1488ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write) 1489ccd979bdSMark Fasheh { 1490bd3e7610SJoel Becker int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1491ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1492c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1493ccd979bdSMark Fasheh 1494ccd979bdSMark Fasheh mlog_entry_void(); 1495ccd979bdSMark Fasheh 1496b0697053SMark Fasheh mlog(0, "inode %llu drop %s RW lock\n", 1497b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1498ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1499ccd979bdSMark Fasheh 1500c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1501ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1502ccd979bdSMark Fasheh 1503ccd979bdSMark Fasheh mlog_exit_void(); 1504ccd979bdSMark Fasheh } 1505ccd979bdSMark Fasheh 150650008630STiger Yang /* 150750008630STiger Yang * ocfs2_open_lock always get PR mode lock. 
150850008630STiger Yang */ 150950008630STiger Yang int ocfs2_open_lock(struct inode *inode) 151050008630STiger Yang { 151150008630STiger Yang int status = 0; 151250008630STiger Yang struct ocfs2_lock_res *lockres; 151350008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 151450008630STiger Yang 151550008630STiger Yang BUG_ON(!inode); 151650008630STiger Yang 151750008630STiger Yang mlog_entry_void(); 151850008630STiger Yang 151950008630STiger Yang mlog(0, "inode %llu take PRMODE open lock\n", 152050008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 152150008630STiger Yang 152250008630STiger Yang if (ocfs2_mount_local(osb)) 152350008630STiger Yang goto out; 152450008630STiger Yang 152550008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 152650008630STiger Yang 152750008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1528bd3e7610SJoel Becker DLM_LOCK_PR, 0, 0); 152950008630STiger Yang if (status < 0) 153050008630STiger Yang mlog_errno(status); 153150008630STiger Yang 153250008630STiger Yang out: 153350008630STiger Yang mlog_exit(status); 153450008630STiger Yang return status; 153550008630STiger Yang } 153650008630STiger Yang 153750008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write) 153850008630STiger Yang { 153950008630STiger Yang int status = 0, level; 154050008630STiger Yang struct ocfs2_lock_res *lockres; 154150008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 154250008630STiger Yang 154350008630STiger Yang BUG_ON(!inode); 154450008630STiger Yang 154550008630STiger Yang mlog_entry_void(); 154650008630STiger Yang 154750008630STiger Yang mlog(0, "inode %llu try to take %s open lock\n", 154850008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno, 154950008630STiger Yang write ? 
"EXMODE" : "PRMODE"); 155050008630STiger Yang 155150008630STiger Yang if (ocfs2_mount_local(osb)) 155250008630STiger Yang goto out; 155350008630STiger Yang 155450008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 155550008630STiger Yang 1556bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 155750008630STiger Yang 155850008630STiger Yang /* 155950008630STiger Yang * The file system may already holding a PRMODE/EXMODE open lock. 1560bd3e7610SJoel Becker * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 156150008630STiger Yang * other nodes and the -EAGAIN will indicate to the caller that 156250008630STiger Yang * this inode is still in use. 156350008630STiger Yang */ 156450008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1565bd3e7610SJoel Becker level, DLM_LKF_NOQUEUE, 0); 156650008630STiger Yang 156750008630STiger Yang out: 156850008630STiger Yang mlog_exit(status); 156950008630STiger Yang return status; 157050008630STiger Yang } 157150008630STiger Yang 157250008630STiger Yang /* 157350008630STiger Yang * ocfs2_open_unlock unlock PR and EX mode open locks. 
157450008630STiger Yang */ 157550008630STiger Yang void ocfs2_open_unlock(struct inode *inode) 157650008630STiger Yang { 157750008630STiger Yang struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 157850008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 157950008630STiger Yang 158050008630STiger Yang mlog_entry_void(); 158150008630STiger Yang 158250008630STiger Yang mlog(0, "inode %llu drop open lock\n", 158350008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 158450008630STiger Yang 158550008630STiger Yang if (ocfs2_mount_local(osb)) 158650008630STiger Yang goto out; 158750008630STiger Yang 158850008630STiger Yang if(lockres->l_ro_holders) 158950008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1590bd3e7610SJoel Becker DLM_LOCK_PR); 159150008630STiger Yang if(lockres->l_ex_holders) 159250008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1593bd3e7610SJoel Becker DLM_LOCK_EX); 159450008630STiger Yang 159550008630STiger Yang out: 159650008630STiger Yang mlog_exit_void(); 159750008630STiger Yang } 159850008630STiger Yang 1599cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1600cf8e06f1SMark Fasheh int level) 1601cf8e06f1SMark Fasheh { 1602cf8e06f1SMark Fasheh int ret; 1603cf8e06f1SMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1604cf8e06f1SMark Fasheh unsigned long flags; 1605cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1606cf8e06f1SMark Fasheh 1607cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1608cf8e06f1SMark Fasheh 1609cf8e06f1SMark Fasheh retry_cancel: 1610cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1611cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1612cf8e06f1SMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 1613cf8e06f1SMark Fasheh if (ret) { 1614cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1615cf8e06f1SMark Fasheh ret = 
ocfs2_cancel_convert(osb, lockres); 1616cf8e06f1SMark Fasheh if (ret < 0) { 1617cf8e06f1SMark Fasheh mlog_errno(ret); 1618cf8e06f1SMark Fasheh goto out; 1619cf8e06f1SMark Fasheh } 1620cf8e06f1SMark Fasheh goto retry_cancel; 1621cf8e06f1SMark Fasheh } 1622cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1623cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1624cf8e06f1SMark Fasheh 1625cf8e06f1SMark Fasheh ocfs2_wait_for_mask(&mw); 1626cf8e06f1SMark Fasheh goto retry_cancel; 1627cf8e06f1SMark Fasheh } 1628cf8e06f1SMark Fasheh 1629cf8e06f1SMark Fasheh ret = -ERESTARTSYS; 1630cf8e06f1SMark Fasheh /* 1631cf8e06f1SMark Fasheh * We may still have gotten the lock, in which case there's no 1632cf8e06f1SMark Fasheh * point to restarting the syscall. 1633cf8e06f1SMark Fasheh */ 1634cf8e06f1SMark Fasheh if (lockres->l_level == level) 1635cf8e06f1SMark Fasheh ret = 0; 1636cf8e06f1SMark Fasheh 1637cf8e06f1SMark Fasheh mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1638cf8e06f1SMark Fasheh lockres->l_flags, lockres->l_level, lockres->l_action); 1639cf8e06f1SMark Fasheh 1640cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1641cf8e06f1SMark Fasheh 1642cf8e06f1SMark Fasheh out: 1643cf8e06f1SMark Fasheh return ret; 1644cf8e06f1SMark Fasheh } 1645cf8e06f1SMark Fasheh 1646cf8e06f1SMark Fasheh /* 1647cf8e06f1SMark Fasheh * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1648cf8e06f1SMark Fasheh * flock() calls. The locking approach this requires is sufficiently 1649cf8e06f1SMark Fasheh * different from all other cluster lock types that we implement a 1650cf8e06f1SMark Fasheh * seperate path to the "low-level" dlm calls. In particular: 1651cf8e06f1SMark Fasheh * 1652cf8e06f1SMark Fasheh * - No optimization of lock levels is done - we take at exactly 1653cf8e06f1SMark Fasheh * what's been requested. 
1654cf8e06f1SMark Fasheh * 1655cf8e06f1SMark Fasheh * - No lock caching is employed. We immediately downconvert to 1656cf8e06f1SMark Fasheh * no-lock at unlock time. This also means flock locks never go on 1657cf8e06f1SMark Fasheh * the blocking list). 1658cf8e06f1SMark Fasheh * 1659cf8e06f1SMark Fasheh * - Since userspace can trivially deadlock itself with flock, we make 1660cf8e06f1SMark Fasheh * sure to allow cancellation of a misbehaving applications flock() 1661cf8e06f1SMark Fasheh * request. 1662cf8e06f1SMark Fasheh * 1663cf8e06f1SMark Fasheh * - Access to any flock lockres doesn't require concurrency, so we 1664cf8e06f1SMark Fasheh * can simplify the code by requiring the caller to guarantee 1665cf8e06f1SMark Fasheh * serialization of dlmglue flock calls. 1666cf8e06f1SMark Fasheh */ 1667cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock) 1668cf8e06f1SMark Fasheh { 1669e988cf1cSMark Fasheh int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1670e988cf1cSMark Fasheh unsigned int lkm_flags = trylock ? 
DLM_LKF_NOQUEUE : 0; 1671cf8e06f1SMark Fasheh unsigned long flags; 1672cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1673cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1674cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1675cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1676cf8e06f1SMark Fasheh 1677cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1678cf8e06f1SMark Fasheh 1679cf8e06f1SMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1680bd3e7610SJoel Becker (lockres->l_level > DLM_LOCK_NL)) { 1681cf8e06f1SMark Fasheh mlog(ML_ERROR, 1682cf8e06f1SMark Fasheh "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1683cf8e06f1SMark Fasheh "level: %u\n", lockres->l_name, lockres->l_flags, 1684cf8e06f1SMark Fasheh lockres->l_level); 1685cf8e06f1SMark Fasheh return -EINVAL; 1686cf8e06f1SMark Fasheh } 1687cf8e06f1SMark Fasheh 1688cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1689cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1690cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1691cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1692cf8e06f1SMark Fasheh 1693cf8e06f1SMark Fasheh /* 1694cf8e06f1SMark Fasheh * Get the lock at NLMODE to start - that way we 1695cf8e06f1SMark Fasheh * can cancel the upconvert request if need be. 
1696cf8e06f1SMark Fasheh */ 1697e988cf1cSMark Fasheh ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); 1698cf8e06f1SMark Fasheh if (ret < 0) { 1699cf8e06f1SMark Fasheh mlog_errno(ret); 1700cf8e06f1SMark Fasheh goto out; 1701cf8e06f1SMark Fasheh } 1702cf8e06f1SMark Fasheh 1703cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1704cf8e06f1SMark Fasheh if (ret) { 1705cf8e06f1SMark Fasheh mlog_errno(ret); 1706cf8e06f1SMark Fasheh goto out; 1707cf8e06f1SMark Fasheh } 1708cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1709cf8e06f1SMark Fasheh } 1710cf8e06f1SMark Fasheh 1711cf8e06f1SMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1712e988cf1cSMark Fasheh lkm_flags |= DLM_LKF_CONVERT; 1713cf8e06f1SMark Fasheh lockres->l_requested = level; 1714cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1715cf8e06f1SMark Fasheh 1716cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1717cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1718cf8e06f1SMark Fasheh 17194670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, 1720cf8e06f1SMark Fasheh lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, 172124ef1815SJoel Becker lockres); 17227431cd7eSJoel Becker if (ret) { 17237431cd7eSJoel Becker if (!trylock || (ret != -EAGAIN)) { 172424ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1725cf8e06f1SMark Fasheh ret = -EINVAL; 1726cf8e06f1SMark Fasheh } 1727cf8e06f1SMark Fasheh 1728cf8e06f1SMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1729cf8e06f1SMark Fasheh lockres_remove_mask_waiter(lockres, &mw); 1730cf8e06f1SMark Fasheh goto out; 1731cf8e06f1SMark Fasheh } 1732cf8e06f1SMark Fasheh 1733cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 1734cf8e06f1SMark Fasheh if (ret == -ERESTARTSYS) { 1735cf8e06f1SMark Fasheh /* 1736cf8e06f1SMark Fasheh * Userspace can cause deadlock itself with 1737cf8e06f1SMark Fasheh * flock(). 
Current behavior locally is to allow the 1738cf8e06f1SMark Fasheh * deadlock, but abort the system call if a signal is 1739cf8e06f1SMark Fasheh * received. We follow this example, otherwise a 1740cf8e06f1SMark Fasheh * poorly written program could sit in kernel until 1741cf8e06f1SMark Fasheh * reboot. 1742cf8e06f1SMark Fasheh * 1743cf8e06f1SMark Fasheh * Handling this is a bit more complicated for Ocfs2 1744cf8e06f1SMark Fasheh * though. We can't exit this function with an 1745cf8e06f1SMark Fasheh * outstanding lock request, so a cancel convert is 1746cf8e06f1SMark Fasheh * required. We intentionally overwrite 'ret' - if the 1747cf8e06f1SMark Fasheh * cancel fails and the lock was granted, it's easier 1748cf8e06f1SMark Fasheh * to just bubble sucess back up to the user. 1749cf8e06f1SMark Fasheh */ 1750cf8e06f1SMark Fasheh ret = ocfs2_flock_handle_signal(lockres, level); 17511693a5c0SDavid Teigland } else if (!ret && (level > lockres->l_level)) { 17521693a5c0SDavid Teigland /* Trylock failed asynchronously */ 17531693a5c0SDavid Teigland BUG_ON(!trylock); 17541693a5c0SDavid Teigland ret = -EAGAIN; 1755cf8e06f1SMark Fasheh } 1756cf8e06f1SMark Fasheh 1757cf8e06f1SMark Fasheh out: 1758cf8e06f1SMark Fasheh 1759cf8e06f1SMark Fasheh mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 1760cf8e06f1SMark Fasheh lockres->l_name, ex, trylock, ret); 1761cf8e06f1SMark Fasheh return ret; 1762cf8e06f1SMark Fasheh } 1763cf8e06f1SMark Fasheh 1764cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file) 1765cf8e06f1SMark Fasheh { 1766cf8e06f1SMark Fasheh int ret; 1767de551246SJoel Becker unsigned int gen; 1768cf8e06f1SMark Fasheh unsigned long flags; 1769cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1770cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1771cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1772cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1773cf8e06f1SMark Fasheh 
1774cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1775cf8e06f1SMark Fasheh 1776cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 1777cf8e06f1SMark Fasheh return; 1778cf8e06f1SMark Fasheh 1779e988cf1cSMark Fasheh if (lockres->l_level == DLM_LOCK_NL) 1780cf8e06f1SMark Fasheh return; 1781cf8e06f1SMark Fasheh 1782cf8e06f1SMark Fasheh mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 1783cf8e06f1SMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_level, 1784cf8e06f1SMark Fasheh lockres->l_action); 1785cf8e06f1SMark Fasheh 1786cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1787cf8e06f1SMark Fasheh /* 1788cf8e06f1SMark Fasheh * Fake a blocking ast for the downconvert code. 1789cf8e06f1SMark Fasheh */ 1790cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1791bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_EX; 1792cf8e06f1SMark Fasheh 1793e988cf1cSMark Fasheh gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); 1794cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1795cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1796cf8e06f1SMark Fasheh 1797e988cf1cSMark Fasheh ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); 1798cf8e06f1SMark Fasheh if (ret) { 1799cf8e06f1SMark Fasheh mlog_errno(ret); 1800cf8e06f1SMark Fasheh return; 1801cf8e06f1SMark Fasheh } 1802cf8e06f1SMark Fasheh 1803cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1804cf8e06f1SMark Fasheh if (ret) 1805cf8e06f1SMark Fasheh mlog_errno(ret); 1806cf8e06f1SMark Fasheh } 1807cf8e06f1SMark Fasheh 180834d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 1809ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 1810ccd979bdSMark Fasheh { 1811ccd979bdSMark Fasheh int kick = 0; 1812ccd979bdSMark Fasheh 1813ccd979bdSMark Fasheh mlog_entry_void(); 1814ccd979bdSMark Fasheh 1815ccd979bdSMark Fasheh /* If we know that another node is waiting on 
our lock, kick 181634d024f8SMark Fasheh * the downconvert thread * pre-emptively when we reach a release 1817ccd979bdSMark Fasheh * condition. */ 1818ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 1819ccd979bdSMark Fasheh switch(lockres->l_blocking) { 1820bd3e7610SJoel Becker case DLM_LOCK_EX: 1821ccd979bdSMark Fasheh if (!lockres->l_ex_holders && !lockres->l_ro_holders) 1822ccd979bdSMark Fasheh kick = 1; 1823ccd979bdSMark Fasheh break; 1824bd3e7610SJoel Becker case DLM_LOCK_PR: 1825ccd979bdSMark Fasheh if (!lockres->l_ex_holders) 1826ccd979bdSMark Fasheh kick = 1; 1827ccd979bdSMark Fasheh break; 1828ccd979bdSMark Fasheh default: 1829ccd979bdSMark Fasheh BUG(); 1830ccd979bdSMark Fasheh } 1831ccd979bdSMark Fasheh } 1832ccd979bdSMark Fasheh 1833ccd979bdSMark Fasheh if (kick) 183434d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 1835ccd979bdSMark Fasheh 1836ccd979bdSMark Fasheh mlog_exit_void(); 1837ccd979bdSMark Fasheh } 1838ccd979bdSMark Fasheh 1839ccd979bdSMark Fasheh #define OCFS2_SEC_BITS 34 1840ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT (64 - 34) 1841ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 1842ccd979bdSMark Fasheh 1843ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for 1844ccd979bdSMark Fasheh * now. */ 1845ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec) 1846ccd979bdSMark Fasheh { 1847ccd979bdSMark Fasheh u64 res; 1848ccd979bdSMark Fasheh u64 sec = spec->tv_sec; 1849ccd979bdSMark Fasheh u32 nsec = spec->tv_nsec; 1850ccd979bdSMark Fasheh 1851ccd979bdSMark Fasheh res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 1852ccd979bdSMark Fasheh 1853ccd979bdSMark Fasheh return res; 1854ccd979bdSMark Fasheh } 1855ccd979bdSMark Fasheh 1856ccd979bdSMark Fasheh /* Call this with the lockres locked. 
I am reasonably sure we don't 1857ccd979bdSMark Fasheh * need ip_lock in this function as anyone who would be changing those 1858e63aecb6SMark Fasheh * values is supposed to be blocked in ocfs2_inode_lock right now. */ 1859ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode) 1860ccd979bdSMark Fasheh { 1861ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1862e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1863ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 1864ccd979bdSMark Fasheh 1865ccd979bdSMark Fasheh mlog_entry_void(); 1866ccd979bdSMark Fasheh 18678f2c9c1bSJoel Becker lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1868ccd979bdSMark Fasheh 186924c19ef4SMark Fasheh /* 187024c19ef4SMark Fasheh * Invalidate the LVB of a deleted inode - this way other 187124c19ef4SMark Fasheh * nodes are forced to go to disk and discover the new inode 187224c19ef4SMark Fasheh * status. 187324c19ef4SMark Fasheh */ 187424c19ef4SMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 187524c19ef4SMark Fasheh lvb->lvb_version = 0; 187624c19ef4SMark Fasheh goto out; 187724c19ef4SMark Fasheh } 187824c19ef4SMark Fasheh 18794d3b83f7SMark Fasheh lvb->lvb_version = OCFS2_LVB_VERSION; 1880ccd979bdSMark Fasheh lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 1881ccd979bdSMark Fasheh lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 1882ccd979bdSMark Fasheh lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 1883ccd979bdSMark Fasheh lvb->lvb_igid = cpu_to_be32(inode->i_gid); 1884ccd979bdSMark Fasheh lvb->lvb_imode = cpu_to_be16(inode->i_mode); 1885ccd979bdSMark Fasheh lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 1886ccd979bdSMark Fasheh lvb->lvb_iatime_packed = 1887ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 1888ccd979bdSMark Fasheh lvb->lvb_ictime_packed = 1889ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 1890ccd979bdSMark Fasheh lvb->lvb_imtime_packed = 
1891ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1892ca4d147eSHerbert Poetzl lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 189315b1e36bSMark Fasheh lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 1894f9e2d82eSMark Fasheh lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 1895ccd979bdSMark Fasheh 189624c19ef4SMark Fasheh out: 1897ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 1898ccd979bdSMark Fasheh 1899ccd979bdSMark Fasheh mlog_exit_void(); 1900ccd979bdSMark Fasheh } 1901ccd979bdSMark Fasheh 1902ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec, 1903ccd979bdSMark Fasheh u64 packed_time) 1904ccd979bdSMark Fasheh { 1905ccd979bdSMark Fasheh spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 1906ccd979bdSMark Fasheh spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 1907ccd979bdSMark Fasheh } 1908ccd979bdSMark Fasheh 1909ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 1910ccd979bdSMark Fasheh { 1911ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1912e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1913ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 1914ccd979bdSMark Fasheh 1915ccd979bdSMark Fasheh mlog_entry_void(); 1916ccd979bdSMark Fasheh 1917ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 1918ccd979bdSMark Fasheh 19198f2c9c1bSJoel Becker lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1920ccd979bdSMark Fasheh 1921ccd979bdSMark Fasheh /* We're safe here without the lockres lock... 
*/ 1922ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 1923ccd979bdSMark Fasheh oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 1924ccd979bdSMark Fasheh i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 1925ccd979bdSMark Fasheh 1926ca4d147eSHerbert Poetzl oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 192715b1e36bSMark Fasheh oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 1928ca4d147eSHerbert Poetzl ocfs2_set_inode_flags(inode); 1929ca4d147eSHerbert Poetzl 1930ccd979bdSMark Fasheh /* fast-symlinks are a special case */ 1931ccd979bdSMark Fasheh if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 1932ccd979bdSMark Fasheh inode->i_blocks = 0; 1933ccd979bdSMark Fasheh else 19348110b073SMark Fasheh inode->i_blocks = ocfs2_inode_sector_count(inode); 1935ccd979bdSMark Fasheh 1936ccd979bdSMark Fasheh inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 1937ccd979bdSMark Fasheh inode->i_gid = be32_to_cpu(lvb->lvb_igid); 1938ccd979bdSMark Fasheh inode->i_mode = be16_to_cpu(lvb->lvb_imode); 1939ccd979bdSMark Fasheh inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); 1940ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_atime, 1941ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_iatime_packed)); 1942ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_mtime, 1943ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_imtime_packed)); 1944ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_ctime, 1945ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_ictime_packed)); 1946ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 1947ccd979bdSMark Fasheh 1948ccd979bdSMark Fasheh mlog_exit_void(); 1949ccd979bdSMark Fasheh } 1950ccd979bdSMark Fasheh 1951f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 1952f9e2d82eSMark Fasheh struct ocfs2_lock_res *lockres) 1953ccd979bdSMark Fasheh { 19548f2c9c1bSJoel Becker struct ocfs2_meta_lvb *lvb = 19558f2c9c1bSJoel Becker (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1956ccd979bdSMark Fasheh 1957f9e2d82eSMark Fasheh if (lvb->lvb_version 
== OCFS2_LVB_VERSION 1958f9e2d82eSMark Fasheh && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 1959ccd979bdSMark Fasheh return 1; 1960ccd979bdSMark Fasheh return 0; 1961ccd979bdSMark Fasheh } 1962ccd979bdSMark Fasheh 1963ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and 1964ccd979bdSMark Fasheh * arbitrate who gets to refresh it. 1965ccd979bdSMark Fasheh * 1966ccd979bdSMark Fasheh * 0 means no refresh needed. 1967ccd979bdSMark Fasheh * 1968ccd979bdSMark Fasheh * > 0 means you need to refresh this and you MUST call 1969ccd979bdSMark Fasheh * ocfs2_complete_lock_res_refresh afterwards. */ 1970ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 1971ccd979bdSMark Fasheh { 1972ccd979bdSMark Fasheh unsigned long flags; 1973ccd979bdSMark Fasheh int status = 0; 1974ccd979bdSMark Fasheh 1975ccd979bdSMark Fasheh mlog_entry_void(); 1976ccd979bdSMark Fasheh 1977ccd979bdSMark Fasheh refresh_check: 1978ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1979ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 1980ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1981ccd979bdSMark Fasheh goto bail; 1982ccd979bdSMark Fasheh } 1983ccd979bdSMark Fasheh 1984ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 1985ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1986ccd979bdSMark Fasheh 1987ccd979bdSMark Fasheh ocfs2_wait_on_refreshing_lock(lockres); 1988ccd979bdSMark Fasheh goto refresh_check; 1989ccd979bdSMark Fasheh } 1990ccd979bdSMark Fasheh 1991ccd979bdSMark Fasheh /* Ok, I'll be the one to refresh this lock. 
*/ 1992ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 1993ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1994ccd979bdSMark Fasheh 1995ccd979bdSMark Fasheh status = 1; 1996ccd979bdSMark Fasheh bail: 1997ccd979bdSMark Fasheh mlog_exit(status); 1998ccd979bdSMark Fasheh return status; 1999ccd979bdSMark Fasheh } 2000ccd979bdSMark Fasheh 2001ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh 2002ccd979bdSMark Fasheh * anymroe, but i won't clear the needs refresh flag. */ 2003ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 2004ccd979bdSMark Fasheh int status) 2005ccd979bdSMark Fasheh { 2006ccd979bdSMark Fasheh unsigned long flags; 2007ccd979bdSMark Fasheh mlog_entry_void(); 2008ccd979bdSMark Fasheh 2009ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2010ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 2011ccd979bdSMark Fasheh if (!status) 2012ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 2013ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2014ccd979bdSMark Fasheh 2015ccd979bdSMark Fasheh wake_up(&lockres->l_event); 2016ccd979bdSMark Fasheh 2017ccd979bdSMark Fasheh mlog_exit_void(); 2018ccd979bdSMark Fasheh } 2019ccd979bdSMark Fasheh 2020ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. 
*/ 2021e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 2022ccd979bdSMark Fasheh struct buffer_head **bh) 2023ccd979bdSMark Fasheh { 2024ccd979bdSMark Fasheh int status = 0; 2025ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2026e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2027ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 2028c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2029ccd979bdSMark Fasheh 2030ccd979bdSMark Fasheh mlog_entry_void(); 2031ccd979bdSMark Fasheh 2032be9e986bSMark Fasheh if (ocfs2_mount_local(osb)) 2033be9e986bSMark Fasheh goto bail; 2034be9e986bSMark Fasheh 2035ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2036ccd979bdSMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 2037b0697053SMark Fasheh mlog(0, "Orphaned inode %llu was deleted while we " 2038ccd979bdSMark Fasheh "were waiting on a lock. ip_flags = 0x%x\n", 2039b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, oi->ip_flags); 2040ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2041ccd979bdSMark Fasheh status = -ENOENT; 2042ccd979bdSMark Fasheh goto bail; 2043ccd979bdSMark Fasheh } 2044ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2045ccd979bdSMark Fasheh 2046ccd979bdSMark Fasheh if (!ocfs2_should_refresh_lock_res(lockres)) 2047ccd979bdSMark Fasheh goto bail; 2048ccd979bdSMark Fasheh 2049ccd979bdSMark Fasheh /* This will discard any caching information we might have had 2050ccd979bdSMark Fasheh * for the inode metadata. 
*/ 2051ccd979bdSMark Fasheh ocfs2_metadata_cache_purge(inode); 2052ccd979bdSMark Fasheh 205383418978SMark Fasheh ocfs2_extent_map_trunc(inode, 0); 205483418978SMark Fasheh 2055be9e986bSMark Fasheh if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 2056b0697053SMark Fasheh mlog(0, "Trusting LVB on inode %llu\n", 2057b0697053SMark Fasheh (unsigned long long)oi->ip_blkno); 2058ccd979bdSMark Fasheh ocfs2_refresh_inode_from_lvb(inode); 2059ccd979bdSMark Fasheh } else { 2060ccd979bdSMark Fasheh /* Boo, we have to go to disk. */ 2061ccd979bdSMark Fasheh /* read bh, cast, ocfs2_refresh_inode */ 2062b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, bh); 2063ccd979bdSMark Fasheh if (status < 0) { 2064ccd979bdSMark Fasheh mlog_errno(status); 2065ccd979bdSMark Fasheh goto bail_refresh; 2066ccd979bdSMark Fasheh } 2067ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) (*bh)->b_data; 2068ccd979bdSMark Fasheh 2069ccd979bdSMark Fasheh /* This is a good chance to make sure we're not 2070b657c95cSJoel Becker * locking an invalid object. ocfs2_read_inode_block() 2071b657c95cSJoel Becker * already checked that the inode block is sane. 2072ccd979bdSMark Fasheh * 2073ccd979bdSMark Fasheh * We bug on a stale inode here because we checked 2074ccd979bdSMark Fasheh * above whether it was wiped from disk. The wiping 2075ccd979bdSMark Fasheh * node provides a guarantee that we receive that 2076ccd979bdSMark Fasheh * message and can mark the inode before dropping any 2077ccd979bdSMark Fasheh * locks associated with it. 
*/ 2078ccd979bdSMark Fasheh mlog_bug_on_msg(inode->i_generation != 2079ccd979bdSMark Fasheh le32_to_cpu(fe->i_generation), 2080b0697053SMark Fasheh "Invalid dinode %llu disk generation: %u " 2081ccd979bdSMark Fasheh "inode->i_generation: %u\n", 2082b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2083b0697053SMark Fasheh le32_to_cpu(fe->i_generation), 2084ccd979bdSMark Fasheh inode->i_generation); 2085ccd979bdSMark Fasheh mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 2086ccd979bdSMark Fasheh !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 2087b0697053SMark Fasheh "Stale dinode %llu dtime: %llu flags: 0x%x\n", 2088b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2089b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_dtime), 2090ccd979bdSMark Fasheh le32_to_cpu(fe->i_flags)); 2091ccd979bdSMark Fasheh 2092ccd979bdSMark Fasheh ocfs2_refresh_inode(inode, fe); 20938ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2094ccd979bdSMark Fasheh } 2095ccd979bdSMark Fasheh 2096ccd979bdSMark Fasheh status = 0; 2097ccd979bdSMark Fasheh bail_refresh: 2098ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2099ccd979bdSMark Fasheh bail: 2100ccd979bdSMark Fasheh mlog_exit(status); 2101ccd979bdSMark Fasheh return status; 2102ccd979bdSMark Fasheh } 2103ccd979bdSMark Fasheh 2104ccd979bdSMark Fasheh static int ocfs2_assign_bh(struct inode *inode, 2105ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2106ccd979bdSMark Fasheh struct buffer_head *passed_bh) 2107ccd979bdSMark Fasheh { 2108ccd979bdSMark Fasheh int status; 2109ccd979bdSMark Fasheh 2110ccd979bdSMark Fasheh if (passed_bh) { 2111ccd979bdSMark Fasheh /* Ok, the update went to disk for us, use the 2112ccd979bdSMark Fasheh * returned bh. 
*/ 2113ccd979bdSMark Fasheh *ret_bh = passed_bh; 2114ccd979bdSMark Fasheh get_bh(*ret_bh); 2115ccd979bdSMark Fasheh 2116ccd979bdSMark Fasheh return 0; 2117ccd979bdSMark Fasheh } 2118ccd979bdSMark Fasheh 2119b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, ret_bh); 2120ccd979bdSMark Fasheh if (status < 0) 2121ccd979bdSMark Fasheh mlog_errno(status); 2122ccd979bdSMark Fasheh 2123ccd979bdSMark Fasheh return status; 2124ccd979bdSMark Fasheh } 2125ccd979bdSMark Fasheh 2126ccd979bdSMark Fasheh /* 2127ccd979bdSMark Fasheh * returns < 0 error if the callback will never be called, otherwise 2128ccd979bdSMark Fasheh * the result of the lock will be communicated via the callback. 2129ccd979bdSMark Fasheh */ 2130e63aecb6SMark Fasheh int ocfs2_inode_lock_full(struct inode *inode, 2131ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2132ccd979bdSMark Fasheh int ex, 2133ccd979bdSMark Fasheh int arg_flags) 2134ccd979bdSMark Fasheh { 2135bd3e7610SJoel Becker int status, level, acquired; 2136bd3e7610SJoel Becker u32 dlm_flags; 2137c271c5c2SSunil Mushran struct ocfs2_lock_res *lockres = NULL; 2138ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2139ccd979bdSMark Fasheh struct buffer_head *local_bh = NULL; 2140ccd979bdSMark Fasheh 2141ccd979bdSMark Fasheh BUG_ON(!inode); 2142ccd979bdSMark Fasheh 2143ccd979bdSMark Fasheh mlog_entry_void(); 2144ccd979bdSMark Fasheh 2145b0697053SMark Fasheh mlog(0, "inode %llu, take %s META lock\n", 2146b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2147ccd979bdSMark Fasheh ex ? "EXMODE" : "PRMODE"); 2148ccd979bdSMark Fasheh 2149ccd979bdSMark Fasheh status = 0; 2150ccd979bdSMark Fasheh acquired = 0; 2151ccd979bdSMark Fasheh /* We'll allow faking a readonly metadata lock for 2152ccd979bdSMark Fasheh * rodevices. 
*/ 2153ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) { 2154ccd979bdSMark Fasheh if (ex) 2155ccd979bdSMark Fasheh status = -EROFS; 2156ccd979bdSMark Fasheh goto bail; 2157ccd979bdSMark Fasheh } 2158ccd979bdSMark Fasheh 2159c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2160c271c5c2SSunil Mushran goto local; 2161c271c5c2SSunil Mushran 2162ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2163553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2164ccd979bdSMark Fasheh 2165e63aecb6SMark Fasheh lockres = &OCFS2_I(inode)->ip_inode_lockres; 2166bd3e7610SJoel Becker level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2167ccd979bdSMark Fasheh dlm_flags = 0; 2168ccd979bdSMark Fasheh if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2169bd3e7610SJoel Becker dlm_flags |= DLM_LKF_NOQUEUE; 2170ccd979bdSMark Fasheh 2171ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); 2172ccd979bdSMark Fasheh if (status < 0) { 2173ccd979bdSMark Fasheh if (status != -EAGAIN && status != -EIOCBRETRY) 2174ccd979bdSMark Fasheh mlog_errno(status); 2175ccd979bdSMark Fasheh goto bail; 2176ccd979bdSMark Fasheh } 2177ccd979bdSMark Fasheh 2178ccd979bdSMark Fasheh /* Notify the error cleanup path to drop the cluster lock. */ 2179ccd979bdSMark Fasheh acquired = 1; 2180ccd979bdSMark Fasheh 2181ccd979bdSMark Fasheh /* We wait twice because a node may have died while we were in 2182ccd979bdSMark Fasheh * the lower dlm layers. The second time though, we've 2183ccd979bdSMark Fasheh * committed to owning this lock so we don't allow signals to 2184ccd979bdSMark Fasheh * abort the operation. */ 2185ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2186553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2187ccd979bdSMark Fasheh 2188c271c5c2SSunil Mushran local: 218924c19ef4SMark Fasheh /* 219024c19ef4SMark Fasheh * We only see this flag if we're being called from 219124c19ef4SMark Fasheh * ocfs2_read_locked_inode(). 
It means we're locking an inode 219224c19ef4SMark Fasheh * which hasn't been populated yet, so clear the refresh flag 219324c19ef4SMark Fasheh * and let the caller handle it. 219424c19ef4SMark Fasheh */ 219524c19ef4SMark Fasheh if (inode->i_state & I_NEW) { 219624c19ef4SMark Fasheh status = 0; 2197c271c5c2SSunil Mushran if (lockres) 219824c19ef4SMark Fasheh ocfs2_complete_lock_res_refresh(lockres, 0); 219924c19ef4SMark Fasheh goto bail; 220024c19ef4SMark Fasheh } 220124c19ef4SMark Fasheh 2202ccd979bdSMark Fasheh /* This is fun. The caller may want a bh back, or it may 2203e63aecb6SMark Fasheh * not. ocfs2_inode_lock_update definitely wants one in, but 2204ccd979bdSMark Fasheh * may or may not read one, depending on what's in the 2205ccd979bdSMark Fasheh * LVB. The result of all of this is that we've *only* gone to 2206ccd979bdSMark Fasheh * disk if we have to, so the complexity is worthwhile. */ 2207e63aecb6SMark Fasheh status = ocfs2_inode_lock_update(inode, &local_bh); 2208ccd979bdSMark Fasheh if (status < 0) { 2209ccd979bdSMark Fasheh if (status != -ENOENT) 2210ccd979bdSMark Fasheh mlog_errno(status); 2211ccd979bdSMark Fasheh goto bail; 2212ccd979bdSMark Fasheh } 2213ccd979bdSMark Fasheh 2214ccd979bdSMark Fasheh if (ret_bh) { 2215ccd979bdSMark Fasheh status = ocfs2_assign_bh(inode, ret_bh, local_bh); 2216ccd979bdSMark Fasheh if (status < 0) { 2217ccd979bdSMark Fasheh mlog_errno(status); 2218ccd979bdSMark Fasheh goto bail; 2219ccd979bdSMark Fasheh } 2220ccd979bdSMark Fasheh } 2221ccd979bdSMark Fasheh 2222ccd979bdSMark Fasheh bail: 2223ccd979bdSMark Fasheh if (status < 0) { 2224ccd979bdSMark Fasheh if (ret_bh && (*ret_bh)) { 2225ccd979bdSMark Fasheh brelse(*ret_bh); 2226ccd979bdSMark Fasheh *ret_bh = NULL; 2227ccd979bdSMark Fasheh } 2228ccd979bdSMark Fasheh if (acquired) 2229e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2230ccd979bdSMark Fasheh } 2231ccd979bdSMark Fasheh 2232ccd979bdSMark Fasheh if (local_bh) 2233ccd979bdSMark Fasheh brelse(local_bh); 
2234ccd979bdSMark Fasheh 2235ccd979bdSMark Fasheh mlog_exit(status); 2236ccd979bdSMark Fasheh return status; 2237ccd979bdSMark Fasheh } 2238ccd979bdSMark Fasheh 2239ccd979bdSMark Fasheh /* 224034d024f8SMark Fasheh * This is working around a lock inversion between tasks acquiring DLM 224134d024f8SMark Fasheh * locks while holding a page lock and the downconvert thread which 224234d024f8SMark Fasheh * blocks dlm lock acquiry while acquiring page locks. 2243ccd979bdSMark Fasheh * 2244ccd979bdSMark Fasheh * ** These _with_page variantes are only intended to be called from aop 2245ccd979bdSMark Fasheh * methods that hold page locks and return a very specific *positive* error 2246ccd979bdSMark Fasheh * code that aop methods pass up to the VFS -- test for errors with != 0. ** 2247ccd979bdSMark Fasheh * 224834d024f8SMark Fasheh * The DLM is called such that it returns -EAGAIN if it would have 224934d024f8SMark Fasheh * blocked waiting for the downconvert thread. In that case we unlock 225034d024f8SMark Fasheh * our page so the downconvert thread can make progress. Once we've 225134d024f8SMark Fasheh * done this we have to return AOP_TRUNCATED_PAGE so the aop method 225234d024f8SMark Fasheh * that called us can bubble that back up into the VFS who will then 225334d024f8SMark Fasheh * immediately retry the aop call. 2254ccd979bdSMark Fasheh * 2255ccd979bdSMark Fasheh * We do a blocking lock and immediate unlock before returning, though, so that 2256ccd979bdSMark Fasheh * the lock has a great chance of being cached on this node by the time the VFS 2257ccd979bdSMark Fasheh * calls back to retry the aop. This has a potential to livelock as nodes 2258ccd979bdSMark Fasheh * ping locks back and forth, but that's a risk we're willing to take to avoid 2259ccd979bdSMark Fasheh * the lock inversion simply. 
2260ccd979bdSMark Fasheh */ 2261e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode, 2262ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2263ccd979bdSMark Fasheh int ex, 2264ccd979bdSMark Fasheh struct page *page) 2265ccd979bdSMark Fasheh { 2266ccd979bdSMark Fasheh int ret; 2267ccd979bdSMark Fasheh 2268e63aecb6SMark Fasheh ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2269ccd979bdSMark Fasheh if (ret == -EAGAIN) { 2270ccd979bdSMark Fasheh unlock_page(page); 2271e63aecb6SMark Fasheh if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) 2272e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2273ccd979bdSMark Fasheh ret = AOP_TRUNCATED_PAGE; 2274ccd979bdSMark Fasheh } 2275ccd979bdSMark Fasheh 2276ccd979bdSMark Fasheh return ret; 2277ccd979bdSMark Fasheh } 2278ccd979bdSMark Fasheh 2279e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode, 22807f1a37e3STiger Yang struct vfsmount *vfsmnt, 22817f1a37e3STiger Yang int *level) 22827f1a37e3STiger Yang { 22837f1a37e3STiger Yang int ret; 22847f1a37e3STiger Yang 22857f1a37e3STiger Yang mlog_entry_void(); 2286e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, NULL, 0); 22877f1a37e3STiger Yang if (ret < 0) { 22887f1a37e3STiger Yang mlog_errno(ret); 22897f1a37e3STiger Yang return ret; 22907f1a37e3STiger Yang } 22917f1a37e3STiger Yang 22927f1a37e3STiger Yang /* 22937f1a37e3STiger Yang * If we should update atime, we will get EX lock, 22947f1a37e3STiger Yang * otherwise we just get PR lock. 
22957f1a37e3STiger Yang */ 22967f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) { 22977f1a37e3STiger Yang struct buffer_head *bh = NULL; 22987f1a37e3STiger Yang 2299e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, 0); 2300e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, &bh, 1); 23017f1a37e3STiger Yang if (ret < 0) { 23027f1a37e3STiger Yang mlog_errno(ret); 23037f1a37e3STiger Yang return ret; 23047f1a37e3STiger Yang } 23057f1a37e3STiger Yang *level = 1; 23067f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) 23077f1a37e3STiger Yang ocfs2_update_inode_atime(inode, bh); 23087f1a37e3STiger Yang if (bh) 23097f1a37e3STiger Yang brelse(bh); 23107f1a37e3STiger Yang } else 23117f1a37e3STiger Yang *level = 0; 23127f1a37e3STiger Yang 23137f1a37e3STiger Yang mlog_exit(ret); 23147f1a37e3STiger Yang return ret; 23157f1a37e3STiger Yang } 23167f1a37e3STiger Yang 2317e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode, 2318ccd979bdSMark Fasheh int ex) 2319ccd979bdSMark Fasheh { 2320bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2321e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2322c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2323ccd979bdSMark Fasheh 2324ccd979bdSMark Fasheh mlog_entry_void(); 2325ccd979bdSMark Fasheh 2326b0697053SMark Fasheh mlog(0, "inode %llu drop %s META lock\n", 2327b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2328ccd979bdSMark Fasheh ex ? 
"EXMODE" : "PRMODE"); 2329ccd979bdSMark Fasheh 2330c271c5c2SSunil Mushran if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2331c271c5c2SSunil Mushran !ocfs2_mount_local(osb)) 2332ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2333ccd979bdSMark Fasheh 2334ccd979bdSMark Fasheh mlog_exit_void(); 2335ccd979bdSMark Fasheh } 2336ccd979bdSMark Fasheh 2337ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb, 2338ccd979bdSMark Fasheh int ex) 2339ccd979bdSMark Fasheh { 2340c271c5c2SSunil Mushran int status = 0; 2341bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2342ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2343ccd979bdSMark Fasheh 2344ccd979bdSMark Fasheh mlog_entry_void(); 2345ccd979bdSMark Fasheh 2346ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2347ccd979bdSMark Fasheh return -EROFS; 2348ccd979bdSMark Fasheh 2349c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2350c271c5c2SSunil Mushran goto bail; 2351c271c5c2SSunil Mushran 2352ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2353ccd979bdSMark Fasheh if (status < 0) { 2354ccd979bdSMark Fasheh mlog_errno(status); 2355ccd979bdSMark Fasheh goto bail; 2356ccd979bdSMark Fasheh } 2357ccd979bdSMark Fasheh 2358ccd979bdSMark Fasheh /* The super block lock path is really in the best position to 2359ccd979bdSMark Fasheh * know when resources covered by the lock need to be 2360ccd979bdSMark Fasheh * refreshed, so we do it here. 
Of course, making sense of 2361ccd979bdSMark Fasheh * everything is up to the caller :) */ 2362ccd979bdSMark Fasheh status = ocfs2_should_refresh_lock_res(lockres); 2363ccd979bdSMark Fasheh if (status < 0) { 2364ccd979bdSMark Fasheh mlog_errno(status); 2365ccd979bdSMark Fasheh goto bail; 2366ccd979bdSMark Fasheh } 2367ccd979bdSMark Fasheh if (status) { 23688e8a4603SMark Fasheh status = ocfs2_refresh_slot_info(osb); 2369ccd979bdSMark Fasheh 2370ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2371ccd979bdSMark Fasheh 2372ccd979bdSMark Fasheh if (status < 0) 2373ccd979bdSMark Fasheh mlog_errno(status); 23748ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2375ccd979bdSMark Fasheh } 2376ccd979bdSMark Fasheh bail: 2377ccd979bdSMark Fasheh mlog_exit(status); 2378ccd979bdSMark Fasheh return status; 2379ccd979bdSMark Fasheh } 2380ccd979bdSMark Fasheh 2381ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb, 2382ccd979bdSMark Fasheh int ex) 2383ccd979bdSMark Fasheh { 2384bd3e7610SJoel Becker int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2385ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2386ccd979bdSMark Fasheh 2387c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2388ccd979bdSMark Fasheh ocfs2_cluster_unlock(osb, lockres, level); 2389ccd979bdSMark Fasheh } 2390ccd979bdSMark Fasheh 2391ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb) 2392ccd979bdSMark Fasheh { 2393ccd979bdSMark Fasheh int status; 2394ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2395ccd979bdSMark Fasheh 2396ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2397ccd979bdSMark Fasheh return -EROFS; 2398ccd979bdSMark Fasheh 2399c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2400c271c5c2SSunil Mushran return 0; 2401c271c5c2SSunil Mushran 2402bd3e7610SJoel Becker status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2403ccd979bdSMark Fasheh if (status < 0) 2404ccd979bdSMark Fasheh mlog_errno(status); 2405ccd979bdSMark Fasheh 2406ccd979bdSMark Fasheh return status; 2407ccd979bdSMark Fasheh } 2408ccd979bdSMark Fasheh 2409ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb) 2410ccd979bdSMark Fasheh { 2411ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2412ccd979bdSMark Fasheh 2413c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2414bd3e7610SJoel Becker ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2415ccd979bdSMark Fasheh } 2416ccd979bdSMark Fasheh 2417d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2418d680efe9SMark Fasheh { 2419d680efe9SMark Fasheh int ret; 2420bd3e7610SJoel Becker int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2421d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2422d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2423d680efe9SMark Fasheh 2424d680efe9SMark Fasheh BUG_ON(!dl); 2425d680efe9SMark Fasheh 2426d680efe9SMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2427d680efe9SMark Fasheh return -EROFS; 2428d680efe9SMark Fasheh 2429c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2430c271c5c2SSunil Mushran return 0; 2431c271c5c2SSunil Mushran 2432d680efe9SMark Fasheh ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 2433d680efe9SMark Fasheh if (ret < 0) 2434d680efe9SMark Fasheh mlog_errno(ret); 2435d680efe9SMark Fasheh 2436d680efe9SMark Fasheh return ret; 2437d680efe9SMark Fasheh } 2438d680efe9SMark Fasheh 2439d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2440d680efe9SMark Fasheh { 2441bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2442d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2443d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2444d680efe9SMark Fasheh 2445c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2446d680efe9SMark Fasheh ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 2447d680efe9SMark Fasheh } 2448d680efe9SMark Fasheh 2449ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because 2450ccd979bdSMark Fasheh * open references on the debug inodes can live on after a mount, so 2451ccd979bdSMark Fasheh * we can't rely on the ocfs2_super to always exist. 
*/ 2452ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref) 2453ccd979bdSMark Fasheh { 2454ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2455ccd979bdSMark Fasheh 2456ccd979bdSMark Fasheh dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 2457ccd979bdSMark Fasheh 2458ccd979bdSMark Fasheh kfree(dlm_debug); 2459ccd979bdSMark Fasheh } 2460ccd979bdSMark Fasheh 2461ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 2462ccd979bdSMark Fasheh { 2463ccd979bdSMark Fasheh if (dlm_debug) 2464ccd979bdSMark Fasheh kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 2465ccd979bdSMark Fasheh } 2466ccd979bdSMark Fasheh 2467ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) 2468ccd979bdSMark Fasheh { 2469ccd979bdSMark Fasheh kref_get(&debug->d_refcnt); 2470ccd979bdSMark Fasheh } 2471ccd979bdSMark Fasheh 2472ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 2473ccd979bdSMark Fasheh { 2474ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2475ccd979bdSMark Fasheh 2476ccd979bdSMark Fasheh dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 2477ccd979bdSMark Fasheh if (!dlm_debug) { 2478ccd979bdSMark Fasheh mlog_errno(-ENOMEM); 2479ccd979bdSMark Fasheh goto out; 2480ccd979bdSMark Fasheh } 2481ccd979bdSMark Fasheh 2482ccd979bdSMark Fasheh kref_init(&dlm_debug->d_refcnt); 2483ccd979bdSMark Fasheh INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 2484ccd979bdSMark Fasheh dlm_debug->d_locking_state = NULL; 2485ccd979bdSMark Fasheh out: 2486ccd979bdSMark Fasheh return dlm_debug; 2487ccd979bdSMark Fasheh } 2488ccd979bdSMark Fasheh 2489ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. 
*/ 2490ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv { 2491ccd979bdSMark Fasheh struct ocfs2_dlm_debug *p_dlm_debug; 2492ccd979bdSMark Fasheh struct ocfs2_lock_res p_iter_res; 2493ccd979bdSMark Fasheh struct ocfs2_lock_res p_tmp_res; 2494ccd979bdSMark Fasheh }; 2495ccd979bdSMark Fasheh 2496ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, 2497ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv) 2498ccd979bdSMark Fasheh { 2499ccd979bdSMark Fasheh struct ocfs2_lock_res *iter, *ret = NULL; 2500ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; 2501ccd979bdSMark Fasheh 2502ccd979bdSMark Fasheh assert_spin_locked(&ocfs2_dlm_tracking_lock); 2503ccd979bdSMark Fasheh 2504ccd979bdSMark Fasheh list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { 2505ccd979bdSMark Fasheh /* discover the head of the list */ 2506ccd979bdSMark Fasheh if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { 2507ccd979bdSMark Fasheh mlog(0, "End of list found, %p\n", ret); 2508ccd979bdSMark Fasheh break; 2509ccd979bdSMark Fasheh } 2510ccd979bdSMark Fasheh 2511ccd979bdSMark Fasheh /* We track our "dummy" iteration lockres' by a NULL 2512ccd979bdSMark Fasheh * l_ops field. 
*/ 2513ccd979bdSMark Fasheh if (iter->l_ops != NULL) { 2514ccd979bdSMark Fasheh ret = iter; 2515ccd979bdSMark Fasheh break; 2516ccd979bdSMark Fasheh } 2517ccd979bdSMark Fasheh } 2518ccd979bdSMark Fasheh 2519ccd979bdSMark Fasheh return ret; 2520ccd979bdSMark Fasheh } 2521ccd979bdSMark Fasheh 2522ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) 2523ccd979bdSMark Fasheh { 2524ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2525ccd979bdSMark Fasheh struct ocfs2_lock_res *iter; 2526ccd979bdSMark Fasheh 2527ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2528ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); 2529ccd979bdSMark Fasheh if (iter) { 2530ccd979bdSMark Fasheh /* Since lockres' have the lifetime of their container 2531ccd979bdSMark Fasheh * (which can be inodes, ocfs2_supers, etc) we want to 2532ccd979bdSMark Fasheh * copy this out to a temporary lockres while still 2533ccd979bdSMark Fasheh * under the spinlock. Obviously after this we can't 2534ccd979bdSMark Fasheh * trust any pointers on the copy returned, but that's 2535ccd979bdSMark Fasheh * ok as the information we want isn't typically held 2536ccd979bdSMark Fasheh * in them. 
*/ 2537ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2538ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2539ccd979bdSMark Fasheh } 2540ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2541ccd979bdSMark Fasheh 2542ccd979bdSMark Fasheh return iter; 2543ccd979bdSMark Fasheh } 2544ccd979bdSMark Fasheh 2545ccd979bdSMark Fasheh static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) 2546ccd979bdSMark Fasheh { 2547ccd979bdSMark Fasheh } 2548ccd979bdSMark Fasheh 2549ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) 2550ccd979bdSMark Fasheh { 2551ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2552ccd979bdSMark Fasheh struct ocfs2_lock_res *iter = v; 2553ccd979bdSMark Fasheh struct ocfs2_lock_res *dummy = &priv->p_iter_res; 2554ccd979bdSMark Fasheh 2555ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2556ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(iter, priv); 2557ccd979bdSMark Fasheh list_del_init(&dummy->l_debug_list); 2558ccd979bdSMark Fasheh if (iter) { 2559ccd979bdSMark Fasheh list_add(&dummy->l_debug_list, &iter->l_debug_list); 2560ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2561ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2562ccd979bdSMark Fasheh } 2563ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2564ccd979bdSMark Fasheh 2565ccd979bdSMark Fasheh return iter; 2566ccd979bdSMark Fasheh } 2567ccd979bdSMark Fasheh 2568ccd979bdSMark Fasheh /* So that debugfs.ocfs2 can determine which format is being used */ 25698ddb7b00SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 2 2570ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 2571ccd979bdSMark Fasheh { 2572ccd979bdSMark Fasheh int i; 2573ccd979bdSMark Fasheh char *lvb; 2574ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = v; 2575ccd979bdSMark Fasheh 2576ccd979bdSMark Fasheh if (!lockres) 2577ccd979bdSMark Fasheh return -EINVAL; 2578ccd979bdSMark Fasheh 2579d680efe9SMark Fasheh 
seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 2580d680efe9SMark Fasheh 2581d680efe9SMark Fasheh if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 2582d680efe9SMark Fasheh seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 2583d680efe9SMark Fasheh lockres->l_name, 2584d680efe9SMark Fasheh (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 2585d680efe9SMark Fasheh else 2586d680efe9SMark Fasheh seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 2587d680efe9SMark Fasheh 2588d680efe9SMark Fasheh seq_printf(m, "%d\t" 2589ccd979bdSMark Fasheh "0x%lx\t" 2590ccd979bdSMark Fasheh "0x%x\t" 2591ccd979bdSMark Fasheh "0x%x\t" 2592ccd979bdSMark Fasheh "%u\t" 2593ccd979bdSMark Fasheh "%u\t" 2594ccd979bdSMark Fasheh "%d\t" 2595ccd979bdSMark Fasheh "%d\t", 2596ccd979bdSMark Fasheh lockres->l_level, 2597ccd979bdSMark Fasheh lockres->l_flags, 2598ccd979bdSMark Fasheh lockres->l_action, 2599ccd979bdSMark Fasheh lockres->l_unlock_action, 2600ccd979bdSMark Fasheh lockres->l_ro_holders, 2601ccd979bdSMark Fasheh lockres->l_ex_holders, 2602ccd979bdSMark Fasheh lockres->l_requested, 2603ccd979bdSMark Fasheh lockres->l_blocking); 2604ccd979bdSMark Fasheh 2605ccd979bdSMark Fasheh /* Dump the raw LVB */ 26068f2c9c1bSJoel Becker lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2607ccd979bdSMark Fasheh for(i = 0; i < DLM_LVB_LEN; i++) 2608ccd979bdSMark Fasheh seq_printf(m, "0x%x\t", lvb[i]); 2609ccd979bdSMark Fasheh 26108ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 26118ddb7b00SSunil Mushran # define lock_num_prmode(_l) (_l)->l_lock_num_prmode 26128ddb7b00SSunil Mushran # define lock_num_exmode(_l) (_l)->l_lock_num_exmode 26138ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l) (_l)->l_lock_num_prmode_failed 26148ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l) (_l)->l_lock_num_exmode_failed 26158ddb7b00SSunil Mushran # define lock_total_prmode(_l) (_l)->l_lock_total_prmode 26168ddb7b00SSunil Mushran # define lock_total_exmode(_l) (_l)->l_lock_total_exmode 
26178ddb7b00SSunil Mushran # define lock_max_prmode(_l) (_l)->l_lock_max_prmode 26188ddb7b00SSunil Mushran # define lock_max_exmode(_l) (_l)->l_lock_max_exmode 26198ddb7b00SSunil Mushran # define lock_refresh(_l) (_l)->l_lock_refresh 26208ddb7b00SSunil Mushran #else 2621dd25e55eSRandy Dunlap # define lock_num_prmode(_l) (0ULL) 2622dd25e55eSRandy Dunlap # define lock_num_exmode(_l) (0ULL) 26238ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l) (0) 26248ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l) (0) 2625dd25e55eSRandy Dunlap # define lock_total_prmode(_l) (0ULL) 2626dd25e55eSRandy Dunlap # define lock_total_exmode(_l) (0ULL) 26278ddb7b00SSunil Mushran # define lock_max_prmode(_l) (0) 26288ddb7b00SSunil Mushran # define lock_max_exmode(_l) (0) 26298ddb7b00SSunil Mushran # define lock_refresh(_l) (0) 26308ddb7b00SSunil Mushran #endif 26318ddb7b00SSunil Mushran /* The following seq_print was added in version 2 of this output */ 26328ddb7b00SSunil Mushran seq_printf(m, "%llu\t" 26338ddb7b00SSunil Mushran "%llu\t" 26348ddb7b00SSunil Mushran "%u\t" 26358ddb7b00SSunil Mushran "%u\t" 26368ddb7b00SSunil Mushran "%llu\t" 26378ddb7b00SSunil Mushran "%llu\t" 26388ddb7b00SSunil Mushran "%u\t" 26398ddb7b00SSunil Mushran "%u\t" 26408ddb7b00SSunil Mushran "%u\t", 26418ddb7b00SSunil Mushran lock_num_prmode(lockres), 26428ddb7b00SSunil Mushran lock_num_exmode(lockres), 26438ddb7b00SSunil Mushran lock_num_prmode_failed(lockres), 26448ddb7b00SSunil Mushran lock_num_exmode_failed(lockres), 26458ddb7b00SSunil Mushran lock_total_prmode(lockres), 26468ddb7b00SSunil Mushran lock_total_exmode(lockres), 26478ddb7b00SSunil Mushran lock_max_prmode(lockres), 26488ddb7b00SSunil Mushran lock_max_exmode(lockres), 26498ddb7b00SSunil Mushran lock_refresh(lockres)); 26508ddb7b00SSunil Mushran 2651ccd979bdSMark Fasheh /* End the line */ 2652ccd979bdSMark Fasheh seq_printf(m, "\n"); 2653ccd979bdSMark Fasheh return 0; 2654ccd979bdSMark Fasheh } 2655ccd979bdSMark Fasheh 
265690d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = { 2657ccd979bdSMark Fasheh .start = ocfs2_dlm_seq_start, 2658ccd979bdSMark Fasheh .stop = ocfs2_dlm_seq_stop, 2659ccd979bdSMark Fasheh .next = ocfs2_dlm_seq_next, 2660ccd979bdSMark Fasheh .show = ocfs2_dlm_seq_show, 2661ccd979bdSMark Fasheh }; 2662ccd979bdSMark Fasheh 2663ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2664ccd979bdSMark Fasheh { 2665ccd979bdSMark Fasheh struct seq_file *seq = (struct seq_file *) file->private_data; 2666ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = seq->private; 2667ccd979bdSMark Fasheh struct ocfs2_lock_res *res = &priv->p_iter_res; 2668ccd979bdSMark Fasheh 2669ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 2670ccd979bdSMark Fasheh ocfs2_put_dlm_debug(priv->p_dlm_debug); 2671ccd979bdSMark Fasheh return seq_release_private(inode, file); 2672ccd979bdSMark Fasheh } 2673ccd979bdSMark Fasheh 2674ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 2675ccd979bdSMark Fasheh { 2676ccd979bdSMark Fasheh int ret; 2677ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv; 2678ccd979bdSMark Fasheh struct seq_file *seq; 2679ccd979bdSMark Fasheh struct ocfs2_super *osb; 2680ccd979bdSMark Fasheh 2681ccd979bdSMark Fasheh priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); 2682ccd979bdSMark Fasheh if (!priv) { 2683ccd979bdSMark Fasheh ret = -ENOMEM; 2684ccd979bdSMark Fasheh mlog_errno(ret); 2685ccd979bdSMark Fasheh goto out; 2686ccd979bdSMark Fasheh } 26878e18e294STheodore Ts'o osb = inode->i_private; 2688ccd979bdSMark Fasheh ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2689ccd979bdSMark Fasheh priv->p_dlm_debug = osb->osb_dlm_debug; 2690ccd979bdSMark Fasheh INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2691ccd979bdSMark Fasheh 2692ccd979bdSMark Fasheh ret = seq_open(file, &ocfs2_dlm_seq_ops); 2693ccd979bdSMark Fasheh if (ret) { 2694ccd979bdSMark Fasheh 
kfree(priv); 2695ccd979bdSMark Fasheh mlog_errno(ret); 2696ccd979bdSMark Fasheh goto out; 2697ccd979bdSMark Fasheh } 2698ccd979bdSMark Fasheh 2699ccd979bdSMark Fasheh seq = (struct seq_file *) file->private_data; 2700ccd979bdSMark Fasheh seq->private = priv; 2701ccd979bdSMark Fasheh 2702ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(&priv->p_iter_res, 2703ccd979bdSMark Fasheh priv->p_dlm_debug); 2704ccd979bdSMark Fasheh 2705ccd979bdSMark Fasheh out: 2706ccd979bdSMark Fasheh return ret; 2707ccd979bdSMark Fasheh } 2708ccd979bdSMark Fasheh 27094b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = { 2710ccd979bdSMark Fasheh .open = ocfs2_dlm_debug_open, 2711ccd979bdSMark Fasheh .release = ocfs2_dlm_debug_release, 2712ccd979bdSMark Fasheh .read = seq_read, 2713ccd979bdSMark Fasheh .llseek = seq_lseek, 2714ccd979bdSMark Fasheh }; 2715ccd979bdSMark Fasheh 2716ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 2717ccd979bdSMark Fasheh { 2718ccd979bdSMark Fasheh int ret = 0; 2719ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2720ccd979bdSMark Fasheh 2721ccd979bdSMark Fasheh dlm_debug->d_locking_state = debugfs_create_file("locking_state", 2722ccd979bdSMark Fasheh S_IFREG|S_IRUSR, 2723ccd979bdSMark Fasheh osb->osb_debug_root, 2724ccd979bdSMark Fasheh osb, 2725ccd979bdSMark Fasheh &ocfs2_dlm_debug_fops); 2726ccd979bdSMark Fasheh if (!dlm_debug->d_locking_state) { 2727ccd979bdSMark Fasheh ret = -EINVAL; 2728ccd979bdSMark Fasheh mlog(ML_ERROR, 2729ccd979bdSMark Fasheh "Unable to create locking state debugfs file.\n"); 2730ccd979bdSMark Fasheh goto out; 2731ccd979bdSMark Fasheh } 2732ccd979bdSMark Fasheh 2733ccd979bdSMark Fasheh ocfs2_get_dlm_debug(dlm_debug); 2734ccd979bdSMark Fasheh out: 2735ccd979bdSMark Fasheh return ret; 2736ccd979bdSMark Fasheh } 2737ccd979bdSMark Fasheh 2738ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 2739ccd979bdSMark Fasheh { 
2740ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2741ccd979bdSMark Fasheh 2742ccd979bdSMark Fasheh if (dlm_debug) { 2743ccd979bdSMark Fasheh debugfs_remove(dlm_debug->d_locking_state); 2744ccd979bdSMark Fasheh ocfs2_put_dlm_debug(dlm_debug); 2745ccd979bdSMark Fasheh } 2746ccd979bdSMark Fasheh } 2747ccd979bdSMark Fasheh 2748ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb) 2749ccd979bdSMark Fasheh { 2750c271c5c2SSunil Mushran int status = 0; 27514670c46dSJoel Becker struct ocfs2_cluster_connection *conn = NULL; 2752ccd979bdSMark Fasheh 2753ccd979bdSMark Fasheh mlog_entry_void(); 2754ccd979bdSMark Fasheh 27550abd6d18SMark Fasheh if (ocfs2_mount_local(osb)) { 27560abd6d18SMark Fasheh osb->node_num = 0; 2757c271c5c2SSunil Mushran goto local; 27580abd6d18SMark Fasheh } 2759c271c5c2SSunil Mushran 2760ccd979bdSMark Fasheh status = ocfs2_dlm_init_debug(osb); 2761ccd979bdSMark Fasheh if (status < 0) { 2762ccd979bdSMark Fasheh mlog_errno(status); 2763ccd979bdSMark Fasheh goto bail; 2764ccd979bdSMark Fasheh } 2765ccd979bdSMark Fasheh 276634d024f8SMark Fasheh /* launch downconvert thread */ 276734d024f8SMark Fasheh osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); 276834d024f8SMark Fasheh if (IS_ERR(osb->dc_task)) { 276934d024f8SMark Fasheh status = PTR_ERR(osb->dc_task); 277034d024f8SMark Fasheh osb->dc_task = NULL; 2771ccd979bdSMark Fasheh mlog_errno(status); 2772ccd979bdSMark Fasheh goto bail; 2773ccd979bdSMark Fasheh } 2774ccd979bdSMark Fasheh 2775ccd979bdSMark Fasheh /* for now, uuid == domain */ 27769c6c877cSJoel Becker status = ocfs2_cluster_connect(osb->osb_cluster_stack, 27779c6c877cSJoel Becker osb->uuid_str, 27784670c46dSJoel Becker strlen(osb->uuid_str), 27794670c46dSJoel Becker ocfs2_do_node_down, osb, 27804670c46dSJoel Becker &conn); 27814670c46dSJoel Becker if (status) { 2782ccd979bdSMark Fasheh mlog_errno(status); 2783ccd979bdSMark Fasheh goto bail; 2784ccd979bdSMark Fasheh } 2785ccd979bdSMark 
Fasheh 27860abd6d18SMark Fasheh status = ocfs2_cluster_this_node(&osb->node_num); 27870abd6d18SMark Fasheh if (status < 0) { 27880abd6d18SMark Fasheh mlog_errno(status); 27890abd6d18SMark Fasheh mlog(ML_ERROR, 27900abd6d18SMark Fasheh "could not find this host's node number\n"); 2791286eaa95SJoel Becker ocfs2_cluster_disconnect(conn, 0); 27920abd6d18SMark Fasheh goto bail; 27930abd6d18SMark Fasheh } 27940abd6d18SMark Fasheh 2795c271c5c2SSunil Mushran local: 2796ccd979bdSMark Fasheh ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 2797ccd979bdSMark Fasheh ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 2798ccd979bdSMark Fasheh 27994670c46dSJoel Becker osb->cconn = conn; 2800ccd979bdSMark Fasheh 2801ccd979bdSMark Fasheh status = 0; 2802ccd979bdSMark Fasheh bail: 2803ccd979bdSMark Fasheh if (status < 0) { 2804ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 280534d024f8SMark Fasheh if (osb->dc_task) 280634d024f8SMark Fasheh kthread_stop(osb->dc_task); 2807ccd979bdSMark Fasheh } 2808ccd979bdSMark Fasheh 2809ccd979bdSMark Fasheh mlog_exit(status); 2810ccd979bdSMark Fasheh return status; 2811ccd979bdSMark Fasheh } 2812ccd979bdSMark Fasheh 2813286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb, 2814286eaa95SJoel Becker int hangup_pending) 2815ccd979bdSMark Fasheh { 2816ccd979bdSMark Fasheh mlog_entry_void(); 2817ccd979bdSMark Fasheh 2818ccd979bdSMark Fasheh ocfs2_drop_osb_locks(osb); 2819ccd979bdSMark Fasheh 28204670c46dSJoel Becker /* 28214670c46dSJoel Becker * Now that we have dropped all locks and ocfs2_dismount_volume() 28224670c46dSJoel Becker * has disabled recovery, the DLM won't be talking to us. It's 28234670c46dSJoel Becker * safe to tear things down before disconnecting the cluster. 
28244670c46dSJoel Becker */ 28254670c46dSJoel Becker 282634d024f8SMark Fasheh if (osb->dc_task) { 282734d024f8SMark Fasheh kthread_stop(osb->dc_task); 282834d024f8SMark Fasheh osb->dc_task = NULL; 2829ccd979bdSMark Fasheh } 2830ccd979bdSMark Fasheh 2831ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_super_lockres); 2832ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_rename_lockres); 2833ccd979bdSMark Fasheh 2834286eaa95SJoel Becker ocfs2_cluster_disconnect(osb->cconn, hangup_pending); 28354670c46dSJoel Becker osb->cconn = NULL; 2836ccd979bdSMark Fasheh 2837ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 2838ccd979bdSMark Fasheh 2839ccd979bdSMark Fasheh mlog_exit_void(); 2840ccd979bdSMark Fasheh } 2841ccd979bdSMark Fasheh 28427431cd7eSJoel Becker static void ocfs2_unlock_ast(void *opaque, int error) 2843ccd979bdSMark Fasheh { 2844ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 2845ccd979bdSMark Fasheh unsigned long flags; 2846ccd979bdSMark Fasheh 2847ccd979bdSMark Fasheh mlog_entry_void(); 2848ccd979bdSMark Fasheh 2849ccd979bdSMark Fasheh mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name, 2850ccd979bdSMark Fasheh lockres->l_unlock_action); 2851ccd979bdSMark Fasheh 2852ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2853de551246SJoel Becker if (error) { 28547431cd7eSJoel Becker mlog(ML_ERROR, "Dlm passes error %d for lock %s, " 28557431cd7eSJoel Becker "unlock_action %d\n", error, lockres->l_name, 2856ccd979bdSMark Fasheh lockres->l_unlock_action); 2857ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2858ccd979bdSMark Fasheh return; 2859ccd979bdSMark Fasheh } 2860ccd979bdSMark Fasheh 2861ccd979bdSMark Fasheh switch(lockres->l_unlock_action) { 2862ccd979bdSMark Fasheh case OCFS2_UNLOCK_CANCEL_CONVERT: 2863ccd979bdSMark Fasheh mlog(0, "Cancel convert success for %s\n", lockres->l_name); 2864ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 2865ccd979bdSMark Fasheh break; 
2866ccd979bdSMark Fasheh case OCFS2_UNLOCK_DROP_LOCK: 2867bd3e7610SJoel Becker lockres->l_level = DLM_LOCK_IV; 2868ccd979bdSMark Fasheh break; 2869ccd979bdSMark Fasheh default: 2870ccd979bdSMark Fasheh BUG(); 2871ccd979bdSMark Fasheh } 2872ccd979bdSMark Fasheh 2873ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 2874ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 2875ccd979bdSMark Fasheh wake_up(&lockres->l_event); 287607f9eebcSDavid Teigland spin_unlock_irqrestore(&lockres->l_lock, flags); 2877ccd979bdSMark Fasheh 2878ccd979bdSMark Fasheh mlog_exit_void(); 2879ccd979bdSMark Fasheh } 2880ccd979bdSMark Fasheh 2881ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb, 28820d5dc6c2SMark Fasheh struct ocfs2_lock_res *lockres) 2883ccd979bdSMark Fasheh { 28847431cd7eSJoel Becker int ret; 2885ccd979bdSMark Fasheh unsigned long flags; 2886bd3e7610SJoel Becker u32 lkm_flags = 0; 2887ccd979bdSMark Fasheh 2888ccd979bdSMark Fasheh /* We didn't get anywhere near actually using this lockres. 
*/ 2889ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 2890ccd979bdSMark Fasheh goto out; 2891ccd979bdSMark Fasheh 2892b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 2893bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 2894b80fc012SMark Fasheh 2895ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2896ccd979bdSMark Fasheh 2897ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 2898ccd979bdSMark Fasheh "lockres %s, flags 0x%lx\n", 2899ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 2900ccd979bdSMark Fasheh 2901ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_BUSY) { 2902ccd979bdSMark Fasheh mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " 2903ccd979bdSMark Fasheh "%u, unlock_action = %u\n", 2904ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_action, 2905ccd979bdSMark Fasheh lockres->l_unlock_action); 2906ccd979bdSMark Fasheh 2907ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2908ccd979bdSMark Fasheh 2909ccd979bdSMark Fasheh /* XXX: Today we just wait on any busy 2910ccd979bdSMark Fasheh * locks... Perhaps we need to cancel converts in the 2911ccd979bdSMark Fasheh * future? 
*/ 2912ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 2913ccd979bdSMark Fasheh 2914ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2915ccd979bdSMark Fasheh } 2916ccd979bdSMark Fasheh 29170d5dc6c2SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 29180d5dc6c2SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 2919bd3e7610SJoel Becker lockres->l_level == DLM_LOCK_EX && 29200d5dc6c2SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 29210d5dc6c2SMark Fasheh lockres->l_ops->set_lvb(lockres); 29220d5dc6c2SMark Fasheh } 2923ccd979bdSMark Fasheh 2924ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) 2925ccd979bdSMark Fasheh mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 2926ccd979bdSMark Fasheh lockres->l_name); 2927ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 2928ccd979bdSMark Fasheh mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); 2929ccd979bdSMark Fasheh 2930ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 2931ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2932ccd979bdSMark Fasheh goto out; 2933ccd979bdSMark Fasheh } 2934ccd979bdSMark Fasheh 2935ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); 2936ccd979bdSMark Fasheh 2937ccd979bdSMark Fasheh /* make sure we never get here while waiting for an ast to 2938ccd979bdSMark Fasheh * fire. */ 2939ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_INVALID); 2940ccd979bdSMark Fasheh 2941ccd979bdSMark Fasheh /* is this necessary? 
*/ 2942ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 2943ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; 2944ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2945ccd979bdSMark Fasheh 2946ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 2947ccd979bdSMark Fasheh 29484670c46dSJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags, 294924ef1815SJoel Becker lockres); 29507431cd7eSJoel Becker if (ret) { 29517431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 2952ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 2953cf0acdcdSJoel Becker ocfs2_dlm_dump_lksb(&lockres->l_lksb); 2954ccd979bdSMark Fasheh BUG(); 2955ccd979bdSMark Fasheh } 295624ef1815SJoel Becker mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n", 2957ccd979bdSMark Fasheh lockres->l_name); 2958ccd979bdSMark Fasheh 2959ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 2960ccd979bdSMark Fasheh out: 2961ccd979bdSMark Fasheh mlog_exit(0); 2962ccd979bdSMark Fasheh return 0; 2963ccd979bdSMark Fasheh } 2964ccd979bdSMark Fasheh 2965ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be 2966ccd979bdSMark Fasheh * queued if blocking, but we still may have to wait on it 296734d024f8SMark Fasheh * being dequeued from the downconvert thread before we can consider 2968ccd979bdSMark Fasheh * it safe to drop. 2969ccd979bdSMark Fasheh * 2970ccd979bdSMark Fasheh * You can *not* attempt to call cluster_lock on this lockres anymore. 
*/ 2971ccd979bdSMark Fasheh void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 2972ccd979bdSMark Fasheh { 2973ccd979bdSMark Fasheh int status; 2974ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 2975ccd979bdSMark Fasheh unsigned long flags; 2976ccd979bdSMark Fasheh 2977ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 2978ccd979bdSMark Fasheh 2979ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2980ccd979bdSMark Fasheh lockres->l_flags |= OCFS2_LOCK_FREEING; 2981ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 2982ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 2983ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2984ccd979bdSMark Fasheh 2985ccd979bdSMark Fasheh mlog(0, "Waiting on lockres %s\n", lockres->l_name); 2986ccd979bdSMark Fasheh 2987ccd979bdSMark Fasheh status = ocfs2_wait_for_mask(&mw); 2988ccd979bdSMark Fasheh if (status) 2989ccd979bdSMark Fasheh mlog_errno(status); 2990ccd979bdSMark Fasheh 2991ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2992ccd979bdSMark Fasheh } 2993ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2994ccd979bdSMark Fasheh } 2995ccd979bdSMark Fasheh 2996d680efe9SMark Fasheh void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, 2997d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 2998d680efe9SMark Fasheh { 2999d680efe9SMark Fasheh int ret; 3000d680efe9SMark Fasheh 3001d680efe9SMark Fasheh ocfs2_mark_lockres_freeing(lockres); 30020d5dc6c2SMark Fasheh ret = ocfs2_drop_lock(osb, lockres); 3003d680efe9SMark Fasheh if (ret) 3004d680efe9SMark Fasheh mlog_errno(ret); 3005d680efe9SMark Fasheh } 3006d680efe9SMark Fasheh 3007ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 3008ccd979bdSMark Fasheh { 3009d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 3010d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 
3011ccd979bdSMark Fasheh } 3012ccd979bdSMark Fasheh 3013ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode) 3014ccd979bdSMark Fasheh { 3015ccd979bdSMark Fasheh int status, err; 3016ccd979bdSMark Fasheh 3017ccd979bdSMark Fasheh mlog_entry_void(); 3018ccd979bdSMark Fasheh 3019ccd979bdSMark Fasheh /* No need to call ocfs2_mark_lockres_freeing here - 3020ccd979bdSMark Fasheh * ocfs2_clear_inode has done it for us. */ 3021ccd979bdSMark Fasheh 3022ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 302350008630STiger Yang &OCFS2_I(inode)->ip_open_lockres); 3024ccd979bdSMark Fasheh if (err < 0) 3025ccd979bdSMark Fasheh mlog_errno(err); 3026ccd979bdSMark Fasheh 3027ccd979bdSMark Fasheh status = err; 3028ccd979bdSMark Fasheh 3029ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3030e63aecb6SMark Fasheh &OCFS2_I(inode)->ip_inode_lockres); 3031ccd979bdSMark Fasheh if (err < 0) 3032ccd979bdSMark Fasheh mlog_errno(err); 3033ccd979bdSMark Fasheh if (err < 0 && !status) 3034ccd979bdSMark Fasheh status = err; 3035ccd979bdSMark Fasheh 3036ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 30370d5dc6c2SMark Fasheh &OCFS2_I(inode)->ip_rw_lockres); 3038ccd979bdSMark Fasheh if (err < 0) 3039ccd979bdSMark Fasheh mlog_errno(err); 3040ccd979bdSMark Fasheh if (err < 0 && !status) 3041ccd979bdSMark Fasheh status = err; 3042ccd979bdSMark Fasheh 3043ccd979bdSMark Fasheh mlog_exit(status); 3044ccd979bdSMark Fasheh return status; 3045ccd979bdSMark Fasheh } 3046ccd979bdSMark Fasheh 3047de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 3048ccd979bdSMark Fasheh int new_level) 3049ccd979bdSMark Fasheh { 3050ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3051ccd979bdSMark Fasheh 3052bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 3053ccd979bdSMark Fasheh 3054ccd979bdSMark Fasheh if (lockres->l_level <= new_level) { 3055bd3e7610SJoel Becker mlog(ML_ERROR, 
"lockres->l_level (%d) <= new_level (%d)\n", 3056ccd979bdSMark Fasheh lockres->l_level, new_level); 3057ccd979bdSMark Fasheh BUG(); 3058ccd979bdSMark Fasheh } 3059ccd979bdSMark Fasheh 3060ccd979bdSMark Fasheh mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", 3061ccd979bdSMark Fasheh lockres->l_name, new_level, lockres->l_blocking); 3062ccd979bdSMark Fasheh 3063ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_DOWNCONVERT; 3064ccd979bdSMark Fasheh lockres->l_requested = new_level; 3065ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3066de551246SJoel Becker return lockres_set_pending(lockres); 3067ccd979bdSMark Fasheh } 3068ccd979bdSMark Fasheh 3069ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 3070ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3071ccd979bdSMark Fasheh int new_level, 3072de551246SJoel Becker int lvb, 3073de551246SJoel Becker unsigned int generation) 3074ccd979bdSMark Fasheh { 3075bd3e7610SJoel Becker int ret; 3076bd3e7610SJoel Becker u32 dlm_flags = DLM_LKF_CONVERT; 3077ccd979bdSMark Fasheh 3078ccd979bdSMark Fasheh mlog_entry_void(); 3079ccd979bdSMark Fasheh 3080ccd979bdSMark Fasheh if (lvb) 3081bd3e7610SJoel Becker dlm_flags |= DLM_LKF_VALBLK; 3082ccd979bdSMark Fasheh 30834670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 3084ccd979bdSMark Fasheh new_level, 3085ccd979bdSMark Fasheh &lockres->l_lksb, 3086ccd979bdSMark Fasheh dlm_flags, 3087ccd979bdSMark Fasheh lockres->l_name, 3088f0681062SMark Fasheh OCFS2_LOCK_ID_MAX_LEN - 1, 308924ef1815SJoel Becker lockres); 3090de551246SJoel Becker lockres_clear_pending(lockres, generation, osb); 30917431cd7eSJoel Becker if (ret) { 30927431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 3093ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 3094ccd979bdSMark Fasheh goto bail; 3095ccd979bdSMark Fasheh } 3096ccd979bdSMark Fasheh 3097ccd979bdSMark Fasheh ret = 0; 3098ccd979bdSMark Fasheh bail: 3099ccd979bdSMark Fasheh 
mlog_exit(ret); 3100ccd979bdSMark Fasheh return ret; 3101ccd979bdSMark Fasheh } 3102ccd979bdSMark Fasheh 310324ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ 3104ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 3105ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3106ccd979bdSMark Fasheh { 3107ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3108ccd979bdSMark Fasheh 3109ccd979bdSMark Fasheh mlog_entry_void(); 3110ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 3111ccd979bdSMark Fasheh 3112ccd979bdSMark Fasheh if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3113ccd979bdSMark Fasheh /* If we're already trying to cancel a lock conversion 3114ccd979bdSMark Fasheh * then just drop the spinlock and allow the caller to 3115ccd979bdSMark Fasheh * requeue this lock. */ 3116ccd979bdSMark Fasheh 3117ccd979bdSMark Fasheh mlog(0, "Lockres %s, skip convert\n", lockres->l_name); 3118ccd979bdSMark Fasheh return 0; 3119ccd979bdSMark Fasheh } 3120ccd979bdSMark Fasheh 3121ccd979bdSMark Fasheh /* were we in a convert when we got the bast fire? */ 3122ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 3123ccd979bdSMark Fasheh lockres->l_action != OCFS2_AST_DOWNCONVERT); 3124ccd979bdSMark Fasheh /* set things up for the unlockast to know to just 3125ccd979bdSMark Fasheh * clear out the ast_action and unset busy, etc. 
*/ 3126ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 3127ccd979bdSMark Fasheh 3128ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 3129ccd979bdSMark Fasheh "lock %s, invalid flags: 0x%lx\n", 3130ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3131ccd979bdSMark Fasheh 3132ccd979bdSMark Fasheh return 1; 3133ccd979bdSMark Fasheh } 3134ccd979bdSMark Fasheh 3135ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 3136ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3137ccd979bdSMark Fasheh { 3138ccd979bdSMark Fasheh int ret; 3139ccd979bdSMark Fasheh 3140ccd979bdSMark Fasheh mlog_entry_void(); 3141ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 3142ccd979bdSMark Fasheh 31434670c46dSJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 31447431cd7eSJoel Becker DLM_LKF_CANCEL, lockres); 31457431cd7eSJoel Becker if (ret) { 31467431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3147ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 0); 3148ccd979bdSMark Fasheh } 3149ccd979bdSMark Fasheh 315024ef1815SJoel Becker mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); 3151ccd979bdSMark Fasheh 3152ccd979bdSMark Fasheh mlog_exit(ret); 3153ccd979bdSMark Fasheh return ret; 3154ccd979bdSMark Fasheh } 3155ccd979bdSMark Fasheh 3156b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb, 3157ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3158cc567d89SMark Fasheh struct ocfs2_unblock_ctl *ctl) 3159ccd979bdSMark Fasheh { 3160ccd979bdSMark Fasheh unsigned long flags; 3161ccd979bdSMark Fasheh int blocking; 3162ccd979bdSMark Fasheh int new_level; 3163ccd979bdSMark Fasheh int ret = 0; 31645ef0d4eaSMark Fasheh int set_lvb = 0; 3165de551246SJoel Becker unsigned int gen; 3166ccd979bdSMark Fasheh 3167ccd979bdSMark Fasheh mlog_entry_void(); 3168ccd979bdSMark Fasheh 3169ccd979bdSMark Fasheh 
spin_lock_irqsave(&lockres->l_lock, flags); 3170ccd979bdSMark Fasheh 3171ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 3172ccd979bdSMark Fasheh 3173ccd979bdSMark Fasheh recheck: 3174ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3175de551246SJoel Becker /* XXX 3176de551246SJoel Becker * This is a *big* race. The OCFS2_LOCK_PENDING flag 3177de551246SJoel Becker * exists entirely for one reason - another thread has set 3178de551246SJoel Becker * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3179de551246SJoel Becker * 3180de551246SJoel Becker * If we do ocfs2_cancel_convert() before the other thread 3181de551246SJoel Becker * calls dlm_lock(), our cancel will do nothing. We will 3182de551246SJoel Becker * get no ast, and we will have no way of knowing the 3183de551246SJoel Becker * cancel failed. Meanwhile, the other thread will call 3184de551246SJoel Becker * into dlm_lock() and wait...forever. 3185de551246SJoel Becker * 3186de551246SJoel Becker * Why forever? Because another node has asked for the 3187de551246SJoel Becker * lock first; that's why we're here in unblock_lock(). 3188de551246SJoel Becker * 3189de551246SJoel Becker * The solution is OCFS2_LOCK_PENDING. When PENDING is 3190de551246SJoel Becker * set, we just requeue the unblock. Only when the other 3191de551246SJoel Becker * thread has called dlm_lock() and cleared PENDING will 3192de551246SJoel Becker * we then cancel their request. 3193de551246SJoel Becker * 3194de551246SJoel Becker * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3195de551246SJoel Becker * at the same time they set OCFS2_DLM_BUSY. They must 3196de551246SJoel Becker * clear OCFS2_DLM_PENDING after dlm_lock() returns. 
3197de551246SJoel Becker */ 3198de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_PENDING) 3199de551246SJoel Becker goto leave_requeue; 3200de551246SJoel Becker 3201d680efe9SMark Fasheh ctl->requeue = 1; 3202ccd979bdSMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 3203ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3204ccd979bdSMark Fasheh if (ret) { 3205ccd979bdSMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 3206ccd979bdSMark Fasheh if (ret < 0) 3207ccd979bdSMark Fasheh mlog_errno(ret); 3208ccd979bdSMark Fasheh } 3209ccd979bdSMark Fasheh goto leave; 3210ccd979bdSMark Fasheh } 3211ccd979bdSMark Fasheh 3212ccd979bdSMark Fasheh /* if we're blocking an exclusive and we have *any* holders, 3213ccd979bdSMark Fasheh * then requeue. */ 3214bd3e7610SJoel Becker if ((lockres->l_blocking == DLM_LOCK_EX) 3215f7fbfdd1SMark Fasheh && (lockres->l_ex_holders || lockres->l_ro_holders)) 3216f7fbfdd1SMark Fasheh goto leave_requeue; 3217ccd979bdSMark Fasheh 3218ccd979bdSMark Fasheh /* If it's a PR we're blocking, then only 3219ccd979bdSMark Fasheh * requeue if we've got any EX holders */ 3220bd3e7610SJoel Becker if (lockres->l_blocking == DLM_LOCK_PR && 3221f7fbfdd1SMark Fasheh lockres->l_ex_holders) 3222f7fbfdd1SMark Fasheh goto leave_requeue; 3223f7fbfdd1SMark Fasheh 3224f7fbfdd1SMark Fasheh /* 3225f7fbfdd1SMark Fasheh * Can we get a lock in this state if the holder counts are 3226f7fbfdd1SMark Fasheh * zero? The meta data unblock code used to check this. 
3227f7fbfdd1SMark Fasheh */ 3228f7fbfdd1SMark Fasheh if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 3229f7fbfdd1SMark Fasheh && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) 3230f7fbfdd1SMark Fasheh goto leave_requeue; 3231ccd979bdSMark Fasheh 323216d5b956SMark Fasheh new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 323316d5b956SMark Fasheh 323416d5b956SMark Fasheh if (lockres->l_ops->check_downconvert 323516d5b956SMark Fasheh && !lockres->l_ops->check_downconvert(lockres, new_level)) 323616d5b956SMark Fasheh goto leave_requeue; 323716d5b956SMark Fasheh 3238ccd979bdSMark Fasheh /* If we get here, then we know that there are no more 3239ccd979bdSMark Fasheh * incompatible holders (and anyone asking for an incompatible 3240ccd979bdSMark Fasheh * lock is blocked). We can now downconvert the lock */ 3241cc567d89SMark Fasheh if (!lockres->l_ops->downconvert_worker) 3242ccd979bdSMark Fasheh goto downconvert; 3243ccd979bdSMark Fasheh 3244ccd979bdSMark Fasheh /* Some lockres types want to do a bit of work before 3245ccd979bdSMark Fasheh * downconverting a lock. Allow that here. The worker function 3246ccd979bdSMark Fasheh * may sleep, so we save off a copy of what we're blocking as 3247ccd979bdSMark Fasheh * it may change while we're not holding the spin lock. */ 3248ccd979bdSMark Fasheh blocking = lockres->l_blocking; 3249ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3250ccd979bdSMark Fasheh 3251cc567d89SMark Fasheh ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3252d680efe9SMark Fasheh 3253d680efe9SMark Fasheh if (ctl->unblock_action == UNBLOCK_STOP_POST) 3254d680efe9SMark Fasheh goto leave; 3255ccd979bdSMark Fasheh 3256ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3257ccd979bdSMark Fasheh if (blocking != lockres->l_blocking) { 3258ccd979bdSMark Fasheh /* If this changed underneath us, then we can't drop 3259ccd979bdSMark Fasheh * it just yet. 
*/ 3260ccd979bdSMark Fasheh goto recheck; 3261ccd979bdSMark Fasheh } 3262ccd979bdSMark Fasheh 3263ccd979bdSMark Fasheh downconvert: 3264d680efe9SMark Fasheh ctl->requeue = 0; 3265ccd979bdSMark Fasheh 32665ef0d4eaSMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3267bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_EX) 32685ef0d4eaSMark Fasheh set_lvb = 1; 32695ef0d4eaSMark Fasheh 32705ef0d4eaSMark Fasheh /* 32715ef0d4eaSMark Fasheh * We only set the lvb if the lock has been fully 32725ef0d4eaSMark Fasheh * refreshed - otherwise we risk setting stale 32735ef0d4eaSMark Fasheh * data. Otherwise, there's no need to actually clear 32745ef0d4eaSMark Fasheh * out the lvb here as it's value is still valid. 32755ef0d4eaSMark Fasheh */ 32765ef0d4eaSMark Fasheh if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 32775ef0d4eaSMark Fasheh lockres->l_ops->set_lvb(lockres); 32785ef0d4eaSMark Fasheh } 32795ef0d4eaSMark Fasheh 3280de551246SJoel Becker gen = ocfs2_prepare_downconvert(lockres, new_level); 3281ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3282de551246SJoel Becker ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3283de551246SJoel Becker gen); 3284de551246SJoel Becker 3285ccd979bdSMark Fasheh leave: 3286ccd979bdSMark Fasheh mlog_exit(ret); 3287ccd979bdSMark Fasheh return ret; 3288f7fbfdd1SMark Fasheh 3289f7fbfdd1SMark Fasheh leave_requeue: 3290f7fbfdd1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3291f7fbfdd1SMark Fasheh ctl->requeue = 1; 3292f7fbfdd1SMark Fasheh 3293f7fbfdd1SMark Fasheh mlog_exit(0); 3294f7fbfdd1SMark Fasheh return 0; 3295ccd979bdSMark Fasheh } 3296ccd979bdSMark Fasheh 3297d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 3298ccd979bdSMark Fasheh int blocking) 3299ccd979bdSMark Fasheh { 3300ccd979bdSMark Fasheh struct inode *inode; 3301ccd979bdSMark Fasheh struct address_space *mapping; 3302ccd979bdSMark Fasheh 3303ccd979bdSMark 
Fasheh inode = ocfs2_lock_res_inode(lockres); 3304ccd979bdSMark Fasheh mapping = inode->i_mapping; 3305ccd979bdSMark Fasheh 33061044e401SMark Fasheh if (!S_ISREG(inode->i_mode)) 3307f1f54068SMark Fasheh goto out; 3308f1f54068SMark Fasheh 33097f4a2a97SMark Fasheh /* 33107f4a2a97SMark Fasheh * We need this before the filemap_fdatawrite() so that it can 33117f4a2a97SMark Fasheh * transfer the dirty bit from the PTE to the 33127f4a2a97SMark Fasheh * page. Unfortunately this means that even for EX->PR 33137f4a2a97SMark Fasheh * downconverts, we'll lose our mappings and have to build 33147f4a2a97SMark Fasheh * them up again. 33157f4a2a97SMark Fasheh */ 33167f4a2a97SMark Fasheh unmap_mapping_range(mapping, 0, 0, 0); 33177f4a2a97SMark Fasheh 3318ccd979bdSMark Fasheh if (filemap_fdatawrite(mapping)) { 3319b0697053SMark Fasheh mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 3320b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 3321ccd979bdSMark Fasheh } 3322ccd979bdSMark Fasheh sync_mapping_buffers(mapping); 3323bd3e7610SJoel Becker if (blocking == DLM_LOCK_EX) { 3324ccd979bdSMark Fasheh truncate_inode_pages(mapping, 0); 3325ccd979bdSMark Fasheh } else { 3326ccd979bdSMark Fasheh /* We only need to wait on the I/O if we're not also 3327ccd979bdSMark Fasheh * truncating pages because truncate_inode_pages waits 3328ccd979bdSMark Fasheh * for us above. We don't truncate pages if we're 3329ccd979bdSMark Fasheh * blocking anything < EXMODE because we want to keep 3330ccd979bdSMark Fasheh * them around in that case. 
*/ 3331ccd979bdSMark Fasheh filemap_fdatawait(mapping); 3332ccd979bdSMark Fasheh } 3333ccd979bdSMark Fasheh 3334f1f54068SMark Fasheh out: 3335d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3336ccd979bdSMark Fasheh } 3337ccd979bdSMark Fasheh 3338810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3339810d5aebSMark Fasheh int new_level) 3340810d5aebSMark Fasheh { 3341810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 3342810d5aebSMark Fasheh int checkpointed = ocfs2_inode_fully_checkpointed(inode); 3343810d5aebSMark Fasheh 3344bd3e7610SJoel Becker BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3345bd3e7610SJoel Becker BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3346810d5aebSMark Fasheh 3347810d5aebSMark Fasheh if (checkpointed) 3348810d5aebSMark Fasheh return 1; 3349810d5aebSMark Fasheh 3350810d5aebSMark Fasheh ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); 3351810d5aebSMark Fasheh return 0; 3352810d5aebSMark Fasheh } 3353810d5aebSMark Fasheh 3354810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3355810d5aebSMark Fasheh { 3356810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 3357810d5aebSMark Fasheh 3358810d5aebSMark Fasheh __ocfs2_stuff_meta_lvb(inode); 3359810d5aebSMark Fasheh } 3360810d5aebSMark Fasheh 3361d680efe9SMark Fasheh /* 3362d680efe9SMark Fasheh * Does the final reference drop on our dentry lock. Right now this 336334d024f8SMark Fasheh * happens in the downconvert thread, but we could choose to simplify the 3364d680efe9SMark Fasheh * dlmglue API and push these off to the ocfs2_wq in the future. 
3365d680efe9SMark Fasheh */ 3366d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 3367d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3368d680efe9SMark Fasheh { 3369d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3370d680efe9SMark Fasheh ocfs2_dentry_lock_put(osb, dl); 3371d680efe9SMark Fasheh } 3372d680efe9SMark Fasheh 3373d680efe9SMark Fasheh /* 3374d680efe9SMark Fasheh * d_delete() matching dentries before the lock downconvert. 3375d680efe9SMark Fasheh * 3376d680efe9SMark Fasheh * At this point, any process waiting to destroy the 3377d680efe9SMark Fasheh * dentry_lock due to last ref count is stopped by the 3378d680efe9SMark Fasheh * OCFS2_LOCK_QUEUED flag. 3379d680efe9SMark Fasheh * 3380d680efe9SMark Fasheh * We have two potential problems 3381d680efe9SMark Fasheh * 3382d680efe9SMark Fasheh * 1) If we do the last reference drop on our dentry_lock (via dput) 3383d680efe9SMark Fasheh * we'll wind up in ocfs2_release_dentry_lock(), waiting on 3384d680efe9SMark Fasheh * the downconvert to finish. Instead we take an elevated 3385d680efe9SMark Fasheh * reference and push the drop until after we've completed our 3386d680efe9SMark Fasheh * unblock processing. 3387d680efe9SMark Fasheh * 3388d680efe9SMark Fasheh * 2) There might be another process with a final reference, 3389d680efe9SMark Fasheh * waiting on us to finish processing. If this is the case, we 3390d680efe9SMark Fasheh * detect it and exit out - there's no more dentries anyway. 
3391d680efe9SMark Fasheh */ 3392d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 3393d680efe9SMark Fasheh int blocking) 3394d680efe9SMark Fasheh { 3395d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3396d680efe9SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); 3397d680efe9SMark Fasheh struct dentry *dentry; 3398d680efe9SMark Fasheh unsigned long flags; 3399d680efe9SMark Fasheh int extra_ref = 0; 3400d680efe9SMark Fasheh 3401d680efe9SMark Fasheh /* 3402d680efe9SMark Fasheh * This node is blocking another node from getting a read 3403d680efe9SMark Fasheh * lock. This happens when we've renamed within a 3404d680efe9SMark Fasheh * directory. We've forced the other nodes to d_delete(), but 3405d680efe9SMark Fasheh * we never actually dropped our lock because it's still 3406d680efe9SMark Fasheh * valid. The downconvert code will retain a PR for this node, 3407d680efe9SMark Fasheh * so there's no further work to do. 3408d680efe9SMark Fasheh */ 3409bd3e7610SJoel Becker if (blocking == DLM_LOCK_PR) 3410d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3411d680efe9SMark Fasheh 3412d680efe9SMark Fasheh /* 3413d680efe9SMark Fasheh * Mark this inode as potentially orphaned. The code in 3414d680efe9SMark Fasheh * ocfs2_delete_inode() will figure out whether it actually 3415d680efe9SMark Fasheh * needs to be freed or not. 3416d680efe9SMark Fasheh */ 3417d680efe9SMark Fasheh spin_lock(&oi->ip_lock); 3418d680efe9SMark Fasheh oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 3419d680efe9SMark Fasheh spin_unlock(&oi->ip_lock); 3420d680efe9SMark Fasheh 3421d680efe9SMark Fasheh /* 3422d680efe9SMark Fasheh * Yuck. We need to make sure however that the check of 3423d680efe9SMark Fasheh * OCFS2_LOCK_FREEING and the extra reference are atomic with 3424d680efe9SMark Fasheh * respect to a reference decrement or the setting of that 3425d680efe9SMark Fasheh * flag. 
3426d680efe9SMark Fasheh */ 3427d680efe9SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3428d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3429d680efe9SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_FREEING) 3430d680efe9SMark Fasheh && dl->dl_count) { 3431d680efe9SMark Fasheh dl->dl_count++; 3432d680efe9SMark Fasheh extra_ref = 1; 3433d680efe9SMark Fasheh } 3434d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3435d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3436d680efe9SMark Fasheh 3437d680efe9SMark Fasheh mlog(0, "extra_ref = %d\n", extra_ref); 3438d680efe9SMark Fasheh 3439d680efe9SMark Fasheh /* 3440d680efe9SMark Fasheh * We have a process waiting on us in ocfs2_dentry_iput(), 3441d680efe9SMark Fasheh * which means we can't have any more outstanding 3442d680efe9SMark Fasheh * aliases. There's no need to do any more work. 3443d680efe9SMark Fasheh */ 3444d680efe9SMark Fasheh if (!extra_ref) 3445d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3446d680efe9SMark Fasheh 3447d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3448d680efe9SMark Fasheh while (1) { 3449d680efe9SMark Fasheh dentry = ocfs2_find_local_alias(dl->dl_inode, 3450d680efe9SMark Fasheh dl->dl_parent_blkno, 1); 3451d680efe9SMark Fasheh if (!dentry) 3452d680efe9SMark Fasheh break; 3453d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3454d680efe9SMark Fasheh 3455d680efe9SMark Fasheh mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, 3456d680efe9SMark Fasheh dentry->d_name.name); 3457d680efe9SMark Fasheh 3458d680efe9SMark Fasheh /* 3459d680efe9SMark Fasheh * The following dcache calls may do an 3460d680efe9SMark Fasheh * iput(). Normally we don't want that from the 3461d680efe9SMark Fasheh * downconverting thread, but in this case it's ok 3462d680efe9SMark Fasheh * because the requesting node already has an 3463d680efe9SMark Fasheh * exclusive lock on the inode, so it can't be queued 3464d680efe9SMark Fasheh * for a downconvert. 
3465d680efe9SMark Fasheh */ 3466d680efe9SMark Fasheh d_delete(dentry); 3467d680efe9SMark Fasheh dput(dentry); 3468d680efe9SMark Fasheh 3469d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3470d680efe9SMark Fasheh } 3471d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3472d680efe9SMark Fasheh 3473d680efe9SMark Fasheh /* 3474d680efe9SMark Fasheh * If we are the last holder of this dentry lock, there is no 3475d680efe9SMark Fasheh * reason to downconvert so skip straight to the unlock. 3476d680efe9SMark Fasheh */ 3477d680efe9SMark Fasheh if (dl->dl_count == 1) 3478d680efe9SMark Fasheh return UNBLOCK_STOP_POST; 3479d680efe9SMark Fasheh 3480d680efe9SMark Fasheh return UNBLOCK_CONTINUE_POST; 3481d680efe9SMark Fasheh } 3482d680efe9SMark Fasheh 34839e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) 34849e33d69fSJan Kara { 34859e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb; 34869e33d69fSJan Kara struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); 34879e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 34889e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 34899e33d69fSJan Kara 34909e33d69fSJan Kara mlog_entry_void(); 34919e33d69fSJan Kara 34929e33d69fSJan Kara lvb = (struct ocfs2_qinfo_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 34939e33d69fSJan Kara lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; 34949e33d69fSJan Kara lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); 34959e33d69fSJan Kara lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); 34969e33d69fSJan Kara lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); 34979e33d69fSJan Kara lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); 34989e33d69fSJan Kara lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); 34999e33d69fSJan Kara lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); 35009e33d69fSJan Kara 35019e33d69fSJan Kara mlog_exit_void(); 35029e33d69fSJan Kara } 35039e33d69fSJan Kara 35049e33d69fSJan Kara void ocfs2_qinfo_unlock(struct 
ocfs2_mem_dqinfo *oinfo, int ex) 35059e33d69fSJan Kara { 35069e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 35079e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 35089e33d69fSJan Kara int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 35099e33d69fSJan Kara 35109e33d69fSJan Kara mlog_entry_void(); 35119e33d69fSJan Kara if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 35129e33d69fSJan Kara ocfs2_cluster_unlock(osb, lockres, level); 35139e33d69fSJan Kara mlog_exit_void(); 35149e33d69fSJan Kara } 35159e33d69fSJan Kara 35169e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) 35179e33d69fSJan Kara { 35189e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 35199e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 35209e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 35219e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 35229e33d69fSJan Kara struct buffer_head *bh; 35239e33d69fSJan Kara struct ocfs2_global_disk_dqinfo *gdinfo; 35249e33d69fSJan Kara int status = 0; 35259e33d69fSJan Kara 35269e33d69fSJan Kara if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { 35279e33d69fSJan Kara info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); 35289e33d69fSJan Kara info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); 35299e33d69fSJan Kara oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); 35309e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); 35319e33d69fSJan Kara oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); 35329e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 35339e33d69fSJan Kara be32_to_cpu(lvb->lvb_free_entry); 35349e33d69fSJan Kara } else { 35359e33d69fSJan Kara bh = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &status); 35369e33d69fSJan Kara if (!bh) { 35379e33d69fSJan Kara mlog_errno(status); 35389e33d69fSJan Kara goto bail; 35399e33d69fSJan Kara } 35409e33d69fSJan Kara gdinfo = (struct 
ocfs2_global_disk_dqinfo *) 35419e33d69fSJan Kara (bh->b_data + OCFS2_GLOBAL_INFO_OFF); 35429e33d69fSJan Kara info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); 35439e33d69fSJan Kara info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace); 35449e33d69fSJan Kara oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); 35459e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); 35469e33d69fSJan Kara oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); 35479e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 35489e33d69fSJan Kara le32_to_cpu(gdinfo->dqi_free_entry); 35499e33d69fSJan Kara brelse(bh); 35509e33d69fSJan Kara ocfs2_track_lock_refresh(lockres); 35519e33d69fSJan Kara } 35529e33d69fSJan Kara 35539e33d69fSJan Kara bail: 35549e33d69fSJan Kara return status; 35559e33d69fSJan Kara } 35569e33d69fSJan Kara 35579e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file 35589e33d69fSJan Kara * so that we can safely refresh quota info from disk. */ 35599e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) 35609e33d69fSJan Kara { 35619e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 35629e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 35639e33d69fSJan Kara int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 35649e33d69fSJan Kara int status = 0; 35659e33d69fSJan Kara 35669e33d69fSJan Kara mlog_entry_void(); 35679e33d69fSJan Kara 35689e33d69fSJan Kara /* On RO devices, locking really isn't needed... 
*/ 35699e33d69fSJan Kara if (ocfs2_is_hard_readonly(osb)) { 35709e33d69fSJan Kara if (ex) 35719e33d69fSJan Kara status = -EROFS; 35729e33d69fSJan Kara goto bail; 35739e33d69fSJan Kara } 35749e33d69fSJan Kara if (ocfs2_mount_local(osb)) 35759e33d69fSJan Kara goto bail; 35769e33d69fSJan Kara 35779e33d69fSJan Kara status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 35789e33d69fSJan Kara if (status < 0) { 35799e33d69fSJan Kara mlog_errno(status); 35809e33d69fSJan Kara goto bail; 35819e33d69fSJan Kara } 35829e33d69fSJan Kara if (!ocfs2_should_refresh_lock_res(lockres)) 35839e33d69fSJan Kara goto bail; 35849e33d69fSJan Kara /* OK, we have the lock but we need to refresh the quota info */ 35859e33d69fSJan Kara status = ocfs2_refresh_qinfo(oinfo); 35869e33d69fSJan Kara if (status) 35879e33d69fSJan Kara ocfs2_qinfo_unlock(oinfo, ex); 35889e33d69fSJan Kara ocfs2_complete_lock_res_refresh(lockres, status); 35899e33d69fSJan Kara bail: 35909e33d69fSJan Kara mlog_exit(status); 35919e33d69fSJan Kara return status; 35929e33d69fSJan Kara } 35939e33d69fSJan Kara 35944670c46dSJoel Becker /* 35954670c46dSJoel Becker * This is the filesystem locking protocol. It provides the lock handling 35964670c46dSJoel Becker * hooks for the underlying DLM. It has a maximum version number. 35974670c46dSJoel Becker * The version number allows interoperability with systems running at 35984670c46dSJoel Becker * the same major number and an equal or smaller minor number. 35994670c46dSJoel Becker * 36004670c46dSJoel Becker * Whenever the filesystem does new things with locks (adds or removes a 36014670c46dSJoel Becker * lock, orders them differently, does different things underneath a lock), 36024670c46dSJoel Becker * the version must be changed. The protocol is negotiated when joining 36034670c46dSJoel Becker * the dlm domain. 
 * A node may join the domain if its major version is
 * identical to all other nodes and its minor version is greater than
 * or equal to all other nodes.  When its minor version is greater than
 * the other nodes, it will run at the minor version specified by the
 * other nodes.
 *
 * If a locking change is made that will not be compatible with older
 * versions, the major number must be increased and the minor version set
 * to zero.  If a change merely adds a behavior that can be disabled when
 * speaking to older versions, the minor version must be increased.  If a
 * change adds a fully backwards compatible change (e.g., LVB changes that
 * are just ignored by older versions), the version does not need to be
 * updated.
36164670c46dSJoel Becker */ 361724ef1815SJoel Becker static struct ocfs2_locking_protocol lproto = { 36184670c46dSJoel Becker .lp_max_version = { 36194670c46dSJoel Becker .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, 36204670c46dSJoel Becker .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, 36214670c46dSJoel Becker }, 362224ef1815SJoel Becker .lp_lock_ast = ocfs2_locking_ast, 362324ef1815SJoel Becker .lp_blocking_ast = ocfs2_blocking_ast, 362424ef1815SJoel Becker .lp_unlock_ast = ocfs2_unlock_ast, 362524ef1815SJoel Becker }; 362624ef1815SJoel Becker 362763e0c48aSJoel Becker void ocfs2_set_locking_protocol(void) 362824ef1815SJoel Becker { 362963e0c48aSJoel Becker ocfs2_stack_glue_set_locking_protocol(&lproto); 363024ef1815SJoel Becker } 363124ef1815SJoel Becker 363224ef1815SJoel Becker 363300600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 3634ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3635ccd979bdSMark Fasheh { 3636ccd979bdSMark Fasheh int status; 3637d680efe9SMark Fasheh struct ocfs2_unblock_ctl ctl = {0, 0,}; 3638ccd979bdSMark Fasheh unsigned long flags; 3639ccd979bdSMark Fasheh 3640ccd979bdSMark Fasheh /* Our reference to the lockres in this function can be 3641ccd979bdSMark Fasheh * considered valid until we remove the OCFS2_LOCK_QUEUED 3642ccd979bdSMark Fasheh * flag. */ 3643ccd979bdSMark Fasheh 3644ccd979bdSMark Fasheh mlog_entry_void(); 3645ccd979bdSMark Fasheh 3646ccd979bdSMark Fasheh BUG_ON(!lockres); 3647ccd979bdSMark Fasheh BUG_ON(!lockres->l_ops); 3648ccd979bdSMark Fasheh 3649ccd979bdSMark Fasheh mlog(0, "lockres %s blocked.\n", lockres->l_name); 3650ccd979bdSMark Fasheh 3651ccd979bdSMark Fasheh /* Detect whether a lock has been marked as going away while 365234d024f8SMark Fasheh * the downconvert thread was processing other things. 
A lock can 3653ccd979bdSMark Fasheh * still be marked with OCFS2_LOCK_FREEING after this check, 3654ccd979bdSMark Fasheh * but short circuiting here will still save us some 3655ccd979bdSMark Fasheh * performance. */ 3656ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3657ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) 3658ccd979bdSMark Fasheh goto unqueue; 3659ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3660ccd979bdSMark Fasheh 3661b5e500e2SMark Fasheh status = ocfs2_unblock_lock(osb, lockres, &ctl); 3662ccd979bdSMark Fasheh if (status < 0) 3663ccd979bdSMark Fasheh mlog_errno(status); 3664ccd979bdSMark Fasheh 3665ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3666ccd979bdSMark Fasheh unqueue: 3667d680efe9SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { 3668ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 3669ccd979bdSMark Fasheh } else 3670ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 3671ccd979bdSMark Fasheh 3672ccd979bdSMark Fasheh mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, 3673d680efe9SMark Fasheh ctl.requeue ? 
"yes" : "no"); 3674ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3675ccd979bdSMark Fasheh 3676d680efe9SMark Fasheh if (ctl.unblock_action != UNBLOCK_CONTINUE 3677d680efe9SMark Fasheh && lockres->l_ops->post_unlock) 3678d680efe9SMark Fasheh lockres->l_ops->post_unlock(osb, lockres); 3679d680efe9SMark Fasheh 3680ccd979bdSMark Fasheh mlog_exit_void(); 3681ccd979bdSMark Fasheh } 3682ccd979bdSMark Fasheh 3683ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 3684ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3685ccd979bdSMark Fasheh { 3686ccd979bdSMark Fasheh mlog_entry_void(); 3687ccd979bdSMark Fasheh 3688ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3689ccd979bdSMark Fasheh 3690ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) { 3691ccd979bdSMark Fasheh /* Do not schedule a lock for downconvert when it's on 3692ccd979bdSMark Fasheh * the way to destruction - any nodes wanting access 3693ccd979bdSMark Fasheh * to the resource will get it soon. 
*/ 3694ccd979bdSMark Fasheh mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n", 3695ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3696ccd979bdSMark Fasheh return; 3697ccd979bdSMark Fasheh } 3698ccd979bdSMark Fasheh 3699ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 3700ccd979bdSMark Fasheh 370134d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 3702ccd979bdSMark Fasheh if (list_empty(&lockres->l_blocked_list)) { 3703ccd979bdSMark Fasheh list_add_tail(&lockres->l_blocked_list, 3704ccd979bdSMark Fasheh &osb->blocked_lock_list); 3705ccd979bdSMark Fasheh osb->blocked_lock_count++; 3706ccd979bdSMark Fasheh } 370734d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 3708ccd979bdSMark Fasheh 3709ccd979bdSMark Fasheh mlog_exit_void(); 3710ccd979bdSMark Fasheh } 371134d024f8SMark Fasheh 371234d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 371334d024f8SMark Fasheh { 371434d024f8SMark Fasheh unsigned long processed; 371534d024f8SMark Fasheh struct ocfs2_lock_res *lockres; 371634d024f8SMark Fasheh 371734d024f8SMark Fasheh mlog_entry_void(); 371834d024f8SMark Fasheh 371934d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 372034d024f8SMark Fasheh /* grab this early so we know to try again if a state change and 372134d024f8SMark Fasheh * wake happens part-way through our work */ 372234d024f8SMark Fasheh osb->dc_work_sequence = osb->dc_wake_sequence; 372334d024f8SMark Fasheh 372434d024f8SMark Fasheh processed = osb->blocked_lock_count; 372534d024f8SMark Fasheh while (processed) { 372634d024f8SMark Fasheh BUG_ON(list_empty(&osb->blocked_lock_list)); 372734d024f8SMark Fasheh 372834d024f8SMark Fasheh lockres = list_entry(osb->blocked_lock_list.next, 372934d024f8SMark Fasheh struct ocfs2_lock_res, l_blocked_list); 373034d024f8SMark Fasheh list_del_init(&lockres->l_blocked_list); 373134d024f8SMark Fasheh osb->blocked_lock_count--; 373234d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 373334d024f8SMark 
Fasheh 373434d024f8SMark Fasheh BUG_ON(!processed); 373534d024f8SMark Fasheh processed--; 373634d024f8SMark Fasheh 373734d024f8SMark Fasheh ocfs2_process_blocked_lock(osb, lockres); 373834d024f8SMark Fasheh 373934d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 374034d024f8SMark Fasheh } 374134d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 374234d024f8SMark Fasheh 374334d024f8SMark Fasheh mlog_exit_void(); 374434d024f8SMark Fasheh } 374534d024f8SMark Fasheh 374634d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 374734d024f8SMark Fasheh { 374834d024f8SMark Fasheh int empty = 0; 374934d024f8SMark Fasheh 375034d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 375134d024f8SMark Fasheh if (list_empty(&osb->blocked_lock_list)) 375234d024f8SMark Fasheh empty = 1; 375334d024f8SMark Fasheh 375434d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 375534d024f8SMark Fasheh return empty; 375634d024f8SMark Fasheh } 375734d024f8SMark Fasheh 375834d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 375934d024f8SMark Fasheh { 376034d024f8SMark Fasheh int should_wake = 0; 376134d024f8SMark Fasheh 376234d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 376334d024f8SMark Fasheh if (osb->dc_work_sequence != osb->dc_wake_sequence) 376434d024f8SMark Fasheh should_wake = 1; 376534d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 376634d024f8SMark Fasheh 376734d024f8SMark Fasheh return should_wake; 376834d024f8SMark Fasheh } 376934d024f8SMark Fasheh 3770200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg) 377134d024f8SMark Fasheh { 377234d024f8SMark Fasheh int status = 0; 377334d024f8SMark Fasheh struct ocfs2_super *osb = arg; 377434d024f8SMark Fasheh 377534d024f8SMark Fasheh /* only quit once we've been asked to stop and there is no more 377634d024f8SMark Fasheh * work available */ 377734d024f8SMark Fasheh while (!(kthread_should_stop() && 377834d024f8SMark Fasheh 
ocfs2_downconvert_thread_lists_empty(osb))) { 377934d024f8SMark Fasheh 378034d024f8SMark Fasheh wait_event_interruptible(osb->dc_event, 378134d024f8SMark Fasheh ocfs2_downconvert_thread_should_wake(osb) || 378234d024f8SMark Fasheh kthread_should_stop()); 378334d024f8SMark Fasheh 378434d024f8SMark Fasheh mlog(0, "downconvert_thread: awoken\n"); 378534d024f8SMark Fasheh 378634d024f8SMark Fasheh ocfs2_downconvert_thread_do_work(osb); 378734d024f8SMark Fasheh } 378834d024f8SMark Fasheh 378934d024f8SMark Fasheh osb->dc_task = NULL; 379034d024f8SMark Fasheh return status; 379134d024f8SMark Fasheh } 379234d024f8SMark Fasheh 379334d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 379434d024f8SMark Fasheh { 379534d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 379634d024f8SMark Fasheh /* make sure the voting thread gets a swipe at whatever changes 379734d024f8SMark Fasheh * the caller may have made to the voting state */ 379834d024f8SMark Fasheh osb->dc_wake_sequence++; 379934d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 380034d024f8SMark Fasheh wake_up(&osb->dc_event); 380134d024f8SMark Fasheh } 3802