1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*- 2ccd979bdSMark Fasheh * vim: noexpandtab sw=8 ts=8 sts=0: 3ccd979bdSMark Fasheh * 4ccd979bdSMark Fasheh * dlmglue.c 5ccd979bdSMark Fasheh * 6ccd979bdSMark Fasheh * Code which implements an OCFS2 specific interface to our DLM. 7ccd979bdSMark Fasheh * 8ccd979bdSMark Fasheh * Copyright (C) 2003, 2004 Oracle. All rights reserved. 9ccd979bdSMark Fasheh * 10ccd979bdSMark Fasheh * This program is free software; you can redistribute it and/or 11ccd979bdSMark Fasheh * modify it under the terms of the GNU General Public 12ccd979bdSMark Fasheh * License as published by the Free Software Foundation; either 13ccd979bdSMark Fasheh * version 2 of the License, or (at your option) any later version. 14ccd979bdSMark Fasheh * 15ccd979bdSMark Fasheh * This program is distributed in the hope that it will be useful, 16ccd979bdSMark Fasheh * but WITHOUT ANY WARRANTY; without even the implied warranty of 17ccd979bdSMark Fasheh * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18ccd979bdSMark Fasheh * General Public License for more details. 19ccd979bdSMark Fasheh * 20ccd979bdSMark Fasheh * You should have received a copy of the GNU General Public 21ccd979bdSMark Fasheh * License along with this program; if not, write to the 22ccd979bdSMark Fasheh * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23ccd979bdSMark Fasheh * Boston, MA 021110-1307, USA. 
24ccd979bdSMark Fasheh */ 25ccd979bdSMark Fasheh 26ccd979bdSMark Fasheh #include <linux/types.h> 27ccd979bdSMark Fasheh #include <linux/slab.h> 28ccd979bdSMark Fasheh #include <linux/highmem.h> 29ccd979bdSMark Fasheh #include <linux/mm.h> 30ccd979bdSMark Fasheh #include <linux/kthread.h> 31ccd979bdSMark Fasheh #include <linux/pagemap.h> 32ccd979bdSMark Fasheh #include <linux/debugfs.h> 33ccd979bdSMark Fasheh #include <linux/seq_file.h> 348ddb7b00SSunil Mushran #include <linux/time.h> 359e33d69fSJan Kara #include <linux/quotaops.h> 36ccd979bdSMark Fasheh 37ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE 38ccd979bdSMark Fasheh #include <cluster/masklog.h> 39ccd979bdSMark Fasheh 40ccd979bdSMark Fasheh #include "ocfs2.h" 41d24fbcdaSJoel Becker #include "ocfs2_lockingver.h" 42ccd979bdSMark Fasheh 43ccd979bdSMark Fasheh #include "alloc.h" 44d680efe9SMark Fasheh #include "dcache.h" 45ccd979bdSMark Fasheh #include "dlmglue.h" 46ccd979bdSMark Fasheh #include "extent_map.h" 477f1a37e3STiger Yang #include "file.h" 48ccd979bdSMark Fasheh #include "heartbeat.h" 49ccd979bdSMark Fasheh #include "inode.h" 50ccd979bdSMark Fasheh #include "journal.h" 5124ef1815SJoel Becker #include "stackglue.h" 52ccd979bdSMark Fasheh #include "slot_map.h" 53ccd979bdSMark Fasheh #include "super.h" 54ccd979bdSMark Fasheh #include "uptodate.h" 559e33d69fSJan Kara #include "quota.h" 568dec98edSTao Ma #include "refcounttree.h" 57ccd979bdSMark Fasheh 58ccd979bdSMark Fasheh #include "buffer_head_io.h" 59ccd979bdSMark Fasheh 60ccd979bdSMark Fasheh struct ocfs2_mask_waiter { 61ccd979bdSMark Fasheh struct list_head mw_item; 62ccd979bdSMark Fasheh int mw_status; 63ccd979bdSMark Fasheh struct completion mw_complete; 64ccd979bdSMark Fasheh unsigned long mw_mask; 65ccd979bdSMark Fasheh unsigned long mw_goal; 668ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 675bc970e8SSunil Mushran ktime_t mw_lock_start; 688ddb7b00SSunil Mushran #endif 69ccd979bdSMark Fasheh }; 70ccd979bdSMark Fasheh 
7154a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 7254a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); 73cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); 749e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres); 75ccd979bdSMark Fasheh 76d680efe9SMark Fasheh /* 77cc567d89SMark Fasheh * Return value from ->downconvert_worker functions. 78d680efe9SMark Fasheh * 79b5e500e2SMark Fasheh * These control the precise actions of ocfs2_unblock_lock() 80d680efe9SMark Fasheh * and ocfs2_process_blocked_lock() 81d680efe9SMark Fasheh * 82d680efe9SMark Fasheh */ 83d680efe9SMark Fasheh enum ocfs2_unblock_action { 84d680efe9SMark Fasheh UNBLOCK_CONTINUE = 0, /* Continue downconvert */ 85d680efe9SMark Fasheh UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire 86d680efe9SMark Fasheh * ->post_unlock callback */ 87d680efe9SMark Fasheh UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire 88d680efe9SMark Fasheh * ->post_unlock() callback. 
*/ 89d680efe9SMark Fasheh }; 90d680efe9SMark Fasheh 91d680efe9SMark Fasheh struct ocfs2_unblock_ctl { 92d680efe9SMark Fasheh int requeue; 93d680efe9SMark Fasheh enum ocfs2_unblock_action unblock_action; 94d680efe9SMark Fasheh }; 95d680efe9SMark Fasheh 96cb25797dSJan Kara /* Lockdep class keys */ 97cb25797dSJan Kara struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; 98cb25797dSJan Kara 99810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 100810d5aebSMark Fasheh int new_level); 101810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 102810d5aebSMark Fasheh 103cc567d89SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 104cc567d89SMark Fasheh int blocking); 105cc567d89SMark Fasheh 106cc567d89SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 107cc567d89SMark Fasheh int blocking); 108d680efe9SMark Fasheh 109d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 110d680efe9SMark Fasheh struct ocfs2_lock_res *lockres); 111ccd979bdSMark Fasheh 1129e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); 1136cb129f5SAdrian Bunk 1148dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 1158dec98edSTao Ma int new_level); 1168dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 1178dec98edSTao Ma int blocking); 1188dec98edSTao Ma 1196cb129f5SAdrian Bunk #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 1206cb129f5SAdrian Bunk 1216cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. 
*/ 1226cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level, 1236cb129f5SAdrian Bunk const char *function, 1246cb129f5SAdrian Bunk unsigned int line, 1256cb129f5SAdrian Bunk struct ocfs2_lock_res *lockres) 1266cb129f5SAdrian Bunk { 127a641dc2aSMark Fasheh struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 1286cb129f5SAdrian Bunk 1296cb129f5SAdrian Bunk mlog(level, "LVB information for %s (called from %s:%u):\n", 1306cb129f5SAdrian Bunk lockres->l_name, function, line); 1316cb129f5SAdrian Bunk mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", 1326cb129f5SAdrian Bunk lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), 1336cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_igeneration)); 1346cb129f5SAdrian Bunk mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 1356cb129f5SAdrian Bunk (unsigned long long)be64_to_cpu(lvb->lvb_isize), 1366cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 1376cb129f5SAdrian Bunk be16_to_cpu(lvb->lvb_imode)); 1386cb129f5SAdrian Bunk mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 1396cb129f5SAdrian Bunk "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), 1406cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_iatime_packed), 1416cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_ictime_packed), 1426cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_imtime_packed), 1436cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iattr)); 1446cb129f5SAdrian Bunk } 1456cb129f5SAdrian Bunk 1466cb129f5SAdrian Bunk 147f625c979SMark Fasheh /* 148f625c979SMark Fasheh * OCFS2 Lock Resource Operations 149f625c979SMark Fasheh * 150f625c979SMark Fasheh * These fine tune the behavior of the generic dlmglue locking infrastructure. 1510d5dc6c2SMark Fasheh * 1520d5dc6c2SMark Fasheh * The most basic of lock types can point ->l_priv to their respective 1530d5dc6c2SMark Fasheh * struct ocfs2_super and allow the default actions to manage things. 
1540d5dc6c2SMark Fasheh * 1550d5dc6c2SMark Fasheh * Right now, each lock type also needs to implement an init function, 1560d5dc6c2SMark Fasheh * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() 1570d5dc6c2SMark Fasheh * should be called when the lock is no longer needed (i.e., object 1580d5dc6c2SMark Fasheh * destruction time). 159f625c979SMark Fasheh */ 160ccd979bdSMark Fasheh struct ocfs2_lock_res_ops { 16154a7e755SMark Fasheh /* 16254a7e755SMark Fasheh * Translate an ocfs2_lock_res * into an ocfs2_super *. Define 16354a7e755SMark Fasheh * this callback if ->l_priv is not an ocfs2_super pointer 16454a7e755SMark Fasheh */ 16554a7e755SMark Fasheh struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); 166b5e500e2SMark Fasheh 1670d5dc6c2SMark Fasheh /* 16834d024f8SMark Fasheh * Optionally called in the downconvert thread after a 16934d024f8SMark Fasheh * successful downconvert. The lockres will not be referenced 17034d024f8SMark Fasheh * after this callback is called, so it is safe to free 17134d024f8SMark Fasheh * memory, etc. 1720d5dc6c2SMark Fasheh * 1730d5dc6c2SMark Fasheh * The exact semantics of when this is called are controlled 1740d5dc6c2SMark Fasheh * by ->downconvert_worker() 1750d5dc6c2SMark Fasheh */ 176d680efe9SMark Fasheh void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); 177f625c979SMark Fasheh 178f625c979SMark Fasheh /* 17916d5b956SMark Fasheh * Allow a lock type to add checks to determine whether it is 18016d5b956SMark Fasheh * safe to downconvert a lock. Return 0 to re-queue the 18116d5b956SMark Fasheh * downconvert at a later time, nonzero to continue. 18216d5b956SMark Fasheh * 18316d5b956SMark Fasheh * For most locks, the default checks that there are no 18416d5b956SMark Fasheh * incompatible holders are sufficient. 18516d5b956SMark Fasheh * 18616d5b956SMark Fasheh * Called with the lockres spinlock held. 
18716d5b956SMark Fasheh */ 18816d5b956SMark Fasheh int (*check_downconvert)(struct ocfs2_lock_res *, int); 18916d5b956SMark Fasheh 19016d5b956SMark Fasheh /* 1915ef0d4eaSMark Fasheh * Allows a lock type to populate the lock value block. This 1925ef0d4eaSMark Fasheh * is called on downconvert, and when we drop a lock. 1935ef0d4eaSMark Fasheh * 1945ef0d4eaSMark Fasheh * Locks that want to use this should set LOCK_TYPE_USES_LVB 1955ef0d4eaSMark Fasheh * in the flags field. 1965ef0d4eaSMark Fasheh * 1975ef0d4eaSMark Fasheh * Called with the lockres spinlock held. 1985ef0d4eaSMark Fasheh */ 1995ef0d4eaSMark Fasheh void (*set_lvb)(struct ocfs2_lock_res *); 2005ef0d4eaSMark Fasheh 2015ef0d4eaSMark Fasheh /* 202cc567d89SMark Fasheh * Called from the downconvert thread when it is determined 203cc567d89SMark Fasheh * that a lock will be downconverted. This is called without 204cc567d89SMark Fasheh * any locks held so the function can do work that might 205cc567d89SMark Fasheh * schedule (syncing out data, etc). 206cc567d89SMark Fasheh * 207cc567d89SMark Fasheh * This should return any one of the ocfs2_unblock_action 208cc567d89SMark Fasheh * values, depending on what it wants the thread to do. 209cc567d89SMark Fasheh */ 210cc567d89SMark Fasheh int (*downconvert_worker)(struct ocfs2_lock_res *, int); 211cc567d89SMark Fasheh 212cc567d89SMark Fasheh /* 213f625c979SMark Fasheh * LOCK_TYPE_* flags which describe the specific requirements 214f625c979SMark Fasheh * of a lock type. Descriptions of each individual flag follow. 215f625c979SMark Fasheh */ 216f625c979SMark Fasheh int flags; 217ccd979bdSMark Fasheh }; 218ccd979bdSMark Fasheh 219f625c979SMark Fasheh /* 220f625c979SMark Fasheh * Some locks want to "refresh" potentially stale data when a 221f625c979SMark Fasheh * meaningful (PRMODE or EXMODE) lock level is first obtained. 
If this 222f625c979SMark Fasheh * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the 223f625c979SMark Fasheh * individual lockres l_flags member from the ast function. It is 224f625c979SMark Fasheh * expected that the locking wrapper will clear the 225f625c979SMark Fasheh * OCFS2_LOCK_NEEDS_REFRESH flag when done. 226f625c979SMark Fasheh */ 227f625c979SMark Fasheh #define LOCK_TYPE_REQUIRES_REFRESH 0x1 228f625c979SMark Fasheh 229b80fc012SMark Fasheh /* 2305ef0d4eaSMark Fasheh * Indicate that a lock type makes use of the lock value block. The 2315ef0d4eaSMark Fasheh * ->set_lvb lock type callback must be defined. 232b80fc012SMark Fasheh */ 233b80fc012SMark Fasheh #define LOCK_TYPE_USES_LVB 0x2 234b80fc012SMark Fasheh 235ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 23654a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 237f625c979SMark Fasheh .flags = 0, 238ccd979bdSMark Fasheh }; 239ccd979bdSMark Fasheh 240e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { 24154a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 242810d5aebSMark Fasheh .check_downconvert = ocfs2_check_meta_downconvert, 243810d5aebSMark Fasheh .set_lvb = ocfs2_set_meta_lvb, 244f1f54068SMark Fasheh .downconvert_worker = ocfs2_data_convert_worker, 245b80fc012SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 246ccd979bdSMark Fasheh }; 247ccd979bdSMark Fasheh 248ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = { 249f625c979SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH, 250ccd979bdSMark Fasheh }; 251ccd979bdSMark Fasheh 252ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = { 253f625c979SMark Fasheh .flags = 0, 254ccd979bdSMark Fasheh }; 255ccd979bdSMark Fasheh 2566ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { 2576ca497a8Swengang wang .flags = 0, 2586ca497a8Swengang wang }; 2596ca497a8Swengang wang 26083273932SSrinivas Eeda static 
struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { 26183273932SSrinivas Eeda .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 26283273932SSrinivas Eeda }; 26383273932SSrinivas Eeda 264d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { 26554a7e755SMark Fasheh .get_osb = ocfs2_get_dentry_osb, 266d680efe9SMark Fasheh .post_unlock = ocfs2_dentry_post_unlock, 267cc567d89SMark Fasheh .downconvert_worker = ocfs2_dentry_convert_worker, 268f625c979SMark Fasheh .flags = 0, 269d680efe9SMark Fasheh }; 270d680efe9SMark Fasheh 27150008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 27250008630STiger Yang .get_osb = ocfs2_get_inode_osb, 27350008630STiger Yang .flags = 0, 27450008630STiger Yang }; 27550008630STiger Yang 276cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = { 277cf8e06f1SMark Fasheh .get_osb = ocfs2_get_file_osb, 278cf8e06f1SMark Fasheh .flags = 0, 279cf8e06f1SMark Fasheh }; 280cf8e06f1SMark Fasheh 2819e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { 2829e33d69fSJan Kara .set_lvb = ocfs2_set_qinfo_lvb, 2839e33d69fSJan Kara .get_osb = ocfs2_get_qinfo_osb, 2849e33d69fSJan Kara .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, 2859e33d69fSJan Kara }; 2869e33d69fSJan Kara 2878dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { 2888dec98edSTao Ma .check_downconvert = ocfs2_check_refcount_downconvert, 2898dec98edSTao Ma .downconvert_worker = ocfs2_refcount_convert_worker, 2908dec98edSTao Ma .flags = 0, 2918dec98edSTao Ma }; 2928dec98edSTao Ma 293ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 294ccd979bdSMark Fasheh { 295ccd979bdSMark Fasheh return lockres->l_type == OCFS2_LOCK_TYPE_META || 29650008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_RW || 29750008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 298ccd979bdSMark Fasheh } 299ccd979bdSMark Fasheh 300c0e41338SJoel Becker 
static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) 301a796d286SJoel Becker { 302a796d286SJoel Becker return container_of(lksb, struct ocfs2_lock_res, l_lksb); 303a796d286SJoel Becker } 304a796d286SJoel Becker 305ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 306ccd979bdSMark Fasheh { 307ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_inode_lock(lockres)); 308ccd979bdSMark Fasheh 309ccd979bdSMark Fasheh return (struct inode *) lockres->l_priv; 310ccd979bdSMark Fasheh } 311ccd979bdSMark Fasheh 312d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) 313d680efe9SMark Fasheh { 314d680efe9SMark Fasheh BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); 315d680efe9SMark Fasheh 316d680efe9SMark Fasheh return (struct ocfs2_dentry_lock *)lockres->l_priv; 317d680efe9SMark Fasheh } 318d680efe9SMark Fasheh 3199e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) 3209e33d69fSJan Kara { 3219e33d69fSJan Kara BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); 3229e33d69fSJan Kara 3239e33d69fSJan Kara return (struct ocfs2_mem_dqinfo *)lockres->l_priv; 3249e33d69fSJan Kara } 3259e33d69fSJan Kara 3268dec98edSTao Ma static inline struct ocfs2_refcount_tree * 3278dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res) 3288dec98edSTao Ma { 3298dec98edSTao Ma return container_of(res, struct ocfs2_refcount_tree, rf_lockres); 3308dec98edSTao Ma } 3318dec98edSTao Ma 33254a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 33354a7e755SMark Fasheh { 33454a7e755SMark Fasheh if (lockres->l_ops->get_osb) 33554a7e755SMark Fasheh return lockres->l_ops->get_osb(lockres); 33654a7e755SMark Fasheh 33754a7e755SMark Fasheh return (struct ocfs2_super *)lockres->l_priv; 33854a7e755SMark Fasheh } 33954a7e755SMark Fasheh 340ccd979bdSMark Fasheh 
static int ocfs2_lock_create(struct ocfs2_super *osb, 341ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 342ccd979bdSMark Fasheh int level, 343bd3e7610SJoel Becker u32 dlm_flags); 344ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 345ccd979bdSMark Fasheh int wanted); 346cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 347ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 348cb25797dSJan Kara int level, unsigned long caller_ip); 349cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, 350cb25797dSJan Kara struct ocfs2_lock_res *lockres, 351cb25797dSJan Kara int level) 352cb25797dSJan Kara { 353cb25797dSJan Kara __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); 354cb25797dSJan Kara } 355cb25797dSJan Kara 356ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 357ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 358ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 359ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level); 360ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 361ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 362ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 363ccd979bdSMark Fasheh int convert); 3647431cd7eSJoel Becker #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ 365c74ff8bbSSunil Mushran if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \ 3667431cd7eSJoel Becker mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ 3677431cd7eSJoel Becker _err, _func, _lockres->l_name); \ 368c74ff8bbSSunil Mushran else \ 369c74ff8bbSSunil Mushran mlog(ML_ERROR, "DLM error %d while calling %s on resource 
%.*s%08x\n", \ 370c74ff8bbSSunil Mushran _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \ 371c74ff8bbSSunil Mushran (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \ 372ccd979bdSMark Fasheh } while (0) 37334d024f8SMark Fasheh static int ocfs2_downconvert_thread(void *arg); 37434d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 375ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 376e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 377ccd979bdSMark Fasheh struct buffer_head **bh); 378ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 379ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level); 380de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 381cf8e06f1SMark Fasheh int new_level); 382cf8e06f1SMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 383cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres, 384cf8e06f1SMark Fasheh int new_level, 385de551246SJoel Becker int lvb, 386de551246SJoel Becker unsigned int generation); 387cf8e06f1SMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 388cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 389cf8e06f1SMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 390cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 391cf8e06f1SMark Fasheh 392ccd979bdSMark Fasheh 393ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 394ccd979bdSMark Fasheh u64 blkno, 395ccd979bdSMark Fasheh u32 generation, 396ccd979bdSMark Fasheh char *name) 397ccd979bdSMark Fasheh { 398ccd979bdSMark Fasheh int len; 399ccd979bdSMark Fasheh 400ccd979bdSMark Fasheh BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); 401ccd979bdSMark Fasheh 402b0697053SMark Fasheh len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 403b0697053SMark Fasheh ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, 
404b0697053SMark Fasheh (long long)blkno, generation); 405ccd979bdSMark Fasheh 406ccd979bdSMark Fasheh BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 407ccd979bdSMark Fasheh 408ccd979bdSMark Fasheh mlog(0, "built lock resource with name: %s\n", name); 409ccd979bdSMark Fasheh } 410ccd979bdSMark Fasheh 41134af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 412ccd979bdSMark Fasheh 413ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 414ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug) 415ccd979bdSMark Fasheh { 416ccd979bdSMark Fasheh mlog(0, "Add tracking for lockres %s\n", res->l_name); 417ccd979bdSMark Fasheh 418ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 419ccd979bdSMark Fasheh list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); 420ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 421ccd979bdSMark Fasheh } 422ccd979bdSMark Fasheh 423ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) 424ccd979bdSMark Fasheh { 425ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 426ccd979bdSMark Fasheh if (!list_empty(&res->l_debug_list)) 427ccd979bdSMark Fasheh list_del_init(&res->l_debug_list); 428ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 429ccd979bdSMark Fasheh } 430ccd979bdSMark Fasheh 4318ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 4328ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 4338ddb7b00SSunil Mushran { 4348ddb7b00SSunil Mushran res->l_lock_refresh = 0; 4355bc970e8SSunil Mushran memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats)); 4365bc970e8SSunil Mushran memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats)); 4378ddb7b00SSunil Mushran } 4388ddb7b00SSunil Mushran 4398ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, 4408ddb7b00SSunil Mushran struct ocfs2_mask_waiter *mw, int ret) 4418ddb7b00SSunil Mushran 
{ 4425bc970e8SSunil Mushran u32 usec; 4435bc970e8SSunil Mushran ktime_t kt; 4445bc970e8SSunil Mushran struct ocfs2_lock_stats *stats; 4458ddb7b00SSunil Mushran 4465bc970e8SSunil Mushran if (level == LKM_PRMODE) 4475bc970e8SSunil Mushran stats = &res->l_lock_prmode; 4485bc970e8SSunil Mushran else if (level == LKM_EXMODE) 4495bc970e8SSunil Mushran stats = &res->l_lock_exmode; 4505bc970e8SSunil Mushran else 4518ddb7b00SSunil Mushran return; 4528ddb7b00SSunil Mushran 4535bc970e8SSunil Mushran kt = ktime_sub(ktime_get(), mw->mw_lock_start); 4545bc970e8SSunil Mushran usec = ktime_to_us(kt); 4555bc970e8SSunil Mushran 4565bc970e8SSunil Mushran stats->ls_gets++; 4575bc970e8SSunil Mushran stats->ls_total += ktime_to_ns(kt); 4585bc970e8SSunil Mushran /* overflow */ 45916865b7cSroel if (unlikely(stats->ls_gets == 0)) { 4605bc970e8SSunil Mushran stats->ls_gets++; 4615bc970e8SSunil Mushran stats->ls_total = ktime_to_ns(kt); 4625bc970e8SSunil Mushran } 4635bc970e8SSunil Mushran 4645bc970e8SSunil Mushran if (stats->ls_max < usec) 4655bc970e8SSunil Mushran stats->ls_max = usec; 4665bc970e8SSunil Mushran 4678ddb7b00SSunil Mushran if (ret) 4685bc970e8SSunil Mushran stats->ls_fail++; 4698ddb7b00SSunil Mushran } 4708ddb7b00SSunil Mushran 4718ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4728ddb7b00SSunil Mushran { 4738ddb7b00SSunil Mushran lockres->l_lock_refresh++; 4748ddb7b00SSunil Mushran } 4758ddb7b00SSunil Mushran 4768ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4778ddb7b00SSunil Mushran { 4785bc970e8SSunil Mushran mw->mw_lock_start = ktime_get(); 4798ddb7b00SSunil Mushran } 4808ddb7b00SSunil Mushran #else 4818ddb7b00SSunil Mushran static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 4828ddb7b00SSunil Mushran { 4838ddb7b00SSunil Mushran } 4848ddb7b00SSunil Mushran static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, 4858ddb7b00SSunil Mushran int 
level, struct ocfs2_mask_waiter *mw, int ret) 4868ddb7b00SSunil Mushran { 4878ddb7b00SSunil Mushran } 4888ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4898ddb7b00SSunil Mushran { 4908ddb7b00SSunil Mushran } 4918ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4928ddb7b00SSunil Mushran { 4938ddb7b00SSunil Mushran } 4948ddb7b00SSunil Mushran #endif 4958ddb7b00SSunil Mushran 496ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 497ccd979bdSMark Fasheh struct ocfs2_lock_res *res, 498ccd979bdSMark Fasheh enum ocfs2_lock_type type, 499ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops, 500ccd979bdSMark Fasheh void *priv) 501ccd979bdSMark Fasheh { 502ccd979bdSMark Fasheh res->l_type = type; 503ccd979bdSMark Fasheh res->l_ops = ops; 504ccd979bdSMark Fasheh res->l_priv = priv; 505ccd979bdSMark Fasheh 506bd3e7610SJoel Becker res->l_level = DLM_LOCK_IV; 507bd3e7610SJoel Becker res->l_requested = DLM_LOCK_IV; 508bd3e7610SJoel Becker res->l_blocking = DLM_LOCK_IV; 509ccd979bdSMark Fasheh res->l_action = OCFS2_AST_INVALID; 510ccd979bdSMark Fasheh res->l_unlock_action = OCFS2_UNLOCK_INVALID; 511ccd979bdSMark Fasheh 512ccd979bdSMark Fasheh res->l_flags = OCFS2_LOCK_INITIALIZED; 513ccd979bdSMark Fasheh 514ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 5158ddb7b00SSunil Mushran 5168ddb7b00SSunil Mushran ocfs2_init_lock_stats(res); 517cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 518cb25797dSJan Kara if (type != OCFS2_LOCK_TYPE_OPEN) 519cb25797dSJan Kara lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], 520cb25797dSJan Kara &lockdep_keys[type], 0); 521cb25797dSJan Kara else 522cb25797dSJan Kara res->l_lockdep_map.key = NULL; 523cb25797dSJan Kara #endif 524ccd979bdSMark Fasheh } 525ccd979bdSMark Fasheh 526ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 527ccd979bdSMark Fasheh 
{ 528ccd979bdSMark Fasheh /* This also clears out the lock status block */ 529ccd979bdSMark Fasheh memset(res, 0, sizeof(struct ocfs2_lock_res)); 530ccd979bdSMark Fasheh spin_lock_init(&res->l_lock); 531ccd979bdSMark Fasheh init_waitqueue_head(&res->l_event); 532ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_blocked_list); 533ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_mask_waiters); 534ccd979bdSMark Fasheh } 535ccd979bdSMark Fasheh 536ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 537ccd979bdSMark Fasheh enum ocfs2_lock_type type, 53824c19ef4SMark Fasheh unsigned int generation, 539ccd979bdSMark Fasheh struct inode *inode) 540ccd979bdSMark Fasheh { 541ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops; 542ccd979bdSMark Fasheh 543ccd979bdSMark Fasheh switch(type) { 544ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_RW: 545ccd979bdSMark Fasheh ops = &ocfs2_inode_rw_lops; 546ccd979bdSMark Fasheh break; 547ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_META: 548e63aecb6SMark Fasheh ops = &ocfs2_inode_inode_lops; 549ccd979bdSMark Fasheh break; 55050008630STiger Yang case OCFS2_LOCK_TYPE_OPEN: 55150008630STiger Yang ops = &ocfs2_inode_open_lops; 55250008630STiger Yang break; 553ccd979bdSMark Fasheh default: 554ccd979bdSMark Fasheh mlog_bug_on_msg(1, "type: %d\n", type); 555ccd979bdSMark Fasheh ops = NULL; /* thanks, gcc */ 556ccd979bdSMark Fasheh break; 557ccd979bdSMark Fasheh }; 558ccd979bdSMark Fasheh 559d680efe9SMark Fasheh ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 56024c19ef4SMark Fasheh generation, res->l_name); 561d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 562d680efe9SMark Fasheh } 563d680efe9SMark Fasheh 56454a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 56554a7e755SMark Fasheh { 56654a7e755SMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 56754a7e755SMark Fasheh 56854a7e755SMark Fasheh return 
OCFS2_SB(inode->i_sb); 56954a7e755SMark Fasheh } 57054a7e755SMark Fasheh 5719e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) 5729e33d69fSJan Kara { 5739e33d69fSJan Kara struct ocfs2_mem_dqinfo *info = lockres->l_priv; 5749e33d69fSJan Kara 5759e33d69fSJan Kara return OCFS2_SB(info->dqi_gi.dqi_sb); 5769e33d69fSJan Kara } 5779e33d69fSJan Kara 578cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 579cf8e06f1SMark Fasheh { 580cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = lockres->l_priv; 581cf8e06f1SMark Fasheh 582cf8e06f1SMark Fasheh return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 583cf8e06f1SMark Fasheh } 584cf8e06f1SMark Fasheh 585d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 586d680efe9SMark Fasheh { 587d680efe9SMark Fasheh __be64 inode_blkno_be; 588d680efe9SMark Fasheh 589d680efe9SMark Fasheh memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 590d680efe9SMark Fasheh sizeof(__be64)); 591d680efe9SMark Fasheh 592d680efe9SMark Fasheh return be64_to_cpu(inode_blkno_be); 593d680efe9SMark Fasheh } 594d680efe9SMark Fasheh 59554a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 59654a7e755SMark Fasheh { 59754a7e755SMark Fasheh struct ocfs2_dentry_lock *dl = lockres->l_priv; 59854a7e755SMark Fasheh 59954a7e755SMark Fasheh return OCFS2_SB(dl->dl_inode->i_sb); 60054a7e755SMark Fasheh } 60154a7e755SMark Fasheh 602d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 603d680efe9SMark Fasheh u64 parent, struct inode *inode) 604d680efe9SMark Fasheh { 605d680efe9SMark Fasheh int len; 606d680efe9SMark Fasheh u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 607d680efe9SMark Fasheh __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 608d680efe9SMark Fasheh struct ocfs2_lock_res *lockres = &dl->dl_lockres; 609d680efe9SMark Fasheh 610d680efe9SMark 
Fasheh ocfs2_lock_res_init_once(lockres); 611d680efe9SMark Fasheh 612d680efe9SMark Fasheh /* 613d680efe9SMark Fasheh * Unfortunately, the standard lock naming scheme won't work 614d680efe9SMark Fasheh * here because we have two 16 byte values to use. Instead, 615d680efe9SMark Fasheh * we'll stuff the inode number as a binary value. We still 616d680efe9SMark Fasheh * want error prints to show something without garbling the 617d680efe9SMark Fasheh * display, so drop a null byte in there before the inode 618d680efe9SMark Fasheh * number. A future version of OCFS2 will likely use all 619d680efe9SMark Fasheh * binary lock names. The stringified names have been a 620d680efe9SMark Fasheh * tremendous aid in debugging, but now that the debugfs 621d680efe9SMark Fasheh * interface exists, we can mangle things there if need be. 622d680efe9SMark Fasheh * 623d680efe9SMark Fasheh * NOTE: We also drop the standard "pad" value (the total lock 624d680efe9SMark Fasheh * name size stays the same though - the last part is all 625d680efe9SMark Fasheh * zeros due to the memset in ocfs2_lock_res_init_once() 626d680efe9SMark Fasheh */ 627d680efe9SMark Fasheh len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 628d680efe9SMark Fasheh "%c%016llx", 629d680efe9SMark Fasheh ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 630d680efe9SMark Fasheh (long long)parent); 631d680efe9SMark Fasheh 632d680efe9SMark Fasheh BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 633d680efe9SMark Fasheh 634d680efe9SMark Fasheh memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 635d680efe9SMark Fasheh sizeof(__be64)); 636d680efe9SMark Fasheh 637d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 638d680efe9SMark Fasheh OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 639d680efe9SMark Fasheh dl); 640ccd979bdSMark Fasheh } 641ccd979bdSMark Fasheh 642ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 643ccd979bdSMark Fasheh struct 
ocfs2_super *osb) 644ccd979bdSMark Fasheh { 645ccd979bdSMark Fasheh /* Superblock lockres doesn't come from a slab so we call init 646ccd979bdSMark Fasheh * once on it manually. */ 647ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 648d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 649d680efe9SMark Fasheh 0, res->l_name); 650ccd979bdSMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 651ccd979bdSMark Fasheh &ocfs2_super_lops, osb); 652ccd979bdSMark Fasheh } 653ccd979bdSMark Fasheh 654ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 655ccd979bdSMark Fasheh struct ocfs2_super *osb) 656ccd979bdSMark Fasheh { 657ccd979bdSMark Fasheh /* Rename lockres doesn't come from a slab so we call init 658ccd979bdSMark Fasheh * once on it manually. */ 659ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 660d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 661d680efe9SMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 662ccd979bdSMark Fasheh &ocfs2_rename_lops, osb); 663ccd979bdSMark Fasheh } 664ccd979bdSMark Fasheh 6656ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, 6666ca497a8Swengang wang struct ocfs2_super *osb) 6676ca497a8Swengang wang { 6686ca497a8Swengang wang /* nfs_sync lockres doesn't come from a slab so we call init 6696ca497a8Swengang wang * once on it manually. 
*/ 6706ca497a8Swengang wang ocfs2_lock_res_init_once(res); 6716ca497a8Swengang wang ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); 6726ca497a8Swengang wang ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, 6736ca497a8Swengang wang &ocfs2_nfs_sync_lops, osb); 6746ca497a8Swengang wang } 6756ca497a8Swengang wang 67683273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 67783273932SSrinivas Eeda struct ocfs2_super *osb) 67883273932SSrinivas Eeda { 67983273932SSrinivas Eeda ocfs2_lock_res_init_once(res); 68083273932SSrinivas Eeda ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); 68183273932SSrinivas Eeda ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, 68283273932SSrinivas Eeda &ocfs2_orphan_scan_lops, osb); 68383273932SSrinivas Eeda } 68483273932SSrinivas Eeda 685cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 686cf8e06f1SMark Fasheh struct ocfs2_file_private *fp) 687cf8e06f1SMark Fasheh { 688cf8e06f1SMark Fasheh struct inode *inode = fp->fp_file->f_mapping->host; 689cf8e06f1SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 690cf8e06f1SMark Fasheh 691cf8e06f1SMark Fasheh ocfs2_lock_res_init_once(lockres); 692cf8e06f1SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 693cf8e06f1SMark Fasheh inode->i_generation, lockres->l_name); 694cf8e06f1SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 695cf8e06f1SMark Fasheh OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 696cf8e06f1SMark Fasheh fp); 697cf8e06f1SMark Fasheh lockres->l_flags |= OCFS2_LOCK_NOCACHE; 698cf8e06f1SMark Fasheh } 699cf8e06f1SMark Fasheh 7009e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 7019e33d69fSJan Kara struct ocfs2_mem_dqinfo *info) 7029e33d69fSJan Kara { 7039e33d69fSJan Kara ocfs2_lock_res_init_once(lockres); 7049e33d69fSJan Kara ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, 
info->dqi_gi.dqi_type, 7059e33d69fSJan Kara 0, lockres->l_name); 7069e33d69fSJan Kara ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, 7079e33d69fSJan Kara OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, 7089e33d69fSJan Kara info); 7099e33d69fSJan Kara } 7109e33d69fSJan Kara 7118dec98edSTao Ma void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, 7128dec98edSTao Ma struct ocfs2_super *osb, u64 ref_blkno, 7138dec98edSTao Ma unsigned int generation) 7148dec98edSTao Ma { 7158dec98edSTao Ma ocfs2_lock_res_init_once(lockres); 7168dec98edSTao Ma ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, 7178dec98edSTao Ma generation, lockres->l_name); 7188dec98edSTao Ma ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, 7198dec98edSTao Ma &ocfs2_refcount_block_lops, osb); 7208dec98edSTao Ma } 7218dec98edSTao Ma 722ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 723ccd979bdSMark Fasheh { 724ccd979bdSMark Fasheh if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 725ccd979bdSMark Fasheh return; 726ccd979bdSMark Fasheh 727ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 728ccd979bdSMark Fasheh 729ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 730ccd979bdSMark Fasheh "Lockres %s is on the blocked list\n", 731ccd979bdSMark Fasheh res->l_name); 732ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 733ccd979bdSMark Fasheh "Lockres %s has mask waiters pending\n", 734ccd979bdSMark Fasheh res->l_name); 735ccd979bdSMark Fasheh mlog_bug_on_msg(spin_is_locked(&res->l_lock), 736ccd979bdSMark Fasheh "Lockres %s is locked\n", 737ccd979bdSMark Fasheh res->l_name); 738ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ro_holders, 739ccd979bdSMark Fasheh "Lockres %s has %u ro holders\n", 740ccd979bdSMark Fasheh res->l_name, res->l_ro_holders); 741ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ex_holders, 742ccd979bdSMark Fasheh "Lockres %s has %u ex holders\n", 743ccd979bdSMark Fasheh 
res->l_name, res->l_ex_holders); 744ccd979bdSMark Fasheh 745ccd979bdSMark Fasheh /* Need to clear out the lock status block for the dlm */ 746ccd979bdSMark Fasheh memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 747ccd979bdSMark Fasheh 748ccd979bdSMark Fasheh res->l_flags = 0UL; 749ccd979bdSMark Fasheh } 750ccd979bdSMark Fasheh 751ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 752ccd979bdSMark Fasheh int level) 753ccd979bdSMark Fasheh { 754ccd979bdSMark Fasheh BUG_ON(!lockres); 755ccd979bdSMark Fasheh 756ccd979bdSMark Fasheh switch(level) { 757bd3e7610SJoel Becker case DLM_LOCK_EX: 758ccd979bdSMark Fasheh lockres->l_ex_holders++; 759ccd979bdSMark Fasheh break; 760bd3e7610SJoel Becker case DLM_LOCK_PR: 761ccd979bdSMark Fasheh lockres->l_ro_holders++; 762ccd979bdSMark Fasheh break; 763ccd979bdSMark Fasheh default: 764ccd979bdSMark Fasheh BUG(); 765ccd979bdSMark Fasheh } 766ccd979bdSMark Fasheh } 767ccd979bdSMark Fasheh 768ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 769ccd979bdSMark Fasheh int level) 770ccd979bdSMark Fasheh { 771ccd979bdSMark Fasheh BUG_ON(!lockres); 772ccd979bdSMark Fasheh 773ccd979bdSMark Fasheh switch(level) { 774bd3e7610SJoel Becker case DLM_LOCK_EX: 775ccd979bdSMark Fasheh BUG_ON(!lockres->l_ex_holders); 776ccd979bdSMark Fasheh lockres->l_ex_holders--; 777ccd979bdSMark Fasheh break; 778bd3e7610SJoel Becker case DLM_LOCK_PR: 779ccd979bdSMark Fasheh BUG_ON(!lockres->l_ro_holders); 780ccd979bdSMark Fasheh lockres->l_ro_holders--; 781ccd979bdSMark Fasheh break; 782ccd979bdSMark Fasheh default: 783ccd979bdSMark Fasheh BUG(); 784ccd979bdSMark Fasheh } 785ccd979bdSMark Fasheh } 786ccd979bdSMark Fasheh 787ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock 788ccd979bdSMark Fasheh * levels are EX, PR, and NL. It *will* have to be adjusted when more 789ccd979bdSMark Fasheh * lock types are added. 
*/ 790ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level) 791ccd979bdSMark Fasheh { 792bd3e7610SJoel Becker int new_level = DLM_LOCK_EX; 793ccd979bdSMark Fasheh 794bd3e7610SJoel Becker if (level == DLM_LOCK_EX) 795bd3e7610SJoel Becker new_level = DLM_LOCK_NL; 796bd3e7610SJoel Becker else if (level == DLM_LOCK_PR) 797bd3e7610SJoel Becker new_level = DLM_LOCK_PR; 798ccd979bdSMark Fasheh return new_level; 799ccd979bdSMark Fasheh } 800ccd979bdSMark Fasheh 801ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres, 802ccd979bdSMark Fasheh unsigned long newflags) 803ccd979bdSMark Fasheh { 804800deef3SChristoph Hellwig struct ocfs2_mask_waiter *mw, *tmp; 805ccd979bdSMark Fasheh 806ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 807ccd979bdSMark Fasheh 808ccd979bdSMark Fasheh lockres->l_flags = newflags; 809ccd979bdSMark Fasheh 810800deef3SChristoph Hellwig list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 811ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 812ccd979bdSMark Fasheh continue; 813ccd979bdSMark Fasheh 814ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 815ccd979bdSMark Fasheh mw->mw_status = 0; 816ccd979bdSMark Fasheh complete(&mw->mw_complete); 817ccd979bdSMark Fasheh } 818ccd979bdSMark Fasheh } 819ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 820ccd979bdSMark Fasheh { 821ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags | or); 822ccd979bdSMark Fasheh } 823ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 824ccd979bdSMark Fasheh unsigned long clear) 825ccd979bdSMark Fasheh { 826ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags & ~clear); 827ccd979bdSMark Fasheh } 828ccd979bdSMark Fasheh 829ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 830ccd979bdSMark Fasheh { 
831ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 832ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 833ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 834bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 835ccd979bdSMark Fasheh 836ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 837ccd979bdSMark Fasheh if (lockres->l_level <= 838ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 839bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_NL; 840ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 841ccd979bdSMark Fasheh } 842ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 843ccd979bdSMark Fasheh } 844ccd979bdSMark Fasheh 845ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 846ccd979bdSMark Fasheh { 847ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 848ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 849ccd979bdSMark Fasheh 850ccd979bdSMark Fasheh /* Convert from RO to EX doesn't really need anything as our 851ccd979bdSMark Fasheh * information is already up to data. 
Convert from NL to 852ccd979bdSMark Fasheh * *anything* however should mark ourselves as needing an 853ccd979bdSMark Fasheh * update */ 854bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_NL && 855f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 856ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 857ccd979bdSMark Fasheh 858ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 859a1912826SSunil Mushran 860a1912826SSunil Mushran /* 861a1912826SSunil Mushran * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing 862a1912826SSunil Mushran * the OCFS2_LOCK_BUSY flag to prevent the dc thread from 863a1912826SSunil Mushran * downconverting the lock before the upconvert has fully completed. 864d1e78238SXue jiufei * Do not prevent the dc thread from downconverting if NONBLOCK lock 865d1e78238SXue jiufei * had already returned. 866a1912826SSunil Mushran */ 867d1e78238SXue jiufei if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED)) 868a1912826SSunil Mushran lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 869d1e78238SXue jiufei else 870d1e78238SXue jiufei lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED); 871a1912826SSunil Mushran 872ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 873ccd979bdSMark Fasheh } 874ccd979bdSMark Fasheh 875ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 876ccd979bdSMark Fasheh { 8773cf0c507SRoel Kluin BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 878ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 879ccd979bdSMark Fasheh 880bd3e7610SJoel Becker if (lockres->l_requested > DLM_LOCK_NL && 881f625c979SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 882f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 883ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 884ccd979bdSMark Fasheh 885ccd979bdSMark Fasheh 
lockres->l_level = lockres->l_requested; 886ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 887ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 888ccd979bdSMark Fasheh } 889ccd979bdSMark Fasheh 890ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 891ccd979bdSMark Fasheh int level) 892ccd979bdSMark Fasheh { 893ccd979bdSMark Fasheh int needs_downconvert = 0; 894ccd979bdSMark Fasheh 895ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 896ccd979bdSMark Fasheh 897ccd979bdSMark Fasheh if (level > lockres->l_blocking) { 898ccd979bdSMark Fasheh /* only schedule a downconvert if we haven't already scheduled 899ccd979bdSMark Fasheh * one that goes low enough to satisfy the level we're 900ccd979bdSMark Fasheh * blocking. this also catches the case where we get 901ccd979bdSMark Fasheh * duplicate BASTs */ 902ccd979bdSMark Fasheh if (ocfs2_highest_compat_lock_level(level) < 903ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) 904ccd979bdSMark Fasheh needs_downconvert = 1; 905ccd979bdSMark Fasheh 906ccd979bdSMark Fasheh lockres->l_blocking = level; 907ccd979bdSMark Fasheh } 908ccd979bdSMark Fasheh 9099b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n", 9109b915181SSunil Mushran lockres->l_name, level, lockres->l_level, lockres->l_blocking, 9119b915181SSunil Mushran needs_downconvert); 9129b915181SSunil Mushran 9130b94a909SWengang Wang if (needs_downconvert) 9140b94a909SWengang Wang lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 915c1e8d35eSTao Ma mlog(0, "needs_downconvert = %d\n", needs_downconvert); 916ccd979bdSMark Fasheh return needs_downconvert; 917ccd979bdSMark Fasheh } 918ccd979bdSMark Fasheh 919de551246SJoel Becker /* 920de551246SJoel Becker * OCFS2_LOCK_PENDING and l_pending_gen. 921de551246SJoel Becker * 922de551246SJoel Becker * Why does OCFS2_LOCK_PENDING exist? 
 * To close a race between setting
 * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock()
 * for more details on the race.
 *
 * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces
 * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock()
 * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
 * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
 * the caller is going to try to clear PENDING again.  If nothing else is
 * happening, __lockres_clear_pending() sees PENDING is unset and does
 * nothing.
 *
 * But what if another path (e.g. the downconvert thread) has just started
 * a new locking action?  The other path has re-set PENDING.  Our path
 * cannot clear PENDING, because that will re-open the original race
 * window.
 *
 * [Example]
 *
 * ocfs2_meta_lock()
 *  ocfs2_cluster_lock()
 *   set BUSY
 *   set PENDING
 *   drop l_lock
 *   ocfs2_dlm_lock()
 *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
 *     clear PENDING			 ocfs2_unblock_lock()
 *					  take_l_lock
 *					  !BUSY
 *					  ocfs2_prepare_downconvert()
 *					   set BUSY
 *					   set PENDING
 *					  drop l_lock
 *   take l_lock
 *   clear PENDING
 *   drop l_lock
 *			<window>
 *					  ocfs2_dlm_lock()
 *
 * So as you can see, we now have a window where l_lock is not held,
 * PENDING is not set, and ocfs2_dlm_lock() has not been called.
 *
 * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
 * set by ocfs2_prepare_downconvert().  That wasn't nice.
 *
 * To solve this we introduce l_pending_gen.  A call to
 * lockres_clear_pending() will only do so when it is passed a generation
 * number that matches the lockres.  lockres_set_pending() will return the
 * current generation number.  When ocfs2_cluster_lock() goes to clear
 * PENDING, it passes the generation it got from set_pending().  In our
 * example above, the generation numbers will *not* match.
Thus, 973de551246SJoel Becker * ocfs2_cluster_lock() will not clear the PENDING set by 974de551246SJoel Becker * ocfs2_prepare_downconvert(). 975de551246SJoel Becker */ 976de551246SJoel Becker 977de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */ 978de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 979de551246SJoel Becker unsigned int generation, 980de551246SJoel Becker struct ocfs2_super *osb) 981de551246SJoel Becker { 982de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 983de551246SJoel Becker 984de551246SJoel Becker /* 985de551246SJoel Becker * The ast and locking functions can race us here. The winner 986de551246SJoel Becker * will clear pending, the loser will not. 987de551246SJoel Becker */ 988de551246SJoel Becker if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 989de551246SJoel Becker (lockres->l_pending_gen != generation)) 990de551246SJoel Becker return; 991de551246SJoel Becker 992de551246SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 993de551246SJoel Becker lockres->l_pending_gen++; 994de551246SJoel Becker 995de551246SJoel Becker /* 996de551246SJoel Becker * The downconvert thread may have skipped us because we 997de551246SJoel Becker * were PENDING. Wake it up. 
998de551246SJoel Becker */ 999de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 1000de551246SJoel Becker ocfs2_wake_downconvert_thread(osb); 1001de551246SJoel Becker } 1002de551246SJoel Becker 1003de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */ 1004de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 1005de551246SJoel Becker unsigned int generation, 1006de551246SJoel Becker struct ocfs2_super *osb) 1007de551246SJoel Becker { 1008de551246SJoel Becker unsigned long flags; 1009de551246SJoel Becker 1010de551246SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 1011de551246SJoel Becker __lockres_clear_pending(lockres, generation, osb); 1012de551246SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1013de551246SJoel Becker } 1014de551246SJoel Becker 1015de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 1016de551246SJoel Becker { 1017de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 1018de551246SJoel Becker BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 1019de551246SJoel Becker 1020de551246SJoel Becker lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 1021de551246SJoel Becker 1022de551246SJoel Becker return lockres->l_pending_gen; 1023de551246SJoel Becker } 1024de551246SJoel Becker 1025c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) 1026ccd979bdSMark Fasheh { 1027a796d286SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1028aa2623adSMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1029ccd979bdSMark Fasheh int needs_downconvert; 1030ccd979bdSMark Fasheh unsigned long flags; 1031ccd979bdSMark Fasheh 1032bd3e7610SJoel Becker BUG_ON(level <= DLM_LOCK_NL); 1033ccd979bdSMark Fasheh 10349b915181SSunil Mushran mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, " 10359b915181SSunil Mushran "type %s\n", lockres->l_name, level, 
lockres->l_level, 1036aa2623adSMark Fasheh ocfs2_lock_type_string(lockres->l_type)); 1037aa2623adSMark Fasheh 1038cf8e06f1SMark Fasheh /* 1039cf8e06f1SMark Fasheh * We can skip the bast for locks which don't enable caching - 1040cf8e06f1SMark Fasheh * they'll be dropped at the earliest possible time anyway. 1041cf8e06f1SMark Fasheh */ 1042cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 1043cf8e06f1SMark Fasheh return; 1044cf8e06f1SMark Fasheh 1045ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1046ccd979bdSMark Fasheh needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 1047ccd979bdSMark Fasheh if (needs_downconvert) 1048ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 1049ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1050ccd979bdSMark Fasheh 1051d680efe9SMark Fasheh wake_up(&lockres->l_event); 1052d680efe9SMark Fasheh 105334d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 1054ccd979bdSMark Fasheh } 1055ccd979bdSMark Fasheh 1056c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb) 1057ccd979bdSMark Fasheh { 1058a796d286SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1059de551246SJoel Becker struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1060ccd979bdSMark Fasheh unsigned long flags; 10611693a5c0SDavid Teigland int status; 1062ccd979bdSMark Fasheh 1063ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1064ccd979bdSMark Fasheh 10651693a5c0SDavid Teigland status = ocfs2_dlm_lock_status(&lockres->l_lksb); 10661693a5c0SDavid Teigland 10671693a5c0SDavid Teigland if (status == -EAGAIN) { 10681693a5c0SDavid Teigland lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 10691693a5c0SDavid Teigland goto out; 10701693a5c0SDavid Teigland } 10711693a5c0SDavid Teigland 10721693a5c0SDavid Teigland if (status) { 10738f2c9c1bSJoel Becker mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", 10741693a5c0SDavid Teigland 
lockres->l_name, status); 1075ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1076ccd979bdSMark Fasheh return; 1077ccd979bdSMark Fasheh } 1078ccd979bdSMark Fasheh 10799b915181SSunil Mushran mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, " 10809b915181SSunil Mushran "level %d => %d\n", lockres->l_name, lockres->l_action, 10819b915181SSunil Mushran lockres->l_unlock_action, lockres->l_level, lockres->l_requested); 10829b915181SSunil Mushran 1083ccd979bdSMark Fasheh switch(lockres->l_action) { 1084ccd979bdSMark Fasheh case OCFS2_AST_ATTACH: 1085ccd979bdSMark Fasheh ocfs2_generic_handle_attach_action(lockres); 1086e92d57dfSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); 1087ccd979bdSMark Fasheh break; 1088ccd979bdSMark Fasheh case OCFS2_AST_CONVERT: 1089ccd979bdSMark Fasheh ocfs2_generic_handle_convert_action(lockres); 1090ccd979bdSMark Fasheh break; 1091ccd979bdSMark Fasheh case OCFS2_AST_DOWNCONVERT: 1092ccd979bdSMark Fasheh ocfs2_generic_handle_downconvert_action(lockres); 1093ccd979bdSMark Fasheh break; 1094ccd979bdSMark Fasheh default: 10959b915181SSunil Mushran mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, " 10969b915181SSunil Mushran "flags 0x%lx, unlock: %u\n", 1097e92d57dfSMark Fasheh lockres->l_name, lockres->l_action, lockres->l_flags, 1098e92d57dfSMark Fasheh lockres->l_unlock_action); 1099ccd979bdSMark Fasheh BUG(); 1100ccd979bdSMark Fasheh } 11011693a5c0SDavid Teigland out: 1102ccd979bdSMark Fasheh /* set it to something invalid so if we get called again we 1103ccd979bdSMark Fasheh * can catch it. */ 1104ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1105ccd979bdSMark Fasheh 1106de551246SJoel Becker /* Did we try to cancel this lock? 
Clear that state */ 1107de551246SJoel Becker if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) 1108de551246SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1109de551246SJoel Becker 1110de551246SJoel Becker /* 1111de551246SJoel Becker * We may have beaten the locking functions here. We certainly 1112de551246SJoel Becker * know that dlm_lock() has been called :-) 1113de551246SJoel Becker * Because we can't have two lock calls in flight at once, we 1114de551246SJoel Becker * can use lockres->l_pending_gen. 1115de551246SJoel Becker */ 1116de551246SJoel Becker __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); 1117de551246SJoel Becker 1118ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1119d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1120ccd979bdSMark Fasheh } 1121ccd979bdSMark Fasheh 1122553b5eb9SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error) 1123553b5eb9SJoel Becker { 1124553b5eb9SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1125553b5eb9SJoel Becker unsigned long flags; 1126553b5eb9SJoel Becker 11279b915181SSunil Mushran mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n", 11289b915181SSunil Mushran lockres->l_name, lockres->l_unlock_action); 1129553b5eb9SJoel Becker 1130553b5eb9SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 1131553b5eb9SJoel Becker if (error) { 1132553b5eb9SJoel Becker mlog(ML_ERROR, "Dlm passes error %d for lock %s, " 1133553b5eb9SJoel Becker "unlock_action %d\n", error, lockres->l_name, 1134553b5eb9SJoel Becker lockres->l_unlock_action); 1135553b5eb9SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1136553b5eb9SJoel Becker return; 1137553b5eb9SJoel Becker } 1138553b5eb9SJoel Becker 1139553b5eb9SJoel Becker switch(lockres->l_unlock_action) { 1140553b5eb9SJoel Becker case OCFS2_UNLOCK_CANCEL_CONVERT: 1141553b5eb9SJoel Becker mlog(0, "Cancel convert success for %s\n", lockres->l_name); 
1142553b5eb9SJoel Becker lockres->l_action = OCFS2_AST_INVALID; 1143553b5eb9SJoel Becker /* Downconvert thread may have requeued this lock, we 1144553b5eb9SJoel Becker * need to wake it. */ 1145553b5eb9SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 1146553b5eb9SJoel Becker ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); 1147553b5eb9SJoel Becker break; 1148553b5eb9SJoel Becker case OCFS2_UNLOCK_DROP_LOCK: 1149553b5eb9SJoel Becker lockres->l_level = DLM_LOCK_IV; 1150553b5eb9SJoel Becker break; 1151553b5eb9SJoel Becker default: 1152553b5eb9SJoel Becker BUG(); 1153553b5eb9SJoel Becker } 1154553b5eb9SJoel Becker 1155553b5eb9SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1156553b5eb9SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1157553b5eb9SJoel Becker wake_up(&lockres->l_event); 1158553b5eb9SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1159553b5eb9SJoel Becker } 1160553b5eb9SJoel Becker 1161553b5eb9SJoel Becker /* 1162553b5eb9SJoel Becker * This is the filesystem locking protocol. It provides the lock handling 1163553b5eb9SJoel Becker * hooks for the underlying DLM. It has a maximum version number. 1164553b5eb9SJoel Becker * The version number allows interoperability with systems running at 1165553b5eb9SJoel Becker * the same major number and an equal or smaller minor number. 1166553b5eb9SJoel Becker * 1167553b5eb9SJoel Becker * Whenever the filesystem does new things with locks (adds or removes a 1168553b5eb9SJoel Becker * lock, orders them differently, does different things underneath a lock), 1169553b5eb9SJoel Becker * the version must be changed. The protocol is negotiated when joining 1170553b5eb9SJoel Becker * the dlm domain. A node may join the domain if its major version is 1171553b5eb9SJoel Becker * identical to all other nodes and its minor version is greater than 1172553b5eb9SJoel Becker * or equal to all other nodes. 
When its minor version is greater than 1173553b5eb9SJoel Becker * the other nodes, it will run at the minor version specified by the 1174553b5eb9SJoel Becker * other nodes. 1175553b5eb9SJoel Becker * 1176553b5eb9SJoel Becker * If a locking change is made that will not be compatible with older 1177553b5eb9SJoel Becker * versions, the major number must be increased and the minor version set 1178553b5eb9SJoel Becker * to zero. If a change merely adds a behavior that can be disabled when 1179553b5eb9SJoel Becker * speaking to older versions, the minor version must be increased. If a 1180553b5eb9SJoel Becker * change adds a fully backwards compatible change (eg, LVB changes that 1181553b5eb9SJoel Becker * are just ignored by older versions), the version does not need to be 1182553b5eb9SJoel Becker * updated. 1183553b5eb9SJoel Becker */ 1184553b5eb9SJoel Becker static struct ocfs2_locking_protocol lproto = { 1185553b5eb9SJoel Becker .lp_max_version = { 1186553b5eb9SJoel Becker .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, 1187553b5eb9SJoel Becker .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, 1188553b5eb9SJoel Becker }, 1189553b5eb9SJoel Becker .lp_lock_ast = ocfs2_locking_ast, 1190553b5eb9SJoel Becker .lp_blocking_ast = ocfs2_blocking_ast, 1191553b5eb9SJoel Becker .lp_unlock_ast = ocfs2_unlock_ast, 1192553b5eb9SJoel Becker }; 1193553b5eb9SJoel Becker 1194553b5eb9SJoel Becker void ocfs2_set_locking_protocol(void) 1195553b5eb9SJoel Becker { 1196553b5eb9SJoel Becker ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version); 1197553b5eb9SJoel Becker } 1198553b5eb9SJoel Becker 1199ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 1200ccd979bdSMark Fasheh int convert) 1201ccd979bdSMark Fasheh { 1202ccd979bdSMark Fasheh unsigned long flags; 1203ccd979bdSMark Fasheh 1204ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1205ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1206a1912826SSunil Mushran 
lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1207ccd979bdSMark Fasheh if (convert) 1208ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1209ccd979bdSMark Fasheh else 1210ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1211ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1212ccd979bdSMark Fasheh 1213ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1214ccd979bdSMark Fasheh } 1215ccd979bdSMark Fasheh 1216ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e., 1217ccd979bdSMark Fasheh * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller 1218ccd979bdSMark Fasheh * to do the right thing in that case. 1219ccd979bdSMark Fasheh */ 1220ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 1221ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1222ccd979bdSMark Fasheh int level, 1223bd3e7610SJoel Becker u32 dlm_flags) 1224ccd979bdSMark Fasheh { 1225ccd979bdSMark Fasheh int ret = 0; 1226ccd979bdSMark Fasheh unsigned long flags; 1227de551246SJoel Becker unsigned int gen; 1228ccd979bdSMark Fasheh 1229bd3e7610SJoel Becker mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 1230ccd979bdSMark Fasheh dlm_flags); 1231ccd979bdSMark Fasheh 1232ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1233ccd979bdSMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || 1234ccd979bdSMark Fasheh (lockres->l_flags & OCFS2_LOCK_BUSY)) { 1235ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1236ccd979bdSMark Fasheh goto bail; 1237ccd979bdSMark Fasheh } 1238ccd979bdSMark Fasheh 1239ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1240ccd979bdSMark Fasheh lockres->l_requested = level; 1241ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1242de551246SJoel Becker gen = lockres_set_pending(lockres); 1243ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 
1244ccd979bdSMark Fasheh 12454670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1246ccd979bdSMark Fasheh level, 1247ccd979bdSMark Fasheh &lockres->l_lksb, 1248ccd979bdSMark Fasheh dlm_flags, 1249ccd979bdSMark Fasheh lockres->l_name, 1250a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 1251de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 12527431cd7eSJoel Becker if (ret) { 12537431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1254ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1255ccd979bdSMark Fasheh } 1256ccd979bdSMark Fasheh 12577431cd7eSJoel Becker mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); 1258ccd979bdSMark Fasheh 1259ccd979bdSMark Fasheh bail: 1260ccd979bdSMark Fasheh return ret; 1261ccd979bdSMark Fasheh } 1262ccd979bdSMark Fasheh 1263ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1264ccd979bdSMark Fasheh int flag) 1265ccd979bdSMark Fasheh { 1266ccd979bdSMark Fasheh unsigned long flags; 1267ccd979bdSMark Fasheh int ret; 1268ccd979bdSMark Fasheh 1269ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1270ccd979bdSMark Fasheh ret = lockres->l_flags & flag; 1271ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1272ccd979bdSMark Fasheh 1273ccd979bdSMark Fasheh return ret; 1274ccd979bdSMark Fasheh } 1275ccd979bdSMark Fasheh 1276ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) 1277ccd979bdSMark Fasheh 1278ccd979bdSMark Fasheh { 1279ccd979bdSMark Fasheh wait_event(lockres->l_event, 1280ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); 1281ccd979bdSMark Fasheh } 1282ccd979bdSMark Fasheh 1283ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) 1284ccd979bdSMark Fasheh 1285ccd979bdSMark Fasheh { 1286ccd979bdSMark Fasheh wait_event(lockres->l_event, 1287ccd979bdSMark Fasheh 
!ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); 1288ccd979bdSMark Fasheh } 1289ccd979bdSMark Fasheh 1290ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf 1291ccd979bdSMark Fasheh * of another node, and return true if the currently wanted 1292ccd979bdSMark Fasheh * level will be compatible with it. */ 1293ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 1294ccd979bdSMark Fasheh int wanted) 1295ccd979bdSMark Fasheh { 1296ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 1297ccd979bdSMark Fasheh 1298ccd979bdSMark Fasheh return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 1299ccd979bdSMark Fasheh } 1300ccd979bdSMark Fasheh 1301ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 1302ccd979bdSMark Fasheh { 1303ccd979bdSMark Fasheh INIT_LIST_HEAD(&mw->mw_item); 1304ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 13058ddb7b00SSunil Mushran ocfs2_init_start_time(mw); 1306ccd979bdSMark Fasheh } 1307ccd979bdSMark Fasheh 1308ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 1309ccd979bdSMark Fasheh { 1310ccd979bdSMark Fasheh wait_for_completion(&mw->mw_complete); 1311ccd979bdSMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 131216735d02SWolfram Sang reinit_completion(&mw->mw_complete); 1313ccd979bdSMark Fasheh return mw->mw_status; 1314ccd979bdSMark Fasheh } 1315ccd979bdSMark Fasheh 1316ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 1317ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw, 1318ccd979bdSMark Fasheh unsigned long mask, 1319ccd979bdSMark Fasheh unsigned long goal) 1320ccd979bdSMark Fasheh { 1321ccd979bdSMark Fasheh BUG_ON(!list_empty(&mw->mw_item)); 1322ccd979bdSMark Fasheh 1323ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 1324ccd979bdSMark Fasheh 1325ccd979bdSMark Fasheh 
list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 1326ccd979bdSMark Fasheh mw->mw_mask = mask; 1327ccd979bdSMark Fasheh mw->mw_goal = goal; 1328ccd979bdSMark Fasheh } 1329ccd979bdSMark Fasheh 1330ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY 1331ccd979bdSMark Fasheh * if the mask still hadn't reached its goal */ 1332d1e78238SXue jiufei static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1333ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw) 1334ccd979bdSMark Fasheh { 1335ccd979bdSMark Fasheh int ret = 0; 1336ccd979bdSMark Fasheh 1337d1e78238SXue jiufei assert_spin_locked(&lockres->l_lock); 1338ccd979bdSMark Fasheh if (!list_empty(&mw->mw_item)) { 1339ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 1340ccd979bdSMark Fasheh ret = -EBUSY; 1341ccd979bdSMark Fasheh 1342ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 1343ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 1344ccd979bdSMark Fasheh } 1345d1e78238SXue jiufei 1346d1e78238SXue jiufei return ret; 1347d1e78238SXue jiufei } 1348d1e78238SXue jiufei 1349d1e78238SXue jiufei static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1350d1e78238SXue jiufei struct ocfs2_mask_waiter *mw) 1351d1e78238SXue jiufei { 1352d1e78238SXue jiufei unsigned long flags; 1353d1e78238SXue jiufei int ret = 0; 1354d1e78238SXue jiufei 1355d1e78238SXue jiufei spin_lock_irqsave(&lockres->l_lock, flags); 1356d1e78238SXue jiufei ret = __lockres_remove_mask_waiter(lockres, mw); 1357ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1358ccd979bdSMark Fasheh 1359ccd979bdSMark Fasheh return ret; 1360ccd979bdSMark Fasheh 1361ccd979bdSMark Fasheh } 1362ccd979bdSMark Fasheh 1363cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 1364cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres) 1365cf8e06f1SMark Fasheh { 1366cf8e06f1SMark Fasheh int ret; 1367cf8e06f1SMark Fasheh 
1368cf8e06f1SMark Fasheh ret = wait_for_completion_interruptible(&mw->mw_complete); 1369cf8e06f1SMark Fasheh if (ret) 1370cf8e06f1SMark Fasheh lockres_remove_mask_waiter(lockres, mw); 1371cf8e06f1SMark Fasheh else 1372cf8e06f1SMark Fasheh ret = mw->mw_status; 1373cf8e06f1SMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 137416735d02SWolfram Sang reinit_completion(&mw->mw_complete); 1375cf8e06f1SMark Fasheh return ret; 1376cf8e06f1SMark Fasheh } 1377cf8e06f1SMark Fasheh 1378cb25797dSJan Kara static int __ocfs2_cluster_lock(struct ocfs2_super *osb, 1379ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1380ccd979bdSMark Fasheh int level, 1381bd3e7610SJoel Becker u32 lkm_flags, 1382cb25797dSJan Kara int arg_flags, 1383cb25797dSJan Kara int l_subclass, 1384cb25797dSJan Kara unsigned long caller_ip) 1385ccd979bdSMark Fasheh { 1386ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 1387ccd979bdSMark Fasheh int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1388ccd979bdSMark Fasheh int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 1389ccd979bdSMark Fasheh unsigned long flags; 1390de551246SJoel Becker unsigned int gen; 13911693a5c0SDavid Teigland int noqueue_attempted = 0; 1392d1e78238SXue jiufei int dlm_locked = 0; 1393b1b1e15eSTariq Saeed int kick_dc = 0; 1394ccd979bdSMark Fasheh 13952f2eca20Salex chen if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { 13962f2eca20Salex chen mlog_errno(-EINVAL); 13972f2eca20Salex chen return -EINVAL; 13982f2eca20Salex chen } 13992f2eca20Salex chen 1400ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 1401ccd979bdSMark Fasheh 1402b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1403bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 1404b80fc012SMark Fasheh 1405ccd979bdSMark Fasheh again: 1406ccd979bdSMark Fasheh wait = 0; 1407ccd979bdSMark Fasheh 1408a1912826SSunil Mushran spin_lock_irqsave(&lockres->l_lock, flags); 1409a1912826SSunil Mushran 
1410ccd979bdSMark Fasheh if (catch_signals && signal_pending(current)) { 1411ccd979bdSMark Fasheh ret = -ERESTARTSYS; 1412a1912826SSunil Mushran goto unlock; 1413ccd979bdSMark Fasheh } 1414ccd979bdSMark Fasheh 1415ccd979bdSMark Fasheh mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1416ccd979bdSMark Fasheh "Cluster lock called on freeing lockres %s! flags " 1417ccd979bdSMark Fasheh "0x%lx\n", lockres->l_name, lockres->l_flags); 1418ccd979bdSMark Fasheh 1419ccd979bdSMark Fasheh /* We only compare against the currently granted level 1420ccd979bdSMark Fasheh * here. If the lock is blocked waiting on a downconvert, 1421ccd979bdSMark Fasheh * we'll get caught below. */ 1422ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY && 1423ccd979bdSMark Fasheh level > lockres->l_level) { 1424ccd979bdSMark Fasheh /* is someone sitting in dlm_lock? If so, wait on 1425ccd979bdSMark Fasheh * them. */ 1426ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1427ccd979bdSMark Fasheh wait = 1; 1428ccd979bdSMark Fasheh goto unlock; 1429ccd979bdSMark Fasheh } 1430ccd979bdSMark Fasheh 1431a1912826SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { 1432a1912826SSunil Mushran /* 1433a1912826SSunil Mushran * We've upconverted. If the lock now has a level we can 1434a1912826SSunil Mushran * work with, we take it. If, however, the lock is not at the 1435a1912826SSunil Mushran * required level, we go thru the full cycle. One way this could 1436a1912826SSunil Mushran * happen is if a process requesting an upconvert to PR is 1437a1912826SSunil Mushran * closely followed by another requesting upconvert to an EX. 1438a1912826SSunil Mushran * If the process requesting EX lands here, we want it to 1439a1912826SSunil Mushran * continue attempting to upconvert and let the process 1440a1912826SSunil Mushran * requesting PR take the lock. 
1441a1912826SSunil Mushran * If multiple processes request upconvert to PR, the first one 1442a1912826SSunil Mushran * here will take the lock. The others will have to go thru the 1443a1912826SSunil Mushran * OCFS2_LOCK_BLOCKED check to ensure that there is no pending 1444a1912826SSunil Mushran * downconvert request. 1445a1912826SSunil Mushran */ 1446a1912826SSunil Mushran if (level <= lockres->l_level) 1447a1912826SSunil Mushran goto update_holders; 1448a1912826SSunil Mushran } 1449a1912826SSunil Mushran 1450ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1451ccd979bdSMark Fasheh !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1452ccd979bdSMark Fasheh /* is the lock is currently blocked on behalf of 1453ccd979bdSMark Fasheh * another node */ 1454ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); 1455ccd979bdSMark Fasheh wait = 1; 1456ccd979bdSMark Fasheh goto unlock; 1457ccd979bdSMark Fasheh } 1458ccd979bdSMark Fasheh 1459ccd979bdSMark Fasheh if (level > lockres->l_level) { 14601693a5c0SDavid Teigland if (noqueue_attempted > 0) { 14611693a5c0SDavid Teigland ret = -EAGAIN; 14621693a5c0SDavid Teigland goto unlock; 14631693a5c0SDavid Teigland } 14641693a5c0SDavid Teigland if (lkm_flags & DLM_LKF_NOQUEUE) 14651693a5c0SDavid Teigland noqueue_attempted = 1; 14661693a5c0SDavid Teigland 1467ccd979bdSMark Fasheh if (lockres->l_action != OCFS2_AST_INVALID) 1468ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres %s has action %u pending\n", 1469ccd979bdSMark Fasheh lockres->l_name, lockres->l_action); 1470ccd979bdSMark Fasheh 1471019d1b22SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1472019d1b22SMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1473bd3e7610SJoel Becker lkm_flags &= ~DLM_LKF_CONVERT; 1474019d1b22SMark Fasheh } else { 1475ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1476bd3e7610SJoel Becker lkm_flags |= DLM_LKF_CONVERT; 1477019d1b22SMark Fasheh } 1478019d1b22SMark Fasheh 
1479ccd979bdSMark Fasheh lockres->l_requested = level; 1480ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1481de551246SJoel Becker gen = lockres_set_pending(lockres); 1482ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1483ccd979bdSMark Fasheh 1484bd3e7610SJoel Becker BUG_ON(level == DLM_LOCK_IV); 1485bd3e7610SJoel Becker BUG_ON(level == DLM_LOCK_NL); 1486ccd979bdSMark Fasheh 14879b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, convert from %d to %d\n", 1488ccd979bdSMark Fasheh lockres->l_name, lockres->l_level, level); 1489ccd979bdSMark Fasheh 1490ccd979bdSMark Fasheh /* call dlm_lock to upgrade lock now */ 14914670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1492ccd979bdSMark Fasheh level, 1493ccd979bdSMark Fasheh &lockres->l_lksb, 1494019d1b22SMark Fasheh lkm_flags, 1495ccd979bdSMark Fasheh lockres->l_name, 1496a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 1497de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 14987431cd7eSJoel Becker if (ret) { 14997431cd7eSJoel Becker if (!(lkm_flags & DLM_LKF_NOQUEUE) || 15007431cd7eSJoel Becker (ret != -EAGAIN)) { 150124ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", 15027431cd7eSJoel Becker ret, lockres); 1503ccd979bdSMark Fasheh } 1504ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1505ccd979bdSMark Fasheh goto out; 1506ccd979bdSMark Fasheh } 1507d1e78238SXue jiufei dlm_locked = 1; 1508ccd979bdSMark Fasheh 150973ac36eaSColy Li mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", 1510ccd979bdSMark Fasheh lockres->l_name); 1511ccd979bdSMark Fasheh 1512ccd979bdSMark Fasheh /* At this point we've gone inside the dlm and need to 1513ccd979bdSMark Fasheh * complete our work regardless. 
*/ 1514ccd979bdSMark Fasheh catch_signals = 0; 1515ccd979bdSMark Fasheh 1516ccd979bdSMark Fasheh /* wait for busy to clear and carry on */ 1517ccd979bdSMark Fasheh goto again; 1518ccd979bdSMark Fasheh } 1519ccd979bdSMark Fasheh 1520a1912826SSunil Mushran update_holders: 1521ccd979bdSMark Fasheh /* Ok, if we get here then we're good to go. */ 1522ccd979bdSMark Fasheh ocfs2_inc_holders(lockres, level); 1523ccd979bdSMark Fasheh 1524ccd979bdSMark Fasheh ret = 0; 1525ccd979bdSMark Fasheh unlock: 1526a1912826SSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1527a1912826SSunil Mushran 1528b1b1e15eSTariq Saeed /* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */ 1529b1b1e15eSTariq Saeed kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED); 1530b1b1e15eSTariq Saeed 1531ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1532b1b1e15eSTariq Saeed if (kick_dc) 1533b1b1e15eSTariq Saeed ocfs2_wake_downconvert_thread(osb); 1534ccd979bdSMark Fasheh out: 1535ccd979bdSMark Fasheh /* 1536ccd979bdSMark Fasheh * This is helping work around a lock inversion between the page lock 1537ccd979bdSMark Fasheh * and dlm locks. One path holds the page lock while calling aops 1538ccd979bdSMark Fasheh * which block acquiring dlm locks. The voting thread holds dlm 1539ccd979bdSMark Fasheh * locks while acquiring page locks while down converting data locks. 1540ccd979bdSMark Fasheh * This block is helping an aop path notice the inversion and back 1541ccd979bdSMark Fasheh * off to unlock its page lock before trying the dlm lock again. 
1542ccd979bdSMark Fasheh */ 1543ccd979bdSMark Fasheh if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && 1544ccd979bdSMark Fasheh mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { 1545ccd979bdSMark Fasheh wait = 0; 1546d1e78238SXue jiufei spin_lock_irqsave(&lockres->l_lock, flags); 1547d1e78238SXue jiufei if (__lockres_remove_mask_waiter(lockres, &mw)) { 1548d1e78238SXue jiufei if (dlm_locked) 1549d1e78238SXue jiufei lockres_or_flags(lockres, 1550d1e78238SXue jiufei OCFS2_LOCK_NONBLOCK_FINISHED); 1551d1e78238SXue jiufei spin_unlock_irqrestore(&lockres->l_lock, flags); 1552ccd979bdSMark Fasheh ret = -EAGAIN; 1553d1e78238SXue jiufei } else { 1554d1e78238SXue jiufei spin_unlock_irqrestore(&lockres->l_lock, flags); 1555ccd979bdSMark Fasheh goto again; 1556ccd979bdSMark Fasheh } 1557d1e78238SXue jiufei } 1558ccd979bdSMark Fasheh if (wait) { 1559ccd979bdSMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1560ccd979bdSMark Fasheh if (ret == 0) 1561ccd979bdSMark Fasheh goto again; 1562ccd979bdSMark Fasheh mlog_errno(ret); 1563ccd979bdSMark Fasheh } 15648ddb7b00SSunil Mushran ocfs2_update_lock_stats(lockres, level, &mw, ret); 1565ccd979bdSMark Fasheh 1566cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 1567cb25797dSJan Kara if (!ret && lockres->l_lockdep_map.key != NULL) { 1568cb25797dSJan Kara if (level == DLM_LOCK_PR) 1569cb25797dSJan Kara rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, 1570cb25797dSJan Kara !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1571cb25797dSJan Kara caller_ip); 1572cb25797dSJan Kara else 1573cb25797dSJan Kara rwsem_acquire(&lockres->l_lockdep_map, l_subclass, 1574cb25797dSJan Kara !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1575cb25797dSJan Kara caller_ip); 1576cb25797dSJan Kara } 1577cb25797dSJan Kara #endif 1578ccd979bdSMark Fasheh return ret; 1579ccd979bdSMark Fasheh } 1580ccd979bdSMark Fasheh 1581cb25797dSJan Kara static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, 1582ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1583cb25797dSJan 
Kara int level, 1584cb25797dSJan Kara u32 lkm_flags, 1585cb25797dSJan Kara int arg_flags) 1586cb25797dSJan Kara { 1587cb25797dSJan Kara return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, 1588cb25797dSJan Kara 0, _RET_IP_); 1589cb25797dSJan Kara } 1590cb25797dSJan Kara 1591cb25797dSJan Kara 1592cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 1593cb25797dSJan Kara struct ocfs2_lock_res *lockres, 1594cb25797dSJan Kara int level, 1595cb25797dSJan Kara unsigned long caller_ip) 1596ccd979bdSMark Fasheh { 1597ccd979bdSMark Fasheh unsigned long flags; 1598ccd979bdSMark Fasheh 1599ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1600ccd979bdSMark Fasheh ocfs2_dec_holders(lockres, level); 160134d024f8SMark Fasheh ocfs2_downconvert_on_unlock(osb, lockres); 1602ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1603cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 1604cb25797dSJan Kara if (lockres->l_lockdep_map.key != NULL) 1605cb25797dSJan Kara rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); 1606cb25797dSJan Kara #endif 1607ccd979bdSMark Fasheh } 1608ccd979bdSMark Fasheh 1609da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1610d680efe9SMark Fasheh struct ocfs2_lock_res *lockres, 161124c19ef4SMark Fasheh int ex, 161224c19ef4SMark Fasheh int local) 1613ccd979bdSMark Fasheh { 1614bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1615ccd979bdSMark Fasheh unsigned long flags; 1616bd3e7610SJoel Becker u32 lkm_flags = local ? 
DLM_LKF_LOCAL : 0; 1617ccd979bdSMark Fasheh 1618ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1619ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1620ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1621ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1622ccd979bdSMark Fasheh 162324c19ef4SMark Fasheh return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1624ccd979bdSMark Fasheh } 1625ccd979bdSMark Fasheh 1626ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping 1627ccd979bdSMark Fasheh * the normal cluster directory lookup. Use this ONLY on newly created 1628ccd979bdSMark Fasheh * inodes which other nodes can't possibly see, and which haven't been 1629ccd979bdSMark Fasheh * hashed in the inode hash yet. This can give us a good performance 1630ccd979bdSMark Fasheh * increase as it'll skip the network broadcast normally associated 1631ccd979bdSMark Fasheh * with creating a new lock resource. */ 1632ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode) 1633ccd979bdSMark Fasheh { 1634ccd979bdSMark Fasheh int ret; 1635d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1636ccd979bdSMark Fasheh 1637ccd979bdSMark Fasheh BUG_ON(!inode); 1638ccd979bdSMark Fasheh BUG_ON(!ocfs2_inode_is_new(inode)); 1639ccd979bdSMark Fasheh 1640b0697053SMark Fasheh mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1641ccd979bdSMark Fasheh 1642ccd979bdSMark Fasheh /* NOTE: That we don't increment any of the holder counts, nor 1643ccd979bdSMark Fasheh * do we add anything to a journal handle. Since this is 1644ccd979bdSMark Fasheh * supposed to be a new inode which the cluster doesn't know 1645ccd979bdSMark Fasheh * about yet, there is no need to. 
As far as the LVB handling 1646ccd979bdSMark Fasheh * is concerned, this is basically like acquiring an EX lock 1647ccd979bdSMark Fasheh * on a resource which has an invalid one -- we'll set it 1648ccd979bdSMark Fasheh * valid when we release the EX. */ 1649ccd979bdSMark Fasheh 165024c19ef4SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1651ccd979bdSMark Fasheh if (ret) { 1652ccd979bdSMark Fasheh mlog_errno(ret); 1653ccd979bdSMark Fasheh goto bail; 1654ccd979bdSMark Fasheh } 1655ccd979bdSMark Fasheh 165624c19ef4SMark Fasheh /* 1657bd3e7610SJoel Becker * We don't want to use DLM_LKF_LOCAL on a meta data lock as they 165824c19ef4SMark Fasheh * don't use a generation in their lock names. 165924c19ef4SMark Fasheh */ 1660e63aecb6SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1661ccd979bdSMark Fasheh if (ret) { 1662ccd979bdSMark Fasheh mlog_errno(ret); 1663ccd979bdSMark Fasheh goto bail; 1664ccd979bdSMark Fasheh } 1665ccd979bdSMark Fasheh 166650008630STiger Yang ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 166750008630STiger Yang if (ret) { 166850008630STiger Yang mlog_errno(ret); 166950008630STiger Yang goto bail; 167050008630STiger Yang } 167150008630STiger Yang 1672ccd979bdSMark Fasheh bail: 1673ccd979bdSMark Fasheh return ret; 1674ccd979bdSMark Fasheh } 1675ccd979bdSMark Fasheh 1676ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write) 1677ccd979bdSMark Fasheh { 1678ccd979bdSMark Fasheh int status, level; 1679ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres; 1680c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1681ccd979bdSMark Fasheh 1682ccd979bdSMark Fasheh BUG_ON(!inode); 1683ccd979bdSMark Fasheh 1684b0697053SMark Fasheh mlog(0, "inode %llu take %s RW lock\n", 1685b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1686ccd979bdSMark Fasheh write ? 
"EXMODE" : "PRMODE"); 1687ccd979bdSMark Fasheh 1688c1e8d35eSTao Ma if (ocfs2_mount_local(osb)) 1689c271c5c2SSunil Mushran return 0; 1690c271c5c2SSunil Mushran 1691ccd979bdSMark Fasheh lockres = &OCFS2_I(inode)->ip_rw_lockres; 1692ccd979bdSMark Fasheh 1693bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1694ccd979bdSMark Fasheh 1695ccd979bdSMark Fasheh status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1696ccd979bdSMark Fasheh 0); 1697ccd979bdSMark Fasheh if (status < 0) 1698ccd979bdSMark Fasheh mlog_errno(status); 1699ccd979bdSMark Fasheh 1700ccd979bdSMark Fasheh return status; 1701ccd979bdSMark Fasheh } 1702ccd979bdSMark Fasheh 1703ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write) 1704ccd979bdSMark Fasheh { 1705bd3e7610SJoel Becker int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1706ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1707c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1708ccd979bdSMark Fasheh 1709b0697053SMark Fasheh mlog(0, "inode %llu drop %s RW lock\n", 1710b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1711ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1712ccd979bdSMark Fasheh 1713c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1714ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1715ccd979bdSMark Fasheh } 1716ccd979bdSMark Fasheh 171750008630STiger Yang /* 171850008630STiger Yang * ocfs2_open_lock always get PR mode lock. 
171950008630STiger Yang */ 172050008630STiger Yang int ocfs2_open_lock(struct inode *inode) 172150008630STiger Yang { 172250008630STiger Yang int status = 0; 172350008630STiger Yang struct ocfs2_lock_res *lockres; 172450008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 172550008630STiger Yang 172650008630STiger Yang BUG_ON(!inode); 172750008630STiger Yang 172850008630STiger Yang mlog(0, "inode %llu take PRMODE open lock\n", 172950008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 173050008630STiger Yang 173103efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) 173250008630STiger Yang goto out; 173350008630STiger Yang 173450008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 173550008630STiger Yang 173650008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1737bd3e7610SJoel Becker DLM_LOCK_PR, 0, 0); 173850008630STiger Yang if (status < 0) 173950008630STiger Yang mlog_errno(status); 174050008630STiger Yang 174150008630STiger Yang out: 174250008630STiger Yang return status; 174350008630STiger Yang } 174450008630STiger Yang 174550008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write) 174650008630STiger Yang { 174750008630STiger Yang int status = 0, level; 174850008630STiger Yang struct ocfs2_lock_res *lockres; 174950008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 175050008630STiger Yang 175150008630STiger Yang BUG_ON(!inode); 175250008630STiger Yang 175350008630STiger Yang mlog(0, "inode %llu try to take %s open lock\n", 175450008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno, 175550008630STiger Yang write ? 
"EXMODE" : "PRMODE"); 175650008630STiger Yang 175703efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb)) { 175803efed8aSTiger Yang if (write) 175903efed8aSTiger Yang status = -EROFS; 176003efed8aSTiger Yang goto out; 176103efed8aSTiger Yang } 176203efed8aSTiger Yang 176350008630STiger Yang if (ocfs2_mount_local(osb)) 176450008630STiger Yang goto out; 176550008630STiger Yang 176650008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 176750008630STiger Yang 1768bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 176950008630STiger Yang 177050008630STiger Yang /* 177150008630STiger Yang * The file system may already holding a PRMODE/EXMODE open lock. 1772bd3e7610SJoel Becker * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 177350008630STiger Yang * other nodes and the -EAGAIN will indicate to the caller that 177450008630STiger Yang * this inode is still in use. 177550008630STiger Yang */ 177650008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1777bd3e7610SJoel Becker level, DLM_LKF_NOQUEUE, 0); 177850008630STiger Yang 177950008630STiger Yang out: 178050008630STiger Yang return status; 178150008630STiger Yang } 178250008630STiger Yang 178350008630STiger Yang /* 178450008630STiger Yang * ocfs2_open_unlock unlock PR and EX mode open locks. 
178550008630STiger Yang */ 178650008630STiger Yang void ocfs2_open_unlock(struct inode *inode) 178750008630STiger Yang { 178850008630STiger Yang struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 178950008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 179050008630STiger Yang 179150008630STiger Yang mlog(0, "inode %llu drop open lock\n", 179250008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 179350008630STiger Yang 179450008630STiger Yang if (ocfs2_mount_local(osb)) 179550008630STiger Yang goto out; 179650008630STiger Yang 179750008630STiger Yang if(lockres->l_ro_holders) 179850008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1799bd3e7610SJoel Becker DLM_LOCK_PR); 180050008630STiger Yang if(lockres->l_ex_holders) 180150008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1802bd3e7610SJoel Becker DLM_LOCK_EX); 180350008630STiger Yang 180450008630STiger Yang out: 1805c1e8d35eSTao Ma return; 180650008630STiger Yang } 180750008630STiger Yang 1808cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1809cf8e06f1SMark Fasheh int level) 1810cf8e06f1SMark Fasheh { 1811cf8e06f1SMark Fasheh int ret; 1812cf8e06f1SMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1813cf8e06f1SMark Fasheh unsigned long flags; 1814cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1815cf8e06f1SMark Fasheh 1816cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1817cf8e06f1SMark Fasheh 1818cf8e06f1SMark Fasheh retry_cancel: 1819cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1820cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1821cf8e06f1SMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 1822cf8e06f1SMark Fasheh if (ret) { 1823cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1824cf8e06f1SMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 1825cf8e06f1SMark Fasheh if (ret < 0) { 
1826cf8e06f1SMark Fasheh mlog_errno(ret); 1827cf8e06f1SMark Fasheh goto out; 1828cf8e06f1SMark Fasheh } 1829cf8e06f1SMark Fasheh goto retry_cancel; 1830cf8e06f1SMark Fasheh } 1831cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1832cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1833cf8e06f1SMark Fasheh 1834cf8e06f1SMark Fasheh ocfs2_wait_for_mask(&mw); 1835cf8e06f1SMark Fasheh goto retry_cancel; 1836cf8e06f1SMark Fasheh } 1837cf8e06f1SMark Fasheh 1838cf8e06f1SMark Fasheh ret = -ERESTARTSYS; 1839cf8e06f1SMark Fasheh /* 1840cf8e06f1SMark Fasheh * We may still have gotten the lock, in which case there's no 1841cf8e06f1SMark Fasheh * point to restarting the syscall. 1842cf8e06f1SMark Fasheh */ 1843cf8e06f1SMark Fasheh if (lockres->l_level == level) 1844cf8e06f1SMark Fasheh ret = 0; 1845cf8e06f1SMark Fasheh 1846cf8e06f1SMark Fasheh mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1847cf8e06f1SMark Fasheh lockres->l_flags, lockres->l_level, lockres->l_action); 1848cf8e06f1SMark Fasheh 1849cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1850cf8e06f1SMark Fasheh 1851cf8e06f1SMark Fasheh out: 1852cf8e06f1SMark Fasheh return ret; 1853cf8e06f1SMark Fasheh } 1854cf8e06f1SMark Fasheh 1855cf8e06f1SMark Fasheh /* 1856cf8e06f1SMark Fasheh * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1857cf8e06f1SMark Fasheh * flock() calls. The locking approach this requires is sufficiently 1858cf8e06f1SMark Fasheh * different from all other cluster lock types that we implement a 18593ad2f3fbSDaniel Mack * separate path to the "low-level" dlm calls. In particular: 1860cf8e06f1SMark Fasheh * 1861cf8e06f1SMark Fasheh * - No optimization of lock levels is done - we take at exactly 1862cf8e06f1SMark Fasheh * what's been requested. 1863cf8e06f1SMark Fasheh * 1864cf8e06f1SMark Fasheh * - No lock caching is employed. 
We immediately downconvert to 1865cf8e06f1SMark Fasheh * no-lock at unlock time. This also means flock locks never go on 1866cf8e06f1SMark Fasheh * the blocking list). 1867cf8e06f1SMark Fasheh * 1868cf8e06f1SMark Fasheh * - Since userspace can trivially deadlock itself with flock, we make 1869cf8e06f1SMark Fasheh * sure to allow cancellation of a misbehaving applications flock() 1870cf8e06f1SMark Fasheh * request. 1871cf8e06f1SMark Fasheh * 1872cf8e06f1SMark Fasheh * - Access to any flock lockres doesn't require concurrency, so we 1873cf8e06f1SMark Fasheh * can simplify the code by requiring the caller to guarantee 1874cf8e06f1SMark Fasheh * serialization of dlmglue flock calls. 1875cf8e06f1SMark Fasheh */ 1876cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock) 1877cf8e06f1SMark Fasheh { 1878e988cf1cSMark Fasheh int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1879e988cf1cSMark Fasheh unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; 1880cf8e06f1SMark Fasheh unsigned long flags; 1881cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1882cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1883cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1884cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1885cf8e06f1SMark Fasheh 1886cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1887cf8e06f1SMark Fasheh 1888cf8e06f1SMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1889bd3e7610SJoel Becker (lockres->l_level > DLM_LOCK_NL)) { 1890cf8e06f1SMark Fasheh mlog(ML_ERROR, 1891cf8e06f1SMark Fasheh "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1892cf8e06f1SMark Fasheh "level: %u\n", lockres->l_name, lockres->l_flags, 1893cf8e06f1SMark Fasheh lockres->l_level); 1894cf8e06f1SMark Fasheh return -EINVAL; 1895cf8e06f1SMark Fasheh } 1896cf8e06f1SMark Fasheh 1897cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1898cf8e06f1SMark Fasheh if 
(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1899cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1900cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1901cf8e06f1SMark Fasheh 1902cf8e06f1SMark Fasheh /* 1903cf8e06f1SMark Fasheh * Get the lock at NLMODE to start - that way we 1904cf8e06f1SMark Fasheh * can cancel the upconvert request if need be. 1905cf8e06f1SMark Fasheh */ 1906e988cf1cSMark Fasheh ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); 1907cf8e06f1SMark Fasheh if (ret < 0) { 1908cf8e06f1SMark Fasheh mlog_errno(ret); 1909cf8e06f1SMark Fasheh goto out; 1910cf8e06f1SMark Fasheh } 1911cf8e06f1SMark Fasheh 1912cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1913cf8e06f1SMark Fasheh if (ret) { 1914cf8e06f1SMark Fasheh mlog_errno(ret); 1915cf8e06f1SMark Fasheh goto out; 1916cf8e06f1SMark Fasheh } 1917cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1918cf8e06f1SMark Fasheh } 1919cf8e06f1SMark Fasheh 1920cf8e06f1SMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1921e988cf1cSMark Fasheh lkm_flags |= DLM_LKF_CONVERT; 1922cf8e06f1SMark Fasheh lockres->l_requested = level; 1923cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1924cf8e06f1SMark Fasheh 1925cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1926cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1927cf8e06f1SMark Fasheh 19284670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, 1929a796d286SJoel Becker lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1); 19307431cd7eSJoel Becker if (ret) { 19317431cd7eSJoel Becker if (!trylock || (ret != -EAGAIN)) { 193224ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1933cf8e06f1SMark Fasheh ret = -EINVAL; 1934cf8e06f1SMark Fasheh } 1935cf8e06f1SMark Fasheh 1936cf8e06f1SMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1937cf8e06f1SMark Fasheh 
lockres_remove_mask_waiter(lockres, &mw); 1938cf8e06f1SMark Fasheh goto out; 1939cf8e06f1SMark Fasheh } 1940cf8e06f1SMark Fasheh 1941cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 1942cf8e06f1SMark Fasheh if (ret == -ERESTARTSYS) { 1943cf8e06f1SMark Fasheh /* 1944cf8e06f1SMark Fasheh * Userspace can cause deadlock itself with 1945cf8e06f1SMark Fasheh * flock(). Current behavior locally is to allow the 1946cf8e06f1SMark Fasheh * deadlock, but abort the system call if a signal is 1947cf8e06f1SMark Fasheh * received. We follow this example, otherwise a 1948cf8e06f1SMark Fasheh * poorly written program could sit in kernel until 1949cf8e06f1SMark Fasheh * reboot. 1950cf8e06f1SMark Fasheh * 1951cf8e06f1SMark Fasheh * Handling this is a bit more complicated for Ocfs2 1952cf8e06f1SMark Fasheh * though. We can't exit this function with an 1953cf8e06f1SMark Fasheh * outstanding lock request, so a cancel convert is 1954cf8e06f1SMark Fasheh * required. We intentionally overwrite 'ret' - if the 1955cf8e06f1SMark Fasheh * cancel fails and the lock was granted, it's easier 1956af901ca1SAndré Goddard Rosa * to just bubble success back up to the user. 
1957cf8e06f1SMark Fasheh */ 1958cf8e06f1SMark Fasheh ret = ocfs2_flock_handle_signal(lockres, level); 19591693a5c0SDavid Teigland } else if (!ret && (level > lockres->l_level)) { 19601693a5c0SDavid Teigland /* Trylock failed asynchronously */ 19611693a5c0SDavid Teigland BUG_ON(!trylock); 19621693a5c0SDavid Teigland ret = -EAGAIN; 1963cf8e06f1SMark Fasheh } 1964cf8e06f1SMark Fasheh 1965cf8e06f1SMark Fasheh out: 1966cf8e06f1SMark Fasheh 1967cf8e06f1SMark Fasheh mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 1968cf8e06f1SMark Fasheh lockres->l_name, ex, trylock, ret); 1969cf8e06f1SMark Fasheh return ret; 1970cf8e06f1SMark Fasheh } 1971cf8e06f1SMark Fasheh 1972cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file) 1973cf8e06f1SMark Fasheh { 1974cf8e06f1SMark Fasheh int ret; 1975de551246SJoel Becker unsigned int gen; 1976cf8e06f1SMark Fasheh unsigned long flags; 1977cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1978cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1979cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1980cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1981cf8e06f1SMark Fasheh 1982cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1983cf8e06f1SMark Fasheh 1984cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 1985cf8e06f1SMark Fasheh return; 1986cf8e06f1SMark Fasheh 1987e988cf1cSMark Fasheh if (lockres->l_level == DLM_LOCK_NL) 1988cf8e06f1SMark Fasheh return; 1989cf8e06f1SMark Fasheh 1990cf8e06f1SMark Fasheh mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 1991cf8e06f1SMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_level, 1992cf8e06f1SMark Fasheh lockres->l_action); 1993cf8e06f1SMark Fasheh 1994cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1995cf8e06f1SMark Fasheh /* 1996cf8e06f1SMark Fasheh * Fake a blocking ast for the downconvert code. 
1997cf8e06f1SMark Fasheh */ 1998cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1999bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_EX; 2000cf8e06f1SMark Fasheh 2001e988cf1cSMark Fasheh gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); 2002cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 2003cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2004cf8e06f1SMark Fasheh 2005e988cf1cSMark Fasheh ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); 2006cf8e06f1SMark Fasheh if (ret) { 2007cf8e06f1SMark Fasheh mlog_errno(ret); 2008cf8e06f1SMark Fasheh return; 2009cf8e06f1SMark Fasheh } 2010cf8e06f1SMark Fasheh 2011cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 2012cf8e06f1SMark Fasheh if (ret) 2013cf8e06f1SMark Fasheh mlog_errno(ret); 2014cf8e06f1SMark Fasheh } 2015cf8e06f1SMark Fasheh 201634d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 2017ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 2018ccd979bdSMark Fasheh { 2019ccd979bdSMark Fasheh int kick = 0; 2020ccd979bdSMark Fasheh 2021ccd979bdSMark Fasheh /* If we know that another node is waiting on our lock, kick 202234d024f8SMark Fasheh * the downconvert thread * pre-emptively when we reach a release 2023ccd979bdSMark Fasheh * condition. 
*/ 2024ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 2025ccd979bdSMark Fasheh switch(lockres->l_blocking) { 2026bd3e7610SJoel Becker case DLM_LOCK_EX: 2027ccd979bdSMark Fasheh if (!lockres->l_ex_holders && !lockres->l_ro_holders) 2028ccd979bdSMark Fasheh kick = 1; 2029ccd979bdSMark Fasheh break; 2030bd3e7610SJoel Becker case DLM_LOCK_PR: 2031ccd979bdSMark Fasheh if (!lockres->l_ex_holders) 2032ccd979bdSMark Fasheh kick = 1; 2033ccd979bdSMark Fasheh break; 2034ccd979bdSMark Fasheh default: 2035ccd979bdSMark Fasheh BUG(); 2036ccd979bdSMark Fasheh } 2037ccd979bdSMark Fasheh } 2038ccd979bdSMark Fasheh 2039ccd979bdSMark Fasheh if (kick) 204034d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 2041ccd979bdSMark Fasheh } 2042ccd979bdSMark Fasheh 2043ccd979bdSMark Fasheh #define OCFS2_SEC_BITS 34 2044ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT (64 - 34) 2045ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 2046ccd979bdSMark Fasheh 2047ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for 2048ccd979bdSMark Fasheh * now. */ 2049ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec) 2050ccd979bdSMark Fasheh { 2051ccd979bdSMark Fasheh u64 res; 2052ccd979bdSMark Fasheh u64 sec = spec->tv_sec; 2053ccd979bdSMark Fasheh u32 nsec = spec->tv_nsec; 2054ccd979bdSMark Fasheh 2055ccd979bdSMark Fasheh res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 2056ccd979bdSMark Fasheh 2057ccd979bdSMark Fasheh return res; 2058ccd979bdSMark Fasheh } 2059ccd979bdSMark Fasheh 2060ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't 2061ccd979bdSMark Fasheh * need ip_lock in this function as anyone who would be changing those 2062e63aecb6SMark Fasheh * values is supposed to be blocked in ocfs2_inode_lock right now. 
*/ 2063ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode) 2064ccd979bdSMark Fasheh { 2065ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2066e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2067ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 2068ccd979bdSMark Fasheh 2069a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2070ccd979bdSMark Fasheh 207124c19ef4SMark Fasheh /* 207224c19ef4SMark Fasheh * Invalidate the LVB of a deleted inode - this way other 207324c19ef4SMark Fasheh * nodes are forced to go to disk and discover the new inode 207424c19ef4SMark Fasheh * status. 207524c19ef4SMark Fasheh */ 207624c19ef4SMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 207724c19ef4SMark Fasheh lvb->lvb_version = 0; 207824c19ef4SMark Fasheh goto out; 207924c19ef4SMark Fasheh } 208024c19ef4SMark Fasheh 20814d3b83f7SMark Fasheh lvb->lvb_version = OCFS2_LVB_VERSION; 2082ccd979bdSMark Fasheh lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 2083ccd979bdSMark Fasheh lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 208403ab30f7SEric W. Biederman lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode)); 208503ab30f7SEric W. 
Biederman lvb->lvb_igid = cpu_to_be32(i_gid_read(inode)); 2086ccd979bdSMark Fasheh lvb->lvb_imode = cpu_to_be16(inode->i_mode); 2087ccd979bdSMark Fasheh lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 2088ccd979bdSMark Fasheh lvb->lvb_iatime_packed = 2089ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 2090ccd979bdSMark Fasheh lvb->lvb_ictime_packed = 2091ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 2092ccd979bdSMark Fasheh lvb->lvb_imtime_packed = 2093ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 2094ca4d147eSHerbert Poetzl lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 209515b1e36bSMark Fasheh lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 2096f9e2d82eSMark Fasheh lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 2097ccd979bdSMark Fasheh 209824c19ef4SMark Fasheh out: 2099ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 2100ccd979bdSMark Fasheh } 2101ccd979bdSMark Fasheh 2102ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec, 2103ccd979bdSMark Fasheh u64 packed_time) 2104ccd979bdSMark Fasheh { 2105ccd979bdSMark Fasheh spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 2106ccd979bdSMark Fasheh spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 2107ccd979bdSMark Fasheh } 2108ccd979bdSMark Fasheh 2109ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 2110ccd979bdSMark Fasheh { 2111ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2112e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2113ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 2114ccd979bdSMark Fasheh 2115ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 2116ccd979bdSMark Fasheh 2117a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2118ccd979bdSMark Fasheh 2119ccd979bdSMark Fasheh /* We're safe here without the lockres lock... 
*/ 2120ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2121ccd979bdSMark Fasheh oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 2122ccd979bdSMark Fasheh i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 2123ccd979bdSMark Fasheh 2124ca4d147eSHerbert Poetzl oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 212515b1e36bSMark Fasheh oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 2126ca4d147eSHerbert Poetzl ocfs2_set_inode_flags(inode); 2127ca4d147eSHerbert Poetzl 2128ccd979bdSMark Fasheh /* fast-symlinks are a special case */ 2129ccd979bdSMark Fasheh if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 2130ccd979bdSMark Fasheh inode->i_blocks = 0; 2131ccd979bdSMark Fasheh else 21328110b073SMark Fasheh inode->i_blocks = ocfs2_inode_sector_count(inode); 2133ccd979bdSMark Fasheh 213403ab30f7SEric W. Biederman i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid)); 213503ab30f7SEric W. Biederman i_gid_write(inode, be32_to_cpu(lvb->lvb_igid)); 2136ccd979bdSMark Fasheh inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2137bfe86848SMiklos Szeredi set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); 2138ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_atime, 2139ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_iatime_packed)); 2140ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_mtime, 2141ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_imtime_packed)); 2142ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_ctime, 2143ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_ictime_packed)); 2144ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2145ccd979bdSMark Fasheh } 2146ccd979bdSMark Fasheh 2147f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 2148f9e2d82eSMark Fasheh struct ocfs2_lock_res *lockres) 2149ccd979bdSMark Fasheh { 2150a641dc2aSMark Fasheh struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2151ccd979bdSMark Fasheh 21521c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) 21531c520dfbSJoel Becker && lvb->lvb_version == OCFS2_LVB_VERSION 
2154f9e2d82eSMark Fasheh && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 2155ccd979bdSMark Fasheh return 1; 2156ccd979bdSMark Fasheh return 0; 2157ccd979bdSMark Fasheh } 2158ccd979bdSMark Fasheh 2159ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and 2160ccd979bdSMark Fasheh * arbitrate who gets to refresh it. 2161ccd979bdSMark Fasheh * 2162ccd979bdSMark Fasheh * 0 means no refresh needed. 2163ccd979bdSMark Fasheh * 2164ccd979bdSMark Fasheh * > 0 means you need to refresh this and you MUST call 2165ccd979bdSMark Fasheh * ocfs2_complete_lock_res_refresh afterwards. */ 2166ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 2167ccd979bdSMark Fasheh { 2168ccd979bdSMark Fasheh unsigned long flags; 2169ccd979bdSMark Fasheh int status = 0; 2170ccd979bdSMark Fasheh 2171ccd979bdSMark Fasheh refresh_check: 2172ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2173ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 2174ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2175ccd979bdSMark Fasheh goto bail; 2176ccd979bdSMark Fasheh } 2177ccd979bdSMark Fasheh 2178ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 2179ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2180ccd979bdSMark Fasheh 2181ccd979bdSMark Fasheh ocfs2_wait_on_refreshing_lock(lockres); 2182ccd979bdSMark Fasheh goto refresh_check; 2183ccd979bdSMark Fasheh } 2184ccd979bdSMark Fasheh 2185ccd979bdSMark Fasheh /* Ok, I'll be the one to refresh this lock. 
*/ 2186ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 2187ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2188ccd979bdSMark Fasheh 2189ccd979bdSMark Fasheh status = 1; 2190ccd979bdSMark Fasheh bail: 2191c1e8d35eSTao Ma mlog(0, "status %d\n", status); 2192ccd979bdSMark Fasheh return status; 2193ccd979bdSMark Fasheh } 2194ccd979bdSMark Fasheh 2195ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh 2196ccd979bdSMark Fasheh * anymroe, but i won't clear the needs refresh flag. */ 2197ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 2198ccd979bdSMark Fasheh int status) 2199ccd979bdSMark Fasheh { 2200ccd979bdSMark Fasheh unsigned long flags; 2201ccd979bdSMark Fasheh 2202ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2203ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 2204ccd979bdSMark Fasheh if (!status) 2205ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 2206ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2207ccd979bdSMark Fasheh 2208ccd979bdSMark Fasheh wake_up(&lockres->l_event); 2209ccd979bdSMark Fasheh } 2210ccd979bdSMark Fasheh 2211ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. 
*/ 2212e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 2213ccd979bdSMark Fasheh struct buffer_head **bh) 2214ccd979bdSMark Fasheh { 2215ccd979bdSMark Fasheh int status = 0; 2216ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2217e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2218ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 2219c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2220ccd979bdSMark Fasheh 2221be9e986bSMark Fasheh if (ocfs2_mount_local(osb)) 2222be9e986bSMark Fasheh goto bail; 2223be9e986bSMark Fasheh 2224ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2225ccd979bdSMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 2226b0697053SMark Fasheh mlog(0, "Orphaned inode %llu was deleted while we " 2227ccd979bdSMark Fasheh "were waiting on a lock. ip_flags = 0x%x\n", 2228b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, oi->ip_flags); 2229ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2230ccd979bdSMark Fasheh status = -ENOENT; 2231ccd979bdSMark Fasheh goto bail; 2232ccd979bdSMark Fasheh } 2233ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2234ccd979bdSMark Fasheh 2235ccd979bdSMark Fasheh if (!ocfs2_should_refresh_lock_res(lockres)) 2236ccd979bdSMark Fasheh goto bail; 2237ccd979bdSMark Fasheh 2238ccd979bdSMark Fasheh /* This will discard any caching information we might have had 2239ccd979bdSMark Fasheh * for the inode metadata. */ 22408cb471e8SJoel Becker ocfs2_metadata_cache_purge(INODE_CACHE(inode)); 2241ccd979bdSMark Fasheh 224283418978SMark Fasheh ocfs2_extent_map_trunc(inode, 0); 224383418978SMark Fasheh 2244be9e986bSMark Fasheh if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 2245b0697053SMark Fasheh mlog(0, "Trusting LVB on inode %llu\n", 2246b0697053SMark Fasheh (unsigned long long)oi->ip_blkno); 2247ccd979bdSMark Fasheh ocfs2_refresh_inode_from_lvb(inode); 2248ccd979bdSMark Fasheh } else { 2249ccd979bdSMark Fasheh /* Boo, we have to go to disk. 
*/ 2250ccd979bdSMark Fasheh /* read bh, cast, ocfs2_refresh_inode */ 2251b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, bh); 2252ccd979bdSMark Fasheh if (status < 0) { 2253ccd979bdSMark Fasheh mlog_errno(status); 2254ccd979bdSMark Fasheh goto bail_refresh; 2255ccd979bdSMark Fasheh } 2256ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) (*bh)->b_data; 2257ccd979bdSMark Fasheh 2258ccd979bdSMark Fasheh /* This is a good chance to make sure we're not 2259b657c95cSJoel Becker * locking an invalid object. ocfs2_read_inode_block() 2260b657c95cSJoel Becker * already checked that the inode block is sane. 2261ccd979bdSMark Fasheh * 2262ccd979bdSMark Fasheh * We bug on a stale inode here because we checked 2263ccd979bdSMark Fasheh * above whether it was wiped from disk. The wiping 2264ccd979bdSMark Fasheh * node provides a guarantee that we receive that 2265ccd979bdSMark Fasheh * message and can mark the inode before dropping any 2266ccd979bdSMark Fasheh * locks associated with it. */ 2267ccd979bdSMark Fasheh mlog_bug_on_msg(inode->i_generation != 2268ccd979bdSMark Fasheh le32_to_cpu(fe->i_generation), 2269b0697053SMark Fasheh "Invalid dinode %llu disk generation: %u " 2270ccd979bdSMark Fasheh "inode->i_generation: %u\n", 2271b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2272b0697053SMark Fasheh le32_to_cpu(fe->i_generation), 2273ccd979bdSMark Fasheh inode->i_generation); 2274ccd979bdSMark Fasheh mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 2275ccd979bdSMark Fasheh !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 2276b0697053SMark Fasheh "Stale dinode %llu dtime: %llu flags: 0x%x\n", 2277b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2278b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_dtime), 2279ccd979bdSMark Fasheh le32_to_cpu(fe->i_flags)); 2280ccd979bdSMark Fasheh 2281ccd979bdSMark Fasheh ocfs2_refresh_inode(inode, fe); 22828ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2283ccd979bdSMark Fasheh } 2284ccd979bdSMark Fasheh 
2285ccd979bdSMark Fasheh status = 0; 2286ccd979bdSMark Fasheh bail_refresh: 2287ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2288ccd979bdSMark Fasheh bail: 2289ccd979bdSMark Fasheh return status; 2290ccd979bdSMark Fasheh } 2291ccd979bdSMark Fasheh 2292ccd979bdSMark Fasheh static int ocfs2_assign_bh(struct inode *inode, 2293ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2294ccd979bdSMark Fasheh struct buffer_head *passed_bh) 2295ccd979bdSMark Fasheh { 2296ccd979bdSMark Fasheh int status; 2297ccd979bdSMark Fasheh 2298ccd979bdSMark Fasheh if (passed_bh) { 2299ccd979bdSMark Fasheh /* Ok, the update went to disk for us, use the 2300ccd979bdSMark Fasheh * returned bh. */ 2301ccd979bdSMark Fasheh *ret_bh = passed_bh; 2302ccd979bdSMark Fasheh get_bh(*ret_bh); 2303ccd979bdSMark Fasheh 2304ccd979bdSMark Fasheh return 0; 2305ccd979bdSMark Fasheh } 2306ccd979bdSMark Fasheh 2307b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, ret_bh); 2308ccd979bdSMark Fasheh if (status < 0) 2309ccd979bdSMark Fasheh mlog_errno(status); 2310ccd979bdSMark Fasheh 2311ccd979bdSMark Fasheh return status; 2312ccd979bdSMark Fasheh } 2313ccd979bdSMark Fasheh 2314ccd979bdSMark Fasheh /* 2315ccd979bdSMark Fasheh * returns < 0 error if the callback will never be called, otherwise 2316ccd979bdSMark Fasheh * the result of the lock will be communicated via the callback. 
2317ccd979bdSMark Fasheh */ 2318cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode, 2319ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2320ccd979bdSMark Fasheh int ex, 2321cb25797dSJan Kara int arg_flags, 2322cb25797dSJan Kara int subclass) 2323ccd979bdSMark Fasheh { 2324bd3e7610SJoel Becker int status, level, acquired; 2325bd3e7610SJoel Becker u32 dlm_flags; 2326c271c5c2SSunil Mushran struct ocfs2_lock_res *lockres = NULL; 2327ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2328ccd979bdSMark Fasheh struct buffer_head *local_bh = NULL; 2329ccd979bdSMark Fasheh 2330ccd979bdSMark Fasheh BUG_ON(!inode); 2331ccd979bdSMark Fasheh 2332b0697053SMark Fasheh mlog(0, "inode %llu, take %s META lock\n", 2333b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2334ccd979bdSMark Fasheh ex ? "EXMODE" : "PRMODE"); 2335ccd979bdSMark Fasheh 2336ccd979bdSMark Fasheh status = 0; 2337ccd979bdSMark Fasheh acquired = 0; 2338ccd979bdSMark Fasheh /* We'll allow faking a readonly metadata lock for 2339ccd979bdSMark Fasheh * rodevices. */ 2340ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) { 2341ccd979bdSMark Fasheh if (ex) 2342ccd979bdSMark Fasheh status = -EROFS; 234303efed8aSTiger Yang goto getbh; 2344ccd979bdSMark Fasheh } 2345ccd979bdSMark Fasheh 2346c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2347c271c5c2SSunil Mushran goto local; 2348c271c5c2SSunil Mushran 2349ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2350553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2351ccd979bdSMark Fasheh 2352e63aecb6SMark Fasheh lockres = &OCFS2_I(inode)->ip_inode_lockres; 2353bd3e7610SJoel Becker level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2354ccd979bdSMark Fasheh dlm_flags = 0; 2355ccd979bdSMark Fasheh if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2356bd3e7610SJoel Becker dlm_flags |= DLM_LKF_NOQUEUE; 2357ccd979bdSMark Fasheh 2358cb25797dSJan Kara status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, 2359cb25797dSJan Kara arg_flags, subclass, _RET_IP_); 2360ccd979bdSMark Fasheh if (status < 0) { 236141003a7bSZach Brown if (status != -EAGAIN) 2362ccd979bdSMark Fasheh mlog_errno(status); 2363ccd979bdSMark Fasheh goto bail; 2364ccd979bdSMark Fasheh } 2365ccd979bdSMark Fasheh 2366ccd979bdSMark Fasheh /* Notify the error cleanup path to drop the cluster lock. */ 2367ccd979bdSMark Fasheh acquired = 1; 2368ccd979bdSMark Fasheh 2369ccd979bdSMark Fasheh /* We wait twice because a node may have died while we were in 2370ccd979bdSMark Fasheh * the lower dlm layers. The second time though, we've 2371ccd979bdSMark Fasheh * committed to owning this lock so we don't allow signals to 2372ccd979bdSMark Fasheh * abort the operation. */ 2373ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2374553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2375ccd979bdSMark Fasheh 2376c271c5c2SSunil Mushran local: 237724c19ef4SMark Fasheh /* 237824c19ef4SMark Fasheh * We only see this flag if we're being called from 237924c19ef4SMark Fasheh * ocfs2_read_locked_inode(). It means we're locking an inode 238024c19ef4SMark Fasheh * which hasn't been populated yet, so clear the refresh flag 238124c19ef4SMark Fasheh * and let the caller handle it. 238224c19ef4SMark Fasheh */ 238324c19ef4SMark Fasheh if (inode->i_state & I_NEW) { 238424c19ef4SMark Fasheh status = 0; 2385c271c5c2SSunil Mushran if (lockres) 238624c19ef4SMark Fasheh ocfs2_complete_lock_res_refresh(lockres, 0); 238724c19ef4SMark Fasheh goto bail; 238824c19ef4SMark Fasheh } 238924c19ef4SMark Fasheh 2390ccd979bdSMark Fasheh /* This is fun. The caller may want a bh back, or it may 2391e63aecb6SMark Fasheh * not. 
ocfs2_inode_lock_update definitely wants one in, but 2392ccd979bdSMark Fasheh * may or may not read one, depending on what's in the 2393ccd979bdSMark Fasheh * LVB. The result of all of this is that we've *only* gone to 2394ccd979bdSMark Fasheh * disk if we have to, so the complexity is worthwhile. */ 2395e63aecb6SMark Fasheh status = ocfs2_inode_lock_update(inode, &local_bh); 2396ccd979bdSMark Fasheh if (status < 0) { 2397ccd979bdSMark Fasheh if (status != -ENOENT) 2398ccd979bdSMark Fasheh mlog_errno(status); 2399ccd979bdSMark Fasheh goto bail; 2400ccd979bdSMark Fasheh } 240103efed8aSTiger Yang getbh: 2402ccd979bdSMark Fasheh if (ret_bh) { 2403ccd979bdSMark Fasheh status = ocfs2_assign_bh(inode, ret_bh, local_bh); 2404ccd979bdSMark Fasheh if (status < 0) { 2405ccd979bdSMark Fasheh mlog_errno(status); 2406ccd979bdSMark Fasheh goto bail; 2407ccd979bdSMark Fasheh } 2408ccd979bdSMark Fasheh } 2409ccd979bdSMark Fasheh 2410ccd979bdSMark Fasheh bail: 2411ccd979bdSMark Fasheh if (status < 0) { 2412ccd979bdSMark Fasheh if (ret_bh && (*ret_bh)) { 2413ccd979bdSMark Fasheh brelse(*ret_bh); 2414ccd979bdSMark Fasheh *ret_bh = NULL; 2415ccd979bdSMark Fasheh } 2416ccd979bdSMark Fasheh if (acquired) 2417e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2418ccd979bdSMark Fasheh } 2419ccd979bdSMark Fasheh 2420ccd979bdSMark Fasheh if (local_bh) 2421ccd979bdSMark Fasheh brelse(local_bh); 2422ccd979bdSMark Fasheh 2423ccd979bdSMark Fasheh return status; 2424ccd979bdSMark Fasheh } 2425ccd979bdSMark Fasheh 2426ccd979bdSMark Fasheh /* 242734d024f8SMark Fasheh * This is working around a lock inversion between tasks acquiring DLM 242834d024f8SMark Fasheh * locks while holding a page lock and the downconvert thread which 242934d024f8SMark Fasheh * blocks dlm lock acquiry while acquiring page locks. 
2430ccd979bdSMark Fasheh * 2431ccd979bdSMark Fasheh * ** These _with_page variantes are only intended to be called from aop 2432ccd979bdSMark Fasheh * methods that hold page locks and return a very specific *positive* error 2433ccd979bdSMark Fasheh * code that aop methods pass up to the VFS -- test for errors with != 0. ** 2434ccd979bdSMark Fasheh * 243534d024f8SMark Fasheh * The DLM is called such that it returns -EAGAIN if it would have 243634d024f8SMark Fasheh * blocked waiting for the downconvert thread. In that case we unlock 243734d024f8SMark Fasheh * our page so the downconvert thread can make progress. Once we've 243834d024f8SMark Fasheh * done this we have to return AOP_TRUNCATED_PAGE so the aop method 243934d024f8SMark Fasheh * that called us can bubble that back up into the VFS who will then 244034d024f8SMark Fasheh * immediately retry the aop call. 2441ccd979bdSMark Fasheh */ 2442e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode, 2443ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2444ccd979bdSMark Fasheh int ex, 2445ccd979bdSMark Fasheh struct page *page) 2446ccd979bdSMark Fasheh { 2447ccd979bdSMark Fasheh int ret; 2448ccd979bdSMark Fasheh 2449e63aecb6SMark Fasheh ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2450ccd979bdSMark Fasheh if (ret == -EAGAIN) { 2451ccd979bdSMark Fasheh unlock_page(page); 2452ccd979bdSMark Fasheh ret = AOP_TRUNCATED_PAGE; 2453ccd979bdSMark Fasheh } 2454ccd979bdSMark Fasheh 2455ccd979bdSMark Fasheh return ret; 2456ccd979bdSMark Fasheh } 2457ccd979bdSMark Fasheh 2458e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode, 24597f1a37e3STiger Yang struct vfsmount *vfsmnt, 24607f1a37e3STiger Yang int *level) 24617f1a37e3STiger Yang { 24627f1a37e3STiger Yang int ret; 24637f1a37e3STiger Yang 2464e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, NULL, 0); 24657f1a37e3STiger Yang if (ret < 0) { 24667f1a37e3STiger Yang mlog_errno(ret); 24677f1a37e3STiger Yang return ret; 
24687f1a37e3STiger Yang } 24697f1a37e3STiger Yang 24707f1a37e3STiger Yang /* 24717f1a37e3STiger Yang * If we should update atime, we will get EX lock, 24727f1a37e3STiger Yang * otherwise we just get PR lock. 24737f1a37e3STiger Yang */ 24747f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) { 24757f1a37e3STiger Yang struct buffer_head *bh = NULL; 24767f1a37e3STiger Yang 2477e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, 0); 2478e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, &bh, 1); 24797f1a37e3STiger Yang if (ret < 0) { 24807f1a37e3STiger Yang mlog_errno(ret); 24817f1a37e3STiger Yang return ret; 24827f1a37e3STiger Yang } 24837f1a37e3STiger Yang *level = 1; 24847f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) 24857f1a37e3STiger Yang ocfs2_update_inode_atime(inode, bh); 24867f1a37e3STiger Yang if (bh) 24877f1a37e3STiger Yang brelse(bh); 24887f1a37e3STiger Yang } else 24897f1a37e3STiger Yang *level = 0; 24907f1a37e3STiger Yang 24917f1a37e3STiger Yang return ret; 24927f1a37e3STiger Yang } 24937f1a37e3STiger Yang 2494e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode, 2495ccd979bdSMark Fasheh int ex) 2496ccd979bdSMark Fasheh { 2497bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2498e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2499c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2500ccd979bdSMark Fasheh 2501b0697053SMark Fasheh mlog(0, "inode %llu drop %s META lock\n", 2502b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2503ccd979bdSMark Fasheh ex ? 
"EXMODE" : "PRMODE"); 2504ccd979bdSMark Fasheh 2505c271c5c2SSunil Mushran if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2506c271c5c2SSunil Mushran !ocfs2_mount_local(osb)) 2507ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2508ccd979bdSMark Fasheh } 2509ccd979bdSMark Fasheh 2510df152c24SSunil Mushran int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) 251183273932SSrinivas Eeda { 251283273932SSrinivas Eeda struct ocfs2_lock_res *lockres; 251383273932SSrinivas Eeda struct ocfs2_orphan_scan_lvb *lvb; 251483273932SSrinivas Eeda int status = 0; 251583273932SSrinivas Eeda 2516df152c24SSunil Mushran if (ocfs2_is_hard_readonly(osb)) 2517df152c24SSunil Mushran return -EROFS; 2518df152c24SSunil Mushran 2519df152c24SSunil Mushran if (ocfs2_mount_local(osb)) 2520df152c24SSunil Mushran return 0; 2521df152c24SSunil Mushran 252283273932SSrinivas Eeda lockres = &osb->osb_orphan_scan.os_lockres; 2523df152c24SSunil Mushran status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 252483273932SSrinivas Eeda if (status < 0) 252583273932SSrinivas Eeda return status; 252683273932SSrinivas Eeda 252783273932SSrinivas Eeda lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 25281c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 25291c520dfbSJoel Becker lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) 253083273932SSrinivas Eeda *seqno = be32_to_cpu(lvb->lvb_os_seqno); 25313211949fSSunil Mushran else 25323211949fSSunil Mushran *seqno = osb->osb_orphan_scan.os_seqno + 1; 25333211949fSSunil Mushran 253483273932SSrinivas Eeda return status; 253583273932SSrinivas Eeda } 253683273932SSrinivas Eeda 2537df152c24SSunil Mushran void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) 253883273932SSrinivas Eeda { 253983273932SSrinivas Eeda struct ocfs2_lock_res *lockres; 254083273932SSrinivas Eeda struct ocfs2_orphan_scan_lvb *lvb; 254183273932SSrinivas Eeda 2542df152c24SSunil Mushran if (!ocfs2_is_hard_readonly(osb) && 
!ocfs2_mount_local(osb)) { 254383273932SSrinivas Eeda lockres = &osb->osb_orphan_scan.os_lockres; 254483273932SSrinivas Eeda lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 254583273932SSrinivas Eeda lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; 254683273932SSrinivas Eeda lvb->lvb_os_seqno = cpu_to_be32(seqno); 2547df152c24SSunil Mushran ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2548df152c24SSunil Mushran } 254983273932SSrinivas Eeda } 255083273932SSrinivas Eeda 2551ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb, 2552ccd979bdSMark Fasheh int ex) 2553ccd979bdSMark Fasheh { 2554c271c5c2SSunil Mushran int status = 0; 2555bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2556ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2557ccd979bdSMark Fasheh 2558ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2559ccd979bdSMark Fasheh return -EROFS; 2560ccd979bdSMark Fasheh 2561c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2562c271c5c2SSunil Mushran goto bail; 2563c271c5c2SSunil Mushran 2564ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2565ccd979bdSMark Fasheh if (status < 0) { 2566ccd979bdSMark Fasheh mlog_errno(status); 2567ccd979bdSMark Fasheh goto bail; 2568ccd979bdSMark Fasheh } 2569ccd979bdSMark Fasheh 2570ccd979bdSMark Fasheh /* The super block lock path is really in the best position to 2571ccd979bdSMark Fasheh * know when resources covered by the lock need to be 2572ccd979bdSMark Fasheh * refreshed, so we do it here. 
Of course, making sense of 2573ccd979bdSMark Fasheh * everything is up to the caller :) */ 2574ccd979bdSMark Fasheh status = ocfs2_should_refresh_lock_res(lockres); 2575ccd979bdSMark Fasheh if (status) { 25768e8a4603SMark Fasheh status = ocfs2_refresh_slot_info(osb); 2577ccd979bdSMark Fasheh 2578ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2579ccd979bdSMark Fasheh 25803278bb74SJunxiao Bi if (status < 0) { 25813278bb74SJunxiao Bi ocfs2_cluster_unlock(osb, lockres, level); 2582ccd979bdSMark Fasheh mlog_errno(status); 25833278bb74SJunxiao Bi } 25848ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2585ccd979bdSMark Fasheh } 2586ccd979bdSMark Fasheh bail: 2587ccd979bdSMark Fasheh return status; 2588ccd979bdSMark Fasheh } 2589ccd979bdSMark Fasheh 2590ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb, 2591ccd979bdSMark Fasheh int ex) 2592ccd979bdSMark Fasheh { 2593bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2594ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2595ccd979bdSMark Fasheh 2596c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2597ccd979bdSMark Fasheh ocfs2_cluster_unlock(osb, lockres, level); 2598ccd979bdSMark Fasheh } 2599ccd979bdSMark Fasheh 2600ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb) 2601ccd979bdSMark Fasheh { 2602ccd979bdSMark Fasheh int status; 2603ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2604ccd979bdSMark Fasheh 2605ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2606ccd979bdSMark Fasheh return -EROFS; 2607ccd979bdSMark Fasheh 2608c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2609c271c5c2SSunil Mushran return 0; 2610c271c5c2SSunil Mushran 2611bd3e7610SJoel Becker status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2612ccd979bdSMark Fasheh if (status < 0) 2613ccd979bdSMark Fasheh mlog_errno(status); 2614ccd979bdSMark Fasheh 2615ccd979bdSMark Fasheh return status; 
2616ccd979bdSMark Fasheh } 2617ccd979bdSMark Fasheh 2618ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb) 2619ccd979bdSMark Fasheh { 2620ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2621ccd979bdSMark Fasheh 2622c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2623bd3e7610SJoel Becker ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2624ccd979bdSMark Fasheh } 2625ccd979bdSMark Fasheh 26266ca497a8Swengang wang int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) 26276ca497a8Swengang wang { 26286ca497a8Swengang wang int status; 26296ca497a8Swengang wang struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 26306ca497a8Swengang wang 26316ca497a8Swengang wang if (ocfs2_is_hard_readonly(osb)) 26326ca497a8Swengang wang return -EROFS; 26336ca497a8Swengang wang 26346ca497a8Swengang wang if (ocfs2_mount_local(osb)) 26356ca497a8Swengang wang return 0; 26366ca497a8Swengang wang 26376ca497a8Swengang wang status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, 26386ca497a8Swengang wang 0, 0); 26396ca497a8Swengang wang if (status < 0) 26406ca497a8Swengang wang mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); 26416ca497a8Swengang wang 26426ca497a8Swengang wang return status; 26436ca497a8Swengang wang } 26446ca497a8Swengang wang 26456ca497a8Swengang wang void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) 26466ca497a8Swengang wang { 26476ca497a8Swengang wang struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 26486ca497a8Swengang wang 26496ca497a8Swengang wang if (!ocfs2_mount_local(osb)) 26506ca497a8Swengang wang ocfs2_cluster_unlock(osb, lockres, 26516ca497a8Swengang wang ex ? LKM_EXMODE : LKM_PRMODE); 26526ca497a8Swengang wang } 26536ca497a8Swengang wang 2654d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2655d680efe9SMark Fasheh { 2656d680efe9SMark Fasheh int ret; 2657bd3e7610SJoel Becker int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2658d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2659d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2660d680efe9SMark Fasheh 2661d680efe9SMark Fasheh BUG_ON(!dl); 2662d680efe9SMark Fasheh 266303efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb)) { 266403efed8aSTiger Yang if (ex) 2665d680efe9SMark Fasheh return -EROFS; 266603efed8aSTiger Yang return 0; 266703efed8aSTiger Yang } 2668d680efe9SMark Fasheh 2669c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2670c271c5c2SSunil Mushran return 0; 2671c271c5c2SSunil Mushran 2672d680efe9SMark Fasheh ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 2673d680efe9SMark Fasheh if (ret < 0) 2674d680efe9SMark Fasheh mlog_errno(ret); 2675d680efe9SMark Fasheh 2676d680efe9SMark Fasheh return ret; 2677d680efe9SMark Fasheh } 2678d680efe9SMark Fasheh 2679d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2680d680efe9SMark Fasheh { 2681bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2682d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2683d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2684d680efe9SMark Fasheh 268503efed8aSTiger Yang if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 2686d680efe9SMark Fasheh ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 2687d680efe9SMark Fasheh } 2688d680efe9SMark Fasheh 2689ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because 2690ccd979bdSMark Fasheh * open references on the debug inodes can live on after a mount, so 2691ccd979bdSMark Fasheh * we can't rely on the ocfs2_super to always exist. 
*/ 2692ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref) 2693ccd979bdSMark Fasheh { 2694ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2695ccd979bdSMark Fasheh 2696ccd979bdSMark Fasheh dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 2697ccd979bdSMark Fasheh 2698ccd979bdSMark Fasheh kfree(dlm_debug); 2699ccd979bdSMark Fasheh } 2700ccd979bdSMark Fasheh 2701ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 2702ccd979bdSMark Fasheh { 2703ccd979bdSMark Fasheh if (dlm_debug) 2704ccd979bdSMark Fasheh kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 2705ccd979bdSMark Fasheh } 2706ccd979bdSMark Fasheh 2707ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) 2708ccd979bdSMark Fasheh { 2709ccd979bdSMark Fasheh kref_get(&debug->d_refcnt); 2710ccd979bdSMark Fasheh } 2711ccd979bdSMark Fasheh 2712ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 2713ccd979bdSMark Fasheh { 2714ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2715ccd979bdSMark Fasheh 2716ccd979bdSMark Fasheh dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 2717ccd979bdSMark Fasheh if (!dlm_debug) { 2718ccd979bdSMark Fasheh mlog_errno(-ENOMEM); 2719ccd979bdSMark Fasheh goto out; 2720ccd979bdSMark Fasheh } 2721ccd979bdSMark Fasheh 2722ccd979bdSMark Fasheh kref_init(&dlm_debug->d_refcnt); 2723ccd979bdSMark Fasheh INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 2724ccd979bdSMark Fasheh dlm_debug->d_locking_state = NULL; 2725ccd979bdSMark Fasheh out: 2726ccd979bdSMark Fasheh return dlm_debug; 2727ccd979bdSMark Fasheh } 2728ccd979bdSMark Fasheh 2729ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. 
*/ 2730ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv { 2731ccd979bdSMark Fasheh struct ocfs2_dlm_debug *p_dlm_debug; 2732ccd979bdSMark Fasheh struct ocfs2_lock_res p_iter_res; 2733ccd979bdSMark Fasheh struct ocfs2_lock_res p_tmp_res; 2734ccd979bdSMark Fasheh }; 2735ccd979bdSMark Fasheh 2736ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, 2737ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv) 2738ccd979bdSMark Fasheh { 2739ccd979bdSMark Fasheh struct ocfs2_lock_res *iter, *ret = NULL; 2740ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; 2741ccd979bdSMark Fasheh 2742ccd979bdSMark Fasheh assert_spin_locked(&ocfs2_dlm_tracking_lock); 2743ccd979bdSMark Fasheh 2744ccd979bdSMark Fasheh list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { 2745ccd979bdSMark Fasheh /* discover the head of the list */ 2746ccd979bdSMark Fasheh if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { 2747ccd979bdSMark Fasheh mlog(0, "End of list found, %p\n", ret); 2748ccd979bdSMark Fasheh break; 2749ccd979bdSMark Fasheh } 2750ccd979bdSMark Fasheh 2751ccd979bdSMark Fasheh /* We track our "dummy" iteration lockres' by a NULL 2752ccd979bdSMark Fasheh * l_ops field. 
*/ 2753ccd979bdSMark Fasheh if (iter->l_ops != NULL) { 2754ccd979bdSMark Fasheh ret = iter; 2755ccd979bdSMark Fasheh break; 2756ccd979bdSMark Fasheh } 2757ccd979bdSMark Fasheh } 2758ccd979bdSMark Fasheh 2759ccd979bdSMark Fasheh return ret; 2760ccd979bdSMark Fasheh } 2761ccd979bdSMark Fasheh 2762ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) 2763ccd979bdSMark Fasheh { 2764ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2765ccd979bdSMark Fasheh struct ocfs2_lock_res *iter; 2766ccd979bdSMark Fasheh 2767ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2768ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); 2769ccd979bdSMark Fasheh if (iter) { 2770ccd979bdSMark Fasheh /* Since lockres' have the lifetime of their container 2771ccd979bdSMark Fasheh * (which can be inodes, ocfs2_supers, etc) we want to 2772ccd979bdSMark Fasheh * copy this out to a temporary lockres while still 2773ccd979bdSMark Fasheh * under the spinlock. Obviously after this we can't 2774ccd979bdSMark Fasheh * trust any pointers on the copy returned, but that's 2775ccd979bdSMark Fasheh * ok as the information we want isn't typically held 2776ccd979bdSMark Fasheh * in them. 
*/ 2777ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2778ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2779ccd979bdSMark Fasheh } 2780ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2781ccd979bdSMark Fasheh 2782ccd979bdSMark Fasheh return iter; 2783ccd979bdSMark Fasheh } 2784ccd979bdSMark Fasheh 2785ccd979bdSMark Fasheh static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) 2786ccd979bdSMark Fasheh { 2787ccd979bdSMark Fasheh } 2788ccd979bdSMark Fasheh 2789ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) 2790ccd979bdSMark Fasheh { 2791ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2792ccd979bdSMark Fasheh struct ocfs2_lock_res *iter = v; 2793ccd979bdSMark Fasheh struct ocfs2_lock_res *dummy = &priv->p_iter_res; 2794ccd979bdSMark Fasheh 2795ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2796ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(iter, priv); 2797ccd979bdSMark Fasheh list_del_init(&dummy->l_debug_list); 2798ccd979bdSMark Fasheh if (iter) { 2799ccd979bdSMark Fasheh list_add(&dummy->l_debug_list, &iter->l_debug_list); 2800ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2801ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2802ccd979bdSMark Fasheh } 2803ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2804ccd979bdSMark Fasheh 2805ccd979bdSMark Fasheh return iter; 2806ccd979bdSMark Fasheh } 2807ccd979bdSMark Fasheh 28085bc970e8SSunil Mushran /* 28095bc970e8SSunil Mushran * Version is used by debugfs.ocfs2 to determine the format being used 28105bc970e8SSunil Mushran * 28115bc970e8SSunil Mushran * New in version 2 28125bc970e8SSunil Mushran * - Lock stats printed 28135bc970e8SSunil Mushran * New in version 3 28145bc970e8SSunil Mushran * - Max time in lock stats is in usecs (instead of nsecs) 28155bc970e8SSunil Mushran */ 28165bc970e8SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 3 2817ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 
2818ccd979bdSMark Fasheh { 2819ccd979bdSMark Fasheh int i; 2820ccd979bdSMark Fasheh char *lvb; 2821ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = v; 2822ccd979bdSMark Fasheh 2823ccd979bdSMark Fasheh if (!lockres) 2824ccd979bdSMark Fasheh return -EINVAL; 2825ccd979bdSMark Fasheh 2826d680efe9SMark Fasheh seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 2827d680efe9SMark Fasheh 2828d680efe9SMark Fasheh if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 2829d680efe9SMark Fasheh seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 2830d680efe9SMark Fasheh lockres->l_name, 2831d680efe9SMark Fasheh (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 2832d680efe9SMark Fasheh else 2833d680efe9SMark Fasheh seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 2834d680efe9SMark Fasheh 2835d680efe9SMark Fasheh seq_printf(m, "%d\t" 2836ccd979bdSMark Fasheh "0x%lx\t" 2837ccd979bdSMark Fasheh "0x%x\t" 2838ccd979bdSMark Fasheh "0x%x\t" 2839ccd979bdSMark Fasheh "%u\t" 2840ccd979bdSMark Fasheh "%u\t" 2841ccd979bdSMark Fasheh "%d\t" 2842ccd979bdSMark Fasheh "%d\t", 2843ccd979bdSMark Fasheh lockres->l_level, 2844ccd979bdSMark Fasheh lockres->l_flags, 2845ccd979bdSMark Fasheh lockres->l_action, 2846ccd979bdSMark Fasheh lockres->l_unlock_action, 2847ccd979bdSMark Fasheh lockres->l_ro_holders, 2848ccd979bdSMark Fasheh lockres->l_ex_holders, 2849ccd979bdSMark Fasheh lockres->l_requested, 2850ccd979bdSMark Fasheh lockres->l_blocking); 2851ccd979bdSMark Fasheh 2852ccd979bdSMark Fasheh /* Dump the raw LVB */ 28538f2c9c1bSJoel Becker lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2854ccd979bdSMark Fasheh for(i = 0; i < DLM_LVB_LEN; i++) 2855ccd979bdSMark Fasheh seq_printf(m, "0x%x\t", lvb[i]); 2856ccd979bdSMark Fasheh 28578ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 28585bc970e8SSunil Mushran # define lock_num_prmode(_l) ((_l)->l_lock_prmode.ls_gets) 28595bc970e8SSunil Mushran # define lock_num_exmode(_l) ((_l)->l_lock_exmode.ls_gets) 28605bc970e8SSunil Mushran # 
define lock_num_prmode_failed(_l) ((_l)->l_lock_prmode.ls_fail) 28615bc970e8SSunil Mushran # define lock_num_exmode_failed(_l) ((_l)->l_lock_exmode.ls_fail) 28625bc970e8SSunil Mushran # define lock_total_prmode(_l) ((_l)->l_lock_prmode.ls_total) 28635bc970e8SSunil Mushran # define lock_total_exmode(_l) ((_l)->l_lock_exmode.ls_total) 28645bc970e8SSunil Mushran # define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max) 28655bc970e8SSunil Mushran # define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max) 28665bc970e8SSunil Mushran # define lock_refresh(_l) ((_l)->l_lock_refresh) 28678ddb7b00SSunil Mushran #else 28685bc970e8SSunil Mushran # define lock_num_prmode(_l) (0) 28695bc970e8SSunil Mushran # define lock_num_exmode(_l) (0) 28708ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l) (0) 28718ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l) (0) 2872dd25e55eSRandy Dunlap # define lock_total_prmode(_l) (0ULL) 2873dd25e55eSRandy Dunlap # define lock_total_exmode(_l) (0ULL) 28748ddb7b00SSunil Mushran # define lock_max_prmode(_l) (0) 28758ddb7b00SSunil Mushran # define lock_max_exmode(_l) (0) 28768ddb7b00SSunil Mushran # define lock_refresh(_l) (0) 28778ddb7b00SSunil Mushran #endif 28788ddb7b00SSunil Mushran /* The following seq_print was added in version 2 of this output */ 28795bc970e8SSunil Mushran seq_printf(m, "%u\t" 28805bc970e8SSunil Mushran "%u\t" 28818ddb7b00SSunil Mushran "%u\t" 28828ddb7b00SSunil Mushran "%u\t" 28838ddb7b00SSunil Mushran "%llu\t" 28848ddb7b00SSunil Mushran "%llu\t" 28858ddb7b00SSunil Mushran "%u\t" 28868ddb7b00SSunil Mushran "%u\t" 28878ddb7b00SSunil Mushran "%u\t", 28888ddb7b00SSunil Mushran lock_num_prmode(lockres), 28898ddb7b00SSunil Mushran lock_num_exmode(lockres), 28908ddb7b00SSunil Mushran lock_num_prmode_failed(lockres), 28918ddb7b00SSunil Mushran lock_num_exmode_failed(lockres), 28928ddb7b00SSunil Mushran lock_total_prmode(lockres), 28938ddb7b00SSunil Mushran lock_total_exmode(lockres), 28948ddb7b00SSunil Mushran 
lock_max_prmode(lockres), 28958ddb7b00SSunil Mushran lock_max_exmode(lockres), 28968ddb7b00SSunil Mushran lock_refresh(lockres)); 28978ddb7b00SSunil Mushran 2898ccd979bdSMark Fasheh /* End the line */ 2899ccd979bdSMark Fasheh seq_printf(m, "\n"); 2900ccd979bdSMark Fasheh return 0; 2901ccd979bdSMark Fasheh } 2902ccd979bdSMark Fasheh 290390d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = { 2904ccd979bdSMark Fasheh .start = ocfs2_dlm_seq_start, 2905ccd979bdSMark Fasheh .stop = ocfs2_dlm_seq_stop, 2906ccd979bdSMark Fasheh .next = ocfs2_dlm_seq_next, 2907ccd979bdSMark Fasheh .show = ocfs2_dlm_seq_show, 2908ccd979bdSMark Fasheh }; 2909ccd979bdSMark Fasheh 2910ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2911ccd979bdSMark Fasheh { 291233fa1d90SJoe Perches struct seq_file *seq = file->private_data; 2913ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = seq->private; 2914ccd979bdSMark Fasheh struct ocfs2_lock_res *res = &priv->p_iter_res; 2915ccd979bdSMark Fasheh 2916ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 2917ccd979bdSMark Fasheh ocfs2_put_dlm_debug(priv->p_dlm_debug); 2918ccd979bdSMark Fasheh return seq_release_private(inode, file); 2919ccd979bdSMark Fasheh } 2920ccd979bdSMark Fasheh 2921ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 2922ccd979bdSMark Fasheh { 2923ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv; 2924ccd979bdSMark Fasheh struct ocfs2_super *osb; 2925ccd979bdSMark Fasheh 29261848cb55SRob Jones priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv)); 2927ccd979bdSMark Fasheh if (!priv) { 29281848cb55SRob Jones mlog_errno(-ENOMEM); 29291848cb55SRob Jones return -ENOMEM; 2930ccd979bdSMark Fasheh } 29311848cb55SRob Jones 29328e18e294STheodore Ts'o osb = inode->i_private; 2933ccd979bdSMark Fasheh ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2934ccd979bdSMark Fasheh priv->p_dlm_debug = osb->osb_dlm_debug; 
2935ccd979bdSMark Fasheh INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2936ccd979bdSMark Fasheh 2937ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(&priv->p_iter_res, 2938ccd979bdSMark Fasheh priv->p_dlm_debug); 2939ccd979bdSMark Fasheh 29401848cb55SRob Jones return 0; 2941ccd979bdSMark Fasheh } 2942ccd979bdSMark Fasheh 29434b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = { 2944ccd979bdSMark Fasheh .open = ocfs2_dlm_debug_open, 2945ccd979bdSMark Fasheh .release = ocfs2_dlm_debug_release, 2946ccd979bdSMark Fasheh .read = seq_read, 2947ccd979bdSMark Fasheh .llseek = seq_lseek, 2948ccd979bdSMark Fasheh }; 2949ccd979bdSMark Fasheh 2950ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 2951ccd979bdSMark Fasheh { 2952ccd979bdSMark Fasheh int ret = 0; 2953ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2954ccd979bdSMark Fasheh 2955ccd979bdSMark Fasheh dlm_debug->d_locking_state = debugfs_create_file("locking_state", 2956ccd979bdSMark Fasheh S_IFREG|S_IRUSR, 2957ccd979bdSMark Fasheh osb->osb_debug_root, 2958ccd979bdSMark Fasheh osb, 2959ccd979bdSMark Fasheh &ocfs2_dlm_debug_fops); 29608f443e23SLinus Torvalds if (!dlm_debug->d_locking_state) { 2961ccd979bdSMark Fasheh ret = -EINVAL; 2962ccd979bdSMark Fasheh mlog(ML_ERROR, 2963ccd979bdSMark Fasheh "Unable to create locking state debugfs file.\n"); 2964ccd979bdSMark Fasheh goto out; 2965ccd979bdSMark Fasheh } 2966ccd979bdSMark Fasheh 2967ccd979bdSMark Fasheh ocfs2_get_dlm_debug(dlm_debug); 2968ccd979bdSMark Fasheh out: 2969ccd979bdSMark Fasheh return ret; 2970ccd979bdSMark Fasheh } 2971ccd979bdSMark Fasheh 2972ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 2973ccd979bdSMark Fasheh { 2974ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2975ccd979bdSMark Fasheh 2976ccd979bdSMark Fasheh if (dlm_debug) { 2977ccd979bdSMark Fasheh debugfs_remove(dlm_debug->d_locking_state); 
2978ccd979bdSMark Fasheh ocfs2_put_dlm_debug(dlm_debug); 2979ccd979bdSMark Fasheh } 2980ccd979bdSMark Fasheh } 2981ccd979bdSMark Fasheh 2982ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb) 2983ccd979bdSMark Fasheh { 2984c271c5c2SSunil Mushran int status = 0; 29854670c46dSJoel Becker struct ocfs2_cluster_connection *conn = NULL; 2986ccd979bdSMark Fasheh 29870abd6d18SMark Fasheh if (ocfs2_mount_local(osb)) { 29880abd6d18SMark Fasheh osb->node_num = 0; 2989c271c5c2SSunil Mushran goto local; 29900abd6d18SMark Fasheh } 2991c271c5c2SSunil Mushran 2992ccd979bdSMark Fasheh status = ocfs2_dlm_init_debug(osb); 2993ccd979bdSMark Fasheh if (status < 0) { 2994ccd979bdSMark Fasheh mlog_errno(status); 2995ccd979bdSMark Fasheh goto bail; 2996ccd979bdSMark Fasheh } 2997ccd979bdSMark Fasheh 299834d024f8SMark Fasheh /* launch downconvert thread */ 29995afc44e2SJoseph Qi osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s", 30005afc44e2SJoseph Qi osb->uuid_str); 300134d024f8SMark Fasheh if (IS_ERR(osb->dc_task)) { 300234d024f8SMark Fasheh status = PTR_ERR(osb->dc_task); 300334d024f8SMark Fasheh osb->dc_task = NULL; 3004ccd979bdSMark Fasheh mlog_errno(status); 3005ccd979bdSMark Fasheh goto bail; 3006ccd979bdSMark Fasheh } 3007ccd979bdSMark Fasheh 3008ccd979bdSMark Fasheh /* for now, uuid == domain */ 30099c6c877cSJoel Becker status = ocfs2_cluster_connect(osb->osb_cluster_stack, 3010c74a3bddSGoldwyn Rodrigues osb->osb_cluster_name, 3011c74a3bddSGoldwyn Rodrigues strlen(osb->osb_cluster_name), 30129c6c877cSJoel Becker osb->uuid_str, 30134670c46dSJoel Becker strlen(osb->uuid_str), 3014553b5eb9SJoel Becker &lproto, ocfs2_do_node_down, osb, 30154670c46dSJoel Becker &conn); 30164670c46dSJoel Becker if (status) { 3017ccd979bdSMark Fasheh mlog_errno(status); 3018ccd979bdSMark Fasheh goto bail; 3019ccd979bdSMark Fasheh } 3020ccd979bdSMark Fasheh 30213e834151SGoldwyn Rodrigues status = ocfs2_cluster_this_node(conn, &osb->node_num); 30220abd6d18SMark Fasheh if 
(status < 0) { 30230abd6d18SMark Fasheh mlog_errno(status); 30240abd6d18SMark Fasheh mlog(ML_ERROR, 30250abd6d18SMark Fasheh "could not find this host's node number\n"); 3026286eaa95SJoel Becker ocfs2_cluster_disconnect(conn, 0); 30270abd6d18SMark Fasheh goto bail; 30280abd6d18SMark Fasheh } 30290abd6d18SMark Fasheh 3030c271c5c2SSunil Mushran local: 3031ccd979bdSMark Fasheh ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 3032ccd979bdSMark Fasheh ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 30336ca497a8Swengang wang ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); 303483273932SSrinivas Eeda ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); 3035ccd979bdSMark Fasheh 30364670c46dSJoel Becker osb->cconn = conn; 3037ccd979bdSMark Fasheh bail: 3038ccd979bdSMark Fasheh if (status < 0) { 3039ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 304034d024f8SMark Fasheh if (osb->dc_task) 304134d024f8SMark Fasheh kthread_stop(osb->dc_task); 3042ccd979bdSMark Fasheh } 3043ccd979bdSMark Fasheh 3044ccd979bdSMark Fasheh return status; 3045ccd979bdSMark Fasheh } 3046ccd979bdSMark Fasheh 3047286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb, 3048286eaa95SJoel Becker int hangup_pending) 3049ccd979bdSMark Fasheh { 3050ccd979bdSMark Fasheh ocfs2_drop_osb_locks(osb); 3051ccd979bdSMark Fasheh 30524670c46dSJoel Becker /* 30534670c46dSJoel Becker * Now that we have dropped all locks and ocfs2_dismount_volume() 30544670c46dSJoel Becker * has disabled recovery, the DLM won't be talking to us. It's 30554670c46dSJoel Becker * safe to tear things down before disconnecting the cluster. 
30564670c46dSJoel Becker */ 30574670c46dSJoel Becker 305834d024f8SMark Fasheh if (osb->dc_task) { 305934d024f8SMark Fasheh kthread_stop(osb->dc_task); 306034d024f8SMark Fasheh osb->dc_task = NULL; 3061ccd979bdSMark Fasheh } 3062ccd979bdSMark Fasheh 3063ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_super_lockres); 3064ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_rename_lockres); 30656ca497a8Swengang wang ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); 306683273932SSrinivas Eeda ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); 3067ccd979bdSMark Fasheh 3068286eaa95SJoel Becker ocfs2_cluster_disconnect(osb->cconn, hangup_pending); 30694670c46dSJoel Becker osb->cconn = NULL; 3070ccd979bdSMark Fasheh 3071ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 3072ccd979bdSMark Fasheh } 3073ccd979bdSMark Fasheh 3074ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb, 30750d5dc6c2SMark Fasheh struct ocfs2_lock_res *lockres) 3076ccd979bdSMark Fasheh { 30777431cd7eSJoel Becker int ret; 3078ccd979bdSMark Fasheh unsigned long flags; 3079bd3e7610SJoel Becker u32 lkm_flags = 0; 3080ccd979bdSMark Fasheh 3081ccd979bdSMark Fasheh /* We didn't get anywhere near actually using this lockres. 
*/ 3082ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 3083ccd979bdSMark Fasheh goto out; 3084ccd979bdSMark Fasheh 3085b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 3086bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 3087b80fc012SMark Fasheh 3088ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3089ccd979bdSMark Fasheh 3090ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 3091ccd979bdSMark Fasheh "lockres %s, flags 0x%lx\n", 3092ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3093ccd979bdSMark Fasheh 3094ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_BUSY) { 3095ccd979bdSMark Fasheh mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " 3096ccd979bdSMark Fasheh "%u, unlock_action = %u\n", 3097ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_action, 3098ccd979bdSMark Fasheh lockres->l_unlock_action); 3099ccd979bdSMark Fasheh 3100ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3101ccd979bdSMark Fasheh 3102ccd979bdSMark Fasheh /* XXX: Today we just wait on any busy 3103ccd979bdSMark Fasheh * locks... Perhaps we need to cancel converts in the 3104ccd979bdSMark Fasheh * future? 
*/ 3105ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 3106ccd979bdSMark Fasheh 3107ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3108ccd979bdSMark Fasheh } 3109ccd979bdSMark Fasheh 31100d5dc6c2SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 31110d5dc6c2SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 3112bd3e7610SJoel Becker lockres->l_level == DLM_LOCK_EX && 31130d5dc6c2SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 31140d5dc6c2SMark Fasheh lockres->l_ops->set_lvb(lockres); 31150d5dc6c2SMark Fasheh } 3116ccd979bdSMark Fasheh 3117ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) 3118ccd979bdSMark Fasheh mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 3119ccd979bdSMark Fasheh lockres->l_name); 3120ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 3121ccd979bdSMark Fasheh mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); 3122ccd979bdSMark Fasheh 3123ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 3124ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3125ccd979bdSMark Fasheh goto out; 3126ccd979bdSMark Fasheh } 3127ccd979bdSMark Fasheh 3128ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); 3129ccd979bdSMark Fasheh 3130ccd979bdSMark Fasheh /* make sure we never get here while waiting for an ast to 3131ccd979bdSMark Fasheh * fire. */ 3132ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_INVALID); 3133ccd979bdSMark Fasheh 3134ccd979bdSMark Fasheh /* is this necessary? 
*/ 3135ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3136ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; 3137ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3138ccd979bdSMark Fasheh 3139ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 3140ccd979bdSMark Fasheh 3141a796d286SJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags); 31427431cd7eSJoel Becker if (ret) { 31437431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3144ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 3145cf0acdcdSJoel Becker ocfs2_dlm_dump_lksb(&lockres->l_lksb); 3146ccd979bdSMark Fasheh BUG(); 3147ccd979bdSMark Fasheh } 314873ac36eaSColy Li mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", 3149ccd979bdSMark Fasheh lockres->l_name); 3150ccd979bdSMark Fasheh 3151ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 3152ccd979bdSMark Fasheh out: 3153ccd979bdSMark Fasheh return 0; 3154ccd979bdSMark Fasheh } 3155ccd979bdSMark Fasheh 315684d86f83SJan Kara static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 315784d86f83SJan Kara struct ocfs2_lock_res *lockres); 315884d86f83SJan Kara 3159ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be 3160ccd979bdSMark Fasheh * queued if blocking, but we still may have to wait on it 316134d024f8SMark Fasheh * being dequeued from the downconvert thread before we can consider 3162ccd979bdSMark Fasheh * it safe to drop. 3163ccd979bdSMark Fasheh * 3164ccd979bdSMark Fasheh * You can *not* attempt to call cluster_lock on this lockres anymore. 
*/ 316584d86f83SJan Kara void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb, 316684d86f83SJan Kara struct ocfs2_lock_res *lockres) 3167ccd979bdSMark Fasheh { 3168ccd979bdSMark Fasheh int status; 3169ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 317084d86f83SJan Kara unsigned long flags, flags2; 3171ccd979bdSMark Fasheh 3172ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 3173ccd979bdSMark Fasheh 3174ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3175ccd979bdSMark Fasheh lockres->l_flags |= OCFS2_LOCK_FREEING; 317684d86f83SJan Kara if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) { 317784d86f83SJan Kara /* 317884d86f83SJan Kara * We know the downconvert is queued but not in progress 317984d86f83SJan Kara * because we are the downconvert thread and processing 318084d86f83SJan Kara * different lock. So we can just remove the lock from the 318184d86f83SJan Kara * queue. This is not only an optimization but also a way 318284d86f83SJan Kara * to avoid the following deadlock: 318384d86f83SJan Kara * ocfs2_dentry_post_unlock() 318484d86f83SJan Kara * ocfs2_dentry_lock_put() 318584d86f83SJan Kara * ocfs2_drop_dentry_lock() 318684d86f83SJan Kara * iput() 318784d86f83SJan Kara * ocfs2_evict_inode() 318884d86f83SJan Kara * ocfs2_clear_inode() 318984d86f83SJan Kara * ocfs2_mark_lockres_freeing() 319084d86f83SJan Kara * ... blocks waiting for OCFS2_LOCK_QUEUED 319184d86f83SJan Kara * since we are the downconvert thread which 319284d86f83SJan Kara * should clear the flag. 319384d86f83SJan Kara */ 319484d86f83SJan Kara spin_unlock_irqrestore(&lockres->l_lock, flags); 319584d86f83SJan Kara spin_lock_irqsave(&osb->dc_task_lock, flags2); 319684d86f83SJan Kara list_del_init(&lockres->l_blocked_list); 319784d86f83SJan Kara osb->blocked_lock_count--; 319884d86f83SJan Kara spin_unlock_irqrestore(&osb->dc_task_lock, flags2); 319984d86f83SJan Kara /* 320084d86f83SJan Kara * Warn if we recurse into another post_unlock call. 
Strictly 320184d86f83SJan Kara * speaking it isn't a problem but we need to be careful if 320284d86f83SJan Kara * that happens (stack overflow, deadlocks, ...) so warn if 320384d86f83SJan Kara * ocfs2 grows a path for which this can happen. 320484d86f83SJan Kara */ 320584d86f83SJan Kara WARN_ON_ONCE(lockres->l_ops->post_unlock); 320684d86f83SJan Kara /* Since the lock is freeing we don't do much in the fn below */ 320784d86f83SJan Kara ocfs2_process_blocked_lock(osb, lockres); 320884d86f83SJan Kara return; 320984d86f83SJan Kara } 3210ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 3211ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 3212ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3213ccd979bdSMark Fasheh 3214ccd979bdSMark Fasheh mlog(0, "Waiting on lockres %s\n", lockres->l_name); 3215ccd979bdSMark Fasheh 3216ccd979bdSMark Fasheh status = ocfs2_wait_for_mask(&mw); 3217ccd979bdSMark Fasheh if (status) 3218ccd979bdSMark Fasheh mlog_errno(status); 3219ccd979bdSMark Fasheh 3220ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3221ccd979bdSMark Fasheh } 3222ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3223ccd979bdSMark Fasheh } 3224ccd979bdSMark Fasheh 3225d680efe9SMark Fasheh void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, 3226d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3227d680efe9SMark Fasheh { 3228d680efe9SMark Fasheh int ret; 3229d680efe9SMark Fasheh 323084d86f83SJan Kara ocfs2_mark_lockres_freeing(osb, lockres); 32310d5dc6c2SMark Fasheh ret = ocfs2_drop_lock(osb, lockres); 3232d680efe9SMark Fasheh if (ret) 3233d680efe9SMark Fasheh mlog_errno(ret); 3234d680efe9SMark Fasheh } 3235d680efe9SMark Fasheh 3236ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 3237ccd979bdSMark Fasheh { 3238d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 3239d680efe9SMark Fasheh 
ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 32406ca497a8Swengang wang ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); 324183273932SSrinivas Eeda ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); 3242ccd979bdSMark Fasheh } 3243ccd979bdSMark Fasheh 3244ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode) 3245ccd979bdSMark Fasheh { 3246ccd979bdSMark Fasheh int status, err; 3247ccd979bdSMark Fasheh 3248ccd979bdSMark Fasheh /* No need to call ocfs2_mark_lockres_freeing here - 3249ccd979bdSMark Fasheh * ocfs2_clear_inode has done it for us. */ 3250ccd979bdSMark Fasheh 3251ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 325250008630STiger Yang &OCFS2_I(inode)->ip_open_lockres); 3253ccd979bdSMark Fasheh if (err < 0) 3254ccd979bdSMark Fasheh mlog_errno(err); 3255ccd979bdSMark Fasheh 3256ccd979bdSMark Fasheh status = err; 3257ccd979bdSMark Fasheh 3258ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3259e63aecb6SMark Fasheh &OCFS2_I(inode)->ip_inode_lockres); 3260ccd979bdSMark Fasheh if (err < 0) 3261ccd979bdSMark Fasheh mlog_errno(err); 3262ccd979bdSMark Fasheh if (err < 0 && !status) 3263ccd979bdSMark Fasheh status = err; 3264ccd979bdSMark Fasheh 3265ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 32660d5dc6c2SMark Fasheh &OCFS2_I(inode)->ip_rw_lockres); 3267ccd979bdSMark Fasheh if (err < 0) 3268ccd979bdSMark Fasheh mlog_errno(err); 3269ccd979bdSMark Fasheh if (err < 0 && !status) 3270ccd979bdSMark Fasheh status = err; 3271ccd979bdSMark Fasheh 3272ccd979bdSMark Fasheh return status; 3273ccd979bdSMark Fasheh } 3274ccd979bdSMark Fasheh 3275de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 3276ccd979bdSMark Fasheh int new_level) 3277ccd979bdSMark Fasheh { 3278ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3279ccd979bdSMark Fasheh 3280bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 
3281ccd979bdSMark Fasheh 3282ccd979bdSMark Fasheh if (lockres->l_level <= new_level) { 32839b915181SSunil Mushran mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, " 32849b915181SSunil Mushran "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, " 32859b915181SSunil Mushran "block %d, pgen %d\n", lockres->l_name, lockres->l_level, 32869b915181SSunil Mushran new_level, list_empty(&lockres->l_blocked_list), 32879b915181SSunil Mushran list_empty(&lockres->l_mask_waiters), lockres->l_type, 32889b915181SSunil Mushran lockres->l_flags, lockres->l_ro_holders, 32899b915181SSunil Mushran lockres->l_ex_holders, lockres->l_action, 32909b915181SSunil Mushran lockres->l_unlock_action, lockres->l_requested, 32919b915181SSunil Mushran lockres->l_blocking, lockres->l_pending_gen); 3292ccd979bdSMark Fasheh BUG(); 3293ccd979bdSMark Fasheh } 3294ccd979bdSMark Fasheh 32959b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n", 32969b915181SSunil Mushran lockres->l_name, lockres->l_level, new_level, lockres->l_blocking); 3297ccd979bdSMark Fasheh 3298ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_DOWNCONVERT; 3299ccd979bdSMark Fasheh lockres->l_requested = new_level; 3300ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3301de551246SJoel Becker return lockres_set_pending(lockres); 3302ccd979bdSMark Fasheh } 3303ccd979bdSMark Fasheh 3304ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 3305ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3306ccd979bdSMark Fasheh int new_level, 3307de551246SJoel Becker int lvb, 3308de551246SJoel Becker unsigned int generation) 3309ccd979bdSMark Fasheh { 3310bd3e7610SJoel Becker int ret; 3311bd3e7610SJoel Becker u32 dlm_flags = DLM_LKF_CONVERT; 3312ccd979bdSMark Fasheh 33139b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, 33149b915181SSunil Mushran lockres->l_level, new_level); 33159b915181SSunil Mushran 3316ccd979bdSMark Fasheh 
if (lvb) 3317bd3e7610SJoel Becker dlm_flags |= DLM_LKF_VALBLK; 3318ccd979bdSMark Fasheh 33194670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 3320ccd979bdSMark Fasheh new_level, 3321ccd979bdSMark Fasheh &lockres->l_lksb, 3322ccd979bdSMark Fasheh dlm_flags, 3323ccd979bdSMark Fasheh lockres->l_name, 3324a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 3325de551246SJoel Becker lockres_clear_pending(lockres, generation, osb); 33267431cd7eSJoel Becker if (ret) { 33277431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 3328ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 3329ccd979bdSMark Fasheh goto bail; 3330ccd979bdSMark Fasheh } 3331ccd979bdSMark Fasheh 3332ccd979bdSMark Fasheh ret = 0; 3333ccd979bdSMark Fasheh bail: 3334ccd979bdSMark Fasheh return ret; 3335ccd979bdSMark Fasheh } 3336ccd979bdSMark Fasheh 333724ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ 3338ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 3339ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3340ccd979bdSMark Fasheh { 3341ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3342ccd979bdSMark Fasheh 3343ccd979bdSMark Fasheh if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3344ccd979bdSMark Fasheh /* If we're already trying to cancel a lock conversion 3345ccd979bdSMark Fasheh * then just drop the spinlock and allow the caller to 3346ccd979bdSMark Fasheh * requeue this lock. */ 33479b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name); 3348ccd979bdSMark Fasheh return 0; 3349ccd979bdSMark Fasheh } 3350ccd979bdSMark Fasheh 3351ccd979bdSMark Fasheh /* were we in a convert when we got the bast fire? 
*/ 3352ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 3353ccd979bdSMark Fasheh lockres->l_action != OCFS2_AST_DOWNCONVERT); 3354ccd979bdSMark Fasheh /* set things up for the unlockast to know to just 3355ccd979bdSMark Fasheh * clear out the ast_action and unset busy, etc. */ 3356ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 3357ccd979bdSMark Fasheh 3358ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 3359ccd979bdSMark Fasheh "lock %s, invalid flags: 0x%lx\n", 3360ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3361ccd979bdSMark Fasheh 33629b915181SSunil Mushran mlog(ML_BASTS, "lockres %s\n", lockres->l_name); 33639b915181SSunil Mushran 3364ccd979bdSMark Fasheh return 1; 3365ccd979bdSMark Fasheh } 3366ccd979bdSMark Fasheh 3367ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 3368ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3369ccd979bdSMark Fasheh { 3370ccd979bdSMark Fasheh int ret; 3371ccd979bdSMark Fasheh 33724670c46dSJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 3373a796d286SJoel Becker DLM_LKF_CANCEL); 33747431cd7eSJoel Becker if (ret) { 33757431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3376ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 0); 3377ccd979bdSMark Fasheh } 3378ccd979bdSMark Fasheh 33799b915181SSunil Mushran mlog(ML_BASTS, "lockres %s\n", lockres->l_name); 3380ccd979bdSMark Fasheh 3381ccd979bdSMark Fasheh return ret; 3382ccd979bdSMark Fasheh } 3383ccd979bdSMark Fasheh 3384b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb, 3385ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3386cc567d89SMark Fasheh struct ocfs2_unblock_ctl *ctl) 3387ccd979bdSMark Fasheh { 3388ccd979bdSMark Fasheh unsigned long flags; 3389ccd979bdSMark Fasheh int blocking; 3390ccd979bdSMark Fasheh int new_level; 3391079b8057SSunil Mushran int level; 3392ccd979bdSMark Fasheh 
int ret = 0; 33935ef0d4eaSMark Fasheh int set_lvb = 0; 3394de551246SJoel Becker unsigned int gen; 3395ccd979bdSMark Fasheh 3396ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3397ccd979bdSMark Fasheh 3398ccd979bdSMark Fasheh recheck: 3399db0f6ce6SSunil Mushran /* 3400db0f6ce6SSunil Mushran * Is it still blocking? If not, we have no more work to do. 3401db0f6ce6SSunil Mushran */ 3402db0f6ce6SSunil Mushran if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) { 3403db0f6ce6SSunil Mushran BUG_ON(lockres->l_blocking != DLM_LOCK_NL); 3404db0f6ce6SSunil Mushran spin_unlock_irqrestore(&lockres->l_lock, flags); 3405db0f6ce6SSunil Mushran ret = 0; 3406db0f6ce6SSunil Mushran goto leave; 3407db0f6ce6SSunil Mushran } 3408db0f6ce6SSunil Mushran 3409ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3410de551246SJoel Becker /* XXX 3411de551246SJoel Becker * This is a *big* race. The OCFS2_LOCK_PENDING flag 3412de551246SJoel Becker * exists entirely for one reason - another thread has set 3413de551246SJoel Becker * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3414de551246SJoel Becker * 3415de551246SJoel Becker * If we do ocfs2_cancel_convert() before the other thread 3416de551246SJoel Becker * calls dlm_lock(), our cancel will do nothing. We will 3417de551246SJoel Becker * get no ast, and we will have no way of knowing the 3418de551246SJoel Becker * cancel failed. Meanwhile, the other thread will call 3419de551246SJoel Becker * into dlm_lock() and wait...forever. 3420de551246SJoel Becker * 3421de551246SJoel Becker * Why forever? Because another node has asked for the 3422de551246SJoel Becker * lock first; that's why we're here in unblock_lock(). 3423de551246SJoel Becker * 3424de551246SJoel Becker * The solution is OCFS2_LOCK_PENDING. When PENDING is 3425de551246SJoel Becker * set, we just requeue the unblock. 
Only when the other 3426de551246SJoel Becker * thread has called dlm_lock() and cleared PENDING will 3427de551246SJoel Becker * we then cancel their request. 3428de551246SJoel Becker * 3429de551246SJoel Becker * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3430de551246SJoel Becker * at the same time they set OCFS2_DLM_BUSY. They must 3431de551246SJoel Becker * clear OCFS2_DLM_PENDING after dlm_lock() returns. 3432de551246SJoel Becker */ 34339b915181SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_PENDING) { 34349b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Pending\n", 34359b915181SSunil Mushran lockres->l_name); 3436de551246SJoel Becker goto leave_requeue; 34379b915181SSunil Mushran } 3438de551246SJoel Becker 3439d680efe9SMark Fasheh ctl->requeue = 1; 3440ccd979bdSMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 3441ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3442ccd979bdSMark Fasheh if (ret) { 3443ccd979bdSMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 3444ccd979bdSMark Fasheh if (ret < 0) 3445ccd979bdSMark Fasheh mlog_errno(ret); 3446ccd979bdSMark Fasheh } 3447ccd979bdSMark Fasheh goto leave; 3448ccd979bdSMark Fasheh } 3449ccd979bdSMark Fasheh 3450a1912826SSunil Mushran /* 3451a1912826SSunil Mushran * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is 3452a1912826SSunil Mushran * set when the ast is received for an upconvert just before the 3453a1912826SSunil Mushran * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast 3454a1912826SSunil Mushran * on the heels of the ast, we want to delay the downconvert just 3455a1912826SSunil Mushran * enough to allow the up requestor to do its task. Because this 3456a1912826SSunil Mushran * lock is in the blocked queue, the lock will be downconverted 3457a1912826SSunil Mushran * as soon as the requestor is done with the lock. 
3458a1912826SSunil Mushran */ 3459a1912826SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) 3460a1912826SSunil Mushran goto leave_requeue; 3461a1912826SSunil Mushran 34620d74125aSSunil Mushran /* 34630d74125aSSunil Mushran * How can we block and yet be at NL? We were trying to upconvert 34640d74125aSSunil Mushran * from NL and got canceled. The code comes back here, and now 34650d74125aSSunil Mushran * we notice and clear BLOCKING. 34660d74125aSSunil Mushran */ 34670d74125aSSunil Mushran if (lockres->l_level == DLM_LOCK_NL) { 34680d74125aSSunil Mushran BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); 34699b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name); 34700d74125aSSunil Mushran lockres->l_blocking = DLM_LOCK_NL; 34710d74125aSSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 34720d74125aSSunil Mushran spin_unlock_irqrestore(&lockres->l_lock, flags); 34730d74125aSSunil Mushran goto leave; 34740d74125aSSunil Mushran } 34750d74125aSSunil Mushran 3476ccd979bdSMark Fasheh /* if we're blocking an exclusive and we have *any* holders, 3477ccd979bdSMark Fasheh * then requeue. 
*/ 3478bd3e7610SJoel Becker if ((lockres->l_blocking == DLM_LOCK_EX) 34799b915181SSunil Mushran && (lockres->l_ex_holders || lockres->l_ro_holders)) { 34809b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n", 34819b915181SSunil Mushran lockres->l_name, lockres->l_ex_holders, 34829b915181SSunil Mushran lockres->l_ro_holders); 3483f7fbfdd1SMark Fasheh goto leave_requeue; 34849b915181SSunil Mushran } 3485ccd979bdSMark Fasheh 3486ccd979bdSMark Fasheh /* If it's a PR we're blocking, then only 3487ccd979bdSMark Fasheh * requeue if we've got any EX holders */ 3488bd3e7610SJoel Becker if (lockres->l_blocking == DLM_LOCK_PR && 34899b915181SSunil Mushran lockres->l_ex_holders) { 34909b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n", 34919b915181SSunil Mushran lockres->l_name, lockres->l_ex_holders); 3492f7fbfdd1SMark Fasheh goto leave_requeue; 34939b915181SSunil Mushran } 3494f7fbfdd1SMark Fasheh 3495f7fbfdd1SMark Fasheh /* 3496f7fbfdd1SMark Fasheh * Can we get a lock in this state if the holder counts are 3497f7fbfdd1SMark Fasheh * zero? The meta data unblock code used to check this. 
3498f7fbfdd1SMark Fasheh */ 3499f7fbfdd1SMark Fasheh if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 35009b915181SSunil Mushran && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) { 35019b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n", 35029b915181SSunil Mushran lockres->l_name); 3503f7fbfdd1SMark Fasheh goto leave_requeue; 35049b915181SSunil Mushran } 3505ccd979bdSMark Fasheh 350616d5b956SMark Fasheh new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 350716d5b956SMark Fasheh 350816d5b956SMark Fasheh if (lockres->l_ops->check_downconvert 35099b915181SSunil Mushran && !lockres->l_ops->check_downconvert(lockres, new_level)) { 35109b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n", 35119b915181SSunil Mushran lockres->l_name); 351216d5b956SMark Fasheh goto leave_requeue; 35139b915181SSunil Mushran } 351416d5b956SMark Fasheh 3515ccd979bdSMark Fasheh /* If we get here, then we know that there are no more 3516ccd979bdSMark Fasheh * incompatible holders (and anyone asking for an incompatible 3517ccd979bdSMark Fasheh * lock is blocked). We can now downconvert the lock */ 3518cc567d89SMark Fasheh if (!lockres->l_ops->downconvert_worker) 3519ccd979bdSMark Fasheh goto downconvert; 3520ccd979bdSMark Fasheh 3521ccd979bdSMark Fasheh /* Some lockres types want to do a bit of work before 3522ccd979bdSMark Fasheh * downconverting a lock. Allow that here. The worker function 3523ccd979bdSMark Fasheh * may sleep, so we save off a copy of what we're blocking as 3524ccd979bdSMark Fasheh * it may change while we're not holding the spin lock. 
*/ 3525ccd979bdSMark Fasheh blocking = lockres->l_blocking; 3526079b8057SSunil Mushran level = lockres->l_level; 3527ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3528ccd979bdSMark Fasheh 3529cc567d89SMark Fasheh ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3530d680efe9SMark Fasheh 35319b915181SSunil Mushran if (ctl->unblock_action == UNBLOCK_STOP_POST) { 35329b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n", 35339b915181SSunil Mushran lockres->l_name); 3534d680efe9SMark Fasheh goto leave; 35359b915181SSunil Mushran } 3536ccd979bdSMark Fasheh 3537ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3538079b8057SSunil Mushran if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { 3539ccd979bdSMark Fasheh /* If this changed underneath us, then we can't drop 3540ccd979bdSMark Fasheh * it just yet. */ 35419b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, " 35429b915181SSunil Mushran "Recheck\n", lockres->l_name, blocking, 35439b915181SSunil Mushran lockres->l_blocking, level, lockres->l_level); 3544ccd979bdSMark Fasheh goto recheck; 3545ccd979bdSMark Fasheh } 3546ccd979bdSMark Fasheh 3547ccd979bdSMark Fasheh downconvert: 3548d680efe9SMark Fasheh ctl->requeue = 0; 3549ccd979bdSMark Fasheh 35505ef0d4eaSMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3551bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_EX) 35525ef0d4eaSMark Fasheh set_lvb = 1; 35535ef0d4eaSMark Fasheh 35545ef0d4eaSMark Fasheh /* 35555ef0d4eaSMark Fasheh * We only set the lvb if the lock has been fully 35565ef0d4eaSMark Fasheh * refreshed - otherwise we risk setting stale 35575ef0d4eaSMark Fasheh * data. Otherwise, there's no need to actually clear 35585ef0d4eaSMark Fasheh * out the lvb here as it's value is still valid. 
35595ef0d4eaSMark Fasheh */ 35605ef0d4eaSMark Fasheh if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 35615ef0d4eaSMark Fasheh lockres->l_ops->set_lvb(lockres); 35625ef0d4eaSMark Fasheh } 35635ef0d4eaSMark Fasheh 3564de551246SJoel Becker gen = ocfs2_prepare_downconvert(lockres, new_level); 3565ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3566de551246SJoel Becker ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3567de551246SJoel Becker gen); 3568de551246SJoel Becker 3569ccd979bdSMark Fasheh leave: 3570c1e8d35eSTao Ma if (ret) 3571c1e8d35eSTao Ma mlog_errno(ret); 3572ccd979bdSMark Fasheh return ret; 3573f7fbfdd1SMark Fasheh 3574f7fbfdd1SMark Fasheh leave_requeue: 3575f7fbfdd1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3576f7fbfdd1SMark Fasheh ctl->requeue = 1; 3577f7fbfdd1SMark Fasheh 3578f7fbfdd1SMark Fasheh return 0; 3579ccd979bdSMark Fasheh } 3580ccd979bdSMark Fasheh 3581d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 3582ccd979bdSMark Fasheh int blocking) 3583ccd979bdSMark Fasheh { 3584ccd979bdSMark Fasheh struct inode *inode; 3585ccd979bdSMark Fasheh struct address_space *mapping; 35865e98d492SGoldwyn Rodrigues struct ocfs2_inode_info *oi; 3587ccd979bdSMark Fasheh 3588ccd979bdSMark Fasheh inode = ocfs2_lock_res_inode(lockres); 3589ccd979bdSMark Fasheh mapping = inode->i_mapping; 3590ccd979bdSMark Fasheh 35915e98d492SGoldwyn Rodrigues if (S_ISDIR(inode->i_mode)) { 35925e98d492SGoldwyn Rodrigues oi = OCFS2_I(inode); 35935e98d492SGoldwyn Rodrigues oi->ip_dir_lock_gen++; 35945e98d492SGoldwyn Rodrigues mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); 35955e98d492SGoldwyn Rodrigues goto out; 35965e98d492SGoldwyn Rodrigues } 35975e98d492SGoldwyn Rodrigues 35981044e401SMark Fasheh if (!S_ISREG(inode->i_mode)) 3599f1f54068SMark Fasheh goto out; 3600f1f54068SMark Fasheh 36017f4a2a97SMark Fasheh /* 36027f4a2a97SMark Fasheh * We need this before the 
filemap_fdatawrite() so that it can 36037f4a2a97SMark Fasheh * transfer the dirty bit from the PTE to the 36047f4a2a97SMark Fasheh * page. Unfortunately this means that even for EX->PR 36057f4a2a97SMark Fasheh * downconverts, we'll lose our mappings and have to build 36067f4a2a97SMark Fasheh * them up again. 36077f4a2a97SMark Fasheh */ 36087f4a2a97SMark Fasheh unmap_mapping_range(mapping, 0, 0, 0); 36097f4a2a97SMark Fasheh 3610ccd979bdSMark Fasheh if (filemap_fdatawrite(mapping)) { 3611b0697053SMark Fasheh mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 3612b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 3613ccd979bdSMark Fasheh } 3614ccd979bdSMark Fasheh sync_mapping_buffers(mapping); 3615bd3e7610SJoel Becker if (blocking == DLM_LOCK_EX) { 3616ccd979bdSMark Fasheh truncate_inode_pages(mapping, 0); 3617ccd979bdSMark Fasheh } else { 3618ccd979bdSMark Fasheh /* We only need to wait on the I/O if we're not also 3619ccd979bdSMark Fasheh * truncating pages because truncate_inode_pages waits 3620ccd979bdSMark Fasheh * for us above. We don't truncate pages if we're 3621ccd979bdSMark Fasheh * blocking anything < EXMODE because we want to keep 3622ccd979bdSMark Fasheh * them around in that case. 
*/ 3623ccd979bdSMark Fasheh filemap_fdatawait(mapping); 3624ccd979bdSMark Fasheh } 3625ccd979bdSMark Fasheh 3626f1f54068SMark Fasheh out: 3627d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3628ccd979bdSMark Fasheh } 3629ccd979bdSMark Fasheh 3630a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, 3631a4338481STao Ma struct ocfs2_lock_res *lockres, 3632810d5aebSMark Fasheh int new_level) 3633810d5aebSMark Fasheh { 3634a4338481STao Ma int checkpointed = ocfs2_ci_fully_checkpointed(ci); 3635810d5aebSMark Fasheh 3636bd3e7610SJoel Becker BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3637bd3e7610SJoel Becker BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3638810d5aebSMark Fasheh 3639810d5aebSMark Fasheh if (checkpointed) 3640810d5aebSMark Fasheh return 1; 3641810d5aebSMark Fasheh 3642a4338481STao Ma ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); 3643810d5aebSMark Fasheh return 0; 3644810d5aebSMark Fasheh } 3645810d5aebSMark Fasheh 3646a4338481STao Ma static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3647a4338481STao Ma int new_level) 3648a4338481STao Ma { 3649a4338481STao Ma struct inode *inode = ocfs2_lock_res_inode(lockres); 3650a4338481STao Ma 3651a4338481STao Ma return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level); 3652a4338481STao Ma } 3653a4338481STao Ma 3654810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3655810d5aebSMark Fasheh { 3656810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 3657810d5aebSMark Fasheh 3658810d5aebSMark Fasheh __ocfs2_stuff_meta_lvb(inode); 3659810d5aebSMark Fasheh } 3660810d5aebSMark Fasheh 3661d680efe9SMark Fasheh /* 3662d680efe9SMark Fasheh * Does the final reference drop on our dentry lock. 
Right now this 366334d024f8SMark Fasheh * happens in the downconvert thread, but we could choose to simplify the 3664d680efe9SMark Fasheh * dlmglue API and push these off to the ocfs2_wq in the future. 3665d680efe9SMark Fasheh */ 3666d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 3667d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3668d680efe9SMark Fasheh { 3669d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3670d680efe9SMark Fasheh ocfs2_dentry_lock_put(osb, dl); 3671d680efe9SMark Fasheh } 3672d680efe9SMark Fasheh 3673d680efe9SMark Fasheh /* 3674d680efe9SMark Fasheh * d_delete() matching dentries before the lock downconvert. 3675d680efe9SMark Fasheh * 3676d680efe9SMark Fasheh * At this point, any process waiting to destroy the 3677d680efe9SMark Fasheh * dentry_lock due to last ref count is stopped by the 3678d680efe9SMark Fasheh * OCFS2_LOCK_QUEUED flag. 3679d680efe9SMark Fasheh * 3680d680efe9SMark Fasheh * We have two potential problems 3681d680efe9SMark Fasheh * 3682d680efe9SMark Fasheh * 1) If we do the last reference drop on our dentry_lock (via dput) 3683d680efe9SMark Fasheh * we'll wind up in ocfs2_release_dentry_lock(), waiting on 3684d680efe9SMark Fasheh * the downconvert to finish. Instead we take an elevated 3685d680efe9SMark Fasheh * reference and push the drop until after we've completed our 3686d680efe9SMark Fasheh * unblock processing. 3687d680efe9SMark Fasheh * 3688d680efe9SMark Fasheh * 2) There might be another process with a final reference, 3689d680efe9SMark Fasheh * waiting on us to finish processing. If this is the case, we 3690d680efe9SMark Fasheh * detect it and exit out - there's no more dentries anyway. 
3691d680efe9SMark Fasheh */ 3692d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 3693d680efe9SMark Fasheh int blocking) 3694d680efe9SMark Fasheh { 3695d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3696d680efe9SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); 3697d680efe9SMark Fasheh struct dentry *dentry; 3698d680efe9SMark Fasheh unsigned long flags; 3699d680efe9SMark Fasheh int extra_ref = 0; 3700d680efe9SMark Fasheh 3701d680efe9SMark Fasheh /* 3702d680efe9SMark Fasheh * This node is blocking another node from getting a read 3703d680efe9SMark Fasheh * lock. This happens when we've renamed within a 3704d680efe9SMark Fasheh * directory. We've forced the other nodes to d_delete(), but 3705d680efe9SMark Fasheh * we never actually dropped our lock because it's still 3706d680efe9SMark Fasheh * valid. The downconvert code will retain a PR for this node, 3707d680efe9SMark Fasheh * so there's no further work to do. 3708d680efe9SMark Fasheh */ 3709bd3e7610SJoel Becker if (blocking == DLM_LOCK_PR) 3710d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3711d680efe9SMark Fasheh 3712d680efe9SMark Fasheh /* 3713d680efe9SMark Fasheh * Mark this inode as potentially orphaned. The code in 3714d680efe9SMark Fasheh * ocfs2_delete_inode() will figure out whether it actually 3715d680efe9SMark Fasheh * needs to be freed or not. 3716d680efe9SMark Fasheh */ 3717d680efe9SMark Fasheh spin_lock(&oi->ip_lock); 3718d680efe9SMark Fasheh oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 3719d680efe9SMark Fasheh spin_unlock(&oi->ip_lock); 3720d680efe9SMark Fasheh 3721d680efe9SMark Fasheh /* 3722d680efe9SMark Fasheh * Yuck. We need to make sure however that the check of 3723d680efe9SMark Fasheh * OCFS2_LOCK_FREEING and the extra reference are atomic with 3724d680efe9SMark Fasheh * respect to a reference decrement or the setting of that 3725d680efe9SMark Fasheh * flag. 
3726d680efe9SMark Fasheh */ 3727d680efe9SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3728d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3729d680efe9SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_FREEING) 3730d680efe9SMark Fasheh && dl->dl_count) { 3731d680efe9SMark Fasheh dl->dl_count++; 3732d680efe9SMark Fasheh extra_ref = 1; 3733d680efe9SMark Fasheh } 3734d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3735d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3736d680efe9SMark Fasheh 3737d680efe9SMark Fasheh mlog(0, "extra_ref = %d\n", extra_ref); 3738d680efe9SMark Fasheh 3739d680efe9SMark Fasheh /* 3740d680efe9SMark Fasheh * We have a process waiting on us in ocfs2_dentry_iput(), 3741d680efe9SMark Fasheh * which means we can't have any more outstanding 3742d680efe9SMark Fasheh * aliases. There's no need to do any more work. 3743d680efe9SMark Fasheh */ 3744d680efe9SMark Fasheh if (!extra_ref) 3745d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3746d680efe9SMark Fasheh 3747d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3748d680efe9SMark Fasheh while (1) { 3749d680efe9SMark Fasheh dentry = ocfs2_find_local_alias(dl->dl_inode, 3750d680efe9SMark Fasheh dl->dl_parent_blkno, 1); 3751d680efe9SMark Fasheh if (!dentry) 3752d680efe9SMark Fasheh break; 3753d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3754d680efe9SMark Fasheh 375510ab8811Salex chen if (S_ISDIR(dl->dl_inode->i_mode)) 375610ab8811Salex chen shrink_dcache_parent(dentry); 375710ab8811Salex chen 3758a455589fSAl Viro mlog(0, "d_delete(%pd);\n", dentry); 3759d680efe9SMark Fasheh 3760d680efe9SMark Fasheh /* 3761d680efe9SMark Fasheh * The following dcache calls may do an 3762d680efe9SMark Fasheh * iput(). 
Normally we don't want that from the 3763d680efe9SMark Fasheh * downconverting thread, but in this case it's ok 3764d680efe9SMark Fasheh * because the requesting node already has an 3765d680efe9SMark Fasheh * exclusive lock on the inode, so it can't be queued 3766d680efe9SMark Fasheh * for a downconvert. 3767d680efe9SMark Fasheh */ 3768d680efe9SMark Fasheh d_delete(dentry); 3769d680efe9SMark Fasheh dput(dentry); 3770d680efe9SMark Fasheh 3771d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3772d680efe9SMark Fasheh } 3773d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3774d680efe9SMark Fasheh 3775d680efe9SMark Fasheh /* 3776d680efe9SMark Fasheh * If we are the last holder of this dentry lock, there is no 3777d680efe9SMark Fasheh * reason to downconvert so skip straight to the unlock. 3778d680efe9SMark Fasheh */ 3779d680efe9SMark Fasheh if (dl->dl_count == 1) 3780d680efe9SMark Fasheh return UNBLOCK_STOP_POST; 3781d680efe9SMark Fasheh 3782d680efe9SMark Fasheh return UNBLOCK_CONTINUE_POST; 3783d680efe9SMark Fasheh } 3784d680efe9SMark Fasheh 37858dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 37868dec98edSTao Ma int new_level) 37878dec98edSTao Ma { 37888dec98edSTao Ma struct ocfs2_refcount_tree *tree = 37898dec98edSTao Ma ocfs2_lock_res_refcount_tree(lockres); 37908dec98edSTao Ma 37918dec98edSTao Ma return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); 37928dec98edSTao Ma } 37938dec98edSTao Ma 37948dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 37958dec98edSTao Ma int blocking) 37968dec98edSTao Ma { 37978dec98edSTao Ma struct ocfs2_refcount_tree *tree = 37988dec98edSTao Ma ocfs2_lock_res_refcount_tree(lockres); 37998dec98edSTao Ma 38008dec98edSTao Ma ocfs2_metadata_cache_purge(&tree->rf_ci); 38018dec98edSTao Ma 38028dec98edSTao Ma return UNBLOCK_CONTINUE; 38038dec98edSTao Ma } 38048dec98edSTao Ma 38059e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct 
ocfs2_lock_res *lockres) 38069e33d69fSJan Kara { 38079e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb; 38089e33d69fSJan Kara struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); 38099e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 38109e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 38119e33d69fSJan Kara 3812a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 38139e33d69fSJan Kara lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; 38149e33d69fSJan Kara lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); 38159e33d69fSJan Kara lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); 38169e33d69fSJan Kara lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); 38179e33d69fSJan Kara lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); 38189e33d69fSJan Kara lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); 38199e33d69fSJan Kara lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); 38209e33d69fSJan Kara } 38219e33d69fSJan Kara 38229e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) 38239e33d69fSJan Kara { 38249e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 38259e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 38269e33d69fSJan Kara int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 38279e33d69fSJan Kara 38289e33d69fSJan Kara if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 38299e33d69fSJan Kara ocfs2_cluster_unlock(osb, lockres, level); 38309e33d69fSJan Kara } 38319e33d69fSJan Kara 38329e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) 38339e33d69fSJan Kara { 38349e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 38359e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 38369e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 38379e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 383885eb8b73SJoel Becker struct buffer_head *bh = NULL; 38399e33d69fSJan Kara struct ocfs2_global_disk_dqinfo *gdinfo; 38409e33d69fSJan Kara int status = 0; 38419e33d69fSJan Kara 38421c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 38431c520dfbSJoel Becker lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { 38449e33d69fSJan Kara info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); 38459e33d69fSJan Kara info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); 38469e33d69fSJan Kara oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); 38479e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); 38489e33d69fSJan Kara oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); 38499e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 38509e33d69fSJan Kara be32_to_cpu(lvb->lvb_free_entry); 38519e33d69fSJan Kara } else { 3852ae4f6ef1SJan Kara status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode, 3853ae4f6ef1SJan Kara oinfo->dqi_giblk, &bh); 385485eb8b73SJoel Becker if (status) { 38559e33d69fSJan Kara mlog_errno(status); 38569e33d69fSJan Kara goto bail; 38579e33d69fSJan Kara } 38589e33d69fSJan Kara gdinfo = (struct ocfs2_global_disk_dqinfo *) 38599e33d69fSJan Kara (bh->b_data + OCFS2_GLOBAL_INFO_OFF); 38609e33d69fSJan Kara info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); 38619e33d69fSJan Kara info->dqi_igrace = 
le32_to_cpu(gdinfo->dqi_igrace); 38629e33d69fSJan Kara oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); 38639e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); 38649e33d69fSJan Kara oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); 38659e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 38669e33d69fSJan Kara le32_to_cpu(gdinfo->dqi_free_entry); 38679e33d69fSJan Kara brelse(bh); 38689e33d69fSJan Kara ocfs2_track_lock_refresh(lockres); 38699e33d69fSJan Kara } 38709e33d69fSJan Kara 38719e33d69fSJan Kara bail: 38729e33d69fSJan Kara return status; 38739e33d69fSJan Kara } 38749e33d69fSJan Kara 38759e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file 38769e33d69fSJan Kara * so that we can safely refresh quota info from disk. */ 38779e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) 38789e33d69fSJan Kara { 38799e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 38809e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 38819e33d69fSJan Kara int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 38829e33d69fSJan Kara int status = 0; 38839e33d69fSJan Kara 38849e33d69fSJan Kara /* On RO devices, locking really isn't needed... 
*/ 38859e33d69fSJan Kara if (ocfs2_is_hard_readonly(osb)) { 38869e33d69fSJan Kara if (ex) 38879e33d69fSJan Kara status = -EROFS; 38889e33d69fSJan Kara goto bail; 38899e33d69fSJan Kara } 38909e33d69fSJan Kara if (ocfs2_mount_local(osb)) 38919e33d69fSJan Kara goto bail; 38929e33d69fSJan Kara 38939e33d69fSJan Kara status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 38949e33d69fSJan Kara if (status < 0) { 38959e33d69fSJan Kara mlog_errno(status); 38969e33d69fSJan Kara goto bail; 38979e33d69fSJan Kara } 38989e33d69fSJan Kara if (!ocfs2_should_refresh_lock_res(lockres)) 38999e33d69fSJan Kara goto bail; 39009e33d69fSJan Kara /* OK, we have the lock but we need to refresh the quota info */ 39019e33d69fSJan Kara status = ocfs2_refresh_qinfo(oinfo); 39029e33d69fSJan Kara if (status) 39039e33d69fSJan Kara ocfs2_qinfo_unlock(oinfo, ex); 39049e33d69fSJan Kara ocfs2_complete_lock_res_refresh(lockres, status); 39059e33d69fSJan Kara bail: 39069e33d69fSJan Kara return status; 39079e33d69fSJan Kara } 39089e33d69fSJan Kara 39098dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) 39108dec98edSTao Ma { 39118dec98edSTao Ma int status; 39128dec98edSTao Ma int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 39138dec98edSTao Ma struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 39148dec98edSTao Ma struct ocfs2_super *osb = lockres->l_priv; 39158dec98edSTao Ma 39168dec98edSTao Ma 39178dec98edSTao Ma if (ocfs2_is_hard_readonly(osb)) 39188dec98edSTao Ma return -EROFS; 39198dec98edSTao Ma 39208dec98edSTao Ma if (ocfs2_mount_local(osb)) 39218dec98edSTao Ma return 0; 39228dec98edSTao Ma 39238dec98edSTao Ma status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 39248dec98edSTao Ma if (status < 0) 39258dec98edSTao Ma mlog_errno(status); 39268dec98edSTao Ma 39278dec98edSTao Ma return status; 39288dec98edSTao Ma } 39298dec98edSTao Ma 39308dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) 39318dec98edSTao Ma { 39328dec98edSTao Ma int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 39338dec98edSTao Ma struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 39348dec98edSTao Ma struct ocfs2_super *osb = lockres->l_priv; 39358dec98edSTao Ma 39368dec98edSTao Ma if (!ocfs2_mount_local(osb)) 39378dec98edSTao Ma ocfs2_cluster_unlock(osb, lockres, level); 39388dec98edSTao Ma } 39398dec98edSTao Ma 394000600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 3941ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3942ccd979bdSMark Fasheh { 3943ccd979bdSMark Fasheh int status; 3944d680efe9SMark Fasheh struct ocfs2_unblock_ctl ctl = {0, 0,}; 3945ccd979bdSMark Fasheh unsigned long flags; 3946ccd979bdSMark Fasheh 3947ccd979bdSMark Fasheh /* Our reference to the lockres in this function can be 3948ccd979bdSMark Fasheh * considered valid until we remove the OCFS2_LOCK_QUEUED 3949ccd979bdSMark Fasheh * flag. 
*/ 3950ccd979bdSMark Fasheh 3951ccd979bdSMark Fasheh BUG_ON(!lockres); 3952ccd979bdSMark Fasheh BUG_ON(!lockres->l_ops); 3953ccd979bdSMark Fasheh 39549b915181SSunil Mushran mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name); 3955ccd979bdSMark Fasheh 3956ccd979bdSMark Fasheh /* Detect whether a lock has been marked as going away while 395734d024f8SMark Fasheh * the downconvert thread was processing other things. A lock can 3958ccd979bdSMark Fasheh * still be marked with OCFS2_LOCK_FREEING after this check, 3959ccd979bdSMark Fasheh * but short circuiting here will still save us some 3960ccd979bdSMark Fasheh * performance. */ 3961ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3962ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) 3963ccd979bdSMark Fasheh goto unqueue; 3964ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3965ccd979bdSMark Fasheh 3966b5e500e2SMark Fasheh status = ocfs2_unblock_lock(osb, lockres, &ctl); 3967ccd979bdSMark Fasheh if (status < 0) 3968ccd979bdSMark Fasheh mlog_errno(status); 3969ccd979bdSMark Fasheh 3970ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3971ccd979bdSMark Fasheh unqueue: 3972d680efe9SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { 3973ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 3974ccd979bdSMark Fasheh } else 3975ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 3976ccd979bdSMark Fasheh 39779b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name, 3978d680efe9SMark Fasheh ctl.requeue ? 
"yes" : "no"); 3979ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3980ccd979bdSMark Fasheh 3981d680efe9SMark Fasheh if (ctl.unblock_action != UNBLOCK_CONTINUE 3982d680efe9SMark Fasheh && lockres->l_ops->post_unlock) 3983d680efe9SMark Fasheh lockres->l_ops->post_unlock(osb, lockres); 3984ccd979bdSMark Fasheh } 3985ccd979bdSMark Fasheh 3986ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 3987ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3988ccd979bdSMark Fasheh { 3989a75e9ccaSSrinivas Eeda unsigned long flags; 3990a75e9ccaSSrinivas Eeda 3991ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3992ccd979bdSMark Fasheh 3993ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) { 3994ccd979bdSMark Fasheh /* Do not schedule a lock for downconvert when it's on 3995ccd979bdSMark Fasheh * the way to destruction - any nodes wanting access 3996ccd979bdSMark Fasheh * to the resource will get it soon. */ 39979b915181SSunil Mushran mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n", 3998ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3999ccd979bdSMark Fasheh return; 4000ccd979bdSMark Fasheh } 4001ccd979bdSMark Fasheh 4002ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 4003ccd979bdSMark Fasheh 4004a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 4005ccd979bdSMark Fasheh if (list_empty(&lockres->l_blocked_list)) { 4006ccd979bdSMark Fasheh list_add_tail(&lockres->l_blocked_list, 4007ccd979bdSMark Fasheh &osb->blocked_lock_list); 4008ccd979bdSMark Fasheh osb->blocked_lock_count++; 4009ccd979bdSMark Fasheh } 4010a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 4011ccd979bdSMark Fasheh } 401234d024f8SMark Fasheh 401334d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 401434d024f8SMark Fasheh { 401534d024f8SMark Fasheh unsigned long processed; 4016a75e9ccaSSrinivas Eeda unsigned long 
flags; 401734d024f8SMark Fasheh struct ocfs2_lock_res *lockres; 401834d024f8SMark Fasheh 4019a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 402034d024f8SMark Fasheh /* grab this early so we know to try again if a state change and 402134d024f8SMark Fasheh * wake happens part-way through our work */ 402234d024f8SMark Fasheh osb->dc_work_sequence = osb->dc_wake_sequence; 402334d024f8SMark Fasheh 402434d024f8SMark Fasheh processed = osb->blocked_lock_count; 4025209f7512SJoseph Qi /* 4026209f7512SJoseph Qi * blocked lock processing in this loop might call iput which can 4027209f7512SJoseph Qi * remove items off osb->blocked_lock_list. Downconvert up to 4028209f7512SJoseph Qi * 'processed' number of locks, but stop short if we had some 4029209f7512SJoseph Qi * removed in ocfs2_mark_lockres_freeing when downconverting. 4030209f7512SJoseph Qi */ 4031209f7512SJoseph Qi while (processed && !list_empty(&osb->blocked_lock_list)) { 403234d024f8SMark Fasheh lockres = list_entry(osb->blocked_lock_list.next, 403334d024f8SMark Fasheh struct ocfs2_lock_res, l_blocked_list); 403434d024f8SMark Fasheh list_del_init(&lockres->l_blocked_list); 403534d024f8SMark Fasheh osb->blocked_lock_count--; 4036a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 403734d024f8SMark Fasheh 403834d024f8SMark Fasheh BUG_ON(!processed); 403934d024f8SMark Fasheh processed--; 404034d024f8SMark Fasheh 404134d024f8SMark Fasheh ocfs2_process_blocked_lock(osb, lockres); 404234d024f8SMark Fasheh 4043a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 404434d024f8SMark Fasheh } 4045a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 404634d024f8SMark Fasheh } 404734d024f8SMark Fasheh 404834d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 404934d024f8SMark Fasheh { 405034d024f8SMark Fasheh int empty = 0; 4051a75e9ccaSSrinivas Eeda unsigned long flags; 405234d024f8SMark Fasheh 
4053a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 405434d024f8SMark Fasheh if (list_empty(&osb->blocked_lock_list)) 405534d024f8SMark Fasheh empty = 1; 405634d024f8SMark Fasheh 4057a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 405834d024f8SMark Fasheh return empty; 405934d024f8SMark Fasheh } 406034d024f8SMark Fasheh 406134d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 406234d024f8SMark Fasheh { 406334d024f8SMark Fasheh int should_wake = 0; 4064a75e9ccaSSrinivas Eeda unsigned long flags; 406534d024f8SMark Fasheh 4066a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 406734d024f8SMark Fasheh if (osb->dc_work_sequence != osb->dc_wake_sequence) 406834d024f8SMark Fasheh should_wake = 1; 4069a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 407034d024f8SMark Fasheh 407134d024f8SMark Fasheh return should_wake; 407234d024f8SMark Fasheh } 407334d024f8SMark Fasheh 4074200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg) 407534d024f8SMark Fasheh { 407634d024f8SMark Fasheh int status = 0; 407734d024f8SMark Fasheh struct ocfs2_super *osb = arg; 407834d024f8SMark Fasheh 407934d024f8SMark Fasheh /* only quit once we've been asked to stop and there is no more 408034d024f8SMark Fasheh * work available */ 408134d024f8SMark Fasheh while (!(kthread_should_stop() && 408234d024f8SMark Fasheh ocfs2_downconvert_thread_lists_empty(osb))) { 408334d024f8SMark Fasheh 408434d024f8SMark Fasheh wait_event_interruptible(osb->dc_event, 408534d024f8SMark Fasheh ocfs2_downconvert_thread_should_wake(osb) || 408634d024f8SMark Fasheh kthread_should_stop()); 408734d024f8SMark Fasheh 408834d024f8SMark Fasheh mlog(0, "downconvert_thread: awoken\n"); 408934d024f8SMark Fasheh 409034d024f8SMark Fasheh ocfs2_downconvert_thread_do_work(osb); 409134d024f8SMark Fasheh } 409234d024f8SMark Fasheh 409334d024f8SMark Fasheh osb->dc_task = NULL; 409434d024f8SMark 
Fasheh return status; 409534d024f8SMark Fasheh } 409634d024f8SMark Fasheh 409734d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 409834d024f8SMark Fasheh { 4099a75e9ccaSSrinivas Eeda unsigned long flags; 4100a75e9ccaSSrinivas Eeda 4101a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 410234d024f8SMark Fasheh /* make sure the voting thread gets a swipe at whatever changes 410334d024f8SMark Fasheh * the caller may have made to the voting state */ 410434d024f8SMark Fasheh osb->dc_wake_sequence++; 4105a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 410634d024f8SMark Fasheh wake_up(&osb->dc_event); 410734d024f8SMark Fasheh } 4108