1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*- 2ccd979bdSMark Fasheh * vim: noexpandtab sw=8 ts=8 sts=0: 3ccd979bdSMark Fasheh * 4ccd979bdSMark Fasheh * dlmglue.c 5ccd979bdSMark Fasheh * 6ccd979bdSMark Fasheh * Code which implements an OCFS2 specific interface to our DLM. 7ccd979bdSMark Fasheh * 8ccd979bdSMark Fasheh * Copyright (C) 2003, 2004 Oracle. All rights reserved. 9ccd979bdSMark Fasheh * 10ccd979bdSMark Fasheh * This program is free software; you can redistribute it and/or 11ccd979bdSMark Fasheh * modify it under the terms of the GNU General Public 12ccd979bdSMark Fasheh * License as published by the Free Software Foundation; either 13ccd979bdSMark Fasheh * version 2 of the License, or (at your option) any later version. 14ccd979bdSMark Fasheh * 15ccd979bdSMark Fasheh * This program is distributed in the hope that it will be useful, 16ccd979bdSMark Fasheh * but WITHOUT ANY WARRANTY; without even the implied warranty of 17ccd979bdSMark Fasheh * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18ccd979bdSMark Fasheh * General Public License for more details. 19ccd979bdSMark Fasheh * 20ccd979bdSMark Fasheh * You should have received a copy of the GNU General Public 21ccd979bdSMark Fasheh * License along with this program; if not, write to the 22ccd979bdSMark Fasheh * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23ccd979bdSMark Fasheh * Boston, MA 021110-1307, USA. 
24ccd979bdSMark Fasheh */ 25ccd979bdSMark Fasheh 26ccd979bdSMark Fasheh #include <linux/types.h> 27ccd979bdSMark Fasheh #include <linux/slab.h> 28ccd979bdSMark Fasheh #include <linux/highmem.h> 29ccd979bdSMark Fasheh #include <linux/mm.h> 30ccd979bdSMark Fasheh #include <linux/kthread.h> 31ccd979bdSMark Fasheh #include <linux/pagemap.h> 32ccd979bdSMark Fasheh #include <linux/debugfs.h> 33ccd979bdSMark Fasheh #include <linux/seq_file.h> 348ddb7b00SSunil Mushran #include <linux/time.h> 359e33d69fSJan Kara #include <linux/quotaops.h> 36ccd979bdSMark Fasheh 37ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE 38ccd979bdSMark Fasheh #include <cluster/masklog.h> 39ccd979bdSMark Fasheh 40ccd979bdSMark Fasheh #include "ocfs2.h" 41d24fbcdaSJoel Becker #include "ocfs2_lockingver.h" 42ccd979bdSMark Fasheh 43ccd979bdSMark Fasheh #include "alloc.h" 44d680efe9SMark Fasheh #include "dcache.h" 45ccd979bdSMark Fasheh #include "dlmglue.h" 46ccd979bdSMark Fasheh #include "extent_map.h" 477f1a37e3STiger Yang #include "file.h" 48ccd979bdSMark Fasheh #include "heartbeat.h" 49ccd979bdSMark Fasheh #include "inode.h" 50ccd979bdSMark Fasheh #include "journal.h" 5124ef1815SJoel Becker #include "stackglue.h" 52ccd979bdSMark Fasheh #include "slot_map.h" 53ccd979bdSMark Fasheh #include "super.h" 54ccd979bdSMark Fasheh #include "uptodate.h" 559e33d69fSJan Kara #include "quota.h" 568dec98edSTao Ma #include "refcounttree.h" 57ccd979bdSMark Fasheh 58ccd979bdSMark Fasheh #include "buffer_head_io.h" 59ccd979bdSMark Fasheh 60ccd979bdSMark Fasheh struct ocfs2_mask_waiter { 61ccd979bdSMark Fasheh struct list_head mw_item; 62ccd979bdSMark Fasheh int mw_status; 63ccd979bdSMark Fasheh struct completion mw_complete; 64ccd979bdSMark Fasheh unsigned long mw_mask; 65ccd979bdSMark Fasheh unsigned long mw_goal; 668ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 675bc970e8SSunil Mushran ktime_t mw_lock_start; 688ddb7b00SSunil Mushran #endif 69ccd979bdSMark Fasheh }; 70ccd979bdSMark Fasheh 
7154a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 7254a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); 73cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); 749e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres); 75ccd979bdSMark Fasheh 76d680efe9SMark Fasheh /* 77cc567d89SMark Fasheh * Return value from ->downconvert_worker functions. 78d680efe9SMark Fasheh * 79b5e500e2SMark Fasheh * These control the precise actions of ocfs2_unblock_lock() 80d680efe9SMark Fasheh * and ocfs2_process_blocked_lock() 81d680efe9SMark Fasheh * 82d680efe9SMark Fasheh */ 83d680efe9SMark Fasheh enum ocfs2_unblock_action { 84d680efe9SMark Fasheh UNBLOCK_CONTINUE = 0, /* Continue downconvert */ 85d680efe9SMark Fasheh UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire 86d680efe9SMark Fasheh * ->post_unlock callback */ 87d680efe9SMark Fasheh UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire 88d680efe9SMark Fasheh * ->post_unlock() callback. 
*/ 89d680efe9SMark Fasheh }; 90d680efe9SMark Fasheh 91d680efe9SMark Fasheh struct ocfs2_unblock_ctl { 92d680efe9SMark Fasheh int requeue; 93d680efe9SMark Fasheh enum ocfs2_unblock_action unblock_action; 94d680efe9SMark Fasheh }; 95d680efe9SMark Fasheh 96cb25797dSJan Kara /* Lockdep class keys */ 97cb25797dSJan Kara struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; 98cb25797dSJan Kara 99810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 100810d5aebSMark Fasheh int new_level); 101810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 102810d5aebSMark Fasheh 103cc567d89SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 104cc567d89SMark Fasheh int blocking); 105cc567d89SMark Fasheh 106cc567d89SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 107cc567d89SMark Fasheh int blocking); 108d680efe9SMark Fasheh 109d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 110d680efe9SMark Fasheh struct ocfs2_lock_res *lockres); 111ccd979bdSMark Fasheh 1129e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); 1136cb129f5SAdrian Bunk 1148dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 1158dec98edSTao Ma int new_level); 1168dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 1178dec98edSTao Ma int blocking); 1188dec98edSTao Ma 1196cb129f5SAdrian Bunk #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 1206cb129f5SAdrian Bunk 1216cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. 
*/ 1226cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level, 1236cb129f5SAdrian Bunk const char *function, 1246cb129f5SAdrian Bunk unsigned int line, 1256cb129f5SAdrian Bunk struct ocfs2_lock_res *lockres) 1266cb129f5SAdrian Bunk { 127a641dc2aSMark Fasheh struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 1286cb129f5SAdrian Bunk 1296cb129f5SAdrian Bunk mlog(level, "LVB information for %s (called from %s:%u):\n", 1306cb129f5SAdrian Bunk lockres->l_name, function, line); 1316cb129f5SAdrian Bunk mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", 1326cb129f5SAdrian Bunk lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), 1336cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_igeneration)); 1346cb129f5SAdrian Bunk mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 1356cb129f5SAdrian Bunk (unsigned long long)be64_to_cpu(lvb->lvb_isize), 1366cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 1376cb129f5SAdrian Bunk be16_to_cpu(lvb->lvb_imode)); 1386cb129f5SAdrian Bunk mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 1396cb129f5SAdrian Bunk "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), 1406cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_iatime_packed), 1416cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_ictime_packed), 1426cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_imtime_packed), 1436cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iattr)); 1446cb129f5SAdrian Bunk } 1456cb129f5SAdrian Bunk 1466cb129f5SAdrian Bunk 147f625c979SMark Fasheh /* 148f625c979SMark Fasheh * OCFS2 Lock Resource Operations 149f625c979SMark Fasheh * 150f625c979SMark Fasheh * These fine tune the behavior of the generic dlmglue locking infrastructure. 1510d5dc6c2SMark Fasheh * 1520d5dc6c2SMark Fasheh * The most basic of lock types can point ->l_priv to their respective 1530d5dc6c2SMark Fasheh * struct ocfs2_super and allow the default actions to manage things. 
1540d5dc6c2SMark Fasheh * 1550d5dc6c2SMark Fasheh * Right now, each lock type also needs to implement an init function, 1560d5dc6c2SMark Fasheh * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() 1570d5dc6c2SMark Fasheh * should be called when the lock is no longer needed (i.e., object 1580d5dc6c2SMark Fasheh * destruction time). 159f625c979SMark Fasheh */ 160ccd979bdSMark Fasheh struct ocfs2_lock_res_ops { 16154a7e755SMark Fasheh /* 16254a7e755SMark Fasheh * Translate an ocfs2_lock_res * into an ocfs2_super *. Define 16354a7e755SMark Fasheh * this callback if ->l_priv is not an ocfs2_super pointer 16454a7e755SMark Fasheh */ 16554a7e755SMark Fasheh struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); 166b5e500e2SMark Fasheh 1670d5dc6c2SMark Fasheh /* 16834d024f8SMark Fasheh * Optionally called in the downconvert thread after a 16934d024f8SMark Fasheh * successful downconvert. The lockres will not be referenced 17034d024f8SMark Fasheh * after this callback is called, so it is safe to free 17134d024f8SMark Fasheh * memory, etc. 1720d5dc6c2SMark Fasheh * 1730d5dc6c2SMark Fasheh * The exact semantics of when this is called are controlled 1740d5dc6c2SMark Fasheh * by ->downconvert_worker() 1750d5dc6c2SMark Fasheh */ 176d680efe9SMark Fasheh void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); 177f625c979SMark Fasheh 178f625c979SMark Fasheh /* 17916d5b956SMark Fasheh * Allow a lock type to add checks to determine whether it is 18016d5b956SMark Fasheh * safe to downconvert a lock. Return 0 to re-queue the 18116d5b956SMark Fasheh * downconvert at a later time, nonzero to continue. 18216d5b956SMark Fasheh * 18316d5b956SMark Fasheh * For most locks, the default checks that there are no 18416d5b956SMark Fasheh * incompatible holders are sufficient. 18516d5b956SMark Fasheh * 18616d5b956SMark Fasheh * Called with the lockres spinlock held. 
18716d5b956SMark Fasheh */ 18816d5b956SMark Fasheh int (*check_downconvert)(struct ocfs2_lock_res *, int); 18916d5b956SMark Fasheh 19016d5b956SMark Fasheh /* 1915ef0d4eaSMark Fasheh * Allows a lock type to populate the lock value block. This 1925ef0d4eaSMark Fasheh * is called on downconvert, and when we drop a lock. 1935ef0d4eaSMark Fasheh * 1945ef0d4eaSMark Fasheh * Locks that want to use this should set LOCK_TYPE_USES_LVB 1955ef0d4eaSMark Fasheh * in the flags field. 1965ef0d4eaSMark Fasheh * 1975ef0d4eaSMark Fasheh * Called with the lockres spinlock held. 1985ef0d4eaSMark Fasheh */ 1995ef0d4eaSMark Fasheh void (*set_lvb)(struct ocfs2_lock_res *); 2005ef0d4eaSMark Fasheh 2015ef0d4eaSMark Fasheh /* 202cc567d89SMark Fasheh * Called from the downconvert thread when it is determined 203cc567d89SMark Fasheh * that a lock will be downconverted. This is called without 204cc567d89SMark Fasheh * any locks held so the function can do work that might 205cc567d89SMark Fasheh * schedule (syncing out data, etc). 206cc567d89SMark Fasheh * 207cc567d89SMark Fasheh * This should return any one of the ocfs2_unblock_action 208cc567d89SMark Fasheh * values, depending on what it wants the thread to do. 209cc567d89SMark Fasheh */ 210cc567d89SMark Fasheh int (*downconvert_worker)(struct ocfs2_lock_res *, int); 211cc567d89SMark Fasheh 212cc567d89SMark Fasheh /* 213f625c979SMark Fasheh * LOCK_TYPE_* flags which describe the specific requirements 214f625c979SMark Fasheh * of a lock type. Descriptions of each individual flag follow. 215f625c979SMark Fasheh */ 216f625c979SMark Fasheh int flags; 217ccd979bdSMark Fasheh }; 218ccd979bdSMark Fasheh 219f625c979SMark Fasheh /* 220f625c979SMark Fasheh * Some locks want to "refresh" potentially stale data when a 221f625c979SMark Fasheh * meaningful (PRMODE or EXMODE) lock level is first obtained. 
If this 222f625c979SMark Fasheh * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the 223f625c979SMark Fasheh * individual lockres l_flags member from the ast function. It is 224f625c979SMark Fasheh * expected that the locking wrapper will clear the 225f625c979SMark Fasheh * OCFS2_LOCK_NEEDS_REFRESH flag when done. 226f625c979SMark Fasheh */ 227f625c979SMark Fasheh #define LOCK_TYPE_REQUIRES_REFRESH 0x1 228f625c979SMark Fasheh 229b80fc012SMark Fasheh /* 2305ef0d4eaSMark Fasheh * Indicate that a lock type makes use of the lock value block. The 2315ef0d4eaSMark Fasheh * ->set_lvb lock type callback must be defined. 232b80fc012SMark Fasheh */ 233b80fc012SMark Fasheh #define LOCK_TYPE_USES_LVB 0x2 234b80fc012SMark Fasheh 235ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 23654a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 237f625c979SMark Fasheh .flags = 0, 238ccd979bdSMark Fasheh }; 239ccd979bdSMark Fasheh 240e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { 24154a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 242810d5aebSMark Fasheh .check_downconvert = ocfs2_check_meta_downconvert, 243810d5aebSMark Fasheh .set_lvb = ocfs2_set_meta_lvb, 244f1f54068SMark Fasheh .downconvert_worker = ocfs2_data_convert_worker, 245b80fc012SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 246ccd979bdSMark Fasheh }; 247ccd979bdSMark Fasheh 248ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = { 249f625c979SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH, 250ccd979bdSMark Fasheh }; 251ccd979bdSMark Fasheh 252ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = { 253f625c979SMark Fasheh .flags = 0, 254ccd979bdSMark Fasheh }; 255ccd979bdSMark Fasheh 2566ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { 2576ca497a8Swengang wang .flags = 0, 2586ca497a8Swengang wang }; 2596ca497a8Swengang wang 26083273932SSrinivas Eeda static 
struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { 26183273932SSrinivas Eeda .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 26283273932SSrinivas Eeda }; 26383273932SSrinivas Eeda 264d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { 26554a7e755SMark Fasheh .get_osb = ocfs2_get_dentry_osb, 266d680efe9SMark Fasheh .post_unlock = ocfs2_dentry_post_unlock, 267cc567d89SMark Fasheh .downconvert_worker = ocfs2_dentry_convert_worker, 268f625c979SMark Fasheh .flags = 0, 269d680efe9SMark Fasheh }; 270d680efe9SMark Fasheh 27150008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 27250008630STiger Yang .get_osb = ocfs2_get_inode_osb, 27350008630STiger Yang .flags = 0, 27450008630STiger Yang }; 27550008630STiger Yang 276cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = { 277cf8e06f1SMark Fasheh .get_osb = ocfs2_get_file_osb, 278cf8e06f1SMark Fasheh .flags = 0, 279cf8e06f1SMark Fasheh }; 280cf8e06f1SMark Fasheh 2819e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { 2829e33d69fSJan Kara .set_lvb = ocfs2_set_qinfo_lvb, 2839e33d69fSJan Kara .get_osb = ocfs2_get_qinfo_osb, 2849e33d69fSJan Kara .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, 2859e33d69fSJan Kara }; 2869e33d69fSJan Kara 2878dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { 2888dec98edSTao Ma .check_downconvert = ocfs2_check_refcount_downconvert, 2898dec98edSTao Ma .downconvert_worker = ocfs2_refcount_convert_worker, 2908dec98edSTao Ma .flags = 0, 2918dec98edSTao Ma }; 2928dec98edSTao Ma 293ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 294ccd979bdSMark Fasheh { 295ccd979bdSMark Fasheh return lockres->l_type == OCFS2_LOCK_TYPE_META || 29650008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_RW || 29750008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 298ccd979bdSMark Fasheh } 299ccd979bdSMark Fasheh 300c0e41338SJoel Becker 
static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) 301a796d286SJoel Becker { 302a796d286SJoel Becker return container_of(lksb, struct ocfs2_lock_res, l_lksb); 303a796d286SJoel Becker } 304a796d286SJoel Becker 305ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 306ccd979bdSMark Fasheh { 307ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_inode_lock(lockres)); 308ccd979bdSMark Fasheh 309ccd979bdSMark Fasheh return (struct inode *) lockres->l_priv; 310ccd979bdSMark Fasheh } 311ccd979bdSMark Fasheh 312d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) 313d680efe9SMark Fasheh { 314d680efe9SMark Fasheh BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); 315d680efe9SMark Fasheh 316d680efe9SMark Fasheh return (struct ocfs2_dentry_lock *)lockres->l_priv; 317d680efe9SMark Fasheh } 318d680efe9SMark Fasheh 3199e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) 3209e33d69fSJan Kara { 3219e33d69fSJan Kara BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); 3229e33d69fSJan Kara 3239e33d69fSJan Kara return (struct ocfs2_mem_dqinfo *)lockres->l_priv; 3249e33d69fSJan Kara } 3259e33d69fSJan Kara 3268dec98edSTao Ma static inline struct ocfs2_refcount_tree * 3278dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res) 3288dec98edSTao Ma { 3298dec98edSTao Ma return container_of(res, struct ocfs2_refcount_tree, rf_lockres); 3308dec98edSTao Ma } 3318dec98edSTao Ma 33254a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 33354a7e755SMark Fasheh { 33454a7e755SMark Fasheh if (lockres->l_ops->get_osb) 33554a7e755SMark Fasheh return lockres->l_ops->get_osb(lockres); 33654a7e755SMark Fasheh 33754a7e755SMark Fasheh return (struct ocfs2_super *)lockres->l_priv; 33854a7e755SMark Fasheh } 33954a7e755SMark Fasheh 340ccd979bdSMark Fasheh 
static int ocfs2_lock_create(struct ocfs2_super *osb, 341ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 342ccd979bdSMark Fasheh int level, 343bd3e7610SJoel Becker u32 dlm_flags); 344ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 345ccd979bdSMark Fasheh int wanted); 346cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 347ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 348cb25797dSJan Kara int level, unsigned long caller_ip); 349cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, 350cb25797dSJan Kara struct ocfs2_lock_res *lockres, 351cb25797dSJan Kara int level) 352cb25797dSJan Kara { 353cb25797dSJan Kara __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); 354cb25797dSJan Kara } 355cb25797dSJan Kara 356ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 357ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 358ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 359ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level); 360ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 361ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 362ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 363ccd979bdSMark Fasheh int convert); 3647431cd7eSJoel Becker #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ 365c74ff8bbSSunil Mushran if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \ 3667431cd7eSJoel Becker mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ 3677431cd7eSJoel Becker _err, _func, _lockres->l_name); \ 368c74ff8bbSSunil Mushran else \ 369c74ff8bbSSunil Mushran mlog(ML_ERROR, "DLM error %d while calling %s on resource 
%.*s%08x\n", \ 370c74ff8bbSSunil Mushran _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \ 371c74ff8bbSSunil Mushran (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \ 372ccd979bdSMark Fasheh } while (0) 37334d024f8SMark Fasheh static int ocfs2_downconvert_thread(void *arg); 37434d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 375ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 376e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 377ccd979bdSMark Fasheh struct buffer_head **bh); 378ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 379ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level); 380de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 381cf8e06f1SMark Fasheh int new_level); 382cf8e06f1SMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 383cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres, 384cf8e06f1SMark Fasheh int new_level, 385de551246SJoel Becker int lvb, 386de551246SJoel Becker unsigned int generation); 387cf8e06f1SMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 388cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 389cf8e06f1SMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 390cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 391cf8e06f1SMark Fasheh 392ccd979bdSMark Fasheh 393ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 394ccd979bdSMark Fasheh u64 blkno, 395ccd979bdSMark Fasheh u32 generation, 396ccd979bdSMark Fasheh char *name) 397ccd979bdSMark Fasheh { 398ccd979bdSMark Fasheh int len; 399ccd979bdSMark Fasheh 400ccd979bdSMark Fasheh BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); 401ccd979bdSMark Fasheh 402b0697053SMark Fasheh len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 403b0697053SMark Fasheh ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, 
404b0697053SMark Fasheh (long long)blkno, generation); 405ccd979bdSMark Fasheh 406ccd979bdSMark Fasheh BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 407ccd979bdSMark Fasheh 408ccd979bdSMark Fasheh mlog(0, "built lock resource with name: %s\n", name); 409ccd979bdSMark Fasheh } 410ccd979bdSMark Fasheh 41134af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 412ccd979bdSMark Fasheh 413ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 414ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug) 415ccd979bdSMark Fasheh { 416ccd979bdSMark Fasheh mlog(0, "Add tracking for lockres %s\n", res->l_name); 417ccd979bdSMark Fasheh 418ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 419ccd979bdSMark Fasheh list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); 420ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 421ccd979bdSMark Fasheh } 422ccd979bdSMark Fasheh 423ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) 424ccd979bdSMark Fasheh { 425ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 426ccd979bdSMark Fasheh if (!list_empty(&res->l_debug_list)) 427ccd979bdSMark Fasheh list_del_init(&res->l_debug_list); 428ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 429ccd979bdSMark Fasheh } 430ccd979bdSMark Fasheh 4318ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 4328ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 4338ddb7b00SSunil Mushran { 4348ddb7b00SSunil Mushran res->l_lock_refresh = 0; 4355bc970e8SSunil Mushran memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats)); 4365bc970e8SSunil Mushran memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats)); 4378ddb7b00SSunil Mushran } 4388ddb7b00SSunil Mushran 4398ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, 4408ddb7b00SSunil Mushran struct ocfs2_mask_waiter *mw, int ret) 4418ddb7b00SSunil Mushran 
{ 4425bc970e8SSunil Mushran u32 usec; 4435bc970e8SSunil Mushran ktime_t kt; 4445bc970e8SSunil Mushran struct ocfs2_lock_stats *stats; 4458ddb7b00SSunil Mushran 4465bc970e8SSunil Mushran if (level == LKM_PRMODE) 4475bc970e8SSunil Mushran stats = &res->l_lock_prmode; 4485bc970e8SSunil Mushran else if (level == LKM_EXMODE) 4495bc970e8SSunil Mushran stats = &res->l_lock_exmode; 4505bc970e8SSunil Mushran else 4518ddb7b00SSunil Mushran return; 4528ddb7b00SSunil Mushran 4535bc970e8SSunil Mushran kt = ktime_sub(ktime_get(), mw->mw_lock_start); 4545bc970e8SSunil Mushran usec = ktime_to_us(kt); 4555bc970e8SSunil Mushran 4565bc970e8SSunil Mushran stats->ls_gets++; 4575bc970e8SSunil Mushran stats->ls_total += ktime_to_ns(kt); 4585bc970e8SSunil Mushran /* overflow */ 45916865b7cSroel if (unlikely(stats->ls_gets == 0)) { 4605bc970e8SSunil Mushran stats->ls_gets++; 4615bc970e8SSunil Mushran stats->ls_total = ktime_to_ns(kt); 4625bc970e8SSunil Mushran } 4635bc970e8SSunil Mushran 4645bc970e8SSunil Mushran if (stats->ls_max < usec) 4655bc970e8SSunil Mushran stats->ls_max = usec; 4665bc970e8SSunil Mushran 4678ddb7b00SSunil Mushran if (ret) 4685bc970e8SSunil Mushran stats->ls_fail++; 4698ddb7b00SSunil Mushran } 4708ddb7b00SSunil Mushran 4718ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4728ddb7b00SSunil Mushran { 4738ddb7b00SSunil Mushran lockres->l_lock_refresh++; 4748ddb7b00SSunil Mushran } 4758ddb7b00SSunil Mushran 4768ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4778ddb7b00SSunil Mushran { 4785bc970e8SSunil Mushran mw->mw_lock_start = ktime_get(); 4798ddb7b00SSunil Mushran } 4808ddb7b00SSunil Mushran #else 4818ddb7b00SSunil Mushran static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 4828ddb7b00SSunil Mushran { 4838ddb7b00SSunil Mushran } 4848ddb7b00SSunil Mushran static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, 4858ddb7b00SSunil Mushran int 
level, struct ocfs2_mask_waiter *mw, int ret) 4868ddb7b00SSunil Mushran { 4878ddb7b00SSunil Mushran } 4888ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4898ddb7b00SSunil Mushran { 4908ddb7b00SSunil Mushran } 4918ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 4928ddb7b00SSunil Mushran { 4938ddb7b00SSunil Mushran } 4948ddb7b00SSunil Mushran #endif 4958ddb7b00SSunil Mushran 496ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 497ccd979bdSMark Fasheh struct ocfs2_lock_res *res, 498ccd979bdSMark Fasheh enum ocfs2_lock_type type, 499ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops, 500ccd979bdSMark Fasheh void *priv) 501ccd979bdSMark Fasheh { 502ccd979bdSMark Fasheh res->l_type = type; 503ccd979bdSMark Fasheh res->l_ops = ops; 504ccd979bdSMark Fasheh res->l_priv = priv; 505ccd979bdSMark Fasheh 506bd3e7610SJoel Becker res->l_level = DLM_LOCK_IV; 507bd3e7610SJoel Becker res->l_requested = DLM_LOCK_IV; 508bd3e7610SJoel Becker res->l_blocking = DLM_LOCK_IV; 509ccd979bdSMark Fasheh res->l_action = OCFS2_AST_INVALID; 510ccd979bdSMark Fasheh res->l_unlock_action = OCFS2_UNLOCK_INVALID; 511ccd979bdSMark Fasheh 512ccd979bdSMark Fasheh res->l_flags = OCFS2_LOCK_INITIALIZED; 513ccd979bdSMark Fasheh 514ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 5158ddb7b00SSunil Mushran 5168ddb7b00SSunil Mushran ocfs2_init_lock_stats(res); 517cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 518cb25797dSJan Kara if (type != OCFS2_LOCK_TYPE_OPEN) 519cb25797dSJan Kara lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], 520cb25797dSJan Kara &lockdep_keys[type], 0); 521cb25797dSJan Kara else 522cb25797dSJan Kara res->l_lockdep_map.key = NULL; 523cb25797dSJan Kara #endif 524ccd979bdSMark Fasheh } 525ccd979bdSMark Fasheh 526ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 527ccd979bdSMark Fasheh 
{ 528ccd979bdSMark Fasheh /* This also clears out the lock status block */ 529ccd979bdSMark Fasheh memset(res, 0, sizeof(struct ocfs2_lock_res)); 530ccd979bdSMark Fasheh spin_lock_init(&res->l_lock); 531ccd979bdSMark Fasheh init_waitqueue_head(&res->l_event); 532ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_blocked_list); 533ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_mask_waiters); 534ccd979bdSMark Fasheh } 535ccd979bdSMark Fasheh 536ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 537ccd979bdSMark Fasheh enum ocfs2_lock_type type, 53824c19ef4SMark Fasheh unsigned int generation, 539ccd979bdSMark Fasheh struct inode *inode) 540ccd979bdSMark Fasheh { 541ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops; 542ccd979bdSMark Fasheh 543ccd979bdSMark Fasheh switch(type) { 544ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_RW: 545ccd979bdSMark Fasheh ops = &ocfs2_inode_rw_lops; 546ccd979bdSMark Fasheh break; 547ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_META: 548e63aecb6SMark Fasheh ops = &ocfs2_inode_inode_lops; 549ccd979bdSMark Fasheh break; 55050008630STiger Yang case OCFS2_LOCK_TYPE_OPEN: 55150008630STiger Yang ops = &ocfs2_inode_open_lops; 55250008630STiger Yang break; 553ccd979bdSMark Fasheh default: 554ccd979bdSMark Fasheh mlog_bug_on_msg(1, "type: %d\n", type); 555ccd979bdSMark Fasheh ops = NULL; /* thanks, gcc */ 556ccd979bdSMark Fasheh break; 557ccd979bdSMark Fasheh }; 558ccd979bdSMark Fasheh 559d680efe9SMark Fasheh ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 56024c19ef4SMark Fasheh generation, res->l_name); 561d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 562d680efe9SMark Fasheh } 563d680efe9SMark Fasheh 56454a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 56554a7e755SMark Fasheh { 56654a7e755SMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 56754a7e755SMark Fasheh 56854a7e755SMark Fasheh return 
OCFS2_SB(inode->i_sb); 56954a7e755SMark Fasheh } 57054a7e755SMark Fasheh 5719e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) 5729e33d69fSJan Kara { 5739e33d69fSJan Kara struct ocfs2_mem_dqinfo *info = lockres->l_priv; 5749e33d69fSJan Kara 5759e33d69fSJan Kara return OCFS2_SB(info->dqi_gi.dqi_sb); 5769e33d69fSJan Kara } 5779e33d69fSJan Kara 578cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 579cf8e06f1SMark Fasheh { 580cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = lockres->l_priv; 581cf8e06f1SMark Fasheh 582cf8e06f1SMark Fasheh return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 583cf8e06f1SMark Fasheh } 584cf8e06f1SMark Fasheh 585d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 586d680efe9SMark Fasheh { 587d680efe9SMark Fasheh __be64 inode_blkno_be; 588d680efe9SMark Fasheh 589d680efe9SMark Fasheh memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 590d680efe9SMark Fasheh sizeof(__be64)); 591d680efe9SMark Fasheh 592d680efe9SMark Fasheh return be64_to_cpu(inode_blkno_be); 593d680efe9SMark Fasheh } 594d680efe9SMark Fasheh 59554a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 59654a7e755SMark Fasheh { 59754a7e755SMark Fasheh struct ocfs2_dentry_lock *dl = lockres->l_priv; 59854a7e755SMark Fasheh 59954a7e755SMark Fasheh return OCFS2_SB(dl->dl_inode->i_sb); 60054a7e755SMark Fasheh } 60154a7e755SMark Fasheh 602d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 603d680efe9SMark Fasheh u64 parent, struct inode *inode) 604d680efe9SMark Fasheh { 605d680efe9SMark Fasheh int len; 606d680efe9SMark Fasheh u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 607d680efe9SMark Fasheh __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 608d680efe9SMark Fasheh struct ocfs2_lock_res *lockres = &dl->dl_lockres; 609d680efe9SMark Fasheh 610d680efe9SMark 
Fasheh ocfs2_lock_res_init_once(lockres); 611d680efe9SMark Fasheh 612d680efe9SMark Fasheh /* 613d680efe9SMark Fasheh * Unfortunately, the standard lock naming scheme won't work 614d680efe9SMark Fasheh * here because we have two 16 byte values to use. Instead, 615d680efe9SMark Fasheh * we'll stuff the inode number as a binary value. We still 616d680efe9SMark Fasheh * want error prints to show something without garbling the 617d680efe9SMark Fasheh * display, so drop a null byte in there before the inode 618d680efe9SMark Fasheh * number. A future version of OCFS2 will likely use all 619d680efe9SMark Fasheh * binary lock names. The stringified names have been a 620d680efe9SMark Fasheh * tremendous aid in debugging, but now that the debugfs 621d680efe9SMark Fasheh * interface exists, we can mangle things there if need be. 622d680efe9SMark Fasheh * 623d680efe9SMark Fasheh * NOTE: We also drop the standard "pad" value (the total lock 624d680efe9SMark Fasheh * name size stays the same though - the last part is all 625d680efe9SMark Fasheh * zeros due to the memset in ocfs2_lock_res_init_once() 626d680efe9SMark Fasheh */ 627d680efe9SMark Fasheh len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 628d680efe9SMark Fasheh "%c%016llx", 629d680efe9SMark Fasheh ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 630d680efe9SMark Fasheh (long long)parent); 631d680efe9SMark Fasheh 632d680efe9SMark Fasheh BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 633d680efe9SMark Fasheh 634d680efe9SMark Fasheh memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 635d680efe9SMark Fasheh sizeof(__be64)); 636d680efe9SMark Fasheh 637d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 638d680efe9SMark Fasheh OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 639d680efe9SMark Fasheh dl); 640ccd979bdSMark Fasheh } 641ccd979bdSMark Fasheh 642ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 643ccd979bdSMark Fasheh struct 
ocfs2_super *osb) 644ccd979bdSMark Fasheh { 645ccd979bdSMark Fasheh /* Superblock lockres doesn't come from a slab so we call init 646ccd979bdSMark Fasheh * once on it manually. */ 647ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 648d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 649d680efe9SMark Fasheh 0, res->l_name); 650ccd979bdSMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 651ccd979bdSMark Fasheh &ocfs2_super_lops, osb); 652ccd979bdSMark Fasheh } 653ccd979bdSMark Fasheh 654ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 655ccd979bdSMark Fasheh struct ocfs2_super *osb) 656ccd979bdSMark Fasheh { 657ccd979bdSMark Fasheh /* Rename lockres doesn't come from a slab so we call init 658ccd979bdSMark Fasheh * once on it manually. */ 659ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 660d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 661d680efe9SMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 662ccd979bdSMark Fasheh &ocfs2_rename_lops, osb); 663ccd979bdSMark Fasheh } 664ccd979bdSMark Fasheh 6656ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, 6666ca497a8Swengang wang struct ocfs2_super *osb) 6676ca497a8Swengang wang { 6686ca497a8Swengang wang /* nfs_sync lockres doesn't come from a slab so we call init 6696ca497a8Swengang wang * once on it manually. 
*/ 6706ca497a8Swengang wang ocfs2_lock_res_init_once(res); 6716ca497a8Swengang wang ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); 6726ca497a8Swengang wang ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, 6736ca497a8Swengang wang &ocfs2_nfs_sync_lops, osb); 6746ca497a8Swengang wang } 6756ca497a8Swengang wang 67683273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 67783273932SSrinivas Eeda struct ocfs2_super *osb) 67883273932SSrinivas Eeda { 67983273932SSrinivas Eeda ocfs2_lock_res_init_once(res); 68083273932SSrinivas Eeda ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); 68183273932SSrinivas Eeda ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, 68283273932SSrinivas Eeda &ocfs2_orphan_scan_lops, osb); 68383273932SSrinivas Eeda } 68483273932SSrinivas Eeda 685cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 686cf8e06f1SMark Fasheh struct ocfs2_file_private *fp) 687cf8e06f1SMark Fasheh { 688cf8e06f1SMark Fasheh struct inode *inode = fp->fp_file->f_mapping->host; 689cf8e06f1SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 690cf8e06f1SMark Fasheh 691cf8e06f1SMark Fasheh ocfs2_lock_res_init_once(lockres); 692cf8e06f1SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 693cf8e06f1SMark Fasheh inode->i_generation, lockres->l_name); 694cf8e06f1SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 695cf8e06f1SMark Fasheh OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 696cf8e06f1SMark Fasheh fp); 697cf8e06f1SMark Fasheh lockres->l_flags |= OCFS2_LOCK_NOCACHE; 698cf8e06f1SMark Fasheh } 699cf8e06f1SMark Fasheh 7009e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 7019e33d69fSJan Kara struct ocfs2_mem_dqinfo *info) 7029e33d69fSJan Kara { 7039e33d69fSJan Kara ocfs2_lock_res_init_once(lockres); 7049e33d69fSJan Kara ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, 
info->dqi_gi.dqi_type, 7059e33d69fSJan Kara 0, lockres->l_name); 7069e33d69fSJan Kara ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, 7079e33d69fSJan Kara OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, 7089e33d69fSJan Kara info); 7099e33d69fSJan Kara } 7109e33d69fSJan Kara 7118dec98edSTao Ma void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, 7128dec98edSTao Ma struct ocfs2_super *osb, u64 ref_blkno, 7138dec98edSTao Ma unsigned int generation) 7148dec98edSTao Ma { 7158dec98edSTao Ma ocfs2_lock_res_init_once(lockres); 7168dec98edSTao Ma ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, 7178dec98edSTao Ma generation, lockres->l_name); 7188dec98edSTao Ma ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, 7198dec98edSTao Ma &ocfs2_refcount_block_lops, osb); 7208dec98edSTao Ma } 7218dec98edSTao Ma 722ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 723ccd979bdSMark Fasheh { 724ccd979bdSMark Fasheh if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 725ccd979bdSMark Fasheh return; 726ccd979bdSMark Fasheh 727ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 728ccd979bdSMark Fasheh 729ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 730ccd979bdSMark Fasheh "Lockres %s is on the blocked list\n", 731ccd979bdSMark Fasheh res->l_name); 732ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 733ccd979bdSMark Fasheh "Lockres %s has mask waiters pending\n", 734ccd979bdSMark Fasheh res->l_name); 735ccd979bdSMark Fasheh mlog_bug_on_msg(spin_is_locked(&res->l_lock), 736ccd979bdSMark Fasheh "Lockres %s is locked\n", 737ccd979bdSMark Fasheh res->l_name); 738ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ro_holders, 739ccd979bdSMark Fasheh "Lockres %s has %u ro holders\n", 740ccd979bdSMark Fasheh res->l_name, res->l_ro_holders); 741ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ex_holders, 742ccd979bdSMark Fasheh "Lockres %s has %u ex holders\n", 743ccd979bdSMark Fasheh 
res->l_name, res->l_ex_holders); 744ccd979bdSMark Fasheh 745ccd979bdSMark Fasheh /* Need to clear out the lock status block for the dlm */ 746ccd979bdSMark Fasheh memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 747ccd979bdSMark Fasheh 748ccd979bdSMark Fasheh res->l_flags = 0UL; 749ccd979bdSMark Fasheh } 750ccd979bdSMark Fasheh 751ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 752ccd979bdSMark Fasheh int level) 753ccd979bdSMark Fasheh { 754ccd979bdSMark Fasheh BUG_ON(!lockres); 755ccd979bdSMark Fasheh 756ccd979bdSMark Fasheh switch(level) { 757bd3e7610SJoel Becker case DLM_LOCK_EX: 758ccd979bdSMark Fasheh lockres->l_ex_holders++; 759ccd979bdSMark Fasheh break; 760bd3e7610SJoel Becker case DLM_LOCK_PR: 761ccd979bdSMark Fasheh lockres->l_ro_holders++; 762ccd979bdSMark Fasheh break; 763ccd979bdSMark Fasheh default: 764ccd979bdSMark Fasheh BUG(); 765ccd979bdSMark Fasheh } 766ccd979bdSMark Fasheh } 767ccd979bdSMark Fasheh 768ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 769ccd979bdSMark Fasheh int level) 770ccd979bdSMark Fasheh { 771ccd979bdSMark Fasheh BUG_ON(!lockres); 772ccd979bdSMark Fasheh 773ccd979bdSMark Fasheh switch(level) { 774bd3e7610SJoel Becker case DLM_LOCK_EX: 775ccd979bdSMark Fasheh BUG_ON(!lockres->l_ex_holders); 776ccd979bdSMark Fasheh lockres->l_ex_holders--; 777ccd979bdSMark Fasheh break; 778bd3e7610SJoel Becker case DLM_LOCK_PR: 779ccd979bdSMark Fasheh BUG_ON(!lockres->l_ro_holders); 780ccd979bdSMark Fasheh lockres->l_ro_holders--; 781ccd979bdSMark Fasheh break; 782ccd979bdSMark Fasheh default: 783ccd979bdSMark Fasheh BUG(); 784ccd979bdSMark Fasheh } 785ccd979bdSMark Fasheh } 786ccd979bdSMark Fasheh 787ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock 788ccd979bdSMark Fasheh * levels are EX, PR, and NL. It *will* have to be adjusted when more 789ccd979bdSMark Fasheh * lock types are added. 
*/ 790ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level) 791ccd979bdSMark Fasheh { 792bd3e7610SJoel Becker int new_level = DLM_LOCK_EX; 793ccd979bdSMark Fasheh 794bd3e7610SJoel Becker if (level == DLM_LOCK_EX) 795bd3e7610SJoel Becker new_level = DLM_LOCK_NL; 796bd3e7610SJoel Becker else if (level == DLM_LOCK_PR) 797bd3e7610SJoel Becker new_level = DLM_LOCK_PR; 798ccd979bdSMark Fasheh return new_level; 799ccd979bdSMark Fasheh } 800ccd979bdSMark Fasheh 801ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres, 802ccd979bdSMark Fasheh unsigned long newflags) 803ccd979bdSMark Fasheh { 804800deef3SChristoph Hellwig struct ocfs2_mask_waiter *mw, *tmp; 805ccd979bdSMark Fasheh 806ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 807ccd979bdSMark Fasheh 808ccd979bdSMark Fasheh lockres->l_flags = newflags; 809ccd979bdSMark Fasheh 810800deef3SChristoph Hellwig list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 811ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 812ccd979bdSMark Fasheh continue; 813ccd979bdSMark Fasheh 814ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 815ccd979bdSMark Fasheh mw->mw_status = 0; 816ccd979bdSMark Fasheh complete(&mw->mw_complete); 817ccd979bdSMark Fasheh } 818ccd979bdSMark Fasheh } 819ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 820ccd979bdSMark Fasheh { 821ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags | or); 822ccd979bdSMark Fasheh } 823ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 824ccd979bdSMark Fasheh unsigned long clear) 825ccd979bdSMark Fasheh { 826ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags & ~clear); 827ccd979bdSMark Fasheh } 828ccd979bdSMark Fasheh 829ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 830ccd979bdSMark Fasheh { 
831ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 832ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 833ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 834bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 835ccd979bdSMark Fasheh 836ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 837ccd979bdSMark Fasheh if (lockres->l_level <= 838ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 839bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_NL; 840ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 841ccd979bdSMark Fasheh } 842ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 843ccd979bdSMark Fasheh } 844ccd979bdSMark Fasheh 845ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 846ccd979bdSMark Fasheh { 847ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 848ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 849ccd979bdSMark Fasheh 850ccd979bdSMark Fasheh /* Convert from RO to EX doesn't really need anything as our 851ccd979bdSMark Fasheh * information is already up to data. 
Convert from NL to 852ccd979bdSMark Fasheh * *anything* however should mark ourselves as needing an 853ccd979bdSMark Fasheh * update */ 854bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_NL && 855f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 856ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 857ccd979bdSMark Fasheh 858ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 859a1912826SSunil Mushran 860a1912826SSunil Mushran /* 861a1912826SSunil Mushran * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing 862a1912826SSunil Mushran * the OCFS2_LOCK_BUSY flag to prevent the dc thread from 863a1912826SSunil Mushran * downconverting the lock before the upconvert has fully completed. 864a1912826SSunil Mushran */ 865a1912826SSunil Mushran lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 866a1912826SSunil Mushran 867ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 868ccd979bdSMark Fasheh } 869ccd979bdSMark Fasheh 870ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 871ccd979bdSMark Fasheh { 8723cf0c507SRoel Kluin BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 873ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 874ccd979bdSMark Fasheh 875bd3e7610SJoel Becker if (lockres->l_requested > DLM_LOCK_NL && 876f625c979SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 877f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 878ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 879ccd979bdSMark Fasheh 880ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 881ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 882ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 883ccd979bdSMark Fasheh } 884ccd979bdSMark Fasheh 885ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 
886ccd979bdSMark Fasheh int level) 887ccd979bdSMark Fasheh { 888ccd979bdSMark Fasheh int needs_downconvert = 0; 889ccd979bdSMark Fasheh 890ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 891ccd979bdSMark Fasheh 892ccd979bdSMark Fasheh if (level > lockres->l_blocking) { 893ccd979bdSMark Fasheh /* only schedule a downconvert if we haven't already scheduled 894ccd979bdSMark Fasheh * one that goes low enough to satisfy the level we're 895ccd979bdSMark Fasheh * blocking. this also catches the case where we get 896ccd979bdSMark Fasheh * duplicate BASTs */ 897ccd979bdSMark Fasheh if (ocfs2_highest_compat_lock_level(level) < 898ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) 899ccd979bdSMark Fasheh needs_downconvert = 1; 900ccd979bdSMark Fasheh 901ccd979bdSMark Fasheh lockres->l_blocking = level; 902ccd979bdSMark Fasheh } 903ccd979bdSMark Fasheh 9049b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n", 9059b915181SSunil Mushran lockres->l_name, level, lockres->l_level, lockres->l_blocking, 9069b915181SSunil Mushran needs_downconvert); 9079b915181SSunil Mushran 9080b94a909SWengang Wang if (needs_downconvert) 9090b94a909SWengang Wang lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 910c1e8d35eSTao Ma mlog(0, "needs_downconvert = %d\n", needs_downconvert); 911ccd979bdSMark Fasheh return needs_downconvert; 912ccd979bdSMark Fasheh } 913ccd979bdSMark Fasheh 914de551246SJoel Becker /* 915de551246SJoel Becker * OCFS2_LOCK_PENDING and l_pending_gen. 916de551246SJoel Becker * 917de551246SJoel Becker * Why does OCFS2_LOCK_PENDING exist? To close a race between setting 918de551246SJoel Becker * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() 919de551246SJoel Becker * for more details on the race. 920de551246SJoel Becker * 921de551246SJoel Becker * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces 922de551246SJoel Becker * a race on itself. 
 * In o2dlm, we can get the ast before ocfs2_dlm_lock()
 * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
 * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
 * the caller is going to try to clear PENDING again.  If nothing else is
 * happening, __lockres_clear_pending() sees PENDING is unset and does
 * nothing.
 *
 * But what if another path (eg downconvert thread) has just started a
 * new locking action?  The other path has re-set PENDING.  Our path
 * cannot clear PENDING, because that will re-open the original race
 * window.
 *
 * [Example]
 *
 * ocfs2_meta_lock()
 *  ocfs2_cluster_lock()
 *   set BUSY
 *   set PENDING
 *   drop l_lock
 *   ocfs2_dlm_lock()
 *    ocfs2_locking_ast()               ocfs2_downconvert_thread()
 *     clear PENDING                     ocfs2_unblock_lock()
 *                                        take l_lock
 *                                        !BUSY
 *                                        ocfs2_prepare_downconvert()
 *                                         set BUSY
 *                                         set PENDING
 *                                        drop l_lock
 *   take l_lock
 *   clear PENDING
 *   drop l_lock
 *                      <window>
 *                                        ocfs2_dlm_lock()
 *
 * So as you can see, we now have a window where l_lock is not held,
 * PENDING is not set, and ocfs2_dlm_lock() has not been called.
958de551246SJoel Becker * 959de551246SJoel Becker * The core problem is that ocfs2_cluster_lock() has cleared the PENDING 960de551246SJoel Becker * set by ocfs2_prepare_downconvert(). That wasn't nice. 961de551246SJoel Becker * 962de551246SJoel Becker * To solve this we introduce l_pending_gen. A call to 963de551246SJoel Becker * lockres_clear_pending() will only do so when it is passed a generation 964de551246SJoel Becker * number that matches the lockres. lockres_set_pending() will return the 965de551246SJoel Becker * current generation number. When ocfs2_cluster_lock() goes to clear 966de551246SJoel Becker * PENDING, it passes the generation it got from set_pending(). In our 967de551246SJoel Becker * example above, the generation numbers will *not* match. Thus, 968de551246SJoel Becker * ocfs2_cluster_lock() will not clear the PENDING set by 969de551246SJoel Becker * ocfs2_prepare_downconvert(). 970de551246SJoel Becker */ 971de551246SJoel Becker 972de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */ 973de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 974de551246SJoel Becker unsigned int generation, 975de551246SJoel Becker struct ocfs2_super *osb) 976de551246SJoel Becker { 977de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 978de551246SJoel Becker 979de551246SJoel Becker /* 980de551246SJoel Becker * The ast and locking functions can race us here. The winner 981de551246SJoel Becker * will clear pending, the loser will not. 
982de551246SJoel Becker */ 983de551246SJoel Becker if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 984de551246SJoel Becker (lockres->l_pending_gen != generation)) 985de551246SJoel Becker return; 986de551246SJoel Becker 987de551246SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 988de551246SJoel Becker lockres->l_pending_gen++; 989de551246SJoel Becker 990de551246SJoel Becker /* 991de551246SJoel Becker * The downconvert thread may have skipped us because we 992de551246SJoel Becker * were PENDING. Wake it up. 993de551246SJoel Becker */ 994de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 995de551246SJoel Becker ocfs2_wake_downconvert_thread(osb); 996de551246SJoel Becker } 997de551246SJoel Becker 998de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */ 999de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 1000de551246SJoel Becker unsigned int generation, 1001de551246SJoel Becker struct ocfs2_super *osb) 1002de551246SJoel Becker { 1003de551246SJoel Becker unsigned long flags; 1004de551246SJoel Becker 1005de551246SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 1006de551246SJoel Becker __lockres_clear_pending(lockres, generation, osb); 1007de551246SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1008de551246SJoel Becker } 1009de551246SJoel Becker 1010de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 1011de551246SJoel Becker { 1012de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 1013de551246SJoel Becker BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 1014de551246SJoel Becker 1015de551246SJoel Becker lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 1016de551246SJoel Becker 1017de551246SJoel Becker return lockres->l_pending_gen; 1018de551246SJoel Becker } 1019de551246SJoel Becker 1020c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) 1021ccd979bdSMark Fasheh { 1022a796d286SJoel 
Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1023aa2623adSMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1024ccd979bdSMark Fasheh int needs_downconvert; 1025ccd979bdSMark Fasheh unsigned long flags; 1026ccd979bdSMark Fasheh 1027bd3e7610SJoel Becker BUG_ON(level <= DLM_LOCK_NL); 1028ccd979bdSMark Fasheh 10299b915181SSunil Mushran mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, " 10309b915181SSunil Mushran "type %s\n", lockres->l_name, level, lockres->l_level, 1031aa2623adSMark Fasheh ocfs2_lock_type_string(lockres->l_type)); 1032aa2623adSMark Fasheh 1033cf8e06f1SMark Fasheh /* 1034cf8e06f1SMark Fasheh * We can skip the bast for locks which don't enable caching - 1035cf8e06f1SMark Fasheh * they'll be dropped at the earliest possible time anyway. 1036cf8e06f1SMark Fasheh */ 1037cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 1038cf8e06f1SMark Fasheh return; 1039cf8e06f1SMark Fasheh 1040ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1041ccd979bdSMark Fasheh needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 1042ccd979bdSMark Fasheh if (needs_downconvert) 1043ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 1044ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1045ccd979bdSMark Fasheh 1046d680efe9SMark Fasheh wake_up(&lockres->l_event); 1047d680efe9SMark Fasheh 104834d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 1049ccd979bdSMark Fasheh } 1050ccd979bdSMark Fasheh 1051c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb) 1052ccd979bdSMark Fasheh { 1053a796d286SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1054de551246SJoel Becker struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1055ccd979bdSMark Fasheh unsigned long flags; 10561693a5c0SDavid Teigland int status; 1057ccd979bdSMark Fasheh 1058ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 
1059ccd979bdSMark Fasheh 10601693a5c0SDavid Teigland status = ocfs2_dlm_lock_status(&lockres->l_lksb); 10611693a5c0SDavid Teigland 10621693a5c0SDavid Teigland if (status == -EAGAIN) { 10631693a5c0SDavid Teigland lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 10641693a5c0SDavid Teigland goto out; 10651693a5c0SDavid Teigland } 10661693a5c0SDavid Teigland 10671693a5c0SDavid Teigland if (status) { 10688f2c9c1bSJoel Becker mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", 10691693a5c0SDavid Teigland lockres->l_name, status); 1070ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1071ccd979bdSMark Fasheh return; 1072ccd979bdSMark Fasheh } 1073ccd979bdSMark Fasheh 10749b915181SSunil Mushran mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, " 10759b915181SSunil Mushran "level %d => %d\n", lockres->l_name, lockres->l_action, 10769b915181SSunil Mushran lockres->l_unlock_action, lockres->l_level, lockres->l_requested); 10779b915181SSunil Mushran 1078ccd979bdSMark Fasheh switch(lockres->l_action) { 1079ccd979bdSMark Fasheh case OCFS2_AST_ATTACH: 1080ccd979bdSMark Fasheh ocfs2_generic_handle_attach_action(lockres); 1081e92d57dfSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); 1082ccd979bdSMark Fasheh break; 1083ccd979bdSMark Fasheh case OCFS2_AST_CONVERT: 1084ccd979bdSMark Fasheh ocfs2_generic_handle_convert_action(lockres); 1085ccd979bdSMark Fasheh break; 1086ccd979bdSMark Fasheh case OCFS2_AST_DOWNCONVERT: 1087ccd979bdSMark Fasheh ocfs2_generic_handle_downconvert_action(lockres); 1088ccd979bdSMark Fasheh break; 1089ccd979bdSMark Fasheh default: 10909b915181SSunil Mushran mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, " 10919b915181SSunil Mushran "flags 0x%lx, unlock: %u\n", 1092e92d57dfSMark Fasheh lockres->l_name, lockres->l_action, lockres->l_flags, 1093e92d57dfSMark Fasheh lockres->l_unlock_action); 1094ccd979bdSMark Fasheh BUG(); 1095ccd979bdSMark Fasheh } 10961693a5c0SDavid Teigland out: 1097ccd979bdSMark 
Fasheh /* set it to something invalid so if we get called again we 1098ccd979bdSMark Fasheh * can catch it. */ 1099ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1100ccd979bdSMark Fasheh 1101de551246SJoel Becker /* Did we try to cancel this lock? Clear that state */ 1102de551246SJoel Becker if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) 1103de551246SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1104de551246SJoel Becker 1105de551246SJoel Becker /* 1106de551246SJoel Becker * We may have beaten the locking functions here. We certainly 1107de551246SJoel Becker * know that dlm_lock() has been called :-) 1108de551246SJoel Becker * Because we can't have two lock calls in flight at once, we 1109de551246SJoel Becker * can use lockres->l_pending_gen. 1110de551246SJoel Becker */ 1111de551246SJoel Becker __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); 1112de551246SJoel Becker 1113ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1114d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1115ccd979bdSMark Fasheh } 1116ccd979bdSMark Fasheh 1117553b5eb9SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error) 1118553b5eb9SJoel Becker { 1119553b5eb9SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1120553b5eb9SJoel Becker unsigned long flags; 1121553b5eb9SJoel Becker 11229b915181SSunil Mushran mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n", 11239b915181SSunil Mushran lockres->l_name, lockres->l_unlock_action); 1124553b5eb9SJoel Becker 1125553b5eb9SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 1126553b5eb9SJoel Becker if (error) { 1127553b5eb9SJoel Becker mlog(ML_ERROR, "Dlm passes error %d for lock %s, " 1128553b5eb9SJoel Becker "unlock_action %d\n", error, lockres->l_name, 1129553b5eb9SJoel Becker lockres->l_unlock_action); 1130553b5eb9SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1131553b5eb9SJoel Becker return; 
1132553b5eb9SJoel Becker } 1133553b5eb9SJoel Becker 1134553b5eb9SJoel Becker switch(lockres->l_unlock_action) { 1135553b5eb9SJoel Becker case OCFS2_UNLOCK_CANCEL_CONVERT: 1136553b5eb9SJoel Becker mlog(0, "Cancel convert success for %s\n", lockres->l_name); 1137553b5eb9SJoel Becker lockres->l_action = OCFS2_AST_INVALID; 1138553b5eb9SJoel Becker /* Downconvert thread may have requeued this lock, we 1139553b5eb9SJoel Becker * need to wake it. */ 1140553b5eb9SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 1141553b5eb9SJoel Becker ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); 1142553b5eb9SJoel Becker break; 1143553b5eb9SJoel Becker case OCFS2_UNLOCK_DROP_LOCK: 1144553b5eb9SJoel Becker lockres->l_level = DLM_LOCK_IV; 1145553b5eb9SJoel Becker break; 1146553b5eb9SJoel Becker default: 1147553b5eb9SJoel Becker BUG(); 1148553b5eb9SJoel Becker } 1149553b5eb9SJoel Becker 1150553b5eb9SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1151553b5eb9SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1152553b5eb9SJoel Becker wake_up(&lockres->l_event); 1153553b5eb9SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1154553b5eb9SJoel Becker } 1155553b5eb9SJoel Becker 1156553b5eb9SJoel Becker /* 1157553b5eb9SJoel Becker * This is the filesystem locking protocol. It provides the lock handling 1158553b5eb9SJoel Becker * hooks for the underlying DLM. It has a maximum version number. 1159553b5eb9SJoel Becker * The version number allows interoperability with systems running at 1160553b5eb9SJoel Becker * the same major number and an equal or smaller minor number. 1161553b5eb9SJoel Becker * 1162553b5eb9SJoel Becker * Whenever the filesystem does new things with locks (adds or removes a 1163553b5eb9SJoel Becker * lock, orders them differently, does different things underneath a lock), 1164553b5eb9SJoel Becker * the version must be changed. The protocol is negotiated when joining 1165553b5eb9SJoel Becker * the dlm domain. 
 * A node may join the domain if its major version is
 * identical to all other nodes and its minor version is greater than
 * or equal to all other nodes.  When its minor version is greater than
 * the other nodes, it will run at the minor version specified by the
 * other nodes.
 *
 * If a locking change is made that will not be compatible with older
 * versions, the major number must be increased and the minor version set
 * to zero.  If a change merely adds a behavior that can be disabled when
 * speaking to older versions, the minor version must be increased.  If a
 * change adds a fully backwards compatible change (eg, LVB changes that
 * are just ignored by older versions), the version does not need to be
 * updated.
1178553b5eb9SJoel Becker */ 1179553b5eb9SJoel Becker static struct ocfs2_locking_protocol lproto = { 1180553b5eb9SJoel Becker .lp_max_version = { 1181553b5eb9SJoel Becker .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, 1182553b5eb9SJoel Becker .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, 1183553b5eb9SJoel Becker }, 1184553b5eb9SJoel Becker .lp_lock_ast = ocfs2_locking_ast, 1185553b5eb9SJoel Becker .lp_blocking_ast = ocfs2_blocking_ast, 1186553b5eb9SJoel Becker .lp_unlock_ast = ocfs2_unlock_ast, 1187553b5eb9SJoel Becker }; 1188553b5eb9SJoel Becker 1189553b5eb9SJoel Becker void ocfs2_set_locking_protocol(void) 1190553b5eb9SJoel Becker { 1191553b5eb9SJoel Becker ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version); 1192553b5eb9SJoel Becker } 1193553b5eb9SJoel Becker 1194ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 1195ccd979bdSMark Fasheh int convert) 1196ccd979bdSMark Fasheh { 1197ccd979bdSMark Fasheh unsigned long flags; 1198ccd979bdSMark Fasheh 1199ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1200ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1201a1912826SSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1202ccd979bdSMark Fasheh if (convert) 1203ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1204ccd979bdSMark Fasheh else 1205ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1206ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1207ccd979bdSMark Fasheh 1208ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1209ccd979bdSMark Fasheh } 1210ccd979bdSMark Fasheh 1211ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e., 1212ccd979bdSMark Fasheh * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller 1213ccd979bdSMark Fasheh * to do the right thing in that case. 
1214ccd979bdSMark Fasheh */ 1215ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 1216ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1217ccd979bdSMark Fasheh int level, 1218bd3e7610SJoel Becker u32 dlm_flags) 1219ccd979bdSMark Fasheh { 1220ccd979bdSMark Fasheh int ret = 0; 1221ccd979bdSMark Fasheh unsigned long flags; 1222de551246SJoel Becker unsigned int gen; 1223ccd979bdSMark Fasheh 1224bd3e7610SJoel Becker mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 1225ccd979bdSMark Fasheh dlm_flags); 1226ccd979bdSMark Fasheh 1227ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1228ccd979bdSMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || 1229ccd979bdSMark Fasheh (lockres->l_flags & OCFS2_LOCK_BUSY)) { 1230ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1231ccd979bdSMark Fasheh goto bail; 1232ccd979bdSMark Fasheh } 1233ccd979bdSMark Fasheh 1234ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1235ccd979bdSMark Fasheh lockres->l_requested = level; 1236ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1237de551246SJoel Becker gen = lockres_set_pending(lockres); 1238ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1239ccd979bdSMark Fasheh 12404670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1241ccd979bdSMark Fasheh level, 1242ccd979bdSMark Fasheh &lockres->l_lksb, 1243ccd979bdSMark Fasheh dlm_flags, 1244ccd979bdSMark Fasheh lockres->l_name, 1245a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 1246de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 12477431cd7eSJoel Becker if (ret) { 12487431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1249ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1250ccd979bdSMark Fasheh } 1251ccd979bdSMark Fasheh 12527431cd7eSJoel Becker mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); 1253ccd979bdSMark Fasheh 1254ccd979bdSMark Fasheh 
bail: 1255ccd979bdSMark Fasheh return ret; 1256ccd979bdSMark Fasheh } 1257ccd979bdSMark Fasheh 1258ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1259ccd979bdSMark Fasheh int flag) 1260ccd979bdSMark Fasheh { 1261ccd979bdSMark Fasheh unsigned long flags; 1262ccd979bdSMark Fasheh int ret; 1263ccd979bdSMark Fasheh 1264ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1265ccd979bdSMark Fasheh ret = lockres->l_flags & flag; 1266ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1267ccd979bdSMark Fasheh 1268ccd979bdSMark Fasheh return ret; 1269ccd979bdSMark Fasheh } 1270ccd979bdSMark Fasheh 1271ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) 1272ccd979bdSMark Fasheh 1273ccd979bdSMark Fasheh { 1274ccd979bdSMark Fasheh wait_event(lockres->l_event, 1275ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); 1276ccd979bdSMark Fasheh } 1277ccd979bdSMark Fasheh 1278ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) 1279ccd979bdSMark Fasheh 1280ccd979bdSMark Fasheh { 1281ccd979bdSMark Fasheh wait_event(lockres->l_event, 1282ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); 1283ccd979bdSMark Fasheh } 1284ccd979bdSMark Fasheh 1285ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf 1286ccd979bdSMark Fasheh * of another node, and return true if the currently wanted 1287ccd979bdSMark Fasheh * level will be compatible with it. 
*/ 1288ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 1289ccd979bdSMark Fasheh int wanted) 1290ccd979bdSMark Fasheh { 1291ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 1292ccd979bdSMark Fasheh 1293ccd979bdSMark Fasheh return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 1294ccd979bdSMark Fasheh } 1295ccd979bdSMark Fasheh 1296ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 1297ccd979bdSMark Fasheh { 1298ccd979bdSMark Fasheh INIT_LIST_HEAD(&mw->mw_item); 1299ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 13008ddb7b00SSunil Mushran ocfs2_init_start_time(mw); 1301ccd979bdSMark Fasheh } 1302ccd979bdSMark Fasheh 1303ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 1304ccd979bdSMark Fasheh { 1305ccd979bdSMark Fasheh wait_for_completion(&mw->mw_complete); 1306ccd979bdSMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 1307ccd979bdSMark Fasheh INIT_COMPLETION(mw->mw_complete); 1308ccd979bdSMark Fasheh return mw->mw_status; 1309ccd979bdSMark Fasheh } 1310ccd979bdSMark Fasheh 1311ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 1312ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw, 1313ccd979bdSMark Fasheh unsigned long mask, 1314ccd979bdSMark Fasheh unsigned long goal) 1315ccd979bdSMark Fasheh { 1316ccd979bdSMark Fasheh BUG_ON(!list_empty(&mw->mw_item)); 1317ccd979bdSMark Fasheh 1318ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 1319ccd979bdSMark Fasheh 1320ccd979bdSMark Fasheh list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 1321ccd979bdSMark Fasheh mw->mw_mask = mask; 1322ccd979bdSMark Fasheh mw->mw_goal = goal; 1323ccd979bdSMark Fasheh } 1324ccd979bdSMark Fasheh 1325ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY 1326ccd979bdSMark Fasheh * if the mask still hadn't 
reached its goal */ 1327ccd979bdSMark Fasheh static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1328ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw) 1329ccd979bdSMark Fasheh { 1330ccd979bdSMark Fasheh unsigned long flags; 1331ccd979bdSMark Fasheh int ret = 0; 1332ccd979bdSMark Fasheh 1333ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1334ccd979bdSMark Fasheh if (!list_empty(&mw->mw_item)) { 1335ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 1336ccd979bdSMark Fasheh ret = -EBUSY; 1337ccd979bdSMark Fasheh 1338ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 1339ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 1340ccd979bdSMark Fasheh } 1341ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1342ccd979bdSMark Fasheh 1343ccd979bdSMark Fasheh return ret; 1344ccd979bdSMark Fasheh 1345ccd979bdSMark Fasheh } 1346ccd979bdSMark Fasheh 1347cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 1348cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres) 1349cf8e06f1SMark Fasheh { 1350cf8e06f1SMark Fasheh int ret; 1351cf8e06f1SMark Fasheh 1352cf8e06f1SMark Fasheh ret = wait_for_completion_interruptible(&mw->mw_complete); 1353cf8e06f1SMark Fasheh if (ret) 1354cf8e06f1SMark Fasheh lockres_remove_mask_waiter(lockres, mw); 1355cf8e06f1SMark Fasheh else 1356cf8e06f1SMark Fasheh ret = mw->mw_status; 1357cf8e06f1SMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 1358cf8e06f1SMark Fasheh INIT_COMPLETION(mw->mw_complete); 1359cf8e06f1SMark Fasheh return ret; 1360cf8e06f1SMark Fasheh } 1361cf8e06f1SMark Fasheh 1362cb25797dSJan Kara static int __ocfs2_cluster_lock(struct ocfs2_super *osb, 1363ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1364ccd979bdSMark Fasheh int level, 1365bd3e7610SJoel Becker u32 lkm_flags, 1366cb25797dSJan Kara int arg_flags, 1367cb25797dSJan Kara int l_subclass, 1368cb25797dSJan Kara unsigned long 
caller_ip) 1369ccd979bdSMark Fasheh { 1370ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 1371ccd979bdSMark Fasheh int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1372ccd979bdSMark Fasheh int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 1373ccd979bdSMark Fasheh unsigned long flags; 1374de551246SJoel Becker unsigned int gen; 13751693a5c0SDavid Teigland int noqueue_attempted = 0; 1376ccd979bdSMark Fasheh 1377ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 1378ccd979bdSMark Fasheh 1379b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1380bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 1381b80fc012SMark Fasheh 1382ccd979bdSMark Fasheh again: 1383ccd979bdSMark Fasheh wait = 0; 1384ccd979bdSMark Fasheh 1385a1912826SSunil Mushran spin_lock_irqsave(&lockres->l_lock, flags); 1386a1912826SSunil Mushran 1387ccd979bdSMark Fasheh if (catch_signals && signal_pending(current)) { 1388ccd979bdSMark Fasheh ret = -ERESTARTSYS; 1389a1912826SSunil Mushran goto unlock; 1390ccd979bdSMark Fasheh } 1391ccd979bdSMark Fasheh 1392ccd979bdSMark Fasheh mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1393ccd979bdSMark Fasheh "Cluster lock called on freeing lockres %s! flags " 1394ccd979bdSMark Fasheh "0x%lx\n", lockres->l_name, lockres->l_flags); 1395ccd979bdSMark Fasheh 1396ccd979bdSMark Fasheh /* We only compare against the currently granted level 1397ccd979bdSMark Fasheh * here. If the lock is blocked waiting on a downconvert, 1398ccd979bdSMark Fasheh * we'll get caught below. */ 1399ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY && 1400ccd979bdSMark Fasheh level > lockres->l_level) { 1401ccd979bdSMark Fasheh /* is someone sitting in dlm_lock? If so, wait on 1402ccd979bdSMark Fasheh * them. 
*/ 1403ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1404ccd979bdSMark Fasheh wait = 1; 1405ccd979bdSMark Fasheh goto unlock; 1406ccd979bdSMark Fasheh } 1407ccd979bdSMark Fasheh 1408a1912826SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { 1409a1912826SSunil Mushran /* 1410a1912826SSunil Mushran * We've upconverted. If the lock now has a level we can 1411a1912826SSunil Mushran * work with, we take it. If, however, the lock is not at the 1412a1912826SSunil Mushran * required level, we go thru the full cycle. One way this could 1413a1912826SSunil Mushran * happen is if a process requesting an upconvert to PR is 1414a1912826SSunil Mushran * closely followed by another requesting upconvert to an EX. 1415a1912826SSunil Mushran * If the process requesting EX lands here, we want it to 1416a1912826SSunil Mushran * continue attempting to upconvert and let the process 1417a1912826SSunil Mushran * requesting PR take the lock. 1418a1912826SSunil Mushran * If multiple processes request upconvert to PR, the first one 1419a1912826SSunil Mushran * here will take the lock. The others will have to go thru the 1420a1912826SSunil Mushran * OCFS2_LOCK_BLOCKED check to ensure that there is no pending 1421a1912826SSunil Mushran * downconvert request. 
1422a1912826SSunil Mushran */ 1423a1912826SSunil Mushran if (level <= lockres->l_level) 1424a1912826SSunil Mushran goto update_holders; 1425a1912826SSunil Mushran } 1426a1912826SSunil Mushran 1427ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1428ccd979bdSMark Fasheh !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1429ccd979bdSMark Fasheh /* is the lock is currently blocked on behalf of 1430ccd979bdSMark Fasheh * another node */ 1431ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); 1432ccd979bdSMark Fasheh wait = 1; 1433ccd979bdSMark Fasheh goto unlock; 1434ccd979bdSMark Fasheh } 1435ccd979bdSMark Fasheh 1436ccd979bdSMark Fasheh if (level > lockres->l_level) { 14371693a5c0SDavid Teigland if (noqueue_attempted > 0) { 14381693a5c0SDavid Teigland ret = -EAGAIN; 14391693a5c0SDavid Teigland goto unlock; 14401693a5c0SDavid Teigland } 14411693a5c0SDavid Teigland if (lkm_flags & DLM_LKF_NOQUEUE) 14421693a5c0SDavid Teigland noqueue_attempted = 1; 14431693a5c0SDavid Teigland 1444ccd979bdSMark Fasheh if (lockres->l_action != OCFS2_AST_INVALID) 1445ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres %s has action %u pending\n", 1446ccd979bdSMark Fasheh lockres->l_name, lockres->l_action); 1447ccd979bdSMark Fasheh 1448019d1b22SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1449019d1b22SMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1450bd3e7610SJoel Becker lkm_flags &= ~DLM_LKF_CONVERT; 1451019d1b22SMark Fasheh } else { 1452ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1453bd3e7610SJoel Becker lkm_flags |= DLM_LKF_CONVERT; 1454019d1b22SMark Fasheh } 1455019d1b22SMark Fasheh 1456ccd979bdSMark Fasheh lockres->l_requested = level; 1457ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1458de551246SJoel Becker gen = lockres_set_pending(lockres); 1459ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1460ccd979bdSMark Fasheh 1461bd3e7610SJoel Becker BUG_ON(level == 
DLM_LOCK_IV); 1462bd3e7610SJoel Becker BUG_ON(level == DLM_LOCK_NL); 1463ccd979bdSMark Fasheh 14649b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, convert from %d to %d\n", 1465ccd979bdSMark Fasheh lockres->l_name, lockres->l_level, level); 1466ccd979bdSMark Fasheh 1467ccd979bdSMark Fasheh /* call dlm_lock to upgrade lock now */ 14684670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1469ccd979bdSMark Fasheh level, 1470ccd979bdSMark Fasheh &lockres->l_lksb, 1471019d1b22SMark Fasheh lkm_flags, 1472ccd979bdSMark Fasheh lockres->l_name, 1473a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 1474de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 14757431cd7eSJoel Becker if (ret) { 14767431cd7eSJoel Becker if (!(lkm_flags & DLM_LKF_NOQUEUE) || 14777431cd7eSJoel Becker (ret != -EAGAIN)) { 147824ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", 14797431cd7eSJoel Becker ret, lockres); 1480ccd979bdSMark Fasheh } 1481ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1482ccd979bdSMark Fasheh goto out; 1483ccd979bdSMark Fasheh } 1484ccd979bdSMark Fasheh 148573ac36eaSColy Li mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", 1486ccd979bdSMark Fasheh lockres->l_name); 1487ccd979bdSMark Fasheh 1488ccd979bdSMark Fasheh /* At this point we've gone inside the dlm and need to 1489ccd979bdSMark Fasheh * complete our work regardless. */ 1490ccd979bdSMark Fasheh catch_signals = 0; 1491ccd979bdSMark Fasheh 1492ccd979bdSMark Fasheh /* wait for busy to clear and carry on */ 1493ccd979bdSMark Fasheh goto again; 1494ccd979bdSMark Fasheh } 1495ccd979bdSMark Fasheh 1496a1912826SSunil Mushran update_holders: 1497ccd979bdSMark Fasheh /* Ok, if we get here then we're good to go. 
*/ 1498ccd979bdSMark Fasheh ocfs2_inc_holders(lockres, level); 1499ccd979bdSMark Fasheh 1500ccd979bdSMark Fasheh ret = 0; 1501ccd979bdSMark Fasheh unlock: 1502a1912826SSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1503a1912826SSunil Mushran 1504ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1505ccd979bdSMark Fasheh out: 1506ccd979bdSMark Fasheh /* 1507ccd979bdSMark Fasheh * This is helping work around a lock inversion between the page lock 1508ccd979bdSMark Fasheh * and dlm locks. One path holds the page lock while calling aops 1509ccd979bdSMark Fasheh * which block acquiring dlm locks. The voting thread holds dlm 1510ccd979bdSMark Fasheh * locks while acquiring page locks while down converting data locks. 1511ccd979bdSMark Fasheh * This block is helping an aop path notice the inversion and back 1512ccd979bdSMark Fasheh * off to unlock its page lock before trying the dlm lock again. 1513ccd979bdSMark Fasheh */ 1514ccd979bdSMark Fasheh if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && 1515ccd979bdSMark Fasheh mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { 1516ccd979bdSMark Fasheh wait = 0; 1517ccd979bdSMark Fasheh if (lockres_remove_mask_waiter(lockres, &mw)) 1518ccd979bdSMark Fasheh ret = -EAGAIN; 1519ccd979bdSMark Fasheh else 1520ccd979bdSMark Fasheh goto again; 1521ccd979bdSMark Fasheh } 1522ccd979bdSMark Fasheh if (wait) { 1523ccd979bdSMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1524ccd979bdSMark Fasheh if (ret == 0) 1525ccd979bdSMark Fasheh goto again; 1526ccd979bdSMark Fasheh mlog_errno(ret); 1527ccd979bdSMark Fasheh } 15288ddb7b00SSunil Mushran ocfs2_update_lock_stats(lockres, level, &mw, ret); 1529ccd979bdSMark Fasheh 1530cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 1531cb25797dSJan Kara if (!ret && lockres->l_lockdep_map.key != NULL) { 1532cb25797dSJan Kara if (level == DLM_LOCK_PR) 1533cb25797dSJan Kara rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, 1534cb25797dSJan Kara !!(arg_flags & 
OCFS2_META_LOCK_NOQUEUE), 1535cb25797dSJan Kara caller_ip); 1536cb25797dSJan Kara else 1537cb25797dSJan Kara rwsem_acquire(&lockres->l_lockdep_map, l_subclass, 1538cb25797dSJan Kara !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1539cb25797dSJan Kara caller_ip); 1540cb25797dSJan Kara } 1541cb25797dSJan Kara #endif 1542ccd979bdSMark Fasheh return ret; 1543ccd979bdSMark Fasheh } 1544ccd979bdSMark Fasheh 1545cb25797dSJan Kara static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, 1546ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1547cb25797dSJan Kara int level, 1548cb25797dSJan Kara u32 lkm_flags, 1549cb25797dSJan Kara int arg_flags) 1550cb25797dSJan Kara { 1551cb25797dSJan Kara return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, 1552cb25797dSJan Kara 0, _RET_IP_); 1553cb25797dSJan Kara } 1554cb25797dSJan Kara 1555cb25797dSJan Kara 1556cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 1557cb25797dSJan Kara struct ocfs2_lock_res *lockres, 1558cb25797dSJan Kara int level, 1559cb25797dSJan Kara unsigned long caller_ip) 1560ccd979bdSMark Fasheh { 1561ccd979bdSMark Fasheh unsigned long flags; 1562ccd979bdSMark Fasheh 1563ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1564ccd979bdSMark Fasheh ocfs2_dec_holders(lockres, level); 156534d024f8SMark Fasheh ocfs2_downconvert_on_unlock(osb, lockres); 1566ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1567cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 1568cb25797dSJan Kara if (lockres->l_lockdep_map.key != NULL) 1569cb25797dSJan Kara rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); 1570cb25797dSJan Kara #endif 1571ccd979bdSMark Fasheh } 1572ccd979bdSMark Fasheh 1573da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1574d680efe9SMark Fasheh struct ocfs2_lock_res *lockres, 157524c19ef4SMark Fasheh int ex, 157624c19ef4SMark Fasheh int local) 1577ccd979bdSMark Fasheh { 1578bd3e7610SJoel Becker int level = 
ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1579ccd979bdSMark Fasheh unsigned long flags; 1580bd3e7610SJoel Becker u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; 1581ccd979bdSMark Fasheh 1582ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1583ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1584ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1585ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1586ccd979bdSMark Fasheh 158724c19ef4SMark Fasheh return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1588ccd979bdSMark Fasheh } 1589ccd979bdSMark Fasheh 1590ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping 1591ccd979bdSMark Fasheh * the normal cluster directory lookup. Use this ONLY on newly created 1592ccd979bdSMark Fasheh * inodes which other nodes can't possibly see, and which haven't been 1593ccd979bdSMark Fasheh * hashed in the inode hash yet. This can give us a good performance 1594ccd979bdSMark Fasheh * increase as it'll skip the network broadcast normally associated 1595ccd979bdSMark Fasheh * with creating a new lock resource. */ 1596ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode) 1597ccd979bdSMark Fasheh { 1598ccd979bdSMark Fasheh int ret; 1599d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1600ccd979bdSMark Fasheh 1601ccd979bdSMark Fasheh BUG_ON(!inode); 1602ccd979bdSMark Fasheh BUG_ON(!ocfs2_inode_is_new(inode)); 1603ccd979bdSMark Fasheh 1604b0697053SMark Fasheh mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1605ccd979bdSMark Fasheh 1606ccd979bdSMark Fasheh /* NOTE: That we don't increment any of the holder counts, nor 1607ccd979bdSMark Fasheh * do we add anything to a journal handle. Since this is 1608ccd979bdSMark Fasheh * supposed to be a new inode which the cluster doesn't know 1609ccd979bdSMark Fasheh * about yet, there is no need to. 
As far as the LVB handling 1610ccd979bdSMark Fasheh * is concerned, this is basically like acquiring an EX lock 1611ccd979bdSMark Fasheh * on a resource which has an invalid one -- we'll set it 1612ccd979bdSMark Fasheh * valid when we release the EX. */ 1613ccd979bdSMark Fasheh 161424c19ef4SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1615ccd979bdSMark Fasheh if (ret) { 1616ccd979bdSMark Fasheh mlog_errno(ret); 1617ccd979bdSMark Fasheh goto bail; 1618ccd979bdSMark Fasheh } 1619ccd979bdSMark Fasheh 162024c19ef4SMark Fasheh /* 1621bd3e7610SJoel Becker * We don't want to use DLM_LKF_LOCAL on a meta data lock as they 162224c19ef4SMark Fasheh * don't use a generation in their lock names. 162324c19ef4SMark Fasheh */ 1624e63aecb6SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1625ccd979bdSMark Fasheh if (ret) { 1626ccd979bdSMark Fasheh mlog_errno(ret); 1627ccd979bdSMark Fasheh goto bail; 1628ccd979bdSMark Fasheh } 1629ccd979bdSMark Fasheh 163050008630STiger Yang ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 163150008630STiger Yang if (ret) { 163250008630STiger Yang mlog_errno(ret); 163350008630STiger Yang goto bail; 163450008630STiger Yang } 163550008630STiger Yang 1636ccd979bdSMark Fasheh bail: 1637ccd979bdSMark Fasheh return ret; 1638ccd979bdSMark Fasheh } 1639ccd979bdSMark Fasheh 1640ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write) 1641ccd979bdSMark Fasheh { 1642ccd979bdSMark Fasheh int status, level; 1643ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres; 1644c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1645ccd979bdSMark Fasheh 1646ccd979bdSMark Fasheh BUG_ON(!inode); 1647ccd979bdSMark Fasheh 1648b0697053SMark Fasheh mlog(0, "inode %llu take %s RW lock\n", 1649b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1650ccd979bdSMark Fasheh write ? 
"EXMODE" : "PRMODE"); 1651ccd979bdSMark Fasheh 1652c1e8d35eSTao Ma if (ocfs2_mount_local(osb)) 1653c271c5c2SSunil Mushran return 0; 1654c271c5c2SSunil Mushran 1655ccd979bdSMark Fasheh lockres = &OCFS2_I(inode)->ip_rw_lockres; 1656ccd979bdSMark Fasheh 1657bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1658ccd979bdSMark Fasheh 1659ccd979bdSMark Fasheh status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1660ccd979bdSMark Fasheh 0); 1661ccd979bdSMark Fasheh if (status < 0) 1662ccd979bdSMark Fasheh mlog_errno(status); 1663ccd979bdSMark Fasheh 1664ccd979bdSMark Fasheh return status; 1665ccd979bdSMark Fasheh } 1666ccd979bdSMark Fasheh 1667ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write) 1668ccd979bdSMark Fasheh { 1669bd3e7610SJoel Becker int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1670ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1671c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1672ccd979bdSMark Fasheh 1673b0697053SMark Fasheh mlog(0, "inode %llu drop %s RW lock\n", 1674b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1675ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1676ccd979bdSMark Fasheh 1677c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1678ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1679ccd979bdSMark Fasheh } 1680ccd979bdSMark Fasheh 168150008630STiger Yang /* 168250008630STiger Yang * ocfs2_open_lock always get PR mode lock. 
168350008630STiger Yang */ 168450008630STiger Yang int ocfs2_open_lock(struct inode *inode) 168550008630STiger Yang { 168650008630STiger Yang int status = 0; 168750008630STiger Yang struct ocfs2_lock_res *lockres; 168850008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 168950008630STiger Yang 169050008630STiger Yang BUG_ON(!inode); 169150008630STiger Yang 169250008630STiger Yang mlog(0, "inode %llu take PRMODE open lock\n", 169350008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 169450008630STiger Yang 169503efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) 169650008630STiger Yang goto out; 169750008630STiger Yang 169850008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 169950008630STiger Yang 170050008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1701bd3e7610SJoel Becker DLM_LOCK_PR, 0, 0); 170250008630STiger Yang if (status < 0) 170350008630STiger Yang mlog_errno(status); 170450008630STiger Yang 170550008630STiger Yang out: 170650008630STiger Yang return status; 170750008630STiger Yang } 170850008630STiger Yang 170950008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write) 171050008630STiger Yang { 171150008630STiger Yang int status = 0, level; 171250008630STiger Yang struct ocfs2_lock_res *lockres; 171350008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 171450008630STiger Yang 171550008630STiger Yang BUG_ON(!inode); 171650008630STiger Yang 171750008630STiger Yang mlog(0, "inode %llu try to take %s open lock\n", 171850008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno, 171950008630STiger Yang write ? 
"EXMODE" : "PRMODE"); 172050008630STiger Yang 172103efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb)) { 172203efed8aSTiger Yang if (write) 172303efed8aSTiger Yang status = -EROFS; 172403efed8aSTiger Yang goto out; 172503efed8aSTiger Yang } 172603efed8aSTiger Yang 172750008630STiger Yang if (ocfs2_mount_local(osb)) 172850008630STiger Yang goto out; 172950008630STiger Yang 173050008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 173150008630STiger Yang 1732bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 173350008630STiger Yang 173450008630STiger Yang /* 173550008630STiger Yang * The file system may already holding a PRMODE/EXMODE open lock. 1736bd3e7610SJoel Becker * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 173750008630STiger Yang * other nodes and the -EAGAIN will indicate to the caller that 173850008630STiger Yang * this inode is still in use. 173950008630STiger Yang */ 174050008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1741bd3e7610SJoel Becker level, DLM_LKF_NOQUEUE, 0); 174250008630STiger Yang 174350008630STiger Yang out: 174450008630STiger Yang return status; 174550008630STiger Yang } 174650008630STiger Yang 174750008630STiger Yang /* 174850008630STiger Yang * ocfs2_open_unlock unlock PR and EX mode open locks. 
174950008630STiger Yang */ 175050008630STiger Yang void ocfs2_open_unlock(struct inode *inode) 175150008630STiger Yang { 175250008630STiger Yang struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 175350008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 175450008630STiger Yang 175550008630STiger Yang mlog(0, "inode %llu drop open lock\n", 175650008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 175750008630STiger Yang 175850008630STiger Yang if (ocfs2_mount_local(osb)) 175950008630STiger Yang goto out; 176050008630STiger Yang 176150008630STiger Yang if(lockres->l_ro_holders) 176250008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1763bd3e7610SJoel Becker DLM_LOCK_PR); 176450008630STiger Yang if(lockres->l_ex_holders) 176550008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1766bd3e7610SJoel Becker DLM_LOCK_EX); 176750008630STiger Yang 176850008630STiger Yang out: 1769c1e8d35eSTao Ma return; 177050008630STiger Yang } 177150008630STiger Yang 1772cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1773cf8e06f1SMark Fasheh int level) 1774cf8e06f1SMark Fasheh { 1775cf8e06f1SMark Fasheh int ret; 1776cf8e06f1SMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1777cf8e06f1SMark Fasheh unsigned long flags; 1778cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1779cf8e06f1SMark Fasheh 1780cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1781cf8e06f1SMark Fasheh 1782cf8e06f1SMark Fasheh retry_cancel: 1783cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1784cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1785cf8e06f1SMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 1786cf8e06f1SMark Fasheh if (ret) { 1787cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1788cf8e06f1SMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 1789cf8e06f1SMark Fasheh if (ret < 0) { 
1790cf8e06f1SMark Fasheh mlog_errno(ret); 1791cf8e06f1SMark Fasheh goto out; 1792cf8e06f1SMark Fasheh } 1793cf8e06f1SMark Fasheh goto retry_cancel; 1794cf8e06f1SMark Fasheh } 1795cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1796cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1797cf8e06f1SMark Fasheh 1798cf8e06f1SMark Fasheh ocfs2_wait_for_mask(&mw); 1799cf8e06f1SMark Fasheh goto retry_cancel; 1800cf8e06f1SMark Fasheh } 1801cf8e06f1SMark Fasheh 1802cf8e06f1SMark Fasheh ret = -ERESTARTSYS; 1803cf8e06f1SMark Fasheh /* 1804cf8e06f1SMark Fasheh * We may still have gotten the lock, in which case there's no 1805cf8e06f1SMark Fasheh * point to restarting the syscall. 1806cf8e06f1SMark Fasheh */ 1807cf8e06f1SMark Fasheh if (lockres->l_level == level) 1808cf8e06f1SMark Fasheh ret = 0; 1809cf8e06f1SMark Fasheh 1810cf8e06f1SMark Fasheh mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1811cf8e06f1SMark Fasheh lockres->l_flags, lockres->l_level, lockres->l_action); 1812cf8e06f1SMark Fasheh 1813cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1814cf8e06f1SMark Fasheh 1815cf8e06f1SMark Fasheh out: 1816cf8e06f1SMark Fasheh return ret; 1817cf8e06f1SMark Fasheh } 1818cf8e06f1SMark Fasheh 1819cf8e06f1SMark Fasheh /* 1820cf8e06f1SMark Fasheh * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1821cf8e06f1SMark Fasheh * flock() calls. The locking approach this requires is sufficiently 1822cf8e06f1SMark Fasheh * different from all other cluster lock types that we implement a 18233ad2f3fbSDaniel Mack * separate path to the "low-level" dlm calls. In particular: 1824cf8e06f1SMark Fasheh * 1825cf8e06f1SMark Fasheh * - No optimization of lock levels is done - we take at exactly 1826cf8e06f1SMark Fasheh * what's been requested. 1827cf8e06f1SMark Fasheh * 1828cf8e06f1SMark Fasheh * - No lock caching is employed. 
We immediately downconvert to 1829cf8e06f1SMark Fasheh * no-lock at unlock time. This also means flock locks never go on 1830cf8e06f1SMark Fasheh * the blocking list). 1831cf8e06f1SMark Fasheh * 1832cf8e06f1SMark Fasheh * - Since userspace can trivially deadlock itself with flock, we make 1833cf8e06f1SMark Fasheh * sure to allow cancellation of a misbehaving applications flock() 1834cf8e06f1SMark Fasheh * request. 1835cf8e06f1SMark Fasheh * 1836cf8e06f1SMark Fasheh * - Access to any flock lockres doesn't require concurrency, so we 1837cf8e06f1SMark Fasheh * can simplify the code by requiring the caller to guarantee 1838cf8e06f1SMark Fasheh * serialization of dlmglue flock calls. 1839cf8e06f1SMark Fasheh */ 1840cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock) 1841cf8e06f1SMark Fasheh { 1842e988cf1cSMark Fasheh int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1843e988cf1cSMark Fasheh unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0; 1844cf8e06f1SMark Fasheh unsigned long flags; 1845cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1846cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1847cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1848cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1849cf8e06f1SMark Fasheh 1850cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1851cf8e06f1SMark Fasheh 1852cf8e06f1SMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1853bd3e7610SJoel Becker (lockres->l_level > DLM_LOCK_NL)) { 1854cf8e06f1SMark Fasheh mlog(ML_ERROR, 1855cf8e06f1SMark Fasheh "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1856cf8e06f1SMark Fasheh "level: %u\n", lockres->l_name, lockres->l_flags, 1857cf8e06f1SMark Fasheh lockres->l_level); 1858cf8e06f1SMark Fasheh return -EINVAL; 1859cf8e06f1SMark Fasheh } 1860cf8e06f1SMark Fasheh 1861cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1862cf8e06f1SMark Fasheh if 
(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1863cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1864cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1865cf8e06f1SMark Fasheh 1866cf8e06f1SMark Fasheh /* 1867cf8e06f1SMark Fasheh * Get the lock at NLMODE to start - that way we 1868cf8e06f1SMark Fasheh * can cancel the upconvert request if need be. 1869cf8e06f1SMark Fasheh */ 1870e988cf1cSMark Fasheh ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); 1871cf8e06f1SMark Fasheh if (ret < 0) { 1872cf8e06f1SMark Fasheh mlog_errno(ret); 1873cf8e06f1SMark Fasheh goto out; 1874cf8e06f1SMark Fasheh } 1875cf8e06f1SMark Fasheh 1876cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1877cf8e06f1SMark Fasheh if (ret) { 1878cf8e06f1SMark Fasheh mlog_errno(ret); 1879cf8e06f1SMark Fasheh goto out; 1880cf8e06f1SMark Fasheh } 1881cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1882cf8e06f1SMark Fasheh } 1883cf8e06f1SMark Fasheh 1884cf8e06f1SMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1885e988cf1cSMark Fasheh lkm_flags |= DLM_LKF_CONVERT; 1886cf8e06f1SMark Fasheh lockres->l_requested = level; 1887cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1888cf8e06f1SMark Fasheh 1889cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1890cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1891cf8e06f1SMark Fasheh 18924670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, 1893a796d286SJoel Becker lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1); 18947431cd7eSJoel Becker if (ret) { 18957431cd7eSJoel Becker if (!trylock || (ret != -EAGAIN)) { 189624ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1897cf8e06f1SMark Fasheh ret = -EINVAL; 1898cf8e06f1SMark Fasheh } 1899cf8e06f1SMark Fasheh 1900cf8e06f1SMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1901cf8e06f1SMark Fasheh 
lockres_remove_mask_waiter(lockres, &mw); 1902cf8e06f1SMark Fasheh goto out; 1903cf8e06f1SMark Fasheh } 1904cf8e06f1SMark Fasheh 1905cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 1906cf8e06f1SMark Fasheh if (ret == -ERESTARTSYS) { 1907cf8e06f1SMark Fasheh /* 1908cf8e06f1SMark Fasheh * Userspace can cause deadlock itself with 1909cf8e06f1SMark Fasheh * flock(). Current behavior locally is to allow the 1910cf8e06f1SMark Fasheh * deadlock, but abort the system call if a signal is 1911cf8e06f1SMark Fasheh * received. We follow this example, otherwise a 1912cf8e06f1SMark Fasheh * poorly written program could sit in kernel until 1913cf8e06f1SMark Fasheh * reboot. 1914cf8e06f1SMark Fasheh * 1915cf8e06f1SMark Fasheh * Handling this is a bit more complicated for Ocfs2 1916cf8e06f1SMark Fasheh * though. We can't exit this function with an 1917cf8e06f1SMark Fasheh * outstanding lock request, so a cancel convert is 1918cf8e06f1SMark Fasheh * required. We intentionally overwrite 'ret' - if the 1919cf8e06f1SMark Fasheh * cancel fails and the lock was granted, it's easier 1920af901ca1SAndré Goddard Rosa * to just bubble success back up to the user. 
1921cf8e06f1SMark Fasheh */ 1922cf8e06f1SMark Fasheh ret = ocfs2_flock_handle_signal(lockres, level); 19231693a5c0SDavid Teigland } else if (!ret && (level > lockres->l_level)) { 19241693a5c0SDavid Teigland /* Trylock failed asynchronously */ 19251693a5c0SDavid Teigland BUG_ON(!trylock); 19261693a5c0SDavid Teigland ret = -EAGAIN; 1927cf8e06f1SMark Fasheh } 1928cf8e06f1SMark Fasheh 1929cf8e06f1SMark Fasheh out: 1930cf8e06f1SMark Fasheh 1931cf8e06f1SMark Fasheh mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 1932cf8e06f1SMark Fasheh lockres->l_name, ex, trylock, ret); 1933cf8e06f1SMark Fasheh return ret; 1934cf8e06f1SMark Fasheh } 1935cf8e06f1SMark Fasheh 1936cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file) 1937cf8e06f1SMark Fasheh { 1938cf8e06f1SMark Fasheh int ret; 1939de551246SJoel Becker unsigned int gen; 1940cf8e06f1SMark Fasheh unsigned long flags; 1941cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1942cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1943cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1944cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1945cf8e06f1SMark Fasheh 1946cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1947cf8e06f1SMark Fasheh 1948cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 1949cf8e06f1SMark Fasheh return; 1950cf8e06f1SMark Fasheh 1951e988cf1cSMark Fasheh if (lockres->l_level == DLM_LOCK_NL) 1952cf8e06f1SMark Fasheh return; 1953cf8e06f1SMark Fasheh 1954cf8e06f1SMark Fasheh mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 1955cf8e06f1SMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_level, 1956cf8e06f1SMark Fasheh lockres->l_action); 1957cf8e06f1SMark Fasheh 1958cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1959cf8e06f1SMark Fasheh /* 1960cf8e06f1SMark Fasheh * Fake a blocking ast for the downconvert code. 
1961cf8e06f1SMark Fasheh */ 1962cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1963bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_EX; 1964cf8e06f1SMark Fasheh 1965e988cf1cSMark Fasheh gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); 1966cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1967cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1968cf8e06f1SMark Fasheh 1969e988cf1cSMark Fasheh ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); 1970cf8e06f1SMark Fasheh if (ret) { 1971cf8e06f1SMark Fasheh mlog_errno(ret); 1972cf8e06f1SMark Fasheh return; 1973cf8e06f1SMark Fasheh } 1974cf8e06f1SMark Fasheh 1975cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1976cf8e06f1SMark Fasheh if (ret) 1977cf8e06f1SMark Fasheh mlog_errno(ret); 1978cf8e06f1SMark Fasheh } 1979cf8e06f1SMark Fasheh 198034d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 1981ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 1982ccd979bdSMark Fasheh { 1983ccd979bdSMark Fasheh int kick = 0; 1984ccd979bdSMark Fasheh 1985ccd979bdSMark Fasheh /* If we know that another node is waiting on our lock, kick 198634d024f8SMark Fasheh * the downconvert thread * pre-emptively when we reach a release 1987ccd979bdSMark Fasheh * condition. 
*/ 1988ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 1989ccd979bdSMark Fasheh switch(lockres->l_blocking) { 1990bd3e7610SJoel Becker case DLM_LOCK_EX: 1991ccd979bdSMark Fasheh if (!lockres->l_ex_holders && !lockres->l_ro_holders) 1992ccd979bdSMark Fasheh kick = 1; 1993ccd979bdSMark Fasheh break; 1994bd3e7610SJoel Becker case DLM_LOCK_PR: 1995ccd979bdSMark Fasheh if (!lockres->l_ex_holders) 1996ccd979bdSMark Fasheh kick = 1; 1997ccd979bdSMark Fasheh break; 1998ccd979bdSMark Fasheh default: 1999ccd979bdSMark Fasheh BUG(); 2000ccd979bdSMark Fasheh } 2001ccd979bdSMark Fasheh } 2002ccd979bdSMark Fasheh 2003ccd979bdSMark Fasheh if (kick) 200434d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 2005ccd979bdSMark Fasheh } 2006ccd979bdSMark Fasheh 2007ccd979bdSMark Fasheh #define OCFS2_SEC_BITS 34 2008ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT (64 - 34) 2009ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 2010ccd979bdSMark Fasheh 2011ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for 2012ccd979bdSMark Fasheh * now. */ 2013ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec) 2014ccd979bdSMark Fasheh { 2015ccd979bdSMark Fasheh u64 res; 2016ccd979bdSMark Fasheh u64 sec = spec->tv_sec; 2017ccd979bdSMark Fasheh u32 nsec = spec->tv_nsec; 2018ccd979bdSMark Fasheh 2019ccd979bdSMark Fasheh res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 2020ccd979bdSMark Fasheh 2021ccd979bdSMark Fasheh return res; 2022ccd979bdSMark Fasheh } 2023ccd979bdSMark Fasheh 2024ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't 2025ccd979bdSMark Fasheh * need ip_lock in this function as anyone who would be changing those 2026e63aecb6SMark Fasheh * values is supposed to be blocked in ocfs2_inode_lock right now. 
*/ 2027ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode) 2028ccd979bdSMark Fasheh { 2029ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2030e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2031ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 2032ccd979bdSMark Fasheh 2033a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2034ccd979bdSMark Fasheh 203524c19ef4SMark Fasheh /* 203624c19ef4SMark Fasheh * Invalidate the LVB of a deleted inode - this way other 203724c19ef4SMark Fasheh * nodes are forced to go to disk and discover the new inode 203824c19ef4SMark Fasheh * status. 203924c19ef4SMark Fasheh */ 204024c19ef4SMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 204124c19ef4SMark Fasheh lvb->lvb_version = 0; 204224c19ef4SMark Fasheh goto out; 204324c19ef4SMark Fasheh } 204424c19ef4SMark Fasheh 20454d3b83f7SMark Fasheh lvb->lvb_version = OCFS2_LVB_VERSION; 2046ccd979bdSMark Fasheh lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 2047ccd979bdSMark Fasheh lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 204803ab30f7SEric W. Biederman lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode)); 204903ab30f7SEric W. 
Biederman lvb->lvb_igid = cpu_to_be32(i_gid_read(inode)); 2050ccd979bdSMark Fasheh lvb->lvb_imode = cpu_to_be16(inode->i_mode); 2051ccd979bdSMark Fasheh lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 2052ccd979bdSMark Fasheh lvb->lvb_iatime_packed = 2053ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 2054ccd979bdSMark Fasheh lvb->lvb_ictime_packed = 2055ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 2056ccd979bdSMark Fasheh lvb->lvb_imtime_packed = 2057ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 2058ca4d147eSHerbert Poetzl lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 205915b1e36bSMark Fasheh lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 2060f9e2d82eSMark Fasheh lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 2061ccd979bdSMark Fasheh 206224c19ef4SMark Fasheh out: 2063ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 2064ccd979bdSMark Fasheh } 2065ccd979bdSMark Fasheh 2066ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec, 2067ccd979bdSMark Fasheh u64 packed_time) 2068ccd979bdSMark Fasheh { 2069ccd979bdSMark Fasheh spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 2070ccd979bdSMark Fasheh spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 2071ccd979bdSMark Fasheh } 2072ccd979bdSMark Fasheh 2073ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 2074ccd979bdSMark Fasheh { 2075ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2076e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2077ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 2078ccd979bdSMark Fasheh 2079ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 2080ccd979bdSMark Fasheh 2081a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2082ccd979bdSMark Fasheh 2083ccd979bdSMark Fasheh /* We're safe here without the lockres lock... 
*/ 2084ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2085ccd979bdSMark Fasheh oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 2086ccd979bdSMark Fasheh i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 2087ccd979bdSMark Fasheh 2088ca4d147eSHerbert Poetzl oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 208915b1e36bSMark Fasheh oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 2090ca4d147eSHerbert Poetzl ocfs2_set_inode_flags(inode); 2091ca4d147eSHerbert Poetzl 2092ccd979bdSMark Fasheh /* fast-symlinks are a special case */ 2093ccd979bdSMark Fasheh if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 2094ccd979bdSMark Fasheh inode->i_blocks = 0; 2095ccd979bdSMark Fasheh else 20968110b073SMark Fasheh inode->i_blocks = ocfs2_inode_sector_count(inode); 2097ccd979bdSMark Fasheh 209803ab30f7SEric W. Biederman i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid)); 209903ab30f7SEric W. Biederman i_gid_write(inode, be32_to_cpu(lvb->lvb_igid)); 2100ccd979bdSMark Fasheh inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2101bfe86848SMiklos Szeredi set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); 2102ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_atime, 2103ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_iatime_packed)); 2104ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_mtime, 2105ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_imtime_packed)); 2106ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_ctime, 2107ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_ictime_packed)); 2108ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2109ccd979bdSMark Fasheh } 2110ccd979bdSMark Fasheh 2111f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 2112f9e2d82eSMark Fasheh struct ocfs2_lock_res *lockres) 2113ccd979bdSMark Fasheh { 2114a641dc2aSMark Fasheh struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2115ccd979bdSMark Fasheh 21161c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) 21171c520dfbSJoel Becker && lvb->lvb_version == OCFS2_LVB_VERSION 
2118f9e2d82eSMark Fasheh && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 2119ccd979bdSMark Fasheh return 1; 2120ccd979bdSMark Fasheh return 0; 2121ccd979bdSMark Fasheh } 2122ccd979bdSMark Fasheh 2123ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and 2124ccd979bdSMark Fasheh * arbitrate who gets to refresh it. 2125ccd979bdSMark Fasheh * 2126ccd979bdSMark Fasheh * 0 means no refresh needed. 2127ccd979bdSMark Fasheh * 2128ccd979bdSMark Fasheh * > 0 means you need to refresh this and you MUST call 2129ccd979bdSMark Fasheh * ocfs2_complete_lock_res_refresh afterwards. */ 2130ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 2131ccd979bdSMark Fasheh { 2132ccd979bdSMark Fasheh unsigned long flags; 2133ccd979bdSMark Fasheh int status = 0; 2134ccd979bdSMark Fasheh 2135ccd979bdSMark Fasheh refresh_check: 2136ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2137ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 2138ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2139ccd979bdSMark Fasheh goto bail; 2140ccd979bdSMark Fasheh } 2141ccd979bdSMark Fasheh 2142ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 2143ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2144ccd979bdSMark Fasheh 2145ccd979bdSMark Fasheh ocfs2_wait_on_refreshing_lock(lockres); 2146ccd979bdSMark Fasheh goto refresh_check; 2147ccd979bdSMark Fasheh } 2148ccd979bdSMark Fasheh 2149ccd979bdSMark Fasheh /* Ok, I'll be the one to refresh this lock. 
*/ 2150ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 2151ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2152ccd979bdSMark Fasheh 2153ccd979bdSMark Fasheh status = 1; 2154ccd979bdSMark Fasheh bail: 2155c1e8d35eSTao Ma mlog(0, "status %d\n", status); 2156ccd979bdSMark Fasheh return status; 2157ccd979bdSMark Fasheh } 2158ccd979bdSMark Fasheh 2159ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh 2160ccd979bdSMark Fasheh * anymroe, but i won't clear the needs refresh flag. */ 2161ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 2162ccd979bdSMark Fasheh int status) 2163ccd979bdSMark Fasheh { 2164ccd979bdSMark Fasheh unsigned long flags; 2165ccd979bdSMark Fasheh 2166ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2167ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 2168ccd979bdSMark Fasheh if (!status) 2169ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 2170ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2171ccd979bdSMark Fasheh 2172ccd979bdSMark Fasheh wake_up(&lockres->l_event); 2173ccd979bdSMark Fasheh } 2174ccd979bdSMark Fasheh 2175ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. 
*/ 2176e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 2177ccd979bdSMark Fasheh struct buffer_head **bh) 2178ccd979bdSMark Fasheh { 2179ccd979bdSMark Fasheh int status = 0; 2180ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2181e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2182ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 2183c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2184ccd979bdSMark Fasheh 2185be9e986bSMark Fasheh if (ocfs2_mount_local(osb)) 2186be9e986bSMark Fasheh goto bail; 2187be9e986bSMark Fasheh 2188ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2189ccd979bdSMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 2190b0697053SMark Fasheh mlog(0, "Orphaned inode %llu was deleted while we " 2191ccd979bdSMark Fasheh "were waiting on a lock. ip_flags = 0x%x\n", 2192b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, oi->ip_flags); 2193ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2194ccd979bdSMark Fasheh status = -ENOENT; 2195ccd979bdSMark Fasheh goto bail; 2196ccd979bdSMark Fasheh } 2197ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2198ccd979bdSMark Fasheh 2199ccd979bdSMark Fasheh if (!ocfs2_should_refresh_lock_res(lockres)) 2200ccd979bdSMark Fasheh goto bail; 2201ccd979bdSMark Fasheh 2202ccd979bdSMark Fasheh /* This will discard any caching information we might have had 2203ccd979bdSMark Fasheh * for the inode metadata. */ 22048cb471e8SJoel Becker ocfs2_metadata_cache_purge(INODE_CACHE(inode)); 2205ccd979bdSMark Fasheh 220683418978SMark Fasheh ocfs2_extent_map_trunc(inode, 0); 220783418978SMark Fasheh 2208be9e986bSMark Fasheh if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 2209b0697053SMark Fasheh mlog(0, "Trusting LVB on inode %llu\n", 2210b0697053SMark Fasheh (unsigned long long)oi->ip_blkno); 2211ccd979bdSMark Fasheh ocfs2_refresh_inode_from_lvb(inode); 2212ccd979bdSMark Fasheh } else { 2213ccd979bdSMark Fasheh /* Boo, we have to go to disk. 
*/ 2214ccd979bdSMark Fasheh /* read bh, cast, ocfs2_refresh_inode */ 2215b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, bh); 2216ccd979bdSMark Fasheh if (status < 0) { 2217ccd979bdSMark Fasheh mlog_errno(status); 2218ccd979bdSMark Fasheh goto bail_refresh; 2219ccd979bdSMark Fasheh } 2220ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) (*bh)->b_data; 2221ccd979bdSMark Fasheh 2222ccd979bdSMark Fasheh /* This is a good chance to make sure we're not 2223b657c95cSJoel Becker * locking an invalid object. ocfs2_read_inode_block() 2224b657c95cSJoel Becker * already checked that the inode block is sane. 2225ccd979bdSMark Fasheh * 2226ccd979bdSMark Fasheh * We bug on a stale inode here because we checked 2227ccd979bdSMark Fasheh * above whether it was wiped from disk. The wiping 2228ccd979bdSMark Fasheh * node provides a guarantee that we receive that 2229ccd979bdSMark Fasheh * message and can mark the inode before dropping any 2230ccd979bdSMark Fasheh * locks associated with it. */ 2231ccd979bdSMark Fasheh mlog_bug_on_msg(inode->i_generation != 2232ccd979bdSMark Fasheh le32_to_cpu(fe->i_generation), 2233b0697053SMark Fasheh "Invalid dinode %llu disk generation: %u " 2234ccd979bdSMark Fasheh "inode->i_generation: %u\n", 2235b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2236b0697053SMark Fasheh le32_to_cpu(fe->i_generation), 2237ccd979bdSMark Fasheh inode->i_generation); 2238ccd979bdSMark Fasheh mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 2239ccd979bdSMark Fasheh !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 2240b0697053SMark Fasheh "Stale dinode %llu dtime: %llu flags: 0x%x\n", 2241b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2242b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_dtime), 2243ccd979bdSMark Fasheh le32_to_cpu(fe->i_flags)); 2244ccd979bdSMark Fasheh 2245ccd979bdSMark Fasheh ocfs2_refresh_inode(inode, fe); 22468ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2247ccd979bdSMark Fasheh } 2248ccd979bdSMark Fasheh 
2249ccd979bdSMark Fasheh status = 0; 2250ccd979bdSMark Fasheh bail_refresh: 2251ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2252ccd979bdSMark Fasheh bail: 2253ccd979bdSMark Fasheh return status; 2254ccd979bdSMark Fasheh } 2255ccd979bdSMark Fasheh 2256ccd979bdSMark Fasheh static int ocfs2_assign_bh(struct inode *inode, 2257ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2258ccd979bdSMark Fasheh struct buffer_head *passed_bh) 2259ccd979bdSMark Fasheh { 2260ccd979bdSMark Fasheh int status; 2261ccd979bdSMark Fasheh 2262ccd979bdSMark Fasheh if (passed_bh) { 2263ccd979bdSMark Fasheh /* Ok, the update went to disk for us, use the 2264ccd979bdSMark Fasheh * returned bh. */ 2265ccd979bdSMark Fasheh *ret_bh = passed_bh; 2266ccd979bdSMark Fasheh get_bh(*ret_bh); 2267ccd979bdSMark Fasheh 2268ccd979bdSMark Fasheh return 0; 2269ccd979bdSMark Fasheh } 2270ccd979bdSMark Fasheh 2271b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, ret_bh); 2272ccd979bdSMark Fasheh if (status < 0) 2273ccd979bdSMark Fasheh mlog_errno(status); 2274ccd979bdSMark Fasheh 2275ccd979bdSMark Fasheh return status; 2276ccd979bdSMark Fasheh } 2277ccd979bdSMark Fasheh 2278ccd979bdSMark Fasheh /* 2279ccd979bdSMark Fasheh * returns < 0 error if the callback will never be called, otherwise 2280ccd979bdSMark Fasheh * the result of the lock will be communicated via the callback. 
2281ccd979bdSMark Fasheh */ 2282cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode, 2283ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2284ccd979bdSMark Fasheh int ex, 2285cb25797dSJan Kara int arg_flags, 2286cb25797dSJan Kara int subclass) 2287ccd979bdSMark Fasheh { 2288bd3e7610SJoel Becker int status, level, acquired; 2289bd3e7610SJoel Becker u32 dlm_flags; 2290c271c5c2SSunil Mushran struct ocfs2_lock_res *lockres = NULL; 2291ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2292ccd979bdSMark Fasheh struct buffer_head *local_bh = NULL; 2293ccd979bdSMark Fasheh 2294ccd979bdSMark Fasheh BUG_ON(!inode); 2295ccd979bdSMark Fasheh 2296b0697053SMark Fasheh mlog(0, "inode %llu, take %s META lock\n", 2297b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2298ccd979bdSMark Fasheh ex ? "EXMODE" : "PRMODE"); 2299ccd979bdSMark Fasheh 2300ccd979bdSMark Fasheh status = 0; 2301ccd979bdSMark Fasheh acquired = 0; 2302ccd979bdSMark Fasheh /* We'll allow faking a readonly metadata lock for 2303ccd979bdSMark Fasheh * rodevices. */ 2304ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) { 2305ccd979bdSMark Fasheh if (ex) 2306ccd979bdSMark Fasheh status = -EROFS; 230703efed8aSTiger Yang goto getbh; 2308ccd979bdSMark Fasheh } 2309ccd979bdSMark Fasheh 2310c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2311c271c5c2SSunil Mushran goto local; 2312c271c5c2SSunil Mushran 2313ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2314553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2315ccd979bdSMark Fasheh 2316e63aecb6SMark Fasheh lockres = &OCFS2_I(inode)->ip_inode_lockres; 2317bd3e7610SJoel Becker level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2318ccd979bdSMark Fasheh dlm_flags = 0; 2319ccd979bdSMark Fasheh if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2320bd3e7610SJoel Becker dlm_flags |= DLM_LKF_NOQUEUE; 2321ccd979bdSMark Fasheh 2322cb25797dSJan Kara status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, 2323cb25797dSJan Kara arg_flags, subclass, _RET_IP_); 2324ccd979bdSMark Fasheh if (status < 0) { 2325ccd979bdSMark Fasheh if (status != -EAGAIN && status != -EIOCBRETRY) 2326ccd979bdSMark Fasheh mlog_errno(status); 2327ccd979bdSMark Fasheh goto bail; 2328ccd979bdSMark Fasheh } 2329ccd979bdSMark Fasheh 2330ccd979bdSMark Fasheh /* Notify the error cleanup path to drop the cluster lock. */ 2331ccd979bdSMark Fasheh acquired = 1; 2332ccd979bdSMark Fasheh 2333ccd979bdSMark Fasheh /* We wait twice because a node may have died while we were in 2334ccd979bdSMark Fasheh * the lower dlm layers. The second time though, we've 2335ccd979bdSMark Fasheh * committed to owning this lock so we don't allow signals to 2336ccd979bdSMark Fasheh * abort the operation. */ 2337ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2338553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2339ccd979bdSMark Fasheh 2340c271c5c2SSunil Mushran local: 234124c19ef4SMark Fasheh /* 234224c19ef4SMark Fasheh * We only see this flag if we're being called from 234324c19ef4SMark Fasheh * ocfs2_read_locked_inode(). It means we're locking an inode 234424c19ef4SMark Fasheh * which hasn't been populated yet, so clear the refresh flag 234524c19ef4SMark Fasheh * and let the caller handle it. 234624c19ef4SMark Fasheh */ 234724c19ef4SMark Fasheh if (inode->i_state & I_NEW) { 234824c19ef4SMark Fasheh status = 0; 2349c271c5c2SSunil Mushran if (lockres) 235024c19ef4SMark Fasheh ocfs2_complete_lock_res_refresh(lockres, 0); 235124c19ef4SMark Fasheh goto bail; 235224c19ef4SMark Fasheh } 235324c19ef4SMark Fasheh 2354ccd979bdSMark Fasheh /* This is fun. 
The caller may want a bh back, or it may 2355e63aecb6SMark Fasheh * not. ocfs2_inode_lock_update definitely wants one in, but 2356ccd979bdSMark Fasheh * may or may not read one, depending on what's in the 2357ccd979bdSMark Fasheh * LVB. The result of all of this is that we've *only* gone to 2358ccd979bdSMark Fasheh * disk if we have to, so the complexity is worthwhile. */ 2359e63aecb6SMark Fasheh status = ocfs2_inode_lock_update(inode, &local_bh); 2360ccd979bdSMark Fasheh if (status < 0) { 2361ccd979bdSMark Fasheh if (status != -ENOENT) 2362ccd979bdSMark Fasheh mlog_errno(status); 2363ccd979bdSMark Fasheh goto bail; 2364ccd979bdSMark Fasheh } 236503efed8aSTiger Yang getbh: 2366ccd979bdSMark Fasheh if (ret_bh) { 2367ccd979bdSMark Fasheh status = ocfs2_assign_bh(inode, ret_bh, local_bh); 2368ccd979bdSMark Fasheh if (status < 0) { 2369ccd979bdSMark Fasheh mlog_errno(status); 2370ccd979bdSMark Fasheh goto bail; 2371ccd979bdSMark Fasheh } 2372ccd979bdSMark Fasheh } 2373ccd979bdSMark Fasheh 2374ccd979bdSMark Fasheh bail: 2375ccd979bdSMark Fasheh if (status < 0) { 2376ccd979bdSMark Fasheh if (ret_bh && (*ret_bh)) { 2377ccd979bdSMark Fasheh brelse(*ret_bh); 2378ccd979bdSMark Fasheh *ret_bh = NULL; 2379ccd979bdSMark Fasheh } 2380ccd979bdSMark Fasheh if (acquired) 2381e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2382ccd979bdSMark Fasheh } 2383ccd979bdSMark Fasheh 2384ccd979bdSMark Fasheh if (local_bh) 2385ccd979bdSMark Fasheh brelse(local_bh); 2386ccd979bdSMark Fasheh 2387ccd979bdSMark Fasheh return status; 2388ccd979bdSMark Fasheh } 2389ccd979bdSMark Fasheh 2390ccd979bdSMark Fasheh /* 239134d024f8SMark Fasheh * This is working around a lock inversion between tasks acquiring DLM 239234d024f8SMark Fasheh * locks while holding a page lock and the downconvert thread which 239334d024f8SMark Fasheh * blocks dlm lock acquiry while acquiring page locks. 
 *
 * ** These _with_page variants are only intended to be called from aop
 * methods that hold page locks and return a very specific *positive* error
 * code that aop methods pass up to the VFS -- test for errors with != 0. **
 *
 * The DLM is called such that it returns -EAGAIN if it would have
 * blocked waiting for the downconvert thread.  In that case we unlock
 * our page so the downconvert thread can make progress.  Once we've
 * done this we have to return AOP_TRUNCATED_PAGE so the aop method
 * that called us can bubble that back up into the VFS who will then
 * immediately retry the aop call.
 *
 * We do a blocking lock and immediate unlock before returning, though, so that
 * the lock has a great chance of being cached on this node by the time the VFS
 * calls back to retry the aop.  This has a potential to livelock as nodes
 * ping locks back and forth, but that's a risk we're willing to take to avoid
 * the lock inversion simply.
2411ccd979bdSMark Fasheh */ 2412e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode, 2413ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2414ccd979bdSMark Fasheh int ex, 2415ccd979bdSMark Fasheh struct page *page) 2416ccd979bdSMark Fasheh { 2417ccd979bdSMark Fasheh int ret; 2418ccd979bdSMark Fasheh 2419e63aecb6SMark Fasheh ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2420ccd979bdSMark Fasheh if (ret == -EAGAIN) { 2421ccd979bdSMark Fasheh unlock_page(page); 2422e63aecb6SMark Fasheh if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) 2423e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2424ccd979bdSMark Fasheh ret = AOP_TRUNCATED_PAGE; 2425ccd979bdSMark Fasheh } 2426ccd979bdSMark Fasheh 2427ccd979bdSMark Fasheh return ret; 2428ccd979bdSMark Fasheh } 2429ccd979bdSMark Fasheh 2430e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode, 24317f1a37e3STiger Yang struct vfsmount *vfsmnt, 24327f1a37e3STiger Yang int *level) 24337f1a37e3STiger Yang { 24347f1a37e3STiger Yang int ret; 24357f1a37e3STiger Yang 2436e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, NULL, 0); 24377f1a37e3STiger Yang if (ret < 0) { 24387f1a37e3STiger Yang mlog_errno(ret); 24397f1a37e3STiger Yang return ret; 24407f1a37e3STiger Yang } 24417f1a37e3STiger Yang 24427f1a37e3STiger Yang /* 24437f1a37e3STiger Yang * If we should update atime, we will get EX lock, 24447f1a37e3STiger Yang * otherwise we just get PR lock. 
24457f1a37e3STiger Yang */ 24467f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) { 24477f1a37e3STiger Yang struct buffer_head *bh = NULL; 24487f1a37e3STiger Yang 2449e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, 0); 2450e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, &bh, 1); 24517f1a37e3STiger Yang if (ret < 0) { 24527f1a37e3STiger Yang mlog_errno(ret); 24537f1a37e3STiger Yang return ret; 24547f1a37e3STiger Yang } 24557f1a37e3STiger Yang *level = 1; 24567f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) 24577f1a37e3STiger Yang ocfs2_update_inode_atime(inode, bh); 24587f1a37e3STiger Yang if (bh) 24597f1a37e3STiger Yang brelse(bh); 24607f1a37e3STiger Yang } else 24617f1a37e3STiger Yang *level = 0; 24627f1a37e3STiger Yang 24637f1a37e3STiger Yang return ret; 24647f1a37e3STiger Yang } 24657f1a37e3STiger Yang 2466e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode, 2467ccd979bdSMark Fasheh int ex) 2468ccd979bdSMark Fasheh { 2469bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2470e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2471c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2472ccd979bdSMark Fasheh 2473b0697053SMark Fasheh mlog(0, "inode %llu drop %s META lock\n", 2474b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2475ccd979bdSMark Fasheh ex ? 
"EXMODE" : "PRMODE"); 2476ccd979bdSMark Fasheh 2477c271c5c2SSunil Mushran if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2478c271c5c2SSunil Mushran !ocfs2_mount_local(osb)) 2479ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2480ccd979bdSMark Fasheh } 2481ccd979bdSMark Fasheh 2482df152c24SSunil Mushran int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) 248383273932SSrinivas Eeda { 248483273932SSrinivas Eeda struct ocfs2_lock_res *lockres; 248583273932SSrinivas Eeda struct ocfs2_orphan_scan_lvb *lvb; 248683273932SSrinivas Eeda int status = 0; 248783273932SSrinivas Eeda 2488df152c24SSunil Mushran if (ocfs2_is_hard_readonly(osb)) 2489df152c24SSunil Mushran return -EROFS; 2490df152c24SSunil Mushran 2491df152c24SSunil Mushran if (ocfs2_mount_local(osb)) 2492df152c24SSunil Mushran return 0; 2493df152c24SSunil Mushran 249483273932SSrinivas Eeda lockres = &osb->osb_orphan_scan.os_lockres; 2495df152c24SSunil Mushran status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 249683273932SSrinivas Eeda if (status < 0) 249783273932SSrinivas Eeda return status; 249883273932SSrinivas Eeda 249983273932SSrinivas Eeda lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 25001c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 25011c520dfbSJoel Becker lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) 250283273932SSrinivas Eeda *seqno = be32_to_cpu(lvb->lvb_os_seqno); 25033211949fSSunil Mushran else 25043211949fSSunil Mushran *seqno = osb->osb_orphan_scan.os_seqno + 1; 25053211949fSSunil Mushran 250683273932SSrinivas Eeda return status; 250783273932SSrinivas Eeda } 250883273932SSrinivas Eeda 2509df152c24SSunil Mushran void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) 251083273932SSrinivas Eeda { 251183273932SSrinivas Eeda struct ocfs2_lock_res *lockres; 251283273932SSrinivas Eeda struct ocfs2_orphan_scan_lvb *lvb; 251383273932SSrinivas Eeda 2514df152c24SSunil Mushran if (!ocfs2_is_hard_readonly(osb) && 
!ocfs2_mount_local(osb)) { 251583273932SSrinivas Eeda lockres = &osb->osb_orphan_scan.os_lockres; 251683273932SSrinivas Eeda lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 251783273932SSrinivas Eeda lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; 251883273932SSrinivas Eeda lvb->lvb_os_seqno = cpu_to_be32(seqno); 2519df152c24SSunil Mushran ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2520df152c24SSunil Mushran } 252183273932SSrinivas Eeda } 252283273932SSrinivas Eeda 2523ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb, 2524ccd979bdSMark Fasheh int ex) 2525ccd979bdSMark Fasheh { 2526c271c5c2SSunil Mushran int status = 0; 2527bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2528ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2529ccd979bdSMark Fasheh 2530ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2531ccd979bdSMark Fasheh return -EROFS; 2532ccd979bdSMark Fasheh 2533c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2534c271c5c2SSunil Mushran goto bail; 2535c271c5c2SSunil Mushran 2536ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2537ccd979bdSMark Fasheh if (status < 0) { 2538ccd979bdSMark Fasheh mlog_errno(status); 2539ccd979bdSMark Fasheh goto bail; 2540ccd979bdSMark Fasheh } 2541ccd979bdSMark Fasheh 2542ccd979bdSMark Fasheh /* The super block lock path is really in the best position to 2543ccd979bdSMark Fasheh * know when resources covered by the lock need to be 2544ccd979bdSMark Fasheh * refreshed, so we do it here. 
Of course, making sense of 2545ccd979bdSMark Fasheh * everything is up to the caller :) */ 2546ccd979bdSMark Fasheh status = ocfs2_should_refresh_lock_res(lockres); 2547ccd979bdSMark Fasheh if (status < 0) { 2548ccd979bdSMark Fasheh mlog_errno(status); 2549ccd979bdSMark Fasheh goto bail; 2550ccd979bdSMark Fasheh } 2551ccd979bdSMark Fasheh if (status) { 25528e8a4603SMark Fasheh status = ocfs2_refresh_slot_info(osb); 2553ccd979bdSMark Fasheh 2554ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2555ccd979bdSMark Fasheh 2556ccd979bdSMark Fasheh if (status < 0) 2557ccd979bdSMark Fasheh mlog_errno(status); 25588ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2559ccd979bdSMark Fasheh } 2560ccd979bdSMark Fasheh bail: 2561ccd979bdSMark Fasheh return status; 2562ccd979bdSMark Fasheh } 2563ccd979bdSMark Fasheh 2564ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb, 2565ccd979bdSMark Fasheh int ex) 2566ccd979bdSMark Fasheh { 2567bd3e7610SJoel Becker int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2568ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 2569ccd979bdSMark Fasheh 2570c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2571ccd979bdSMark Fasheh ocfs2_cluster_unlock(osb, lockres, level); 2572ccd979bdSMark Fasheh } 2573ccd979bdSMark Fasheh 2574ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb) 2575ccd979bdSMark Fasheh { 2576ccd979bdSMark Fasheh int status; 2577ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2578ccd979bdSMark Fasheh 2579ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 2580ccd979bdSMark Fasheh return -EROFS; 2581ccd979bdSMark Fasheh 2582c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2583c271c5c2SSunil Mushran return 0; 2584c271c5c2SSunil Mushran 2585bd3e7610SJoel Becker status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 2586ccd979bdSMark Fasheh if (status < 0) 2587ccd979bdSMark Fasheh mlog_errno(status); 2588ccd979bdSMark Fasheh 2589ccd979bdSMark Fasheh return status; 2590ccd979bdSMark Fasheh } 2591ccd979bdSMark Fasheh 2592ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb) 2593ccd979bdSMark Fasheh { 2594ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 2595ccd979bdSMark Fasheh 2596c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 2597bd3e7610SJoel Becker ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2598ccd979bdSMark Fasheh } 2599ccd979bdSMark Fasheh 26006ca497a8Swengang wang int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex) 26016ca497a8Swengang wang { 26026ca497a8Swengang wang int status; 26036ca497a8Swengang wang struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 26046ca497a8Swengang wang 26056ca497a8Swengang wang if (ocfs2_is_hard_readonly(osb)) 26066ca497a8Swengang wang return -EROFS; 26076ca497a8Swengang wang 26086ca497a8Swengang wang if (ocfs2_mount_local(osb)) 26096ca497a8Swengang wang return 0; 26106ca497a8Swengang wang 
26116ca497a8Swengang wang status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE, 26126ca497a8Swengang wang 0, 0); 26136ca497a8Swengang wang if (status < 0) 26146ca497a8Swengang wang mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status); 26156ca497a8Swengang wang 26166ca497a8Swengang wang return status; 26176ca497a8Swengang wang } 26186ca497a8Swengang wang 26196ca497a8Swengang wang void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex) 26206ca497a8Swengang wang { 26216ca497a8Swengang wang struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres; 26226ca497a8Swengang wang 26236ca497a8Swengang wang if (!ocfs2_mount_local(osb)) 26246ca497a8Swengang wang ocfs2_cluster_unlock(osb, lockres, 26256ca497a8Swengang wang ex ? LKM_EXMODE : LKM_PRMODE); 26266ca497a8Swengang wang } 26276ca497a8Swengang wang 2628d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex) 2629d680efe9SMark Fasheh { 2630d680efe9SMark Fasheh int ret; 2631bd3e7610SJoel Becker int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 2632d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2633d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2634d680efe9SMark Fasheh 2635d680efe9SMark Fasheh BUG_ON(!dl); 2636d680efe9SMark Fasheh 263703efed8aSTiger Yang if (ocfs2_is_hard_readonly(osb)) { 263803efed8aSTiger Yang if (ex) 2639d680efe9SMark Fasheh return -EROFS; 264003efed8aSTiger Yang return 0; 264103efed8aSTiger Yang } 2642d680efe9SMark Fasheh 2643c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2644c271c5c2SSunil Mushran return 0; 2645c271c5c2SSunil Mushran 2646d680efe9SMark Fasheh ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 2647d680efe9SMark Fasheh if (ret < 0) 2648d680efe9SMark Fasheh mlog_errno(ret); 2649d680efe9SMark Fasheh 2650d680efe9SMark Fasheh return ret; 2651d680efe9SMark Fasheh } 2652d680efe9SMark Fasheh 2653d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 2654d680efe9SMark Fasheh { 2655bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2656d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 2657d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 2658d680efe9SMark Fasheh 265903efed8aSTiger Yang if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 2660d680efe9SMark Fasheh ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 2661d680efe9SMark Fasheh } 2662d680efe9SMark Fasheh 2663ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because 2664ccd979bdSMark Fasheh * open references on the debug inodes can live on after a mount, so 2665ccd979bdSMark Fasheh * we can't rely on the ocfs2_super to always exist. 
*/ 2666ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref) 2667ccd979bdSMark Fasheh { 2668ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2669ccd979bdSMark Fasheh 2670ccd979bdSMark Fasheh dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 2671ccd979bdSMark Fasheh 2672ccd979bdSMark Fasheh kfree(dlm_debug); 2673ccd979bdSMark Fasheh } 2674ccd979bdSMark Fasheh 2675ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 2676ccd979bdSMark Fasheh { 2677ccd979bdSMark Fasheh if (dlm_debug) 2678ccd979bdSMark Fasheh kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 2679ccd979bdSMark Fasheh } 2680ccd979bdSMark Fasheh 2681ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) 2682ccd979bdSMark Fasheh { 2683ccd979bdSMark Fasheh kref_get(&debug->d_refcnt); 2684ccd979bdSMark Fasheh } 2685ccd979bdSMark Fasheh 2686ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 2687ccd979bdSMark Fasheh { 2688ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 2689ccd979bdSMark Fasheh 2690ccd979bdSMark Fasheh dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 2691ccd979bdSMark Fasheh if (!dlm_debug) { 2692ccd979bdSMark Fasheh mlog_errno(-ENOMEM); 2693ccd979bdSMark Fasheh goto out; 2694ccd979bdSMark Fasheh } 2695ccd979bdSMark Fasheh 2696ccd979bdSMark Fasheh kref_init(&dlm_debug->d_refcnt); 2697ccd979bdSMark Fasheh INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 2698ccd979bdSMark Fasheh dlm_debug->d_locking_state = NULL; 2699ccd979bdSMark Fasheh out: 2700ccd979bdSMark Fasheh return dlm_debug; 2701ccd979bdSMark Fasheh } 2702ccd979bdSMark Fasheh 2703ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. 
*/ 2704ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv { 2705ccd979bdSMark Fasheh struct ocfs2_dlm_debug *p_dlm_debug; 2706ccd979bdSMark Fasheh struct ocfs2_lock_res p_iter_res; 2707ccd979bdSMark Fasheh struct ocfs2_lock_res p_tmp_res; 2708ccd979bdSMark Fasheh }; 2709ccd979bdSMark Fasheh 2710ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, 2711ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv) 2712ccd979bdSMark Fasheh { 2713ccd979bdSMark Fasheh struct ocfs2_lock_res *iter, *ret = NULL; 2714ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; 2715ccd979bdSMark Fasheh 2716ccd979bdSMark Fasheh assert_spin_locked(&ocfs2_dlm_tracking_lock); 2717ccd979bdSMark Fasheh 2718ccd979bdSMark Fasheh list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { 2719ccd979bdSMark Fasheh /* discover the head of the list */ 2720ccd979bdSMark Fasheh if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { 2721ccd979bdSMark Fasheh mlog(0, "End of list found, %p\n", ret); 2722ccd979bdSMark Fasheh break; 2723ccd979bdSMark Fasheh } 2724ccd979bdSMark Fasheh 2725ccd979bdSMark Fasheh /* We track our "dummy" iteration lockres' by a NULL 2726ccd979bdSMark Fasheh * l_ops field. 
*/ 2727ccd979bdSMark Fasheh if (iter->l_ops != NULL) { 2728ccd979bdSMark Fasheh ret = iter; 2729ccd979bdSMark Fasheh break; 2730ccd979bdSMark Fasheh } 2731ccd979bdSMark Fasheh } 2732ccd979bdSMark Fasheh 2733ccd979bdSMark Fasheh return ret; 2734ccd979bdSMark Fasheh } 2735ccd979bdSMark Fasheh 2736ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) 2737ccd979bdSMark Fasheh { 2738ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2739ccd979bdSMark Fasheh struct ocfs2_lock_res *iter; 2740ccd979bdSMark Fasheh 2741ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2742ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); 2743ccd979bdSMark Fasheh if (iter) { 2744ccd979bdSMark Fasheh /* Since lockres' have the lifetime of their container 2745ccd979bdSMark Fasheh * (which can be inodes, ocfs2_supers, etc) we want to 2746ccd979bdSMark Fasheh * copy this out to a temporary lockres while still 2747ccd979bdSMark Fasheh * under the spinlock. Obviously after this we can't 2748ccd979bdSMark Fasheh * trust any pointers on the copy returned, but that's 2749ccd979bdSMark Fasheh * ok as the information we want isn't typically held 2750ccd979bdSMark Fasheh * in them. 
*/ 2751ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2752ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2753ccd979bdSMark Fasheh } 2754ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2755ccd979bdSMark Fasheh 2756ccd979bdSMark Fasheh return iter; 2757ccd979bdSMark Fasheh } 2758ccd979bdSMark Fasheh 2759ccd979bdSMark Fasheh static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) 2760ccd979bdSMark Fasheh { 2761ccd979bdSMark Fasheh } 2762ccd979bdSMark Fasheh 2763ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) 2764ccd979bdSMark Fasheh { 2765ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2766ccd979bdSMark Fasheh struct ocfs2_lock_res *iter = v; 2767ccd979bdSMark Fasheh struct ocfs2_lock_res *dummy = &priv->p_iter_res; 2768ccd979bdSMark Fasheh 2769ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2770ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(iter, priv); 2771ccd979bdSMark Fasheh list_del_init(&dummy->l_debug_list); 2772ccd979bdSMark Fasheh if (iter) { 2773ccd979bdSMark Fasheh list_add(&dummy->l_debug_list, &iter->l_debug_list); 2774ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2775ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2776ccd979bdSMark Fasheh } 2777ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2778ccd979bdSMark Fasheh 2779ccd979bdSMark Fasheh return iter; 2780ccd979bdSMark Fasheh } 2781ccd979bdSMark Fasheh 27825bc970e8SSunil Mushran /* 27835bc970e8SSunil Mushran * Version is used by debugfs.ocfs2 to determine the format being used 27845bc970e8SSunil Mushran * 27855bc970e8SSunil Mushran * New in version 2 27865bc970e8SSunil Mushran * - Lock stats printed 27875bc970e8SSunil Mushran * New in version 3 27885bc970e8SSunil Mushran * - Max time in lock stats is in usecs (instead of nsecs) 27895bc970e8SSunil Mushran */ 27905bc970e8SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 3 2791ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 
2792ccd979bdSMark Fasheh { 2793ccd979bdSMark Fasheh int i; 2794ccd979bdSMark Fasheh char *lvb; 2795ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = v; 2796ccd979bdSMark Fasheh 2797ccd979bdSMark Fasheh if (!lockres) 2798ccd979bdSMark Fasheh return -EINVAL; 2799ccd979bdSMark Fasheh 2800d680efe9SMark Fasheh seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 2801d680efe9SMark Fasheh 2802d680efe9SMark Fasheh if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 2803d680efe9SMark Fasheh seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 2804d680efe9SMark Fasheh lockres->l_name, 2805d680efe9SMark Fasheh (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 2806d680efe9SMark Fasheh else 2807d680efe9SMark Fasheh seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 2808d680efe9SMark Fasheh 2809d680efe9SMark Fasheh seq_printf(m, "%d\t" 2810ccd979bdSMark Fasheh "0x%lx\t" 2811ccd979bdSMark Fasheh "0x%x\t" 2812ccd979bdSMark Fasheh "0x%x\t" 2813ccd979bdSMark Fasheh "%u\t" 2814ccd979bdSMark Fasheh "%u\t" 2815ccd979bdSMark Fasheh "%d\t" 2816ccd979bdSMark Fasheh "%d\t", 2817ccd979bdSMark Fasheh lockres->l_level, 2818ccd979bdSMark Fasheh lockres->l_flags, 2819ccd979bdSMark Fasheh lockres->l_action, 2820ccd979bdSMark Fasheh lockres->l_unlock_action, 2821ccd979bdSMark Fasheh lockres->l_ro_holders, 2822ccd979bdSMark Fasheh lockres->l_ex_holders, 2823ccd979bdSMark Fasheh lockres->l_requested, 2824ccd979bdSMark Fasheh lockres->l_blocking); 2825ccd979bdSMark Fasheh 2826ccd979bdSMark Fasheh /* Dump the raw LVB */ 28278f2c9c1bSJoel Becker lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2828ccd979bdSMark Fasheh for(i = 0; i < DLM_LVB_LEN; i++) 2829ccd979bdSMark Fasheh seq_printf(m, "0x%x\t", lvb[i]); 2830ccd979bdSMark Fasheh 28318ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 28325bc970e8SSunil Mushran # define lock_num_prmode(_l) ((_l)->l_lock_prmode.ls_gets) 28335bc970e8SSunil Mushran # define lock_num_exmode(_l) ((_l)->l_lock_exmode.ls_gets) 28345bc970e8SSunil Mushran # 
define lock_num_prmode_failed(_l) ((_l)->l_lock_prmode.ls_fail) 28355bc970e8SSunil Mushran # define lock_num_exmode_failed(_l) ((_l)->l_lock_exmode.ls_fail) 28365bc970e8SSunil Mushran # define lock_total_prmode(_l) ((_l)->l_lock_prmode.ls_total) 28375bc970e8SSunil Mushran # define lock_total_exmode(_l) ((_l)->l_lock_exmode.ls_total) 28385bc970e8SSunil Mushran # define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max) 28395bc970e8SSunil Mushran # define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max) 28405bc970e8SSunil Mushran # define lock_refresh(_l) ((_l)->l_lock_refresh) 28418ddb7b00SSunil Mushran #else 28425bc970e8SSunil Mushran # define lock_num_prmode(_l) (0) 28435bc970e8SSunil Mushran # define lock_num_exmode(_l) (0) 28448ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l) (0) 28458ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l) (0) 2846dd25e55eSRandy Dunlap # define lock_total_prmode(_l) (0ULL) 2847dd25e55eSRandy Dunlap # define lock_total_exmode(_l) (0ULL) 28488ddb7b00SSunil Mushran # define lock_max_prmode(_l) (0) 28498ddb7b00SSunil Mushran # define lock_max_exmode(_l) (0) 28508ddb7b00SSunil Mushran # define lock_refresh(_l) (0) 28518ddb7b00SSunil Mushran #endif 28528ddb7b00SSunil Mushran /* The following seq_print was added in version 2 of this output */ 28535bc970e8SSunil Mushran seq_printf(m, "%u\t" 28545bc970e8SSunil Mushran "%u\t" 28558ddb7b00SSunil Mushran "%u\t" 28568ddb7b00SSunil Mushran "%u\t" 28578ddb7b00SSunil Mushran "%llu\t" 28588ddb7b00SSunil Mushran "%llu\t" 28598ddb7b00SSunil Mushran "%u\t" 28608ddb7b00SSunil Mushran "%u\t" 28618ddb7b00SSunil Mushran "%u\t", 28628ddb7b00SSunil Mushran lock_num_prmode(lockres), 28638ddb7b00SSunil Mushran lock_num_exmode(lockres), 28648ddb7b00SSunil Mushran lock_num_prmode_failed(lockres), 28658ddb7b00SSunil Mushran lock_num_exmode_failed(lockres), 28668ddb7b00SSunil Mushran lock_total_prmode(lockres), 28678ddb7b00SSunil Mushran lock_total_exmode(lockres), 28688ddb7b00SSunil Mushran 
lock_max_prmode(lockres), 28698ddb7b00SSunil Mushran lock_max_exmode(lockres), 28708ddb7b00SSunil Mushran lock_refresh(lockres)); 28718ddb7b00SSunil Mushran 2872ccd979bdSMark Fasheh /* End the line */ 2873ccd979bdSMark Fasheh seq_printf(m, "\n"); 2874ccd979bdSMark Fasheh return 0; 2875ccd979bdSMark Fasheh } 2876ccd979bdSMark Fasheh 287790d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = { 2878ccd979bdSMark Fasheh .start = ocfs2_dlm_seq_start, 2879ccd979bdSMark Fasheh .stop = ocfs2_dlm_seq_stop, 2880ccd979bdSMark Fasheh .next = ocfs2_dlm_seq_next, 2881ccd979bdSMark Fasheh .show = ocfs2_dlm_seq_show, 2882ccd979bdSMark Fasheh }; 2883ccd979bdSMark Fasheh 2884ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2885ccd979bdSMark Fasheh { 288633fa1d90SJoe Perches struct seq_file *seq = file->private_data; 2887ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = seq->private; 2888ccd979bdSMark Fasheh struct ocfs2_lock_res *res = &priv->p_iter_res; 2889ccd979bdSMark Fasheh 2890ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 2891ccd979bdSMark Fasheh ocfs2_put_dlm_debug(priv->p_dlm_debug); 2892ccd979bdSMark Fasheh return seq_release_private(inode, file); 2893ccd979bdSMark Fasheh } 2894ccd979bdSMark Fasheh 2895ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 2896ccd979bdSMark Fasheh { 2897ccd979bdSMark Fasheh int ret; 2898ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv; 2899ccd979bdSMark Fasheh struct seq_file *seq; 2900ccd979bdSMark Fasheh struct ocfs2_super *osb; 2901ccd979bdSMark Fasheh 2902ccd979bdSMark Fasheh priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); 2903ccd979bdSMark Fasheh if (!priv) { 2904ccd979bdSMark Fasheh ret = -ENOMEM; 2905ccd979bdSMark Fasheh mlog_errno(ret); 2906ccd979bdSMark Fasheh goto out; 2907ccd979bdSMark Fasheh } 29088e18e294STheodore Ts'o osb = inode->i_private; 2909ccd979bdSMark Fasheh 
ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2910ccd979bdSMark Fasheh priv->p_dlm_debug = osb->osb_dlm_debug; 2911ccd979bdSMark Fasheh INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2912ccd979bdSMark Fasheh 2913ccd979bdSMark Fasheh ret = seq_open(file, &ocfs2_dlm_seq_ops); 2914ccd979bdSMark Fasheh if (ret) { 2915ccd979bdSMark Fasheh kfree(priv); 2916ccd979bdSMark Fasheh mlog_errno(ret); 2917ccd979bdSMark Fasheh goto out; 2918ccd979bdSMark Fasheh } 2919ccd979bdSMark Fasheh 292033fa1d90SJoe Perches seq = file->private_data; 2921ccd979bdSMark Fasheh seq->private = priv; 2922ccd979bdSMark Fasheh 2923ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(&priv->p_iter_res, 2924ccd979bdSMark Fasheh priv->p_dlm_debug); 2925ccd979bdSMark Fasheh 2926ccd979bdSMark Fasheh out: 2927ccd979bdSMark Fasheh return ret; 2928ccd979bdSMark Fasheh } 2929ccd979bdSMark Fasheh 29304b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = { 2931ccd979bdSMark Fasheh .open = ocfs2_dlm_debug_open, 2932ccd979bdSMark Fasheh .release = ocfs2_dlm_debug_release, 2933ccd979bdSMark Fasheh .read = seq_read, 2934ccd979bdSMark Fasheh .llseek = seq_lseek, 2935ccd979bdSMark Fasheh }; 2936ccd979bdSMark Fasheh 2937ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 2938ccd979bdSMark Fasheh { 2939ccd979bdSMark Fasheh int ret = 0; 2940ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2941ccd979bdSMark Fasheh 2942ccd979bdSMark Fasheh dlm_debug->d_locking_state = debugfs_create_file("locking_state", 2943ccd979bdSMark Fasheh S_IFREG|S_IRUSR, 2944ccd979bdSMark Fasheh osb->osb_debug_root, 2945ccd979bdSMark Fasheh osb, 2946ccd979bdSMark Fasheh &ocfs2_dlm_debug_fops); 2947ccd979bdSMark Fasheh if (!dlm_debug->d_locking_state) { 2948ccd979bdSMark Fasheh ret = -EINVAL; 2949ccd979bdSMark Fasheh mlog(ML_ERROR, 2950ccd979bdSMark Fasheh "Unable to create locking state debugfs file.\n"); 2951ccd979bdSMark Fasheh goto out; 2952ccd979bdSMark Fasheh } 
2953ccd979bdSMark Fasheh 2954ccd979bdSMark Fasheh ocfs2_get_dlm_debug(dlm_debug); 2955ccd979bdSMark Fasheh out: 2956ccd979bdSMark Fasheh return ret; 2957ccd979bdSMark Fasheh } 2958ccd979bdSMark Fasheh 2959ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 2960ccd979bdSMark Fasheh { 2961ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2962ccd979bdSMark Fasheh 2963ccd979bdSMark Fasheh if (dlm_debug) { 2964ccd979bdSMark Fasheh debugfs_remove(dlm_debug->d_locking_state); 2965ccd979bdSMark Fasheh ocfs2_put_dlm_debug(dlm_debug); 2966ccd979bdSMark Fasheh } 2967ccd979bdSMark Fasheh } 2968ccd979bdSMark Fasheh 2969ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb) 2970ccd979bdSMark Fasheh { 2971c271c5c2SSunil Mushran int status = 0; 29724670c46dSJoel Becker struct ocfs2_cluster_connection *conn = NULL; 2973ccd979bdSMark Fasheh 29740abd6d18SMark Fasheh if (ocfs2_mount_local(osb)) { 29750abd6d18SMark Fasheh osb->node_num = 0; 2976c271c5c2SSunil Mushran goto local; 29770abd6d18SMark Fasheh } 2978c271c5c2SSunil Mushran 2979ccd979bdSMark Fasheh status = ocfs2_dlm_init_debug(osb); 2980ccd979bdSMark Fasheh if (status < 0) { 2981ccd979bdSMark Fasheh mlog_errno(status); 2982ccd979bdSMark Fasheh goto bail; 2983ccd979bdSMark Fasheh } 2984ccd979bdSMark Fasheh 298534d024f8SMark Fasheh /* launch downconvert thread */ 298634d024f8SMark Fasheh osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); 298734d024f8SMark Fasheh if (IS_ERR(osb->dc_task)) { 298834d024f8SMark Fasheh status = PTR_ERR(osb->dc_task); 298934d024f8SMark Fasheh osb->dc_task = NULL; 2990ccd979bdSMark Fasheh mlog_errno(status); 2991ccd979bdSMark Fasheh goto bail; 2992ccd979bdSMark Fasheh } 2993ccd979bdSMark Fasheh 2994ccd979bdSMark Fasheh /* for now, uuid == domain */ 29959c6c877cSJoel Becker status = ocfs2_cluster_connect(osb->osb_cluster_stack, 29969c6c877cSJoel Becker osb->uuid_str, 29974670c46dSJoel Becker strlen(osb->uuid_str), 
2998553b5eb9SJoel Becker &lproto, ocfs2_do_node_down, osb, 29994670c46dSJoel Becker &conn); 30004670c46dSJoel Becker if (status) { 3001ccd979bdSMark Fasheh mlog_errno(status); 3002ccd979bdSMark Fasheh goto bail; 3003ccd979bdSMark Fasheh } 3004ccd979bdSMark Fasheh 30050abd6d18SMark Fasheh status = ocfs2_cluster_this_node(&osb->node_num); 30060abd6d18SMark Fasheh if (status < 0) { 30070abd6d18SMark Fasheh mlog_errno(status); 30080abd6d18SMark Fasheh mlog(ML_ERROR, 30090abd6d18SMark Fasheh "could not find this host's node number\n"); 3010286eaa95SJoel Becker ocfs2_cluster_disconnect(conn, 0); 30110abd6d18SMark Fasheh goto bail; 30120abd6d18SMark Fasheh } 30130abd6d18SMark Fasheh 3014c271c5c2SSunil Mushran local: 3015ccd979bdSMark Fasheh ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 3016ccd979bdSMark Fasheh ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 30176ca497a8Swengang wang ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); 301883273932SSrinivas Eeda ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); 3019ccd979bdSMark Fasheh 30204670c46dSJoel Becker osb->cconn = conn; 3021ccd979bdSMark Fasheh 3022ccd979bdSMark Fasheh status = 0; 3023ccd979bdSMark Fasheh bail: 3024ccd979bdSMark Fasheh if (status < 0) { 3025ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 302634d024f8SMark Fasheh if (osb->dc_task) 302734d024f8SMark Fasheh kthread_stop(osb->dc_task); 3028ccd979bdSMark Fasheh } 3029ccd979bdSMark Fasheh 3030ccd979bdSMark Fasheh return status; 3031ccd979bdSMark Fasheh } 3032ccd979bdSMark Fasheh 3033286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb, 3034286eaa95SJoel Becker int hangup_pending) 3035ccd979bdSMark Fasheh { 3036ccd979bdSMark Fasheh ocfs2_drop_osb_locks(osb); 3037ccd979bdSMark Fasheh 30384670c46dSJoel Becker /* 30394670c46dSJoel Becker * Now that we have dropped all locks and ocfs2_dismount_volume() 30404670c46dSJoel Becker * has disabled recovery, the DLM won't be talking to us. 
It's 30414670c46dSJoel Becker * safe to tear things down before disconnecting the cluster. 30424670c46dSJoel Becker */ 30434670c46dSJoel Becker 304434d024f8SMark Fasheh if (osb->dc_task) { 304534d024f8SMark Fasheh kthread_stop(osb->dc_task); 304634d024f8SMark Fasheh osb->dc_task = NULL; 3047ccd979bdSMark Fasheh } 3048ccd979bdSMark Fasheh 3049ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_super_lockres); 3050ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_rename_lockres); 30516ca497a8Swengang wang ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); 305283273932SSrinivas Eeda ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); 3053ccd979bdSMark Fasheh 3054286eaa95SJoel Becker ocfs2_cluster_disconnect(osb->cconn, hangup_pending); 30554670c46dSJoel Becker osb->cconn = NULL; 3056ccd979bdSMark Fasheh 3057ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 3058ccd979bdSMark Fasheh } 3059ccd979bdSMark Fasheh 3060ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb, 30610d5dc6c2SMark Fasheh struct ocfs2_lock_res *lockres) 3062ccd979bdSMark Fasheh { 30637431cd7eSJoel Becker int ret; 3064ccd979bdSMark Fasheh unsigned long flags; 3065bd3e7610SJoel Becker u32 lkm_flags = 0; 3066ccd979bdSMark Fasheh 3067ccd979bdSMark Fasheh /* We didn't get anywhere near actually using this lockres. 
*/ 3068ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 3069ccd979bdSMark Fasheh goto out; 3070ccd979bdSMark Fasheh 3071b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 3072bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 3073b80fc012SMark Fasheh 3074ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3075ccd979bdSMark Fasheh 3076ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 3077ccd979bdSMark Fasheh "lockres %s, flags 0x%lx\n", 3078ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3079ccd979bdSMark Fasheh 3080ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_BUSY) { 3081ccd979bdSMark Fasheh mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " 3082ccd979bdSMark Fasheh "%u, unlock_action = %u\n", 3083ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_action, 3084ccd979bdSMark Fasheh lockres->l_unlock_action); 3085ccd979bdSMark Fasheh 3086ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3087ccd979bdSMark Fasheh 3088ccd979bdSMark Fasheh /* XXX: Today we just wait on any busy 3089ccd979bdSMark Fasheh * locks... Perhaps we need to cancel converts in the 3090ccd979bdSMark Fasheh * future? 
*/ 3091ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 3092ccd979bdSMark Fasheh 3093ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3094ccd979bdSMark Fasheh } 3095ccd979bdSMark Fasheh 30960d5dc6c2SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 30970d5dc6c2SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 3098bd3e7610SJoel Becker lockres->l_level == DLM_LOCK_EX && 30990d5dc6c2SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 31000d5dc6c2SMark Fasheh lockres->l_ops->set_lvb(lockres); 31010d5dc6c2SMark Fasheh } 3102ccd979bdSMark Fasheh 3103ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) 3104ccd979bdSMark Fasheh mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 3105ccd979bdSMark Fasheh lockres->l_name); 3106ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 3107ccd979bdSMark Fasheh mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); 3108ccd979bdSMark Fasheh 3109ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 3110ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3111ccd979bdSMark Fasheh goto out; 3112ccd979bdSMark Fasheh } 3113ccd979bdSMark Fasheh 3114ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); 3115ccd979bdSMark Fasheh 3116ccd979bdSMark Fasheh /* make sure we never get here while waiting for an ast to 3117ccd979bdSMark Fasheh * fire. */ 3118ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_INVALID); 3119ccd979bdSMark Fasheh 3120ccd979bdSMark Fasheh /* is this necessary? 
*/ 3121ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3122ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; 3123ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3124ccd979bdSMark Fasheh 3125ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 3126ccd979bdSMark Fasheh 3127a796d286SJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags); 31287431cd7eSJoel Becker if (ret) { 31297431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3130ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 3131cf0acdcdSJoel Becker ocfs2_dlm_dump_lksb(&lockres->l_lksb); 3132ccd979bdSMark Fasheh BUG(); 3133ccd979bdSMark Fasheh } 313473ac36eaSColy Li mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", 3135ccd979bdSMark Fasheh lockres->l_name); 3136ccd979bdSMark Fasheh 3137ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 3138ccd979bdSMark Fasheh out: 3139ccd979bdSMark Fasheh return 0; 3140ccd979bdSMark Fasheh } 3141ccd979bdSMark Fasheh 3142ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be 3143ccd979bdSMark Fasheh * queued if blocking, but we still may have to wait on it 314434d024f8SMark Fasheh * being dequeued from the downconvert thread before we can consider 3145ccd979bdSMark Fasheh * it safe to drop. 3146ccd979bdSMark Fasheh * 3147ccd979bdSMark Fasheh * You can *not* attempt to call cluster_lock on this lockres anymore. 
*/ 3148ccd979bdSMark Fasheh void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 3149ccd979bdSMark Fasheh { 3150ccd979bdSMark Fasheh int status; 3151ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 3152ccd979bdSMark Fasheh unsigned long flags; 3153ccd979bdSMark Fasheh 3154ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 3155ccd979bdSMark Fasheh 3156ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3157ccd979bdSMark Fasheh lockres->l_flags |= OCFS2_LOCK_FREEING; 3158ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 3159ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 3160ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3161ccd979bdSMark Fasheh 3162ccd979bdSMark Fasheh mlog(0, "Waiting on lockres %s\n", lockres->l_name); 3163ccd979bdSMark Fasheh 3164ccd979bdSMark Fasheh status = ocfs2_wait_for_mask(&mw); 3165ccd979bdSMark Fasheh if (status) 3166ccd979bdSMark Fasheh mlog_errno(status); 3167ccd979bdSMark Fasheh 3168ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3169ccd979bdSMark Fasheh } 3170ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3171ccd979bdSMark Fasheh } 3172ccd979bdSMark Fasheh 3173d680efe9SMark Fasheh void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, 3174d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3175d680efe9SMark Fasheh { 3176d680efe9SMark Fasheh int ret; 3177d680efe9SMark Fasheh 3178d680efe9SMark Fasheh ocfs2_mark_lockres_freeing(lockres); 31790d5dc6c2SMark Fasheh ret = ocfs2_drop_lock(osb, lockres); 3180d680efe9SMark Fasheh if (ret) 3181d680efe9SMark Fasheh mlog_errno(ret); 3182d680efe9SMark Fasheh } 3183d680efe9SMark Fasheh 3184ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 3185ccd979bdSMark Fasheh { 3186d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 3187d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 
31886ca497a8Swengang wang ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); 318983273932SSrinivas Eeda ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); 3190ccd979bdSMark Fasheh } 3191ccd979bdSMark Fasheh 3192ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode) 3193ccd979bdSMark Fasheh { 3194ccd979bdSMark Fasheh int status, err; 3195ccd979bdSMark Fasheh 3196ccd979bdSMark Fasheh /* No need to call ocfs2_mark_lockres_freeing here - 3197ccd979bdSMark Fasheh * ocfs2_clear_inode has done it for us. */ 3198ccd979bdSMark Fasheh 3199ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 320050008630STiger Yang &OCFS2_I(inode)->ip_open_lockres); 3201ccd979bdSMark Fasheh if (err < 0) 3202ccd979bdSMark Fasheh mlog_errno(err); 3203ccd979bdSMark Fasheh 3204ccd979bdSMark Fasheh status = err; 3205ccd979bdSMark Fasheh 3206ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3207e63aecb6SMark Fasheh &OCFS2_I(inode)->ip_inode_lockres); 3208ccd979bdSMark Fasheh if (err < 0) 3209ccd979bdSMark Fasheh mlog_errno(err); 3210ccd979bdSMark Fasheh if (err < 0 && !status) 3211ccd979bdSMark Fasheh status = err; 3212ccd979bdSMark Fasheh 3213ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 32140d5dc6c2SMark Fasheh &OCFS2_I(inode)->ip_rw_lockres); 3215ccd979bdSMark Fasheh if (err < 0) 3216ccd979bdSMark Fasheh mlog_errno(err); 3217ccd979bdSMark Fasheh if (err < 0 && !status) 3218ccd979bdSMark Fasheh status = err; 3219ccd979bdSMark Fasheh 3220ccd979bdSMark Fasheh return status; 3221ccd979bdSMark Fasheh } 3222ccd979bdSMark Fasheh 3223de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 3224ccd979bdSMark Fasheh int new_level) 3225ccd979bdSMark Fasheh { 3226ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3227ccd979bdSMark Fasheh 3228bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 3229ccd979bdSMark Fasheh 3230ccd979bdSMark Fasheh if 
(lockres->l_level <= new_level) { 32319b915181SSunil Mushran mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, " 32329b915181SSunil Mushran "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, " 32339b915181SSunil Mushran "block %d, pgen %d\n", lockres->l_name, lockres->l_level, 32349b915181SSunil Mushran new_level, list_empty(&lockres->l_blocked_list), 32359b915181SSunil Mushran list_empty(&lockres->l_mask_waiters), lockres->l_type, 32369b915181SSunil Mushran lockres->l_flags, lockres->l_ro_holders, 32379b915181SSunil Mushran lockres->l_ex_holders, lockres->l_action, 32389b915181SSunil Mushran lockres->l_unlock_action, lockres->l_requested, 32399b915181SSunil Mushran lockres->l_blocking, lockres->l_pending_gen); 3240ccd979bdSMark Fasheh BUG(); 3241ccd979bdSMark Fasheh } 3242ccd979bdSMark Fasheh 32439b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n", 32449b915181SSunil Mushran lockres->l_name, lockres->l_level, new_level, lockres->l_blocking); 3245ccd979bdSMark Fasheh 3246ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_DOWNCONVERT; 3247ccd979bdSMark Fasheh lockres->l_requested = new_level; 3248ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3249de551246SJoel Becker return lockres_set_pending(lockres); 3250ccd979bdSMark Fasheh } 3251ccd979bdSMark Fasheh 3252ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 3253ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3254ccd979bdSMark Fasheh int new_level, 3255de551246SJoel Becker int lvb, 3256de551246SJoel Becker unsigned int generation) 3257ccd979bdSMark Fasheh { 3258bd3e7610SJoel Becker int ret; 3259bd3e7610SJoel Becker u32 dlm_flags = DLM_LKF_CONVERT; 3260ccd979bdSMark Fasheh 32619b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, 32629b915181SSunil Mushran lockres->l_level, new_level); 32639b915181SSunil Mushran 3264ccd979bdSMark Fasheh if (lvb) 3265bd3e7610SJoel Becker dlm_flags |= 
DLM_LKF_VALBLK; 3266ccd979bdSMark Fasheh 32674670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 3268ccd979bdSMark Fasheh new_level, 3269ccd979bdSMark Fasheh &lockres->l_lksb, 3270ccd979bdSMark Fasheh dlm_flags, 3271ccd979bdSMark Fasheh lockres->l_name, 3272a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 3273de551246SJoel Becker lockres_clear_pending(lockres, generation, osb); 32747431cd7eSJoel Becker if (ret) { 32757431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 3276ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 3277ccd979bdSMark Fasheh goto bail; 3278ccd979bdSMark Fasheh } 3279ccd979bdSMark Fasheh 3280ccd979bdSMark Fasheh ret = 0; 3281ccd979bdSMark Fasheh bail: 3282ccd979bdSMark Fasheh return ret; 3283ccd979bdSMark Fasheh } 3284ccd979bdSMark Fasheh 328524ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ 3286ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 3287ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3288ccd979bdSMark Fasheh { 3289ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3290ccd979bdSMark Fasheh 3291ccd979bdSMark Fasheh if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3292ccd979bdSMark Fasheh /* If we're already trying to cancel a lock conversion 3293ccd979bdSMark Fasheh * then just drop the spinlock and allow the caller to 3294ccd979bdSMark Fasheh * requeue this lock. */ 32959b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name); 3296ccd979bdSMark Fasheh return 0; 3297ccd979bdSMark Fasheh } 3298ccd979bdSMark Fasheh 3299ccd979bdSMark Fasheh /* were we in a convert when we got the bast fire? 
*/ 3300ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 3301ccd979bdSMark Fasheh lockres->l_action != OCFS2_AST_DOWNCONVERT); 3302ccd979bdSMark Fasheh /* set things up for the unlockast to know to just 3303ccd979bdSMark Fasheh * clear out the ast_action and unset busy, etc. */ 3304ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 3305ccd979bdSMark Fasheh 3306ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 3307ccd979bdSMark Fasheh "lock %s, invalid flags: 0x%lx\n", 3308ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3309ccd979bdSMark Fasheh 33109b915181SSunil Mushran mlog(ML_BASTS, "lockres %s\n", lockres->l_name); 33119b915181SSunil Mushran 3312ccd979bdSMark Fasheh return 1; 3313ccd979bdSMark Fasheh } 3314ccd979bdSMark Fasheh 3315ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 3316ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3317ccd979bdSMark Fasheh { 3318ccd979bdSMark Fasheh int ret; 3319ccd979bdSMark Fasheh 33204670c46dSJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 3321a796d286SJoel Becker DLM_LKF_CANCEL); 33227431cd7eSJoel Becker if (ret) { 33237431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3324ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 0); 3325ccd979bdSMark Fasheh } 3326ccd979bdSMark Fasheh 33279b915181SSunil Mushran mlog(ML_BASTS, "lockres %s\n", lockres->l_name); 3328ccd979bdSMark Fasheh 3329ccd979bdSMark Fasheh return ret; 3330ccd979bdSMark Fasheh } 3331ccd979bdSMark Fasheh 3332b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb, 3333ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3334cc567d89SMark Fasheh struct ocfs2_unblock_ctl *ctl) 3335ccd979bdSMark Fasheh { 3336ccd979bdSMark Fasheh unsigned long flags; 3337ccd979bdSMark Fasheh int blocking; 3338ccd979bdSMark Fasheh int new_level; 3339079b8057SSunil Mushran int level; 3340ccd979bdSMark Fasheh 
int ret = 0; 33415ef0d4eaSMark Fasheh int set_lvb = 0; 3342de551246SJoel Becker unsigned int gen; 3343ccd979bdSMark Fasheh 3344ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3345ccd979bdSMark Fasheh 3346ccd979bdSMark Fasheh recheck: 3347db0f6ce6SSunil Mushran /* 3348db0f6ce6SSunil Mushran * Is it still blocking? If not, we have no more work to do. 3349db0f6ce6SSunil Mushran */ 3350db0f6ce6SSunil Mushran if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) { 3351db0f6ce6SSunil Mushran BUG_ON(lockres->l_blocking != DLM_LOCK_NL); 3352db0f6ce6SSunil Mushran spin_unlock_irqrestore(&lockres->l_lock, flags); 3353db0f6ce6SSunil Mushran ret = 0; 3354db0f6ce6SSunil Mushran goto leave; 3355db0f6ce6SSunil Mushran } 3356db0f6ce6SSunil Mushran 3357ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3358de551246SJoel Becker /* XXX 3359de551246SJoel Becker * This is a *big* race. The OCFS2_LOCK_PENDING flag 3360de551246SJoel Becker * exists entirely for one reason - another thread has set 3361de551246SJoel Becker * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3362de551246SJoel Becker * 3363de551246SJoel Becker * If we do ocfs2_cancel_convert() before the other thread 3364de551246SJoel Becker * calls dlm_lock(), our cancel will do nothing. We will 3365de551246SJoel Becker * get no ast, and we will have no way of knowing the 3366de551246SJoel Becker * cancel failed. Meanwhile, the other thread will call 3367de551246SJoel Becker * into dlm_lock() and wait...forever. 3368de551246SJoel Becker * 3369de551246SJoel Becker * Why forever? Because another node has asked for the 3370de551246SJoel Becker * lock first; that's why we're here in unblock_lock(). 3371de551246SJoel Becker * 3372de551246SJoel Becker * The solution is OCFS2_LOCK_PENDING. When PENDING is 3373de551246SJoel Becker * set, we just requeue the unblock. 
Only when the other 3374de551246SJoel Becker * thread has called dlm_lock() and cleared PENDING will 3375de551246SJoel Becker * we then cancel their request. 3376de551246SJoel Becker * 3377de551246SJoel Becker * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3378de551246SJoel Becker * at the same time they set OCFS2_DLM_BUSY. They must 3379de551246SJoel Becker * clear OCFS2_DLM_PENDING after dlm_lock() returns. 3380de551246SJoel Becker */ 33819b915181SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_PENDING) { 33829b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Pending\n", 33839b915181SSunil Mushran lockres->l_name); 3384de551246SJoel Becker goto leave_requeue; 33859b915181SSunil Mushran } 3386de551246SJoel Becker 3387d680efe9SMark Fasheh ctl->requeue = 1; 3388ccd979bdSMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 3389ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3390ccd979bdSMark Fasheh if (ret) { 3391ccd979bdSMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 3392ccd979bdSMark Fasheh if (ret < 0) 3393ccd979bdSMark Fasheh mlog_errno(ret); 3394ccd979bdSMark Fasheh } 3395ccd979bdSMark Fasheh goto leave; 3396ccd979bdSMark Fasheh } 3397ccd979bdSMark Fasheh 3398a1912826SSunil Mushran /* 3399a1912826SSunil Mushran * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is 3400a1912826SSunil Mushran * set when the ast is received for an upconvert just before the 3401a1912826SSunil Mushran * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast 3402a1912826SSunil Mushran * on the heels of the ast, we want to delay the downconvert just 3403a1912826SSunil Mushran * enough to allow the up requestor to do its task. Because this 3404a1912826SSunil Mushran * lock is in the blocked queue, the lock will be downconverted 3405a1912826SSunil Mushran * as soon as the requestor is done with the lock. 
3406a1912826SSunil Mushran */ 3407a1912826SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) 3408a1912826SSunil Mushran goto leave_requeue; 3409a1912826SSunil Mushran 34100d74125aSSunil Mushran /* 34110d74125aSSunil Mushran * How can we block and yet be at NL? We were trying to upconvert 34120d74125aSSunil Mushran * from NL and got canceled. The code comes back here, and now 34130d74125aSSunil Mushran * we notice and clear BLOCKING. 34140d74125aSSunil Mushran */ 34150d74125aSSunil Mushran if (lockres->l_level == DLM_LOCK_NL) { 34160d74125aSSunil Mushran BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); 34179b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name); 34180d74125aSSunil Mushran lockres->l_blocking = DLM_LOCK_NL; 34190d74125aSSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 34200d74125aSSunil Mushran spin_unlock_irqrestore(&lockres->l_lock, flags); 34210d74125aSSunil Mushran goto leave; 34220d74125aSSunil Mushran } 34230d74125aSSunil Mushran 3424ccd979bdSMark Fasheh /* if we're blocking an exclusive and we have *any* holders, 3425ccd979bdSMark Fasheh * then requeue. 
*/ 3426bd3e7610SJoel Becker if ((lockres->l_blocking == DLM_LOCK_EX) 34279b915181SSunil Mushran && (lockres->l_ex_holders || lockres->l_ro_holders)) { 34289b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n", 34299b915181SSunil Mushran lockres->l_name, lockres->l_ex_holders, 34309b915181SSunil Mushran lockres->l_ro_holders); 3431f7fbfdd1SMark Fasheh goto leave_requeue; 34329b915181SSunil Mushran } 3433ccd979bdSMark Fasheh 3434ccd979bdSMark Fasheh /* If it's a PR we're blocking, then only 3435ccd979bdSMark Fasheh * requeue if we've got any EX holders */ 3436bd3e7610SJoel Becker if (lockres->l_blocking == DLM_LOCK_PR && 34379b915181SSunil Mushran lockres->l_ex_holders) { 34389b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n", 34399b915181SSunil Mushran lockres->l_name, lockres->l_ex_holders); 3440f7fbfdd1SMark Fasheh goto leave_requeue; 34419b915181SSunil Mushran } 3442f7fbfdd1SMark Fasheh 3443f7fbfdd1SMark Fasheh /* 3444f7fbfdd1SMark Fasheh * Can we get a lock in this state if the holder counts are 3445f7fbfdd1SMark Fasheh * zero? The meta data unblock code used to check this. 
3446f7fbfdd1SMark Fasheh */ 3447f7fbfdd1SMark Fasheh if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 34489b915181SSunil Mushran && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) { 34499b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n", 34509b915181SSunil Mushran lockres->l_name); 3451f7fbfdd1SMark Fasheh goto leave_requeue; 34529b915181SSunil Mushran } 3453ccd979bdSMark Fasheh 345416d5b956SMark Fasheh new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 345516d5b956SMark Fasheh 345616d5b956SMark Fasheh if (lockres->l_ops->check_downconvert 34579b915181SSunil Mushran && !lockres->l_ops->check_downconvert(lockres, new_level)) { 34589b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n", 34599b915181SSunil Mushran lockres->l_name); 346016d5b956SMark Fasheh goto leave_requeue; 34619b915181SSunil Mushran } 346216d5b956SMark Fasheh 3463ccd979bdSMark Fasheh /* If we get here, then we know that there are no more 3464ccd979bdSMark Fasheh * incompatible holders (and anyone asking for an incompatible 3465ccd979bdSMark Fasheh * lock is blocked). We can now downconvert the lock */ 3466cc567d89SMark Fasheh if (!lockres->l_ops->downconvert_worker) 3467ccd979bdSMark Fasheh goto downconvert; 3468ccd979bdSMark Fasheh 3469ccd979bdSMark Fasheh /* Some lockres types want to do a bit of work before 3470ccd979bdSMark Fasheh * downconverting a lock. Allow that here. The worker function 3471ccd979bdSMark Fasheh * may sleep, so we save off a copy of what we're blocking as 3472ccd979bdSMark Fasheh * it may change while we're not holding the spin lock. 
*/ 3473ccd979bdSMark Fasheh blocking = lockres->l_blocking; 3474079b8057SSunil Mushran level = lockres->l_level; 3475ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3476ccd979bdSMark Fasheh 3477cc567d89SMark Fasheh ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3478d680efe9SMark Fasheh 34799b915181SSunil Mushran if (ctl->unblock_action == UNBLOCK_STOP_POST) { 34809b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n", 34819b915181SSunil Mushran lockres->l_name); 3482d680efe9SMark Fasheh goto leave; 34839b915181SSunil Mushran } 3484ccd979bdSMark Fasheh 3485ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3486079b8057SSunil Mushran if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { 3487ccd979bdSMark Fasheh /* If this changed underneath us, then we can't drop 3488ccd979bdSMark Fasheh * it just yet. */ 34899b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, " 34909b915181SSunil Mushran "Recheck\n", lockres->l_name, blocking, 34919b915181SSunil Mushran lockres->l_blocking, level, lockres->l_level); 3492ccd979bdSMark Fasheh goto recheck; 3493ccd979bdSMark Fasheh } 3494ccd979bdSMark Fasheh 3495ccd979bdSMark Fasheh downconvert: 3496d680efe9SMark Fasheh ctl->requeue = 0; 3497ccd979bdSMark Fasheh 34985ef0d4eaSMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3499bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_EX) 35005ef0d4eaSMark Fasheh set_lvb = 1; 35015ef0d4eaSMark Fasheh 35025ef0d4eaSMark Fasheh /* 35035ef0d4eaSMark Fasheh * We only set the lvb if the lock has been fully 35045ef0d4eaSMark Fasheh * refreshed - otherwise we risk setting stale 35055ef0d4eaSMark Fasheh * data. Otherwise, there's no need to actually clear 35065ef0d4eaSMark Fasheh * out the lvb here as it's value is still valid. 
35075ef0d4eaSMark Fasheh */ 35085ef0d4eaSMark Fasheh if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 35095ef0d4eaSMark Fasheh lockres->l_ops->set_lvb(lockres); 35105ef0d4eaSMark Fasheh } 35115ef0d4eaSMark Fasheh 3512de551246SJoel Becker gen = ocfs2_prepare_downconvert(lockres, new_level); 3513ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3514de551246SJoel Becker ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3515de551246SJoel Becker gen); 3516de551246SJoel Becker 3517ccd979bdSMark Fasheh leave: 3518c1e8d35eSTao Ma if (ret) 3519c1e8d35eSTao Ma mlog_errno(ret); 3520ccd979bdSMark Fasheh return ret; 3521f7fbfdd1SMark Fasheh 3522f7fbfdd1SMark Fasheh leave_requeue: 3523f7fbfdd1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3524f7fbfdd1SMark Fasheh ctl->requeue = 1; 3525f7fbfdd1SMark Fasheh 3526f7fbfdd1SMark Fasheh return 0; 3527ccd979bdSMark Fasheh } 3528ccd979bdSMark Fasheh 3529d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 3530ccd979bdSMark Fasheh int blocking) 3531ccd979bdSMark Fasheh { 3532ccd979bdSMark Fasheh struct inode *inode; 3533ccd979bdSMark Fasheh struct address_space *mapping; 35345e98d492SGoldwyn Rodrigues struct ocfs2_inode_info *oi; 3535ccd979bdSMark Fasheh 3536ccd979bdSMark Fasheh inode = ocfs2_lock_res_inode(lockres); 3537ccd979bdSMark Fasheh mapping = inode->i_mapping; 3538ccd979bdSMark Fasheh 35395e98d492SGoldwyn Rodrigues if (S_ISDIR(inode->i_mode)) { 35405e98d492SGoldwyn Rodrigues oi = OCFS2_I(inode); 35415e98d492SGoldwyn Rodrigues oi->ip_dir_lock_gen++; 35425e98d492SGoldwyn Rodrigues mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); 35435e98d492SGoldwyn Rodrigues goto out; 35445e98d492SGoldwyn Rodrigues } 35455e98d492SGoldwyn Rodrigues 35461044e401SMark Fasheh if (!S_ISREG(inode->i_mode)) 3547f1f54068SMark Fasheh goto out; 3548f1f54068SMark Fasheh 35497f4a2a97SMark Fasheh /* 35507f4a2a97SMark Fasheh * We need this before the 
filemap_fdatawrite() so that it can 35517f4a2a97SMark Fasheh * transfer the dirty bit from the PTE to the 35527f4a2a97SMark Fasheh * page. Unfortunately this means that even for EX->PR 35537f4a2a97SMark Fasheh * downconverts, we'll lose our mappings and have to build 35547f4a2a97SMark Fasheh * them up again. 35557f4a2a97SMark Fasheh */ 35567f4a2a97SMark Fasheh unmap_mapping_range(mapping, 0, 0, 0); 35577f4a2a97SMark Fasheh 3558ccd979bdSMark Fasheh if (filemap_fdatawrite(mapping)) { 3559b0697053SMark Fasheh mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 3560b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 3561ccd979bdSMark Fasheh } 3562ccd979bdSMark Fasheh sync_mapping_buffers(mapping); 3563bd3e7610SJoel Becker if (blocking == DLM_LOCK_EX) { 3564ccd979bdSMark Fasheh truncate_inode_pages(mapping, 0); 3565ccd979bdSMark Fasheh } else { 3566ccd979bdSMark Fasheh /* We only need to wait on the I/O if we're not also 3567ccd979bdSMark Fasheh * truncating pages because truncate_inode_pages waits 3568ccd979bdSMark Fasheh * for us above. We don't truncate pages if we're 3569ccd979bdSMark Fasheh * blocking anything < EXMODE because we want to keep 3570ccd979bdSMark Fasheh * them around in that case. 
*/ 3571ccd979bdSMark Fasheh filemap_fdatawait(mapping); 3572ccd979bdSMark Fasheh } 3573ccd979bdSMark Fasheh 3574f1f54068SMark Fasheh out: 3575d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3576ccd979bdSMark Fasheh } 3577ccd979bdSMark Fasheh 3578a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, 3579a4338481STao Ma struct ocfs2_lock_res *lockres, 3580810d5aebSMark Fasheh int new_level) 3581810d5aebSMark Fasheh { 3582a4338481STao Ma int checkpointed = ocfs2_ci_fully_checkpointed(ci); 3583810d5aebSMark Fasheh 3584bd3e7610SJoel Becker BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3585bd3e7610SJoel Becker BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3586810d5aebSMark Fasheh 3587810d5aebSMark Fasheh if (checkpointed) 3588810d5aebSMark Fasheh return 1; 3589810d5aebSMark Fasheh 3590a4338481STao Ma ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); 3591810d5aebSMark Fasheh return 0; 3592810d5aebSMark Fasheh } 3593810d5aebSMark Fasheh 3594a4338481STao Ma static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3595a4338481STao Ma int new_level) 3596a4338481STao Ma { 3597a4338481STao Ma struct inode *inode = ocfs2_lock_res_inode(lockres); 3598a4338481STao Ma 3599a4338481STao Ma return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level); 3600a4338481STao Ma } 3601a4338481STao Ma 3602810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3603810d5aebSMark Fasheh { 3604810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 3605810d5aebSMark Fasheh 3606810d5aebSMark Fasheh __ocfs2_stuff_meta_lvb(inode); 3607810d5aebSMark Fasheh } 3608810d5aebSMark Fasheh 3609d680efe9SMark Fasheh /* 3610d680efe9SMark Fasheh * Does the final reference drop on our dentry lock. 
Right now this 361134d024f8SMark Fasheh * happens in the downconvert thread, but we could choose to simplify the 3612d680efe9SMark Fasheh * dlmglue API and push these off to the ocfs2_wq in the future. 3613d680efe9SMark Fasheh */ 3614d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 3615d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3616d680efe9SMark Fasheh { 3617d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3618d680efe9SMark Fasheh ocfs2_dentry_lock_put(osb, dl); 3619d680efe9SMark Fasheh } 3620d680efe9SMark Fasheh 3621d680efe9SMark Fasheh /* 3622d680efe9SMark Fasheh * d_delete() matching dentries before the lock downconvert. 3623d680efe9SMark Fasheh * 3624d680efe9SMark Fasheh * At this point, any process waiting to destroy the 3625d680efe9SMark Fasheh * dentry_lock due to last ref count is stopped by the 3626d680efe9SMark Fasheh * OCFS2_LOCK_QUEUED flag. 3627d680efe9SMark Fasheh * 3628d680efe9SMark Fasheh * We have two potential problems 3629d680efe9SMark Fasheh * 3630d680efe9SMark Fasheh * 1) If we do the last reference drop on our dentry_lock (via dput) 3631d680efe9SMark Fasheh * we'll wind up in ocfs2_release_dentry_lock(), waiting on 3632d680efe9SMark Fasheh * the downconvert to finish. Instead we take an elevated 3633d680efe9SMark Fasheh * reference and push the drop until after we've completed our 3634d680efe9SMark Fasheh * unblock processing. 3635d680efe9SMark Fasheh * 3636d680efe9SMark Fasheh * 2) There might be another process with a final reference, 3637d680efe9SMark Fasheh * waiting on us to finish processing. If this is the case, we 3638d680efe9SMark Fasheh * detect it and exit out - there's no more dentries anyway. 
3639d680efe9SMark Fasheh */ 3640d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 3641d680efe9SMark Fasheh int blocking) 3642d680efe9SMark Fasheh { 3643d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3644d680efe9SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); 3645d680efe9SMark Fasheh struct dentry *dentry; 3646d680efe9SMark Fasheh unsigned long flags; 3647d680efe9SMark Fasheh int extra_ref = 0; 3648d680efe9SMark Fasheh 3649d680efe9SMark Fasheh /* 3650d680efe9SMark Fasheh * This node is blocking another node from getting a read 3651d680efe9SMark Fasheh * lock. This happens when we've renamed within a 3652d680efe9SMark Fasheh * directory. We've forced the other nodes to d_delete(), but 3653d680efe9SMark Fasheh * we never actually dropped our lock because it's still 3654d680efe9SMark Fasheh * valid. The downconvert code will retain a PR for this node, 3655d680efe9SMark Fasheh * so there's no further work to do. 3656d680efe9SMark Fasheh */ 3657bd3e7610SJoel Becker if (blocking == DLM_LOCK_PR) 3658d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3659d680efe9SMark Fasheh 3660d680efe9SMark Fasheh /* 3661d680efe9SMark Fasheh * Mark this inode as potentially orphaned. The code in 3662d680efe9SMark Fasheh * ocfs2_delete_inode() will figure out whether it actually 3663d680efe9SMark Fasheh * needs to be freed or not. 3664d680efe9SMark Fasheh */ 3665d680efe9SMark Fasheh spin_lock(&oi->ip_lock); 3666d680efe9SMark Fasheh oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 3667d680efe9SMark Fasheh spin_unlock(&oi->ip_lock); 3668d680efe9SMark Fasheh 3669d680efe9SMark Fasheh /* 3670d680efe9SMark Fasheh * Yuck. We need to make sure however that the check of 3671d680efe9SMark Fasheh * OCFS2_LOCK_FREEING and the extra reference are atomic with 3672d680efe9SMark Fasheh * respect to a reference decrement or the setting of that 3673d680efe9SMark Fasheh * flag. 
3674d680efe9SMark Fasheh */ 3675d680efe9SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3676d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3677d680efe9SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_FREEING) 3678d680efe9SMark Fasheh && dl->dl_count) { 3679d680efe9SMark Fasheh dl->dl_count++; 3680d680efe9SMark Fasheh extra_ref = 1; 3681d680efe9SMark Fasheh } 3682d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3683d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3684d680efe9SMark Fasheh 3685d680efe9SMark Fasheh mlog(0, "extra_ref = %d\n", extra_ref); 3686d680efe9SMark Fasheh 3687d680efe9SMark Fasheh /* 3688d680efe9SMark Fasheh * We have a process waiting on us in ocfs2_dentry_iput(), 3689d680efe9SMark Fasheh * which means we can't have any more outstanding 3690d680efe9SMark Fasheh * aliases. There's no need to do any more work. 3691d680efe9SMark Fasheh */ 3692d680efe9SMark Fasheh if (!extra_ref) 3693d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3694d680efe9SMark Fasheh 3695d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3696d680efe9SMark Fasheh while (1) { 3697d680efe9SMark Fasheh dentry = ocfs2_find_local_alias(dl->dl_inode, 3698d680efe9SMark Fasheh dl->dl_parent_blkno, 1); 3699d680efe9SMark Fasheh if (!dentry) 3700d680efe9SMark Fasheh break; 3701d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3702d680efe9SMark Fasheh 3703d680efe9SMark Fasheh mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, 3704d680efe9SMark Fasheh dentry->d_name.name); 3705d680efe9SMark Fasheh 3706d680efe9SMark Fasheh /* 3707d680efe9SMark Fasheh * The following dcache calls may do an 3708d680efe9SMark Fasheh * iput(). Normally we don't want that from the 3709d680efe9SMark Fasheh * downconverting thread, but in this case it's ok 3710d680efe9SMark Fasheh * because the requesting node already has an 3711d680efe9SMark Fasheh * exclusive lock on the inode, so it can't be queued 3712d680efe9SMark Fasheh * for a downconvert. 
3713d680efe9SMark Fasheh */ 3714d680efe9SMark Fasheh d_delete(dentry); 3715d680efe9SMark Fasheh dput(dentry); 3716d680efe9SMark Fasheh 3717d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3718d680efe9SMark Fasheh } 3719d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3720d680efe9SMark Fasheh 3721d680efe9SMark Fasheh /* 3722d680efe9SMark Fasheh * If we are the last holder of this dentry lock, there is no 3723d680efe9SMark Fasheh * reason to downconvert so skip straight to the unlock. 3724d680efe9SMark Fasheh */ 3725d680efe9SMark Fasheh if (dl->dl_count == 1) 3726d680efe9SMark Fasheh return UNBLOCK_STOP_POST; 3727d680efe9SMark Fasheh 3728d680efe9SMark Fasheh return UNBLOCK_CONTINUE_POST; 3729d680efe9SMark Fasheh } 3730d680efe9SMark Fasheh 37318dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 37328dec98edSTao Ma int new_level) 37338dec98edSTao Ma { 37348dec98edSTao Ma struct ocfs2_refcount_tree *tree = 37358dec98edSTao Ma ocfs2_lock_res_refcount_tree(lockres); 37368dec98edSTao Ma 37378dec98edSTao Ma return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); 37388dec98edSTao Ma } 37398dec98edSTao Ma 37408dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 37418dec98edSTao Ma int blocking) 37428dec98edSTao Ma { 37438dec98edSTao Ma struct ocfs2_refcount_tree *tree = 37448dec98edSTao Ma ocfs2_lock_res_refcount_tree(lockres); 37458dec98edSTao Ma 37468dec98edSTao Ma ocfs2_metadata_cache_purge(&tree->rf_ci); 37478dec98edSTao Ma 37488dec98edSTao Ma return UNBLOCK_CONTINUE; 37498dec98edSTao Ma } 37508dec98edSTao Ma 37519e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) 37529e33d69fSJan Kara { 37539e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb; 37549e33d69fSJan Kara struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); 37559e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 37569e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 
37579e33d69fSJan Kara 3758a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 37599e33d69fSJan Kara lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; 37609e33d69fSJan Kara lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); 37619e33d69fSJan Kara lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); 37629e33d69fSJan Kara lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); 37639e33d69fSJan Kara lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); 37649e33d69fSJan Kara lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); 37659e33d69fSJan Kara lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); 37669e33d69fSJan Kara } 37679e33d69fSJan Kara 37689e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) 37699e33d69fSJan Kara { 37709e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 37719e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 37729e33d69fSJan Kara int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 37739e33d69fSJan Kara 37749e33d69fSJan Kara if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 37759e33d69fSJan Kara ocfs2_cluster_unlock(osb, lockres, level); 37769e33d69fSJan Kara } 37779e33d69fSJan Kara 37789e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) 37799e33d69fSJan Kara { 37809e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 37819e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 37829e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 37839e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 378485eb8b73SJoel Becker struct buffer_head *bh = NULL; 37859e33d69fSJan Kara struct ocfs2_global_disk_dqinfo *gdinfo; 37869e33d69fSJan Kara int status = 0; 37879e33d69fSJan Kara 37881c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 37891c520dfbSJoel Becker lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { 37909e33d69fSJan Kara info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); 
37919e33d69fSJan Kara info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); 37929e33d69fSJan Kara oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); 37939e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); 37949e33d69fSJan Kara oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); 37959e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 37969e33d69fSJan Kara be32_to_cpu(lvb->lvb_free_entry); 37979e33d69fSJan Kara } else { 3798ae4f6ef1SJan Kara status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode, 3799ae4f6ef1SJan Kara oinfo->dqi_giblk, &bh); 380085eb8b73SJoel Becker if (status) { 38019e33d69fSJan Kara mlog_errno(status); 38029e33d69fSJan Kara goto bail; 38039e33d69fSJan Kara } 38049e33d69fSJan Kara gdinfo = (struct ocfs2_global_disk_dqinfo *) 38059e33d69fSJan Kara (bh->b_data + OCFS2_GLOBAL_INFO_OFF); 38069e33d69fSJan Kara info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); 38079e33d69fSJan Kara info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace); 38089e33d69fSJan Kara oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); 38099e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); 38109e33d69fSJan Kara oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); 38119e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 38129e33d69fSJan Kara le32_to_cpu(gdinfo->dqi_free_entry); 38139e33d69fSJan Kara brelse(bh); 38149e33d69fSJan Kara ocfs2_track_lock_refresh(lockres); 38159e33d69fSJan Kara } 38169e33d69fSJan Kara 38179e33d69fSJan Kara bail: 38189e33d69fSJan Kara return status; 38199e33d69fSJan Kara } 38209e33d69fSJan Kara 38219e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file 38229e33d69fSJan Kara * so that we can safely refresh quota info from disk. 
*/ 38239e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) 38249e33d69fSJan Kara { 38259e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 38269e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 38279e33d69fSJan Kara int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 38289e33d69fSJan Kara int status = 0; 38299e33d69fSJan Kara 38309e33d69fSJan Kara /* On RO devices, locking really isn't needed... */ 38319e33d69fSJan Kara if (ocfs2_is_hard_readonly(osb)) { 38329e33d69fSJan Kara if (ex) 38339e33d69fSJan Kara status = -EROFS; 38349e33d69fSJan Kara goto bail; 38359e33d69fSJan Kara } 38369e33d69fSJan Kara if (ocfs2_mount_local(osb)) 38379e33d69fSJan Kara goto bail; 38389e33d69fSJan Kara 38399e33d69fSJan Kara status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 38409e33d69fSJan Kara if (status < 0) { 38419e33d69fSJan Kara mlog_errno(status); 38429e33d69fSJan Kara goto bail; 38439e33d69fSJan Kara } 38449e33d69fSJan Kara if (!ocfs2_should_refresh_lock_res(lockres)) 38459e33d69fSJan Kara goto bail; 38469e33d69fSJan Kara /* OK, we have the lock but we need to refresh the quota info */ 38479e33d69fSJan Kara status = ocfs2_refresh_qinfo(oinfo); 38489e33d69fSJan Kara if (status) 38499e33d69fSJan Kara ocfs2_qinfo_unlock(oinfo, ex); 38509e33d69fSJan Kara ocfs2_complete_lock_res_refresh(lockres, status); 38519e33d69fSJan Kara bail: 38529e33d69fSJan Kara return status; 38539e33d69fSJan Kara } 38549e33d69fSJan Kara 38558dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) 38568dec98edSTao Ma { 38578dec98edSTao Ma int status; 38588dec98edSTao Ma int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 38598dec98edSTao Ma struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 38608dec98edSTao Ma struct ocfs2_super *osb = lockres->l_priv; 38618dec98edSTao Ma 38628dec98edSTao Ma 38638dec98edSTao Ma if (ocfs2_is_hard_readonly(osb)) 38648dec98edSTao Ma return -EROFS; 38658dec98edSTao Ma 38668dec98edSTao Ma if (ocfs2_mount_local(osb)) 38678dec98edSTao Ma return 0; 38688dec98edSTao Ma 38698dec98edSTao Ma status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 38708dec98edSTao Ma if (status < 0) 38718dec98edSTao Ma mlog_errno(status); 38728dec98edSTao Ma 38738dec98edSTao Ma return status; 38748dec98edSTao Ma } 38758dec98edSTao Ma 38768dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) 38778dec98edSTao Ma { 38788dec98edSTao Ma int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 38798dec98edSTao Ma struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 38808dec98edSTao Ma struct ocfs2_super *osb = lockres->l_priv; 38818dec98edSTao Ma 38828dec98edSTao Ma if (!ocfs2_mount_local(osb)) 38838dec98edSTao Ma ocfs2_cluster_unlock(osb, lockres, level); 38848dec98edSTao Ma } 38858dec98edSTao Ma 388600600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 3887ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3888ccd979bdSMark Fasheh { 3889ccd979bdSMark Fasheh int status; 3890d680efe9SMark Fasheh struct ocfs2_unblock_ctl ctl = {0, 0,}; 3891ccd979bdSMark Fasheh unsigned long flags; 3892ccd979bdSMark Fasheh 3893ccd979bdSMark Fasheh /* Our reference to the lockres in this function can be 3894ccd979bdSMark Fasheh * considered valid until we remove the OCFS2_LOCK_QUEUED 3895ccd979bdSMark Fasheh * flag. 
*/ 3896ccd979bdSMark Fasheh 3897ccd979bdSMark Fasheh BUG_ON(!lockres); 3898ccd979bdSMark Fasheh BUG_ON(!lockres->l_ops); 3899ccd979bdSMark Fasheh 39009b915181SSunil Mushran mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name); 3901ccd979bdSMark Fasheh 3902ccd979bdSMark Fasheh /* Detect whether a lock has been marked as going away while 390334d024f8SMark Fasheh * the downconvert thread was processing other things. A lock can 3904ccd979bdSMark Fasheh * still be marked with OCFS2_LOCK_FREEING after this check, 3905ccd979bdSMark Fasheh * but short circuiting here will still save us some 3906ccd979bdSMark Fasheh * performance. */ 3907ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3908ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) 3909ccd979bdSMark Fasheh goto unqueue; 3910ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3911ccd979bdSMark Fasheh 3912b5e500e2SMark Fasheh status = ocfs2_unblock_lock(osb, lockres, &ctl); 3913ccd979bdSMark Fasheh if (status < 0) 3914ccd979bdSMark Fasheh mlog_errno(status); 3915ccd979bdSMark Fasheh 3916ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3917ccd979bdSMark Fasheh unqueue: 3918d680efe9SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { 3919ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 3920ccd979bdSMark Fasheh } else 3921ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 3922ccd979bdSMark Fasheh 39239b915181SSunil Mushran mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name, 3924d680efe9SMark Fasheh ctl.requeue ? 
"yes" : "no"); 3925ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3926ccd979bdSMark Fasheh 3927d680efe9SMark Fasheh if (ctl.unblock_action != UNBLOCK_CONTINUE 3928d680efe9SMark Fasheh && lockres->l_ops->post_unlock) 3929d680efe9SMark Fasheh lockres->l_ops->post_unlock(osb, lockres); 3930ccd979bdSMark Fasheh } 3931ccd979bdSMark Fasheh 3932ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 3933ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3934ccd979bdSMark Fasheh { 3935a75e9ccaSSrinivas Eeda unsigned long flags; 3936a75e9ccaSSrinivas Eeda 3937ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3938ccd979bdSMark Fasheh 3939ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) { 3940ccd979bdSMark Fasheh /* Do not schedule a lock for downconvert when it's on 3941ccd979bdSMark Fasheh * the way to destruction - any nodes wanting access 3942ccd979bdSMark Fasheh * to the resource will get it soon. */ 39439b915181SSunil Mushran mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n", 3944ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3945ccd979bdSMark Fasheh return; 3946ccd979bdSMark Fasheh } 3947ccd979bdSMark Fasheh 3948ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 3949ccd979bdSMark Fasheh 3950a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 3951ccd979bdSMark Fasheh if (list_empty(&lockres->l_blocked_list)) { 3952ccd979bdSMark Fasheh list_add_tail(&lockres->l_blocked_list, 3953ccd979bdSMark Fasheh &osb->blocked_lock_list); 3954ccd979bdSMark Fasheh osb->blocked_lock_count++; 3955ccd979bdSMark Fasheh } 3956a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 3957ccd979bdSMark Fasheh } 395834d024f8SMark Fasheh 395934d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 396034d024f8SMark Fasheh { 396134d024f8SMark Fasheh unsigned long processed; 3962a75e9ccaSSrinivas Eeda unsigned long 
flags; 396334d024f8SMark Fasheh struct ocfs2_lock_res *lockres; 396434d024f8SMark Fasheh 3965a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 396634d024f8SMark Fasheh /* grab this early so we know to try again if a state change and 396734d024f8SMark Fasheh * wake happens part-way through our work */ 396834d024f8SMark Fasheh osb->dc_work_sequence = osb->dc_wake_sequence; 396934d024f8SMark Fasheh 397034d024f8SMark Fasheh processed = osb->blocked_lock_count; 397134d024f8SMark Fasheh while (processed) { 397234d024f8SMark Fasheh BUG_ON(list_empty(&osb->blocked_lock_list)); 397334d024f8SMark Fasheh 397434d024f8SMark Fasheh lockres = list_entry(osb->blocked_lock_list.next, 397534d024f8SMark Fasheh struct ocfs2_lock_res, l_blocked_list); 397634d024f8SMark Fasheh list_del_init(&lockres->l_blocked_list); 397734d024f8SMark Fasheh osb->blocked_lock_count--; 3978a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 397934d024f8SMark Fasheh 398034d024f8SMark Fasheh BUG_ON(!processed); 398134d024f8SMark Fasheh processed--; 398234d024f8SMark Fasheh 398334d024f8SMark Fasheh ocfs2_process_blocked_lock(osb, lockres); 398434d024f8SMark Fasheh 3985a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 398634d024f8SMark Fasheh } 3987a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 398834d024f8SMark Fasheh } 398934d024f8SMark Fasheh 399034d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 399134d024f8SMark Fasheh { 399234d024f8SMark Fasheh int empty = 0; 3993a75e9ccaSSrinivas Eeda unsigned long flags; 399434d024f8SMark Fasheh 3995a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 399634d024f8SMark Fasheh if (list_empty(&osb->blocked_lock_list)) 399734d024f8SMark Fasheh empty = 1; 399834d024f8SMark Fasheh 3999a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 400034d024f8SMark Fasheh return empty; 400134d024f8SMark Fasheh } 
400234d024f8SMark Fasheh 400334d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 400434d024f8SMark Fasheh { 400534d024f8SMark Fasheh int should_wake = 0; 4006a75e9ccaSSrinivas Eeda unsigned long flags; 400734d024f8SMark Fasheh 4008a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 400934d024f8SMark Fasheh if (osb->dc_work_sequence != osb->dc_wake_sequence) 401034d024f8SMark Fasheh should_wake = 1; 4011a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 401234d024f8SMark Fasheh 401334d024f8SMark Fasheh return should_wake; 401434d024f8SMark Fasheh } 401534d024f8SMark Fasheh 4016200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg) 401734d024f8SMark Fasheh { 401834d024f8SMark Fasheh int status = 0; 401934d024f8SMark Fasheh struct ocfs2_super *osb = arg; 402034d024f8SMark Fasheh 402134d024f8SMark Fasheh /* only quit once we've been asked to stop and there is no more 402234d024f8SMark Fasheh * work available */ 402334d024f8SMark Fasheh while (!(kthread_should_stop() && 402434d024f8SMark Fasheh ocfs2_downconvert_thread_lists_empty(osb))) { 402534d024f8SMark Fasheh 402634d024f8SMark Fasheh wait_event_interruptible(osb->dc_event, 402734d024f8SMark Fasheh ocfs2_downconvert_thread_should_wake(osb) || 402834d024f8SMark Fasheh kthread_should_stop()); 402934d024f8SMark Fasheh 403034d024f8SMark Fasheh mlog(0, "downconvert_thread: awoken\n"); 403134d024f8SMark Fasheh 403234d024f8SMark Fasheh ocfs2_downconvert_thread_do_work(osb); 403334d024f8SMark Fasheh } 403434d024f8SMark Fasheh 403534d024f8SMark Fasheh osb->dc_task = NULL; 403634d024f8SMark Fasheh return status; 403734d024f8SMark Fasheh } 403834d024f8SMark Fasheh 403934d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 404034d024f8SMark Fasheh { 4041a75e9ccaSSrinivas Eeda unsigned long flags; 4042a75e9ccaSSrinivas Eeda 4043a75e9ccaSSrinivas Eeda spin_lock_irqsave(&osb->dc_task_lock, flags); 
404434d024f8SMark Fasheh /* make sure the voting thread gets a swipe at whatever changes 404534d024f8SMark Fasheh * the caller may have made to the voting state */ 404634d024f8SMark Fasheh osb->dc_wake_sequence++; 4047a75e9ccaSSrinivas Eeda spin_unlock_irqrestore(&osb->dc_task_lock, flags); 404834d024f8SMark Fasheh wake_up(&osb->dc_event); 404934d024f8SMark Fasheh } 4050