/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dlmglue.c
 *
 * Code which implements an OCFS2 specific interface to our DLM.
 *
 * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh */ 25ccd979bdSMark Fasheh 26ccd979bdSMark Fasheh #include <linux/types.h> 27ccd979bdSMark Fasheh #include <linux/slab.h> 28ccd979bdSMark Fasheh #include <linux/highmem.h> 29ccd979bdSMark Fasheh #include <linux/mm.h> 30ccd979bdSMark Fasheh #include <linux/kthread.h> 31ccd979bdSMark Fasheh #include <linux/pagemap.h> 32ccd979bdSMark Fasheh #include <linux/debugfs.h> 33ccd979bdSMark Fasheh #include <linux/seq_file.h> 348ddb7b00SSunil Mushran #include <linux/time.h> 359e33d69fSJan Kara #include <linux/quotaops.h> 36ccd979bdSMark Fasheh 37ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE 38ccd979bdSMark Fasheh #include <cluster/masklog.h> 39ccd979bdSMark Fasheh 40ccd979bdSMark Fasheh #include "ocfs2.h" 41d24fbcdaSJoel Becker #include "ocfs2_lockingver.h" 42ccd979bdSMark Fasheh 43ccd979bdSMark Fasheh #include "alloc.h" 44d680efe9SMark Fasheh #include "dcache.h" 45ccd979bdSMark Fasheh #include "dlmglue.h" 46ccd979bdSMark Fasheh #include "extent_map.h" 477f1a37e3STiger Yang #include "file.h" 48ccd979bdSMark Fasheh #include "heartbeat.h" 49ccd979bdSMark Fasheh #include "inode.h" 50ccd979bdSMark Fasheh #include "journal.h" 5124ef1815SJoel Becker #include "stackglue.h" 52ccd979bdSMark Fasheh #include "slot_map.h" 53ccd979bdSMark Fasheh #include "super.h" 54ccd979bdSMark Fasheh #include "uptodate.h" 559e33d69fSJan Kara #include "quota.h" 568dec98edSTao Ma #include "refcounttree.h" 57ccd979bdSMark Fasheh 58ccd979bdSMark Fasheh #include "buffer_head_io.h" 59ccd979bdSMark Fasheh 60ccd979bdSMark Fasheh struct ocfs2_mask_waiter { 61ccd979bdSMark Fasheh struct list_head mw_item; 62ccd979bdSMark Fasheh int mw_status; 63ccd979bdSMark Fasheh struct completion mw_complete; 64ccd979bdSMark Fasheh unsigned long mw_mask; 65ccd979bdSMark Fasheh unsigned long mw_goal; 668ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 678ddb7b00SSunil Mushran unsigned long long mw_lock_start; 688ddb7b00SSunil Mushran #endif 69ccd979bdSMark Fasheh }; 70ccd979bdSMark Fasheh 
7154a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 7254a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); 73cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); 749e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres); 75ccd979bdSMark Fasheh 76d680efe9SMark Fasheh /* 77cc567d89SMark Fasheh * Return value from ->downconvert_worker functions. 78d680efe9SMark Fasheh * 79b5e500e2SMark Fasheh * These control the precise actions of ocfs2_unblock_lock() 80d680efe9SMark Fasheh * and ocfs2_process_blocked_lock() 81d680efe9SMark Fasheh * 82d680efe9SMark Fasheh */ 83d680efe9SMark Fasheh enum ocfs2_unblock_action { 84d680efe9SMark Fasheh UNBLOCK_CONTINUE = 0, /* Continue downconvert */ 85d680efe9SMark Fasheh UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire 86d680efe9SMark Fasheh * ->post_unlock callback */ 87d680efe9SMark Fasheh UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire 88d680efe9SMark Fasheh * ->post_unlock() callback. 
*/ 89d680efe9SMark Fasheh }; 90d680efe9SMark Fasheh 91d680efe9SMark Fasheh struct ocfs2_unblock_ctl { 92d680efe9SMark Fasheh int requeue; 93d680efe9SMark Fasheh enum ocfs2_unblock_action unblock_action; 94d680efe9SMark Fasheh }; 95d680efe9SMark Fasheh 96cb25797dSJan Kara /* Lockdep class keys */ 97cb25797dSJan Kara struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; 98cb25797dSJan Kara 99810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 100810d5aebSMark Fasheh int new_level); 101810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 102810d5aebSMark Fasheh 103cc567d89SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 104cc567d89SMark Fasheh int blocking); 105cc567d89SMark Fasheh 106cc567d89SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 107cc567d89SMark Fasheh int blocking); 108d680efe9SMark Fasheh 109d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 110d680efe9SMark Fasheh struct ocfs2_lock_res *lockres); 111ccd979bdSMark Fasheh 1129e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); 1136cb129f5SAdrian Bunk 1148dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 1158dec98edSTao Ma int new_level); 1168dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 1178dec98edSTao Ma int blocking); 1188dec98edSTao Ma 1196cb129f5SAdrian Bunk #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 1206cb129f5SAdrian Bunk 1216cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. 
*/ 1226cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level, 1236cb129f5SAdrian Bunk const char *function, 1246cb129f5SAdrian Bunk unsigned int line, 1256cb129f5SAdrian Bunk struct ocfs2_lock_res *lockres) 1266cb129f5SAdrian Bunk { 127a641dc2aSMark Fasheh struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 1286cb129f5SAdrian Bunk 1296cb129f5SAdrian Bunk mlog(level, "LVB information for %s (called from %s:%u):\n", 1306cb129f5SAdrian Bunk lockres->l_name, function, line); 1316cb129f5SAdrian Bunk mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", 1326cb129f5SAdrian Bunk lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), 1336cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_igeneration)); 1346cb129f5SAdrian Bunk mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 1356cb129f5SAdrian Bunk (unsigned long long)be64_to_cpu(lvb->lvb_isize), 1366cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 1376cb129f5SAdrian Bunk be16_to_cpu(lvb->lvb_imode)); 1386cb129f5SAdrian Bunk mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 1396cb129f5SAdrian Bunk "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), 1406cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_iatime_packed), 1416cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_ictime_packed), 1426cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_imtime_packed), 1436cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iattr)); 1446cb129f5SAdrian Bunk } 1456cb129f5SAdrian Bunk 1466cb129f5SAdrian Bunk 147f625c979SMark Fasheh /* 148f625c979SMark Fasheh * OCFS2 Lock Resource Operations 149f625c979SMark Fasheh * 150f625c979SMark Fasheh * These fine tune the behavior of the generic dlmglue locking infrastructure. 1510d5dc6c2SMark Fasheh * 1520d5dc6c2SMark Fasheh * The most basic of lock types can point ->l_priv to their respective 1530d5dc6c2SMark Fasheh * struct ocfs2_super and allow the default actions to manage things. 
1540d5dc6c2SMark Fasheh * 1550d5dc6c2SMark Fasheh * Right now, each lock type also needs to implement an init function, 1560d5dc6c2SMark Fasheh * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() 1570d5dc6c2SMark Fasheh * should be called when the lock is no longer needed (i.e., object 1580d5dc6c2SMark Fasheh * destruction time). 159f625c979SMark Fasheh */ 160ccd979bdSMark Fasheh struct ocfs2_lock_res_ops { 16154a7e755SMark Fasheh /* 16254a7e755SMark Fasheh * Translate an ocfs2_lock_res * into an ocfs2_super *. Define 16354a7e755SMark Fasheh * this callback if ->l_priv is not an ocfs2_super pointer 16454a7e755SMark Fasheh */ 16554a7e755SMark Fasheh struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); 166b5e500e2SMark Fasheh 1670d5dc6c2SMark Fasheh /* 16834d024f8SMark Fasheh * Optionally called in the downconvert thread after a 16934d024f8SMark Fasheh * successful downconvert. The lockres will not be referenced 17034d024f8SMark Fasheh * after this callback is called, so it is safe to free 17134d024f8SMark Fasheh * memory, etc. 1720d5dc6c2SMark Fasheh * 1730d5dc6c2SMark Fasheh * The exact semantics of when this is called are controlled 1740d5dc6c2SMark Fasheh * by ->downconvert_worker() 1750d5dc6c2SMark Fasheh */ 176d680efe9SMark Fasheh void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); 177f625c979SMark Fasheh 178f625c979SMark Fasheh /* 17916d5b956SMark Fasheh * Allow a lock type to add checks to determine whether it is 18016d5b956SMark Fasheh * safe to downconvert a lock. Return 0 to re-queue the 18116d5b956SMark Fasheh * downconvert at a later time, nonzero to continue. 18216d5b956SMark Fasheh * 18316d5b956SMark Fasheh * For most locks, the default checks that there are no 18416d5b956SMark Fasheh * incompatible holders are sufficient. 18516d5b956SMark Fasheh * 18616d5b956SMark Fasheh * Called with the lockres spinlock held. 
18716d5b956SMark Fasheh */ 18816d5b956SMark Fasheh int (*check_downconvert)(struct ocfs2_lock_res *, int); 18916d5b956SMark Fasheh 19016d5b956SMark Fasheh /* 1915ef0d4eaSMark Fasheh * Allows a lock type to populate the lock value block. This 1925ef0d4eaSMark Fasheh * is called on downconvert, and when we drop a lock. 1935ef0d4eaSMark Fasheh * 1945ef0d4eaSMark Fasheh * Locks that want to use this should set LOCK_TYPE_USES_LVB 1955ef0d4eaSMark Fasheh * in the flags field. 1965ef0d4eaSMark Fasheh * 1975ef0d4eaSMark Fasheh * Called with the lockres spinlock held. 1985ef0d4eaSMark Fasheh */ 1995ef0d4eaSMark Fasheh void (*set_lvb)(struct ocfs2_lock_res *); 2005ef0d4eaSMark Fasheh 2015ef0d4eaSMark Fasheh /* 202cc567d89SMark Fasheh * Called from the downconvert thread when it is determined 203cc567d89SMark Fasheh * that a lock will be downconverted. This is called without 204cc567d89SMark Fasheh * any locks held so the function can do work that might 205cc567d89SMark Fasheh * schedule (syncing out data, etc). 206cc567d89SMark Fasheh * 207cc567d89SMark Fasheh * This should return any one of the ocfs2_unblock_action 208cc567d89SMark Fasheh * values, depending on what it wants the thread to do. 209cc567d89SMark Fasheh */ 210cc567d89SMark Fasheh int (*downconvert_worker)(struct ocfs2_lock_res *, int); 211cc567d89SMark Fasheh 212cc567d89SMark Fasheh /* 213f625c979SMark Fasheh * LOCK_TYPE_* flags which describe the specific requirements 214f625c979SMark Fasheh * of a lock type. Descriptions of each individual flag follow. 215f625c979SMark Fasheh */ 216f625c979SMark Fasheh int flags; 217ccd979bdSMark Fasheh }; 218ccd979bdSMark Fasheh 219f625c979SMark Fasheh /* 220f625c979SMark Fasheh * Some locks want to "refresh" potentially stale data when a 221f625c979SMark Fasheh * meaningful (PRMODE or EXMODE) lock level is first obtained. 
If this 222f625c979SMark Fasheh * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the 223f625c979SMark Fasheh * individual lockres l_flags member from the ast function. It is 224f625c979SMark Fasheh * expected that the locking wrapper will clear the 225f625c979SMark Fasheh * OCFS2_LOCK_NEEDS_REFRESH flag when done. 226f625c979SMark Fasheh */ 227f625c979SMark Fasheh #define LOCK_TYPE_REQUIRES_REFRESH 0x1 228f625c979SMark Fasheh 229b80fc012SMark Fasheh /* 2305ef0d4eaSMark Fasheh * Indicate that a lock type makes use of the lock value block. The 2315ef0d4eaSMark Fasheh * ->set_lvb lock type callback must be defined. 232b80fc012SMark Fasheh */ 233b80fc012SMark Fasheh #define LOCK_TYPE_USES_LVB 0x2 234b80fc012SMark Fasheh 235ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 23654a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 237f625c979SMark Fasheh .flags = 0, 238ccd979bdSMark Fasheh }; 239ccd979bdSMark Fasheh 240e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { 24154a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 242810d5aebSMark Fasheh .check_downconvert = ocfs2_check_meta_downconvert, 243810d5aebSMark Fasheh .set_lvb = ocfs2_set_meta_lvb, 244f1f54068SMark Fasheh .downconvert_worker = ocfs2_data_convert_worker, 245b80fc012SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 246ccd979bdSMark Fasheh }; 247ccd979bdSMark Fasheh 248ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = { 249f625c979SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH, 250ccd979bdSMark Fasheh }; 251ccd979bdSMark Fasheh 252ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = { 253f625c979SMark Fasheh .flags = 0, 254ccd979bdSMark Fasheh }; 255ccd979bdSMark Fasheh 2566ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { 2576ca497a8Swengang wang .flags = 0, 2586ca497a8Swengang wang }; 2596ca497a8Swengang wang 26083273932SSrinivas Eeda static 
struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { 26183273932SSrinivas Eeda .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 26283273932SSrinivas Eeda }; 26383273932SSrinivas Eeda 264d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { 26554a7e755SMark Fasheh .get_osb = ocfs2_get_dentry_osb, 266d680efe9SMark Fasheh .post_unlock = ocfs2_dentry_post_unlock, 267cc567d89SMark Fasheh .downconvert_worker = ocfs2_dentry_convert_worker, 268f625c979SMark Fasheh .flags = 0, 269d680efe9SMark Fasheh }; 270d680efe9SMark Fasheh 27150008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 27250008630STiger Yang .get_osb = ocfs2_get_inode_osb, 27350008630STiger Yang .flags = 0, 27450008630STiger Yang }; 27550008630STiger Yang 276cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = { 277cf8e06f1SMark Fasheh .get_osb = ocfs2_get_file_osb, 278cf8e06f1SMark Fasheh .flags = 0, 279cf8e06f1SMark Fasheh }; 280cf8e06f1SMark Fasheh 2819e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { 2829e33d69fSJan Kara .set_lvb = ocfs2_set_qinfo_lvb, 2839e33d69fSJan Kara .get_osb = ocfs2_get_qinfo_osb, 2849e33d69fSJan Kara .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, 2859e33d69fSJan Kara }; 2869e33d69fSJan Kara 2878dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { 2888dec98edSTao Ma .check_downconvert = ocfs2_check_refcount_downconvert, 2898dec98edSTao Ma .downconvert_worker = ocfs2_refcount_convert_worker, 2908dec98edSTao Ma .flags = 0, 2918dec98edSTao Ma }; 2928dec98edSTao Ma 293ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 294ccd979bdSMark Fasheh { 295ccd979bdSMark Fasheh return lockres->l_type == OCFS2_LOCK_TYPE_META || 29650008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_RW || 29750008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 298ccd979bdSMark Fasheh } 299ccd979bdSMark Fasheh 300c0e41338SJoel Becker 
static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) 301a796d286SJoel Becker { 302a796d286SJoel Becker return container_of(lksb, struct ocfs2_lock_res, l_lksb); 303a796d286SJoel Becker } 304a796d286SJoel Becker 305ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 306ccd979bdSMark Fasheh { 307ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_inode_lock(lockres)); 308ccd979bdSMark Fasheh 309ccd979bdSMark Fasheh return (struct inode *) lockres->l_priv; 310ccd979bdSMark Fasheh } 311ccd979bdSMark Fasheh 312d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) 313d680efe9SMark Fasheh { 314d680efe9SMark Fasheh BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); 315d680efe9SMark Fasheh 316d680efe9SMark Fasheh return (struct ocfs2_dentry_lock *)lockres->l_priv; 317d680efe9SMark Fasheh } 318d680efe9SMark Fasheh 3199e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) 3209e33d69fSJan Kara { 3219e33d69fSJan Kara BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); 3229e33d69fSJan Kara 3239e33d69fSJan Kara return (struct ocfs2_mem_dqinfo *)lockres->l_priv; 3249e33d69fSJan Kara } 3259e33d69fSJan Kara 3268dec98edSTao Ma static inline struct ocfs2_refcount_tree * 3278dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res) 3288dec98edSTao Ma { 3298dec98edSTao Ma return container_of(res, struct ocfs2_refcount_tree, rf_lockres); 3308dec98edSTao Ma } 3318dec98edSTao Ma 33254a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 33354a7e755SMark Fasheh { 33454a7e755SMark Fasheh if (lockres->l_ops->get_osb) 33554a7e755SMark Fasheh return lockres->l_ops->get_osb(lockres); 33654a7e755SMark Fasheh 33754a7e755SMark Fasheh return (struct ocfs2_super *)lockres->l_priv; 33854a7e755SMark Fasheh } 33954a7e755SMark Fasheh 340ccd979bdSMark Fasheh 
static int ocfs2_lock_create(struct ocfs2_super *osb, 341ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 342ccd979bdSMark Fasheh int level, 343bd3e7610SJoel Becker u32 dlm_flags); 344ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 345ccd979bdSMark Fasheh int wanted); 346cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 347ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 348cb25797dSJan Kara int level, unsigned long caller_ip); 349cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, 350cb25797dSJan Kara struct ocfs2_lock_res *lockres, 351cb25797dSJan Kara int level) 352cb25797dSJan Kara { 353cb25797dSJan Kara __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); 354cb25797dSJan Kara } 355cb25797dSJan Kara 356ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 357ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 358ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 359ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level); 360ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 361ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 362ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 363ccd979bdSMark Fasheh int convert); 3647431cd7eSJoel Becker #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ 365c74ff8bbSSunil Mushran if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \ 3667431cd7eSJoel Becker mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ 3677431cd7eSJoel Becker _err, _func, _lockres->l_name); \ 368c74ff8bbSSunil Mushran else \ 369c74ff8bbSSunil Mushran mlog(ML_ERROR, "DLM error %d while calling %s on resource 
%.*s%08x\n", \ 370c74ff8bbSSunil Mushran _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \ 371c74ff8bbSSunil Mushran (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \ 372ccd979bdSMark Fasheh } while (0) 37334d024f8SMark Fasheh static int ocfs2_downconvert_thread(void *arg); 37434d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 375ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 376e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 377ccd979bdSMark Fasheh struct buffer_head **bh); 378ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 379ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level); 380de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 381cf8e06f1SMark Fasheh int new_level); 382cf8e06f1SMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 383cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres, 384cf8e06f1SMark Fasheh int new_level, 385de551246SJoel Becker int lvb, 386de551246SJoel Becker unsigned int generation); 387cf8e06f1SMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 388cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 389cf8e06f1SMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 390cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres); 391cf8e06f1SMark Fasheh 392ccd979bdSMark Fasheh 393ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 394ccd979bdSMark Fasheh u64 blkno, 395ccd979bdSMark Fasheh u32 generation, 396ccd979bdSMark Fasheh char *name) 397ccd979bdSMark Fasheh { 398ccd979bdSMark Fasheh int len; 399ccd979bdSMark Fasheh 400ccd979bdSMark Fasheh mlog_entry_void(); 401ccd979bdSMark Fasheh 402ccd979bdSMark Fasheh BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); 403ccd979bdSMark Fasheh 404b0697053SMark Fasheh len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 
405b0697053SMark Fasheh ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, 406b0697053SMark Fasheh (long long)blkno, generation); 407ccd979bdSMark Fasheh 408ccd979bdSMark Fasheh BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 409ccd979bdSMark Fasheh 410ccd979bdSMark Fasheh mlog(0, "built lock resource with name: %s\n", name); 411ccd979bdSMark Fasheh 412ccd979bdSMark Fasheh mlog_exit_void(); 413ccd979bdSMark Fasheh } 414ccd979bdSMark Fasheh 41534af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 416ccd979bdSMark Fasheh 417ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 418ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug) 419ccd979bdSMark Fasheh { 420ccd979bdSMark Fasheh mlog(0, "Add tracking for lockres %s\n", res->l_name); 421ccd979bdSMark Fasheh 422ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 423ccd979bdSMark Fasheh list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); 424ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 425ccd979bdSMark Fasheh } 426ccd979bdSMark Fasheh 427ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) 428ccd979bdSMark Fasheh { 429ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 430ccd979bdSMark Fasheh if (!list_empty(&res->l_debug_list)) 431ccd979bdSMark Fasheh list_del_init(&res->l_debug_list); 432ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 433ccd979bdSMark Fasheh } 434ccd979bdSMark Fasheh 4358ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS 4368ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 4378ddb7b00SSunil Mushran { 4388ddb7b00SSunil Mushran res->l_lock_num_prmode = 0; 4398ddb7b00SSunil Mushran res->l_lock_num_prmode_failed = 0; 4408ddb7b00SSunil Mushran res->l_lock_total_prmode = 0; 4418ddb7b00SSunil Mushran res->l_lock_max_prmode = 0; 4428ddb7b00SSunil Mushran res->l_lock_num_exmode = 0; 4438ddb7b00SSunil Mushran res->l_lock_num_exmode_failed = 0; 
4448ddb7b00SSunil Mushran res->l_lock_total_exmode = 0; 4458ddb7b00SSunil Mushran res->l_lock_max_exmode = 0; 4468ddb7b00SSunil Mushran res->l_lock_refresh = 0; 4478ddb7b00SSunil Mushran } 4488ddb7b00SSunil Mushran 4498ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, 4508ddb7b00SSunil Mushran struct ocfs2_mask_waiter *mw, int ret) 4518ddb7b00SSunil Mushran { 4528ddb7b00SSunil Mushran unsigned long long *num, *sum; 4538ddb7b00SSunil Mushran unsigned int *max, *failed; 4548ddb7b00SSunil Mushran struct timespec ts = current_kernel_time(); 4558ddb7b00SSunil Mushran unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start; 4568ddb7b00SSunil Mushran 4578ddb7b00SSunil Mushran if (level == LKM_PRMODE) { 4588ddb7b00SSunil Mushran num = &res->l_lock_num_prmode; 4598ddb7b00SSunil Mushran sum = &res->l_lock_total_prmode; 4608ddb7b00SSunil Mushran max = &res->l_lock_max_prmode; 4618ddb7b00SSunil Mushran failed = &res->l_lock_num_prmode_failed; 4628ddb7b00SSunil Mushran } else if (level == LKM_EXMODE) { 4638ddb7b00SSunil Mushran num = &res->l_lock_num_exmode; 4648ddb7b00SSunil Mushran sum = &res->l_lock_total_exmode; 4658ddb7b00SSunil Mushran max = &res->l_lock_max_exmode; 4668ddb7b00SSunil Mushran failed = &res->l_lock_num_exmode_failed; 4678ddb7b00SSunil Mushran } else 4688ddb7b00SSunil Mushran return; 4698ddb7b00SSunil Mushran 4708ddb7b00SSunil Mushran (*num)++; 4718ddb7b00SSunil Mushran (*sum) += time; 4728ddb7b00SSunil Mushran if (time > *max) 4738ddb7b00SSunil Mushran *max = time; 4748ddb7b00SSunil Mushran if (ret) 4758ddb7b00SSunil Mushran (*failed)++; 4768ddb7b00SSunil Mushran } 4778ddb7b00SSunil Mushran 4788ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4798ddb7b00SSunil Mushran { 4808ddb7b00SSunil Mushran lockres->l_lock_refresh++; 4818ddb7b00SSunil Mushran } 4828ddb7b00SSunil Mushran 4838ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct 
ocfs2_mask_waiter *mw) 4848ddb7b00SSunil Mushran { 4858ddb7b00SSunil Mushran struct timespec ts = current_kernel_time(); 4868ddb7b00SSunil Mushran mw->mw_lock_start = timespec_to_ns(&ts); 4878ddb7b00SSunil Mushran } 4888ddb7b00SSunil Mushran #else 4898ddb7b00SSunil Mushran static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) 4908ddb7b00SSunil Mushran { 4918ddb7b00SSunil Mushran } 4928ddb7b00SSunil Mushran static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, 4938ddb7b00SSunil Mushran int level, struct ocfs2_mask_waiter *mw, int ret) 4948ddb7b00SSunil Mushran { 4958ddb7b00SSunil Mushran } 4968ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) 4978ddb7b00SSunil Mushran { 4988ddb7b00SSunil Mushran } 4998ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) 5008ddb7b00SSunil Mushran { 5018ddb7b00SSunil Mushran } 5028ddb7b00SSunil Mushran #endif 5038ddb7b00SSunil Mushran 504ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 505ccd979bdSMark Fasheh struct ocfs2_lock_res *res, 506ccd979bdSMark Fasheh enum ocfs2_lock_type type, 507ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops, 508ccd979bdSMark Fasheh void *priv) 509ccd979bdSMark Fasheh { 510ccd979bdSMark Fasheh res->l_type = type; 511ccd979bdSMark Fasheh res->l_ops = ops; 512ccd979bdSMark Fasheh res->l_priv = priv; 513ccd979bdSMark Fasheh 514bd3e7610SJoel Becker res->l_level = DLM_LOCK_IV; 515bd3e7610SJoel Becker res->l_requested = DLM_LOCK_IV; 516bd3e7610SJoel Becker res->l_blocking = DLM_LOCK_IV; 517ccd979bdSMark Fasheh res->l_action = OCFS2_AST_INVALID; 518ccd979bdSMark Fasheh res->l_unlock_action = OCFS2_UNLOCK_INVALID; 519ccd979bdSMark Fasheh 520ccd979bdSMark Fasheh res->l_flags = OCFS2_LOCK_INITIALIZED; 521ccd979bdSMark Fasheh 522ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 5238ddb7b00SSunil Mushran 5248ddb7b00SSunil Mushran 
ocfs2_init_lock_stats(res); 525cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 526cb25797dSJan Kara if (type != OCFS2_LOCK_TYPE_OPEN) 527cb25797dSJan Kara lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], 528cb25797dSJan Kara &lockdep_keys[type], 0); 529cb25797dSJan Kara else 530cb25797dSJan Kara res->l_lockdep_map.key = NULL; 531cb25797dSJan Kara #endif 532ccd979bdSMark Fasheh } 533ccd979bdSMark Fasheh 534ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 535ccd979bdSMark Fasheh { 536ccd979bdSMark Fasheh /* This also clears out the lock status block */ 537ccd979bdSMark Fasheh memset(res, 0, sizeof(struct ocfs2_lock_res)); 538ccd979bdSMark Fasheh spin_lock_init(&res->l_lock); 539ccd979bdSMark Fasheh init_waitqueue_head(&res->l_event); 540ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_blocked_list); 541ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_mask_waiters); 542ccd979bdSMark Fasheh } 543ccd979bdSMark Fasheh 544ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 545ccd979bdSMark Fasheh enum ocfs2_lock_type type, 54624c19ef4SMark Fasheh unsigned int generation, 547ccd979bdSMark Fasheh struct inode *inode) 548ccd979bdSMark Fasheh { 549ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops; 550ccd979bdSMark Fasheh 551ccd979bdSMark Fasheh switch(type) { 552ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_RW: 553ccd979bdSMark Fasheh ops = &ocfs2_inode_rw_lops; 554ccd979bdSMark Fasheh break; 555ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_META: 556e63aecb6SMark Fasheh ops = &ocfs2_inode_inode_lops; 557ccd979bdSMark Fasheh break; 55850008630STiger Yang case OCFS2_LOCK_TYPE_OPEN: 55950008630STiger Yang ops = &ocfs2_inode_open_lops; 56050008630STiger Yang break; 561ccd979bdSMark Fasheh default: 562ccd979bdSMark Fasheh mlog_bug_on_msg(1, "type: %d\n", type); 563ccd979bdSMark Fasheh ops = NULL; /* thanks, gcc */ 564ccd979bdSMark Fasheh break; 565ccd979bdSMark Fasheh }; 566ccd979bdSMark Fasheh 567d680efe9SMark Fasheh 
ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 56824c19ef4SMark Fasheh generation, res->l_name); 569d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 570d680efe9SMark Fasheh } 571d680efe9SMark Fasheh 57254a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 57354a7e755SMark Fasheh { 57454a7e755SMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 57554a7e755SMark Fasheh 57654a7e755SMark Fasheh return OCFS2_SB(inode->i_sb); 57754a7e755SMark Fasheh } 57854a7e755SMark Fasheh 5799e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) 5809e33d69fSJan Kara { 5819e33d69fSJan Kara struct ocfs2_mem_dqinfo *info = lockres->l_priv; 5829e33d69fSJan Kara 5839e33d69fSJan Kara return OCFS2_SB(info->dqi_gi.dqi_sb); 5849e33d69fSJan Kara } 5859e33d69fSJan Kara 586cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 587cf8e06f1SMark Fasheh { 588cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = lockres->l_priv; 589cf8e06f1SMark Fasheh 590cf8e06f1SMark Fasheh return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); 591cf8e06f1SMark Fasheh } 592cf8e06f1SMark Fasheh 593d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 594d680efe9SMark Fasheh { 595d680efe9SMark Fasheh __be64 inode_blkno_be; 596d680efe9SMark Fasheh 597d680efe9SMark Fasheh memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 598d680efe9SMark Fasheh sizeof(__be64)); 599d680efe9SMark Fasheh 600d680efe9SMark Fasheh return be64_to_cpu(inode_blkno_be); 601d680efe9SMark Fasheh } 602d680efe9SMark Fasheh 60354a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 60454a7e755SMark Fasheh { 60554a7e755SMark Fasheh struct ocfs2_dentry_lock *dl = lockres->l_priv; 60654a7e755SMark Fasheh 60754a7e755SMark Fasheh return 
OCFS2_SB(dl->dl_inode->i_sb); 60854a7e755SMark Fasheh } 60954a7e755SMark Fasheh 610d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 611d680efe9SMark Fasheh u64 parent, struct inode *inode) 612d680efe9SMark Fasheh { 613d680efe9SMark Fasheh int len; 614d680efe9SMark Fasheh u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 615d680efe9SMark Fasheh __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 616d680efe9SMark Fasheh struct ocfs2_lock_res *lockres = &dl->dl_lockres; 617d680efe9SMark Fasheh 618d680efe9SMark Fasheh ocfs2_lock_res_init_once(lockres); 619d680efe9SMark Fasheh 620d680efe9SMark Fasheh /* 621d680efe9SMark Fasheh * Unfortunately, the standard lock naming scheme won't work 622d680efe9SMark Fasheh * here because we have two 16 byte values to use. Instead, 623d680efe9SMark Fasheh * we'll stuff the inode number as a binary value. We still 624d680efe9SMark Fasheh * want error prints to show something without garbling the 625d680efe9SMark Fasheh * display, so drop a null byte in there before the inode 626d680efe9SMark Fasheh * number. A future version of OCFS2 will likely use all 627d680efe9SMark Fasheh * binary lock names. The stringified names have been a 628d680efe9SMark Fasheh * tremendous aid in debugging, but now that the debugfs 629d680efe9SMark Fasheh * interface exists, we can mangle things there if need be. 
630d680efe9SMark Fasheh * 631d680efe9SMark Fasheh * NOTE: We also drop the standard "pad" value (the total lock 632d680efe9SMark Fasheh * name size stays the same though - the last part is all 633d680efe9SMark Fasheh * zeros due to the memset in ocfs2_lock_res_init_once() 634d680efe9SMark Fasheh */ 635d680efe9SMark Fasheh len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 636d680efe9SMark Fasheh "%c%016llx", 637d680efe9SMark Fasheh ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 638d680efe9SMark Fasheh (long long)parent); 639d680efe9SMark Fasheh 640d680efe9SMark Fasheh BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 641d680efe9SMark Fasheh 642d680efe9SMark Fasheh memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 643d680efe9SMark Fasheh sizeof(__be64)); 644d680efe9SMark Fasheh 645d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 646d680efe9SMark Fasheh OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 647d680efe9SMark Fasheh dl); 648ccd979bdSMark Fasheh } 649ccd979bdSMark Fasheh 650ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 651ccd979bdSMark Fasheh struct ocfs2_super *osb) 652ccd979bdSMark Fasheh { 653ccd979bdSMark Fasheh /* Superblock lockres doesn't come from a slab so we call init 654ccd979bdSMark Fasheh * once on it manually. 
*/ 655ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 656d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 657d680efe9SMark Fasheh 0, res->l_name); 658ccd979bdSMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 659ccd979bdSMark Fasheh &ocfs2_super_lops, osb); 660ccd979bdSMark Fasheh } 661ccd979bdSMark Fasheh 662ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 663ccd979bdSMark Fasheh struct ocfs2_super *osb) 664ccd979bdSMark Fasheh { 665ccd979bdSMark Fasheh /* Rename lockres doesn't come from a slab so we call init 666ccd979bdSMark Fasheh * once on it manually. */ 667ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 668d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 669d680efe9SMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 670ccd979bdSMark Fasheh &ocfs2_rename_lops, osb); 671ccd979bdSMark Fasheh } 672ccd979bdSMark Fasheh 6736ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, 6746ca497a8Swengang wang struct ocfs2_super *osb) 6756ca497a8Swengang wang { 6766ca497a8Swengang wang /* nfs_sync lockres doesn't come from a slab so we call init 6776ca497a8Swengang wang * once on it manually. 
*/ 6786ca497a8Swengang wang ocfs2_lock_res_init_once(res); 6796ca497a8Swengang wang ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name); 6806ca497a8Swengang wang ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC, 6816ca497a8Swengang wang &ocfs2_nfs_sync_lops, osb); 6826ca497a8Swengang wang } 6836ca497a8Swengang wang 68483273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 68583273932SSrinivas Eeda struct ocfs2_super *osb) 68683273932SSrinivas Eeda { 68783273932SSrinivas Eeda ocfs2_lock_res_init_once(res); 68883273932SSrinivas Eeda ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); 68983273932SSrinivas Eeda ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, 69083273932SSrinivas Eeda &ocfs2_orphan_scan_lops, osb); 69183273932SSrinivas Eeda } 69283273932SSrinivas Eeda 693cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 694cf8e06f1SMark Fasheh struct ocfs2_file_private *fp) 695cf8e06f1SMark Fasheh { 696cf8e06f1SMark Fasheh struct inode *inode = fp->fp_file->f_mapping->host; 697cf8e06f1SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 698cf8e06f1SMark Fasheh 699cf8e06f1SMark Fasheh ocfs2_lock_res_init_once(lockres); 700cf8e06f1SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, 701cf8e06f1SMark Fasheh inode->i_generation, lockres->l_name); 702cf8e06f1SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 703cf8e06f1SMark Fasheh OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, 704cf8e06f1SMark Fasheh fp); 705cf8e06f1SMark Fasheh lockres->l_flags |= OCFS2_LOCK_NOCACHE; 706cf8e06f1SMark Fasheh } 707cf8e06f1SMark Fasheh 7089e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 7099e33d69fSJan Kara struct ocfs2_mem_dqinfo *info) 7109e33d69fSJan Kara { 7119e33d69fSJan Kara ocfs2_lock_res_init_once(lockres); 7129e33d69fSJan Kara ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, 
info->dqi_gi.dqi_type, 7139e33d69fSJan Kara 0, lockres->l_name); 7149e33d69fSJan Kara ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, 7159e33d69fSJan Kara OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, 7169e33d69fSJan Kara info); 7179e33d69fSJan Kara } 7189e33d69fSJan Kara 7198dec98edSTao Ma void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres, 7208dec98edSTao Ma struct ocfs2_super *osb, u64 ref_blkno, 7218dec98edSTao Ma unsigned int generation) 7228dec98edSTao Ma { 7238dec98edSTao Ma ocfs2_lock_res_init_once(lockres); 7248dec98edSTao Ma ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno, 7258dec98edSTao Ma generation, lockres->l_name); 7268dec98edSTao Ma ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT, 7278dec98edSTao Ma &ocfs2_refcount_block_lops, osb); 7288dec98edSTao Ma } 7298dec98edSTao Ma 730ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 731ccd979bdSMark Fasheh { 732ccd979bdSMark Fasheh mlog_entry_void(); 733ccd979bdSMark Fasheh 734ccd979bdSMark Fasheh if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 735ccd979bdSMark Fasheh return; 736ccd979bdSMark Fasheh 737ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 738ccd979bdSMark Fasheh 739ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 740ccd979bdSMark Fasheh "Lockres %s is on the blocked list\n", 741ccd979bdSMark Fasheh res->l_name); 742ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 743ccd979bdSMark Fasheh "Lockres %s has mask waiters pending\n", 744ccd979bdSMark Fasheh res->l_name); 745ccd979bdSMark Fasheh mlog_bug_on_msg(spin_is_locked(&res->l_lock), 746ccd979bdSMark Fasheh "Lockres %s is locked\n", 747ccd979bdSMark Fasheh res->l_name); 748ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ro_holders, 749ccd979bdSMark Fasheh "Lockres %s has %u ro holders\n", 750ccd979bdSMark Fasheh res->l_name, res->l_ro_holders); 751ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ex_holders, 752ccd979bdSMark 
Fasheh "Lockres %s has %u ex holders\n", 753ccd979bdSMark Fasheh res->l_name, res->l_ex_holders); 754ccd979bdSMark Fasheh 755ccd979bdSMark Fasheh /* Need to clear out the lock status block for the dlm */ 756ccd979bdSMark Fasheh memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 757ccd979bdSMark Fasheh 758ccd979bdSMark Fasheh res->l_flags = 0UL; 759ccd979bdSMark Fasheh mlog_exit_void(); 760ccd979bdSMark Fasheh } 761ccd979bdSMark Fasheh 762ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 763ccd979bdSMark Fasheh int level) 764ccd979bdSMark Fasheh { 765ccd979bdSMark Fasheh mlog_entry_void(); 766ccd979bdSMark Fasheh 767ccd979bdSMark Fasheh BUG_ON(!lockres); 768ccd979bdSMark Fasheh 769ccd979bdSMark Fasheh switch(level) { 770bd3e7610SJoel Becker case DLM_LOCK_EX: 771ccd979bdSMark Fasheh lockres->l_ex_holders++; 772ccd979bdSMark Fasheh break; 773bd3e7610SJoel Becker case DLM_LOCK_PR: 774ccd979bdSMark Fasheh lockres->l_ro_holders++; 775ccd979bdSMark Fasheh break; 776ccd979bdSMark Fasheh default: 777ccd979bdSMark Fasheh BUG(); 778ccd979bdSMark Fasheh } 779ccd979bdSMark Fasheh 780ccd979bdSMark Fasheh mlog_exit_void(); 781ccd979bdSMark Fasheh } 782ccd979bdSMark Fasheh 783ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 784ccd979bdSMark Fasheh int level) 785ccd979bdSMark Fasheh { 786ccd979bdSMark Fasheh mlog_entry_void(); 787ccd979bdSMark Fasheh 788ccd979bdSMark Fasheh BUG_ON(!lockres); 789ccd979bdSMark Fasheh 790ccd979bdSMark Fasheh switch(level) { 791bd3e7610SJoel Becker case DLM_LOCK_EX: 792ccd979bdSMark Fasheh BUG_ON(!lockres->l_ex_holders); 793ccd979bdSMark Fasheh lockres->l_ex_holders--; 794ccd979bdSMark Fasheh break; 795bd3e7610SJoel Becker case DLM_LOCK_PR: 796ccd979bdSMark Fasheh BUG_ON(!lockres->l_ro_holders); 797ccd979bdSMark Fasheh lockres->l_ro_holders--; 798ccd979bdSMark Fasheh break; 799ccd979bdSMark Fasheh default: 800ccd979bdSMark Fasheh BUG(); 801ccd979bdSMark Fasheh } 
802ccd979bdSMark Fasheh mlog_exit_void(); 803ccd979bdSMark Fasheh } 804ccd979bdSMark Fasheh 805ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock 806ccd979bdSMark Fasheh * levels are EX, PR, and NL. It *will* have to be adjusted when more 807ccd979bdSMark Fasheh * lock types are added. */ 808ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level) 809ccd979bdSMark Fasheh { 810bd3e7610SJoel Becker int new_level = DLM_LOCK_EX; 811ccd979bdSMark Fasheh 812bd3e7610SJoel Becker if (level == DLM_LOCK_EX) 813bd3e7610SJoel Becker new_level = DLM_LOCK_NL; 814bd3e7610SJoel Becker else if (level == DLM_LOCK_PR) 815bd3e7610SJoel Becker new_level = DLM_LOCK_PR; 816ccd979bdSMark Fasheh return new_level; 817ccd979bdSMark Fasheh } 818ccd979bdSMark Fasheh 819ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres, 820ccd979bdSMark Fasheh unsigned long newflags) 821ccd979bdSMark Fasheh { 822800deef3SChristoph Hellwig struct ocfs2_mask_waiter *mw, *tmp; 823ccd979bdSMark Fasheh 824ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 825ccd979bdSMark Fasheh 826ccd979bdSMark Fasheh lockres->l_flags = newflags; 827ccd979bdSMark Fasheh 828800deef3SChristoph Hellwig list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 829ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 830ccd979bdSMark Fasheh continue; 831ccd979bdSMark Fasheh 832ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 833ccd979bdSMark Fasheh mw->mw_status = 0; 834ccd979bdSMark Fasheh complete(&mw->mw_complete); 835ccd979bdSMark Fasheh } 836ccd979bdSMark Fasheh } 837ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 838ccd979bdSMark Fasheh { 839ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags | or); 840ccd979bdSMark Fasheh } 841ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 842ccd979bdSMark 
Fasheh unsigned long clear) 843ccd979bdSMark Fasheh { 844ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags & ~clear); 845ccd979bdSMark Fasheh } 846ccd979bdSMark Fasheh 847ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 848ccd979bdSMark Fasheh { 849ccd979bdSMark Fasheh mlog_entry_void(); 850ccd979bdSMark Fasheh 851ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 852ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 853ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 854bd3e7610SJoel Becker BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 855ccd979bdSMark Fasheh 856ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 857ccd979bdSMark Fasheh if (lockres->l_level <= 858ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 859bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_NL; 860ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 861ccd979bdSMark Fasheh } 862ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 863ccd979bdSMark Fasheh 864ccd979bdSMark Fasheh mlog_exit_void(); 865ccd979bdSMark Fasheh } 866ccd979bdSMark Fasheh 867ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 868ccd979bdSMark Fasheh { 869ccd979bdSMark Fasheh mlog_entry_void(); 870ccd979bdSMark Fasheh 871ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 872ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 873ccd979bdSMark Fasheh 874ccd979bdSMark Fasheh /* Convert from RO to EX doesn't really need anything as our 875ccd979bdSMark Fasheh * information is already up to data. 
Convert from NL to 876ccd979bdSMark Fasheh * *anything* however should mark ourselves as needing an 877ccd979bdSMark Fasheh * update */ 878bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_NL && 879f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 880ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 881ccd979bdSMark Fasheh 882ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 883a1912826SSunil Mushran 884a1912826SSunil Mushran /* 885a1912826SSunil Mushran * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing 886a1912826SSunil Mushran * the OCFS2_LOCK_BUSY flag to prevent the dc thread from 887a1912826SSunil Mushran * downconverting the lock before the upconvert has fully completed. 888a1912826SSunil Mushran */ 889a1912826SSunil Mushran lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 890a1912826SSunil Mushran 891ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 892ccd979bdSMark Fasheh 893ccd979bdSMark Fasheh mlog_exit_void(); 894ccd979bdSMark Fasheh } 895ccd979bdSMark Fasheh 896ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 897ccd979bdSMark Fasheh { 898ccd979bdSMark Fasheh mlog_entry_void(); 899ccd979bdSMark Fasheh 9003cf0c507SRoel Kluin BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 901ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 902ccd979bdSMark Fasheh 903bd3e7610SJoel Becker if (lockres->l_requested > DLM_LOCK_NL && 904f625c979SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 905f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 906ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 907ccd979bdSMark Fasheh 908ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 909ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 910ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 911ccd979bdSMark 
Fasheh 912ccd979bdSMark Fasheh mlog_exit_void(); 913ccd979bdSMark Fasheh } 914ccd979bdSMark Fasheh 915ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 916ccd979bdSMark Fasheh int level) 917ccd979bdSMark Fasheh { 918ccd979bdSMark Fasheh int needs_downconvert = 0; 919ccd979bdSMark Fasheh mlog_entry_void(); 920ccd979bdSMark Fasheh 921ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 922ccd979bdSMark Fasheh 923ccd979bdSMark Fasheh if (level > lockres->l_blocking) { 924ccd979bdSMark Fasheh /* only schedule a downconvert if we haven't already scheduled 925ccd979bdSMark Fasheh * one that goes low enough to satisfy the level we're 926ccd979bdSMark Fasheh * blocking. this also catches the case where we get 927ccd979bdSMark Fasheh * duplicate BASTs */ 928ccd979bdSMark Fasheh if (ocfs2_highest_compat_lock_level(level) < 929ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) 930ccd979bdSMark Fasheh needs_downconvert = 1; 931ccd979bdSMark Fasheh 932ccd979bdSMark Fasheh lockres->l_blocking = level; 933ccd979bdSMark Fasheh } 934ccd979bdSMark Fasheh 9350b94a909SWengang Wang if (needs_downconvert) 9360b94a909SWengang Wang lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 9370b94a909SWengang Wang 938ccd979bdSMark Fasheh mlog_exit(needs_downconvert); 939ccd979bdSMark Fasheh return needs_downconvert; 940ccd979bdSMark Fasheh } 941ccd979bdSMark Fasheh 942de551246SJoel Becker /* 943de551246SJoel Becker * OCFS2_LOCK_PENDING and l_pending_gen. 944de551246SJoel Becker * 945de551246SJoel Becker * Why does OCFS2_LOCK_PENDING exist? To close a race between setting 946de551246SJoel Becker * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() 947de551246SJoel Becker * for more details on the race. 948de551246SJoel Becker * 949de551246SJoel Becker * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces 950de551246SJoel Becker * a race on itself. 
In o2dlm, we can get the ast before ocfs2_dlm_lock() 951de551246SJoel Becker * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear 952de551246SJoel Becker * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, 953de551246SJoel Becker * the caller is going to try to clear PENDING again. If nothing else is 954de551246SJoel Becker * happening, __lockres_clear_pending() sees PENDING is unset and does 955de551246SJoel Becker * nothing. 956de551246SJoel Becker * 957de551246SJoel Becker * But what if another path (eg downconvert thread) has just started a 958de551246SJoel Becker * new locking action? The other path has re-set PENDING. Our path 959de551246SJoel Becker * cannot clear PENDING, because that will re-open the original race 960de551246SJoel Becker * window. 961de551246SJoel Becker * 962de551246SJoel Becker * [Example] 963de551246SJoel Becker * 964de551246SJoel Becker * ocfs2_meta_lock() 965de551246SJoel Becker * ocfs2_cluster_lock() 966de551246SJoel Becker * set BUSY 967de551246SJoel Becker * set PENDING 968de551246SJoel Becker * drop l_lock 969de551246SJoel Becker * ocfs2_dlm_lock() 970de551246SJoel Becker * ocfs2_locking_ast() ocfs2_downconvert_thread() 971de551246SJoel Becker * clear PENDING ocfs2_unblock_lock() 972de551246SJoel Becker * take_l_lock 973de551246SJoel Becker * !BUSY 974de551246SJoel Becker * ocfs2_prepare_downconvert() 975de551246SJoel Becker * set BUSY 976de551246SJoel Becker * set PENDING 977de551246SJoel Becker * drop l_lock 978de551246SJoel Becker * take l_lock 979de551246SJoel Becker * clear PENDING 980de551246SJoel Becker * drop l_lock 981de551246SJoel Becker * <window> 982de551246SJoel Becker * ocfs2_dlm_lock() 983de551246SJoel Becker * 984de551246SJoel Becker * So as you can see, we now have a window where l_lock is not held, 985de551246SJoel Becker * PENDING is not set, and ocfs2_dlm_lock() has not been called. 
986de551246SJoel Becker * 987de551246SJoel Becker * The core problem is that ocfs2_cluster_lock() has cleared the PENDING 988de551246SJoel Becker * set by ocfs2_prepare_downconvert(). That wasn't nice. 989de551246SJoel Becker * 990de551246SJoel Becker * To solve this we introduce l_pending_gen. A call to 991de551246SJoel Becker * lockres_clear_pending() will only do so when it is passed a generation 992de551246SJoel Becker * number that matches the lockres. lockres_set_pending() will return the 993de551246SJoel Becker * current generation number. When ocfs2_cluster_lock() goes to clear 994de551246SJoel Becker * PENDING, it passes the generation it got from set_pending(). In our 995de551246SJoel Becker * example above, the generation numbers will *not* match. Thus, 996de551246SJoel Becker * ocfs2_cluster_lock() will not clear the PENDING set by 997de551246SJoel Becker * ocfs2_prepare_downconvert(). 998de551246SJoel Becker */ 999de551246SJoel Becker 1000de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */ 1001de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, 1002de551246SJoel Becker unsigned int generation, 1003de551246SJoel Becker struct ocfs2_super *osb) 1004de551246SJoel Becker { 1005de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 1006de551246SJoel Becker 1007de551246SJoel Becker /* 1008de551246SJoel Becker * The ast and locking functions can race us here. The winner 1009de551246SJoel Becker * will clear pending, the loser will not. 
1010de551246SJoel Becker */ 1011de551246SJoel Becker if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || 1012de551246SJoel Becker (lockres->l_pending_gen != generation)) 1013de551246SJoel Becker return; 1014de551246SJoel Becker 1015de551246SJoel Becker lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); 1016de551246SJoel Becker lockres->l_pending_gen++; 1017de551246SJoel Becker 1018de551246SJoel Becker /* 1019de551246SJoel Becker * The downconvert thread may have skipped us because we 1020de551246SJoel Becker * were PENDING. Wake it up. 1021de551246SJoel Becker */ 1022de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 1023de551246SJoel Becker ocfs2_wake_downconvert_thread(osb); 1024de551246SJoel Becker } 1025de551246SJoel Becker 1026de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */ 1027de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres, 1028de551246SJoel Becker unsigned int generation, 1029de551246SJoel Becker struct ocfs2_super *osb) 1030de551246SJoel Becker { 1031de551246SJoel Becker unsigned long flags; 1032de551246SJoel Becker 1033de551246SJoel Becker spin_lock_irqsave(&lockres->l_lock, flags); 1034de551246SJoel Becker __lockres_clear_pending(lockres, generation, osb); 1035de551246SJoel Becker spin_unlock_irqrestore(&lockres->l_lock, flags); 1036de551246SJoel Becker } 1037de551246SJoel Becker 1038de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) 1039de551246SJoel Becker { 1040de551246SJoel Becker assert_spin_locked(&lockres->l_lock); 1041de551246SJoel Becker BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 1042de551246SJoel Becker 1043de551246SJoel Becker lockres_or_flags(lockres, OCFS2_LOCK_PENDING); 1044de551246SJoel Becker 1045de551246SJoel Becker return lockres->l_pending_gen; 1046de551246SJoel Becker } 1047de551246SJoel Becker 1048de551246SJoel Becker 1049c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level) 
1050ccd979bdSMark Fasheh { 1051a796d286SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1052aa2623adSMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1053ccd979bdSMark Fasheh int needs_downconvert; 1054ccd979bdSMark Fasheh unsigned long flags; 1055ccd979bdSMark Fasheh 1056bd3e7610SJoel Becker BUG_ON(level <= DLM_LOCK_NL); 1057ccd979bdSMark Fasheh 1058aa2623adSMark Fasheh mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 1059aa2623adSMark Fasheh lockres->l_name, level, lockres->l_level, 1060aa2623adSMark Fasheh ocfs2_lock_type_string(lockres->l_type)); 1061aa2623adSMark Fasheh 1062cf8e06f1SMark Fasheh /* 1063cf8e06f1SMark Fasheh * We can skip the bast for locks which don't enable caching - 1064cf8e06f1SMark Fasheh * they'll be dropped at the earliest possible time anyway. 1065cf8e06f1SMark Fasheh */ 1066cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_NOCACHE) 1067cf8e06f1SMark Fasheh return; 1068cf8e06f1SMark Fasheh 1069ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1070ccd979bdSMark Fasheh needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 1071ccd979bdSMark Fasheh if (needs_downconvert) 1072ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 1073ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1074ccd979bdSMark Fasheh 1075d680efe9SMark Fasheh wake_up(&lockres->l_event); 1076d680efe9SMark Fasheh 107734d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 1078ccd979bdSMark Fasheh } 1079ccd979bdSMark Fasheh 1080c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb) 1081ccd979bdSMark Fasheh { 1082a796d286SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 1083de551246SJoel Becker struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1084ccd979bdSMark Fasheh unsigned long flags; 10851693a5c0SDavid Teigland int status; 1086ccd979bdSMark Fasheh 1087ccd979bdSMark Fasheh 
spin_lock_irqsave(&lockres->l_lock, flags); 1088ccd979bdSMark Fasheh 10891693a5c0SDavid Teigland status = ocfs2_dlm_lock_status(&lockres->l_lksb); 10901693a5c0SDavid Teigland 10911693a5c0SDavid Teigland if (status == -EAGAIN) { 10921693a5c0SDavid Teigland lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 10931693a5c0SDavid Teigland goto out; 10941693a5c0SDavid Teigland } 10951693a5c0SDavid Teigland 10961693a5c0SDavid Teigland if (status) { 10978f2c9c1bSJoel Becker mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", 10981693a5c0SDavid Teigland lockres->l_name, status); 1099ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1100ccd979bdSMark Fasheh return; 1101ccd979bdSMark Fasheh } 1102ccd979bdSMark Fasheh 1103ccd979bdSMark Fasheh switch(lockres->l_action) { 1104ccd979bdSMark Fasheh case OCFS2_AST_ATTACH: 1105ccd979bdSMark Fasheh ocfs2_generic_handle_attach_action(lockres); 1106e92d57dfSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); 1107ccd979bdSMark Fasheh break; 1108ccd979bdSMark Fasheh case OCFS2_AST_CONVERT: 1109ccd979bdSMark Fasheh ocfs2_generic_handle_convert_action(lockres); 1110ccd979bdSMark Fasheh break; 1111ccd979bdSMark Fasheh case OCFS2_AST_DOWNCONVERT: 1112ccd979bdSMark Fasheh ocfs2_generic_handle_downconvert_action(lockres); 1113ccd979bdSMark Fasheh break; 1114ccd979bdSMark Fasheh default: 1115e92d57dfSMark Fasheh mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " 1116e92d57dfSMark Fasheh "lockres flags = 0x%lx, unlock action: %u\n", 1117e92d57dfSMark Fasheh lockres->l_name, lockres->l_action, lockres->l_flags, 1118e92d57dfSMark Fasheh lockres->l_unlock_action); 1119ccd979bdSMark Fasheh BUG(); 1120ccd979bdSMark Fasheh } 11211693a5c0SDavid Teigland out: 1122ccd979bdSMark Fasheh /* set it to something invalid so if we get called again we 1123ccd979bdSMark Fasheh * can catch it. 
*/ 1124ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1125ccd979bdSMark Fasheh 1126de551246SJoel Becker /* Did we try to cancel this lock? Clear that state */ 1127de551246SJoel Becker if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) 1128de551246SJoel Becker lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1129de551246SJoel Becker 1130de551246SJoel Becker /* 1131de551246SJoel Becker * We may have beaten the locking functions here. We certainly 1132de551246SJoel Becker * know that dlm_lock() has been called :-) 1133de551246SJoel Becker * Because we can't have two lock calls in flight at once, we 1134de551246SJoel Becker * can use lockres->l_pending_gen. 1135de551246SJoel Becker */ 1136de551246SJoel Becker __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); 1137de551246SJoel Becker 1138ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1139d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1140ccd979bdSMark Fasheh } 1141ccd979bdSMark Fasheh 1142ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 1143ccd979bdSMark Fasheh int convert) 1144ccd979bdSMark Fasheh { 1145ccd979bdSMark Fasheh unsigned long flags; 1146ccd979bdSMark Fasheh 1147ccd979bdSMark Fasheh mlog_entry_void(); 1148ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1149ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 1150a1912826SSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1151ccd979bdSMark Fasheh if (convert) 1152ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 1153ccd979bdSMark Fasheh else 1154ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 1155ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1156ccd979bdSMark Fasheh 1157ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1158ccd979bdSMark Fasheh mlog_exit_void(); 1159ccd979bdSMark Fasheh } 1160ccd979bdSMark Fasheh 1161ccd979bdSMark Fasheh 
/* Note: If we detect another process working on the lock (i.e., 1162ccd979bdSMark Fasheh * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller 1163ccd979bdSMark Fasheh * to do the right thing in that case. 1164ccd979bdSMark Fasheh */ 1165ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 1166ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1167ccd979bdSMark Fasheh int level, 1168bd3e7610SJoel Becker u32 dlm_flags) 1169ccd979bdSMark Fasheh { 1170ccd979bdSMark Fasheh int ret = 0; 1171ccd979bdSMark Fasheh unsigned long flags; 1172de551246SJoel Becker unsigned int gen; 1173ccd979bdSMark Fasheh 1174ccd979bdSMark Fasheh mlog_entry_void(); 1175ccd979bdSMark Fasheh 1176bd3e7610SJoel Becker mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, 1177ccd979bdSMark Fasheh dlm_flags); 1178ccd979bdSMark Fasheh 1179ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1180ccd979bdSMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || 1181ccd979bdSMark Fasheh (lockres->l_flags & OCFS2_LOCK_BUSY)) { 1182ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1183ccd979bdSMark Fasheh goto bail; 1184ccd979bdSMark Fasheh } 1185ccd979bdSMark Fasheh 1186ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1187ccd979bdSMark Fasheh lockres->l_requested = level; 1188ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1189de551246SJoel Becker gen = lockres_set_pending(lockres); 1190ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1191ccd979bdSMark Fasheh 11924670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1193ccd979bdSMark Fasheh level, 1194ccd979bdSMark Fasheh &lockres->l_lksb, 1195ccd979bdSMark Fasheh dlm_flags, 1196ccd979bdSMark Fasheh lockres->l_name, 1197a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 1198de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 11997431cd7eSJoel Becker if (ret) { 12007431cd7eSJoel Becker 
ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1201ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1202ccd979bdSMark Fasheh } 1203ccd979bdSMark Fasheh 12047431cd7eSJoel Becker mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); 1205ccd979bdSMark Fasheh 1206ccd979bdSMark Fasheh bail: 1207ccd979bdSMark Fasheh mlog_exit(ret); 1208ccd979bdSMark Fasheh return ret; 1209ccd979bdSMark Fasheh } 1210ccd979bdSMark Fasheh 1211ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 1212ccd979bdSMark Fasheh int flag) 1213ccd979bdSMark Fasheh { 1214ccd979bdSMark Fasheh unsigned long flags; 1215ccd979bdSMark Fasheh int ret; 1216ccd979bdSMark Fasheh 1217ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1218ccd979bdSMark Fasheh ret = lockres->l_flags & flag; 1219ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1220ccd979bdSMark Fasheh 1221ccd979bdSMark Fasheh return ret; 1222ccd979bdSMark Fasheh } 1223ccd979bdSMark Fasheh 1224ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) 1225ccd979bdSMark Fasheh 1226ccd979bdSMark Fasheh { 1227ccd979bdSMark Fasheh wait_event(lockres->l_event, 1228ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); 1229ccd979bdSMark Fasheh } 1230ccd979bdSMark Fasheh 1231ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) 1232ccd979bdSMark Fasheh 1233ccd979bdSMark Fasheh { 1234ccd979bdSMark Fasheh wait_event(lockres->l_event, 1235ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); 1236ccd979bdSMark Fasheh } 1237ccd979bdSMark Fasheh 1238ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf 1239ccd979bdSMark Fasheh * of another node, and return true if the currently wanted 1240ccd979bdSMark Fasheh * level will be compatible with it. 
*/ 1241ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 1242ccd979bdSMark Fasheh int wanted) 1243ccd979bdSMark Fasheh { 1244ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 1245ccd979bdSMark Fasheh 1246ccd979bdSMark Fasheh return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 1247ccd979bdSMark Fasheh } 1248ccd979bdSMark Fasheh 1249ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 1250ccd979bdSMark Fasheh { 1251ccd979bdSMark Fasheh INIT_LIST_HEAD(&mw->mw_item); 1252ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 12538ddb7b00SSunil Mushran ocfs2_init_start_time(mw); 1254ccd979bdSMark Fasheh } 1255ccd979bdSMark Fasheh 1256ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 1257ccd979bdSMark Fasheh { 1258ccd979bdSMark Fasheh wait_for_completion(&mw->mw_complete); 1259ccd979bdSMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 1260ccd979bdSMark Fasheh INIT_COMPLETION(mw->mw_complete); 1261ccd979bdSMark Fasheh return mw->mw_status; 1262ccd979bdSMark Fasheh } 1263ccd979bdSMark Fasheh 1264ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 1265ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw, 1266ccd979bdSMark Fasheh unsigned long mask, 1267ccd979bdSMark Fasheh unsigned long goal) 1268ccd979bdSMark Fasheh { 1269ccd979bdSMark Fasheh BUG_ON(!list_empty(&mw->mw_item)); 1270ccd979bdSMark Fasheh 1271ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 1272ccd979bdSMark Fasheh 1273ccd979bdSMark Fasheh list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 1274ccd979bdSMark Fasheh mw->mw_mask = mask; 1275ccd979bdSMark Fasheh mw->mw_goal = goal; 1276ccd979bdSMark Fasheh } 1277ccd979bdSMark Fasheh 1278ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY 1279ccd979bdSMark Fasheh * if the mask still hadn't 
reached its goal */ 1280ccd979bdSMark Fasheh static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, 1281ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw) 1282ccd979bdSMark Fasheh { 1283ccd979bdSMark Fasheh unsigned long flags; 1284ccd979bdSMark Fasheh int ret = 0; 1285ccd979bdSMark Fasheh 1286ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1287ccd979bdSMark Fasheh if (!list_empty(&mw->mw_item)) { 1288ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 1289ccd979bdSMark Fasheh ret = -EBUSY; 1290ccd979bdSMark Fasheh 1291ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 1292ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 1293ccd979bdSMark Fasheh } 1294ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1295ccd979bdSMark Fasheh 1296ccd979bdSMark Fasheh return ret; 1297ccd979bdSMark Fasheh 1298ccd979bdSMark Fasheh } 1299ccd979bdSMark Fasheh 1300cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, 1301cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres) 1302cf8e06f1SMark Fasheh { 1303cf8e06f1SMark Fasheh int ret; 1304cf8e06f1SMark Fasheh 1305cf8e06f1SMark Fasheh ret = wait_for_completion_interruptible(&mw->mw_complete); 1306cf8e06f1SMark Fasheh if (ret) 1307cf8e06f1SMark Fasheh lockres_remove_mask_waiter(lockres, mw); 1308cf8e06f1SMark Fasheh else 1309cf8e06f1SMark Fasheh ret = mw->mw_status; 1310cf8e06f1SMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 1311cf8e06f1SMark Fasheh INIT_COMPLETION(mw->mw_complete); 1312cf8e06f1SMark Fasheh return ret; 1313cf8e06f1SMark Fasheh } 1314cf8e06f1SMark Fasheh 1315cb25797dSJan Kara static int __ocfs2_cluster_lock(struct ocfs2_super *osb, 1316ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1317ccd979bdSMark Fasheh int level, 1318bd3e7610SJoel Becker u32 lkm_flags, 1319cb25797dSJan Kara int arg_flags, 1320cb25797dSJan Kara int l_subclass, 1321cb25797dSJan Kara unsigned long 
caller_ip) 1322ccd979bdSMark Fasheh { 1323ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 1324ccd979bdSMark Fasheh int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1325ccd979bdSMark Fasheh int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 1326ccd979bdSMark Fasheh unsigned long flags; 1327de551246SJoel Becker unsigned int gen; 13281693a5c0SDavid Teigland int noqueue_attempted = 0; 1329ccd979bdSMark Fasheh 1330ccd979bdSMark Fasheh mlog_entry_void(); 1331ccd979bdSMark Fasheh 1332ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 1333ccd979bdSMark Fasheh 1334b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1335bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 1336b80fc012SMark Fasheh 1337ccd979bdSMark Fasheh again: 1338ccd979bdSMark Fasheh wait = 0; 1339ccd979bdSMark Fasheh 1340a1912826SSunil Mushran spin_lock_irqsave(&lockres->l_lock, flags); 1341a1912826SSunil Mushran 1342ccd979bdSMark Fasheh if (catch_signals && signal_pending(current)) { 1343ccd979bdSMark Fasheh ret = -ERESTARTSYS; 1344a1912826SSunil Mushran goto unlock; 1345ccd979bdSMark Fasheh } 1346ccd979bdSMark Fasheh 1347ccd979bdSMark Fasheh mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 1348ccd979bdSMark Fasheh "Cluster lock called on freeing lockres %s! flags " 1349ccd979bdSMark Fasheh "0x%lx\n", lockres->l_name, lockres->l_flags); 1350ccd979bdSMark Fasheh 1351ccd979bdSMark Fasheh /* We only compare against the currently granted level 1352ccd979bdSMark Fasheh * here. If the lock is blocked waiting on a downconvert, 1353ccd979bdSMark Fasheh * we'll get caught below. */ 1354ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY && 1355ccd979bdSMark Fasheh level > lockres->l_level) { 1356ccd979bdSMark Fasheh /* is someone sitting in dlm_lock? If so, wait on 1357ccd979bdSMark Fasheh * them. 
*/ 1358ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1359ccd979bdSMark Fasheh wait = 1; 1360ccd979bdSMark Fasheh goto unlock; 1361ccd979bdSMark Fasheh } 1362ccd979bdSMark Fasheh 1363a1912826SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) { 1364a1912826SSunil Mushran /* 1365a1912826SSunil Mushran * We've upconverted. If the lock now has a level we can 1366a1912826SSunil Mushran * work with, we take it. If, however, the lock is not at the 1367a1912826SSunil Mushran * required level, we go thru the full cycle. One way this could 1368a1912826SSunil Mushran * happen is if a process requesting an upconvert to PR is 1369a1912826SSunil Mushran * closely followed by another requesting upconvert to an EX. 1370a1912826SSunil Mushran * If the process requesting EX lands here, we want it to 1371a1912826SSunil Mushran * continue attempting to upconvert and let the process 1372a1912826SSunil Mushran * requesting PR take the lock. 1373a1912826SSunil Mushran * If multiple processes request upconvert to PR, the first one 1374a1912826SSunil Mushran * here will take the lock. The others will have to go thru the 1375a1912826SSunil Mushran * OCFS2_LOCK_BLOCKED check to ensure that there is no pending 1376a1912826SSunil Mushran * downconvert request. 
1377a1912826SSunil Mushran */ 1378a1912826SSunil Mushran if (level <= lockres->l_level) 1379a1912826SSunil Mushran goto update_holders; 1380a1912826SSunil Mushran } 1381a1912826SSunil Mushran 1382ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 1383ccd979bdSMark Fasheh !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 1384ccd979bdSMark Fasheh /* is the lock is currently blocked on behalf of 1385ccd979bdSMark Fasheh * another node */ 1386ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); 1387ccd979bdSMark Fasheh wait = 1; 1388ccd979bdSMark Fasheh goto unlock; 1389ccd979bdSMark Fasheh } 1390ccd979bdSMark Fasheh 1391ccd979bdSMark Fasheh if (level > lockres->l_level) { 13921693a5c0SDavid Teigland if (noqueue_attempted > 0) { 13931693a5c0SDavid Teigland ret = -EAGAIN; 13941693a5c0SDavid Teigland goto unlock; 13951693a5c0SDavid Teigland } 13961693a5c0SDavid Teigland if (lkm_flags & DLM_LKF_NOQUEUE) 13971693a5c0SDavid Teigland noqueue_attempted = 1; 13981693a5c0SDavid Teigland 1399ccd979bdSMark Fasheh if (lockres->l_action != OCFS2_AST_INVALID) 1400ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres %s has action %u pending\n", 1401ccd979bdSMark Fasheh lockres->l_name, lockres->l_action); 1402ccd979bdSMark Fasheh 1403019d1b22SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1404019d1b22SMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 1405bd3e7610SJoel Becker lkm_flags &= ~DLM_LKF_CONVERT; 1406019d1b22SMark Fasheh } else { 1407ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1408bd3e7610SJoel Becker lkm_flags |= DLM_LKF_CONVERT; 1409019d1b22SMark Fasheh } 1410019d1b22SMark Fasheh 1411ccd979bdSMark Fasheh lockres->l_requested = level; 1412ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1413de551246SJoel Becker gen = lockres_set_pending(lockres); 1414ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1415ccd979bdSMark Fasheh 1416bd3e7610SJoel Becker BUG_ON(level == 
DLM_LOCK_IV); 1417bd3e7610SJoel Becker BUG_ON(level == DLM_LOCK_NL); 1418ccd979bdSMark Fasheh 1419ccd979bdSMark Fasheh mlog(0, "lock %s, convert from %d to level = %d\n", 1420ccd979bdSMark Fasheh lockres->l_name, lockres->l_level, level); 1421ccd979bdSMark Fasheh 1422ccd979bdSMark Fasheh /* call dlm_lock to upgrade lock now */ 14234670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 1424ccd979bdSMark Fasheh level, 1425ccd979bdSMark Fasheh &lockres->l_lksb, 1426019d1b22SMark Fasheh lkm_flags, 1427ccd979bdSMark Fasheh lockres->l_name, 1428a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 1429de551246SJoel Becker lockres_clear_pending(lockres, gen, osb); 14307431cd7eSJoel Becker if (ret) { 14317431cd7eSJoel Becker if (!(lkm_flags & DLM_LKF_NOQUEUE) || 14327431cd7eSJoel Becker (ret != -EAGAIN)) { 143324ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", 14347431cd7eSJoel Becker ret, lockres); 1435ccd979bdSMark Fasheh } 1436ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1437ccd979bdSMark Fasheh goto out; 1438ccd979bdSMark Fasheh } 1439ccd979bdSMark Fasheh 144073ac36eaSColy Li mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n", 1441ccd979bdSMark Fasheh lockres->l_name); 1442ccd979bdSMark Fasheh 1443ccd979bdSMark Fasheh /* At this point we've gone inside the dlm and need to 1444ccd979bdSMark Fasheh * complete our work regardless. */ 1445ccd979bdSMark Fasheh catch_signals = 0; 1446ccd979bdSMark Fasheh 1447ccd979bdSMark Fasheh /* wait for busy to clear and carry on */ 1448ccd979bdSMark Fasheh goto again; 1449ccd979bdSMark Fasheh } 1450ccd979bdSMark Fasheh 1451a1912826SSunil Mushran update_holders: 1452ccd979bdSMark Fasheh /* Ok, if we get here then we're good to go. 
*/ 1453ccd979bdSMark Fasheh ocfs2_inc_holders(lockres, level); 1454ccd979bdSMark Fasheh 1455ccd979bdSMark Fasheh ret = 0; 1456ccd979bdSMark Fasheh unlock: 1457a1912826SSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); 1458a1912826SSunil Mushran 1459ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1460ccd979bdSMark Fasheh out: 1461ccd979bdSMark Fasheh /* 1462ccd979bdSMark Fasheh * This is helping work around a lock inversion between the page lock 1463ccd979bdSMark Fasheh * and dlm locks. One path holds the page lock while calling aops 1464ccd979bdSMark Fasheh * which block acquiring dlm locks. The voting thread holds dlm 1465ccd979bdSMark Fasheh * locks while acquiring page locks while down converting data locks. 1466ccd979bdSMark Fasheh * This block is helping an aop path notice the inversion and back 1467ccd979bdSMark Fasheh * off to unlock its page lock before trying the dlm lock again. 1468ccd979bdSMark Fasheh */ 1469ccd979bdSMark Fasheh if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && 1470ccd979bdSMark Fasheh mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { 1471ccd979bdSMark Fasheh wait = 0; 1472ccd979bdSMark Fasheh if (lockres_remove_mask_waiter(lockres, &mw)) 1473ccd979bdSMark Fasheh ret = -EAGAIN; 1474ccd979bdSMark Fasheh else 1475ccd979bdSMark Fasheh goto again; 1476ccd979bdSMark Fasheh } 1477ccd979bdSMark Fasheh if (wait) { 1478ccd979bdSMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1479ccd979bdSMark Fasheh if (ret == 0) 1480ccd979bdSMark Fasheh goto again; 1481ccd979bdSMark Fasheh mlog_errno(ret); 1482ccd979bdSMark Fasheh } 14838ddb7b00SSunil Mushran ocfs2_update_lock_stats(lockres, level, &mw, ret); 1484ccd979bdSMark Fasheh 1485cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 1486cb25797dSJan Kara if (!ret && lockres->l_lockdep_map.key != NULL) { 1487cb25797dSJan Kara if (level == DLM_LOCK_PR) 1488cb25797dSJan Kara rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, 1489cb25797dSJan Kara !!(arg_flags & 
OCFS2_META_LOCK_NOQUEUE), 1490cb25797dSJan Kara caller_ip); 1491cb25797dSJan Kara else 1492cb25797dSJan Kara rwsem_acquire(&lockres->l_lockdep_map, l_subclass, 1493cb25797dSJan Kara !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), 1494cb25797dSJan Kara caller_ip); 1495cb25797dSJan Kara } 1496cb25797dSJan Kara #endif 1497ccd979bdSMark Fasheh mlog_exit(ret); 1498ccd979bdSMark Fasheh return ret; 1499ccd979bdSMark Fasheh } 1500ccd979bdSMark Fasheh 1501cb25797dSJan Kara static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, 1502ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1503cb25797dSJan Kara int level, 1504cb25797dSJan Kara u32 lkm_flags, 1505cb25797dSJan Kara int arg_flags) 1506cb25797dSJan Kara { 1507cb25797dSJan Kara return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, 1508cb25797dSJan Kara 0, _RET_IP_); 1509cb25797dSJan Kara } 1510cb25797dSJan Kara 1511cb25797dSJan Kara 1512cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, 1513cb25797dSJan Kara struct ocfs2_lock_res *lockres, 1514cb25797dSJan Kara int level, 1515cb25797dSJan Kara unsigned long caller_ip) 1516ccd979bdSMark Fasheh { 1517ccd979bdSMark Fasheh unsigned long flags; 1518ccd979bdSMark Fasheh 1519ccd979bdSMark Fasheh mlog_entry_void(); 1520ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1521ccd979bdSMark Fasheh ocfs2_dec_holders(lockres, level); 152234d024f8SMark Fasheh ocfs2_downconvert_on_unlock(osb, lockres); 1523ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1524cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC 1525cb25797dSJan Kara if (lockres->l_lockdep_map.key != NULL) 1526cb25797dSJan Kara rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); 1527cb25797dSJan Kara #endif 1528ccd979bdSMark Fasheh mlog_exit_void(); 1529ccd979bdSMark Fasheh } 1530ccd979bdSMark Fasheh 1531da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1532d680efe9SMark Fasheh struct ocfs2_lock_res *lockres, 
153324c19ef4SMark Fasheh int ex, 153424c19ef4SMark Fasheh int local) 1535ccd979bdSMark Fasheh { 1536bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1537ccd979bdSMark Fasheh unsigned long flags; 1538bd3e7610SJoel Becker u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; 1539ccd979bdSMark Fasheh 1540ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1541ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1542ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1543ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1544ccd979bdSMark Fasheh 154524c19ef4SMark Fasheh return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1546ccd979bdSMark Fasheh } 1547ccd979bdSMark Fasheh 1548ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping 1549ccd979bdSMark Fasheh * the normal cluster directory lookup. Use this ONLY on newly created 1550ccd979bdSMark Fasheh * inodes which other nodes can't possibly see, and which haven't been 1551ccd979bdSMark Fasheh * hashed in the inode hash yet. This can give us a good performance 1552ccd979bdSMark Fasheh * increase as it'll skip the network broadcast normally associated 1553ccd979bdSMark Fasheh * with creating a new lock resource. */ 1554ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode) 1555ccd979bdSMark Fasheh { 1556ccd979bdSMark Fasheh int ret; 1557d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1558ccd979bdSMark Fasheh 1559ccd979bdSMark Fasheh BUG_ON(!inode); 1560ccd979bdSMark Fasheh BUG_ON(!ocfs2_inode_is_new(inode)); 1561ccd979bdSMark Fasheh 1562ccd979bdSMark Fasheh mlog_entry_void(); 1563ccd979bdSMark Fasheh 1564b0697053SMark Fasheh mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1565ccd979bdSMark Fasheh 1566ccd979bdSMark Fasheh /* NOTE: That we don't increment any of the holder counts, nor 1567ccd979bdSMark Fasheh * do we add anything to a journal handle. 
Since this is 1568ccd979bdSMark Fasheh * supposed to be a new inode which the cluster doesn't know 1569ccd979bdSMark Fasheh * about yet, there is no need to. As far as the LVB handling 1570ccd979bdSMark Fasheh * is concerned, this is basically like acquiring an EX lock 1571ccd979bdSMark Fasheh * on a resource which has an invalid one -- we'll set it 1572ccd979bdSMark Fasheh * valid when we release the EX. */ 1573ccd979bdSMark Fasheh 157424c19ef4SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1575ccd979bdSMark Fasheh if (ret) { 1576ccd979bdSMark Fasheh mlog_errno(ret); 1577ccd979bdSMark Fasheh goto bail; 1578ccd979bdSMark Fasheh } 1579ccd979bdSMark Fasheh 158024c19ef4SMark Fasheh /* 1581bd3e7610SJoel Becker * We don't want to use DLM_LKF_LOCAL on a meta data lock as they 158224c19ef4SMark Fasheh * don't use a generation in their lock names. 158324c19ef4SMark Fasheh */ 1584e63aecb6SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1585ccd979bdSMark Fasheh if (ret) { 1586ccd979bdSMark Fasheh mlog_errno(ret); 1587ccd979bdSMark Fasheh goto bail; 1588ccd979bdSMark Fasheh } 1589ccd979bdSMark Fasheh 159050008630STiger Yang ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 159150008630STiger Yang if (ret) { 159250008630STiger Yang mlog_errno(ret); 159350008630STiger Yang goto bail; 159450008630STiger Yang } 159550008630STiger Yang 1596ccd979bdSMark Fasheh bail: 1597ccd979bdSMark Fasheh mlog_exit(ret); 1598ccd979bdSMark Fasheh return ret; 1599ccd979bdSMark Fasheh } 1600ccd979bdSMark Fasheh 1601ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write) 1602ccd979bdSMark Fasheh { 1603ccd979bdSMark Fasheh int status, level; 1604ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres; 1605c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1606ccd979bdSMark Fasheh 1607ccd979bdSMark Fasheh BUG_ON(!inode); 1608ccd979bdSMark Fasheh 1609ccd979bdSMark Fasheh 
mlog_entry_void(); 1610ccd979bdSMark Fasheh 1611b0697053SMark Fasheh mlog(0, "inode %llu take %s RW lock\n", 1612b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1613ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1614ccd979bdSMark Fasheh 1615d92bc512SColy Li if (ocfs2_mount_local(osb)) { 1616d92bc512SColy Li mlog_exit(0); 1617c271c5c2SSunil Mushran return 0; 1618d92bc512SColy Li } 1619c271c5c2SSunil Mushran 1620ccd979bdSMark Fasheh lockres = &OCFS2_I(inode)->ip_rw_lockres; 1621ccd979bdSMark Fasheh 1622bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1623ccd979bdSMark Fasheh 1624ccd979bdSMark Fasheh status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1625ccd979bdSMark Fasheh 0); 1626ccd979bdSMark Fasheh if (status < 0) 1627ccd979bdSMark Fasheh mlog_errno(status); 1628ccd979bdSMark Fasheh 1629ccd979bdSMark Fasheh mlog_exit(status); 1630ccd979bdSMark Fasheh return status; 1631ccd979bdSMark Fasheh } 1632ccd979bdSMark Fasheh 1633ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write) 1634ccd979bdSMark Fasheh { 1635bd3e7610SJoel Becker int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 1636ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1637c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1638ccd979bdSMark Fasheh 1639ccd979bdSMark Fasheh mlog_entry_void(); 1640ccd979bdSMark Fasheh 1641b0697053SMark Fasheh mlog(0, "inode %llu drop %s RW lock\n", 1642b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1643ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1644ccd979bdSMark Fasheh 1645c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1646ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1647ccd979bdSMark Fasheh 1648ccd979bdSMark Fasheh mlog_exit_void(); 1649ccd979bdSMark Fasheh } 1650ccd979bdSMark Fasheh 165150008630STiger Yang /* 165250008630STiger Yang * ocfs2_open_lock always get PR mode lock. 
165350008630STiger Yang */ 165450008630STiger Yang int ocfs2_open_lock(struct inode *inode) 165550008630STiger Yang { 165650008630STiger Yang int status = 0; 165750008630STiger Yang struct ocfs2_lock_res *lockres; 165850008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 165950008630STiger Yang 166050008630STiger Yang BUG_ON(!inode); 166150008630STiger Yang 166250008630STiger Yang mlog_entry_void(); 166350008630STiger Yang 166450008630STiger Yang mlog(0, "inode %llu take PRMODE open lock\n", 166550008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 166650008630STiger Yang 166750008630STiger Yang if (ocfs2_mount_local(osb)) 166850008630STiger Yang goto out; 166950008630STiger Yang 167050008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 167150008630STiger Yang 167250008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1673bd3e7610SJoel Becker DLM_LOCK_PR, 0, 0); 167450008630STiger Yang if (status < 0) 167550008630STiger Yang mlog_errno(status); 167650008630STiger Yang 167750008630STiger Yang out: 167850008630STiger Yang mlog_exit(status); 167950008630STiger Yang return status; 168050008630STiger Yang } 168150008630STiger Yang 168250008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write) 168350008630STiger Yang { 168450008630STiger Yang int status = 0, level; 168550008630STiger Yang struct ocfs2_lock_res *lockres; 168650008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 168750008630STiger Yang 168850008630STiger Yang BUG_ON(!inode); 168950008630STiger Yang 169050008630STiger Yang mlog_entry_void(); 169150008630STiger Yang 169250008630STiger Yang mlog(0, "inode %llu try to take %s open lock\n", 169350008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno, 169450008630STiger Yang write ? 
"EXMODE" : "PRMODE"); 169550008630STiger Yang 169650008630STiger Yang if (ocfs2_mount_local(osb)) 169750008630STiger Yang goto out; 169850008630STiger Yang 169950008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 170050008630STiger Yang 1701bd3e7610SJoel Becker level = write ? DLM_LOCK_EX : DLM_LOCK_PR; 170250008630STiger Yang 170350008630STiger Yang /* 170450008630STiger Yang * The file system may already holding a PRMODE/EXMODE open lock. 1705bd3e7610SJoel Becker * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on 170650008630STiger Yang * other nodes and the -EAGAIN will indicate to the caller that 170750008630STiger Yang * this inode is still in use. 170850008630STiger Yang */ 170950008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 1710bd3e7610SJoel Becker level, DLM_LKF_NOQUEUE, 0); 171150008630STiger Yang 171250008630STiger Yang out: 171350008630STiger Yang mlog_exit(status); 171450008630STiger Yang return status; 171550008630STiger Yang } 171650008630STiger Yang 171750008630STiger Yang /* 171850008630STiger Yang * ocfs2_open_unlock unlock PR and EX mode open locks. 
171950008630STiger Yang */ 172050008630STiger Yang void ocfs2_open_unlock(struct inode *inode) 172150008630STiger Yang { 172250008630STiger Yang struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 172350008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 172450008630STiger Yang 172550008630STiger Yang mlog_entry_void(); 172650008630STiger Yang 172750008630STiger Yang mlog(0, "inode %llu drop open lock\n", 172850008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 172950008630STiger Yang 173050008630STiger Yang if (ocfs2_mount_local(osb)) 173150008630STiger Yang goto out; 173250008630STiger Yang 173350008630STiger Yang if(lockres->l_ro_holders) 173450008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1735bd3e7610SJoel Becker DLM_LOCK_PR); 173650008630STiger Yang if(lockres->l_ex_holders) 173750008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 1738bd3e7610SJoel Becker DLM_LOCK_EX); 173950008630STiger Yang 174050008630STiger Yang out: 174150008630STiger Yang mlog_exit_void(); 174250008630STiger Yang } 174350008630STiger Yang 1744cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, 1745cf8e06f1SMark Fasheh int level) 1746cf8e06f1SMark Fasheh { 1747cf8e06f1SMark Fasheh int ret; 1748cf8e06f1SMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 1749cf8e06f1SMark Fasheh unsigned long flags; 1750cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1751cf8e06f1SMark Fasheh 1752cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1753cf8e06f1SMark Fasheh 1754cf8e06f1SMark Fasheh retry_cancel: 1755cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1756cf8e06f1SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 1757cf8e06f1SMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 1758cf8e06f1SMark Fasheh if (ret) { 1759cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1760cf8e06f1SMark Fasheh ret = 
ocfs2_cancel_convert(osb, lockres); 1761cf8e06f1SMark Fasheh if (ret < 0) { 1762cf8e06f1SMark Fasheh mlog_errno(ret); 1763cf8e06f1SMark Fasheh goto out; 1764cf8e06f1SMark Fasheh } 1765cf8e06f1SMark Fasheh goto retry_cancel; 1766cf8e06f1SMark Fasheh } 1767cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1768cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1769cf8e06f1SMark Fasheh 1770cf8e06f1SMark Fasheh ocfs2_wait_for_mask(&mw); 1771cf8e06f1SMark Fasheh goto retry_cancel; 1772cf8e06f1SMark Fasheh } 1773cf8e06f1SMark Fasheh 1774cf8e06f1SMark Fasheh ret = -ERESTARTSYS; 1775cf8e06f1SMark Fasheh /* 1776cf8e06f1SMark Fasheh * We may still have gotten the lock, in which case there's no 1777cf8e06f1SMark Fasheh * point to restarting the syscall. 1778cf8e06f1SMark Fasheh */ 1779cf8e06f1SMark Fasheh if (lockres->l_level == level) 1780cf8e06f1SMark Fasheh ret = 0; 1781cf8e06f1SMark Fasheh 1782cf8e06f1SMark Fasheh mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, 1783cf8e06f1SMark Fasheh lockres->l_flags, lockres->l_level, lockres->l_action); 1784cf8e06f1SMark Fasheh 1785cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1786cf8e06f1SMark Fasheh 1787cf8e06f1SMark Fasheh out: 1788cf8e06f1SMark Fasheh return ret; 1789cf8e06f1SMark Fasheh } 1790cf8e06f1SMark Fasheh 1791cf8e06f1SMark Fasheh /* 1792cf8e06f1SMark Fasheh * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of 1793cf8e06f1SMark Fasheh * flock() calls. The locking approach this requires is sufficiently 1794cf8e06f1SMark Fasheh * different from all other cluster lock types that we implement a 1795cf8e06f1SMark Fasheh * seperate path to the "low-level" dlm calls. In particular: 1796cf8e06f1SMark Fasheh * 1797cf8e06f1SMark Fasheh * - No optimization of lock levels is done - we take at exactly 1798cf8e06f1SMark Fasheh * what's been requested. 
1799cf8e06f1SMark Fasheh * 1800cf8e06f1SMark Fasheh * - No lock caching is employed. We immediately downconvert to 1801cf8e06f1SMark Fasheh * no-lock at unlock time. This also means flock locks never go on 1802cf8e06f1SMark Fasheh * the blocking list). 1803cf8e06f1SMark Fasheh * 1804cf8e06f1SMark Fasheh * - Since userspace can trivially deadlock itself with flock, we make 1805cf8e06f1SMark Fasheh * sure to allow cancellation of a misbehaving applications flock() 1806cf8e06f1SMark Fasheh * request. 1807cf8e06f1SMark Fasheh * 1808cf8e06f1SMark Fasheh * - Access to any flock lockres doesn't require concurrency, so we 1809cf8e06f1SMark Fasheh * can simplify the code by requiring the caller to guarantee 1810cf8e06f1SMark Fasheh * serialization of dlmglue flock calls. 1811cf8e06f1SMark Fasheh */ 1812cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock) 1813cf8e06f1SMark Fasheh { 1814e988cf1cSMark Fasheh int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 1815e988cf1cSMark Fasheh unsigned int lkm_flags = trylock ? 
DLM_LKF_NOQUEUE : 0; 1816cf8e06f1SMark Fasheh unsigned long flags; 1817cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1818cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1819cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1820cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1821cf8e06f1SMark Fasheh 1822cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1823cf8e06f1SMark Fasheh 1824cf8e06f1SMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_BUSY) || 1825bd3e7610SJoel Becker (lockres->l_level > DLM_LOCK_NL)) { 1826cf8e06f1SMark Fasheh mlog(ML_ERROR, 1827cf8e06f1SMark Fasheh "File lock \"%s\" has busy or locked state: flags: 0x%lx, " 1828cf8e06f1SMark Fasheh "level: %u\n", lockres->l_name, lockres->l_flags, 1829cf8e06f1SMark Fasheh lockres->l_level); 1830cf8e06f1SMark Fasheh return -EINVAL; 1831cf8e06f1SMark Fasheh } 1832cf8e06f1SMark Fasheh 1833cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1834cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 1835cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1836cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1837cf8e06f1SMark Fasheh 1838cf8e06f1SMark Fasheh /* 1839cf8e06f1SMark Fasheh * Get the lock at NLMODE to start - that way we 1840cf8e06f1SMark Fasheh * can cancel the upconvert request if need be. 
1841cf8e06f1SMark Fasheh */ 1842e988cf1cSMark Fasheh ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0); 1843cf8e06f1SMark Fasheh if (ret < 0) { 1844cf8e06f1SMark Fasheh mlog_errno(ret); 1845cf8e06f1SMark Fasheh goto out; 1846cf8e06f1SMark Fasheh } 1847cf8e06f1SMark Fasheh 1848cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1849cf8e06f1SMark Fasheh if (ret) { 1850cf8e06f1SMark Fasheh mlog_errno(ret); 1851cf8e06f1SMark Fasheh goto out; 1852cf8e06f1SMark Fasheh } 1853cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1854cf8e06f1SMark Fasheh } 1855cf8e06f1SMark Fasheh 1856cf8e06f1SMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 1857e988cf1cSMark Fasheh lkm_flags |= DLM_LKF_CONVERT; 1858cf8e06f1SMark Fasheh lockres->l_requested = level; 1859cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 1860cf8e06f1SMark Fasheh 1861cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1862cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1863cf8e06f1SMark Fasheh 18644670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, 1865a796d286SJoel Becker lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1); 18667431cd7eSJoel Becker if (ret) { 18677431cd7eSJoel Becker if (!trylock || (ret != -EAGAIN)) { 186824ef1815SJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 1869cf8e06f1SMark Fasheh ret = -EINVAL; 1870cf8e06f1SMark Fasheh } 1871cf8e06f1SMark Fasheh 1872cf8e06f1SMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1873cf8e06f1SMark Fasheh lockres_remove_mask_waiter(lockres, &mw); 1874cf8e06f1SMark Fasheh goto out; 1875cf8e06f1SMark Fasheh } 1876cf8e06f1SMark Fasheh 1877cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); 1878cf8e06f1SMark Fasheh if (ret == -ERESTARTSYS) { 1879cf8e06f1SMark Fasheh /* 1880cf8e06f1SMark Fasheh * Userspace can cause deadlock itself with 1881cf8e06f1SMark Fasheh * flock(). 
Current behavior locally is to allow the 1882cf8e06f1SMark Fasheh * deadlock, but abort the system call if a signal is 1883cf8e06f1SMark Fasheh * received. We follow this example, otherwise a 1884cf8e06f1SMark Fasheh * poorly written program could sit in kernel until 1885cf8e06f1SMark Fasheh * reboot. 1886cf8e06f1SMark Fasheh * 1887cf8e06f1SMark Fasheh * Handling this is a bit more complicated for Ocfs2 1888cf8e06f1SMark Fasheh * though. We can't exit this function with an 1889cf8e06f1SMark Fasheh * outstanding lock request, so a cancel convert is 1890cf8e06f1SMark Fasheh * required. We intentionally overwrite 'ret' - if the 1891cf8e06f1SMark Fasheh * cancel fails and the lock was granted, it's easier 1892af901ca1SAndré Goddard Rosa * to just bubble success back up to the user. 1893cf8e06f1SMark Fasheh */ 1894cf8e06f1SMark Fasheh ret = ocfs2_flock_handle_signal(lockres, level); 18951693a5c0SDavid Teigland } else if (!ret && (level > lockres->l_level)) { 18961693a5c0SDavid Teigland /* Trylock failed asynchronously */ 18971693a5c0SDavid Teigland BUG_ON(!trylock); 18981693a5c0SDavid Teigland ret = -EAGAIN; 1899cf8e06f1SMark Fasheh } 1900cf8e06f1SMark Fasheh 1901cf8e06f1SMark Fasheh out: 1902cf8e06f1SMark Fasheh 1903cf8e06f1SMark Fasheh mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", 1904cf8e06f1SMark Fasheh lockres->l_name, ex, trylock, ret); 1905cf8e06f1SMark Fasheh return ret; 1906cf8e06f1SMark Fasheh } 1907cf8e06f1SMark Fasheh 1908cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file) 1909cf8e06f1SMark Fasheh { 1910cf8e06f1SMark Fasheh int ret; 1911de551246SJoel Becker unsigned int gen; 1912cf8e06f1SMark Fasheh unsigned long flags; 1913cf8e06f1SMark Fasheh struct ocfs2_file_private *fp = file->private_data; 1914cf8e06f1SMark Fasheh struct ocfs2_lock_res *lockres = &fp->fp_flock; 1915cf8e06f1SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); 1916cf8e06f1SMark Fasheh struct ocfs2_mask_waiter mw; 1917cf8e06f1SMark Fasheh 
1918cf8e06f1SMark Fasheh ocfs2_init_mask_waiter(&mw); 1919cf8e06f1SMark Fasheh 1920cf8e06f1SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) 1921cf8e06f1SMark Fasheh return; 1922cf8e06f1SMark Fasheh 1923e988cf1cSMark Fasheh if (lockres->l_level == DLM_LOCK_NL) 1924cf8e06f1SMark Fasheh return; 1925cf8e06f1SMark Fasheh 1926cf8e06f1SMark Fasheh mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", 1927cf8e06f1SMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_level, 1928cf8e06f1SMark Fasheh lockres->l_action); 1929cf8e06f1SMark Fasheh 1930cf8e06f1SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1931cf8e06f1SMark Fasheh /* 1932cf8e06f1SMark Fasheh * Fake a blocking ast for the downconvert code. 1933cf8e06f1SMark Fasheh */ 1934cf8e06f1SMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 1935bd3e7610SJoel Becker lockres->l_blocking = DLM_LOCK_EX; 1936cf8e06f1SMark Fasheh 1937e988cf1cSMark Fasheh gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL); 1938cf8e06f1SMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 1939cf8e06f1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1940cf8e06f1SMark Fasheh 1941e988cf1cSMark Fasheh ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen); 1942cf8e06f1SMark Fasheh if (ret) { 1943cf8e06f1SMark Fasheh mlog_errno(ret); 1944cf8e06f1SMark Fasheh return; 1945cf8e06f1SMark Fasheh } 1946cf8e06f1SMark Fasheh 1947cf8e06f1SMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1948cf8e06f1SMark Fasheh if (ret) 1949cf8e06f1SMark Fasheh mlog_errno(ret); 1950cf8e06f1SMark Fasheh } 1951cf8e06f1SMark Fasheh 195234d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 1953ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 1954ccd979bdSMark Fasheh { 1955ccd979bdSMark Fasheh int kick = 0; 1956ccd979bdSMark Fasheh 1957ccd979bdSMark Fasheh mlog_entry_void(); 1958ccd979bdSMark Fasheh 1959ccd979bdSMark Fasheh /* If we know that another node is waiting on 
our lock, kick 196034d024f8SMark Fasheh * the downconvert thread pre-emptively when we reach a release 1961ccd979bdSMark Fasheh * condition. */ 1962ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 1963ccd979bdSMark Fasheh switch(lockres->l_blocking) { 1964bd3e7610SJoel Becker case DLM_LOCK_EX: 1965ccd979bdSMark Fasheh if (!lockres->l_ex_holders && !lockres->l_ro_holders) 1966ccd979bdSMark Fasheh kick = 1; 1967ccd979bdSMark Fasheh break; 1968bd3e7610SJoel Becker case DLM_LOCK_PR: 1969ccd979bdSMark Fasheh if (!lockres->l_ex_holders) 1970ccd979bdSMark Fasheh kick = 1; 1971ccd979bdSMark Fasheh break; 1972ccd979bdSMark Fasheh default: 1973ccd979bdSMark Fasheh BUG(); 1974ccd979bdSMark Fasheh } 1975ccd979bdSMark Fasheh } 1976ccd979bdSMark Fasheh 1977ccd979bdSMark Fasheh if (kick) 197834d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 1979ccd979bdSMark Fasheh 1980ccd979bdSMark Fasheh mlog_exit_void(); 1981ccd979bdSMark Fasheh } 1982ccd979bdSMark Fasheh 1983ccd979bdSMark Fasheh #define OCFS2_SEC_BITS 34 1984ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT (64 - 34) 1985ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 1986ccd979bdSMark Fasheh 1987ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for 1988ccd979bdSMark Fasheh * now. */ 1989ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec) 1990ccd979bdSMark Fasheh { 1991ccd979bdSMark Fasheh u64 res; 1992ccd979bdSMark Fasheh u64 sec = spec->tv_sec; 1993ccd979bdSMark Fasheh u32 nsec = spec->tv_nsec; 1994ccd979bdSMark Fasheh 1995ccd979bdSMark Fasheh res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 1996ccd979bdSMark Fasheh 1997ccd979bdSMark Fasheh return res; 1998ccd979bdSMark Fasheh } 1999ccd979bdSMark Fasheh 2000ccd979bdSMark Fasheh /* Call this with the lockres locked. 
I am reasonably sure we don't 2001ccd979bdSMark Fasheh * need ip_lock in this function as anyone who would be changing those 2002e63aecb6SMark Fasheh * values is supposed to be blocked in ocfs2_inode_lock right now. */ 2003ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode) 2004ccd979bdSMark Fasheh { 2005ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2006e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2007ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 2008ccd979bdSMark Fasheh 2009ccd979bdSMark Fasheh mlog_entry_void(); 2010ccd979bdSMark Fasheh 2011a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2012ccd979bdSMark Fasheh 201324c19ef4SMark Fasheh /* 201424c19ef4SMark Fasheh * Invalidate the LVB of a deleted inode - this way other 201524c19ef4SMark Fasheh * nodes are forced to go to disk and discover the new inode 201624c19ef4SMark Fasheh * status. 201724c19ef4SMark Fasheh */ 201824c19ef4SMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 201924c19ef4SMark Fasheh lvb->lvb_version = 0; 202024c19ef4SMark Fasheh goto out; 202124c19ef4SMark Fasheh } 202224c19ef4SMark Fasheh 20234d3b83f7SMark Fasheh lvb->lvb_version = OCFS2_LVB_VERSION; 2024ccd979bdSMark Fasheh lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 2025ccd979bdSMark Fasheh lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 2026ccd979bdSMark Fasheh lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 2027ccd979bdSMark Fasheh lvb->lvb_igid = cpu_to_be32(inode->i_gid); 2028ccd979bdSMark Fasheh lvb->lvb_imode = cpu_to_be16(inode->i_mode); 2029ccd979bdSMark Fasheh lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 2030ccd979bdSMark Fasheh lvb->lvb_iatime_packed = 2031ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 2032ccd979bdSMark Fasheh lvb->lvb_ictime_packed = 2033ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 2034ccd979bdSMark Fasheh lvb->lvb_imtime_packed = 2035ccd979bdSMark Fasheh 
cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 2036ca4d147eSHerbert Poetzl lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 203715b1e36bSMark Fasheh lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 2038f9e2d82eSMark Fasheh lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 2039ccd979bdSMark Fasheh 204024c19ef4SMark Fasheh out: 2041ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 2042ccd979bdSMark Fasheh 2043ccd979bdSMark Fasheh mlog_exit_void(); 2044ccd979bdSMark Fasheh } 2045ccd979bdSMark Fasheh 2046ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec, 2047ccd979bdSMark Fasheh u64 packed_time) 2048ccd979bdSMark Fasheh { 2049ccd979bdSMark Fasheh spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 2050ccd979bdSMark Fasheh spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 2051ccd979bdSMark Fasheh } 2052ccd979bdSMark Fasheh 2053ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 2054ccd979bdSMark Fasheh { 2055ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2056e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2057ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 2058ccd979bdSMark Fasheh 2059ccd979bdSMark Fasheh mlog_entry_void(); 2060ccd979bdSMark Fasheh 2061ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 2062ccd979bdSMark Fasheh 2063a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2064ccd979bdSMark Fasheh 2065ccd979bdSMark Fasheh /* We're safe here without the lockres lock... 
*/ 2066ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2067ccd979bdSMark Fasheh oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 2068ccd979bdSMark Fasheh i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 2069ccd979bdSMark Fasheh 2070ca4d147eSHerbert Poetzl oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 207115b1e36bSMark Fasheh oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 2072ca4d147eSHerbert Poetzl ocfs2_set_inode_flags(inode); 2073ca4d147eSHerbert Poetzl 2074ccd979bdSMark Fasheh /* fast-symlinks are a special case */ 2075ccd979bdSMark Fasheh if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 2076ccd979bdSMark Fasheh inode->i_blocks = 0; 2077ccd979bdSMark Fasheh else 20788110b073SMark Fasheh inode->i_blocks = ocfs2_inode_sector_count(inode); 2079ccd979bdSMark Fasheh 2080ccd979bdSMark Fasheh inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 2081ccd979bdSMark Fasheh inode->i_gid = be32_to_cpu(lvb->lvb_igid); 2082ccd979bdSMark Fasheh inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2083ccd979bdSMark Fasheh inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); 2084ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_atime, 2085ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_iatime_packed)); 2086ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_mtime, 2087ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_imtime_packed)); 2088ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_ctime, 2089ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_ictime_packed)); 2090ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2091ccd979bdSMark Fasheh 2092ccd979bdSMark Fasheh mlog_exit_void(); 2093ccd979bdSMark Fasheh } 2094ccd979bdSMark Fasheh 2095f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 2096f9e2d82eSMark Fasheh struct ocfs2_lock_res *lockres) 2097ccd979bdSMark Fasheh { 2098a641dc2aSMark Fasheh struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2099ccd979bdSMark Fasheh 21001c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) 21011c520dfbSJoel Becker && 
lvb->lvb_version == OCFS2_LVB_VERSION 2102f9e2d82eSMark Fasheh && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 2103ccd979bdSMark Fasheh return 1; 2104ccd979bdSMark Fasheh return 0; 2105ccd979bdSMark Fasheh } 2106ccd979bdSMark Fasheh 2107ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and 2108ccd979bdSMark Fasheh * arbitrate who gets to refresh it. 2109ccd979bdSMark Fasheh * 2110ccd979bdSMark Fasheh * 0 means no refresh needed. 2111ccd979bdSMark Fasheh * 2112ccd979bdSMark Fasheh * > 0 means you need to refresh this and you MUST call 2113ccd979bdSMark Fasheh * ocfs2_complete_lock_res_refresh afterwards. */ 2114ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 2115ccd979bdSMark Fasheh { 2116ccd979bdSMark Fasheh unsigned long flags; 2117ccd979bdSMark Fasheh int status = 0; 2118ccd979bdSMark Fasheh 2119ccd979bdSMark Fasheh mlog_entry_void(); 2120ccd979bdSMark Fasheh 2121ccd979bdSMark Fasheh refresh_check: 2122ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2123ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 2124ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2125ccd979bdSMark Fasheh goto bail; 2126ccd979bdSMark Fasheh } 2127ccd979bdSMark Fasheh 2128ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 2129ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2130ccd979bdSMark Fasheh 2131ccd979bdSMark Fasheh ocfs2_wait_on_refreshing_lock(lockres); 2132ccd979bdSMark Fasheh goto refresh_check; 2133ccd979bdSMark Fasheh } 2134ccd979bdSMark Fasheh 2135ccd979bdSMark Fasheh /* Ok, I'll be the one to refresh this lock. 
*/ 2136ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 2137ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2138ccd979bdSMark Fasheh 2139ccd979bdSMark Fasheh status = 1; 2140ccd979bdSMark Fasheh bail: 2141ccd979bdSMark Fasheh mlog_exit(status); 2142ccd979bdSMark Fasheh return status; 2143ccd979bdSMark Fasheh } 2144ccd979bdSMark Fasheh 2145ccd979bdSMark Fasheh /* If status is non-zero, I'll mark it as not being in refresh 2146ccd979bdSMark Fasheh * anymore, but I won't clear the needs refresh flag. */ 2147ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 2148ccd979bdSMark Fasheh int status) 2149ccd979bdSMark Fasheh { 2150ccd979bdSMark Fasheh unsigned long flags; 2151ccd979bdSMark Fasheh mlog_entry_void(); 2152ccd979bdSMark Fasheh 2153ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2154ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 2155ccd979bdSMark Fasheh if (!status) 2156ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 2157ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2158ccd979bdSMark Fasheh 2159ccd979bdSMark Fasheh wake_up(&lockres->l_event); 2160ccd979bdSMark Fasheh 2161ccd979bdSMark Fasheh mlog_exit_void(); 2162ccd979bdSMark Fasheh } 2163ccd979bdSMark Fasheh 2164ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. 
*/ 2165e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 2166ccd979bdSMark Fasheh struct buffer_head **bh) 2167ccd979bdSMark Fasheh { 2168ccd979bdSMark Fasheh int status = 0; 2169ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 2170e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 2171ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 2172c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2173ccd979bdSMark Fasheh 2174ccd979bdSMark Fasheh mlog_entry_void(); 2175ccd979bdSMark Fasheh 2176be9e986bSMark Fasheh if (ocfs2_mount_local(osb)) 2177be9e986bSMark Fasheh goto bail; 2178be9e986bSMark Fasheh 2179ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 2180ccd979bdSMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 2181b0697053SMark Fasheh mlog(0, "Orphaned inode %llu was deleted while we " 2182ccd979bdSMark Fasheh "were waiting on a lock. ip_flags = 0x%x\n", 2183b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, oi->ip_flags); 2184ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2185ccd979bdSMark Fasheh status = -ENOENT; 2186ccd979bdSMark Fasheh goto bail; 2187ccd979bdSMark Fasheh } 2188ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 2189ccd979bdSMark Fasheh 2190ccd979bdSMark Fasheh if (!ocfs2_should_refresh_lock_res(lockres)) 2191ccd979bdSMark Fasheh goto bail; 2192ccd979bdSMark Fasheh 2193ccd979bdSMark Fasheh /* This will discard any caching information we might have had 2194ccd979bdSMark Fasheh * for the inode metadata. 
*/ 21958cb471e8SJoel Becker ocfs2_metadata_cache_purge(INODE_CACHE(inode)); 2196ccd979bdSMark Fasheh 219783418978SMark Fasheh ocfs2_extent_map_trunc(inode, 0); 219883418978SMark Fasheh 2199be9e986bSMark Fasheh if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 2200b0697053SMark Fasheh mlog(0, "Trusting LVB on inode %llu\n", 2201b0697053SMark Fasheh (unsigned long long)oi->ip_blkno); 2202ccd979bdSMark Fasheh ocfs2_refresh_inode_from_lvb(inode); 2203ccd979bdSMark Fasheh } else { 2204ccd979bdSMark Fasheh /* Boo, we have to go to disk. */ 2205ccd979bdSMark Fasheh /* read bh, cast, ocfs2_refresh_inode */ 2206b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, bh); 2207ccd979bdSMark Fasheh if (status < 0) { 2208ccd979bdSMark Fasheh mlog_errno(status); 2209ccd979bdSMark Fasheh goto bail_refresh; 2210ccd979bdSMark Fasheh } 2211ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) (*bh)->b_data; 2212ccd979bdSMark Fasheh 2213ccd979bdSMark Fasheh /* This is a good chance to make sure we're not 2214b657c95cSJoel Becker * locking an invalid object. ocfs2_read_inode_block() 2215b657c95cSJoel Becker * already checked that the inode block is sane. 2216ccd979bdSMark Fasheh * 2217ccd979bdSMark Fasheh * We bug on a stale inode here because we checked 2218ccd979bdSMark Fasheh * above whether it was wiped from disk. The wiping 2219ccd979bdSMark Fasheh * node provides a guarantee that we receive that 2220ccd979bdSMark Fasheh * message and can mark the inode before dropping any 2221ccd979bdSMark Fasheh * locks associated with it. 
*/ 2222ccd979bdSMark Fasheh mlog_bug_on_msg(inode->i_generation != 2223ccd979bdSMark Fasheh le32_to_cpu(fe->i_generation), 2224b0697053SMark Fasheh "Invalid dinode %llu disk generation: %u " 2225ccd979bdSMark Fasheh "inode->i_generation: %u\n", 2226b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2227b0697053SMark Fasheh le32_to_cpu(fe->i_generation), 2228ccd979bdSMark Fasheh inode->i_generation); 2229ccd979bdSMark Fasheh mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 2230ccd979bdSMark Fasheh !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 2231b0697053SMark Fasheh "Stale dinode %llu dtime: %llu flags: 0x%x\n", 2232b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 2233b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_dtime), 2234ccd979bdSMark Fasheh le32_to_cpu(fe->i_flags)); 2235ccd979bdSMark Fasheh 2236ccd979bdSMark Fasheh ocfs2_refresh_inode(inode, fe); 22378ddb7b00SSunil Mushran ocfs2_track_lock_refresh(lockres); 2238ccd979bdSMark Fasheh } 2239ccd979bdSMark Fasheh 2240ccd979bdSMark Fasheh status = 0; 2241ccd979bdSMark Fasheh bail_refresh: 2242ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 2243ccd979bdSMark Fasheh bail: 2244ccd979bdSMark Fasheh mlog_exit(status); 2245ccd979bdSMark Fasheh return status; 2246ccd979bdSMark Fasheh } 2247ccd979bdSMark Fasheh 2248ccd979bdSMark Fasheh static int ocfs2_assign_bh(struct inode *inode, 2249ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2250ccd979bdSMark Fasheh struct buffer_head *passed_bh) 2251ccd979bdSMark Fasheh { 2252ccd979bdSMark Fasheh int status; 2253ccd979bdSMark Fasheh 2254ccd979bdSMark Fasheh if (passed_bh) { 2255ccd979bdSMark Fasheh /* Ok, the update went to disk for us, use the 2256ccd979bdSMark Fasheh * returned bh. 
*/ 2257ccd979bdSMark Fasheh *ret_bh = passed_bh; 2258ccd979bdSMark Fasheh get_bh(*ret_bh); 2259ccd979bdSMark Fasheh 2260ccd979bdSMark Fasheh return 0; 2261ccd979bdSMark Fasheh } 2262ccd979bdSMark Fasheh 2263b657c95cSJoel Becker status = ocfs2_read_inode_block(inode, ret_bh); 2264ccd979bdSMark Fasheh if (status < 0) 2265ccd979bdSMark Fasheh mlog_errno(status); 2266ccd979bdSMark Fasheh 2267ccd979bdSMark Fasheh return status; 2268ccd979bdSMark Fasheh } 2269ccd979bdSMark Fasheh 2270ccd979bdSMark Fasheh /* 2271ccd979bdSMark Fasheh * returns < 0 error if the callback will never be called, otherwise 2272ccd979bdSMark Fasheh * the result of the lock will be communicated via the callback. 2273ccd979bdSMark Fasheh */ 2274cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode, 2275ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2276ccd979bdSMark Fasheh int ex, 2277cb25797dSJan Kara int arg_flags, 2278cb25797dSJan Kara int subclass) 2279ccd979bdSMark Fasheh { 2280bd3e7610SJoel Becker int status, level, acquired; 2281bd3e7610SJoel Becker u32 dlm_flags; 2282c271c5c2SSunil Mushran struct ocfs2_lock_res *lockres = NULL; 2283ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2284ccd979bdSMark Fasheh struct buffer_head *local_bh = NULL; 2285ccd979bdSMark Fasheh 2286ccd979bdSMark Fasheh BUG_ON(!inode); 2287ccd979bdSMark Fasheh 2288ccd979bdSMark Fasheh mlog_entry_void(); 2289ccd979bdSMark Fasheh 2290b0697053SMark Fasheh mlog(0, "inode %llu, take %s META lock\n", 2291b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2292ccd979bdSMark Fasheh ex ? "EXMODE" : "PRMODE"); 2293ccd979bdSMark Fasheh 2294ccd979bdSMark Fasheh status = 0; 2295ccd979bdSMark Fasheh acquired = 0; 2296ccd979bdSMark Fasheh /* We'll allow faking a readonly metadata lock for 2297ccd979bdSMark Fasheh * rodevices. 
*/ 2298ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) { 2299ccd979bdSMark Fasheh if (ex) 2300ccd979bdSMark Fasheh status = -EROFS; 2301ccd979bdSMark Fasheh goto bail; 2302ccd979bdSMark Fasheh } 2303ccd979bdSMark Fasheh 2304c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2305c271c5c2SSunil Mushran goto local; 2306c271c5c2SSunil Mushran 2307ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2308553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2309ccd979bdSMark Fasheh 2310e63aecb6SMark Fasheh lockres = &OCFS2_I(inode)->ip_inode_lockres; 2311bd3e7610SJoel Becker level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2312ccd979bdSMark Fasheh dlm_flags = 0; 2313ccd979bdSMark Fasheh if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2314bd3e7610SJoel Becker dlm_flags |= DLM_LKF_NOQUEUE; 2315ccd979bdSMark Fasheh 2316cb25797dSJan Kara status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, 2317cb25797dSJan Kara arg_flags, subclass, _RET_IP_); 2318ccd979bdSMark Fasheh if (status < 0) { 2319ccd979bdSMark Fasheh if (status != -EAGAIN && status != -EIOCBRETRY) 2320ccd979bdSMark Fasheh mlog_errno(status); 2321ccd979bdSMark Fasheh goto bail; 2322ccd979bdSMark Fasheh } 2323ccd979bdSMark Fasheh 2324ccd979bdSMark Fasheh /* Notify the error cleanup path to drop the cluster lock. */ 2325ccd979bdSMark Fasheh acquired = 1; 2326ccd979bdSMark Fasheh 2327ccd979bdSMark Fasheh /* We wait twice because a node may have died while we were in 2328ccd979bdSMark Fasheh * the lower dlm layers. The second time though, we've 2329ccd979bdSMark Fasheh * committed to owning this lock so we don't allow signals to 2330ccd979bdSMark Fasheh * abort the operation. 
*/ 2331ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 2332553abd04SJoel Becker ocfs2_wait_for_recovery(osb); 2333ccd979bdSMark Fasheh 2334c271c5c2SSunil Mushran local: 233524c19ef4SMark Fasheh /* 233624c19ef4SMark Fasheh * We only see this flag if we're being called from 233724c19ef4SMark Fasheh * ocfs2_read_locked_inode(). It means we're locking an inode 233824c19ef4SMark Fasheh * which hasn't been populated yet, so clear the refresh flag 233924c19ef4SMark Fasheh * and let the caller handle it. 234024c19ef4SMark Fasheh */ 234124c19ef4SMark Fasheh if (inode->i_state & I_NEW) { 234224c19ef4SMark Fasheh status = 0; 2343c271c5c2SSunil Mushran if (lockres) 234424c19ef4SMark Fasheh ocfs2_complete_lock_res_refresh(lockres, 0); 234524c19ef4SMark Fasheh goto bail; 234624c19ef4SMark Fasheh } 234724c19ef4SMark Fasheh 2348ccd979bdSMark Fasheh /* This is fun. The caller may want a bh back, or it may 2349e63aecb6SMark Fasheh * not. ocfs2_inode_lock_update definitely wants one in, but 2350ccd979bdSMark Fasheh * may or may not read one, depending on what's in the 2351ccd979bdSMark Fasheh * LVB. The result of all of this is that we've *only* gone to 2352ccd979bdSMark Fasheh * disk if we have to, so the complexity is worthwhile. 
*/ 2353e63aecb6SMark Fasheh status = ocfs2_inode_lock_update(inode, &local_bh); 2354ccd979bdSMark Fasheh if (status < 0) { 2355ccd979bdSMark Fasheh if (status != -ENOENT) 2356ccd979bdSMark Fasheh mlog_errno(status); 2357ccd979bdSMark Fasheh goto bail; 2358ccd979bdSMark Fasheh } 2359ccd979bdSMark Fasheh 2360ccd979bdSMark Fasheh if (ret_bh) { 2361ccd979bdSMark Fasheh status = ocfs2_assign_bh(inode, ret_bh, local_bh); 2362ccd979bdSMark Fasheh if (status < 0) { 2363ccd979bdSMark Fasheh mlog_errno(status); 2364ccd979bdSMark Fasheh goto bail; 2365ccd979bdSMark Fasheh } 2366ccd979bdSMark Fasheh } 2367ccd979bdSMark Fasheh 2368ccd979bdSMark Fasheh bail: 2369ccd979bdSMark Fasheh if (status < 0) { 2370ccd979bdSMark Fasheh if (ret_bh && (*ret_bh)) { 2371ccd979bdSMark Fasheh brelse(*ret_bh); 2372ccd979bdSMark Fasheh *ret_bh = NULL; 2373ccd979bdSMark Fasheh } 2374ccd979bdSMark Fasheh if (acquired) 2375e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2376ccd979bdSMark Fasheh } 2377ccd979bdSMark Fasheh 2378ccd979bdSMark Fasheh if (local_bh) 2379ccd979bdSMark Fasheh brelse(local_bh); 2380ccd979bdSMark Fasheh 2381ccd979bdSMark Fasheh mlog_exit(status); 2382ccd979bdSMark Fasheh return status; 2383ccd979bdSMark Fasheh } 2384ccd979bdSMark Fasheh 2385ccd979bdSMark Fasheh /* 238634d024f8SMark Fasheh * This is working around a lock inversion between tasks acquiring DLM 238734d024f8SMark Fasheh * locks while holding a page lock and the downconvert thread which 238834d024f8SMark Fasheh * blocks dlm lock acquisition while acquiring page locks. 2389ccd979bdSMark Fasheh * 2390ccd979bdSMark Fasheh * ** These _with_page variants are only intended to be called from aop 2391ccd979bdSMark Fasheh * methods that hold page locks and return a very specific *positive* error 2392ccd979bdSMark Fasheh * code that aop methods pass up to the VFS -- test for errors with != 0. 
** 2393ccd979bdSMark Fasheh * 239434d024f8SMark Fasheh * The DLM is called such that it returns -EAGAIN if it would have 239534d024f8SMark Fasheh * blocked waiting for the downconvert thread. In that case we unlock 239634d024f8SMark Fasheh * our page so the downconvert thread can make progress. Once we've 239734d024f8SMark Fasheh * done this we have to return AOP_TRUNCATED_PAGE so the aop method 239834d024f8SMark Fasheh * that called us can bubble that back up into the VFS who will then 239934d024f8SMark Fasheh * immediately retry the aop call. 2400ccd979bdSMark Fasheh * 2401ccd979bdSMark Fasheh * We do a blocking lock and immediate unlock before returning, though, so that 2402ccd979bdSMark Fasheh * the lock has a great chance of being cached on this node by the time the VFS 2403ccd979bdSMark Fasheh * calls back to retry the aop. This has a potential to livelock as nodes 2404ccd979bdSMark Fasheh * ping locks back and forth, but that's a risk we're willing to take to avoid 2405ccd979bdSMark Fasheh * the lock inversion simply. 
2406ccd979bdSMark Fasheh */ 2407e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode, 2408ccd979bdSMark Fasheh struct buffer_head **ret_bh, 2409ccd979bdSMark Fasheh int ex, 2410ccd979bdSMark Fasheh struct page *page) 2411ccd979bdSMark Fasheh { 2412ccd979bdSMark Fasheh int ret; 2413ccd979bdSMark Fasheh 2414e63aecb6SMark Fasheh ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 2415ccd979bdSMark Fasheh if (ret == -EAGAIN) { 2416ccd979bdSMark Fasheh unlock_page(page); 2417e63aecb6SMark Fasheh if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) 2418e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 2419ccd979bdSMark Fasheh ret = AOP_TRUNCATED_PAGE; 2420ccd979bdSMark Fasheh } 2421ccd979bdSMark Fasheh 2422ccd979bdSMark Fasheh return ret; 2423ccd979bdSMark Fasheh } 2424ccd979bdSMark Fasheh 2425e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode, 24267f1a37e3STiger Yang struct vfsmount *vfsmnt, 24277f1a37e3STiger Yang int *level) 24287f1a37e3STiger Yang { 24297f1a37e3STiger Yang int ret; 24307f1a37e3STiger Yang 24317f1a37e3STiger Yang mlog_entry_void(); 2432e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, NULL, 0); 24337f1a37e3STiger Yang if (ret < 0) { 24347f1a37e3STiger Yang mlog_errno(ret); 24357f1a37e3STiger Yang return ret; 24367f1a37e3STiger Yang } 24377f1a37e3STiger Yang 24387f1a37e3STiger Yang /* 24397f1a37e3STiger Yang * If we should update atime, we will get EX lock, 24407f1a37e3STiger Yang * otherwise we just get PR lock. 
24417f1a37e3STiger Yang */ 24427f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) { 24437f1a37e3STiger Yang struct buffer_head *bh = NULL; 24447f1a37e3STiger Yang 2445e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, 0); 2446e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, &bh, 1); 24477f1a37e3STiger Yang if (ret < 0) { 24487f1a37e3STiger Yang mlog_errno(ret); 24497f1a37e3STiger Yang return ret; 24507f1a37e3STiger Yang } 24517f1a37e3STiger Yang *level = 1; 24527f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) 24537f1a37e3STiger Yang ocfs2_update_inode_atime(inode, bh); 24547f1a37e3STiger Yang if (bh) 24557f1a37e3STiger Yang brelse(bh); 24567f1a37e3STiger Yang } else 24577f1a37e3STiger Yang *level = 0; 24587f1a37e3STiger Yang 24597f1a37e3STiger Yang mlog_exit(ret); 24607f1a37e3STiger Yang return ret; 24617f1a37e3STiger Yang } 24627f1a37e3STiger Yang 2463e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode, 2464ccd979bdSMark Fasheh int ex) 2465ccd979bdSMark Fasheh { 2466bd3e7610SJoel Becker int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 2467e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 2468c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2469ccd979bdSMark Fasheh 2470ccd979bdSMark Fasheh mlog_entry_void(); 2471ccd979bdSMark Fasheh 2472b0697053SMark Fasheh mlog(0, "inode %llu drop %s META lock\n", 2473b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 2474ccd979bdSMark Fasheh ex ? 
"EXMODE" : "PRMODE"); 2475ccd979bdSMark Fasheh 2476c271c5c2SSunil Mushran if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 2477c271c5c2SSunil Mushran !ocfs2_mount_local(osb)) 2478ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 2479ccd979bdSMark Fasheh 2480ccd979bdSMark Fasheh mlog_exit_void(); 2481ccd979bdSMark Fasheh } 2482ccd979bdSMark Fasheh 2483df152c24SSunil Mushran int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) 248483273932SSrinivas Eeda { 248583273932SSrinivas Eeda struct ocfs2_lock_res *lockres; 248683273932SSrinivas Eeda struct ocfs2_orphan_scan_lvb *lvb; 248783273932SSrinivas Eeda int status = 0; 248883273932SSrinivas Eeda 2489df152c24SSunil Mushran if (ocfs2_is_hard_readonly(osb)) 2490df152c24SSunil Mushran return -EROFS; 2491df152c24SSunil Mushran 2492df152c24SSunil Mushran if (ocfs2_mount_local(osb)) 2493df152c24SSunil Mushran return 0; 2494df152c24SSunil Mushran 249583273932SSrinivas Eeda lockres = &osb->osb_orphan_scan.os_lockres; 2496df152c24SSunil Mushran status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); 249783273932SSrinivas Eeda if (status < 0) 249883273932SSrinivas Eeda return status; 249983273932SSrinivas Eeda 250083273932SSrinivas Eeda lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 25011c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 25021c520dfbSJoel Becker lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) 250383273932SSrinivas Eeda *seqno = be32_to_cpu(lvb->lvb_os_seqno); 25043211949fSSunil Mushran else 25053211949fSSunil Mushran *seqno = osb->osb_orphan_scan.os_seqno + 1; 25063211949fSSunil Mushran 250783273932SSrinivas Eeda return status; 250883273932SSrinivas Eeda } 250983273932SSrinivas Eeda 2510df152c24SSunil Mushran void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) 251183273932SSrinivas Eeda { 251283273932SSrinivas Eeda struct ocfs2_lock_res *lockres; 251383273932SSrinivas Eeda struct ocfs2_orphan_scan_lvb *lvb; 251483273932SSrinivas Eeda 
2515df152c24SSunil Mushran if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { 251683273932SSrinivas Eeda lockres = &osb->osb_orphan_scan.os_lockres; 251783273932SSrinivas Eeda lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 251883273932SSrinivas Eeda lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; 251983273932SSrinivas Eeda lvb->lvb_os_seqno = cpu_to_be32(seqno); 2520df152c24SSunil Mushran ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); 2521df152c24SSunil Mushran } 252283273932SSrinivas Eeda } 252383273932SSrinivas Eeda

/*
 * Take the super block cluster lock at EX (ex != 0) or PR (ex == 0)
 * and, if the lock resource reports that it needs it, refresh the slot
 * info.
 *
 * Returns -EROFS on a hard read-only mount, 0 without taking any
 * cluster lock on a local mount, a negative error from
 * ocfs2_cluster_lock() / ocfs2_should_refresh_lock_res() /
 * ocfs2_refresh_slot_info(), or 0 on success.
 */
int ocfs2_super_lock(struct ocfs2_super *osb,
		     int ex)
{
	int status = 0;
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;

	mlog_entry_void();

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		goto bail;

	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* The super block lock path is really in the best position to
	 * know when resources covered by the lock need to be
	 * refreshed, so we do it here. Of course, making sense of
	 * everything is up to the caller :) */
	status = ocfs2_should_refresh_lock_res(lockres);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (status) {
		status = ocfs2_refresh_slot_info(osb);

		/* The refresh is always completed, passing along whether
		 * it succeeded. */
		ocfs2_complete_lock_res_refresh(lockres, status);

		if (status < 0)
			mlog_errno(status);
		ocfs2_track_lock_refresh(lockres);
	}
bail:
	mlog_exit(status);
	return status;
}

/*
 * Release the super block cluster lock at the level matching @ex.
 * Nothing is done on a local mount.
 */
void ocfs2_super_unlock(struct ocfs2_super *osb,
			int ex)
{
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;

	if (!ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(osb, lockres, level);
}

/*
 * Take the rename cluster lock, always at EX.
 *
 * Returns -EROFS on a hard read-only mount, 0 on a local mount (no
 * cluster lock taken), otherwise the result of ocfs2_cluster_lock().
 */
int ocfs2_rename_lock(struct ocfs2_super *osb)
{
	int status;
	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		return 0;

	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
	if (status < 0)
		mlog_errno(status);

	return status;
}

/* Release the EX rename cluster lock; nothing is done on a local mount. */
void ocfs2_rename_unlock(struct ocfs2_super *osb)
{
	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;

	if (!ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
}

/*
 * Take the nfs sync cluster lock at EX (ex != 0) or PR (ex == 0).
 *
 * Returns -EROFS on a hard read-only mount, 0 on a local mount (no
 * cluster lock taken), otherwise the result of ocfs2_cluster_lock().
 */
int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
{
	int status;
	/* Use the same DLM_LOCK_* level constants as every other lock
	 * helper in this file. */
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		return 0;

	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
	if (status < 0)
		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);

	return status;
}

/*
 * Release the nfs sync cluster lock at the level matching @ex.
 * Nothing is done on a local mount.
 */
void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
{
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;

	if (!ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(osb, lockres, level);
}

/*
 * Take the cluster lock attached to @dentry (dentry->d_fsdata must be
 * set) at EX (ex != 0) or PR (ex == 0).
 *
 * Returns -EROFS on a hard read-only mount, 0 on a local mount (no
 * cluster lock taken), otherwise the result of ocfs2_cluster_lock().
 */
int ocfs2_dentry_lock(struct dentry *dentry, int ex)
{
	int ret;
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);

	BUG_ON(!dl);

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		return 0;

	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
	if (ret < 0)
		mlog_errno(ret);

	return ret;
}

/*
 * Release the cluster lock attached to @dentry at the level matching
 * @ex. Nothing is done on a local mount.
 */
void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
{
	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);

	if (!ocfs2_mount_local(osb))
		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
}

/* Reference counting of the dlm debug structure. We want this because
 * open references on the debug inodes can live on after a mount, so
 * we can't rely on the ocfs2_super to always exist. */
/* kref release callback: frees the ocfs2_dlm_debug embedding @kref. */
static void ocfs2_dlm_debug_free(struct kref *kref)
{
	struct ocfs2_dlm_debug *dlm_debug;

	dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);

	kfree(dlm_debug);
}

/* Drop one reference on @dlm_debug; a NULL pointer is ignored. */
void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
{
	if (dlm_debug)
		kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
}

/* Take one reference on @debug, which must not be NULL. */
static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
{
	kref_get(&debug->d_refcnt);
}

/*
 * Allocate a dlm debug structure with an initialized refcount, an
 * empty lockres tracking list and no debugfs file yet.
 *
 * Returns the new structure, or NULL if the allocation failed.
 */
struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
{
	struct ocfs2_dlm_debug *dlm_debug;

	dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
	if (!dlm_debug) {
		mlog_errno(-ENOMEM);
		goto out;
	}

	kref_init(&dlm_debug->d_refcnt);
	INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
	dlm_debug->d_locking_state = NULL;
out:
	return dlm_debug;
}

/* Access to this is arbitrated for us via seq_file->sem. */
struct ocfs2_dlm_seq_priv {
	/* Debug structure whose tracking list is being walked. */
	struct ocfs2_dlm_debug *p_dlm_debug;
	/* Dummy list entry (NULL l_ops) marking the iteration position. */
	struct ocfs2_lock_res p_iter_res;
	/* Snapshot of the lockres currently being shown. */
	struct ocfs2_lock_res p_tmp_res;
};

/*
 * Walk the debug list forward from @start and return the first entry
 * with a non-NULL l_ops, or NULL once the head of the tracking list is
 * reached. The caller must hold ocfs2_dlm_tracking_lock.
 */
static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
						 struct ocfs2_dlm_seq_priv *priv)
{
	struct ocfs2_lock_res *iter, *ret = NULL;
	struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;

	assert_spin_locked(&ocfs2_dlm_tracking_lock);

	list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
		/* discover the head of the list */
		if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
			mlog(0, "End of list found, %p\n", ret);
			break;
		}

		/* We track our "dummy" iteration lockres' by a NULL
		 * l_ops field. */
		if (iter->l_ops != NULL) {
			ret = iter;
			break;
		}
	}

	return ret;
}

/*
 * seq_file ->start: find the next real lockres after the dummy
 * iteration entry and return a private snapshot of it, or NULL when
 * there is none.
 */
static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
{
	struct ocfs2_dlm_seq_priv *priv = m->private;
	struct ocfs2_lock_res *iter;

	spin_lock(&ocfs2_dlm_tracking_lock);
	iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
	if (iter) {
		/* Since lockres' have the lifetime of their container
		 * (which can be inodes, ocfs2_supers, etc) we want to
		 * copy this out to a temporary lockres while still
		 * under the spinlock. Obviously after this we can't
		 * trust any pointers on the copy returned, but that's
		 * ok as the information we want isn't typically held
		 * in them. */
		priv->p_tmp_res = *iter;
		iter = &priv->p_tmp_res;
	}
	spin_unlock(&ocfs2_dlm_tracking_lock);

	return iter;
}

/* seq_file ->stop: nothing to undo, ->start and ->next drop the lock. */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
}

/*
 * seq_file ->next: advance from the snapshot @v to the next real
 * lockres, re-insert the dummy iteration entry right after it, and
 * return a fresh snapshot (or NULL at the end of the list).
 */
static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct ocfs2_dlm_seq_priv *priv = m->private;
	struct ocfs2_lock_res *iter = v;
	struct ocfs2_lock_res *dummy = &priv->p_iter_res;

	spin_lock(&ocfs2_dlm_tracking_lock);
	iter = ocfs2_dlm_next_res(iter, priv);
	/* The dummy is unlinked even when no further entry was found. */
	list_del_init(&dummy->l_debug_list);
	if (iter) {
		list_add(&dummy->l_debug_list, &iter->l_debug_list);
		priv->p_tmp_res = *iter;
		iter = &priv->p_tmp_res;
	}
	spin_unlock(&ocfs2_dlm_tracking_lock);

	return iter;
}

/* So that debugfs.ocfs2 can determine which format is being used */
#define OCFS2_DLM_DEBUG_STR_VERSION 2
/*
 * seq_file ->show: emit one tab-separated line for the lockres
 * snapshot @v: format version, lock name, state fields, the raw LVB
 * bytes and (format version 2) the lock statistics.
 *
 * Returns 0, or -EINVAL if @v is NULL.
 */
static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
{
	int i;
	char *lvb;
	struct ocfs2_lock_res *lockres = v;

	if (!lockres)
		return -EINVAL;

	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);

	/* Dentry lock names are printed as a name prefix followed by
	 * the inode number in hex. */
	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
		seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
			   lockres->l_name,
			   (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
	else
		seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);

	seq_printf(m, "%d\t"
		   "0x%lx\t"
		   "0x%x\t"
		   "0x%x\t"
		   "%u\t"
		   "%u\t"
		   "%d\t"
		   "%d\t",
		   lockres->l_level,
		   lockres->l_flags,
		   lockres->l_action,
		   lockres->l_unlock_action,
		   lockres->l_ro_holders,
		   lockres->l_ex_holders,
		   lockres->l_requested,
		   lockres->l_blocking);

	/* Dump the raw LVB */
	/* NOTE(review): lvb is a plain char pointer, so where char is
	 * signed a byte >= 0x80 is presumably printed sign-extended by
	 * "%x" - confirm consumers of this file cope before changing. */
	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
	for(i = 0; i < DLM_LVB_LEN; i++)
		seq_printf(m, "0x%x\t", lvb[i]);

	/* Without CONFIG_OCFS2_FS_STATS the statistics columns are
	 * still printed, as zeroes, so the column layout is the same. */
#ifdef CONFIG_OCFS2_FS_STATS
# define lock_num_prmode(_l)		(_l)->l_lock_num_prmode
# define lock_num_exmode(_l)		(_l)->l_lock_num_exmode
# define lock_num_prmode_failed(_l)	(_l)->l_lock_num_prmode_failed
# define lock_num_exmode_failed(_l)	(_l)->l_lock_num_exmode_failed
# define lock_total_prmode(_l)		(_l)->l_lock_total_prmode
# define lock_total_exmode(_l)		(_l)->l_lock_total_exmode
# define lock_max_prmode(_l)		(_l)->l_lock_max_prmode
# define lock_max_exmode(_l)		(_l)->l_lock_max_exmode
# define lock_refresh(_l)		(_l)->l_lock_refresh
#else
# define lock_num_prmode(_l)		(0ULL)
# define lock_num_exmode(_l)		(0ULL)
# define lock_num_prmode_failed(_l)	(0)
# define lock_num_exmode_failed(_l)	(0)
# define lock_total_prmode(_l)		(0ULL)
# define lock_total_exmode(_l)		(0ULL)
# define lock_max_prmode(_l)		(0)
# define lock_max_exmode(_l)		(0)
# define lock_refresh(_l)		(0)
#endif
	/* The following seq_print was added in version 2 of this output */
	seq_printf(m, "%llu\t"
		   "%llu\t"
		   "%u\t"
		   "%u\t"
		   "%llu\t"
		   "%llu\t"
		   "%u\t"
		   "%u\t"
		   "%u\t",
		   lock_num_prmode(lockres),
		   lock_num_exmode(lockres),
		   lock_num_prmode_failed(lockres),
		   lock_num_exmode_failed(lockres),
		   lock_total_prmode(lockres),
		   lock_total_exmode(lockres),
		   lock_max_prmode(lockres),
		   lock_max_exmode(lockres),
		   lock_refresh(lockres));

	/* End the line */
	seq_printf(m, "\n");
	return 0;
}

287190d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = { 2872ccd979bdSMark Fasheh .start = ocfs2_dlm_seq_start, 2873ccd979bdSMark Fasheh .stop = ocfs2_dlm_seq_stop, 2874ccd979bdSMark Fasheh .next = ocfs2_dlm_seq_next, 2875ccd979bdSMark Fasheh .show = ocfs2_dlm_seq_show, 2876ccd979bdSMark Fasheh }; 2877ccd979bdSMark Fasheh 2878ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2879ccd979bdSMark Fasheh { 2880ccd979bdSMark Fasheh struct seq_file *seq = (struct seq_file *) file->private_data; 2881ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = seq->private; 2882ccd979bdSMark Fasheh struct ocfs2_lock_res *res = &priv->p_iter_res; 2883ccd979bdSMark Fasheh 2884ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 2885ccd979bdSMark Fasheh ocfs2_put_dlm_debug(priv->p_dlm_debug); 2886ccd979bdSMark Fasheh return seq_release_private(inode, file); 2887ccd979bdSMark Fasheh } 2888ccd979bdSMark Fasheh 2889ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 2890ccd979bdSMark Fasheh { 2891ccd979bdSMark Fasheh int ret; 2892ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv; 2893ccd979bdSMark Fasheh struct seq_file *seq; 2894ccd979bdSMark Fasheh struct ocfs2_super *osb; 2895ccd979bdSMark Fasheh 2896ccd979bdSMark Fasheh priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); 2897ccd979bdSMark Fasheh if (!priv) { 2898ccd979bdSMark Fasheh ret = -ENOMEM; 2899ccd979bdSMark Fasheh mlog_errno(ret); 2900ccd979bdSMark Fasheh goto out; 2901ccd979bdSMark Fasheh } 29028e18e294STheodore Ts'o osb = inode->i_private; 2903ccd979bdSMark Fasheh ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2904ccd979bdSMark Fasheh priv->p_dlm_debug = osb->osb_dlm_debug; 2905ccd979bdSMark Fasheh INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2906ccd979bdSMark Fasheh 2907ccd979bdSMark Fasheh ret = seq_open(file, &ocfs2_dlm_seq_ops); 2908ccd979bdSMark Fasheh if (ret) { 2909ccd979bdSMark Fasheh 
kfree(priv); 2910ccd979bdSMark Fasheh mlog_errno(ret); 2911ccd979bdSMark Fasheh goto out; 2912ccd979bdSMark Fasheh } 2913ccd979bdSMark Fasheh 2914ccd979bdSMark Fasheh seq = (struct seq_file *) file->private_data; 2915ccd979bdSMark Fasheh seq->private = priv; 2916ccd979bdSMark Fasheh 2917ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(&priv->p_iter_res, 2918ccd979bdSMark Fasheh priv->p_dlm_debug); 2919ccd979bdSMark Fasheh 2920ccd979bdSMark Fasheh out: 2921ccd979bdSMark Fasheh return ret; 2922ccd979bdSMark Fasheh } 2923ccd979bdSMark Fasheh 29244b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = { 2925ccd979bdSMark Fasheh .open = ocfs2_dlm_debug_open, 2926ccd979bdSMark Fasheh .release = ocfs2_dlm_debug_release, 2927ccd979bdSMark Fasheh .read = seq_read, 2928ccd979bdSMark Fasheh .llseek = seq_lseek, 2929ccd979bdSMark Fasheh }; 2930ccd979bdSMark Fasheh 2931ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 2932ccd979bdSMark Fasheh { 2933ccd979bdSMark Fasheh int ret = 0; 2934ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2935ccd979bdSMark Fasheh 2936ccd979bdSMark Fasheh dlm_debug->d_locking_state = debugfs_create_file("locking_state", 2937ccd979bdSMark Fasheh S_IFREG|S_IRUSR, 2938ccd979bdSMark Fasheh osb->osb_debug_root, 2939ccd979bdSMark Fasheh osb, 2940ccd979bdSMark Fasheh &ocfs2_dlm_debug_fops); 2941ccd979bdSMark Fasheh if (!dlm_debug->d_locking_state) { 2942ccd979bdSMark Fasheh ret = -EINVAL; 2943ccd979bdSMark Fasheh mlog(ML_ERROR, 2944ccd979bdSMark Fasheh "Unable to create locking state debugfs file.\n"); 2945ccd979bdSMark Fasheh goto out; 2946ccd979bdSMark Fasheh } 2947ccd979bdSMark Fasheh 2948ccd979bdSMark Fasheh ocfs2_get_dlm_debug(dlm_debug); 2949ccd979bdSMark Fasheh out: 2950ccd979bdSMark Fasheh return ret; 2951ccd979bdSMark Fasheh } 2952ccd979bdSMark Fasheh 2953ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 2954ccd979bdSMark Fasheh { 
2955ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2956ccd979bdSMark Fasheh 2957ccd979bdSMark Fasheh if (dlm_debug) { 2958ccd979bdSMark Fasheh debugfs_remove(dlm_debug->d_locking_state); 2959ccd979bdSMark Fasheh ocfs2_put_dlm_debug(dlm_debug); 2960ccd979bdSMark Fasheh } 2961ccd979bdSMark Fasheh } 2962ccd979bdSMark Fasheh 2963ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb) 2964ccd979bdSMark Fasheh { 2965c271c5c2SSunil Mushran int status = 0; 29664670c46dSJoel Becker struct ocfs2_cluster_connection *conn = NULL; 2967ccd979bdSMark Fasheh 2968ccd979bdSMark Fasheh mlog_entry_void(); 2969ccd979bdSMark Fasheh 29700abd6d18SMark Fasheh if (ocfs2_mount_local(osb)) { 29710abd6d18SMark Fasheh osb->node_num = 0; 2972c271c5c2SSunil Mushran goto local; 29730abd6d18SMark Fasheh } 2974c271c5c2SSunil Mushran 2975ccd979bdSMark Fasheh status = ocfs2_dlm_init_debug(osb); 2976ccd979bdSMark Fasheh if (status < 0) { 2977ccd979bdSMark Fasheh mlog_errno(status); 2978ccd979bdSMark Fasheh goto bail; 2979ccd979bdSMark Fasheh } 2980ccd979bdSMark Fasheh 298134d024f8SMark Fasheh /* launch downconvert thread */ 298234d024f8SMark Fasheh osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); 298334d024f8SMark Fasheh if (IS_ERR(osb->dc_task)) { 298434d024f8SMark Fasheh status = PTR_ERR(osb->dc_task); 298534d024f8SMark Fasheh osb->dc_task = NULL; 2986ccd979bdSMark Fasheh mlog_errno(status); 2987ccd979bdSMark Fasheh goto bail; 2988ccd979bdSMark Fasheh } 2989ccd979bdSMark Fasheh 2990ccd979bdSMark Fasheh /* for now, uuid == domain */ 29919c6c877cSJoel Becker status = ocfs2_cluster_connect(osb->osb_cluster_stack, 29929c6c877cSJoel Becker osb->uuid_str, 29934670c46dSJoel Becker strlen(osb->uuid_str), 29944670c46dSJoel Becker ocfs2_do_node_down, osb, 29954670c46dSJoel Becker &conn); 29964670c46dSJoel Becker if (status) { 2997ccd979bdSMark Fasheh mlog_errno(status); 2998ccd979bdSMark Fasheh goto bail; 2999ccd979bdSMark Fasheh } 3000ccd979bdSMark 
Fasheh 30010abd6d18SMark Fasheh status = ocfs2_cluster_this_node(&osb->node_num); 30020abd6d18SMark Fasheh if (status < 0) { 30030abd6d18SMark Fasheh mlog_errno(status); 30040abd6d18SMark Fasheh mlog(ML_ERROR, 30050abd6d18SMark Fasheh "could not find this host's node number\n"); 3006286eaa95SJoel Becker ocfs2_cluster_disconnect(conn, 0); 30070abd6d18SMark Fasheh goto bail; 30080abd6d18SMark Fasheh } 30090abd6d18SMark Fasheh 3010c271c5c2SSunil Mushran local: 3011ccd979bdSMark Fasheh ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 3012ccd979bdSMark Fasheh ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 30136ca497a8Swengang wang ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb); 301483273932SSrinivas Eeda ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb); 3015ccd979bdSMark Fasheh 30164670c46dSJoel Becker osb->cconn = conn; 3017ccd979bdSMark Fasheh 3018ccd979bdSMark Fasheh status = 0; 3019ccd979bdSMark Fasheh bail: 3020ccd979bdSMark Fasheh if (status < 0) { 3021ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 302234d024f8SMark Fasheh if (osb->dc_task) 302334d024f8SMark Fasheh kthread_stop(osb->dc_task); 3024ccd979bdSMark Fasheh } 3025ccd979bdSMark Fasheh 3026ccd979bdSMark Fasheh mlog_exit(status); 3027ccd979bdSMark Fasheh return status; 3028ccd979bdSMark Fasheh } 3029ccd979bdSMark Fasheh 3030286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb, 3031286eaa95SJoel Becker int hangup_pending) 3032ccd979bdSMark Fasheh { 3033ccd979bdSMark Fasheh mlog_entry_void(); 3034ccd979bdSMark Fasheh 3035ccd979bdSMark Fasheh ocfs2_drop_osb_locks(osb); 3036ccd979bdSMark Fasheh 30374670c46dSJoel Becker /* 30384670c46dSJoel Becker * Now that we have dropped all locks and ocfs2_dismount_volume() 30394670c46dSJoel Becker * has disabled recovery, the DLM won't be talking to us. It's 30404670c46dSJoel Becker * safe to tear things down before disconnecting the cluster. 
30414670c46dSJoel Becker */ 30424670c46dSJoel Becker 304334d024f8SMark Fasheh if (osb->dc_task) { 304434d024f8SMark Fasheh kthread_stop(osb->dc_task); 304534d024f8SMark Fasheh osb->dc_task = NULL; 3046ccd979bdSMark Fasheh } 3047ccd979bdSMark Fasheh 3048ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_super_lockres); 3049ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_rename_lockres); 30506ca497a8Swengang wang ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); 305183273932SSrinivas Eeda ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); 3052ccd979bdSMark Fasheh 3053286eaa95SJoel Becker ocfs2_cluster_disconnect(osb->cconn, hangup_pending); 30544670c46dSJoel Becker osb->cconn = NULL; 3055ccd979bdSMark Fasheh 3056ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 3057ccd979bdSMark Fasheh 3058ccd979bdSMark Fasheh mlog_exit_void(); 3059ccd979bdSMark Fasheh } 3060ccd979bdSMark Fasheh 3061c0e41338SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error) 3062ccd979bdSMark Fasheh { 3063a796d286SJoel Becker struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb); 3064ccd979bdSMark Fasheh unsigned long flags; 3065ccd979bdSMark Fasheh 3066ccd979bdSMark Fasheh mlog_entry_void(); 3067ccd979bdSMark Fasheh 3068ccd979bdSMark Fasheh mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name, 3069ccd979bdSMark Fasheh lockres->l_unlock_action); 3070ccd979bdSMark Fasheh 3071ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3072de551246SJoel Becker if (error) { 30737431cd7eSJoel Becker mlog(ML_ERROR, "Dlm passes error %d for lock %s, " 30747431cd7eSJoel Becker "unlock_action %d\n", error, lockres->l_name, 3075ccd979bdSMark Fasheh lockres->l_unlock_action); 3076ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3077d92bc512SColy Li mlog_exit_void(); 3078ccd979bdSMark Fasheh return; 3079ccd979bdSMark Fasheh } 3080ccd979bdSMark Fasheh 3081ccd979bdSMark Fasheh switch(lockres->l_unlock_action) { 
3082ccd979bdSMark Fasheh case OCFS2_UNLOCK_CANCEL_CONVERT: 3083ccd979bdSMark Fasheh mlog(0, "Cancel convert success for %s\n", lockres->l_name); 3084ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 3085a4b91965SSunil Mushran /* Downconvert thread may have requeued this lock, we 3086a4b91965SSunil Mushran * need to wake it. */ 3087a4b91965SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 3088a4b91965SSunil Mushran ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres)); 3089ccd979bdSMark Fasheh break; 3090ccd979bdSMark Fasheh case OCFS2_UNLOCK_DROP_LOCK: 3091bd3e7610SJoel Becker lockres->l_level = DLM_LOCK_IV; 3092ccd979bdSMark Fasheh break; 3093ccd979bdSMark Fasheh default: 3094ccd979bdSMark Fasheh BUG(); 3095ccd979bdSMark Fasheh } 3096ccd979bdSMark Fasheh 3097ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 3098ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 3099ccd979bdSMark Fasheh wake_up(&lockres->l_event); 310007f9eebcSDavid Teigland spin_unlock_irqrestore(&lockres->l_lock, flags); 3101ccd979bdSMark Fasheh 3102ccd979bdSMark Fasheh mlog_exit_void(); 3103ccd979bdSMark Fasheh } 3104ccd979bdSMark Fasheh 3105ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb, 31060d5dc6c2SMark Fasheh struct ocfs2_lock_res *lockres) 3107ccd979bdSMark Fasheh { 31087431cd7eSJoel Becker int ret; 3109ccd979bdSMark Fasheh unsigned long flags; 3110bd3e7610SJoel Becker u32 lkm_flags = 0; 3111ccd979bdSMark Fasheh 3112ccd979bdSMark Fasheh /* We didn't get anywhere near actually using this lockres. 
*/ 3113ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 3114ccd979bdSMark Fasheh goto out; 3115ccd979bdSMark Fasheh 3116b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 3117bd3e7610SJoel Becker lkm_flags |= DLM_LKF_VALBLK; 3118b80fc012SMark Fasheh 3119ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3120ccd979bdSMark Fasheh 3121ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 3122ccd979bdSMark Fasheh "lockres %s, flags 0x%lx\n", 3123ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3124ccd979bdSMark Fasheh 3125ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_BUSY) { 3126ccd979bdSMark Fasheh mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " 3127ccd979bdSMark Fasheh "%u, unlock_action = %u\n", 3128ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_action, 3129ccd979bdSMark Fasheh lockres->l_unlock_action); 3130ccd979bdSMark Fasheh 3131ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3132ccd979bdSMark Fasheh 3133ccd979bdSMark Fasheh /* XXX: Today we just wait on any busy 3134ccd979bdSMark Fasheh * locks... Perhaps we need to cancel converts in the 3135ccd979bdSMark Fasheh * future? 
*/ 3136ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 3137ccd979bdSMark Fasheh 3138ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3139ccd979bdSMark Fasheh } 3140ccd979bdSMark Fasheh 31410d5dc6c2SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 31420d5dc6c2SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 3143bd3e7610SJoel Becker lockres->l_level == DLM_LOCK_EX && 31440d5dc6c2SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 31450d5dc6c2SMark Fasheh lockres->l_ops->set_lvb(lockres); 31460d5dc6c2SMark Fasheh } 3147ccd979bdSMark Fasheh 3148ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) 3149ccd979bdSMark Fasheh mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 3150ccd979bdSMark Fasheh lockres->l_name); 3151ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 3152ccd979bdSMark Fasheh mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); 3153ccd979bdSMark Fasheh 3154ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 3155ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3156ccd979bdSMark Fasheh goto out; 3157ccd979bdSMark Fasheh } 3158ccd979bdSMark Fasheh 3159ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); 3160ccd979bdSMark Fasheh 3161ccd979bdSMark Fasheh /* make sure we never get here while waiting for an ast to 3162ccd979bdSMark Fasheh * fire. */ 3163ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_INVALID); 3164ccd979bdSMark Fasheh 3165ccd979bdSMark Fasheh /* is this necessary? 
*/ 3166ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3167ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; 3168ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3169ccd979bdSMark Fasheh 3170ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 3171ccd979bdSMark Fasheh 3172a796d286SJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags); 31737431cd7eSJoel Becker if (ret) { 31747431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3175ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 3176cf0acdcdSJoel Becker ocfs2_dlm_dump_lksb(&lockres->l_lksb); 3177ccd979bdSMark Fasheh BUG(); 3178ccd979bdSMark Fasheh } 317973ac36eaSColy Li mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n", 3180ccd979bdSMark Fasheh lockres->l_name); 3181ccd979bdSMark Fasheh 3182ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 3183ccd979bdSMark Fasheh out: 3184ccd979bdSMark Fasheh mlog_exit(0); 3185ccd979bdSMark Fasheh return 0; 3186ccd979bdSMark Fasheh } 3187ccd979bdSMark Fasheh 3188ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be 3189ccd979bdSMark Fasheh * queued if blocking, but we still may have to wait on it 319034d024f8SMark Fasheh * being dequeued from the downconvert thread before we can consider 3191ccd979bdSMark Fasheh * it safe to drop. 3192ccd979bdSMark Fasheh * 3193ccd979bdSMark Fasheh * You can *not* attempt to call cluster_lock on this lockres anymore. 
*/ 3194ccd979bdSMark Fasheh void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 3195ccd979bdSMark Fasheh { 3196ccd979bdSMark Fasheh int status; 3197ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 3198ccd979bdSMark Fasheh unsigned long flags; 3199ccd979bdSMark Fasheh 3200ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 3201ccd979bdSMark Fasheh 3202ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3203ccd979bdSMark Fasheh lockres->l_flags |= OCFS2_LOCK_FREEING; 3204ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 3205ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 3206ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3207ccd979bdSMark Fasheh 3208ccd979bdSMark Fasheh mlog(0, "Waiting on lockres %s\n", lockres->l_name); 3209ccd979bdSMark Fasheh 3210ccd979bdSMark Fasheh status = ocfs2_wait_for_mask(&mw); 3211ccd979bdSMark Fasheh if (status) 3212ccd979bdSMark Fasheh mlog_errno(status); 3213ccd979bdSMark Fasheh 3214ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3215ccd979bdSMark Fasheh } 3216ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3217ccd979bdSMark Fasheh } 3218ccd979bdSMark Fasheh 3219d680efe9SMark Fasheh void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, 3220d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3221d680efe9SMark Fasheh { 3222d680efe9SMark Fasheh int ret; 3223d680efe9SMark Fasheh 3224d680efe9SMark Fasheh ocfs2_mark_lockres_freeing(lockres); 32250d5dc6c2SMark Fasheh ret = ocfs2_drop_lock(osb, lockres); 3226d680efe9SMark Fasheh if (ret) 3227d680efe9SMark Fasheh mlog_errno(ret); 3228d680efe9SMark Fasheh } 3229d680efe9SMark Fasheh 3230ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 3231ccd979bdSMark Fasheh { 3232d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 3233d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 
32346ca497a8Swengang wang ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres); 323583273932SSrinivas Eeda ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres); 3236ccd979bdSMark Fasheh } 3237ccd979bdSMark Fasheh 3238ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode) 3239ccd979bdSMark Fasheh { 3240ccd979bdSMark Fasheh int status, err; 3241ccd979bdSMark Fasheh 3242ccd979bdSMark Fasheh mlog_entry_void(); 3243ccd979bdSMark Fasheh 3244ccd979bdSMark Fasheh /* No need to call ocfs2_mark_lockres_freeing here - 3245ccd979bdSMark Fasheh * ocfs2_clear_inode has done it for us. */ 3246ccd979bdSMark Fasheh 3247ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 324850008630STiger Yang &OCFS2_I(inode)->ip_open_lockres); 3249ccd979bdSMark Fasheh if (err < 0) 3250ccd979bdSMark Fasheh mlog_errno(err); 3251ccd979bdSMark Fasheh 3252ccd979bdSMark Fasheh status = err; 3253ccd979bdSMark Fasheh 3254ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 3255e63aecb6SMark Fasheh &OCFS2_I(inode)->ip_inode_lockres); 3256ccd979bdSMark Fasheh if (err < 0) 3257ccd979bdSMark Fasheh mlog_errno(err); 3258ccd979bdSMark Fasheh if (err < 0 && !status) 3259ccd979bdSMark Fasheh status = err; 3260ccd979bdSMark Fasheh 3261ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 32620d5dc6c2SMark Fasheh &OCFS2_I(inode)->ip_rw_lockres); 3263ccd979bdSMark Fasheh if (err < 0) 3264ccd979bdSMark Fasheh mlog_errno(err); 3265ccd979bdSMark Fasheh if (err < 0 && !status) 3266ccd979bdSMark Fasheh status = err; 3267ccd979bdSMark Fasheh 3268ccd979bdSMark Fasheh mlog_exit(status); 3269ccd979bdSMark Fasheh return status; 3270ccd979bdSMark Fasheh } 3271ccd979bdSMark Fasheh 3272de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 3273ccd979bdSMark Fasheh int new_level) 3274ccd979bdSMark Fasheh { 3275ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3276ccd979bdSMark Fasheh 3277bd3e7610SJoel Becker 
BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); 3278ccd979bdSMark Fasheh 3279ccd979bdSMark Fasheh if (lockres->l_level <= new_level) { 3280bd3e7610SJoel Becker mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n", 3281ccd979bdSMark Fasheh lockres->l_level, new_level); 3282ccd979bdSMark Fasheh BUG(); 3283ccd979bdSMark Fasheh } 3284ccd979bdSMark Fasheh 3285ccd979bdSMark Fasheh mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", 3286ccd979bdSMark Fasheh lockres->l_name, new_level, lockres->l_blocking); 3287ccd979bdSMark Fasheh 3288ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_DOWNCONVERT; 3289ccd979bdSMark Fasheh lockres->l_requested = new_level; 3290ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 3291de551246SJoel Becker return lockres_set_pending(lockres); 3292ccd979bdSMark Fasheh } 3293ccd979bdSMark Fasheh 3294ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 3295ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3296ccd979bdSMark Fasheh int new_level, 3297de551246SJoel Becker int lvb, 3298de551246SJoel Becker unsigned int generation) 3299ccd979bdSMark Fasheh { 3300bd3e7610SJoel Becker int ret; 3301bd3e7610SJoel Becker u32 dlm_flags = DLM_LKF_CONVERT; 3302ccd979bdSMark Fasheh 3303ccd979bdSMark Fasheh mlog_entry_void(); 3304ccd979bdSMark Fasheh 3305ccd979bdSMark Fasheh if (lvb) 3306bd3e7610SJoel Becker dlm_flags |= DLM_LKF_VALBLK; 3307ccd979bdSMark Fasheh 33084670c46dSJoel Becker ret = ocfs2_dlm_lock(osb->cconn, 3309ccd979bdSMark Fasheh new_level, 3310ccd979bdSMark Fasheh &lockres->l_lksb, 3311ccd979bdSMark Fasheh dlm_flags, 3312ccd979bdSMark Fasheh lockres->l_name, 3313a796d286SJoel Becker OCFS2_LOCK_ID_MAX_LEN - 1); 3314de551246SJoel Becker lockres_clear_pending(lockres, generation, osb); 33157431cd7eSJoel Becker if (ret) { 33167431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); 3317ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 3318ccd979bdSMark Fasheh goto bail; 
3319ccd979bdSMark Fasheh } 3320ccd979bdSMark Fasheh 3321ccd979bdSMark Fasheh ret = 0; 3322ccd979bdSMark Fasheh bail: 3323ccd979bdSMark Fasheh mlog_exit(ret); 3324ccd979bdSMark Fasheh return ret; 3325ccd979bdSMark Fasheh } 3326ccd979bdSMark Fasheh 332724ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ 3328ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 3329ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3330ccd979bdSMark Fasheh { 3331ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 3332ccd979bdSMark Fasheh 3333ccd979bdSMark Fasheh mlog_entry_void(); 3334ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 3335ccd979bdSMark Fasheh 3336ccd979bdSMark Fasheh if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 3337ccd979bdSMark Fasheh /* If we're already trying to cancel a lock conversion 3338ccd979bdSMark Fasheh * then just drop the spinlock and allow the caller to 3339ccd979bdSMark Fasheh * requeue this lock. */ 3340ccd979bdSMark Fasheh 3341ccd979bdSMark Fasheh mlog(0, "Lockres %s, skip convert\n", lockres->l_name); 3342ccd979bdSMark Fasheh return 0; 3343ccd979bdSMark Fasheh } 3344ccd979bdSMark Fasheh 3345ccd979bdSMark Fasheh /* were we in a convert when we got the bast fire? */ 3346ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 3347ccd979bdSMark Fasheh lockres->l_action != OCFS2_AST_DOWNCONVERT); 3348ccd979bdSMark Fasheh /* set things up for the unlockast to know to just 3349ccd979bdSMark Fasheh * clear out the ast_action and unset busy, etc. 
*/ 3350ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 3351ccd979bdSMark Fasheh 3352ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 3353ccd979bdSMark Fasheh "lock %s, invalid flags: 0x%lx\n", 3354ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 3355ccd979bdSMark Fasheh 3356ccd979bdSMark Fasheh return 1; 3357ccd979bdSMark Fasheh } 3358ccd979bdSMark Fasheh 3359ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 3360ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3361ccd979bdSMark Fasheh { 3362ccd979bdSMark Fasheh int ret; 3363ccd979bdSMark Fasheh 3364ccd979bdSMark Fasheh mlog_entry_void(); 3365ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 3366ccd979bdSMark Fasheh 33674670c46dSJoel Becker ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, 3368a796d286SJoel Becker DLM_LKF_CANCEL); 33697431cd7eSJoel Becker if (ret) { 33707431cd7eSJoel Becker ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); 3371ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 0); 3372ccd979bdSMark Fasheh } 3373ccd979bdSMark Fasheh 337424ef1815SJoel Becker mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); 3375ccd979bdSMark Fasheh 3376ccd979bdSMark Fasheh mlog_exit(ret); 3377ccd979bdSMark Fasheh return ret; 3378ccd979bdSMark Fasheh } 3379ccd979bdSMark Fasheh 3380b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb, 3381ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 3382cc567d89SMark Fasheh struct ocfs2_unblock_ctl *ctl) 3383ccd979bdSMark Fasheh { 3384ccd979bdSMark Fasheh unsigned long flags; 3385ccd979bdSMark Fasheh int blocking; 3386ccd979bdSMark Fasheh int new_level; 3387079b8057SSunil Mushran int level; 3388ccd979bdSMark Fasheh int ret = 0; 33895ef0d4eaSMark Fasheh int set_lvb = 0; 3390de551246SJoel Becker unsigned int gen; 3391ccd979bdSMark Fasheh 3392ccd979bdSMark Fasheh mlog_entry_void(); 3393ccd979bdSMark Fasheh 
3394ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3395ccd979bdSMark Fasheh 3396ccd979bdSMark Fasheh recheck: 3397db0f6ce6SSunil Mushran /* 3398db0f6ce6SSunil Mushran * Is it still blocking? If not, we have no more work to do. 3399db0f6ce6SSunil Mushran */ 3400db0f6ce6SSunil Mushran if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) { 3401db0f6ce6SSunil Mushran BUG_ON(lockres->l_blocking != DLM_LOCK_NL); 3402db0f6ce6SSunil Mushran spin_unlock_irqrestore(&lockres->l_lock, flags); 3403db0f6ce6SSunil Mushran ret = 0; 3404db0f6ce6SSunil Mushran goto leave; 3405db0f6ce6SSunil Mushran } 3406db0f6ce6SSunil Mushran 3407ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 3408de551246SJoel Becker /* XXX 3409de551246SJoel Becker * This is a *big* race. The OCFS2_LOCK_PENDING flag 3410de551246SJoel Becker * exists entirely for one reason - another thread has set 3411de551246SJoel Becker * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). 3412de551246SJoel Becker * 3413de551246SJoel Becker * If we do ocfs2_cancel_convert() before the other thread 3414de551246SJoel Becker * calls dlm_lock(), our cancel will do nothing. We will 3415de551246SJoel Becker * get no ast, and we will have no way of knowing the 3416de551246SJoel Becker * cancel failed. Meanwhile, the other thread will call 3417de551246SJoel Becker * into dlm_lock() and wait...forever. 3418de551246SJoel Becker * 3419de551246SJoel Becker * Why forever? Because another node has asked for the 3420de551246SJoel Becker * lock first; that's why we're here in unblock_lock(). 3421de551246SJoel Becker * 3422de551246SJoel Becker * The solution is OCFS2_LOCK_PENDING. When PENDING is 3423de551246SJoel Becker * set, we just requeue the unblock. Only when the other 3424de551246SJoel Becker * thread has called dlm_lock() and cleared PENDING will 3425de551246SJoel Becker * we then cancel their request. 
3426de551246SJoel Becker * 3427de551246SJoel Becker * All callers of dlm_lock() must set OCFS2_DLM_PENDING 3428de551246SJoel Becker * at the same time they set OCFS2_DLM_BUSY. They must 3429de551246SJoel Becker * clear OCFS2_DLM_PENDING after dlm_lock() returns. 3430de551246SJoel Becker */ 3431de551246SJoel Becker if (lockres->l_flags & OCFS2_LOCK_PENDING) 3432de551246SJoel Becker goto leave_requeue; 3433de551246SJoel Becker 3434d680efe9SMark Fasheh ctl->requeue = 1; 3435ccd979bdSMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 3436ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3437ccd979bdSMark Fasheh if (ret) { 3438ccd979bdSMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 3439ccd979bdSMark Fasheh if (ret < 0) 3440ccd979bdSMark Fasheh mlog_errno(ret); 3441ccd979bdSMark Fasheh } 3442ccd979bdSMark Fasheh goto leave; 3443ccd979bdSMark Fasheh } 3444ccd979bdSMark Fasheh 3445a1912826SSunil Mushran /* 3446a1912826SSunil Mushran * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is 3447a1912826SSunil Mushran * set when the ast is received for an upconvert just before the 3448a1912826SSunil Mushran * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast 3449a1912826SSunil Mushran * on the heels of the ast, we want to delay the downconvert just 3450a1912826SSunil Mushran * enough to allow the up requestor to do its task. Because this 3451a1912826SSunil Mushran * lock is in the blocked queue, the lock will be downconverted 3452a1912826SSunil Mushran * as soon as the requestor is done with the lock. 3453a1912826SSunil Mushran */ 3454a1912826SSunil Mushran if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) 3455a1912826SSunil Mushran goto leave_requeue; 3456a1912826SSunil Mushran 34570d74125aSSunil Mushran /* 34580d74125aSSunil Mushran * How can we block and yet be at NL? We were trying to upconvert 34590d74125aSSunil Mushran * from NL and got canceled. 
The code comes back here, and now 34600d74125aSSunil Mushran * we notice and clear BLOCKING. 34610d74125aSSunil Mushran */ 34620d74125aSSunil Mushran if (lockres->l_level == DLM_LOCK_NL) { 34630d74125aSSunil Mushran BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders); 34640d74125aSSunil Mushran lockres->l_blocking = DLM_LOCK_NL; 34650d74125aSSunil Mushran lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 34660d74125aSSunil Mushran spin_unlock_irqrestore(&lockres->l_lock, flags); 34670d74125aSSunil Mushran goto leave; 34680d74125aSSunil Mushran } 34690d74125aSSunil Mushran 3470ccd979bdSMark Fasheh /* if we're blocking an exclusive and we have *any* holders, 3471ccd979bdSMark Fasheh * then requeue. */ 3472bd3e7610SJoel Becker if ((lockres->l_blocking == DLM_LOCK_EX) 3473f7fbfdd1SMark Fasheh && (lockres->l_ex_holders || lockres->l_ro_holders)) 3474f7fbfdd1SMark Fasheh goto leave_requeue; 3475ccd979bdSMark Fasheh 3476ccd979bdSMark Fasheh /* If it's a PR we're blocking, then only 3477ccd979bdSMark Fasheh * requeue if we've got any EX holders */ 3478bd3e7610SJoel Becker if (lockres->l_blocking == DLM_LOCK_PR && 3479f7fbfdd1SMark Fasheh lockres->l_ex_holders) 3480f7fbfdd1SMark Fasheh goto leave_requeue; 3481f7fbfdd1SMark Fasheh 3482f7fbfdd1SMark Fasheh /* 3483f7fbfdd1SMark Fasheh * Can we get a lock in this state if the holder counts are 3484f7fbfdd1SMark Fasheh * zero? The meta data unblock code used to check this. 
3485f7fbfdd1SMark Fasheh */ 3486f7fbfdd1SMark Fasheh if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 3487f7fbfdd1SMark Fasheh && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) 3488f7fbfdd1SMark Fasheh goto leave_requeue; 3489ccd979bdSMark Fasheh 349016d5b956SMark Fasheh new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 349116d5b956SMark Fasheh 349216d5b956SMark Fasheh if (lockres->l_ops->check_downconvert 349316d5b956SMark Fasheh && !lockres->l_ops->check_downconvert(lockres, new_level)) 349416d5b956SMark Fasheh goto leave_requeue; 349516d5b956SMark Fasheh 3496ccd979bdSMark Fasheh /* If we get here, then we know that there are no more 3497ccd979bdSMark Fasheh * incompatible holders (and anyone asking for an incompatible 3498ccd979bdSMark Fasheh * lock is blocked). We can now downconvert the lock */ 3499cc567d89SMark Fasheh if (!lockres->l_ops->downconvert_worker) 3500ccd979bdSMark Fasheh goto downconvert; 3501ccd979bdSMark Fasheh 3502ccd979bdSMark Fasheh /* Some lockres types want to do a bit of work before 3503ccd979bdSMark Fasheh * downconverting a lock. Allow that here. The worker function 3504ccd979bdSMark Fasheh * may sleep, so we save off a copy of what we're blocking as 3505ccd979bdSMark Fasheh * it may change while we're not holding the spin lock. 
*/ 3506ccd979bdSMark Fasheh blocking = lockres->l_blocking; 3507079b8057SSunil Mushran level = lockres->l_level; 3508ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3509ccd979bdSMark Fasheh 3510cc567d89SMark Fasheh ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 3511d680efe9SMark Fasheh 3512d680efe9SMark Fasheh if (ctl->unblock_action == UNBLOCK_STOP_POST) 3513d680efe9SMark Fasheh goto leave; 3514ccd979bdSMark Fasheh 3515ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3516079b8057SSunil Mushran if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) { 3517ccd979bdSMark Fasheh /* If this changed underneath us, then we can't drop 3518ccd979bdSMark Fasheh * it just yet. */ 3519ccd979bdSMark Fasheh goto recheck; 3520ccd979bdSMark Fasheh } 3521ccd979bdSMark Fasheh 3522ccd979bdSMark Fasheh downconvert: 3523d680efe9SMark Fasheh ctl->requeue = 0; 3524ccd979bdSMark Fasheh 35255ef0d4eaSMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 3526bd3e7610SJoel Becker if (lockres->l_level == DLM_LOCK_EX) 35275ef0d4eaSMark Fasheh set_lvb = 1; 35285ef0d4eaSMark Fasheh 35295ef0d4eaSMark Fasheh /* 35305ef0d4eaSMark Fasheh * We only set the lvb if the lock has been fully 35315ef0d4eaSMark Fasheh * refreshed - otherwise we risk setting stale 35325ef0d4eaSMark Fasheh * data. Otherwise, there's no need to actually clear 35335ef0d4eaSMark Fasheh * out the lvb here as it's value is still valid. 
35345ef0d4eaSMark Fasheh */ 35355ef0d4eaSMark Fasheh if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 35365ef0d4eaSMark Fasheh lockres->l_ops->set_lvb(lockres); 35375ef0d4eaSMark Fasheh } 35385ef0d4eaSMark Fasheh 3539de551246SJoel Becker gen = ocfs2_prepare_downconvert(lockres, new_level); 3540ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3541de551246SJoel Becker ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, 3542de551246SJoel Becker gen); 3543de551246SJoel Becker 3544ccd979bdSMark Fasheh leave: 3545ccd979bdSMark Fasheh mlog_exit(ret); 3546ccd979bdSMark Fasheh return ret; 3547f7fbfdd1SMark Fasheh 3548f7fbfdd1SMark Fasheh leave_requeue: 3549f7fbfdd1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3550f7fbfdd1SMark Fasheh ctl->requeue = 1; 3551f7fbfdd1SMark Fasheh 3552f7fbfdd1SMark Fasheh mlog_exit(0); 3553f7fbfdd1SMark Fasheh return 0; 3554ccd979bdSMark Fasheh } 3555ccd979bdSMark Fasheh 3556d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 3557ccd979bdSMark Fasheh int blocking) 3558ccd979bdSMark Fasheh { 3559ccd979bdSMark Fasheh struct inode *inode; 3560ccd979bdSMark Fasheh struct address_space *mapping; 3561ccd979bdSMark Fasheh 3562ccd979bdSMark Fasheh inode = ocfs2_lock_res_inode(lockres); 3563ccd979bdSMark Fasheh mapping = inode->i_mapping; 3564ccd979bdSMark Fasheh 35651044e401SMark Fasheh if (!S_ISREG(inode->i_mode)) 3566f1f54068SMark Fasheh goto out; 3567f1f54068SMark Fasheh 35687f4a2a97SMark Fasheh /* 35697f4a2a97SMark Fasheh * We need this before the filemap_fdatawrite() so that it can 35707f4a2a97SMark Fasheh * transfer the dirty bit from the PTE to the 35717f4a2a97SMark Fasheh * page. Unfortunately this means that even for EX->PR 35727f4a2a97SMark Fasheh * downconverts, we'll lose our mappings and have to build 35737f4a2a97SMark Fasheh * them up again. 
35747f4a2a97SMark Fasheh */ 35757f4a2a97SMark Fasheh unmap_mapping_range(mapping, 0, 0, 0); 35767f4a2a97SMark Fasheh 3577ccd979bdSMark Fasheh if (filemap_fdatawrite(mapping)) { 3578b0697053SMark Fasheh mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 3579b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 3580ccd979bdSMark Fasheh } 3581ccd979bdSMark Fasheh sync_mapping_buffers(mapping); 3582bd3e7610SJoel Becker if (blocking == DLM_LOCK_EX) { 3583ccd979bdSMark Fasheh truncate_inode_pages(mapping, 0); 3584ccd979bdSMark Fasheh } else { 3585ccd979bdSMark Fasheh /* We only need to wait on the I/O if we're not also 3586ccd979bdSMark Fasheh * truncating pages because truncate_inode_pages waits 3587ccd979bdSMark Fasheh * for us above. We don't truncate pages if we're 3588ccd979bdSMark Fasheh * blocking anything < EXMODE because we want to keep 3589ccd979bdSMark Fasheh * them around in that case. */ 3590ccd979bdSMark Fasheh filemap_fdatawait(mapping); 3591ccd979bdSMark Fasheh } 3592ccd979bdSMark Fasheh 3593f1f54068SMark Fasheh out: 3594d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3595ccd979bdSMark Fasheh } 3596ccd979bdSMark Fasheh 3597a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci, 3598a4338481STao Ma struct ocfs2_lock_res *lockres, 3599810d5aebSMark Fasheh int new_level) 3600810d5aebSMark Fasheh { 3601a4338481STao Ma int checkpointed = ocfs2_ci_fully_checkpointed(ci); 3602810d5aebSMark Fasheh 3603bd3e7610SJoel Becker BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3604bd3e7610SJoel Becker BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3605810d5aebSMark Fasheh 3606810d5aebSMark Fasheh if (checkpointed) 3607810d5aebSMark Fasheh return 1; 3608810d5aebSMark Fasheh 3609a4338481STao Ma ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci))); 3610810d5aebSMark Fasheh return 0; 3611810d5aebSMark Fasheh } 3612810d5aebSMark Fasheh 3613a4338481STao Ma static int 
ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3614a4338481STao Ma int new_level) 3615a4338481STao Ma { 3616a4338481STao Ma struct inode *inode = ocfs2_lock_res_inode(lockres); 3617a4338481STao Ma 3618a4338481STao Ma return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level); 3619a4338481STao Ma } 3620a4338481STao Ma 3621810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3622810d5aebSMark Fasheh { 3623810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 3624810d5aebSMark Fasheh 3625810d5aebSMark Fasheh __ocfs2_stuff_meta_lvb(inode); 3626810d5aebSMark Fasheh } 3627810d5aebSMark Fasheh 3628d680efe9SMark Fasheh /* 3629d680efe9SMark Fasheh * Does the final reference drop on our dentry lock. Right now this 363034d024f8SMark Fasheh * happens in the downconvert thread, but we could choose to simplify the 3631d680efe9SMark Fasheh * dlmglue API and push these off to the ocfs2_wq in the future. 3632d680efe9SMark Fasheh */ 3633d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 3634d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 3635d680efe9SMark Fasheh { 3636d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3637d680efe9SMark Fasheh ocfs2_dentry_lock_put(osb, dl); 3638d680efe9SMark Fasheh } 3639d680efe9SMark Fasheh 3640d680efe9SMark Fasheh /* 3641d680efe9SMark Fasheh * d_delete() matching dentries before the lock downconvert. 3642d680efe9SMark Fasheh * 3643d680efe9SMark Fasheh * At this point, any process waiting to destroy the 3644d680efe9SMark Fasheh * dentry_lock due to last ref count is stopped by the 3645d680efe9SMark Fasheh * OCFS2_LOCK_QUEUED flag. 
3646d680efe9SMark Fasheh * 3647d680efe9SMark Fasheh * We have two potential problems 3648d680efe9SMark Fasheh * 3649d680efe9SMark Fasheh * 1) If we do the last reference drop on our dentry_lock (via dput) 3650d680efe9SMark Fasheh * we'll wind up in ocfs2_release_dentry_lock(), waiting on 3651d680efe9SMark Fasheh * the downconvert to finish. Instead we take an elevated 3652d680efe9SMark Fasheh * reference and push the drop until after we've completed our 3653d680efe9SMark Fasheh * unblock processing. 3654d680efe9SMark Fasheh * 3655d680efe9SMark Fasheh * 2) There might be another process with a final reference, 3656d680efe9SMark Fasheh * waiting on us to finish processing. If this is the case, we 3657d680efe9SMark Fasheh * detect it and exit out - there's no more dentries anyway. 3658d680efe9SMark Fasheh */ 3659d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 3660d680efe9SMark Fasheh int blocking) 3661d680efe9SMark Fasheh { 3662d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 3663d680efe9SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); 3664d680efe9SMark Fasheh struct dentry *dentry; 3665d680efe9SMark Fasheh unsigned long flags; 3666d680efe9SMark Fasheh int extra_ref = 0; 3667d680efe9SMark Fasheh 3668d680efe9SMark Fasheh /* 3669d680efe9SMark Fasheh * This node is blocking another node from getting a read 3670d680efe9SMark Fasheh * lock. This happens when we've renamed within a 3671d680efe9SMark Fasheh * directory. We've forced the other nodes to d_delete(), but 3672d680efe9SMark Fasheh * we never actually dropped our lock because it's still 3673d680efe9SMark Fasheh * valid. The downconvert code will retain a PR for this node, 3674d680efe9SMark Fasheh * so there's no further work to do. 
3675d680efe9SMark Fasheh */ 3676bd3e7610SJoel Becker if (blocking == DLM_LOCK_PR) 3677d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3678d680efe9SMark Fasheh 3679d680efe9SMark Fasheh /* 3680d680efe9SMark Fasheh * Mark this inode as potentially orphaned. The code in 3681d680efe9SMark Fasheh * ocfs2_delete_inode() will figure out whether it actually 3682d680efe9SMark Fasheh * needs to be freed or not. 3683d680efe9SMark Fasheh */ 3684d680efe9SMark Fasheh spin_lock(&oi->ip_lock); 3685d680efe9SMark Fasheh oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 3686d680efe9SMark Fasheh spin_unlock(&oi->ip_lock); 3687d680efe9SMark Fasheh 3688d680efe9SMark Fasheh /* 3689d680efe9SMark Fasheh * Yuck. We need to make sure however that the check of 3690d680efe9SMark Fasheh * OCFS2_LOCK_FREEING and the extra reference are atomic with 3691d680efe9SMark Fasheh * respect to a reference decrement or the setting of that 3692d680efe9SMark Fasheh * flag. 3693d680efe9SMark Fasheh */ 3694d680efe9SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3695d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3696d680efe9SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_FREEING) 3697d680efe9SMark Fasheh && dl->dl_count) { 3698d680efe9SMark Fasheh dl->dl_count++; 3699d680efe9SMark Fasheh extra_ref = 1; 3700d680efe9SMark Fasheh } 3701d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3702d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3703d680efe9SMark Fasheh 3704d680efe9SMark Fasheh mlog(0, "extra_ref = %d\n", extra_ref); 3705d680efe9SMark Fasheh 3706d680efe9SMark Fasheh /* 3707d680efe9SMark Fasheh * We have a process waiting on us in ocfs2_dentry_iput(), 3708d680efe9SMark Fasheh * which means we can't have any more outstanding 3709d680efe9SMark Fasheh * aliases. There's no need to do any more work. 
3710d680efe9SMark Fasheh */ 3711d680efe9SMark Fasheh if (!extra_ref) 3712d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 3713d680efe9SMark Fasheh 3714d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3715d680efe9SMark Fasheh while (1) { 3716d680efe9SMark Fasheh dentry = ocfs2_find_local_alias(dl->dl_inode, 3717d680efe9SMark Fasheh dl->dl_parent_blkno, 1); 3718d680efe9SMark Fasheh if (!dentry) 3719d680efe9SMark Fasheh break; 3720d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3721d680efe9SMark Fasheh 3722d680efe9SMark Fasheh mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, 3723d680efe9SMark Fasheh dentry->d_name.name); 3724d680efe9SMark Fasheh 3725d680efe9SMark Fasheh /* 3726d680efe9SMark Fasheh * The following dcache calls may do an 3727d680efe9SMark Fasheh * iput(). Normally we don't want that from the 3728d680efe9SMark Fasheh * downconverting thread, but in this case it's ok 3729d680efe9SMark Fasheh * because the requesting node already has an 3730d680efe9SMark Fasheh * exclusive lock on the inode, so it can't be queued 3731d680efe9SMark Fasheh * for a downconvert. 3732d680efe9SMark Fasheh */ 3733d680efe9SMark Fasheh d_delete(dentry); 3734d680efe9SMark Fasheh dput(dentry); 3735d680efe9SMark Fasheh 3736d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 3737d680efe9SMark Fasheh } 3738d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 3739d680efe9SMark Fasheh 3740d680efe9SMark Fasheh /* 3741d680efe9SMark Fasheh * If we are the last holder of this dentry lock, there is no 3742d680efe9SMark Fasheh * reason to downconvert so skip straight to the unlock. 
3743d680efe9SMark Fasheh */ 3744d680efe9SMark Fasheh if (dl->dl_count == 1) 3745d680efe9SMark Fasheh return UNBLOCK_STOP_POST; 3746d680efe9SMark Fasheh 3747d680efe9SMark Fasheh return UNBLOCK_CONTINUE_POST; 3748d680efe9SMark Fasheh } 3749d680efe9SMark Fasheh 37508dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres, 37518dec98edSTao Ma int new_level) 37528dec98edSTao Ma { 37538dec98edSTao Ma struct ocfs2_refcount_tree *tree = 37548dec98edSTao Ma ocfs2_lock_res_refcount_tree(lockres); 37558dec98edSTao Ma 37568dec98edSTao Ma return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level); 37578dec98edSTao Ma } 37588dec98edSTao Ma 37598dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, 37608dec98edSTao Ma int blocking) 37618dec98edSTao Ma { 37628dec98edSTao Ma struct ocfs2_refcount_tree *tree = 37638dec98edSTao Ma ocfs2_lock_res_refcount_tree(lockres); 37648dec98edSTao Ma 37658dec98edSTao Ma ocfs2_metadata_cache_purge(&tree->rf_ci); 37668dec98edSTao Ma 37678dec98edSTao Ma return UNBLOCK_CONTINUE; 37688dec98edSTao Ma } 37698dec98edSTao Ma 37709e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) 37719e33d69fSJan Kara { 37729e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb; 37739e33d69fSJan Kara struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); 37749e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 37759e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 37769e33d69fSJan Kara 37779e33d69fSJan Kara mlog_entry_void(); 37789e33d69fSJan Kara 3779a641dc2aSMark Fasheh lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 37809e33d69fSJan Kara lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; 37819e33d69fSJan Kara lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); 37829e33d69fSJan Kara lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); 37839e33d69fSJan Kara lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); 37849e33d69fSJan Kara lvb->lvb_blocks = 
cpu_to_be32(oinfo->dqi_gi.dqi_blocks); 37859e33d69fSJan Kara lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); 37869e33d69fSJan Kara lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); 37879e33d69fSJan Kara 37889e33d69fSJan Kara mlog_exit_void(); 37899e33d69fSJan Kara } 37909e33d69fSJan Kara 37919e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) 37929e33d69fSJan Kara { 37939e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 37949e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 37959e33d69fSJan Kara int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 37969e33d69fSJan Kara 37979e33d69fSJan Kara mlog_entry_void(); 37989e33d69fSJan Kara if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) 37999e33d69fSJan Kara ocfs2_cluster_unlock(osb, lockres, level); 38009e33d69fSJan Kara mlog_exit_void(); 38019e33d69fSJan Kara } 38029e33d69fSJan Kara 38039e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) 38049e33d69fSJan Kara { 38059e33d69fSJan Kara struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, 38069e33d69fSJan Kara oinfo->dqi_gi.dqi_type); 38079e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 38089e33d69fSJan Kara struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 380985eb8b73SJoel Becker struct buffer_head *bh = NULL; 38109e33d69fSJan Kara struct ocfs2_global_disk_dqinfo *gdinfo; 38119e33d69fSJan Kara int status = 0; 38129e33d69fSJan Kara 38131c520dfbSJoel Becker if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && 38141c520dfbSJoel Becker lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { 38159e33d69fSJan Kara info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); 38169e33d69fSJan Kara info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); 38179e33d69fSJan Kara oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); 38189e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); 38199e33d69fSJan Kara 
oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); 38209e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 38219e33d69fSJan Kara be32_to_cpu(lvb->lvb_free_entry); 38229e33d69fSJan Kara } else { 382385eb8b73SJoel Becker status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh); 382485eb8b73SJoel Becker if (status) { 38259e33d69fSJan Kara mlog_errno(status); 38269e33d69fSJan Kara goto bail; 38279e33d69fSJan Kara } 38289e33d69fSJan Kara gdinfo = (struct ocfs2_global_disk_dqinfo *) 38299e33d69fSJan Kara (bh->b_data + OCFS2_GLOBAL_INFO_OFF); 38309e33d69fSJan Kara info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); 38319e33d69fSJan Kara info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace); 38329e33d69fSJan Kara oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); 38339e33d69fSJan Kara oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); 38349e33d69fSJan Kara oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); 38359e33d69fSJan Kara oinfo->dqi_gi.dqi_free_entry = 38369e33d69fSJan Kara le32_to_cpu(gdinfo->dqi_free_entry); 38379e33d69fSJan Kara brelse(bh); 38389e33d69fSJan Kara ocfs2_track_lock_refresh(lockres); 38399e33d69fSJan Kara } 38409e33d69fSJan Kara 38419e33d69fSJan Kara bail: 38429e33d69fSJan Kara return status; 38439e33d69fSJan Kara } 38449e33d69fSJan Kara 38459e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file 38469e33d69fSJan Kara * so that we can safely refresh quota info from disk. */ 38479e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) 38489e33d69fSJan Kara { 38499e33d69fSJan Kara struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; 38509e33d69fSJan Kara struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); 38519e33d69fSJan Kara int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 38529e33d69fSJan Kara int status = 0; 38539e33d69fSJan Kara 38549e33d69fSJan Kara mlog_entry_void(); 38559e33d69fSJan Kara 38569e33d69fSJan Kara /* On RO devices, locking really isn't needed... */ 38579e33d69fSJan Kara if (ocfs2_is_hard_readonly(osb)) { 38589e33d69fSJan Kara if (ex) 38599e33d69fSJan Kara status = -EROFS; 38609e33d69fSJan Kara goto bail; 38619e33d69fSJan Kara } 38629e33d69fSJan Kara if (ocfs2_mount_local(osb)) 38639e33d69fSJan Kara goto bail; 38649e33d69fSJan Kara 38659e33d69fSJan Kara status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 38669e33d69fSJan Kara if (status < 0) { 38679e33d69fSJan Kara mlog_errno(status); 38689e33d69fSJan Kara goto bail; 38699e33d69fSJan Kara } 38709e33d69fSJan Kara if (!ocfs2_should_refresh_lock_res(lockres)) 38719e33d69fSJan Kara goto bail; 38729e33d69fSJan Kara /* OK, we have the lock but we need to refresh the quota info */ 38739e33d69fSJan Kara status = ocfs2_refresh_qinfo(oinfo); 38749e33d69fSJan Kara if (status) 38759e33d69fSJan Kara ocfs2_qinfo_unlock(oinfo, ex); 38769e33d69fSJan Kara ocfs2_complete_lock_res_refresh(lockres, status); 38779e33d69fSJan Kara bail: 38789e33d69fSJan Kara mlog_exit(status); 38799e33d69fSJan Kara return status; 38809e33d69fSJan Kara } 38819e33d69fSJan Kara 38828dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex) 38838dec98edSTao Ma { 38848dec98edSTao Ma int status; 38858dec98edSTao Ma int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; 38868dec98edSTao Ma struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 38878dec98edSTao Ma struct ocfs2_super *osb = lockres->l_priv; 38888dec98edSTao Ma 38898dec98edSTao Ma 38908dec98edSTao Ma if (ocfs2_is_hard_readonly(osb)) 38918dec98edSTao Ma return -EROFS; 38928dec98edSTao Ma 38938dec98edSTao Ma if (ocfs2_mount_local(osb)) 38948dec98edSTao Ma return 0; 38958dec98edSTao Ma 38968dec98edSTao Ma status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 38978dec98edSTao Ma if (status < 0) 38988dec98edSTao Ma mlog_errno(status); 38998dec98edSTao Ma 39008dec98edSTao Ma return status; 39018dec98edSTao Ma } 39028dec98edSTao Ma 39038dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex) 39048dec98edSTao Ma { 39058dec98edSTao Ma int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; 39068dec98edSTao Ma struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres; 39078dec98edSTao Ma struct ocfs2_super *osb = lockres->l_priv; 39088dec98edSTao Ma 39098dec98edSTao Ma if (!ocfs2_mount_local(osb)) 39108dec98edSTao Ma ocfs2_cluster_unlock(osb, lockres, level); 39118dec98edSTao Ma } 39128dec98edSTao Ma 39134670c46dSJoel Becker /* 39144670c46dSJoel Becker * This is the filesystem locking protocol. It provides the lock handling 39154670c46dSJoel Becker * hooks for the underlying DLM. It has a maximum version number. 39164670c46dSJoel Becker * The version number allows interoperability with systems running at 39174670c46dSJoel Becker * the same major number and an equal or smaller minor number. 39184670c46dSJoel Becker * 39194670c46dSJoel Becker * Whenever the filesystem does new things with locks (adds or removes a 39204670c46dSJoel Becker * lock, orders them differently, does different things underneath a lock), 39214670c46dSJoel Becker * the version must be changed. The protocol is negotiated when joining 39224670c46dSJoel Becker * the dlm domain. 
A node may join the domain if its major version is 39234670c46dSJoel Becker * identical to all other nodes and its minor version is greater than 39244670c46dSJoel Becker * or equal to all other nodes. When its minor version is greater than 39254670c46dSJoel Becker * the other nodes, it will run at the minor version specified by the 39264670c46dSJoel Becker * other nodes. 39274670c46dSJoel Becker * 39284670c46dSJoel Becker * If a locking change is made that will not be compatible with older 39294670c46dSJoel Becker * versions, the major number must be increased and the minor version set 39304670c46dSJoel Becker * to zero. If a change merely adds a behavior that can be disabled when 39314670c46dSJoel Becker * speaking to older versions, the minor version must be increased. If a 39324670c46dSJoel Becker * change adds a fully backwards compatible change (eg, LVB changes that 39334670c46dSJoel Becker * are just ignored by older versions), the version does not need to be 39344670c46dSJoel Becker * updated. 
39354670c46dSJoel Becker */ 393624ef1815SJoel Becker static struct ocfs2_locking_protocol lproto = { 39374670c46dSJoel Becker .lp_max_version = { 39384670c46dSJoel Becker .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, 39394670c46dSJoel Becker .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, 39404670c46dSJoel Becker }, 394124ef1815SJoel Becker .lp_lock_ast = ocfs2_locking_ast, 394224ef1815SJoel Becker .lp_blocking_ast = ocfs2_blocking_ast, 394324ef1815SJoel Becker .lp_unlock_ast = ocfs2_unlock_ast, 394424ef1815SJoel Becker }; 394524ef1815SJoel Becker 394663e0c48aSJoel Becker void ocfs2_set_locking_protocol(void) 394724ef1815SJoel Becker { 394863e0c48aSJoel Becker ocfs2_stack_glue_set_locking_protocol(&lproto); 394924ef1815SJoel Becker } 395024ef1815SJoel Becker 395124ef1815SJoel Becker 395200600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 3953ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 3954ccd979bdSMark Fasheh { 3955ccd979bdSMark Fasheh int status; 3956d680efe9SMark Fasheh struct ocfs2_unblock_ctl ctl = {0, 0,}; 3957ccd979bdSMark Fasheh unsigned long flags; 3958ccd979bdSMark Fasheh 3959ccd979bdSMark Fasheh /* Our reference to the lockres in this function can be 3960ccd979bdSMark Fasheh * considered valid until we remove the OCFS2_LOCK_QUEUED 3961ccd979bdSMark Fasheh * flag. */ 3962ccd979bdSMark Fasheh 3963ccd979bdSMark Fasheh mlog_entry_void(); 3964ccd979bdSMark Fasheh 3965ccd979bdSMark Fasheh BUG_ON(!lockres); 3966ccd979bdSMark Fasheh BUG_ON(!lockres->l_ops); 3967ccd979bdSMark Fasheh 3968ccd979bdSMark Fasheh mlog(0, "lockres %s blocked.\n", lockres->l_name); 3969ccd979bdSMark Fasheh 3970ccd979bdSMark Fasheh /* Detect whether a lock has been marked as going away while 397134d024f8SMark Fasheh * the downconvert thread was processing other things. 
A lock can 3972ccd979bdSMark Fasheh * still be marked with OCFS2_LOCK_FREEING after this check, 3973ccd979bdSMark Fasheh * but short circuiting here will still save us some 3974ccd979bdSMark Fasheh * performance. */ 3975ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3976ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) 3977ccd979bdSMark Fasheh goto unqueue; 3978ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3979ccd979bdSMark Fasheh 3980b5e500e2SMark Fasheh status = ocfs2_unblock_lock(osb, lockres, &ctl); 3981ccd979bdSMark Fasheh if (status < 0) 3982ccd979bdSMark Fasheh mlog_errno(status); 3983ccd979bdSMark Fasheh 3984ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 3985ccd979bdSMark Fasheh unqueue: 3986d680efe9SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { 3987ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 3988ccd979bdSMark Fasheh } else 3989ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 3990ccd979bdSMark Fasheh 3991ccd979bdSMark Fasheh mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, 3992d680efe9SMark Fasheh ctl.requeue ? 
"yes" : "no"); 3993ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 3994ccd979bdSMark Fasheh 3995d680efe9SMark Fasheh if (ctl.unblock_action != UNBLOCK_CONTINUE 3996d680efe9SMark Fasheh && lockres->l_ops->post_unlock) 3997d680efe9SMark Fasheh lockres->l_ops->post_unlock(osb, lockres); 3998d680efe9SMark Fasheh 3999ccd979bdSMark Fasheh mlog_exit_void(); 4000ccd979bdSMark Fasheh } 4001ccd979bdSMark Fasheh 4002ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 4003ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 4004ccd979bdSMark Fasheh { 4005ccd979bdSMark Fasheh mlog_entry_void(); 4006ccd979bdSMark Fasheh 4007ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 4008ccd979bdSMark Fasheh 4009ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) { 4010ccd979bdSMark Fasheh /* Do not schedule a lock for downconvert when it's on 4011ccd979bdSMark Fasheh * the way to destruction - any nodes wanting access 4012ccd979bdSMark Fasheh * to the resource will get it soon. 
*/ 4013ccd979bdSMark Fasheh mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n", 4014ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 4015ccd979bdSMark Fasheh return; 4016ccd979bdSMark Fasheh } 4017ccd979bdSMark Fasheh 4018ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 4019ccd979bdSMark Fasheh 402034d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 4021ccd979bdSMark Fasheh if (list_empty(&lockres->l_blocked_list)) { 4022ccd979bdSMark Fasheh list_add_tail(&lockres->l_blocked_list, 4023ccd979bdSMark Fasheh &osb->blocked_lock_list); 4024ccd979bdSMark Fasheh osb->blocked_lock_count++; 4025ccd979bdSMark Fasheh } 402634d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 4027ccd979bdSMark Fasheh 4028ccd979bdSMark Fasheh mlog_exit_void(); 4029ccd979bdSMark Fasheh } 403034d024f8SMark Fasheh 403134d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 403234d024f8SMark Fasheh { 403334d024f8SMark Fasheh unsigned long processed; 403434d024f8SMark Fasheh struct ocfs2_lock_res *lockres; 403534d024f8SMark Fasheh 403634d024f8SMark Fasheh mlog_entry_void(); 403734d024f8SMark Fasheh 403834d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 403934d024f8SMark Fasheh /* grab this early so we know to try again if a state change and 404034d024f8SMark Fasheh * wake happens part-way through our work */ 404134d024f8SMark Fasheh osb->dc_work_sequence = osb->dc_wake_sequence; 404234d024f8SMark Fasheh 404334d024f8SMark Fasheh processed = osb->blocked_lock_count; 404434d024f8SMark Fasheh while (processed) { 404534d024f8SMark Fasheh BUG_ON(list_empty(&osb->blocked_lock_list)); 404634d024f8SMark Fasheh 404734d024f8SMark Fasheh lockres = list_entry(osb->blocked_lock_list.next, 404834d024f8SMark Fasheh struct ocfs2_lock_res, l_blocked_list); 404934d024f8SMark Fasheh list_del_init(&lockres->l_blocked_list); 405034d024f8SMark Fasheh osb->blocked_lock_count--; 405134d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 405234d024f8SMark 
Fasheh 405334d024f8SMark Fasheh BUG_ON(!processed); 405434d024f8SMark Fasheh processed--; 405534d024f8SMark Fasheh 405634d024f8SMark Fasheh ocfs2_process_blocked_lock(osb, lockres); 405734d024f8SMark Fasheh 405834d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 405934d024f8SMark Fasheh } 406034d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 406134d024f8SMark Fasheh 406234d024f8SMark Fasheh mlog_exit_void(); 406334d024f8SMark Fasheh } 406434d024f8SMark Fasheh 406534d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 406634d024f8SMark Fasheh { 406734d024f8SMark Fasheh int empty = 0; 406834d024f8SMark Fasheh 406934d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 407034d024f8SMark Fasheh if (list_empty(&osb->blocked_lock_list)) 407134d024f8SMark Fasheh empty = 1; 407234d024f8SMark Fasheh 407334d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 407434d024f8SMark Fasheh return empty; 407534d024f8SMark Fasheh } 407634d024f8SMark Fasheh 407734d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 407834d024f8SMark Fasheh { 407934d024f8SMark Fasheh int should_wake = 0; 408034d024f8SMark Fasheh 408134d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 408234d024f8SMark Fasheh if (osb->dc_work_sequence != osb->dc_wake_sequence) 408334d024f8SMark Fasheh should_wake = 1; 408434d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 408534d024f8SMark Fasheh 408634d024f8SMark Fasheh return should_wake; 408734d024f8SMark Fasheh } 408834d024f8SMark Fasheh 4089200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg) 409034d024f8SMark Fasheh { 409134d024f8SMark Fasheh int status = 0; 409234d024f8SMark Fasheh struct ocfs2_super *osb = arg; 409334d024f8SMark Fasheh 409434d024f8SMark Fasheh /* only quit once we've been asked to stop and there is no more 409534d024f8SMark Fasheh * work available */ 409634d024f8SMark Fasheh while (!(kthread_should_stop() && 409734d024f8SMark Fasheh 
ocfs2_downconvert_thread_lists_empty(osb))) { 409834d024f8SMark Fasheh 409934d024f8SMark Fasheh wait_event_interruptible(osb->dc_event, 410034d024f8SMark Fasheh ocfs2_downconvert_thread_should_wake(osb) || 410134d024f8SMark Fasheh kthread_should_stop()); 410234d024f8SMark Fasheh 410334d024f8SMark Fasheh mlog(0, "downconvert_thread: awoken\n"); 410434d024f8SMark Fasheh 410534d024f8SMark Fasheh ocfs2_downconvert_thread_do_work(osb); 410634d024f8SMark Fasheh } 410734d024f8SMark Fasheh 410834d024f8SMark Fasheh osb->dc_task = NULL; 410934d024f8SMark Fasheh return status; 411034d024f8SMark Fasheh } 411134d024f8SMark Fasheh 411234d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 411334d024f8SMark Fasheh { 411434d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 411534d024f8SMark Fasheh /* make sure the voting thread gets a swipe at whatever changes 411634d024f8SMark Fasheh * the caller may have made to the voting state */ 411734d024f8SMark Fasheh osb->dc_wake_sequence++; 411834d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 411934d024f8SMark Fasheh wake_up(&osb->dc_event); 412034d024f8SMark Fasheh } 4121