/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * dlmglue.c
 *
 * Code which implements an OCFS2 specific interface to our DLM.
 *
 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh */ 25ccd979bdSMark Fasheh 26ccd979bdSMark Fasheh #include <linux/types.h> 27ccd979bdSMark Fasheh #include <linux/slab.h> 28ccd979bdSMark Fasheh #include <linux/highmem.h> 29ccd979bdSMark Fasheh #include <linux/mm.h> 30ccd979bdSMark Fasheh #include <linux/crc32.h> 31ccd979bdSMark Fasheh #include <linux/kthread.h> 32ccd979bdSMark Fasheh #include <linux/pagemap.h> 33ccd979bdSMark Fasheh #include <linux/debugfs.h> 34ccd979bdSMark Fasheh #include <linux/seq_file.h> 35ccd979bdSMark Fasheh 36ccd979bdSMark Fasheh #include <cluster/heartbeat.h> 37ccd979bdSMark Fasheh #include <cluster/nodemanager.h> 38ccd979bdSMark Fasheh #include <cluster/tcp.h> 39ccd979bdSMark Fasheh 40ccd979bdSMark Fasheh #include <dlm/dlmapi.h> 41ccd979bdSMark Fasheh 42ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE 43ccd979bdSMark Fasheh #include <cluster/masklog.h> 44ccd979bdSMark Fasheh 45ccd979bdSMark Fasheh #include "ocfs2.h" 46ccd979bdSMark Fasheh 47ccd979bdSMark Fasheh #include "alloc.h" 48d680efe9SMark Fasheh #include "dcache.h" 49ccd979bdSMark Fasheh #include "dlmglue.h" 50ccd979bdSMark Fasheh #include "extent_map.h" 517f1a37e3STiger Yang #include "file.h" 52ccd979bdSMark Fasheh #include "heartbeat.h" 53ccd979bdSMark Fasheh #include "inode.h" 54ccd979bdSMark Fasheh #include "journal.h" 55ccd979bdSMark Fasheh #include "slot_map.h" 56ccd979bdSMark Fasheh #include "super.h" 57ccd979bdSMark Fasheh #include "uptodate.h" 58ccd979bdSMark Fasheh 59ccd979bdSMark Fasheh #include "buffer_head_io.h" 60ccd979bdSMark Fasheh 61ccd979bdSMark Fasheh struct ocfs2_mask_waiter { 62ccd979bdSMark Fasheh struct list_head mw_item; 63ccd979bdSMark Fasheh int mw_status; 64ccd979bdSMark Fasheh struct completion mw_complete; 65ccd979bdSMark Fasheh unsigned long mw_mask; 66ccd979bdSMark Fasheh unsigned long mw_goal; 67ccd979bdSMark Fasheh }; 68ccd979bdSMark Fasheh 6954a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 7054a7e755SMark Fasheh 
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); 71ccd979bdSMark Fasheh 72d680efe9SMark Fasheh /* 73cc567d89SMark Fasheh * Return value from ->downconvert_worker functions. 74d680efe9SMark Fasheh * 75b5e500e2SMark Fasheh * These control the precise actions of ocfs2_unblock_lock() 76d680efe9SMark Fasheh * and ocfs2_process_blocked_lock() 77d680efe9SMark Fasheh * 78d680efe9SMark Fasheh */ 79d680efe9SMark Fasheh enum ocfs2_unblock_action { 80d680efe9SMark Fasheh UNBLOCK_CONTINUE = 0, /* Continue downconvert */ 81d680efe9SMark Fasheh UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire 82d680efe9SMark Fasheh * ->post_unlock callback */ 83d680efe9SMark Fasheh UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire 84d680efe9SMark Fasheh * ->post_unlock() callback. */ 85d680efe9SMark Fasheh }; 86d680efe9SMark Fasheh 87d680efe9SMark Fasheh struct ocfs2_unblock_ctl { 88d680efe9SMark Fasheh int requeue; 89d680efe9SMark Fasheh enum ocfs2_unblock_action unblock_action; 90d680efe9SMark Fasheh }; 91d680efe9SMark Fasheh 92810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 93810d5aebSMark Fasheh int new_level); 94810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 95810d5aebSMark Fasheh 96cc567d89SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 97cc567d89SMark Fasheh int blocking); 98cc567d89SMark Fasheh 99cc567d89SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 100cc567d89SMark Fasheh int blocking); 101d680efe9SMark Fasheh 102d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 103d680efe9SMark Fasheh struct ocfs2_lock_res *lockres); 104ccd979bdSMark Fasheh 1056cb129f5SAdrian Bunk 1066cb129f5SAdrian Bunk #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 1076cb129f5SAdrian Bunk 1086cb129f5SAdrian Bunk /* This aids 
in debugging situations where a bad LVB might be involved. */ 1096cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level, 1106cb129f5SAdrian Bunk const char *function, 1116cb129f5SAdrian Bunk unsigned int line, 1126cb129f5SAdrian Bunk struct ocfs2_lock_res *lockres) 1136cb129f5SAdrian Bunk { 1146cb129f5SAdrian Bunk struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1156cb129f5SAdrian Bunk 1166cb129f5SAdrian Bunk mlog(level, "LVB information for %s (called from %s:%u):\n", 1176cb129f5SAdrian Bunk lockres->l_name, function, line); 1186cb129f5SAdrian Bunk mlog(level, "version: %u, clusters: %u, generation: 0x%x\n", 1196cb129f5SAdrian Bunk lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters), 1206cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_igeneration)); 1216cb129f5SAdrian Bunk mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 1226cb129f5SAdrian Bunk (unsigned long long)be64_to_cpu(lvb->lvb_isize), 1236cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 1246cb129f5SAdrian Bunk be16_to_cpu(lvb->lvb_imode)); 1256cb129f5SAdrian Bunk mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 1266cb129f5SAdrian Bunk "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink), 1276cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_iatime_packed), 1286cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_ictime_packed), 1296cb129f5SAdrian Bunk (long long)be64_to_cpu(lvb->lvb_imtime_packed), 1306cb129f5SAdrian Bunk be32_to_cpu(lvb->lvb_iattr)); 1316cb129f5SAdrian Bunk } 1326cb129f5SAdrian Bunk 1336cb129f5SAdrian Bunk 134f625c979SMark Fasheh /* 135f625c979SMark Fasheh * OCFS2 Lock Resource Operations 136f625c979SMark Fasheh * 137f625c979SMark Fasheh * These fine tune the behavior of the generic dlmglue locking infrastructure. 
1380d5dc6c2SMark Fasheh * 1390d5dc6c2SMark Fasheh * The most basic of lock types can point ->l_priv to their respective 1400d5dc6c2SMark Fasheh * struct ocfs2_super and allow the default actions to manage things. 1410d5dc6c2SMark Fasheh * 1420d5dc6c2SMark Fasheh * Right now, each lock type also needs to implement an init function, 1430d5dc6c2SMark Fasheh * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() 1440d5dc6c2SMark Fasheh * should be called when the lock is no longer needed (i.e., object 1450d5dc6c2SMark Fasheh * destruction time). 146f625c979SMark Fasheh */ 147ccd979bdSMark Fasheh struct ocfs2_lock_res_ops { 14854a7e755SMark Fasheh /* 14954a7e755SMark Fasheh * Translate an ocfs2_lock_res * into an ocfs2_super *. Define 15054a7e755SMark Fasheh * this callback if ->l_priv is not an ocfs2_super pointer 15154a7e755SMark Fasheh */ 15254a7e755SMark Fasheh struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); 153b5e500e2SMark Fasheh 1540d5dc6c2SMark Fasheh /* 15534d024f8SMark Fasheh * Optionally called in the downconvert thread after a 15634d024f8SMark Fasheh * successful downconvert. The lockres will not be referenced 15734d024f8SMark Fasheh * after this callback is called, so it is safe to free 15834d024f8SMark Fasheh * memory, etc. 1590d5dc6c2SMark Fasheh * 1600d5dc6c2SMark Fasheh * The exact semantics of when this is called are controlled 1610d5dc6c2SMark Fasheh * by ->downconvert_worker() 1620d5dc6c2SMark Fasheh */ 163d680efe9SMark Fasheh void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *); 164f625c979SMark Fasheh 165f625c979SMark Fasheh /* 16616d5b956SMark Fasheh * Allow a lock type to add checks to determine whether it is 16716d5b956SMark Fasheh * safe to downconvert a lock. Return 0 to re-queue the 16816d5b956SMark Fasheh * downconvert at a later time, nonzero to continue. 
16916d5b956SMark Fasheh * 17016d5b956SMark Fasheh * For most locks, the default checks that there are no 17116d5b956SMark Fasheh * incompatible holders are sufficient. 17216d5b956SMark Fasheh * 17316d5b956SMark Fasheh * Called with the lockres spinlock held. 17416d5b956SMark Fasheh */ 17516d5b956SMark Fasheh int (*check_downconvert)(struct ocfs2_lock_res *, int); 17616d5b956SMark Fasheh 17716d5b956SMark Fasheh /* 1785ef0d4eaSMark Fasheh * Allows a lock type to populate the lock value block. This 1795ef0d4eaSMark Fasheh * is called on downconvert, and when we drop a lock. 1805ef0d4eaSMark Fasheh * 1815ef0d4eaSMark Fasheh * Locks that want to use this should set LOCK_TYPE_USES_LVB 1825ef0d4eaSMark Fasheh * in the flags field. 1835ef0d4eaSMark Fasheh * 1845ef0d4eaSMark Fasheh * Called with the lockres spinlock held. 1855ef0d4eaSMark Fasheh */ 1865ef0d4eaSMark Fasheh void (*set_lvb)(struct ocfs2_lock_res *); 1875ef0d4eaSMark Fasheh 1885ef0d4eaSMark Fasheh /* 189cc567d89SMark Fasheh * Called from the downconvert thread when it is determined 190cc567d89SMark Fasheh * that a lock will be downconverted. This is called without 191cc567d89SMark Fasheh * any locks held so the function can do work that might 192cc567d89SMark Fasheh * schedule (syncing out data, etc). 193cc567d89SMark Fasheh * 194cc567d89SMark Fasheh * This should return any one of the ocfs2_unblock_action 195cc567d89SMark Fasheh * values, depending on what it wants the thread to do. 196cc567d89SMark Fasheh */ 197cc567d89SMark Fasheh int (*downconvert_worker)(struct ocfs2_lock_res *, int); 198cc567d89SMark Fasheh 199cc567d89SMark Fasheh /* 200f625c979SMark Fasheh * LOCK_TYPE_* flags which describe the specific requirements 201f625c979SMark Fasheh * of a lock type. Descriptions of each individual flag follow. 
202f625c979SMark Fasheh */ 203f625c979SMark Fasheh int flags; 204ccd979bdSMark Fasheh }; 205ccd979bdSMark Fasheh 206f625c979SMark Fasheh /* 207f625c979SMark Fasheh * Some locks want to "refresh" potentially stale data when a 208f625c979SMark Fasheh * meaningful (PRMODE or EXMODE) lock level is first obtained. If this 209f625c979SMark Fasheh * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the 210f625c979SMark Fasheh * individual lockres l_flags member from the ast function. It is 211f625c979SMark Fasheh * expected that the locking wrapper will clear the 212f625c979SMark Fasheh * OCFS2_LOCK_NEEDS_REFRESH flag when done. 213f625c979SMark Fasheh */ 214f625c979SMark Fasheh #define LOCK_TYPE_REQUIRES_REFRESH 0x1 215f625c979SMark Fasheh 216b80fc012SMark Fasheh /* 2175ef0d4eaSMark Fasheh * Indicate that a lock type makes use of the lock value block. The 2185ef0d4eaSMark Fasheh * ->set_lvb lock type callback must be defined. 219b80fc012SMark Fasheh */ 220b80fc012SMark Fasheh #define LOCK_TYPE_USES_LVB 0x2 221b80fc012SMark Fasheh 222ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 22354a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 224f625c979SMark Fasheh .flags = 0, 225ccd979bdSMark Fasheh }; 226ccd979bdSMark Fasheh 227e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { 22854a7e755SMark Fasheh .get_osb = ocfs2_get_inode_osb, 229810d5aebSMark Fasheh .check_downconvert = ocfs2_check_meta_downconvert, 230810d5aebSMark Fasheh .set_lvb = ocfs2_set_meta_lvb, 231f1f54068SMark Fasheh .downconvert_worker = ocfs2_data_convert_worker, 232b80fc012SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, 233ccd979bdSMark Fasheh }; 234ccd979bdSMark Fasheh 235ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = { 236f625c979SMark Fasheh .flags = LOCK_TYPE_REQUIRES_REFRESH, 237ccd979bdSMark Fasheh }; 238ccd979bdSMark Fasheh 239ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops 
ocfs2_rename_lops = { 240f625c979SMark Fasheh .flags = 0, 241ccd979bdSMark Fasheh }; 242ccd979bdSMark Fasheh 243d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { 24454a7e755SMark Fasheh .get_osb = ocfs2_get_dentry_osb, 245d680efe9SMark Fasheh .post_unlock = ocfs2_dentry_post_unlock, 246cc567d89SMark Fasheh .downconvert_worker = ocfs2_dentry_convert_worker, 247f625c979SMark Fasheh .flags = 0, 248d680efe9SMark Fasheh }; 249d680efe9SMark Fasheh 25050008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { 25150008630STiger Yang .get_osb = ocfs2_get_inode_osb, 25250008630STiger Yang .flags = 0, 25350008630STiger Yang }; 25450008630STiger Yang 255ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 256ccd979bdSMark Fasheh { 257ccd979bdSMark Fasheh return lockres->l_type == OCFS2_LOCK_TYPE_META || 25850008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_RW || 25950008630STiger Yang lockres->l_type == OCFS2_LOCK_TYPE_OPEN; 260ccd979bdSMark Fasheh } 261ccd979bdSMark Fasheh 262ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 263ccd979bdSMark Fasheh { 264ccd979bdSMark Fasheh BUG_ON(!ocfs2_is_inode_lock(lockres)); 265ccd979bdSMark Fasheh 266ccd979bdSMark Fasheh return (struct inode *) lockres->l_priv; 267ccd979bdSMark Fasheh } 268ccd979bdSMark Fasheh 269d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres) 270d680efe9SMark Fasheh { 271d680efe9SMark Fasheh BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY); 272d680efe9SMark Fasheh 273d680efe9SMark Fasheh return (struct ocfs2_dentry_lock *)lockres->l_priv; 274d680efe9SMark Fasheh } 275d680efe9SMark Fasheh 27654a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 27754a7e755SMark Fasheh { 27854a7e755SMark Fasheh if (lockres->l_ops->get_osb) 27954a7e755SMark Fasheh return 
lockres->l_ops->get_osb(lockres); 28054a7e755SMark Fasheh 28154a7e755SMark Fasheh return (struct ocfs2_super *)lockres->l_priv; 28254a7e755SMark Fasheh } 28354a7e755SMark Fasheh 284ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 285ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 286ccd979bdSMark Fasheh int level, 287ccd979bdSMark Fasheh int dlm_flags); 288ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 289ccd979bdSMark Fasheh int wanted); 290ccd979bdSMark Fasheh static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 291ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 292ccd979bdSMark Fasheh int level); 293ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 294ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 295ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 296ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level); 297ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 298ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 299ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 300ccd979bdSMark Fasheh int convert); 301ccd979bdSMark Fasheh #define ocfs2_log_dlm_error(_func, _stat, _lockres) do { \ 302ccd979bdSMark Fasheh mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ 303ccd979bdSMark Fasheh "resource %s: %s\n", dlm_errname(_stat), _func, \ 304ccd979bdSMark Fasheh _lockres->l_name, dlm_errmsg(_stat)); \ 305ccd979bdSMark Fasheh } while (0) 30634d024f8SMark Fasheh static int ocfs2_downconvert_thread(void *arg); 30734d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 308ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres); 
309e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 310ccd979bdSMark Fasheh struct buffer_head **bh); 311ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 312ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level); 313ccd979bdSMark Fasheh 314ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 315ccd979bdSMark Fasheh u64 blkno, 316ccd979bdSMark Fasheh u32 generation, 317ccd979bdSMark Fasheh char *name) 318ccd979bdSMark Fasheh { 319ccd979bdSMark Fasheh int len; 320ccd979bdSMark Fasheh 321ccd979bdSMark Fasheh mlog_entry_void(); 322ccd979bdSMark Fasheh 323ccd979bdSMark Fasheh BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); 324ccd979bdSMark Fasheh 325b0697053SMark Fasheh len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x", 326b0697053SMark Fasheh ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD, 327b0697053SMark Fasheh (long long)blkno, generation); 328ccd979bdSMark Fasheh 329ccd979bdSMark Fasheh BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1)); 330ccd979bdSMark Fasheh 331ccd979bdSMark Fasheh mlog(0, "built lock resource with name: %s\n", name); 332ccd979bdSMark Fasheh 333ccd979bdSMark Fasheh mlog_exit_void(); 334ccd979bdSMark Fasheh } 335ccd979bdSMark Fasheh 33634af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock); 337ccd979bdSMark Fasheh 338ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res, 339ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug) 340ccd979bdSMark Fasheh { 341ccd979bdSMark Fasheh mlog(0, "Add tracking for lockres %s\n", res->l_name); 342ccd979bdSMark Fasheh 343ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 344ccd979bdSMark Fasheh list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking); 345ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 346ccd979bdSMark Fasheh } 347ccd979bdSMark Fasheh 348ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct 
ocfs2_lock_res *res) 349ccd979bdSMark Fasheh { 350ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 351ccd979bdSMark Fasheh if (!list_empty(&res->l_debug_list)) 352ccd979bdSMark Fasheh list_del_init(&res->l_debug_list); 353ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 354ccd979bdSMark Fasheh } 355ccd979bdSMark Fasheh 356ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 357ccd979bdSMark Fasheh struct ocfs2_lock_res *res, 358ccd979bdSMark Fasheh enum ocfs2_lock_type type, 359ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops, 360ccd979bdSMark Fasheh void *priv) 361ccd979bdSMark Fasheh { 362ccd979bdSMark Fasheh res->l_type = type; 363ccd979bdSMark Fasheh res->l_ops = ops; 364ccd979bdSMark Fasheh res->l_priv = priv; 365ccd979bdSMark Fasheh 366ccd979bdSMark Fasheh res->l_level = LKM_IVMODE; 367ccd979bdSMark Fasheh res->l_requested = LKM_IVMODE; 368ccd979bdSMark Fasheh res->l_blocking = LKM_IVMODE; 369ccd979bdSMark Fasheh res->l_action = OCFS2_AST_INVALID; 370ccd979bdSMark Fasheh res->l_unlock_action = OCFS2_UNLOCK_INVALID; 371ccd979bdSMark Fasheh 372ccd979bdSMark Fasheh res->l_flags = OCFS2_LOCK_INITIALIZED; 373ccd979bdSMark Fasheh 374ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 375ccd979bdSMark Fasheh } 376ccd979bdSMark Fasheh 377ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 378ccd979bdSMark Fasheh { 379ccd979bdSMark Fasheh /* This also clears out the lock status block */ 380ccd979bdSMark Fasheh memset(res, 0, sizeof(struct ocfs2_lock_res)); 381ccd979bdSMark Fasheh spin_lock_init(&res->l_lock); 382ccd979bdSMark Fasheh init_waitqueue_head(&res->l_event); 383ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_blocked_list); 384ccd979bdSMark Fasheh INIT_LIST_HEAD(&res->l_mask_waiters); 385ccd979bdSMark Fasheh } 386ccd979bdSMark Fasheh 387ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 388ccd979bdSMark Fasheh enum ocfs2_lock_type 
type, 38924c19ef4SMark Fasheh unsigned int generation, 390ccd979bdSMark Fasheh struct inode *inode) 391ccd979bdSMark Fasheh { 392ccd979bdSMark Fasheh struct ocfs2_lock_res_ops *ops; 393ccd979bdSMark Fasheh 394ccd979bdSMark Fasheh switch(type) { 395ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_RW: 396ccd979bdSMark Fasheh ops = &ocfs2_inode_rw_lops; 397ccd979bdSMark Fasheh break; 398ccd979bdSMark Fasheh case OCFS2_LOCK_TYPE_META: 399e63aecb6SMark Fasheh ops = &ocfs2_inode_inode_lops; 400ccd979bdSMark Fasheh break; 40150008630STiger Yang case OCFS2_LOCK_TYPE_OPEN: 40250008630STiger Yang ops = &ocfs2_inode_open_lops; 40350008630STiger Yang break; 404ccd979bdSMark Fasheh default: 405ccd979bdSMark Fasheh mlog_bug_on_msg(1, "type: %d\n", type); 406ccd979bdSMark Fasheh ops = NULL; /* thanks, gcc */ 407ccd979bdSMark Fasheh break; 408ccd979bdSMark Fasheh }; 409ccd979bdSMark Fasheh 410d680efe9SMark Fasheh ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 41124c19ef4SMark Fasheh generation, res->l_name); 412d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 413d680efe9SMark Fasheh } 414d680efe9SMark Fasheh 41554a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) 41654a7e755SMark Fasheh { 41754a7e755SMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 41854a7e755SMark Fasheh 41954a7e755SMark Fasheh return OCFS2_SB(inode->i_sb); 42054a7e755SMark Fasheh } 42154a7e755SMark Fasheh 422d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) 423d680efe9SMark Fasheh { 424d680efe9SMark Fasheh __be64 inode_blkno_be; 425d680efe9SMark Fasheh 426d680efe9SMark Fasheh memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], 427d680efe9SMark Fasheh sizeof(__be64)); 428d680efe9SMark Fasheh 429d680efe9SMark Fasheh return be64_to_cpu(inode_blkno_be); 430d680efe9SMark Fasheh } 431d680efe9SMark Fasheh 43254a7e755SMark Fasheh static struct 
ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres) 43354a7e755SMark Fasheh { 43454a7e755SMark Fasheh struct ocfs2_dentry_lock *dl = lockres->l_priv; 43554a7e755SMark Fasheh 43654a7e755SMark Fasheh return OCFS2_SB(dl->dl_inode->i_sb); 43754a7e755SMark Fasheh } 43854a7e755SMark Fasheh 439d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 440d680efe9SMark Fasheh u64 parent, struct inode *inode) 441d680efe9SMark Fasheh { 442d680efe9SMark Fasheh int len; 443d680efe9SMark Fasheh u64 inode_blkno = OCFS2_I(inode)->ip_blkno; 444d680efe9SMark Fasheh __be64 inode_blkno_be = cpu_to_be64(inode_blkno); 445d680efe9SMark Fasheh struct ocfs2_lock_res *lockres = &dl->dl_lockres; 446d680efe9SMark Fasheh 447d680efe9SMark Fasheh ocfs2_lock_res_init_once(lockres); 448d680efe9SMark Fasheh 449d680efe9SMark Fasheh /* 450d680efe9SMark Fasheh * Unfortunately, the standard lock naming scheme won't work 451d680efe9SMark Fasheh * here because we have two 16 byte values to use. Instead, 452d680efe9SMark Fasheh * we'll stuff the inode number as a binary value. We still 453d680efe9SMark Fasheh * want error prints to show something without garbling the 454d680efe9SMark Fasheh * display, so drop a null byte in there before the inode 455d680efe9SMark Fasheh * number. A future version of OCFS2 will likely use all 456d680efe9SMark Fasheh * binary lock names. The stringified names have been a 457d680efe9SMark Fasheh * tremendous aid in debugging, but now that the debugfs 458d680efe9SMark Fasheh * interface exists, we can mangle things there if need be. 
459d680efe9SMark Fasheh * 460d680efe9SMark Fasheh * NOTE: We also drop the standard "pad" value (the total lock 461d680efe9SMark Fasheh * name size stays the same though - the last part is all 462d680efe9SMark Fasheh * zeros due to the memset in ocfs2_lock_res_init_once() 463d680efe9SMark Fasheh */ 464d680efe9SMark Fasheh len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START, 465d680efe9SMark Fasheh "%c%016llx", 466d680efe9SMark Fasheh ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY), 467d680efe9SMark Fasheh (long long)parent); 468d680efe9SMark Fasheh 469d680efe9SMark Fasheh BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1)); 470d680efe9SMark Fasheh 471d680efe9SMark Fasheh memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be, 472d680efe9SMark Fasheh sizeof(__be64)); 473d680efe9SMark Fasheh 474d680efe9SMark Fasheh ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, 475d680efe9SMark Fasheh OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops, 476d680efe9SMark Fasheh dl); 477ccd979bdSMark Fasheh } 478ccd979bdSMark Fasheh 479ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 480ccd979bdSMark Fasheh struct ocfs2_super *osb) 481ccd979bdSMark Fasheh { 482ccd979bdSMark Fasheh /* Superblock lockres doesn't come from a slab so we call init 483ccd979bdSMark Fasheh * once on it manually. 
*/ 484ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 485d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO, 486d680efe9SMark Fasheh 0, res->l_name); 487ccd979bdSMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 488ccd979bdSMark Fasheh &ocfs2_super_lops, osb); 489ccd979bdSMark Fasheh } 490ccd979bdSMark Fasheh 491ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, 492ccd979bdSMark Fasheh struct ocfs2_super *osb) 493ccd979bdSMark Fasheh { 494ccd979bdSMark Fasheh /* Rename lockres doesn't come from a slab so we call init 495ccd979bdSMark Fasheh * once on it manually. */ 496ccd979bdSMark Fasheh ocfs2_lock_res_init_once(res); 497d680efe9SMark Fasheh ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name); 498d680efe9SMark Fasheh ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 499ccd979bdSMark Fasheh &ocfs2_rename_lops, osb); 500ccd979bdSMark Fasheh } 501ccd979bdSMark Fasheh 502ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 503ccd979bdSMark Fasheh { 504ccd979bdSMark Fasheh mlog_entry_void(); 505ccd979bdSMark Fasheh 506ccd979bdSMark Fasheh if (!(res->l_flags & OCFS2_LOCK_INITIALIZED)) 507ccd979bdSMark Fasheh return; 508ccd979bdSMark Fasheh 509ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 510ccd979bdSMark Fasheh 511ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_blocked_list), 512ccd979bdSMark Fasheh "Lockres %s is on the blocked list\n", 513ccd979bdSMark Fasheh res->l_name); 514ccd979bdSMark Fasheh mlog_bug_on_msg(!list_empty(&res->l_mask_waiters), 515ccd979bdSMark Fasheh "Lockres %s has mask waiters pending\n", 516ccd979bdSMark Fasheh res->l_name); 517ccd979bdSMark Fasheh mlog_bug_on_msg(spin_is_locked(&res->l_lock), 518ccd979bdSMark Fasheh "Lockres %s is locked\n", 519ccd979bdSMark Fasheh res->l_name); 520ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ro_holders, 521ccd979bdSMark Fasheh "Lockres %s has %u 
ro holders\n", 522ccd979bdSMark Fasheh res->l_name, res->l_ro_holders); 523ccd979bdSMark Fasheh mlog_bug_on_msg(res->l_ex_holders, 524ccd979bdSMark Fasheh "Lockres %s has %u ex holders\n", 525ccd979bdSMark Fasheh res->l_name, res->l_ex_holders); 526ccd979bdSMark Fasheh 527ccd979bdSMark Fasheh /* Need to clear out the lock status block for the dlm */ 528ccd979bdSMark Fasheh memset(&res->l_lksb, 0, sizeof(res->l_lksb)); 529ccd979bdSMark Fasheh 530ccd979bdSMark Fasheh res->l_flags = 0UL; 531ccd979bdSMark Fasheh mlog_exit_void(); 532ccd979bdSMark Fasheh } 533ccd979bdSMark Fasheh 534ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, 535ccd979bdSMark Fasheh int level) 536ccd979bdSMark Fasheh { 537ccd979bdSMark Fasheh mlog_entry_void(); 538ccd979bdSMark Fasheh 539ccd979bdSMark Fasheh BUG_ON(!lockres); 540ccd979bdSMark Fasheh 541ccd979bdSMark Fasheh switch(level) { 542ccd979bdSMark Fasheh case LKM_EXMODE: 543ccd979bdSMark Fasheh lockres->l_ex_holders++; 544ccd979bdSMark Fasheh break; 545ccd979bdSMark Fasheh case LKM_PRMODE: 546ccd979bdSMark Fasheh lockres->l_ro_holders++; 547ccd979bdSMark Fasheh break; 548ccd979bdSMark Fasheh default: 549ccd979bdSMark Fasheh BUG(); 550ccd979bdSMark Fasheh } 551ccd979bdSMark Fasheh 552ccd979bdSMark Fasheh mlog_exit_void(); 553ccd979bdSMark Fasheh } 554ccd979bdSMark Fasheh 555ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, 556ccd979bdSMark Fasheh int level) 557ccd979bdSMark Fasheh { 558ccd979bdSMark Fasheh mlog_entry_void(); 559ccd979bdSMark Fasheh 560ccd979bdSMark Fasheh BUG_ON(!lockres); 561ccd979bdSMark Fasheh 562ccd979bdSMark Fasheh switch(level) { 563ccd979bdSMark Fasheh case LKM_EXMODE: 564ccd979bdSMark Fasheh BUG_ON(!lockres->l_ex_holders); 565ccd979bdSMark Fasheh lockres->l_ex_holders--; 566ccd979bdSMark Fasheh break; 567ccd979bdSMark Fasheh case LKM_PRMODE: 568ccd979bdSMark Fasheh BUG_ON(!lockres->l_ro_holders); 569ccd979bdSMark Fasheh 
lockres->l_ro_holders--; 570ccd979bdSMark Fasheh break; 571ccd979bdSMark Fasheh default: 572ccd979bdSMark Fasheh BUG(); 573ccd979bdSMark Fasheh } 574ccd979bdSMark Fasheh mlog_exit_void(); 575ccd979bdSMark Fasheh } 576ccd979bdSMark Fasheh 577ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock 578ccd979bdSMark Fasheh * levels are EX, PR, and NL. It *will* have to be adjusted when more 579ccd979bdSMark Fasheh * lock types are added. */ 580ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level) 581ccd979bdSMark Fasheh { 582ccd979bdSMark Fasheh int new_level = LKM_EXMODE; 583ccd979bdSMark Fasheh 584ccd979bdSMark Fasheh if (level == LKM_EXMODE) 585ccd979bdSMark Fasheh new_level = LKM_NLMODE; 586ccd979bdSMark Fasheh else if (level == LKM_PRMODE) 587ccd979bdSMark Fasheh new_level = LKM_PRMODE; 588ccd979bdSMark Fasheh return new_level; 589ccd979bdSMark Fasheh } 590ccd979bdSMark Fasheh 591ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres, 592ccd979bdSMark Fasheh unsigned long newflags) 593ccd979bdSMark Fasheh { 594800deef3SChristoph Hellwig struct ocfs2_mask_waiter *mw, *tmp; 595ccd979bdSMark Fasheh 596ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 597ccd979bdSMark Fasheh 598ccd979bdSMark Fasheh lockres->l_flags = newflags; 599ccd979bdSMark Fasheh 600800deef3SChristoph Hellwig list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { 601ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 602ccd979bdSMark Fasheh continue; 603ccd979bdSMark Fasheh 604ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 605ccd979bdSMark Fasheh mw->mw_status = 0; 606ccd979bdSMark Fasheh complete(&mw->mw_complete); 607ccd979bdSMark Fasheh } 608ccd979bdSMark Fasheh } 609ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) 610ccd979bdSMark Fasheh { 611ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags 
| or); 612ccd979bdSMark Fasheh } 613ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres, 614ccd979bdSMark Fasheh unsigned long clear) 615ccd979bdSMark Fasheh { 616ccd979bdSMark Fasheh lockres_set_flags(lockres, lockres->l_flags & ~clear); 617ccd979bdSMark Fasheh } 618ccd979bdSMark Fasheh 619ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres) 620ccd979bdSMark Fasheh { 621ccd979bdSMark Fasheh mlog_entry_void(); 622ccd979bdSMark Fasheh 623ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 624ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 625ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 626ccd979bdSMark Fasheh BUG_ON(lockres->l_blocking <= LKM_NLMODE); 627ccd979bdSMark Fasheh 628ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 629ccd979bdSMark Fasheh if (lockres->l_level <= 630ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) { 631ccd979bdSMark Fasheh lockres->l_blocking = LKM_NLMODE; 632ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); 633ccd979bdSMark Fasheh } 634ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 635ccd979bdSMark Fasheh 636ccd979bdSMark Fasheh mlog_exit_void(); 637ccd979bdSMark Fasheh } 638ccd979bdSMark Fasheh 639ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres) 640ccd979bdSMark Fasheh { 641ccd979bdSMark Fasheh mlog_entry_void(); 642ccd979bdSMark Fasheh 643ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); 644ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); 645ccd979bdSMark Fasheh 646ccd979bdSMark Fasheh /* Convert from RO to EX doesn't really need anything as our 647ccd979bdSMark Fasheh * information is already up to data. 
Convert from NL to 648ccd979bdSMark Fasheh * *anything* however should mark ourselves as needing an 649ccd979bdSMark Fasheh * update */ 650f625c979SMark Fasheh if (lockres->l_level == LKM_NLMODE && 651f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 652ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 653ccd979bdSMark Fasheh 654ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 655ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 656ccd979bdSMark Fasheh 657ccd979bdSMark Fasheh mlog_exit_void(); 658ccd979bdSMark Fasheh } 659ccd979bdSMark Fasheh 660ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres) 661ccd979bdSMark Fasheh { 662ccd979bdSMark Fasheh mlog_entry_void(); 663ccd979bdSMark Fasheh 6643cf0c507SRoel Kluin BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); 665ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 666ccd979bdSMark Fasheh 667ccd979bdSMark Fasheh if (lockres->l_requested > LKM_NLMODE && 668f625c979SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_LOCAL) && 669f625c979SMark Fasheh lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 670ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 671ccd979bdSMark Fasheh 672ccd979bdSMark Fasheh lockres->l_level = lockres->l_requested; 673ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED); 674ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 675ccd979bdSMark Fasheh 676ccd979bdSMark Fasheh mlog_exit_void(); 677ccd979bdSMark Fasheh } 678ccd979bdSMark Fasheh 679ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 680ccd979bdSMark Fasheh int level) 681ccd979bdSMark Fasheh { 682ccd979bdSMark Fasheh int needs_downconvert = 0; 683ccd979bdSMark Fasheh mlog_entry_void(); 684ccd979bdSMark Fasheh 685ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 686ccd979bdSMark Fasheh 
687ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); 688ccd979bdSMark Fasheh 689ccd979bdSMark Fasheh if (level > lockres->l_blocking) { 690ccd979bdSMark Fasheh /* only schedule a downconvert if we haven't already scheduled 691ccd979bdSMark Fasheh * one that goes low enough to satisfy the level we're 692ccd979bdSMark Fasheh * blocking. this also catches the case where we get 693ccd979bdSMark Fasheh * duplicate BASTs */ 694ccd979bdSMark Fasheh if (ocfs2_highest_compat_lock_level(level) < 695ccd979bdSMark Fasheh ocfs2_highest_compat_lock_level(lockres->l_blocking)) 696ccd979bdSMark Fasheh needs_downconvert = 1; 697ccd979bdSMark Fasheh 698ccd979bdSMark Fasheh lockres->l_blocking = level; 699ccd979bdSMark Fasheh } 700ccd979bdSMark Fasheh 701ccd979bdSMark Fasheh mlog_exit(needs_downconvert); 702ccd979bdSMark Fasheh return needs_downconvert; 703ccd979bdSMark Fasheh } 704ccd979bdSMark Fasheh 705aa2623adSMark Fasheh static void ocfs2_blocking_ast(void *opaque, int level) 706ccd979bdSMark Fasheh { 707aa2623adSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 708aa2623adSMark Fasheh struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); 709ccd979bdSMark Fasheh int needs_downconvert; 710ccd979bdSMark Fasheh unsigned long flags; 711ccd979bdSMark Fasheh 712ccd979bdSMark Fasheh BUG_ON(level <= LKM_NLMODE); 713ccd979bdSMark Fasheh 714aa2623adSMark Fasheh mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", 715aa2623adSMark Fasheh lockres->l_name, level, lockres->l_level, 716aa2623adSMark Fasheh ocfs2_lock_type_string(lockres->l_type)); 717aa2623adSMark Fasheh 718ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 719ccd979bdSMark Fasheh needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 720ccd979bdSMark Fasheh if (needs_downconvert) 721ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 722ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 723ccd979bdSMark Fasheh 724d680efe9SMark 
Fasheh wake_up(&lockres->l_event); 725d680efe9SMark Fasheh 72634d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 727ccd979bdSMark Fasheh } 728ccd979bdSMark Fasheh 729e92d57dfSMark Fasheh static void ocfs2_locking_ast(void *opaque) 730ccd979bdSMark Fasheh { 731e92d57dfSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 732ccd979bdSMark Fasheh struct dlm_lockstatus *lksb = &lockres->l_lksb; 733ccd979bdSMark Fasheh unsigned long flags; 734ccd979bdSMark Fasheh 735ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 736ccd979bdSMark Fasheh 737ccd979bdSMark Fasheh if (lksb->status != DLM_NORMAL) { 738ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres %s: lksb status value of %u!\n", 739ccd979bdSMark Fasheh lockres->l_name, lksb->status); 740ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 741ccd979bdSMark Fasheh return; 742ccd979bdSMark Fasheh } 743ccd979bdSMark Fasheh 744ccd979bdSMark Fasheh switch(lockres->l_action) { 745ccd979bdSMark Fasheh case OCFS2_AST_ATTACH: 746ccd979bdSMark Fasheh ocfs2_generic_handle_attach_action(lockres); 747e92d57dfSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL); 748ccd979bdSMark Fasheh break; 749ccd979bdSMark Fasheh case OCFS2_AST_CONVERT: 750ccd979bdSMark Fasheh ocfs2_generic_handle_convert_action(lockres); 751ccd979bdSMark Fasheh break; 752ccd979bdSMark Fasheh case OCFS2_AST_DOWNCONVERT: 753ccd979bdSMark Fasheh ocfs2_generic_handle_downconvert_action(lockres); 754ccd979bdSMark Fasheh break; 755ccd979bdSMark Fasheh default: 756e92d57dfSMark Fasheh mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u " 757e92d57dfSMark Fasheh "lockres flags = 0x%lx, unlock action: %u\n", 758e92d57dfSMark Fasheh lockres->l_name, lockres->l_action, lockres->l_flags, 759e92d57dfSMark Fasheh lockres->l_unlock_action); 760ccd979bdSMark Fasheh BUG(); 761ccd979bdSMark Fasheh } 762ccd979bdSMark Fasheh 763ccd979bdSMark Fasheh /* set it to something invalid so if we get called again we 764ccd979bdSMark Fasheh * 
can catch it. */ 765ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 766ccd979bdSMark Fasheh 767ccd979bdSMark Fasheh wake_up(&lockres->l_event); 768d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 769ccd979bdSMark Fasheh } 770ccd979bdSMark Fasheh 771ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 772ccd979bdSMark Fasheh int convert) 773ccd979bdSMark Fasheh { 774ccd979bdSMark Fasheh unsigned long flags; 775ccd979bdSMark Fasheh 776ccd979bdSMark Fasheh mlog_entry_void(); 777ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 778ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 779ccd979bdSMark Fasheh if (convert) 780ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 781ccd979bdSMark Fasheh else 782ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 783ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 784ccd979bdSMark Fasheh 785ccd979bdSMark Fasheh wake_up(&lockres->l_event); 786ccd979bdSMark Fasheh mlog_exit_void(); 787ccd979bdSMark Fasheh } 788ccd979bdSMark Fasheh 789ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e., 790ccd979bdSMark Fasheh * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller 791ccd979bdSMark Fasheh * to do the right thing in that case. 
792ccd979bdSMark Fasheh */ 793ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb, 794ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 795ccd979bdSMark Fasheh int level, 796ccd979bdSMark Fasheh int dlm_flags) 797ccd979bdSMark Fasheh { 798ccd979bdSMark Fasheh int ret = 0; 799c271c5c2SSunil Mushran enum dlm_status status = DLM_NORMAL; 800ccd979bdSMark Fasheh unsigned long flags; 801ccd979bdSMark Fasheh 802ccd979bdSMark Fasheh mlog_entry_void(); 803ccd979bdSMark Fasheh 804ccd979bdSMark Fasheh mlog(0, "lock %s, level = %d, flags = %d\n", lockres->l_name, level, 805ccd979bdSMark Fasheh dlm_flags); 806ccd979bdSMark Fasheh 807ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 808ccd979bdSMark Fasheh if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) || 809ccd979bdSMark Fasheh (lockres->l_flags & OCFS2_LOCK_BUSY)) { 810ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 811ccd979bdSMark Fasheh goto bail; 812ccd979bdSMark Fasheh } 813ccd979bdSMark Fasheh 814ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 815ccd979bdSMark Fasheh lockres->l_requested = level; 816ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 817ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 818ccd979bdSMark Fasheh 819ccd979bdSMark Fasheh status = dlmlock(osb->dlm, 820ccd979bdSMark Fasheh level, 821ccd979bdSMark Fasheh &lockres->l_lksb, 822ccd979bdSMark Fasheh dlm_flags, 823ccd979bdSMark Fasheh lockres->l_name, 824f0681062SMark Fasheh OCFS2_LOCK_ID_MAX_LEN - 1, 825e92d57dfSMark Fasheh ocfs2_locking_ast, 826ccd979bdSMark Fasheh lockres, 827aa2623adSMark Fasheh ocfs2_blocking_ast); 828ccd979bdSMark Fasheh if (status != DLM_NORMAL) { 829ccd979bdSMark Fasheh ocfs2_log_dlm_error("dlmlock", status, lockres); 830ccd979bdSMark Fasheh ret = -EINVAL; 831ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 832ccd979bdSMark Fasheh } 833ccd979bdSMark Fasheh 834ccd979bdSMark Fasheh mlog(0, "lock %s, successfull 
return from dlmlock\n", lockres->l_name); 835ccd979bdSMark Fasheh 836ccd979bdSMark Fasheh bail: 837ccd979bdSMark Fasheh mlog_exit(ret); 838ccd979bdSMark Fasheh return ret; 839ccd979bdSMark Fasheh } 840ccd979bdSMark Fasheh 841ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres, 842ccd979bdSMark Fasheh int flag) 843ccd979bdSMark Fasheh { 844ccd979bdSMark Fasheh unsigned long flags; 845ccd979bdSMark Fasheh int ret; 846ccd979bdSMark Fasheh 847ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 848ccd979bdSMark Fasheh ret = lockres->l_flags & flag; 849ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 850ccd979bdSMark Fasheh 851ccd979bdSMark Fasheh return ret; 852ccd979bdSMark Fasheh } 853ccd979bdSMark Fasheh 854ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres) 855ccd979bdSMark Fasheh 856ccd979bdSMark Fasheh { 857ccd979bdSMark Fasheh wait_event(lockres->l_event, 858ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY)); 859ccd979bdSMark Fasheh } 860ccd979bdSMark Fasheh 861ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres) 862ccd979bdSMark Fasheh 863ccd979bdSMark Fasheh { 864ccd979bdSMark Fasheh wait_event(lockres->l_event, 865ccd979bdSMark Fasheh !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING)); 866ccd979bdSMark Fasheh } 867ccd979bdSMark Fasheh 868ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf 869ccd979bdSMark Fasheh * of another node, and return true if the currently wanted 870ccd979bdSMark Fasheh * level will be compatible with it. 
*/ 871ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 872ccd979bdSMark Fasheh int wanted) 873ccd979bdSMark Fasheh { 874ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 875ccd979bdSMark Fasheh 876ccd979bdSMark Fasheh return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking); 877ccd979bdSMark Fasheh } 878ccd979bdSMark Fasheh 879ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw) 880ccd979bdSMark Fasheh { 881ccd979bdSMark Fasheh INIT_LIST_HEAD(&mw->mw_item); 882ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 883ccd979bdSMark Fasheh } 884ccd979bdSMark Fasheh 885ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) 886ccd979bdSMark Fasheh { 887ccd979bdSMark Fasheh wait_for_completion(&mw->mw_complete); 888ccd979bdSMark Fasheh /* Re-arm the completion in case we want to wait on it again */ 889ccd979bdSMark Fasheh INIT_COMPLETION(mw->mw_complete); 890ccd979bdSMark Fasheh return mw->mw_status; 891ccd979bdSMark Fasheh } 892ccd979bdSMark Fasheh 893ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, 894ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw, 895ccd979bdSMark Fasheh unsigned long mask, 896ccd979bdSMark Fasheh unsigned long goal) 897ccd979bdSMark Fasheh { 898ccd979bdSMark Fasheh BUG_ON(!list_empty(&mw->mw_item)); 899ccd979bdSMark Fasheh 900ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 901ccd979bdSMark Fasheh 902ccd979bdSMark Fasheh list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); 903ccd979bdSMark Fasheh mw->mw_mask = mask; 904ccd979bdSMark Fasheh mw->mw_goal = goal; 905ccd979bdSMark Fasheh } 906ccd979bdSMark Fasheh 907ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY 908ccd979bdSMark Fasheh * if the mask still hadn't reached its goal */ 909ccd979bdSMark Fasheh static int lockres_remove_mask_waiter(struct 
ocfs2_lock_res *lockres, 910ccd979bdSMark Fasheh struct ocfs2_mask_waiter *mw) 911ccd979bdSMark Fasheh { 912ccd979bdSMark Fasheh unsigned long flags; 913ccd979bdSMark Fasheh int ret = 0; 914ccd979bdSMark Fasheh 915ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 916ccd979bdSMark Fasheh if (!list_empty(&mw->mw_item)) { 917ccd979bdSMark Fasheh if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) 918ccd979bdSMark Fasheh ret = -EBUSY; 919ccd979bdSMark Fasheh 920ccd979bdSMark Fasheh list_del_init(&mw->mw_item); 921ccd979bdSMark Fasheh init_completion(&mw->mw_complete); 922ccd979bdSMark Fasheh } 923ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 924ccd979bdSMark Fasheh 925ccd979bdSMark Fasheh return ret; 926ccd979bdSMark Fasheh 927ccd979bdSMark Fasheh } 928ccd979bdSMark Fasheh 929ccd979bdSMark Fasheh static int ocfs2_cluster_lock(struct ocfs2_super *osb, 930ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 931ccd979bdSMark Fasheh int level, 932ccd979bdSMark Fasheh int lkm_flags, 933ccd979bdSMark Fasheh int arg_flags) 934ccd979bdSMark Fasheh { 935ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 936ccd979bdSMark Fasheh enum dlm_status status; 937ccd979bdSMark Fasheh int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 938ccd979bdSMark Fasheh int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ 939ccd979bdSMark Fasheh unsigned long flags; 940ccd979bdSMark Fasheh 941ccd979bdSMark Fasheh mlog_entry_void(); 942ccd979bdSMark Fasheh 943ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 944ccd979bdSMark Fasheh 945b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 946b80fc012SMark Fasheh lkm_flags |= LKM_VALBLK; 947b80fc012SMark Fasheh 948ccd979bdSMark Fasheh again: 949ccd979bdSMark Fasheh wait = 0; 950ccd979bdSMark Fasheh 951ccd979bdSMark Fasheh if (catch_signals && signal_pending(current)) { 952ccd979bdSMark Fasheh ret = -ERESTARTSYS; 953ccd979bdSMark Fasheh goto out; 954ccd979bdSMark 
Fasheh } 955ccd979bdSMark Fasheh 956ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 957ccd979bdSMark Fasheh 958ccd979bdSMark Fasheh mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING, 959ccd979bdSMark Fasheh "Cluster lock called on freeing lockres %s! flags " 960ccd979bdSMark Fasheh "0x%lx\n", lockres->l_name, lockres->l_flags); 961ccd979bdSMark Fasheh 962ccd979bdSMark Fasheh /* We only compare against the currently granted level 963ccd979bdSMark Fasheh * here. If the lock is blocked waiting on a downconvert, 964ccd979bdSMark Fasheh * we'll get caught below. */ 965ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY && 966ccd979bdSMark Fasheh level > lockres->l_level) { 967ccd979bdSMark Fasheh /* is someone sitting in dlm_lock? If so, wait on 968ccd979bdSMark Fasheh * them. */ 969ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); 970ccd979bdSMark Fasheh wait = 1; 971ccd979bdSMark Fasheh goto unlock; 972ccd979bdSMark Fasheh } 973ccd979bdSMark Fasheh 974ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED && 975ccd979bdSMark Fasheh !ocfs2_may_continue_on_blocked_lock(lockres, level)) { 976ccd979bdSMark Fasheh /* is the lock is currently blocked on behalf of 977ccd979bdSMark Fasheh * another node */ 978ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0); 979ccd979bdSMark Fasheh wait = 1; 980ccd979bdSMark Fasheh goto unlock; 981ccd979bdSMark Fasheh } 982ccd979bdSMark Fasheh 983ccd979bdSMark Fasheh if (level > lockres->l_level) { 984ccd979bdSMark Fasheh if (lockres->l_action != OCFS2_AST_INVALID) 985ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres %s has action %u pending\n", 986ccd979bdSMark Fasheh lockres->l_name, lockres->l_action); 987ccd979bdSMark Fasheh 988019d1b22SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 989019d1b22SMark Fasheh lockres->l_action = OCFS2_AST_ATTACH; 990019d1b22SMark Fasheh lkm_flags &= ~LKM_CONVERT; 991019d1b22SMark Fasheh } else { 
992ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_CONVERT; 993019d1b22SMark Fasheh lkm_flags |= LKM_CONVERT; 994019d1b22SMark Fasheh } 995019d1b22SMark Fasheh 996ccd979bdSMark Fasheh lockres->l_requested = level; 997ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 998ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 999ccd979bdSMark Fasheh 1000ccd979bdSMark Fasheh BUG_ON(level == LKM_IVMODE); 1001ccd979bdSMark Fasheh BUG_ON(level == LKM_NLMODE); 1002ccd979bdSMark Fasheh 1003ccd979bdSMark Fasheh mlog(0, "lock %s, convert from %d to level = %d\n", 1004ccd979bdSMark Fasheh lockres->l_name, lockres->l_level, level); 1005ccd979bdSMark Fasheh 1006ccd979bdSMark Fasheh /* call dlm_lock to upgrade lock now */ 1007ccd979bdSMark Fasheh status = dlmlock(osb->dlm, 1008ccd979bdSMark Fasheh level, 1009ccd979bdSMark Fasheh &lockres->l_lksb, 1010019d1b22SMark Fasheh lkm_flags, 1011ccd979bdSMark Fasheh lockres->l_name, 1012f0681062SMark Fasheh OCFS2_LOCK_ID_MAX_LEN - 1, 1013e92d57dfSMark Fasheh ocfs2_locking_ast, 1014ccd979bdSMark Fasheh lockres, 1015aa2623adSMark Fasheh ocfs2_blocking_ast); 1016ccd979bdSMark Fasheh if (status != DLM_NORMAL) { 1017ccd979bdSMark Fasheh if ((lkm_flags & LKM_NOQUEUE) && 1018ccd979bdSMark Fasheh (status == DLM_NOTQUEUED)) 1019ccd979bdSMark Fasheh ret = -EAGAIN; 1020ccd979bdSMark Fasheh else { 1021ccd979bdSMark Fasheh ocfs2_log_dlm_error("dlmlock", status, 1022ccd979bdSMark Fasheh lockres); 1023ccd979bdSMark Fasheh ret = -EINVAL; 1024ccd979bdSMark Fasheh } 1025ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 1026ccd979bdSMark Fasheh goto out; 1027ccd979bdSMark Fasheh } 1028ccd979bdSMark Fasheh 1029ccd979bdSMark Fasheh mlog(0, "lock %s, successfull return from dlmlock\n", 1030ccd979bdSMark Fasheh lockres->l_name); 1031ccd979bdSMark Fasheh 1032ccd979bdSMark Fasheh /* At this point we've gone inside the dlm and need to 1033ccd979bdSMark Fasheh * complete our work regardless. 
*/ 1034ccd979bdSMark Fasheh catch_signals = 0; 1035ccd979bdSMark Fasheh 1036ccd979bdSMark Fasheh /* wait for busy to clear and carry on */ 1037ccd979bdSMark Fasheh goto again; 1038ccd979bdSMark Fasheh } 1039ccd979bdSMark Fasheh 1040ccd979bdSMark Fasheh /* Ok, if we get here then we're good to go. */ 1041ccd979bdSMark Fasheh ocfs2_inc_holders(lockres, level); 1042ccd979bdSMark Fasheh 1043ccd979bdSMark Fasheh ret = 0; 1044ccd979bdSMark Fasheh unlock: 1045ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1046ccd979bdSMark Fasheh out: 1047ccd979bdSMark Fasheh /* 1048ccd979bdSMark Fasheh * This is helping work around a lock inversion between the page lock 1049ccd979bdSMark Fasheh * and dlm locks. One path holds the page lock while calling aops 1050ccd979bdSMark Fasheh * which block acquiring dlm locks. The voting thread holds dlm 1051ccd979bdSMark Fasheh * locks while acquiring page locks while down converting data locks. 1052ccd979bdSMark Fasheh * This block is helping an aop path notice the inversion and back 1053ccd979bdSMark Fasheh * off to unlock its page lock before trying the dlm lock again. 
1054ccd979bdSMark Fasheh */ 1055ccd979bdSMark Fasheh if (wait && arg_flags & OCFS2_LOCK_NONBLOCK && 1056ccd979bdSMark Fasheh mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) { 1057ccd979bdSMark Fasheh wait = 0; 1058ccd979bdSMark Fasheh if (lockres_remove_mask_waiter(lockres, &mw)) 1059ccd979bdSMark Fasheh ret = -EAGAIN; 1060ccd979bdSMark Fasheh else 1061ccd979bdSMark Fasheh goto again; 1062ccd979bdSMark Fasheh } 1063ccd979bdSMark Fasheh if (wait) { 1064ccd979bdSMark Fasheh ret = ocfs2_wait_for_mask(&mw); 1065ccd979bdSMark Fasheh if (ret == 0) 1066ccd979bdSMark Fasheh goto again; 1067ccd979bdSMark Fasheh mlog_errno(ret); 1068ccd979bdSMark Fasheh } 1069ccd979bdSMark Fasheh 1070ccd979bdSMark Fasheh mlog_exit(ret); 1071ccd979bdSMark Fasheh return ret; 1072ccd979bdSMark Fasheh } 1073ccd979bdSMark Fasheh 1074ccd979bdSMark Fasheh static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 1075ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 1076ccd979bdSMark Fasheh int level) 1077ccd979bdSMark Fasheh { 1078ccd979bdSMark Fasheh unsigned long flags; 1079ccd979bdSMark Fasheh 1080ccd979bdSMark Fasheh mlog_entry_void(); 1081ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1082ccd979bdSMark Fasheh ocfs2_dec_holders(lockres, level); 108334d024f8SMark Fasheh ocfs2_downconvert_on_unlock(osb, lockres); 1084ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1085ccd979bdSMark Fasheh mlog_exit_void(); 1086ccd979bdSMark Fasheh } 1087ccd979bdSMark Fasheh 1088da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb, 1089d680efe9SMark Fasheh struct ocfs2_lock_res *lockres, 109024c19ef4SMark Fasheh int ex, 109124c19ef4SMark Fasheh int local) 1092ccd979bdSMark Fasheh { 1093d680efe9SMark Fasheh int level = ex ? LKM_EXMODE : LKM_PRMODE; 1094ccd979bdSMark Fasheh unsigned long flags; 109524c19ef4SMark Fasheh int lkm_flags = local ? 
LKM_LOCAL : 0; 1096ccd979bdSMark Fasheh 1097ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1098ccd979bdSMark Fasheh BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1099ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1100ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1101ccd979bdSMark Fasheh 110224c19ef4SMark Fasheh return ocfs2_lock_create(osb, lockres, level, lkm_flags); 1103ccd979bdSMark Fasheh } 1104ccd979bdSMark Fasheh 1105ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping 1106ccd979bdSMark Fasheh * the normal cluster directory lookup. Use this ONLY on newly created 1107ccd979bdSMark Fasheh * inodes which other nodes can't possibly see, and which haven't been 1108ccd979bdSMark Fasheh * hashed in the inode hash yet. This can give us a good performance 1109ccd979bdSMark Fasheh * increase as it'll skip the network broadcast normally associated 1110ccd979bdSMark Fasheh * with creating a new lock resource. */ 1111ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode) 1112ccd979bdSMark Fasheh { 1113ccd979bdSMark Fasheh int ret; 1114d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1115ccd979bdSMark Fasheh 1116ccd979bdSMark Fasheh BUG_ON(!inode); 1117ccd979bdSMark Fasheh BUG_ON(!ocfs2_inode_is_new(inode)); 1118ccd979bdSMark Fasheh 1119ccd979bdSMark Fasheh mlog_entry_void(); 1120ccd979bdSMark Fasheh 1121b0697053SMark Fasheh mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); 1122ccd979bdSMark Fasheh 1123ccd979bdSMark Fasheh /* NOTE: That we don't increment any of the holder counts, nor 1124ccd979bdSMark Fasheh * do we add anything to a journal handle. Since this is 1125ccd979bdSMark Fasheh * supposed to be a new inode which the cluster doesn't know 1126ccd979bdSMark Fasheh * about yet, there is no need to. 
As far as the LVB handling 1127ccd979bdSMark Fasheh * is concerned, this is basically like acquiring an EX lock 1128ccd979bdSMark Fasheh * on a resource which has an invalid one -- we'll set it 1129ccd979bdSMark Fasheh * valid when we release the EX. */ 1130ccd979bdSMark Fasheh 113124c19ef4SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1); 1132ccd979bdSMark Fasheh if (ret) { 1133ccd979bdSMark Fasheh mlog_errno(ret); 1134ccd979bdSMark Fasheh goto bail; 1135ccd979bdSMark Fasheh } 1136ccd979bdSMark Fasheh 113724c19ef4SMark Fasheh /* 113824c19ef4SMark Fasheh * We don't want to use LKM_LOCAL on a meta data lock as they 113924c19ef4SMark Fasheh * don't use a generation in their lock names. 114024c19ef4SMark Fasheh */ 1141e63aecb6SMark Fasheh ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); 1142ccd979bdSMark Fasheh if (ret) { 1143ccd979bdSMark Fasheh mlog_errno(ret); 1144ccd979bdSMark Fasheh goto bail; 1145ccd979bdSMark Fasheh } 1146ccd979bdSMark Fasheh 114750008630STiger Yang ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0); 114850008630STiger Yang if (ret) { 114950008630STiger Yang mlog_errno(ret); 115050008630STiger Yang goto bail; 115150008630STiger Yang } 115250008630STiger Yang 1153ccd979bdSMark Fasheh bail: 1154ccd979bdSMark Fasheh mlog_exit(ret); 1155ccd979bdSMark Fasheh return ret; 1156ccd979bdSMark Fasheh } 1157ccd979bdSMark Fasheh 1158ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write) 1159ccd979bdSMark Fasheh { 1160ccd979bdSMark Fasheh int status, level; 1161ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres; 1162c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1163ccd979bdSMark Fasheh 1164ccd979bdSMark Fasheh BUG_ON(!inode); 1165ccd979bdSMark Fasheh 1166ccd979bdSMark Fasheh mlog_entry_void(); 1167ccd979bdSMark Fasheh 1168b0697053SMark Fasheh mlog(0, "inode %llu take %s RW lock\n", 1169b0697053SMark Fasheh (unsigned long 
long)OCFS2_I(inode)->ip_blkno, 1170ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1171ccd979bdSMark Fasheh 1172c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 1173c271c5c2SSunil Mushran return 0; 1174c271c5c2SSunil Mushran 1175ccd979bdSMark Fasheh lockres = &OCFS2_I(inode)->ip_rw_lockres; 1176ccd979bdSMark Fasheh 1177ccd979bdSMark Fasheh level = write ? LKM_EXMODE : LKM_PRMODE; 1178ccd979bdSMark Fasheh 1179ccd979bdSMark Fasheh status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, 1180ccd979bdSMark Fasheh 0); 1181ccd979bdSMark Fasheh if (status < 0) 1182ccd979bdSMark Fasheh mlog_errno(status); 1183ccd979bdSMark Fasheh 1184ccd979bdSMark Fasheh mlog_exit(status); 1185ccd979bdSMark Fasheh return status; 1186ccd979bdSMark Fasheh } 1187ccd979bdSMark Fasheh 1188ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write) 1189ccd979bdSMark Fasheh { 1190ccd979bdSMark Fasheh int level = write ? LKM_EXMODE : LKM_PRMODE; 1191ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; 1192c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1193ccd979bdSMark Fasheh 1194ccd979bdSMark Fasheh mlog_entry_void(); 1195ccd979bdSMark Fasheh 1196b0697053SMark Fasheh mlog(0, "inode %llu drop %s RW lock\n", 1197b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1198ccd979bdSMark Fasheh write ? "EXMODE" : "PRMODE"); 1199ccd979bdSMark Fasheh 1200c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1201ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1202ccd979bdSMark Fasheh 1203ccd979bdSMark Fasheh mlog_exit_void(); 1204ccd979bdSMark Fasheh } 1205ccd979bdSMark Fasheh 120650008630STiger Yang /* 120750008630STiger Yang * ocfs2_open_lock always get PR mode lock. 
120850008630STiger Yang */ 120950008630STiger Yang int ocfs2_open_lock(struct inode *inode) 121050008630STiger Yang { 121150008630STiger Yang int status = 0; 121250008630STiger Yang struct ocfs2_lock_res *lockres; 121350008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 121450008630STiger Yang 121550008630STiger Yang BUG_ON(!inode); 121650008630STiger Yang 121750008630STiger Yang mlog_entry_void(); 121850008630STiger Yang 121950008630STiger Yang mlog(0, "inode %llu take PRMODE open lock\n", 122050008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 122150008630STiger Yang 122250008630STiger Yang if (ocfs2_mount_local(osb)) 122350008630STiger Yang goto out; 122450008630STiger Yang 122550008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 122650008630STiger Yang 122750008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 122850008630STiger Yang LKM_PRMODE, 0, 0); 122950008630STiger Yang if (status < 0) 123050008630STiger Yang mlog_errno(status); 123150008630STiger Yang 123250008630STiger Yang out: 123350008630STiger Yang mlog_exit(status); 123450008630STiger Yang return status; 123550008630STiger Yang } 123650008630STiger Yang 123750008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write) 123850008630STiger Yang { 123950008630STiger Yang int status = 0, level; 124050008630STiger Yang struct ocfs2_lock_res *lockres; 124150008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 124250008630STiger Yang 124350008630STiger Yang BUG_ON(!inode); 124450008630STiger Yang 124550008630STiger Yang mlog_entry_void(); 124650008630STiger Yang 124750008630STiger Yang mlog(0, "inode %llu try to take %s open lock\n", 124850008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno, 124950008630STiger Yang write ? 
"EXMODE" : "PRMODE"); 125050008630STiger Yang 125150008630STiger Yang if (ocfs2_mount_local(osb)) 125250008630STiger Yang goto out; 125350008630STiger Yang 125450008630STiger Yang lockres = &OCFS2_I(inode)->ip_open_lockres; 125550008630STiger Yang 125650008630STiger Yang level = write ? LKM_EXMODE : LKM_PRMODE; 125750008630STiger Yang 125850008630STiger Yang /* 125950008630STiger Yang * The file system may already holding a PRMODE/EXMODE open lock. 126050008630STiger Yang * Since we pass LKM_NOQUEUE, the request won't block waiting on 126150008630STiger Yang * other nodes and the -EAGAIN will indicate to the caller that 126250008630STiger Yang * this inode is still in use. 126350008630STiger Yang */ 126450008630STiger Yang status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, 126550008630STiger Yang level, LKM_NOQUEUE, 0); 126650008630STiger Yang 126750008630STiger Yang out: 126850008630STiger Yang mlog_exit(status); 126950008630STiger Yang return status; 127050008630STiger Yang } 127150008630STiger Yang 127250008630STiger Yang /* 127350008630STiger Yang * ocfs2_open_unlock unlock PR and EX mode open locks. 
127450008630STiger Yang */ 127550008630STiger Yang void ocfs2_open_unlock(struct inode *inode) 127650008630STiger Yang { 127750008630STiger Yang struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres; 127850008630STiger Yang struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 127950008630STiger Yang 128050008630STiger Yang mlog_entry_void(); 128150008630STiger Yang 128250008630STiger Yang mlog(0, "inode %llu drop open lock\n", 128350008630STiger Yang (unsigned long long)OCFS2_I(inode)->ip_blkno); 128450008630STiger Yang 128550008630STiger Yang if (ocfs2_mount_local(osb)) 128650008630STiger Yang goto out; 128750008630STiger Yang 128850008630STiger Yang if(lockres->l_ro_holders) 128950008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 129050008630STiger Yang LKM_PRMODE); 129150008630STiger Yang if(lockres->l_ex_holders) 129250008630STiger Yang ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, 129350008630STiger Yang LKM_EXMODE); 129450008630STiger Yang 129550008630STiger Yang out: 129650008630STiger Yang mlog_exit_void(); 129750008630STiger Yang } 129850008630STiger Yang 129934d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 1300ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 1301ccd979bdSMark Fasheh { 1302ccd979bdSMark Fasheh int kick = 0; 1303ccd979bdSMark Fasheh 1304ccd979bdSMark Fasheh mlog_entry_void(); 1305ccd979bdSMark Fasheh 1306ccd979bdSMark Fasheh /* If we know that another node is waiting on our lock, kick 130734d024f8SMark Fasheh * the downconvert thread * pre-emptively when we reach a release 1308ccd979bdSMark Fasheh * condition. 
*/ 1309ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { 1310ccd979bdSMark Fasheh switch(lockres->l_blocking) { 1311ccd979bdSMark Fasheh case LKM_EXMODE: 1312ccd979bdSMark Fasheh if (!lockres->l_ex_holders && !lockres->l_ro_holders) 1313ccd979bdSMark Fasheh kick = 1; 1314ccd979bdSMark Fasheh break; 1315ccd979bdSMark Fasheh case LKM_PRMODE: 1316ccd979bdSMark Fasheh if (!lockres->l_ex_holders) 1317ccd979bdSMark Fasheh kick = 1; 1318ccd979bdSMark Fasheh break; 1319ccd979bdSMark Fasheh default: 1320ccd979bdSMark Fasheh BUG(); 1321ccd979bdSMark Fasheh } 1322ccd979bdSMark Fasheh } 1323ccd979bdSMark Fasheh 1324ccd979bdSMark Fasheh if (kick) 132534d024f8SMark Fasheh ocfs2_wake_downconvert_thread(osb); 1326ccd979bdSMark Fasheh 1327ccd979bdSMark Fasheh mlog_exit_void(); 1328ccd979bdSMark Fasheh } 1329ccd979bdSMark Fasheh 1330ccd979bdSMark Fasheh #define OCFS2_SEC_BITS 34 1331ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT (64 - 34) 1332ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1) 1333ccd979bdSMark Fasheh 1334ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for 1335ccd979bdSMark Fasheh * now. */ 1336ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec) 1337ccd979bdSMark Fasheh { 1338ccd979bdSMark Fasheh u64 res; 1339ccd979bdSMark Fasheh u64 sec = spec->tv_sec; 1340ccd979bdSMark Fasheh u32 nsec = spec->tv_nsec; 1341ccd979bdSMark Fasheh 1342ccd979bdSMark Fasheh res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK); 1343ccd979bdSMark Fasheh 1344ccd979bdSMark Fasheh return res; 1345ccd979bdSMark Fasheh } 1346ccd979bdSMark Fasheh 1347ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't 1348ccd979bdSMark Fasheh * need ip_lock in this function as anyone who would be changing those 1349e63aecb6SMark Fasheh * values is supposed to be blocked in ocfs2_inode_lock right now. 
*/ 1350ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode) 1351ccd979bdSMark Fasheh { 1352ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1353e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1354ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 1355ccd979bdSMark Fasheh 1356ccd979bdSMark Fasheh mlog_entry_void(); 1357ccd979bdSMark Fasheh 1358ccd979bdSMark Fasheh lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1359ccd979bdSMark Fasheh 136024c19ef4SMark Fasheh /* 136124c19ef4SMark Fasheh * Invalidate the LVB of a deleted inode - this way other 136224c19ef4SMark Fasheh * nodes are forced to go to disk and discover the new inode 136324c19ef4SMark Fasheh * status. 136424c19ef4SMark Fasheh */ 136524c19ef4SMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 136624c19ef4SMark Fasheh lvb->lvb_version = 0; 136724c19ef4SMark Fasheh goto out; 136824c19ef4SMark Fasheh } 136924c19ef4SMark Fasheh 13704d3b83f7SMark Fasheh lvb->lvb_version = OCFS2_LVB_VERSION; 1371ccd979bdSMark Fasheh lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 1372ccd979bdSMark Fasheh lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 1373ccd979bdSMark Fasheh lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 1374ccd979bdSMark Fasheh lvb->lvb_igid = cpu_to_be32(inode->i_gid); 1375ccd979bdSMark Fasheh lvb->lvb_imode = cpu_to_be16(inode->i_mode); 1376ccd979bdSMark Fasheh lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 1377ccd979bdSMark Fasheh lvb->lvb_iatime_packed = 1378ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime)); 1379ccd979bdSMark Fasheh lvb->lvb_ictime_packed = 1380ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 1381ccd979bdSMark Fasheh lvb->lvb_imtime_packed = 1382ccd979bdSMark Fasheh cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1383ca4d147eSHerbert Poetzl lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 138415b1e36bSMark Fasheh lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features); 
1385f9e2d82eSMark Fasheh lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 1386ccd979bdSMark Fasheh 138724c19ef4SMark Fasheh out: 1388ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 1389ccd979bdSMark Fasheh 1390ccd979bdSMark Fasheh mlog_exit_void(); 1391ccd979bdSMark Fasheh } 1392ccd979bdSMark Fasheh 1393ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec, 1394ccd979bdSMark Fasheh u64 packed_time) 1395ccd979bdSMark Fasheh { 1396ccd979bdSMark Fasheh spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT; 1397ccd979bdSMark Fasheh spec->tv_nsec = packed_time & OCFS2_NSEC_MASK; 1398ccd979bdSMark Fasheh } 1399ccd979bdSMark Fasheh 1400ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode) 1401ccd979bdSMark Fasheh { 1402ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1403e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1404ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb; 1405ccd979bdSMark Fasheh 1406ccd979bdSMark Fasheh mlog_entry_void(); 1407ccd979bdSMark Fasheh 1408ccd979bdSMark Fasheh mlog_meta_lvb(0, lockres); 1409ccd979bdSMark Fasheh 1410ccd979bdSMark Fasheh lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1411ccd979bdSMark Fasheh 1412ccd979bdSMark Fasheh /* We're safe here without the lockres lock... 
*/ 1413ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 1414ccd979bdSMark Fasheh oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 1415ccd979bdSMark Fasheh i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 1416ccd979bdSMark Fasheh 1417ca4d147eSHerbert Poetzl oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 141815b1e36bSMark Fasheh oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures); 1419ca4d147eSHerbert Poetzl ocfs2_set_inode_flags(inode); 1420ca4d147eSHerbert Poetzl 1421ccd979bdSMark Fasheh /* fast-symlinks are a special case */ 1422ccd979bdSMark Fasheh if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 1423ccd979bdSMark Fasheh inode->i_blocks = 0; 1424ccd979bdSMark Fasheh else 14258110b073SMark Fasheh inode->i_blocks = ocfs2_inode_sector_count(inode); 1426ccd979bdSMark Fasheh 1427ccd979bdSMark Fasheh inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 1428ccd979bdSMark Fasheh inode->i_gid = be32_to_cpu(lvb->lvb_igid); 1429ccd979bdSMark Fasheh inode->i_mode = be16_to_cpu(lvb->lvb_imode); 1430ccd979bdSMark Fasheh inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); 1431ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_atime, 1432ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_iatime_packed)); 1433ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_mtime, 1434ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_imtime_packed)); 1435ccd979bdSMark Fasheh ocfs2_unpack_timespec(&inode->i_ctime, 1436ccd979bdSMark Fasheh be64_to_cpu(lvb->lvb_ictime_packed)); 1437ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 1438ccd979bdSMark Fasheh 1439ccd979bdSMark Fasheh mlog_exit_void(); 1440ccd979bdSMark Fasheh } 1441ccd979bdSMark Fasheh 1442f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 1443f9e2d82eSMark Fasheh struct ocfs2_lock_res *lockres) 1444ccd979bdSMark Fasheh { 1445ccd979bdSMark Fasheh struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1446ccd979bdSMark Fasheh 1447f9e2d82eSMark Fasheh if (lvb->lvb_version == OCFS2_LVB_VERSION 
1448f9e2d82eSMark Fasheh && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 1449ccd979bdSMark Fasheh return 1; 1450ccd979bdSMark Fasheh return 0; 1451ccd979bdSMark Fasheh } 1452ccd979bdSMark Fasheh 1453ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and 1454ccd979bdSMark Fasheh * arbitrate who gets to refresh it. 1455ccd979bdSMark Fasheh * 1456ccd979bdSMark Fasheh * 0 means no refresh needed. 1457ccd979bdSMark Fasheh * 1458ccd979bdSMark Fasheh * > 0 means you need to refresh this and you MUST call 1459ccd979bdSMark Fasheh * ocfs2_complete_lock_res_refresh afterwards. */ 1460ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres) 1461ccd979bdSMark Fasheh { 1462ccd979bdSMark Fasheh unsigned long flags; 1463ccd979bdSMark Fasheh int status = 0; 1464ccd979bdSMark Fasheh 1465ccd979bdSMark Fasheh mlog_entry_void(); 1466ccd979bdSMark Fasheh 1467ccd979bdSMark Fasheh refresh_check: 1468ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1469ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) { 1470ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1471ccd979bdSMark Fasheh goto bail; 1472ccd979bdSMark Fasheh } 1473ccd979bdSMark Fasheh 1474ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_REFRESHING) { 1475ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1476ccd979bdSMark Fasheh 1477ccd979bdSMark Fasheh ocfs2_wait_on_refreshing_lock(lockres); 1478ccd979bdSMark Fasheh goto refresh_check; 1479ccd979bdSMark Fasheh } 1480ccd979bdSMark Fasheh 1481ccd979bdSMark Fasheh /* Ok, I'll be the one to refresh this lock. 
*/ 1482ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING); 1483ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1484ccd979bdSMark Fasheh 1485ccd979bdSMark Fasheh status = 1; 1486ccd979bdSMark Fasheh bail: 1487ccd979bdSMark Fasheh mlog_exit(status); 1488ccd979bdSMark Fasheh return status; 1489ccd979bdSMark Fasheh } 1490ccd979bdSMark Fasheh 1491ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh 1492ccd979bdSMark Fasheh * anymroe, but i won't clear the needs refresh flag. */ 1493ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres, 1494ccd979bdSMark Fasheh int status) 1495ccd979bdSMark Fasheh { 1496ccd979bdSMark Fasheh unsigned long flags; 1497ccd979bdSMark Fasheh mlog_entry_void(); 1498ccd979bdSMark Fasheh 1499ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 1500ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING); 1501ccd979bdSMark Fasheh if (!status) 1502ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 1503ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 1504ccd979bdSMark Fasheh 1505ccd979bdSMark Fasheh wake_up(&lockres->l_event); 1506ccd979bdSMark Fasheh 1507ccd979bdSMark Fasheh mlog_exit_void(); 1508ccd979bdSMark Fasheh } 1509ccd979bdSMark Fasheh 1510ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. 
*/ 1511e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode, 1512ccd979bdSMark Fasheh struct buffer_head **bh) 1513ccd979bdSMark Fasheh { 1514ccd979bdSMark Fasheh int status = 0; 1515ccd979bdSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(inode); 1516e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; 1517ccd979bdSMark Fasheh struct ocfs2_dinode *fe; 1518c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1519ccd979bdSMark Fasheh 1520ccd979bdSMark Fasheh mlog_entry_void(); 1521ccd979bdSMark Fasheh 1522be9e986bSMark Fasheh if (ocfs2_mount_local(osb)) 1523be9e986bSMark Fasheh goto bail; 1524be9e986bSMark Fasheh 1525ccd979bdSMark Fasheh spin_lock(&oi->ip_lock); 1526ccd979bdSMark Fasheh if (oi->ip_flags & OCFS2_INODE_DELETED) { 1527b0697053SMark Fasheh mlog(0, "Orphaned inode %llu was deleted while we " 1528ccd979bdSMark Fasheh "were waiting on a lock. ip_flags = 0x%x\n", 1529b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, oi->ip_flags); 1530ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 1531ccd979bdSMark Fasheh status = -ENOENT; 1532ccd979bdSMark Fasheh goto bail; 1533ccd979bdSMark Fasheh } 1534ccd979bdSMark Fasheh spin_unlock(&oi->ip_lock); 1535ccd979bdSMark Fasheh 1536ccd979bdSMark Fasheh if (!ocfs2_should_refresh_lock_res(lockres)) 1537ccd979bdSMark Fasheh goto bail; 1538ccd979bdSMark Fasheh 1539ccd979bdSMark Fasheh /* This will discard any caching information we might have had 1540ccd979bdSMark Fasheh * for the inode metadata. 
*/ 1541ccd979bdSMark Fasheh ocfs2_metadata_cache_purge(inode); 1542ccd979bdSMark Fasheh 154383418978SMark Fasheh ocfs2_extent_map_trunc(inode, 0); 154483418978SMark Fasheh 1545be9e986bSMark Fasheh if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 1546b0697053SMark Fasheh mlog(0, "Trusting LVB on inode %llu\n", 1547b0697053SMark Fasheh (unsigned long long)oi->ip_blkno); 1548ccd979bdSMark Fasheh ocfs2_refresh_inode_from_lvb(inode); 1549ccd979bdSMark Fasheh } else { 1550ccd979bdSMark Fasheh /* Boo, we have to go to disk. */ 1551ccd979bdSMark Fasheh /* read bh, cast, ocfs2_refresh_inode */ 1552ccd979bdSMark Fasheh status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno, 1553ccd979bdSMark Fasheh bh, OCFS2_BH_CACHED, inode); 1554ccd979bdSMark Fasheh if (status < 0) { 1555ccd979bdSMark Fasheh mlog_errno(status); 1556ccd979bdSMark Fasheh goto bail_refresh; 1557ccd979bdSMark Fasheh } 1558ccd979bdSMark Fasheh fe = (struct ocfs2_dinode *) (*bh)->b_data; 1559ccd979bdSMark Fasheh 1560ccd979bdSMark Fasheh /* This is a good chance to make sure we're not 1561ccd979bdSMark Fasheh * locking an invalid object. 1562ccd979bdSMark Fasheh * 1563ccd979bdSMark Fasheh * We bug on a stale inode here because we checked 1564ccd979bdSMark Fasheh * above whether it was wiped from disk. The wiping 1565ccd979bdSMark Fasheh * node provides a guarantee that we receive that 1566ccd979bdSMark Fasheh * message and can mark the inode before dropping any 1567ccd979bdSMark Fasheh * locks associated with it. 
*/ 1568ccd979bdSMark Fasheh if (!OCFS2_IS_VALID_DINODE(fe)) { 1569ccd979bdSMark Fasheh OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 1570ccd979bdSMark Fasheh status = -EIO; 1571ccd979bdSMark Fasheh goto bail_refresh; 1572ccd979bdSMark Fasheh } 1573ccd979bdSMark Fasheh mlog_bug_on_msg(inode->i_generation != 1574ccd979bdSMark Fasheh le32_to_cpu(fe->i_generation), 1575b0697053SMark Fasheh "Invalid dinode %llu disk generation: %u " 1576ccd979bdSMark Fasheh "inode->i_generation: %u\n", 1577b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 1578b0697053SMark Fasheh le32_to_cpu(fe->i_generation), 1579ccd979bdSMark Fasheh inode->i_generation); 1580ccd979bdSMark Fasheh mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || 1581ccd979bdSMark Fasheh !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), 1582b0697053SMark Fasheh "Stale dinode %llu dtime: %llu flags: 0x%x\n", 1583b0697053SMark Fasheh (unsigned long long)oi->ip_blkno, 1584b0697053SMark Fasheh (unsigned long long)le64_to_cpu(fe->i_dtime), 1585ccd979bdSMark Fasheh le32_to_cpu(fe->i_flags)); 1586ccd979bdSMark Fasheh 1587ccd979bdSMark Fasheh ocfs2_refresh_inode(inode, fe); 1588ccd979bdSMark Fasheh } 1589ccd979bdSMark Fasheh 1590ccd979bdSMark Fasheh status = 0; 1591ccd979bdSMark Fasheh bail_refresh: 1592ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 1593ccd979bdSMark Fasheh bail: 1594ccd979bdSMark Fasheh mlog_exit(status); 1595ccd979bdSMark Fasheh return status; 1596ccd979bdSMark Fasheh } 1597ccd979bdSMark Fasheh 1598ccd979bdSMark Fasheh static int ocfs2_assign_bh(struct inode *inode, 1599ccd979bdSMark Fasheh struct buffer_head **ret_bh, 1600ccd979bdSMark Fasheh struct buffer_head *passed_bh) 1601ccd979bdSMark Fasheh { 1602ccd979bdSMark Fasheh int status; 1603ccd979bdSMark Fasheh 1604ccd979bdSMark Fasheh if (passed_bh) { 1605ccd979bdSMark Fasheh /* Ok, the update went to disk for us, use the 1606ccd979bdSMark Fasheh * returned bh. 
*/ 1607ccd979bdSMark Fasheh *ret_bh = passed_bh; 1608ccd979bdSMark Fasheh get_bh(*ret_bh); 1609ccd979bdSMark Fasheh 1610ccd979bdSMark Fasheh return 0; 1611ccd979bdSMark Fasheh } 1612ccd979bdSMark Fasheh 1613ccd979bdSMark Fasheh status = ocfs2_read_block(OCFS2_SB(inode->i_sb), 1614ccd979bdSMark Fasheh OCFS2_I(inode)->ip_blkno, 1615ccd979bdSMark Fasheh ret_bh, 1616ccd979bdSMark Fasheh OCFS2_BH_CACHED, 1617ccd979bdSMark Fasheh inode); 1618ccd979bdSMark Fasheh if (status < 0) 1619ccd979bdSMark Fasheh mlog_errno(status); 1620ccd979bdSMark Fasheh 1621ccd979bdSMark Fasheh return status; 1622ccd979bdSMark Fasheh } 1623ccd979bdSMark Fasheh 1624ccd979bdSMark Fasheh /* 1625ccd979bdSMark Fasheh * returns < 0 error if the callback will never be called, otherwise 1626ccd979bdSMark Fasheh * the result of the lock will be communicated via the callback. 1627ccd979bdSMark Fasheh */ 1628e63aecb6SMark Fasheh int ocfs2_inode_lock_full(struct inode *inode, 1629ccd979bdSMark Fasheh struct buffer_head **ret_bh, 1630ccd979bdSMark Fasheh int ex, 1631ccd979bdSMark Fasheh int arg_flags) 1632ccd979bdSMark Fasheh { 1633ccd979bdSMark Fasheh int status, level, dlm_flags, acquired; 1634c271c5c2SSunil Mushran struct ocfs2_lock_res *lockres = NULL; 1635ccd979bdSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1636ccd979bdSMark Fasheh struct buffer_head *local_bh = NULL; 1637ccd979bdSMark Fasheh 1638ccd979bdSMark Fasheh BUG_ON(!inode); 1639ccd979bdSMark Fasheh 1640ccd979bdSMark Fasheh mlog_entry_void(); 1641ccd979bdSMark Fasheh 1642b0697053SMark Fasheh mlog(0, "inode %llu, take %s META lock\n", 1643b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1644ccd979bdSMark Fasheh ex ? "EXMODE" : "PRMODE"); 1645ccd979bdSMark Fasheh 1646ccd979bdSMark Fasheh status = 0; 1647ccd979bdSMark Fasheh acquired = 0; 1648ccd979bdSMark Fasheh /* We'll allow faking a readonly metadata lock for 1649ccd979bdSMark Fasheh * rodevices. 
*/ 1650ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) { 1651ccd979bdSMark Fasheh if (ex) 1652ccd979bdSMark Fasheh status = -EROFS; 1653ccd979bdSMark Fasheh goto bail; 1654ccd979bdSMark Fasheh } 1655ccd979bdSMark Fasheh 1656c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 1657c271c5c2SSunil Mushran goto local; 1658c271c5c2SSunil Mushran 1659ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 1660ccd979bdSMark Fasheh wait_event(osb->recovery_event, 1661ccd979bdSMark Fasheh ocfs2_node_map_is_empty(osb, &osb->recovery_map)); 1662ccd979bdSMark Fasheh 1663e63aecb6SMark Fasheh lockres = &OCFS2_I(inode)->ip_inode_lockres; 1664ccd979bdSMark Fasheh level = ex ? LKM_EXMODE : LKM_PRMODE; 1665ccd979bdSMark Fasheh dlm_flags = 0; 1666ccd979bdSMark Fasheh if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 1667ccd979bdSMark Fasheh dlm_flags |= LKM_NOQUEUE; 1668ccd979bdSMark Fasheh 1669ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); 1670ccd979bdSMark Fasheh if (status < 0) { 1671ccd979bdSMark Fasheh if (status != -EAGAIN && status != -EIOCBRETRY) 1672ccd979bdSMark Fasheh mlog_errno(status); 1673ccd979bdSMark Fasheh goto bail; 1674ccd979bdSMark Fasheh } 1675ccd979bdSMark Fasheh 1676ccd979bdSMark Fasheh /* Notify the error cleanup path to drop the cluster lock. */ 1677ccd979bdSMark Fasheh acquired = 1; 1678ccd979bdSMark Fasheh 1679ccd979bdSMark Fasheh /* We wait twice because a node may have died while we were in 1680ccd979bdSMark Fasheh * the lower dlm layers. The second time though, we've 1681ccd979bdSMark Fasheh * committed to owning this lock so we don't allow signals to 1682ccd979bdSMark Fasheh * abort the operation. 
*/ 1683ccd979bdSMark Fasheh if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) 1684ccd979bdSMark Fasheh wait_event(osb->recovery_event, 1685ccd979bdSMark Fasheh ocfs2_node_map_is_empty(osb, &osb->recovery_map)); 1686ccd979bdSMark Fasheh 1687c271c5c2SSunil Mushran local: 168824c19ef4SMark Fasheh /* 168924c19ef4SMark Fasheh * We only see this flag if we're being called from 169024c19ef4SMark Fasheh * ocfs2_read_locked_inode(). It means we're locking an inode 169124c19ef4SMark Fasheh * which hasn't been populated yet, so clear the refresh flag 169224c19ef4SMark Fasheh * and let the caller handle it. 169324c19ef4SMark Fasheh */ 169424c19ef4SMark Fasheh if (inode->i_state & I_NEW) { 169524c19ef4SMark Fasheh status = 0; 1696c271c5c2SSunil Mushran if (lockres) 169724c19ef4SMark Fasheh ocfs2_complete_lock_res_refresh(lockres, 0); 169824c19ef4SMark Fasheh goto bail; 169924c19ef4SMark Fasheh } 170024c19ef4SMark Fasheh 1701ccd979bdSMark Fasheh /* This is fun. The caller may want a bh back, or it may 1702e63aecb6SMark Fasheh * not. ocfs2_inode_lock_update definitely wants one in, but 1703ccd979bdSMark Fasheh * may or may not read one, depending on what's in the 1704ccd979bdSMark Fasheh * LVB. The result of all of this is that we've *only* gone to 1705ccd979bdSMark Fasheh * disk if we have to, so the complexity is worthwhile. 
*/ 1706e63aecb6SMark Fasheh status = ocfs2_inode_lock_update(inode, &local_bh); 1707ccd979bdSMark Fasheh if (status < 0) { 1708ccd979bdSMark Fasheh if (status != -ENOENT) 1709ccd979bdSMark Fasheh mlog_errno(status); 1710ccd979bdSMark Fasheh goto bail; 1711ccd979bdSMark Fasheh } 1712ccd979bdSMark Fasheh 1713ccd979bdSMark Fasheh if (ret_bh) { 1714ccd979bdSMark Fasheh status = ocfs2_assign_bh(inode, ret_bh, local_bh); 1715ccd979bdSMark Fasheh if (status < 0) { 1716ccd979bdSMark Fasheh mlog_errno(status); 1717ccd979bdSMark Fasheh goto bail; 1718ccd979bdSMark Fasheh } 1719ccd979bdSMark Fasheh } 1720ccd979bdSMark Fasheh 1721ccd979bdSMark Fasheh bail: 1722ccd979bdSMark Fasheh if (status < 0) { 1723ccd979bdSMark Fasheh if (ret_bh && (*ret_bh)) { 1724ccd979bdSMark Fasheh brelse(*ret_bh); 1725ccd979bdSMark Fasheh *ret_bh = NULL; 1726ccd979bdSMark Fasheh } 1727ccd979bdSMark Fasheh if (acquired) 1728e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 1729ccd979bdSMark Fasheh } 1730ccd979bdSMark Fasheh 1731ccd979bdSMark Fasheh if (local_bh) 1732ccd979bdSMark Fasheh brelse(local_bh); 1733ccd979bdSMark Fasheh 1734ccd979bdSMark Fasheh mlog_exit(status); 1735ccd979bdSMark Fasheh return status; 1736ccd979bdSMark Fasheh } 1737ccd979bdSMark Fasheh 1738ccd979bdSMark Fasheh /* 173934d024f8SMark Fasheh * This is working around a lock inversion between tasks acquiring DLM 174034d024f8SMark Fasheh * locks while holding a page lock and the downconvert thread which 174134d024f8SMark Fasheh * blocks dlm lock acquiry while acquiring page locks. 1742ccd979bdSMark Fasheh * 1743ccd979bdSMark Fasheh * ** These _with_page variantes are only intended to be called from aop 1744ccd979bdSMark Fasheh * methods that hold page locks and return a very specific *positive* error 1745ccd979bdSMark Fasheh * code that aop methods pass up to the VFS -- test for errors with != 0. 
** 1746ccd979bdSMark Fasheh * 174734d024f8SMark Fasheh * The DLM is called such that it returns -EAGAIN if it would have 174834d024f8SMark Fasheh * blocked waiting for the downconvert thread. In that case we unlock 174934d024f8SMark Fasheh * our page so the downconvert thread can make progress. Once we've 175034d024f8SMark Fasheh * done this we have to return AOP_TRUNCATED_PAGE so the aop method 175134d024f8SMark Fasheh * that called us can bubble that back up into the VFS who will then 175234d024f8SMark Fasheh * immediately retry the aop call. 1753ccd979bdSMark Fasheh * 1754ccd979bdSMark Fasheh * We do a blocking lock and immediate unlock before returning, though, so that 1755ccd979bdSMark Fasheh * the lock has a great chance of being cached on this node by the time the VFS 1756ccd979bdSMark Fasheh * calls back to retry the aop. This has a potential to livelock as nodes 1757ccd979bdSMark Fasheh * ping locks back and forth, but that's a risk we're willing to take to avoid 1758ccd979bdSMark Fasheh * the lock inversion simply. 
1759ccd979bdSMark Fasheh */ 1760e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode, 1761ccd979bdSMark Fasheh struct buffer_head **ret_bh, 1762ccd979bdSMark Fasheh int ex, 1763ccd979bdSMark Fasheh struct page *page) 1764ccd979bdSMark Fasheh { 1765ccd979bdSMark Fasheh int ret; 1766ccd979bdSMark Fasheh 1767e63aecb6SMark Fasheh ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); 1768ccd979bdSMark Fasheh if (ret == -EAGAIN) { 1769ccd979bdSMark Fasheh unlock_page(page); 1770e63aecb6SMark Fasheh if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) 1771e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, ex); 1772ccd979bdSMark Fasheh ret = AOP_TRUNCATED_PAGE; 1773ccd979bdSMark Fasheh } 1774ccd979bdSMark Fasheh 1775ccd979bdSMark Fasheh return ret; 1776ccd979bdSMark Fasheh } 1777ccd979bdSMark Fasheh 1778e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode, 17797f1a37e3STiger Yang struct vfsmount *vfsmnt, 17807f1a37e3STiger Yang int *level) 17817f1a37e3STiger Yang { 17827f1a37e3STiger Yang int ret; 17837f1a37e3STiger Yang 17847f1a37e3STiger Yang mlog_entry_void(); 1785e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, NULL, 0); 17867f1a37e3STiger Yang if (ret < 0) { 17877f1a37e3STiger Yang mlog_errno(ret); 17887f1a37e3STiger Yang return ret; 17897f1a37e3STiger Yang } 17907f1a37e3STiger Yang 17917f1a37e3STiger Yang /* 17927f1a37e3STiger Yang * If we should update atime, we will get EX lock, 17937f1a37e3STiger Yang * otherwise we just get PR lock. 
17947f1a37e3STiger Yang */ 17957f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) { 17967f1a37e3STiger Yang struct buffer_head *bh = NULL; 17977f1a37e3STiger Yang 1798e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, 0); 1799e63aecb6SMark Fasheh ret = ocfs2_inode_lock(inode, &bh, 1); 18007f1a37e3STiger Yang if (ret < 0) { 18017f1a37e3STiger Yang mlog_errno(ret); 18027f1a37e3STiger Yang return ret; 18037f1a37e3STiger Yang } 18047f1a37e3STiger Yang *level = 1; 18057f1a37e3STiger Yang if (ocfs2_should_update_atime(inode, vfsmnt)) 18067f1a37e3STiger Yang ocfs2_update_inode_atime(inode, bh); 18077f1a37e3STiger Yang if (bh) 18087f1a37e3STiger Yang brelse(bh); 18097f1a37e3STiger Yang } else 18107f1a37e3STiger Yang *level = 0; 18117f1a37e3STiger Yang 18127f1a37e3STiger Yang mlog_exit(ret); 18137f1a37e3STiger Yang return ret; 18147f1a37e3STiger Yang } 18157f1a37e3STiger Yang 1816e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode, 1817ccd979bdSMark Fasheh int ex) 1818ccd979bdSMark Fasheh { 1819ccd979bdSMark Fasheh int level = ex ? LKM_EXMODE : LKM_PRMODE; 1820e63aecb6SMark Fasheh struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; 1821c271c5c2SSunil Mushran struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1822ccd979bdSMark Fasheh 1823ccd979bdSMark Fasheh mlog_entry_void(); 1824ccd979bdSMark Fasheh 1825b0697053SMark Fasheh mlog(0, "inode %llu drop %s META lock\n", 1826b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno, 1827ccd979bdSMark Fasheh ex ? 
"EXMODE" : "PRMODE"); 1828ccd979bdSMark Fasheh 1829c271c5c2SSunil Mushran if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && 1830c271c5c2SSunil Mushran !ocfs2_mount_local(osb)) 1831ccd979bdSMark Fasheh ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); 1832ccd979bdSMark Fasheh 1833ccd979bdSMark Fasheh mlog_exit_void(); 1834ccd979bdSMark Fasheh } 1835ccd979bdSMark Fasheh 1836ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb, 1837ccd979bdSMark Fasheh int ex) 1838ccd979bdSMark Fasheh { 1839c271c5c2SSunil Mushran int status = 0; 1840ccd979bdSMark Fasheh int level = ex ? LKM_EXMODE : LKM_PRMODE; 1841ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 1842ccd979bdSMark Fasheh struct buffer_head *bh; 1843ccd979bdSMark Fasheh struct ocfs2_slot_info *si = osb->slot_info; 1844ccd979bdSMark Fasheh 1845ccd979bdSMark Fasheh mlog_entry_void(); 1846ccd979bdSMark Fasheh 1847ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 1848ccd979bdSMark Fasheh return -EROFS; 1849ccd979bdSMark Fasheh 1850c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 1851c271c5c2SSunil Mushran goto bail; 1852c271c5c2SSunil Mushran 1853ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 1854ccd979bdSMark Fasheh if (status < 0) { 1855ccd979bdSMark Fasheh mlog_errno(status); 1856ccd979bdSMark Fasheh goto bail; 1857ccd979bdSMark Fasheh } 1858ccd979bdSMark Fasheh 1859ccd979bdSMark Fasheh /* The super block lock path is really in the best position to 1860ccd979bdSMark Fasheh * know when resources covered by the lock need to be 1861ccd979bdSMark Fasheh * refreshed, so we do it here. 
Of course, making sense of 1862ccd979bdSMark Fasheh * everything is up to the caller :) */ 1863ccd979bdSMark Fasheh status = ocfs2_should_refresh_lock_res(lockres); 1864ccd979bdSMark Fasheh if (status < 0) { 1865ccd979bdSMark Fasheh mlog_errno(status); 1866ccd979bdSMark Fasheh goto bail; 1867ccd979bdSMark Fasheh } 1868ccd979bdSMark Fasheh if (status) { 1869ccd979bdSMark Fasheh bh = si->si_bh; 1870ccd979bdSMark Fasheh status = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0, 1871ccd979bdSMark Fasheh si->si_inode); 1872ccd979bdSMark Fasheh if (status == 0) 1873ccd979bdSMark Fasheh ocfs2_update_slot_info(si); 1874ccd979bdSMark Fasheh 1875ccd979bdSMark Fasheh ocfs2_complete_lock_res_refresh(lockres, status); 1876ccd979bdSMark Fasheh 1877ccd979bdSMark Fasheh if (status < 0) 1878ccd979bdSMark Fasheh mlog_errno(status); 1879ccd979bdSMark Fasheh } 1880ccd979bdSMark Fasheh bail: 1881ccd979bdSMark Fasheh mlog_exit(status); 1882ccd979bdSMark Fasheh return status; 1883ccd979bdSMark Fasheh } 1884ccd979bdSMark Fasheh 1885ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb, 1886ccd979bdSMark Fasheh int ex) 1887ccd979bdSMark Fasheh { 1888ccd979bdSMark Fasheh int level = ex ? 
LKM_EXMODE : LKM_PRMODE; 1889ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; 1890ccd979bdSMark Fasheh 1891c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1892ccd979bdSMark Fasheh ocfs2_cluster_unlock(osb, lockres, level); 1893ccd979bdSMark Fasheh } 1894ccd979bdSMark Fasheh 1895ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb) 1896ccd979bdSMark Fasheh { 1897ccd979bdSMark Fasheh int status; 1898ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 1899ccd979bdSMark Fasheh 1900ccd979bdSMark Fasheh if (ocfs2_is_hard_readonly(osb)) 1901ccd979bdSMark Fasheh return -EROFS; 1902ccd979bdSMark Fasheh 1903c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 1904c271c5c2SSunil Mushran return 0; 1905c271c5c2SSunil Mushran 1906ccd979bdSMark Fasheh status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0); 1907ccd979bdSMark Fasheh if (status < 0) 1908ccd979bdSMark Fasheh mlog_errno(status); 1909ccd979bdSMark Fasheh 1910ccd979bdSMark Fasheh return status; 1911ccd979bdSMark Fasheh } 1912ccd979bdSMark Fasheh 1913ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb) 1914ccd979bdSMark Fasheh { 1915ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; 1916ccd979bdSMark Fasheh 1917c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1918ccd979bdSMark Fasheh ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); 1919ccd979bdSMark Fasheh } 1920ccd979bdSMark Fasheh 1921d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex) 1922d680efe9SMark Fasheh { 1923d680efe9SMark Fasheh int ret; 1924d680efe9SMark Fasheh int level = ex ? 
LKM_EXMODE : LKM_PRMODE; 1925d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 1926d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 1927d680efe9SMark Fasheh 1928d680efe9SMark Fasheh BUG_ON(!dl); 1929d680efe9SMark Fasheh 1930d680efe9SMark Fasheh if (ocfs2_is_hard_readonly(osb)) 1931d680efe9SMark Fasheh return -EROFS; 1932d680efe9SMark Fasheh 1933c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 1934c271c5c2SSunil Mushran return 0; 1935c271c5c2SSunil Mushran 1936d680efe9SMark Fasheh ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0); 1937d680efe9SMark Fasheh if (ret < 0) 1938d680efe9SMark Fasheh mlog_errno(ret); 1939d680efe9SMark Fasheh 1940d680efe9SMark Fasheh return ret; 1941d680efe9SMark Fasheh } 1942d680efe9SMark Fasheh 1943d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex) 1944d680efe9SMark Fasheh { 1945d680efe9SMark Fasheh int level = ex ? LKM_EXMODE : LKM_PRMODE; 1946d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = dentry->d_fsdata; 1947d680efe9SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 1948d680efe9SMark Fasheh 1949c271c5c2SSunil Mushran if (!ocfs2_mount_local(osb)) 1950d680efe9SMark Fasheh ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); 1951d680efe9SMark Fasheh } 1952d680efe9SMark Fasheh 1953ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because 1954ccd979bdSMark Fasheh * open references on the debug inodes can live on after a mount, so 1955ccd979bdSMark Fasheh * we can't rely on the ocfs2_super to always exist. 
*/ 1956ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref) 1957ccd979bdSMark Fasheh { 1958ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 1959ccd979bdSMark Fasheh 1960ccd979bdSMark Fasheh dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt); 1961ccd979bdSMark Fasheh 1962ccd979bdSMark Fasheh kfree(dlm_debug); 1963ccd979bdSMark Fasheh } 1964ccd979bdSMark Fasheh 1965ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug) 1966ccd979bdSMark Fasheh { 1967ccd979bdSMark Fasheh if (dlm_debug) 1968ccd979bdSMark Fasheh kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free); 1969ccd979bdSMark Fasheh } 1970ccd979bdSMark Fasheh 1971ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug) 1972ccd979bdSMark Fasheh { 1973ccd979bdSMark Fasheh kref_get(&debug->d_refcnt); 1974ccd979bdSMark Fasheh } 1975ccd979bdSMark Fasheh 1976ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) 1977ccd979bdSMark Fasheh { 1978ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug; 1979ccd979bdSMark Fasheh 1980ccd979bdSMark Fasheh dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL); 1981ccd979bdSMark Fasheh if (!dlm_debug) { 1982ccd979bdSMark Fasheh mlog_errno(-ENOMEM); 1983ccd979bdSMark Fasheh goto out; 1984ccd979bdSMark Fasheh } 1985ccd979bdSMark Fasheh 1986ccd979bdSMark Fasheh kref_init(&dlm_debug->d_refcnt); 1987ccd979bdSMark Fasheh INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); 1988ccd979bdSMark Fasheh dlm_debug->d_locking_state = NULL; 1989ccd979bdSMark Fasheh out: 1990ccd979bdSMark Fasheh return dlm_debug; 1991ccd979bdSMark Fasheh } 1992ccd979bdSMark Fasheh 1993ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. 
*/ 1994ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv { 1995ccd979bdSMark Fasheh struct ocfs2_dlm_debug *p_dlm_debug; 1996ccd979bdSMark Fasheh struct ocfs2_lock_res p_iter_res; 1997ccd979bdSMark Fasheh struct ocfs2_lock_res p_tmp_res; 1998ccd979bdSMark Fasheh }; 1999ccd979bdSMark Fasheh 2000ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start, 2001ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv) 2002ccd979bdSMark Fasheh { 2003ccd979bdSMark Fasheh struct ocfs2_lock_res *iter, *ret = NULL; 2004ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug; 2005ccd979bdSMark Fasheh 2006ccd979bdSMark Fasheh assert_spin_locked(&ocfs2_dlm_tracking_lock); 2007ccd979bdSMark Fasheh 2008ccd979bdSMark Fasheh list_for_each_entry(iter, &start->l_debug_list, l_debug_list) { 2009ccd979bdSMark Fasheh /* discover the head of the list */ 2010ccd979bdSMark Fasheh if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) { 2011ccd979bdSMark Fasheh mlog(0, "End of list found, %p\n", ret); 2012ccd979bdSMark Fasheh break; 2013ccd979bdSMark Fasheh } 2014ccd979bdSMark Fasheh 2015ccd979bdSMark Fasheh /* We track our "dummy" iteration lockres' by a NULL 2016ccd979bdSMark Fasheh * l_ops field. 
*/ 2017ccd979bdSMark Fasheh if (iter->l_ops != NULL) { 2018ccd979bdSMark Fasheh ret = iter; 2019ccd979bdSMark Fasheh break; 2020ccd979bdSMark Fasheh } 2021ccd979bdSMark Fasheh } 2022ccd979bdSMark Fasheh 2023ccd979bdSMark Fasheh return ret; 2024ccd979bdSMark Fasheh } 2025ccd979bdSMark Fasheh 2026ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos) 2027ccd979bdSMark Fasheh { 2028ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2029ccd979bdSMark Fasheh struct ocfs2_lock_res *iter; 2030ccd979bdSMark Fasheh 2031ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2032ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv); 2033ccd979bdSMark Fasheh if (iter) { 2034ccd979bdSMark Fasheh /* Since lockres' have the lifetime of their container 2035ccd979bdSMark Fasheh * (which can be inodes, ocfs2_supers, etc) we want to 2036ccd979bdSMark Fasheh * copy this out to a temporary lockres while still 2037ccd979bdSMark Fasheh * under the spinlock. Obviously after this we can't 2038ccd979bdSMark Fasheh * trust any pointers on the copy returned, but that's 2039ccd979bdSMark Fasheh * ok as the information we want isn't typically held 2040ccd979bdSMark Fasheh * in them. 
*/ 2041ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2042ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2043ccd979bdSMark Fasheh } 2044ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2045ccd979bdSMark Fasheh 2046ccd979bdSMark Fasheh return iter; 2047ccd979bdSMark Fasheh } 2048ccd979bdSMark Fasheh 2049ccd979bdSMark Fasheh static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v) 2050ccd979bdSMark Fasheh { 2051ccd979bdSMark Fasheh } 2052ccd979bdSMark Fasheh 2053ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) 2054ccd979bdSMark Fasheh { 2055ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = m->private; 2056ccd979bdSMark Fasheh struct ocfs2_lock_res *iter = v; 2057ccd979bdSMark Fasheh struct ocfs2_lock_res *dummy = &priv->p_iter_res; 2058ccd979bdSMark Fasheh 2059ccd979bdSMark Fasheh spin_lock(&ocfs2_dlm_tracking_lock); 2060ccd979bdSMark Fasheh iter = ocfs2_dlm_next_res(iter, priv); 2061ccd979bdSMark Fasheh list_del_init(&dummy->l_debug_list); 2062ccd979bdSMark Fasheh if (iter) { 2063ccd979bdSMark Fasheh list_add(&dummy->l_debug_list, &iter->l_debug_list); 2064ccd979bdSMark Fasheh priv->p_tmp_res = *iter; 2065ccd979bdSMark Fasheh iter = &priv->p_tmp_res; 2066ccd979bdSMark Fasheh } 2067ccd979bdSMark Fasheh spin_unlock(&ocfs2_dlm_tracking_lock); 2068ccd979bdSMark Fasheh 2069ccd979bdSMark Fasheh return iter; 2070ccd979bdSMark Fasheh } 2071ccd979bdSMark Fasheh 2072ccd979bdSMark Fasheh /* So that debugfs.ocfs2 can determine which format is being used */ 2073ccd979bdSMark Fasheh #define OCFS2_DLM_DEBUG_STR_VERSION 1 2074ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) 2075ccd979bdSMark Fasheh { 2076ccd979bdSMark Fasheh int i; 2077ccd979bdSMark Fasheh char *lvb; 2078ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = v; 2079ccd979bdSMark Fasheh 2080ccd979bdSMark Fasheh if (!lockres) 2081ccd979bdSMark Fasheh return -EINVAL; 2082ccd979bdSMark Fasheh 2083d680efe9SMark Fasheh 
seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); 2084d680efe9SMark Fasheh 2085d680efe9SMark Fasheh if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) 2086d680efe9SMark Fasheh seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1, 2087d680efe9SMark Fasheh lockres->l_name, 2088d680efe9SMark Fasheh (unsigned int)ocfs2_get_dentry_lock_ino(lockres)); 2089d680efe9SMark Fasheh else 2090d680efe9SMark Fasheh seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name); 2091d680efe9SMark Fasheh 2092d680efe9SMark Fasheh seq_printf(m, "%d\t" 2093ccd979bdSMark Fasheh "0x%lx\t" 2094ccd979bdSMark Fasheh "0x%x\t" 2095ccd979bdSMark Fasheh "0x%x\t" 2096ccd979bdSMark Fasheh "%u\t" 2097ccd979bdSMark Fasheh "%u\t" 2098ccd979bdSMark Fasheh "%d\t" 2099ccd979bdSMark Fasheh "%d\t", 2100ccd979bdSMark Fasheh lockres->l_level, 2101ccd979bdSMark Fasheh lockres->l_flags, 2102ccd979bdSMark Fasheh lockres->l_action, 2103ccd979bdSMark Fasheh lockres->l_unlock_action, 2104ccd979bdSMark Fasheh lockres->l_ro_holders, 2105ccd979bdSMark Fasheh lockres->l_ex_holders, 2106ccd979bdSMark Fasheh lockres->l_requested, 2107ccd979bdSMark Fasheh lockres->l_blocking); 2108ccd979bdSMark Fasheh 2109ccd979bdSMark Fasheh /* Dump the raw LVB */ 2110ccd979bdSMark Fasheh lvb = lockres->l_lksb.lvb; 2111ccd979bdSMark Fasheh for(i = 0; i < DLM_LVB_LEN; i++) 2112ccd979bdSMark Fasheh seq_printf(m, "0x%x\t", lvb[i]); 2113ccd979bdSMark Fasheh 2114ccd979bdSMark Fasheh /* End the line */ 2115ccd979bdSMark Fasheh seq_printf(m, "\n"); 2116ccd979bdSMark Fasheh return 0; 2117ccd979bdSMark Fasheh } 2118ccd979bdSMark Fasheh 2119ccd979bdSMark Fasheh static struct seq_operations ocfs2_dlm_seq_ops = { 2120ccd979bdSMark Fasheh .start = ocfs2_dlm_seq_start, 2121ccd979bdSMark Fasheh .stop = ocfs2_dlm_seq_stop, 2122ccd979bdSMark Fasheh .next = ocfs2_dlm_seq_next, 2123ccd979bdSMark Fasheh .show = ocfs2_dlm_seq_show, 2124ccd979bdSMark Fasheh }; 2125ccd979bdSMark Fasheh 2126ccd979bdSMark Fasheh static int 
ocfs2_dlm_debug_release(struct inode *inode, struct file *file) 2127ccd979bdSMark Fasheh { 2128ccd979bdSMark Fasheh struct seq_file *seq = (struct seq_file *) file->private_data; 2129ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv = seq->private; 2130ccd979bdSMark Fasheh struct ocfs2_lock_res *res = &priv->p_iter_res; 2131ccd979bdSMark Fasheh 2132ccd979bdSMark Fasheh ocfs2_remove_lockres_tracking(res); 2133ccd979bdSMark Fasheh ocfs2_put_dlm_debug(priv->p_dlm_debug); 2134ccd979bdSMark Fasheh return seq_release_private(inode, file); 2135ccd979bdSMark Fasheh } 2136ccd979bdSMark Fasheh 2137ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) 2138ccd979bdSMark Fasheh { 2139ccd979bdSMark Fasheh int ret; 2140ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv *priv; 2141ccd979bdSMark Fasheh struct seq_file *seq; 2142ccd979bdSMark Fasheh struct ocfs2_super *osb; 2143ccd979bdSMark Fasheh 2144ccd979bdSMark Fasheh priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); 2145ccd979bdSMark Fasheh if (!priv) { 2146ccd979bdSMark Fasheh ret = -ENOMEM; 2147ccd979bdSMark Fasheh mlog_errno(ret); 2148ccd979bdSMark Fasheh goto out; 2149ccd979bdSMark Fasheh } 21508e18e294STheodore Ts'o osb = inode->i_private; 2151ccd979bdSMark Fasheh ocfs2_get_dlm_debug(osb->osb_dlm_debug); 2152ccd979bdSMark Fasheh priv->p_dlm_debug = osb->osb_dlm_debug; 2153ccd979bdSMark Fasheh INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); 2154ccd979bdSMark Fasheh 2155ccd979bdSMark Fasheh ret = seq_open(file, &ocfs2_dlm_seq_ops); 2156ccd979bdSMark Fasheh if (ret) { 2157ccd979bdSMark Fasheh kfree(priv); 2158ccd979bdSMark Fasheh mlog_errno(ret); 2159ccd979bdSMark Fasheh goto out; 2160ccd979bdSMark Fasheh } 2161ccd979bdSMark Fasheh 2162ccd979bdSMark Fasheh seq = (struct seq_file *) file->private_data; 2163ccd979bdSMark Fasheh seq->private = priv; 2164ccd979bdSMark Fasheh 2165ccd979bdSMark Fasheh ocfs2_add_lockres_tracking(&priv->p_iter_res, 2166ccd979bdSMark Fasheh 
priv->p_dlm_debug); 2167ccd979bdSMark Fasheh 2168ccd979bdSMark Fasheh out: 2169ccd979bdSMark Fasheh return ret; 2170ccd979bdSMark Fasheh } 2171ccd979bdSMark Fasheh 21724b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = { 2173ccd979bdSMark Fasheh .open = ocfs2_dlm_debug_open, 2174ccd979bdSMark Fasheh .release = ocfs2_dlm_debug_release, 2175ccd979bdSMark Fasheh .read = seq_read, 2176ccd979bdSMark Fasheh .llseek = seq_lseek, 2177ccd979bdSMark Fasheh }; 2178ccd979bdSMark Fasheh 2179ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) 2180ccd979bdSMark Fasheh { 2181ccd979bdSMark Fasheh int ret = 0; 2182ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2183ccd979bdSMark Fasheh 2184ccd979bdSMark Fasheh dlm_debug->d_locking_state = debugfs_create_file("locking_state", 2185ccd979bdSMark Fasheh S_IFREG|S_IRUSR, 2186ccd979bdSMark Fasheh osb->osb_debug_root, 2187ccd979bdSMark Fasheh osb, 2188ccd979bdSMark Fasheh &ocfs2_dlm_debug_fops); 2189ccd979bdSMark Fasheh if (!dlm_debug->d_locking_state) { 2190ccd979bdSMark Fasheh ret = -EINVAL; 2191ccd979bdSMark Fasheh mlog(ML_ERROR, 2192ccd979bdSMark Fasheh "Unable to create locking state debugfs file.\n"); 2193ccd979bdSMark Fasheh goto out; 2194ccd979bdSMark Fasheh } 2195ccd979bdSMark Fasheh 2196ccd979bdSMark Fasheh ocfs2_get_dlm_debug(dlm_debug); 2197ccd979bdSMark Fasheh out: 2198ccd979bdSMark Fasheh return ret; 2199ccd979bdSMark Fasheh } 2200ccd979bdSMark Fasheh 2201ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) 2202ccd979bdSMark Fasheh { 2203ccd979bdSMark Fasheh struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; 2204ccd979bdSMark Fasheh 2205ccd979bdSMark Fasheh if (dlm_debug) { 2206ccd979bdSMark Fasheh debugfs_remove(dlm_debug->d_locking_state); 2207ccd979bdSMark Fasheh ocfs2_put_dlm_debug(dlm_debug); 2208ccd979bdSMark Fasheh } 2209ccd979bdSMark Fasheh } 2210ccd979bdSMark Fasheh 2211ccd979bdSMark Fasheh int 
ocfs2_dlm_init(struct ocfs2_super *osb) 2212ccd979bdSMark Fasheh { 2213c271c5c2SSunil Mushran int status = 0; 2214ccd979bdSMark Fasheh u32 dlm_key; 2215c271c5c2SSunil Mushran struct dlm_ctxt *dlm = NULL; 2216ccd979bdSMark Fasheh 2217ccd979bdSMark Fasheh mlog_entry_void(); 2218ccd979bdSMark Fasheh 2219c271c5c2SSunil Mushran if (ocfs2_mount_local(osb)) 2220c271c5c2SSunil Mushran goto local; 2221c271c5c2SSunil Mushran 2222ccd979bdSMark Fasheh status = ocfs2_dlm_init_debug(osb); 2223ccd979bdSMark Fasheh if (status < 0) { 2224ccd979bdSMark Fasheh mlog_errno(status); 2225ccd979bdSMark Fasheh goto bail; 2226ccd979bdSMark Fasheh } 2227ccd979bdSMark Fasheh 222834d024f8SMark Fasheh /* launch downconvert thread */ 222934d024f8SMark Fasheh osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); 223034d024f8SMark Fasheh if (IS_ERR(osb->dc_task)) { 223134d024f8SMark Fasheh status = PTR_ERR(osb->dc_task); 223234d024f8SMark Fasheh osb->dc_task = NULL; 2233ccd979bdSMark Fasheh mlog_errno(status); 2234ccd979bdSMark Fasheh goto bail; 2235ccd979bdSMark Fasheh } 2236ccd979bdSMark Fasheh 2237ccd979bdSMark Fasheh /* used by the dlm code to make message headers unique, each 2238ccd979bdSMark Fasheh * node in this domain must agree on this. 
*/ 2239ccd979bdSMark Fasheh dlm_key = crc32_le(0, osb->uuid_str, strlen(osb->uuid_str)); 2240ccd979bdSMark Fasheh 2241ccd979bdSMark Fasheh /* for now, uuid == domain */ 2242ccd979bdSMark Fasheh dlm = dlm_register_domain(osb->uuid_str, dlm_key); 2243ccd979bdSMark Fasheh if (IS_ERR(dlm)) { 2244ccd979bdSMark Fasheh status = PTR_ERR(dlm); 2245ccd979bdSMark Fasheh mlog_errno(status); 2246ccd979bdSMark Fasheh goto bail; 2247ccd979bdSMark Fasheh } 2248ccd979bdSMark Fasheh 2249c271c5c2SSunil Mushran dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); 2250c271c5c2SSunil Mushran 2251c271c5c2SSunil Mushran local: 2252ccd979bdSMark Fasheh ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); 2253ccd979bdSMark Fasheh ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); 2254ccd979bdSMark Fasheh 2255ccd979bdSMark Fasheh osb->dlm = dlm; 2256ccd979bdSMark Fasheh 2257ccd979bdSMark Fasheh status = 0; 2258ccd979bdSMark Fasheh bail: 2259ccd979bdSMark Fasheh if (status < 0) { 2260ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 226134d024f8SMark Fasheh if (osb->dc_task) 226234d024f8SMark Fasheh kthread_stop(osb->dc_task); 2263ccd979bdSMark Fasheh } 2264ccd979bdSMark Fasheh 2265ccd979bdSMark Fasheh mlog_exit(status); 2266ccd979bdSMark Fasheh return status; 2267ccd979bdSMark Fasheh } 2268ccd979bdSMark Fasheh 2269ccd979bdSMark Fasheh void ocfs2_dlm_shutdown(struct ocfs2_super *osb) 2270ccd979bdSMark Fasheh { 2271ccd979bdSMark Fasheh mlog_entry_void(); 2272ccd979bdSMark Fasheh 2273ccd979bdSMark Fasheh dlm_unregister_eviction_cb(&osb->osb_eviction_cb); 2274ccd979bdSMark Fasheh 2275ccd979bdSMark Fasheh ocfs2_drop_osb_locks(osb); 2276ccd979bdSMark Fasheh 227734d024f8SMark Fasheh if (osb->dc_task) { 227834d024f8SMark Fasheh kthread_stop(osb->dc_task); 227934d024f8SMark Fasheh osb->dc_task = NULL; 2280ccd979bdSMark Fasheh } 2281ccd979bdSMark Fasheh 2282ccd979bdSMark Fasheh ocfs2_lock_res_free(&osb->osb_super_lockres); 2283ccd979bdSMark Fasheh 
ocfs2_lock_res_free(&osb->osb_rename_lockres); 2284ccd979bdSMark Fasheh 2285ccd979bdSMark Fasheh dlm_unregister_domain(osb->dlm); 2286ccd979bdSMark Fasheh osb->dlm = NULL; 2287ccd979bdSMark Fasheh 2288ccd979bdSMark Fasheh ocfs2_dlm_shutdown_debug(osb); 2289ccd979bdSMark Fasheh 2290ccd979bdSMark Fasheh mlog_exit_void(); 2291ccd979bdSMark Fasheh } 2292ccd979bdSMark Fasheh 22932a45f2d1SMark Fasheh static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) 2294ccd979bdSMark Fasheh { 2295ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres = opaque; 2296ccd979bdSMark Fasheh unsigned long flags; 2297ccd979bdSMark Fasheh 2298ccd979bdSMark Fasheh mlog_entry_void(); 2299ccd979bdSMark Fasheh 2300ccd979bdSMark Fasheh mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name, 2301ccd979bdSMark Fasheh lockres->l_unlock_action); 2302ccd979bdSMark Fasheh 2303ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2304ccd979bdSMark Fasheh /* We tried to cancel a convert request, but it was already 2305ccd979bdSMark Fasheh * granted. All we want to do here is clear our unlock 2306ccd979bdSMark Fasheh * state. The wake_up call done at the bottom is redundant 2307ccd979bdSMark Fasheh * (ocfs2_prepare_cancel_convert doesn't sleep on this) but doesn't 2308ccd979bdSMark Fasheh * hurt anything anyway */ 2309ccd979bdSMark Fasheh if (status == DLM_CANCELGRANT && 2310ccd979bdSMark Fasheh lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 2311ccd979bdSMark Fasheh mlog(0, "Got cancelgrant for %s\n", lockres->l_name); 2312ccd979bdSMark Fasheh 2313ccd979bdSMark Fasheh /* We don't clear the busy flag in this case as it 2314ccd979bdSMark Fasheh * should have been cleared by the ast which the dlm 2315ccd979bdSMark Fasheh * has called. 
*/ 2316ccd979bdSMark Fasheh goto complete_unlock; 2317ccd979bdSMark Fasheh } 2318ccd979bdSMark Fasheh 2319ccd979bdSMark Fasheh if (status != DLM_NORMAL) { 2320ccd979bdSMark Fasheh mlog(ML_ERROR, "Dlm passes status %d for lock %s, " 2321ccd979bdSMark Fasheh "unlock_action %d\n", status, lockres->l_name, 2322ccd979bdSMark Fasheh lockres->l_unlock_action); 2323ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2324ccd979bdSMark Fasheh return; 2325ccd979bdSMark Fasheh } 2326ccd979bdSMark Fasheh 2327ccd979bdSMark Fasheh switch(lockres->l_unlock_action) { 2328ccd979bdSMark Fasheh case OCFS2_UNLOCK_CANCEL_CONVERT: 2329ccd979bdSMark Fasheh mlog(0, "Cancel convert success for %s\n", lockres->l_name); 2330ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_INVALID; 2331ccd979bdSMark Fasheh break; 2332ccd979bdSMark Fasheh case OCFS2_UNLOCK_DROP_LOCK: 2333ccd979bdSMark Fasheh lockres->l_level = LKM_IVMODE; 2334ccd979bdSMark Fasheh break; 2335ccd979bdSMark Fasheh default: 2336ccd979bdSMark Fasheh BUG(); 2337ccd979bdSMark Fasheh } 2338ccd979bdSMark Fasheh 2339ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 2340ccd979bdSMark Fasheh complete_unlock: 2341ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 2342ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2343ccd979bdSMark Fasheh 2344ccd979bdSMark Fasheh wake_up(&lockres->l_event); 2345ccd979bdSMark Fasheh 2346ccd979bdSMark Fasheh mlog_exit_void(); 2347ccd979bdSMark Fasheh } 2348ccd979bdSMark Fasheh 2349ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb, 23500d5dc6c2SMark Fasheh struct ocfs2_lock_res *lockres) 2351ccd979bdSMark Fasheh { 2352ccd979bdSMark Fasheh enum dlm_status status; 2353ccd979bdSMark Fasheh unsigned long flags; 2354b80fc012SMark Fasheh int lkm_flags = 0; 2355ccd979bdSMark Fasheh 2356ccd979bdSMark Fasheh /* We didn't get anywhere near actually using this lockres. 
*/ 2357ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 2358ccd979bdSMark Fasheh goto out; 2359ccd979bdSMark Fasheh 2360b80fc012SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 2361b80fc012SMark Fasheh lkm_flags |= LKM_VALBLK; 2362b80fc012SMark Fasheh 2363ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2364ccd979bdSMark Fasheh 2365ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 2366ccd979bdSMark Fasheh "lockres %s, flags 0x%lx\n", 2367ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 2368ccd979bdSMark Fasheh 2369ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_BUSY) { 2370ccd979bdSMark Fasheh mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = " 2371ccd979bdSMark Fasheh "%u, unlock_action = %u\n", 2372ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags, lockres->l_action, 2373ccd979bdSMark Fasheh lockres->l_unlock_action); 2374ccd979bdSMark Fasheh 2375ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2376ccd979bdSMark Fasheh 2377ccd979bdSMark Fasheh /* XXX: Today we just wait on any busy 2378ccd979bdSMark Fasheh * locks... Perhaps we need to cancel converts in the 2379ccd979bdSMark Fasheh * future? 
*/ 2380ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 2381ccd979bdSMark Fasheh 2382ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2383ccd979bdSMark Fasheh } 2384ccd979bdSMark Fasheh 23850d5dc6c2SMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 23860d5dc6c2SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_ATTACHED && 23870d5dc6c2SMark Fasheh lockres->l_level == LKM_EXMODE && 23880d5dc6c2SMark Fasheh !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 23890d5dc6c2SMark Fasheh lockres->l_ops->set_lvb(lockres); 23900d5dc6c2SMark Fasheh } 2391ccd979bdSMark Fasheh 2392ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) 2393ccd979bdSMark Fasheh mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 2394ccd979bdSMark Fasheh lockres->l_name); 2395ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BLOCKED) 2396ccd979bdSMark Fasheh mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name); 2397ccd979bdSMark Fasheh 2398ccd979bdSMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { 2399ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2400ccd979bdSMark Fasheh goto out; 2401ccd979bdSMark Fasheh } 2402ccd979bdSMark Fasheh 2403ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED); 2404ccd979bdSMark Fasheh 2405ccd979bdSMark Fasheh /* make sure we never get here while waiting for an ast to 2406ccd979bdSMark Fasheh * fire. */ 2407ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_INVALID); 2408ccd979bdSMark Fasheh 2409ccd979bdSMark Fasheh /* is this necessary? 
*/ 2410ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 2411ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK; 2412ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2413ccd979bdSMark Fasheh 2414ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 2415ccd979bdSMark Fasheh 2416b80fc012SMark Fasheh status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags, 24172a45f2d1SMark Fasheh ocfs2_unlock_ast, lockres); 2418ccd979bdSMark Fasheh if (status != DLM_NORMAL) { 2419ccd979bdSMark Fasheh ocfs2_log_dlm_error("dlmunlock", status, lockres); 2420ccd979bdSMark Fasheh mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 2421ccd979bdSMark Fasheh dlm_print_one_lock(lockres->l_lksb.lockid); 2422ccd979bdSMark Fasheh BUG(); 2423ccd979bdSMark Fasheh } 2424ccd979bdSMark Fasheh mlog(0, "lock %s, successfull return from dlmunlock\n", 2425ccd979bdSMark Fasheh lockres->l_name); 2426ccd979bdSMark Fasheh 2427ccd979bdSMark Fasheh ocfs2_wait_on_busy_lock(lockres); 2428ccd979bdSMark Fasheh out: 2429ccd979bdSMark Fasheh mlog_exit(0); 2430ccd979bdSMark Fasheh return 0; 2431ccd979bdSMark Fasheh } 2432ccd979bdSMark Fasheh 2433ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be 2434ccd979bdSMark Fasheh * queued if blocking, but we still may have to wait on it 243534d024f8SMark Fasheh * being dequeued from the downconvert thread before we can consider 2436ccd979bdSMark Fasheh * it safe to drop. 2437ccd979bdSMark Fasheh * 2438ccd979bdSMark Fasheh * You can *not* attempt to call cluster_lock on this lockres anymore. 
*/ 2439ccd979bdSMark Fasheh void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) 2440ccd979bdSMark Fasheh { 2441ccd979bdSMark Fasheh int status; 2442ccd979bdSMark Fasheh struct ocfs2_mask_waiter mw; 2443ccd979bdSMark Fasheh unsigned long flags; 2444ccd979bdSMark Fasheh 2445ccd979bdSMark Fasheh ocfs2_init_mask_waiter(&mw); 2446ccd979bdSMark Fasheh 2447ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2448ccd979bdSMark Fasheh lockres->l_flags |= OCFS2_LOCK_FREEING; 2449ccd979bdSMark Fasheh while (lockres->l_flags & OCFS2_LOCK_QUEUED) { 2450ccd979bdSMark Fasheh lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); 2451ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2452ccd979bdSMark Fasheh 2453ccd979bdSMark Fasheh mlog(0, "Waiting on lockres %s\n", lockres->l_name); 2454ccd979bdSMark Fasheh 2455ccd979bdSMark Fasheh status = ocfs2_wait_for_mask(&mw); 2456ccd979bdSMark Fasheh if (status) 2457ccd979bdSMark Fasheh mlog_errno(status); 2458ccd979bdSMark Fasheh 2459ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2460ccd979bdSMark Fasheh } 2461ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2462ccd979bdSMark Fasheh } 2463ccd979bdSMark Fasheh 2464d680efe9SMark Fasheh void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, 2465d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 2466d680efe9SMark Fasheh { 2467d680efe9SMark Fasheh int ret; 2468d680efe9SMark Fasheh 2469d680efe9SMark Fasheh ocfs2_mark_lockres_freeing(lockres); 24700d5dc6c2SMark Fasheh ret = ocfs2_drop_lock(osb, lockres); 2471d680efe9SMark Fasheh if (ret) 2472d680efe9SMark Fasheh mlog_errno(ret); 2473d680efe9SMark Fasheh } 2474d680efe9SMark Fasheh 2475ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 2476ccd979bdSMark Fasheh { 2477d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres); 2478d680efe9SMark Fasheh ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres); 
2479ccd979bdSMark Fasheh } 2480ccd979bdSMark Fasheh 2481ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode) 2482ccd979bdSMark Fasheh { 2483ccd979bdSMark Fasheh int status, err; 2484ccd979bdSMark Fasheh 2485ccd979bdSMark Fasheh mlog_entry_void(); 2486ccd979bdSMark Fasheh 2487ccd979bdSMark Fasheh /* No need to call ocfs2_mark_lockres_freeing here - 2488ccd979bdSMark Fasheh * ocfs2_clear_inode has done it for us. */ 2489ccd979bdSMark Fasheh 2490ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 249150008630STiger Yang &OCFS2_I(inode)->ip_open_lockres); 2492ccd979bdSMark Fasheh if (err < 0) 2493ccd979bdSMark Fasheh mlog_errno(err); 2494ccd979bdSMark Fasheh 2495ccd979bdSMark Fasheh status = err; 2496ccd979bdSMark Fasheh 2497ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2498e63aecb6SMark Fasheh &OCFS2_I(inode)->ip_inode_lockres); 2499ccd979bdSMark Fasheh if (err < 0) 2500ccd979bdSMark Fasheh mlog_errno(err); 2501ccd979bdSMark Fasheh if (err < 0 && !status) 2502ccd979bdSMark Fasheh status = err; 2503ccd979bdSMark Fasheh 2504ccd979bdSMark Fasheh err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 25050d5dc6c2SMark Fasheh &OCFS2_I(inode)->ip_rw_lockres); 2506ccd979bdSMark Fasheh if (err < 0) 2507ccd979bdSMark Fasheh mlog_errno(err); 2508ccd979bdSMark Fasheh if (err < 0 && !status) 2509ccd979bdSMark Fasheh status = err; 2510ccd979bdSMark Fasheh 2511ccd979bdSMark Fasheh mlog_exit(status); 2512ccd979bdSMark Fasheh return status; 2513ccd979bdSMark Fasheh } 2514ccd979bdSMark Fasheh 2515ccd979bdSMark Fasheh static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, 2516ccd979bdSMark Fasheh int new_level) 2517ccd979bdSMark Fasheh { 2518ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 2519ccd979bdSMark Fasheh 2520ccd979bdSMark Fasheh BUG_ON(lockres->l_blocking <= LKM_NLMODE); 2521ccd979bdSMark Fasheh 2522ccd979bdSMark Fasheh if (lockres->l_level <= new_level) { 2523ccd979bdSMark Fasheh mlog(ML_ERROR, 
"lockres->l_level (%u) <= new_level (%u)\n", 2524ccd979bdSMark Fasheh lockres->l_level, new_level); 2525ccd979bdSMark Fasheh BUG(); 2526ccd979bdSMark Fasheh } 2527ccd979bdSMark Fasheh 2528ccd979bdSMark Fasheh mlog(0, "lock %s, new_level = %d, l_blocking = %d\n", 2529ccd979bdSMark Fasheh lockres->l_name, new_level, lockres->l_blocking); 2530ccd979bdSMark Fasheh 2531ccd979bdSMark Fasheh lockres->l_action = OCFS2_AST_DOWNCONVERT; 2532ccd979bdSMark Fasheh lockres->l_requested = new_level; 2533ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_BUSY); 2534ccd979bdSMark Fasheh } 2535ccd979bdSMark Fasheh 2536ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb, 2537ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 2538ccd979bdSMark Fasheh int new_level, 2539ccd979bdSMark Fasheh int lvb) 2540ccd979bdSMark Fasheh { 2541ccd979bdSMark Fasheh int ret, dlm_flags = LKM_CONVERT; 2542ccd979bdSMark Fasheh enum dlm_status status; 2543ccd979bdSMark Fasheh 2544ccd979bdSMark Fasheh mlog_entry_void(); 2545ccd979bdSMark Fasheh 2546ccd979bdSMark Fasheh if (lvb) 2547ccd979bdSMark Fasheh dlm_flags |= LKM_VALBLK; 2548ccd979bdSMark Fasheh 2549ccd979bdSMark Fasheh status = dlmlock(osb->dlm, 2550ccd979bdSMark Fasheh new_level, 2551ccd979bdSMark Fasheh &lockres->l_lksb, 2552ccd979bdSMark Fasheh dlm_flags, 2553ccd979bdSMark Fasheh lockres->l_name, 2554f0681062SMark Fasheh OCFS2_LOCK_ID_MAX_LEN - 1, 2555e92d57dfSMark Fasheh ocfs2_locking_ast, 2556ccd979bdSMark Fasheh lockres, 2557aa2623adSMark Fasheh ocfs2_blocking_ast); 2558ccd979bdSMark Fasheh if (status != DLM_NORMAL) { 2559ccd979bdSMark Fasheh ocfs2_log_dlm_error("dlmlock", status, lockres); 2560ccd979bdSMark Fasheh ret = -EINVAL; 2561ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 1); 2562ccd979bdSMark Fasheh goto bail; 2563ccd979bdSMark Fasheh } 2564ccd979bdSMark Fasheh 2565ccd979bdSMark Fasheh ret = 0; 2566ccd979bdSMark Fasheh bail: 2567ccd979bdSMark Fasheh mlog_exit(ret); 2568ccd979bdSMark 
Fasheh return ret; 2569ccd979bdSMark Fasheh } 2570ccd979bdSMark Fasheh 2571ccd979bdSMark Fasheh /* returns 1 when the caller should unlock and call dlmunlock */ 2572ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, 2573ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 2574ccd979bdSMark Fasheh { 2575ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 2576ccd979bdSMark Fasheh 2577ccd979bdSMark Fasheh mlog_entry_void(); 2578ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 2579ccd979bdSMark Fasheh 2580ccd979bdSMark Fasheh if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { 2581ccd979bdSMark Fasheh /* If we're already trying to cancel a lock conversion 2582ccd979bdSMark Fasheh * then just drop the spinlock and allow the caller to 2583ccd979bdSMark Fasheh * requeue this lock. */ 2584ccd979bdSMark Fasheh 2585ccd979bdSMark Fasheh mlog(0, "Lockres %s, skip convert\n", lockres->l_name); 2586ccd979bdSMark Fasheh return 0; 2587ccd979bdSMark Fasheh } 2588ccd979bdSMark Fasheh 2589ccd979bdSMark Fasheh /* were we in a convert when we got the bast fire? */ 2590ccd979bdSMark Fasheh BUG_ON(lockres->l_action != OCFS2_AST_CONVERT && 2591ccd979bdSMark Fasheh lockres->l_action != OCFS2_AST_DOWNCONVERT); 2592ccd979bdSMark Fasheh /* set things up for the unlockast to know to just 2593ccd979bdSMark Fasheh * clear out the ast_action and unset busy, etc. 
*/ 2594ccd979bdSMark Fasheh lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT; 2595ccd979bdSMark Fasheh 2596ccd979bdSMark Fasheh mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY), 2597ccd979bdSMark Fasheh "lock %s, invalid flags: 0x%lx\n", 2598ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 2599ccd979bdSMark Fasheh 2600ccd979bdSMark Fasheh return 1; 2601ccd979bdSMark Fasheh } 2602ccd979bdSMark Fasheh 2603ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb, 2604ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 2605ccd979bdSMark Fasheh { 2606ccd979bdSMark Fasheh int ret; 2607ccd979bdSMark Fasheh enum dlm_status status; 2608ccd979bdSMark Fasheh 2609ccd979bdSMark Fasheh mlog_entry_void(); 2610ccd979bdSMark Fasheh mlog(0, "lock %s\n", lockres->l_name); 2611ccd979bdSMark Fasheh 2612ccd979bdSMark Fasheh ret = 0; 2613ccd979bdSMark Fasheh status = dlmunlock(osb->dlm, 2614ccd979bdSMark Fasheh &lockres->l_lksb, 2615ccd979bdSMark Fasheh LKM_CANCEL, 26162a45f2d1SMark Fasheh ocfs2_unlock_ast, 2617ccd979bdSMark Fasheh lockres); 2618ccd979bdSMark Fasheh if (status != DLM_NORMAL) { 2619ccd979bdSMark Fasheh ocfs2_log_dlm_error("dlmunlock", status, lockres); 2620ccd979bdSMark Fasheh ret = -EINVAL; 2621ccd979bdSMark Fasheh ocfs2_recover_from_dlm_error(lockres, 0); 2622ccd979bdSMark Fasheh } 2623ccd979bdSMark Fasheh 2624ccd979bdSMark Fasheh mlog(0, "lock %s return from dlmunlock\n", lockres->l_name); 2625ccd979bdSMark Fasheh 2626ccd979bdSMark Fasheh mlog_exit(ret); 2627ccd979bdSMark Fasheh return ret; 2628ccd979bdSMark Fasheh } 2629ccd979bdSMark Fasheh 2630b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb, 2631ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres, 2632cc567d89SMark Fasheh struct ocfs2_unblock_ctl *ctl) 2633ccd979bdSMark Fasheh { 2634ccd979bdSMark Fasheh unsigned long flags; 2635ccd979bdSMark Fasheh int blocking; 2636ccd979bdSMark Fasheh int new_level; 2637ccd979bdSMark Fasheh int ret = 0; 
26385ef0d4eaSMark Fasheh int set_lvb = 0; 2639ccd979bdSMark Fasheh 2640ccd979bdSMark Fasheh mlog_entry_void(); 2641ccd979bdSMark Fasheh 2642ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2643ccd979bdSMark Fasheh 2644ccd979bdSMark Fasheh BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); 2645ccd979bdSMark Fasheh 2646ccd979bdSMark Fasheh recheck: 2647ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_BUSY) { 2648d680efe9SMark Fasheh ctl->requeue = 1; 2649ccd979bdSMark Fasheh ret = ocfs2_prepare_cancel_convert(osb, lockres); 2650ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2651ccd979bdSMark Fasheh if (ret) { 2652ccd979bdSMark Fasheh ret = ocfs2_cancel_convert(osb, lockres); 2653ccd979bdSMark Fasheh if (ret < 0) 2654ccd979bdSMark Fasheh mlog_errno(ret); 2655ccd979bdSMark Fasheh } 2656ccd979bdSMark Fasheh goto leave; 2657ccd979bdSMark Fasheh } 2658ccd979bdSMark Fasheh 2659ccd979bdSMark Fasheh /* if we're blocking an exclusive and we have *any* holders, 2660ccd979bdSMark Fasheh * then requeue. */ 2661ccd979bdSMark Fasheh if ((lockres->l_blocking == LKM_EXMODE) 2662f7fbfdd1SMark Fasheh && (lockres->l_ex_holders || lockres->l_ro_holders)) 2663f7fbfdd1SMark Fasheh goto leave_requeue; 2664ccd979bdSMark Fasheh 2665ccd979bdSMark Fasheh /* If it's a PR we're blocking, then only 2666ccd979bdSMark Fasheh * requeue if we've got any EX holders */ 2667ccd979bdSMark Fasheh if (lockres->l_blocking == LKM_PRMODE && 2668f7fbfdd1SMark Fasheh lockres->l_ex_holders) 2669f7fbfdd1SMark Fasheh goto leave_requeue; 2670f7fbfdd1SMark Fasheh 2671f7fbfdd1SMark Fasheh /* 2672f7fbfdd1SMark Fasheh * Can we get a lock in this state if the holder counts are 2673f7fbfdd1SMark Fasheh * zero? The meta data unblock code used to check this. 
2674f7fbfdd1SMark Fasheh */ 2675f7fbfdd1SMark Fasheh if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) 2676f7fbfdd1SMark Fasheh && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) 2677f7fbfdd1SMark Fasheh goto leave_requeue; 2678ccd979bdSMark Fasheh 267916d5b956SMark Fasheh new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 268016d5b956SMark Fasheh 268116d5b956SMark Fasheh if (lockres->l_ops->check_downconvert 268216d5b956SMark Fasheh && !lockres->l_ops->check_downconvert(lockres, new_level)) 268316d5b956SMark Fasheh goto leave_requeue; 268416d5b956SMark Fasheh 2685ccd979bdSMark Fasheh /* If we get here, then we know that there are no more 2686ccd979bdSMark Fasheh * incompatible holders (and anyone asking for an incompatible 2687ccd979bdSMark Fasheh * lock is blocked). We can now downconvert the lock */ 2688cc567d89SMark Fasheh if (!lockres->l_ops->downconvert_worker) 2689ccd979bdSMark Fasheh goto downconvert; 2690ccd979bdSMark Fasheh 2691ccd979bdSMark Fasheh /* Some lockres types want to do a bit of work before 2692ccd979bdSMark Fasheh * downconverting a lock. Allow that here. The worker function 2693ccd979bdSMark Fasheh * may sleep, so we save off a copy of what we're blocking as 2694ccd979bdSMark Fasheh * it may change while we're not holding the spin lock. */ 2695ccd979bdSMark Fasheh blocking = lockres->l_blocking; 2696ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2697ccd979bdSMark Fasheh 2698cc567d89SMark Fasheh ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking); 2699d680efe9SMark Fasheh 2700d680efe9SMark Fasheh if (ctl->unblock_action == UNBLOCK_STOP_POST) 2701d680efe9SMark Fasheh goto leave; 2702ccd979bdSMark Fasheh 2703ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2704ccd979bdSMark Fasheh if (blocking != lockres->l_blocking) { 2705ccd979bdSMark Fasheh /* If this changed underneath us, then we can't drop 2706ccd979bdSMark Fasheh * it just yet. 
*/ 2707ccd979bdSMark Fasheh goto recheck; 2708ccd979bdSMark Fasheh } 2709ccd979bdSMark Fasheh 2710ccd979bdSMark Fasheh downconvert: 2711d680efe9SMark Fasheh ctl->requeue = 0; 2712ccd979bdSMark Fasheh 27135ef0d4eaSMark Fasheh if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { 27145ef0d4eaSMark Fasheh if (lockres->l_level == LKM_EXMODE) 27155ef0d4eaSMark Fasheh set_lvb = 1; 27165ef0d4eaSMark Fasheh 27175ef0d4eaSMark Fasheh /* 27185ef0d4eaSMark Fasheh * We only set the lvb if the lock has been fully 27195ef0d4eaSMark Fasheh * refreshed - otherwise we risk setting stale 27205ef0d4eaSMark Fasheh * data. Otherwise, there's no need to actually clear 27215ef0d4eaSMark Fasheh * out the lvb here as it's value is still valid. 27225ef0d4eaSMark Fasheh */ 27235ef0d4eaSMark Fasheh if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) 27245ef0d4eaSMark Fasheh lockres->l_ops->set_lvb(lockres); 27255ef0d4eaSMark Fasheh } 27265ef0d4eaSMark Fasheh 2727ccd979bdSMark Fasheh ocfs2_prepare_downconvert(lockres, new_level); 2728ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 27295ef0d4eaSMark Fasheh ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); 2730ccd979bdSMark Fasheh leave: 2731ccd979bdSMark Fasheh mlog_exit(ret); 2732ccd979bdSMark Fasheh return ret; 2733f7fbfdd1SMark Fasheh 2734f7fbfdd1SMark Fasheh leave_requeue: 2735f7fbfdd1SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2736f7fbfdd1SMark Fasheh ctl->requeue = 1; 2737f7fbfdd1SMark Fasheh 2738f7fbfdd1SMark Fasheh mlog_exit(0); 2739f7fbfdd1SMark Fasheh return 0; 2740ccd979bdSMark Fasheh } 2741ccd979bdSMark Fasheh 2742d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 2743ccd979bdSMark Fasheh int blocking) 2744ccd979bdSMark Fasheh { 2745ccd979bdSMark Fasheh struct inode *inode; 2746ccd979bdSMark Fasheh struct address_space *mapping; 2747ccd979bdSMark Fasheh 2748ccd979bdSMark Fasheh inode = ocfs2_lock_res_inode(lockres); 2749ccd979bdSMark 
Fasheh mapping = inode->i_mapping; 2750ccd979bdSMark Fasheh 2751f1f54068SMark Fasheh if (S_ISREG(inode->i_mode)) 2752f1f54068SMark Fasheh goto out; 2753f1f54068SMark Fasheh 27547f4a2a97SMark Fasheh /* 27557f4a2a97SMark Fasheh * We need this before the filemap_fdatawrite() so that it can 27567f4a2a97SMark Fasheh * transfer the dirty bit from the PTE to the 27577f4a2a97SMark Fasheh * page. Unfortunately this means that even for EX->PR 27587f4a2a97SMark Fasheh * downconverts, we'll lose our mappings and have to build 27597f4a2a97SMark Fasheh * them up again. 27607f4a2a97SMark Fasheh */ 27617f4a2a97SMark Fasheh unmap_mapping_range(mapping, 0, 0, 0); 27627f4a2a97SMark Fasheh 2763ccd979bdSMark Fasheh if (filemap_fdatawrite(mapping)) { 2764b0697053SMark Fasheh mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", 2765b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno); 2766ccd979bdSMark Fasheh } 2767ccd979bdSMark Fasheh sync_mapping_buffers(mapping); 2768ccd979bdSMark Fasheh if (blocking == LKM_EXMODE) { 2769ccd979bdSMark Fasheh truncate_inode_pages(mapping, 0); 2770ccd979bdSMark Fasheh } else { 2771ccd979bdSMark Fasheh /* We only need to wait on the I/O if we're not also 2772ccd979bdSMark Fasheh * truncating pages because truncate_inode_pages waits 2773ccd979bdSMark Fasheh * for us above. We don't truncate pages if we're 2774ccd979bdSMark Fasheh * blocking anything < EXMODE because we want to keep 2775ccd979bdSMark Fasheh * them around in that case. 
*/ 2776ccd979bdSMark Fasheh filemap_fdatawait(mapping); 2777ccd979bdSMark Fasheh } 2778ccd979bdSMark Fasheh 2779f1f54068SMark Fasheh out: 2780d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 2781ccd979bdSMark Fasheh } 2782ccd979bdSMark Fasheh 2783810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 2784810d5aebSMark Fasheh int new_level) 2785810d5aebSMark Fasheh { 2786810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 2787810d5aebSMark Fasheh int checkpointed = ocfs2_inode_fully_checkpointed(inode); 2788810d5aebSMark Fasheh 2789810d5aebSMark Fasheh BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); 2790810d5aebSMark Fasheh BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed); 2791810d5aebSMark Fasheh 2792810d5aebSMark Fasheh if (checkpointed) 2793810d5aebSMark Fasheh return 1; 2794810d5aebSMark Fasheh 2795810d5aebSMark Fasheh ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); 2796810d5aebSMark Fasheh return 0; 2797810d5aebSMark Fasheh } 2798810d5aebSMark Fasheh 2799810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 2800810d5aebSMark Fasheh { 2801810d5aebSMark Fasheh struct inode *inode = ocfs2_lock_res_inode(lockres); 2802810d5aebSMark Fasheh 2803810d5aebSMark Fasheh __ocfs2_stuff_meta_lvb(inode); 2804810d5aebSMark Fasheh } 2805810d5aebSMark Fasheh 2806d680efe9SMark Fasheh /* 2807d680efe9SMark Fasheh * Does the final reference drop on our dentry lock. Right now this 280834d024f8SMark Fasheh * happens in the downconvert thread, but we could choose to simplify the 2809d680efe9SMark Fasheh * dlmglue API and push these off to the ocfs2_wq in the future. 
2810d680efe9SMark Fasheh */ 2811d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 2812d680efe9SMark Fasheh struct ocfs2_lock_res *lockres) 2813d680efe9SMark Fasheh { 2814d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 2815d680efe9SMark Fasheh ocfs2_dentry_lock_put(osb, dl); 2816d680efe9SMark Fasheh } 2817d680efe9SMark Fasheh 2818d680efe9SMark Fasheh /* 2819d680efe9SMark Fasheh * d_delete() matching dentries before the lock downconvert. 2820d680efe9SMark Fasheh * 2821d680efe9SMark Fasheh * At this point, any process waiting to destroy the 2822d680efe9SMark Fasheh * dentry_lock due to last ref count is stopped by the 2823d680efe9SMark Fasheh * OCFS2_LOCK_QUEUED flag. 2824d680efe9SMark Fasheh * 2825d680efe9SMark Fasheh * We have two potential problems 2826d680efe9SMark Fasheh * 2827d680efe9SMark Fasheh * 1) If we do the last reference drop on our dentry_lock (via dput) 2828d680efe9SMark Fasheh * we'll wind up in ocfs2_release_dentry_lock(), waiting on 2829d680efe9SMark Fasheh * the downconvert to finish. Instead we take an elevated 2830d680efe9SMark Fasheh * reference and push the drop until after we've completed our 2831d680efe9SMark Fasheh * unblock processing. 2832d680efe9SMark Fasheh * 2833d680efe9SMark Fasheh * 2) There might be another process with a final reference, 2834d680efe9SMark Fasheh * waiting on us to finish processing. If this is the case, we 2835d680efe9SMark Fasheh * detect it and exit out - there's no more dentries anyway. 
2836d680efe9SMark Fasheh */ 2837d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, 2838d680efe9SMark Fasheh int blocking) 2839d680efe9SMark Fasheh { 2840d680efe9SMark Fasheh struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); 2841d680efe9SMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); 2842d680efe9SMark Fasheh struct dentry *dentry; 2843d680efe9SMark Fasheh unsigned long flags; 2844d680efe9SMark Fasheh int extra_ref = 0; 2845d680efe9SMark Fasheh 2846d680efe9SMark Fasheh /* 2847d680efe9SMark Fasheh * This node is blocking another node from getting a read 2848d680efe9SMark Fasheh * lock. This happens when we've renamed within a 2849d680efe9SMark Fasheh * directory. We've forced the other nodes to d_delete(), but 2850d680efe9SMark Fasheh * we never actually dropped our lock because it's still 2851d680efe9SMark Fasheh * valid. The downconvert code will retain a PR for this node, 2852d680efe9SMark Fasheh * so there's no further work to do. 2853d680efe9SMark Fasheh */ 2854d680efe9SMark Fasheh if (blocking == LKM_PRMODE) 2855d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 2856d680efe9SMark Fasheh 2857d680efe9SMark Fasheh /* 2858d680efe9SMark Fasheh * Mark this inode as potentially orphaned. The code in 2859d680efe9SMark Fasheh * ocfs2_delete_inode() will figure out whether it actually 2860d680efe9SMark Fasheh * needs to be freed or not. 2861d680efe9SMark Fasheh */ 2862d680efe9SMark Fasheh spin_lock(&oi->ip_lock); 2863d680efe9SMark Fasheh oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 2864d680efe9SMark Fasheh spin_unlock(&oi->ip_lock); 2865d680efe9SMark Fasheh 2866d680efe9SMark Fasheh /* 2867d680efe9SMark Fasheh * Yuck. We need to make sure however that the check of 2868d680efe9SMark Fasheh * OCFS2_LOCK_FREEING and the extra reference are atomic with 2869d680efe9SMark Fasheh * respect to a reference decrement or the setting of that 2870d680efe9SMark Fasheh * flag. 
2871d680efe9SMark Fasheh */ 2872d680efe9SMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2873d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 2874d680efe9SMark Fasheh if (!(lockres->l_flags & OCFS2_LOCK_FREEING) 2875d680efe9SMark Fasheh && dl->dl_count) { 2876d680efe9SMark Fasheh dl->dl_count++; 2877d680efe9SMark Fasheh extra_ref = 1; 2878d680efe9SMark Fasheh } 2879d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 2880d680efe9SMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2881d680efe9SMark Fasheh 2882d680efe9SMark Fasheh mlog(0, "extra_ref = %d\n", extra_ref); 2883d680efe9SMark Fasheh 2884d680efe9SMark Fasheh /* 2885d680efe9SMark Fasheh * We have a process waiting on us in ocfs2_dentry_iput(), 2886d680efe9SMark Fasheh * which means we can't have any more outstanding 2887d680efe9SMark Fasheh * aliases. There's no need to do any more work. 2888d680efe9SMark Fasheh */ 2889d680efe9SMark Fasheh if (!extra_ref) 2890d680efe9SMark Fasheh return UNBLOCK_CONTINUE; 2891d680efe9SMark Fasheh 2892d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 2893d680efe9SMark Fasheh while (1) { 2894d680efe9SMark Fasheh dentry = ocfs2_find_local_alias(dl->dl_inode, 2895d680efe9SMark Fasheh dl->dl_parent_blkno, 1); 2896d680efe9SMark Fasheh if (!dentry) 2897d680efe9SMark Fasheh break; 2898d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 2899d680efe9SMark Fasheh 2900d680efe9SMark Fasheh mlog(0, "d_delete(%.*s);\n", dentry->d_name.len, 2901d680efe9SMark Fasheh dentry->d_name.name); 2902d680efe9SMark Fasheh 2903d680efe9SMark Fasheh /* 2904d680efe9SMark Fasheh * The following dcache calls may do an 2905d680efe9SMark Fasheh * iput(). Normally we don't want that from the 2906d680efe9SMark Fasheh * downconverting thread, but in this case it's ok 2907d680efe9SMark Fasheh * because the requesting node already has an 2908d680efe9SMark Fasheh * exclusive lock on the inode, so it can't be queued 2909d680efe9SMark Fasheh * for a downconvert. 
2910d680efe9SMark Fasheh */ 2911d680efe9SMark Fasheh d_delete(dentry); 2912d680efe9SMark Fasheh dput(dentry); 2913d680efe9SMark Fasheh 2914d680efe9SMark Fasheh spin_lock(&dentry_attach_lock); 2915d680efe9SMark Fasheh } 2916d680efe9SMark Fasheh spin_unlock(&dentry_attach_lock); 2917d680efe9SMark Fasheh 2918d680efe9SMark Fasheh /* 2919d680efe9SMark Fasheh * If we are the last holder of this dentry lock, there is no 2920d680efe9SMark Fasheh * reason to downconvert so skip straight to the unlock. 2921d680efe9SMark Fasheh */ 2922d680efe9SMark Fasheh if (dl->dl_count == 1) 2923d680efe9SMark Fasheh return UNBLOCK_STOP_POST; 2924d680efe9SMark Fasheh 2925d680efe9SMark Fasheh return UNBLOCK_CONTINUE_POST; 2926d680efe9SMark Fasheh } 2927d680efe9SMark Fasheh 2928ccd979bdSMark Fasheh void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 2929ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 2930ccd979bdSMark Fasheh { 2931ccd979bdSMark Fasheh int status; 2932d680efe9SMark Fasheh struct ocfs2_unblock_ctl ctl = {0, 0,}; 2933ccd979bdSMark Fasheh unsigned long flags; 2934ccd979bdSMark Fasheh 2935ccd979bdSMark Fasheh /* Our reference to the lockres in this function can be 2936ccd979bdSMark Fasheh * considered valid until we remove the OCFS2_LOCK_QUEUED 2937ccd979bdSMark Fasheh * flag. */ 2938ccd979bdSMark Fasheh 2939ccd979bdSMark Fasheh mlog_entry_void(); 2940ccd979bdSMark Fasheh 2941ccd979bdSMark Fasheh BUG_ON(!lockres); 2942ccd979bdSMark Fasheh BUG_ON(!lockres->l_ops); 2943ccd979bdSMark Fasheh 2944ccd979bdSMark Fasheh mlog(0, "lockres %s blocked.\n", lockres->l_name); 2945ccd979bdSMark Fasheh 2946ccd979bdSMark Fasheh /* Detect whether a lock has been marked as going away while 294734d024f8SMark Fasheh * the downconvert thread was processing other things. A lock can 2948ccd979bdSMark Fasheh * still be marked with OCFS2_LOCK_FREEING after this check, 2949ccd979bdSMark Fasheh * but short circuiting here will still save us some 2950ccd979bdSMark Fasheh * performance. 
*/ 2951ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2952ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) 2953ccd979bdSMark Fasheh goto unqueue; 2954ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2955ccd979bdSMark Fasheh 2956b5e500e2SMark Fasheh status = ocfs2_unblock_lock(osb, lockres, &ctl); 2957ccd979bdSMark Fasheh if (status < 0) 2958ccd979bdSMark Fasheh mlog_errno(status); 2959ccd979bdSMark Fasheh 2960ccd979bdSMark Fasheh spin_lock_irqsave(&lockres->l_lock, flags); 2961ccd979bdSMark Fasheh unqueue: 2962d680efe9SMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) { 2963ccd979bdSMark Fasheh lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 2964ccd979bdSMark Fasheh } else 2965ccd979bdSMark Fasheh ocfs2_schedule_blocked_lock(osb, lockres); 2966ccd979bdSMark Fasheh 2967ccd979bdSMark Fasheh mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, 2968d680efe9SMark Fasheh ctl.requeue ? "yes" : "no"); 2969ccd979bdSMark Fasheh spin_unlock_irqrestore(&lockres->l_lock, flags); 2970ccd979bdSMark Fasheh 2971d680efe9SMark Fasheh if (ctl.unblock_action != UNBLOCK_CONTINUE 2972d680efe9SMark Fasheh && lockres->l_ops->post_unlock) 2973d680efe9SMark Fasheh lockres->l_ops->post_unlock(osb, lockres); 2974d680efe9SMark Fasheh 2975ccd979bdSMark Fasheh mlog_exit_void(); 2976ccd979bdSMark Fasheh } 2977ccd979bdSMark Fasheh 2978ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 2979ccd979bdSMark Fasheh struct ocfs2_lock_res *lockres) 2980ccd979bdSMark Fasheh { 2981ccd979bdSMark Fasheh mlog_entry_void(); 2982ccd979bdSMark Fasheh 2983ccd979bdSMark Fasheh assert_spin_locked(&lockres->l_lock); 2984ccd979bdSMark Fasheh 2985ccd979bdSMark Fasheh if (lockres->l_flags & OCFS2_LOCK_FREEING) { 2986ccd979bdSMark Fasheh /* Do not schedule a lock for downconvert when it's on 2987ccd979bdSMark Fasheh * the way to destruction - any nodes wanting access 2988ccd979bdSMark Fasheh * to the resource 
will get it soon. */ 2989ccd979bdSMark Fasheh mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n", 2990ccd979bdSMark Fasheh lockres->l_name, lockres->l_flags); 2991ccd979bdSMark Fasheh return; 2992ccd979bdSMark Fasheh } 2993ccd979bdSMark Fasheh 2994ccd979bdSMark Fasheh lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 2995ccd979bdSMark Fasheh 299634d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 2997ccd979bdSMark Fasheh if (list_empty(&lockres->l_blocked_list)) { 2998ccd979bdSMark Fasheh list_add_tail(&lockres->l_blocked_list, 2999ccd979bdSMark Fasheh &osb->blocked_lock_list); 3000ccd979bdSMark Fasheh osb->blocked_lock_count++; 3001ccd979bdSMark Fasheh } 300234d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 3003ccd979bdSMark Fasheh 3004ccd979bdSMark Fasheh mlog_exit_void(); 3005ccd979bdSMark Fasheh } 300634d024f8SMark Fasheh 300734d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 300834d024f8SMark Fasheh { 300934d024f8SMark Fasheh unsigned long processed; 301034d024f8SMark Fasheh struct ocfs2_lock_res *lockres; 301134d024f8SMark Fasheh 301234d024f8SMark Fasheh mlog_entry_void(); 301334d024f8SMark Fasheh 301434d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 301534d024f8SMark Fasheh /* grab this early so we know to try again if a state change and 301634d024f8SMark Fasheh * wake happens part-way through our work */ 301734d024f8SMark Fasheh osb->dc_work_sequence = osb->dc_wake_sequence; 301834d024f8SMark Fasheh 301934d024f8SMark Fasheh processed = osb->blocked_lock_count; 302034d024f8SMark Fasheh while (processed) { 302134d024f8SMark Fasheh BUG_ON(list_empty(&osb->blocked_lock_list)); 302234d024f8SMark Fasheh 302334d024f8SMark Fasheh lockres = list_entry(osb->blocked_lock_list.next, 302434d024f8SMark Fasheh struct ocfs2_lock_res, l_blocked_list); 302534d024f8SMark Fasheh list_del_init(&lockres->l_blocked_list); 302634d024f8SMark Fasheh osb->blocked_lock_count--; 302734d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 
302834d024f8SMark Fasheh 302934d024f8SMark Fasheh BUG_ON(!processed); 303034d024f8SMark Fasheh processed--; 303134d024f8SMark Fasheh 303234d024f8SMark Fasheh ocfs2_process_blocked_lock(osb, lockres); 303334d024f8SMark Fasheh 303434d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 303534d024f8SMark Fasheh } 303634d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 303734d024f8SMark Fasheh 303834d024f8SMark Fasheh mlog_exit_void(); 303934d024f8SMark Fasheh } 304034d024f8SMark Fasheh 304134d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 304234d024f8SMark Fasheh { 304334d024f8SMark Fasheh int empty = 0; 304434d024f8SMark Fasheh 304534d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 304634d024f8SMark Fasheh if (list_empty(&osb->blocked_lock_list)) 304734d024f8SMark Fasheh empty = 1; 304834d024f8SMark Fasheh 304934d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 305034d024f8SMark Fasheh return empty; 305134d024f8SMark Fasheh } 305234d024f8SMark Fasheh 305334d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 305434d024f8SMark Fasheh { 305534d024f8SMark Fasheh int should_wake = 0; 305634d024f8SMark Fasheh 305734d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 305834d024f8SMark Fasheh if (osb->dc_work_sequence != osb->dc_wake_sequence) 305934d024f8SMark Fasheh should_wake = 1; 306034d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 306134d024f8SMark Fasheh 306234d024f8SMark Fasheh return should_wake; 306334d024f8SMark Fasheh } 306434d024f8SMark Fasheh 306534d024f8SMark Fasheh int ocfs2_downconvert_thread(void *arg) 306634d024f8SMark Fasheh { 306734d024f8SMark Fasheh int status = 0; 306834d024f8SMark Fasheh struct ocfs2_super *osb = arg; 306934d024f8SMark Fasheh 307034d024f8SMark Fasheh /* only quit once we've been asked to stop and there is no more 307134d024f8SMark Fasheh * work available */ 307234d024f8SMark Fasheh while (!(kthread_should_stop() && 307334d024f8SMark Fasheh 
ocfs2_downconvert_thread_lists_empty(osb))) { 307434d024f8SMark Fasheh 307534d024f8SMark Fasheh wait_event_interruptible(osb->dc_event, 307634d024f8SMark Fasheh ocfs2_downconvert_thread_should_wake(osb) || 307734d024f8SMark Fasheh kthread_should_stop()); 307834d024f8SMark Fasheh 307934d024f8SMark Fasheh mlog(0, "downconvert_thread: awoken\n"); 308034d024f8SMark Fasheh 308134d024f8SMark Fasheh ocfs2_downconvert_thread_do_work(osb); 308234d024f8SMark Fasheh } 308334d024f8SMark Fasheh 308434d024f8SMark Fasheh osb->dc_task = NULL; 308534d024f8SMark Fasheh return status; 308634d024f8SMark Fasheh } 308734d024f8SMark Fasheh 308834d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 308934d024f8SMark Fasheh { 309034d024f8SMark Fasheh spin_lock(&osb->dc_task_lock); 309134d024f8SMark Fasheh /* make sure the voting thread gets a swipe at whatever changes 309234d024f8SMark Fasheh * the caller may have made to the voting state */ 309334d024f8SMark Fasheh osb->dc_wake_sequence++; 309434d024f8SMark Fasheh spin_unlock(&osb->dc_task_lock); 309534d024f8SMark Fasheh wake_up(&osb->dc_event); 309634d024f8SMark Fasheh } 3097