xref: /openbmc/linux/fs/ocfs2/dlmglue.c (revision 10fc3a18)
1328970deSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2fa60ce2cSMasahiro Yamada /*
3ccd979bdSMark Fasheh  * dlmglue.c
4ccd979bdSMark Fasheh  *
5ccd979bdSMark Fasheh  * Code which implements an OCFS2 specific interface to our DLM.
6ccd979bdSMark Fasheh  *
7ccd979bdSMark Fasheh  * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
8ccd979bdSMark Fasheh  */
9ccd979bdSMark Fasheh 
10ccd979bdSMark Fasheh #include <linux/types.h>
11ccd979bdSMark Fasheh #include <linux/slab.h>
12ccd979bdSMark Fasheh #include <linux/highmem.h>
13ccd979bdSMark Fasheh #include <linux/mm.h>
14ccd979bdSMark Fasheh #include <linux/kthread.h>
15ccd979bdSMark Fasheh #include <linux/pagemap.h>
16ccd979bdSMark Fasheh #include <linux/debugfs.h>
17ccd979bdSMark Fasheh #include <linux/seq_file.h>
188ddb7b00SSunil Mushran #include <linux/time.h>
199673e005SGang He #include <linux/delay.h>
209e33d69fSJan Kara #include <linux/quotaops.h>
21174cd4b1SIngo Molnar #include <linux/sched/signal.h>
22ccd979bdSMark Fasheh 
23ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE
24ccd979bdSMark Fasheh #include <cluster/masklog.h>
25ccd979bdSMark Fasheh 
26ccd979bdSMark Fasheh #include "ocfs2.h"
27d24fbcdaSJoel Becker #include "ocfs2_lockingver.h"
28ccd979bdSMark Fasheh 
29ccd979bdSMark Fasheh #include "alloc.h"
30d680efe9SMark Fasheh #include "dcache.h"
31ccd979bdSMark Fasheh #include "dlmglue.h"
32ccd979bdSMark Fasheh #include "extent_map.h"
337f1a37e3STiger Yang #include "file.h"
34ccd979bdSMark Fasheh #include "heartbeat.h"
35ccd979bdSMark Fasheh #include "inode.h"
36ccd979bdSMark Fasheh #include "journal.h"
3724ef1815SJoel Becker #include "stackglue.h"
38ccd979bdSMark Fasheh #include "slot_map.h"
39ccd979bdSMark Fasheh #include "super.h"
40ccd979bdSMark Fasheh #include "uptodate.h"
419e33d69fSJan Kara #include "quota.h"
428dec98edSTao Ma #include "refcounttree.h"
43b8a7a3a6SAndreas Gruenbacher #include "acl.h"
44ccd979bdSMark Fasheh 
45ccd979bdSMark Fasheh #include "buffer_head_io.h"
46ccd979bdSMark Fasheh 
47ccd979bdSMark Fasheh struct ocfs2_mask_waiter {
48ccd979bdSMark Fasheh 	struct list_head	mw_item;
49ccd979bdSMark Fasheh 	int			mw_status;
50ccd979bdSMark Fasheh 	struct completion	mw_complete;
51ccd979bdSMark Fasheh 	unsigned long		mw_mask;
52ccd979bdSMark Fasheh 	unsigned long		mw_goal;
538ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
545bc970e8SSunil Mushran 	ktime_t			mw_lock_start;
558ddb7b00SSunil Mushran #endif
56ccd979bdSMark Fasheh };
57ccd979bdSMark Fasheh 
/*
 * ->get_osb callbacks: each maps a lock resource to its ocfs2_super for
 * lock types whose ->l_priv is not the ocfs2_super itself.
 */
static struct ocfs2_super *
ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
62ccd979bdSMark Fasheh 
/*
 * Values a ->downconvert_worker function may return.
 *
 * They steer the precise actions taken by ocfs2_unblock_lock() and
 * ocfs2_process_blocked_lock().
 */
enum ocfs2_unblock_action {
	/* Continue downconvert */
	UNBLOCK_CONTINUE	= 0,
	/* Continue downconvert, fire ->post_unlock callback */
	UNBLOCK_CONTINUE_POST	= 1,
	/* Do not downconvert, fire ->post_unlock() callback. */
	UNBLOCK_STOP_POST	= 2,
};
77d680efe9SMark Fasheh 
78d680efe9SMark Fasheh struct ocfs2_unblock_ctl {
79d680efe9SMark Fasheh 	int requeue;
80d680efe9SMark Fasheh 	enum ocfs2_unblock_action unblock_action;
81d680efe9SMark Fasheh };
82d680efe9SMark Fasheh 
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* Lockdep class keys, one slot per lock type (indexed by type). */
static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
#endif
87cb25797dSJan Kara 
/* Callbacks wired into the per-lock-type ocfs2_lock_res_ops tables. */

/* ->check_downconvert */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level);
static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
					    int new_level);

/* ->set_lvb */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

/* ->downconvert_worker */
static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking);
static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking);
static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
					 int blocking);

/* ->post_unlock */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres);
1078dec98edSTao Ma 
1086cb129f5SAdrian Bunk #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
1096cb129f5SAdrian Bunk 
1106cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. */
ocfs2_dump_meta_lvb_info(u64 level,const char * function,unsigned int line,struct ocfs2_lock_res * lockres)1116cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level,
1126cb129f5SAdrian Bunk 				     const char *function,
1136cb129f5SAdrian Bunk 				     unsigned int line,
1146cb129f5SAdrian Bunk 				     struct ocfs2_lock_res *lockres)
1156cb129f5SAdrian Bunk {
116a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1176cb129f5SAdrian Bunk 
1186cb129f5SAdrian Bunk 	mlog(level, "LVB information for %s (called from %s:%u):\n",
1196cb129f5SAdrian Bunk 	     lockres->l_name, function, line);
1206cb129f5SAdrian Bunk 	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
1216cb129f5SAdrian Bunk 	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
1226cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_igeneration));
1236cb129f5SAdrian Bunk 	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
1246cb129f5SAdrian Bunk 	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
1256cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
1266cb129f5SAdrian Bunk 	     be16_to_cpu(lvb->lvb_imode));
1276cb129f5SAdrian Bunk 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
1286cb129f5SAdrian Bunk 	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
1296cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
1306cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
1316cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
1326cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iattr));
1336cb129f5SAdrian Bunk }
1346cb129f5SAdrian Bunk 
1356cb129f5SAdrian Bunk 
/*
 * OCFS2 Lock Resource Operations
 *
 * Per-lock-type hooks that fine tune the generic dlmglue locking
 * infrastructure.
 *
 * The simplest lock types point ->l_priv at their struct ocfs2_super
 * and leave every callback unset, letting the default actions manage
 * things.
 *
 * For now each lock type must also provide an init function and
 * trivial lock/unlock wrappers. Call ocfs2_simple_drop_lockres() once
 * the lock is no longer needed (i.e., at object destruction time).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Map an ocfs2_lock_res * to its ocfs2_super *. Only needed
	 * when ->l_priv is not an ocfs2_super pointer.
	 */
	struct ocfs2_super *(*get_osb)(struct ocfs2_lock_res *lockres);

	/*
	 * Optional. Invoked in the downconvert thread after a
	 * successful downconvert. The lockres is not referenced again
	 * once this has been called, so freeing memory etc. is safe.
	 *
	 * Exactly when this fires is controlled by
	 * ->downconvert_worker().
	 */
	void (*post_unlock)(struct ocfs2_super *osb,
			    struct ocfs2_lock_res *lockres);

	/*
	 * Lets a lock type add its own checks on whether a downconvert
	 * is safe. Return 0 to re-queue the downconvert for later,
	 * nonzero to continue.
	 *
	 * For most locks the default check -- no incompatible
	 * holders -- is sufficient.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *lockres,
				 int new_level);

	/*
	 * Lets a lock type fill in the lock value block. Called on
	 * downconvert and when a lock is dropped.
	 *
	 * Lock types using this should set LOCK_TYPE_USES_LVB in the
	 * flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *lockres);

	/*
	 * Called from the downconvert thread once it has been decided
	 * that a lock will be downconverted. No locks are held, so the
	 * callback may do work that schedules (syncing out data, etc).
	 *
	 * Must return one of the ocfs2_unblock_action values to tell
	 * the thread what to do next.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *lockres,
				  int blocking);

	/*
	 * LOCK_TYPE_* flags describing the specific requirements of a
	 * lock type. Each flag is described at its definition.
	 */
	int flags;
};
207ccd979bdSMark Fasheh 
/*
 * Set for lock types that want to "refresh" potentially stale data the
 * first time a meaningful (PRMODE or EXMODE) lock level is obtained.
 * With this flag set, the ast function sets OCFS2_LOCK_NEEDS_REFRESH in
 * the individual lockres' l_flags; the locking wrapper is expected to
 * clear OCFS2_LOCK_NEEDS_REFRESH when it is done.
 */
#define LOCK_TYPE_REQUIRES_REFRESH	0x1

/*
 * Set for lock types that make use of the lock value block. The
 * ->set_lvb lock type callback must be defined.
 */
#define LOCK_TYPE_USES_LVB		0x2
223b80fc012SMark Fasheh 
224ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
22554a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
226f625c979SMark Fasheh 	.flags		= 0,
227ccd979bdSMark Fasheh };
228ccd979bdSMark Fasheh 
229e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
23054a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
231810d5aebSMark Fasheh 	.check_downconvert = ocfs2_check_meta_downconvert,
232810d5aebSMark Fasheh 	.set_lvb	= ocfs2_set_meta_lvb,
233f1f54068SMark Fasheh 	.downconvert_worker = ocfs2_data_convert_worker,
234b80fc012SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
235ccd979bdSMark Fasheh };
236ccd979bdSMark Fasheh 
237ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = {
238f625c979SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
239ccd979bdSMark Fasheh };
240ccd979bdSMark Fasheh 
241ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
242f625c979SMark Fasheh 	.flags		= 0,
243ccd979bdSMark Fasheh };
244ccd979bdSMark Fasheh 
2456ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
2466ca497a8Swengang wang 	.flags		= 0,
2476ca497a8Swengang wang };
2486ca497a8Swengang wang 
2494882abebSGang He static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
2504882abebSGang He 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
2514882abebSGang He };
2524882abebSGang He 
25383273932SSrinivas Eeda static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
25483273932SSrinivas Eeda 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
25583273932SSrinivas Eeda };
25683273932SSrinivas Eeda 
257d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
25854a7e755SMark Fasheh 	.get_osb	= ocfs2_get_dentry_osb,
259d680efe9SMark Fasheh 	.post_unlock	= ocfs2_dentry_post_unlock,
260cc567d89SMark Fasheh 	.downconvert_worker = ocfs2_dentry_convert_worker,
261f625c979SMark Fasheh 	.flags		= 0,
262d680efe9SMark Fasheh };
263d680efe9SMark Fasheh 
26450008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
26550008630STiger Yang 	.get_osb	= ocfs2_get_inode_osb,
26650008630STiger Yang 	.flags		= 0,
26750008630STiger Yang };
26850008630STiger Yang 
269cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
270cf8e06f1SMark Fasheh 	.get_osb	= ocfs2_get_file_osb,
271cf8e06f1SMark Fasheh 	.flags		= 0,
272cf8e06f1SMark Fasheh };
273cf8e06f1SMark Fasheh 
2749e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
2759e33d69fSJan Kara 	.set_lvb	= ocfs2_set_qinfo_lvb,
2769e33d69fSJan Kara 	.get_osb	= ocfs2_get_qinfo_osb,
2779e33d69fSJan Kara 	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
2789e33d69fSJan Kara };
2799e33d69fSJan Kara 
2808dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
2818dec98edSTao Ma 	.check_downconvert = ocfs2_check_refcount_downconvert,
2828dec98edSTao Ma 	.downconvert_worker = ocfs2_refcount_convert_worker,
2838dec98edSTao Ma 	.flags		= 0,
2848dec98edSTao Ma };
2858dec98edSTao Ma 
ocfs2_is_inode_lock(struct ocfs2_lock_res * lockres)286ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
287ccd979bdSMark Fasheh {
288ccd979bdSMark Fasheh 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
28950008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_RW ||
29050008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
291ccd979bdSMark Fasheh }
292ccd979bdSMark Fasheh 
ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb * lksb)293c0e41338SJoel Becker static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
294a796d286SJoel Becker {
295a796d286SJoel Becker 	return container_of(lksb, struct ocfs2_lock_res, l_lksb);
296a796d286SJoel Becker }
297a796d286SJoel Becker 
ocfs2_lock_res_inode(struct ocfs2_lock_res * lockres)298ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
299ccd979bdSMark Fasheh {
300ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_is_inode_lock(lockres));
301ccd979bdSMark Fasheh 
302ccd979bdSMark Fasheh 	return (struct inode *) lockres->l_priv;
303ccd979bdSMark Fasheh }
304ccd979bdSMark Fasheh 
ocfs2_lock_res_dl(struct ocfs2_lock_res * lockres)305d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
306d680efe9SMark Fasheh {
307d680efe9SMark Fasheh 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
308d680efe9SMark Fasheh 
309d680efe9SMark Fasheh 	return (struct ocfs2_dentry_lock *)lockres->l_priv;
310d680efe9SMark Fasheh }
311d680efe9SMark Fasheh 
ocfs2_lock_res_qinfo(struct ocfs2_lock_res * lockres)3129e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
3139e33d69fSJan Kara {
3149e33d69fSJan Kara 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
3159e33d69fSJan Kara 
3169e33d69fSJan Kara 	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
3179e33d69fSJan Kara }
3189e33d69fSJan Kara 
3198dec98edSTao Ma static inline struct ocfs2_refcount_tree *
ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res * res)3208dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
3218dec98edSTao Ma {
3228dec98edSTao Ma 	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
3238dec98edSTao Ma }
3248dec98edSTao Ma 
ocfs2_get_lockres_osb(struct ocfs2_lock_res * lockres)32554a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
32654a7e755SMark Fasheh {
32754a7e755SMark Fasheh 	if (lockres->l_ops->get_osb)
32854a7e755SMark Fasheh 		return lockres->l_ops->get_osb(lockres);
32954a7e755SMark Fasheh 
33054a7e755SMark Fasheh 	return (struct ocfs2_super *)lockres->l_priv;
33154a7e755SMark Fasheh }
33254a7e755SMark Fasheh 
333ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
334ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
335ccd979bdSMark Fasheh 			     int level,
336bd3e7610SJoel Becker 			     u32 dlm_flags);
337ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
338ccd979bdSMark Fasheh 						     int wanted);
339cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
340ccd979bdSMark Fasheh 				   struct ocfs2_lock_res *lockres,
341cb25797dSJan Kara 				   int level, unsigned long caller_ip);
ocfs2_cluster_unlock(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres,int level)342cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
343cb25797dSJan Kara 					struct ocfs2_lock_res *lockres,
344cb25797dSJan Kara 					int level)
345cb25797dSJan Kara {
346cb25797dSJan Kara 	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
347cb25797dSJan Kara }
348cb25797dSJan Kara 
/* Forward declarations; the definitions appear later in this file. */
static inline void
ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
				     int level);
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static inline void
ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, int convert);
/*
 * Log a DLM error at ML_ERROR.
 *
 * @_func:    name of the DLM call that failed (printed with %s)
 * @_err:     error code it returned (printed with %d)
 * @_lockres: lock resource the call was made on
 *
 * For dentry locks, the name is printed truncated to
 * OCFS2_DENTRY_LOCK_INO_START - 1 characters, followed by the value of
 * ocfs2_get_dentry_lock_ino() in hex. All other lock types print
 * l_name as a plain string.
 *
 * Every argument is parenthesized in the expansion so that expression
 * arguments expand safely. @_lockres is still evaluated more than
 * once, so pass an expression without side effects.
 */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do { \
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \
		     (_err), (_func), (_lockres)->l_name); \
	else \
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \
		     (_err), (_func), OCFS2_DENTRY_LOCK_INO_START - 1, \
		     (_lockres)->l_name, \
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \
} while (0)
/* Forward declarations; the definitions appear later in this file. */
static int ocfs2_downconvert_thread(void *arg);
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_inode_lock_update(struct inode *inode,
				   struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
					      int new_level);
static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
				  struct ocfs2_lock_res *lockres,
				  int new_level, int lvb,
				  unsigned int generation);
static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_cancel_convert(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres);
384cf8e06f1SMark Fasheh 
385ccd979bdSMark Fasheh 
/*
 * Format the DLM resource name for a lock into @name.
 *
 * @type:       lock type; must be below OCFS2_NUM_LOCK_TYPES
 * @blkno:      block number, emitted as 16 hex digits
 * @generation: generation, emitted as 8 hex digits
 * @name:       output buffer of at least OCFS2_LOCK_ID_MAX_LEN bytes
 *
 * The name is the lock type character, then OCFS2_LOCK_ID_PAD, then the
 * two hex fields. BUGs if @type is out of range or if the formatted
 * length is not exactly OCFS2_LOCK_ID_MAX_LEN - 1.
 */
static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
				  u64 blkno,
				  u32 generation,
				  char *name)
{
	int len;

	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);

	/* %llx takes an unsigned argument, so cast the u64 accordingly. */
	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
		       (unsigned long long)blkno, generation);

	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));

	mlog(0, "built lock resource with name: %s\n", name);
}
403ccd979bdSMark Fasheh 
40434af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
405ccd979bdSMark Fasheh 
ocfs2_add_lockres_tracking(struct ocfs2_lock_res * res,struct ocfs2_dlm_debug * dlm_debug)406ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
407ccd979bdSMark Fasheh 				       struct ocfs2_dlm_debug *dlm_debug)
408ccd979bdSMark Fasheh {
409ccd979bdSMark Fasheh 	mlog(0, "Add tracking for lockres %s\n", res->l_name);
410ccd979bdSMark Fasheh 
411ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
412ccd979bdSMark Fasheh 	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
413ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
414ccd979bdSMark Fasheh }
415ccd979bdSMark Fasheh 
ocfs2_remove_lockres_tracking(struct ocfs2_lock_res * res)416ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
417ccd979bdSMark Fasheh {
418ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
419ccd979bdSMark Fasheh 	if (!list_empty(&res->l_debug_list))
420ccd979bdSMark Fasheh 		list_del_init(&res->l_debug_list);
421ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
422ccd979bdSMark Fasheh }
423ccd979bdSMark Fasheh 
4248ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
ocfs2_init_lock_stats(struct ocfs2_lock_res * res)4258ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
4268ddb7b00SSunil Mushran {
4278ddb7b00SSunil Mushran 	res->l_lock_refresh = 0;
4285da844a2SGang He 	res->l_lock_wait = 0;
4295bc970e8SSunil Mushran 	memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
4305bc970e8SSunil Mushran 	memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
4318ddb7b00SSunil Mushran }
4328ddb7b00SSunil Mushran 
ocfs2_update_lock_stats(struct ocfs2_lock_res * res,int level,struct ocfs2_mask_waiter * mw,int ret)4338ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
4348ddb7b00SSunil Mushran 				    struct ocfs2_mask_waiter *mw, int ret)
4358ddb7b00SSunil Mushran {
4365bc970e8SSunil Mushran 	u32 usec;
4375bc970e8SSunil Mushran 	ktime_t kt;
4385bc970e8SSunil Mushran 	struct ocfs2_lock_stats *stats;
4398ddb7b00SSunil Mushran 
4405bc970e8SSunil Mushran 	if (level == LKM_PRMODE)
4415bc970e8SSunil Mushran 		stats = &res->l_lock_prmode;
4425bc970e8SSunil Mushran 	else if (level == LKM_EXMODE)
4435bc970e8SSunil Mushran 		stats = &res->l_lock_exmode;
4445bc970e8SSunil Mushran 	else
4458ddb7b00SSunil Mushran 		return;
4468ddb7b00SSunil Mushran 
4475bc970e8SSunil Mushran 	kt = ktime_sub(ktime_get(), mw->mw_lock_start);
4485bc970e8SSunil Mushran 	usec = ktime_to_us(kt);
4495bc970e8SSunil Mushran 
4505bc970e8SSunil Mushran 	stats->ls_gets++;
4515bc970e8SSunil Mushran 	stats->ls_total += ktime_to_ns(kt);
4525bc970e8SSunil Mushran 	/* overflow */
45316865b7cSroel 	if (unlikely(stats->ls_gets == 0)) {
4545bc970e8SSunil Mushran 		stats->ls_gets++;
4555bc970e8SSunil Mushran 		stats->ls_total = ktime_to_ns(kt);
4565bc970e8SSunil Mushran 	}
4575bc970e8SSunil Mushran 
4585bc970e8SSunil Mushran 	if (stats->ls_max < usec)
4595bc970e8SSunil Mushran 		stats->ls_max = usec;
4605bc970e8SSunil Mushran 
4618ddb7b00SSunil Mushran 	if (ret)
4625bc970e8SSunil Mushran 		stats->ls_fail++;
4638a7f5f4cSGang He 
4648a7f5f4cSGang He 	stats->ls_last = ktime_to_us(ktime_get_real());
4658ddb7b00SSunil Mushran }
4668ddb7b00SSunil Mushran 
ocfs2_track_lock_refresh(struct ocfs2_lock_res * lockres)4678ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
4688ddb7b00SSunil Mushran {
4698ddb7b00SSunil Mushran 	lockres->l_lock_refresh++;
4708ddb7b00SSunil Mushran }
4718ddb7b00SSunil Mushran 
ocfs2_track_lock_wait(struct ocfs2_lock_res * lockres)4725da844a2SGang He static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres)
4735da844a2SGang He {
4745da844a2SGang He 	struct ocfs2_mask_waiter *mw;
4755da844a2SGang He 
4765da844a2SGang He 	if (list_empty(&lockres->l_mask_waiters)) {
4775da844a2SGang He 		lockres->l_lock_wait = 0;
4785da844a2SGang He 		return;
4795da844a2SGang He 	}
4805da844a2SGang He 
4815da844a2SGang He 	mw = list_first_entry(&lockres->l_mask_waiters,
4825da844a2SGang He 				struct ocfs2_mask_waiter, mw_item);
4835da844a2SGang He 	lockres->l_lock_wait =
4845da844a2SGang He 			ktime_to_us(ktime_mono_to_real(mw->mw_lock_start));
4855da844a2SGang He }
4865da844a2SGang He 
ocfs2_init_start_time(struct ocfs2_mask_waiter * mw)4878ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
4888ddb7b00SSunil Mushran {
4895bc970e8SSunil Mushran 	mw->mw_lock_start = ktime_get();
4908ddb7b00SSunil Mushran }
4918ddb7b00SSunil Mushran #else
/*
 * Lock statistics are compiled out (CONFIG_OCFS2_FS_STATS is not set):
 * every statistics hook is an empty inline.
 */
static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) { }

static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
					   int level,
					   struct ocfs2_mask_waiter *mw,
					   int ret) { }

static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) { }

static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres) { }

static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) { }
5088ddb7b00SSunil Mushran #endif
5098ddb7b00SSunil Mushran 
ocfs2_lock_res_init_common(struct ocfs2_super * osb,struct ocfs2_lock_res * res,enum ocfs2_lock_type type,struct ocfs2_lock_res_ops * ops,void * priv)510ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
511ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *res,
512ccd979bdSMark Fasheh 				       enum ocfs2_lock_type type,
513ccd979bdSMark Fasheh 				       struct ocfs2_lock_res_ops *ops,
514ccd979bdSMark Fasheh 				       void *priv)
515ccd979bdSMark Fasheh {
516ccd979bdSMark Fasheh 	res->l_type          = type;
517ccd979bdSMark Fasheh 	res->l_ops           = ops;
518ccd979bdSMark Fasheh 	res->l_priv          = priv;
519ccd979bdSMark Fasheh 
520bd3e7610SJoel Becker 	res->l_level         = DLM_LOCK_IV;
521bd3e7610SJoel Becker 	res->l_requested     = DLM_LOCK_IV;
522bd3e7610SJoel Becker 	res->l_blocking      = DLM_LOCK_IV;
523ccd979bdSMark Fasheh 	res->l_action        = OCFS2_AST_INVALID;
524ccd979bdSMark Fasheh 	res->l_unlock_action = OCFS2_UNLOCK_INVALID;
525ccd979bdSMark Fasheh 
526ccd979bdSMark Fasheh 	res->l_flags         = OCFS2_LOCK_INITIALIZED;
527ccd979bdSMark Fasheh 
528ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
5298ddb7b00SSunil Mushran 
5308ddb7b00SSunil Mushran 	ocfs2_init_lock_stats(res);
531cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
532cb25797dSJan Kara 	if (type != OCFS2_LOCK_TYPE_OPEN)
533cb25797dSJan Kara 		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
534cb25797dSJan Kara 				 &lockdep_keys[type], 0);
535cb25797dSJan Kara 	else
536cb25797dSJan Kara 		res->l_lockdep_map.key = NULL;
537cb25797dSJan Kara #endif
538ccd979bdSMark Fasheh }
539ccd979bdSMark Fasheh 
ocfs2_lock_res_init_once(struct ocfs2_lock_res * res)540ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
541ccd979bdSMark Fasheh {
542ccd979bdSMark Fasheh 	/* This also clears out the lock status block */
543ccd979bdSMark Fasheh 	memset(res, 0, sizeof(struct ocfs2_lock_res));
544ccd979bdSMark Fasheh 	spin_lock_init(&res->l_lock);
545ccd979bdSMark Fasheh 	init_waitqueue_head(&res->l_event);
546ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_blocked_list);
547ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_mask_waiters);
548439a36b8SEric Ren 	INIT_LIST_HEAD(&res->l_holders);
549ccd979bdSMark Fasheh }
550ccd979bdSMark Fasheh 
ocfs2_inode_lock_res_init(struct ocfs2_lock_res * res,enum ocfs2_lock_type type,unsigned int generation,struct inode * inode)551ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
552ccd979bdSMark Fasheh 			       enum ocfs2_lock_type type,
55324c19ef4SMark Fasheh 			       unsigned int generation,
554ccd979bdSMark Fasheh 			       struct inode *inode)
555ccd979bdSMark Fasheh {
556ccd979bdSMark Fasheh 	struct ocfs2_lock_res_ops *ops;
557ccd979bdSMark Fasheh 
558ccd979bdSMark Fasheh 	switch(type) {
559ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_RW:
560ccd979bdSMark Fasheh 			ops = &ocfs2_inode_rw_lops;
561ccd979bdSMark Fasheh 			break;
562ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_META:
563e63aecb6SMark Fasheh 			ops = &ocfs2_inode_inode_lops;
564ccd979bdSMark Fasheh 			break;
56550008630STiger Yang 		case OCFS2_LOCK_TYPE_OPEN:
56650008630STiger Yang 			ops = &ocfs2_inode_open_lops;
56750008630STiger Yang 			break;
568ccd979bdSMark Fasheh 		default:
569ccd979bdSMark Fasheh 			mlog_bug_on_msg(1, "type: %d\n", type);
570ccd979bdSMark Fasheh 			ops = NULL; /* thanks, gcc */
571ccd979bdSMark Fasheh 			break;
5725b43d645Szhengbin 	}
573ccd979bdSMark Fasheh 
574d680efe9SMark Fasheh 	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
57524c19ef4SMark Fasheh 			      generation, res->l_name);
576d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
577d680efe9SMark Fasheh }
578d680efe9SMark Fasheh 
ocfs2_get_inode_osb(struct ocfs2_lock_res * lockres)57954a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
58054a7e755SMark Fasheh {
58154a7e755SMark Fasheh 	struct inode *inode = ocfs2_lock_res_inode(lockres);
58254a7e755SMark Fasheh 
58354a7e755SMark Fasheh 	return OCFS2_SB(inode->i_sb);
58454a7e755SMark Fasheh }
58554a7e755SMark Fasheh 
ocfs2_get_qinfo_osb(struct ocfs2_lock_res * lockres)5869e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
5879e33d69fSJan Kara {
5889e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *info = lockres->l_priv;
5899e33d69fSJan Kara 
5909e33d69fSJan Kara 	return OCFS2_SB(info->dqi_gi.dqi_sb);
5919e33d69fSJan Kara }
5929e33d69fSJan Kara 
ocfs2_get_file_osb(struct ocfs2_lock_res * lockres)593cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
594cf8e06f1SMark Fasheh {
595cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = lockres->l_priv;
596cf8e06f1SMark Fasheh 
597cf8e06f1SMark Fasheh 	return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
598cf8e06f1SMark Fasheh }
599cf8e06f1SMark Fasheh 
ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res * lockres)600d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
601d680efe9SMark Fasheh {
602d680efe9SMark Fasheh 	__be64 inode_blkno_be;
603d680efe9SMark Fasheh 
604d680efe9SMark Fasheh 	memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
605d680efe9SMark Fasheh 	       sizeof(__be64));
606d680efe9SMark Fasheh 
607d680efe9SMark Fasheh 	return be64_to_cpu(inode_blkno_be);
608d680efe9SMark Fasheh }
609d680efe9SMark Fasheh 
ocfs2_get_dentry_osb(struct ocfs2_lock_res * lockres)61054a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
61154a7e755SMark Fasheh {
61254a7e755SMark Fasheh 	struct ocfs2_dentry_lock *dl = lockres->l_priv;
61354a7e755SMark Fasheh 
61454a7e755SMark Fasheh 	return OCFS2_SB(dl->dl_inode->i_sb);
61554a7e755SMark Fasheh }
61654a7e755SMark Fasheh 
/*
 * Initialise the lock resource embedded in a dentry lock.
 *
 * @dl:     dentry lock whose dl_lockres is set up; @dl is also passed to
 *          ocfs2_lock_res_init_common() as the final argument
 * @parent: block number of the parent directory, printed as 16 hex digits
 *          into the readable part of the lock name
 * @inode:  inode the dentry refers to; its block number is appended to
 *          the lock name as a big-endian binary value
 */
void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
				u64 parent, struct inode *inode)
{
	int len;
	u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
	__be64 inode_blkno_be = cpu_to_be64(inode_blkno);
	struct ocfs2_lock_res *lockres = &dl->dl_lockres;

	ocfs2_lock_res_init_once(lockres);

	/*
	 * Unfortunately, the standard lock naming scheme won't work
	 * here because we have two values that would each need 16
	 * characters as hex strings. Instead, we'll stuff the inode
	 * number in as a binary value. We still want error prints to
	 * show something without garbling the display, so drop a null
	 * byte in there before the inode number. A future version of
	 * OCFS2 will likely use all binary lock names. The stringified
	 * names have been a tremendous aid in debugging, but now that
	 * the debugfs interface exists, we can mangle things there if
	 * need be.
	 *
	 * NOTE: We also drop the standard "pad" value (the total lock
	 * name size stays the same though - the last part is all
	 * zeros due to the memset in ocfs2_lock_res_init_once()).
	 */
	len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
		       "%c%016llx",
		       ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
		       /* %llx takes an unsigned long long argument */
		       (unsigned long long)parent);

	/* The type char plus 16 hex digits must exactly fill the prefix. */
	BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));

	memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
	       sizeof(inode_blkno_be));

	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
				   OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
				   dl);
}
656ccd979bdSMark Fasheh 
ocfs2_super_lock_res_init(struct ocfs2_lock_res * res,struct ocfs2_super * osb)657ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
658ccd979bdSMark Fasheh 				      struct ocfs2_super *osb)
659ccd979bdSMark Fasheh {
660ccd979bdSMark Fasheh 	/* Superblock lockres doesn't come from a slab so we call init
661ccd979bdSMark Fasheh 	 * once on it manually.  */
662ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
663d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
664d680efe9SMark Fasheh 			      0, res->l_name);
665ccd979bdSMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
666ccd979bdSMark Fasheh 				   &ocfs2_super_lops, osb);
667ccd979bdSMark Fasheh }
668ccd979bdSMark Fasheh 
ocfs2_rename_lock_res_init(struct ocfs2_lock_res * res,struct ocfs2_super * osb)669ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
670ccd979bdSMark Fasheh 				       struct ocfs2_super *osb)
671ccd979bdSMark Fasheh {
672ccd979bdSMark Fasheh 	/* Rename lockres doesn't come from a slab so we call init
673ccd979bdSMark Fasheh 	 * once on it manually.  */
674ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
675d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
676d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
677ccd979bdSMark Fasheh 				   &ocfs2_rename_lops, osb);
678ccd979bdSMark Fasheh }
679ccd979bdSMark Fasheh 
ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res * res,struct ocfs2_super * osb)6806ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
6816ca497a8Swengang wang 					 struct ocfs2_super *osb)
6826ca497a8Swengang wang {
6836ca497a8Swengang wang 	/* nfs_sync lockres doesn't come from a slab so we call init
6846ca497a8Swengang wang 	 * once on it manually.  */
6856ca497a8Swengang wang 	ocfs2_lock_res_init_once(res);
6866ca497a8Swengang wang 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
6876ca497a8Swengang wang 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
6886ca497a8Swengang wang 				   &ocfs2_nfs_sync_lops, osb);
6896ca497a8Swengang wang }
6906ca497a8Swengang wang 
ocfs2_nfs_sync_lock_init(struct ocfs2_super * osb)6914cd9973fSJunxiao Bi static void ocfs2_nfs_sync_lock_init(struct ocfs2_super *osb)
6924cd9973fSJunxiao Bi {
6934cd9973fSJunxiao Bi 	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
6944cd9973fSJunxiao Bi 	init_rwsem(&osb->nfs_sync_rwlock);
6954cd9973fSJunxiao Bi }
6964cd9973fSJunxiao Bi 
ocfs2_trim_fs_lock_res_init(struct ocfs2_super * osb)6974882abebSGang He void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
6984882abebSGang He {
6994882abebSGang He 	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
7004882abebSGang He 
7015500ab4eSGang He 	/* Only one trimfs thread are allowed to work at the same time. */
7025500ab4eSGang He 	mutex_lock(&osb->obs_trim_fs_mutex);
7035500ab4eSGang He 
7044882abebSGang He 	ocfs2_lock_res_init_once(lockres);
7054882abebSGang He 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_TRIM_FS, 0, 0, lockres->l_name);
7064882abebSGang He 	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_TRIM_FS,
7074882abebSGang He 				   &ocfs2_trim_fs_lops, osb);
7084882abebSGang He }
7094882abebSGang He 
ocfs2_trim_fs_lock_res_uninit(struct ocfs2_super * osb)7104882abebSGang He void ocfs2_trim_fs_lock_res_uninit(struct ocfs2_super *osb)
7114882abebSGang He {
7124882abebSGang He 	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
7134882abebSGang He 
7144882abebSGang He 	ocfs2_simple_drop_lockres(osb, lockres);
7154882abebSGang He 	ocfs2_lock_res_free(lockres);
7165500ab4eSGang He 
7175500ab4eSGang He 	mutex_unlock(&osb->obs_trim_fs_mutex);
7184882abebSGang He }
7194882abebSGang He 
ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res * res,struct ocfs2_super * osb)72083273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
72183273932SSrinivas Eeda 					    struct ocfs2_super *osb)
72283273932SSrinivas Eeda {
72383273932SSrinivas Eeda 	ocfs2_lock_res_init_once(res);
72483273932SSrinivas Eeda 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
72583273932SSrinivas Eeda 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
72683273932SSrinivas Eeda 				   &ocfs2_orphan_scan_lops, osb);
72783273932SSrinivas Eeda }
72883273932SSrinivas Eeda 
ocfs2_file_lock_res_init(struct ocfs2_lock_res * lockres,struct ocfs2_file_private * fp)729cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
730cf8e06f1SMark Fasheh 			      struct ocfs2_file_private *fp)
731cf8e06f1SMark Fasheh {
732cf8e06f1SMark Fasheh 	struct inode *inode = fp->fp_file->f_mapping->host;
733cf8e06f1SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
734cf8e06f1SMark Fasheh 
735cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
736cf8e06f1SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
737cf8e06f1SMark Fasheh 			      inode->i_generation, lockres->l_name);
738cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
739cf8e06f1SMark Fasheh 				   OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
740cf8e06f1SMark Fasheh 				   fp);
741cf8e06f1SMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_NOCACHE;
742cf8e06f1SMark Fasheh }
743cf8e06f1SMark Fasheh 
ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res * lockres,struct ocfs2_mem_dqinfo * info)7449e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
7459e33d69fSJan Kara 			       struct ocfs2_mem_dqinfo *info)
7469e33d69fSJan Kara {
7479e33d69fSJan Kara 	ocfs2_lock_res_init_once(lockres);
7489e33d69fSJan Kara 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
7499e33d69fSJan Kara 			      0, lockres->l_name);
7509e33d69fSJan Kara 	ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
7519e33d69fSJan Kara 				   OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
7529e33d69fSJan Kara 				   info);
7539e33d69fSJan Kara }
7549e33d69fSJan Kara 
/*
 * Set up the lock resource for a refcount block.
 *
 * @lockres:    lock resource to initialise
 * @osb:        ocfs2 superblock info the lock belongs to
 * @ref_blkno:  block number encoded into the lock name
 * @generation: generation number encoded into the lock name
 */
void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
				  struct ocfs2_super *osb, u64 ref_blkno,
				  unsigned int generation)
{
	const enum ocfs2_lock_type type = OCFS2_LOCK_TYPE_REFCOUNT;

	ocfs2_lock_res_init_once(lockres);
	ocfs2_build_lock_name(type, ref_blkno, generation, lockres->l_name);
	ocfs2_lock_res_init_common(osb, lockres, type,
				   &ocfs2_refcount_block_lops, osb);
}
7658dec98edSTao Ma 
ocfs2_lock_res_free(struct ocfs2_lock_res * res)766ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
767ccd979bdSMark Fasheh {
768ccd979bdSMark Fasheh 	if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
769ccd979bdSMark Fasheh 		return;
770ccd979bdSMark Fasheh 
771ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
772ccd979bdSMark Fasheh 
773ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
774ccd979bdSMark Fasheh 			"Lockres %s is on the blocked list\n",
775ccd979bdSMark Fasheh 			res->l_name);
776ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
777ccd979bdSMark Fasheh 			"Lockres %s has mask waiters pending\n",
778ccd979bdSMark Fasheh 			res->l_name);
779ccd979bdSMark Fasheh 	mlog_bug_on_msg(spin_is_locked(&res->l_lock),
780ccd979bdSMark Fasheh 			"Lockres %s is locked\n",
781ccd979bdSMark Fasheh 			res->l_name);
782ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ro_holders,
783ccd979bdSMark Fasheh 			"Lockres %s has %u ro holders\n",
784ccd979bdSMark Fasheh 			res->l_name, res->l_ro_holders);
785ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ex_holders,
786ccd979bdSMark Fasheh 			"Lockres %s has %u ex holders\n",
787ccd979bdSMark Fasheh 			res->l_name, res->l_ex_holders);
788ccd979bdSMark Fasheh 
789ccd979bdSMark Fasheh 	/* Need to clear out the lock status block for the dlm */
790ccd979bdSMark Fasheh 	memset(&res->l_lksb, 0, sizeof(res->l_lksb));
791ccd979bdSMark Fasheh 
792ccd979bdSMark Fasheh 	res->l_flags = 0UL;
793ccd979bdSMark Fasheh }
794ccd979bdSMark Fasheh 
795439a36b8SEric Ren /*
796439a36b8SEric Ren  * Keep a list of processes who have interest in a lockres.
797439a36b8SEric Ren  * Note: this is now only uesed for check recursive cluster locking.
798439a36b8SEric Ren  */
ocfs2_add_holder(struct ocfs2_lock_res * lockres,struct ocfs2_lock_holder * oh)799439a36b8SEric Ren static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
800439a36b8SEric Ren 				   struct ocfs2_lock_holder *oh)
801439a36b8SEric Ren {
802439a36b8SEric Ren 	INIT_LIST_HEAD(&oh->oh_list);
803439a36b8SEric Ren 	oh->oh_owner_pid = get_pid(task_pid(current));
804439a36b8SEric Ren 
805439a36b8SEric Ren 	spin_lock(&lockres->l_lock);
806439a36b8SEric Ren 	list_add_tail(&oh->oh_list, &lockres->l_holders);
807439a36b8SEric Ren 	spin_unlock(&lockres->l_lock);
808439a36b8SEric Ren }
809439a36b8SEric Ren 
810133b81f2SLarry Chen static struct ocfs2_lock_holder *
ocfs2_pid_holder(struct ocfs2_lock_res * lockres,struct pid * pid)811133b81f2SLarry Chen ocfs2_pid_holder(struct ocfs2_lock_res *lockres,
812133b81f2SLarry Chen 		struct pid *pid)
813133b81f2SLarry Chen {
814133b81f2SLarry Chen 	struct ocfs2_lock_holder *oh;
815133b81f2SLarry Chen 
816133b81f2SLarry Chen 	spin_lock(&lockres->l_lock);
817133b81f2SLarry Chen 	list_for_each_entry(oh, &lockres->l_holders, oh_list) {
818133b81f2SLarry Chen 		if (oh->oh_owner_pid == pid) {
819133b81f2SLarry Chen 			spin_unlock(&lockres->l_lock);
820133b81f2SLarry Chen 			return oh;
821133b81f2SLarry Chen 		}
822133b81f2SLarry Chen 	}
823133b81f2SLarry Chen 	spin_unlock(&lockres->l_lock);
824133b81f2SLarry Chen 	return NULL;
825133b81f2SLarry Chen }
826133b81f2SLarry Chen 
ocfs2_remove_holder(struct ocfs2_lock_res * lockres,struct ocfs2_lock_holder * oh)827439a36b8SEric Ren static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
828439a36b8SEric Ren 				       struct ocfs2_lock_holder *oh)
829439a36b8SEric Ren {
830439a36b8SEric Ren 	spin_lock(&lockres->l_lock);
831439a36b8SEric Ren 	list_del(&oh->oh_list);
832439a36b8SEric Ren 	spin_unlock(&lockres->l_lock);
833439a36b8SEric Ren 
834439a36b8SEric Ren 	put_pid(oh->oh_owner_pid);
835439a36b8SEric Ren }
836439a36b8SEric Ren 
837439a36b8SEric Ren 
ocfs2_inc_holders(struct ocfs2_lock_res * lockres,int level)838ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
839ccd979bdSMark Fasheh 				     int level)
840ccd979bdSMark Fasheh {
841ccd979bdSMark Fasheh 	BUG_ON(!lockres);
842ccd979bdSMark Fasheh 
843ccd979bdSMark Fasheh 	switch(level) {
844bd3e7610SJoel Becker 	case DLM_LOCK_EX:
845ccd979bdSMark Fasheh 		lockres->l_ex_holders++;
846ccd979bdSMark Fasheh 		break;
847bd3e7610SJoel Becker 	case DLM_LOCK_PR:
848ccd979bdSMark Fasheh 		lockres->l_ro_holders++;
849ccd979bdSMark Fasheh 		break;
850ccd979bdSMark Fasheh 	default:
851ccd979bdSMark Fasheh 		BUG();
852ccd979bdSMark Fasheh 	}
853ccd979bdSMark Fasheh }
854ccd979bdSMark Fasheh 
ocfs2_dec_holders(struct ocfs2_lock_res * lockres,int level)855ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
856ccd979bdSMark Fasheh 				     int level)
857ccd979bdSMark Fasheh {
858ccd979bdSMark Fasheh 	BUG_ON(!lockres);
859ccd979bdSMark Fasheh 
860ccd979bdSMark Fasheh 	switch(level) {
861bd3e7610SJoel Becker 	case DLM_LOCK_EX:
862ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ex_holders);
863ccd979bdSMark Fasheh 		lockres->l_ex_holders--;
864ccd979bdSMark Fasheh 		break;
865bd3e7610SJoel Becker 	case DLM_LOCK_PR:
866ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ro_holders);
867ccd979bdSMark Fasheh 		lockres->l_ro_holders--;
868ccd979bdSMark Fasheh 		break;
869ccd979bdSMark Fasheh 	default:
870ccd979bdSMark Fasheh 		BUG();
871ccd979bdSMark Fasheh 	}
872ccd979bdSMark Fasheh }
873ccd979bdSMark Fasheh 
874ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock
875ccd979bdSMark Fasheh  * levels are EX, PR, and NL. It *will* have to be adjusted when more
876ccd979bdSMark Fasheh  * lock types are added. */
ocfs2_highest_compat_lock_level(int level)877ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level)
878ccd979bdSMark Fasheh {
879bd3e7610SJoel Becker 	int new_level = DLM_LOCK_EX;
880ccd979bdSMark Fasheh 
881bd3e7610SJoel Becker 	if (level == DLM_LOCK_EX)
882bd3e7610SJoel Becker 		new_level = DLM_LOCK_NL;
883bd3e7610SJoel Becker 	else if (level == DLM_LOCK_PR)
884bd3e7610SJoel Becker 		new_level = DLM_LOCK_PR;
885ccd979bdSMark Fasheh 	return new_level;
886ccd979bdSMark Fasheh }
887ccd979bdSMark Fasheh 
lockres_set_flags(struct ocfs2_lock_res * lockres,unsigned long newflags)888ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres,
889ccd979bdSMark Fasheh 			      unsigned long newflags)
890ccd979bdSMark Fasheh {
891800deef3SChristoph Hellwig 	struct ocfs2_mask_waiter *mw, *tmp;
892ccd979bdSMark Fasheh 
893ccd979bdSMark Fasheh  	assert_spin_locked(&lockres->l_lock);
894ccd979bdSMark Fasheh 
895ccd979bdSMark Fasheh 	lockres->l_flags = newflags;
896ccd979bdSMark Fasheh 
897800deef3SChristoph Hellwig 	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
898ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
899ccd979bdSMark Fasheh 			continue;
900ccd979bdSMark Fasheh 
901ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
902ccd979bdSMark Fasheh 		mw->mw_status = 0;
903ccd979bdSMark Fasheh 		complete(&mw->mw_complete);
9045da844a2SGang He 		ocfs2_track_lock_wait(lockres);
905ccd979bdSMark Fasheh 	}
906ccd979bdSMark Fasheh }
lockres_or_flags(struct ocfs2_lock_res * lockres,unsigned long or)907ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
908ccd979bdSMark Fasheh {
909ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags | or);
910ccd979bdSMark Fasheh }
lockres_clear_flags(struct ocfs2_lock_res * lockres,unsigned long clear)911ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
912ccd979bdSMark Fasheh 				unsigned long clear)
913ccd979bdSMark Fasheh {
914ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags & ~clear);
915ccd979bdSMark Fasheh }
916ccd979bdSMark Fasheh 
ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res * lockres)917ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
918ccd979bdSMark Fasheh {
919ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
920ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
921ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
922bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
923ccd979bdSMark Fasheh 
924ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
925ccd979bdSMark Fasheh 	if (lockres->l_level <=
926ccd979bdSMark Fasheh 	    ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
927bd3e7610SJoel Becker 		lockres->l_blocking = DLM_LOCK_NL;
928ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
929ccd979bdSMark Fasheh 	}
930ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
931ccd979bdSMark Fasheh }
932ccd979bdSMark Fasheh 
ocfs2_generic_handle_convert_action(struct ocfs2_lock_res * lockres)933ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
934ccd979bdSMark Fasheh {
935ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
936ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
937ccd979bdSMark Fasheh 
938ccd979bdSMark Fasheh 	/* Convert from RO to EX doesn't really need anything as our
939ccd979bdSMark Fasheh 	 * information is already up to data. Convert from NL to
940ccd979bdSMark Fasheh 	 * *anything* however should mark ourselves as needing an
941ccd979bdSMark Fasheh 	 * update */
942bd3e7610SJoel Becker 	if (lockres->l_level == DLM_LOCK_NL &&
943f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
944ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
945ccd979bdSMark Fasheh 
946ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
947a1912826SSunil Mushran 
948a1912826SSunil Mushran 	/*
949a1912826SSunil Mushran 	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
950a1912826SSunil Mushran 	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
951a1912826SSunil Mushran 	 * downconverting the lock before the upconvert has fully completed.
952d1e78238SXue jiufei 	 * Do not prevent the dc thread from downconverting if NONBLOCK lock
953d1e78238SXue jiufei 	 * had already returned.
954a1912826SSunil Mushran 	 */
955d1e78238SXue jiufei 	if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED))
956a1912826SSunil Mushran 		lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
957d1e78238SXue jiufei 	else
958d1e78238SXue jiufei 		lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED);
959a1912826SSunil Mushran 
960ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
961ccd979bdSMark Fasheh }
962ccd979bdSMark Fasheh 
ocfs2_generic_handle_attach_action(struct ocfs2_lock_res * lockres)963ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
964ccd979bdSMark Fasheh {
9653cf0c507SRoel Kluin 	BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
966ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
967ccd979bdSMark Fasheh 
968bd3e7610SJoel Becker 	if (lockres->l_requested > DLM_LOCK_NL &&
969f625c979SMark Fasheh 	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
970f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
971ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
972ccd979bdSMark Fasheh 
973ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
974ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
975ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
976ccd979bdSMark Fasheh }
977ccd979bdSMark Fasheh 
ocfs2_generic_handle_bast(struct ocfs2_lock_res * lockres,int level)978ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
979ccd979bdSMark Fasheh 				     int level)
980ccd979bdSMark Fasheh {
981ccd979bdSMark Fasheh 	int needs_downconvert = 0;
982ccd979bdSMark Fasheh 
983ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
984ccd979bdSMark Fasheh 
985ccd979bdSMark Fasheh 	if (level > lockres->l_blocking) {
986ccd979bdSMark Fasheh 		/* only schedule a downconvert if we haven't already scheduled
987ccd979bdSMark Fasheh 		 * one that goes low enough to satisfy the level we're
988ccd979bdSMark Fasheh 		 * blocking.  this also catches the case where we get
989ccd979bdSMark Fasheh 		 * duplicate BASTs */
990ccd979bdSMark Fasheh 		if (ocfs2_highest_compat_lock_level(level) <
991ccd979bdSMark Fasheh 		    ocfs2_highest_compat_lock_level(lockres->l_blocking))
992ccd979bdSMark Fasheh 			needs_downconvert = 1;
993ccd979bdSMark Fasheh 
994ccd979bdSMark Fasheh 		lockres->l_blocking = level;
995ccd979bdSMark Fasheh 	}
996ccd979bdSMark Fasheh 
9979b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n",
9989b915181SSunil Mushran 	     lockres->l_name, level, lockres->l_level, lockres->l_blocking,
9999b915181SSunil Mushran 	     needs_downconvert);
10009b915181SSunil Mushran 
10010b94a909SWengang Wang 	if (needs_downconvert)
10020b94a909SWengang Wang 		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
1003c1e8d35eSTao Ma 	mlog(0, "needs_downconvert = %d\n", needs_downconvert);
1004ccd979bdSMark Fasheh 	return needs_downconvert;
1005ccd979bdSMark Fasheh }
1006ccd979bdSMark Fasheh 
1007de551246SJoel Becker /*
1008de551246SJoel Becker  * OCFS2_LOCK_PENDING and l_pending_gen.
1009de551246SJoel Becker  *
1010de551246SJoel Becker  * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting
1011de551246SJoel Becker  * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock()
1012de551246SJoel Becker  * for more details on the race.
1013de551246SJoel Becker  *
1014de551246SJoel Becker  * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces
1015de551246SJoel Becker  * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock()
1016de551246SJoel Becker  * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
1017de551246SJoel Becker  * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
1018de551246SJoel Becker  * the caller is going to try to clear PENDING again.  If nothing else is
1019de551246SJoel Becker  * happening, __lockres_clear_pending() sees PENDING is unset and does
1020de551246SJoel Becker  * nothing.
1021de551246SJoel Becker  *
1022de551246SJoel Becker  * But what if another path (eg downconvert thread) has just started a
1023de551246SJoel Becker  * new locking action?  The other path has re-set PENDING.  Our path
1024de551246SJoel Becker  * cannot clear PENDING, because that will re-open the original race
1025de551246SJoel Becker  * window.
1026de551246SJoel Becker  *
1027de551246SJoel Becker  * [Example]
1028de551246SJoel Becker  *
1029de551246SJoel Becker  * ocfs2_meta_lock()
1030de551246SJoel Becker  *  ocfs2_cluster_lock()
1031de551246SJoel Becker  *   set BUSY
1032de551246SJoel Becker  *   set PENDING
1033de551246SJoel Becker  *   drop l_lock
1034de551246SJoel Becker  *   ocfs2_dlm_lock()
1035de551246SJoel Becker  *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
1036de551246SJoel Becker  *     clear PENDING			 ocfs2_unblock_lock()
1037de551246SJoel Becker  *					  take_l_lock
1038de551246SJoel Becker  *					  !BUSY
1039de551246SJoel Becker  *					  ocfs2_prepare_downconvert()
1040de551246SJoel Becker  *					   set BUSY
1041de551246SJoel Becker  *					   set PENDING
1042de551246SJoel Becker  *					  drop l_lock
1043de551246SJoel Becker  *   take l_lock
1044de551246SJoel Becker  *   clear PENDING
1045de551246SJoel Becker  *   drop l_lock
1046de551246SJoel Becker  *			<window>
1047de551246SJoel Becker  *					  ocfs2_dlm_lock()
1048de551246SJoel Becker  *
1049de551246SJoel Becker  * So as you can see, we now have a window where l_lock is not held,
1050de551246SJoel Becker  * PENDING is not set, and ocfs2_dlm_lock() has not been called.
1051de551246SJoel Becker  *
1052de551246SJoel Becker  * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
1053de551246SJoel Becker  * set by ocfs2_prepare_downconvert().  That wasn't nice.
1054de551246SJoel Becker  *
1055de551246SJoel Becker  * To solve this we introduce l_pending_gen.  A call to
1056de551246SJoel Becker  * lockres_clear_pending() will only do so when it is passed a generation
1057de551246SJoel Becker  * number that matches the lockres.  lockres_set_pending() will return the
1058de551246SJoel Becker  * current generation number.  When ocfs2_cluster_lock() goes to clear
1059de551246SJoel Becker  * PENDING, it passes the generation it got from set_pending().  In our
1060de551246SJoel Becker  * example above, the generation numbers will *not* match.  Thus,
1061de551246SJoel Becker  * ocfs2_cluster_lock() will not clear the PENDING set by
1062de551246SJoel Becker  * ocfs2_prepare_downconvert().
1063de551246SJoel Becker  */
1064de551246SJoel Becker 
1065de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */
__lockres_clear_pending(struct ocfs2_lock_res * lockres,unsigned int generation,struct ocfs2_super * osb)1066de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
1067de551246SJoel Becker 				    unsigned int generation,
1068de551246SJoel Becker 				    struct ocfs2_super *osb)
1069de551246SJoel Becker {
1070de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
1071de551246SJoel Becker 
1072de551246SJoel Becker 	/*
1073de551246SJoel Becker 	 * The ast and locking functions can race us here.  The winner
1074de551246SJoel Becker 	 * will clear pending, the loser will not.
1075de551246SJoel Becker 	 */
1076de551246SJoel Becker 	if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
1077de551246SJoel Becker 	    (lockres->l_pending_gen != generation))
1078de551246SJoel Becker 		return;
1079de551246SJoel Becker 
1080de551246SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
1081de551246SJoel Becker 	lockres->l_pending_gen++;
1082de551246SJoel Becker 
1083de551246SJoel Becker 	/*
1084de551246SJoel Becker 	 * The downconvert thread may have skipped us because we
1085de551246SJoel Becker 	 * were PENDING.  Wake it up.
1086de551246SJoel Becker 	 */
1087de551246SJoel Becker 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1088de551246SJoel Becker 		ocfs2_wake_downconvert_thread(osb);
1089de551246SJoel Becker }
1090de551246SJoel Becker 
1091de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */
lockres_clear_pending(struct ocfs2_lock_res * lockres,unsigned int generation,struct ocfs2_super * osb)1092de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
1093de551246SJoel Becker 				  unsigned int generation,
1094de551246SJoel Becker 				  struct ocfs2_super *osb)
1095de551246SJoel Becker {
1096de551246SJoel Becker 	unsigned long flags;
1097de551246SJoel Becker 
1098de551246SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1099de551246SJoel Becker 	__lockres_clear_pending(lockres, generation, osb);
1100de551246SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1101de551246SJoel Becker }
1102de551246SJoel Becker 
lockres_set_pending(struct ocfs2_lock_res * lockres)1103de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
1104de551246SJoel Becker {
1105de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
1106de551246SJoel Becker 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
1107de551246SJoel Becker 
1108de551246SJoel Becker 	lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
1109de551246SJoel Becker 
1110de551246SJoel Becker 	return lockres->l_pending_gen;
1111de551246SJoel Becker }
1112de551246SJoel Becker 
ocfs2_blocking_ast(struct ocfs2_dlm_lksb * lksb,int level)1113c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
1114ccd979bdSMark Fasheh {
1115a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1116aa2623adSMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1117ccd979bdSMark Fasheh 	int needs_downconvert;
1118ccd979bdSMark Fasheh 	unsigned long flags;
1119ccd979bdSMark Fasheh 
1120bd3e7610SJoel Becker 	BUG_ON(level <= DLM_LOCK_NL);
1121ccd979bdSMark Fasheh 
11229b915181SSunil Mushran 	mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, "
11239b915181SSunil Mushran 	     "type %s\n", lockres->l_name, level, lockres->l_level,
1124aa2623adSMark Fasheh 	     ocfs2_lock_type_string(lockres->l_type));
1125aa2623adSMark Fasheh 
1126cf8e06f1SMark Fasheh 	/*
1127cf8e06f1SMark Fasheh 	 * We can skip the bast for locks which don't enable caching -
1128cf8e06f1SMark Fasheh 	 * they'll be dropped at the earliest possible time anyway.
1129cf8e06f1SMark Fasheh 	 */
1130cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
1131cf8e06f1SMark Fasheh 		return;
1132cf8e06f1SMark Fasheh 
1133ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1134ccd979bdSMark Fasheh 	needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
1135ccd979bdSMark Fasheh 	if (needs_downconvert)
1136ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
1137ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1138ccd979bdSMark Fasheh 
1139d680efe9SMark Fasheh 	wake_up(&lockres->l_event);
1140d680efe9SMark Fasheh 
114134d024f8SMark Fasheh 	ocfs2_wake_downconvert_thread(osb);
1142ccd979bdSMark Fasheh }
1143ccd979bdSMark Fasheh 
ocfs2_locking_ast(struct ocfs2_dlm_lksb * lksb)1144c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
1145ccd979bdSMark Fasheh {
1146a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1147de551246SJoel Becker 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1148ccd979bdSMark Fasheh 	unsigned long flags;
11491693a5c0SDavid Teigland 	int status;
1150ccd979bdSMark Fasheh 
1151ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1152ccd979bdSMark Fasheh 
11531693a5c0SDavid Teigland 	status = ocfs2_dlm_lock_status(&lockres->l_lksb);
11541693a5c0SDavid Teigland 
11551693a5c0SDavid Teigland 	if (status == -EAGAIN) {
11561693a5c0SDavid Teigland 		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
11571693a5c0SDavid Teigland 		goto out;
11581693a5c0SDavid Teigland 	}
11591693a5c0SDavid Teigland 
11601693a5c0SDavid Teigland 	if (status) {
11618f2c9c1bSJoel Becker 		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
11621693a5c0SDavid Teigland 		     lockres->l_name, status);
1163ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1164ccd979bdSMark Fasheh 		return;
1165ccd979bdSMark Fasheh 	}
1166ccd979bdSMark Fasheh 
11679b915181SSunil Mushran 	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
11689b915181SSunil Mushran 	     "level %d => %d\n", lockres->l_name, lockres->l_action,
11699b915181SSunil Mushran 	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);
11709b915181SSunil Mushran 
1171ccd979bdSMark Fasheh 	switch(lockres->l_action) {
1172ccd979bdSMark Fasheh 	case OCFS2_AST_ATTACH:
1173ccd979bdSMark Fasheh 		ocfs2_generic_handle_attach_action(lockres);
1174e92d57dfSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
1175ccd979bdSMark Fasheh 		break;
1176ccd979bdSMark Fasheh 	case OCFS2_AST_CONVERT:
1177ccd979bdSMark Fasheh 		ocfs2_generic_handle_convert_action(lockres);
1178ccd979bdSMark Fasheh 		break;
1179ccd979bdSMark Fasheh 	case OCFS2_AST_DOWNCONVERT:
1180ccd979bdSMark Fasheh 		ocfs2_generic_handle_downconvert_action(lockres);
1181ccd979bdSMark Fasheh 		break;
1182ccd979bdSMark Fasheh 	default:
11839b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
11849b915181SSunil Mushran 		     "flags 0x%lx, unlock: %u\n",
1185e92d57dfSMark Fasheh 		     lockres->l_name, lockres->l_action, lockres->l_flags,
1186e92d57dfSMark Fasheh 		     lockres->l_unlock_action);
1187ccd979bdSMark Fasheh 		BUG();
1188ccd979bdSMark Fasheh 	}
11891693a5c0SDavid Teigland out:
1190ccd979bdSMark Fasheh 	/* set it to something invalid so if we get called again we
1191ccd979bdSMark Fasheh 	 * can catch it. */
1192ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_INVALID;
1193ccd979bdSMark Fasheh 
1194de551246SJoel Becker 	/* Did we try to cancel this lock?  Clear that state */
1195de551246SJoel Becker 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
1196de551246SJoel Becker 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1197de551246SJoel Becker 
1198de551246SJoel Becker 	/*
1199de551246SJoel Becker 	 * We may have beaten the locking functions here.  We certainly
1200de551246SJoel Becker 	 * know that dlm_lock() has been called :-)
1201de551246SJoel Becker 	 * Because we can't have two lock calls in flight at once, we
1202de551246SJoel Becker 	 * can use lockres->l_pending_gen.
1203de551246SJoel Becker 	 */
1204de551246SJoel Becker 	__lockres_clear_pending(lockres, lockres->l_pending_gen,  osb);
1205de551246SJoel Becker 
1206ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1207d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1208ccd979bdSMark Fasheh }
1209ccd979bdSMark Fasheh 
ocfs2_unlock_ast(struct ocfs2_dlm_lksb * lksb,int error)1210553b5eb9SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1211553b5eb9SJoel Becker {
1212553b5eb9SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1213553b5eb9SJoel Becker 	unsigned long flags;
1214553b5eb9SJoel Becker 
12159b915181SSunil Mushran 	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
12169b915181SSunil Mushran 	     lockres->l_name, lockres->l_unlock_action);
1217553b5eb9SJoel Becker 
1218553b5eb9SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1219553b5eb9SJoel Becker 	if (error) {
1220553b5eb9SJoel Becker 		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
1221553b5eb9SJoel Becker 		     "unlock_action %d\n", error, lockres->l_name,
1222553b5eb9SJoel Becker 		     lockres->l_unlock_action);
1223553b5eb9SJoel Becker 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1224553b5eb9SJoel Becker 		return;
1225553b5eb9SJoel Becker 	}
1226553b5eb9SJoel Becker 
1227553b5eb9SJoel Becker 	switch(lockres->l_unlock_action) {
1228553b5eb9SJoel Becker 	case OCFS2_UNLOCK_CANCEL_CONVERT:
1229553b5eb9SJoel Becker 		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
1230553b5eb9SJoel Becker 		lockres->l_action = OCFS2_AST_INVALID;
1231553b5eb9SJoel Becker 		/* Downconvert thread may have requeued this lock, we
1232553b5eb9SJoel Becker 		 * need to wake it. */
1233553b5eb9SJoel Becker 		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1234553b5eb9SJoel Becker 			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
1235553b5eb9SJoel Becker 		break;
1236553b5eb9SJoel Becker 	case OCFS2_UNLOCK_DROP_LOCK:
1237553b5eb9SJoel Becker 		lockres->l_level = DLM_LOCK_IV;
1238553b5eb9SJoel Becker 		break;
1239553b5eb9SJoel Becker 	default:
1240553b5eb9SJoel Becker 		BUG();
1241553b5eb9SJoel Becker 	}
1242553b5eb9SJoel Becker 
1243553b5eb9SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1244553b5eb9SJoel Becker 	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1245553b5eb9SJoel Becker 	wake_up(&lockres->l_event);
1246553b5eb9SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1247553b5eb9SJoel Becker }
1248553b5eb9SJoel Becker 
1249553b5eb9SJoel Becker /*
1250553b5eb9SJoel Becker  * This is the filesystem locking protocol.  It provides the lock handling
1251553b5eb9SJoel Becker  * hooks for the underlying DLM.  It has a maximum version number.
1252553b5eb9SJoel Becker  * The version number allows interoperability with systems running at
1253553b5eb9SJoel Becker  * the same major number and an equal or smaller minor number.
1254553b5eb9SJoel Becker  *
1255553b5eb9SJoel Becker  * Whenever the filesystem does new things with locks (adds or removes a
1256553b5eb9SJoel Becker  * lock, orders them differently, does different things underneath a lock),
1257553b5eb9SJoel Becker  * the version must be changed.  The protocol is negotiated when joining
1258553b5eb9SJoel Becker  * the dlm domain.  A node may join the domain if its major version is
1259553b5eb9SJoel Becker  * identical to all other nodes and its minor version is greater than
1260553b5eb9SJoel Becker  * or equal to all other nodes.  When its minor version is greater than
1261553b5eb9SJoel Becker  * the other nodes, it will run at the minor version specified by the
1262553b5eb9SJoel Becker  * other nodes.
1263553b5eb9SJoel Becker  *
1264553b5eb9SJoel Becker  * If a locking change is made that will not be compatible with older
1265553b5eb9SJoel Becker  * versions, the major number must be increased and the minor version set
1266553b5eb9SJoel Becker  * to zero.  If a change merely adds a behavior that can be disabled when
1267553b5eb9SJoel Becker  * speaking to older versions, the minor version must be increased.  If a
1268553b5eb9SJoel Becker  * change adds a fully backwards compatible change (eg, LVB changes that
1269553b5eb9SJoel Becker  * are just ignored by older versions), the version does not need to be
1270553b5eb9SJoel Becker  * updated.
1271553b5eb9SJoel Becker  */
1272553b5eb9SJoel Becker static struct ocfs2_locking_protocol lproto = {
1273553b5eb9SJoel Becker 	.lp_max_version = {
1274553b5eb9SJoel Becker 		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
1275553b5eb9SJoel Becker 		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
1276553b5eb9SJoel Becker 	},
1277553b5eb9SJoel Becker 	.lp_lock_ast		= ocfs2_locking_ast,
1278553b5eb9SJoel Becker 	.lp_blocking_ast	= ocfs2_blocking_ast,
1279553b5eb9SJoel Becker 	.lp_unlock_ast		= ocfs2_unlock_ast,
1280553b5eb9SJoel Becker };
1281553b5eb9SJoel Becker 
ocfs2_set_locking_protocol(void)1282553b5eb9SJoel Becker void ocfs2_set_locking_protocol(void)
1283553b5eb9SJoel Becker {
1284553b5eb9SJoel Becker 	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
1285553b5eb9SJoel Becker }
1286553b5eb9SJoel Becker 
ocfs2_recover_from_dlm_error(struct ocfs2_lock_res * lockres,int convert)1287ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1288ccd979bdSMark Fasheh 						int convert)
1289ccd979bdSMark Fasheh {
1290ccd979bdSMark Fasheh 	unsigned long flags;
1291ccd979bdSMark Fasheh 
1292ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1293ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1294a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1295ccd979bdSMark Fasheh 	if (convert)
1296ccd979bdSMark Fasheh 		lockres->l_action = OCFS2_AST_INVALID;
1297ccd979bdSMark Fasheh 	else
1298ccd979bdSMark Fasheh 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1299ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1300ccd979bdSMark Fasheh 
1301ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1302ccd979bdSMark Fasheh }
1303ccd979bdSMark Fasheh 
1304ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e.,
1305ccd979bdSMark Fasheh  * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
1306ccd979bdSMark Fasheh  * to do the right thing in that case.
1307ccd979bdSMark Fasheh  */
ocfs2_lock_create(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres,int level,u32 dlm_flags)1308ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
1309ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
1310ccd979bdSMark Fasheh 			     int level,
1311bd3e7610SJoel Becker 			     u32 dlm_flags)
1312ccd979bdSMark Fasheh {
1313ccd979bdSMark Fasheh 	int ret = 0;
1314ccd979bdSMark Fasheh 	unsigned long flags;
1315de551246SJoel Becker 	unsigned int gen;
1316ccd979bdSMark Fasheh 
1317bd3e7610SJoel Becker 	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
1318ccd979bdSMark Fasheh 	     dlm_flags);
1319ccd979bdSMark Fasheh 
1320ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1321ccd979bdSMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
1322ccd979bdSMark Fasheh 	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
1323ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1324ccd979bdSMark Fasheh 		goto bail;
1325ccd979bdSMark Fasheh 	}
1326ccd979bdSMark Fasheh 
1327ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_ATTACH;
1328ccd979bdSMark Fasheh 	lockres->l_requested = level;
1329ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1330de551246SJoel Becker 	gen = lockres_set_pending(lockres);
1331ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1332ccd979bdSMark Fasheh 
13334670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
1334ccd979bdSMark Fasheh 			     level,
1335ccd979bdSMark Fasheh 			     &lockres->l_lksb,
1336ccd979bdSMark Fasheh 			     dlm_flags,
1337ccd979bdSMark Fasheh 			     lockres->l_name,
1338a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
1339de551246SJoel Becker 	lockres_clear_pending(lockres, gen, osb);
13407431cd7eSJoel Becker 	if (ret) {
13417431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1342ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
1343ccd979bdSMark Fasheh 	}
1344ccd979bdSMark Fasheh 
13457431cd7eSJoel Becker 	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
1346ccd979bdSMark Fasheh 
1347ccd979bdSMark Fasheh bail:
1348ccd979bdSMark Fasheh 	return ret;
1349ccd979bdSMark Fasheh }
1350ccd979bdSMark Fasheh 
ocfs2_check_wait_flag(struct ocfs2_lock_res * lockres,int flag)1351ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
1352ccd979bdSMark Fasheh 					int flag)
1353ccd979bdSMark Fasheh {
1354ccd979bdSMark Fasheh 	unsigned long flags;
1355ccd979bdSMark Fasheh 	int ret;
1356ccd979bdSMark Fasheh 
1357ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1358ccd979bdSMark Fasheh 	ret = lockres->l_flags & flag;
1359ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1360ccd979bdSMark Fasheh 
1361ccd979bdSMark Fasheh 	return ret;
1362ccd979bdSMark Fasheh }
1363ccd979bdSMark Fasheh 
ocfs2_wait_on_busy_lock(struct ocfs2_lock_res * lockres)1364ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)
1365ccd979bdSMark Fasheh 
1366ccd979bdSMark Fasheh {
1367ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1368ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
1369ccd979bdSMark Fasheh }
1370ccd979bdSMark Fasheh 
ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res * lockres)1371ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)
1372ccd979bdSMark Fasheh 
1373ccd979bdSMark Fasheh {
1374ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1375ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
1376ccd979bdSMark Fasheh }
1377ccd979bdSMark Fasheh 
1378ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf
1379ccd979bdSMark Fasheh  * of another node, and return true if the currently wanted
1380ccd979bdSMark Fasheh  * level will be compatible with it. */
ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res * lockres,int wanted)1381ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
1382ccd979bdSMark Fasheh 						     int wanted)
1383ccd979bdSMark Fasheh {
1384ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
1385ccd979bdSMark Fasheh 
1386ccd979bdSMark Fasheh 	return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
1387ccd979bdSMark Fasheh }
1388ccd979bdSMark Fasheh 
ocfs2_init_mask_waiter(struct ocfs2_mask_waiter * mw)1389ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
1390ccd979bdSMark Fasheh {
1391ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&mw->mw_item);
1392ccd979bdSMark Fasheh 	init_completion(&mw->mw_complete);
13938ddb7b00SSunil Mushran 	ocfs2_init_start_time(mw);
1394ccd979bdSMark Fasheh }
1395ccd979bdSMark Fasheh 
ocfs2_wait_for_mask(struct ocfs2_mask_waiter * mw)1396ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
1397ccd979bdSMark Fasheh {
1398ccd979bdSMark Fasheh 	wait_for_completion(&mw->mw_complete);
1399ccd979bdSMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
140016735d02SWolfram Sang 	reinit_completion(&mw->mw_complete);
1401ccd979bdSMark Fasheh 	return mw->mw_status;
1402ccd979bdSMark Fasheh }
1403ccd979bdSMark Fasheh 
lockres_add_mask_waiter(struct ocfs2_lock_res * lockres,struct ocfs2_mask_waiter * mw,unsigned long mask,unsigned long goal)1404ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
1405ccd979bdSMark Fasheh 				    struct ocfs2_mask_waiter *mw,
1406ccd979bdSMark Fasheh 				    unsigned long mask,
1407ccd979bdSMark Fasheh 				    unsigned long goal)
1408ccd979bdSMark Fasheh {
1409ccd979bdSMark Fasheh 	BUG_ON(!list_empty(&mw->mw_item));
1410ccd979bdSMark Fasheh 
1411ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
1412ccd979bdSMark Fasheh 
1413ccd979bdSMark Fasheh 	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
1414ccd979bdSMark Fasheh 	mw->mw_mask = mask;
1415ccd979bdSMark Fasheh 	mw->mw_goal = goal;
14165da844a2SGang He 	ocfs2_track_lock_wait(lockres);
1417ccd979bdSMark Fasheh }
1418ccd979bdSMark Fasheh 
1419ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY
1420ccd979bdSMark Fasheh  * if the mask still hadn't reached its goal */
__lockres_remove_mask_waiter(struct ocfs2_lock_res * lockres,struct ocfs2_mask_waiter * mw)1421d1e78238SXue jiufei static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1422ccd979bdSMark Fasheh 				      struct ocfs2_mask_waiter *mw)
1423ccd979bdSMark Fasheh {
1424ccd979bdSMark Fasheh 	int ret = 0;
1425ccd979bdSMark Fasheh 
1426d1e78238SXue jiufei 	assert_spin_locked(&lockres->l_lock);
1427ccd979bdSMark Fasheh 	if (!list_empty(&mw->mw_item)) {
1428ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1429ccd979bdSMark Fasheh 			ret = -EBUSY;
1430ccd979bdSMark Fasheh 
1431ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
1432ccd979bdSMark Fasheh 		init_completion(&mw->mw_complete);
14335da844a2SGang He 		ocfs2_track_lock_wait(lockres);
1434ccd979bdSMark Fasheh 	}
1435d1e78238SXue jiufei 
1436d1e78238SXue jiufei 	return ret;
1437d1e78238SXue jiufei }
1438d1e78238SXue jiufei 
lockres_remove_mask_waiter(struct ocfs2_lock_res * lockres,struct ocfs2_mask_waiter * mw)1439d1e78238SXue jiufei static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1440d1e78238SXue jiufei 				      struct ocfs2_mask_waiter *mw)
1441d1e78238SXue jiufei {
1442d1e78238SXue jiufei 	unsigned long flags;
1443d1e78238SXue jiufei 	int ret = 0;
1444d1e78238SXue jiufei 
1445d1e78238SXue jiufei 	spin_lock_irqsave(&lockres->l_lock, flags);
1446d1e78238SXue jiufei 	ret = __lockres_remove_mask_waiter(lockres, mw);
1447ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1448ccd979bdSMark Fasheh 
1449ccd979bdSMark Fasheh 	return ret;
1450ccd979bdSMark Fasheh 
1451ccd979bdSMark Fasheh }
1452ccd979bdSMark Fasheh 
ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter * mw,struct ocfs2_lock_res * lockres)1453cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1454cf8e06f1SMark Fasheh 					     struct ocfs2_lock_res *lockres)
1455cf8e06f1SMark Fasheh {
1456cf8e06f1SMark Fasheh 	int ret;
1457cf8e06f1SMark Fasheh 
1458cf8e06f1SMark Fasheh 	ret = wait_for_completion_interruptible(&mw->mw_complete);
1459cf8e06f1SMark Fasheh 	if (ret)
1460cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, mw);
1461cf8e06f1SMark Fasheh 	else
1462cf8e06f1SMark Fasheh 		ret = mw->mw_status;
1463cf8e06f1SMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
146416735d02SWolfram Sang 	reinit_completion(&mw->mw_complete);
1465cf8e06f1SMark Fasheh 	return ret;
1466cf8e06f1SMark Fasheh }
1467cf8e06f1SMark Fasheh 
/*
 * Take a cluster lock on @lockres at @level, upconverting through the
 * DLM when the currently granted level is too low.
 *
 * @osb:        superblock info (mount options, DLM connection)
 * @lockres:    lock resource; must be OCFS2_LOCK_INITIALIZED
 * @level:      DLM level wanted
 * @lkm_flags:  DLM flags; VALBLK and CONVERT are adjusted internally
 * @arg_flags:  OCFS2 flags; OCFS2_LOCK_NONBLOCK makes the function back
 *              off with -EAGAIN instead of sleeping on BUSY/BLOCKED
 * @l_subclass: lockdep subclass (CONFIG_DEBUG_LOCK_ALLOC only)
 * @caller_ip:  caller address for lockdep (CONFIG_DEBUG_LOCK_ALLOC only)
 *
 * Returns 0 with a holder reference taken, or a negative errno:
 * -EINVAL for an uninitialized lockres, -ERESTARTSYS when a signal is
 * pending and signals are being caught, -EAGAIN for a failed NOQUEUE or
 * NONBLOCK attempt, or whatever ocfs2_dlm_lock() / the mask waiter
 * reported.
 */
static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres,
				int level,
				u32 lkm_flags,
				int arg_flags,
				int l_subclass,
				unsigned long caller_ip)
{
	struct ocfs2_mask_waiter waiter;
	int must_wait;
	int catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
	/* Initialized because gcc cannot see that must_wait = 1
	 * guarantees status gets set. */
	int status = 0;
	unsigned long irq_state;
	unsigned int pending_gen;
	int tried_noqueue = 0;
	int entered_dlm = 0;
	int wake_dc = 0;

	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
		mlog_errno(-EINVAL);
		return -EINVAL;
	}

	ocfs2_init_mask_waiter(&waiter);

	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
		lkm_flags |= DLM_LKF_VALBLK;

again:
	must_wait = 0;

	spin_lock_irqsave(&lockres->l_lock, irq_state);

	if (catch_signals && signal_pending(current)) {
		status = -ERESTARTSYS;
		goto unlock;
	}

	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
			"Cluster lock called on freeing lockres %s! flags "
			"0x%lx\n", lockres->l_name, lockres->l_flags);

	/*
	 * Only the currently granted level is compared here.  A lock that
	 * is blocked waiting on a downconvert is caught further down.
	 */
	if ((lockres->l_flags & OCFS2_LOCK_BUSY) &&
	    level > lockres->l_level) {
		/* Somebody is sitting in dlm_lock; wait for them. */
		lockres_add_mask_waiter(lockres, &waiter, OCFS2_LOCK_BUSY, 0);
		must_wait = 1;
		goto unlock;
	}

	/*
	 * An upconvert has just finished.  If the lock now has a level we
	 * can work with, take it.  If it does not, go through the full
	 * cycle: e.g. a process upconverting to PR may be closely followed
	 * by one asking for EX; the EX requester landing here should keep
	 * trying to upconvert and leave the lock to the PR requester.
	 * When several processes ask for PR, the first one here takes the
	 * lock; the others go through the OCFS2_LOCK_BLOCKED check below
	 * to make sure no downconvert request is pending.
	 */
	if ((lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) &&
	    level <= lockres->l_level)
		goto update_holders;

	if ((lockres->l_flags & OCFS2_LOCK_BLOCKED) &&
	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
		/* The lock is currently blocked on behalf of another
		 * node. */
		lockres_add_mask_waiter(lockres, &waiter,
					OCFS2_LOCK_BLOCKED, 0);
		must_wait = 1;
		goto unlock;
	}

	if (level > lockres->l_level) {
		/* A NOQUEUE request is attempted once only. */
		if (tried_noqueue) {
			status = -EAGAIN;
			goto unlock;
		}
		if (lkm_flags & DLM_LKF_NOQUEUE)
			tried_noqueue = 1;

		if (lockres->l_action != OCFS2_AST_INVALID)
			mlog(ML_ERROR, "lockres %s has action %u pending\n",
			     lockres->l_name, lockres->l_action);

		/* First request attaches, later ones convert. */
		if (lockres->l_flags & OCFS2_LOCK_ATTACHED) {
			lockres->l_action = OCFS2_AST_CONVERT;
			lkm_flags |= DLM_LKF_CONVERT;
		} else {
			lockres->l_action = OCFS2_AST_ATTACH;
			lkm_flags &= ~DLM_LKF_CONVERT;
		}

		lockres->l_requested = level;
		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
		pending_gen = lockres_set_pending(lockres);
		spin_unlock_irqrestore(&lockres->l_lock, irq_state);

		BUG_ON(level == DLM_LOCK_IV);
		BUG_ON(level == DLM_LOCK_NL);

		mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
		     lockres->l_name, lockres->l_level, level);

		/* Ask the DLM for the higher level now. */
		status = ocfs2_dlm_lock(osb->cconn,
					level,
					&lockres->l_lksb,
					lkm_flags,
					lockres->l_name,
					OCFS2_LOCK_ID_MAX_LEN - 1);
		lockres_clear_pending(lockres, pending_gen, osb);
		if (status) {
			/* -EAGAIN from a NOQUEUE request is not logged. */
			if (!((lkm_flags & DLM_LKF_NOQUEUE) &&
			      status == -EAGAIN)) {
				ocfs2_log_dlm_error("ocfs2_dlm_lock",
						    status, lockres);
			}
			ocfs2_recover_from_dlm_error(lockres, 1);
			goto out;
		}
		entered_dlm = 1;

		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
		     lockres->l_name);

		/* From here on we are inside the dlm and have to finish
		 * the job regardless of signals. */
		catch_signals = 0;

		/* Go wait for BUSY to clear, then carry on. */
		goto again;
	}

update_holders:
	/* Reaching this point means the lock can be taken. */
	ocfs2_inc_holders(lockres, level);

	status = 0;
unlock:
	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);

	/* ocfs2_unblock_lock requeues on seeing
	 * OCFS2_LOCK_UPCONVERT_FINISHING */
	wake_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);

	spin_unlock_irqrestore(&lockres->l_lock, irq_state);
	if (wake_dc)
		ocfs2_wake_downconvert_thread(osb);
out:
	/*
	 * Workaround for a lock inversion between the page lock and dlm
	 * locks.  One path holds the page lock while calling aops which
	 * block acquiring dlm locks; the voting thread holds dlm locks
	 * while acquiring page locks when down converting data locks.
	 * This lets an aop path notice the inversion and back off to
	 * unlock its page lock before trying the dlm lock again.
	 */
	if (must_wait && (arg_flags & OCFS2_LOCK_NONBLOCK) &&
	    (waiter.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED))) {
		must_wait = 0;
		spin_lock_irqsave(&lockres->l_lock, irq_state);
		if (!__lockres_remove_mask_waiter(lockres, &waiter)) {
			/* The goal was already reached: just retry. */
			spin_unlock_irqrestore(&lockres->l_lock, irq_state);
			goto again;
		}
		if (entered_dlm)
			lockres_or_flags(lockres,
				OCFS2_LOCK_NONBLOCK_FINISHED);
		spin_unlock_irqrestore(&lockres->l_lock, irq_state);
		status = -EAGAIN;
	}
	if (must_wait) {
		status = ocfs2_wait_for_mask(&waiter);
		if (status == 0)
			goto again;
		mlog_errno(status);
	}
	ocfs2_update_lock_stats(lockres, level, &waiter, status);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (!status && lockres->l_lockdep_map.key != NULL) {
		if (level == DLM_LOCK_PR)
			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
				caller_ip);
		else
			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
				caller_ip);
	}
#endif
	return status;
}
1670ccd979bdSMark Fasheh 
/*
 * Convenience form of __ocfs2_cluster_lock() that uses lockdep
 * subclass 0 and records this function's return address as the caller.
 */
static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres,
				     int level,
				     u32 lkm_flags,
				     int arg_flags)
{
	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
				    0, _RET_IP_);
}
1680cb25797dSJan Kara 
1681cb25797dSJan Kara 
__ocfs2_cluster_unlock(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres,int level,unsigned long caller_ip)1682cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1683cb25797dSJan Kara 				   struct ocfs2_lock_res *lockres,
1684cb25797dSJan Kara 				   int level,
1685cb25797dSJan Kara 				   unsigned long caller_ip)
1686ccd979bdSMark Fasheh {
1687ccd979bdSMark Fasheh 	unsigned long flags;
1688ccd979bdSMark Fasheh 
1689ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1690ccd979bdSMark Fasheh 	ocfs2_dec_holders(lockres, level);
169134d024f8SMark Fasheh 	ocfs2_downconvert_on_unlock(osb, lockres);
1692ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1693cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
1694cb25797dSJan Kara 	if (lockres->l_lockdep_map.key != NULL)
16955facae4fSQian Cai 		rwsem_release(&lockres->l_lockdep_map, caller_ip);
1696cb25797dSJan Kara #endif
1697ccd979bdSMark Fasheh }
1698ccd979bdSMark Fasheh 
ocfs2_create_new_lock(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres,int ex,int local)1699da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1700d680efe9SMark Fasheh 				 struct ocfs2_lock_res *lockres,
170124c19ef4SMark Fasheh 				 int ex,
170224c19ef4SMark Fasheh 				 int local)
1703ccd979bdSMark Fasheh {
1704bd3e7610SJoel Becker 	int level =  ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1705ccd979bdSMark Fasheh 	unsigned long flags;
1706bd3e7610SJoel Becker 	u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1707ccd979bdSMark Fasheh 
1708ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1709ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1710ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1711ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1712ccd979bdSMark Fasheh 
171324c19ef4SMark Fasheh 	return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1714ccd979bdSMark Fasheh }
1715ccd979bdSMark Fasheh 
1716ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping
1717ccd979bdSMark Fasheh  * the normal cluster directory lookup. Use this ONLY on newly created
1718ccd979bdSMark Fasheh  * inodes which other nodes can't possibly see, and which haven't been
1719ccd979bdSMark Fasheh  * hashed in the inode hash yet. This can give us a good performance
1720ccd979bdSMark Fasheh  * increase as it'll skip the network broadcast normally associated
1721ccd979bdSMark Fasheh  * with creating a new lock resource. */
ocfs2_create_new_inode_locks(struct inode * inode)1722ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode)
1723ccd979bdSMark Fasheh {
1724ccd979bdSMark Fasheh 	int ret;
1725d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1726ccd979bdSMark Fasheh 
1727ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_inode_is_new(inode));
1728ccd979bdSMark Fasheh 
1729b0697053SMark Fasheh 	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1730ccd979bdSMark Fasheh 
1731ccd979bdSMark Fasheh 	/* NOTE: That we don't increment any of the holder counts, nor
1732ccd979bdSMark Fasheh 	 * do we add anything to a journal handle. Since this is
1733ccd979bdSMark Fasheh 	 * supposed to be a new inode which the cluster doesn't know
1734ccd979bdSMark Fasheh 	 * about yet, there is no need to.  As far as the LVB handling
1735ccd979bdSMark Fasheh 	 * is concerned, this is basically like acquiring an EX lock
1736ccd979bdSMark Fasheh 	 * on a resource which has an invalid one -- we'll set it
1737ccd979bdSMark Fasheh 	 * valid when we release the EX. */
1738ccd979bdSMark Fasheh 
173924c19ef4SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1740ccd979bdSMark Fasheh 	if (ret) {
1741ccd979bdSMark Fasheh 		mlog_errno(ret);
1742ccd979bdSMark Fasheh 		goto bail;
1743ccd979bdSMark Fasheh 	}
1744ccd979bdSMark Fasheh 
174524c19ef4SMark Fasheh 	/*
1746bd3e7610SJoel Becker 	 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
174724c19ef4SMark Fasheh 	 * don't use a generation in their lock names.
174824c19ef4SMark Fasheh 	 */
1749e63aecb6SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
1750ccd979bdSMark Fasheh 	if (ret) {
1751ccd979bdSMark Fasheh 		mlog_errno(ret);
1752ccd979bdSMark Fasheh 		goto bail;
1753ccd979bdSMark Fasheh 	}
1754ccd979bdSMark Fasheh 
175550008630STiger Yang 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
1756a8f24f1bSJoseph Qi 	if (ret)
175750008630STiger Yang 		mlog_errno(ret);
175850008630STiger Yang 
1759ccd979bdSMark Fasheh bail:
1760ccd979bdSMark Fasheh 	return ret;
1761ccd979bdSMark Fasheh }
1762ccd979bdSMark Fasheh 
ocfs2_rw_lock(struct inode * inode,int write)1763ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write)
1764ccd979bdSMark Fasheh {
1765ccd979bdSMark Fasheh 	int status, level;
1766ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres;
1767c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1768ccd979bdSMark Fasheh 
1769b0697053SMark Fasheh 	mlog(0, "inode %llu take %s RW lock\n",
1770b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1771ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1772ccd979bdSMark Fasheh 
1773c1e8d35eSTao Ma 	if (ocfs2_mount_local(osb))
1774c271c5c2SSunil Mushran 		return 0;
1775c271c5c2SSunil Mushran 
1776ccd979bdSMark Fasheh 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
1777ccd979bdSMark Fasheh 
1778bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1779ccd979bdSMark Fasheh 
17801119d3c0Spiaojun 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
1781ccd979bdSMark Fasheh 	if (status < 0)
1782ccd979bdSMark Fasheh 		mlog_errno(status);
1783ccd979bdSMark Fasheh 
1784ccd979bdSMark Fasheh 	return status;
1785ccd979bdSMark Fasheh }
1786ccd979bdSMark Fasheh 
ocfs2_try_rw_lock(struct inode * inode,int write)178706e7f13dSGang He int ocfs2_try_rw_lock(struct inode *inode, int write)
178806e7f13dSGang He {
178906e7f13dSGang He 	int status, level;
179006e7f13dSGang He 	struct ocfs2_lock_res *lockres;
179106e7f13dSGang He 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
179206e7f13dSGang He 
179306e7f13dSGang He 	mlog(0, "inode %llu try to take %s RW lock\n",
179406e7f13dSGang He 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
179506e7f13dSGang He 	     write ? "EXMODE" : "PRMODE");
179606e7f13dSGang He 
179706e7f13dSGang He 	if (ocfs2_mount_local(osb))
179806e7f13dSGang He 		return 0;
179906e7f13dSGang He 
180006e7f13dSGang He 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
180106e7f13dSGang He 
180206e7f13dSGang He 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
180306e7f13dSGang He 
180406e7f13dSGang He 	status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0);
180506e7f13dSGang He 	return status;
180606e7f13dSGang He }
180706e7f13dSGang He 
ocfs2_rw_unlock(struct inode * inode,int write)1808ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write)
1809ccd979bdSMark Fasheh {
1810bd3e7610SJoel Becker 	int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1811ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1812c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1813ccd979bdSMark Fasheh 
1814b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s RW lock\n",
1815b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1816ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1817ccd979bdSMark Fasheh 
1818c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
18191119d3c0Spiaojun 		ocfs2_cluster_unlock(osb, lockres, level);
1820ccd979bdSMark Fasheh }
1821ccd979bdSMark Fasheh 
182250008630STiger Yang /*
182350008630STiger Yang  * ocfs2_open_lock always get PR mode lock.
182450008630STiger Yang  */
ocfs2_open_lock(struct inode * inode)182550008630STiger Yang int ocfs2_open_lock(struct inode *inode)
182650008630STiger Yang {
182750008630STiger Yang 	int status = 0;
182850008630STiger Yang 	struct ocfs2_lock_res *lockres;
182950008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
183050008630STiger Yang 
183150008630STiger Yang 	mlog(0, "inode %llu take PRMODE open lock\n",
183250008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
183350008630STiger Yang 
183403efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
183550008630STiger Yang 		goto out;
183650008630STiger Yang 
183750008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
183850008630STiger Yang 
18391119d3c0Spiaojun 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_PR, 0, 0);
184050008630STiger Yang 	if (status < 0)
184150008630STiger Yang 		mlog_errno(status);
184250008630STiger Yang 
184350008630STiger Yang out:
184450008630STiger Yang 	return status;
184550008630STiger Yang }
184650008630STiger Yang 
ocfs2_try_open_lock(struct inode * inode,int write)184750008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write)
184850008630STiger Yang {
184950008630STiger Yang 	int status = 0, level;
185050008630STiger Yang 	struct ocfs2_lock_res *lockres;
185150008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
185250008630STiger Yang 
185350008630STiger Yang 	mlog(0, "inode %llu try to take %s open lock\n",
185450008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
185550008630STiger Yang 	     write ? "EXMODE" : "PRMODE");
185650008630STiger Yang 
185703efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
185803efed8aSTiger Yang 		if (write)
185903efed8aSTiger Yang 			status = -EROFS;
186003efed8aSTiger Yang 		goto out;
186103efed8aSTiger Yang 	}
186203efed8aSTiger Yang 
186350008630STiger Yang 	if (ocfs2_mount_local(osb))
186450008630STiger Yang 		goto out;
186550008630STiger Yang 
186650008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
186750008630STiger Yang 
1868bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
186950008630STiger Yang 
187050008630STiger Yang 	/*
187150008630STiger Yang 	 * The file system may already holding a PRMODE/EXMODE open lock.
1872bd3e7610SJoel Becker 	 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
187350008630STiger Yang 	 * other nodes and the -EAGAIN will indicate to the caller that
187450008630STiger Yang 	 * this inode is still in use.
187550008630STiger Yang 	 */
18761119d3c0Spiaojun 	status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0);
187750008630STiger Yang 
187850008630STiger Yang out:
187950008630STiger Yang 	return status;
188050008630STiger Yang }
188150008630STiger Yang 
188250008630STiger Yang /*
188350008630STiger Yang  * ocfs2_open_unlock unlock PR and EX mode open locks.
188450008630STiger Yang  */
ocfs2_open_unlock(struct inode * inode)188550008630STiger Yang void ocfs2_open_unlock(struct inode *inode)
188650008630STiger Yang {
188750008630STiger Yang 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
188850008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
188950008630STiger Yang 
189050008630STiger Yang 	mlog(0, "inode %llu drop open lock\n",
189150008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
189250008630STiger Yang 
189350008630STiger Yang 	if (ocfs2_mount_local(osb))
189450008630STiger Yang 		goto out;
189550008630STiger Yang 
189650008630STiger Yang 	if(lockres->l_ro_holders)
18971119d3c0Spiaojun 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_PR);
189850008630STiger Yang 	if(lockres->l_ex_holders)
18991119d3c0Spiaojun 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
190050008630STiger Yang 
190150008630STiger Yang out:
1902c1e8d35eSTao Ma 	return;
190350008630STiger Yang }
190450008630STiger Yang 
ocfs2_flock_handle_signal(struct ocfs2_lock_res * lockres,int level)1905cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1906cf8e06f1SMark Fasheh 				     int level)
1907cf8e06f1SMark Fasheh {
1908cf8e06f1SMark Fasheh 	int ret;
1909cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1910cf8e06f1SMark Fasheh 	unsigned long flags;
1911cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1912cf8e06f1SMark Fasheh 
1913cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1914cf8e06f1SMark Fasheh 
1915cf8e06f1SMark Fasheh retry_cancel:
1916cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1917cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
1918cf8e06f1SMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
1919cf8e06f1SMark Fasheh 		if (ret) {
1920cf8e06f1SMark Fasheh 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1921cf8e06f1SMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
1922cf8e06f1SMark Fasheh 			if (ret < 0) {
1923cf8e06f1SMark Fasheh 				mlog_errno(ret);
1924cf8e06f1SMark Fasheh 				goto out;
1925cf8e06f1SMark Fasheh 			}
1926cf8e06f1SMark Fasheh 			goto retry_cancel;
1927cf8e06f1SMark Fasheh 		}
1928cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1929cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1930cf8e06f1SMark Fasheh 
1931cf8e06f1SMark Fasheh 		ocfs2_wait_for_mask(&mw);
1932cf8e06f1SMark Fasheh 		goto retry_cancel;
1933cf8e06f1SMark Fasheh 	}
1934cf8e06f1SMark Fasheh 
1935cf8e06f1SMark Fasheh 	ret = -ERESTARTSYS;
1936cf8e06f1SMark Fasheh 	/*
1937cf8e06f1SMark Fasheh 	 * We may still have gotten the lock, in which case there's no
1938cf8e06f1SMark Fasheh 	 * point to restarting the syscall.
1939cf8e06f1SMark Fasheh 	 */
1940cf8e06f1SMark Fasheh 	if (lockres->l_level == level)
1941cf8e06f1SMark Fasheh 		ret = 0;
1942cf8e06f1SMark Fasheh 
1943cf8e06f1SMark Fasheh 	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1944cf8e06f1SMark Fasheh 	     lockres->l_flags, lockres->l_level, lockres->l_action);
1945cf8e06f1SMark Fasheh 
1946cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1947cf8e06f1SMark Fasheh 
1948cf8e06f1SMark Fasheh out:
1949cf8e06f1SMark Fasheh 	return ret;
1950cf8e06f1SMark Fasheh }
1951cf8e06f1SMark Fasheh 
1952cf8e06f1SMark Fasheh /*
1953cf8e06f1SMark Fasheh  * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1954cf8e06f1SMark Fasheh  * flock() calls. The locking approach this requires is sufficiently
1955cf8e06f1SMark Fasheh  * different from all other cluster lock types that we implement a
19563ad2f3fbSDaniel Mack  * separate path to the "low-level" dlm calls. In particular:
1957cf8e06f1SMark Fasheh  *
1958cf8e06f1SMark Fasheh  * - No optimization of lock levels is done - we take at exactly
1959cf8e06f1SMark Fasheh  *   what's been requested.
1960cf8e06f1SMark Fasheh  *
1961cf8e06f1SMark Fasheh  * - No lock caching is employed. We immediately downconvert to
1962cf8e06f1SMark Fasheh  *   no-lock at unlock time. This also means flock locks never go on
1963cf8e06f1SMark Fasheh  *   the blocking list).
1964cf8e06f1SMark Fasheh  *
1965cf8e06f1SMark Fasheh  * - Since userspace can trivially deadlock itself with flock, we make
1966cf8e06f1SMark Fasheh  *   sure to allow cancellation of a misbehaving applications flock()
1967cf8e06f1SMark Fasheh  *   request.
1968cf8e06f1SMark Fasheh  *
1969cf8e06f1SMark Fasheh  * - Access to any flock lockres doesn't require concurrency, so we
1970cf8e06f1SMark Fasheh  *   can simplify the code by requiring the caller to guarantee
1971cf8e06f1SMark Fasheh  *   serialization of dlmglue flock calls.
1972cf8e06f1SMark Fasheh  */
ocfs2_file_lock(struct file * file,int ex,int trylock)1973cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock)
1974cf8e06f1SMark Fasheh {
1975e988cf1cSMark Fasheh 	int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1976e988cf1cSMark Fasheh 	unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
1977cf8e06f1SMark Fasheh 	unsigned long flags;
1978cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
1979cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
1980cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1981cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1982cf8e06f1SMark Fasheh 
1983cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1984cf8e06f1SMark Fasheh 
1985cf8e06f1SMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
1986bd3e7610SJoel Becker 	    (lockres->l_level > DLM_LOCK_NL)) {
1987cf8e06f1SMark Fasheh 		mlog(ML_ERROR,
1988cf8e06f1SMark Fasheh 		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1989cf8e06f1SMark Fasheh 		     "level: %u\n", lockres->l_name, lockres->l_flags,
1990cf8e06f1SMark Fasheh 		     lockres->l_level);
1991cf8e06f1SMark Fasheh 		return -EINVAL;
1992cf8e06f1SMark Fasheh 	}
1993cf8e06f1SMark Fasheh 
1994cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1995cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1996cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1997cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1998cf8e06f1SMark Fasheh 
1999cf8e06f1SMark Fasheh 		/*
2000cf8e06f1SMark Fasheh 		 * Get the lock at NLMODE to start - that way we
2001cf8e06f1SMark Fasheh 		 * can cancel the upconvert request if need be.
2002cf8e06f1SMark Fasheh 		 */
2003e988cf1cSMark Fasheh 		ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
2004cf8e06f1SMark Fasheh 		if (ret < 0) {
2005cf8e06f1SMark Fasheh 			mlog_errno(ret);
2006cf8e06f1SMark Fasheh 			goto out;
2007cf8e06f1SMark Fasheh 		}
2008cf8e06f1SMark Fasheh 
2009cf8e06f1SMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
2010cf8e06f1SMark Fasheh 		if (ret) {
2011cf8e06f1SMark Fasheh 			mlog_errno(ret);
2012cf8e06f1SMark Fasheh 			goto out;
2013cf8e06f1SMark Fasheh 		}
2014cf8e06f1SMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
2015cf8e06f1SMark Fasheh 	}
2016cf8e06f1SMark Fasheh 
2017cf8e06f1SMark Fasheh 	lockres->l_action = OCFS2_AST_CONVERT;
2018e988cf1cSMark Fasheh 	lkm_flags |= DLM_LKF_CONVERT;
2019cf8e06f1SMark Fasheh 	lockres->l_requested = level;
2020cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
2021cf8e06f1SMark Fasheh 
2022cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
2023cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2024cf8e06f1SMark Fasheh 
20254670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
2026a796d286SJoel Becker 			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1);
20277431cd7eSJoel Becker 	if (ret) {
20287431cd7eSJoel Becker 		if (!trylock || (ret != -EAGAIN)) {
202924ef1815SJoel Becker 			ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
2030cf8e06f1SMark Fasheh 			ret = -EINVAL;
2031cf8e06f1SMark Fasheh 		}
2032cf8e06f1SMark Fasheh 
2033cf8e06f1SMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
2034cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, &mw);
2035cf8e06f1SMark Fasheh 		goto out;
2036cf8e06f1SMark Fasheh 	}
2037cf8e06f1SMark Fasheh 
2038cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
2039cf8e06f1SMark Fasheh 	if (ret == -ERESTARTSYS) {
2040cf8e06f1SMark Fasheh 		/*
2041cf8e06f1SMark Fasheh 		 * Userspace can cause deadlock itself with
2042cf8e06f1SMark Fasheh 		 * flock(). Current behavior locally is to allow the
2043cf8e06f1SMark Fasheh 		 * deadlock, but abort the system call if a signal is
2044cf8e06f1SMark Fasheh 		 * received. We follow this example, otherwise a
2045cf8e06f1SMark Fasheh 		 * poorly written program could sit in kernel until
2046cf8e06f1SMark Fasheh 		 * reboot.
2047cf8e06f1SMark Fasheh 		 *
2048cf8e06f1SMark Fasheh 		 * Handling this is a bit more complicated for Ocfs2
2049cf8e06f1SMark Fasheh 		 * though. We can't exit this function with an
2050cf8e06f1SMark Fasheh 		 * outstanding lock request, so a cancel convert is
2051cf8e06f1SMark Fasheh 		 * required. We intentionally overwrite 'ret' - if the
2052cf8e06f1SMark Fasheh 		 * cancel fails and the lock was granted, it's easier
2053af901ca1SAndré Goddard Rosa 		 * to just bubble success back up to the user.
2054cf8e06f1SMark Fasheh 		 */
2055cf8e06f1SMark Fasheh 		ret = ocfs2_flock_handle_signal(lockres, level);
20561693a5c0SDavid Teigland 	} else if (!ret && (level > lockres->l_level)) {
20571693a5c0SDavid Teigland 		/* Trylock failed asynchronously */
20581693a5c0SDavid Teigland 		BUG_ON(!trylock);
20591693a5c0SDavid Teigland 		ret = -EAGAIN;
2060cf8e06f1SMark Fasheh 	}
2061cf8e06f1SMark Fasheh 
2062cf8e06f1SMark Fasheh out:
2063cf8e06f1SMark Fasheh 
2064cf8e06f1SMark Fasheh 	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
2065cf8e06f1SMark Fasheh 	     lockres->l_name, ex, trylock, ret);
2066cf8e06f1SMark Fasheh 	return ret;
2067cf8e06f1SMark Fasheh }
2068cf8e06f1SMark Fasheh 
ocfs2_file_unlock(struct file * file)2069cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file)
2070cf8e06f1SMark Fasheh {
2071cf8e06f1SMark Fasheh 	int ret;
2072de551246SJoel Becker 	unsigned int gen;
2073cf8e06f1SMark Fasheh 	unsigned long flags;
2074cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
2075cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
2076cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
2077cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
2078cf8e06f1SMark Fasheh 
2079cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
2080cf8e06f1SMark Fasheh 
2081cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
2082cf8e06f1SMark Fasheh 		return;
2083cf8e06f1SMark Fasheh 
2084e988cf1cSMark Fasheh 	if (lockres->l_level == DLM_LOCK_NL)
2085cf8e06f1SMark Fasheh 		return;
2086cf8e06f1SMark Fasheh 
2087cf8e06f1SMark Fasheh 	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
2088cf8e06f1SMark Fasheh 	     lockres->l_name, lockres->l_flags, lockres->l_level,
2089cf8e06f1SMark Fasheh 	     lockres->l_action);
2090cf8e06f1SMark Fasheh 
2091cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2092cf8e06f1SMark Fasheh 	/*
2093cf8e06f1SMark Fasheh 	 * Fake a blocking ast for the downconvert code.
2094cf8e06f1SMark Fasheh 	 */
2095cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
2096bd3e7610SJoel Becker 	lockres->l_blocking = DLM_LOCK_EX;
2097cf8e06f1SMark Fasheh 
2098e988cf1cSMark Fasheh 	gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
2099cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
2100cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2101cf8e06f1SMark Fasheh 
2102e988cf1cSMark Fasheh 	ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
2103cf8e06f1SMark Fasheh 	if (ret) {
2104cf8e06f1SMark Fasheh 		mlog_errno(ret);
2105cf8e06f1SMark Fasheh 		return;
2106cf8e06f1SMark Fasheh 	}
2107cf8e06f1SMark Fasheh 
2108cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask(&mw);
2109cf8e06f1SMark Fasheh 	if (ret)
2110cf8e06f1SMark Fasheh 		mlog_errno(ret);
2111cf8e06f1SMark Fasheh }
2112cf8e06f1SMark Fasheh 
ocfs2_downconvert_on_unlock(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres)211334d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
2114ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
2115ccd979bdSMark Fasheh {
2116ccd979bdSMark Fasheh 	int kick = 0;
2117ccd979bdSMark Fasheh 
2118ccd979bdSMark Fasheh 	/* If we know that another node is waiting on our lock, kick
211934d024f8SMark Fasheh 	 * the downconvert thread * pre-emptively when we reach a release
2120ccd979bdSMark Fasheh 	 * condition. */
2121ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
2122ccd979bdSMark Fasheh 		switch(lockres->l_blocking) {
2123bd3e7610SJoel Becker 		case DLM_LOCK_EX:
2124ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders && !lockres->l_ro_holders)
2125ccd979bdSMark Fasheh 				kick = 1;
2126ccd979bdSMark Fasheh 			break;
2127bd3e7610SJoel Becker 		case DLM_LOCK_PR:
2128ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders)
2129ccd979bdSMark Fasheh 				kick = 1;
2130ccd979bdSMark Fasheh 			break;
2131ccd979bdSMark Fasheh 		default:
2132ccd979bdSMark Fasheh 			BUG();
2133ccd979bdSMark Fasheh 		}
2134ccd979bdSMark Fasheh 	}
2135ccd979bdSMark Fasheh 
2136ccd979bdSMark Fasheh 	if (kick)
213734d024f8SMark Fasheh 		ocfs2_wake_downconvert_thread(osb);
2138ccd979bdSMark Fasheh }
2139ccd979bdSMark Fasheh 
2140ccd979bdSMark Fasheh #define OCFS2_SEC_BITS   34
214131cc0c80SAlex Shi #define OCFS2_SEC_SHIFT  (64 - OCFS2_SEC_BITS)
2142ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
2143ccd979bdSMark Fasheh 
2144ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for
2145ccd979bdSMark Fasheh  * now. */
ocfs2_pack_timespec(struct timespec64 * spec)21463a3d1e51SArnd Bergmann static u64 ocfs2_pack_timespec(struct timespec64 *spec)
2147ccd979bdSMark Fasheh {
2148ccd979bdSMark Fasheh 	u64 res;
21493a3d1e51SArnd Bergmann 	u64 sec = clamp_t(time64_t, spec->tv_sec, 0, 0x3ffffffffull);
2150ccd979bdSMark Fasheh 	u32 nsec = spec->tv_nsec;
2151ccd979bdSMark Fasheh 
2152ccd979bdSMark Fasheh 	res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
2153ccd979bdSMark Fasheh 
2154ccd979bdSMark Fasheh 	return res;
2155ccd979bdSMark Fasheh }
2156ccd979bdSMark Fasheh 
2157ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't
2158ccd979bdSMark Fasheh  * need ip_lock in this function as anyone who would be changing those
2159e63aecb6SMark Fasheh  * values is supposed to be blocked in ocfs2_inode_lock right now. */
__ocfs2_stuff_meta_lvb(struct inode * inode)2160ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2161ccd979bdSMark Fasheh {
2162ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2163e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2164ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2165*10fc3a18SJeff Layton 	struct timespec64 ts;
2166ccd979bdSMark Fasheh 
2167a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2168ccd979bdSMark Fasheh 
216924c19ef4SMark Fasheh 	/*
217024c19ef4SMark Fasheh 	 * Invalidate the LVB of a deleted inode - this way other
217124c19ef4SMark Fasheh 	 * nodes are forced to go to disk and discover the new inode
217224c19ef4SMark Fasheh 	 * status.
217324c19ef4SMark Fasheh 	 */
217424c19ef4SMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
217524c19ef4SMark Fasheh 		lvb->lvb_version = 0;
217624c19ef4SMark Fasheh 		goto out;
217724c19ef4SMark Fasheh 	}
217824c19ef4SMark Fasheh 
21794d3b83f7SMark Fasheh 	lvb->lvb_version   = OCFS2_LVB_VERSION;
2180ccd979bdSMark Fasheh 	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode));
2181ccd979bdSMark Fasheh 	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
218203ab30f7SEric W. Biederman 	lvb->lvb_iuid      = cpu_to_be32(i_uid_read(inode));
218303ab30f7SEric W. Biederman 	lvb->lvb_igid      = cpu_to_be32(i_gid_read(inode));
2184ccd979bdSMark Fasheh 	lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
2185ccd979bdSMark Fasheh 	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
2186*10fc3a18SJeff Layton 	ts = inode_get_atime(inode);
2187*10fc3a18SJeff Layton 	lvb->lvb_iatime_packed = cpu_to_be64(ocfs2_pack_timespec(&ts));
2188*10fc3a18SJeff Layton 	ts = inode_get_ctime(inode);
2189*10fc3a18SJeff Layton 	lvb->lvb_ictime_packed = cpu_to_be64(ocfs2_pack_timespec(&ts));
2190*10fc3a18SJeff Layton 	ts = inode_get_mtime(inode);
2191*10fc3a18SJeff Layton 	lvb->lvb_imtime_packed = cpu_to_be64(ocfs2_pack_timespec(&ts));
2192ca4d147eSHerbert Poetzl 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
219315b1e36bSMark Fasheh 	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
2194f9e2d82eSMark Fasheh 	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
2195ccd979bdSMark Fasheh 
219624c19ef4SMark Fasheh out:
2197ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2198ccd979bdSMark Fasheh }
2199ccd979bdSMark Fasheh 
/*
 * Inverse of the LVB time packing: the bits above OCFS2_SEC_SHIFT of
 * @packed_time become tv_sec, the bits covered by OCFS2_NSEC_MASK
 * become tv_nsec. Both fields of @spec are overwritten.
 */
static void ocfs2_unpack_timespec(struct timespec64 *spec,
				  u64 packed_time)
{
	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
}
2206ccd979bdSMark Fasheh 
ocfs2_refresh_inode_from_lvb(struct inode * inode)220760606ecaSAl Viro static int ocfs2_refresh_inode_from_lvb(struct inode *inode)
2208ccd979bdSMark Fasheh {
2209ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2210e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2211ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2212*10fc3a18SJeff Layton 	struct timespec64 ts;
2213ccd979bdSMark Fasheh 
2214ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2215ccd979bdSMark Fasheh 
2216a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
221760606ecaSAl Viro 	if (inode_wrong_type(inode, be16_to_cpu(lvb->lvb_imode)))
221860606ecaSAl Viro 		return -ESTALE;
2219ccd979bdSMark Fasheh 
2220ccd979bdSMark Fasheh 	/* We're safe here without the lockres lock... */
2221ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2222ccd979bdSMark Fasheh 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
2223ccd979bdSMark Fasheh 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
2224ccd979bdSMark Fasheh 
2225ca4d147eSHerbert Poetzl 	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
222615b1e36bSMark Fasheh 	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
2227ca4d147eSHerbert Poetzl 	ocfs2_set_inode_flags(inode);
2228ca4d147eSHerbert Poetzl 
2229ccd979bdSMark Fasheh 	/* fast-symlinks are a special case */
2230ccd979bdSMark Fasheh 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
2231ccd979bdSMark Fasheh 		inode->i_blocks = 0;
2232ccd979bdSMark Fasheh 	else
22338110b073SMark Fasheh 		inode->i_blocks = ocfs2_inode_sector_count(inode);
2234ccd979bdSMark Fasheh 
223503ab30f7SEric W. Biederman 	i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
223603ab30f7SEric W. Biederman 	i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
2237ccd979bdSMark Fasheh 	inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
2238bfe86848SMiklos Szeredi 	set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2239*10fc3a18SJeff Layton 	ocfs2_unpack_timespec(&ts, be64_to_cpu(lvb->lvb_iatime_packed));
2240*10fc3a18SJeff Layton 	inode_set_atime_to_ts(inode, ts);
2241*10fc3a18SJeff Layton 	ocfs2_unpack_timespec(&ts, be64_to_cpu(lvb->lvb_imtime_packed));
2242*10fc3a18SJeff Layton 	inode_set_mtime_to_ts(inode, ts);
2243*10fc3a18SJeff Layton 	ocfs2_unpack_timespec(&ts, be64_to_cpu(lvb->lvb_ictime_packed));
2244*10fc3a18SJeff Layton 	inode_set_ctime_to_ts(inode, ts);
2245ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
224660606ecaSAl Viro 	return 0;
2247ccd979bdSMark Fasheh }
2248ccd979bdSMark Fasheh 
ocfs2_meta_lvb_is_trustable(struct inode * inode,struct ocfs2_lock_res * lockres)2249f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
2250f9e2d82eSMark Fasheh 					      struct ocfs2_lock_res *lockres)
2251ccd979bdSMark Fasheh {
2252a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2253ccd979bdSMark Fasheh 
22541c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
22551c520dfbSJoel Becker 	    && lvb->lvb_version == OCFS2_LVB_VERSION
2256f9e2d82eSMark Fasheh 	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
2257ccd979bdSMark Fasheh 		return 1;
2258ccd979bdSMark Fasheh 	return 0;
2259ccd979bdSMark Fasheh }
2260ccd979bdSMark Fasheh 
2261ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and
2262ccd979bdSMark Fasheh  * arbitrate who gets to refresh it.
2263ccd979bdSMark Fasheh  *
2264ccd979bdSMark Fasheh  *   0 means no refresh needed.
2265ccd979bdSMark Fasheh  *
2266ccd979bdSMark Fasheh  *   > 0 means you need to refresh this and you MUST call
2267ccd979bdSMark Fasheh  *   ocfs2_complete_lock_res_refresh afterwards. */
ocfs2_should_refresh_lock_res(struct ocfs2_lock_res * lockres)2268ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2269ccd979bdSMark Fasheh {
2270ccd979bdSMark Fasheh 	unsigned long flags;
2271ccd979bdSMark Fasheh 	int status = 0;
2272ccd979bdSMark Fasheh 
2273ccd979bdSMark Fasheh refresh_check:
2274ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2275ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2276ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2277ccd979bdSMark Fasheh 		goto bail;
2278ccd979bdSMark Fasheh 	}
2279ccd979bdSMark Fasheh 
2280ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2281ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2282ccd979bdSMark Fasheh 
2283ccd979bdSMark Fasheh 		ocfs2_wait_on_refreshing_lock(lockres);
2284ccd979bdSMark Fasheh 		goto refresh_check;
2285ccd979bdSMark Fasheh 	}
2286ccd979bdSMark Fasheh 
2287ccd979bdSMark Fasheh 	/* Ok, I'll be the one to refresh this lock. */
2288ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
2289ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2290ccd979bdSMark Fasheh 
2291ccd979bdSMark Fasheh 	status = 1;
2292ccd979bdSMark Fasheh bail:
2293c1e8d35eSTao Ma 	mlog(0, "status %d\n", status);
2294ccd979bdSMark Fasheh 	return status;
2295ccd979bdSMark Fasheh }
2296ccd979bdSMark Fasheh 
2297ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh
2298ccd979bdSMark Fasheh  * anymroe, but i won't clear the needs refresh flag. */
ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res * lockres,int status)2299ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
2300ccd979bdSMark Fasheh 						   int status)
2301ccd979bdSMark Fasheh {
2302ccd979bdSMark Fasheh 	unsigned long flags;
2303ccd979bdSMark Fasheh 
2304ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2305ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
2306ccd979bdSMark Fasheh 	if (!status)
2307ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
2308ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2309ccd979bdSMark Fasheh 
2310ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
2311ccd979bdSMark Fasheh }
2312ccd979bdSMark Fasheh 
2313ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. */
ocfs2_inode_lock_update(struct inode * inode,struct buffer_head ** bh)2314e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode,
2315ccd979bdSMark Fasheh 				  struct buffer_head **bh)
2316ccd979bdSMark Fasheh {
2317ccd979bdSMark Fasheh 	int status = 0;
2318ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2319e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2320ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
2321c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2322ccd979bdSMark Fasheh 
2323be9e986bSMark Fasheh 	if (ocfs2_mount_local(osb))
2324be9e986bSMark Fasheh 		goto bail;
2325be9e986bSMark Fasheh 
2326ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2327ccd979bdSMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
2328b0697053SMark Fasheh 		mlog(0, "Orphaned inode %llu was deleted while we "
2329ccd979bdSMark Fasheh 		     "were waiting on a lock. ip_flags = 0x%x\n",
2330b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
2331ccd979bdSMark Fasheh 		spin_unlock(&oi->ip_lock);
2332ccd979bdSMark Fasheh 		status = -ENOENT;
2333ccd979bdSMark Fasheh 		goto bail;
2334ccd979bdSMark Fasheh 	}
2335ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2336ccd979bdSMark Fasheh 
2337ccd979bdSMark Fasheh 	if (!ocfs2_should_refresh_lock_res(lockres))
2338ccd979bdSMark Fasheh 		goto bail;
2339ccd979bdSMark Fasheh 
2340ccd979bdSMark Fasheh 	/* This will discard any caching information we might have had
2341ccd979bdSMark Fasheh 	 * for the inode metadata. */
23428cb471e8SJoel Becker 	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
2343ccd979bdSMark Fasheh 
234483418978SMark Fasheh 	ocfs2_extent_map_trunc(inode, 0);
234583418978SMark Fasheh 
2346be9e986bSMark Fasheh 	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
2347b0697053SMark Fasheh 		mlog(0, "Trusting LVB on inode %llu\n",
2348b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno);
234960606ecaSAl Viro 		status = ocfs2_refresh_inode_from_lvb(inode);
235060606ecaSAl Viro 		goto bail_refresh;
2351ccd979bdSMark Fasheh 	} else {
2352ccd979bdSMark Fasheh 		/* Boo, we have to go to disk. */
2353ccd979bdSMark Fasheh 		/* read bh, cast, ocfs2_refresh_inode */
2354b657c95cSJoel Becker 		status = ocfs2_read_inode_block(inode, bh);
2355ccd979bdSMark Fasheh 		if (status < 0) {
2356ccd979bdSMark Fasheh 			mlog_errno(status);
2357ccd979bdSMark Fasheh 			goto bail_refresh;
2358ccd979bdSMark Fasheh 		}
2359ccd979bdSMark Fasheh 		fe = (struct ocfs2_dinode *) (*bh)->b_data;
236060606ecaSAl Viro 		if (inode_wrong_type(inode, le16_to_cpu(fe->i_mode))) {
236160606ecaSAl Viro 			status = -ESTALE;
236260606ecaSAl Viro 			goto bail_refresh;
236360606ecaSAl Viro 		}
2364ccd979bdSMark Fasheh 
2365ccd979bdSMark Fasheh 		/* This is a good chance to make sure we're not
2366b657c95cSJoel Becker 		 * locking an invalid object.  ocfs2_read_inode_block()
2367b657c95cSJoel Becker 		 * already checked that the inode block is sane.
2368ccd979bdSMark Fasheh 		 *
2369ccd979bdSMark Fasheh 		 * We bug on a stale inode here because we checked
2370ccd979bdSMark Fasheh 		 * above whether it was wiped from disk. The wiping
2371ccd979bdSMark Fasheh 		 * node provides a guarantee that we receive that
2372ccd979bdSMark Fasheh 		 * message and can mark the inode before dropping any
2373ccd979bdSMark Fasheh 		 * locks associated with it. */
2374ccd979bdSMark Fasheh 		mlog_bug_on_msg(inode->i_generation !=
2375ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_generation),
2376b0697053SMark Fasheh 				"Invalid dinode %llu disk generation: %u "
2377ccd979bdSMark Fasheh 				"inode->i_generation: %u\n",
2378b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2379b0697053SMark Fasheh 				le32_to_cpu(fe->i_generation),
2380ccd979bdSMark Fasheh 				inode->i_generation);
2381ccd979bdSMark Fasheh 		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
2382ccd979bdSMark Fasheh 				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
2383b0697053SMark Fasheh 				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
2384b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2385b0697053SMark Fasheh 				(unsigned long long)le64_to_cpu(fe->i_dtime),
2386ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_flags));
2387ccd979bdSMark Fasheh 
2388ccd979bdSMark Fasheh 		ocfs2_refresh_inode(inode, fe);
23898ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2390ccd979bdSMark Fasheh 	}
2391ccd979bdSMark Fasheh 
2392ccd979bdSMark Fasheh 	status = 0;
2393ccd979bdSMark Fasheh bail_refresh:
2394ccd979bdSMark Fasheh 	ocfs2_complete_lock_res_refresh(lockres, status);
2395ccd979bdSMark Fasheh bail:
2396ccd979bdSMark Fasheh 	return status;
2397ccd979bdSMark Fasheh }
2398ccd979bdSMark Fasheh 
/*
 * Provide the inode's buffer_head through @ret_bh.
 *
 * When @passed_bh is non-NULL it is stored in *@ret_bh and an extra
 * reference is taken on it with get_bh(); 0 is returned.  Otherwise the
 * inode block is read with ocfs2_read_inode_block() and its result is
 * returned (negative values are logged).
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (!passed_bh) {
		status = ocfs2_read_inode_block(inode, ret_bh);
		if (status < 0)
			mlog_errno(status);

		return status;
	}

	/* The update already went to disk for us, so reuse that bh. */
	get_bh(passed_bh);
	*ret_bh = passed_bh;

	return 0;
}
2420ccd979bdSMark Fasheh 
2421ccd979bdSMark Fasheh /*
2422ccd979bdSMark Fasheh  * returns < 0 error if the callback will never be called, otherwise
2423ccd979bdSMark Fasheh  * the result of the lock will be communicated via the callback.
2424ccd979bdSMark Fasheh  */
ocfs2_inode_lock_full_nested(struct inode * inode,struct buffer_head ** ret_bh,int ex,int arg_flags,int subclass)2425cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode,
2426ccd979bdSMark Fasheh 				 struct buffer_head **ret_bh,
2427ccd979bdSMark Fasheh 				 int ex,
2428cb25797dSJan Kara 				 int arg_flags,
2429cb25797dSJan Kara 				 int subclass)
2430ccd979bdSMark Fasheh {
2431bd3e7610SJoel Becker 	int status, level, acquired;
2432bd3e7610SJoel Becker 	u32 dlm_flags;
2433c271c5c2SSunil Mushran 	struct ocfs2_lock_res *lockres = NULL;
2434ccd979bdSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2435ccd979bdSMark Fasheh 	struct buffer_head *local_bh = NULL;
2436ccd979bdSMark Fasheh 
2437b0697053SMark Fasheh 	mlog(0, "inode %llu, take %s META lock\n",
2438b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2439ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2440ccd979bdSMark Fasheh 
2441ccd979bdSMark Fasheh 	status = 0;
2442ccd979bdSMark Fasheh 	acquired = 0;
2443ccd979bdSMark Fasheh 	/* We'll allow faking a readonly metadata lock for
2444ccd979bdSMark Fasheh 	 * rodevices. */
2445ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb)) {
2446ccd979bdSMark Fasheh 		if (ex)
2447ccd979bdSMark Fasheh 			status = -EROFS;
244803efed8aSTiger Yang 		goto getbh;
2449ccd979bdSMark Fasheh 	}
2450ccd979bdSMark Fasheh 
2451439a36b8SEric Ren 	if ((arg_flags & OCFS2_META_LOCK_GETBH) ||
2452439a36b8SEric Ren 	    ocfs2_mount_local(osb))
2453439a36b8SEric Ren 		goto update;
2454c271c5c2SSunil Mushran 
2455ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2456553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2457ccd979bdSMark Fasheh 
2458e63aecb6SMark Fasheh 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
2459bd3e7610SJoel Becker 	level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2460ccd979bdSMark Fasheh 	dlm_flags = 0;
2461ccd979bdSMark Fasheh 	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2462bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_NOQUEUE;
2463ccd979bdSMark Fasheh 
2464cb25797dSJan Kara 	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2465cb25797dSJan Kara 				      arg_flags, subclass, _RET_IP_);
2466ccd979bdSMark Fasheh 	if (status < 0) {
246741003a7bSZach Brown 		if (status != -EAGAIN)
2468ccd979bdSMark Fasheh 			mlog_errno(status);
2469ccd979bdSMark Fasheh 		goto bail;
2470ccd979bdSMark Fasheh 	}
2471ccd979bdSMark Fasheh 
2472ccd979bdSMark Fasheh 	/* Notify the error cleanup path to drop the cluster lock. */
2473ccd979bdSMark Fasheh 	acquired = 1;
2474ccd979bdSMark Fasheh 
2475ccd979bdSMark Fasheh 	/* We wait twice because a node may have died while we were in
2476ccd979bdSMark Fasheh 	 * the lower dlm layers. The second time though, we've
2477ccd979bdSMark Fasheh 	 * committed to owning this lock so we don't allow signals to
2478ccd979bdSMark Fasheh 	 * abort the operation. */
2479ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2480553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2481ccd979bdSMark Fasheh 
2482439a36b8SEric Ren update:
248324c19ef4SMark Fasheh 	/*
248424c19ef4SMark Fasheh 	 * We only see this flag if we're being called from
248524c19ef4SMark Fasheh 	 * ocfs2_read_locked_inode(). It means we're locking an inode
248624c19ef4SMark Fasheh 	 * which hasn't been populated yet, so clear the refresh flag
248724c19ef4SMark Fasheh 	 * and let the caller handle it.
248824c19ef4SMark Fasheh 	 */
248924c19ef4SMark Fasheh 	if (inode->i_state & I_NEW) {
249024c19ef4SMark Fasheh 		status = 0;
2491c271c5c2SSunil Mushran 		if (lockres)
249224c19ef4SMark Fasheh 			ocfs2_complete_lock_res_refresh(lockres, 0);
249324c19ef4SMark Fasheh 		goto bail;
249424c19ef4SMark Fasheh 	}
249524c19ef4SMark Fasheh 
2496ccd979bdSMark Fasheh 	/* This is fun. The caller may want a bh back, or it may
2497e63aecb6SMark Fasheh 	 * not. ocfs2_inode_lock_update definitely wants one in, but
2498ccd979bdSMark Fasheh 	 * may or may not read one, depending on what's in the
2499ccd979bdSMark Fasheh 	 * LVB. The result of all of this is that we've *only* gone to
2500ccd979bdSMark Fasheh 	 * disk if we have to, so the complexity is worthwhile. */
2501e63aecb6SMark Fasheh 	status = ocfs2_inode_lock_update(inode, &local_bh);
2502ccd979bdSMark Fasheh 	if (status < 0) {
2503ccd979bdSMark Fasheh 		if (status != -ENOENT)
2504ccd979bdSMark Fasheh 			mlog_errno(status);
2505ccd979bdSMark Fasheh 		goto bail;
2506ccd979bdSMark Fasheh 	}
250703efed8aSTiger Yang getbh:
2508ccd979bdSMark Fasheh 	if (ret_bh) {
2509ccd979bdSMark Fasheh 		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2510ccd979bdSMark Fasheh 		if (status < 0) {
2511ccd979bdSMark Fasheh 			mlog_errno(status);
2512ccd979bdSMark Fasheh 			goto bail;
2513ccd979bdSMark Fasheh 		}
2514ccd979bdSMark Fasheh 	}
2515ccd979bdSMark Fasheh 
2516ccd979bdSMark Fasheh bail:
2517ccd979bdSMark Fasheh 	if (status < 0) {
2518ccd979bdSMark Fasheh 		if (ret_bh && (*ret_bh)) {
2519ccd979bdSMark Fasheh 			brelse(*ret_bh);
2520ccd979bdSMark Fasheh 			*ret_bh = NULL;
2521ccd979bdSMark Fasheh 		}
2522ccd979bdSMark Fasheh 		if (acquired)
2523e63aecb6SMark Fasheh 			ocfs2_inode_unlock(inode, ex);
2524ccd979bdSMark Fasheh 	}
2525ccd979bdSMark Fasheh 
2526ccd979bdSMark Fasheh 	brelse(local_bh);
2527ccd979bdSMark Fasheh 	return status;
2528ccd979bdSMark Fasheh }
2529ccd979bdSMark Fasheh 
2530ccd979bdSMark Fasheh /*
253134d024f8SMark Fasheh  * This is working around a lock inversion between tasks acquiring DLM
253234d024f8SMark Fasheh  * locks while holding a page lock and the downconvert thread which
253334d024f8SMark Fasheh  * blocks dlm lock acquiry while acquiring page locks.
2534ccd979bdSMark Fasheh  *
2535ccd979bdSMark Fasheh  * ** These _with_page variantes are only intended to be called from aop
2536ccd979bdSMark Fasheh  * methods that hold page locks and return a very specific *positive* error
2537ccd979bdSMark Fasheh  * code that aop methods pass up to the VFS -- test for errors with != 0. **
2538ccd979bdSMark Fasheh  *
253934d024f8SMark Fasheh  * The DLM is called such that it returns -EAGAIN if it would have
254034d024f8SMark Fasheh  * blocked waiting for the downconvert thread.  In that case we unlock
254134d024f8SMark Fasheh  * our page so the downconvert thread can make progress.  Once we've
254234d024f8SMark Fasheh  * done this we have to return AOP_TRUNCATED_PAGE so the aop method
254334d024f8SMark Fasheh  * that called us can bubble that back up into the VFS who will then
254434d024f8SMark Fasheh  * immediately retry the aop call.
2545ccd979bdSMark Fasheh  */
ocfs2_inode_lock_with_page(struct inode * inode,struct buffer_head ** ret_bh,int ex,struct page * page)2546e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode,
2547ccd979bdSMark Fasheh 			      struct buffer_head **ret_bh,
2548ccd979bdSMark Fasheh 			      int ex,
2549ccd979bdSMark Fasheh 			      struct page *page)
2550ccd979bdSMark Fasheh {
2551ccd979bdSMark Fasheh 	int ret;
2552ccd979bdSMark Fasheh 
2553e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2554ccd979bdSMark Fasheh 	if (ret == -EAGAIN) {
2555ccd979bdSMark Fasheh 		unlock_page(page);
2556ff26cc10SGang He 		/*
2557ff26cc10SGang He 		 * If we can't get inode lock immediately, we should not return
2558ff26cc10SGang He 		 * directly here, since this will lead to a softlockup problem.
2559ff26cc10SGang He 		 * The method is to get a blocking lock and immediately unlock
2560ff26cc10SGang He 		 * before returning, this can avoid CPU resource waste due to
2561ff26cc10SGang He 		 * lots of retries, and benefits fairness in getting lock.
2562ff26cc10SGang He 		 */
2563ff26cc10SGang He 		if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
2564ff26cc10SGang He 			ocfs2_inode_unlock(inode, ex);
2565ccd979bdSMark Fasheh 		ret = AOP_TRUNCATED_PAGE;
2566ccd979bdSMark Fasheh 	}
2567ccd979bdSMark Fasheh 
2568ccd979bdSMark Fasheh 	return ret;
2569ccd979bdSMark Fasheh }
2570ccd979bdSMark Fasheh 
ocfs2_inode_lock_atime(struct inode * inode,struct vfsmount * vfsmnt,int * level,int wait)2571e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode,
25727f1a37e3STiger Yang 			  struct vfsmount *vfsmnt,
2573c4c2416aSGang He 			  int *level, int wait)
25747f1a37e3STiger Yang {
25757f1a37e3STiger Yang 	int ret;
25767f1a37e3STiger Yang 
2577c4c2416aSGang He 	if (wait)
2578e63aecb6SMark Fasheh 		ret = ocfs2_inode_lock(inode, NULL, 0);
2579c4c2416aSGang He 	else
2580c4c2416aSGang He 		ret = ocfs2_try_inode_lock(inode, NULL, 0);
2581c4c2416aSGang He 
25827f1a37e3STiger Yang 	if (ret < 0) {
2583c4c2416aSGang He 		if (ret != -EAGAIN)
25847f1a37e3STiger Yang 			mlog_errno(ret);
25857f1a37e3STiger Yang 		return ret;
25867f1a37e3STiger Yang 	}
25877f1a37e3STiger Yang 
25887f1a37e3STiger Yang 	/*
25897f1a37e3STiger Yang 	 * If we should update atime, we will get EX lock,
25907f1a37e3STiger Yang 	 * otherwise we just get PR lock.
25917f1a37e3STiger Yang 	 */
25927f1a37e3STiger Yang 	if (ocfs2_should_update_atime(inode, vfsmnt)) {
25937f1a37e3STiger Yang 		struct buffer_head *bh = NULL;
25947f1a37e3STiger Yang 
2595e63aecb6SMark Fasheh 		ocfs2_inode_unlock(inode, 0);
2596c4c2416aSGang He 		if (wait)
2597e63aecb6SMark Fasheh 			ret = ocfs2_inode_lock(inode, &bh, 1);
2598c4c2416aSGang He 		else
2599c4c2416aSGang He 			ret = ocfs2_try_inode_lock(inode, &bh, 1);
2600c4c2416aSGang He 
26017f1a37e3STiger Yang 		if (ret < 0) {
2602c4c2416aSGang He 			if (ret != -EAGAIN)
26037f1a37e3STiger Yang 				mlog_errno(ret);
26047f1a37e3STiger Yang 			return ret;
26057f1a37e3STiger Yang 		}
26067f1a37e3STiger Yang 		*level = 1;
26077f1a37e3STiger Yang 		if (ocfs2_should_update_atime(inode, vfsmnt))
26087f1a37e3STiger Yang 			ocfs2_update_inode_atime(inode, bh);
26097f1a37e3STiger Yang 		brelse(bh);
26107f1a37e3STiger Yang 	} else
26117f1a37e3STiger Yang 		*level = 0;
26127f1a37e3STiger Yang 
26137f1a37e3STiger Yang 	return ret;
26147f1a37e3STiger Yang }
26157f1a37e3STiger Yang 
ocfs2_inode_unlock(struct inode * inode,int ex)2616e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode,
2617ccd979bdSMark Fasheh 		       int ex)
2618ccd979bdSMark Fasheh {
2619bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2620e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2621c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2622ccd979bdSMark Fasheh 
2623b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s META lock\n",
2624b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2625ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2626ccd979bdSMark Fasheh 
26271119d3c0Spiaojun 	if (!ocfs2_is_hard_readonly(osb) &&
2628c271c5c2SSunil Mushran 	    !ocfs2_mount_local(osb))
26291119d3c0Spiaojun 		ocfs2_cluster_unlock(osb, lockres, level);
2630ccd979bdSMark Fasheh }
2631ccd979bdSMark Fasheh 
2632439a36b8SEric Ren /*
2633439a36b8SEric Ren  * This _tracker variantes are introduced to deal with the recursive cluster
2634439a36b8SEric Ren  * locking issue. The idea is to keep track of a lock holder on the stack of
2635439a36b8SEric Ren  * the current process. If there's a lock holder on the stack, we know the
2636439a36b8SEric Ren  * task context is already protected by cluster locking. Currently, they're
2637439a36b8SEric Ren  * used in some VFS entry routines.
2638439a36b8SEric Ren  *
2639439a36b8SEric Ren  * return < 0 on error, return == 0 if there's no lock holder on the stack
2640439a36b8SEric Ren  * before this call, return == 1 if this call would be a recursive locking.
2641133b81f2SLarry Chen  * return == -1 if this lock attempt will cause an upgrade which is forbidden.
2642133b81f2SLarry Chen  *
2643133b81f2SLarry Chen  * When taking lock levels into account,we face some different situations.
2644133b81f2SLarry Chen  *
2645133b81f2SLarry Chen  * 1. no lock is held
2646133b81f2SLarry Chen  *    In this case, just lock the inode as requested and return 0
2647133b81f2SLarry Chen  *
2648133b81f2SLarry Chen  * 2. We are holding a lock
2649133b81f2SLarry Chen  *    For this situation, things diverges into several cases
2650133b81f2SLarry Chen  *
2651133b81f2SLarry Chen  *    wanted     holding	     what to do
2652133b81f2SLarry Chen  *    ex		ex	    see 2.1 below
2653133b81f2SLarry Chen  *    ex		pr	    see 2.2 below
2654133b81f2SLarry Chen  *    pr		ex	    see 2.1 below
2655133b81f2SLarry Chen  *    pr		pr	    see 2.1 below
2656133b81f2SLarry Chen  *
2657133b81f2SLarry Chen  *    2.1 lock level that is been held is compatible
2658133b81f2SLarry Chen  *    with the wanted level, so no lock action will be tacken.
2659133b81f2SLarry Chen  *
2660133b81f2SLarry Chen  *    2.2 Otherwise, an upgrade is needed, but it is forbidden.
2661133b81f2SLarry Chen  *
2662133b81f2SLarry Chen  * Reason why upgrade within a process is forbidden is that
2663133b81f2SLarry Chen  * lock upgrade may cause dead lock. The following illustrates
2664133b81f2SLarry Chen  * how it happens.
2665133b81f2SLarry Chen  *
2666133b81f2SLarry Chen  *         thread on node1                             thread on node2
2667133b81f2SLarry Chen  * ocfs2_inode_lock_tracker(ex=0)
2668133b81f2SLarry Chen  *
2669133b81f2SLarry Chen  *                                <======   ocfs2_inode_lock_tracker(ex=1)
2670133b81f2SLarry Chen  *
2671133b81f2SLarry Chen  * ocfs2_inode_lock_tracker(ex=1)
2672439a36b8SEric Ren  */
ocfs2_inode_lock_tracker(struct inode * inode,struct buffer_head ** ret_bh,int ex,struct ocfs2_lock_holder * oh)2673439a36b8SEric Ren int ocfs2_inode_lock_tracker(struct inode *inode,
2674439a36b8SEric Ren 			     struct buffer_head **ret_bh,
2675439a36b8SEric Ren 			     int ex,
2676439a36b8SEric Ren 			     struct ocfs2_lock_holder *oh)
2677439a36b8SEric Ren {
2678133b81f2SLarry Chen 	int status = 0;
2679439a36b8SEric Ren 	struct ocfs2_lock_res *lockres;
2680133b81f2SLarry Chen 	struct ocfs2_lock_holder *tmp_oh;
2681133b81f2SLarry Chen 	struct pid *pid = task_pid(current);
2682133b81f2SLarry Chen 
2683439a36b8SEric Ren 
2684439a36b8SEric Ren 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
2685133b81f2SLarry Chen 	tmp_oh = ocfs2_pid_holder(lockres, pid);
2686439a36b8SEric Ren 
2687133b81f2SLarry Chen 	if (!tmp_oh) {
2688133b81f2SLarry Chen 		/*
2689133b81f2SLarry Chen 		 * This corresponds to the case 1.
2690133b81f2SLarry Chen 		 * We haven't got any lock before.
2691133b81f2SLarry Chen 		 */
2692133b81f2SLarry Chen 		status = ocfs2_inode_lock_full(inode, ret_bh, ex, 0);
2693133b81f2SLarry Chen 		if (status < 0) {
2694133b81f2SLarry Chen 			if (status != -ENOENT)
2695133b81f2SLarry Chen 				mlog_errno(status);
2696133b81f2SLarry Chen 			return status;
2697133b81f2SLarry Chen 		}
2698133b81f2SLarry Chen 
2699133b81f2SLarry Chen 		oh->oh_ex = ex;
2700133b81f2SLarry Chen 		ocfs2_add_holder(lockres, oh);
2701133b81f2SLarry Chen 		return 0;
2702133b81f2SLarry Chen 	}
2703133b81f2SLarry Chen 
2704133b81f2SLarry Chen 	if (unlikely(ex && !tmp_oh->oh_ex)) {
2705133b81f2SLarry Chen 		/*
2706133b81f2SLarry Chen 		 * case 2.2 upgrade may cause dead lock, forbid it.
2707133b81f2SLarry Chen 		 */
2708133b81f2SLarry Chen 		mlog(ML_ERROR, "Recursive locking is not permitted to "
2709133b81f2SLarry Chen 		     "upgrade to EX level from PR level.\n");
2710133b81f2SLarry Chen 		dump_stack();
2711133b81f2SLarry Chen 		return -EINVAL;
2712133b81f2SLarry Chen 	}
2713133b81f2SLarry Chen 
2714133b81f2SLarry Chen 	/*
2715133b81f2SLarry Chen 	 *  case 2.1 OCFS2_META_LOCK_GETBH flag make ocfs2_inode_lock_full.
2716133b81f2SLarry Chen 	 *  ignore the lock level and just update it.
2717133b81f2SLarry Chen 	 */
2718133b81f2SLarry Chen 	if (ret_bh) {
2719133b81f2SLarry Chen 		status = ocfs2_inode_lock_full(inode, ret_bh, ex,
2720133b81f2SLarry Chen 					       OCFS2_META_LOCK_GETBH);
2721439a36b8SEric Ren 		if (status < 0) {
2722439a36b8SEric Ren 			if (status != -ENOENT)
2723439a36b8SEric Ren 				mlog_errno(status);
2724439a36b8SEric Ren 			return status;
2725439a36b8SEric Ren 		}
2726439a36b8SEric Ren 	}
27272f566394SDan Carpenter 	return 1;
2728439a36b8SEric Ren }
2729439a36b8SEric Ren 
ocfs2_inode_unlock_tracker(struct inode * inode,int ex,struct ocfs2_lock_holder * oh,int had_lock)2730439a36b8SEric Ren void ocfs2_inode_unlock_tracker(struct inode *inode,
2731439a36b8SEric Ren 				int ex,
2732439a36b8SEric Ren 				struct ocfs2_lock_holder *oh,
2733439a36b8SEric Ren 				int had_lock)
2734439a36b8SEric Ren {
2735439a36b8SEric Ren 	struct ocfs2_lock_res *lockres;
2736439a36b8SEric Ren 
2737439a36b8SEric Ren 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
27388818efaaSEric Ren 	/* had_lock means that the currect process already takes the cluster
2739133b81f2SLarry Chen 	 * lock previously.
2740133b81f2SLarry Chen 	 * If had_lock is 1, we have nothing to do here.
2741133b81f2SLarry Chen 	 * If had_lock is 0, we will release the lock.
27428818efaaSEric Ren 	 */
2743439a36b8SEric Ren 	if (!had_lock) {
2744133b81f2SLarry Chen 		ocfs2_inode_unlock(inode, oh->oh_ex);
2745439a36b8SEric Ren 		ocfs2_remove_holder(lockres, oh);
2746439a36b8SEric Ren 	}
2747439a36b8SEric Ren }
2748439a36b8SEric Ren 
/*
 * Take the orphan scan cluster lock at DLM_LOCK_EX and report the
 * sequence number to use.
 *
 * Returns -EROFS on a hard-readonly device, 0 without taking any lock
 * on a local mount (*@seqno is not written in that case), a negative
 * error if the cluster lock fails, or the lock call's result otherwise.
 *
 * With the lock held, *@seqno is read from the LVB when the LVB is
 * valid and carries OCFS2_ORPHAN_LVB_VERSION; otherwise it is set to
 * the locally stored os_seqno plus one.
 */
int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
{
	struct ocfs2_lock_res *lockres;
	struct ocfs2_orphan_scan_lvb *lvb;
	int status;

	if (ocfs2_is_hard_readonly(osb))
		return -EROFS;

	if (ocfs2_mount_local(osb))
		return 0;

	lockres = &osb->osb_orphan_scan.os_lockres;
	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
	if (status < 0)
		return status;

	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
	if (!ocfs2_dlm_lvb_valid(&lockres->l_lksb) ||
	    lvb->lvb_version != OCFS2_ORPHAN_LVB_VERSION)
		*seqno = osb->osb_orphan_scan.os_seqno + 1;
	else
		*seqno = be32_to_cpu(lvb->lvb_os_seqno);

	return status;
}
277583273932SSrinivas Eeda 
/*
 * Store @seqno (big-endian) and OCFS2_ORPHAN_LVB_VERSION in the orphan
 * scan lock's LVB, then drop the DLM_LOCK_EX cluster lock.
 *
 * Does nothing on hard-readonly devices and local mounts.
 */
void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
{
	struct ocfs2_lock_res *lockres;
	struct ocfs2_orphan_scan_lvb *lvb;

	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
		return;

	lockres = &osb->osb_orphan_scan.os_lockres;
	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
	lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
	lvb->lvb_os_seqno = cpu_to_be32(seqno);
	ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
}
278983273932SSrinivas Eeda 
ocfs2_super_lock(struct ocfs2_super * osb,int ex)2790ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb,
2791ccd979bdSMark Fasheh 		     int ex)
2792ccd979bdSMark Fasheh {
2793c271c5c2SSunil Mushran 	int status = 0;
2794bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2795ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2796ccd979bdSMark Fasheh 
2797ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2798ccd979bdSMark Fasheh 		return -EROFS;
2799ccd979bdSMark Fasheh 
2800c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2801c271c5c2SSunil Mushran 		goto bail;
2802c271c5c2SSunil Mushran 
2803ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2804ccd979bdSMark Fasheh 	if (status < 0) {
2805ccd979bdSMark Fasheh 		mlog_errno(status);
2806ccd979bdSMark Fasheh 		goto bail;
2807ccd979bdSMark Fasheh 	}
2808ccd979bdSMark Fasheh 
2809ccd979bdSMark Fasheh 	/* The super block lock path is really in the best position to
2810ccd979bdSMark Fasheh 	 * know when resources covered by the lock need to be
2811ccd979bdSMark Fasheh 	 * refreshed, so we do it here. Of course, making sense of
2812ccd979bdSMark Fasheh 	 * everything is up to the caller :) */
2813ccd979bdSMark Fasheh 	status = ocfs2_should_refresh_lock_res(lockres);
2814ccd979bdSMark Fasheh 	if (status) {
28158e8a4603SMark Fasheh 		status = ocfs2_refresh_slot_info(osb);
2816ccd979bdSMark Fasheh 
2817ccd979bdSMark Fasheh 		ocfs2_complete_lock_res_refresh(lockres, status);
2818ccd979bdSMark Fasheh 
28193278bb74SJunxiao Bi 		if (status < 0) {
28203278bb74SJunxiao Bi 			ocfs2_cluster_unlock(osb, lockres, level);
2821ccd979bdSMark Fasheh 			mlog_errno(status);
28223278bb74SJunxiao Bi 		}
28238ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2824ccd979bdSMark Fasheh 	}
2825ccd979bdSMark Fasheh bail:
2826ccd979bdSMark Fasheh 	return status;
2827ccd979bdSMark Fasheh }
2828ccd979bdSMark Fasheh 
ocfs2_super_unlock(struct ocfs2_super * osb,int ex)2829ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb,
2830ccd979bdSMark Fasheh 			int ex)
2831ccd979bdSMark Fasheh {
2832bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2833ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2834ccd979bdSMark Fasheh 
2835c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2836ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(osb, lockres, level);
2837ccd979bdSMark Fasheh }
2838ccd979bdSMark Fasheh 
ocfs2_rename_lock(struct ocfs2_super * osb)2839ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb)
2840ccd979bdSMark Fasheh {
2841ccd979bdSMark Fasheh 	int status;
2842ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2843ccd979bdSMark Fasheh 
2844ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2845ccd979bdSMark Fasheh 		return -EROFS;
2846ccd979bdSMark Fasheh 
2847c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2848c271c5c2SSunil Mushran 		return 0;
2849c271c5c2SSunil Mushran 
2850bd3e7610SJoel Becker 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2851ccd979bdSMark Fasheh 	if (status < 0)
2852ccd979bdSMark Fasheh 		mlog_errno(status);
2853ccd979bdSMark Fasheh 
2854ccd979bdSMark Fasheh 	return status;
2855ccd979bdSMark Fasheh }
2856ccd979bdSMark Fasheh 
ocfs2_rename_unlock(struct ocfs2_super * osb)2857ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb)
2858ccd979bdSMark Fasheh {
2859ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2860ccd979bdSMark Fasheh 
2861c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2862bd3e7610SJoel Becker 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2863ccd979bdSMark Fasheh }
2864ccd979bdSMark Fasheh 
ocfs2_nfs_sync_lock(struct ocfs2_super * osb,int ex)28656ca497a8Swengang wang int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
28666ca497a8Swengang wang {
28676ca497a8Swengang wang 	int status;
28686ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
28696ca497a8Swengang wang 
28706ca497a8Swengang wang 	if (ocfs2_is_hard_readonly(osb))
28716ca497a8Swengang wang 		return -EROFS;
28726ca497a8Swengang wang 
28734cd9973fSJunxiao Bi 	if (ex)
28744cd9973fSJunxiao Bi 		down_write(&osb->nfs_sync_rwlock);
28754cd9973fSJunxiao Bi 	else
28764cd9973fSJunxiao Bi 		down_read(&osb->nfs_sync_rwlock);
28774cd9973fSJunxiao Bi 
28786ca497a8Swengang wang 	if (ocfs2_mount_local(osb))
28796ca497a8Swengang wang 		return 0;
28806ca497a8Swengang wang 
28816ca497a8Swengang wang 	status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
28826ca497a8Swengang wang 				    0, 0);
288357c720d4SPavel Machek 	if (status < 0) {
28846ca497a8Swengang wang 		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
28856ca497a8Swengang wang 
288657c720d4SPavel Machek 		if (ex)
288757c720d4SPavel Machek 			up_write(&osb->nfs_sync_rwlock);
288857c720d4SPavel Machek 		else
288957c720d4SPavel Machek 			up_read(&osb->nfs_sync_rwlock);
289057c720d4SPavel Machek 	}
289157c720d4SPavel Machek 
28926ca497a8Swengang wang 	return status;
28936ca497a8Swengang wang }
28946ca497a8Swengang wang 
ocfs2_nfs_sync_unlock(struct ocfs2_super * osb,int ex)28956ca497a8Swengang wang void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
28966ca497a8Swengang wang {
28976ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
28986ca497a8Swengang wang 
28996ca497a8Swengang wang 	if (!ocfs2_mount_local(osb))
29006ca497a8Swengang wang 		ocfs2_cluster_unlock(osb, lockres,
29016ca497a8Swengang wang 				     ex ? LKM_EXMODE : LKM_PRMODE);
29024cd9973fSJunxiao Bi 	if (ex)
29034cd9973fSJunxiao Bi 		up_write(&osb->nfs_sync_rwlock);
29044cd9973fSJunxiao Bi 	else
29054cd9973fSJunxiao Bi 		up_read(&osb->nfs_sync_rwlock);
29066ca497a8Swengang wang }
29076ca497a8Swengang wang 
ocfs2_trim_fs_lock(struct ocfs2_super * osb,struct ocfs2_trim_fs_info * info,int trylock)29084882abebSGang He int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
29094882abebSGang He 		       struct ocfs2_trim_fs_info *info, int trylock)
29104882abebSGang He {
29114882abebSGang He 	int status;
29124882abebSGang He 	struct ocfs2_trim_fs_lvb *lvb;
29134882abebSGang He 	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
29144882abebSGang He 
29154882abebSGang He 	if (info)
29164882abebSGang He 		info->tf_valid = 0;
29174882abebSGang He 
29184882abebSGang He 	if (ocfs2_is_hard_readonly(osb))
29194882abebSGang He 		return -EROFS;
29204882abebSGang He 
29214882abebSGang He 	if (ocfs2_mount_local(osb))
29224882abebSGang He 		return 0;
29234882abebSGang He 
29244882abebSGang He 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX,
29254882abebSGang He 				    trylock ? DLM_LKF_NOQUEUE : 0, 0);
29264882abebSGang He 	if (status < 0) {
29274882abebSGang He 		if (status != -EAGAIN)
29284882abebSGang He 			mlog_errno(status);
29294882abebSGang He 		return status;
29304882abebSGang He 	}
29314882abebSGang He 
29324882abebSGang He 	if (info) {
29334882abebSGang He 		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
29344882abebSGang He 		if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
29354882abebSGang He 		    lvb->lvb_version == OCFS2_TRIMFS_LVB_VERSION) {
29364882abebSGang He 			info->tf_valid = 1;
29374882abebSGang He 			info->tf_success = lvb->lvb_success;
29384882abebSGang He 			info->tf_nodenum = be32_to_cpu(lvb->lvb_nodenum);
29394882abebSGang He 			info->tf_start = be64_to_cpu(lvb->lvb_start);
29404882abebSGang He 			info->tf_len = be64_to_cpu(lvb->lvb_len);
29414882abebSGang He 			info->tf_minlen = be64_to_cpu(lvb->lvb_minlen);
29424882abebSGang He 			info->tf_trimlen = be64_to_cpu(lvb->lvb_trimlen);
29434882abebSGang He 		}
29444882abebSGang He 	}
29454882abebSGang He 
29464882abebSGang He 	return status;
29474882abebSGang He }
29484882abebSGang He 
ocfs2_trim_fs_unlock(struct ocfs2_super * osb,struct ocfs2_trim_fs_info * info)29494882abebSGang He void ocfs2_trim_fs_unlock(struct ocfs2_super *osb,
29504882abebSGang He 			  struct ocfs2_trim_fs_info *info)
29514882abebSGang He {
29524882abebSGang He 	struct ocfs2_trim_fs_lvb *lvb;
29534882abebSGang He 	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
29544882abebSGang He 
29554882abebSGang He 	if (ocfs2_mount_local(osb))
29564882abebSGang He 		return;
29574882abebSGang He 
29584882abebSGang He 	if (info) {
29594882abebSGang He 		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
29604882abebSGang He 		lvb->lvb_version = OCFS2_TRIMFS_LVB_VERSION;
29614882abebSGang He 		lvb->lvb_success = info->tf_success;
29624882abebSGang He 		lvb->lvb_nodenum = cpu_to_be32(info->tf_nodenum);
29634882abebSGang He 		lvb->lvb_start = cpu_to_be64(info->tf_start);
29644882abebSGang He 		lvb->lvb_len = cpu_to_be64(info->tf_len);
29654882abebSGang He 		lvb->lvb_minlen = cpu_to_be64(info->tf_minlen);
29664882abebSGang He 		lvb->lvb_trimlen = cpu_to_be64(info->tf_trimlen);
29674882abebSGang He 	}
29684882abebSGang He 
29694882abebSGang He 	ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
29704882abebSGang He }
29714882abebSGang He 
ocfs2_dentry_lock(struct dentry * dentry,int ex)2972d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2973d680efe9SMark Fasheh {
2974d680efe9SMark Fasheh 	int ret;
2975bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2976d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2977d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2978d680efe9SMark Fasheh 
2979d680efe9SMark Fasheh 	BUG_ON(!dl);
2980d680efe9SMark Fasheh 
298103efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
298203efed8aSTiger Yang 		if (ex)
2983d680efe9SMark Fasheh 			return -EROFS;
298403efed8aSTiger Yang 		return 0;
298503efed8aSTiger Yang 	}
2986d680efe9SMark Fasheh 
2987c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2988c271c5c2SSunil Mushran 		return 0;
2989c271c5c2SSunil Mushran 
2990d680efe9SMark Fasheh 	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2991d680efe9SMark Fasheh 	if (ret < 0)
2992d680efe9SMark Fasheh 		mlog_errno(ret);
2993d680efe9SMark Fasheh 
2994d680efe9SMark Fasheh 	return ret;
2995d680efe9SMark Fasheh }
2996d680efe9SMark Fasheh 
ocfs2_dentry_unlock(struct dentry * dentry,int ex)2997d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2998d680efe9SMark Fasheh {
2999bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3000d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
3001d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
3002d680efe9SMark Fasheh 
300303efed8aSTiger Yang 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
3004d680efe9SMark Fasheh 		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
3005d680efe9SMark Fasheh }
3006d680efe9SMark Fasheh 
3007ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because
3008ccd979bdSMark Fasheh  * open references on the debug inodes can live on after a mount, so
3009ccd979bdSMark Fasheh  * we can't rely on the ocfs2_super to always exist. */
ocfs2_dlm_debug_free(struct kref * kref)3010ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref)
3011ccd979bdSMark Fasheh {
3012ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
3013ccd979bdSMark Fasheh 
3014ccd979bdSMark Fasheh 	dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
3015ccd979bdSMark Fasheh 
3016ccd979bdSMark Fasheh 	kfree(dlm_debug);
3017ccd979bdSMark Fasheh }
3018ccd979bdSMark Fasheh 
ocfs2_put_dlm_debug(struct ocfs2_dlm_debug * dlm_debug)3019ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
3020ccd979bdSMark Fasheh {
3021ccd979bdSMark Fasheh 	if (dlm_debug)
3022ccd979bdSMark Fasheh 		kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
3023ccd979bdSMark Fasheh }
3024ccd979bdSMark Fasheh 
ocfs2_get_dlm_debug(struct ocfs2_dlm_debug * debug)3025ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
3026ccd979bdSMark Fasheh {
3027ccd979bdSMark Fasheh 	kref_get(&debug->d_refcnt);
3028ccd979bdSMark Fasheh }
3029ccd979bdSMark Fasheh 
ocfs2_new_dlm_debug(void)3030ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
3031ccd979bdSMark Fasheh {
3032ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
3033ccd979bdSMark Fasheh 
3034ccd979bdSMark Fasheh 	dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
3035ccd979bdSMark Fasheh 	if (!dlm_debug) {
3036ccd979bdSMark Fasheh 		mlog_errno(-ENOMEM);
3037ccd979bdSMark Fasheh 		goto out;
3038ccd979bdSMark Fasheh 	}
3039ccd979bdSMark Fasheh 
3040ccd979bdSMark Fasheh 	kref_init(&dlm_debug->d_refcnt);
3041ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
30428056773aSGang He 	dlm_debug->d_filter_secs = 0;
3043ccd979bdSMark Fasheh out:
3044ccd979bdSMark Fasheh 	return dlm_debug;
3045ccd979bdSMark Fasheh }
3046ccd979bdSMark Fasheh 
3047ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. */
3048ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv {
3049ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *p_dlm_debug;
3050ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_iter_res;
3051ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_tmp_res;
3052ccd979bdSMark Fasheh };
3053ccd979bdSMark Fasheh 
ocfs2_dlm_next_res(struct ocfs2_lock_res * start,struct ocfs2_dlm_seq_priv * priv)3054ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
3055ccd979bdSMark Fasheh 						 struct ocfs2_dlm_seq_priv *priv)
3056ccd979bdSMark Fasheh {
3057ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter, *ret = NULL;
3058ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
3059ccd979bdSMark Fasheh 
3060ccd979bdSMark Fasheh 	assert_spin_locked(&ocfs2_dlm_tracking_lock);
3061ccd979bdSMark Fasheh 
3062ccd979bdSMark Fasheh 	list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
3063ccd979bdSMark Fasheh 		/* discover the head of the list */
3064ccd979bdSMark Fasheh 		if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
3065ccd979bdSMark Fasheh 			mlog(0, "End of list found, %p\n", ret);
3066ccd979bdSMark Fasheh 			break;
3067ccd979bdSMark Fasheh 		}
3068ccd979bdSMark Fasheh 
3069ccd979bdSMark Fasheh 		/* We track our "dummy" iteration lockres' by a NULL
3070ccd979bdSMark Fasheh 		 * l_ops field. */
3071ccd979bdSMark Fasheh 		if (iter->l_ops != NULL) {
3072ccd979bdSMark Fasheh 			ret = iter;
3073ccd979bdSMark Fasheh 			break;
3074ccd979bdSMark Fasheh 		}
3075ccd979bdSMark Fasheh 	}
3076ccd979bdSMark Fasheh 
3077ccd979bdSMark Fasheh 	return ret;
3078ccd979bdSMark Fasheh }
3079ccd979bdSMark Fasheh 
/*
 * seq_file ->start(): locate the lockres following the dummy cursor
 * and return a private snapshot of it, or NULL when none is left.
 * @pos is not consulted; the position is kept by the cursor itself.
 */
static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
{
	struct ocfs2_dlm_seq_priv *priv = m->private;
	struct ocfs2_lock_res *next, *snapshot = NULL;

	spin_lock(&ocfs2_dlm_tracking_lock);
	next = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
	if (next) {
		/* A lockres lives only as long as its container (inode,
		 * ocfs2_super, ...), so copy it out while the spinlock
		 * still pins it.  Pointers inside the copy cannot be
		 * trusted afterwards, but the information we want isn't
		 * typically held in them. */
		priv->p_tmp_res = *next;
		snapshot = &priv->p_tmp_res;
	}
	spin_unlock(&ocfs2_dlm_tracking_lock);

	return snapshot;
}
3102ccd979bdSMark Fasheh 
/*
 * seq_file ->stop(): intentionally empty.  ->start() and ->next()
 * release ocfs2_dlm_tracking_lock before returning, so there is
 * nothing to undo here.
 */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
}
3106ccd979bdSMark Fasheh 
/*
 * seq_file ->next(): advance to the lockres following @v.
 *
 * @v is the snapshot returned by the previous ->start()/->next() call.
 * The dummy cursor lockres is unlinked and, if another real lockres
 * exists, re-linked right after it so a later ->start() resumes from
 * there.  Returns a fresh snapshot of that lockres, or NULL at the end
 * of the list.
 */
static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct ocfs2_dlm_seq_priv *priv = m->private;
	struct ocfs2_lock_res *iter = v;
	struct ocfs2_lock_res *dummy = &priv->p_iter_res;

	/*
	 * The seq_file core expects ->next() to advance the position index
	 * on every call.  The value is not used by this iterator (the
	 * position is kept by the dummy cursor), but it must still move.
	 */
	(*pos)++;

	spin_lock(&ocfs2_dlm_tracking_lock);
	iter = ocfs2_dlm_next_res(iter, priv);
	list_del_init(&dummy->l_debug_list);
	if (iter) {
		list_add(&dummy->l_debug_list, &iter->l_debug_list);
		/* Copy out under the lock; see ocfs2_dlm_seq_start(). */
		priv->p_tmp_res = *iter;
		iter = &priv->p_tmp_res;
	}
	spin_unlock(&ocfs2_dlm_tracking_lock);

	return iter;
}
3125ccd979bdSMark Fasheh 
31265bc970e8SSunil Mushran /*
31275bc970e8SSunil Mushran  * Version is used by debugfs.ocfs2 to determine the format being used
31285bc970e8SSunil Mushran  *
31295bc970e8SSunil Mushran  * New in version 2
31305bc970e8SSunil Mushran  *	- Lock stats printed
31315bc970e8SSunil Mushran  * New in version 3
31325bc970e8SSunil Mushran  *	- Max time in lock stats is in usecs (instead of nsecs)
31338a7f5f4cSGang He  * New in version 4
31345da844a2SGang He  *	- Add last pr/ex unlock times and first lock wait time in usecs
31355bc970e8SSunil Mushran  */
31368a7f5f4cSGang He #define OCFS2_DLM_DEBUG_STR_VERSION 4
ocfs2_dlm_seq_show(struct seq_file * m,void * v)3137ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
3138ccd979bdSMark Fasheh {
3139ccd979bdSMark Fasheh 	int i;
3140ccd979bdSMark Fasheh 	char *lvb;
3141ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = v;
31428056773aSGang He #ifdef CONFIG_OCFS2_FS_STATS
31438056773aSGang He 	u64 now, last;
31448056773aSGang He 	struct ocfs2_dlm_debug *dlm_debug =
31458056773aSGang He 			((struct ocfs2_dlm_seq_priv *)m->private)->p_dlm_debug;
31468056773aSGang He #endif
3147ccd979bdSMark Fasheh 
3148ccd979bdSMark Fasheh 	if (!lockres)
3149ccd979bdSMark Fasheh 		return -EINVAL;
3150ccd979bdSMark Fasheh 
31518056773aSGang He #ifdef CONFIG_OCFS2_FS_STATS
31525da844a2SGang He 	if (!lockres->l_lock_wait && dlm_debug->d_filter_secs) {
31538056773aSGang He 		now = ktime_to_us(ktime_get_real());
31548056773aSGang He 		if (lockres->l_lock_prmode.ls_last >
31558056773aSGang He 		    lockres->l_lock_exmode.ls_last)
31568056773aSGang He 			last = lockres->l_lock_prmode.ls_last;
31578056773aSGang He 		else
31588056773aSGang He 			last = lockres->l_lock_exmode.ls_last;
31598056773aSGang He 		/*
31608056773aSGang He 		 * Use d_filter_secs field to filter lock resources dump,
31618056773aSGang He 		 * the default d_filter_secs(0) value filters nothing,
31628056773aSGang He 		 * otherwise, only dump the last N seconds active lock
31638056773aSGang He 		 * resources.
31648056773aSGang He 		 */
31658056773aSGang He 		if (div_u64(now - last, 1000000) > dlm_debug->d_filter_secs)
31668056773aSGang He 			return 0;
31678056773aSGang He 	}
31688056773aSGang He #endif
31698056773aSGang He 
3170d680efe9SMark Fasheh 	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
3171d680efe9SMark Fasheh 
3172d680efe9SMark Fasheh 	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
3173d680efe9SMark Fasheh 		seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
3174d680efe9SMark Fasheh 			   lockres->l_name,
3175d680efe9SMark Fasheh 			   (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
3176d680efe9SMark Fasheh 	else
3177d680efe9SMark Fasheh 		seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
3178d680efe9SMark Fasheh 
3179d680efe9SMark Fasheh 	seq_printf(m, "%d\t"
3180ccd979bdSMark Fasheh 		   "0x%lx\t"
3181ccd979bdSMark Fasheh 		   "0x%x\t"
3182ccd979bdSMark Fasheh 		   "0x%x\t"
3183ccd979bdSMark Fasheh 		   "%u\t"
3184ccd979bdSMark Fasheh 		   "%u\t"
3185ccd979bdSMark Fasheh 		   "%d\t"
3186ccd979bdSMark Fasheh 		   "%d\t",
3187ccd979bdSMark Fasheh 		   lockres->l_level,
3188ccd979bdSMark Fasheh 		   lockres->l_flags,
3189ccd979bdSMark Fasheh 		   lockres->l_action,
3190ccd979bdSMark Fasheh 		   lockres->l_unlock_action,
3191ccd979bdSMark Fasheh 		   lockres->l_ro_holders,
3192ccd979bdSMark Fasheh 		   lockres->l_ex_holders,
3193ccd979bdSMark Fasheh 		   lockres->l_requested,
3194ccd979bdSMark Fasheh 		   lockres->l_blocking);
3195ccd979bdSMark Fasheh 
3196ccd979bdSMark Fasheh 	/* Dump the raw LVB */
31978f2c9c1bSJoel Becker 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3198ccd979bdSMark Fasheh 	for(i = 0; i < DLM_LVB_LEN; i++)
3199ccd979bdSMark Fasheh 		seq_printf(m, "0x%x\t", lvb[i]);
3200ccd979bdSMark Fasheh 
32018ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
32025bc970e8SSunil Mushran # define lock_num_prmode(_l)		((_l)->l_lock_prmode.ls_gets)
32035bc970e8SSunil Mushran # define lock_num_exmode(_l)		((_l)->l_lock_exmode.ls_gets)
32045bc970e8SSunil Mushran # define lock_num_prmode_failed(_l)	((_l)->l_lock_prmode.ls_fail)
32055bc970e8SSunil Mushran # define lock_num_exmode_failed(_l)	((_l)->l_lock_exmode.ls_fail)
32065bc970e8SSunil Mushran # define lock_total_prmode(_l)		((_l)->l_lock_prmode.ls_total)
32075bc970e8SSunil Mushran # define lock_total_exmode(_l)		((_l)->l_lock_exmode.ls_total)
32085bc970e8SSunil Mushran # define lock_max_prmode(_l)		((_l)->l_lock_prmode.ls_max)
32095bc970e8SSunil Mushran # define lock_max_exmode(_l)		((_l)->l_lock_exmode.ls_max)
32105bc970e8SSunil Mushran # define lock_refresh(_l)		((_l)->l_lock_refresh)
32118a7f5f4cSGang He # define lock_last_prmode(_l)		((_l)->l_lock_prmode.ls_last)
32128a7f5f4cSGang He # define lock_last_exmode(_l)		((_l)->l_lock_exmode.ls_last)
32135da844a2SGang He # define lock_wait(_l)			((_l)->l_lock_wait)
32148ddb7b00SSunil Mushran #else
32155bc970e8SSunil Mushran # define lock_num_prmode(_l)		(0)
32165bc970e8SSunil Mushran # define lock_num_exmode(_l)		(0)
32178ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l)	(0)
32188ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l)	(0)
3219dd25e55eSRandy Dunlap # define lock_total_prmode(_l)		(0ULL)
3220dd25e55eSRandy Dunlap # define lock_total_exmode(_l)		(0ULL)
32218ddb7b00SSunil Mushran # define lock_max_prmode(_l)		(0)
32228ddb7b00SSunil Mushran # define lock_max_exmode(_l)		(0)
32238ddb7b00SSunil Mushran # define lock_refresh(_l)		(0)
32248a7f5f4cSGang He # define lock_last_prmode(_l)		(0ULL)
32258a7f5f4cSGang He # define lock_last_exmode(_l)		(0ULL)
32265da844a2SGang He # define lock_wait(_l)			(0ULL)
32278ddb7b00SSunil Mushran #endif
32288ddb7b00SSunil Mushran 	/* The following seq_print was added in version 2 of this output */
32295bc970e8SSunil Mushran 	seq_printf(m, "%u\t"
32305bc970e8SSunil Mushran 		   "%u\t"
32318ddb7b00SSunil Mushran 		   "%u\t"
32328ddb7b00SSunil Mushran 		   "%u\t"
32338ddb7b00SSunil Mushran 		   "%llu\t"
32348ddb7b00SSunil Mushran 		   "%llu\t"
32358ddb7b00SSunil Mushran 		   "%u\t"
32368ddb7b00SSunil Mushran 		   "%u\t"
32378a7f5f4cSGang He 		   "%u\t"
32388a7f5f4cSGang He 		   "%llu\t"
32395da844a2SGang He 		   "%llu\t"
32408a7f5f4cSGang He 		   "%llu\t",
32418ddb7b00SSunil Mushran 		   lock_num_prmode(lockres),
32428ddb7b00SSunil Mushran 		   lock_num_exmode(lockres),
32438ddb7b00SSunil Mushran 		   lock_num_prmode_failed(lockres),
32448ddb7b00SSunil Mushran 		   lock_num_exmode_failed(lockres),
32458ddb7b00SSunil Mushran 		   lock_total_prmode(lockres),
32468ddb7b00SSunil Mushran 		   lock_total_exmode(lockres),
32478ddb7b00SSunil Mushran 		   lock_max_prmode(lockres),
32488ddb7b00SSunil Mushran 		   lock_max_exmode(lockres),
32498a7f5f4cSGang He 		   lock_refresh(lockres),
32508a7f5f4cSGang He 		   lock_last_prmode(lockres),
32515da844a2SGang He 		   lock_last_exmode(lockres),
32525da844a2SGang He 		   lock_wait(lockres));
32538ddb7b00SSunil Mushran 
3254ccd979bdSMark Fasheh 	/* End the line */
3255ccd979bdSMark Fasheh 	seq_printf(m, "\n");
3256ccd979bdSMark Fasheh 	return 0;
3257ccd979bdSMark Fasheh }
3258ccd979bdSMark Fasheh 
325990d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = {
3260ccd979bdSMark Fasheh 	.start =	ocfs2_dlm_seq_start,
3261ccd979bdSMark Fasheh 	.stop =		ocfs2_dlm_seq_stop,
3262ccd979bdSMark Fasheh 	.next =		ocfs2_dlm_seq_next,
3263ccd979bdSMark Fasheh 	.show =		ocfs2_dlm_seq_show,
3264ccd979bdSMark Fasheh };
3265ccd979bdSMark Fasheh 
ocfs2_dlm_debug_release(struct inode * inode,struct file * file)3266ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
3267ccd979bdSMark Fasheh {
326833fa1d90SJoe Perches 	struct seq_file *seq = file->private_data;
3269ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = seq->private;
3270ccd979bdSMark Fasheh 	struct ocfs2_lock_res *res = &priv->p_iter_res;
3271ccd979bdSMark Fasheh 
3272ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
3273ccd979bdSMark Fasheh 	ocfs2_put_dlm_debug(priv->p_dlm_debug);
3274ccd979bdSMark Fasheh 	return seq_release_private(inode, file);
3275ccd979bdSMark Fasheh }
3276ccd979bdSMark Fasheh 
ocfs2_dlm_debug_open(struct inode * inode,struct file * file)3277ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
3278ccd979bdSMark Fasheh {
3279ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv;
3280ccd979bdSMark Fasheh 	struct ocfs2_super *osb;
3281ccd979bdSMark Fasheh 
32821848cb55SRob Jones 	priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv));
3283ccd979bdSMark Fasheh 	if (!priv) {
32841848cb55SRob Jones 		mlog_errno(-ENOMEM);
32851848cb55SRob Jones 		return -ENOMEM;
3286ccd979bdSMark Fasheh 	}
32871848cb55SRob Jones 
32888e18e294STheodore Ts'o 	osb = inode->i_private;
3289ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(osb->osb_dlm_debug);
3290ccd979bdSMark Fasheh 	priv->p_dlm_debug = osb->osb_dlm_debug;
3291ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
3292ccd979bdSMark Fasheh 
3293ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(&priv->p_iter_res,
3294ccd979bdSMark Fasheh 				   priv->p_dlm_debug);
3295ccd979bdSMark Fasheh 
32961848cb55SRob Jones 	return 0;
3297ccd979bdSMark Fasheh }
3298ccd979bdSMark Fasheh 
32994b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = {
3300ccd979bdSMark Fasheh 	.open =		ocfs2_dlm_debug_open,
3301ccd979bdSMark Fasheh 	.release =	ocfs2_dlm_debug_release,
3302ccd979bdSMark Fasheh 	.read =		seq_read,
3303ccd979bdSMark Fasheh 	.llseek =	seq_lseek,
3304ccd979bdSMark Fasheh };
3305ccd979bdSMark Fasheh 
ocfs2_dlm_init_debug(struct ocfs2_super * osb)3306e581595eSGreg Kroah-Hartman static void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
3307ccd979bdSMark Fasheh {
3308ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
3309ccd979bdSMark Fasheh 
33105e7a3ed9SGreg Kroah-Hartman 	debugfs_create_file("locking_state", S_IFREG|S_IRUSR,
33115e7a3ed9SGreg Kroah-Hartman 			    osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops);
3312ccd979bdSMark Fasheh 
33135e7a3ed9SGreg Kroah-Hartman 	debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
33148056773aSGang He 			   &dlm_debug->d_filter_secs);
3315b73eba2aSGang He 	ocfs2_get_dlm_debug(dlm_debug);
3316ccd979bdSMark Fasheh }
3317ccd979bdSMark Fasheh 
ocfs2_dlm_shutdown_debug(struct ocfs2_super * osb)3318ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
3319ccd979bdSMark Fasheh {
3320ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
3321ccd979bdSMark Fasheh 
33225e7a3ed9SGreg Kroah-Hartman 	if (dlm_debug)
3323ccd979bdSMark Fasheh 		ocfs2_put_dlm_debug(dlm_debug);
3324ccd979bdSMark Fasheh }
3325ccd979bdSMark Fasheh 
ocfs2_dlm_init(struct ocfs2_super * osb)3326ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb)
3327ccd979bdSMark Fasheh {
3328c271c5c2SSunil Mushran 	int status = 0;
33294670c46dSJoel Becker 	struct ocfs2_cluster_connection *conn = NULL;
3330ccd979bdSMark Fasheh 
33310abd6d18SMark Fasheh 	if (ocfs2_mount_local(osb)) {
33320abd6d18SMark Fasheh 		osb->node_num = 0;
3333c271c5c2SSunil Mushran 		goto local;
33340abd6d18SMark Fasheh 	}
3335c271c5c2SSunil Mushran 
3336e581595eSGreg Kroah-Hartman 	ocfs2_dlm_init_debug(osb);
3337ccd979bdSMark Fasheh 
333834d024f8SMark Fasheh 	/* launch downconvert thread */
33395afc44e2SJoseph Qi 	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s",
33405afc44e2SJoseph Qi 			osb->uuid_str);
334134d024f8SMark Fasheh 	if (IS_ERR(osb->dc_task)) {
334234d024f8SMark Fasheh 		status = PTR_ERR(osb->dc_task);
334334d024f8SMark Fasheh 		osb->dc_task = NULL;
3344ccd979bdSMark Fasheh 		mlog_errno(status);
3345ccd979bdSMark Fasheh 		goto bail;
3346ccd979bdSMark Fasheh 	}
3347ccd979bdSMark Fasheh 
3348ccd979bdSMark Fasheh 	/* for now, uuid == domain */
33499c6c877cSJoel Becker 	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
3350c74a3bddSGoldwyn Rodrigues 				       osb->osb_cluster_name,
3351c74a3bddSGoldwyn Rodrigues 				       strlen(osb->osb_cluster_name),
33529c6c877cSJoel Becker 				       osb->uuid_str,
33534670c46dSJoel Becker 				       strlen(osb->uuid_str),
3354553b5eb9SJoel Becker 				       &lproto, ocfs2_do_node_down, osb,
33554670c46dSJoel Becker 				       &conn);
33564670c46dSJoel Becker 	if (status) {
3357ccd979bdSMark Fasheh 		mlog_errno(status);
3358ccd979bdSMark Fasheh 		goto bail;
3359ccd979bdSMark Fasheh 	}
3360ccd979bdSMark Fasheh 
33613e834151SGoldwyn Rodrigues 	status = ocfs2_cluster_this_node(conn, &osb->node_num);
33620abd6d18SMark Fasheh 	if (status < 0) {
33630abd6d18SMark Fasheh 		mlog_errno(status);
33640abd6d18SMark Fasheh 		mlog(ML_ERROR,
33650abd6d18SMark Fasheh 		     "could not find this host's node number\n");
3366286eaa95SJoel Becker 		ocfs2_cluster_disconnect(conn, 0);
33670abd6d18SMark Fasheh 		goto bail;
33680abd6d18SMark Fasheh 	}
33690abd6d18SMark Fasheh 
3370c271c5c2SSunil Mushran local:
3371ccd979bdSMark Fasheh 	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
3372ccd979bdSMark Fasheh 	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
33734cd9973fSJunxiao Bi 	ocfs2_nfs_sync_lock_init(osb);
337483273932SSrinivas Eeda 	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
3375ccd979bdSMark Fasheh 
33764670c46dSJoel Becker 	osb->cconn = conn;
3377ccd979bdSMark Fasheh bail:
3378ccd979bdSMark Fasheh 	if (status < 0) {
3379ccd979bdSMark Fasheh 		ocfs2_dlm_shutdown_debug(osb);
338034d024f8SMark Fasheh 		if (osb->dc_task)
338134d024f8SMark Fasheh 			kthread_stop(osb->dc_task);
3382ccd979bdSMark Fasheh 	}
3383ccd979bdSMark Fasheh 
3384ccd979bdSMark Fasheh 	return status;
3385ccd979bdSMark Fasheh }
3386ccd979bdSMark Fasheh 
ocfs2_dlm_shutdown(struct ocfs2_super * osb,int hangup_pending)3387286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
3388286eaa95SJoel Becker 			int hangup_pending)
3389ccd979bdSMark Fasheh {
3390ccd979bdSMark Fasheh 	ocfs2_drop_osb_locks(osb);
3391ccd979bdSMark Fasheh 
33924670c46dSJoel Becker 	/*
33934670c46dSJoel Becker 	 * Now that we have dropped all locks and ocfs2_dismount_volume()
33944670c46dSJoel Becker 	 * has disabled recovery, the DLM won't be talking to us.  It's
33954670c46dSJoel Becker 	 * safe to tear things down before disconnecting the cluster.
33964670c46dSJoel Becker 	 */
33974670c46dSJoel Becker 
339834d024f8SMark Fasheh 	if (osb->dc_task) {
339934d024f8SMark Fasheh 		kthread_stop(osb->dc_task);
340034d024f8SMark Fasheh 		osb->dc_task = NULL;
3401ccd979bdSMark Fasheh 	}
3402ccd979bdSMark Fasheh 
3403ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_super_lockres);
3404ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_rename_lockres);
34056ca497a8Swengang wang 	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
340683273932SSrinivas Eeda 	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
3407ccd979bdSMark Fasheh 
3408550842ccSHeming Zhao 	if (osb->cconn) {
3409286eaa95SJoel Becker 		ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
34104670c46dSJoel Becker 		osb->cconn = NULL;
3411ccd979bdSMark Fasheh 
3412ccd979bdSMark Fasheh 		ocfs2_dlm_shutdown_debug(osb);
3413ccd979bdSMark Fasheh 	}
3414550842ccSHeming Zhao }
3415ccd979bdSMark Fasheh 
ocfs2_drop_lock(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres)3416ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb,
34170d5dc6c2SMark Fasheh 			   struct ocfs2_lock_res *lockres)
3418ccd979bdSMark Fasheh {
34197431cd7eSJoel Becker 	int ret;
3420ccd979bdSMark Fasheh 	unsigned long flags;
3421bd3e7610SJoel Becker 	u32 lkm_flags = 0;
3422ccd979bdSMark Fasheh 
3423ccd979bdSMark Fasheh 	/* We didn't get anywhere near actually using this lockres. */
3424ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
3425ccd979bdSMark Fasheh 		goto out;
3426ccd979bdSMark Fasheh 
3427b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3428bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
3429b80fc012SMark Fasheh 
3430ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3431ccd979bdSMark Fasheh 
3432ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
3433ccd979bdSMark Fasheh 			"lockres %s, flags 0x%lx\n",
3434ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3435ccd979bdSMark Fasheh 
3436ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_BUSY) {
3437ccd979bdSMark Fasheh 		mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
3438ccd979bdSMark Fasheh 		     "%u, unlock_action = %u\n",
3439ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags, lockres->l_action,
3440ccd979bdSMark Fasheh 		     lockres->l_unlock_action);
3441ccd979bdSMark Fasheh 
3442ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3443ccd979bdSMark Fasheh 
3444ccd979bdSMark Fasheh 		/* XXX: Today we just wait on any busy
3445ccd979bdSMark Fasheh 		 * locks... Perhaps we need to cancel converts in the
3446ccd979bdSMark Fasheh 		 * future? */
3447ccd979bdSMark Fasheh 		ocfs2_wait_on_busy_lock(lockres);
3448ccd979bdSMark Fasheh 
3449ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3450ccd979bdSMark Fasheh 	}
3451ccd979bdSMark Fasheh 
34520d5dc6c2SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
34530d5dc6c2SMark Fasheh 		if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
3454bd3e7610SJoel Becker 		    lockres->l_level == DLM_LOCK_EX &&
34550d5dc6c2SMark Fasheh 		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
34560d5dc6c2SMark Fasheh 			lockres->l_ops->set_lvb(lockres);
34570d5dc6c2SMark Fasheh 	}
3458ccd979bdSMark Fasheh 
3459ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY)
3460ccd979bdSMark Fasheh 		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
3461ccd979bdSMark Fasheh 		     lockres->l_name);
3462ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3463ccd979bdSMark Fasheh 		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
3464ccd979bdSMark Fasheh 
3465ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
3466ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3467ccd979bdSMark Fasheh 		goto out;
3468ccd979bdSMark Fasheh 	}
3469ccd979bdSMark Fasheh 
3470ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
3471ccd979bdSMark Fasheh 
3472ccd979bdSMark Fasheh 	/* make sure we never get here while waiting for an ast to
3473ccd979bdSMark Fasheh 	 * fire. */
3474ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
3475ccd979bdSMark Fasheh 
3476ccd979bdSMark Fasheh 	/* is this necessary? */
3477ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3478ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
3479ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3480ccd979bdSMark Fasheh 
3481ccd979bdSMark Fasheh 	mlog(0, "lock %s\n", lockres->l_name);
3482ccd979bdSMark Fasheh 
3483a796d286SJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
34847431cd7eSJoel Becker 	if (ret) {
34857431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3486ccd979bdSMark Fasheh 		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
3487cf0acdcdSJoel Becker 		ocfs2_dlm_dump_lksb(&lockres->l_lksb);
3488ccd979bdSMark Fasheh 		BUG();
3489ccd979bdSMark Fasheh 	}
349073ac36eaSColy Li 	mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
3491ccd979bdSMark Fasheh 	     lockres->l_name);
3492ccd979bdSMark Fasheh 
3493ccd979bdSMark Fasheh 	ocfs2_wait_on_busy_lock(lockres);
3494ccd979bdSMark Fasheh out:
3495ccd979bdSMark Fasheh 	return 0;
3496ccd979bdSMark Fasheh }
3497ccd979bdSMark Fasheh 
349884d86f83SJan Kara static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
349984d86f83SJan Kara 				       struct ocfs2_lock_res *lockres);
350084d86f83SJan Kara 
3501ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be
3502ccd979bdSMark Fasheh  * queued if blocking, but we still may have to wait on it
350334d024f8SMark Fasheh  * being dequeued from the downconvert thread before we can consider
3504ccd979bdSMark Fasheh  * it safe to drop.
3505ccd979bdSMark Fasheh  *
3506ccd979bdSMark Fasheh  * You can *not* attempt to call cluster_lock on this lockres anymore. */
ocfs2_mark_lockres_freeing(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres)350784d86f83SJan Kara void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
350884d86f83SJan Kara 				struct ocfs2_lock_res *lockres)
3509ccd979bdSMark Fasheh {
3510ccd979bdSMark Fasheh 	int status;
3511ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
351284d86f83SJan Kara 	unsigned long flags, flags2;
3513ccd979bdSMark Fasheh 
3514ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
3515ccd979bdSMark Fasheh 
3516ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3517ccd979bdSMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_FREEING;
351884d86f83SJan Kara 	if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
351984d86f83SJan Kara 		/*
352084d86f83SJan Kara 		 * We know the downconvert is queued but not in progress
352184d86f83SJan Kara 		 * because we are the downconvert thread and processing
352284d86f83SJan Kara 		 * different lock. So we can just remove the lock from the
352384d86f83SJan Kara 		 * queue. This is not only an optimization but also a way
352484d86f83SJan Kara 		 * to avoid the following deadlock:
352584d86f83SJan Kara 		 *   ocfs2_dentry_post_unlock()
352684d86f83SJan Kara 		 *     ocfs2_dentry_lock_put()
352784d86f83SJan Kara 		 *       ocfs2_drop_dentry_lock()
352884d86f83SJan Kara 		 *         iput()
352984d86f83SJan Kara 		 *           ocfs2_evict_inode()
353084d86f83SJan Kara 		 *             ocfs2_clear_inode()
353184d86f83SJan Kara 		 *               ocfs2_mark_lockres_freeing()
353284d86f83SJan Kara 		 *                 ... blocks waiting for OCFS2_LOCK_QUEUED
353384d86f83SJan Kara 		 *                 since we are the downconvert thread which
353484d86f83SJan Kara 		 *                 should clear the flag.
353584d86f83SJan Kara 		 */
353684d86f83SJan Kara 		spin_unlock_irqrestore(&lockres->l_lock, flags);
353784d86f83SJan Kara 		spin_lock_irqsave(&osb->dc_task_lock, flags2);
353884d86f83SJan Kara 		list_del_init(&lockres->l_blocked_list);
353984d86f83SJan Kara 		osb->blocked_lock_count--;
354084d86f83SJan Kara 		spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
354184d86f83SJan Kara 		/*
354284d86f83SJan Kara 		 * Warn if we recurse into another post_unlock call.  Strictly
354384d86f83SJan Kara 		 * speaking it isn't a problem but we need to be careful if
354484d86f83SJan Kara 		 * that happens (stack overflow, deadlocks, ...) so warn if
354584d86f83SJan Kara 		 * ocfs2 grows a path for which this can happen.
354684d86f83SJan Kara 		 */
354784d86f83SJan Kara 		WARN_ON_ONCE(lockres->l_ops->post_unlock);
354884d86f83SJan Kara 		/* Since the lock is freeing we don't do much in the fn below */
354984d86f83SJan Kara 		ocfs2_process_blocked_lock(osb, lockres);
355084d86f83SJan Kara 		return;
355184d86f83SJan Kara 	}
3552ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3553ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3554ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3555ccd979bdSMark Fasheh 
3556ccd979bdSMark Fasheh 		mlog(0, "Waiting on lockres %s\n", lockres->l_name);
3557ccd979bdSMark Fasheh 
3558ccd979bdSMark Fasheh 		status = ocfs2_wait_for_mask(&mw);
3559ccd979bdSMark Fasheh 		if (status)
3560ccd979bdSMark Fasheh 			mlog_errno(status);
3561ccd979bdSMark Fasheh 
3562ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3563ccd979bdSMark Fasheh 	}
3564ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3565ccd979bdSMark Fasheh }
3566ccd979bdSMark Fasheh 
/*
 * Convenience wrapper: mark @lockres as freeing, then drop it.
 * A non-zero result from ocfs2_drop_lock() is logged, not returned.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int err;

	ocfs2_mark_lockres_freeing(osb, lockres);

	err = ocfs2_drop_lock(osb, lockres);
	if (err != 0)
		mlog_errno(err);
}
3577d680efe9SMark Fasheh 
ocfs2_drop_osb_locks(struct ocfs2_super * osb)3578ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3579ccd979bdSMark Fasheh {
3580d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3581d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
35826ca497a8Swengang wang 	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
358383273932SSrinivas Eeda 	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3584ccd979bdSMark Fasheh }
3585ccd979bdSMark Fasheh 
ocfs2_drop_inode_locks(struct inode * inode)3586ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode)
3587ccd979bdSMark Fasheh {
3588ccd979bdSMark Fasheh 	int status, err;
3589ccd979bdSMark Fasheh 
3590ccd979bdSMark Fasheh 	/* No need to call ocfs2_mark_lockres_freeing here -
3591ccd979bdSMark Fasheh 	 * ocfs2_clear_inode has done it for us. */
3592ccd979bdSMark Fasheh 
3593ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
359450008630STiger Yang 			      &OCFS2_I(inode)->ip_open_lockres);
3595ccd979bdSMark Fasheh 	if (err < 0)
3596ccd979bdSMark Fasheh 		mlog_errno(err);
3597ccd979bdSMark Fasheh 
3598ccd979bdSMark Fasheh 	status = err;
3599ccd979bdSMark Fasheh 
3600ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3601e63aecb6SMark Fasheh 			      &OCFS2_I(inode)->ip_inode_lockres);
3602ccd979bdSMark Fasheh 	if (err < 0)
3603ccd979bdSMark Fasheh 		mlog_errno(err);
3604ccd979bdSMark Fasheh 	if (err < 0 && !status)
3605ccd979bdSMark Fasheh 		status = err;
3606ccd979bdSMark Fasheh 
3607ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
36080d5dc6c2SMark Fasheh 			      &OCFS2_I(inode)->ip_rw_lockres);
3609ccd979bdSMark Fasheh 	if (err < 0)
3610ccd979bdSMark Fasheh 		mlog_errno(err);
3611ccd979bdSMark Fasheh 	if (err < 0 && !status)
3612ccd979bdSMark Fasheh 		status = err;
3613ccd979bdSMark Fasheh 
3614ccd979bdSMark Fasheh 	return status;
3615ccd979bdSMark Fasheh }
3616ccd979bdSMark Fasheh 
ocfs2_prepare_downconvert(struct ocfs2_lock_res * lockres,int new_level)3617de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3618ccd979bdSMark Fasheh 					      int new_level)
3619ccd979bdSMark Fasheh {
3620ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3621ccd979bdSMark Fasheh 
3622bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
3623ccd979bdSMark Fasheh 
3624ccd979bdSMark Fasheh 	if (lockres->l_level <= new_level) {
36259b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
36269b915181SSunil Mushran 		     "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
36279b915181SSunil Mushran 		     "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
36289b915181SSunil Mushran 		     new_level, list_empty(&lockres->l_blocked_list),
36299b915181SSunil Mushran 		     list_empty(&lockres->l_mask_waiters), lockres->l_type,
36309b915181SSunil Mushran 		     lockres->l_flags, lockres->l_ro_holders,
36319b915181SSunil Mushran 		     lockres->l_ex_holders, lockres->l_action,
36329b915181SSunil Mushran 		     lockres->l_unlock_action, lockres->l_requested,
36339b915181SSunil Mushran 		     lockres->l_blocking, lockres->l_pending_gen);
3634ccd979bdSMark Fasheh 		BUG();
3635ccd979bdSMark Fasheh 	}
3636ccd979bdSMark Fasheh 
36379b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
36389b915181SSunil Mushran 	     lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);
3639ccd979bdSMark Fasheh 
3640ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_DOWNCONVERT;
3641ccd979bdSMark Fasheh 	lockres->l_requested = new_level;
3642ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3643de551246SJoel Becker 	return lockres_set_pending(lockres);
3644ccd979bdSMark Fasheh }
3645ccd979bdSMark Fasheh 
ocfs2_downconvert_lock(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres,int new_level,int lvb,unsigned int generation)3646ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3647ccd979bdSMark Fasheh 				  struct ocfs2_lock_res *lockres,
3648ccd979bdSMark Fasheh 				  int new_level,
3649de551246SJoel Becker 				  int lvb,
3650de551246SJoel Becker 				  unsigned int generation)
3651ccd979bdSMark Fasheh {
3652bd3e7610SJoel Becker 	int ret;
3653bd3e7610SJoel Becker 	u32 dlm_flags = DLM_LKF_CONVERT;
3654ccd979bdSMark Fasheh 
36559b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
36569b915181SSunil Mushran 	     lockres->l_level, new_level);
36579b915181SSunil Mushran 
3658e7ee2c08SEric Ren 	/*
3659e7ee2c08SEric Ren 	 * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always
3660e7ee2c08SEric Ren 	 * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
3661e7ee2c08SEric Ren 	 * we can recover correctly from node failure. Otherwise, we may get
3662793057e1SIngo Molnar 	 * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
3663e7ee2c08SEric Ren 	 */
3664a6346447SGang He 	if (ocfs2_userspace_stack(osb) &&
3665e7ee2c08SEric Ren 	    lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3666e7ee2c08SEric Ren 		lvb = 1;
3667e7ee2c08SEric Ren 
3668ccd979bdSMark Fasheh 	if (lvb)
3669bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_VALBLK;
3670ccd979bdSMark Fasheh 
36714670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
3672ccd979bdSMark Fasheh 			     new_level,
3673ccd979bdSMark Fasheh 			     &lockres->l_lksb,
3674ccd979bdSMark Fasheh 			     dlm_flags,
3675ccd979bdSMark Fasheh 			     lockres->l_name,
3676a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
3677de551246SJoel Becker 	lockres_clear_pending(lockres, generation, osb);
36787431cd7eSJoel Becker 	if (ret) {
36797431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
3680ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
3681ccd979bdSMark Fasheh 		goto bail;
3682ccd979bdSMark Fasheh 	}
3683ccd979bdSMark Fasheh 
3684ccd979bdSMark Fasheh 	ret = 0;
3685ccd979bdSMark Fasheh bail:
3686ccd979bdSMark Fasheh 	return ret;
3687ccd979bdSMark Fasheh }
3688ccd979bdSMark Fasheh 
368924ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
ocfs2_prepare_cancel_convert(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres)3690ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3691ccd979bdSMark Fasheh 				        struct ocfs2_lock_res *lockres)
3692ccd979bdSMark Fasheh {
3693ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3694ccd979bdSMark Fasheh 
3695ccd979bdSMark Fasheh 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3696ccd979bdSMark Fasheh 		/* If we're already trying to cancel a lock conversion
3697ccd979bdSMark Fasheh 		 * then just drop the spinlock and allow the caller to
3698ccd979bdSMark Fasheh 		 * requeue this lock. */
36999b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
3700ccd979bdSMark Fasheh 		return 0;
3701ccd979bdSMark Fasheh 	}
3702ccd979bdSMark Fasheh 
3703ccd979bdSMark Fasheh 	/* were we in a convert when we got the bast fire? */
3704ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
3705ccd979bdSMark Fasheh 	       lockres->l_action != OCFS2_AST_DOWNCONVERT);
3706ccd979bdSMark Fasheh 	/* set things up for the unlockast to know to just
3707ccd979bdSMark Fasheh 	 * clear out the ast_action and unset busy, etc. */
3708ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
3709ccd979bdSMark Fasheh 
3710ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
3711ccd979bdSMark Fasheh 			"lock %s, invalid flags: 0x%lx\n",
3712ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3713ccd979bdSMark Fasheh 
37149b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
37159b915181SSunil Mushran 
3716ccd979bdSMark Fasheh 	return 1;
3717ccd979bdSMark Fasheh }
3718ccd979bdSMark Fasheh 
ocfs2_cancel_convert(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres)3719ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3720ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres)
3721ccd979bdSMark Fasheh {
3722ccd979bdSMark Fasheh 	int ret;
3723ccd979bdSMark Fasheh 
37244670c46dSJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3725a796d286SJoel Becker 			       DLM_LKF_CANCEL);
37267431cd7eSJoel Becker 	if (ret) {
37277431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3728ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 0);
3729ccd979bdSMark Fasheh 	}
3730ccd979bdSMark Fasheh 
37319b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
3732ccd979bdSMark Fasheh 
3733ccd979bdSMark Fasheh 	return ret;
3734ccd979bdSMark Fasheh }
3735ccd979bdSMark Fasheh 
ocfs2_unblock_lock(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres,struct ocfs2_unblock_ctl * ctl)3736b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3737ccd979bdSMark Fasheh 			      struct ocfs2_lock_res *lockres,
3738cc567d89SMark Fasheh 			      struct ocfs2_unblock_ctl *ctl)
3739ccd979bdSMark Fasheh {
3740ccd979bdSMark Fasheh 	unsigned long flags;
3741ccd979bdSMark Fasheh 	int blocking;
3742ccd979bdSMark Fasheh 	int new_level;
3743079b8057SSunil Mushran 	int level;
3744ccd979bdSMark Fasheh 	int ret = 0;
37455ef0d4eaSMark Fasheh 	int set_lvb = 0;
3746de551246SJoel Becker 	unsigned int gen;
3747ccd979bdSMark Fasheh 
3748ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3749ccd979bdSMark Fasheh 
3750ccd979bdSMark Fasheh recheck:
3751db0f6ce6SSunil Mushran 	/*
3752db0f6ce6SSunil Mushran 	 * Is it still blocking? If not, we have no more work to do.
3753db0f6ce6SSunil Mushran 	 */
3754db0f6ce6SSunil Mushran 	if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
3755db0f6ce6SSunil Mushran 		BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
3756db0f6ce6SSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3757db0f6ce6SSunil Mushran 		ret = 0;
3758db0f6ce6SSunil Mushran 		goto leave;
3759db0f6ce6SSunil Mushran 	}
3760db0f6ce6SSunil Mushran 
3761ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3762de551246SJoel Becker 		/* XXX
3763de551246SJoel Becker 		 * This is a *big* race.  The OCFS2_LOCK_PENDING flag
3764de551246SJoel Becker 		 * exists entirely for one reason - another thread has set
3765de551246SJoel Becker 		 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3766de551246SJoel Becker 		 *
3767de551246SJoel Becker 		 * If we do ocfs2_cancel_convert() before the other thread
3768de551246SJoel Becker 		 * calls dlm_lock(), our cancel will do nothing.  We will
3769de551246SJoel Becker 		 * get no ast, and we will have no way of knowing the
3770de551246SJoel Becker 		 * cancel failed.  Meanwhile, the other thread will call
3771de551246SJoel Becker 		 * into dlm_lock() and wait...forever.
3772de551246SJoel Becker 		 *
3773de551246SJoel Becker 		 * Why forever?  Because another node has asked for the
3774de551246SJoel Becker 		 * lock first; that's why we're here in unblock_lock().
3775de551246SJoel Becker 		 *
3776de551246SJoel Becker 		 * The solution is OCFS2_LOCK_PENDING.  When PENDING is
3777de551246SJoel Becker 		 * set, we just requeue the unblock.  Only when the other
3778de551246SJoel Becker 		 * thread has called dlm_lock() and cleared PENDING will
3779de551246SJoel Becker 		 * we then cancel their request.
3780de551246SJoel Becker 		 *
3781de551246SJoel Becker 		 * All callers of dlm_lock() must set OCFS2_DLM_PENDING
3782de551246SJoel Becker 		 * at the same time they set OCFS2_DLM_BUSY.  They must
3783de551246SJoel Becker 		 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
3784de551246SJoel Becker 		 */
37859b915181SSunil Mushran 		if (lockres->l_flags & OCFS2_LOCK_PENDING) {
37869b915181SSunil Mushran 			mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
37879b915181SSunil Mushran 			     lockres->l_name);
3788de551246SJoel Becker 			goto leave_requeue;
37899b915181SSunil Mushran 		}
3790de551246SJoel Becker 
3791d680efe9SMark Fasheh 		ctl->requeue = 1;
3792ccd979bdSMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
3793ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3794ccd979bdSMark Fasheh 		if (ret) {
3795ccd979bdSMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
3796ccd979bdSMark Fasheh 			if (ret < 0)
3797ccd979bdSMark Fasheh 				mlog_errno(ret);
3798ccd979bdSMark Fasheh 		}
3799ccd979bdSMark Fasheh 		goto leave;
3800ccd979bdSMark Fasheh 	}
3801ccd979bdSMark Fasheh 
3802a1912826SSunil Mushran 	/*
3803a1912826SSunil Mushran 	 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3804a1912826SSunil Mushran 	 * set when the ast is received for an upconvert just before the
3805a1912826SSunil Mushran 	 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3806a1912826SSunil Mushran 	 * on the heels of the ast, we want to delay the downconvert just
3807a1912826SSunil Mushran 	 * enough to allow the up requestor to do its task. Because this
3808a1912826SSunil Mushran 	 * lock is in the blocked queue, the lock will be downconverted
3809a1912826SSunil Mushran 	 * as soon as the requestor is done with the lock.
3810a1912826SSunil Mushran 	 */
3811a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3812a1912826SSunil Mushran 		goto leave_requeue;
3813a1912826SSunil Mushran 
38140d74125aSSunil Mushran 	/*
38150d74125aSSunil Mushran 	 * How can we block and yet be at NL?  We were trying to upconvert
38160d74125aSSunil Mushran 	 * from NL and got canceled.  The code comes back here, and now
38170d74125aSSunil Mushran 	 * we notice and clear BLOCKING.
38180d74125aSSunil Mushran 	 */
38190d74125aSSunil Mushran 	if (lockres->l_level == DLM_LOCK_NL) {
38200d74125aSSunil Mushran 		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
38219b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
38220d74125aSSunil Mushran 		lockres->l_blocking = DLM_LOCK_NL;
38230d74125aSSunil Mushran 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
38240d74125aSSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
38250d74125aSSunil Mushran 		goto leave;
38260d74125aSSunil Mushran 	}
38270d74125aSSunil Mushran 
3828ccd979bdSMark Fasheh 	/* if we're blocking an exclusive and we have *any* holders,
3829ccd979bdSMark Fasheh 	 * then requeue. */
3830bd3e7610SJoel Becker 	if ((lockres->l_blocking == DLM_LOCK_EX)
38319b915181SSunil Mushran 	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
38329b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
38339b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders,
38349b915181SSunil Mushran 		     lockres->l_ro_holders);
3835f7fbfdd1SMark Fasheh 		goto leave_requeue;
38369b915181SSunil Mushran 	}
3837ccd979bdSMark Fasheh 
3838ccd979bdSMark Fasheh 	/* If it's a PR we're blocking, then only
3839ccd979bdSMark Fasheh 	 * requeue if we've got any EX holders */
3840bd3e7610SJoel Becker 	if (lockres->l_blocking == DLM_LOCK_PR &&
38419b915181SSunil Mushran 	    lockres->l_ex_holders) {
38429b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
38439b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders);
3844f7fbfdd1SMark Fasheh 		goto leave_requeue;
38459b915181SSunil Mushran 	}
3846f7fbfdd1SMark Fasheh 
3847f7fbfdd1SMark Fasheh 	/*
3848f7fbfdd1SMark Fasheh 	 * Can we get a lock in this state if the holder counts are
3849f7fbfdd1SMark Fasheh 	 * zero? The meta data unblock code used to check this.
3850f7fbfdd1SMark Fasheh 	 */
3851f7fbfdd1SMark Fasheh 	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
38529b915181SSunil Mushran 	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
38539b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
38549b915181SSunil Mushran 		     lockres->l_name);
3855f7fbfdd1SMark Fasheh 		goto leave_requeue;
38569b915181SSunil Mushran 	}
3857ccd979bdSMark Fasheh 
385816d5b956SMark Fasheh 	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
385916d5b956SMark Fasheh 
386016d5b956SMark Fasheh 	if (lockres->l_ops->check_downconvert
38619b915181SSunil Mushran 	    && !lockres->l_ops->check_downconvert(lockres, new_level)) {
38629b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
38639b915181SSunil Mushran 		     lockres->l_name);
386416d5b956SMark Fasheh 		goto leave_requeue;
38659b915181SSunil Mushran 	}
386616d5b956SMark Fasheh 
3867ccd979bdSMark Fasheh 	/* If we get here, then we know that there are no more
3868ccd979bdSMark Fasheh 	 * incompatible holders (and anyone asking for an incompatible
3869ccd979bdSMark Fasheh 	 * lock is blocked). We can now downconvert the lock */
3870cc567d89SMark Fasheh 	if (!lockres->l_ops->downconvert_worker)
3871ccd979bdSMark Fasheh 		goto downconvert;
3872ccd979bdSMark Fasheh 
3873ccd979bdSMark Fasheh 	/* Some lockres types want to do a bit of work before
3874ccd979bdSMark Fasheh 	 * downconverting a lock. Allow that here. The worker function
3875ccd979bdSMark Fasheh 	 * may sleep, so we save off a copy of what we're blocking as
3876ccd979bdSMark Fasheh 	 * it may change while we're not holding the spin lock. */
3877ccd979bdSMark Fasheh 	blocking = lockres->l_blocking;
3878079b8057SSunil Mushran 	level = lockres->l_level;
3879ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3880ccd979bdSMark Fasheh 
3881cc567d89SMark Fasheh 	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3882d680efe9SMark Fasheh 
38839b915181SSunil Mushran 	if (ctl->unblock_action == UNBLOCK_STOP_POST) {
38849b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
38859b915181SSunil Mushran 		     lockres->l_name);
3886d680efe9SMark Fasheh 		goto leave;
38879b915181SSunil Mushran 	}
3888ccd979bdSMark Fasheh 
3889ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3890079b8057SSunil Mushran 	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
3891ccd979bdSMark Fasheh 		/* If this changed underneath us, then we can't drop
3892ccd979bdSMark Fasheh 		 * it just yet. */
38939b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
38949b915181SSunil Mushran 		     "Recheck\n", lockres->l_name, blocking,
38959b915181SSunil Mushran 		     lockres->l_blocking, level, lockres->l_level);
3896ccd979bdSMark Fasheh 		goto recheck;
3897ccd979bdSMark Fasheh 	}
3898ccd979bdSMark Fasheh 
3899ccd979bdSMark Fasheh downconvert:
3900d680efe9SMark Fasheh 	ctl->requeue = 0;
3901ccd979bdSMark Fasheh 
39025ef0d4eaSMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3903bd3e7610SJoel Becker 		if (lockres->l_level == DLM_LOCK_EX)
39045ef0d4eaSMark Fasheh 			set_lvb = 1;
39055ef0d4eaSMark Fasheh 
39065ef0d4eaSMark Fasheh 		/*
39075ef0d4eaSMark Fasheh 		 * We only set the lvb if the lock has been fully
39085ef0d4eaSMark Fasheh 		 * refreshed - otherwise we risk setting stale
39095ef0d4eaSMark Fasheh 		 * data. Otherwise, there's no need to actually clear
39105ef0d4eaSMark Fasheh 		 * out the lvb here as it's value is still valid.
39115ef0d4eaSMark Fasheh 		 */
39125ef0d4eaSMark Fasheh 		if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
39135ef0d4eaSMark Fasheh 			lockres->l_ops->set_lvb(lockres);
39145ef0d4eaSMark Fasheh 	}
39155ef0d4eaSMark Fasheh 
3916de551246SJoel Becker 	gen = ocfs2_prepare_downconvert(lockres, new_level);
3917ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3918de551246SJoel Becker 	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3919de551246SJoel Becker 				     gen);
39209673e005SGang He 	/* The dlm lock convert is being cancelled in background,
39219673e005SGang He 	 * ocfs2_cancel_convert() is asynchronous in fs/dlm,
39229673e005SGang He 	 * requeue it, try again later.
39239673e005SGang He 	 */
39249673e005SGang He 	if (ret == -EBUSY) {
39259673e005SGang He 		ctl->requeue = 1;
39269673e005SGang He 		mlog(ML_BASTS, "lockres %s, ReQ: Downconvert busy\n",
39279673e005SGang He 		     lockres->l_name);
39289673e005SGang He 		ret = 0;
39299673e005SGang He 		msleep(20);
39309673e005SGang He 	}
3931de551246SJoel Becker 
3932ccd979bdSMark Fasheh leave:
3933c1e8d35eSTao Ma 	if (ret)
3934c1e8d35eSTao Ma 		mlog_errno(ret);
3935ccd979bdSMark Fasheh 	return ret;
3936f7fbfdd1SMark Fasheh 
3937f7fbfdd1SMark Fasheh leave_requeue:
3938f7fbfdd1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3939f7fbfdd1SMark Fasheh 	ctl->requeue = 1;
3940f7fbfdd1SMark Fasheh 
3941f7fbfdd1SMark Fasheh 	return 0;
3942ccd979bdSMark Fasheh }
3943ccd979bdSMark Fasheh 
ocfs2_data_convert_worker(struct ocfs2_lock_res * lockres,int blocking)3944d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3945ccd979bdSMark Fasheh 				     int blocking)
3946ccd979bdSMark Fasheh {
3947ccd979bdSMark Fasheh 	struct inode *inode;
3948ccd979bdSMark Fasheh 	struct address_space *mapping;
39495e98d492SGoldwyn Rodrigues 	struct ocfs2_inode_info *oi;
3950ccd979bdSMark Fasheh 
3951ccd979bdSMark Fasheh        	inode = ocfs2_lock_res_inode(lockres);
3952ccd979bdSMark Fasheh 	mapping = inode->i_mapping;
3953ccd979bdSMark Fasheh 
39545e98d492SGoldwyn Rodrigues 	if (S_ISDIR(inode->i_mode)) {
39555e98d492SGoldwyn Rodrigues 		oi = OCFS2_I(inode);
39565e98d492SGoldwyn Rodrigues 		oi->ip_dir_lock_gen++;
39575e98d492SGoldwyn Rodrigues 		mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
39589c0f0a03SWengang Wang 		goto out_forget;
39595e98d492SGoldwyn Rodrigues 	}
39605e98d492SGoldwyn Rodrigues 
39611044e401SMark Fasheh 	if (!S_ISREG(inode->i_mode))
3962f1f54068SMark Fasheh 		goto out;
3963f1f54068SMark Fasheh 
39647f4a2a97SMark Fasheh 	/*
39657f4a2a97SMark Fasheh 	 * We need this before the filemap_fdatawrite() so that it can
39667f4a2a97SMark Fasheh 	 * transfer the dirty bit from the PTE to the
39677f4a2a97SMark Fasheh 	 * page. Unfortunately this means that even for EX->PR
39687f4a2a97SMark Fasheh 	 * downconverts, we'll lose our mappings and have to build
39697f4a2a97SMark Fasheh 	 * them up again.
39707f4a2a97SMark Fasheh 	 */
39717f4a2a97SMark Fasheh 	unmap_mapping_range(mapping, 0, 0, 0);
39727f4a2a97SMark Fasheh 
3973ccd979bdSMark Fasheh 	if (filemap_fdatawrite(mapping)) {
3974b0697053SMark Fasheh 		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
3975b0697053SMark Fasheh 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
3976ccd979bdSMark Fasheh 	}
3977ccd979bdSMark Fasheh 	sync_mapping_buffers(mapping);
3978bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_EX) {
3979ccd979bdSMark Fasheh 		truncate_inode_pages(mapping, 0);
3980ccd979bdSMark Fasheh 	} else {
3981ccd979bdSMark Fasheh 		/* We only need to wait on the I/O if we're not also
3982ccd979bdSMark Fasheh 		 * truncating pages because truncate_inode_pages waits
3983ccd979bdSMark Fasheh 		 * for us above. We don't truncate pages if we're
3984ccd979bdSMark Fasheh 		 * blocking anything < EXMODE because we want to keep
3985ccd979bdSMark Fasheh 		 * them around in that case. */
3986ccd979bdSMark Fasheh 		filemap_fdatawait(mapping);
3987ccd979bdSMark Fasheh 	}
3988ccd979bdSMark Fasheh 
39899c0f0a03SWengang Wang out_forget:
3990b8a7a3a6SAndreas Gruenbacher 	forget_all_cached_acls(inode);
3991b8a7a3a6SAndreas Gruenbacher 
3992f1f54068SMark Fasheh out:
3993d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE;
3994ccd979bdSMark Fasheh }
3995ccd979bdSMark Fasheh 
ocfs2_ci_checkpointed(struct ocfs2_caching_info * ci,struct ocfs2_lock_res * lockres,int new_level)3996a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
3997a4338481STao Ma 				 struct ocfs2_lock_res *lockres,
3998810d5aebSMark Fasheh 				 int new_level)
3999810d5aebSMark Fasheh {
4000a4338481STao Ma 	int checkpointed = ocfs2_ci_fully_checkpointed(ci);
4001810d5aebSMark Fasheh 
4002bd3e7610SJoel Becker 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
4003bd3e7610SJoel Becker 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
4004810d5aebSMark Fasheh 
4005810d5aebSMark Fasheh 	if (checkpointed)
4006810d5aebSMark Fasheh 		return 1;
4007810d5aebSMark Fasheh 
4008a4338481STao Ma 	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
4009810d5aebSMark Fasheh 	return 0;
4010810d5aebSMark Fasheh }
4011810d5aebSMark Fasheh 
/*
 * check_downconvert-style helper: applies ocfs2_ci_checkpointed() to the
 * metadata cache of the inode behind @lockres and returns its result.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	struct ocfs2_caching_info *ci;

	ci = INODE_CACHE(ocfs2_lock_res_inode(lockres));

	return ocfs2_ci_checkpointed(ci, lockres, new_level);
}
4019a4338481STao Ma 
/*
 * set_lvb-style helper: stuff the meta LVB for the inode that owns
 * @lockres.
 */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
4026810d5aebSMark Fasheh 
/*
 * Perform the final reference drop on our dentry lock.  Today this runs
 * in the downconvert thread; the dlmglue API could be simplified by
 * pushing these drops off to the ocfs2_wq in the future.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
4038d680efe9SMark Fasheh 
4039d680efe9SMark Fasheh /*
4040d680efe9SMark Fasheh  * d_delete() matching dentries before the lock downconvert.
4041d680efe9SMark Fasheh  *
4042d680efe9SMark Fasheh  * At this point, any process waiting to destroy the
4043d680efe9SMark Fasheh  * dentry_lock due to last ref count is stopped by the
4044d680efe9SMark Fasheh  * OCFS2_LOCK_QUEUED flag.
4045d680efe9SMark Fasheh  *
4046d680efe9SMark Fasheh  * We have two potential problems
4047d680efe9SMark Fasheh  *
4048d680efe9SMark Fasheh  * 1) If we do the last reference drop on our dentry_lock (via dput)
4049d680efe9SMark Fasheh  *    we'll wind up in ocfs2_release_dentry_lock(), waiting on
4050d680efe9SMark Fasheh  *    the downconvert to finish. Instead we take an elevated
4051d680efe9SMark Fasheh  *    reference and push the drop until after we've completed our
4052d680efe9SMark Fasheh  *    unblock processing.
4053d680efe9SMark Fasheh  *
4054d680efe9SMark Fasheh  * 2) There might be another process with a final reference,
4055d680efe9SMark Fasheh  *    waiting on us to finish processing. If this is the case, we
4056d680efe9SMark Fasheh  *    detect it and exit out - there's no more dentries anyway.
4057d680efe9SMark Fasheh  */
ocfs2_dentry_convert_worker(struct ocfs2_lock_res * lockres,int blocking)4058d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
4059d680efe9SMark Fasheh 				       int blocking)
4060d680efe9SMark Fasheh {
4061d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
4062d680efe9SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
4063d680efe9SMark Fasheh 	struct dentry *dentry;
4064d680efe9SMark Fasheh 	unsigned long flags;
4065d680efe9SMark Fasheh 	int extra_ref = 0;
4066d680efe9SMark Fasheh 
4067d680efe9SMark Fasheh 	/*
4068d680efe9SMark Fasheh 	 * This node is blocking another node from getting a read
4069d680efe9SMark Fasheh 	 * lock. This happens when we've renamed within a
4070d680efe9SMark Fasheh 	 * directory. We've forced the other nodes to d_delete(), but
4071d680efe9SMark Fasheh 	 * we never actually dropped our lock because it's still
4072d680efe9SMark Fasheh 	 * valid. The downconvert code will retain a PR for this node,
4073d680efe9SMark Fasheh 	 * so there's no further work to do.
4074d680efe9SMark Fasheh 	 */
4075bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_PR)
4076d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
4077d680efe9SMark Fasheh 
4078d680efe9SMark Fasheh 	/*
4079d680efe9SMark Fasheh 	 * Mark this inode as potentially orphaned. The code in
4080d680efe9SMark Fasheh 	 * ocfs2_delete_inode() will figure out whether it actually
4081d680efe9SMark Fasheh 	 * needs to be freed or not.
4082d680efe9SMark Fasheh 	 */
4083d680efe9SMark Fasheh 	spin_lock(&oi->ip_lock);
4084d680efe9SMark Fasheh 	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
4085d680efe9SMark Fasheh 	spin_unlock(&oi->ip_lock);
4086d680efe9SMark Fasheh 
4087d680efe9SMark Fasheh 	/*
4088d680efe9SMark Fasheh 	 * Yuck. We need to make sure however that the check of
4089d680efe9SMark Fasheh 	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
4090d680efe9SMark Fasheh 	 * respect to a reference decrement or the setting of that
4091d680efe9SMark Fasheh 	 * flag.
4092d680efe9SMark Fasheh 	 */
4093d680efe9SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
4094d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
4095d680efe9SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
4096d680efe9SMark Fasheh 	    && dl->dl_count) {
4097d680efe9SMark Fasheh 		dl->dl_count++;
4098d680efe9SMark Fasheh 		extra_ref = 1;
4099d680efe9SMark Fasheh 	}
4100d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
4101d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
4102d680efe9SMark Fasheh 
4103d680efe9SMark Fasheh 	mlog(0, "extra_ref = %d\n", extra_ref);
4104d680efe9SMark Fasheh 
4105d680efe9SMark Fasheh 	/*
4106d680efe9SMark Fasheh 	 * We have a process waiting on us in ocfs2_dentry_iput(),
4107d680efe9SMark Fasheh 	 * which means we can't have any more outstanding
4108d680efe9SMark Fasheh 	 * aliases. There's no need to do any more work.
4109d680efe9SMark Fasheh 	 */
4110d680efe9SMark Fasheh 	if (!extra_ref)
4111d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
4112d680efe9SMark Fasheh 
4113d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
4114d680efe9SMark Fasheh 	while (1) {
4115d680efe9SMark Fasheh 		dentry = ocfs2_find_local_alias(dl->dl_inode,
4116d680efe9SMark Fasheh 						dl->dl_parent_blkno, 1);
4117d680efe9SMark Fasheh 		if (!dentry)
4118d680efe9SMark Fasheh 			break;
4119d680efe9SMark Fasheh 		spin_unlock(&dentry_attach_lock);
4120d680efe9SMark Fasheh 
412110ab8811Salex chen 		if (S_ISDIR(dl->dl_inode->i_mode))
412210ab8811Salex chen 			shrink_dcache_parent(dentry);
412310ab8811Salex chen 
4124a455589fSAl Viro 		mlog(0, "d_delete(%pd);\n", dentry);
4125d680efe9SMark Fasheh 
4126d680efe9SMark Fasheh 		/*
4127d680efe9SMark Fasheh 		 * The following dcache calls may do an
4128d680efe9SMark Fasheh 		 * iput(). Normally we don't want that from the
4129d680efe9SMark Fasheh 		 * downconverting thread, but in this case it's ok
4130d680efe9SMark Fasheh 		 * because the requesting node already has an
4131d680efe9SMark Fasheh 		 * exclusive lock on the inode, so it can't be queued
4132d680efe9SMark Fasheh 		 * for a downconvert.
4133d680efe9SMark Fasheh 		 */
4134d680efe9SMark Fasheh 		d_delete(dentry);
4135d680efe9SMark Fasheh 		dput(dentry);
4136d680efe9SMark Fasheh 
4137d680efe9SMark Fasheh 		spin_lock(&dentry_attach_lock);
4138d680efe9SMark Fasheh 	}
4139d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
4140d680efe9SMark Fasheh 
4141d680efe9SMark Fasheh 	/*
4142d680efe9SMark Fasheh 	 * If we are the last holder of this dentry lock, there is no
4143d680efe9SMark Fasheh 	 * reason to downconvert so skip straight to the unlock.
4144d680efe9SMark Fasheh 	 */
4145d680efe9SMark Fasheh 	if (dl->dl_count == 1)
4146d680efe9SMark Fasheh 		return UNBLOCK_STOP_POST;
4147d680efe9SMark Fasheh 
4148d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE_POST;
4149d680efe9SMark Fasheh }
4150d680efe9SMark Fasheh 
ocfs2_check_refcount_downconvert(struct ocfs2_lock_res * lockres,int new_level)41518dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
41528dec98edSTao Ma 					    int new_level)
41538dec98edSTao Ma {
41548dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
41558dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
41568dec98edSTao Ma 
41578dec98edSTao Ma 	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
41588dec98edSTao Ma }
41598dec98edSTao Ma 
ocfs2_refcount_convert_worker(struct ocfs2_lock_res * lockres,int blocking)41608dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
41618dec98edSTao Ma 					 int blocking)
41628dec98edSTao Ma {
41638dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
41648dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
41658dec98edSTao Ma 
41668dec98edSTao Ma 	ocfs2_metadata_cache_purge(&tree->rf_ci);
41678dec98edSTao Ma 
41688dec98edSTao Ma 	return UNBLOCK_CONTINUE;
41698dec98edSTao Ma }
41708dec98edSTao Ma 
ocfs2_set_qinfo_lvb(struct ocfs2_lock_res * lockres)41719e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
41729e33d69fSJan Kara {
41739e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb;
41749e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
41759e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
41769e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
41779e33d69fSJan Kara 
4178a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
41799e33d69fSJan Kara 	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
41809e33d69fSJan Kara 	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
41819e33d69fSJan Kara 	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
41829e33d69fSJan Kara 	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
41839e33d69fSJan Kara 	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
41849e33d69fSJan Kara 	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
41859e33d69fSJan Kara 	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
41869e33d69fSJan Kara }
41879e33d69fSJan Kara 
ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo * oinfo,int ex)41889e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
41899e33d69fSJan Kara {
41909e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
41919e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
41929e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
41939e33d69fSJan Kara 
41949e33d69fSJan Kara 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
41959e33d69fSJan Kara 		ocfs2_cluster_unlock(osb, lockres, level);
41969e33d69fSJan Kara }
41979e33d69fSJan Kara 
ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo * oinfo)41989e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
41999e33d69fSJan Kara {
42009e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
42019e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
42029e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
42039e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
420485eb8b73SJoel Becker 	struct buffer_head *bh = NULL;
42059e33d69fSJan Kara 	struct ocfs2_global_disk_dqinfo *gdinfo;
42069e33d69fSJan Kara 	int status = 0;
42079e33d69fSJan Kara 
42081c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
42091c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
42109e33d69fSJan Kara 		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
42119e33d69fSJan Kara 		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
42129e33d69fSJan Kara 		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
42139e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
42149e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
42159e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
42169e33d69fSJan Kara 					be32_to_cpu(lvb->lvb_free_entry);
42179e33d69fSJan Kara 	} else {
4218ae4f6ef1SJan Kara 		status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
4219ae4f6ef1SJan Kara 						     oinfo->dqi_giblk, &bh);
422085eb8b73SJoel Becker 		if (status) {
42219e33d69fSJan Kara 			mlog_errno(status);
42229e33d69fSJan Kara 			goto bail;
42239e33d69fSJan Kara 		}
42249e33d69fSJan Kara 		gdinfo = (struct ocfs2_global_disk_dqinfo *)
42259e33d69fSJan Kara 					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
42269e33d69fSJan Kara 		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
42279e33d69fSJan Kara 		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
42289e33d69fSJan Kara 		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
42299e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
42309e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
42319e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
42329e33d69fSJan Kara 					le32_to_cpu(gdinfo->dqi_free_entry);
42339e33d69fSJan Kara 		brelse(bh);
42349e33d69fSJan Kara 		ocfs2_track_lock_refresh(lockres);
42359e33d69fSJan Kara 	}
42369e33d69fSJan Kara 
42379e33d69fSJan Kara bail:
42389e33d69fSJan Kara 	return status;
42399e33d69fSJan Kara }
42409e33d69fSJan Kara 
42419e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file
42429e33d69fSJan Kara  * so that we can safely refresh quota info from disk. */
ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo * oinfo,int ex)42439e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
42449e33d69fSJan Kara {
42459e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
42469e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
42479e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
42489e33d69fSJan Kara 	int status = 0;
42499e33d69fSJan Kara 
42509e33d69fSJan Kara 	/* On RO devices, locking really isn't needed... */
42519e33d69fSJan Kara 	if (ocfs2_is_hard_readonly(osb)) {
42529e33d69fSJan Kara 		if (ex)
42539e33d69fSJan Kara 			status = -EROFS;
42549e33d69fSJan Kara 		goto bail;
42559e33d69fSJan Kara 	}
42569e33d69fSJan Kara 	if (ocfs2_mount_local(osb))
42579e33d69fSJan Kara 		goto bail;
42589e33d69fSJan Kara 
42599e33d69fSJan Kara 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
42609e33d69fSJan Kara 	if (status < 0) {
42619e33d69fSJan Kara 		mlog_errno(status);
42629e33d69fSJan Kara 		goto bail;
42639e33d69fSJan Kara 	}
42649e33d69fSJan Kara 	if (!ocfs2_should_refresh_lock_res(lockres))
42659e33d69fSJan Kara 		goto bail;
42669e33d69fSJan Kara 	/* OK, we have the lock but we need to refresh the quota info */
42679e33d69fSJan Kara 	status = ocfs2_refresh_qinfo(oinfo);
42689e33d69fSJan Kara 	if (status)
42699e33d69fSJan Kara 		ocfs2_qinfo_unlock(oinfo, ex);
42709e33d69fSJan Kara 	ocfs2_complete_lock_res_refresh(lockres, status);
42719e33d69fSJan Kara bail:
42729e33d69fSJan Kara 	return status;
42739e33d69fSJan Kara }
42749e33d69fSJan Kara 
ocfs2_refcount_lock(struct ocfs2_refcount_tree * ref_tree,int ex)42758dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
42768dec98edSTao Ma {
42778dec98edSTao Ma 	int status;
42788dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
42798dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
42808dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
42818dec98edSTao Ma 
42828dec98edSTao Ma 
42838dec98edSTao Ma 	if (ocfs2_is_hard_readonly(osb))
42848dec98edSTao Ma 		return -EROFS;
42858dec98edSTao Ma 
42868dec98edSTao Ma 	if (ocfs2_mount_local(osb))
42878dec98edSTao Ma 		return 0;
42888dec98edSTao Ma 
42898dec98edSTao Ma 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
42908dec98edSTao Ma 	if (status < 0)
42918dec98edSTao Ma 		mlog_errno(status);
42928dec98edSTao Ma 
42938dec98edSTao Ma 	return status;
42948dec98edSTao Ma }
42958dec98edSTao Ma 
ocfs2_refcount_unlock(struct ocfs2_refcount_tree * ref_tree,int ex)42968dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
42978dec98edSTao Ma {
42988dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
42998dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
43008dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
43018dec98edSTao Ma 
43028dec98edSTao Ma 	if (!ocfs2_mount_local(osb))
43038dec98edSTao Ma 		ocfs2_cluster_unlock(osb, lockres, level);
43048dec98edSTao Ma }
43058dec98edSTao Ma 
ocfs2_process_blocked_lock(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres)430600600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
4307ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *lockres)
4308ccd979bdSMark Fasheh {
4309ccd979bdSMark Fasheh 	int status;
4310d680efe9SMark Fasheh 	struct ocfs2_unblock_ctl ctl = {0, 0,};
4311ccd979bdSMark Fasheh 	unsigned long flags;
4312ccd979bdSMark Fasheh 
4313ccd979bdSMark Fasheh 	/* Our reference to the lockres in this function can be
4314ccd979bdSMark Fasheh 	 * considered valid until we remove the OCFS2_LOCK_QUEUED
4315ccd979bdSMark Fasheh 	 * flag. */
4316ccd979bdSMark Fasheh 
4317ccd979bdSMark Fasheh 	BUG_ON(!lockres);
4318ccd979bdSMark Fasheh 	BUG_ON(!lockres->l_ops);
4319ccd979bdSMark Fasheh 
43209b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);
4321ccd979bdSMark Fasheh 
4322ccd979bdSMark Fasheh 	/* Detect whether a lock has been marked as going away while
432334d024f8SMark Fasheh 	 * the downconvert thread was processing other things. A lock can
4324ccd979bdSMark Fasheh 	 * still be marked with OCFS2_LOCK_FREEING after this check,
4325ccd979bdSMark Fasheh 	 * but short circuiting here will still save us some
4326ccd979bdSMark Fasheh 	 * performance. */
4327ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
4328ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING)
4329ccd979bdSMark Fasheh 		goto unqueue;
4330ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
4331ccd979bdSMark Fasheh 
4332b5e500e2SMark Fasheh 	status = ocfs2_unblock_lock(osb, lockres, &ctl);
4333ccd979bdSMark Fasheh 	if (status < 0)
4334ccd979bdSMark Fasheh 		mlog_errno(status);
4335ccd979bdSMark Fasheh 
4336ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
4337ccd979bdSMark Fasheh unqueue:
4338d680efe9SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
4339ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
4340ccd979bdSMark Fasheh 	} else
4341ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
4342ccd979bdSMark Fasheh 
43439b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
4344d680efe9SMark Fasheh 	     ctl.requeue ? "yes" : "no");
4345ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
4346ccd979bdSMark Fasheh 
4347d680efe9SMark Fasheh 	if (ctl.unblock_action != UNBLOCK_CONTINUE
4348d680efe9SMark Fasheh 	    && lockres->l_ops->post_unlock)
4349d680efe9SMark Fasheh 		lockres->l_ops->post_unlock(osb, lockres);
4350ccd979bdSMark Fasheh }
4351ccd979bdSMark Fasheh 
ocfs2_schedule_blocked_lock(struct ocfs2_super * osb,struct ocfs2_lock_res * lockres)4352ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
4353ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
4354ccd979bdSMark Fasheh {
4355a75e9ccaSSrinivas Eeda 	unsigned long flags;
4356a75e9ccaSSrinivas Eeda 
4357ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
4358ccd979bdSMark Fasheh 
4359ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
4360ccd979bdSMark Fasheh 		/* Do not schedule a lock for downconvert when it's on
4361ccd979bdSMark Fasheh 		 * the way to destruction - any nodes wanting access
4362ccd979bdSMark Fasheh 		 * to the resource will get it soon. */
43639b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
4364ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags);
4365ccd979bdSMark Fasheh 		return;
4366ccd979bdSMark Fasheh 	}
4367ccd979bdSMark Fasheh 
4368ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
4369ccd979bdSMark Fasheh 
4370a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
4371ccd979bdSMark Fasheh 	if (list_empty(&lockres->l_blocked_list)) {
4372ccd979bdSMark Fasheh 		list_add_tail(&lockres->l_blocked_list,
4373ccd979bdSMark Fasheh 			      &osb->blocked_lock_list);
4374ccd979bdSMark Fasheh 		osb->blocked_lock_count++;
4375ccd979bdSMark Fasheh 	}
4376a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
4377ccd979bdSMark Fasheh }
437834d024f8SMark Fasheh 
ocfs2_downconvert_thread_do_work(struct ocfs2_super * osb)437934d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
438034d024f8SMark Fasheh {
438134d024f8SMark Fasheh 	unsigned long processed;
4382a75e9ccaSSrinivas Eeda 	unsigned long flags;
438334d024f8SMark Fasheh 	struct ocfs2_lock_res *lockres;
438434d024f8SMark Fasheh 
4385a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
438634d024f8SMark Fasheh 	/* grab this early so we know to try again if a state change and
438734d024f8SMark Fasheh 	 * wake happens part-way through our work  */
438834d024f8SMark Fasheh 	osb->dc_work_sequence = osb->dc_wake_sequence;
438934d024f8SMark Fasheh 
439034d024f8SMark Fasheh 	processed = osb->blocked_lock_count;
4391209f7512SJoseph Qi 	/*
4392209f7512SJoseph Qi 	 * blocked lock processing in this loop might call iput which can
4393209f7512SJoseph Qi 	 * remove items off osb->blocked_lock_list. Downconvert up to
4394209f7512SJoseph Qi 	 * 'processed' number of locks, but stop short if we had some
4395209f7512SJoseph Qi 	 * removed in ocfs2_mark_lockres_freeing when downconverting.
4396209f7512SJoseph Qi 	 */
4397209f7512SJoseph Qi 	while (processed && !list_empty(&osb->blocked_lock_list)) {
439834d024f8SMark Fasheh 		lockres = list_entry(osb->blocked_lock_list.next,
439934d024f8SMark Fasheh 				     struct ocfs2_lock_res, l_blocked_list);
440034d024f8SMark Fasheh 		list_del_init(&lockres->l_blocked_list);
440134d024f8SMark Fasheh 		osb->blocked_lock_count--;
4402a75e9ccaSSrinivas Eeda 		spin_unlock_irqrestore(&osb->dc_task_lock, flags);
440334d024f8SMark Fasheh 
440434d024f8SMark Fasheh 		BUG_ON(!processed);
440534d024f8SMark Fasheh 		processed--;
440634d024f8SMark Fasheh 
440734d024f8SMark Fasheh 		ocfs2_process_blocked_lock(osb, lockres);
440834d024f8SMark Fasheh 
4409a75e9ccaSSrinivas Eeda 		spin_lock_irqsave(&osb->dc_task_lock, flags);
441034d024f8SMark Fasheh 	}
4411a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
441234d024f8SMark Fasheh }
441334d024f8SMark Fasheh 
ocfs2_downconvert_thread_lists_empty(struct ocfs2_super * osb)441434d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
441534d024f8SMark Fasheh {
441634d024f8SMark Fasheh 	int empty = 0;
4417a75e9ccaSSrinivas Eeda 	unsigned long flags;
441834d024f8SMark Fasheh 
4419a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
442034d024f8SMark Fasheh 	if (list_empty(&osb->blocked_lock_list))
442134d024f8SMark Fasheh 		empty = 1;
442234d024f8SMark Fasheh 
4423a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
442434d024f8SMark Fasheh 	return empty;
442534d024f8SMark Fasheh }
442634d024f8SMark Fasheh 
ocfs2_downconvert_thread_should_wake(struct ocfs2_super * osb)442734d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
442834d024f8SMark Fasheh {
442934d024f8SMark Fasheh 	int should_wake = 0;
4430a75e9ccaSSrinivas Eeda 	unsigned long flags;
443134d024f8SMark Fasheh 
4432a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
443334d024f8SMark Fasheh 	if (osb->dc_work_sequence != osb->dc_wake_sequence)
443434d024f8SMark Fasheh 		should_wake = 1;
4435a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
443634d024f8SMark Fasheh 
443734d024f8SMark Fasheh 	return should_wake;
443834d024f8SMark Fasheh }
443934d024f8SMark Fasheh 
ocfs2_downconvert_thread(void * arg)4440200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg)
444134d024f8SMark Fasheh {
444234d024f8SMark Fasheh 	struct ocfs2_super *osb = arg;
444334d024f8SMark Fasheh 
444434d024f8SMark Fasheh 	/* only quit once we've been asked to stop and there is no more
444534d024f8SMark Fasheh 	 * work available */
444634d024f8SMark Fasheh 	while (!(kthread_should_stop() &&
444734d024f8SMark Fasheh 		ocfs2_downconvert_thread_lists_empty(osb))) {
444834d024f8SMark Fasheh 
444934d024f8SMark Fasheh 		wait_event_interruptible(osb->dc_event,
445034d024f8SMark Fasheh 					 ocfs2_downconvert_thread_should_wake(osb) ||
445134d024f8SMark Fasheh 					 kthread_should_stop());
445234d024f8SMark Fasheh 
445334d024f8SMark Fasheh 		mlog(0, "downconvert_thread: awoken\n");
445434d024f8SMark Fasheh 
445534d024f8SMark Fasheh 		ocfs2_downconvert_thread_do_work(osb);
445634d024f8SMark Fasheh 	}
445734d024f8SMark Fasheh 
445834d024f8SMark Fasheh 	osb->dc_task = NULL;
44594658d87cSHariprasad Kelam 	return 0;
446034d024f8SMark Fasheh }
446134d024f8SMark Fasheh 
ocfs2_wake_downconvert_thread(struct ocfs2_super * osb)446234d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
446334d024f8SMark Fasheh {
4464a75e9ccaSSrinivas Eeda 	unsigned long flags;
4465a75e9ccaSSrinivas Eeda 
4466a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
446734d024f8SMark Fasheh 	/* make sure the voting thread gets a swipe at whatever changes
446834d024f8SMark Fasheh 	 * the caller may have made to the voting state */
446934d024f8SMark Fasheh 	osb->dc_wake_sequence++;
4470a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
447134d024f8SMark Fasheh 	wake_up(&osb->dc_event);
447234d024f8SMark Fasheh }
4473