xref: /openbmc/linux/fs/ocfs2/dlmglue.c (revision 174cd4b1)
1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*-
2ccd979bdSMark Fasheh  * vim: noexpandtab sw=8 ts=8 sts=0:
3ccd979bdSMark Fasheh  *
4ccd979bdSMark Fasheh  * dlmglue.c
5ccd979bdSMark Fasheh  *
6ccd979bdSMark Fasheh  * Code which implements an OCFS2 specific interface to our DLM.
7ccd979bdSMark Fasheh  *
8ccd979bdSMark Fasheh  * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
9ccd979bdSMark Fasheh  *
10ccd979bdSMark Fasheh  * This program is free software; you can redistribute it and/or
11ccd979bdSMark Fasheh  * modify it under the terms of the GNU General Public
12ccd979bdSMark Fasheh  * License as published by the Free Software Foundation; either
13ccd979bdSMark Fasheh  * version 2 of the License, or (at your option) any later version.
14ccd979bdSMark Fasheh  *
15ccd979bdSMark Fasheh  * This program is distributed in the hope that it will be useful,
16ccd979bdSMark Fasheh  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17ccd979bdSMark Fasheh  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18ccd979bdSMark Fasheh  * General Public License for more details.
19ccd979bdSMark Fasheh  *
20ccd979bdSMark Fasheh  * You should have received a copy of the GNU General Public
21ccd979bdSMark Fasheh  * License along with this program; if not, write to the
22ccd979bdSMark Fasheh  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23ccd979bdSMark Fasheh  * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh  */
25ccd979bdSMark Fasheh 
26ccd979bdSMark Fasheh #include <linux/types.h>
27ccd979bdSMark Fasheh #include <linux/slab.h>
28ccd979bdSMark Fasheh #include <linux/highmem.h>
29ccd979bdSMark Fasheh #include <linux/mm.h>
30ccd979bdSMark Fasheh #include <linux/kthread.h>
31ccd979bdSMark Fasheh #include <linux/pagemap.h>
32ccd979bdSMark Fasheh #include <linux/debugfs.h>
33ccd979bdSMark Fasheh #include <linux/seq_file.h>
348ddb7b00SSunil Mushran #include <linux/time.h>
359e33d69fSJan Kara #include <linux/quotaops.h>
36174cd4b1SIngo Molnar #include <linux/sched/signal.h>
37ccd979bdSMark Fasheh 
38ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE
39ccd979bdSMark Fasheh #include <cluster/masklog.h>
40ccd979bdSMark Fasheh 
41ccd979bdSMark Fasheh #include "ocfs2.h"
42d24fbcdaSJoel Becker #include "ocfs2_lockingver.h"
43ccd979bdSMark Fasheh 
44ccd979bdSMark Fasheh #include "alloc.h"
45d680efe9SMark Fasheh #include "dcache.h"
46ccd979bdSMark Fasheh #include "dlmglue.h"
47ccd979bdSMark Fasheh #include "extent_map.h"
487f1a37e3STiger Yang #include "file.h"
49ccd979bdSMark Fasheh #include "heartbeat.h"
50ccd979bdSMark Fasheh #include "inode.h"
51ccd979bdSMark Fasheh #include "journal.h"
5224ef1815SJoel Becker #include "stackglue.h"
53ccd979bdSMark Fasheh #include "slot_map.h"
54ccd979bdSMark Fasheh #include "super.h"
55ccd979bdSMark Fasheh #include "uptodate.h"
569e33d69fSJan Kara #include "quota.h"
578dec98edSTao Ma #include "refcounttree.h"
58b8a7a3a6SAndreas Gruenbacher #include "acl.h"
59ccd979bdSMark Fasheh 
60ccd979bdSMark Fasheh #include "buffer_head_io.h"
61ccd979bdSMark Fasheh 
62ccd979bdSMark Fasheh struct ocfs2_mask_waiter {
63ccd979bdSMark Fasheh 	struct list_head	mw_item;
64ccd979bdSMark Fasheh 	int			mw_status;
65ccd979bdSMark Fasheh 	struct completion	mw_complete;
66ccd979bdSMark Fasheh 	unsigned long		mw_mask;
67ccd979bdSMark Fasheh 	unsigned long		mw_goal;
688ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
695bc970e8SSunil Mushran 	ktime_t			mw_lock_start;
708ddb7b00SSunil Mushran #endif
71ccd979bdSMark Fasheh };
72ccd979bdSMark Fasheh 
7354a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
7454a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
75cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
769e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
77ccd979bdSMark Fasheh 
/*
 * Values returned by the ->downconvert_worker callbacks.
 *
 * They steer what ocfs2_unblock_lock() and
 * ocfs2_process_blocked_lock() do next.
 */
enum ocfs2_unblock_action {
	/* Continue downconvert */
	UNBLOCK_CONTINUE	= 0,

	/* Continue downconvert, fire ->post_unlock callback */
	UNBLOCK_CONTINUE_POST	= 1,

	/* Do not downconvert, fire ->post_unlock() callback. */
	UNBLOCK_STOP_POST	= 2,
};
92d680efe9SMark Fasheh 
93d680efe9SMark Fasheh struct ocfs2_unblock_ctl {
94d680efe9SMark Fasheh 	int requeue;
95d680efe9SMark Fasheh 	enum ocfs2_unblock_action unblock_action;
96d680efe9SMark Fasheh };
97d680efe9SMark Fasheh 
98cb25797dSJan Kara /* Lockdep class keys */
99cb25797dSJan Kara struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
100cb25797dSJan Kara 
101810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
102810d5aebSMark Fasheh 					int new_level);
103810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
104810d5aebSMark Fasheh 
105cc567d89SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
106cc567d89SMark Fasheh 				     int blocking);
107cc567d89SMark Fasheh 
108cc567d89SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
109cc567d89SMark Fasheh 				       int blocking);
110d680efe9SMark Fasheh 
111d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
112d680efe9SMark Fasheh 				     struct ocfs2_lock_res *lockres);
113ccd979bdSMark Fasheh 
1149e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
1156cb129f5SAdrian Bunk 
1168dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
1178dec98edSTao Ma 					    int new_level);
1188dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
1198dec98edSTao Ma 					 int blocking);
1208dec98edSTao Ma 
/* Dump a lockres' meta LVB, tagging the output with the calling function and line. */
#define mlog_meta_lvb(__level, __lockres)				\
	ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__,		\
				 __LINE__, __lockres)
1226cb129f5SAdrian Bunk 
1236cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. */
1246cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level,
1256cb129f5SAdrian Bunk 				     const char *function,
1266cb129f5SAdrian Bunk 				     unsigned int line,
1276cb129f5SAdrian Bunk 				     struct ocfs2_lock_res *lockres)
1286cb129f5SAdrian Bunk {
129a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1306cb129f5SAdrian Bunk 
1316cb129f5SAdrian Bunk 	mlog(level, "LVB information for %s (called from %s:%u):\n",
1326cb129f5SAdrian Bunk 	     lockres->l_name, function, line);
1336cb129f5SAdrian Bunk 	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
1346cb129f5SAdrian Bunk 	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
1356cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_igeneration));
1366cb129f5SAdrian Bunk 	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
1376cb129f5SAdrian Bunk 	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
1386cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
1396cb129f5SAdrian Bunk 	     be16_to_cpu(lvb->lvb_imode));
1406cb129f5SAdrian Bunk 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
1416cb129f5SAdrian Bunk 	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
1426cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
1436cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
1446cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
1456cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iattr));
1466cb129f5SAdrian Bunk }
1476cb129f5SAdrian Bunk 
1486cb129f5SAdrian Bunk 
/*
 * OCFS2 Lock Resource Operations
 *
 * Per-lock-type hooks that tune the generic dlmglue locking code.
 *
 * The simplest lock types point ->l_priv at their struct ocfs2_super
 * and leave every callback unset so the default actions apply.
 *
 * For now each lock type must also provide an init function and
 * trivial lock/unlock wrappers. Call ocfs2_simple_drop_lockres() once
 * the lock is no longer needed (i.e., when the object is destroyed).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Map an ocfs2_lock_res * to its ocfs2_super *. Only needed
	 * when ->l_priv is not itself an ocfs2_super pointer.
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optional. Runs in the downconvert thread after a successful
	 * downconvert. The lockres is not referenced again once this
	 * has been called, so freeing memory etc. is safe here.
	 *
	 * Exactly when it fires is decided by the value returned from
	 * ->downconvert_worker().
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Lets a lock type add its own checks for whether a
	 * downconvert is safe. Return 0 to re-queue the downconvert
	 * for later, nonzero to continue.
	 *
	 * Most locks do not need this: the default check that there
	 * are no incompatible holders is sufficient.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Lets a lock type fill in the lock value block. Invoked on
	 * downconvert and when a lock is dropped.
	 *
	 * Lock types using this should set LOCK_TYPE_USES_LVB in the
	 * flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Invoked from the downconvert thread once it has decided a
	 * lock will be downconverted. No locks are held at that
	 * point, so the callback may do work that schedules (syncing
	 * out data, etc).
	 *
	 * Must return one of the ocfs2_unblock_action values to tell
	 * the thread what to do next.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* flags describing the specific requirements of a
	 * lock type. Each flag is described at its definition.
	 */
	int flags;
};
220ccd979bdSMark Fasheh 
/*
 * Set by lock types that need to "refresh" potentially stale data the
 * first time a meaningful (PRMODE or EXMODE) level is obtained. With
 * this flag, the ast function sets OCFS2_LOCK_NEEDS_REFRESH in the
 * lockres' l_flags, and the locking wrapper is expected to clear
 * OCFS2_LOCK_NEEDS_REFRESH again once it has refreshed.
 */
#define LOCK_TYPE_REQUIRES_REFRESH	0x1

/*
 * Set by lock types that use the lock value block. Such types must
 * define the ->set_lvb callback.
 */
#define LOCK_TYPE_USES_LVB		0x2
236b80fc012SMark Fasheh 
237ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
23854a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
239f625c979SMark Fasheh 	.flags		= 0,
240ccd979bdSMark Fasheh };
241ccd979bdSMark Fasheh 
242e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
24354a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
244810d5aebSMark Fasheh 	.check_downconvert = ocfs2_check_meta_downconvert,
245810d5aebSMark Fasheh 	.set_lvb	= ocfs2_set_meta_lvb,
246f1f54068SMark Fasheh 	.downconvert_worker = ocfs2_data_convert_worker,
247b80fc012SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
248ccd979bdSMark Fasheh };
249ccd979bdSMark Fasheh 
250ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = {
251f625c979SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
252ccd979bdSMark Fasheh };
253ccd979bdSMark Fasheh 
254ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
255f625c979SMark Fasheh 	.flags		= 0,
256ccd979bdSMark Fasheh };
257ccd979bdSMark Fasheh 
2586ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
2596ca497a8Swengang wang 	.flags		= 0,
2606ca497a8Swengang wang };
2616ca497a8Swengang wang 
26283273932SSrinivas Eeda static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
26383273932SSrinivas Eeda 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
26483273932SSrinivas Eeda };
26583273932SSrinivas Eeda 
266d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
26754a7e755SMark Fasheh 	.get_osb	= ocfs2_get_dentry_osb,
268d680efe9SMark Fasheh 	.post_unlock	= ocfs2_dentry_post_unlock,
269cc567d89SMark Fasheh 	.downconvert_worker = ocfs2_dentry_convert_worker,
270f625c979SMark Fasheh 	.flags		= 0,
271d680efe9SMark Fasheh };
272d680efe9SMark Fasheh 
27350008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
27450008630STiger Yang 	.get_osb	= ocfs2_get_inode_osb,
27550008630STiger Yang 	.flags		= 0,
27650008630STiger Yang };
27750008630STiger Yang 
278cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
279cf8e06f1SMark Fasheh 	.get_osb	= ocfs2_get_file_osb,
280cf8e06f1SMark Fasheh 	.flags		= 0,
281cf8e06f1SMark Fasheh };
282cf8e06f1SMark Fasheh 
2839e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
2849e33d69fSJan Kara 	.set_lvb	= ocfs2_set_qinfo_lvb,
2859e33d69fSJan Kara 	.get_osb	= ocfs2_get_qinfo_osb,
2869e33d69fSJan Kara 	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
2879e33d69fSJan Kara };
2889e33d69fSJan Kara 
2898dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
2908dec98edSTao Ma 	.check_downconvert = ocfs2_check_refcount_downconvert,
2918dec98edSTao Ma 	.downconvert_worker = ocfs2_refcount_convert_worker,
2928dec98edSTao Ma 	.flags		= 0,
2938dec98edSTao Ma };
2948dec98edSTao Ma 
295ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
296ccd979bdSMark Fasheh {
297ccd979bdSMark Fasheh 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
29850008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_RW ||
29950008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
300ccd979bdSMark Fasheh }
301ccd979bdSMark Fasheh 
302c0e41338SJoel Becker static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
303a796d286SJoel Becker {
304a796d286SJoel Becker 	return container_of(lksb, struct ocfs2_lock_res, l_lksb);
305a796d286SJoel Becker }
306a796d286SJoel Becker 
307ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
308ccd979bdSMark Fasheh {
309ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_is_inode_lock(lockres));
310ccd979bdSMark Fasheh 
311ccd979bdSMark Fasheh 	return (struct inode *) lockres->l_priv;
312ccd979bdSMark Fasheh }
313ccd979bdSMark Fasheh 
314d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
315d680efe9SMark Fasheh {
316d680efe9SMark Fasheh 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
317d680efe9SMark Fasheh 
318d680efe9SMark Fasheh 	return (struct ocfs2_dentry_lock *)lockres->l_priv;
319d680efe9SMark Fasheh }
320d680efe9SMark Fasheh 
3219e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
3229e33d69fSJan Kara {
3239e33d69fSJan Kara 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
3249e33d69fSJan Kara 
3259e33d69fSJan Kara 	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
3269e33d69fSJan Kara }
3279e33d69fSJan Kara 
3288dec98edSTao Ma static inline struct ocfs2_refcount_tree *
3298dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
3308dec98edSTao Ma {
3318dec98edSTao Ma 	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
3328dec98edSTao Ma }
3338dec98edSTao Ma 
33454a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
33554a7e755SMark Fasheh {
33654a7e755SMark Fasheh 	if (lockres->l_ops->get_osb)
33754a7e755SMark Fasheh 		return lockres->l_ops->get_osb(lockres);
33854a7e755SMark Fasheh 
33954a7e755SMark Fasheh 	return (struct ocfs2_super *)lockres->l_priv;
34054a7e755SMark Fasheh }
34154a7e755SMark Fasheh 
342ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
343ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
344ccd979bdSMark Fasheh 			     int level,
345bd3e7610SJoel Becker 			     u32 dlm_flags);
346ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
347ccd979bdSMark Fasheh 						     int wanted);
348cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
349ccd979bdSMark Fasheh 				   struct ocfs2_lock_res *lockres,
350cb25797dSJan Kara 				   int level, unsigned long caller_ip);
351cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
352cb25797dSJan Kara 					struct ocfs2_lock_res *lockres,
353cb25797dSJan Kara 					int level)
354cb25797dSJan Kara {
355cb25797dSJan Kara 	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
356cb25797dSJan Kara }
357cb25797dSJan Kara 
358ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
359ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
360ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
361ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
362ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
363ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres);
364ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
365ccd979bdSMark Fasheh 						int convert);
/*
 * Log a DLM failure at ML_ERROR.
 *
 * @_func:    name of the DLM call that failed (printed with %s)
 * @_err:     the error value it returned (printed with %d)
 * @_lockres: the lock resource involved; evaluated more than once, so
 *            it must not have side effects
 *
 * Dentry lock names are not printed whole: the leading
 * OCFS2_DENTRY_LOCK_INO_START - 1 characters are shown via %.*s, and
 * the value returned by ocfs2_get_dentry_lock_ino() is appended in
 * hex. All other lock types print l_name directly.
 */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     (_err), (_func), (_lockres)->l_name);				\
	else										\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     (_err), (_func), OCFS2_DENTRY_LOCK_INO_START - 1,			\
		     (_lockres)->l_name,						\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
} while (0)
37534d024f8SMark Fasheh static int ocfs2_downconvert_thread(void *arg);
37634d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
377ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres);
378e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode,
379ccd979bdSMark Fasheh 				  struct buffer_head **bh);
380ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
381ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level);
382de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
383cf8e06f1SMark Fasheh 					      int new_level);
384cf8e06f1SMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
385cf8e06f1SMark Fasheh 				  struct ocfs2_lock_res *lockres,
386cf8e06f1SMark Fasheh 				  int new_level,
387de551246SJoel Becker 				  int lvb,
388de551246SJoel Becker 				  unsigned int generation);
389cf8e06f1SMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
390cf8e06f1SMark Fasheh 				        struct ocfs2_lock_res *lockres);
391cf8e06f1SMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb,
392cf8e06f1SMark Fasheh 				struct ocfs2_lock_res *lockres);
393cf8e06f1SMark Fasheh 
394ccd979bdSMark Fasheh 
395ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
396ccd979bdSMark Fasheh 				  u64 blkno,
397ccd979bdSMark Fasheh 				  u32 generation,
398ccd979bdSMark Fasheh 				  char *name)
399ccd979bdSMark Fasheh {
400ccd979bdSMark Fasheh 	int len;
401ccd979bdSMark Fasheh 
402ccd979bdSMark Fasheh 	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
403ccd979bdSMark Fasheh 
404b0697053SMark Fasheh 	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
405b0697053SMark Fasheh 		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
406b0697053SMark Fasheh 		       (long long)blkno, generation);
407ccd979bdSMark Fasheh 
408ccd979bdSMark Fasheh 	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
409ccd979bdSMark Fasheh 
410ccd979bdSMark Fasheh 	mlog(0, "built lock resource with name: %s\n", name);
411ccd979bdSMark Fasheh }
412ccd979bdSMark Fasheh 
/* Held while adding or removing a lockres on the debug tracking list. */
static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
414ccd979bdSMark Fasheh 
415ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
416ccd979bdSMark Fasheh 				       struct ocfs2_dlm_debug *dlm_debug)
417ccd979bdSMark Fasheh {
418ccd979bdSMark Fasheh 	mlog(0, "Add tracking for lockres %s\n", res->l_name);
419ccd979bdSMark Fasheh 
420ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
421ccd979bdSMark Fasheh 	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
422ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
423ccd979bdSMark Fasheh }
424ccd979bdSMark Fasheh 
425ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
426ccd979bdSMark Fasheh {
427ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
428ccd979bdSMark Fasheh 	if (!list_empty(&res->l_debug_list))
429ccd979bdSMark Fasheh 		list_del_init(&res->l_debug_list);
430ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
431ccd979bdSMark Fasheh }
432ccd979bdSMark Fasheh 
4338ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
4348ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
4358ddb7b00SSunil Mushran {
4368ddb7b00SSunil Mushran 	res->l_lock_refresh = 0;
4375bc970e8SSunil Mushran 	memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
4385bc970e8SSunil Mushran 	memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
4398ddb7b00SSunil Mushran }
4408ddb7b00SSunil Mushran 
4418ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
4428ddb7b00SSunil Mushran 				    struct ocfs2_mask_waiter *mw, int ret)
4438ddb7b00SSunil Mushran {
4445bc970e8SSunil Mushran 	u32 usec;
4455bc970e8SSunil Mushran 	ktime_t kt;
4465bc970e8SSunil Mushran 	struct ocfs2_lock_stats *stats;
4478ddb7b00SSunil Mushran 
4485bc970e8SSunil Mushran 	if (level == LKM_PRMODE)
4495bc970e8SSunil Mushran 		stats = &res->l_lock_prmode;
4505bc970e8SSunil Mushran 	else if (level == LKM_EXMODE)
4515bc970e8SSunil Mushran 		stats = &res->l_lock_exmode;
4525bc970e8SSunil Mushran 	else
4538ddb7b00SSunil Mushran 		return;
4548ddb7b00SSunil Mushran 
4555bc970e8SSunil Mushran 	kt = ktime_sub(ktime_get(), mw->mw_lock_start);
4565bc970e8SSunil Mushran 	usec = ktime_to_us(kt);
4575bc970e8SSunil Mushran 
4585bc970e8SSunil Mushran 	stats->ls_gets++;
4595bc970e8SSunil Mushran 	stats->ls_total += ktime_to_ns(kt);
4605bc970e8SSunil Mushran 	/* overflow */
46116865b7cSroel 	if (unlikely(stats->ls_gets == 0)) {
4625bc970e8SSunil Mushran 		stats->ls_gets++;
4635bc970e8SSunil Mushran 		stats->ls_total = ktime_to_ns(kt);
4645bc970e8SSunil Mushran 	}
4655bc970e8SSunil Mushran 
4665bc970e8SSunil Mushran 	if (stats->ls_max < usec)
4675bc970e8SSunil Mushran 		stats->ls_max = usec;
4685bc970e8SSunil Mushran 
4698ddb7b00SSunil Mushran 	if (ret)
4705bc970e8SSunil Mushran 		stats->ls_fail++;
4718ddb7b00SSunil Mushran }
4728ddb7b00SSunil Mushran 
4738ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
4748ddb7b00SSunil Mushran {
4758ddb7b00SSunil Mushran 	lockres->l_lock_refresh++;
4768ddb7b00SSunil Mushran }
4778ddb7b00SSunil Mushran 
4788ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
4798ddb7b00SSunil Mushran {
4805bc970e8SSunil Mushran 	mw->mw_lock_start = ktime_get();
4818ddb7b00SSunil Mushran }
4828ddb7b00SSunil Mushran #else
/* Lock statistics are compiled out: every helper is an empty stub. */
static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
}

static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
					   int level,
					   struct ocfs2_mask_waiter *mw,
					   int ret)
{
}

static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
}

static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
}
4968ddb7b00SSunil Mushran #endif
4978ddb7b00SSunil Mushran 
498ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
499ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *res,
500ccd979bdSMark Fasheh 				       enum ocfs2_lock_type type,
501ccd979bdSMark Fasheh 				       struct ocfs2_lock_res_ops *ops,
502ccd979bdSMark Fasheh 				       void *priv)
503ccd979bdSMark Fasheh {
504ccd979bdSMark Fasheh 	res->l_type          = type;
505ccd979bdSMark Fasheh 	res->l_ops           = ops;
506ccd979bdSMark Fasheh 	res->l_priv          = priv;
507ccd979bdSMark Fasheh 
508bd3e7610SJoel Becker 	res->l_level         = DLM_LOCK_IV;
509bd3e7610SJoel Becker 	res->l_requested     = DLM_LOCK_IV;
510bd3e7610SJoel Becker 	res->l_blocking      = DLM_LOCK_IV;
511ccd979bdSMark Fasheh 	res->l_action        = OCFS2_AST_INVALID;
512ccd979bdSMark Fasheh 	res->l_unlock_action = OCFS2_UNLOCK_INVALID;
513ccd979bdSMark Fasheh 
514ccd979bdSMark Fasheh 	res->l_flags         = OCFS2_LOCK_INITIALIZED;
515ccd979bdSMark Fasheh 
516ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
5178ddb7b00SSunil Mushran 
5188ddb7b00SSunil Mushran 	ocfs2_init_lock_stats(res);
519cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
520cb25797dSJan Kara 	if (type != OCFS2_LOCK_TYPE_OPEN)
521cb25797dSJan Kara 		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
522cb25797dSJan Kara 				 &lockdep_keys[type], 0);
523cb25797dSJan Kara 	else
524cb25797dSJan Kara 		res->l_lockdep_map.key = NULL;
525cb25797dSJan Kara #endif
526ccd979bdSMark Fasheh }
527ccd979bdSMark Fasheh 
528ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
529ccd979bdSMark Fasheh {
530ccd979bdSMark Fasheh 	/* This also clears out the lock status block */
531ccd979bdSMark Fasheh 	memset(res, 0, sizeof(struct ocfs2_lock_res));
532ccd979bdSMark Fasheh 	spin_lock_init(&res->l_lock);
533ccd979bdSMark Fasheh 	init_waitqueue_head(&res->l_event);
534ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_blocked_list);
535ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_mask_waiters);
536439a36b8SEric Ren 	INIT_LIST_HEAD(&res->l_holders);
537ccd979bdSMark Fasheh }
538ccd979bdSMark Fasheh 
539ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
540ccd979bdSMark Fasheh 			       enum ocfs2_lock_type type,
54124c19ef4SMark Fasheh 			       unsigned int generation,
542ccd979bdSMark Fasheh 			       struct inode *inode)
543ccd979bdSMark Fasheh {
544ccd979bdSMark Fasheh 	struct ocfs2_lock_res_ops *ops;
545ccd979bdSMark Fasheh 
546ccd979bdSMark Fasheh 	switch(type) {
547ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_RW:
548ccd979bdSMark Fasheh 			ops = &ocfs2_inode_rw_lops;
549ccd979bdSMark Fasheh 			break;
550ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_META:
551e63aecb6SMark Fasheh 			ops = &ocfs2_inode_inode_lops;
552ccd979bdSMark Fasheh 			break;
55350008630STiger Yang 		case OCFS2_LOCK_TYPE_OPEN:
55450008630STiger Yang 			ops = &ocfs2_inode_open_lops;
55550008630STiger Yang 			break;
556ccd979bdSMark Fasheh 		default:
557ccd979bdSMark Fasheh 			mlog_bug_on_msg(1, "type: %d\n", type);
558ccd979bdSMark Fasheh 			ops = NULL; /* thanks, gcc */
559ccd979bdSMark Fasheh 			break;
560ccd979bdSMark Fasheh 	};
561ccd979bdSMark Fasheh 
562d680efe9SMark Fasheh 	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
56324c19ef4SMark Fasheh 			      generation, res->l_name);
564d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
565d680efe9SMark Fasheh }
566d680efe9SMark Fasheh 
56754a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
56854a7e755SMark Fasheh {
56954a7e755SMark Fasheh 	struct inode *inode = ocfs2_lock_res_inode(lockres);
57054a7e755SMark Fasheh 
57154a7e755SMark Fasheh 	return OCFS2_SB(inode->i_sb);
57254a7e755SMark Fasheh }
57354a7e755SMark Fasheh 
5749e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
5759e33d69fSJan Kara {
5769e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *info = lockres->l_priv;
5779e33d69fSJan Kara 
5789e33d69fSJan Kara 	return OCFS2_SB(info->dqi_gi.dqi_sb);
5799e33d69fSJan Kara }
5809e33d69fSJan Kara 
581cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
582cf8e06f1SMark Fasheh {
583cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = lockres->l_priv;
584cf8e06f1SMark Fasheh 
585cf8e06f1SMark Fasheh 	return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
586cf8e06f1SMark Fasheh }
587cf8e06f1SMark Fasheh 
588d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
589d680efe9SMark Fasheh {
590d680efe9SMark Fasheh 	__be64 inode_blkno_be;
591d680efe9SMark Fasheh 
592d680efe9SMark Fasheh 	memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
593d680efe9SMark Fasheh 	       sizeof(__be64));
594d680efe9SMark Fasheh 
595d680efe9SMark Fasheh 	return be64_to_cpu(inode_blkno_be);
596d680efe9SMark Fasheh }
597d680efe9SMark Fasheh 
59854a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
59954a7e755SMark Fasheh {
60054a7e755SMark Fasheh 	struct ocfs2_dentry_lock *dl = lockres->l_priv;
60154a7e755SMark Fasheh 
60254a7e755SMark Fasheh 	return OCFS2_SB(dl->dl_inode->i_sb);
60354a7e755SMark Fasheh }
60454a7e755SMark Fasheh 
605d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
606d680efe9SMark Fasheh 				u64 parent, struct inode *inode)
607d680efe9SMark Fasheh {
608d680efe9SMark Fasheh 	int len;
609d680efe9SMark Fasheh 	u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
610d680efe9SMark Fasheh 	__be64 inode_blkno_be = cpu_to_be64(inode_blkno);
611d680efe9SMark Fasheh 	struct ocfs2_lock_res *lockres = &dl->dl_lockres;
612d680efe9SMark Fasheh 
613d680efe9SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
614d680efe9SMark Fasheh 
615d680efe9SMark Fasheh 	/*
616d680efe9SMark Fasheh 	 * Unfortunately, the standard lock naming scheme won't work
617d680efe9SMark Fasheh 	 * here because we have two 16 byte values to use. Instead,
618d680efe9SMark Fasheh 	 * we'll stuff the inode number as a binary value. We still
619d680efe9SMark Fasheh 	 * want error prints to show something without garbling the
620d680efe9SMark Fasheh 	 * display, so drop a null byte in there before the inode
621d680efe9SMark Fasheh 	 * number. A future version of OCFS2 will likely use all
622d680efe9SMark Fasheh 	 * binary lock names. The stringified names have been a
623d680efe9SMark Fasheh 	 * tremendous aid in debugging, but now that the debugfs
624d680efe9SMark Fasheh 	 * interface exists, we can mangle things there if need be.
625d680efe9SMark Fasheh 	 *
626d680efe9SMark Fasheh 	 * NOTE: We also drop the standard "pad" value (the total lock
627d680efe9SMark Fasheh 	 * name size stays the same though - the last part is all
628d680efe9SMark Fasheh 	 * zeros due to the memset in ocfs2_lock_res_init_once()
629d680efe9SMark Fasheh 	 */
630d680efe9SMark Fasheh 	len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
631d680efe9SMark Fasheh 		       "%c%016llx",
632d680efe9SMark Fasheh 		       ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
633d680efe9SMark Fasheh 		       (long long)parent);
634d680efe9SMark Fasheh 
635d680efe9SMark Fasheh 	BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
636d680efe9SMark Fasheh 
637d680efe9SMark Fasheh 	memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
638d680efe9SMark Fasheh 	       sizeof(__be64));
639d680efe9SMark Fasheh 
640d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
641d680efe9SMark Fasheh 				   OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
642d680efe9SMark Fasheh 				   dl);
643ccd979bdSMark Fasheh }
644ccd979bdSMark Fasheh 
645ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
646ccd979bdSMark Fasheh 				      struct ocfs2_super *osb)
647ccd979bdSMark Fasheh {
648ccd979bdSMark Fasheh 	/* Superblock lockres doesn't come from a slab so we call init
649ccd979bdSMark Fasheh 	 * once on it manually.  */
650ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
651d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
652d680efe9SMark Fasheh 			      0, res->l_name);
653ccd979bdSMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
654ccd979bdSMark Fasheh 				   &ocfs2_super_lops, osb);
655ccd979bdSMark Fasheh }
656ccd979bdSMark Fasheh 
657ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
658ccd979bdSMark Fasheh 				       struct ocfs2_super *osb)
659ccd979bdSMark Fasheh {
660ccd979bdSMark Fasheh 	/* Rename lockres doesn't come from a slab so we call init
661ccd979bdSMark Fasheh 	 * once on it manually.  */
662ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
663d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
664d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
665ccd979bdSMark Fasheh 				   &ocfs2_rename_lops, osb);
666ccd979bdSMark Fasheh }
667ccd979bdSMark Fasheh 
6686ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
6696ca497a8Swengang wang 					 struct ocfs2_super *osb)
6706ca497a8Swengang wang {
6716ca497a8Swengang wang 	/* nfs_sync lockres doesn't come from a slab so we call init
6726ca497a8Swengang wang 	 * once on it manually.  */
6736ca497a8Swengang wang 	ocfs2_lock_res_init_once(res);
6746ca497a8Swengang wang 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
6756ca497a8Swengang wang 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
6766ca497a8Swengang wang 				   &ocfs2_nfs_sync_lops, osb);
6776ca497a8Swengang wang }
6786ca497a8Swengang wang 
67983273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
68083273932SSrinivas Eeda 					    struct ocfs2_super *osb)
68183273932SSrinivas Eeda {
68283273932SSrinivas Eeda 	ocfs2_lock_res_init_once(res);
68383273932SSrinivas Eeda 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
68483273932SSrinivas Eeda 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
68583273932SSrinivas Eeda 				   &ocfs2_orphan_scan_lops, osb);
68683273932SSrinivas Eeda }
68783273932SSrinivas Eeda 
688cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
689cf8e06f1SMark Fasheh 			      struct ocfs2_file_private *fp)
690cf8e06f1SMark Fasheh {
691cf8e06f1SMark Fasheh 	struct inode *inode = fp->fp_file->f_mapping->host;
692cf8e06f1SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
693cf8e06f1SMark Fasheh 
694cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
695cf8e06f1SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
696cf8e06f1SMark Fasheh 			      inode->i_generation, lockres->l_name);
697cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
698cf8e06f1SMark Fasheh 				   OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
699cf8e06f1SMark Fasheh 				   fp);
700cf8e06f1SMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_NOCACHE;
701cf8e06f1SMark Fasheh }
702cf8e06f1SMark Fasheh 
7039e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
7049e33d69fSJan Kara 			       struct ocfs2_mem_dqinfo *info)
7059e33d69fSJan Kara {
7069e33d69fSJan Kara 	ocfs2_lock_res_init_once(lockres);
7079e33d69fSJan Kara 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
7089e33d69fSJan Kara 			      0, lockres->l_name);
7099e33d69fSJan Kara 	ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
7109e33d69fSJan Kara 				   OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
7119e33d69fSJan Kara 				   info);
7129e33d69fSJan Kara }
7139e33d69fSJan Kara 
7148dec98edSTao Ma void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
7158dec98edSTao Ma 				  struct ocfs2_super *osb, u64 ref_blkno,
7168dec98edSTao Ma 				  unsigned int generation)
7178dec98edSTao Ma {
7188dec98edSTao Ma 	ocfs2_lock_res_init_once(lockres);
7198dec98edSTao Ma 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
7208dec98edSTao Ma 			      generation, lockres->l_name);
7218dec98edSTao Ma 	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
7228dec98edSTao Ma 				   &ocfs2_refcount_block_lops, osb);
7238dec98edSTao Ma }
7248dec98edSTao Ma 
725ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
726ccd979bdSMark Fasheh {
727ccd979bdSMark Fasheh 	if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
728ccd979bdSMark Fasheh 		return;
729ccd979bdSMark Fasheh 
730ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
731ccd979bdSMark Fasheh 
732ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
733ccd979bdSMark Fasheh 			"Lockres %s is on the blocked list\n",
734ccd979bdSMark Fasheh 			res->l_name);
735ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
736ccd979bdSMark Fasheh 			"Lockres %s has mask waiters pending\n",
737ccd979bdSMark Fasheh 			res->l_name);
738ccd979bdSMark Fasheh 	mlog_bug_on_msg(spin_is_locked(&res->l_lock),
739ccd979bdSMark Fasheh 			"Lockres %s is locked\n",
740ccd979bdSMark Fasheh 			res->l_name);
741ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ro_holders,
742ccd979bdSMark Fasheh 			"Lockres %s has %u ro holders\n",
743ccd979bdSMark Fasheh 			res->l_name, res->l_ro_holders);
744ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ex_holders,
745ccd979bdSMark Fasheh 			"Lockres %s has %u ex holders\n",
746ccd979bdSMark Fasheh 			res->l_name, res->l_ex_holders);
747ccd979bdSMark Fasheh 
748ccd979bdSMark Fasheh 	/* Need to clear out the lock status block for the dlm */
749ccd979bdSMark Fasheh 	memset(&res->l_lksb, 0, sizeof(res->l_lksb));
750ccd979bdSMark Fasheh 
751ccd979bdSMark Fasheh 	res->l_flags = 0UL;
752ccd979bdSMark Fasheh }
753ccd979bdSMark Fasheh 
754439a36b8SEric Ren /*
755439a36b8SEric Ren  * Keep a list of processes who have interest in a lockres.
756439a36b8SEric Ren  * Note: this is now only uesed for check recursive cluster locking.
757439a36b8SEric Ren  */
758439a36b8SEric Ren static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
759439a36b8SEric Ren 				   struct ocfs2_lock_holder *oh)
760439a36b8SEric Ren {
761439a36b8SEric Ren 	INIT_LIST_HEAD(&oh->oh_list);
762439a36b8SEric Ren 	oh->oh_owner_pid = get_pid(task_pid(current));
763439a36b8SEric Ren 
764439a36b8SEric Ren 	spin_lock(&lockres->l_lock);
765439a36b8SEric Ren 	list_add_tail(&oh->oh_list, &lockres->l_holders);
766439a36b8SEric Ren 	spin_unlock(&lockres->l_lock);
767439a36b8SEric Ren }
768439a36b8SEric Ren 
769439a36b8SEric Ren static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
770439a36b8SEric Ren 				       struct ocfs2_lock_holder *oh)
771439a36b8SEric Ren {
772439a36b8SEric Ren 	spin_lock(&lockres->l_lock);
773439a36b8SEric Ren 	list_del(&oh->oh_list);
774439a36b8SEric Ren 	spin_unlock(&lockres->l_lock);
775439a36b8SEric Ren 
776439a36b8SEric Ren 	put_pid(oh->oh_owner_pid);
777439a36b8SEric Ren }
778439a36b8SEric Ren 
779439a36b8SEric Ren static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres)
780439a36b8SEric Ren {
781439a36b8SEric Ren 	struct ocfs2_lock_holder *oh;
782439a36b8SEric Ren 	struct pid *pid;
783439a36b8SEric Ren 
784439a36b8SEric Ren 	/* look in the list of holders for one with the current task as owner */
785439a36b8SEric Ren 	spin_lock(&lockres->l_lock);
786439a36b8SEric Ren 	pid = task_pid(current);
787439a36b8SEric Ren 	list_for_each_entry(oh, &lockres->l_holders, oh_list) {
788439a36b8SEric Ren 		if (oh->oh_owner_pid == pid) {
789439a36b8SEric Ren 			spin_unlock(&lockres->l_lock);
790439a36b8SEric Ren 			return 1;
791439a36b8SEric Ren 		}
792439a36b8SEric Ren 	}
793439a36b8SEric Ren 	spin_unlock(&lockres->l_lock);
794439a36b8SEric Ren 
795439a36b8SEric Ren 	return 0;
796439a36b8SEric Ren }
797439a36b8SEric Ren 
798ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
799ccd979bdSMark Fasheh 				     int level)
800ccd979bdSMark Fasheh {
801ccd979bdSMark Fasheh 	BUG_ON(!lockres);
802ccd979bdSMark Fasheh 
803ccd979bdSMark Fasheh 	switch(level) {
804bd3e7610SJoel Becker 	case DLM_LOCK_EX:
805ccd979bdSMark Fasheh 		lockres->l_ex_holders++;
806ccd979bdSMark Fasheh 		break;
807bd3e7610SJoel Becker 	case DLM_LOCK_PR:
808ccd979bdSMark Fasheh 		lockres->l_ro_holders++;
809ccd979bdSMark Fasheh 		break;
810ccd979bdSMark Fasheh 	default:
811ccd979bdSMark Fasheh 		BUG();
812ccd979bdSMark Fasheh 	}
813ccd979bdSMark Fasheh }
814ccd979bdSMark Fasheh 
815ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
816ccd979bdSMark Fasheh 				     int level)
817ccd979bdSMark Fasheh {
818ccd979bdSMark Fasheh 	BUG_ON(!lockres);
819ccd979bdSMark Fasheh 
820ccd979bdSMark Fasheh 	switch(level) {
821bd3e7610SJoel Becker 	case DLM_LOCK_EX:
822ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ex_holders);
823ccd979bdSMark Fasheh 		lockres->l_ex_holders--;
824ccd979bdSMark Fasheh 		break;
825bd3e7610SJoel Becker 	case DLM_LOCK_PR:
826ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ro_holders);
827ccd979bdSMark Fasheh 		lockres->l_ro_holders--;
828ccd979bdSMark Fasheh 		break;
829ccd979bdSMark Fasheh 	default:
830ccd979bdSMark Fasheh 		BUG();
831ccd979bdSMark Fasheh 	}
832ccd979bdSMark Fasheh }
833ccd979bdSMark Fasheh 
834ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock
835ccd979bdSMark Fasheh  * levels are EX, PR, and NL. It *will* have to be adjusted when more
836ccd979bdSMark Fasheh  * lock types are added. */
837ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level)
838ccd979bdSMark Fasheh {
839bd3e7610SJoel Becker 	int new_level = DLM_LOCK_EX;
840ccd979bdSMark Fasheh 
841bd3e7610SJoel Becker 	if (level == DLM_LOCK_EX)
842bd3e7610SJoel Becker 		new_level = DLM_LOCK_NL;
843bd3e7610SJoel Becker 	else if (level == DLM_LOCK_PR)
844bd3e7610SJoel Becker 		new_level = DLM_LOCK_PR;
845ccd979bdSMark Fasheh 	return new_level;
846ccd979bdSMark Fasheh }
847ccd979bdSMark Fasheh 
848ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres,
849ccd979bdSMark Fasheh 			      unsigned long newflags)
850ccd979bdSMark Fasheh {
851800deef3SChristoph Hellwig 	struct ocfs2_mask_waiter *mw, *tmp;
852ccd979bdSMark Fasheh 
853ccd979bdSMark Fasheh  	assert_spin_locked(&lockres->l_lock);
854ccd979bdSMark Fasheh 
855ccd979bdSMark Fasheh 	lockres->l_flags = newflags;
856ccd979bdSMark Fasheh 
857800deef3SChristoph Hellwig 	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
858ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
859ccd979bdSMark Fasheh 			continue;
860ccd979bdSMark Fasheh 
861ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
862ccd979bdSMark Fasheh 		mw->mw_status = 0;
863ccd979bdSMark Fasheh 		complete(&mw->mw_complete);
864ccd979bdSMark Fasheh 	}
865ccd979bdSMark Fasheh }
866ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
867ccd979bdSMark Fasheh {
868ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags | or);
869ccd979bdSMark Fasheh }
870ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
871ccd979bdSMark Fasheh 				unsigned long clear)
872ccd979bdSMark Fasheh {
873ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags & ~clear);
874ccd979bdSMark Fasheh }
875ccd979bdSMark Fasheh 
876ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
877ccd979bdSMark Fasheh {
878ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
879ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
880ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
881bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
882ccd979bdSMark Fasheh 
883ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
884ccd979bdSMark Fasheh 	if (lockres->l_level <=
885ccd979bdSMark Fasheh 	    ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
886bd3e7610SJoel Becker 		lockres->l_blocking = DLM_LOCK_NL;
887ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
888ccd979bdSMark Fasheh 	}
889ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
890ccd979bdSMark Fasheh }
891ccd979bdSMark Fasheh 
892ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
893ccd979bdSMark Fasheh {
894ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
895ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
896ccd979bdSMark Fasheh 
897ccd979bdSMark Fasheh 	/* Convert from RO to EX doesn't really need anything as our
898ccd979bdSMark Fasheh 	 * information is already up to data. Convert from NL to
899ccd979bdSMark Fasheh 	 * *anything* however should mark ourselves as needing an
900ccd979bdSMark Fasheh 	 * update */
901bd3e7610SJoel Becker 	if (lockres->l_level == DLM_LOCK_NL &&
902f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
903ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
904ccd979bdSMark Fasheh 
905ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
906a1912826SSunil Mushran 
907a1912826SSunil Mushran 	/*
908a1912826SSunil Mushran 	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
909a1912826SSunil Mushran 	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
910a1912826SSunil Mushran 	 * downconverting the lock before the upconvert has fully completed.
911d1e78238SXue jiufei 	 * Do not prevent the dc thread from downconverting if NONBLOCK lock
912d1e78238SXue jiufei 	 * had already returned.
913a1912826SSunil Mushran 	 */
914d1e78238SXue jiufei 	if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED))
915a1912826SSunil Mushran 		lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
916d1e78238SXue jiufei 	else
917d1e78238SXue jiufei 		lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED);
918a1912826SSunil Mushran 
919ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
920ccd979bdSMark Fasheh }
921ccd979bdSMark Fasheh 
922ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
923ccd979bdSMark Fasheh {
9243cf0c507SRoel Kluin 	BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
925ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
926ccd979bdSMark Fasheh 
927bd3e7610SJoel Becker 	if (lockres->l_requested > DLM_LOCK_NL &&
928f625c979SMark Fasheh 	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
929f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
930ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
931ccd979bdSMark Fasheh 
932ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
933ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
934ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
935ccd979bdSMark Fasheh }
936ccd979bdSMark Fasheh 
937ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
938ccd979bdSMark Fasheh 				     int level)
939ccd979bdSMark Fasheh {
940ccd979bdSMark Fasheh 	int needs_downconvert = 0;
941ccd979bdSMark Fasheh 
942ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
943ccd979bdSMark Fasheh 
944ccd979bdSMark Fasheh 	if (level > lockres->l_blocking) {
945ccd979bdSMark Fasheh 		/* only schedule a downconvert if we haven't already scheduled
946ccd979bdSMark Fasheh 		 * one that goes low enough to satisfy the level we're
947ccd979bdSMark Fasheh 		 * blocking.  this also catches the case where we get
948ccd979bdSMark Fasheh 		 * duplicate BASTs */
949ccd979bdSMark Fasheh 		if (ocfs2_highest_compat_lock_level(level) <
950ccd979bdSMark Fasheh 		    ocfs2_highest_compat_lock_level(lockres->l_blocking))
951ccd979bdSMark Fasheh 			needs_downconvert = 1;
952ccd979bdSMark Fasheh 
953ccd979bdSMark Fasheh 		lockres->l_blocking = level;
954ccd979bdSMark Fasheh 	}
955ccd979bdSMark Fasheh 
9569b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n",
9579b915181SSunil Mushran 	     lockres->l_name, level, lockres->l_level, lockres->l_blocking,
9589b915181SSunil Mushran 	     needs_downconvert);
9599b915181SSunil Mushran 
9600b94a909SWengang Wang 	if (needs_downconvert)
9610b94a909SWengang Wang 		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
962c1e8d35eSTao Ma 	mlog(0, "needs_downconvert = %d\n", needs_downconvert);
963ccd979bdSMark Fasheh 	return needs_downconvert;
964ccd979bdSMark Fasheh }
965ccd979bdSMark Fasheh 
966de551246SJoel Becker /*
967de551246SJoel Becker  * OCFS2_LOCK_PENDING and l_pending_gen.
968de551246SJoel Becker  *
969de551246SJoel Becker  * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting
970de551246SJoel Becker  * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock()
971de551246SJoel Becker  * for more details on the race.
972de551246SJoel Becker  *
973de551246SJoel Becker  * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces
974de551246SJoel Becker  * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock()
975de551246SJoel Becker  * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
976de551246SJoel Becker  * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
977de551246SJoel Becker  * the caller is going to try to clear PENDING again.  If nothing else is
978de551246SJoel Becker  * happening, __lockres_clear_pending() sees PENDING is unset and does
979de551246SJoel Becker  * nothing.
980de551246SJoel Becker  *
981de551246SJoel Becker  * But what if another path (eg downconvert thread) has just started a
982de551246SJoel Becker  * new locking action?  The other path has re-set PENDING.  Our path
983de551246SJoel Becker  * cannot clear PENDING, because that will re-open the original race
984de551246SJoel Becker  * window.
985de551246SJoel Becker  *
986de551246SJoel Becker  * [Example]
987de551246SJoel Becker  *
988de551246SJoel Becker  * ocfs2_meta_lock()
989de551246SJoel Becker  *  ocfs2_cluster_lock()
990de551246SJoel Becker  *   set BUSY
991de551246SJoel Becker  *   set PENDING
992de551246SJoel Becker  *   drop l_lock
993de551246SJoel Becker  *   ocfs2_dlm_lock()
994de551246SJoel Becker  *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
995de551246SJoel Becker  *     clear PENDING			 ocfs2_unblock_lock()
996de551246SJoel Becker  *					  take_l_lock
997de551246SJoel Becker  *					  !BUSY
998de551246SJoel Becker  *					  ocfs2_prepare_downconvert()
999de551246SJoel Becker  *					   set BUSY
1000de551246SJoel Becker  *					   set PENDING
1001de551246SJoel Becker  *					  drop l_lock
1002de551246SJoel Becker  *   take l_lock
1003de551246SJoel Becker  *   clear PENDING
1004de551246SJoel Becker  *   drop l_lock
1005de551246SJoel Becker  *			<window>
1006de551246SJoel Becker  *					  ocfs2_dlm_lock()
1007de551246SJoel Becker  *
1008de551246SJoel Becker  * So as you can see, we now have a window where l_lock is not held,
1009de551246SJoel Becker  * PENDING is not set, and ocfs2_dlm_lock() has not been called.
1010de551246SJoel Becker  *
1011de551246SJoel Becker  * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
1012de551246SJoel Becker  * set by ocfs2_prepare_downconvert().  That wasn't nice.
1013de551246SJoel Becker  *
1014de551246SJoel Becker  * To solve this we introduce l_pending_gen.  A call to
1015de551246SJoel Becker  * lockres_clear_pending() will only do so when it is passed a generation
1016de551246SJoel Becker  * number that matches the lockres.  lockres_set_pending() will return the
1017de551246SJoel Becker  * current generation number.  When ocfs2_cluster_lock() goes to clear
1018de551246SJoel Becker  * PENDING, it passes the generation it got from set_pending().  In our
1019de551246SJoel Becker  * example above, the generation numbers will *not* match.  Thus,
1020de551246SJoel Becker  * ocfs2_cluster_lock() will not clear the PENDING set by
1021de551246SJoel Becker  * ocfs2_prepare_downconvert().
1022de551246SJoel Becker  */
1023de551246SJoel Becker 
1024de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */
1025de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
1026de551246SJoel Becker 				    unsigned int generation,
1027de551246SJoel Becker 				    struct ocfs2_super *osb)
1028de551246SJoel Becker {
1029de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
1030de551246SJoel Becker 
1031de551246SJoel Becker 	/*
1032de551246SJoel Becker 	 * The ast and locking functions can race us here.  The winner
1033de551246SJoel Becker 	 * will clear pending, the loser will not.
1034de551246SJoel Becker 	 */
1035de551246SJoel Becker 	if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
1036de551246SJoel Becker 	    (lockres->l_pending_gen != generation))
1037de551246SJoel Becker 		return;
1038de551246SJoel Becker 
1039de551246SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
1040de551246SJoel Becker 	lockres->l_pending_gen++;
1041de551246SJoel Becker 
1042de551246SJoel Becker 	/*
1043de551246SJoel Becker 	 * The downconvert thread may have skipped us because we
1044de551246SJoel Becker 	 * were PENDING.  Wake it up.
1045de551246SJoel Becker 	 */
1046de551246SJoel Becker 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1047de551246SJoel Becker 		ocfs2_wake_downconvert_thread(osb);
1048de551246SJoel Becker }
1049de551246SJoel Becker 
1050de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */
1051de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
1052de551246SJoel Becker 				  unsigned int generation,
1053de551246SJoel Becker 				  struct ocfs2_super *osb)
1054de551246SJoel Becker {
1055de551246SJoel Becker 	unsigned long flags;
1056de551246SJoel Becker 
1057de551246SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1058de551246SJoel Becker 	__lockres_clear_pending(lockres, generation, osb);
1059de551246SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1060de551246SJoel Becker }
1061de551246SJoel Becker 
1062de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
1063de551246SJoel Becker {
1064de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
1065de551246SJoel Becker 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
1066de551246SJoel Becker 
1067de551246SJoel Becker 	lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
1068de551246SJoel Becker 
1069de551246SJoel Becker 	return lockres->l_pending_gen;
1070de551246SJoel Becker }
1071de551246SJoel Becker 
1072c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
1073ccd979bdSMark Fasheh {
1074a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1075aa2623adSMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1076ccd979bdSMark Fasheh 	int needs_downconvert;
1077ccd979bdSMark Fasheh 	unsigned long flags;
1078ccd979bdSMark Fasheh 
1079bd3e7610SJoel Becker 	BUG_ON(level <= DLM_LOCK_NL);
1080ccd979bdSMark Fasheh 
10819b915181SSunil Mushran 	mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, "
10829b915181SSunil Mushran 	     "type %s\n", lockres->l_name, level, lockres->l_level,
1083aa2623adSMark Fasheh 	     ocfs2_lock_type_string(lockres->l_type));
1084aa2623adSMark Fasheh 
1085cf8e06f1SMark Fasheh 	/*
1086cf8e06f1SMark Fasheh 	 * We can skip the bast for locks which don't enable caching -
1087cf8e06f1SMark Fasheh 	 * they'll be dropped at the earliest possible time anyway.
1088cf8e06f1SMark Fasheh 	 */
1089cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
1090cf8e06f1SMark Fasheh 		return;
1091cf8e06f1SMark Fasheh 
1092ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1093ccd979bdSMark Fasheh 	needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
1094ccd979bdSMark Fasheh 	if (needs_downconvert)
1095ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
1096ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1097ccd979bdSMark Fasheh 
1098d680efe9SMark Fasheh 	wake_up(&lockres->l_event);
1099d680efe9SMark Fasheh 
110034d024f8SMark Fasheh 	ocfs2_wake_downconvert_thread(osb);
1101ccd979bdSMark Fasheh }
1102ccd979bdSMark Fasheh 
1103c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
1104ccd979bdSMark Fasheh {
1105a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1106de551246SJoel Becker 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1107ccd979bdSMark Fasheh 	unsigned long flags;
11081693a5c0SDavid Teigland 	int status;
1109ccd979bdSMark Fasheh 
1110ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1111ccd979bdSMark Fasheh 
11121693a5c0SDavid Teigland 	status = ocfs2_dlm_lock_status(&lockres->l_lksb);
11131693a5c0SDavid Teigland 
11141693a5c0SDavid Teigland 	if (status == -EAGAIN) {
11151693a5c0SDavid Teigland 		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
11161693a5c0SDavid Teigland 		goto out;
11171693a5c0SDavid Teigland 	}
11181693a5c0SDavid Teigland 
11191693a5c0SDavid Teigland 	if (status) {
11208f2c9c1bSJoel Becker 		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
11211693a5c0SDavid Teigland 		     lockres->l_name, status);
1122ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1123ccd979bdSMark Fasheh 		return;
1124ccd979bdSMark Fasheh 	}
1125ccd979bdSMark Fasheh 
11269b915181SSunil Mushran 	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
11279b915181SSunil Mushran 	     "level %d => %d\n", lockres->l_name, lockres->l_action,
11289b915181SSunil Mushran 	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);
11299b915181SSunil Mushran 
1130ccd979bdSMark Fasheh 	switch(lockres->l_action) {
1131ccd979bdSMark Fasheh 	case OCFS2_AST_ATTACH:
1132ccd979bdSMark Fasheh 		ocfs2_generic_handle_attach_action(lockres);
1133e92d57dfSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
1134ccd979bdSMark Fasheh 		break;
1135ccd979bdSMark Fasheh 	case OCFS2_AST_CONVERT:
1136ccd979bdSMark Fasheh 		ocfs2_generic_handle_convert_action(lockres);
1137ccd979bdSMark Fasheh 		break;
1138ccd979bdSMark Fasheh 	case OCFS2_AST_DOWNCONVERT:
1139ccd979bdSMark Fasheh 		ocfs2_generic_handle_downconvert_action(lockres);
1140ccd979bdSMark Fasheh 		break;
1141ccd979bdSMark Fasheh 	default:
11429b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
11439b915181SSunil Mushran 		     "flags 0x%lx, unlock: %u\n",
1144e92d57dfSMark Fasheh 		     lockres->l_name, lockres->l_action, lockres->l_flags,
1145e92d57dfSMark Fasheh 		     lockres->l_unlock_action);
1146ccd979bdSMark Fasheh 		BUG();
1147ccd979bdSMark Fasheh 	}
11481693a5c0SDavid Teigland out:
1149ccd979bdSMark Fasheh 	/* set it to something invalid so if we get called again we
1150ccd979bdSMark Fasheh 	 * can catch it. */
1151ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_INVALID;
1152ccd979bdSMark Fasheh 
1153de551246SJoel Becker 	/* Did we try to cancel this lock?  Clear that state */
1154de551246SJoel Becker 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
1155de551246SJoel Becker 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1156de551246SJoel Becker 
1157de551246SJoel Becker 	/*
1158de551246SJoel Becker 	 * We may have beaten the locking functions here.  We certainly
1159de551246SJoel Becker 	 * know that dlm_lock() has been called :-)
1160de551246SJoel Becker 	 * Because we can't have two lock calls in flight at once, we
1161de551246SJoel Becker 	 * can use lockres->l_pending_gen.
1162de551246SJoel Becker 	 */
1163de551246SJoel Becker 	__lockres_clear_pending(lockres, lockres->l_pending_gen,  osb);
1164de551246SJoel Becker 
1165ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1166d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1167ccd979bdSMark Fasheh }
1168ccd979bdSMark Fasheh 
1169553b5eb9SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1170553b5eb9SJoel Becker {
1171553b5eb9SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1172553b5eb9SJoel Becker 	unsigned long flags;
1173553b5eb9SJoel Becker 
11749b915181SSunil Mushran 	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
11759b915181SSunil Mushran 	     lockres->l_name, lockres->l_unlock_action);
1176553b5eb9SJoel Becker 
1177553b5eb9SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1178553b5eb9SJoel Becker 	if (error) {
1179553b5eb9SJoel Becker 		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
1180553b5eb9SJoel Becker 		     "unlock_action %d\n", error, lockres->l_name,
1181553b5eb9SJoel Becker 		     lockres->l_unlock_action);
1182553b5eb9SJoel Becker 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1183553b5eb9SJoel Becker 		return;
1184553b5eb9SJoel Becker 	}
1185553b5eb9SJoel Becker 
1186553b5eb9SJoel Becker 	switch(lockres->l_unlock_action) {
1187553b5eb9SJoel Becker 	case OCFS2_UNLOCK_CANCEL_CONVERT:
1188553b5eb9SJoel Becker 		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
1189553b5eb9SJoel Becker 		lockres->l_action = OCFS2_AST_INVALID;
1190553b5eb9SJoel Becker 		/* Downconvert thread may have requeued this lock, we
1191553b5eb9SJoel Becker 		 * need to wake it. */
1192553b5eb9SJoel Becker 		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1193553b5eb9SJoel Becker 			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
1194553b5eb9SJoel Becker 		break;
1195553b5eb9SJoel Becker 	case OCFS2_UNLOCK_DROP_LOCK:
1196553b5eb9SJoel Becker 		lockres->l_level = DLM_LOCK_IV;
1197553b5eb9SJoel Becker 		break;
1198553b5eb9SJoel Becker 	default:
1199553b5eb9SJoel Becker 		BUG();
1200553b5eb9SJoel Becker 	}
1201553b5eb9SJoel Becker 
1202553b5eb9SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1203553b5eb9SJoel Becker 	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1204553b5eb9SJoel Becker 	wake_up(&lockres->l_event);
1205553b5eb9SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1206553b5eb9SJoel Becker }
1207553b5eb9SJoel Becker 
1208553b5eb9SJoel Becker /*
1209553b5eb9SJoel Becker  * This is the filesystem locking protocol.  It provides the lock handling
1210553b5eb9SJoel Becker  * hooks for the underlying DLM.  It has a maximum version number.
1211553b5eb9SJoel Becker  * The version number allows interoperability with systems running at
1212553b5eb9SJoel Becker  * the same major number and an equal or smaller minor number.
1213553b5eb9SJoel Becker  *
1214553b5eb9SJoel Becker  * Whenever the filesystem does new things with locks (adds or removes a
1215553b5eb9SJoel Becker  * lock, orders them differently, does different things underneath a lock),
1216553b5eb9SJoel Becker  * the version must be changed.  The protocol is negotiated when joining
1217553b5eb9SJoel Becker  * the dlm domain.  A node may join the domain if its major version is
1218553b5eb9SJoel Becker  * identical to all other nodes and its minor version is greater than
1219553b5eb9SJoel Becker  * or equal to all other nodes.  When its minor version is greater than
1220553b5eb9SJoel Becker  * the other nodes, it will run at the minor version specified by the
1221553b5eb9SJoel Becker  * other nodes.
1222553b5eb9SJoel Becker  *
1223553b5eb9SJoel Becker  * If a locking change is made that will not be compatible with older
1224553b5eb9SJoel Becker  * versions, the major number must be increased and the minor version set
1225553b5eb9SJoel Becker  * to zero.  If a change merely adds a behavior that can be disabled when
1226553b5eb9SJoel Becker  * speaking to older versions, the minor version must be increased.  If a
1227553b5eb9SJoel Becker  * change adds a fully backwards compatible change (eg, LVB changes that
1228553b5eb9SJoel Becker  * are just ignored by older versions), the version does not need to be
1229553b5eb9SJoel Becker  * updated.
1230553b5eb9SJoel Becker  */
1231553b5eb9SJoel Becker static struct ocfs2_locking_protocol lproto = {
1232553b5eb9SJoel Becker 	.lp_max_version = {
1233553b5eb9SJoel Becker 		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
1234553b5eb9SJoel Becker 		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
1235553b5eb9SJoel Becker 	},
1236553b5eb9SJoel Becker 	.lp_lock_ast		= ocfs2_locking_ast,
1237553b5eb9SJoel Becker 	.lp_blocking_ast	= ocfs2_blocking_ast,
1238553b5eb9SJoel Becker 	.lp_unlock_ast		= ocfs2_unlock_ast,
1239553b5eb9SJoel Becker };
1240553b5eb9SJoel Becker 
1241553b5eb9SJoel Becker void ocfs2_set_locking_protocol(void)
1242553b5eb9SJoel Becker {
1243553b5eb9SJoel Becker 	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
1244553b5eb9SJoel Becker }
1245553b5eb9SJoel Becker 
1246ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1247ccd979bdSMark Fasheh 						int convert)
1248ccd979bdSMark Fasheh {
1249ccd979bdSMark Fasheh 	unsigned long flags;
1250ccd979bdSMark Fasheh 
1251ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1252ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1253a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1254ccd979bdSMark Fasheh 	if (convert)
1255ccd979bdSMark Fasheh 		lockres->l_action = OCFS2_AST_INVALID;
1256ccd979bdSMark Fasheh 	else
1257ccd979bdSMark Fasheh 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1258ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1259ccd979bdSMark Fasheh 
1260ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1261ccd979bdSMark Fasheh }
1262ccd979bdSMark Fasheh 
1263ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e.,
1264ccd979bdSMark Fasheh  * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
1265ccd979bdSMark Fasheh  * to do the right thing in that case.
1266ccd979bdSMark Fasheh  */
1267ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
1268ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
1269ccd979bdSMark Fasheh 			     int level,
1270bd3e7610SJoel Becker 			     u32 dlm_flags)
1271ccd979bdSMark Fasheh {
1272ccd979bdSMark Fasheh 	int ret = 0;
1273ccd979bdSMark Fasheh 	unsigned long flags;
1274de551246SJoel Becker 	unsigned int gen;
1275ccd979bdSMark Fasheh 
1276bd3e7610SJoel Becker 	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
1277ccd979bdSMark Fasheh 	     dlm_flags);
1278ccd979bdSMark Fasheh 
1279ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1280ccd979bdSMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
1281ccd979bdSMark Fasheh 	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
1282ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1283ccd979bdSMark Fasheh 		goto bail;
1284ccd979bdSMark Fasheh 	}
1285ccd979bdSMark Fasheh 
1286ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_ATTACH;
1287ccd979bdSMark Fasheh 	lockres->l_requested = level;
1288ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1289de551246SJoel Becker 	gen = lockres_set_pending(lockres);
1290ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1291ccd979bdSMark Fasheh 
12924670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
1293ccd979bdSMark Fasheh 			     level,
1294ccd979bdSMark Fasheh 			     &lockres->l_lksb,
1295ccd979bdSMark Fasheh 			     dlm_flags,
1296ccd979bdSMark Fasheh 			     lockres->l_name,
1297a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
1298de551246SJoel Becker 	lockres_clear_pending(lockres, gen, osb);
12997431cd7eSJoel Becker 	if (ret) {
13007431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1301ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
1302ccd979bdSMark Fasheh 	}
1303ccd979bdSMark Fasheh 
13047431cd7eSJoel Becker 	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
1305ccd979bdSMark Fasheh 
1306ccd979bdSMark Fasheh bail:
1307ccd979bdSMark Fasheh 	return ret;
1308ccd979bdSMark Fasheh }
1309ccd979bdSMark Fasheh 
1310ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
1311ccd979bdSMark Fasheh 					int flag)
1312ccd979bdSMark Fasheh {
1313ccd979bdSMark Fasheh 	unsigned long flags;
1314ccd979bdSMark Fasheh 	int ret;
1315ccd979bdSMark Fasheh 
1316ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1317ccd979bdSMark Fasheh 	ret = lockres->l_flags & flag;
1318ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1319ccd979bdSMark Fasheh 
1320ccd979bdSMark Fasheh 	return ret;
1321ccd979bdSMark Fasheh }
1322ccd979bdSMark Fasheh 
1323ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)
1324ccd979bdSMark Fasheh 
1325ccd979bdSMark Fasheh {
1326ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1327ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
1328ccd979bdSMark Fasheh }
1329ccd979bdSMark Fasheh 
1330ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)
1331ccd979bdSMark Fasheh 
1332ccd979bdSMark Fasheh {
1333ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1334ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
1335ccd979bdSMark Fasheh }
1336ccd979bdSMark Fasheh 
1337ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf
1338ccd979bdSMark Fasheh  * of another node, and return true if the currently wanted
1339ccd979bdSMark Fasheh  * level will be compatible with it. */
1340ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
1341ccd979bdSMark Fasheh 						     int wanted)
1342ccd979bdSMark Fasheh {
1343ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
1344ccd979bdSMark Fasheh 
1345ccd979bdSMark Fasheh 	return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
1346ccd979bdSMark Fasheh }
1347ccd979bdSMark Fasheh 
1348ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
1349ccd979bdSMark Fasheh {
1350ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&mw->mw_item);
1351ccd979bdSMark Fasheh 	init_completion(&mw->mw_complete);
13528ddb7b00SSunil Mushran 	ocfs2_init_start_time(mw);
1353ccd979bdSMark Fasheh }
1354ccd979bdSMark Fasheh 
1355ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
1356ccd979bdSMark Fasheh {
1357ccd979bdSMark Fasheh 	wait_for_completion(&mw->mw_complete);
1358ccd979bdSMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
135916735d02SWolfram Sang 	reinit_completion(&mw->mw_complete);
1360ccd979bdSMark Fasheh 	return mw->mw_status;
1361ccd979bdSMark Fasheh }
1362ccd979bdSMark Fasheh 
1363ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
1364ccd979bdSMark Fasheh 				    struct ocfs2_mask_waiter *mw,
1365ccd979bdSMark Fasheh 				    unsigned long mask,
1366ccd979bdSMark Fasheh 				    unsigned long goal)
1367ccd979bdSMark Fasheh {
1368ccd979bdSMark Fasheh 	BUG_ON(!list_empty(&mw->mw_item));
1369ccd979bdSMark Fasheh 
1370ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
1371ccd979bdSMark Fasheh 
1372ccd979bdSMark Fasheh 	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
1373ccd979bdSMark Fasheh 	mw->mw_mask = mask;
1374ccd979bdSMark Fasheh 	mw->mw_goal = goal;
1375ccd979bdSMark Fasheh }
1376ccd979bdSMark Fasheh 
1377ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY
1378ccd979bdSMark Fasheh  * if the mask still hadn't reached its goal */
1379d1e78238SXue jiufei static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1380ccd979bdSMark Fasheh 				      struct ocfs2_mask_waiter *mw)
1381ccd979bdSMark Fasheh {
1382ccd979bdSMark Fasheh 	int ret = 0;
1383ccd979bdSMark Fasheh 
1384d1e78238SXue jiufei 	assert_spin_locked(&lockres->l_lock);
1385ccd979bdSMark Fasheh 	if (!list_empty(&mw->mw_item)) {
1386ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1387ccd979bdSMark Fasheh 			ret = -EBUSY;
1388ccd979bdSMark Fasheh 
1389ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
1390ccd979bdSMark Fasheh 		init_completion(&mw->mw_complete);
1391ccd979bdSMark Fasheh 	}
1392d1e78238SXue jiufei 
1393d1e78238SXue jiufei 	return ret;
1394d1e78238SXue jiufei }
1395d1e78238SXue jiufei 
1396d1e78238SXue jiufei static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1397d1e78238SXue jiufei 				      struct ocfs2_mask_waiter *mw)
1398d1e78238SXue jiufei {
1399d1e78238SXue jiufei 	unsigned long flags;
1400d1e78238SXue jiufei 	int ret = 0;
1401d1e78238SXue jiufei 
1402d1e78238SXue jiufei 	spin_lock_irqsave(&lockres->l_lock, flags);
1403d1e78238SXue jiufei 	ret = __lockres_remove_mask_waiter(lockres, mw);
1404ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1405ccd979bdSMark Fasheh 
1406ccd979bdSMark Fasheh 	return ret;
1407ccd979bdSMark Fasheh 
1408ccd979bdSMark Fasheh }
1409ccd979bdSMark Fasheh 
1410cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1411cf8e06f1SMark Fasheh 					     struct ocfs2_lock_res *lockres)
1412cf8e06f1SMark Fasheh {
1413cf8e06f1SMark Fasheh 	int ret;
1414cf8e06f1SMark Fasheh 
1415cf8e06f1SMark Fasheh 	ret = wait_for_completion_interruptible(&mw->mw_complete);
1416cf8e06f1SMark Fasheh 	if (ret)
1417cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, mw);
1418cf8e06f1SMark Fasheh 	else
1419cf8e06f1SMark Fasheh 		ret = mw->mw_status;
1420cf8e06f1SMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
142116735d02SWolfram Sang 	reinit_completion(&mw->mw_complete);
1422cf8e06f1SMark Fasheh 	return ret;
1423cf8e06f1SMark Fasheh }
1424cf8e06f1SMark Fasheh 
1425cb25797dSJan Kara static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1426ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres,
1427ccd979bdSMark Fasheh 				int level,
1428bd3e7610SJoel Becker 				u32 lkm_flags,
1429cb25797dSJan Kara 				int arg_flags,
1430cb25797dSJan Kara 				int l_subclass,
1431cb25797dSJan Kara 				unsigned long caller_ip)
1432ccd979bdSMark Fasheh {
1433ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
1434ccd979bdSMark Fasheh 	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1435ccd979bdSMark Fasheh 	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1436ccd979bdSMark Fasheh 	unsigned long flags;
1437de551246SJoel Becker 	unsigned int gen;
14381693a5c0SDavid Teigland 	int noqueue_attempted = 0;
1439d1e78238SXue jiufei 	int dlm_locked = 0;
1440b1b1e15eSTariq Saeed 	int kick_dc = 0;
1441ccd979bdSMark Fasheh 
14422f2eca20Salex chen 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
14432f2eca20Salex chen 		mlog_errno(-EINVAL);
14442f2eca20Salex chen 		return -EINVAL;
14452f2eca20Salex chen 	}
14462f2eca20Salex chen 
1447ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1448ccd979bdSMark Fasheh 
1449b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
1450bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
1451b80fc012SMark Fasheh 
1452ccd979bdSMark Fasheh again:
1453ccd979bdSMark Fasheh 	wait = 0;
1454ccd979bdSMark Fasheh 
1455a1912826SSunil Mushran 	spin_lock_irqsave(&lockres->l_lock, flags);
1456a1912826SSunil Mushran 
1457ccd979bdSMark Fasheh 	if (catch_signals && signal_pending(current)) {
1458ccd979bdSMark Fasheh 		ret = -ERESTARTSYS;
1459a1912826SSunil Mushran 		goto unlock;
1460ccd979bdSMark Fasheh 	}
1461ccd979bdSMark Fasheh 
1462ccd979bdSMark Fasheh 	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1463ccd979bdSMark Fasheh 			"Cluster lock called on freeing lockres %s! flags "
1464ccd979bdSMark Fasheh 			"0x%lx\n", lockres->l_name, lockres->l_flags);
1465ccd979bdSMark Fasheh 
1466ccd979bdSMark Fasheh 	/* We only compare against the currently granted level
1467ccd979bdSMark Fasheh 	 * here. If the lock is blocked waiting on a downconvert,
1468ccd979bdSMark Fasheh 	 * we'll get caught below. */
1469ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
1470ccd979bdSMark Fasheh 	    level > lockres->l_level) {
1471ccd979bdSMark Fasheh 		/* is someone sitting in dlm_lock? If so, wait on
1472ccd979bdSMark Fasheh 		 * them. */
1473ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1474ccd979bdSMark Fasheh 		wait = 1;
1475ccd979bdSMark Fasheh 		goto unlock;
1476ccd979bdSMark Fasheh 	}
1477ccd979bdSMark Fasheh 
1478a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1479a1912826SSunil Mushran 		/*
1480a1912826SSunil Mushran 		 * We've upconverted. If the lock now has a level we can
1481a1912826SSunil Mushran 		 * work with, we take it. If, however, the lock is not at the
1482a1912826SSunil Mushran 		 * required level, we go thru the full cycle. One way this could
1483a1912826SSunil Mushran 		 * happen is if a process requesting an upconvert to PR is
1484a1912826SSunil Mushran 		 * closely followed by another requesting upconvert to an EX.
1485a1912826SSunil Mushran 		 * If the process requesting EX lands here, we want it to
1486a1912826SSunil Mushran 		 * continue attempting to upconvert and let the process
1487a1912826SSunil Mushran 		 * requesting PR take the lock.
1488a1912826SSunil Mushran 		 * If multiple processes request upconvert to PR, the first one
1489a1912826SSunil Mushran 		 * here will take the lock. The others will have to go thru the
1490a1912826SSunil Mushran 		 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1491a1912826SSunil Mushran 		 * downconvert request.
1492a1912826SSunil Mushran 		 */
1493a1912826SSunil Mushran 		if (level <= lockres->l_level)
1494a1912826SSunil Mushran 			goto update_holders;
1495a1912826SSunil Mushran 	}
1496a1912826SSunil Mushran 
1497ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1498ccd979bdSMark Fasheh 	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1499ccd979bdSMark Fasheh 		/* is the lock is currently blocked on behalf of
1500ccd979bdSMark Fasheh 		 * another node */
1501ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
1502ccd979bdSMark Fasheh 		wait = 1;
1503ccd979bdSMark Fasheh 		goto unlock;
1504ccd979bdSMark Fasheh 	}
1505ccd979bdSMark Fasheh 
1506ccd979bdSMark Fasheh 	if (level > lockres->l_level) {
15071693a5c0SDavid Teigland 		if (noqueue_attempted > 0) {
15081693a5c0SDavid Teigland 			ret = -EAGAIN;
15091693a5c0SDavid Teigland 			goto unlock;
15101693a5c0SDavid Teigland 		}
15111693a5c0SDavid Teigland 		if (lkm_flags & DLM_LKF_NOQUEUE)
15121693a5c0SDavid Teigland 			noqueue_attempted = 1;
15131693a5c0SDavid Teigland 
1514ccd979bdSMark Fasheh 		if (lockres->l_action != OCFS2_AST_INVALID)
1515ccd979bdSMark Fasheh 			mlog(ML_ERROR, "lockres %s has action %u pending\n",
1516ccd979bdSMark Fasheh 			     lockres->l_name, lockres->l_action);
1517ccd979bdSMark Fasheh 
1518019d1b22SMark Fasheh 		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1519019d1b22SMark Fasheh 			lockres->l_action = OCFS2_AST_ATTACH;
1520bd3e7610SJoel Becker 			lkm_flags &= ~DLM_LKF_CONVERT;
1521019d1b22SMark Fasheh 		} else {
1522ccd979bdSMark Fasheh 			lockres->l_action = OCFS2_AST_CONVERT;
1523bd3e7610SJoel Becker 			lkm_flags |= DLM_LKF_CONVERT;
1524019d1b22SMark Fasheh 		}
1525019d1b22SMark Fasheh 
1526ccd979bdSMark Fasheh 		lockres->l_requested = level;
1527ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1528de551246SJoel Becker 		gen = lockres_set_pending(lockres);
1529ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1530ccd979bdSMark Fasheh 
1531bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_IV);
1532bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_NL);
1533ccd979bdSMark Fasheh 
15349b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
1535ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_level, level);
1536ccd979bdSMark Fasheh 
1537ccd979bdSMark Fasheh 		/* call dlm_lock to upgrade lock now */
15384670c46dSJoel Becker 		ret = ocfs2_dlm_lock(osb->cconn,
1539ccd979bdSMark Fasheh 				     level,
1540ccd979bdSMark Fasheh 				     &lockres->l_lksb,
1541019d1b22SMark Fasheh 				     lkm_flags,
1542ccd979bdSMark Fasheh 				     lockres->l_name,
1543a796d286SJoel Becker 				     OCFS2_LOCK_ID_MAX_LEN - 1);
1544de551246SJoel Becker 		lockres_clear_pending(lockres, gen, osb);
15457431cd7eSJoel Becker 		if (ret) {
15467431cd7eSJoel Becker 			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
15477431cd7eSJoel Becker 			    (ret != -EAGAIN)) {
154824ef1815SJoel Becker 				ocfs2_log_dlm_error("ocfs2_dlm_lock",
15497431cd7eSJoel Becker 						    ret, lockres);
1550ccd979bdSMark Fasheh 			}
1551ccd979bdSMark Fasheh 			ocfs2_recover_from_dlm_error(lockres, 1);
1552ccd979bdSMark Fasheh 			goto out;
1553ccd979bdSMark Fasheh 		}
1554d1e78238SXue jiufei 		dlm_locked = 1;
1555ccd979bdSMark Fasheh 
155673ac36eaSColy Li 		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
1557ccd979bdSMark Fasheh 		     lockres->l_name);
1558ccd979bdSMark Fasheh 
1559ccd979bdSMark Fasheh 		/* At this point we've gone inside the dlm and need to
1560ccd979bdSMark Fasheh 		 * complete our work regardless. */
1561ccd979bdSMark Fasheh 		catch_signals = 0;
1562ccd979bdSMark Fasheh 
1563ccd979bdSMark Fasheh 		/* wait for busy to clear and carry on */
1564ccd979bdSMark Fasheh 		goto again;
1565ccd979bdSMark Fasheh 	}
1566ccd979bdSMark Fasheh 
1567a1912826SSunil Mushran update_holders:
1568ccd979bdSMark Fasheh 	/* Ok, if we get here then we're good to go. */
1569ccd979bdSMark Fasheh 	ocfs2_inc_holders(lockres, level);
1570ccd979bdSMark Fasheh 
1571ccd979bdSMark Fasheh 	ret = 0;
1572ccd979bdSMark Fasheh unlock:
1573a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1574a1912826SSunil Mushran 
1575b1b1e15eSTariq Saeed 	/* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
1576b1b1e15eSTariq Saeed 	kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
1577b1b1e15eSTariq Saeed 
1578ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1579b1b1e15eSTariq Saeed 	if (kick_dc)
1580b1b1e15eSTariq Saeed 		ocfs2_wake_downconvert_thread(osb);
1581ccd979bdSMark Fasheh out:
1582ccd979bdSMark Fasheh 	/*
1583ccd979bdSMark Fasheh 	 * This is helping work around a lock inversion between the page lock
1584ccd979bdSMark Fasheh 	 * and dlm locks.  One path holds the page lock while calling aops
1585ccd979bdSMark Fasheh 	 * which block acquiring dlm locks.  The voting thread holds dlm
1586ccd979bdSMark Fasheh 	 * locks while acquiring page locks while down converting data locks.
1587ccd979bdSMark Fasheh 	 * This block is helping an aop path notice the inversion and back
1588ccd979bdSMark Fasheh 	 * off to unlock its page lock before trying the dlm lock again.
1589ccd979bdSMark Fasheh 	 */
1590ccd979bdSMark Fasheh 	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
1591ccd979bdSMark Fasheh 	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
1592ccd979bdSMark Fasheh 		wait = 0;
1593d1e78238SXue jiufei 		spin_lock_irqsave(&lockres->l_lock, flags);
1594d1e78238SXue jiufei 		if (__lockres_remove_mask_waiter(lockres, &mw)) {
1595d1e78238SXue jiufei 			if (dlm_locked)
1596d1e78238SXue jiufei 				lockres_or_flags(lockres,
1597d1e78238SXue jiufei 					OCFS2_LOCK_NONBLOCK_FINISHED);
1598d1e78238SXue jiufei 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1599ccd979bdSMark Fasheh 			ret = -EAGAIN;
1600d1e78238SXue jiufei 		} else {
1601d1e78238SXue jiufei 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1602ccd979bdSMark Fasheh 			goto again;
1603ccd979bdSMark Fasheh 		}
1604d1e78238SXue jiufei 	}
1605ccd979bdSMark Fasheh 	if (wait) {
1606ccd979bdSMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
1607ccd979bdSMark Fasheh 		if (ret == 0)
1608ccd979bdSMark Fasheh 			goto again;
1609ccd979bdSMark Fasheh 		mlog_errno(ret);
1610ccd979bdSMark Fasheh 	}
16118ddb7b00SSunil Mushran 	ocfs2_update_lock_stats(lockres, level, &mw, ret);
1612ccd979bdSMark Fasheh 
1613cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
1614cb25797dSJan Kara 	if (!ret && lockres->l_lockdep_map.key != NULL) {
1615cb25797dSJan Kara 		if (level == DLM_LOCK_PR)
1616cb25797dSJan Kara 			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1617cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1618cb25797dSJan Kara 				caller_ip);
1619cb25797dSJan Kara 		else
1620cb25797dSJan Kara 			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1621cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1622cb25797dSJan Kara 				caller_ip);
1623cb25797dSJan Kara 	}
1624cb25797dSJan Kara #endif
1625ccd979bdSMark Fasheh 	return ret;
1626ccd979bdSMark Fasheh }
1627ccd979bdSMark Fasheh 
1628cb25797dSJan Kara static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1629ccd979bdSMark Fasheh 				     struct ocfs2_lock_res *lockres,
1630cb25797dSJan Kara 				     int level,
1631cb25797dSJan Kara 				     u32 lkm_flags,
1632cb25797dSJan Kara 				     int arg_flags)
1633cb25797dSJan Kara {
1634cb25797dSJan Kara 	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1635cb25797dSJan Kara 				    0, _RET_IP_);
1636cb25797dSJan Kara }
1637cb25797dSJan Kara 
1638cb25797dSJan Kara 
1639cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1640cb25797dSJan Kara 				   struct ocfs2_lock_res *lockres,
1641cb25797dSJan Kara 				   int level,
1642cb25797dSJan Kara 				   unsigned long caller_ip)
1643ccd979bdSMark Fasheh {
1644ccd979bdSMark Fasheh 	unsigned long flags;
1645ccd979bdSMark Fasheh 
1646ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1647ccd979bdSMark Fasheh 	ocfs2_dec_holders(lockres, level);
164834d024f8SMark Fasheh 	ocfs2_downconvert_on_unlock(osb, lockres);
1649ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1650cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
1651cb25797dSJan Kara 	if (lockres->l_lockdep_map.key != NULL)
1652cb25797dSJan Kara 		rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1653cb25797dSJan Kara #endif
1654ccd979bdSMark Fasheh }
1655ccd979bdSMark Fasheh 
1656da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1657d680efe9SMark Fasheh 				 struct ocfs2_lock_res *lockres,
165824c19ef4SMark Fasheh 				 int ex,
165924c19ef4SMark Fasheh 				 int local)
1660ccd979bdSMark Fasheh {
1661bd3e7610SJoel Becker 	int level =  ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1662ccd979bdSMark Fasheh 	unsigned long flags;
1663bd3e7610SJoel Becker 	u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1664ccd979bdSMark Fasheh 
1665ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1666ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1667ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1668ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1669ccd979bdSMark Fasheh 
167024c19ef4SMark Fasheh 	return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1671ccd979bdSMark Fasheh }
1672ccd979bdSMark Fasheh 
1673ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping
1674ccd979bdSMark Fasheh  * the normal cluster directory lookup. Use this ONLY on newly created
1675ccd979bdSMark Fasheh  * inodes which other nodes can't possibly see, and which haven't been
1676ccd979bdSMark Fasheh  * hashed in the inode hash yet. This can give us a good performance
1677ccd979bdSMark Fasheh  * increase as it'll skip the network broadcast normally associated
1678ccd979bdSMark Fasheh  * with creating a new lock resource. */
1679ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode)
1680ccd979bdSMark Fasheh {
1681ccd979bdSMark Fasheh 	int ret;
1682d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1683ccd979bdSMark Fasheh 
1684ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_inode_is_new(inode));
1685ccd979bdSMark Fasheh 
1686b0697053SMark Fasheh 	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1687ccd979bdSMark Fasheh 
1688ccd979bdSMark Fasheh 	/* NOTE: That we don't increment any of the holder counts, nor
1689ccd979bdSMark Fasheh 	 * do we add anything to a journal handle. Since this is
1690ccd979bdSMark Fasheh 	 * supposed to be a new inode which the cluster doesn't know
1691ccd979bdSMark Fasheh 	 * about yet, there is no need to.  As far as the LVB handling
1692ccd979bdSMark Fasheh 	 * is concerned, this is basically like acquiring an EX lock
1693ccd979bdSMark Fasheh 	 * on a resource which has an invalid one -- we'll set it
1694ccd979bdSMark Fasheh 	 * valid when we release the EX. */
1695ccd979bdSMark Fasheh 
169624c19ef4SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1697ccd979bdSMark Fasheh 	if (ret) {
1698ccd979bdSMark Fasheh 		mlog_errno(ret);
1699ccd979bdSMark Fasheh 		goto bail;
1700ccd979bdSMark Fasheh 	}
1701ccd979bdSMark Fasheh 
170224c19ef4SMark Fasheh 	/*
1703bd3e7610SJoel Becker 	 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
170424c19ef4SMark Fasheh 	 * don't use a generation in their lock names.
170524c19ef4SMark Fasheh 	 */
1706e63aecb6SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
1707ccd979bdSMark Fasheh 	if (ret) {
1708ccd979bdSMark Fasheh 		mlog_errno(ret);
1709ccd979bdSMark Fasheh 		goto bail;
1710ccd979bdSMark Fasheh 	}
1711ccd979bdSMark Fasheh 
171250008630STiger Yang 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
1713a8f24f1bSJoseph Qi 	if (ret)
171450008630STiger Yang 		mlog_errno(ret);
171550008630STiger Yang 
1716ccd979bdSMark Fasheh bail:
1717ccd979bdSMark Fasheh 	return ret;
1718ccd979bdSMark Fasheh }
1719ccd979bdSMark Fasheh 
1720ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write)
1721ccd979bdSMark Fasheh {
1722ccd979bdSMark Fasheh 	int status, level;
1723ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres;
1724c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1725ccd979bdSMark Fasheh 
1726b0697053SMark Fasheh 	mlog(0, "inode %llu take %s RW lock\n",
1727b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1728ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1729ccd979bdSMark Fasheh 
1730c1e8d35eSTao Ma 	if (ocfs2_mount_local(osb))
1731c271c5c2SSunil Mushran 		return 0;
1732c271c5c2SSunil Mushran 
1733ccd979bdSMark Fasheh 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
1734ccd979bdSMark Fasheh 
1735bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1736ccd979bdSMark Fasheh 
1737ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1738ccd979bdSMark Fasheh 				    0);
1739ccd979bdSMark Fasheh 	if (status < 0)
1740ccd979bdSMark Fasheh 		mlog_errno(status);
1741ccd979bdSMark Fasheh 
1742ccd979bdSMark Fasheh 	return status;
1743ccd979bdSMark Fasheh }
1744ccd979bdSMark Fasheh 
1745ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write)
1746ccd979bdSMark Fasheh {
1747bd3e7610SJoel Becker 	int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1748ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1749c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1750ccd979bdSMark Fasheh 
1751b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s RW lock\n",
1752b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1753ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1754ccd979bdSMark Fasheh 
1755c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
1756ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1757ccd979bdSMark Fasheh }
1758ccd979bdSMark Fasheh 
175950008630STiger Yang /*
176050008630STiger Yang  * ocfs2_open_lock always get PR mode lock.
176150008630STiger Yang  */
176250008630STiger Yang int ocfs2_open_lock(struct inode *inode)
176350008630STiger Yang {
176450008630STiger Yang 	int status = 0;
176550008630STiger Yang 	struct ocfs2_lock_res *lockres;
176650008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
176750008630STiger Yang 
176850008630STiger Yang 	mlog(0, "inode %llu take PRMODE open lock\n",
176950008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
177050008630STiger Yang 
177103efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
177250008630STiger Yang 		goto out;
177350008630STiger Yang 
177450008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
177550008630STiger Yang 
177650008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1777bd3e7610SJoel Becker 				    DLM_LOCK_PR, 0, 0);
177850008630STiger Yang 	if (status < 0)
177950008630STiger Yang 		mlog_errno(status);
178050008630STiger Yang 
178150008630STiger Yang out:
178250008630STiger Yang 	return status;
178350008630STiger Yang }
178450008630STiger Yang 
178550008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write)
178650008630STiger Yang {
178750008630STiger Yang 	int status = 0, level;
178850008630STiger Yang 	struct ocfs2_lock_res *lockres;
178950008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
179050008630STiger Yang 
179150008630STiger Yang 	mlog(0, "inode %llu try to take %s open lock\n",
179250008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
179350008630STiger Yang 	     write ? "EXMODE" : "PRMODE");
179450008630STiger Yang 
179503efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
179603efed8aSTiger Yang 		if (write)
179703efed8aSTiger Yang 			status = -EROFS;
179803efed8aSTiger Yang 		goto out;
179903efed8aSTiger Yang 	}
180003efed8aSTiger Yang 
180150008630STiger Yang 	if (ocfs2_mount_local(osb))
180250008630STiger Yang 		goto out;
180350008630STiger Yang 
180450008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
180550008630STiger Yang 
1806bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
180750008630STiger Yang 
180850008630STiger Yang 	/*
180950008630STiger Yang 	 * The file system may already holding a PRMODE/EXMODE open lock.
1810bd3e7610SJoel Becker 	 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
181150008630STiger Yang 	 * other nodes and the -EAGAIN will indicate to the caller that
181250008630STiger Yang 	 * this inode is still in use.
181350008630STiger Yang 	 */
181450008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1815bd3e7610SJoel Becker 				    level, DLM_LKF_NOQUEUE, 0);
181650008630STiger Yang 
181750008630STiger Yang out:
181850008630STiger Yang 	return status;
181950008630STiger Yang }
182050008630STiger Yang 
182150008630STiger Yang /*
182250008630STiger Yang  * ocfs2_open_unlock unlock PR and EX mode open locks.
182350008630STiger Yang  */
182450008630STiger Yang void ocfs2_open_unlock(struct inode *inode)
182550008630STiger Yang {
182650008630STiger Yang 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
182750008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
182850008630STiger Yang 
182950008630STiger Yang 	mlog(0, "inode %llu drop open lock\n",
183050008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
183150008630STiger Yang 
183250008630STiger Yang 	if (ocfs2_mount_local(osb))
183350008630STiger Yang 		goto out;
183450008630STiger Yang 
183550008630STiger Yang 	if(lockres->l_ro_holders)
183650008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1837bd3e7610SJoel Becker 				     DLM_LOCK_PR);
183850008630STiger Yang 	if(lockres->l_ex_holders)
183950008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1840bd3e7610SJoel Becker 				     DLM_LOCK_EX);
184150008630STiger Yang 
184250008630STiger Yang out:
1843c1e8d35eSTao Ma 	return;
184450008630STiger Yang }
184550008630STiger Yang 
1846cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1847cf8e06f1SMark Fasheh 				     int level)
1848cf8e06f1SMark Fasheh {
1849cf8e06f1SMark Fasheh 	int ret;
1850cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1851cf8e06f1SMark Fasheh 	unsigned long flags;
1852cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1853cf8e06f1SMark Fasheh 
1854cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1855cf8e06f1SMark Fasheh 
1856cf8e06f1SMark Fasheh retry_cancel:
1857cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1858cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
1859cf8e06f1SMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
1860cf8e06f1SMark Fasheh 		if (ret) {
1861cf8e06f1SMark Fasheh 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1862cf8e06f1SMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
1863cf8e06f1SMark Fasheh 			if (ret < 0) {
1864cf8e06f1SMark Fasheh 				mlog_errno(ret);
1865cf8e06f1SMark Fasheh 				goto out;
1866cf8e06f1SMark Fasheh 			}
1867cf8e06f1SMark Fasheh 			goto retry_cancel;
1868cf8e06f1SMark Fasheh 		}
1869cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1870cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1871cf8e06f1SMark Fasheh 
1872cf8e06f1SMark Fasheh 		ocfs2_wait_for_mask(&mw);
1873cf8e06f1SMark Fasheh 		goto retry_cancel;
1874cf8e06f1SMark Fasheh 	}
1875cf8e06f1SMark Fasheh 
1876cf8e06f1SMark Fasheh 	ret = -ERESTARTSYS;
1877cf8e06f1SMark Fasheh 	/*
1878cf8e06f1SMark Fasheh 	 * We may still have gotten the lock, in which case there's no
1879cf8e06f1SMark Fasheh 	 * point to restarting the syscall.
1880cf8e06f1SMark Fasheh 	 */
1881cf8e06f1SMark Fasheh 	if (lockres->l_level == level)
1882cf8e06f1SMark Fasheh 		ret = 0;
1883cf8e06f1SMark Fasheh 
1884cf8e06f1SMark Fasheh 	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1885cf8e06f1SMark Fasheh 	     lockres->l_flags, lockres->l_level, lockres->l_action);
1886cf8e06f1SMark Fasheh 
1887cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1888cf8e06f1SMark Fasheh 
1889cf8e06f1SMark Fasheh out:
1890cf8e06f1SMark Fasheh 	return ret;
1891cf8e06f1SMark Fasheh }
1892cf8e06f1SMark Fasheh 
1893cf8e06f1SMark Fasheh /*
1894cf8e06f1SMark Fasheh  * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1895cf8e06f1SMark Fasheh  * flock() calls. The locking approach this requires is sufficiently
1896cf8e06f1SMark Fasheh  * different from all other cluster lock types that we implement a
18973ad2f3fbSDaniel Mack  * separate path to the "low-level" dlm calls. In particular:
1898cf8e06f1SMark Fasheh  *
1899cf8e06f1SMark Fasheh  * - No optimization of lock levels is done - we take at exactly
1900cf8e06f1SMark Fasheh  *   what's been requested.
1901cf8e06f1SMark Fasheh  *
1902cf8e06f1SMark Fasheh  * - No lock caching is employed. We immediately downconvert to
1903cf8e06f1SMark Fasheh  *   no-lock at unlock time. This also means flock locks never go on
1904cf8e06f1SMark Fasheh  *   the blocking list).
1905cf8e06f1SMark Fasheh  *
1906cf8e06f1SMark Fasheh  * - Since userspace can trivially deadlock itself with flock, we make
1907cf8e06f1SMark Fasheh  *   sure to allow cancellation of a misbehaving applications flock()
1908cf8e06f1SMark Fasheh  *   request.
1909cf8e06f1SMark Fasheh  *
1910cf8e06f1SMark Fasheh  * - Access to any flock lockres doesn't require concurrency, so we
1911cf8e06f1SMark Fasheh  *   can simplify the code by requiring the caller to guarantee
1912cf8e06f1SMark Fasheh  *   serialization of dlmglue flock calls.
1913cf8e06f1SMark Fasheh  */
1914cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock)
1915cf8e06f1SMark Fasheh {
1916e988cf1cSMark Fasheh 	int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1917e988cf1cSMark Fasheh 	unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
1918cf8e06f1SMark Fasheh 	unsigned long flags;
1919cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
1920cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
1921cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1922cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1923cf8e06f1SMark Fasheh 
1924cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1925cf8e06f1SMark Fasheh 
1926cf8e06f1SMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
1927bd3e7610SJoel Becker 	    (lockres->l_level > DLM_LOCK_NL)) {
1928cf8e06f1SMark Fasheh 		mlog(ML_ERROR,
1929cf8e06f1SMark Fasheh 		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1930cf8e06f1SMark Fasheh 		     "level: %u\n", lockres->l_name, lockres->l_flags,
1931cf8e06f1SMark Fasheh 		     lockres->l_level);
1932cf8e06f1SMark Fasheh 		return -EINVAL;
1933cf8e06f1SMark Fasheh 	}
1934cf8e06f1SMark Fasheh 
1935cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1936cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1937cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1938cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1939cf8e06f1SMark Fasheh 
1940cf8e06f1SMark Fasheh 		/*
1941cf8e06f1SMark Fasheh 		 * Get the lock at NLMODE to start - that way we
1942cf8e06f1SMark Fasheh 		 * can cancel the upconvert request if need be.
1943cf8e06f1SMark Fasheh 		 */
1944e988cf1cSMark Fasheh 		ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
1945cf8e06f1SMark Fasheh 		if (ret < 0) {
1946cf8e06f1SMark Fasheh 			mlog_errno(ret);
1947cf8e06f1SMark Fasheh 			goto out;
1948cf8e06f1SMark Fasheh 		}
1949cf8e06f1SMark Fasheh 
1950cf8e06f1SMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
1951cf8e06f1SMark Fasheh 		if (ret) {
1952cf8e06f1SMark Fasheh 			mlog_errno(ret);
1953cf8e06f1SMark Fasheh 			goto out;
1954cf8e06f1SMark Fasheh 		}
1955cf8e06f1SMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
1956cf8e06f1SMark Fasheh 	}
1957cf8e06f1SMark Fasheh 
1958cf8e06f1SMark Fasheh 	lockres->l_action = OCFS2_AST_CONVERT;
1959e988cf1cSMark Fasheh 	lkm_flags |= DLM_LKF_CONVERT;
1960cf8e06f1SMark Fasheh 	lockres->l_requested = level;
1961cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1962cf8e06f1SMark Fasheh 
1963cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1964cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1965cf8e06f1SMark Fasheh 
19664670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
1967a796d286SJoel Becker 			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1);
19687431cd7eSJoel Becker 	if (ret) {
19697431cd7eSJoel Becker 		if (!trylock || (ret != -EAGAIN)) {
197024ef1815SJoel Becker 			ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1971cf8e06f1SMark Fasheh 			ret = -EINVAL;
1972cf8e06f1SMark Fasheh 		}
1973cf8e06f1SMark Fasheh 
1974cf8e06f1SMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
1975cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, &mw);
1976cf8e06f1SMark Fasheh 		goto out;
1977cf8e06f1SMark Fasheh 	}
1978cf8e06f1SMark Fasheh 
1979cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
1980cf8e06f1SMark Fasheh 	if (ret == -ERESTARTSYS) {
1981cf8e06f1SMark Fasheh 		/*
1982cf8e06f1SMark Fasheh 		 * Userspace can cause deadlock itself with
1983cf8e06f1SMark Fasheh 		 * flock(). Current behavior locally is to allow the
1984cf8e06f1SMark Fasheh 		 * deadlock, but abort the system call if a signal is
1985cf8e06f1SMark Fasheh 		 * received. We follow this example, otherwise a
1986cf8e06f1SMark Fasheh 		 * poorly written program could sit in kernel until
1987cf8e06f1SMark Fasheh 		 * reboot.
1988cf8e06f1SMark Fasheh 		 *
1989cf8e06f1SMark Fasheh 		 * Handling this is a bit more complicated for Ocfs2
1990cf8e06f1SMark Fasheh 		 * though. We can't exit this function with an
1991cf8e06f1SMark Fasheh 		 * outstanding lock request, so a cancel convert is
1992cf8e06f1SMark Fasheh 		 * required. We intentionally overwrite 'ret' - if the
1993cf8e06f1SMark Fasheh 		 * cancel fails and the lock was granted, it's easier
1994af901ca1SAndré Goddard Rosa 		 * to just bubble success back up to the user.
1995cf8e06f1SMark Fasheh 		 */
1996cf8e06f1SMark Fasheh 		ret = ocfs2_flock_handle_signal(lockres, level);
19971693a5c0SDavid Teigland 	} else if (!ret && (level > lockres->l_level)) {
19981693a5c0SDavid Teigland 		/* Trylock failed asynchronously */
19991693a5c0SDavid Teigland 		BUG_ON(!trylock);
20001693a5c0SDavid Teigland 		ret = -EAGAIN;
2001cf8e06f1SMark Fasheh 	}
2002cf8e06f1SMark Fasheh 
2003cf8e06f1SMark Fasheh out:
2004cf8e06f1SMark Fasheh 
2005cf8e06f1SMark Fasheh 	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
2006cf8e06f1SMark Fasheh 	     lockres->l_name, ex, trylock, ret);
2007cf8e06f1SMark Fasheh 	return ret;
2008cf8e06f1SMark Fasheh }
2009cf8e06f1SMark Fasheh 
2010cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file)
2011cf8e06f1SMark Fasheh {
2012cf8e06f1SMark Fasheh 	int ret;
2013de551246SJoel Becker 	unsigned int gen;
2014cf8e06f1SMark Fasheh 	unsigned long flags;
2015cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
2016cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
2017cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
2018cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
2019cf8e06f1SMark Fasheh 
2020cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
2021cf8e06f1SMark Fasheh 
2022cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
2023cf8e06f1SMark Fasheh 		return;
2024cf8e06f1SMark Fasheh 
2025e988cf1cSMark Fasheh 	if (lockres->l_level == DLM_LOCK_NL)
2026cf8e06f1SMark Fasheh 		return;
2027cf8e06f1SMark Fasheh 
2028cf8e06f1SMark Fasheh 	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
2029cf8e06f1SMark Fasheh 	     lockres->l_name, lockres->l_flags, lockres->l_level,
2030cf8e06f1SMark Fasheh 	     lockres->l_action);
2031cf8e06f1SMark Fasheh 
2032cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2033cf8e06f1SMark Fasheh 	/*
2034cf8e06f1SMark Fasheh 	 * Fake a blocking ast for the downconvert code.
2035cf8e06f1SMark Fasheh 	 */
2036cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
2037bd3e7610SJoel Becker 	lockres->l_blocking = DLM_LOCK_EX;
2038cf8e06f1SMark Fasheh 
2039e988cf1cSMark Fasheh 	gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
2040cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
2041cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2042cf8e06f1SMark Fasheh 
2043e988cf1cSMark Fasheh 	ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
2044cf8e06f1SMark Fasheh 	if (ret) {
2045cf8e06f1SMark Fasheh 		mlog_errno(ret);
2046cf8e06f1SMark Fasheh 		return;
2047cf8e06f1SMark Fasheh 	}
2048cf8e06f1SMark Fasheh 
2049cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask(&mw);
2050cf8e06f1SMark Fasheh 	if (ret)
2051cf8e06f1SMark Fasheh 		mlog_errno(ret);
2052cf8e06f1SMark Fasheh }
2053cf8e06f1SMark Fasheh 
205434d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
2055ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
2056ccd979bdSMark Fasheh {
2057ccd979bdSMark Fasheh 	int kick = 0;
2058ccd979bdSMark Fasheh 
2059ccd979bdSMark Fasheh 	/* If we know that another node is waiting on our lock, kick
206034d024f8SMark Fasheh 	 * the downconvert thread * pre-emptively when we reach a release
2061ccd979bdSMark Fasheh 	 * condition. */
2062ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
2063ccd979bdSMark Fasheh 		switch(lockres->l_blocking) {
2064bd3e7610SJoel Becker 		case DLM_LOCK_EX:
2065ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders && !lockres->l_ro_holders)
2066ccd979bdSMark Fasheh 				kick = 1;
2067ccd979bdSMark Fasheh 			break;
2068bd3e7610SJoel Becker 		case DLM_LOCK_PR:
2069ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders)
2070ccd979bdSMark Fasheh 				kick = 1;
2071ccd979bdSMark Fasheh 			break;
2072ccd979bdSMark Fasheh 		default:
2073ccd979bdSMark Fasheh 			BUG();
2074ccd979bdSMark Fasheh 		}
2075ccd979bdSMark Fasheh 	}
2076ccd979bdSMark Fasheh 
2077ccd979bdSMark Fasheh 	if (kick)
207834d024f8SMark Fasheh 		ocfs2_wake_downconvert_thread(osb);
2079ccd979bdSMark Fasheh }
2080ccd979bdSMark Fasheh 
2081ccd979bdSMark Fasheh #define OCFS2_SEC_BITS   34
2082ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT  (64 - 34)
2083ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
2084ccd979bdSMark Fasheh 
2085ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for
2086ccd979bdSMark Fasheh  * now. */
2087ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec)
2088ccd979bdSMark Fasheh {
2089ccd979bdSMark Fasheh 	u64 res;
2090ccd979bdSMark Fasheh 	u64 sec = spec->tv_sec;
2091ccd979bdSMark Fasheh 	u32 nsec = spec->tv_nsec;
2092ccd979bdSMark Fasheh 
2093ccd979bdSMark Fasheh 	res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
2094ccd979bdSMark Fasheh 
2095ccd979bdSMark Fasheh 	return res;
2096ccd979bdSMark Fasheh }
2097ccd979bdSMark Fasheh 
2098ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't
2099ccd979bdSMark Fasheh  * need ip_lock in this function as anyone who would be changing those
2100e63aecb6SMark Fasheh  * values is supposed to be blocked in ocfs2_inode_lock right now. */
2101ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2102ccd979bdSMark Fasheh {
2103ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2104e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2105ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2106ccd979bdSMark Fasheh 
2107a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2108ccd979bdSMark Fasheh 
210924c19ef4SMark Fasheh 	/*
211024c19ef4SMark Fasheh 	 * Invalidate the LVB of a deleted inode - this way other
211124c19ef4SMark Fasheh 	 * nodes are forced to go to disk and discover the new inode
211224c19ef4SMark Fasheh 	 * status.
211324c19ef4SMark Fasheh 	 */
211424c19ef4SMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
211524c19ef4SMark Fasheh 		lvb->lvb_version = 0;
211624c19ef4SMark Fasheh 		goto out;
211724c19ef4SMark Fasheh 	}
211824c19ef4SMark Fasheh 
21194d3b83f7SMark Fasheh 	lvb->lvb_version   = OCFS2_LVB_VERSION;
2120ccd979bdSMark Fasheh 	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode));
2121ccd979bdSMark Fasheh 	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
212203ab30f7SEric W. Biederman 	lvb->lvb_iuid      = cpu_to_be32(i_uid_read(inode));
212303ab30f7SEric W. Biederman 	lvb->lvb_igid      = cpu_to_be32(i_gid_read(inode));
2124ccd979bdSMark Fasheh 	lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
2125ccd979bdSMark Fasheh 	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
2126ccd979bdSMark Fasheh 	lvb->lvb_iatime_packed  =
2127ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
2128ccd979bdSMark Fasheh 	lvb->lvb_ictime_packed =
2129ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
2130ccd979bdSMark Fasheh 	lvb->lvb_imtime_packed =
2131ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
2132ca4d147eSHerbert Poetzl 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
213315b1e36bSMark Fasheh 	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
2134f9e2d82eSMark Fasheh 	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
2135ccd979bdSMark Fasheh 
213624c19ef4SMark Fasheh out:
2137ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2138ccd979bdSMark Fasheh }
2139ccd979bdSMark Fasheh 
2140ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec,
2141ccd979bdSMark Fasheh 				  u64 packed_time)
2142ccd979bdSMark Fasheh {
2143ccd979bdSMark Fasheh 	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
2144ccd979bdSMark Fasheh 	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
2145ccd979bdSMark Fasheh }
2146ccd979bdSMark Fasheh 
2147ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2148ccd979bdSMark Fasheh {
2149ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2150e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2151ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2152ccd979bdSMark Fasheh 
2153ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2154ccd979bdSMark Fasheh 
2155a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2156ccd979bdSMark Fasheh 
2157ccd979bdSMark Fasheh 	/* We're safe here without the lockres lock... */
2158ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2159ccd979bdSMark Fasheh 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
2160ccd979bdSMark Fasheh 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
2161ccd979bdSMark Fasheh 
2162ca4d147eSHerbert Poetzl 	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
216315b1e36bSMark Fasheh 	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
2164ca4d147eSHerbert Poetzl 	ocfs2_set_inode_flags(inode);
2165ca4d147eSHerbert Poetzl 
2166ccd979bdSMark Fasheh 	/* fast-symlinks are a special case */
2167ccd979bdSMark Fasheh 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
2168ccd979bdSMark Fasheh 		inode->i_blocks = 0;
2169ccd979bdSMark Fasheh 	else
21708110b073SMark Fasheh 		inode->i_blocks = ocfs2_inode_sector_count(inode);
2171ccd979bdSMark Fasheh 
217203ab30f7SEric W. Biederman 	i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
217303ab30f7SEric W. Biederman 	i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
2174ccd979bdSMark Fasheh 	inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
2175bfe86848SMiklos Szeredi 	set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2176ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_atime,
2177ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_iatime_packed));
2178ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_mtime,
2179ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_imtime_packed));
2180ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_ctime,
2181ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_ictime_packed));
2182ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2183ccd979bdSMark Fasheh }
2184ccd979bdSMark Fasheh 
2185f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
2186f9e2d82eSMark Fasheh 					      struct ocfs2_lock_res *lockres)
2187ccd979bdSMark Fasheh {
2188a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2189ccd979bdSMark Fasheh 
21901c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
21911c520dfbSJoel Becker 	    && lvb->lvb_version == OCFS2_LVB_VERSION
2192f9e2d82eSMark Fasheh 	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
2193ccd979bdSMark Fasheh 		return 1;
2194ccd979bdSMark Fasheh 	return 0;
2195ccd979bdSMark Fasheh }
2196ccd979bdSMark Fasheh 
2197ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and
2198ccd979bdSMark Fasheh  * arbitrate who gets to refresh it.
2199ccd979bdSMark Fasheh  *
2200ccd979bdSMark Fasheh  *   0 means no refresh needed.
2201ccd979bdSMark Fasheh  *
2202ccd979bdSMark Fasheh  *   > 0 means you need to refresh this and you MUST call
2203ccd979bdSMark Fasheh  *   ocfs2_complete_lock_res_refresh afterwards. */
2204ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2205ccd979bdSMark Fasheh {
2206ccd979bdSMark Fasheh 	unsigned long flags;
2207ccd979bdSMark Fasheh 	int status = 0;
2208ccd979bdSMark Fasheh 
2209ccd979bdSMark Fasheh refresh_check:
2210ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2211ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2212ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2213ccd979bdSMark Fasheh 		goto bail;
2214ccd979bdSMark Fasheh 	}
2215ccd979bdSMark Fasheh 
2216ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2217ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2218ccd979bdSMark Fasheh 
2219ccd979bdSMark Fasheh 		ocfs2_wait_on_refreshing_lock(lockres);
2220ccd979bdSMark Fasheh 		goto refresh_check;
2221ccd979bdSMark Fasheh 	}
2222ccd979bdSMark Fasheh 
2223ccd979bdSMark Fasheh 	/* Ok, I'll be the one to refresh this lock. */
2224ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
2225ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2226ccd979bdSMark Fasheh 
2227ccd979bdSMark Fasheh 	status = 1;
2228ccd979bdSMark Fasheh bail:
2229c1e8d35eSTao Ma 	mlog(0, "status %d\n", status);
2230ccd979bdSMark Fasheh 	return status;
2231ccd979bdSMark Fasheh }
2232ccd979bdSMark Fasheh 
2233ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh
2234ccd979bdSMark Fasheh  * anymroe, but i won't clear the needs refresh flag. */
2235ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
2236ccd979bdSMark Fasheh 						   int status)
2237ccd979bdSMark Fasheh {
2238ccd979bdSMark Fasheh 	unsigned long flags;
2239ccd979bdSMark Fasheh 
2240ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2241ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
2242ccd979bdSMark Fasheh 	if (!status)
2243ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
2244ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2245ccd979bdSMark Fasheh 
2246ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
2247ccd979bdSMark Fasheh }
2248ccd979bdSMark Fasheh 
2249ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. */
2250e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode,
2251ccd979bdSMark Fasheh 				  struct buffer_head **bh)
2252ccd979bdSMark Fasheh {
2253ccd979bdSMark Fasheh 	int status = 0;
2254ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2255e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2256ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
2257c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2258ccd979bdSMark Fasheh 
2259be9e986bSMark Fasheh 	if (ocfs2_mount_local(osb))
2260be9e986bSMark Fasheh 		goto bail;
2261be9e986bSMark Fasheh 
2262ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2263ccd979bdSMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
2264b0697053SMark Fasheh 		mlog(0, "Orphaned inode %llu was deleted while we "
2265ccd979bdSMark Fasheh 		     "were waiting on a lock. ip_flags = 0x%x\n",
2266b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
2267ccd979bdSMark Fasheh 		spin_unlock(&oi->ip_lock);
2268ccd979bdSMark Fasheh 		status = -ENOENT;
2269ccd979bdSMark Fasheh 		goto bail;
2270ccd979bdSMark Fasheh 	}
2271ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2272ccd979bdSMark Fasheh 
2273ccd979bdSMark Fasheh 	if (!ocfs2_should_refresh_lock_res(lockres))
2274ccd979bdSMark Fasheh 		goto bail;
2275ccd979bdSMark Fasheh 
2276ccd979bdSMark Fasheh 	/* This will discard any caching information we might have had
2277ccd979bdSMark Fasheh 	 * for the inode metadata. */
22788cb471e8SJoel Becker 	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
2279ccd979bdSMark Fasheh 
228083418978SMark Fasheh 	ocfs2_extent_map_trunc(inode, 0);
228183418978SMark Fasheh 
2282be9e986bSMark Fasheh 	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
2283b0697053SMark Fasheh 		mlog(0, "Trusting LVB on inode %llu\n",
2284b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno);
2285ccd979bdSMark Fasheh 		ocfs2_refresh_inode_from_lvb(inode);
2286ccd979bdSMark Fasheh 	} else {
2287ccd979bdSMark Fasheh 		/* Boo, we have to go to disk. */
2288ccd979bdSMark Fasheh 		/* read bh, cast, ocfs2_refresh_inode */
2289b657c95cSJoel Becker 		status = ocfs2_read_inode_block(inode, bh);
2290ccd979bdSMark Fasheh 		if (status < 0) {
2291ccd979bdSMark Fasheh 			mlog_errno(status);
2292ccd979bdSMark Fasheh 			goto bail_refresh;
2293ccd979bdSMark Fasheh 		}
2294ccd979bdSMark Fasheh 		fe = (struct ocfs2_dinode *) (*bh)->b_data;
2295ccd979bdSMark Fasheh 
2296ccd979bdSMark Fasheh 		/* This is a good chance to make sure we're not
2297b657c95cSJoel Becker 		 * locking an invalid object.  ocfs2_read_inode_block()
2298b657c95cSJoel Becker 		 * already checked that the inode block is sane.
2299ccd979bdSMark Fasheh 		 *
2300ccd979bdSMark Fasheh 		 * We bug on a stale inode here because we checked
2301ccd979bdSMark Fasheh 		 * above whether it was wiped from disk. The wiping
2302ccd979bdSMark Fasheh 		 * node provides a guarantee that we receive that
2303ccd979bdSMark Fasheh 		 * message and can mark the inode before dropping any
2304ccd979bdSMark Fasheh 		 * locks associated with it. */
2305ccd979bdSMark Fasheh 		mlog_bug_on_msg(inode->i_generation !=
2306ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_generation),
2307b0697053SMark Fasheh 				"Invalid dinode %llu disk generation: %u "
2308ccd979bdSMark Fasheh 				"inode->i_generation: %u\n",
2309b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2310b0697053SMark Fasheh 				le32_to_cpu(fe->i_generation),
2311ccd979bdSMark Fasheh 				inode->i_generation);
2312ccd979bdSMark Fasheh 		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
2313ccd979bdSMark Fasheh 				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
2314b0697053SMark Fasheh 				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
2315b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2316b0697053SMark Fasheh 				(unsigned long long)le64_to_cpu(fe->i_dtime),
2317ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_flags));
2318ccd979bdSMark Fasheh 
2319ccd979bdSMark Fasheh 		ocfs2_refresh_inode(inode, fe);
23208ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2321ccd979bdSMark Fasheh 	}
2322ccd979bdSMark Fasheh 
2323ccd979bdSMark Fasheh 	status = 0;
2324ccd979bdSMark Fasheh bail_refresh:
2325ccd979bdSMark Fasheh 	ocfs2_complete_lock_res_refresh(lockres, status);
2326ccd979bdSMark Fasheh bail:
2327ccd979bdSMark Fasheh 	return status;
2328ccd979bdSMark Fasheh }
2329ccd979bdSMark Fasheh 
/*
 * Hand an inode buffer head back through @ret_bh. If @passed_bh is
 * non-NULL it is reused with an extra reference taken via get_bh();
 * otherwise the inode block is read with ocfs2_read_inode_block().
 *
 * Returns 0 when @passed_bh was reused, else the read status (logged
 * when negative).
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (!passed_bh) {
		status = ocfs2_read_inode_block(inode, ret_bh);
		if (status < 0)
			mlog_errno(status);

		return status;
	}

	/* Ok, the update went to disk for us, use the returned bh. */
	*ret_bh = passed_bh;
	get_bh(passed_bh);

	return 0;
}
2351ccd979bdSMark Fasheh 
2352ccd979bdSMark Fasheh /*
2353ccd979bdSMark Fasheh  * returns < 0 error if the callback will never be called, otherwise
2354ccd979bdSMark Fasheh  * the result of the lock will be communicated via the callback.
2355ccd979bdSMark Fasheh  */
2356cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode,
2357ccd979bdSMark Fasheh 				 struct buffer_head **ret_bh,
2358ccd979bdSMark Fasheh 				 int ex,
2359cb25797dSJan Kara 				 int arg_flags,
2360cb25797dSJan Kara 				 int subclass)
2361ccd979bdSMark Fasheh {
2362bd3e7610SJoel Becker 	int status, level, acquired;
2363bd3e7610SJoel Becker 	u32 dlm_flags;
2364c271c5c2SSunil Mushran 	struct ocfs2_lock_res *lockres = NULL;
2365ccd979bdSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2366ccd979bdSMark Fasheh 	struct buffer_head *local_bh = NULL;
2367ccd979bdSMark Fasheh 
2368b0697053SMark Fasheh 	mlog(0, "inode %llu, take %s META lock\n",
2369b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2370ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2371ccd979bdSMark Fasheh 
2372ccd979bdSMark Fasheh 	status = 0;
2373ccd979bdSMark Fasheh 	acquired = 0;
2374ccd979bdSMark Fasheh 	/* We'll allow faking a readonly metadata lock for
2375ccd979bdSMark Fasheh 	 * rodevices. */
2376ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb)) {
2377ccd979bdSMark Fasheh 		if (ex)
2378ccd979bdSMark Fasheh 			status = -EROFS;
237903efed8aSTiger Yang 		goto getbh;
2380ccd979bdSMark Fasheh 	}
2381ccd979bdSMark Fasheh 
2382439a36b8SEric Ren 	if ((arg_flags & OCFS2_META_LOCK_GETBH) ||
2383439a36b8SEric Ren 	    ocfs2_mount_local(osb))
2384439a36b8SEric Ren 		goto update;
2385c271c5c2SSunil Mushran 
2386ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2387553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2388ccd979bdSMark Fasheh 
2389e63aecb6SMark Fasheh 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
2390bd3e7610SJoel Becker 	level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2391ccd979bdSMark Fasheh 	dlm_flags = 0;
2392ccd979bdSMark Fasheh 	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2393bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_NOQUEUE;
2394ccd979bdSMark Fasheh 
2395cb25797dSJan Kara 	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2396cb25797dSJan Kara 				      arg_flags, subclass, _RET_IP_);
2397ccd979bdSMark Fasheh 	if (status < 0) {
239841003a7bSZach Brown 		if (status != -EAGAIN)
2399ccd979bdSMark Fasheh 			mlog_errno(status);
2400ccd979bdSMark Fasheh 		goto bail;
2401ccd979bdSMark Fasheh 	}
2402ccd979bdSMark Fasheh 
2403ccd979bdSMark Fasheh 	/* Notify the error cleanup path to drop the cluster lock. */
2404ccd979bdSMark Fasheh 	acquired = 1;
2405ccd979bdSMark Fasheh 
2406ccd979bdSMark Fasheh 	/* We wait twice because a node may have died while we were in
2407ccd979bdSMark Fasheh 	 * the lower dlm layers. The second time though, we've
2408ccd979bdSMark Fasheh 	 * committed to owning this lock so we don't allow signals to
2409ccd979bdSMark Fasheh 	 * abort the operation. */
2410ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2411553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2412ccd979bdSMark Fasheh 
2413439a36b8SEric Ren update:
241424c19ef4SMark Fasheh 	/*
241524c19ef4SMark Fasheh 	 * We only see this flag if we're being called from
241624c19ef4SMark Fasheh 	 * ocfs2_read_locked_inode(). It means we're locking an inode
241724c19ef4SMark Fasheh 	 * which hasn't been populated yet, so clear the refresh flag
241824c19ef4SMark Fasheh 	 * and let the caller handle it.
241924c19ef4SMark Fasheh 	 */
242024c19ef4SMark Fasheh 	if (inode->i_state & I_NEW) {
242124c19ef4SMark Fasheh 		status = 0;
2422c271c5c2SSunil Mushran 		if (lockres)
242324c19ef4SMark Fasheh 			ocfs2_complete_lock_res_refresh(lockres, 0);
242424c19ef4SMark Fasheh 		goto bail;
242524c19ef4SMark Fasheh 	}
242624c19ef4SMark Fasheh 
2427ccd979bdSMark Fasheh 	/* This is fun. The caller may want a bh back, or it may
2428e63aecb6SMark Fasheh 	 * not. ocfs2_inode_lock_update definitely wants one in, but
2429ccd979bdSMark Fasheh 	 * may or may not read one, depending on what's in the
2430ccd979bdSMark Fasheh 	 * LVB. The result of all of this is that we've *only* gone to
2431ccd979bdSMark Fasheh 	 * disk if we have to, so the complexity is worthwhile. */
2432e63aecb6SMark Fasheh 	status = ocfs2_inode_lock_update(inode, &local_bh);
2433ccd979bdSMark Fasheh 	if (status < 0) {
2434ccd979bdSMark Fasheh 		if (status != -ENOENT)
2435ccd979bdSMark Fasheh 			mlog_errno(status);
2436ccd979bdSMark Fasheh 		goto bail;
2437ccd979bdSMark Fasheh 	}
243803efed8aSTiger Yang getbh:
2439ccd979bdSMark Fasheh 	if (ret_bh) {
2440ccd979bdSMark Fasheh 		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2441ccd979bdSMark Fasheh 		if (status < 0) {
2442ccd979bdSMark Fasheh 			mlog_errno(status);
2443ccd979bdSMark Fasheh 			goto bail;
2444ccd979bdSMark Fasheh 		}
2445ccd979bdSMark Fasheh 	}
2446ccd979bdSMark Fasheh 
2447ccd979bdSMark Fasheh bail:
2448ccd979bdSMark Fasheh 	if (status < 0) {
2449ccd979bdSMark Fasheh 		if (ret_bh && (*ret_bh)) {
2450ccd979bdSMark Fasheh 			brelse(*ret_bh);
2451ccd979bdSMark Fasheh 			*ret_bh = NULL;
2452ccd979bdSMark Fasheh 		}
2453ccd979bdSMark Fasheh 		if (acquired)
2454e63aecb6SMark Fasheh 			ocfs2_inode_unlock(inode, ex);
2455ccd979bdSMark Fasheh 	}
2456ccd979bdSMark Fasheh 
2457ccd979bdSMark Fasheh 	if (local_bh)
2458ccd979bdSMark Fasheh 		brelse(local_bh);
2459ccd979bdSMark Fasheh 
2460ccd979bdSMark Fasheh 	return status;
2461ccd979bdSMark Fasheh }
2462ccd979bdSMark Fasheh 
2463ccd979bdSMark Fasheh /*
246434d024f8SMark Fasheh  * This is working around a lock inversion between tasks acquiring DLM
246534d024f8SMark Fasheh  * locks while holding a page lock and the downconvert thread which
246634d024f8SMark Fasheh  * blocks dlm lock acquiry while acquiring page locks.
2467ccd979bdSMark Fasheh  *
2468ccd979bdSMark Fasheh  * ** These _with_page variantes are only intended to be called from aop
2469ccd979bdSMark Fasheh  * methods that hold page locks and return a very specific *positive* error
2470ccd979bdSMark Fasheh  * code that aop methods pass up to the VFS -- test for errors with != 0. **
2471ccd979bdSMark Fasheh  *
247234d024f8SMark Fasheh  * The DLM is called such that it returns -EAGAIN if it would have
247334d024f8SMark Fasheh  * blocked waiting for the downconvert thread.  In that case we unlock
247434d024f8SMark Fasheh  * our page so the downconvert thread can make progress.  Once we've
247534d024f8SMark Fasheh  * done this we have to return AOP_TRUNCATED_PAGE so the aop method
247634d024f8SMark Fasheh  * that called us can bubble that back up into the VFS who will then
247734d024f8SMark Fasheh  * immediately retry the aop call.
2478ccd979bdSMark Fasheh  */
2479e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode,
2480ccd979bdSMark Fasheh 			      struct buffer_head **ret_bh,
2481ccd979bdSMark Fasheh 			      int ex,
2482ccd979bdSMark Fasheh 			      struct page *page)
2483ccd979bdSMark Fasheh {
2484ccd979bdSMark Fasheh 	int ret;
2485ccd979bdSMark Fasheh 
2486e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2487ccd979bdSMark Fasheh 	if (ret == -EAGAIN) {
2488ccd979bdSMark Fasheh 		unlock_page(page);
2489ccd979bdSMark Fasheh 		ret = AOP_TRUNCATED_PAGE;
2490ccd979bdSMark Fasheh 	}
2491ccd979bdSMark Fasheh 
2492ccd979bdSMark Fasheh 	return ret;
2493ccd979bdSMark Fasheh }
2494ccd979bdSMark Fasheh 
2495e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode,
24967f1a37e3STiger Yang 			  struct vfsmount *vfsmnt,
24977f1a37e3STiger Yang 			  int *level)
24987f1a37e3STiger Yang {
24997f1a37e3STiger Yang 	int ret;
25007f1a37e3STiger Yang 
2501e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock(inode, NULL, 0);
25027f1a37e3STiger Yang 	if (ret < 0) {
25037f1a37e3STiger Yang 		mlog_errno(ret);
25047f1a37e3STiger Yang 		return ret;
25057f1a37e3STiger Yang 	}
25067f1a37e3STiger Yang 
25077f1a37e3STiger Yang 	/*
25087f1a37e3STiger Yang 	 * If we should update atime, we will get EX lock,
25097f1a37e3STiger Yang 	 * otherwise we just get PR lock.
25107f1a37e3STiger Yang 	 */
25117f1a37e3STiger Yang 	if (ocfs2_should_update_atime(inode, vfsmnt)) {
25127f1a37e3STiger Yang 		struct buffer_head *bh = NULL;
25137f1a37e3STiger Yang 
2514e63aecb6SMark Fasheh 		ocfs2_inode_unlock(inode, 0);
2515e63aecb6SMark Fasheh 		ret = ocfs2_inode_lock(inode, &bh, 1);
25167f1a37e3STiger Yang 		if (ret < 0) {
25177f1a37e3STiger Yang 			mlog_errno(ret);
25187f1a37e3STiger Yang 			return ret;
25197f1a37e3STiger Yang 		}
25207f1a37e3STiger Yang 		*level = 1;
25217f1a37e3STiger Yang 		if (ocfs2_should_update_atime(inode, vfsmnt))
25227f1a37e3STiger Yang 			ocfs2_update_inode_atime(inode, bh);
25237f1a37e3STiger Yang 		if (bh)
25247f1a37e3STiger Yang 			brelse(bh);
25257f1a37e3STiger Yang 	} else
25267f1a37e3STiger Yang 		*level = 0;
25277f1a37e3STiger Yang 
25287f1a37e3STiger Yang 	return ret;
25297f1a37e3STiger Yang }
25307f1a37e3STiger Yang 
2531e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode,
2532ccd979bdSMark Fasheh 		       int ex)
2533ccd979bdSMark Fasheh {
2534bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2535e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2536c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2537ccd979bdSMark Fasheh 
2538b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s META lock\n",
2539b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2540ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2541ccd979bdSMark Fasheh 
2542c271c5c2SSunil Mushran 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
2543c271c5c2SSunil Mushran 	    !ocfs2_mount_local(osb))
2544ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
2545ccd979bdSMark Fasheh }
2546ccd979bdSMark Fasheh 
2547439a36b8SEric Ren /*
2548439a36b8SEric Ren  * This _tracker variantes are introduced to deal with the recursive cluster
2549439a36b8SEric Ren  * locking issue. The idea is to keep track of a lock holder on the stack of
2550439a36b8SEric Ren  * the current process. If there's a lock holder on the stack, we know the
2551439a36b8SEric Ren  * task context is already protected by cluster locking. Currently, they're
2552439a36b8SEric Ren  * used in some VFS entry routines.
2553439a36b8SEric Ren  *
2554439a36b8SEric Ren  * return < 0 on error, return == 0 if there's no lock holder on the stack
2555439a36b8SEric Ren  * before this call, return == 1 if this call would be a recursive locking.
2556439a36b8SEric Ren  */
2557439a36b8SEric Ren int ocfs2_inode_lock_tracker(struct inode *inode,
2558439a36b8SEric Ren 			     struct buffer_head **ret_bh,
2559439a36b8SEric Ren 			     int ex,
2560439a36b8SEric Ren 			     struct ocfs2_lock_holder *oh)
2561439a36b8SEric Ren {
2562439a36b8SEric Ren 	int status;
2563439a36b8SEric Ren 	int arg_flags = 0, has_locked;
2564439a36b8SEric Ren 	struct ocfs2_lock_res *lockres;
2565439a36b8SEric Ren 
2566439a36b8SEric Ren 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
2567439a36b8SEric Ren 	has_locked = ocfs2_is_locked_by_me(lockres);
2568439a36b8SEric Ren 	/* Just get buffer head if the cluster lock has been taken */
2569439a36b8SEric Ren 	if (has_locked)
2570439a36b8SEric Ren 		arg_flags = OCFS2_META_LOCK_GETBH;
2571439a36b8SEric Ren 
2572439a36b8SEric Ren 	if (likely(!has_locked || ret_bh)) {
2573439a36b8SEric Ren 		status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags);
2574439a36b8SEric Ren 		if (status < 0) {
2575439a36b8SEric Ren 			if (status != -ENOENT)
2576439a36b8SEric Ren 				mlog_errno(status);
2577439a36b8SEric Ren 			return status;
2578439a36b8SEric Ren 		}
2579439a36b8SEric Ren 	}
2580439a36b8SEric Ren 	if (!has_locked)
2581439a36b8SEric Ren 		ocfs2_add_holder(lockres, oh);
2582439a36b8SEric Ren 
2583439a36b8SEric Ren 	return has_locked;
2584439a36b8SEric Ren }
2585439a36b8SEric Ren 
2586439a36b8SEric Ren void ocfs2_inode_unlock_tracker(struct inode *inode,
2587439a36b8SEric Ren 				int ex,
2588439a36b8SEric Ren 				struct ocfs2_lock_holder *oh,
2589439a36b8SEric Ren 				int had_lock)
2590439a36b8SEric Ren {
2591439a36b8SEric Ren 	struct ocfs2_lock_res *lockres;
2592439a36b8SEric Ren 
2593439a36b8SEric Ren 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
2594439a36b8SEric Ren 	if (!had_lock) {
2595439a36b8SEric Ren 		ocfs2_remove_holder(lockres, oh);
2596439a36b8SEric Ren 		ocfs2_inode_unlock(inode, ex);
2597439a36b8SEric Ren 	}
2598439a36b8SEric Ren }
2599439a36b8SEric Ren 
2600df152c24SSunil Mushran int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
260183273932SSrinivas Eeda {
260283273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
260383273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
260483273932SSrinivas Eeda 	int status = 0;
260583273932SSrinivas Eeda 
2606df152c24SSunil Mushran 	if (ocfs2_is_hard_readonly(osb))
2607df152c24SSunil Mushran 		return -EROFS;
2608df152c24SSunil Mushran 
2609df152c24SSunil Mushran 	if (ocfs2_mount_local(osb))
2610df152c24SSunil Mushran 		return 0;
2611df152c24SSunil Mushran 
261283273932SSrinivas Eeda 	lockres = &osb->osb_orphan_scan.os_lockres;
2613df152c24SSunil Mushran 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
261483273932SSrinivas Eeda 	if (status < 0)
261583273932SSrinivas Eeda 		return status;
261683273932SSrinivas Eeda 
261783273932SSrinivas Eeda 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
26181c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
26191c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
262083273932SSrinivas Eeda 		*seqno = be32_to_cpu(lvb->lvb_os_seqno);
26213211949fSSunil Mushran 	else
26223211949fSSunil Mushran 		*seqno = osb->osb_orphan_scan.os_seqno + 1;
26233211949fSSunil Mushran 
262483273932SSrinivas Eeda 	return status;
262583273932SSrinivas Eeda }
262683273932SSrinivas Eeda 
2627df152c24SSunil Mushran void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
262883273932SSrinivas Eeda {
262983273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
263083273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
263183273932SSrinivas Eeda 
2632df152c24SSunil Mushran 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
263383273932SSrinivas Eeda 		lockres = &osb->osb_orphan_scan.os_lockres;
263483273932SSrinivas Eeda 		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
263583273932SSrinivas Eeda 		lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
263683273932SSrinivas Eeda 		lvb->lvb_os_seqno = cpu_to_be32(seqno);
2637df152c24SSunil Mushran 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2638df152c24SSunil Mushran 	}
263983273932SSrinivas Eeda }
264083273932SSrinivas Eeda 
2641ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb,
2642ccd979bdSMark Fasheh 		     int ex)
2643ccd979bdSMark Fasheh {
2644c271c5c2SSunil Mushran 	int status = 0;
2645bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2646ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2647ccd979bdSMark Fasheh 
2648ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2649ccd979bdSMark Fasheh 		return -EROFS;
2650ccd979bdSMark Fasheh 
2651c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2652c271c5c2SSunil Mushran 		goto bail;
2653c271c5c2SSunil Mushran 
2654ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2655ccd979bdSMark Fasheh 	if (status < 0) {
2656ccd979bdSMark Fasheh 		mlog_errno(status);
2657ccd979bdSMark Fasheh 		goto bail;
2658ccd979bdSMark Fasheh 	}
2659ccd979bdSMark Fasheh 
2660ccd979bdSMark Fasheh 	/* The super block lock path is really in the best position to
2661ccd979bdSMark Fasheh 	 * know when resources covered by the lock need to be
2662ccd979bdSMark Fasheh 	 * refreshed, so we do it here. Of course, making sense of
2663ccd979bdSMark Fasheh 	 * everything is up to the caller :) */
2664ccd979bdSMark Fasheh 	status = ocfs2_should_refresh_lock_res(lockres);
2665ccd979bdSMark Fasheh 	if (status) {
26668e8a4603SMark Fasheh 		status = ocfs2_refresh_slot_info(osb);
2667ccd979bdSMark Fasheh 
2668ccd979bdSMark Fasheh 		ocfs2_complete_lock_res_refresh(lockres, status);
2669ccd979bdSMark Fasheh 
26703278bb74SJunxiao Bi 		if (status < 0) {
26713278bb74SJunxiao Bi 			ocfs2_cluster_unlock(osb, lockres, level);
2672ccd979bdSMark Fasheh 			mlog_errno(status);
26733278bb74SJunxiao Bi 		}
26748ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2675ccd979bdSMark Fasheh 	}
2676ccd979bdSMark Fasheh bail:
2677ccd979bdSMark Fasheh 	return status;
2678ccd979bdSMark Fasheh }
2679ccd979bdSMark Fasheh 
2680ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb,
2681ccd979bdSMark Fasheh 			int ex)
2682ccd979bdSMark Fasheh {
2683bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2684ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2685ccd979bdSMark Fasheh 
2686c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2687ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(osb, lockres, level);
2688ccd979bdSMark Fasheh }
2689ccd979bdSMark Fasheh 
2690ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb)
2691ccd979bdSMark Fasheh {
2692ccd979bdSMark Fasheh 	int status;
2693ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2694ccd979bdSMark Fasheh 
2695ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2696ccd979bdSMark Fasheh 		return -EROFS;
2697ccd979bdSMark Fasheh 
2698c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2699c271c5c2SSunil Mushran 		return 0;
2700c271c5c2SSunil Mushran 
2701bd3e7610SJoel Becker 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2702ccd979bdSMark Fasheh 	if (status < 0)
2703ccd979bdSMark Fasheh 		mlog_errno(status);
2704ccd979bdSMark Fasheh 
2705ccd979bdSMark Fasheh 	return status;
2706ccd979bdSMark Fasheh }
2707ccd979bdSMark Fasheh 
2708ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb)
2709ccd979bdSMark Fasheh {
2710ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2711ccd979bdSMark Fasheh 
2712c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2713bd3e7610SJoel Becker 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2714ccd979bdSMark Fasheh }
2715ccd979bdSMark Fasheh 
27166ca497a8Swengang wang int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
27176ca497a8Swengang wang {
27186ca497a8Swengang wang 	int status;
27196ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
27206ca497a8Swengang wang 
27216ca497a8Swengang wang 	if (ocfs2_is_hard_readonly(osb))
27226ca497a8Swengang wang 		return -EROFS;
27236ca497a8Swengang wang 
27246ca497a8Swengang wang 	if (ocfs2_mount_local(osb))
27256ca497a8Swengang wang 		return 0;
27266ca497a8Swengang wang 
27276ca497a8Swengang wang 	status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
27286ca497a8Swengang wang 				    0, 0);
27296ca497a8Swengang wang 	if (status < 0)
27306ca497a8Swengang wang 		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
27316ca497a8Swengang wang 
27326ca497a8Swengang wang 	return status;
27336ca497a8Swengang wang }
27346ca497a8Swengang wang 
27356ca497a8Swengang wang void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
27366ca497a8Swengang wang {
27376ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
27386ca497a8Swengang wang 
27396ca497a8Swengang wang 	if (!ocfs2_mount_local(osb))
27406ca497a8Swengang wang 		ocfs2_cluster_unlock(osb, lockres,
27416ca497a8Swengang wang 				     ex ? LKM_EXMODE : LKM_PRMODE);
27426ca497a8Swengang wang }
27436ca497a8Swengang wang 
2744d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2745d680efe9SMark Fasheh {
2746d680efe9SMark Fasheh 	int ret;
2747bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2748d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2749d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2750d680efe9SMark Fasheh 
2751d680efe9SMark Fasheh 	BUG_ON(!dl);
2752d680efe9SMark Fasheh 
275303efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
275403efed8aSTiger Yang 		if (ex)
2755d680efe9SMark Fasheh 			return -EROFS;
275603efed8aSTiger Yang 		return 0;
275703efed8aSTiger Yang 	}
2758d680efe9SMark Fasheh 
2759c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2760c271c5c2SSunil Mushran 		return 0;
2761c271c5c2SSunil Mushran 
2762d680efe9SMark Fasheh 	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2763d680efe9SMark Fasheh 	if (ret < 0)
2764d680efe9SMark Fasheh 		mlog_errno(ret);
2765d680efe9SMark Fasheh 
2766d680efe9SMark Fasheh 	return ret;
2767d680efe9SMark Fasheh }
2768d680efe9SMark Fasheh 
2769d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2770d680efe9SMark Fasheh {
2771bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2772d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2773d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2774d680efe9SMark Fasheh 
277503efed8aSTiger Yang 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
2776d680efe9SMark Fasheh 		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
2777d680efe9SMark Fasheh }
2778d680efe9SMark Fasheh 
2779ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because
2780ccd979bdSMark Fasheh  * open references on the debug inodes can live on after a mount, so
2781ccd979bdSMark Fasheh  * we can't rely on the ocfs2_super to always exist. */
2782ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref)
2783ccd979bdSMark Fasheh {
2784ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2785ccd979bdSMark Fasheh 
2786ccd979bdSMark Fasheh 	dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
2787ccd979bdSMark Fasheh 
2788ccd979bdSMark Fasheh 	kfree(dlm_debug);
2789ccd979bdSMark Fasheh }
2790ccd979bdSMark Fasheh 
2791ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
2792ccd979bdSMark Fasheh {
2793ccd979bdSMark Fasheh 	if (dlm_debug)
2794ccd979bdSMark Fasheh 		kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
2795ccd979bdSMark Fasheh }
2796ccd979bdSMark Fasheh 
2797ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
2798ccd979bdSMark Fasheh {
2799ccd979bdSMark Fasheh 	kref_get(&debug->d_refcnt);
2800ccd979bdSMark Fasheh }
2801ccd979bdSMark Fasheh 
2802ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
2803ccd979bdSMark Fasheh {
2804ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2805ccd979bdSMark Fasheh 
2806ccd979bdSMark Fasheh 	dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
2807ccd979bdSMark Fasheh 	if (!dlm_debug) {
2808ccd979bdSMark Fasheh 		mlog_errno(-ENOMEM);
2809ccd979bdSMark Fasheh 		goto out;
2810ccd979bdSMark Fasheh 	}
2811ccd979bdSMark Fasheh 
2812ccd979bdSMark Fasheh 	kref_init(&dlm_debug->d_refcnt);
2813ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
2814ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = NULL;
2815ccd979bdSMark Fasheh out:
2816ccd979bdSMark Fasheh 	return dlm_debug;
2817ccd979bdSMark Fasheh }
2818ccd979bdSMark Fasheh 
2819ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. */
2820ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv {
2821ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *p_dlm_debug;
2822ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_iter_res;
2823ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_tmp_res;
2824ccd979bdSMark Fasheh };
2825ccd979bdSMark Fasheh 
2826ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
2827ccd979bdSMark Fasheh 						 struct ocfs2_dlm_seq_priv *priv)
2828ccd979bdSMark Fasheh {
2829ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter, *ret = NULL;
2830ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
2831ccd979bdSMark Fasheh 
2832ccd979bdSMark Fasheh 	assert_spin_locked(&ocfs2_dlm_tracking_lock);
2833ccd979bdSMark Fasheh 
2834ccd979bdSMark Fasheh 	list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
2835ccd979bdSMark Fasheh 		/* discover the head of the list */
2836ccd979bdSMark Fasheh 		if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
2837ccd979bdSMark Fasheh 			mlog(0, "End of list found, %p\n", ret);
2838ccd979bdSMark Fasheh 			break;
2839ccd979bdSMark Fasheh 		}
2840ccd979bdSMark Fasheh 
2841ccd979bdSMark Fasheh 		/* We track our "dummy" iteration lockres' by a NULL
2842ccd979bdSMark Fasheh 		 * l_ops field. */
2843ccd979bdSMark Fasheh 		if (iter->l_ops != NULL) {
2844ccd979bdSMark Fasheh 			ret = iter;
2845ccd979bdSMark Fasheh 			break;
2846ccd979bdSMark Fasheh 		}
2847ccd979bdSMark Fasheh 	}
2848ccd979bdSMark Fasheh 
2849ccd979bdSMark Fasheh 	return ret;
2850ccd979bdSMark Fasheh }
2851ccd979bdSMark Fasheh 
2852ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
2853ccd979bdSMark Fasheh {
2854ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
2855ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter;
2856ccd979bdSMark Fasheh 
2857ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
2858ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
2859ccd979bdSMark Fasheh 	if (iter) {
2860ccd979bdSMark Fasheh 		/* Since lockres' have the lifetime of their container
2861ccd979bdSMark Fasheh 		 * (which can be inodes, ocfs2_supers, etc) we want to
2862ccd979bdSMark Fasheh 		 * copy this out to a temporary lockres while still
2863ccd979bdSMark Fasheh 		 * under the spinlock. Obviously after this we can't
2864ccd979bdSMark Fasheh 		 * trust any pointers on the copy returned, but that's
2865ccd979bdSMark Fasheh 		 * ok as the information we want isn't typically held
2866ccd979bdSMark Fasheh 		 * in them. */
2867ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
2868ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
2869ccd979bdSMark Fasheh 	}
2870ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
2871ccd979bdSMark Fasheh 
2872ccd979bdSMark Fasheh 	return iter;
2873ccd979bdSMark Fasheh }
2874ccd979bdSMark Fasheh 
/*
 * seq_file .stop: nothing to undo, because .start and .next take and drop
 * ocfs2_dlm_tracking_lock themselves.
 */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
}
2878ccd979bdSMark Fasheh 
2879ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
2880ccd979bdSMark Fasheh {
2881ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
2882ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter = v;
2883ccd979bdSMark Fasheh 	struct ocfs2_lock_res *dummy = &priv->p_iter_res;
2884ccd979bdSMark Fasheh 
2885ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
2886ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(iter, priv);
2887ccd979bdSMark Fasheh 	list_del_init(&dummy->l_debug_list);
2888ccd979bdSMark Fasheh 	if (iter) {
2889ccd979bdSMark Fasheh 		list_add(&dummy->l_debug_list, &iter->l_debug_list);
2890ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
2891ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
2892ccd979bdSMark Fasheh 	}
2893ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
2894ccd979bdSMark Fasheh 
2895ccd979bdSMark Fasheh 	return iter;
2896ccd979bdSMark Fasheh }
2897ccd979bdSMark Fasheh 
28985bc970e8SSunil Mushran /*
28995bc970e8SSunil Mushran  * Version is used by debugfs.ocfs2 to determine the format being used
29005bc970e8SSunil Mushran  *
29015bc970e8SSunil Mushran  * New in version 2
29025bc970e8SSunil Mushran  *	- Lock stats printed
29035bc970e8SSunil Mushran  * New in version 3
29045bc970e8SSunil Mushran  *	- Max time in lock stats is in usecs (instead of nsecs)
29055bc970e8SSunil Mushran  */
29065bc970e8SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 3
2907ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2908ccd979bdSMark Fasheh {
2909ccd979bdSMark Fasheh 	int i;
2910ccd979bdSMark Fasheh 	char *lvb;
2911ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = v;
2912ccd979bdSMark Fasheh 
2913ccd979bdSMark Fasheh 	if (!lockres)
2914ccd979bdSMark Fasheh 		return -EINVAL;
2915ccd979bdSMark Fasheh 
2916d680efe9SMark Fasheh 	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
2917d680efe9SMark Fasheh 
2918d680efe9SMark Fasheh 	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
2919d680efe9SMark Fasheh 		seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
2920d680efe9SMark Fasheh 			   lockres->l_name,
2921d680efe9SMark Fasheh 			   (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
2922d680efe9SMark Fasheh 	else
2923d680efe9SMark Fasheh 		seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
2924d680efe9SMark Fasheh 
2925d680efe9SMark Fasheh 	seq_printf(m, "%d\t"
2926ccd979bdSMark Fasheh 		   "0x%lx\t"
2927ccd979bdSMark Fasheh 		   "0x%x\t"
2928ccd979bdSMark Fasheh 		   "0x%x\t"
2929ccd979bdSMark Fasheh 		   "%u\t"
2930ccd979bdSMark Fasheh 		   "%u\t"
2931ccd979bdSMark Fasheh 		   "%d\t"
2932ccd979bdSMark Fasheh 		   "%d\t",
2933ccd979bdSMark Fasheh 		   lockres->l_level,
2934ccd979bdSMark Fasheh 		   lockres->l_flags,
2935ccd979bdSMark Fasheh 		   lockres->l_action,
2936ccd979bdSMark Fasheh 		   lockres->l_unlock_action,
2937ccd979bdSMark Fasheh 		   lockres->l_ro_holders,
2938ccd979bdSMark Fasheh 		   lockres->l_ex_holders,
2939ccd979bdSMark Fasheh 		   lockres->l_requested,
2940ccd979bdSMark Fasheh 		   lockres->l_blocking);
2941ccd979bdSMark Fasheh 
2942ccd979bdSMark Fasheh 	/* Dump the raw LVB */
29438f2c9c1bSJoel Becker 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2944ccd979bdSMark Fasheh 	for(i = 0; i < DLM_LVB_LEN; i++)
2945ccd979bdSMark Fasheh 		seq_printf(m, "0x%x\t", lvb[i]);
2946ccd979bdSMark Fasheh 
29478ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
29485bc970e8SSunil Mushran # define lock_num_prmode(_l)		((_l)->l_lock_prmode.ls_gets)
29495bc970e8SSunil Mushran # define lock_num_exmode(_l)		((_l)->l_lock_exmode.ls_gets)
29505bc970e8SSunil Mushran # define lock_num_prmode_failed(_l)	((_l)->l_lock_prmode.ls_fail)
29515bc970e8SSunil Mushran # define lock_num_exmode_failed(_l)	((_l)->l_lock_exmode.ls_fail)
29525bc970e8SSunil Mushran # define lock_total_prmode(_l)		((_l)->l_lock_prmode.ls_total)
29535bc970e8SSunil Mushran # define lock_total_exmode(_l)		((_l)->l_lock_exmode.ls_total)
29545bc970e8SSunil Mushran # define lock_max_prmode(_l)		((_l)->l_lock_prmode.ls_max)
29555bc970e8SSunil Mushran # define lock_max_exmode(_l)		((_l)->l_lock_exmode.ls_max)
29565bc970e8SSunil Mushran # define lock_refresh(_l)		((_l)->l_lock_refresh)
29578ddb7b00SSunil Mushran #else
29585bc970e8SSunil Mushran # define lock_num_prmode(_l)		(0)
29595bc970e8SSunil Mushran # define lock_num_exmode(_l)		(0)
29608ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l)	(0)
29618ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l)	(0)
2962dd25e55eSRandy Dunlap # define lock_total_prmode(_l)		(0ULL)
2963dd25e55eSRandy Dunlap # define lock_total_exmode(_l)		(0ULL)
29648ddb7b00SSunil Mushran # define lock_max_prmode(_l)		(0)
29658ddb7b00SSunil Mushran # define lock_max_exmode(_l)		(0)
29668ddb7b00SSunil Mushran # define lock_refresh(_l)		(0)
29678ddb7b00SSunil Mushran #endif
29688ddb7b00SSunil Mushran 	/* The following seq_print was added in version 2 of this output */
29695bc970e8SSunil Mushran 	seq_printf(m, "%u\t"
29705bc970e8SSunil Mushran 		   "%u\t"
29718ddb7b00SSunil Mushran 		   "%u\t"
29728ddb7b00SSunil Mushran 		   "%u\t"
29738ddb7b00SSunil Mushran 		   "%llu\t"
29748ddb7b00SSunil Mushran 		   "%llu\t"
29758ddb7b00SSunil Mushran 		   "%u\t"
29768ddb7b00SSunil Mushran 		   "%u\t"
29778ddb7b00SSunil Mushran 		   "%u\t",
29788ddb7b00SSunil Mushran 		   lock_num_prmode(lockres),
29798ddb7b00SSunil Mushran 		   lock_num_exmode(lockres),
29808ddb7b00SSunil Mushran 		   lock_num_prmode_failed(lockres),
29818ddb7b00SSunil Mushran 		   lock_num_exmode_failed(lockres),
29828ddb7b00SSunil Mushran 		   lock_total_prmode(lockres),
29838ddb7b00SSunil Mushran 		   lock_total_exmode(lockres),
29848ddb7b00SSunil Mushran 		   lock_max_prmode(lockres),
29858ddb7b00SSunil Mushran 		   lock_max_exmode(lockres),
29868ddb7b00SSunil Mushran 		   lock_refresh(lockres));
29878ddb7b00SSunil Mushran 
2988ccd979bdSMark Fasheh 	/* End the line */
2989ccd979bdSMark Fasheh 	seq_printf(m, "\n");
2990ccd979bdSMark Fasheh 	return 0;
2991ccd979bdSMark Fasheh }
2992ccd979bdSMark Fasheh 
299390d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = {
2994ccd979bdSMark Fasheh 	.start =	ocfs2_dlm_seq_start,
2995ccd979bdSMark Fasheh 	.stop =		ocfs2_dlm_seq_stop,
2996ccd979bdSMark Fasheh 	.next =		ocfs2_dlm_seq_next,
2997ccd979bdSMark Fasheh 	.show =		ocfs2_dlm_seq_show,
2998ccd979bdSMark Fasheh };
2999ccd979bdSMark Fasheh 
3000ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
3001ccd979bdSMark Fasheh {
300233fa1d90SJoe Perches 	struct seq_file *seq = file->private_data;
3003ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = seq->private;
3004ccd979bdSMark Fasheh 	struct ocfs2_lock_res *res = &priv->p_iter_res;
3005ccd979bdSMark Fasheh 
3006ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
3007ccd979bdSMark Fasheh 	ocfs2_put_dlm_debug(priv->p_dlm_debug);
3008ccd979bdSMark Fasheh 	return seq_release_private(inode, file);
3009ccd979bdSMark Fasheh }
3010ccd979bdSMark Fasheh 
3011ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
3012ccd979bdSMark Fasheh {
3013ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv;
3014ccd979bdSMark Fasheh 	struct ocfs2_super *osb;
3015ccd979bdSMark Fasheh 
30161848cb55SRob Jones 	priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv));
3017ccd979bdSMark Fasheh 	if (!priv) {
30181848cb55SRob Jones 		mlog_errno(-ENOMEM);
30191848cb55SRob Jones 		return -ENOMEM;
3020ccd979bdSMark Fasheh 	}
30211848cb55SRob Jones 
30228e18e294STheodore Ts'o 	osb = inode->i_private;
3023ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(osb->osb_dlm_debug);
3024ccd979bdSMark Fasheh 	priv->p_dlm_debug = osb->osb_dlm_debug;
3025ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
3026ccd979bdSMark Fasheh 
3027ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(&priv->p_iter_res,
3028ccd979bdSMark Fasheh 				   priv->p_dlm_debug);
3029ccd979bdSMark Fasheh 
30301848cb55SRob Jones 	return 0;
3031ccd979bdSMark Fasheh }
3032ccd979bdSMark Fasheh 
30334b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = {
3034ccd979bdSMark Fasheh 	.open =		ocfs2_dlm_debug_open,
3035ccd979bdSMark Fasheh 	.release =	ocfs2_dlm_debug_release,
3036ccd979bdSMark Fasheh 	.read =		seq_read,
3037ccd979bdSMark Fasheh 	.llseek =	seq_lseek,
3038ccd979bdSMark Fasheh };
3039ccd979bdSMark Fasheh 
3040ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
3041ccd979bdSMark Fasheh {
3042ccd979bdSMark Fasheh 	int ret = 0;
3043ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
3044ccd979bdSMark Fasheh 
3045ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = debugfs_create_file("locking_state",
3046ccd979bdSMark Fasheh 							 S_IFREG|S_IRUSR,
3047ccd979bdSMark Fasheh 							 osb->osb_debug_root,
3048ccd979bdSMark Fasheh 							 osb,
3049ccd979bdSMark Fasheh 							 &ocfs2_dlm_debug_fops);
30508f443e23SLinus Torvalds 	if (!dlm_debug->d_locking_state) {
3051ccd979bdSMark Fasheh 		ret = -EINVAL;
3052ccd979bdSMark Fasheh 		mlog(ML_ERROR,
3053ccd979bdSMark Fasheh 		     "Unable to create locking state debugfs file.\n");
3054ccd979bdSMark Fasheh 		goto out;
3055ccd979bdSMark Fasheh 	}
3056ccd979bdSMark Fasheh 
3057ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(dlm_debug);
3058ccd979bdSMark Fasheh out:
3059ccd979bdSMark Fasheh 	return ret;
3060ccd979bdSMark Fasheh }
3061ccd979bdSMark Fasheh 
3062ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
3063ccd979bdSMark Fasheh {
3064ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
3065ccd979bdSMark Fasheh 
3066ccd979bdSMark Fasheh 	if (dlm_debug) {
3067ccd979bdSMark Fasheh 		debugfs_remove(dlm_debug->d_locking_state);
3068ccd979bdSMark Fasheh 		ocfs2_put_dlm_debug(dlm_debug);
3069ccd979bdSMark Fasheh 	}
3070ccd979bdSMark Fasheh }
3071ccd979bdSMark Fasheh 
3072ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb)
3073ccd979bdSMark Fasheh {
3074c271c5c2SSunil Mushran 	int status = 0;
30754670c46dSJoel Becker 	struct ocfs2_cluster_connection *conn = NULL;
3076ccd979bdSMark Fasheh 
30770abd6d18SMark Fasheh 	if (ocfs2_mount_local(osb)) {
30780abd6d18SMark Fasheh 		osb->node_num = 0;
3079c271c5c2SSunil Mushran 		goto local;
30800abd6d18SMark Fasheh 	}
3081c271c5c2SSunil Mushran 
3082ccd979bdSMark Fasheh 	status = ocfs2_dlm_init_debug(osb);
3083ccd979bdSMark Fasheh 	if (status < 0) {
3084ccd979bdSMark Fasheh 		mlog_errno(status);
3085ccd979bdSMark Fasheh 		goto bail;
3086ccd979bdSMark Fasheh 	}
3087ccd979bdSMark Fasheh 
308834d024f8SMark Fasheh 	/* launch downconvert thread */
30895afc44e2SJoseph Qi 	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s",
30905afc44e2SJoseph Qi 			osb->uuid_str);
309134d024f8SMark Fasheh 	if (IS_ERR(osb->dc_task)) {
309234d024f8SMark Fasheh 		status = PTR_ERR(osb->dc_task);
309334d024f8SMark Fasheh 		osb->dc_task = NULL;
3094ccd979bdSMark Fasheh 		mlog_errno(status);
3095ccd979bdSMark Fasheh 		goto bail;
3096ccd979bdSMark Fasheh 	}
3097ccd979bdSMark Fasheh 
3098ccd979bdSMark Fasheh 	/* for now, uuid == domain */
30999c6c877cSJoel Becker 	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
3100c74a3bddSGoldwyn Rodrigues 				       osb->osb_cluster_name,
3101c74a3bddSGoldwyn Rodrigues 				       strlen(osb->osb_cluster_name),
31029c6c877cSJoel Becker 				       osb->uuid_str,
31034670c46dSJoel Becker 				       strlen(osb->uuid_str),
3104553b5eb9SJoel Becker 				       &lproto, ocfs2_do_node_down, osb,
31054670c46dSJoel Becker 				       &conn);
31064670c46dSJoel Becker 	if (status) {
3107ccd979bdSMark Fasheh 		mlog_errno(status);
3108ccd979bdSMark Fasheh 		goto bail;
3109ccd979bdSMark Fasheh 	}
3110ccd979bdSMark Fasheh 
31113e834151SGoldwyn Rodrigues 	status = ocfs2_cluster_this_node(conn, &osb->node_num);
31120abd6d18SMark Fasheh 	if (status < 0) {
31130abd6d18SMark Fasheh 		mlog_errno(status);
31140abd6d18SMark Fasheh 		mlog(ML_ERROR,
31150abd6d18SMark Fasheh 		     "could not find this host's node number\n");
3116286eaa95SJoel Becker 		ocfs2_cluster_disconnect(conn, 0);
31170abd6d18SMark Fasheh 		goto bail;
31180abd6d18SMark Fasheh 	}
31190abd6d18SMark Fasheh 
3120c271c5c2SSunil Mushran local:
3121ccd979bdSMark Fasheh 	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
3122ccd979bdSMark Fasheh 	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
31236ca497a8Swengang wang 	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
312483273932SSrinivas Eeda 	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
3125ccd979bdSMark Fasheh 
31264670c46dSJoel Becker 	osb->cconn = conn;
3127ccd979bdSMark Fasheh bail:
3128ccd979bdSMark Fasheh 	if (status < 0) {
3129ccd979bdSMark Fasheh 		ocfs2_dlm_shutdown_debug(osb);
313034d024f8SMark Fasheh 		if (osb->dc_task)
313134d024f8SMark Fasheh 			kthread_stop(osb->dc_task);
3132ccd979bdSMark Fasheh 	}
3133ccd979bdSMark Fasheh 
3134ccd979bdSMark Fasheh 	return status;
3135ccd979bdSMark Fasheh }
3136ccd979bdSMark Fasheh 
/*
 * ocfs2_dlm_shutdown() - tear down the cluster locking state of a mount.
 * @osb: super block being shut down
 * @hangup_pending: passed through unchanged to ocfs2_cluster_disconnect()
 *
 * Order of operations: drop the osb-wide locks, stop the downconvert
 * thread (if one is running), free the osb-wide lock resources, disconnect
 * from the cluster and finally remove the debugfs state.
 */
3137286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
3138286eaa95SJoel Becker 			int hangup_pending)
3139ccd979bdSMark Fasheh {
3140ccd979bdSMark Fasheh 	ocfs2_drop_osb_locks(osb);
3141ccd979bdSMark Fasheh 
31424670c46dSJoel Becker 	/*
31434670c46dSJoel Becker 	 * Now that we have dropped all locks and ocfs2_dismount_volume()
31444670c46dSJoel Becker 	 * has disabled recovery, the DLM won't be talking to us.  It's
31454670c46dSJoel Becker 	 * safe to tear things down before disconnecting the cluster.
31464670c46dSJoel Becker 	 */
31474670c46dSJoel Becker 
	/* dc_task is NULL when no downconvert thread was started. */
314834d024f8SMark Fasheh 	if (osb->dc_task) {
314934d024f8SMark Fasheh 		kthread_stop(osb->dc_task);
315034d024f8SMark Fasheh 		osb->dc_task = NULL;
3151ccd979bdSMark Fasheh 	}
3152ccd979bdSMark Fasheh 
	/* Same four lock resources that ocfs2_dlm_init() initialised. */
3153ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_super_lockres);
3154ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_rename_lockres);
31556ca497a8Swengang wang 	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
315683273932SSrinivas Eeda 	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
3157ccd979bdSMark Fasheh 
3158286eaa95SJoel Becker 	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
31594670c46dSJoel Becker 	osb->cconn = NULL;
3160ccd979bdSMark Fasheh 
3161ccd979bdSMark Fasheh 	ocfs2_dlm_shutdown_debug(osb);
3162ccd979bdSMark Fasheh }
3163ccd979bdSMark Fasheh 
/*
 * ocfs2_drop_lock() - release the DLM lock behind a lockres.
 * @osb: super block whose cluster connection is used for the unlock
 * @lockres: lock resource to drop; if it is initialised it must already
 *           have OCFS2_LOCK_FREEING set (enforced by mlog_bug_on_msg())
 *
 * Does nothing for a lockres that was never initialised or is not flagged
 * OCFS2_LOCK_ATTACHED.  Otherwise waits for the lockres to stop being
 * busy, issues ocfs2_dlm_unlock() and waits once more for the busy flag
 * set here to clear.  A failing ocfs2_dlm_unlock() is fatal (BUG()).
 *
 * Always returns 0.
 */
3164ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb,
31650d5dc6c2SMark Fasheh 			   struct ocfs2_lock_res *lockres)
3166ccd979bdSMark Fasheh {
31677431cd7eSJoel Becker 	int ret;
3168ccd979bdSMark Fasheh 	unsigned long flags;
3169bd3e7610SJoel Becker 	u32 lkm_flags = 0;
3170ccd979bdSMark Fasheh 
3171ccd979bdSMark Fasheh 	/* We didn't get anywhere near actually using this lockres. */
3172ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
3173ccd979bdSMark Fasheh 		goto out;
3174ccd979bdSMark Fasheh 
	/* Lock types that use an LVB pass DLM_LKF_VALBLK on the unlock. */
3175b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3176bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
3177b80fc012SMark Fasheh 
3178ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3179ccd979bdSMark Fasheh 
3180ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
3181ccd979bdSMark Fasheh 			"lockres %s, flags 0x%lx\n",
3182ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3183ccd979bdSMark Fasheh 
	/*
	 * l_lock is dropped around the wait and re-taken before
	 * OCFS2_LOCK_BUSY is checked again.
	 */
3184ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_BUSY) {
3185ccd979bdSMark Fasheh 		mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
3186ccd979bdSMark Fasheh 		     "%u, unlock_action = %u\n",
3187ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags, lockres->l_action,
3188ccd979bdSMark Fasheh 		     lockres->l_unlock_action);
3189ccd979bdSMark Fasheh 
3190ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3191ccd979bdSMark Fasheh 
3192ccd979bdSMark Fasheh 		/* XXX: Today we just wait on any busy
3193ccd979bdSMark Fasheh 		 * locks... Perhaps we need to cancel converts in the
3194ccd979bdSMark Fasheh 		 * future? */
3195ccd979bdSMark Fasheh 		ocfs2_wait_on_busy_lock(lockres);
3196ccd979bdSMark Fasheh 
3197ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3198ccd979bdSMark Fasheh 	}
3199ccd979bdSMark Fasheh 
	/*
	 * For LVB-using types, call the ->set_lvb() hook, but only while the
	 * lock is attached, held at EX and not waiting for a refresh.
	 */
32000d5dc6c2SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
32010d5dc6c2SMark Fasheh 		if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
3202bd3e7610SJoel Becker 		    lockres->l_level == DLM_LOCK_EX &&
32030d5dc6c2SMark Fasheh 		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
32040d5dc6c2SMark Fasheh 			lockres->l_ops->set_lvb(lockres);
32050d5dc6c2SMark Fasheh 	}
3206ccd979bdSMark Fasheh 
3207ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY)
3208ccd979bdSMark Fasheh 		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
3209ccd979bdSMark Fasheh 		     lockres->l_name);
3210ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3211ccd979bdSMark Fasheh 		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
3212ccd979bdSMark Fasheh 
	/* Not flagged as attached: skip the unlock call entirely. */
3213ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
3214ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3215ccd979bdSMark Fasheh 		goto out;
3216ccd979bdSMark Fasheh 	}
3217ccd979bdSMark Fasheh 
3218ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
3219ccd979bdSMark Fasheh 
3220ccd979bdSMark Fasheh 	/* make sure we never get here while waiting for an ast to
3221ccd979bdSMark Fasheh 	 * fire. */
3222ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
3223ccd979bdSMark Fasheh 
3224ccd979bdSMark Fasheh 	/* is this necessary? */
3225ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3226ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
3227ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3228ccd979bdSMark Fasheh 
3229ccd979bdSMark Fasheh 	mlog(0, "lock %s\n", lockres->l_name);
3230ccd979bdSMark Fasheh 
3231a796d286SJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
32327431cd7eSJoel Becker 	if (ret) {
32337431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3234ccd979bdSMark Fasheh 		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
3235cf0acdcdSJoel Becker 		ocfs2_dlm_dump_lksb(&lockres->l_lksb);
3236ccd979bdSMark Fasheh 		BUG();
3237ccd979bdSMark Fasheh 	}
323873ac36eaSColy Li 	mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
3239ccd979bdSMark Fasheh 	     lockres->l_name);
3240ccd979bdSMark Fasheh 
	/*
	 * Wait for the OCFS2_LOCK_BUSY set above to clear again
	 * (presumably done when the unlock completes - TODO confirm).
	 */
3241ccd979bdSMark Fasheh 	ocfs2_wait_on_busy_lock(lockres);
3242ccd979bdSMark Fasheh out:
3243ccd979bdSMark Fasheh 	return 0;
3244ccd979bdSMark Fasheh }
3245ccd979bdSMark Fasheh 
/* Declared here because ocfs2_mark_lockres_freeing() below calls it. */
324684d86f83SJan Kara static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
324784d86f83SJan Kara 				       struct ocfs2_lock_res *lockres);
324884d86f83SJan Kara 
3249ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be
3250ccd979bdSMark Fasheh  * queued if blocking, but we still may have to wait on it
325134d024f8SMark Fasheh  * being dequeued from the downconvert thread before we can consider
3252ccd979bdSMark Fasheh  * it safe to drop.
3253ccd979bdSMark Fasheh  *
3254ccd979bdSMark Fasheh  * You can *not* attempt to call cluster_lock on this lockres anymore. */
325584d86f83SJan Kara void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
325684d86f83SJan Kara 				struct ocfs2_lock_res *lockres)
3257ccd979bdSMark Fasheh {
3258ccd979bdSMark Fasheh 	int status;
3259ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
	/* flags: irq state for l_lock; flags2: irq state for dc_task_lock */
326084d86f83SJan Kara 	unsigned long flags, flags2;
3261ccd979bdSMark Fasheh 
3262ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
3263ccd979bdSMark Fasheh 
3264ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3265ccd979bdSMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_FREEING;
326684d86f83SJan Kara 	if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
326784d86f83SJan Kara 		/*
326884d86f83SJan Kara 		 * We know the downconvert is queued but not in progress
326984d86f83SJan Kara 		 * because we are the downconvert thread and processing
327084d86f83SJan Kara 		 * different lock. So we can just remove the lock from the
327184d86f83SJan Kara 		 * queue. This is not only an optimization but also a way
327284d86f83SJan Kara 		 * to avoid the following deadlock:
327384d86f83SJan Kara 		 *   ocfs2_dentry_post_unlock()
327484d86f83SJan Kara 		 *     ocfs2_dentry_lock_put()
327584d86f83SJan Kara 		 *       ocfs2_drop_dentry_lock()
327684d86f83SJan Kara 		 *         iput()
327784d86f83SJan Kara 		 *           ocfs2_evict_inode()
327884d86f83SJan Kara 		 *             ocfs2_clear_inode()
327984d86f83SJan Kara 		 *               ocfs2_mark_lockres_freeing()
328084d86f83SJan Kara 		 *                 ... blocks waiting for OCFS2_LOCK_QUEUED
328184d86f83SJan Kara 		 *                 since we are the downconvert thread which
328284d86f83SJan Kara 		 *                 should clear the flag.
328384d86f83SJan Kara 		 */
		/*
		 * l_lock is released before dc_task_lock is taken; the two
		 * are never held together here.
		 */
328484d86f83SJan Kara 		spin_unlock_irqrestore(&lockres->l_lock, flags);
328584d86f83SJan Kara 		spin_lock_irqsave(&osb->dc_task_lock, flags2);
328684d86f83SJan Kara 		list_del_init(&lockres->l_blocked_list);
328784d86f83SJan Kara 		osb->blocked_lock_count--;
328884d86f83SJan Kara 		spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
328984d86f83SJan Kara 		/*
329084d86f83SJan Kara 		 * Warn if we recurse into another post_unlock call.  Strictly
329184d86f83SJan Kara 		 * speaking it isn't a problem but we need to be careful if
329284d86f83SJan Kara 		 * that happens (stack overflow, deadlocks, ...) so warn if
329384d86f83SJan Kara 		 * ocfs2 grows a path for which this can happen.
329484d86f83SJan Kara 		 */
329584d86f83SJan Kara 		WARN_ON_ONCE(lockres->l_ops->post_unlock);
329684d86f83SJan Kara 		/* Since the lock is freeing we don't do much in the fn below */
329784d86f83SJan Kara 		ocfs2_process_blocked_lock(osb, lockres);
329884d86f83SJan Kara 		return;
329984d86f83SJan Kara 	}
	/*
	 * Any other caller sleeps until OCFS2_LOCK_QUEUED is cleared.  The
	 * flag is re-checked under l_lock after every wakeup; a non-zero
	 * status from ocfs2_wait_for_mask() is only logged.
	 */
3300ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3301ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3302ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3303ccd979bdSMark Fasheh 
3304ccd979bdSMark Fasheh 		mlog(0, "Waiting on lockres %s\n", lockres->l_name);
3305ccd979bdSMark Fasheh 
3306ccd979bdSMark Fasheh 		status = ocfs2_wait_for_mask(&mw);
3307ccd979bdSMark Fasheh 		if (status)
3308ccd979bdSMark Fasheh 			mlog_errno(status);
3309ccd979bdSMark Fasheh 
3310ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3311ccd979bdSMark Fasheh 	}
3312ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3313ccd979bdSMark Fasheh }
3314ccd979bdSMark Fasheh 
/*
 * Convenience wrapper: flag @lockres as freeing and then drop its lock.
 * An error from ocfs2_drop_lock() is logged, not returned.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int err;

	/* ocfs2_drop_lock() insists on OCFS2_LOCK_FREEING being set. */
	ocfs2_mark_lockres_freeing(osb, lockres);

	err = ocfs2_drop_lock(osb, lockres);
	if (err != 0)
		mlog_errno(err);
}
3325d680efe9SMark Fasheh 
3326ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3327ccd979bdSMark Fasheh {
3328d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3329d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
33306ca497a8Swengang wang 	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
333183273932SSrinivas Eeda 	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3332ccd979bdSMark Fasheh }
3333ccd979bdSMark Fasheh 
3334ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode)
3335ccd979bdSMark Fasheh {
3336ccd979bdSMark Fasheh 	int status, err;
3337ccd979bdSMark Fasheh 
3338ccd979bdSMark Fasheh 	/* No need to call ocfs2_mark_lockres_freeing here -
3339ccd979bdSMark Fasheh 	 * ocfs2_clear_inode has done it for us. */
3340ccd979bdSMark Fasheh 
3341ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
334250008630STiger Yang 			      &OCFS2_I(inode)->ip_open_lockres);
3343ccd979bdSMark Fasheh 	if (err < 0)
3344ccd979bdSMark Fasheh 		mlog_errno(err);
3345ccd979bdSMark Fasheh 
3346ccd979bdSMark Fasheh 	status = err;
3347ccd979bdSMark Fasheh 
3348ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3349e63aecb6SMark Fasheh 			      &OCFS2_I(inode)->ip_inode_lockres);
3350ccd979bdSMark Fasheh 	if (err < 0)
3351ccd979bdSMark Fasheh 		mlog_errno(err);
3352ccd979bdSMark Fasheh 	if (err < 0 && !status)
3353ccd979bdSMark Fasheh 		status = err;
3354ccd979bdSMark Fasheh 
3355ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
33560d5dc6c2SMark Fasheh 			      &OCFS2_I(inode)->ip_rw_lockres);
3357ccd979bdSMark Fasheh 	if (err < 0)
3358ccd979bdSMark Fasheh 		mlog_errno(err);
3359ccd979bdSMark Fasheh 	if (err < 0 && !status)
3360ccd979bdSMark Fasheh 		status = err;
3361ccd979bdSMark Fasheh 
3362ccd979bdSMark Fasheh 	return status;
3363ccd979bdSMark Fasheh }
3364ccd979bdSMark Fasheh 
/*
 * ocfs2_prepare_downconvert() - record an upcoming downconvert in @lockres.
 * @lockres: lock resource; the caller must hold lockres->l_lock
 * @new_level: level to convert down to; must be below the current l_level
 *
 * BUG()s if nothing is being blocked (l_blocking <= DLM_LOCK_NL) or if
 * @new_level would not actually lower the lock level.  Otherwise sets
 * l_action to OCFS2_AST_DOWNCONVERT, stores @new_level in l_requested and
 * marks the lockres OCFS2_LOCK_BUSY.
 *
 * Returns the value of lockres_set_pending() (presumably the generation
 * that ocfs2_downconvert_lock() later hands to lockres_clear_pending() -
 * TODO confirm against the caller).
 */
3365de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3366ccd979bdSMark Fasheh 					      int new_level)
3367ccd979bdSMark Fasheh {
3368ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3369ccd979bdSMark Fasheh 
3370bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
3371ccd979bdSMark Fasheh 
	/* Dump the lockres state before BUG()ing on a bogus request. */
3372ccd979bdSMark Fasheh 	if (lockres->l_level <= new_level) {
33739b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
33749b915181SSunil Mushran 		     "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
33759b915181SSunil Mushran 		     "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
33769b915181SSunil Mushran 		     new_level, list_empty(&lockres->l_blocked_list),
33779b915181SSunil Mushran 		     list_empty(&lockres->l_mask_waiters), lockres->l_type,
33789b915181SSunil Mushran 		     lockres->l_flags, lockres->l_ro_holders,
33799b915181SSunil Mushran 		     lockres->l_ex_holders, lockres->l_action,
33809b915181SSunil Mushran 		     lockres->l_unlock_action, lockres->l_requested,
33819b915181SSunil Mushran 		     lockres->l_blocking, lockres->l_pending_gen);
3382ccd979bdSMark Fasheh 		BUG();
3383ccd979bdSMark Fasheh 	}
3384ccd979bdSMark Fasheh 
33859b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
33869b915181SSunil Mushran 	     lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);
3387ccd979bdSMark Fasheh 
3388ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_DOWNCONVERT;
3389ccd979bdSMark Fasheh 	lockres->l_requested = new_level;
3390ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3391de551246SJoel Becker 	return lockres_set_pending(lockres);
3392ccd979bdSMark Fasheh }
3393ccd979bdSMark Fasheh 
3394ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3395ccd979bdSMark Fasheh 				  struct ocfs2_lock_res *lockres,
3396ccd979bdSMark Fasheh 				  int new_level,
3397de551246SJoel Becker 				  int lvb,
3398de551246SJoel Becker 				  unsigned int generation)
3399ccd979bdSMark Fasheh {
3400bd3e7610SJoel Becker 	int ret;
3401bd3e7610SJoel Becker 	u32 dlm_flags = DLM_LKF_CONVERT;
3402ccd979bdSMark Fasheh 
34039b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
34049b915181SSunil Mushran 	     lockres->l_level, new_level);
34059b915181SSunil Mushran 
3406e7ee2c08SEric Ren 	/*
3407e7ee2c08SEric Ren 	 * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always
3408e7ee2c08SEric Ren 	 * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
3409e7ee2c08SEric Ren 	 * we can recover correctly from node failure. Otherwise, we may get
3410e7ee2c08SEric Ren 	 * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
3411e7ee2c08SEric Ren 	 */
3412e7ee2c08SEric Ren 	if (!ocfs2_is_o2cb_active() &&
3413e7ee2c08SEric Ren 	    lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3414e7ee2c08SEric Ren 		lvb = 1;
3415e7ee2c08SEric Ren 
3416ccd979bdSMark Fasheh 	if (lvb)
3417bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_VALBLK;
3418ccd979bdSMark Fasheh 
34194670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
3420ccd979bdSMark Fasheh 			     new_level,
3421ccd979bdSMark Fasheh 			     &lockres->l_lksb,
3422ccd979bdSMark Fasheh 			     dlm_flags,
3423ccd979bdSMark Fasheh 			     lockres->l_name,
3424a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
3425de551246SJoel Becker 	lockres_clear_pending(lockres, generation, osb);
34267431cd7eSJoel Becker 	if (ret) {
34277431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
3428ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
3429ccd979bdSMark Fasheh 		goto bail;
3430ccd979bdSMark Fasheh 	}
3431ccd979bdSMark Fasheh 
3432ccd979bdSMark Fasheh 	ret = 0;
3433ccd979bdSMark Fasheh bail:
3434ccd979bdSMark Fasheh 	return ret;
3435ccd979bdSMark Fasheh }
3436ccd979bdSMark Fasheh 
343724ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
3438ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3439ccd979bdSMark Fasheh 				        struct ocfs2_lock_res *lockres)
3440ccd979bdSMark Fasheh {
3441ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3442ccd979bdSMark Fasheh 
3443ccd979bdSMark Fasheh 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3444ccd979bdSMark Fasheh 		/* If we're already trying to cancel a lock conversion
3445ccd979bdSMark Fasheh 		 * then just drop the spinlock and allow the caller to
3446ccd979bdSMark Fasheh 		 * requeue this lock. */
34479b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
3448ccd979bdSMark Fasheh 		return 0;
3449ccd979bdSMark Fasheh 	}
3450ccd979bdSMark Fasheh 
3451ccd979bdSMark Fasheh 	/* were we in a convert when we got the bast fire? */
3452ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
3453ccd979bdSMark Fasheh 	       lockres->l_action != OCFS2_AST_DOWNCONVERT);
3454ccd979bdSMark Fasheh 	/* set things up for the unlockast to know to just
3455ccd979bdSMark Fasheh 	 * clear out the ast_action and unset busy, etc. */
3456ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
3457ccd979bdSMark Fasheh 
3458ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
3459ccd979bdSMark Fasheh 			"lock %s, invalid flags: 0x%lx\n",
3460ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3461ccd979bdSMark Fasheh 
34629b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
34639b915181SSunil Mushran 
3464ccd979bdSMark Fasheh 	return 1;
3465ccd979bdSMark Fasheh }
3466ccd979bdSMark Fasheh 
3467ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3468ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres)
3469ccd979bdSMark Fasheh {
3470ccd979bdSMark Fasheh 	int ret;
3471ccd979bdSMark Fasheh 
34724670c46dSJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3473a796d286SJoel Becker 			       DLM_LKF_CANCEL);
34747431cd7eSJoel Becker 	if (ret) {
34757431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3476ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 0);
3477ccd979bdSMark Fasheh 	}
3478ccd979bdSMark Fasheh 
34799b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
3480ccd979bdSMark Fasheh 
3481ccd979bdSMark Fasheh 	return ret;
3482ccd979bdSMark Fasheh }
3483ccd979bdSMark Fasheh 
3484b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3485ccd979bdSMark Fasheh 			      struct ocfs2_lock_res *lockres,
3486cc567d89SMark Fasheh 			      struct ocfs2_unblock_ctl *ctl)
3487ccd979bdSMark Fasheh {
3488ccd979bdSMark Fasheh 	unsigned long flags;
3489ccd979bdSMark Fasheh 	int blocking;
3490ccd979bdSMark Fasheh 	int new_level;
3491079b8057SSunil Mushran 	int level;
3492ccd979bdSMark Fasheh 	int ret = 0;
34935ef0d4eaSMark Fasheh 	int set_lvb = 0;
3494de551246SJoel Becker 	unsigned int gen;
3495ccd979bdSMark Fasheh 
3496ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3497ccd979bdSMark Fasheh 
3498ccd979bdSMark Fasheh recheck:
3499db0f6ce6SSunil Mushran 	/*
3500db0f6ce6SSunil Mushran 	 * Is it still blocking? If not, we have no more work to do.
3501db0f6ce6SSunil Mushran 	 */
3502db0f6ce6SSunil Mushran 	if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
3503db0f6ce6SSunil Mushran 		BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
3504db0f6ce6SSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3505db0f6ce6SSunil Mushran 		ret = 0;
3506db0f6ce6SSunil Mushran 		goto leave;
3507db0f6ce6SSunil Mushran 	}
3508db0f6ce6SSunil Mushran 
3509ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3510de551246SJoel Becker 		/* XXX
3511de551246SJoel Becker 		 * This is a *big* race.  The OCFS2_LOCK_PENDING flag
3512de551246SJoel Becker 		 * exists entirely for one reason - another thread has set
3513de551246SJoel Becker 		 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3514de551246SJoel Becker 		 *
3515de551246SJoel Becker 		 * If we do ocfs2_cancel_convert() before the other thread
3516de551246SJoel Becker 		 * calls dlm_lock(), our cancel will do nothing.  We will
3517de551246SJoel Becker 		 * get no ast, and we will have no way of knowing the
3518de551246SJoel Becker 		 * cancel failed.  Meanwhile, the other thread will call
3519de551246SJoel Becker 		 * into dlm_lock() and wait...forever.
3520de551246SJoel Becker 		 *
3521de551246SJoel Becker 		 * Why forever?  Because another node has asked for the
3522de551246SJoel Becker 		 * lock first; that's why we're here in unblock_lock().
3523de551246SJoel Becker 		 *
3524de551246SJoel Becker 		 * The solution is OCFS2_LOCK_PENDING.  When PENDING is
3525de551246SJoel Becker 		 * set, we just requeue the unblock.  Only when the other
3526de551246SJoel Becker 		 * thread has called dlm_lock() and cleared PENDING will
3527de551246SJoel Becker 		 * we then cancel their request.
3528de551246SJoel Becker 		 *
3529de551246SJoel Becker 		 * All callers of dlm_lock() must set OCFS2_LOCK_PENDING
3530de551246SJoel Becker 		 * at the same time they set OCFS2_LOCK_BUSY.  They must
3531de551246SJoel Becker 		 * clear OCFS2_LOCK_PENDING after dlm_lock() returns.
3532de551246SJoel Becker 		 */
35339b915181SSunil Mushran 		if (lockres->l_flags & OCFS2_LOCK_PENDING) {
35349b915181SSunil Mushran 			mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
35359b915181SSunil Mushran 			     lockres->l_name);
3536de551246SJoel Becker 			goto leave_requeue;
35379b915181SSunil Mushran 		}
3538de551246SJoel Becker 
3539d680efe9SMark Fasheh 		ctl->requeue = 1;
3540ccd979bdSMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
3541ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3542ccd979bdSMark Fasheh 		if (ret) {
3543ccd979bdSMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
3544ccd979bdSMark Fasheh 			if (ret < 0)
3545ccd979bdSMark Fasheh 				mlog_errno(ret);
3546ccd979bdSMark Fasheh 		}
3547ccd979bdSMark Fasheh 		goto leave;
3548ccd979bdSMark Fasheh 	}
3549ccd979bdSMark Fasheh 
3550a1912826SSunil Mushran 	/*
3551a1912826SSunil Mushran 	 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3552a1912826SSunil Mushran 	 * set when the ast is received for an upconvert just before the
3553a1912826SSunil Mushran 	 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3554a1912826SSunil Mushran 	 * on the heels of the ast, we want to delay the downconvert just
3555a1912826SSunil Mushran 	 * enough to allow the up requestor to do its task. Because this
3556a1912826SSunil Mushran 	 * lock is in the blocked queue, the lock will be downconverted
3557a1912826SSunil Mushran 	 * as soon as the requestor is done with the lock.
3558a1912826SSunil Mushran 	 */
3559a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3560a1912826SSunil Mushran 		goto leave_requeue;
3561a1912826SSunil Mushran 
35620d74125aSSunil Mushran 	/*
35630d74125aSSunil Mushran 	 * How can we block and yet be at NL?  We were trying to upconvert
35640d74125aSSunil Mushran 	 * from NL and got canceled.  The code comes back here, and now
35650d74125aSSunil Mushran 	 * we notice and clear BLOCKING.
35660d74125aSSunil Mushran 	 */
35670d74125aSSunil Mushran 	if (lockres->l_level == DLM_LOCK_NL) {
35680d74125aSSunil Mushran 		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
35699b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
35700d74125aSSunil Mushran 		lockres->l_blocking = DLM_LOCK_NL;
35710d74125aSSunil Mushran 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
35720d74125aSSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
35730d74125aSSunil Mushran 		goto leave;
35740d74125aSSunil Mushran 	}
35750d74125aSSunil Mushran 
3576ccd979bdSMark Fasheh 	/* if we're blocking an exclusive and we have *any* holders,
3577ccd979bdSMark Fasheh 	 * then requeue. */
3578bd3e7610SJoel Becker 	if ((lockres->l_blocking == DLM_LOCK_EX)
35799b915181SSunil Mushran 	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
35809b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
35819b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders,
35829b915181SSunil Mushran 		     lockres->l_ro_holders);
3583f7fbfdd1SMark Fasheh 		goto leave_requeue;
35849b915181SSunil Mushran 	}
3585ccd979bdSMark Fasheh 
3586ccd979bdSMark Fasheh 	/* If it's a PR we're blocking, then only
3587ccd979bdSMark Fasheh 	 * requeue if we've got any EX holders */
3588bd3e7610SJoel Becker 	if (lockres->l_blocking == DLM_LOCK_PR &&
35899b915181SSunil Mushran 	    lockres->l_ex_holders) {
35909b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
35919b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders);
3592f7fbfdd1SMark Fasheh 		goto leave_requeue;
35939b915181SSunil Mushran 	}
3594f7fbfdd1SMark Fasheh 
3595f7fbfdd1SMark Fasheh 	/*
3596f7fbfdd1SMark Fasheh 	 * Can we get a lock in this state if the holder counts are
3597f7fbfdd1SMark Fasheh 	 * zero? The meta data unblock code used to check this.
3598f7fbfdd1SMark Fasheh 	 */
3599f7fbfdd1SMark Fasheh 	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
36009b915181SSunil Mushran 	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
36019b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
36029b915181SSunil Mushran 		     lockres->l_name);
3603f7fbfdd1SMark Fasheh 		goto leave_requeue;
36049b915181SSunil Mushran 	}
3605ccd979bdSMark Fasheh 
360616d5b956SMark Fasheh 	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
360716d5b956SMark Fasheh 
360816d5b956SMark Fasheh 	if (lockres->l_ops->check_downconvert
36099b915181SSunil Mushran 	    && !lockres->l_ops->check_downconvert(lockres, new_level)) {
36109b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
36119b915181SSunil Mushran 		     lockres->l_name);
361216d5b956SMark Fasheh 		goto leave_requeue;
36139b915181SSunil Mushran 	}
361416d5b956SMark Fasheh 
3615ccd979bdSMark Fasheh 	/* If we get here, then we know that there are no more
3616ccd979bdSMark Fasheh 	 * incompatible holders (and anyone asking for an incompatible
3617ccd979bdSMark Fasheh 	 * lock is blocked). We can now downconvert the lock */
3618cc567d89SMark Fasheh 	if (!lockres->l_ops->downconvert_worker)
3619ccd979bdSMark Fasheh 		goto downconvert;
3620ccd979bdSMark Fasheh 
3621ccd979bdSMark Fasheh 	/* Some lockres types want to do a bit of work before
3622ccd979bdSMark Fasheh 	 * downconverting a lock. Allow that here. The worker function
3623ccd979bdSMark Fasheh 	 * may sleep, so we save off a copy of what we're blocking as
3624ccd979bdSMark Fasheh 	 * it may change while we're not holding the spin lock. */
3625ccd979bdSMark Fasheh 	blocking = lockres->l_blocking;
3626079b8057SSunil Mushran 	level = lockres->l_level;
3627ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3628ccd979bdSMark Fasheh 
3629cc567d89SMark Fasheh 	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3630d680efe9SMark Fasheh 
36319b915181SSunil Mushran 	if (ctl->unblock_action == UNBLOCK_STOP_POST) {
36329b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
36339b915181SSunil Mushran 		     lockres->l_name);
3634d680efe9SMark Fasheh 		goto leave;
36359b915181SSunil Mushran 	}
3636ccd979bdSMark Fasheh 
3637ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3638079b8057SSunil Mushran 	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
3639ccd979bdSMark Fasheh 		/* If this changed underneath us, then we can't drop
3640ccd979bdSMark Fasheh 		 * it just yet. */
36419b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
36429b915181SSunil Mushran 		     "Recheck\n", lockres->l_name, blocking,
36439b915181SSunil Mushran 		     lockres->l_blocking, level, lockres->l_level);
3644ccd979bdSMark Fasheh 		goto recheck;
3645ccd979bdSMark Fasheh 	}
3646ccd979bdSMark Fasheh 
3647ccd979bdSMark Fasheh downconvert:
3648d680efe9SMark Fasheh 	ctl->requeue = 0;
3649ccd979bdSMark Fasheh 
36505ef0d4eaSMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3651bd3e7610SJoel Becker 		if (lockres->l_level == DLM_LOCK_EX)
36525ef0d4eaSMark Fasheh 			set_lvb = 1;
36535ef0d4eaSMark Fasheh 
36545ef0d4eaSMark Fasheh 		/*
36555ef0d4eaSMark Fasheh 		 * We only set the lvb if the lock has been fully
36565ef0d4eaSMark Fasheh 		 * refreshed - otherwise we risk setting stale
36575ef0d4eaSMark Fasheh 		 * data. Otherwise, there's no need to actually clear
36585ef0d4eaSMark Fasheh 		 * out the lvb here as its value is still valid.
36595ef0d4eaSMark Fasheh 		 */
36605ef0d4eaSMark Fasheh 		if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
36615ef0d4eaSMark Fasheh 			lockres->l_ops->set_lvb(lockres);
36625ef0d4eaSMark Fasheh 	}
36635ef0d4eaSMark Fasheh 
3664de551246SJoel Becker 	gen = ocfs2_prepare_downconvert(lockres, new_level);
3665ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3666de551246SJoel Becker 	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3667de551246SJoel Becker 				     gen);
3668de551246SJoel Becker 
3669ccd979bdSMark Fasheh leave:
3670c1e8d35eSTao Ma 	if (ret)
3671c1e8d35eSTao Ma 		mlog_errno(ret);
3672ccd979bdSMark Fasheh 	return ret;
3673f7fbfdd1SMark Fasheh 
3674f7fbfdd1SMark Fasheh leave_requeue:
3675f7fbfdd1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3676f7fbfdd1SMark Fasheh 	ctl->requeue = 1;
3677f7fbfdd1SMark Fasheh 
3678f7fbfdd1SMark Fasheh 	return 0;
3679ccd979bdSMark Fasheh }
3680ccd979bdSMark Fasheh 
3681d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3682ccd979bdSMark Fasheh 				     int blocking)
3683ccd979bdSMark Fasheh {
3684ccd979bdSMark Fasheh 	struct inode *inode;
3685ccd979bdSMark Fasheh 	struct address_space *mapping;
36865e98d492SGoldwyn Rodrigues 	struct ocfs2_inode_info *oi;
3687ccd979bdSMark Fasheh 
3688ccd979bdSMark Fasheh        	inode = ocfs2_lock_res_inode(lockres);
3689ccd979bdSMark Fasheh 	mapping = inode->i_mapping;
3690ccd979bdSMark Fasheh 
36915e98d492SGoldwyn Rodrigues 	if (S_ISDIR(inode->i_mode)) {
36925e98d492SGoldwyn Rodrigues 		oi = OCFS2_I(inode);
36935e98d492SGoldwyn Rodrigues 		oi->ip_dir_lock_gen++;
36945e98d492SGoldwyn Rodrigues 		mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
36955e98d492SGoldwyn Rodrigues 		goto out;
36965e98d492SGoldwyn Rodrigues 	}
36975e98d492SGoldwyn Rodrigues 
36981044e401SMark Fasheh 	if (!S_ISREG(inode->i_mode))
3699f1f54068SMark Fasheh 		goto out;
3700f1f54068SMark Fasheh 
37017f4a2a97SMark Fasheh 	/*
37027f4a2a97SMark Fasheh 	 * We need this before the filemap_fdatawrite() so that it can
37037f4a2a97SMark Fasheh 	 * transfer the dirty bit from the PTE to the
37047f4a2a97SMark Fasheh 	 * page. Unfortunately this means that even for EX->PR
37057f4a2a97SMark Fasheh 	 * downconverts, we'll lose our mappings and have to build
37067f4a2a97SMark Fasheh 	 * them up again.
37077f4a2a97SMark Fasheh 	 */
37087f4a2a97SMark Fasheh 	unmap_mapping_range(mapping, 0, 0, 0);
37097f4a2a97SMark Fasheh 
3710ccd979bdSMark Fasheh 	if (filemap_fdatawrite(mapping)) {
3711b0697053SMark Fasheh 		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
3712b0697053SMark Fasheh 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
3713ccd979bdSMark Fasheh 	}
3714ccd979bdSMark Fasheh 	sync_mapping_buffers(mapping);
3715bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_EX) {
3716ccd979bdSMark Fasheh 		truncate_inode_pages(mapping, 0);
3717ccd979bdSMark Fasheh 	} else {
3718ccd979bdSMark Fasheh 		/* We only need to wait on the I/O if we're not also
3719ccd979bdSMark Fasheh 		 * truncating pages because truncate_inode_pages waits
3720ccd979bdSMark Fasheh 		 * for us above. We don't truncate pages if we're
3721ccd979bdSMark Fasheh 		 * blocking anything < EXMODE because we want to keep
3722ccd979bdSMark Fasheh 		 * them around in that case. */
3723ccd979bdSMark Fasheh 		filemap_fdatawait(mapping);
3724ccd979bdSMark Fasheh 	}
3725ccd979bdSMark Fasheh 
3726b8a7a3a6SAndreas Gruenbacher 	forget_all_cached_acls(inode);
3727b8a7a3a6SAndreas Gruenbacher 
3728f1f54068SMark Fasheh out:
3729d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE;
3730ccd979bdSMark Fasheh }
3731ccd979bdSMark Fasheh 
3732a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
3733a4338481STao Ma 				 struct ocfs2_lock_res *lockres,
3734810d5aebSMark Fasheh 				 int new_level)
3735810d5aebSMark Fasheh {
3736a4338481STao Ma 	int checkpointed = ocfs2_ci_fully_checkpointed(ci);
3737810d5aebSMark Fasheh 
3738bd3e7610SJoel Becker 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3739bd3e7610SJoel Becker 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
3740810d5aebSMark Fasheh 
3741810d5aebSMark Fasheh 	if (checkpointed)
3742810d5aebSMark Fasheh 		return 1;
3743810d5aebSMark Fasheh 
3744a4338481STao Ma 	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
3745810d5aebSMark Fasheh 	return 0;
3746810d5aebSMark Fasheh }
3747810d5aebSMark Fasheh 
/*
 * check_downconvert hook for inode metadata locks: defers to
 * ocfs2_ci_checkpointed() on the inode's metadata cache and returns
 * its result.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	struct inode *owner = ocfs2_lock_res_inode(lockres);
	struct ocfs2_caching_info *ci = INODE_CACHE(owner);

	return ocfs2_ci_checkpointed(ci, lockres, new_level);
}
3755a4338481STao Ma 
/*
 * set_lvb hook for inode metadata locks: fills the lock value block
 * from the inode that owns @lockres.
 */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
3762810d5aebSMark Fasheh 
/*
 * Drop the final reference on our dentry lock.  Today this runs in the
 * downconvert thread; the dlmglue API could be simplified later by
 * pushing these puts off to the ocfs2_wq instead.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
3774d680efe9SMark Fasheh 
3775d680efe9SMark Fasheh /*
3776d680efe9SMark Fasheh  * d_delete() matching dentries before the lock downconvert.
3777d680efe9SMark Fasheh  *
3778d680efe9SMark Fasheh  * At this point, any process waiting to destroy the
3779d680efe9SMark Fasheh  * dentry_lock due to last ref count is stopped by the
3780d680efe9SMark Fasheh  * OCFS2_LOCK_QUEUED flag.
3781d680efe9SMark Fasheh  *
3782d680efe9SMark Fasheh  * We have two potential problems
3783d680efe9SMark Fasheh  *
3784d680efe9SMark Fasheh  * 1) If we do the last reference drop on our dentry_lock (via dput)
3785d680efe9SMark Fasheh  *    we'll wind up in ocfs2_release_dentry_lock(), waiting on
3786d680efe9SMark Fasheh  *    the downconvert to finish. Instead we take an elevated
3787d680efe9SMark Fasheh  *    reference and push the drop until after we've completed our
3788d680efe9SMark Fasheh  *    unblock processing.
3789d680efe9SMark Fasheh  *
3790d680efe9SMark Fasheh  * 2) There might be another process with a final reference,
3791d680efe9SMark Fasheh  *    waiting on us to finish processing. If this is the case, we
3792d680efe9SMark Fasheh  *    detect it and exit out - there's no more dentries anyway.
3793d680efe9SMark Fasheh  */
3794d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3795d680efe9SMark Fasheh 				       int blocking)
3796d680efe9SMark Fasheh {
3797d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
3798d680efe9SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
3799d680efe9SMark Fasheh 	struct dentry *dentry;
3800d680efe9SMark Fasheh 	unsigned long flags;
3801d680efe9SMark Fasheh 	int extra_ref = 0;
3802d680efe9SMark Fasheh 
3803d680efe9SMark Fasheh 	/*
3804d680efe9SMark Fasheh 	 * This node is blocking another node from getting a read
3805d680efe9SMark Fasheh 	 * lock. This happens when we've renamed within a
3806d680efe9SMark Fasheh 	 * directory. We've forced the other nodes to d_delete(), but
3807d680efe9SMark Fasheh 	 * we never actually dropped our lock because it's still
3808d680efe9SMark Fasheh 	 * valid. The downconvert code will retain a PR for this node,
3809d680efe9SMark Fasheh 	 * so there's no further work to do.
3810d680efe9SMark Fasheh 	 */
3811bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_PR)
3812d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
3813d680efe9SMark Fasheh 
3814d680efe9SMark Fasheh 	/*
3815d680efe9SMark Fasheh 	 * Mark this inode as potentially orphaned. The code in
3816d680efe9SMark Fasheh 	 * ocfs2_delete_inode() will figure out whether it actually
3817d680efe9SMark Fasheh 	 * needs to be freed or not.
3818d680efe9SMark Fasheh 	 */
3819d680efe9SMark Fasheh 	spin_lock(&oi->ip_lock);
3820d680efe9SMark Fasheh 	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
3821d680efe9SMark Fasheh 	spin_unlock(&oi->ip_lock);
3822d680efe9SMark Fasheh 
3823d680efe9SMark Fasheh 	/*
3824d680efe9SMark Fasheh 	 * Yuck. We need to make sure however that the check of
3825d680efe9SMark Fasheh 	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
3826d680efe9SMark Fasheh 	 * respect to a reference decrement or the setting of that
3827d680efe9SMark Fasheh 	 * flag.
3828d680efe9SMark Fasheh 	 */
3829d680efe9SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3830d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
3831d680efe9SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
3832d680efe9SMark Fasheh 	    && dl->dl_count) {
3833d680efe9SMark Fasheh 		dl->dl_count++;
3834d680efe9SMark Fasheh 		extra_ref = 1;
3835d680efe9SMark Fasheh 	}
3836d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
3837d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3838d680efe9SMark Fasheh 
3839d680efe9SMark Fasheh 	mlog(0, "extra_ref = %d\n", extra_ref);
3840d680efe9SMark Fasheh 
3841d680efe9SMark Fasheh 	/*
3842d680efe9SMark Fasheh 	 * We have a process waiting on us in ocfs2_dentry_iput(),
3843d680efe9SMark Fasheh 	 * which means we can't have any more outstanding
3844d680efe9SMark Fasheh 	 * aliases. There's no need to do any more work.
3845d680efe9SMark Fasheh 	 */
3846d680efe9SMark Fasheh 	if (!extra_ref)
3847d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
3848d680efe9SMark Fasheh 
3849d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
3850d680efe9SMark Fasheh 	while (1) {
3851d680efe9SMark Fasheh 		dentry = ocfs2_find_local_alias(dl->dl_inode,
3852d680efe9SMark Fasheh 						dl->dl_parent_blkno, 1);
3853d680efe9SMark Fasheh 		if (!dentry)
3854d680efe9SMark Fasheh 			break;
3855d680efe9SMark Fasheh 		spin_unlock(&dentry_attach_lock);
3856d680efe9SMark Fasheh 
385710ab8811Salex chen 		if (S_ISDIR(dl->dl_inode->i_mode))
385810ab8811Salex chen 			shrink_dcache_parent(dentry);
385910ab8811Salex chen 
3860a455589fSAl Viro 		mlog(0, "d_delete(%pd);\n", dentry);
3861d680efe9SMark Fasheh 
3862d680efe9SMark Fasheh 		/*
3863d680efe9SMark Fasheh 		 * The following dcache calls may do an
3864d680efe9SMark Fasheh 		 * iput(). Normally we don't want that from the
3865d680efe9SMark Fasheh 		 * downconverting thread, but in this case it's ok
3866d680efe9SMark Fasheh 		 * because the requesting node already has an
3867d680efe9SMark Fasheh 		 * exclusive lock on the inode, so it can't be queued
3868d680efe9SMark Fasheh 		 * for a downconvert.
3869d680efe9SMark Fasheh 		 */
3870d680efe9SMark Fasheh 		d_delete(dentry);
3871d680efe9SMark Fasheh 		dput(dentry);
3872d680efe9SMark Fasheh 
3873d680efe9SMark Fasheh 		spin_lock(&dentry_attach_lock);
3874d680efe9SMark Fasheh 	}
3875d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
3876d680efe9SMark Fasheh 
3877d680efe9SMark Fasheh 	/*
3878d680efe9SMark Fasheh 	 * If we are the last holder of this dentry lock, there is no
3879d680efe9SMark Fasheh 	 * reason to downconvert so skip straight to the unlock.
3880d680efe9SMark Fasheh 	 */
3881d680efe9SMark Fasheh 	if (dl->dl_count == 1)
3882d680efe9SMark Fasheh 		return UNBLOCK_STOP_POST;
3883d680efe9SMark Fasheh 
3884d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE_POST;
3885d680efe9SMark Fasheh }
3886d680efe9SMark Fasheh 
38878dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
38888dec98edSTao Ma 					    int new_level)
38898dec98edSTao Ma {
38908dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
38918dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
38928dec98edSTao Ma 
38938dec98edSTao Ma 	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
38948dec98edSTao Ma }
38958dec98edSTao Ma 
38968dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
38978dec98edSTao Ma 					 int blocking)
38988dec98edSTao Ma {
38998dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
39008dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
39018dec98edSTao Ma 
39028dec98edSTao Ma 	ocfs2_metadata_cache_purge(&tree->rf_ci);
39038dec98edSTao Ma 
39048dec98edSTao Ma 	return UNBLOCK_CONTINUE;
39058dec98edSTao Ma }
39068dec98edSTao Ma 
39079e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
39089e33d69fSJan Kara {
39099e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb;
39109e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
39119e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
39129e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
39139e33d69fSJan Kara 
3914a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
39159e33d69fSJan Kara 	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
39169e33d69fSJan Kara 	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
39179e33d69fSJan Kara 	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
39189e33d69fSJan Kara 	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
39199e33d69fSJan Kara 	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
39209e33d69fSJan Kara 	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
39219e33d69fSJan Kara 	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
39229e33d69fSJan Kara }
39239e33d69fSJan Kara 
39249e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
39259e33d69fSJan Kara {
39269e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
39279e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
39289e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
39299e33d69fSJan Kara 
39309e33d69fSJan Kara 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
39319e33d69fSJan Kara 		ocfs2_cluster_unlock(osb, lockres, level);
39329e33d69fSJan Kara }
39339e33d69fSJan Kara 
39349e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
39359e33d69fSJan Kara {
39369e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
39379e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
39389e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
39399e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
394085eb8b73SJoel Becker 	struct buffer_head *bh = NULL;
39419e33d69fSJan Kara 	struct ocfs2_global_disk_dqinfo *gdinfo;
39429e33d69fSJan Kara 	int status = 0;
39439e33d69fSJan Kara 
39441c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
39451c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
39469e33d69fSJan Kara 		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
39479e33d69fSJan Kara 		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
39489e33d69fSJan Kara 		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
39499e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
39509e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
39519e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
39529e33d69fSJan Kara 					be32_to_cpu(lvb->lvb_free_entry);
39539e33d69fSJan Kara 	} else {
3954ae4f6ef1SJan Kara 		status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
3955ae4f6ef1SJan Kara 						     oinfo->dqi_giblk, &bh);
395685eb8b73SJoel Becker 		if (status) {
39579e33d69fSJan Kara 			mlog_errno(status);
39589e33d69fSJan Kara 			goto bail;
39599e33d69fSJan Kara 		}
39609e33d69fSJan Kara 		gdinfo = (struct ocfs2_global_disk_dqinfo *)
39619e33d69fSJan Kara 					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
39629e33d69fSJan Kara 		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
39639e33d69fSJan Kara 		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
39649e33d69fSJan Kara 		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
39659e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
39669e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
39679e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
39689e33d69fSJan Kara 					le32_to_cpu(gdinfo->dqi_free_entry);
39699e33d69fSJan Kara 		brelse(bh);
39709e33d69fSJan Kara 		ocfs2_track_lock_refresh(lockres);
39719e33d69fSJan Kara 	}
39729e33d69fSJan Kara 
39739e33d69fSJan Kara bail:
39749e33d69fSJan Kara 	return status;
39759e33d69fSJan Kara }
39769e33d69fSJan Kara 
39779e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file
39789e33d69fSJan Kara  * so that we can safely refresh quota info from disk. */
39799e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
39809e33d69fSJan Kara {
39819e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
39829e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
39839e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
39849e33d69fSJan Kara 	int status = 0;
39859e33d69fSJan Kara 
39869e33d69fSJan Kara 	/* On RO devices, locking really isn't needed... */
39879e33d69fSJan Kara 	if (ocfs2_is_hard_readonly(osb)) {
39889e33d69fSJan Kara 		if (ex)
39899e33d69fSJan Kara 			status = -EROFS;
39909e33d69fSJan Kara 		goto bail;
39919e33d69fSJan Kara 	}
39929e33d69fSJan Kara 	if (ocfs2_mount_local(osb))
39939e33d69fSJan Kara 		goto bail;
39949e33d69fSJan Kara 
39959e33d69fSJan Kara 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
39969e33d69fSJan Kara 	if (status < 0) {
39979e33d69fSJan Kara 		mlog_errno(status);
39989e33d69fSJan Kara 		goto bail;
39999e33d69fSJan Kara 	}
40009e33d69fSJan Kara 	if (!ocfs2_should_refresh_lock_res(lockres))
40019e33d69fSJan Kara 		goto bail;
40029e33d69fSJan Kara 	/* OK, we have the lock but we need to refresh the quota info */
40039e33d69fSJan Kara 	status = ocfs2_refresh_qinfo(oinfo);
40049e33d69fSJan Kara 	if (status)
40059e33d69fSJan Kara 		ocfs2_qinfo_unlock(oinfo, ex);
40069e33d69fSJan Kara 	ocfs2_complete_lock_res_refresh(lockres, status);
40079e33d69fSJan Kara bail:
40089e33d69fSJan Kara 	return status;
40099e33d69fSJan Kara }
40109e33d69fSJan Kara 
40118dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
40128dec98edSTao Ma {
40138dec98edSTao Ma 	int status;
40148dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
40158dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
40168dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
40178dec98edSTao Ma 
40188dec98edSTao Ma 
40198dec98edSTao Ma 	if (ocfs2_is_hard_readonly(osb))
40208dec98edSTao Ma 		return -EROFS;
40218dec98edSTao Ma 
40228dec98edSTao Ma 	if (ocfs2_mount_local(osb))
40238dec98edSTao Ma 		return 0;
40248dec98edSTao Ma 
40258dec98edSTao Ma 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
40268dec98edSTao Ma 	if (status < 0)
40278dec98edSTao Ma 		mlog_errno(status);
40288dec98edSTao Ma 
40298dec98edSTao Ma 	return status;
40308dec98edSTao Ma }
40318dec98edSTao Ma 
40328dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
40338dec98edSTao Ma {
40348dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
40358dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
40368dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
40378dec98edSTao Ma 
40388dec98edSTao Ma 	if (!ocfs2_mount_local(osb))
40398dec98edSTao Ma 		ocfs2_cluster_unlock(osb, lockres, level);
40408dec98edSTao Ma }
40418dec98edSTao Ma 
404200600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
4043ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *lockres)
4044ccd979bdSMark Fasheh {
4045ccd979bdSMark Fasheh 	int status;
4046d680efe9SMark Fasheh 	struct ocfs2_unblock_ctl ctl = {0, 0,};
4047ccd979bdSMark Fasheh 	unsigned long flags;
4048ccd979bdSMark Fasheh 
4049ccd979bdSMark Fasheh 	/* Our reference to the lockres in this function can be
4050ccd979bdSMark Fasheh 	 * considered valid until we remove the OCFS2_LOCK_QUEUED
4051ccd979bdSMark Fasheh 	 * flag. */
4052ccd979bdSMark Fasheh 
4053ccd979bdSMark Fasheh 	BUG_ON(!lockres);
4054ccd979bdSMark Fasheh 	BUG_ON(!lockres->l_ops);
4055ccd979bdSMark Fasheh 
40569b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);
4057ccd979bdSMark Fasheh 
4058ccd979bdSMark Fasheh 	/* Detect whether a lock has been marked as going away while
405934d024f8SMark Fasheh 	 * the downconvert thread was processing other things. A lock can
4060ccd979bdSMark Fasheh 	 * still be marked with OCFS2_LOCK_FREEING after this check,
4061ccd979bdSMark Fasheh 	 * but short circuiting here will still save us some
4062ccd979bdSMark Fasheh 	 * performance. */
4063ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
4064ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING)
4065ccd979bdSMark Fasheh 		goto unqueue;
4066ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
4067ccd979bdSMark Fasheh 
4068b5e500e2SMark Fasheh 	status = ocfs2_unblock_lock(osb, lockres, &ctl);
4069ccd979bdSMark Fasheh 	if (status < 0)
4070ccd979bdSMark Fasheh 		mlog_errno(status);
4071ccd979bdSMark Fasheh 
4072ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
4073ccd979bdSMark Fasheh unqueue:
4074d680efe9SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
4075ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
4076ccd979bdSMark Fasheh 	} else
4077ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
4078ccd979bdSMark Fasheh 
40799b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
4080d680efe9SMark Fasheh 	     ctl.requeue ? "yes" : "no");
4081ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
4082ccd979bdSMark Fasheh 
4083d680efe9SMark Fasheh 	if (ctl.unblock_action != UNBLOCK_CONTINUE
4084d680efe9SMark Fasheh 	    && lockres->l_ops->post_unlock)
4085d680efe9SMark Fasheh 		lockres->l_ops->post_unlock(osb, lockres);
4086ccd979bdSMark Fasheh }
4087ccd979bdSMark Fasheh 
4088ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
4089ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
4090ccd979bdSMark Fasheh {
4091a75e9ccaSSrinivas Eeda 	unsigned long flags;
4092a75e9ccaSSrinivas Eeda 
4093ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
4094ccd979bdSMark Fasheh 
4095ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
4096ccd979bdSMark Fasheh 		/* Do not schedule a lock for downconvert when it's on
4097ccd979bdSMark Fasheh 		 * the way to destruction - any nodes wanting access
4098ccd979bdSMark Fasheh 		 * to the resource will get it soon. */
40999b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
4100ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags);
4101ccd979bdSMark Fasheh 		return;
4102ccd979bdSMark Fasheh 	}
4103ccd979bdSMark Fasheh 
4104ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
4105ccd979bdSMark Fasheh 
4106a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
4107ccd979bdSMark Fasheh 	if (list_empty(&lockres->l_blocked_list)) {
4108ccd979bdSMark Fasheh 		list_add_tail(&lockres->l_blocked_list,
4109ccd979bdSMark Fasheh 			      &osb->blocked_lock_list);
4110ccd979bdSMark Fasheh 		osb->blocked_lock_count++;
4111ccd979bdSMark Fasheh 	}
4112a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
4113ccd979bdSMark Fasheh }
411434d024f8SMark Fasheh 
411534d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
411634d024f8SMark Fasheh {
411734d024f8SMark Fasheh 	unsigned long processed;
4118a75e9ccaSSrinivas Eeda 	unsigned long flags;
411934d024f8SMark Fasheh 	struct ocfs2_lock_res *lockres;
412034d024f8SMark Fasheh 
4121a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
412234d024f8SMark Fasheh 	/* grab this early so we know to try again if a state change and
412334d024f8SMark Fasheh 	 * wake happens part-way through our work  */
412434d024f8SMark Fasheh 	osb->dc_work_sequence = osb->dc_wake_sequence;
412534d024f8SMark Fasheh 
412634d024f8SMark Fasheh 	processed = osb->blocked_lock_count;
4127209f7512SJoseph Qi 	/*
4128209f7512SJoseph Qi 	 * blocked lock processing in this loop might call iput which can
4129209f7512SJoseph Qi 	 * remove items off osb->blocked_lock_list. Downconvert up to
4130209f7512SJoseph Qi 	 * 'processed' number of locks, but stop short if we had some
4131209f7512SJoseph Qi 	 * removed in ocfs2_mark_lockres_freeing when downconverting.
4132209f7512SJoseph Qi 	 */
4133209f7512SJoseph Qi 	while (processed && !list_empty(&osb->blocked_lock_list)) {
413434d024f8SMark Fasheh 		lockres = list_entry(osb->blocked_lock_list.next,
413534d024f8SMark Fasheh 				     struct ocfs2_lock_res, l_blocked_list);
413634d024f8SMark Fasheh 		list_del_init(&lockres->l_blocked_list);
413734d024f8SMark Fasheh 		osb->blocked_lock_count--;
4138a75e9ccaSSrinivas Eeda 		spin_unlock_irqrestore(&osb->dc_task_lock, flags);
413934d024f8SMark Fasheh 
414034d024f8SMark Fasheh 		BUG_ON(!processed);
414134d024f8SMark Fasheh 		processed--;
414234d024f8SMark Fasheh 
414334d024f8SMark Fasheh 		ocfs2_process_blocked_lock(osb, lockres);
414434d024f8SMark Fasheh 
4145a75e9ccaSSrinivas Eeda 		spin_lock_irqsave(&osb->dc_task_lock, flags);
414634d024f8SMark Fasheh 	}
4147a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
414834d024f8SMark Fasheh }
414934d024f8SMark Fasheh 
415034d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
415134d024f8SMark Fasheh {
415234d024f8SMark Fasheh 	int empty = 0;
4153a75e9ccaSSrinivas Eeda 	unsigned long flags;
415434d024f8SMark Fasheh 
4155a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
415634d024f8SMark Fasheh 	if (list_empty(&osb->blocked_lock_list))
415734d024f8SMark Fasheh 		empty = 1;
415834d024f8SMark Fasheh 
4159a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
416034d024f8SMark Fasheh 	return empty;
416134d024f8SMark Fasheh }
416234d024f8SMark Fasheh 
416334d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
416434d024f8SMark Fasheh {
416534d024f8SMark Fasheh 	int should_wake = 0;
4166a75e9ccaSSrinivas Eeda 	unsigned long flags;
416734d024f8SMark Fasheh 
4168a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
416934d024f8SMark Fasheh 	if (osb->dc_work_sequence != osb->dc_wake_sequence)
417034d024f8SMark Fasheh 		should_wake = 1;
4171a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
417234d024f8SMark Fasheh 
417334d024f8SMark Fasheh 	return should_wake;
417434d024f8SMark Fasheh }
417534d024f8SMark Fasheh 
4176200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg)
417734d024f8SMark Fasheh {
417834d024f8SMark Fasheh 	int status = 0;
417934d024f8SMark Fasheh 	struct ocfs2_super *osb = arg;
418034d024f8SMark Fasheh 
418134d024f8SMark Fasheh 	/* only quit once we've been asked to stop and there is no more
418234d024f8SMark Fasheh 	 * work available */
418334d024f8SMark Fasheh 	while (!(kthread_should_stop() &&
418434d024f8SMark Fasheh 		ocfs2_downconvert_thread_lists_empty(osb))) {
418534d024f8SMark Fasheh 
418634d024f8SMark Fasheh 		wait_event_interruptible(osb->dc_event,
418734d024f8SMark Fasheh 					 ocfs2_downconvert_thread_should_wake(osb) ||
418834d024f8SMark Fasheh 					 kthread_should_stop());
418934d024f8SMark Fasheh 
419034d024f8SMark Fasheh 		mlog(0, "downconvert_thread: awoken\n");
419134d024f8SMark Fasheh 
419234d024f8SMark Fasheh 		ocfs2_downconvert_thread_do_work(osb);
419334d024f8SMark Fasheh 	}
419434d024f8SMark Fasheh 
419534d024f8SMark Fasheh 	osb->dc_task = NULL;
419634d024f8SMark Fasheh 	return status;
419734d024f8SMark Fasheh }
419834d024f8SMark Fasheh 
419934d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
420034d024f8SMark Fasheh {
4201a75e9ccaSSrinivas Eeda 	unsigned long flags;
4202a75e9ccaSSrinivas Eeda 
4203a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
420434d024f8SMark Fasheh 	/* make sure the voting thread gets a swipe at whatever changes
420534d024f8SMark Fasheh 	 * the caller may have made to the voting state */
420634d024f8SMark Fasheh 	osb->dc_wake_sequence++;
4207a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
420834d024f8SMark Fasheh 	wake_up(&osb->dc_event);
420934d024f8SMark Fasheh }
4210