xref: /openbmc/linux/fs/ocfs2/dlmglue.c (revision 06e7f13d)
1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*-
2ccd979bdSMark Fasheh  * vim: noexpandtab sw=8 ts=8 sts=0:
3ccd979bdSMark Fasheh  *
4ccd979bdSMark Fasheh  * dlmglue.c
5ccd979bdSMark Fasheh  *
6ccd979bdSMark Fasheh  * Code which implements an OCFS2 specific interface to our DLM.
7ccd979bdSMark Fasheh  *
8ccd979bdSMark Fasheh  * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
9ccd979bdSMark Fasheh  *
10ccd979bdSMark Fasheh  * This program is free software; you can redistribute it and/or
11ccd979bdSMark Fasheh  * modify it under the terms of the GNU General Public
12ccd979bdSMark Fasheh  * License as published by the Free Software Foundation; either
13ccd979bdSMark Fasheh  * version 2 of the License, or (at your option) any later version.
14ccd979bdSMark Fasheh  *
15ccd979bdSMark Fasheh  * This program is distributed in the hope that it will be useful,
16ccd979bdSMark Fasheh  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17ccd979bdSMark Fasheh  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18ccd979bdSMark Fasheh  * General Public License for more details.
19ccd979bdSMark Fasheh  *
20ccd979bdSMark Fasheh  * You should have received a copy of the GNU General Public
21ccd979bdSMark Fasheh  * License along with this program; if not, write to the
22ccd979bdSMark Fasheh  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23ccd979bdSMark Fasheh  * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh  */
25ccd979bdSMark Fasheh 
26ccd979bdSMark Fasheh #include <linux/types.h>
27ccd979bdSMark Fasheh #include <linux/slab.h>
28ccd979bdSMark Fasheh #include <linux/highmem.h>
29ccd979bdSMark Fasheh #include <linux/mm.h>
30ccd979bdSMark Fasheh #include <linux/kthread.h>
31ccd979bdSMark Fasheh #include <linux/pagemap.h>
32ccd979bdSMark Fasheh #include <linux/debugfs.h>
33ccd979bdSMark Fasheh #include <linux/seq_file.h>
348ddb7b00SSunil Mushran #include <linux/time.h>
359e33d69fSJan Kara #include <linux/quotaops.h>
36174cd4b1SIngo Molnar #include <linux/sched/signal.h>
37ccd979bdSMark Fasheh 
38ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE
39ccd979bdSMark Fasheh #include <cluster/masklog.h>
40ccd979bdSMark Fasheh 
41ccd979bdSMark Fasheh #include "ocfs2.h"
42d24fbcdaSJoel Becker #include "ocfs2_lockingver.h"
43ccd979bdSMark Fasheh 
44ccd979bdSMark Fasheh #include "alloc.h"
45d680efe9SMark Fasheh #include "dcache.h"
46ccd979bdSMark Fasheh #include "dlmglue.h"
47ccd979bdSMark Fasheh #include "extent_map.h"
487f1a37e3STiger Yang #include "file.h"
49ccd979bdSMark Fasheh #include "heartbeat.h"
50ccd979bdSMark Fasheh #include "inode.h"
51ccd979bdSMark Fasheh #include "journal.h"
5224ef1815SJoel Becker #include "stackglue.h"
53ccd979bdSMark Fasheh #include "slot_map.h"
54ccd979bdSMark Fasheh #include "super.h"
55ccd979bdSMark Fasheh #include "uptodate.h"
569e33d69fSJan Kara #include "quota.h"
578dec98edSTao Ma #include "refcounttree.h"
58b8a7a3a6SAndreas Gruenbacher #include "acl.h"
59ccd979bdSMark Fasheh 
60ccd979bdSMark Fasheh #include "buffer_head_io.h"
61ccd979bdSMark Fasheh 
62ccd979bdSMark Fasheh struct ocfs2_mask_waiter {
63ccd979bdSMark Fasheh 	struct list_head	mw_item;
64ccd979bdSMark Fasheh 	int			mw_status;
65ccd979bdSMark Fasheh 	struct completion	mw_complete;
66ccd979bdSMark Fasheh 	unsigned long		mw_mask;
67ccd979bdSMark Fasheh 	unsigned long		mw_goal;
688ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
695bc970e8SSunil Mushran 	ktime_t			mw_lock_start;
708ddb7b00SSunil Mushran #endif
71ccd979bdSMark Fasheh };
72ccd979bdSMark Fasheh 
/*
 * ->get_osb() implementations for lock types whose ->l_priv is not an
 * ocfs2_super pointer. Defined further down in this file.
 */
static struct ocfs2_super *
ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);

static struct ocfs2_super *
ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);

static struct ocfs2_super *
ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);

static struct ocfs2_super *
ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
77ccd979bdSMark Fasheh 
/*
 * Values a ->downconvert_worker function may return.
 *
 * They steer what ocfs2_unblock_lock() and ocfs2_process_blocked_lock()
 * do next.
 */
enum ocfs2_unblock_action {
	/* Continue downconvert */
	UNBLOCK_CONTINUE	= 0,

	/* Continue downconvert, fire ->post_unlock callback */
	UNBLOCK_CONTINUE_POST	= 1,

	/* Do not downconvert, fire ->post_unlock() callback. */
	UNBLOCK_STOP_POST	= 2,
};
92d680efe9SMark Fasheh 
93d680efe9SMark Fasheh struct ocfs2_unblock_ctl {
94d680efe9SMark Fasheh 	int requeue;
95d680efe9SMark Fasheh 	enum ocfs2_unblock_action unblock_action;
96d680efe9SMark Fasheh };
97d680efe9SMark Fasheh 
98cb25797dSJan Kara /* Lockdep class keys */
99cb25797dSJan Kara struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
100cb25797dSJan Kara 
/*
 * Forward declarations for the callbacks wired into the
 * ocfs2_lock_res_ops tables below.
 */

/* Inode (meta) lock: ->check_downconvert and ->set_lvb. */
static int
ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level);
static void
ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);

/* Inode (meta) lock: ->downconvert_worker. */
static int
ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, int blocking);

/* Dentry lock: ->downconvert_worker and ->post_unlock. */
static int
ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, int blocking);
static void
ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
			 struct ocfs2_lock_res *lockres);

/* Quota info lock: ->set_lvb. */
static void
ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

/* Refcount block lock: ->check_downconvert and ->downconvert_worker. */
static int
ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
				 int new_level);
static int
ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, int blocking);
1208dec98edSTao Ma 
1216cb129f5SAdrian Bunk #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
1226cb129f5SAdrian Bunk 
1236cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. */
1246cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level,
1256cb129f5SAdrian Bunk 				     const char *function,
1266cb129f5SAdrian Bunk 				     unsigned int line,
1276cb129f5SAdrian Bunk 				     struct ocfs2_lock_res *lockres)
1286cb129f5SAdrian Bunk {
129a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1306cb129f5SAdrian Bunk 
1316cb129f5SAdrian Bunk 	mlog(level, "LVB information for %s (called from %s:%u):\n",
1326cb129f5SAdrian Bunk 	     lockres->l_name, function, line);
1336cb129f5SAdrian Bunk 	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
1346cb129f5SAdrian Bunk 	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
1356cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_igeneration));
1366cb129f5SAdrian Bunk 	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
1376cb129f5SAdrian Bunk 	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
1386cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
1396cb129f5SAdrian Bunk 	     be16_to_cpu(lvb->lvb_imode));
1406cb129f5SAdrian Bunk 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
1416cb129f5SAdrian Bunk 	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
1426cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
1436cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
1446cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
1456cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iattr));
1466cb129f5SAdrian Bunk }
1476cb129f5SAdrian Bunk 
1486cb129f5SAdrian Bunk 
/*
 * OCFS2 Lock Resource Operations
 *
 * Per-lock-type hooks that fine tune how the generic dlmglue locking
 * infrastructure behaves.
 *
 * The simplest lock types just point ->l_priv at their struct
 * ocfs2_super and rely on the default actions.
 *
 * For now every lock type must additionally provide an init function
 * and trivial lock/unlock wrappers. Call ocfs2_simple_drop_lockres()
 * once the lock is no longer needed (i.e., at object destruction time).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Map an ocfs2_lock_res * to its ocfs2_super *. Only needed
	 * when ->l_priv is something other than an ocfs2_super
	 * pointer.
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optional. Invoked from the downconvert thread after a
	 * downconvert succeeded. The lockres is not touched again
	 * once this has been called, so freeing memory etc. is safe.
	 *
	 * Exactly when it fires is decided by the return value of
	 * ->downconvert_worker().
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Lets a lock type veto a downconvert with extra checks.
	 * Returning 0 re-queues the downconvert for later; any
	 * nonzero value lets it proceed.
	 *
	 * Most locks do not need this: the default check for
	 * incompatible holders is enough.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Fills in the lock value block. Invoked on downconvert and
	 * when a lock is dropped.
	 *
	 * Lock types using this should set LOCK_TYPE_USES_LVB in
	 * ->flags.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Invoked from the downconvert thread once it has been
	 * decided that the lock will be downconverted. No locks are
	 * held at this point, so the callback may do work that
	 * schedules (syncing out data, etc).
	 *
	 * Must return one of the ocfs2_unblock_action values to tell
	 * the thread how to proceed.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* bits describing the particular requirements of
	 * this lock type. Each flag is documented at its definition.
	 */
	int flags;
};
220ccd979bdSMark Fasheh 
/*
 * LOCK_TYPE_REQUIRES_REFRESH
 *
 * For lock types that need to "refresh" possibly stale data the first
 * time a meaningful (PRMODE or EXMODE) level is obtained. With this
 * flag set, the ast function sets OCFS2_LOCK_NEEDS_REFRESH in the
 * lockres' l_flags; the locking wrapper is expected to clear
 * OCFS2_LOCK_NEEDS_REFRESH once it is done.
 */
#define LOCK_TYPE_REQUIRES_REFRESH 0x1

/*
 * LOCK_TYPE_USES_LVB
 *
 * The lock type makes use of the lock value block. The ->set_lvb lock
 * type callback must be defined.
 *
 * NOTE(review): ocfs2_trim_fs_lops and ocfs2_orphan_scan_lops set this
 * flag without supplying ->set_lvb; confirm that every caller of
 * ->set_lvb tolerates that.
 */
#define LOCK_TYPE_USES_LVB		0x2
236b80fc012SMark Fasheh 
237ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
23854a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
239f625c979SMark Fasheh 	.flags		= 0,
240ccd979bdSMark Fasheh };
241ccd979bdSMark Fasheh 
242e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
24354a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
244810d5aebSMark Fasheh 	.check_downconvert = ocfs2_check_meta_downconvert,
245810d5aebSMark Fasheh 	.set_lvb	= ocfs2_set_meta_lvb,
246f1f54068SMark Fasheh 	.downconvert_worker = ocfs2_data_convert_worker,
247b80fc012SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
248ccd979bdSMark Fasheh };
249ccd979bdSMark Fasheh 
250ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = {
251f625c979SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
252ccd979bdSMark Fasheh };
253ccd979bdSMark Fasheh 
254ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
255f625c979SMark Fasheh 	.flags		= 0,
256ccd979bdSMark Fasheh };
257ccd979bdSMark Fasheh 
2586ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
2596ca497a8Swengang wang 	.flags		= 0,
2606ca497a8Swengang wang };
2616ca497a8Swengang wang 
2624882abebSGang He static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
2634882abebSGang He 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
2644882abebSGang He };
2654882abebSGang He 
26683273932SSrinivas Eeda static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
26783273932SSrinivas Eeda 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
26883273932SSrinivas Eeda };
26983273932SSrinivas Eeda 
270d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
27154a7e755SMark Fasheh 	.get_osb	= ocfs2_get_dentry_osb,
272d680efe9SMark Fasheh 	.post_unlock	= ocfs2_dentry_post_unlock,
273cc567d89SMark Fasheh 	.downconvert_worker = ocfs2_dentry_convert_worker,
274f625c979SMark Fasheh 	.flags		= 0,
275d680efe9SMark Fasheh };
276d680efe9SMark Fasheh 
27750008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
27850008630STiger Yang 	.get_osb	= ocfs2_get_inode_osb,
27950008630STiger Yang 	.flags		= 0,
28050008630STiger Yang };
28150008630STiger Yang 
282cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
283cf8e06f1SMark Fasheh 	.get_osb	= ocfs2_get_file_osb,
284cf8e06f1SMark Fasheh 	.flags		= 0,
285cf8e06f1SMark Fasheh };
286cf8e06f1SMark Fasheh 
2879e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
2889e33d69fSJan Kara 	.set_lvb	= ocfs2_set_qinfo_lvb,
2899e33d69fSJan Kara 	.get_osb	= ocfs2_get_qinfo_osb,
2909e33d69fSJan Kara 	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
2919e33d69fSJan Kara };
2929e33d69fSJan Kara 
2938dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
2948dec98edSTao Ma 	.check_downconvert = ocfs2_check_refcount_downconvert,
2958dec98edSTao Ma 	.downconvert_worker = ocfs2_refcount_convert_worker,
2968dec98edSTao Ma 	.flags		= 0,
2978dec98edSTao Ma };
2988dec98edSTao Ma 
299ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
300ccd979bdSMark Fasheh {
301ccd979bdSMark Fasheh 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
30250008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_RW ||
30350008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
304ccd979bdSMark Fasheh }
305ccd979bdSMark Fasheh 
306c0e41338SJoel Becker static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
307a796d286SJoel Becker {
308a796d286SJoel Becker 	return container_of(lksb, struct ocfs2_lock_res, l_lksb);
309a796d286SJoel Becker }
310a796d286SJoel Becker 
311ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
312ccd979bdSMark Fasheh {
313ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_is_inode_lock(lockres));
314ccd979bdSMark Fasheh 
315ccd979bdSMark Fasheh 	return (struct inode *) lockres->l_priv;
316ccd979bdSMark Fasheh }
317ccd979bdSMark Fasheh 
318d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
319d680efe9SMark Fasheh {
320d680efe9SMark Fasheh 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
321d680efe9SMark Fasheh 
322d680efe9SMark Fasheh 	return (struct ocfs2_dentry_lock *)lockres->l_priv;
323d680efe9SMark Fasheh }
324d680efe9SMark Fasheh 
3259e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
3269e33d69fSJan Kara {
3279e33d69fSJan Kara 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
3289e33d69fSJan Kara 
3299e33d69fSJan Kara 	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
3309e33d69fSJan Kara }
3319e33d69fSJan Kara 
3328dec98edSTao Ma static inline struct ocfs2_refcount_tree *
3338dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
3348dec98edSTao Ma {
3358dec98edSTao Ma 	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
3368dec98edSTao Ma }
3378dec98edSTao Ma 
33854a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
33954a7e755SMark Fasheh {
34054a7e755SMark Fasheh 	if (lockres->l_ops->get_osb)
34154a7e755SMark Fasheh 		return lockres->l_ops->get_osb(lockres);
34254a7e755SMark Fasheh 
34354a7e755SMark Fasheh 	return (struct ocfs2_super *)lockres->l_priv;
34454a7e755SMark Fasheh }
34554a7e755SMark Fasheh 
346ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
347ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
348ccd979bdSMark Fasheh 			     int level,
349bd3e7610SJoel Becker 			     u32 dlm_flags);
350ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
351ccd979bdSMark Fasheh 						     int wanted);
352cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
353ccd979bdSMark Fasheh 				   struct ocfs2_lock_res *lockres,
354cb25797dSJan Kara 				   int level, unsigned long caller_ip);
355cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
356cb25797dSJan Kara 					struct ocfs2_lock_res *lockres,
357cb25797dSJan Kara 					int level)
358cb25797dSJan Kara {
359cb25797dSJan Kara 	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
360cb25797dSJan Kara }
361cb25797dSJan Kara 
/* Forward declarations; the definitions follow later in this file. */
static inline void
ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);

static inline void
ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);

static inline void
ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);

static int
ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);

static void
ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
			    struct ocfs2_lock_res *lockres);

static inline void
ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, int convert);
/*
 * Log a DLM error for a lock resource.
 *
 * @_func:    name of the DLM call that failed (string)
 * @_err:     error code it returned
 * @_lockres: lock resource the call was made on
 *
 * Dentry lock names embed the inode number in binary form (see
 * ocfs2_get_dentry_lock_ino()), so for those only the leading
 * OCFS2_DENTRY_LOCK_INO_START - 1 characters of the name are printed as
 * text and the inode number is appended in hex.
 *
 * Every macro argument is parenthesized so that arbitrary expressions can
 * be passed safely.
 */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) {				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     (_err), (_func), (_lockres)->l_name);				\
	} else {									\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     (_err), (_func), OCFS2_DENTRY_LOCK_INO_START - 1,			\
		     (_lockres)->l_name,						\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
	}										\
} while (0)
/* Forward declarations; the definitions follow later in this file. */
static int
ocfs2_downconvert_thread(void *arg);

static void
ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
			    struct ocfs2_lock_res *lockres);

static int
ocfs2_inode_lock_update(struct inode *inode, struct buffer_head **bh);

static void
ocfs2_drop_osb_locks(struct ocfs2_super *osb);

static inline int
ocfs2_highest_compat_lock_level(int level);

static unsigned int
ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, int new_level);

static int
ocfs2_downconvert_lock(struct ocfs2_super *osb,
		       struct ocfs2_lock_res *lockres,
		       int new_level, int lvb, unsigned int generation);

static int
ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres);

static int
ocfs2_cancel_convert(struct ocfs2_super *osb,
		     struct ocfs2_lock_res *lockres);
397cf8e06f1SMark Fasheh 
398ccd979bdSMark Fasheh 
399ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
400ccd979bdSMark Fasheh 				  u64 blkno,
401ccd979bdSMark Fasheh 				  u32 generation,
402ccd979bdSMark Fasheh 				  char *name)
403ccd979bdSMark Fasheh {
404ccd979bdSMark Fasheh 	int len;
405ccd979bdSMark Fasheh 
406ccd979bdSMark Fasheh 	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
407ccd979bdSMark Fasheh 
408b0697053SMark Fasheh 	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
409b0697053SMark Fasheh 		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
410b0697053SMark Fasheh 		       (long long)blkno, generation);
411ccd979bdSMark Fasheh 
412ccd979bdSMark Fasheh 	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
413ccd979bdSMark Fasheh 
414ccd979bdSMark Fasheh 	mlog(0, "built lock resource with name: %s\n", name);
415ccd979bdSMark Fasheh }
416ccd979bdSMark Fasheh 
41734af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
418ccd979bdSMark Fasheh 
419ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
420ccd979bdSMark Fasheh 				       struct ocfs2_dlm_debug *dlm_debug)
421ccd979bdSMark Fasheh {
422ccd979bdSMark Fasheh 	mlog(0, "Add tracking for lockres %s\n", res->l_name);
423ccd979bdSMark Fasheh 
424ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
425ccd979bdSMark Fasheh 	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
426ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
427ccd979bdSMark Fasheh }
428ccd979bdSMark Fasheh 
429ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
430ccd979bdSMark Fasheh {
431ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
432ccd979bdSMark Fasheh 	if (!list_empty(&res->l_debug_list))
433ccd979bdSMark Fasheh 		list_del_init(&res->l_debug_list);
434ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
435ccd979bdSMark Fasheh }
436ccd979bdSMark Fasheh 
4378ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
4388ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
4398ddb7b00SSunil Mushran {
4408ddb7b00SSunil Mushran 	res->l_lock_refresh = 0;
4415bc970e8SSunil Mushran 	memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
4425bc970e8SSunil Mushran 	memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
4438ddb7b00SSunil Mushran }
4448ddb7b00SSunil Mushran 
4458ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
4468ddb7b00SSunil Mushran 				    struct ocfs2_mask_waiter *mw, int ret)
4478ddb7b00SSunil Mushran {
4485bc970e8SSunil Mushran 	u32 usec;
4495bc970e8SSunil Mushran 	ktime_t kt;
4505bc970e8SSunil Mushran 	struct ocfs2_lock_stats *stats;
4518ddb7b00SSunil Mushran 
4525bc970e8SSunil Mushran 	if (level == LKM_PRMODE)
4535bc970e8SSunil Mushran 		stats = &res->l_lock_prmode;
4545bc970e8SSunil Mushran 	else if (level == LKM_EXMODE)
4555bc970e8SSunil Mushran 		stats = &res->l_lock_exmode;
4565bc970e8SSunil Mushran 	else
4578ddb7b00SSunil Mushran 		return;
4588ddb7b00SSunil Mushran 
4595bc970e8SSunil Mushran 	kt = ktime_sub(ktime_get(), mw->mw_lock_start);
4605bc970e8SSunil Mushran 	usec = ktime_to_us(kt);
4615bc970e8SSunil Mushran 
4625bc970e8SSunil Mushran 	stats->ls_gets++;
4635bc970e8SSunil Mushran 	stats->ls_total += ktime_to_ns(kt);
4645bc970e8SSunil Mushran 	/* overflow */
46516865b7cSroel 	if (unlikely(stats->ls_gets == 0)) {
4665bc970e8SSunil Mushran 		stats->ls_gets++;
4675bc970e8SSunil Mushran 		stats->ls_total = ktime_to_ns(kt);
4685bc970e8SSunil Mushran 	}
4695bc970e8SSunil Mushran 
4705bc970e8SSunil Mushran 	if (stats->ls_max < usec)
4715bc970e8SSunil Mushran 		stats->ls_max = usec;
4725bc970e8SSunil Mushran 
4738ddb7b00SSunil Mushran 	if (ret)
4745bc970e8SSunil Mushran 		stats->ls_fail++;
4758ddb7b00SSunil Mushran }
4768ddb7b00SSunil Mushran 
4778ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
4788ddb7b00SSunil Mushran {
4798ddb7b00SSunil Mushran 	lockres->l_lock_refresh++;
4808ddb7b00SSunil Mushran }
4818ddb7b00SSunil Mushran 
4828ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
4838ddb7b00SSunil Mushran {
4845bc970e8SSunil Mushran 	mw->mw_lock_start = ktime_get();
4858ddb7b00SSunil Mushran }
4868ddb7b00SSunil Mushran #else
/*
 * No-op stand-ins for the lock statistics helpers, used when
 * CONFIG_OCFS2_FS_STATS is not set.
 */
static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
	/* nothing to initialise without stats */
}

static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
					   int level,
					   struct ocfs2_mask_waiter *mw,
					   int ret)
{
	/* nothing to record without stats */
}

static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
	/* nothing to count without stats */
}

static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
	/* no start time is kept without stats */
}
5008ddb7b00SSunil Mushran #endif
5018ddb7b00SSunil Mushran 
502ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
503ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *res,
504ccd979bdSMark Fasheh 				       enum ocfs2_lock_type type,
505ccd979bdSMark Fasheh 				       struct ocfs2_lock_res_ops *ops,
506ccd979bdSMark Fasheh 				       void *priv)
507ccd979bdSMark Fasheh {
508ccd979bdSMark Fasheh 	res->l_type          = type;
509ccd979bdSMark Fasheh 	res->l_ops           = ops;
510ccd979bdSMark Fasheh 	res->l_priv          = priv;
511ccd979bdSMark Fasheh 
512bd3e7610SJoel Becker 	res->l_level         = DLM_LOCK_IV;
513bd3e7610SJoel Becker 	res->l_requested     = DLM_LOCK_IV;
514bd3e7610SJoel Becker 	res->l_blocking      = DLM_LOCK_IV;
515ccd979bdSMark Fasheh 	res->l_action        = OCFS2_AST_INVALID;
516ccd979bdSMark Fasheh 	res->l_unlock_action = OCFS2_UNLOCK_INVALID;
517ccd979bdSMark Fasheh 
518ccd979bdSMark Fasheh 	res->l_flags         = OCFS2_LOCK_INITIALIZED;
519ccd979bdSMark Fasheh 
520ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
5218ddb7b00SSunil Mushran 
5228ddb7b00SSunil Mushran 	ocfs2_init_lock_stats(res);
523cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
524cb25797dSJan Kara 	if (type != OCFS2_LOCK_TYPE_OPEN)
525cb25797dSJan Kara 		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
526cb25797dSJan Kara 				 &lockdep_keys[type], 0);
527cb25797dSJan Kara 	else
528cb25797dSJan Kara 		res->l_lockdep_map.key = NULL;
529cb25797dSJan Kara #endif
530ccd979bdSMark Fasheh }
531ccd979bdSMark Fasheh 
532ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
533ccd979bdSMark Fasheh {
534ccd979bdSMark Fasheh 	/* This also clears out the lock status block */
535ccd979bdSMark Fasheh 	memset(res, 0, sizeof(struct ocfs2_lock_res));
536ccd979bdSMark Fasheh 	spin_lock_init(&res->l_lock);
537ccd979bdSMark Fasheh 	init_waitqueue_head(&res->l_event);
538ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_blocked_list);
539ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_mask_waiters);
540439a36b8SEric Ren 	INIT_LIST_HEAD(&res->l_holders);
541ccd979bdSMark Fasheh }
542ccd979bdSMark Fasheh 
543ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
544ccd979bdSMark Fasheh 			       enum ocfs2_lock_type type,
54524c19ef4SMark Fasheh 			       unsigned int generation,
546ccd979bdSMark Fasheh 			       struct inode *inode)
547ccd979bdSMark Fasheh {
548ccd979bdSMark Fasheh 	struct ocfs2_lock_res_ops *ops;
549ccd979bdSMark Fasheh 
550ccd979bdSMark Fasheh 	switch(type) {
551ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_RW:
552ccd979bdSMark Fasheh 			ops = &ocfs2_inode_rw_lops;
553ccd979bdSMark Fasheh 			break;
554ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_META:
555e63aecb6SMark Fasheh 			ops = &ocfs2_inode_inode_lops;
556ccd979bdSMark Fasheh 			break;
55750008630STiger Yang 		case OCFS2_LOCK_TYPE_OPEN:
55850008630STiger Yang 			ops = &ocfs2_inode_open_lops;
55950008630STiger Yang 			break;
560ccd979bdSMark Fasheh 		default:
561ccd979bdSMark Fasheh 			mlog_bug_on_msg(1, "type: %d\n", type);
562ccd979bdSMark Fasheh 			ops = NULL; /* thanks, gcc */
563ccd979bdSMark Fasheh 			break;
564ccd979bdSMark Fasheh 	};
565ccd979bdSMark Fasheh 
566d680efe9SMark Fasheh 	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
56724c19ef4SMark Fasheh 			      generation, res->l_name);
568d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
569d680efe9SMark Fasheh }
570d680efe9SMark Fasheh 
57154a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
57254a7e755SMark Fasheh {
57354a7e755SMark Fasheh 	struct inode *inode = ocfs2_lock_res_inode(lockres);
57454a7e755SMark Fasheh 
57554a7e755SMark Fasheh 	return OCFS2_SB(inode->i_sb);
57654a7e755SMark Fasheh }
57754a7e755SMark Fasheh 
5789e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
5799e33d69fSJan Kara {
5809e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *info = lockres->l_priv;
5819e33d69fSJan Kara 
5829e33d69fSJan Kara 	return OCFS2_SB(info->dqi_gi.dqi_sb);
5839e33d69fSJan Kara }
5849e33d69fSJan Kara 
585cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
586cf8e06f1SMark Fasheh {
587cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = lockres->l_priv;
588cf8e06f1SMark Fasheh 
589cf8e06f1SMark Fasheh 	return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
590cf8e06f1SMark Fasheh }
591cf8e06f1SMark Fasheh 
592d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
593d680efe9SMark Fasheh {
594d680efe9SMark Fasheh 	__be64 inode_blkno_be;
595d680efe9SMark Fasheh 
596d680efe9SMark Fasheh 	memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
597d680efe9SMark Fasheh 	       sizeof(__be64));
598d680efe9SMark Fasheh 
599d680efe9SMark Fasheh 	return be64_to_cpu(inode_blkno_be);
600d680efe9SMark Fasheh }
601d680efe9SMark Fasheh 
60254a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
60354a7e755SMark Fasheh {
60454a7e755SMark Fasheh 	struct ocfs2_dentry_lock *dl = lockres->l_priv;
60554a7e755SMark Fasheh 
60654a7e755SMark Fasheh 	return OCFS2_SB(dl->dl_inode->i_sb);
60754a7e755SMark Fasheh }
60854a7e755SMark Fasheh 
609d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
610d680efe9SMark Fasheh 				u64 parent, struct inode *inode)
611d680efe9SMark Fasheh {
612d680efe9SMark Fasheh 	int len;
613d680efe9SMark Fasheh 	u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
614d680efe9SMark Fasheh 	__be64 inode_blkno_be = cpu_to_be64(inode_blkno);
615d680efe9SMark Fasheh 	struct ocfs2_lock_res *lockres = &dl->dl_lockres;
616d680efe9SMark Fasheh 
617d680efe9SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
618d680efe9SMark Fasheh 
619d680efe9SMark Fasheh 	/*
620d680efe9SMark Fasheh 	 * Unfortunately, the standard lock naming scheme won't work
621d680efe9SMark Fasheh 	 * here because we have two 16 byte values to use. Instead,
622d680efe9SMark Fasheh 	 * we'll stuff the inode number as a binary value. We still
623d680efe9SMark Fasheh 	 * want error prints to show something without garbling the
624d680efe9SMark Fasheh 	 * display, so drop a null byte in there before the inode
625d680efe9SMark Fasheh 	 * number. A future version of OCFS2 will likely use all
626d680efe9SMark Fasheh 	 * binary lock names. The stringified names have been a
627d680efe9SMark Fasheh 	 * tremendous aid in debugging, but now that the debugfs
628d680efe9SMark Fasheh 	 * interface exists, we can mangle things there if need be.
629d680efe9SMark Fasheh 	 *
630d680efe9SMark Fasheh 	 * NOTE: We also drop the standard "pad" value (the total lock
631d680efe9SMark Fasheh 	 * name size stays the same though - the last part is all
632d680efe9SMark Fasheh 	 * zeros due to the memset in ocfs2_lock_res_init_once()
633d680efe9SMark Fasheh 	 */
634d680efe9SMark Fasheh 	len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
635d680efe9SMark Fasheh 		       "%c%016llx",
636d680efe9SMark Fasheh 		       ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
637d680efe9SMark Fasheh 		       (long long)parent);
638d680efe9SMark Fasheh 
639d680efe9SMark Fasheh 	BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
640d680efe9SMark Fasheh 
641d680efe9SMark Fasheh 	memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
642d680efe9SMark Fasheh 	       sizeof(__be64));
643d680efe9SMark Fasheh 
644d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
645d680efe9SMark Fasheh 				   OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
646d680efe9SMark Fasheh 				   dl);
647ccd979bdSMark Fasheh }
648ccd979bdSMark Fasheh 
649ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
650ccd979bdSMark Fasheh 				      struct ocfs2_super *osb)
651ccd979bdSMark Fasheh {
652ccd979bdSMark Fasheh 	/* Superblock lockres doesn't come from a slab so we call init
653ccd979bdSMark Fasheh 	 * once on it manually.  */
654ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
655d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
656d680efe9SMark Fasheh 			      0, res->l_name);
657ccd979bdSMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
658ccd979bdSMark Fasheh 				   &ocfs2_super_lops, osb);
659ccd979bdSMark Fasheh }
660ccd979bdSMark Fasheh 
661ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
662ccd979bdSMark Fasheh 				       struct ocfs2_super *osb)
663ccd979bdSMark Fasheh {
664ccd979bdSMark Fasheh 	/* Rename lockres doesn't come from a slab so we call init
665ccd979bdSMark Fasheh 	 * once on it manually.  */
666ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
667d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
668d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
669ccd979bdSMark Fasheh 				   &ocfs2_rename_lops, osb);
670ccd979bdSMark Fasheh }
671ccd979bdSMark Fasheh 
6726ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
6736ca497a8Swengang wang 					 struct ocfs2_super *osb)
6746ca497a8Swengang wang {
6756ca497a8Swengang wang 	/* nfs_sync lockres doesn't come from a slab so we call init
6766ca497a8Swengang wang 	 * once on it manually.  */
6776ca497a8Swengang wang 	ocfs2_lock_res_init_once(res);
6786ca497a8Swengang wang 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
6796ca497a8Swengang wang 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
6806ca497a8Swengang wang 				   &ocfs2_nfs_sync_lops, osb);
6816ca497a8Swengang wang }
6826ca497a8Swengang wang 
6834882abebSGang He void ocfs2_trim_fs_lock_res_init(struct ocfs2_super *osb)
6844882abebSGang He {
6854882abebSGang He 	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
6864882abebSGang He 
6874882abebSGang He 	ocfs2_lock_res_init_once(lockres);
6884882abebSGang He 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_TRIM_FS, 0, 0, lockres->l_name);
6894882abebSGang He 	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_TRIM_FS,
6904882abebSGang He 				   &ocfs2_trim_fs_lops, osb);
6914882abebSGang He }
6924882abebSGang He 
6934882abebSGang He void ocfs2_trim_fs_lock_res_uninit(struct ocfs2_super *osb)
6944882abebSGang He {
6954882abebSGang He 	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
6964882abebSGang He 
6974882abebSGang He 	ocfs2_simple_drop_lockres(osb, lockres);
6984882abebSGang He 	ocfs2_lock_res_free(lockres);
6994882abebSGang He }
7004882abebSGang He 
70183273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
70283273932SSrinivas Eeda 					    struct ocfs2_super *osb)
70383273932SSrinivas Eeda {
70483273932SSrinivas Eeda 	ocfs2_lock_res_init_once(res);
70583273932SSrinivas Eeda 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
70683273932SSrinivas Eeda 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
70783273932SSrinivas Eeda 				   &ocfs2_orphan_scan_lops, osb);
70883273932SSrinivas Eeda }
70983273932SSrinivas Eeda 
710cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
711cf8e06f1SMark Fasheh 			      struct ocfs2_file_private *fp)
712cf8e06f1SMark Fasheh {
713cf8e06f1SMark Fasheh 	struct inode *inode = fp->fp_file->f_mapping->host;
714cf8e06f1SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
715cf8e06f1SMark Fasheh 
716cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
717cf8e06f1SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
718cf8e06f1SMark Fasheh 			      inode->i_generation, lockres->l_name);
719cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
720cf8e06f1SMark Fasheh 				   OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
721cf8e06f1SMark Fasheh 				   fp);
722cf8e06f1SMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_NOCACHE;
723cf8e06f1SMark Fasheh }
724cf8e06f1SMark Fasheh 
7259e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
7269e33d69fSJan Kara 			       struct ocfs2_mem_dqinfo *info)
7279e33d69fSJan Kara {
7289e33d69fSJan Kara 	ocfs2_lock_res_init_once(lockres);
7299e33d69fSJan Kara 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
7309e33d69fSJan Kara 			      0, lockres->l_name);
7319e33d69fSJan Kara 	ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
7329e33d69fSJan Kara 				   OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
7339e33d69fSJan Kara 				   info);
7349e33d69fSJan Kara }
7359e33d69fSJan Kara 
7368dec98edSTao Ma void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
7378dec98edSTao Ma 				  struct ocfs2_super *osb, u64 ref_blkno,
7388dec98edSTao Ma 				  unsigned int generation)
7398dec98edSTao Ma {
7408dec98edSTao Ma 	ocfs2_lock_res_init_once(lockres);
7418dec98edSTao Ma 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
7428dec98edSTao Ma 			      generation, lockres->l_name);
7438dec98edSTao Ma 	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
7448dec98edSTao Ma 				   &ocfs2_refcount_block_lops, osb);
7458dec98edSTao Ma }
7468dec98edSTao Ma 
747ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
748ccd979bdSMark Fasheh {
749ccd979bdSMark Fasheh 	if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
750ccd979bdSMark Fasheh 		return;
751ccd979bdSMark Fasheh 
752ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
753ccd979bdSMark Fasheh 
754ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
755ccd979bdSMark Fasheh 			"Lockres %s is on the blocked list\n",
756ccd979bdSMark Fasheh 			res->l_name);
757ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
758ccd979bdSMark Fasheh 			"Lockres %s has mask waiters pending\n",
759ccd979bdSMark Fasheh 			res->l_name);
760ccd979bdSMark Fasheh 	mlog_bug_on_msg(spin_is_locked(&res->l_lock),
761ccd979bdSMark Fasheh 			"Lockres %s is locked\n",
762ccd979bdSMark Fasheh 			res->l_name);
763ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ro_holders,
764ccd979bdSMark Fasheh 			"Lockres %s has %u ro holders\n",
765ccd979bdSMark Fasheh 			res->l_name, res->l_ro_holders);
766ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ex_holders,
767ccd979bdSMark Fasheh 			"Lockres %s has %u ex holders\n",
768ccd979bdSMark Fasheh 			res->l_name, res->l_ex_holders);
769ccd979bdSMark Fasheh 
770ccd979bdSMark Fasheh 	/* Need to clear out the lock status block for the dlm */
771ccd979bdSMark Fasheh 	memset(&res->l_lksb, 0, sizeof(res->l_lksb));
772ccd979bdSMark Fasheh 
773ccd979bdSMark Fasheh 	res->l_flags = 0UL;
774ccd979bdSMark Fasheh }
775ccd979bdSMark Fasheh 
776439a36b8SEric Ren /*
777439a36b8SEric Ren  * Keep a list of processes who have interest in a lockres.
778439a36b8SEric Ren  * Note: this is now only uesed for check recursive cluster locking.
779439a36b8SEric Ren  */
780439a36b8SEric Ren static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
781439a36b8SEric Ren 				   struct ocfs2_lock_holder *oh)
782439a36b8SEric Ren {
783439a36b8SEric Ren 	INIT_LIST_HEAD(&oh->oh_list);
784439a36b8SEric Ren 	oh->oh_owner_pid = get_pid(task_pid(current));
785439a36b8SEric Ren 
786439a36b8SEric Ren 	spin_lock(&lockres->l_lock);
787439a36b8SEric Ren 	list_add_tail(&oh->oh_list, &lockres->l_holders);
788439a36b8SEric Ren 	spin_unlock(&lockres->l_lock);
789439a36b8SEric Ren }
790439a36b8SEric Ren 
791439a36b8SEric Ren static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
792439a36b8SEric Ren 				       struct ocfs2_lock_holder *oh)
793439a36b8SEric Ren {
794439a36b8SEric Ren 	spin_lock(&lockres->l_lock);
795439a36b8SEric Ren 	list_del(&oh->oh_list);
796439a36b8SEric Ren 	spin_unlock(&lockres->l_lock);
797439a36b8SEric Ren 
798439a36b8SEric Ren 	put_pid(oh->oh_owner_pid);
799439a36b8SEric Ren }
800439a36b8SEric Ren 
801439a36b8SEric Ren static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres)
802439a36b8SEric Ren {
803439a36b8SEric Ren 	struct ocfs2_lock_holder *oh;
804439a36b8SEric Ren 	struct pid *pid;
805439a36b8SEric Ren 
806439a36b8SEric Ren 	/* look in the list of holders for one with the current task as owner */
807439a36b8SEric Ren 	spin_lock(&lockres->l_lock);
808439a36b8SEric Ren 	pid = task_pid(current);
809439a36b8SEric Ren 	list_for_each_entry(oh, &lockres->l_holders, oh_list) {
810439a36b8SEric Ren 		if (oh->oh_owner_pid == pid) {
811439a36b8SEric Ren 			spin_unlock(&lockres->l_lock);
812439a36b8SEric Ren 			return 1;
813439a36b8SEric Ren 		}
814439a36b8SEric Ren 	}
815439a36b8SEric Ren 	spin_unlock(&lockres->l_lock);
816439a36b8SEric Ren 
817439a36b8SEric Ren 	return 0;
818439a36b8SEric Ren }
819439a36b8SEric Ren 
820ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
821ccd979bdSMark Fasheh 				     int level)
822ccd979bdSMark Fasheh {
823ccd979bdSMark Fasheh 	BUG_ON(!lockres);
824ccd979bdSMark Fasheh 
825ccd979bdSMark Fasheh 	switch(level) {
826bd3e7610SJoel Becker 	case DLM_LOCK_EX:
827ccd979bdSMark Fasheh 		lockres->l_ex_holders++;
828ccd979bdSMark Fasheh 		break;
829bd3e7610SJoel Becker 	case DLM_LOCK_PR:
830ccd979bdSMark Fasheh 		lockres->l_ro_holders++;
831ccd979bdSMark Fasheh 		break;
832ccd979bdSMark Fasheh 	default:
833ccd979bdSMark Fasheh 		BUG();
834ccd979bdSMark Fasheh 	}
835ccd979bdSMark Fasheh }
836ccd979bdSMark Fasheh 
837ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
838ccd979bdSMark Fasheh 				     int level)
839ccd979bdSMark Fasheh {
840ccd979bdSMark Fasheh 	BUG_ON(!lockres);
841ccd979bdSMark Fasheh 
842ccd979bdSMark Fasheh 	switch(level) {
843bd3e7610SJoel Becker 	case DLM_LOCK_EX:
844ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ex_holders);
845ccd979bdSMark Fasheh 		lockres->l_ex_holders--;
846ccd979bdSMark Fasheh 		break;
847bd3e7610SJoel Becker 	case DLM_LOCK_PR:
848ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ro_holders);
849ccd979bdSMark Fasheh 		lockres->l_ro_holders--;
850ccd979bdSMark Fasheh 		break;
851ccd979bdSMark Fasheh 	default:
852ccd979bdSMark Fasheh 		BUG();
853ccd979bdSMark Fasheh 	}
854ccd979bdSMark Fasheh }
855ccd979bdSMark Fasheh 
856ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock
857ccd979bdSMark Fasheh  * levels are EX, PR, and NL. It *will* have to be adjusted when more
858ccd979bdSMark Fasheh  * lock types are added. */
859ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level)
860ccd979bdSMark Fasheh {
861bd3e7610SJoel Becker 	int new_level = DLM_LOCK_EX;
862ccd979bdSMark Fasheh 
863bd3e7610SJoel Becker 	if (level == DLM_LOCK_EX)
864bd3e7610SJoel Becker 		new_level = DLM_LOCK_NL;
865bd3e7610SJoel Becker 	else if (level == DLM_LOCK_PR)
866bd3e7610SJoel Becker 		new_level = DLM_LOCK_PR;
867ccd979bdSMark Fasheh 	return new_level;
868ccd979bdSMark Fasheh }
869ccd979bdSMark Fasheh 
870ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres,
871ccd979bdSMark Fasheh 			      unsigned long newflags)
872ccd979bdSMark Fasheh {
873800deef3SChristoph Hellwig 	struct ocfs2_mask_waiter *mw, *tmp;
874ccd979bdSMark Fasheh 
875ccd979bdSMark Fasheh  	assert_spin_locked(&lockres->l_lock);
876ccd979bdSMark Fasheh 
877ccd979bdSMark Fasheh 	lockres->l_flags = newflags;
878ccd979bdSMark Fasheh 
879800deef3SChristoph Hellwig 	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
880ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
881ccd979bdSMark Fasheh 			continue;
882ccd979bdSMark Fasheh 
883ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
884ccd979bdSMark Fasheh 		mw->mw_status = 0;
885ccd979bdSMark Fasheh 		complete(&mw->mw_complete);
886ccd979bdSMark Fasheh 	}
887ccd979bdSMark Fasheh }
888ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
889ccd979bdSMark Fasheh {
890ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags | or);
891ccd979bdSMark Fasheh }
892ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
893ccd979bdSMark Fasheh 				unsigned long clear)
894ccd979bdSMark Fasheh {
895ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags & ~clear);
896ccd979bdSMark Fasheh }
897ccd979bdSMark Fasheh 
898ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
899ccd979bdSMark Fasheh {
900ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
901ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
902ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
903bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
904ccd979bdSMark Fasheh 
905ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
906ccd979bdSMark Fasheh 	if (lockres->l_level <=
907ccd979bdSMark Fasheh 	    ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
908bd3e7610SJoel Becker 		lockres->l_blocking = DLM_LOCK_NL;
909ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
910ccd979bdSMark Fasheh 	}
911ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
912ccd979bdSMark Fasheh }
913ccd979bdSMark Fasheh 
914ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
915ccd979bdSMark Fasheh {
916ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
917ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
918ccd979bdSMark Fasheh 
919ccd979bdSMark Fasheh 	/* Convert from RO to EX doesn't really need anything as our
920ccd979bdSMark Fasheh 	 * information is already up to data. Convert from NL to
921ccd979bdSMark Fasheh 	 * *anything* however should mark ourselves as needing an
922ccd979bdSMark Fasheh 	 * update */
923bd3e7610SJoel Becker 	if (lockres->l_level == DLM_LOCK_NL &&
924f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
925ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
926ccd979bdSMark Fasheh 
927ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
928a1912826SSunil Mushran 
929a1912826SSunil Mushran 	/*
930a1912826SSunil Mushran 	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
931a1912826SSunil Mushran 	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
932a1912826SSunil Mushran 	 * downconverting the lock before the upconvert has fully completed.
933d1e78238SXue jiufei 	 * Do not prevent the dc thread from downconverting if NONBLOCK lock
934d1e78238SXue jiufei 	 * had already returned.
935a1912826SSunil Mushran 	 */
936d1e78238SXue jiufei 	if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED))
937a1912826SSunil Mushran 		lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
938d1e78238SXue jiufei 	else
939d1e78238SXue jiufei 		lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED);
940a1912826SSunil Mushran 
941ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
942ccd979bdSMark Fasheh }
943ccd979bdSMark Fasheh 
944ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
945ccd979bdSMark Fasheh {
9463cf0c507SRoel Kluin 	BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
947ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
948ccd979bdSMark Fasheh 
949bd3e7610SJoel Becker 	if (lockres->l_requested > DLM_LOCK_NL &&
950f625c979SMark Fasheh 	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
951f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
952ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
953ccd979bdSMark Fasheh 
954ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
955ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
956ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
957ccd979bdSMark Fasheh }
958ccd979bdSMark Fasheh 
959ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
960ccd979bdSMark Fasheh 				     int level)
961ccd979bdSMark Fasheh {
962ccd979bdSMark Fasheh 	int needs_downconvert = 0;
963ccd979bdSMark Fasheh 
964ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
965ccd979bdSMark Fasheh 
966ccd979bdSMark Fasheh 	if (level > lockres->l_blocking) {
967ccd979bdSMark Fasheh 		/* only schedule a downconvert if we haven't already scheduled
968ccd979bdSMark Fasheh 		 * one that goes low enough to satisfy the level we're
969ccd979bdSMark Fasheh 		 * blocking.  this also catches the case where we get
970ccd979bdSMark Fasheh 		 * duplicate BASTs */
971ccd979bdSMark Fasheh 		if (ocfs2_highest_compat_lock_level(level) <
972ccd979bdSMark Fasheh 		    ocfs2_highest_compat_lock_level(lockres->l_blocking))
973ccd979bdSMark Fasheh 			needs_downconvert = 1;
974ccd979bdSMark Fasheh 
975ccd979bdSMark Fasheh 		lockres->l_blocking = level;
976ccd979bdSMark Fasheh 	}
977ccd979bdSMark Fasheh 
9789b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n",
9799b915181SSunil Mushran 	     lockres->l_name, level, lockres->l_level, lockres->l_blocking,
9809b915181SSunil Mushran 	     needs_downconvert);
9819b915181SSunil Mushran 
9820b94a909SWengang Wang 	if (needs_downconvert)
9830b94a909SWengang Wang 		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
984c1e8d35eSTao Ma 	mlog(0, "needs_downconvert = %d\n", needs_downconvert);
985ccd979bdSMark Fasheh 	return needs_downconvert;
986ccd979bdSMark Fasheh }
987ccd979bdSMark Fasheh 
988de551246SJoel Becker /*
989de551246SJoel Becker  * OCFS2_LOCK_PENDING and l_pending_gen.
990de551246SJoel Becker  *
991de551246SJoel Becker  * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting
992de551246SJoel Becker  * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock()
993de551246SJoel Becker  * for more details on the race.
994de551246SJoel Becker  *
995de551246SJoel Becker  * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces
996de551246SJoel Becker  * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock()
997de551246SJoel Becker  * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
998de551246SJoel Becker  * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
999de551246SJoel Becker  * the caller is going to try to clear PENDING again.  If nothing else is
1000de551246SJoel Becker  * happening, __lockres_clear_pending() sees PENDING is unset and does
1001de551246SJoel Becker  * nothing.
1002de551246SJoel Becker  *
1003de551246SJoel Becker  * But what if another path (eg downconvert thread) has just started a
1004de551246SJoel Becker  * new locking action?  The other path has re-set PENDING.  Our path
1005de551246SJoel Becker  * cannot clear PENDING, because that will re-open the original race
1006de551246SJoel Becker  * window.
1007de551246SJoel Becker  *
1008de551246SJoel Becker  * [Example]
1009de551246SJoel Becker  *
1010de551246SJoel Becker  * ocfs2_meta_lock()
1011de551246SJoel Becker  *  ocfs2_cluster_lock()
1012de551246SJoel Becker  *   set BUSY
1013de551246SJoel Becker  *   set PENDING
1014de551246SJoel Becker  *   drop l_lock
1015de551246SJoel Becker  *   ocfs2_dlm_lock()
1016de551246SJoel Becker  *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
1017de551246SJoel Becker  *     clear PENDING			 ocfs2_unblock_lock()
1018de551246SJoel Becker  *					  take_l_lock
1019de551246SJoel Becker  *					  !BUSY
1020de551246SJoel Becker  *					  ocfs2_prepare_downconvert()
1021de551246SJoel Becker  *					   set BUSY
1022de551246SJoel Becker  *					   set PENDING
1023de551246SJoel Becker  *					  drop l_lock
1024de551246SJoel Becker  *   take l_lock
1025de551246SJoel Becker  *   clear PENDING
1026de551246SJoel Becker  *   drop l_lock
1027de551246SJoel Becker  *			<window>
1028de551246SJoel Becker  *					  ocfs2_dlm_lock()
1029de551246SJoel Becker  *
1030de551246SJoel Becker  * So as you can see, we now have a window where l_lock is not held,
1031de551246SJoel Becker  * PENDING is not set, and ocfs2_dlm_lock() has not been called.
1032de551246SJoel Becker  *
1033de551246SJoel Becker  * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
1034de551246SJoel Becker  * set by ocfs2_prepare_downconvert().  That wasn't nice.
1035de551246SJoel Becker  *
1036de551246SJoel Becker  * To solve this we introduce l_pending_gen.  A call to
1037de551246SJoel Becker  * lockres_clear_pending() will only do so when it is passed a generation
1038de551246SJoel Becker  * number that matches the lockres.  lockres_set_pending() will return the
1039de551246SJoel Becker  * current generation number.  When ocfs2_cluster_lock() goes to clear
1040de551246SJoel Becker  * PENDING, it passes the generation it got from set_pending().  In our
1041de551246SJoel Becker  * example above, the generation numbers will *not* match.  Thus,
1042de551246SJoel Becker  * ocfs2_cluster_lock() will not clear the PENDING set by
1043de551246SJoel Becker  * ocfs2_prepare_downconvert().
1044de551246SJoel Becker  */
1045de551246SJoel Becker 
1046de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */
1047de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
1048de551246SJoel Becker 				    unsigned int generation,
1049de551246SJoel Becker 				    struct ocfs2_super *osb)
1050de551246SJoel Becker {
1051de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
1052de551246SJoel Becker 
1053de551246SJoel Becker 	/*
1054de551246SJoel Becker 	 * The ast and locking functions can race us here.  The winner
1055de551246SJoel Becker 	 * will clear pending, the loser will not.
1056de551246SJoel Becker 	 */
1057de551246SJoel Becker 	if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
1058de551246SJoel Becker 	    (lockres->l_pending_gen != generation))
1059de551246SJoel Becker 		return;
1060de551246SJoel Becker 
1061de551246SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
1062de551246SJoel Becker 	lockres->l_pending_gen++;
1063de551246SJoel Becker 
1064de551246SJoel Becker 	/*
1065de551246SJoel Becker 	 * The downconvert thread may have skipped us because we
1066de551246SJoel Becker 	 * were PENDING.  Wake it up.
1067de551246SJoel Becker 	 */
1068de551246SJoel Becker 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1069de551246SJoel Becker 		ocfs2_wake_downconvert_thread(osb);
1070de551246SJoel Becker }
1071de551246SJoel Becker 
1072de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */
1073de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
1074de551246SJoel Becker 				  unsigned int generation,
1075de551246SJoel Becker 				  struct ocfs2_super *osb)
1076de551246SJoel Becker {
1077de551246SJoel Becker 	unsigned long flags;
1078de551246SJoel Becker 
1079de551246SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1080de551246SJoel Becker 	__lockres_clear_pending(lockres, generation, osb);
1081de551246SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1082de551246SJoel Becker }
1083de551246SJoel Becker 
1084de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
1085de551246SJoel Becker {
1086de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
1087de551246SJoel Becker 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
1088de551246SJoel Becker 
1089de551246SJoel Becker 	lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
1090de551246SJoel Becker 
1091de551246SJoel Becker 	return lockres->l_pending_gen;
1092de551246SJoel Becker }
1093de551246SJoel Becker 
1094c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
1095ccd979bdSMark Fasheh {
1096a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1097aa2623adSMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1098ccd979bdSMark Fasheh 	int needs_downconvert;
1099ccd979bdSMark Fasheh 	unsigned long flags;
1100ccd979bdSMark Fasheh 
1101bd3e7610SJoel Becker 	BUG_ON(level <= DLM_LOCK_NL);
1102ccd979bdSMark Fasheh 
11039b915181SSunil Mushran 	mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, "
11049b915181SSunil Mushran 	     "type %s\n", lockres->l_name, level, lockres->l_level,
1105aa2623adSMark Fasheh 	     ocfs2_lock_type_string(lockres->l_type));
1106aa2623adSMark Fasheh 
1107cf8e06f1SMark Fasheh 	/*
1108cf8e06f1SMark Fasheh 	 * We can skip the bast for locks which don't enable caching -
1109cf8e06f1SMark Fasheh 	 * they'll be dropped at the earliest possible time anyway.
1110cf8e06f1SMark Fasheh 	 */
1111cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
1112cf8e06f1SMark Fasheh 		return;
1113cf8e06f1SMark Fasheh 
1114ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1115ccd979bdSMark Fasheh 	needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
1116ccd979bdSMark Fasheh 	if (needs_downconvert)
1117ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
1118ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1119ccd979bdSMark Fasheh 
1120d680efe9SMark Fasheh 	wake_up(&lockres->l_event);
1121d680efe9SMark Fasheh 
112234d024f8SMark Fasheh 	ocfs2_wake_downconvert_thread(osb);
1123ccd979bdSMark Fasheh }
1124ccd979bdSMark Fasheh 
1125c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
1126ccd979bdSMark Fasheh {
1127a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1128de551246SJoel Becker 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1129ccd979bdSMark Fasheh 	unsigned long flags;
11301693a5c0SDavid Teigland 	int status;
1131ccd979bdSMark Fasheh 
1132ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1133ccd979bdSMark Fasheh 
11341693a5c0SDavid Teigland 	status = ocfs2_dlm_lock_status(&lockres->l_lksb);
11351693a5c0SDavid Teigland 
11361693a5c0SDavid Teigland 	if (status == -EAGAIN) {
11371693a5c0SDavid Teigland 		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
11381693a5c0SDavid Teigland 		goto out;
11391693a5c0SDavid Teigland 	}
11401693a5c0SDavid Teigland 
11411693a5c0SDavid Teigland 	if (status) {
11428f2c9c1bSJoel Becker 		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
11431693a5c0SDavid Teigland 		     lockres->l_name, status);
1144ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1145ccd979bdSMark Fasheh 		return;
1146ccd979bdSMark Fasheh 	}
1147ccd979bdSMark Fasheh 
11489b915181SSunil Mushran 	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
11499b915181SSunil Mushran 	     "level %d => %d\n", lockres->l_name, lockres->l_action,
11509b915181SSunil Mushran 	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);
11519b915181SSunil Mushran 
1152ccd979bdSMark Fasheh 	switch(lockres->l_action) {
1153ccd979bdSMark Fasheh 	case OCFS2_AST_ATTACH:
1154ccd979bdSMark Fasheh 		ocfs2_generic_handle_attach_action(lockres);
1155e92d57dfSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
1156ccd979bdSMark Fasheh 		break;
1157ccd979bdSMark Fasheh 	case OCFS2_AST_CONVERT:
1158ccd979bdSMark Fasheh 		ocfs2_generic_handle_convert_action(lockres);
1159ccd979bdSMark Fasheh 		break;
1160ccd979bdSMark Fasheh 	case OCFS2_AST_DOWNCONVERT:
1161ccd979bdSMark Fasheh 		ocfs2_generic_handle_downconvert_action(lockres);
1162ccd979bdSMark Fasheh 		break;
1163ccd979bdSMark Fasheh 	default:
11649b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
11659b915181SSunil Mushran 		     "flags 0x%lx, unlock: %u\n",
1166e92d57dfSMark Fasheh 		     lockres->l_name, lockres->l_action, lockres->l_flags,
1167e92d57dfSMark Fasheh 		     lockres->l_unlock_action);
1168ccd979bdSMark Fasheh 		BUG();
1169ccd979bdSMark Fasheh 	}
11701693a5c0SDavid Teigland out:
1171ccd979bdSMark Fasheh 	/* set it to something invalid so if we get called again we
1172ccd979bdSMark Fasheh 	 * can catch it. */
1173ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_INVALID;
1174ccd979bdSMark Fasheh 
1175de551246SJoel Becker 	/* Did we try to cancel this lock?  Clear that state */
1176de551246SJoel Becker 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
1177de551246SJoel Becker 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1178de551246SJoel Becker 
1179de551246SJoel Becker 	/*
1180de551246SJoel Becker 	 * We may have beaten the locking functions here.  We certainly
1181de551246SJoel Becker 	 * know that dlm_lock() has been called :-)
1182de551246SJoel Becker 	 * Because we can't have two lock calls in flight at once, we
1183de551246SJoel Becker 	 * can use lockres->l_pending_gen.
1184de551246SJoel Becker 	 */
1185de551246SJoel Becker 	__lockres_clear_pending(lockres, lockres->l_pending_gen,  osb);
1186de551246SJoel Becker 
1187ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1188d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1189ccd979bdSMark Fasheh }
1190ccd979bdSMark Fasheh 
1191553b5eb9SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1192553b5eb9SJoel Becker {
1193553b5eb9SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1194553b5eb9SJoel Becker 	unsigned long flags;
1195553b5eb9SJoel Becker 
11969b915181SSunil Mushran 	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
11979b915181SSunil Mushran 	     lockres->l_name, lockres->l_unlock_action);
1198553b5eb9SJoel Becker 
1199553b5eb9SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1200553b5eb9SJoel Becker 	if (error) {
1201553b5eb9SJoel Becker 		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
1202553b5eb9SJoel Becker 		     "unlock_action %d\n", error, lockres->l_name,
1203553b5eb9SJoel Becker 		     lockres->l_unlock_action);
1204553b5eb9SJoel Becker 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1205553b5eb9SJoel Becker 		return;
1206553b5eb9SJoel Becker 	}
1207553b5eb9SJoel Becker 
1208553b5eb9SJoel Becker 	switch(lockres->l_unlock_action) {
1209553b5eb9SJoel Becker 	case OCFS2_UNLOCK_CANCEL_CONVERT:
1210553b5eb9SJoel Becker 		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
1211553b5eb9SJoel Becker 		lockres->l_action = OCFS2_AST_INVALID;
1212553b5eb9SJoel Becker 		/* Downconvert thread may have requeued this lock, we
1213553b5eb9SJoel Becker 		 * need to wake it. */
1214553b5eb9SJoel Becker 		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1215553b5eb9SJoel Becker 			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
1216553b5eb9SJoel Becker 		break;
1217553b5eb9SJoel Becker 	case OCFS2_UNLOCK_DROP_LOCK:
1218553b5eb9SJoel Becker 		lockres->l_level = DLM_LOCK_IV;
1219553b5eb9SJoel Becker 		break;
1220553b5eb9SJoel Becker 	default:
1221553b5eb9SJoel Becker 		BUG();
1222553b5eb9SJoel Becker 	}
1223553b5eb9SJoel Becker 
1224553b5eb9SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1225553b5eb9SJoel Becker 	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1226553b5eb9SJoel Becker 	wake_up(&lockres->l_event);
1227553b5eb9SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1228553b5eb9SJoel Becker }
1229553b5eb9SJoel Becker 
1230553b5eb9SJoel Becker /*
1231553b5eb9SJoel Becker  * This is the filesystem locking protocol.  It provides the lock handling
1232553b5eb9SJoel Becker  * hooks for the underlying DLM.  It has a maximum version number.
1233553b5eb9SJoel Becker  * The version number allows interoperability with systems running at
1234553b5eb9SJoel Becker  * the same major number and an equal or smaller minor number.
1235553b5eb9SJoel Becker  *
1236553b5eb9SJoel Becker  * Whenever the filesystem does new things with locks (adds or removes a
1237553b5eb9SJoel Becker  * lock, orders them differently, does different things underneath a lock),
1238553b5eb9SJoel Becker  * the version must be changed.  The protocol is negotiated when joining
1239553b5eb9SJoel Becker  * the dlm domain.  A node may join the domain if its major version is
1240553b5eb9SJoel Becker  * identical to all other nodes and its minor version is greater than
1241553b5eb9SJoel Becker  * or equal to all other nodes.  When its minor version is greater than
1242553b5eb9SJoel Becker  * the other nodes, it will run at the minor version specified by the
1243553b5eb9SJoel Becker  * other nodes.
1244553b5eb9SJoel Becker  *
1245553b5eb9SJoel Becker  * If a locking change is made that will not be compatible with older
1246553b5eb9SJoel Becker  * versions, the major number must be increased and the minor version set
1247553b5eb9SJoel Becker  * to zero.  If a change merely adds a behavior that can be disabled when
1248553b5eb9SJoel Becker  * speaking to older versions, the minor version must be increased.  If a
1249553b5eb9SJoel Becker  * change adds a fully backwards compatible change (eg, LVB changes that
1250553b5eb9SJoel Becker  * are just ignored by older versions), the version does not need to be
1251553b5eb9SJoel Becker  * updated.
1252553b5eb9SJoel Becker  */
1253553b5eb9SJoel Becker static struct ocfs2_locking_protocol lproto = {
1254553b5eb9SJoel Becker 	.lp_max_version = {
1255553b5eb9SJoel Becker 		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
1256553b5eb9SJoel Becker 		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
1257553b5eb9SJoel Becker 	},
1258553b5eb9SJoel Becker 	.lp_lock_ast		= ocfs2_locking_ast,
1259553b5eb9SJoel Becker 	.lp_blocking_ast	= ocfs2_blocking_ast,
1260553b5eb9SJoel Becker 	.lp_unlock_ast		= ocfs2_unlock_ast,
1261553b5eb9SJoel Becker };
1262553b5eb9SJoel Becker 
1263553b5eb9SJoel Becker void ocfs2_set_locking_protocol(void)
1264553b5eb9SJoel Becker {
1265553b5eb9SJoel Becker 	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
1266553b5eb9SJoel Becker }
1267553b5eb9SJoel Becker 
1268ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1269ccd979bdSMark Fasheh 						int convert)
1270ccd979bdSMark Fasheh {
1271ccd979bdSMark Fasheh 	unsigned long flags;
1272ccd979bdSMark Fasheh 
1273ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1274ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1275a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1276ccd979bdSMark Fasheh 	if (convert)
1277ccd979bdSMark Fasheh 		lockres->l_action = OCFS2_AST_INVALID;
1278ccd979bdSMark Fasheh 	else
1279ccd979bdSMark Fasheh 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1280ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1281ccd979bdSMark Fasheh 
1282ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1283ccd979bdSMark Fasheh }
1284ccd979bdSMark Fasheh 
1285ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e.,
1286ccd979bdSMark Fasheh  * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
1287ccd979bdSMark Fasheh  * to do the right thing in that case.
1288ccd979bdSMark Fasheh  */
1289ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
1290ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
1291ccd979bdSMark Fasheh 			     int level,
1292bd3e7610SJoel Becker 			     u32 dlm_flags)
1293ccd979bdSMark Fasheh {
1294ccd979bdSMark Fasheh 	int ret = 0;
1295ccd979bdSMark Fasheh 	unsigned long flags;
1296de551246SJoel Becker 	unsigned int gen;
1297ccd979bdSMark Fasheh 
1298bd3e7610SJoel Becker 	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
1299ccd979bdSMark Fasheh 	     dlm_flags);
1300ccd979bdSMark Fasheh 
1301ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1302ccd979bdSMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
1303ccd979bdSMark Fasheh 	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
1304ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1305ccd979bdSMark Fasheh 		goto bail;
1306ccd979bdSMark Fasheh 	}
1307ccd979bdSMark Fasheh 
1308ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_ATTACH;
1309ccd979bdSMark Fasheh 	lockres->l_requested = level;
1310ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1311de551246SJoel Becker 	gen = lockres_set_pending(lockres);
1312ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1313ccd979bdSMark Fasheh 
13144670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
1315ccd979bdSMark Fasheh 			     level,
1316ccd979bdSMark Fasheh 			     &lockres->l_lksb,
1317ccd979bdSMark Fasheh 			     dlm_flags,
1318ccd979bdSMark Fasheh 			     lockres->l_name,
1319a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
1320de551246SJoel Becker 	lockres_clear_pending(lockres, gen, osb);
13217431cd7eSJoel Becker 	if (ret) {
13227431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1323ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
1324ccd979bdSMark Fasheh 	}
1325ccd979bdSMark Fasheh 
13267431cd7eSJoel Becker 	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
1327ccd979bdSMark Fasheh 
1328ccd979bdSMark Fasheh bail:
1329ccd979bdSMark Fasheh 	return ret;
1330ccd979bdSMark Fasheh }
1331ccd979bdSMark Fasheh 
1332ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
1333ccd979bdSMark Fasheh 					int flag)
1334ccd979bdSMark Fasheh {
1335ccd979bdSMark Fasheh 	unsigned long flags;
1336ccd979bdSMark Fasheh 	int ret;
1337ccd979bdSMark Fasheh 
1338ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1339ccd979bdSMark Fasheh 	ret = lockres->l_flags & flag;
1340ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1341ccd979bdSMark Fasheh 
1342ccd979bdSMark Fasheh 	return ret;
1343ccd979bdSMark Fasheh }
1344ccd979bdSMark Fasheh 
1345ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)
1346ccd979bdSMark Fasheh 
1347ccd979bdSMark Fasheh {
1348ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1349ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
1350ccd979bdSMark Fasheh }
1351ccd979bdSMark Fasheh 
1352ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)
1353ccd979bdSMark Fasheh 
1354ccd979bdSMark Fasheh {
1355ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1356ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
1357ccd979bdSMark Fasheh }
1358ccd979bdSMark Fasheh 
1359ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf
1360ccd979bdSMark Fasheh  * of another node, and return true if the currently wanted
1361ccd979bdSMark Fasheh  * level will be compatible with it. */
1362ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
1363ccd979bdSMark Fasheh 						     int wanted)
1364ccd979bdSMark Fasheh {
1365ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
1366ccd979bdSMark Fasheh 
1367ccd979bdSMark Fasheh 	return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
1368ccd979bdSMark Fasheh }
1369ccd979bdSMark Fasheh 
1370ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
1371ccd979bdSMark Fasheh {
1372ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&mw->mw_item);
1373ccd979bdSMark Fasheh 	init_completion(&mw->mw_complete);
13748ddb7b00SSunil Mushran 	ocfs2_init_start_time(mw);
1375ccd979bdSMark Fasheh }
1376ccd979bdSMark Fasheh 
1377ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
1378ccd979bdSMark Fasheh {
1379ccd979bdSMark Fasheh 	wait_for_completion(&mw->mw_complete);
1380ccd979bdSMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
138116735d02SWolfram Sang 	reinit_completion(&mw->mw_complete);
1382ccd979bdSMark Fasheh 	return mw->mw_status;
1383ccd979bdSMark Fasheh }
1384ccd979bdSMark Fasheh 
1385ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
1386ccd979bdSMark Fasheh 				    struct ocfs2_mask_waiter *mw,
1387ccd979bdSMark Fasheh 				    unsigned long mask,
1388ccd979bdSMark Fasheh 				    unsigned long goal)
1389ccd979bdSMark Fasheh {
1390ccd979bdSMark Fasheh 	BUG_ON(!list_empty(&mw->mw_item));
1391ccd979bdSMark Fasheh 
1392ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
1393ccd979bdSMark Fasheh 
1394ccd979bdSMark Fasheh 	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
1395ccd979bdSMark Fasheh 	mw->mw_mask = mask;
1396ccd979bdSMark Fasheh 	mw->mw_goal = goal;
1397ccd979bdSMark Fasheh }
1398ccd979bdSMark Fasheh 
1399ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY
1400ccd979bdSMark Fasheh  * if the mask still hadn't reached its goal */
1401d1e78238SXue jiufei static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1402ccd979bdSMark Fasheh 				      struct ocfs2_mask_waiter *mw)
1403ccd979bdSMark Fasheh {
1404ccd979bdSMark Fasheh 	int ret = 0;
1405ccd979bdSMark Fasheh 
1406d1e78238SXue jiufei 	assert_spin_locked(&lockres->l_lock);
1407ccd979bdSMark Fasheh 	if (!list_empty(&mw->mw_item)) {
1408ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1409ccd979bdSMark Fasheh 			ret = -EBUSY;
1410ccd979bdSMark Fasheh 
1411ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
1412ccd979bdSMark Fasheh 		init_completion(&mw->mw_complete);
1413ccd979bdSMark Fasheh 	}
1414d1e78238SXue jiufei 
1415d1e78238SXue jiufei 	return ret;
1416d1e78238SXue jiufei }
1417d1e78238SXue jiufei 
1418d1e78238SXue jiufei static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1419d1e78238SXue jiufei 				      struct ocfs2_mask_waiter *mw)
1420d1e78238SXue jiufei {
1421d1e78238SXue jiufei 	unsigned long flags;
1422d1e78238SXue jiufei 	int ret = 0;
1423d1e78238SXue jiufei 
1424d1e78238SXue jiufei 	spin_lock_irqsave(&lockres->l_lock, flags);
1425d1e78238SXue jiufei 	ret = __lockres_remove_mask_waiter(lockres, mw);
1426ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1427ccd979bdSMark Fasheh 
1428ccd979bdSMark Fasheh 	return ret;
1429ccd979bdSMark Fasheh 
1430ccd979bdSMark Fasheh }
1431ccd979bdSMark Fasheh 
1432cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1433cf8e06f1SMark Fasheh 					     struct ocfs2_lock_res *lockres)
1434cf8e06f1SMark Fasheh {
1435cf8e06f1SMark Fasheh 	int ret;
1436cf8e06f1SMark Fasheh 
1437cf8e06f1SMark Fasheh 	ret = wait_for_completion_interruptible(&mw->mw_complete);
1438cf8e06f1SMark Fasheh 	if (ret)
1439cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, mw);
1440cf8e06f1SMark Fasheh 	else
1441cf8e06f1SMark Fasheh 		ret = mw->mw_status;
1442cf8e06f1SMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
144316735d02SWolfram Sang 	reinit_completion(&mw->mw_complete);
1444cf8e06f1SMark Fasheh 	return ret;
1445cf8e06f1SMark Fasheh }
1446cf8e06f1SMark Fasheh 
1447cb25797dSJan Kara static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1448ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres,
1449ccd979bdSMark Fasheh 				int level,
1450bd3e7610SJoel Becker 				u32 lkm_flags,
1451cb25797dSJan Kara 				int arg_flags,
1452cb25797dSJan Kara 				int l_subclass,
1453cb25797dSJan Kara 				unsigned long caller_ip)
1454ccd979bdSMark Fasheh {
1455ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
1456ccd979bdSMark Fasheh 	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1457ccd979bdSMark Fasheh 	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1458ccd979bdSMark Fasheh 	unsigned long flags;
1459de551246SJoel Becker 	unsigned int gen;
14601693a5c0SDavid Teigland 	int noqueue_attempted = 0;
1461d1e78238SXue jiufei 	int dlm_locked = 0;
1462b1b1e15eSTariq Saeed 	int kick_dc = 0;
1463ccd979bdSMark Fasheh 
14642f2eca20Salex chen 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
14652f2eca20Salex chen 		mlog_errno(-EINVAL);
14662f2eca20Salex chen 		return -EINVAL;
14672f2eca20Salex chen 	}
14682f2eca20Salex chen 
1469ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1470ccd979bdSMark Fasheh 
1471b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
1472bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
1473b80fc012SMark Fasheh 
1474ccd979bdSMark Fasheh again:
1475ccd979bdSMark Fasheh 	wait = 0;
1476ccd979bdSMark Fasheh 
1477a1912826SSunil Mushran 	spin_lock_irqsave(&lockres->l_lock, flags);
1478a1912826SSunil Mushran 
1479ccd979bdSMark Fasheh 	if (catch_signals && signal_pending(current)) {
1480ccd979bdSMark Fasheh 		ret = -ERESTARTSYS;
1481a1912826SSunil Mushran 		goto unlock;
1482ccd979bdSMark Fasheh 	}
1483ccd979bdSMark Fasheh 
1484ccd979bdSMark Fasheh 	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1485ccd979bdSMark Fasheh 			"Cluster lock called on freeing lockres %s! flags "
1486ccd979bdSMark Fasheh 			"0x%lx\n", lockres->l_name, lockres->l_flags);
1487ccd979bdSMark Fasheh 
1488ccd979bdSMark Fasheh 	/* We only compare against the currently granted level
1489ccd979bdSMark Fasheh 	 * here. If the lock is blocked waiting on a downconvert,
1490ccd979bdSMark Fasheh 	 * we'll get caught below. */
1491ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
1492ccd979bdSMark Fasheh 	    level > lockres->l_level) {
1493ccd979bdSMark Fasheh 		/* is someone sitting in dlm_lock? If so, wait on
1494ccd979bdSMark Fasheh 		 * them. */
1495ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1496ccd979bdSMark Fasheh 		wait = 1;
1497ccd979bdSMark Fasheh 		goto unlock;
1498ccd979bdSMark Fasheh 	}
1499ccd979bdSMark Fasheh 
1500a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1501a1912826SSunil Mushran 		/*
1502a1912826SSunil Mushran 		 * We've upconverted. If the lock now has a level we can
1503a1912826SSunil Mushran 		 * work with, we take it. If, however, the lock is not at the
1504a1912826SSunil Mushran 		 * required level, we go thru the full cycle. One way this could
1505a1912826SSunil Mushran 		 * happen is if a process requesting an upconvert to PR is
1506a1912826SSunil Mushran 		 * closely followed by another requesting upconvert to an EX.
1507a1912826SSunil Mushran 		 * If the process requesting EX lands here, we want it to
1508a1912826SSunil Mushran 		 * continue attempting to upconvert and let the process
1509a1912826SSunil Mushran 		 * requesting PR take the lock.
1510a1912826SSunil Mushran 		 * If multiple processes request upconvert to PR, the first one
1511a1912826SSunil Mushran 		 * here will take the lock. The others will have to go thru the
1512a1912826SSunil Mushran 		 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1513a1912826SSunil Mushran 		 * downconvert request.
1514a1912826SSunil Mushran 		 */
1515a1912826SSunil Mushran 		if (level <= lockres->l_level)
1516a1912826SSunil Mushran 			goto update_holders;
1517a1912826SSunil Mushran 	}
1518a1912826SSunil Mushran 
1519ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1520ccd979bdSMark Fasheh 	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1521ccd979bdSMark Fasheh 		/* is the lock is currently blocked on behalf of
1522ccd979bdSMark Fasheh 		 * another node */
1523ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
1524ccd979bdSMark Fasheh 		wait = 1;
1525ccd979bdSMark Fasheh 		goto unlock;
1526ccd979bdSMark Fasheh 	}
1527ccd979bdSMark Fasheh 
1528ccd979bdSMark Fasheh 	if (level > lockres->l_level) {
15291693a5c0SDavid Teigland 		if (noqueue_attempted > 0) {
15301693a5c0SDavid Teigland 			ret = -EAGAIN;
15311693a5c0SDavid Teigland 			goto unlock;
15321693a5c0SDavid Teigland 		}
15331693a5c0SDavid Teigland 		if (lkm_flags & DLM_LKF_NOQUEUE)
15341693a5c0SDavid Teigland 			noqueue_attempted = 1;
15351693a5c0SDavid Teigland 
1536ccd979bdSMark Fasheh 		if (lockres->l_action != OCFS2_AST_INVALID)
1537ccd979bdSMark Fasheh 			mlog(ML_ERROR, "lockres %s has action %u pending\n",
1538ccd979bdSMark Fasheh 			     lockres->l_name, lockres->l_action);
1539ccd979bdSMark Fasheh 
1540019d1b22SMark Fasheh 		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1541019d1b22SMark Fasheh 			lockres->l_action = OCFS2_AST_ATTACH;
1542bd3e7610SJoel Becker 			lkm_flags &= ~DLM_LKF_CONVERT;
1543019d1b22SMark Fasheh 		} else {
1544ccd979bdSMark Fasheh 			lockres->l_action = OCFS2_AST_CONVERT;
1545bd3e7610SJoel Becker 			lkm_flags |= DLM_LKF_CONVERT;
1546019d1b22SMark Fasheh 		}
1547019d1b22SMark Fasheh 
1548ccd979bdSMark Fasheh 		lockres->l_requested = level;
1549ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1550de551246SJoel Becker 		gen = lockres_set_pending(lockres);
1551ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1552ccd979bdSMark Fasheh 
1553bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_IV);
1554bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_NL);
1555ccd979bdSMark Fasheh 
15569b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
1557ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_level, level);
1558ccd979bdSMark Fasheh 
1559ccd979bdSMark Fasheh 		/* call dlm_lock to upgrade lock now */
15604670c46dSJoel Becker 		ret = ocfs2_dlm_lock(osb->cconn,
1561ccd979bdSMark Fasheh 				     level,
1562ccd979bdSMark Fasheh 				     &lockres->l_lksb,
1563019d1b22SMark Fasheh 				     lkm_flags,
1564ccd979bdSMark Fasheh 				     lockres->l_name,
1565a796d286SJoel Becker 				     OCFS2_LOCK_ID_MAX_LEN - 1);
1566de551246SJoel Becker 		lockres_clear_pending(lockres, gen, osb);
15677431cd7eSJoel Becker 		if (ret) {
15687431cd7eSJoel Becker 			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
15697431cd7eSJoel Becker 			    (ret != -EAGAIN)) {
157024ef1815SJoel Becker 				ocfs2_log_dlm_error("ocfs2_dlm_lock",
15717431cd7eSJoel Becker 						    ret, lockres);
1572ccd979bdSMark Fasheh 			}
1573ccd979bdSMark Fasheh 			ocfs2_recover_from_dlm_error(lockres, 1);
1574ccd979bdSMark Fasheh 			goto out;
1575ccd979bdSMark Fasheh 		}
1576d1e78238SXue jiufei 		dlm_locked = 1;
1577ccd979bdSMark Fasheh 
157873ac36eaSColy Li 		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
1579ccd979bdSMark Fasheh 		     lockres->l_name);
1580ccd979bdSMark Fasheh 
1581ccd979bdSMark Fasheh 		/* At this point we've gone inside the dlm and need to
1582ccd979bdSMark Fasheh 		 * complete our work regardless. */
1583ccd979bdSMark Fasheh 		catch_signals = 0;
1584ccd979bdSMark Fasheh 
1585ccd979bdSMark Fasheh 		/* wait for busy to clear and carry on */
1586ccd979bdSMark Fasheh 		goto again;
1587ccd979bdSMark Fasheh 	}
1588ccd979bdSMark Fasheh 
1589a1912826SSunil Mushran update_holders:
1590ccd979bdSMark Fasheh 	/* Ok, if we get here then we're good to go. */
1591ccd979bdSMark Fasheh 	ocfs2_inc_holders(lockres, level);
1592ccd979bdSMark Fasheh 
1593ccd979bdSMark Fasheh 	ret = 0;
1594ccd979bdSMark Fasheh unlock:
1595a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1596a1912826SSunil Mushran 
1597b1b1e15eSTariq Saeed 	/* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
1598b1b1e15eSTariq Saeed 	kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
1599b1b1e15eSTariq Saeed 
1600ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1601b1b1e15eSTariq Saeed 	if (kick_dc)
1602b1b1e15eSTariq Saeed 		ocfs2_wake_downconvert_thread(osb);
1603ccd979bdSMark Fasheh out:
1604ccd979bdSMark Fasheh 	/*
1605ccd979bdSMark Fasheh 	 * This is helping work around a lock inversion between the page lock
1606ccd979bdSMark Fasheh 	 * and dlm locks.  One path holds the page lock while calling aops
1607ccd979bdSMark Fasheh 	 * which block acquiring dlm locks.  The voting thread holds dlm
1608ccd979bdSMark Fasheh 	 * locks while acquiring page locks while down converting data locks.
1609ccd979bdSMark Fasheh 	 * This block is helping an aop path notice the inversion and back
1610ccd979bdSMark Fasheh 	 * off to unlock its page lock before trying the dlm lock again.
1611ccd979bdSMark Fasheh 	 */
1612ccd979bdSMark Fasheh 	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
1613ccd979bdSMark Fasheh 	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
1614ccd979bdSMark Fasheh 		wait = 0;
1615d1e78238SXue jiufei 		spin_lock_irqsave(&lockres->l_lock, flags);
1616d1e78238SXue jiufei 		if (__lockres_remove_mask_waiter(lockres, &mw)) {
1617d1e78238SXue jiufei 			if (dlm_locked)
1618d1e78238SXue jiufei 				lockres_or_flags(lockres,
1619d1e78238SXue jiufei 					OCFS2_LOCK_NONBLOCK_FINISHED);
1620d1e78238SXue jiufei 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1621ccd979bdSMark Fasheh 			ret = -EAGAIN;
1622d1e78238SXue jiufei 		} else {
1623d1e78238SXue jiufei 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1624ccd979bdSMark Fasheh 			goto again;
1625ccd979bdSMark Fasheh 		}
1626d1e78238SXue jiufei 	}
1627ccd979bdSMark Fasheh 	if (wait) {
1628ccd979bdSMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
1629ccd979bdSMark Fasheh 		if (ret == 0)
1630ccd979bdSMark Fasheh 			goto again;
1631ccd979bdSMark Fasheh 		mlog_errno(ret);
1632ccd979bdSMark Fasheh 	}
16338ddb7b00SSunil Mushran 	ocfs2_update_lock_stats(lockres, level, &mw, ret);
1634ccd979bdSMark Fasheh 
1635cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
1636cb25797dSJan Kara 	if (!ret && lockres->l_lockdep_map.key != NULL) {
1637cb25797dSJan Kara 		if (level == DLM_LOCK_PR)
1638cb25797dSJan Kara 			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1639cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1640cb25797dSJan Kara 				caller_ip);
1641cb25797dSJan Kara 		else
1642cb25797dSJan Kara 			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1643cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1644cb25797dSJan Kara 				caller_ip);
1645cb25797dSJan Kara 	}
1646cb25797dSJan Kara #endif
1647ccd979bdSMark Fasheh 	return ret;
1648ccd979bdSMark Fasheh }
1649ccd979bdSMark Fasheh 
1650cb25797dSJan Kara static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1651ccd979bdSMark Fasheh 				     struct ocfs2_lock_res *lockres,
1652cb25797dSJan Kara 				     int level,
1653cb25797dSJan Kara 				     u32 lkm_flags,
1654cb25797dSJan Kara 				     int arg_flags)
1655cb25797dSJan Kara {
1656cb25797dSJan Kara 	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1657cb25797dSJan Kara 				    0, _RET_IP_);
1658cb25797dSJan Kara }
1659cb25797dSJan Kara 
1660cb25797dSJan Kara 
1661cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1662cb25797dSJan Kara 				   struct ocfs2_lock_res *lockres,
1663cb25797dSJan Kara 				   int level,
1664cb25797dSJan Kara 				   unsigned long caller_ip)
1665ccd979bdSMark Fasheh {
1666ccd979bdSMark Fasheh 	unsigned long flags;
1667ccd979bdSMark Fasheh 
1668ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1669ccd979bdSMark Fasheh 	ocfs2_dec_holders(lockres, level);
167034d024f8SMark Fasheh 	ocfs2_downconvert_on_unlock(osb, lockres);
1671ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1672cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
1673cb25797dSJan Kara 	if (lockres->l_lockdep_map.key != NULL)
1674cb25797dSJan Kara 		rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1675cb25797dSJan Kara #endif
1676ccd979bdSMark Fasheh }
1677ccd979bdSMark Fasheh 
1678da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1679d680efe9SMark Fasheh 				 struct ocfs2_lock_res *lockres,
168024c19ef4SMark Fasheh 				 int ex,
168124c19ef4SMark Fasheh 				 int local)
1682ccd979bdSMark Fasheh {
1683bd3e7610SJoel Becker 	int level =  ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1684ccd979bdSMark Fasheh 	unsigned long flags;
1685bd3e7610SJoel Becker 	u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1686ccd979bdSMark Fasheh 
1687ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1688ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1689ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1690ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1691ccd979bdSMark Fasheh 
169224c19ef4SMark Fasheh 	return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1693ccd979bdSMark Fasheh }
1694ccd979bdSMark Fasheh 
1695ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping
1696ccd979bdSMark Fasheh  * the normal cluster directory lookup. Use this ONLY on newly created
1697ccd979bdSMark Fasheh  * inodes which other nodes can't possibly see, and which haven't been
1698ccd979bdSMark Fasheh  * hashed in the inode hash yet. This can give us a good performance
1699ccd979bdSMark Fasheh  * increase as it'll skip the network broadcast normally associated
1700ccd979bdSMark Fasheh  * with creating a new lock resource. */
1701ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode)
1702ccd979bdSMark Fasheh {
1703ccd979bdSMark Fasheh 	int ret;
1704d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1705ccd979bdSMark Fasheh 
1706ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_inode_is_new(inode));
1707ccd979bdSMark Fasheh 
1708b0697053SMark Fasheh 	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1709ccd979bdSMark Fasheh 
1710ccd979bdSMark Fasheh 	/* NOTE: That we don't increment any of the holder counts, nor
1711ccd979bdSMark Fasheh 	 * do we add anything to a journal handle. Since this is
1712ccd979bdSMark Fasheh 	 * supposed to be a new inode which the cluster doesn't know
1713ccd979bdSMark Fasheh 	 * about yet, there is no need to.  As far as the LVB handling
1714ccd979bdSMark Fasheh 	 * is concerned, this is basically like acquiring an EX lock
1715ccd979bdSMark Fasheh 	 * on a resource which has an invalid one -- we'll set it
1716ccd979bdSMark Fasheh 	 * valid when we release the EX. */
1717ccd979bdSMark Fasheh 
171824c19ef4SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1719ccd979bdSMark Fasheh 	if (ret) {
1720ccd979bdSMark Fasheh 		mlog_errno(ret);
1721ccd979bdSMark Fasheh 		goto bail;
1722ccd979bdSMark Fasheh 	}
1723ccd979bdSMark Fasheh 
172424c19ef4SMark Fasheh 	/*
1725bd3e7610SJoel Becker 	 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
172624c19ef4SMark Fasheh 	 * don't use a generation in their lock names.
172724c19ef4SMark Fasheh 	 */
1728e63aecb6SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
1729ccd979bdSMark Fasheh 	if (ret) {
1730ccd979bdSMark Fasheh 		mlog_errno(ret);
1731ccd979bdSMark Fasheh 		goto bail;
1732ccd979bdSMark Fasheh 	}
1733ccd979bdSMark Fasheh 
173450008630STiger Yang 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
1735a8f24f1bSJoseph Qi 	if (ret)
173650008630STiger Yang 		mlog_errno(ret);
173750008630STiger Yang 
1738ccd979bdSMark Fasheh bail:
1739ccd979bdSMark Fasheh 	return ret;
1740ccd979bdSMark Fasheh }
1741ccd979bdSMark Fasheh 
1742ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write)
1743ccd979bdSMark Fasheh {
1744ccd979bdSMark Fasheh 	int status, level;
1745ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres;
1746c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1747ccd979bdSMark Fasheh 
1748b0697053SMark Fasheh 	mlog(0, "inode %llu take %s RW lock\n",
1749b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1750ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1751ccd979bdSMark Fasheh 
1752c1e8d35eSTao Ma 	if (ocfs2_mount_local(osb))
1753c271c5c2SSunil Mushran 		return 0;
1754c271c5c2SSunil Mushran 
1755ccd979bdSMark Fasheh 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
1756ccd979bdSMark Fasheh 
1757bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1758ccd979bdSMark Fasheh 
1759ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1760ccd979bdSMark Fasheh 				    0);
1761ccd979bdSMark Fasheh 	if (status < 0)
1762ccd979bdSMark Fasheh 		mlog_errno(status);
1763ccd979bdSMark Fasheh 
1764ccd979bdSMark Fasheh 	return status;
1765ccd979bdSMark Fasheh }
1766ccd979bdSMark Fasheh 
176706e7f13dSGang He int ocfs2_try_rw_lock(struct inode *inode, int write)
176806e7f13dSGang He {
176906e7f13dSGang He 	int status, level;
177006e7f13dSGang He 	struct ocfs2_lock_res *lockres;
177106e7f13dSGang He 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
177206e7f13dSGang He 
177306e7f13dSGang He 	mlog(0, "inode %llu try to take %s RW lock\n",
177406e7f13dSGang He 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
177506e7f13dSGang He 	     write ? "EXMODE" : "PRMODE");
177606e7f13dSGang He 
177706e7f13dSGang He 	if (ocfs2_mount_local(osb))
177806e7f13dSGang He 		return 0;
177906e7f13dSGang He 
178006e7f13dSGang He 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
178106e7f13dSGang He 
178206e7f13dSGang He 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
178306e7f13dSGang He 
178406e7f13dSGang He 	status = ocfs2_cluster_lock(osb, lockres, level, DLM_LKF_NOQUEUE, 0);
178506e7f13dSGang He 	return status;
178606e7f13dSGang He }
178706e7f13dSGang He 
1788ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write)
1789ccd979bdSMark Fasheh {
1790bd3e7610SJoel Becker 	int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1791ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1792c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1793ccd979bdSMark Fasheh 
1794b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s RW lock\n",
1795b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1796ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1797ccd979bdSMark Fasheh 
1798c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
1799ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1800ccd979bdSMark Fasheh }
1801ccd979bdSMark Fasheh 
180250008630STiger Yang /*
180350008630STiger Yang  * ocfs2_open_lock always get PR mode lock.
180450008630STiger Yang  */
180550008630STiger Yang int ocfs2_open_lock(struct inode *inode)
180650008630STiger Yang {
180750008630STiger Yang 	int status = 0;
180850008630STiger Yang 	struct ocfs2_lock_res *lockres;
180950008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
181050008630STiger Yang 
181150008630STiger Yang 	mlog(0, "inode %llu take PRMODE open lock\n",
181250008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
181350008630STiger Yang 
181403efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
181550008630STiger Yang 		goto out;
181650008630STiger Yang 
181750008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
181850008630STiger Yang 
181950008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1820bd3e7610SJoel Becker 				    DLM_LOCK_PR, 0, 0);
182150008630STiger Yang 	if (status < 0)
182250008630STiger Yang 		mlog_errno(status);
182350008630STiger Yang 
182450008630STiger Yang out:
182550008630STiger Yang 	return status;
182650008630STiger Yang }
182750008630STiger Yang 
182850008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write)
182950008630STiger Yang {
183050008630STiger Yang 	int status = 0, level;
183150008630STiger Yang 	struct ocfs2_lock_res *lockres;
183250008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
183350008630STiger Yang 
183450008630STiger Yang 	mlog(0, "inode %llu try to take %s open lock\n",
183550008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
183650008630STiger Yang 	     write ? "EXMODE" : "PRMODE");
183750008630STiger Yang 
183803efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
183903efed8aSTiger Yang 		if (write)
184003efed8aSTiger Yang 			status = -EROFS;
184103efed8aSTiger Yang 		goto out;
184203efed8aSTiger Yang 	}
184303efed8aSTiger Yang 
184450008630STiger Yang 	if (ocfs2_mount_local(osb))
184550008630STiger Yang 		goto out;
184650008630STiger Yang 
184750008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
184850008630STiger Yang 
1849bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
185050008630STiger Yang 
185150008630STiger Yang 	/*
185250008630STiger Yang 	 * The file system may already holding a PRMODE/EXMODE open lock.
1853bd3e7610SJoel Becker 	 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
185450008630STiger Yang 	 * other nodes and the -EAGAIN will indicate to the caller that
185550008630STiger Yang 	 * this inode is still in use.
185650008630STiger Yang 	 */
185750008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1858bd3e7610SJoel Becker 				    level, DLM_LKF_NOQUEUE, 0);
185950008630STiger Yang 
186050008630STiger Yang out:
186150008630STiger Yang 	return status;
186250008630STiger Yang }
186350008630STiger Yang 
186450008630STiger Yang /*
186550008630STiger Yang  * ocfs2_open_unlock unlock PR and EX mode open locks.
186650008630STiger Yang  */
186750008630STiger Yang void ocfs2_open_unlock(struct inode *inode)
186850008630STiger Yang {
186950008630STiger Yang 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
187050008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
187150008630STiger Yang 
187250008630STiger Yang 	mlog(0, "inode %llu drop open lock\n",
187350008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
187450008630STiger Yang 
187550008630STiger Yang 	if (ocfs2_mount_local(osb))
187650008630STiger Yang 		goto out;
187750008630STiger Yang 
187850008630STiger Yang 	if(lockres->l_ro_holders)
187950008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1880bd3e7610SJoel Becker 				     DLM_LOCK_PR);
188150008630STiger Yang 	if(lockres->l_ex_holders)
188250008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1883bd3e7610SJoel Becker 				     DLM_LOCK_EX);
188450008630STiger Yang 
188550008630STiger Yang out:
1886c1e8d35eSTao Ma 	return;
188750008630STiger Yang }
188850008630STiger Yang 
1889cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1890cf8e06f1SMark Fasheh 				     int level)
1891cf8e06f1SMark Fasheh {
1892cf8e06f1SMark Fasheh 	int ret;
1893cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1894cf8e06f1SMark Fasheh 	unsigned long flags;
1895cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1896cf8e06f1SMark Fasheh 
1897cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1898cf8e06f1SMark Fasheh 
1899cf8e06f1SMark Fasheh retry_cancel:
1900cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1901cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
1902cf8e06f1SMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
1903cf8e06f1SMark Fasheh 		if (ret) {
1904cf8e06f1SMark Fasheh 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1905cf8e06f1SMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
1906cf8e06f1SMark Fasheh 			if (ret < 0) {
1907cf8e06f1SMark Fasheh 				mlog_errno(ret);
1908cf8e06f1SMark Fasheh 				goto out;
1909cf8e06f1SMark Fasheh 			}
1910cf8e06f1SMark Fasheh 			goto retry_cancel;
1911cf8e06f1SMark Fasheh 		}
1912cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1913cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1914cf8e06f1SMark Fasheh 
1915cf8e06f1SMark Fasheh 		ocfs2_wait_for_mask(&mw);
1916cf8e06f1SMark Fasheh 		goto retry_cancel;
1917cf8e06f1SMark Fasheh 	}
1918cf8e06f1SMark Fasheh 
1919cf8e06f1SMark Fasheh 	ret = -ERESTARTSYS;
1920cf8e06f1SMark Fasheh 	/*
1921cf8e06f1SMark Fasheh 	 * We may still have gotten the lock, in which case there's no
1922cf8e06f1SMark Fasheh 	 * point to restarting the syscall.
1923cf8e06f1SMark Fasheh 	 */
1924cf8e06f1SMark Fasheh 	if (lockres->l_level == level)
1925cf8e06f1SMark Fasheh 		ret = 0;
1926cf8e06f1SMark Fasheh 
1927cf8e06f1SMark Fasheh 	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1928cf8e06f1SMark Fasheh 	     lockres->l_flags, lockres->l_level, lockres->l_action);
1929cf8e06f1SMark Fasheh 
1930cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1931cf8e06f1SMark Fasheh 
1932cf8e06f1SMark Fasheh out:
1933cf8e06f1SMark Fasheh 	return ret;
1934cf8e06f1SMark Fasheh }
1935cf8e06f1SMark Fasheh 
1936cf8e06f1SMark Fasheh /*
1937cf8e06f1SMark Fasheh  * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1938cf8e06f1SMark Fasheh  * flock() calls. The locking approach this requires is sufficiently
1939cf8e06f1SMark Fasheh  * different from all other cluster lock types that we implement a
19403ad2f3fbSDaniel Mack  * separate path to the "low-level" dlm calls. In particular:
1941cf8e06f1SMark Fasheh  *
1942cf8e06f1SMark Fasheh  * - No optimization of lock levels is done - we take at exactly
1943cf8e06f1SMark Fasheh  *   what's been requested.
1944cf8e06f1SMark Fasheh  *
1945cf8e06f1SMark Fasheh  * - No lock caching is employed. We immediately downconvert to
1946cf8e06f1SMark Fasheh  *   no-lock at unlock time. This also means flock locks never go on
1947cf8e06f1SMark Fasheh  *   the blocking list).
1948cf8e06f1SMark Fasheh  *
1949cf8e06f1SMark Fasheh  * - Since userspace can trivially deadlock itself with flock, we make
1950cf8e06f1SMark Fasheh  *   sure to allow cancellation of a misbehaving applications flock()
1951cf8e06f1SMark Fasheh  *   request.
1952cf8e06f1SMark Fasheh  *
1953cf8e06f1SMark Fasheh  * - Access to any flock lockres doesn't require concurrency, so we
1954cf8e06f1SMark Fasheh  *   can simplify the code by requiring the caller to guarantee
1955cf8e06f1SMark Fasheh  *   serialization of dlmglue flock calls.
1956cf8e06f1SMark Fasheh  */
1957cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock)
1958cf8e06f1SMark Fasheh {
1959e988cf1cSMark Fasheh 	int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1960e988cf1cSMark Fasheh 	unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
1961cf8e06f1SMark Fasheh 	unsigned long flags;
1962cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
1963cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
1964cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1965cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1966cf8e06f1SMark Fasheh 
1967cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1968cf8e06f1SMark Fasheh 
1969cf8e06f1SMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
1970bd3e7610SJoel Becker 	    (lockres->l_level > DLM_LOCK_NL)) {
1971cf8e06f1SMark Fasheh 		mlog(ML_ERROR,
1972cf8e06f1SMark Fasheh 		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1973cf8e06f1SMark Fasheh 		     "level: %u\n", lockres->l_name, lockres->l_flags,
1974cf8e06f1SMark Fasheh 		     lockres->l_level);
1975cf8e06f1SMark Fasheh 		return -EINVAL;
1976cf8e06f1SMark Fasheh 	}
1977cf8e06f1SMark Fasheh 
1978cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1979cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1980cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1981cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1982cf8e06f1SMark Fasheh 
1983cf8e06f1SMark Fasheh 		/*
1984cf8e06f1SMark Fasheh 		 * Get the lock at NLMODE to start - that way we
1985cf8e06f1SMark Fasheh 		 * can cancel the upconvert request if need be.
1986cf8e06f1SMark Fasheh 		 */
1987e988cf1cSMark Fasheh 		ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
1988cf8e06f1SMark Fasheh 		if (ret < 0) {
1989cf8e06f1SMark Fasheh 			mlog_errno(ret);
1990cf8e06f1SMark Fasheh 			goto out;
1991cf8e06f1SMark Fasheh 		}
1992cf8e06f1SMark Fasheh 
1993cf8e06f1SMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
1994cf8e06f1SMark Fasheh 		if (ret) {
1995cf8e06f1SMark Fasheh 			mlog_errno(ret);
1996cf8e06f1SMark Fasheh 			goto out;
1997cf8e06f1SMark Fasheh 		}
1998cf8e06f1SMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
1999cf8e06f1SMark Fasheh 	}
2000cf8e06f1SMark Fasheh 
2001cf8e06f1SMark Fasheh 	lockres->l_action = OCFS2_AST_CONVERT;
2002e988cf1cSMark Fasheh 	lkm_flags |= DLM_LKF_CONVERT;
2003cf8e06f1SMark Fasheh 	lockres->l_requested = level;
2004cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
2005cf8e06f1SMark Fasheh 
2006cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
2007cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2008cf8e06f1SMark Fasheh 
20094670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
2010a796d286SJoel Becker 			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1);
20117431cd7eSJoel Becker 	if (ret) {
20127431cd7eSJoel Becker 		if (!trylock || (ret != -EAGAIN)) {
201324ef1815SJoel Becker 			ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
2014cf8e06f1SMark Fasheh 			ret = -EINVAL;
2015cf8e06f1SMark Fasheh 		}
2016cf8e06f1SMark Fasheh 
2017cf8e06f1SMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
2018cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, &mw);
2019cf8e06f1SMark Fasheh 		goto out;
2020cf8e06f1SMark Fasheh 	}
2021cf8e06f1SMark Fasheh 
2022cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
2023cf8e06f1SMark Fasheh 	if (ret == -ERESTARTSYS) {
2024cf8e06f1SMark Fasheh 		/*
2025cf8e06f1SMark Fasheh 		 * Userspace can cause deadlock itself with
2026cf8e06f1SMark Fasheh 		 * flock(). Current behavior locally is to allow the
2027cf8e06f1SMark Fasheh 		 * deadlock, but abort the system call if a signal is
2028cf8e06f1SMark Fasheh 		 * received. We follow this example, otherwise a
2029cf8e06f1SMark Fasheh 		 * poorly written program could sit in kernel until
2030cf8e06f1SMark Fasheh 		 * reboot.
2031cf8e06f1SMark Fasheh 		 *
2032cf8e06f1SMark Fasheh 		 * Handling this is a bit more complicated for Ocfs2
2033cf8e06f1SMark Fasheh 		 * though. We can't exit this function with an
2034cf8e06f1SMark Fasheh 		 * outstanding lock request, so a cancel convert is
2035cf8e06f1SMark Fasheh 		 * required. We intentionally overwrite 'ret' - if the
2036cf8e06f1SMark Fasheh 		 * cancel fails and the lock was granted, it's easier
2037af901ca1SAndré Goddard Rosa 		 * to just bubble success back up to the user.
2038cf8e06f1SMark Fasheh 		 */
2039cf8e06f1SMark Fasheh 		ret = ocfs2_flock_handle_signal(lockres, level);
20401693a5c0SDavid Teigland 	} else if (!ret && (level > lockres->l_level)) {
20411693a5c0SDavid Teigland 		/* Trylock failed asynchronously */
20421693a5c0SDavid Teigland 		BUG_ON(!trylock);
20431693a5c0SDavid Teigland 		ret = -EAGAIN;
2044cf8e06f1SMark Fasheh 	}
2045cf8e06f1SMark Fasheh 
2046cf8e06f1SMark Fasheh out:
2047cf8e06f1SMark Fasheh 
2048cf8e06f1SMark Fasheh 	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
2049cf8e06f1SMark Fasheh 	     lockres->l_name, ex, trylock, ret);
2050cf8e06f1SMark Fasheh 	return ret;
2051cf8e06f1SMark Fasheh }
2052cf8e06f1SMark Fasheh 
2053cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file)
2054cf8e06f1SMark Fasheh {
2055cf8e06f1SMark Fasheh 	int ret;
2056de551246SJoel Becker 	unsigned int gen;
2057cf8e06f1SMark Fasheh 	unsigned long flags;
2058cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
2059cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
2060cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
2061cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
2062cf8e06f1SMark Fasheh 
2063cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
2064cf8e06f1SMark Fasheh 
2065cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
2066cf8e06f1SMark Fasheh 		return;
2067cf8e06f1SMark Fasheh 
2068e988cf1cSMark Fasheh 	if (lockres->l_level == DLM_LOCK_NL)
2069cf8e06f1SMark Fasheh 		return;
2070cf8e06f1SMark Fasheh 
2071cf8e06f1SMark Fasheh 	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
2072cf8e06f1SMark Fasheh 	     lockres->l_name, lockres->l_flags, lockres->l_level,
2073cf8e06f1SMark Fasheh 	     lockres->l_action);
2074cf8e06f1SMark Fasheh 
2075cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2076cf8e06f1SMark Fasheh 	/*
2077cf8e06f1SMark Fasheh 	 * Fake a blocking ast for the downconvert code.
2078cf8e06f1SMark Fasheh 	 */
2079cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
2080bd3e7610SJoel Becker 	lockres->l_blocking = DLM_LOCK_EX;
2081cf8e06f1SMark Fasheh 
2082e988cf1cSMark Fasheh 	gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
2083cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
2084cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2085cf8e06f1SMark Fasheh 
2086e988cf1cSMark Fasheh 	ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
2087cf8e06f1SMark Fasheh 	if (ret) {
2088cf8e06f1SMark Fasheh 		mlog_errno(ret);
2089cf8e06f1SMark Fasheh 		return;
2090cf8e06f1SMark Fasheh 	}
2091cf8e06f1SMark Fasheh 
2092cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask(&mw);
2093cf8e06f1SMark Fasheh 	if (ret)
2094cf8e06f1SMark Fasheh 		mlog_errno(ret);
2095cf8e06f1SMark Fasheh }
2096cf8e06f1SMark Fasheh 
209734d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
2098ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
2099ccd979bdSMark Fasheh {
2100ccd979bdSMark Fasheh 	int kick = 0;
2101ccd979bdSMark Fasheh 
2102ccd979bdSMark Fasheh 	/* If we know that another node is waiting on our lock, kick
210334d024f8SMark Fasheh 	 * the downconvert thread * pre-emptively when we reach a release
2104ccd979bdSMark Fasheh 	 * condition. */
2105ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
2106ccd979bdSMark Fasheh 		switch(lockres->l_blocking) {
2107bd3e7610SJoel Becker 		case DLM_LOCK_EX:
2108ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders && !lockres->l_ro_holders)
2109ccd979bdSMark Fasheh 				kick = 1;
2110ccd979bdSMark Fasheh 			break;
2111bd3e7610SJoel Becker 		case DLM_LOCK_PR:
2112ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders)
2113ccd979bdSMark Fasheh 				kick = 1;
2114ccd979bdSMark Fasheh 			break;
2115ccd979bdSMark Fasheh 		default:
2116ccd979bdSMark Fasheh 			BUG();
2117ccd979bdSMark Fasheh 		}
2118ccd979bdSMark Fasheh 	}
2119ccd979bdSMark Fasheh 
2120ccd979bdSMark Fasheh 	if (kick)
212134d024f8SMark Fasheh 		ocfs2_wake_downconvert_thread(osb);
2122ccd979bdSMark Fasheh }
2123ccd979bdSMark Fasheh 
2124ccd979bdSMark Fasheh #define OCFS2_SEC_BITS   34
2125ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT  (64 - 34)
2126ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
2127ccd979bdSMark Fasheh 
2128ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for
2129ccd979bdSMark Fasheh  * now. */
2130ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec)
2131ccd979bdSMark Fasheh {
2132ccd979bdSMark Fasheh 	u64 res;
2133ccd979bdSMark Fasheh 	u64 sec = spec->tv_sec;
2134ccd979bdSMark Fasheh 	u32 nsec = spec->tv_nsec;
2135ccd979bdSMark Fasheh 
2136ccd979bdSMark Fasheh 	res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
2137ccd979bdSMark Fasheh 
2138ccd979bdSMark Fasheh 	return res;
2139ccd979bdSMark Fasheh }
2140ccd979bdSMark Fasheh 
2141ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't
2142ccd979bdSMark Fasheh  * need ip_lock in this function as anyone who would be changing those
2143e63aecb6SMark Fasheh  * values is supposed to be blocked in ocfs2_inode_lock right now. */
2144ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2145ccd979bdSMark Fasheh {
2146ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2147e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2148ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2149ccd979bdSMark Fasheh 
2150a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2151ccd979bdSMark Fasheh 
215224c19ef4SMark Fasheh 	/*
215324c19ef4SMark Fasheh 	 * Invalidate the LVB of a deleted inode - this way other
215424c19ef4SMark Fasheh 	 * nodes are forced to go to disk and discover the new inode
215524c19ef4SMark Fasheh 	 * status.
215624c19ef4SMark Fasheh 	 */
215724c19ef4SMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
215824c19ef4SMark Fasheh 		lvb->lvb_version = 0;
215924c19ef4SMark Fasheh 		goto out;
216024c19ef4SMark Fasheh 	}
216124c19ef4SMark Fasheh 
21624d3b83f7SMark Fasheh 	lvb->lvb_version   = OCFS2_LVB_VERSION;
2163ccd979bdSMark Fasheh 	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode));
2164ccd979bdSMark Fasheh 	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
216503ab30f7SEric W. Biederman 	lvb->lvb_iuid      = cpu_to_be32(i_uid_read(inode));
216603ab30f7SEric W. Biederman 	lvb->lvb_igid      = cpu_to_be32(i_gid_read(inode));
2167ccd979bdSMark Fasheh 	lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
2168ccd979bdSMark Fasheh 	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
2169ccd979bdSMark Fasheh 	lvb->lvb_iatime_packed  =
2170ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
2171ccd979bdSMark Fasheh 	lvb->lvb_ictime_packed =
2172ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
2173ccd979bdSMark Fasheh 	lvb->lvb_imtime_packed =
2174ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
2175ca4d147eSHerbert Poetzl 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
217615b1e36bSMark Fasheh 	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
2177f9e2d82eSMark Fasheh 	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
2178ccd979bdSMark Fasheh 
217924c19ef4SMark Fasheh out:
2180ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2181ccd979bdSMark Fasheh }
2182ccd979bdSMark Fasheh 
2183ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec,
2184ccd979bdSMark Fasheh 				  u64 packed_time)
2185ccd979bdSMark Fasheh {
2186ccd979bdSMark Fasheh 	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
2187ccd979bdSMark Fasheh 	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
2188ccd979bdSMark Fasheh }
2189ccd979bdSMark Fasheh 
2190ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2191ccd979bdSMark Fasheh {
2192ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2193e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2194ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2195ccd979bdSMark Fasheh 
2196ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2197ccd979bdSMark Fasheh 
2198a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2199ccd979bdSMark Fasheh 
2200ccd979bdSMark Fasheh 	/* We're safe here without the lockres lock... */
2201ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2202ccd979bdSMark Fasheh 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
2203ccd979bdSMark Fasheh 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
2204ccd979bdSMark Fasheh 
2205ca4d147eSHerbert Poetzl 	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
220615b1e36bSMark Fasheh 	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
2207ca4d147eSHerbert Poetzl 	ocfs2_set_inode_flags(inode);
2208ca4d147eSHerbert Poetzl 
2209ccd979bdSMark Fasheh 	/* fast-symlinks are a special case */
2210ccd979bdSMark Fasheh 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
2211ccd979bdSMark Fasheh 		inode->i_blocks = 0;
2212ccd979bdSMark Fasheh 	else
22138110b073SMark Fasheh 		inode->i_blocks = ocfs2_inode_sector_count(inode);
2214ccd979bdSMark Fasheh 
221503ab30f7SEric W. Biederman 	i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
221603ab30f7SEric W. Biederman 	i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
2217ccd979bdSMark Fasheh 	inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
2218bfe86848SMiklos Szeredi 	set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2219ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_atime,
2220ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_iatime_packed));
2221ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_mtime,
2222ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_imtime_packed));
2223ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_ctime,
2224ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_ictime_packed));
2225ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2226ccd979bdSMark Fasheh }
2227ccd979bdSMark Fasheh 
2228f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
2229f9e2d82eSMark Fasheh 					      struct ocfs2_lock_res *lockres)
2230ccd979bdSMark Fasheh {
2231a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2232ccd979bdSMark Fasheh 
22331c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
22341c520dfbSJoel Becker 	    && lvb->lvb_version == OCFS2_LVB_VERSION
2235f9e2d82eSMark Fasheh 	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
2236ccd979bdSMark Fasheh 		return 1;
2237ccd979bdSMark Fasheh 	return 0;
2238ccd979bdSMark Fasheh }
2239ccd979bdSMark Fasheh 
2240ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and
2241ccd979bdSMark Fasheh  * arbitrate who gets to refresh it.
2242ccd979bdSMark Fasheh  *
2243ccd979bdSMark Fasheh  *   0 means no refresh needed.
2244ccd979bdSMark Fasheh  *
2245ccd979bdSMark Fasheh  *   > 0 means you need to refresh this and you MUST call
2246ccd979bdSMark Fasheh  *   ocfs2_complete_lock_res_refresh afterwards. */
2247ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2248ccd979bdSMark Fasheh {
2249ccd979bdSMark Fasheh 	unsigned long flags;
2250ccd979bdSMark Fasheh 	int status = 0;
2251ccd979bdSMark Fasheh 
2252ccd979bdSMark Fasheh refresh_check:
2253ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2254ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2255ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2256ccd979bdSMark Fasheh 		goto bail;
2257ccd979bdSMark Fasheh 	}
2258ccd979bdSMark Fasheh 
2259ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2260ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2261ccd979bdSMark Fasheh 
2262ccd979bdSMark Fasheh 		ocfs2_wait_on_refreshing_lock(lockres);
2263ccd979bdSMark Fasheh 		goto refresh_check;
2264ccd979bdSMark Fasheh 	}
2265ccd979bdSMark Fasheh 
2266ccd979bdSMark Fasheh 	/* Ok, I'll be the one to refresh this lock. */
2267ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
2268ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2269ccd979bdSMark Fasheh 
2270ccd979bdSMark Fasheh 	status = 1;
2271ccd979bdSMark Fasheh bail:
2272c1e8d35eSTao Ma 	mlog(0, "status %d\n", status);
2273ccd979bdSMark Fasheh 	return status;
2274ccd979bdSMark Fasheh }
2275ccd979bdSMark Fasheh 
2276ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh
2277ccd979bdSMark Fasheh  * anymroe, but i won't clear the needs refresh flag. */
2278ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
2279ccd979bdSMark Fasheh 						   int status)
2280ccd979bdSMark Fasheh {
2281ccd979bdSMark Fasheh 	unsigned long flags;
2282ccd979bdSMark Fasheh 
2283ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2284ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
2285ccd979bdSMark Fasheh 	if (!status)
2286ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
2287ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2288ccd979bdSMark Fasheh 
2289ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
2290ccd979bdSMark Fasheh }
2291ccd979bdSMark Fasheh 
2292ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. */
2293e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode,
2294ccd979bdSMark Fasheh 				  struct buffer_head **bh)
2295ccd979bdSMark Fasheh {
2296ccd979bdSMark Fasheh 	int status = 0;
2297ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2298e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2299ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
2300c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2301ccd979bdSMark Fasheh 
2302be9e986bSMark Fasheh 	if (ocfs2_mount_local(osb))
2303be9e986bSMark Fasheh 		goto bail;
2304be9e986bSMark Fasheh 
2305ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2306ccd979bdSMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
2307b0697053SMark Fasheh 		mlog(0, "Orphaned inode %llu was deleted while we "
2308ccd979bdSMark Fasheh 		     "were waiting on a lock. ip_flags = 0x%x\n",
2309b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
2310ccd979bdSMark Fasheh 		spin_unlock(&oi->ip_lock);
2311ccd979bdSMark Fasheh 		status = -ENOENT;
2312ccd979bdSMark Fasheh 		goto bail;
2313ccd979bdSMark Fasheh 	}
2314ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2315ccd979bdSMark Fasheh 
2316ccd979bdSMark Fasheh 	if (!ocfs2_should_refresh_lock_res(lockres))
2317ccd979bdSMark Fasheh 		goto bail;
2318ccd979bdSMark Fasheh 
2319ccd979bdSMark Fasheh 	/* This will discard any caching information we might have had
2320ccd979bdSMark Fasheh 	 * for the inode metadata. */
23218cb471e8SJoel Becker 	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
2322ccd979bdSMark Fasheh 
232383418978SMark Fasheh 	ocfs2_extent_map_trunc(inode, 0);
232483418978SMark Fasheh 
2325be9e986bSMark Fasheh 	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
2326b0697053SMark Fasheh 		mlog(0, "Trusting LVB on inode %llu\n",
2327b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno);
2328ccd979bdSMark Fasheh 		ocfs2_refresh_inode_from_lvb(inode);
2329ccd979bdSMark Fasheh 	} else {
2330ccd979bdSMark Fasheh 		/* Boo, we have to go to disk. */
2331ccd979bdSMark Fasheh 		/* read bh, cast, ocfs2_refresh_inode */
2332b657c95cSJoel Becker 		status = ocfs2_read_inode_block(inode, bh);
2333ccd979bdSMark Fasheh 		if (status < 0) {
2334ccd979bdSMark Fasheh 			mlog_errno(status);
2335ccd979bdSMark Fasheh 			goto bail_refresh;
2336ccd979bdSMark Fasheh 		}
2337ccd979bdSMark Fasheh 		fe = (struct ocfs2_dinode *) (*bh)->b_data;
2338ccd979bdSMark Fasheh 
2339ccd979bdSMark Fasheh 		/* This is a good chance to make sure we're not
2340b657c95cSJoel Becker 		 * locking an invalid object.  ocfs2_read_inode_block()
2341b657c95cSJoel Becker 		 * already checked that the inode block is sane.
2342ccd979bdSMark Fasheh 		 *
2343ccd979bdSMark Fasheh 		 * We bug on a stale inode here because we checked
2344ccd979bdSMark Fasheh 		 * above whether it was wiped from disk. The wiping
2345ccd979bdSMark Fasheh 		 * node provides a guarantee that we receive that
2346ccd979bdSMark Fasheh 		 * message and can mark the inode before dropping any
2347ccd979bdSMark Fasheh 		 * locks associated with it. */
2348ccd979bdSMark Fasheh 		mlog_bug_on_msg(inode->i_generation !=
2349ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_generation),
2350b0697053SMark Fasheh 				"Invalid dinode %llu disk generation: %u "
2351ccd979bdSMark Fasheh 				"inode->i_generation: %u\n",
2352b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2353b0697053SMark Fasheh 				le32_to_cpu(fe->i_generation),
2354ccd979bdSMark Fasheh 				inode->i_generation);
2355ccd979bdSMark Fasheh 		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
2356ccd979bdSMark Fasheh 				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
2357b0697053SMark Fasheh 				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
2358b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2359b0697053SMark Fasheh 				(unsigned long long)le64_to_cpu(fe->i_dtime),
2360ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_flags));
2361ccd979bdSMark Fasheh 
2362ccd979bdSMark Fasheh 		ocfs2_refresh_inode(inode, fe);
23638ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2364ccd979bdSMark Fasheh 	}
2365ccd979bdSMark Fasheh 
2366ccd979bdSMark Fasheh 	status = 0;
2367ccd979bdSMark Fasheh bail_refresh:
2368ccd979bdSMark Fasheh 	ocfs2_complete_lock_res_refresh(lockres, status);
2369ccd979bdSMark Fasheh bail:
2370ccd979bdSMark Fasheh 	return status;
2371ccd979bdSMark Fasheh }
2372ccd979bdSMark Fasheh 
/*
 * Hand an inode buffer head back to the caller through @ret_bh.
 *
 * When @passed_bh is non-NULL it is reused: an extra reference is taken
 * with get_bh() and 0 is returned.  Otherwise the inode block is read via
 * ocfs2_read_inode_block() and that call's status is returned (and logged
 * when negative).
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int err;

	if (!passed_bh) {
		/* Nobody read the block for us yet, so do it now. */
		err = ocfs2_read_inode_block(inode, ret_bh);
		if (err < 0)
			mlog_errno(err);
		return err;
	}

	/* The update path already produced a bh; share it with the caller. */
	get_bh(passed_bh);
	*ret_bh = passed_bh;

	return 0;
}
2394ccd979bdSMark Fasheh 
2395ccd979bdSMark Fasheh /*
2396ccd979bdSMark Fasheh  * returns < 0 error if the callback will never be called, otherwise
2397ccd979bdSMark Fasheh  * the result of the lock will be communicated via the callback.
2398ccd979bdSMark Fasheh  */
2399cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode,
2400ccd979bdSMark Fasheh 				 struct buffer_head **ret_bh,
2401ccd979bdSMark Fasheh 				 int ex,
2402cb25797dSJan Kara 				 int arg_flags,
2403cb25797dSJan Kara 				 int subclass)
2404ccd979bdSMark Fasheh {
2405bd3e7610SJoel Becker 	int status, level, acquired;
2406bd3e7610SJoel Becker 	u32 dlm_flags;
2407c271c5c2SSunil Mushran 	struct ocfs2_lock_res *lockres = NULL;
2408ccd979bdSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2409ccd979bdSMark Fasheh 	struct buffer_head *local_bh = NULL;
2410ccd979bdSMark Fasheh 
2411b0697053SMark Fasheh 	mlog(0, "inode %llu, take %s META lock\n",
2412b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2413ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2414ccd979bdSMark Fasheh 
2415ccd979bdSMark Fasheh 	status = 0;
2416ccd979bdSMark Fasheh 	acquired = 0;
2417ccd979bdSMark Fasheh 	/* We'll allow faking a readonly metadata lock for
2418ccd979bdSMark Fasheh 	 * rodevices. */
2419ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb)) {
2420ccd979bdSMark Fasheh 		if (ex)
2421ccd979bdSMark Fasheh 			status = -EROFS;
242203efed8aSTiger Yang 		goto getbh;
2423ccd979bdSMark Fasheh 	}
2424ccd979bdSMark Fasheh 
2425439a36b8SEric Ren 	if ((arg_flags & OCFS2_META_LOCK_GETBH) ||
2426439a36b8SEric Ren 	    ocfs2_mount_local(osb))
2427439a36b8SEric Ren 		goto update;
2428c271c5c2SSunil Mushran 
2429ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2430553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2431ccd979bdSMark Fasheh 
2432e63aecb6SMark Fasheh 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
2433bd3e7610SJoel Becker 	level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2434ccd979bdSMark Fasheh 	dlm_flags = 0;
2435ccd979bdSMark Fasheh 	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2436bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_NOQUEUE;
2437ccd979bdSMark Fasheh 
2438cb25797dSJan Kara 	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2439cb25797dSJan Kara 				      arg_flags, subclass, _RET_IP_);
2440ccd979bdSMark Fasheh 	if (status < 0) {
244141003a7bSZach Brown 		if (status != -EAGAIN)
2442ccd979bdSMark Fasheh 			mlog_errno(status);
2443ccd979bdSMark Fasheh 		goto bail;
2444ccd979bdSMark Fasheh 	}
2445ccd979bdSMark Fasheh 
2446ccd979bdSMark Fasheh 	/* Notify the error cleanup path to drop the cluster lock. */
2447ccd979bdSMark Fasheh 	acquired = 1;
2448ccd979bdSMark Fasheh 
2449ccd979bdSMark Fasheh 	/* We wait twice because a node may have died while we were in
2450ccd979bdSMark Fasheh 	 * the lower dlm layers. The second time though, we've
2451ccd979bdSMark Fasheh 	 * committed to owning this lock so we don't allow signals to
2452ccd979bdSMark Fasheh 	 * abort the operation. */
2453ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2454553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2455ccd979bdSMark Fasheh 
2456439a36b8SEric Ren update:
245724c19ef4SMark Fasheh 	/*
245824c19ef4SMark Fasheh 	 * We only see this flag if we're being called from
245924c19ef4SMark Fasheh 	 * ocfs2_read_locked_inode(). It means we're locking an inode
246024c19ef4SMark Fasheh 	 * which hasn't been populated yet, so clear the refresh flag
246124c19ef4SMark Fasheh 	 * and let the caller handle it.
246224c19ef4SMark Fasheh 	 */
246324c19ef4SMark Fasheh 	if (inode->i_state & I_NEW) {
246424c19ef4SMark Fasheh 		status = 0;
2465c271c5c2SSunil Mushran 		if (lockres)
246624c19ef4SMark Fasheh 			ocfs2_complete_lock_res_refresh(lockres, 0);
246724c19ef4SMark Fasheh 		goto bail;
246824c19ef4SMark Fasheh 	}
246924c19ef4SMark Fasheh 
2470ccd979bdSMark Fasheh 	/* This is fun. The caller may want a bh back, or it may
2471e63aecb6SMark Fasheh 	 * not. ocfs2_inode_lock_update definitely wants one in, but
2472ccd979bdSMark Fasheh 	 * may or may not read one, depending on what's in the
2473ccd979bdSMark Fasheh 	 * LVB. The result of all of this is that we've *only* gone to
2474ccd979bdSMark Fasheh 	 * disk if we have to, so the complexity is worthwhile. */
2475e63aecb6SMark Fasheh 	status = ocfs2_inode_lock_update(inode, &local_bh);
2476ccd979bdSMark Fasheh 	if (status < 0) {
2477ccd979bdSMark Fasheh 		if (status != -ENOENT)
2478ccd979bdSMark Fasheh 			mlog_errno(status);
2479ccd979bdSMark Fasheh 		goto bail;
2480ccd979bdSMark Fasheh 	}
248103efed8aSTiger Yang getbh:
2482ccd979bdSMark Fasheh 	if (ret_bh) {
2483ccd979bdSMark Fasheh 		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2484ccd979bdSMark Fasheh 		if (status < 0) {
2485ccd979bdSMark Fasheh 			mlog_errno(status);
2486ccd979bdSMark Fasheh 			goto bail;
2487ccd979bdSMark Fasheh 		}
2488ccd979bdSMark Fasheh 	}
2489ccd979bdSMark Fasheh 
2490ccd979bdSMark Fasheh bail:
2491ccd979bdSMark Fasheh 	if (status < 0) {
2492ccd979bdSMark Fasheh 		if (ret_bh && (*ret_bh)) {
2493ccd979bdSMark Fasheh 			brelse(*ret_bh);
2494ccd979bdSMark Fasheh 			*ret_bh = NULL;
2495ccd979bdSMark Fasheh 		}
2496ccd979bdSMark Fasheh 		if (acquired)
2497e63aecb6SMark Fasheh 			ocfs2_inode_unlock(inode, ex);
2498ccd979bdSMark Fasheh 	}
2499ccd979bdSMark Fasheh 
2500ccd979bdSMark Fasheh 	if (local_bh)
2501ccd979bdSMark Fasheh 		brelse(local_bh);
2502ccd979bdSMark Fasheh 
2503ccd979bdSMark Fasheh 	return status;
2504ccd979bdSMark Fasheh }
2505ccd979bdSMark Fasheh 
2506ccd979bdSMark Fasheh /*
250734d024f8SMark Fasheh  * This is working around a lock inversion between tasks acquiring DLM
250834d024f8SMark Fasheh  * locks while holding a page lock and the downconvert thread which
250934d024f8SMark Fasheh  * blocks dlm lock acquiry while acquiring page locks.
2510ccd979bdSMark Fasheh  *
2511ccd979bdSMark Fasheh  * ** These _with_page variantes are only intended to be called from aop
2512ccd979bdSMark Fasheh  * methods that hold page locks and return a very specific *positive* error
2513ccd979bdSMark Fasheh  * code that aop methods pass up to the VFS -- test for errors with != 0. **
2514ccd979bdSMark Fasheh  *
251534d024f8SMark Fasheh  * The DLM is called such that it returns -EAGAIN if it would have
251634d024f8SMark Fasheh  * blocked waiting for the downconvert thread.  In that case we unlock
251734d024f8SMark Fasheh  * our page so the downconvert thread can make progress.  Once we've
251834d024f8SMark Fasheh  * done this we have to return AOP_TRUNCATED_PAGE so the aop method
251934d024f8SMark Fasheh  * that called us can bubble that back up into the VFS who will then
252034d024f8SMark Fasheh  * immediately retry the aop call.
2521ccd979bdSMark Fasheh  */
2522e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode,
2523ccd979bdSMark Fasheh 			      struct buffer_head **ret_bh,
2524ccd979bdSMark Fasheh 			      int ex,
2525ccd979bdSMark Fasheh 			      struct page *page)
2526ccd979bdSMark Fasheh {
2527ccd979bdSMark Fasheh 	int ret;
2528ccd979bdSMark Fasheh 
2529e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2530ccd979bdSMark Fasheh 	if (ret == -EAGAIN) {
2531ccd979bdSMark Fasheh 		unlock_page(page);
2532ff26cc10SGang He 		/*
2533ff26cc10SGang He 		 * If we can't get inode lock immediately, we should not return
2534ff26cc10SGang He 		 * directly here, since this will lead to a softlockup problem.
2535ff26cc10SGang He 		 * The method is to get a blocking lock and immediately unlock
2536ff26cc10SGang He 		 * before returning, this can avoid CPU resource waste due to
2537ff26cc10SGang He 		 * lots of retries, and benefits fairness in getting lock.
2538ff26cc10SGang He 		 */
2539ff26cc10SGang He 		if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
2540ff26cc10SGang He 			ocfs2_inode_unlock(inode, ex);
2541ccd979bdSMark Fasheh 		ret = AOP_TRUNCATED_PAGE;
2542ccd979bdSMark Fasheh 	}
2543ccd979bdSMark Fasheh 
2544ccd979bdSMark Fasheh 	return ret;
2545ccd979bdSMark Fasheh }
2546ccd979bdSMark Fasheh 
2547e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode,
25487f1a37e3STiger Yang 			  struct vfsmount *vfsmnt,
25497f1a37e3STiger Yang 			  int *level)
25507f1a37e3STiger Yang {
25517f1a37e3STiger Yang 	int ret;
25527f1a37e3STiger Yang 
2553e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock(inode, NULL, 0);
25547f1a37e3STiger Yang 	if (ret < 0) {
25557f1a37e3STiger Yang 		mlog_errno(ret);
25567f1a37e3STiger Yang 		return ret;
25577f1a37e3STiger Yang 	}
25587f1a37e3STiger Yang 
25597f1a37e3STiger Yang 	/*
25607f1a37e3STiger Yang 	 * If we should update atime, we will get EX lock,
25617f1a37e3STiger Yang 	 * otherwise we just get PR lock.
25627f1a37e3STiger Yang 	 */
25637f1a37e3STiger Yang 	if (ocfs2_should_update_atime(inode, vfsmnt)) {
25647f1a37e3STiger Yang 		struct buffer_head *bh = NULL;
25657f1a37e3STiger Yang 
2566e63aecb6SMark Fasheh 		ocfs2_inode_unlock(inode, 0);
2567e63aecb6SMark Fasheh 		ret = ocfs2_inode_lock(inode, &bh, 1);
25687f1a37e3STiger Yang 		if (ret < 0) {
25697f1a37e3STiger Yang 			mlog_errno(ret);
25707f1a37e3STiger Yang 			return ret;
25717f1a37e3STiger Yang 		}
25727f1a37e3STiger Yang 		*level = 1;
25737f1a37e3STiger Yang 		if (ocfs2_should_update_atime(inode, vfsmnt))
25747f1a37e3STiger Yang 			ocfs2_update_inode_atime(inode, bh);
25757f1a37e3STiger Yang 		if (bh)
25767f1a37e3STiger Yang 			brelse(bh);
25777f1a37e3STiger Yang 	} else
25787f1a37e3STiger Yang 		*level = 0;
25797f1a37e3STiger Yang 
25807f1a37e3STiger Yang 	return ret;
25817f1a37e3STiger Yang }
25827f1a37e3STiger Yang 
2583e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode,
2584ccd979bdSMark Fasheh 		       int ex)
2585ccd979bdSMark Fasheh {
2586bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2587e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2588c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2589ccd979bdSMark Fasheh 
2590b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s META lock\n",
2591b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2592ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2593ccd979bdSMark Fasheh 
2594c271c5c2SSunil Mushran 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
2595c271c5c2SSunil Mushran 	    !ocfs2_mount_local(osb))
2596ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
2597ccd979bdSMark Fasheh }
2598ccd979bdSMark Fasheh 
2599439a36b8SEric Ren /*
2600439a36b8SEric Ren  * This _tracker variantes are introduced to deal with the recursive cluster
2601439a36b8SEric Ren  * locking issue. The idea is to keep track of a lock holder on the stack of
2602439a36b8SEric Ren  * the current process. If there's a lock holder on the stack, we know the
2603439a36b8SEric Ren  * task context is already protected by cluster locking. Currently, they're
2604439a36b8SEric Ren  * used in some VFS entry routines.
2605439a36b8SEric Ren  *
2606439a36b8SEric Ren  * return < 0 on error, return == 0 if there's no lock holder on the stack
2607439a36b8SEric Ren  * before this call, return == 1 if this call would be a recursive locking.
2608439a36b8SEric Ren  */
2609439a36b8SEric Ren int ocfs2_inode_lock_tracker(struct inode *inode,
2610439a36b8SEric Ren 			     struct buffer_head **ret_bh,
2611439a36b8SEric Ren 			     int ex,
2612439a36b8SEric Ren 			     struct ocfs2_lock_holder *oh)
2613439a36b8SEric Ren {
2614439a36b8SEric Ren 	int status;
2615439a36b8SEric Ren 	int arg_flags = 0, has_locked;
2616439a36b8SEric Ren 	struct ocfs2_lock_res *lockres;
2617439a36b8SEric Ren 
2618439a36b8SEric Ren 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
2619439a36b8SEric Ren 	has_locked = ocfs2_is_locked_by_me(lockres);
2620439a36b8SEric Ren 	/* Just get buffer head if the cluster lock has been taken */
2621439a36b8SEric Ren 	if (has_locked)
2622439a36b8SEric Ren 		arg_flags = OCFS2_META_LOCK_GETBH;
2623439a36b8SEric Ren 
2624439a36b8SEric Ren 	if (likely(!has_locked || ret_bh)) {
2625439a36b8SEric Ren 		status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags);
2626439a36b8SEric Ren 		if (status < 0) {
2627439a36b8SEric Ren 			if (status != -ENOENT)
2628439a36b8SEric Ren 				mlog_errno(status);
2629439a36b8SEric Ren 			return status;
2630439a36b8SEric Ren 		}
2631439a36b8SEric Ren 	}
2632439a36b8SEric Ren 	if (!has_locked)
2633439a36b8SEric Ren 		ocfs2_add_holder(lockres, oh);
2634439a36b8SEric Ren 
2635439a36b8SEric Ren 	return has_locked;
2636439a36b8SEric Ren }
2637439a36b8SEric Ren 
2638439a36b8SEric Ren void ocfs2_inode_unlock_tracker(struct inode *inode,
2639439a36b8SEric Ren 				int ex,
2640439a36b8SEric Ren 				struct ocfs2_lock_holder *oh,
2641439a36b8SEric Ren 				int had_lock)
2642439a36b8SEric Ren {
2643439a36b8SEric Ren 	struct ocfs2_lock_res *lockres;
2644439a36b8SEric Ren 
2645439a36b8SEric Ren 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
26468818efaaSEric Ren 	/* had_lock means that the currect process already takes the cluster
26478818efaaSEric Ren 	 * lock previously. If had_lock is 1, we have nothing to do here, and
26488818efaaSEric Ren 	 * it will get unlocked where we got the lock.
26498818efaaSEric Ren 	 */
2650439a36b8SEric Ren 	if (!had_lock) {
2651439a36b8SEric Ren 		ocfs2_remove_holder(lockres, oh);
2652439a36b8SEric Ren 		ocfs2_inode_unlock(inode, ex);
2653439a36b8SEric Ren 	}
2654439a36b8SEric Ren }
2655439a36b8SEric Ren 
2656df152c24SSunil Mushran int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
265783273932SSrinivas Eeda {
265883273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
265983273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
266083273932SSrinivas Eeda 	int status = 0;
266183273932SSrinivas Eeda 
2662df152c24SSunil Mushran 	if (ocfs2_is_hard_readonly(osb))
2663df152c24SSunil Mushran 		return -EROFS;
2664df152c24SSunil Mushran 
2665df152c24SSunil Mushran 	if (ocfs2_mount_local(osb))
2666df152c24SSunil Mushran 		return 0;
2667df152c24SSunil Mushran 
266883273932SSrinivas Eeda 	lockres = &osb->osb_orphan_scan.os_lockres;
2669df152c24SSunil Mushran 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
267083273932SSrinivas Eeda 	if (status < 0)
267183273932SSrinivas Eeda 		return status;
267283273932SSrinivas Eeda 
267383273932SSrinivas Eeda 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
26741c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
26751c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
267683273932SSrinivas Eeda 		*seqno = be32_to_cpu(lvb->lvb_os_seqno);
26773211949fSSunil Mushran 	else
26783211949fSSunil Mushran 		*seqno = osb->osb_orphan_scan.os_seqno + 1;
26793211949fSSunil Mushran 
268083273932SSrinivas Eeda 	return status;
268183273932SSrinivas Eeda }
268283273932SSrinivas Eeda 
2683df152c24SSunil Mushran void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
268483273932SSrinivas Eeda {
268583273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
268683273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
268783273932SSrinivas Eeda 
2688df152c24SSunil Mushran 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
268983273932SSrinivas Eeda 		lockres = &osb->osb_orphan_scan.os_lockres;
269083273932SSrinivas Eeda 		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
269183273932SSrinivas Eeda 		lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
269283273932SSrinivas Eeda 		lvb->lvb_os_seqno = cpu_to_be32(seqno);
2693df152c24SSunil Mushran 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2694df152c24SSunil Mushran 	}
269583273932SSrinivas Eeda }
269683273932SSrinivas Eeda 
2697ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb,
2698ccd979bdSMark Fasheh 		     int ex)
2699ccd979bdSMark Fasheh {
2700c271c5c2SSunil Mushran 	int status = 0;
2701bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2702ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2703ccd979bdSMark Fasheh 
2704ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2705ccd979bdSMark Fasheh 		return -EROFS;
2706ccd979bdSMark Fasheh 
2707c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2708c271c5c2SSunil Mushran 		goto bail;
2709c271c5c2SSunil Mushran 
2710ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2711ccd979bdSMark Fasheh 	if (status < 0) {
2712ccd979bdSMark Fasheh 		mlog_errno(status);
2713ccd979bdSMark Fasheh 		goto bail;
2714ccd979bdSMark Fasheh 	}
2715ccd979bdSMark Fasheh 
2716ccd979bdSMark Fasheh 	/* The super block lock path is really in the best position to
2717ccd979bdSMark Fasheh 	 * know when resources covered by the lock need to be
2718ccd979bdSMark Fasheh 	 * refreshed, so we do it here. Of course, making sense of
2719ccd979bdSMark Fasheh 	 * everything is up to the caller :) */
2720ccd979bdSMark Fasheh 	status = ocfs2_should_refresh_lock_res(lockres);
2721ccd979bdSMark Fasheh 	if (status) {
27228e8a4603SMark Fasheh 		status = ocfs2_refresh_slot_info(osb);
2723ccd979bdSMark Fasheh 
2724ccd979bdSMark Fasheh 		ocfs2_complete_lock_res_refresh(lockres, status);
2725ccd979bdSMark Fasheh 
27263278bb74SJunxiao Bi 		if (status < 0) {
27273278bb74SJunxiao Bi 			ocfs2_cluster_unlock(osb, lockres, level);
2728ccd979bdSMark Fasheh 			mlog_errno(status);
27293278bb74SJunxiao Bi 		}
27308ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2731ccd979bdSMark Fasheh 	}
2732ccd979bdSMark Fasheh bail:
2733ccd979bdSMark Fasheh 	return status;
2734ccd979bdSMark Fasheh }
2735ccd979bdSMark Fasheh 
2736ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb,
2737ccd979bdSMark Fasheh 			int ex)
2738ccd979bdSMark Fasheh {
2739bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2740ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2741ccd979bdSMark Fasheh 
2742c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2743ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(osb, lockres, level);
2744ccd979bdSMark Fasheh }
2745ccd979bdSMark Fasheh 
2746ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb)
2747ccd979bdSMark Fasheh {
2748ccd979bdSMark Fasheh 	int status;
2749ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2750ccd979bdSMark Fasheh 
2751ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2752ccd979bdSMark Fasheh 		return -EROFS;
2753ccd979bdSMark Fasheh 
2754c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2755c271c5c2SSunil Mushran 		return 0;
2756c271c5c2SSunil Mushran 
2757bd3e7610SJoel Becker 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2758ccd979bdSMark Fasheh 	if (status < 0)
2759ccd979bdSMark Fasheh 		mlog_errno(status);
2760ccd979bdSMark Fasheh 
2761ccd979bdSMark Fasheh 	return status;
2762ccd979bdSMark Fasheh }
2763ccd979bdSMark Fasheh 
2764ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb)
2765ccd979bdSMark Fasheh {
2766ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2767ccd979bdSMark Fasheh 
2768c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2769bd3e7610SJoel Becker 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2770ccd979bdSMark Fasheh }
2771ccd979bdSMark Fasheh 
27726ca497a8Swengang wang int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
27736ca497a8Swengang wang {
27746ca497a8Swengang wang 	int status;
27756ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
27766ca497a8Swengang wang 
27776ca497a8Swengang wang 	if (ocfs2_is_hard_readonly(osb))
27786ca497a8Swengang wang 		return -EROFS;
27796ca497a8Swengang wang 
27806ca497a8Swengang wang 	if (ocfs2_mount_local(osb))
27816ca497a8Swengang wang 		return 0;
27826ca497a8Swengang wang 
27836ca497a8Swengang wang 	status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
27846ca497a8Swengang wang 				    0, 0);
27856ca497a8Swengang wang 	if (status < 0)
27866ca497a8Swengang wang 		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
27876ca497a8Swengang wang 
27886ca497a8Swengang wang 	return status;
27896ca497a8Swengang wang }
27906ca497a8Swengang wang 
27916ca497a8Swengang wang void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
27926ca497a8Swengang wang {
27936ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
27946ca497a8Swengang wang 
27956ca497a8Swengang wang 	if (!ocfs2_mount_local(osb))
27966ca497a8Swengang wang 		ocfs2_cluster_unlock(osb, lockres,
27976ca497a8Swengang wang 				     ex ? LKM_EXMODE : LKM_PRMODE);
27986ca497a8Swengang wang }
27996ca497a8Swengang wang 
28004882abebSGang He int ocfs2_trim_fs_lock(struct ocfs2_super *osb,
28014882abebSGang He 		       struct ocfs2_trim_fs_info *info, int trylock)
28024882abebSGang He {
28034882abebSGang He 	int status;
28044882abebSGang He 	struct ocfs2_trim_fs_lvb *lvb;
28054882abebSGang He 	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
28064882abebSGang He 
28074882abebSGang He 	if (info)
28084882abebSGang He 		info->tf_valid = 0;
28094882abebSGang He 
28104882abebSGang He 	if (ocfs2_is_hard_readonly(osb))
28114882abebSGang He 		return -EROFS;
28124882abebSGang He 
28134882abebSGang He 	if (ocfs2_mount_local(osb))
28144882abebSGang He 		return 0;
28154882abebSGang He 
28164882abebSGang He 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX,
28174882abebSGang He 				    trylock ? DLM_LKF_NOQUEUE : 0, 0);
28184882abebSGang He 	if (status < 0) {
28194882abebSGang He 		if (status != -EAGAIN)
28204882abebSGang He 			mlog_errno(status);
28214882abebSGang He 		return status;
28224882abebSGang He 	}
28234882abebSGang He 
28244882abebSGang He 	if (info) {
28254882abebSGang He 		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
28264882abebSGang He 		if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
28274882abebSGang He 		    lvb->lvb_version == OCFS2_TRIMFS_LVB_VERSION) {
28284882abebSGang He 			info->tf_valid = 1;
28294882abebSGang He 			info->tf_success = lvb->lvb_success;
28304882abebSGang He 			info->tf_nodenum = be32_to_cpu(lvb->lvb_nodenum);
28314882abebSGang He 			info->tf_start = be64_to_cpu(lvb->lvb_start);
28324882abebSGang He 			info->tf_len = be64_to_cpu(lvb->lvb_len);
28334882abebSGang He 			info->tf_minlen = be64_to_cpu(lvb->lvb_minlen);
28344882abebSGang He 			info->tf_trimlen = be64_to_cpu(lvb->lvb_trimlen);
28354882abebSGang He 		}
28364882abebSGang He 	}
28374882abebSGang He 
28384882abebSGang He 	return status;
28394882abebSGang He }
28404882abebSGang He 
28414882abebSGang He void ocfs2_trim_fs_unlock(struct ocfs2_super *osb,
28424882abebSGang He 			  struct ocfs2_trim_fs_info *info)
28434882abebSGang He {
28444882abebSGang He 	struct ocfs2_trim_fs_lvb *lvb;
28454882abebSGang He 	struct ocfs2_lock_res *lockres = &osb->osb_trim_fs_lockres;
28464882abebSGang He 
28474882abebSGang He 	if (ocfs2_mount_local(osb))
28484882abebSGang He 		return;
28494882abebSGang He 
28504882abebSGang He 	if (info) {
28514882abebSGang He 		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
28524882abebSGang He 		lvb->lvb_version = OCFS2_TRIMFS_LVB_VERSION;
28534882abebSGang He 		lvb->lvb_success = info->tf_success;
28544882abebSGang He 		lvb->lvb_nodenum = cpu_to_be32(info->tf_nodenum);
28554882abebSGang He 		lvb->lvb_start = cpu_to_be64(info->tf_start);
28564882abebSGang He 		lvb->lvb_len = cpu_to_be64(info->tf_len);
28574882abebSGang He 		lvb->lvb_minlen = cpu_to_be64(info->tf_minlen);
28584882abebSGang He 		lvb->lvb_trimlen = cpu_to_be64(info->tf_trimlen);
28594882abebSGang He 	}
28604882abebSGang He 
28614882abebSGang He 	ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
28624882abebSGang He }
28634882abebSGang He 
2864d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2865d680efe9SMark Fasheh {
2866d680efe9SMark Fasheh 	int ret;
2867bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2868d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2869d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2870d680efe9SMark Fasheh 
2871d680efe9SMark Fasheh 	BUG_ON(!dl);
2872d680efe9SMark Fasheh 
287303efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
287403efed8aSTiger Yang 		if (ex)
2875d680efe9SMark Fasheh 			return -EROFS;
287603efed8aSTiger Yang 		return 0;
287703efed8aSTiger Yang 	}
2878d680efe9SMark Fasheh 
2879c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2880c271c5c2SSunil Mushran 		return 0;
2881c271c5c2SSunil Mushran 
2882d680efe9SMark Fasheh 	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2883d680efe9SMark Fasheh 	if (ret < 0)
2884d680efe9SMark Fasheh 		mlog_errno(ret);
2885d680efe9SMark Fasheh 
2886d680efe9SMark Fasheh 	return ret;
2887d680efe9SMark Fasheh }
2888d680efe9SMark Fasheh 
2889d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2890d680efe9SMark Fasheh {
2891bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2892d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2893d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2894d680efe9SMark Fasheh 
289503efed8aSTiger Yang 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
2896d680efe9SMark Fasheh 		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
2897d680efe9SMark Fasheh }
2898d680efe9SMark Fasheh 
2899ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because
2900ccd979bdSMark Fasheh  * open references on the debug inodes can live on after a mount, so
2901ccd979bdSMark Fasheh  * we can't rely on the ocfs2_super to always exist. */
2902ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref)
2903ccd979bdSMark Fasheh {
2904ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2905ccd979bdSMark Fasheh 
2906ccd979bdSMark Fasheh 	dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
2907ccd979bdSMark Fasheh 
2908ccd979bdSMark Fasheh 	kfree(dlm_debug);
2909ccd979bdSMark Fasheh }
2910ccd979bdSMark Fasheh 
2911ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
2912ccd979bdSMark Fasheh {
2913ccd979bdSMark Fasheh 	if (dlm_debug)
2914ccd979bdSMark Fasheh 		kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
2915ccd979bdSMark Fasheh }
2916ccd979bdSMark Fasheh 
2917ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
2918ccd979bdSMark Fasheh {
2919ccd979bdSMark Fasheh 	kref_get(&debug->d_refcnt);
2920ccd979bdSMark Fasheh }
2921ccd979bdSMark Fasheh 
2922ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
2923ccd979bdSMark Fasheh {
2924ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2925ccd979bdSMark Fasheh 
2926ccd979bdSMark Fasheh 	dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
2927ccd979bdSMark Fasheh 	if (!dlm_debug) {
2928ccd979bdSMark Fasheh 		mlog_errno(-ENOMEM);
2929ccd979bdSMark Fasheh 		goto out;
2930ccd979bdSMark Fasheh 	}
2931ccd979bdSMark Fasheh 
2932ccd979bdSMark Fasheh 	kref_init(&dlm_debug->d_refcnt);
2933ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
2934ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = NULL;
2935ccd979bdSMark Fasheh out:
2936ccd979bdSMark Fasheh 	return dlm_debug;
2937ccd979bdSMark Fasheh }
2938ccd979bdSMark Fasheh 
2939ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. */
2940ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv {
2941ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *p_dlm_debug;
2942ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_iter_res;
2943ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_tmp_res;
2944ccd979bdSMark Fasheh };
2945ccd979bdSMark Fasheh 
2946ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
2947ccd979bdSMark Fasheh 						 struct ocfs2_dlm_seq_priv *priv)
2948ccd979bdSMark Fasheh {
2949ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter, *ret = NULL;
2950ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
2951ccd979bdSMark Fasheh 
2952ccd979bdSMark Fasheh 	assert_spin_locked(&ocfs2_dlm_tracking_lock);
2953ccd979bdSMark Fasheh 
2954ccd979bdSMark Fasheh 	list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
2955ccd979bdSMark Fasheh 		/* discover the head of the list */
2956ccd979bdSMark Fasheh 		if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
2957ccd979bdSMark Fasheh 			mlog(0, "End of list found, %p\n", ret);
2958ccd979bdSMark Fasheh 			break;
2959ccd979bdSMark Fasheh 		}
2960ccd979bdSMark Fasheh 
2961ccd979bdSMark Fasheh 		/* We track our "dummy" iteration lockres' by a NULL
2962ccd979bdSMark Fasheh 		 * l_ops field. */
2963ccd979bdSMark Fasheh 		if (iter->l_ops != NULL) {
2964ccd979bdSMark Fasheh 			ret = iter;
2965ccd979bdSMark Fasheh 			break;
2966ccd979bdSMark Fasheh 		}
2967ccd979bdSMark Fasheh 	}
2968ccd979bdSMark Fasheh 
2969ccd979bdSMark Fasheh 	return ret;
2970ccd979bdSMark Fasheh }
2971ccd979bdSMark Fasheh 
2972ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
2973ccd979bdSMark Fasheh {
2974ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
2975ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter;
2976ccd979bdSMark Fasheh 
2977ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
2978ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
2979ccd979bdSMark Fasheh 	if (iter) {
2980ccd979bdSMark Fasheh 		/* Since lockres' have the lifetime of their container
2981ccd979bdSMark Fasheh 		 * (which can be inodes, ocfs2_supers, etc) we want to
2982ccd979bdSMark Fasheh 		 * copy this out to a temporary lockres while still
2983ccd979bdSMark Fasheh 		 * under the spinlock. Obviously after this we can't
2984ccd979bdSMark Fasheh 		 * trust any pointers on the copy returned, but that's
2985ccd979bdSMark Fasheh 		 * ok as the information we want isn't typically held
2986ccd979bdSMark Fasheh 		 * in them. */
2987ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
2988ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
2989ccd979bdSMark Fasheh 	}
2990ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
2991ccd979bdSMark Fasheh 
2992ccd979bdSMark Fasheh 	return iter;
2993ccd979bdSMark Fasheh }
2994ccd979bdSMark Fasheh 
/* seq_file ->stop(): intentionally empty, there is nothing to undo here. */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
	return;
}
2998ccd979bdSMark Fasheh 
2999ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
3000ccd979bdSMark Fasheh {
3001ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
3002ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter = v;
3003ccd979bdSMark Fasheh 	struct ocfs2_lock_res *dummy = &priv->p_iter_res;
3004ccd979bdSMark Fasheh 
3005ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
3006ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(iter, priv);
3007ccd979bdSMark Fasheh 	list_del_init(&dummy->l_debug_list);
3008ccd979bdSMark Fasheh 	if (iter) {
3009ccd979bdSMark Fasheh 		list_add(&dummy->l_debug_list, &iter->l_debug_list);
3010ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
3011ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
3012ccd979bdSMark Fasheh 	}
3013ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
3014ccd979bdSMark Fasheh 
3015ccd979bdSMark Fasheh 	return iter;
3016ccd979bdSMark Fasheh }
3017ccd979bdSMark Fasheh 
30185bc970e8SSunil Mushran /*
30195bc970e8SSunil Mushran  * Version is used by debugfs.ocfs2 to determine the format being used
30205bc970e8SSunil Mushran  *
30215bc970e8SSunil Mushran  * New in version 2
30225bc970e8SSunil Mushran  *	- Lock stats printed
30235bc970e8SSunil Mushran  * New in version 3
30245bc970e8SSunil Mushran  *	- Max time in lock stats is in usecs (instead of nsecs)
30255bc970e8SSunil Mushran  */
30265bc970e8SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 3
3027ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
3028ccd979bdSMark Fasheh {
3029ccd979bdSMark Fasheh 	int i;
3030ccd979bdSMark Fasheh 	char *lvb;
3031ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = v;
3032ccd979bdSMark Fasheh 
3033ccd979bdSMark Fasheh 	if (!lockres)
3034ccd979bdSMark Fasheh 		return -EINVAL;
3035ccd979bdSMark Fasheh 
3036d680efe9SMark Fasheh 	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
3037d680efe9SMark Fasheh 
3038d680efe9SMark Fasheh 	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
3039d680efe9SMark Fasheh 		seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
3040d680efe9SMark Fasheh 			   lockres->l_name,
3041d680efe9SMark Fasheh 			   (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
3042d680efe9SMark Fasheh 	else
3043d680efe9SMark Fasheh 		seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
3044d680efe9SMark Fasheh 
3045d680efe9SMark Fasheh 	seq_printf(m, "%d\t"
3046ccd979bdSMark Fasheh 		   "0x%lx\t"
3047ccd979bdSMark Fasheh 		   "0x%x\t"
3048ccd979bdSMark Fasheh 		   "0x%x\t"
3049ccd979bdSMark Fasheh 		   "%u\t"
3050ccd979bdSMark Fasheh 		   "%u\t"
3051ccd979bdSMark Fasheh 		   "%d\t"
3052ccd979bdSMark Fasheh 		   "%d\t",
3053ccd979bdSMark Fasheh 		   lockres->l_level,
3054ccd979bdSMark Fasheh 		   lockres->l_flags,
3055ccd979bdSMark Fasheh 		   lockres->l_action,
3056ccd979bdSMark Fasheh 		   lockres->l_unlock_action,
3057ccd979bdSMark Fasheh 		   lockres->l_ro_holders,
3058ccd979bdSMark Fasheh 		   lockres->l_ex_holders,
3059ccd979bdSMark Fasheh 		   lockres->l_requested,
3060ccd979bdSMark Fasheh 		   lockres->l_blocking);
3061ccd979bdSMark Fasheh 
3062ccd979bdSMark Fasheh 	/* Dump the raw LVB */
30638f2c9c1bSJoel Becker 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3064ccd979bdSMark Fasheh 	for(i = 0; i < DLM_LVB_LEN; i++)
3065ccd979bdSMark Fasheh 		seq_printf(m, "0x%x\t", lvb[i]);
3066ccd979bdSMark Fasheh 
30678ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
30685bc970e8SSunil Mushran # define lock_num_prmode(_l)		((_l)->l_lock_prmode.ls_gets)
30695bc970e8SSunil Mushran # define lock_num_exmode(_l)		((_l)->l_lock_exmode.ls_gets)
30705bc970e8SSunil Mushran # define lock_num_prmode_failed(_l)	((_l)->l_lock_prmode.ls_fail)
30715bc970e8SSunil Mushran # define lock_num_exmode_failed(_l)	((_l)->l_lock_exmode.ls_fail)
30725bc970e8SSunil Mushran # define lock_total_prmode(_l)		((_l)->l_lock_prmode.ls_total)
30735bc970e8SSunil Mushran # define lock_total_exmode(_l)		((_l)->l_lock_exmode.ls_total)
30745bc970e8SSunil Mushran # define lock_max_prmode(_l)		((_l)->l_lock_prmode.ls_max)
30755bc970e8SSunil Mushran # define lock_max_exmode(_l)		((_l)->l_lock_exmode.ls_max)
30765bc970e8SSunil Mushran # define lock_refresh(_l)		((_l)->l_lock_refresh)
30778ddb7b00SSunil Mushran #else
30785bc970e8SSunil Mushran # define lock_num_prmode(_l)		(0)
30795bc970e8SSunil Mushran # define lock_num_exmode(_l)		(0)
30808ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l)	(0)
30818ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l)	(0)
3082dd25e55eSRandy Dunlap # define lock_total_prmode(_l)		(0ULL)
3083dd25e55eSRandy Dunlap # define lock_total_exmode(_l)		(0ULL)
30848ddb7b00SSunil Mushran # define lock_max_prmode(_l)		(0)
30858ddb7b00SSunil Mushran # define lock_max_exmode(_l)		(0)
30868ddb7b00SSunil Mushran # define lock_refresh(_l)		(0)
30878ddb7b00SSunil Mushran #endif
30888ddb7b00SSunil Mushran 	/* The following seq_print was added in version 2 of this output */
30895bc970e8SSunil Mushran 	seq_printf(m, "%u\t"
30905bc970e8SSunil Mushran 		   "%u\t"
30918ddb7b00SSunil Mushran 		   "%u\t"
30928ddb7b00SSunil Mushran 		   "%u\t"
30938ddb7b00SSunil Mushran 		   "%llu\t"
30948ddb7b00SSunil Mushran 		   "%llu\t"
30958ddb7b00SSunil Mushran 		   "%u\t"
30968ddb7b00SSunil Mushran 		   "%u\t"
30978ddb7b00SSunil Mushran 		   "%u\t",
30988ddb7b00SSunil Mushran 		   lock_num_prmode(lockres),
30998ddb7b00SSunil Mushran 		   lock_num_exmode(lockres),
31008ddb7b00SSunil Mushran 		   lock_num_prmode_failed(lockres),
31018ddb7b00SSunil Mushran 		   lock_num_exmode_failed(lockres),
31028ddb7b00SSunil Mushran 		   lock_total_prmode(lockres),
31038ddb7b00SSunil Mushran 		   lock_total_exmode(lockres),
31048ddb7b00SSunil Mushran 		   lock_max_prmode(lockres),
31058ddb7b00SSunil Mushran 		   lock_max_exmode(lockres),
31068ddb7b00SSunil Mushran 		   lock_refresh(lockres));
31078ddb7b00SSunil Mushran 
3108ccd979bdSMark Fasheh 	/* End the line */
3109ccd979bdSMark Fasheh 	seq_printf(m, "\n");
3110ccd979bdSMark Fasheh 	return 0;
3111ccd979bdSMark Fasheh }
3112ccd979bdSMark Fasheh 
311390d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = {
3114ccd979bdSMark Fasheh 	.start =	ocfs2_dlm_seq_start,
3115ccd979bdSMark Fasheh 	.stop =		ocfs2_dlm_seq_stop,
3116ccd979bdSMark Fasheh 	.next =		ocfs2_dlm_seq_next,
3117ccd979bdSMark Fasheh 	.show =		ocfs2_dlm_seq_show,
3118ccd979bdSMark Fasheh };
3119ccd979bdSMark Fasheh 
3120ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
3121ccd979bdSMark Fasheh {
312233fa1d90SJoe Perches 	struct seq_file *seq = file->private_data;
3123ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = seq->private;
3124ccd979bdSMark Fasheh 	struct ocfs2_lock_res *res = &priv->p_iter_res;
3125ccd979bdSMark Fasheh 
3126ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
3127ccd979bdSMark Fasheh 	ocfs2_put_dlm_debug(priv->p_dlm_debug);
3128ccd979bdSMark Fasheh 	return seq_release_private(inode, file);
3129ccd979bdSMark Fasheh }
3130ccd979bdSMark Fasheh 
3131ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
3132ccd979bdSMark Fasheh {
3133ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv;
3134ccd979bdSMark Fasheh 	struct ocfs2_super *osb;
3135ccd979bdSMark Fasheh 
31361848cb55SRob Jones 	priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv));
3137ccd979bdSMark Fasheh 	if (!priv) {
31381848cb55SRob Jones 		mlog_errno(-ENOMEM);
31391848cb55SRob Jones 		return -ENOMEM;
3140ccd979bdSMark Fasheh 	}
31411848cb55SRob Jones 
31428e18e294STheodore Ts'o 	osb = inode->i_private;
3143ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(osb->osb_dlm_debug);
3144ccd979bdSMark Fasheh 	priv->p_dlm_debug = osb->osb_dlm_debug;
3145ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
3146ccd979bdSMark Fasheh 
3147ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(&priv->p_iter_res,
3148ccd979bdSMark Fasheh 				   priv->p_dlm_debug);
3149ccd979bdSMark Fasheh 
31501848cb55SRob Jones 	return 0;
3151ccd979bdSMark Fasheh }
3152ccd979bdSMark Fasheh 
31534b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = {
3154ccd979bdSMark Fasheh 	.open =		ocfs2_dlm_debug_open,
3155ccd979bdSMark Fasheh 	.release =	ocfs2_dlm_debug_release,
3156ccd979bdSMark Fasheh 	.read =		seq_read,
3157ccd979bdSMark Fasheh 	.llseek =	seq_lseek,
3158ccd979bdSMark Fasheh };
3159ccd979bdSMark Fasheh 
3160ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
3161ccd979bdSMark Fasheh {
3162ccd979bdSMark Fasheh 	int ret = 0;
3163ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
3164ccd979bdSMark Fasheh 
3165ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = debugfs_create_file("locking_state",
3166ccd979bdSMark Fasheh 							 S_IFREG|S_IRUSR,
3167ccd979bdSMark Fasheh 							 osb->osb_debug_root,
3168ccd979bdSMark Fasheh 							 osb,
3169ccd979bdSMark Fasheh 							 &ocfs2_dlm_debug_fops);
31708f443e23SLinus Torvalds 	if (!dlm_debug->d_locking_state) {
3171ccd979bdSMark Fasheh 		ret = -EINVAL;
3172ccd979bdSMark Fasheh 		mlog(ML_ERROR,
3173ccd979bdSMark Fasheh 		     "Unable to create locking state debugfs file.\n");
3174ccd979bdSMark Fasheh 		goto out;
3175ccd979bdSMark Fasheh 	}
3176ccd979bdSMark Fasheh 
3177ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(dlm_debug);
3178ccd979bdSMark Fasheh out:
3179ccd979bdSMark Fasheh 	return ret;
3180ccd979bdSMark Fasheh }
3181ccd979bdSMark Fasheh 
3182ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
3183ccd979bdSMark Fasheh {
3184ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
3185ccd979bdSMark Fasheh 
3186ccd979bdSMark Fasheh 	if (dlm_debug) {
3187ccd979bdSMark Fasheh 		debugfs_remove(dlm_debug->d_locking_state);
3188ccd979bdSMark Fasheh 		ocfs2_put_dlm_debug(dlm_debug);
3189ccd979bdSMark Fasheh 	}
3190ccd979bdSMark Fasheh }
3191ccd979bdSMark Fasheh 
3192ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb)
3193ccd979bdSMark Fasheh {
3194c271c5c2SSunil Mushran 	int status = 0;
31954670c46dSJoel Becker 	struct ocfs2_cluster_connection *conn = NULL;
3196ccd979bdSMark Fasheh 
31970abd6d18SMark Fasheh 	if (ocfs2_mount_local(osb)) {
31980abd6d18SMark Fasheh 		osb->node_num = 0;
3199c271c5c2SSunil Mushran 		goto local;
32000abd6d18SMark Fasheh 	}
3201c271c5c2SSunil Mushran 
3202ccd979bdSMark Fasheh 	status = ocfs2_dlm_init_debug(osb);
3203ccd979bdSMark Fasheh 	if (status < 0) {
3204ccd979bdSMark Fasheh 		mlog_errno(status);
3205ccd979bdSMark Fasheh 		goto bail;
3206ccd979bdSMark Fasheh 	}
3207ccd979bdSMark Fasheh 
320834d024f8SMark Fasheh 	/* launch downconvert thread */
32095afc44e2SJoseph Qi 	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s",
32105afc44e2SJoseph Qi 			osb->uuid_str);
321134d024f8SMark Fasheh 	if (IS_ERR(osb->dc_task)) {
321234d024f8SMark Fasheh 		status = PTR_ERR(osb->dc_task);
321334d024f8SMark Fasheh 		osb->dc_task = NULL;
3214ccd979bdSMark Fasheh 		mlog_errno(status);
3215ccd979bdSMark Fasheh 		goto bail;
3216ccd979bdSMark Fasheh 	}
3217ccd979bdSMark Fasheh 
3218ccd979bdSMark Fasheh 	/* for now, uuid == domain */
32199c6c877cSJoel Becker 	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
3220c74a3bddSGoldwyn Rodrigues 				       osb->osb_cluster_name,
3221c74a3bddSGoldwyn Rodrigues 				       strlen(osb->osb_cluster_name),
32229c6c877cSJoel Becker 				       osb->uuid_str,
32234670c46dSJoel Becker 				       strlen(osb->uuid_str),
3224553b5eb9SJoel Becker 				       &lproto, ocfs2_do_node_down, osb,
32254670c46dSJoel Becker 				       &conn);
32264670c46dSJoel Becker 	if (status) {
3227ccd979bdSMark Fasheh 		mlog_errno(status);
3228ccd979bdSMark Fasheh 		goto bail;
3229ccd979bdSMark Fasheh 	}
3230ccd979bdSMark Fasheh 
32313e834151SGoldwyn Rodrigues 	status = ocfs2_cluster_this_node(conn, &osb->node_num);
32320abd6d18SMark Fasheh 	if (status < 0) {
32330abd6d18SMark Fasheh 		mlog_errno(status);
32340abd6d18SMark Fasheh 		mlog(ML_ERROR,
32350abd6d18SMark Fasheh 		     "could not find this host's node number\n");
3236286eaa95SJoel Becker 		ocfs2_cluster_disconnect(conn, 0);
32370abd6d18SMark Fasheh 		goto bail;
32380abd6d18SMark Fasheh 	}
32390abd6d18SMark Fasheh 
3240c271c5c2SSunil Mushran local:
3241ccd979bdSMark Fasheh 	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
3242ccd979bdSMark Fasheh 	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
32436ca497a8Swengang wang 	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
324483273932SSrinivas Eeda 	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
3245ccd979bdSMark Fasheh 
32464670c46dSJoel Becker 	osb->cconn = conn;
3247ccd979bdSMark Fasheh bail:
3248ccd979bdSMark Fasheh 	if (status < 0) {
3249ccd979bdSMark Fasheh 		ocfs2_dlm_shutdown_debug(osb);
325034d024f8SMark Fasheh 		if (osb->dc_task)
325134d024f8SMark Fasheh 			kthread_stop(osb->dc_task);
3252ccd979bdSMark Fasheh 	}
3253ccd979bdSMark Fasheh 
3254ccd979bdSMark Fasheh 	return status;
3255ccd979bdSMark Fasheh }
3256ccd979bdSMark Fasheh 
3257286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
3258286eaa95SJoel Becker 			int hangup_pending)
3259ccd979bdSMark Fasheh {
3260ccd979bdSMark Fasheh 	ocfs2_drop_osb_locks(osb);
3261ccd979bdSMark Fasheh 
32624670c46dSJoel Becker 	/*
32634670c46dSJoel Becker 	 * Now that we have dropped all locks and ocfs2_dismount_volume()
32644670c46dSJoel Becker 	 * has disabled recovery, the DLM won't be talking to us.  It's
32654670c46dSJoel Becker 	 * safe to tear things down before disconnecting the cluster.
32664670c46dSJoel Becker 	 */
32674670c46dSJoel Becker 
326834d024f8SMark Fasheh 	if (osb->dc_task) {
326934d024f8SMark Fasheh 		kthread_stop(osb->dc_task);
327034d024f8SMark Fasheh 		osb->dc_task = NULL;
3271ccd979bdSMark Fasheh 	}
3272ccd979bdSMark Fasheh 
3273ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_super_lockres);
3274ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_rename_lockres);
32756ca497a8Swengang wang 	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
327683273932SSrinivas Eeda 	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
3277ccd979bdSMark Fasheh 
3278286eaa95SJoel Becker 	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
32794670c46dSJoel Becker 	osb->cconn = NULL;
3280ccd979bdSMark Fasheh 
3281ccd979bdSMark Fasheh 	ocfs2_dlm_shutdown_debug(osb);
3282ccd979bdSMark Fasheh }
3283ccd979bdSMark Fasheh 
3284ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb,
32850d5dc6c2SMark Fasheh 			   struct ocfs2_lock_res *lockres)
3286ccd979bdSMark Fasheh {
32877431cd7eSJoel Becker 	int ret;
3288ccd979bdSMark Fasheh 	unsigned long flags;
3289bd3e7610SJoel Becker 	u32 lkm_flags = 0;
3290ccd979bdSMark Fasheh 
3291ccd979bdSMark Fasheh 	/* We didn't get anywhere near actually using this lockres. */
3292ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
3293ccd979bdSMark Fasheh 		goto out;
3294ccd979bdSMark Fasheh 
3295b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3296bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
3297b80fc012SMark Fasheh 
3298ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3299ccd979bdSMark Fasheh 
3300ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
3301ccd979bdSMark Fasheh 			"lockres %s, flags 0x%lx\n",
3302ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3303ccd979bdSMark Fasheh 
3304ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_BUSY) {
3305ccd979bdSMark Fasheh 		mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
3306ccd979bdSMark Fasheh 		     "%u, unlock_action = %u\n",
3307ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags, lockres->l_action,
3308ccd979bdSMark Fasheh 		     lockres->l_unlock_action);
3309ccd979bdSMark Fasheh 
3310ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3311ccd979bdSMark Fasheh 
3312ccd979bdSMark Fasheh 		/* XXX: Today we just wait on any busy
3313ccd979bdSMark Fasheh 		 * locks... Perhaps we need to cancel converts in the
3314ccd979bdSMark Fasheh 		 * future? */
3315ccd979bdSMark Fasheh 		ocfs2_wait_on_busy_lock(lockres);
3316ccd979bdSMark Fasheh 
3317ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3318ccd979bdSMark Fasheh 	}
3319ccd979bdSMark Fasheh 
33200d5dc6c2SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
33210d5dc6c2SMark Fasheh 		if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
3322bd3e7610SJoel Becker 		    lockres->l_level == DLM_LOCK_EX &&
33230d5dc6c2SMark Fasheh 		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
33240d5dc6c2SMark Fasheh 			lockres->l_ops->set_lvb(lockres);
33250d5dc6c2SMark Fasheh 	}
3326ccd979bdSMark Fasheh 
3327ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY)
3328ccd979bdSMark Fasheh 		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
3329ccd979bdSMark Fasheh 		     lockres->l_name);
3330ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3331ccd979bdSMark Fasheh 		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
3332ccd979bdSMark Fasheh 
3333ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
3334ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3335ccd979bdSMark Fasheh 		goto out;
3336ccd979bdSMark Fasheh 	}
3337ccd979bdSMark Fasheh 
3338ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
3339ccd979bdSMark Fasheh 
3340ccd979bdSMark Fasheh 	/* make sure we never get here while waiting for an ast to
3341ccd979bdSMark Fasheh 	 * fire. */
3342ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
3343ccd979bdSMark Fasheh 
3344ccd979bdSMark Fasheh 	/* is this necessary? */
3345ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3346ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
3347ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3348ccd979bdSMark Fasheh 
3349ccd979bdSMark Fasheh 	mlog(0, "lock %s\n", lockres->l_name);
3350ccd979bdSMark Fasheh 
3351a796d286SJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
33527431cd7eSJoel Becker 	if (ret) {
33537431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3354ccd979bdSMark Fasheh 		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
3355cf0acdcdSJoel Becker 		ocfs2_dlm_dump_lksb(&lockres->l_lksb);
3356ccd979bdSMark Fasheh 		BUG();
3357ccd979bdSMark Fasheh 	}
335873ac36eaSColy Li 	mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
3359ccd979bdSMark Fasheh 	     lockres->l_name);
3360ccd979bdSMark Fasheh 
3361ccd979bdSMark Fasheh 	ocfs2_wait_on_busy_lock(lockres);
3362ccd979bdSMark Fasheh out:
3363ccd979bdSMark Fasheh 	return 0;
3364ccd979bdSMark Fasheh }
3365ccd979bdSMark Fasheh 
336684d86f83SJan Kara static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
336784d86f83SJan Kara 				       struct ocfs2_lock_res *lockres);
336884d86f83SJan Kara 
3369ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be
3370ccd979bdSMark Fasheh  * queued if blocking, but we still may have to wait on it
337134d024f8SMark Fasheh  * being dequeued from the downconvert thread before we can consider
3372ccd979bdSMark Fasheh  * it safe to drop.
3373ccd979bdSMark Fasheh  *
3374ccd979bdSMark Fasheh  * You can *not* attempt to call cluster_lock on this lockres anymore. */
337584d86f83SJan Kara void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
337684d86f83SJan Kara 				struct ocfs2_lock_res *lockres)
3377ccd979bdSMark Fasheh {
3378ccd979bdSMark Fasheh 	int status;
3379ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
338084d86f83SJan Kara 	unsigned long flags, flags2;
3381ccd979bdSMark Fasheh 
3382ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
3383ccd979bdSMark Fasheh 
3384ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3385ccd979bdSMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_FREEING;
338684d86f83SJan Kara 	if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
338784d86f83SJan Kara 		/*
338884d86f83SJan Kara 		 * We know the downconvert is queued but not in progress
338984d86f83SJan Kara 		 * because we are the downconvert thread and processing
339084d86f83SJan Kara 		 * different lock. So we can just remove the lock from the
339184d86f83SJan Kara 		 * queue. This is not only an optimization but also a way
339284d86f83SJan Kara 		 * to avoid the following deadlock:
339384d86f83SJan Kara 		 *   ocfs2_dentry_post_unlock()
339484d86f83SJan Kara 		 *     ocfs2_dentry_lock_put()
339584d86f83SJan Kara 		 *       ocfs2_drop_dentry_lock()
339684d86f83SJan Kara 		 *         iput()
339784d86f83SJan Kara 		 *           ocfs2_evict_inode()
339884d86f83SJan Kara 		 *             ocfs2_clear_inode()
339984d86f83SJan Kara 		 *               ocfs2_mark_lockres_freeing()
340084d86f83SJan Kara 		 *                 ... blocks waiting for OCFS2_LOCK_QUEUED
340184d86f83SJan Kara 		 *                 since we are the downconvert thread which
340284d86f83SJan Kara 		 *                 should clear the flag.
340384d86f83SJan Kara 		 */
340484d86f83SJan Kara 		spin_unlock_irqrestore(&lockres->l_lock, flags);
340584d86f83SJan Kara 		spin_lock_irqsave(&osb->dc_task_lock, flags2);
340684d86f83SJan Kara 		list_del_init(&lockres->l_blocked_list);
340784d86f83SJan Kara 		osb->blocked_lock_count--;
340884d86f83SJan Kara 		spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
340984d86f83SJan Kara 		/*
341084d86f83SJan Kara 		 * Warn if we recurse into another post_unlock call.  Strictly
341184d86f83SJan Kara 		 * speaking it isn't a problem but we need to be careful if
341284d86f83SJan Kara 		 * that happens (stack overflow, deadlocks, ...) so warn if
341384d86f83SJan Kara 		 * ocfs2 grows a path for which this can happen.
341484d86f83SJan Kara 		 */
341584d86f83SJan Kara 		WARN_ON_ONCE(lockres->l_ops->post_unlock);
341684d86f83SJan Kara 		/* Since the lock is freeing we don't do much in the fn below */
341784d86f83SJan Kara 		ocfs2_process_blocked_lock(osb, lockres);
341884d86f83SJan Kara 		return;
341984d86f83SJan Kara 	}
3420ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3421ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3422ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3423ccd979bdSMark Fasheh 
3424ccd979bdSMark Fasheh 		mlog(0, "Waiting on lockres %s\n", lockres->l_name);
3425ccd979bdSMark Fasheh 
3426ccd979bdSMark Fasheh 		status = ocfs2_wait_for_mask(&mw);
3427ccd979bdSMark Fasheh 		if (status)
3428ccd979bdSMark Fasheh 			mlog_errno(status);
3429ccd979bdSMark Fasheh 
3430ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3431ccd979bdSMark Fasheh 	}
3432ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3433ccd979bdSMark Fasheh }
3434ccd979bdSMark Fasheh 
/*
 * Convenience wrapper: mark @lockres as freeing and then drop its DLM
 * lock.  A non-zero result from ocfs2_drop_lock() is only logged.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int err;

	ocfs2_mark_lockres_freeing(osb, lockres);

	err = ocfs2_drop_lock(osb, lockres);
	if (err)
		mlog_errno(err);
}
3445d680efe9SMark Fasheh 
3446ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3447ccd979bdSMark Fasheh {
3448d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3449d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
34506ca497a8Swengang wang 	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
345183273932SSrinivas Eeda 	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3452ccd979bdSMark Fasheh }
3453ccd979bdSMark Fasheh 
3454ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode)
3455ccd979bdSMark Fasheh {
3456ccd979bdSMark Fasheh 	int status, err;
3457ccd979bdSMark Fasheh 
3458ccd979bdSMark Fasheh 	/* No need to call ocfs2_mark_lockres_freeing here -
3459ccd979bdSMark Fasheh 	 * ocfs2_clear_inode has done it for us. */
3460ccd979bdSMark Fasheh 
3461ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
346250008630STiger Yang 			      &OCFS2_I(inode)->ip_open_lockres);
3463ccd979bdSMark Fasheh 	if (err < 0)
3464ccd979bdSMark Fasheh 		mlog_errno(err);
3465ccd979bdSMark Fasheh 
3466ccd979bdSMark Fasheh 	status = err;
3467ccd979bdSMark Fasheh 
3468ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3469e63aecb6SMark Fasheh 			      &OCFS2_I(inode)->ip_inode_lockres);
3470ccd979bdSMark Fasheh 	if (err < 0)
3471ccd979bdSMark Fasheh 		mlog_errno(err);
3472ccd979bdSMark Fasheh 	if (err < 0 && !status)
3473ccd979bdSMark Fasheh 		status = err;
3474ccd979bdSMark Fasheh 
3475ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
34760d5dc6c2SMark Fasheh 			      &OCFS2_I(inode)->ip_rw_lockres);
3477ccd979bdSMark Fasheh 	if (err < 0)
3478ccd979bdSMark Fasheh 		mlog_errno(err);
3479ccd979bdSMark Fasheh 	if (err < 0 && !status)
3480ccd979bdSMark Fasheh 		status = err;
3481ccd979bdSMark Fasheh 
3482ccd979bdSMark Fasheh 	return status;
3483ccd979bdSMark Fasheh }
3484ccd979bdSMark Fasheh 
3485de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3486ccd979bdSMark Fasheh 					      int new_level)
3487ccd979bdSMark Fasheh {
3488ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3489ccd979bdSMark Fasheh 
3490bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
3491ccd979bdSMark Fasheh 
3492ccd979bdSMark Fasheh 	if (lockres->l_level <= new_level) {
34939b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
34949b915181SSunil Mushran 		     "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
34959b915181SSunil Mushran 		     "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
34969b915181SSunil Mushran 		     new_level, list_empty(&lockres->l_blocked_list),
34979b915181SSunil Mushran 		     list_empty(&lockres->l_mask_waiters), lockres->l_type,
34989b915181SSunil Mushran 		     lockres->l_flags, lockres->l_ro_holders,
34999b915181SSunil Mushran 		     lockres->l_ex_holders, lockres->l_action,
35009b915181SSunil Mushran 		     lockres->l_unlock_action, lockres->l_requested,
35019b915181SSunil Mushran 		     lockres->l_blocking, lockres->l_pending_gen);
3502ccd979bdSMark Fasheh 		BUG();
3503ccd979bdSMark Fasheh 	}
3504ccd979bdSMark Fasheh 
35059b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
35069b915181SSunil Mushran 	     lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);
3507ccd979bdSMark Fasheh 
3508ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_DOWNCONVERT;
3509ccd979bdSMark Fasheh 	lockres->l_requested = new_level;
3510ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3511de551246SJoel Becker 	return lockres_set_pending(lockres);
3512ccd979bdSMark Fasheh }
3513ccd979bdSMark Fasheh 
3514ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3515ccd979bdSMark Fasheh 				  struct ocfs2_lock_res *lockres,
3516ccd979bdSMark Fasheh 				  int new_level,
3517de551246SJoel Becker 				  int lvb,
3518de551246SJoel Becker 				  unsigned int generation)
3519ccd979bdSMark Fasheh {
3520bd3e7610SJoel Becker 	int ret;
3521bd3e7610SJoel Becker 	u32 dlm_flags = DLM_LKF_CONVERT;
3522ccd979bdSMark Fasheh 
35239b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
35249b915181SSunil Mushran 	     lockres->l_level, new_level);
35259b915181SSunil Mushran 
3526e7ee2c08SEric Ren 	/*
3527e7ee2c08SEric Ren 	 * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always
3528e7ee2c08SEric Ren 	 * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
3529e7ee2c08SEric Ren 	 * we can recover correctly from node failure. Otherwise, we may get
3530e7ee2c08SEric Ren 	 * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
3531e7ee2c08SEric Ren 	 */
3532e7ee2c08SEric Ren 	if (!ocfs2_is_o2cb_active() &&
3533e7ee2c08SEric Ren 	    lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3534e7ee2c08SEric Ren 		lvb = 1;
3535e7ee2c08SEric Ren 
3536ccd979bdSMark Fasheh 	if (lvb)
3537bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_VALBLK;
3538ccd979bdSMark Fasheh 
35394670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
3540ccd979bdSMark Fasheh 			     new_level,
3541ccd979bdSMark Fasheh 			     &lockres->l_lksb,
3542ccd979bdSMark Fasheh 			     dlm_flags,
3543ccd979bdSMark Fasheh 			     lockres->l_name,
3544a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
3545de551246SJoel Becker 	lockres_clear_pending(lockres, generation, osb);
35467431cd7eSJoel Becker 	if (ret) {
35477431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
3548ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
3549ccd979bdSMark Fasheh 		goto bail;
3550ccd979bdSMark Fasheh 	}
3551ccd979bdSMark Fasheh 
3552ccd979bdSMark Fasheh 	ret = 0;
3553ccd979bdSMark Fasheh bail:
3554ccd979bdSMark Fasheh 	return ret;
3555ccd979bdSMark Fasheh }
3556ccd979bdSMark Fasheh 
355724ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
3558ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3559ccd979bdSMark Fasheh 				        struct ocfs2_lock_res *lockres)
3560ccd979bdSMark Fasheh {
3561ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3562ccd979bdSMark Fasheh 
3563ccd979bdSMark Fasheh 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3564ccd979bdSMark Fasheh 		/* If we're already trying to cancel a lock conversion
3565ccd979bdSMark Fasheh 		 * then just drop the spinlock and allow the caller to
3566ccd979bdSMark Fasheh 		 * requeue this lock. */
35679b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
3568ccd979bdSMark Fasheh 		return 0;
3569ccd979bdSMark Fasheh 	}
3570ccd979bdSMark Fasheh 
3571ccd979bdSMark Fasheh 	/* were we in a convert when we got the bast fire? */
3572ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
3573ccd979bdSMark Fasheh 	       lockres->l_action != OCFS2_AST_DOWNCONVERT);
3574ccd979bdSMark Fasheh 	/* set things up for the unlockast to know to just
3575ccd979bdSMark Fasheh 	 * clear out the ast_action and unset busy, etc. */
3576ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
3577ccd979bdSMark Fasheh 
3578ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
3579ccd979bdSMark Fasheh 			"lock %s, invalid flags: 0x%lx\n",
3580ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3581ccd979bdSMark Fasheh 
35829b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
35839b915181SSunil Mushran 
3584ccd979bdSMark Fasheh 	return 1;
3585ccd979bdSMark Fasheh }
3586ccd979bdSMark Fasheh 
3587ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3588ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres)
3589ccd979bdSMark Fasheh {
3590ccd979bdSMark Fasheh 	int ret;
3591ccd979bdSMark Fasheh 
35924670c46dSJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3593a796d286SJoel Becker 			       DLM_LKF_CANCEL);
35947431cd7eSJoel Becker 	if (ret) {
35957431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3596ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 0);
3597ccd979bdSMark Fasheh 	}
3598ccd979bdSMark Fasheh 
35999b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
3600ccd979bdSMark Fasheh 
3601ccd979bdSMark Fasheh 	return ret;
3602ccd979bdSMark Fasheh }
3603ccd979bdSMark Fasheh 
3604b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3605ccd979bdSMark Fasheh 			      struct ocfs2_lock_res *lockres,
3606cc567d89SMark Fasheh 			      struct ocfs2_unblock_ctl *ctl)
3607ccd979bdSMark Fasheh {
3608ccd979bdSMark Fasheh 	unsigned long flags;
3609ccd979bdSMark Fasheh 	int blocking;
3610ccd979bdSMark Fasheh 	int new_level;
3611079b8057SSunil Mushran 	int level;
3612ccd979bdSMark Fasheh 	int ret = 0;
36135ef0d4eaSMark Fasheh 	int set_lvb = 0;
3614de551246SJoel Becker 	unsigned int gen;
3615ccd979bdSMark Fasheh 
3616ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3617ccd979bdSMark Fasheh 
3618ccd979bdSMark Fasheh recheck:
3619db0f6ce6SSunil Mushran 	/*
3620db0f6ce6SSunil Mushran 	 * Is it still blocking? If not, we have no more work to do.
3621db0f6ce6SSunil Mushran 	 */
3622db0f6ce6SSunil Mushran 	if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
3623db0f6ce6SSunil Mushran 		BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
3624db0f6ce6SSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3625db0f6ce6SSunil Mushran 		ret = 0;
3626db0f6ce6SSunil Mushran 		goto leave;
3627db0f6ce6SSunil Mushran 	}
3628db0f6ce6SSunil Mushran 
3629ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3630de551246SJoel Becker 		/* XXX
3631de551246SJoel Becker 		 * This is a *big* race.  The OCFS2_LOCK_PENDING flag
3632de551246SJoel Becker 		 * exists entirely for one reason - another thread has set
3633de551246SJoel Becker 		 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3634de551246SJoel Becker 		 *
3635de551246SJoel Becker 		 * If we do ocfs2_cancel_convert() before the other thread
3636de551246SJoel Becker 		 * calls dlm_lock(), our cancel will do nothing.  We will
3637de551246SJoel Becker 		 * get no ast, and we will have no way of knowing the
3638de551246SJoel Becker 		 * cancel failed.  Meanwhile, the other thread will call
3639de551246SJoel Becker 		 * into dlm_lock() and wait...forever.
3640de551246SJoel Becker 		 *
3641de551246SJoel Becker 		 * Why forever?  Because another node has asked for the
3642de551246SJoel Becker 		 * lock first; that's why we're here in unblock_lock().
3643de551246SJoel Becker 		 *
3644de551246SJoel Becker 		 * The solution is OCFS2_LOCK_PENDING.  When PENDING is
3645de551246SJoel Becker 		 * set, we just requeue the unblock.  Only when the other
3646de551246SJoel Becker 		 * thread has called dlm_lock() and cleared PENDING will
3647de551246SJoel Becker 		 * we then cancel their request.
3648de551246SJoel Becker 		 *
3649de551246SJoel Becker 		 * All callers of dlm_lock() must set OCFS2_DLM_PENDING
3650de551246SJoel Becker 		 * at the same time they set OCFS2_DLM_BUSY.  They must
3651de551246SJoel Becker 		 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
3652de551246SJoel Becker 		 */
36539b915181SSunil Mushran 		if (lockres->l_flags & OCFS2_LOCK_PENDING) {
36549b915181SSunil Mushran 			mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
36559b915181SSunil Mushran 			     lockres->l_name);
3656de551246SJoel Becker 			goto leave_requeue;
36579b915181SSunil Mushran 		}
3658de551246SJoel Becker 
3659d680efe9SMark Fasheh 		ctl->requeue = 1;
3660ccd979bdSMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
3661ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3662ccd979bdSMark Fasheh 		if (ret) {
3663ccd979bdSMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
3664ccd979bdSMark Fasheh 			if (ret < 0)
3665ccd979bdSMark Fasheh 				mlog_errno(ret);
3666ccd979bdSMark Fasheh 		}
3667ccd979bdSMark Fasheh 		goto leave;
3668ccd979bdSMark Fasheh 	}
3669ccd979bdSMark Fasheh 
3670a1912826SSunil Mushran 	/*
3671a1912826SSunil Mushran 	 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3672a1912826SSunil Mushran 	 * set when the ast is received for an upconvert just before the
3673a1912826SSunil Mushran 	 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3674a1912826SSunil Mushran 	 * on the heels of the ast, we want to delay the downconvert just
3675a1912826SSunil Mushran 	 * enough to allow the up requestor to do its task. Because this
3676a1912826SSunil Mushran 	 * lock is in the blocked queue, the lock will be downconverted
3677a1912826SSunil Mushran 	 * as soon as the requestor is done with the lock.
3678a1912826SSunil Mushran 	 */
3679a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3680a1912826SSunil Mushran 		goto leave_requeue;
3681a1912826SSunil Mushran 
36820d74125aSSunil Mushran 	/*
36830d74125aSSunil Mushran 	 * How can we block and yet be at NL?  We were trying to upconvert
36840d74125aSSunil Mushran 	 * from NL and got canceled.  The code comes back here, and now
36850d74125aSSunil Mushran 	 * we notice and clear BLOCKING.
36860d74125aSSunil Mushran 	 */
36870d74125aSSunil Mushran 	if (lockres->l_level == DLM_LOCK_NL) {
36880d74125aSSunil Mushran 		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
36899b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
36900d74125aSSunil Mushran 		lockres->l_blocking = DLM_LOCK_NL;
36910d74125aSSunil Mushran 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
36920d74125aSSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
36930d74125aSSunil Mushran 		goto leave;
36940d74125aSSunil Mushran 	}
36950d74125aSSunil Mushran 
3696ccd979bdSMark Fasheh 	/* if we're blocking an exclusive and we have *any* holders,
3697ccd979bdSMark Fasheh 	 * then requeue. */
3698bd3e7610SJoel Becker 	if ((lockres->l_blocking == DLM_LOCK_EX)
36999b915181SSunil Mushran 	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
37009b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
37019b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders,
37029b915181SSunil Mushran 		     lockres->l_ro_holders);
3703f7fbfdd1SMark Fasheh 		goto leave_requeue;
37049b915181SSunil Mushran 	}
3705ccd979bdSMark Fasheh 
3706ccd979bdSMark Fasheh 	/* If it's a PR we're blocking, then only
3707ccd979bdSMark Fasheh 	 * requeue if we've got any EX holders */
3708bd3e7610SJoel Becker 	if (lockres->l_blocking == DLM_LOCK_PR &&
37099b915181SSunil Mushran 	    lockres->l_ex_holders) {
37109b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
37119b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders);
3712f7fbfdd1SMark Fasheh 		goto leave_requeue;
37139b915181SSunil Mushran 	}
3714f7fbfdd1SMark Fasheh 
3715f7fbfdd1SMark Fasheh 	/*
3716f7fbfdd1SMark Fasheh 	 * Can we get a lock in this state if the holder counts are
3717f7fbfdd1SMark Fasheh 	 * zero? The meta data unblock code used to check this.
3718f7fbfdd1SMark Fasheh 	 */
3719f7fbfdd1SMark Fasheh 	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
37209b915181SSunil Mushran 	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
37219b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
37229b915181SSunil Mushran 		     lockres->l_name);
3723f7fbfdd1SMark Fasheh 		goto leave_requeue;
37249b915181SSunil Mushran 	}
3725ccd979bdSMark Fasheh 
372616d5b956SMark Fasheh 	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
372716d5b956SMark Fasheh 
372816d5b956SMark Fasheh 	if (lockres->l_ops->check_downconvert
37299b915181SSunil Mushran 	    && !lockres->l_ops->check_downconvert(lockres, new_level)) {
37309b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
37319b915181SSunil Mushran 		     lockres->l_name);
373216d5b956SMark Fasheh 		goto leave_requeue;
37339b915181SSunil Mushran 	}
373416d5b956SMark Fasheh 
3735ccd979bdSMark Fasheh 	/* If we get here, then we know that there are no more
3736ccd979bdSMark Fasheh 	 * incompatible holders (and anyone asking for an incompatible
3737ccd979bdSMark Fasheh 	 * lock is blocked). We can now downconvert the lock */
3738cc567d89SMark Fasheh 	if (!lockres->l_ops->downconvert_worker)
3739ccd979bdSMark Fasheh 		goto downconvert;
3740ccd979bdSMark Fasheh 
3741ccd979bdSMark Fasheh 	/* Some lockres types want to do a bit of work before
3742ccd979bdSMark Fasheh 	 * downconverting a lock. Allow that here. The worker function
3743ccd979bdSMark Fasheh 	 * may sleep, so we save off a copy of what we're blocking as
3744ccd979bdSMark Fasheh 	 * it may change while we're not holding the spin lock. */
3745ccd979bdSMark Fasheh 	blocking = lockres->l_blocking;
3746079b8057SSunil Mushran 	level = lockres->l_level;
3747ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3748ccd979bdSMark Fasheh 
3749cc567d89SMark Fasheh 	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3750d680efe9SMark Fasheh 
37519b915181SSunil Mushran 	if (ctl->unblock_action == UNBLOCK_STOP_POST) {
37529b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
37539b915181SSunil Mushran 		     lockres->l_name);
3754d680efe9SMark Fasheh 		goto leave;
37559b915181SSunil Mushran 	}
3756ccd979bdSMark Fasheh 
3757ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3758079b8057SSunil Mushran 	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
3759ccd979bdSMark Fasheh 		/* If this changed underneath us, then we can't drop
3760ccd979bdSMark Fasheh 		 * it just yet. */
37619b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
37629b915181SSunil Mushran 		     "Recheck\n", lockres->l_name, blocking,
37639b915181SSunil Mushran 		     lockres->l_blocking, level, lockres->l_level);
3764ccd979bdSMark Fasheh 		goto recheck;
3765ccd979bdSMark Fasheh 	}
3766ccd979bdSMark Fasheh 
3767ccd979bdSMark Fasheh downconvert:
3768d680efe9SMark Fasheh 	ctl->requeue = 0;
3769ccd979bdSMark Fasheh 
37705ef0d4eaSMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3771bd3e7610SJoel Becker 		if (lockres->l_level == DLM_LOCK_EX)
37725ef0d4eaSMark Fasheh 			set_lvb = 1;
37735ef0d4eaSMark Fasheh 
37745ef0d4eaSMark Fasheh 		/*
37755ef0d4eaSMark Fasheh 		 * We only set the lvb if the lock has been fully
37765ef0d4eaSMark Fasheh 		 * refreshed - otherwise we risk setting stale
37775ef0d4eaSMark Fasheh 		 * data. Otherwise, there's no need to actually clear
37785ef0d4eaSMark Fasheh 		 * out the lvb here as it's value is still valid.
37795ef0d4eaSMark Fasheh 		 */
37805ef0d4eaSMark Fasheh 		if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
37815ef0d4eaSMark Fasheh 			lockres->l_ops->set_lvb(lockres);
37825ef0d4eaSMark Fasheh 	}
37835ef0d4eaSMark Fasheh 
3784de551246SJoel Becker 	gen = ocfs2_prepare_downconvert(lockres, new_level);
3785ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3786de551246SJoel Becker 	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3787de551246SJoel Becker 				     gen);
3788de551246SJoel Becker 
3789ccd979bdSMark Fasheh leave:
3790c1e8d35eSTao Ma 	if (ret)
3791c1e8d35eSTao Ma 		mlog_errno(ret);
3792ccd979bdSMark Fasheh 	return ret;
3793f7fbfdd1SMark Fasheh 
3794f7fbfdd1SMark Fasheh leave_requeue:
3795f7fbfdd1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3796f7fbfdd1SMark Fasheh 	ctl->requeue = 1;
3797f7fbfdd1SMark Fasheh 
3798f7fbfdd1SMark Fasheh 	return 0;
3799ccd979bdSMark Fasheh }
3800ccd979bdSMark Fasheh 
3801d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3802ccd979bdSMark Fasheh 				     int blocking)
3803ccd979bdSMark Fasheh {
3804ccd979bdSMark Fasheh 	struct inode *inode;
3805ccd979bdSMark Fasheh 	struct address_space *mapping;
38065e98d492SGoldwyn Rodrigues 	struct ocfs2_inode_info *oi;
3807ccd979bdSMark Fasheh 
3808ccd979bdSMark Fasheh        	inode = ocfs2_lock_res_inode(lockres);
3809ccd979bdSMark Fasheh 	mapping = inode->i_mapping;
3810ccd979bdSMark Fasheh 
38115e98d492SGoldwyn Rodrigues 	if (S_ISDIR(inode->i_mode)) {
38125e98d492SGoldwyn Rodrigues 		oi = OCFS2_I(inode);
38135e98d492SGoldwyn Rodrigues 		oi->ip_dir_lock_gen++;
38145e98d492SGoldwyn Rodrigues 		mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
38155e98d492SGoldwyn Rodrigues 		goto out;
38165e98d492SGoldwyn Rodrigues 	}
38175e98d492SGoldwyn Rodrigues 
38181044e401SMark Fasheh 	if (!S_ISREG(inode->i_mode))
3819f1f54068SMark Fasheh 		goto out;
3820f1f54068SMark Fasheh 
38217f4a2a97SMark Fasheh 	/*
38227f4a2a97SMark Fasheh 	 * We need this before the filemap_fdatawrite() so that it can
38237f4a2a97SMark Fasheh 	 * transfer the dirty bit from the PTE to the
38247f4a2a97SMark Fasheh 	 * page. Unfortunately this means that even for EX->PR
38257f4a2a97SMark Fasheh 	 * downconverts, we'll lose our mappings and have to build
38267f4a2a97SMark Fasheh 	 * them up again.
38277f4a2a97SMark Fasheh 	 */
38287f4a2a97SMark Fasheh 	unmap_mapping_range(mapping, 0, 0, 0);
38297f4a2a97SMark Fasheh 
3830ccd979bdSMark Fasheh 	if (filemap_fdatawrite(mapping)) {
3831b0697053SMark Fasheh 		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
3832b0697053SMark Fasheh 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
3833ccd979bdSMark Fasheh 	}
3834ccd979bdSMark Fasheh 	sync_mapping_buffers(mapping);
3835bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_EX) {
3836ccd979bdSMark Fasheh 		truncate_inode_pages(mapping, 0);
3837ccd979bdSMark Fasheh 	} else {
3838ccd979bdSMark Fasheh 		/* We only need to wait on the I/O if we're not also
3839ccd979bdSMark Fasheh 		 * truncating pages because truncate_inode_pages waits
3840ccd979bdSMark Fasheh 		 * for us above. We don't truncate pages if we're
3841ccd979bdSMark Fasheh 		 * blocking anything < EXMODE because we want to keep
3842ccd979bdSMark Fasheh 		 * them around in that case. */
3843ccd979bdSMark Fasheh 		filemap_fdatawait(mapping);
3844ccd979bdSMark Fasheh 	}
3845ccd979bdSMark Fasheh 
3846b8a7a3a6SAndreas Gruenbacher 	forget_all_cached_acls(inode);
3847b8a7a3a6SAndreas Gruenbacher 
3848f1f54068SMark Fasheh out:
3849d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE;
3850ccd979bdSMark Fasheh }
3851ccd979bdSMark Fasheh 
3852a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
3853a4338481STao Ma 				 struct ocfs2_lock_res *lockres,
3854810d5aebSMark Fasheh 				 int new_level)
3855810d5aebSMark Fasheh {
3856a4338481STao Ma 	int checkpointed = ocfs2_ci_fully_checkpointed(ci);
3857810d5aebSMark Fasheh 
3858bd3e7610SJoel Becker 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3859bd3e7610SJoel Becker 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
3860810d5aebSMark Fasheh 
3861810d5aebSMark Fasheh 	if (checkpointed)
3862810d5aebSMark Fasheh 		return 1;
3863810d5aebSMark Fasheh 
3864a4338481STao Ma 	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
3865810d5aebSMark Fasheh 	return 0;
3866810d5aebSMark Fasheh }
3867810d5aebSMark Fasheh 
/*
 * check_downconvert hook: defer to ocfs2_ci_checkpointed() using the
 * metadata cache of the inode behind @lockres.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	return ocfs2_ci_checkpointed(INODE_CACHE(ocfs2_lock_res_inode(lockres)),
				     lockres, new_level);
}
3875a4338481STao Ma 
/* set_lvb hook: stuff the meta LVB of the inode behind @lockres. */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
3882810d5aebSMark Fasheh 
/*
 * Drops the final reference on our dentry lock.  For now this runs in
 * the downconvert thread; the dlmglue API could be simplified later by
 * pushing these drops off to the ocfs2_wq instead.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
3894d680efe9SMark Fasheh 
3895d680efe9SMark Fasheh /*
3896d680efe9SMark Fasheh  * d_delete() matching dentries before the lock downconvert.
3897d680efe9SMark Fasheh  *
3898d680efe9SMark Fasheh  * At this point, any process waiting to destroy the
3899d680efe9SMark Fasheh  * dentry_lock due to last ref count is stopped by the
3900d680efe9SMark Fasheh  * OCFS2_LOCK_QUEUED flag.
3901d680efe9SMark Fasheh  *
3902d680efe9SMark Fasheh  * We have two potential problems
3903d680efe9SMark Fasheh  *
3904d680efe9SMark Fasheh  * 1) If we do the last reference drop on our dentry_lock (via dput)
3905d680efe9SMark Fasheh  *    we'll wind up in ocfs2_release_dentry_lock(), waiting on
3906d680efe9SMark Fasheh  *    the downconvert to finish. Instead we take an elevated
3907d680efe9SMark Fasheh  *    reference and push the drop until after we've completed our
3908d680efe9SMark Fasheh  *    unblock processing.
3909d680efe9SMark Fasheh  *
3910d680efe9SMark Fasheh  * 2) There might be another process with a final reference,
3911d680efe9SMark Fasheh  *    waiting on us to finish processing. If this is the case, we
3912d680efe9SMark Fasheh  *    detect it and exit out - there's no more dentries anyway.
3913d680efe9SMark Fasheh  */
3914d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3915d680efe9SMark Fasheh 				       int blocking)
3916d680efe9SMark Fasheh {
3917d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
3918d680efe9SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
3919d680efe9SMark Fasheh 	struct dentry *dentry;
3920d680efe9SMark Fasheh 	unsigned long flags;
3921d680efe9SMark Fasheh 	int extra_ref = 0;
3922d680efe9SMark Fasheh 
3923d680efe9SMark Fasheh 	/*
3924d680efe9SMark Fasheh 	 * This node is blocking another node from getting a read
3925d680efe9SMark Fasheh 	 * lock. This happens when we've renamed within a
3926d680efe9SMark Fasheh 	 * directory. We've forced the other nodes to d_delete(), but
3927d680efe9SMark Fasheh 	 * we never actually dropped our lock because it's still
3928d680efe9SMark Fasheh 	 * valid. The downconvert code will retain a PR for this node,
3929d680efe9SMark Fasheh 	 * so there's no further work to do.
3930d680efe9SMark Fasheh 	 */
3931bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_PR)
3932d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
3933d680efe9SMark Fasheh 
3934d680efe9SMark Fasheh 	/*
3935d680efe9SMark Fasheh 	 * Mark this inode as potentially orphaned. The code in
3936d680efe9SMark Fasheh 	 * ocfs2_delete_inode() will figure out whether it actually
3937d680efe9SMark Fasheh 	 * needs to be freed or not.
3938d680efe9SMark Fasheh 	 */
3939d680efe9SMark Fasheh 	spin_lock(&oi->ip_lock);
3940d680efe9SMark Fasheh 	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
3941d680efe9SMark Fasheh 	spin_unlock(&oi->ip_lock);
3942d680efe9SMark Fasheh 
3943d680efe9SMark Fasheh 	/*
3944d680efe9SMark Fasheh 	 * Yuck. We need to make sure however that the check of
3945d680efe9SMark Fasheh 	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
3946d680efe9SMark Fasheh 	 * respect to a reference decrement or the setting of that
3947d680efe9SMark Fasheh 	 * flag.
3948d680efe9SMark Fasheh 	 */
3949d680efe9SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3950d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
3951d680efe9SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
3952d680efe9SMark Fasheh 	    && dl->dl_count) {
3953d680efe9SMark Fasheh 		dl->dl_count++;
3954d680efe9SMark Fasheh 		extra_ref = 1;
3955d680efe9SMark Fasheh 	}
3956d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
3957d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3958d680efe9SMark Fasheh 
3959d680efe9SMark Fasheh 	mlog(0, "extra_ref = %d\n", extra_ref);
3960d680efe9SMark Fasheh 
3961d680efe9SMark Fasheh 	/*
3962d680efe9SMark Fasheh 	 * We have a process waiting on us in ocfs2_dentry_iput(),
3963d680efe9SMark Fasheh 	 * which means we can't have any more outstanding
3964d680efe9SMark Fasheh 	 * aliases. There's no need to do any more work.
3965d680efe9SMark Fasheh 	 */
3966d680efe9SMark Fasheh 	if (!extra_ref)
3967d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
3968d680efe9SMark Fasheh 
3969d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
3970d680efe9SMark Fasheh 	while (1) {
3971d680efe9SMark Fasheh 		dentry = ocfs2_find_local_alias(dl->dl_inode,
3972d680efe9SMark Fasheh 						dl->dl_parent_blkno, 1);
3973d680efe9SMark Fasheh 		if (!dentry)
3974d680efe9SMark Fasheh 			break;
3975d680efe9SMark Fasheh 		spin_unlock(&dentry_attach_lock);
3976d680efe9SMark Fasheh 
397710ab8811Salex chen 		if (S_ISDIR(dl->dl_inode->i_mode))
397810ab8811Salex chen 			shrink_dcache_parent(dentry);
397910ab8811Salex chen 
3980a455589fSAl Viro 		mlog(0, "d_delete(%pd);\n", dentry);
3981d680efe9SMark Fasheh 
3982d680efe9SMark Fasheh 		/*
3983d680efe9SMark Fasheh 		 * The following dcache calls may do an
3984d680efe9SMark Fasheh 		 * iput(). Normally we don't want that from the
3985d680efe9SMark Fasheh 		 * downconverting thread, but in this case it's ok
3986d680efe9SMark Fasheh 		 * because the requesting node already has an
3987d680efe9SMark Fasheh 		 * exclusive lock on the inode, so it can't be queued
3988d680efe9SMark Fasheh 		 * for a downconvert.
3989d680efe9SMark Fasheh 		 */
3990d680efe9SMark Fasheh 		d_delete(dentry);
3991d680efe9SMark Fasheh 		dput(dentry);
3992d680efe9SMark Fasheh 
3993d680efe9SMark Fasheh 		spin_lock(&dentry_attach_lock);
3994d680efe9SMark Fasheh 	}
3995d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
3996d680efe9SMark Fasheh 
3997d680efe9SMark Fasheh 	/*
3998d680efe9SMark Fasheh 	 * If we are the last holder of this dentry lock, there is no
3999d680efe9SMark Fasheh 	 * reason to downconvert so skip straight to the unlock.
4000d680efe9SMark Fasheh 	 */
4001d680efe9SMark Fasheh 	if (dl->dl_count == 1)
4002d680efe9SMark Fasheh 		return UNBLOCK_STOP_POST;
4003d680efe9SMark Fasheh 
4004d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE_POST;
4005d680efe9SMark Fasheh }
4006d680efe9SMark Fasheh 
40078dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
40088dec98edSTao Ma 					    int new_level)
40098dec98edSTao Ma {
40108dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
40118dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
40128dec98edSTao Ma 
40138dec98edSTao Ma 	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
40148dec98edSTao Ma }
40158dec98edSTao Ma 
40168dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
40178dec98edSTao Ma 					 int blocking)
40188dec98edSTao Ma {
40198dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
40208dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
40218dec98edSTao Ma 
40228dec98edSTao Ma 	ocfs2_metadata_cache_purge(&tree->rf_ci);
40238dec98edSTao Ma 
40248dec98edSTao Ma 	return UNBLOCK_CONTINUE;
40258dec98edSTao Ma }
40268dec98edSTao Ma 
40279e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
40289e33d69fSJan Kara {
40299e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb;
40309e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
40319e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
40329e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
40339e33d69fSJan Kara 
4034a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
40359e33d69fSJan Kara 	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
40369e33d69fSJan Kara 	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
40379e33d69fSJan Kara 	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
40389e33d69fSJan Kara 	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
40399e33d69fSJan Kara 	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
40409e33d69fSJan Kara 	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
40419e33d69fSJan Kara 	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
40429e33d69fSJan Kara }
40439e33d69fSJan Kara 
40449e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
40459e33d69fSJan Kara {
40469e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
40479e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
40489e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
40499e33d69fSJan Kara 
40509e33d69fSJan Kara 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
40519e33d69fSJan Kara 		ocfs2_cluster_unlock(osb, lockres, level);
40529e33d69fSJan Kara }
40539e33d69fSJan Kara 
40549e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
40559e33d69fSJan Kara {
40569e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
40579e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
40589e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
40599e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
406085eb8b73SJoel Becker 	struct buffer_head *bh = NULL;
40619e33d69fSJan Kara 	struct ocfs2_global_disk_dqinfo *gdinfo;
40629e33d69fSJan Kara 	int status = 0;
40639e33d69fSJan Kara 
40641c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
40651c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
40669e33d69fSJan Kara 		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
40679e33d69fSJan Kara 		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
40689e33d69fSJan Kara 		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
40699e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
40709e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
40719e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
40729e33d69fSJan Kara 					be32_to_cpu(lvb->lvb_free_entry);
40739e33d69fSJan Kara 	} else {
4074ae4f6ef1SJan Kara 		status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
4075ae4f6ef1SJan Kara 						     oinfo->dqi_giblk, &bh);
407685eb8b73SJoel Becker 		if (status) {
40779e33d69fSJan Kara 			mlog_errno(status);
40789e33d69fSJan Kara 			goto bail;
40799e33d69fSJan Kara 		}
40809e33d69fSJan Kara 		gdinfo = (struct ocfs2_global_disk_dqinfo *)
40819e33d69fSJan Kara 					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
40829e33d69fSJan Kara 		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
40839e33d69fSJan Kara 		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
40849e33d69fSJan Kara 		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
40859e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
40869e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
40879e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
40889e33d69fSJan Kara 					le32_to_cpu(gdinfo->dqi_free_entry);
40899e33d69fSJan Kara 		brelse(bh);
40909e33d69fSJan Kara 		ocfs2_track_lock_refresh(lockres);
40919e33d69fSJan Kara 	}
40929e33d69fSJan Kara 
40939e33d69fSJan Kara bail:
40949e33d69fSJan Kara 	return status;
40959e33d69fSJan Kara }
40969e33d69fSJan Kara 
40979e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file
40989e33d69fSJan Kara  * so that we can safely refresh quota info from disk. */
40999e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
41009e33d69fSJan Kara {
41019e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
41029e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
41039e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
41049e33d69fSJan Kara 	int status = 0;
41059e33d69fSJan Kara 
41069e33d69fSJan Kara 	/* On RO devices, locking really isn't needed... */
41079e33d69fSJan Kara 	if (ocfs2_is_hard_readonly(osb)) {
41089e33d69fSJan Kara 		if (ex)
41099e33d69fSJan Kara 			status = -EROFS;
41109e33d69fSJan Kara 		goto bail;
41119e33d69fSJan Kara 	}
41129e33d69fSJan Kara 	if (ocfs2_mount_local(osb))
41139e33d69fSJan Kara 		goto bail;
41149e33d69fSJan Kara 
41159e33d69fSJan Kara 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
41169e33d69fSJan Kara 	if (status < 0) {
41179e33d69fSJan Kara 		mlog_errno(status);
41189e33d69fSJan Kara 		goto bail;
41199e33d69fSJan Kara 	}
41209e33d69fSJan Kara 	if (!ocfs2_should_refresh_lock_res(lockres))
41219e33d69fSJan Kara 		goto bail;
41229e33d69fSJan Kara 	/* OK, we have the lock but we need to refresh the quota info */
41239e33d69fSJan Kara 	status = ocfs2_refresh_qinfo(oinfo);
41249e33d69fSJan Kara 	if (status)
41259e33d69fSJan Kara 		ocfs2_qinfo_unlock(oinfo, ex);
41269e33d69fSJan Kara 	ocfs2_complete_lock_res_refresh(lockres, status);
41279e33d69fSJan Kara bail:
41289e33d69fSJan Kara 	return status;
41299e33d69fSJan Kara }
41309e33d69fSJan Kara 
41318dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
41328dec98edSTao Ma {
41338dec98edSTao Ma 	int status;
41348dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
41358dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
41368dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
41378dec98edSTao Ma 
41388dec98edSTao Ma 
41398dec98edSTao Ma 	if (ocfs2_is_hard_readonly(osb))
41408dec98edSTao Ma 		return -EROFS;
41418dec98edSTao Ma 
41428dec98edSTao Ma 	if (ocfs2_mount_local(osb))
41438dec98edSTao Ma 		return 0;
41448dec98edSTao Ma 
41458dec98edSTao Ma 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
41468dec98edSTao Ma 	if (status < 0)
41478dec98edSTao Ma 		mlog_errno(status);
41488dec98edSTao Ma 
41498dec98edSTao Ma 	return status;
41508dec98edSTao Ma }
41518dec98edSTao Ma 
41528dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
41538dec98edSTao Ma {
41548dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
41558dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
41568dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
41578dec98edSTao Ma 
41588dec98edSTao Ma 	if (!ocfs2_mount_local(osb))
41598dec98edSTao Ma 		ocfs2_cluster_unlock(osb, lockres, level);
41608dec98edSTao Ma }
41618dec98edSTao Ma 
416200600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
4163ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *lockres)
4164ccd979bdSMark Fasheh {
4165ccd979bdSMark Fasheh 	int status;
4166d680efe9SMark Fasheh 	struct ocfs2_unblock_ctl ctl = {0, 0,};
4167ccd979bdSMark Fasheh 	unsigned long flags;
4168ccd979bdSMark Fasheh 
4169ccd979bdSMark Fasheh 	/* Our reference to the lockres in this function can be
4170ccd979bdSMark Fasheh 	 * considered valid until we remove the OCFS2_LOCK_QUEUED
4171ccd979bdSMark Fasheh 	 * flag. */
4172ccd979bdSMark Fasheh 
4173ccd979bdSMark Fasheh 	BUG_ON(!lockres);
4174ccd979bdSMark Fasheh 	BUG_ON(!lockres->l_ops);
4175ccd979bdSMark Fasheh 
41769b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);
4177ccd979bdSMark Fasheh 
4178ccd979bdSMark Fasheh 	/* Detect whether a lock has been marked as going away while
417934d024f8SMark Fasheh 	 * the downconvert thread was processing other things. A lock can
4180ccd979bdSMark Fasheh 	 * still be marked with OCFS2_LOCK_FREEING after this check,
4181ccd979bdSMark Fasheh 	 * but short circuiting here will still save us some
4182ccd979bdSMark Fasheh 	 * performance. */
4183ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
4184ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING)
4185ccd979bdSMark Fasheh 		goto unqueue;
4186ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
4187ccd979bdSMark Fasheh 
4188b5e500e2SMark Fasheh 	status = ocfs2_unblock_lock(osb, lockres, &ctl);
4189ccd979bdSMark Fasheh 	if (status < 0)
4190ccd979bdSMark Fasheh 		mlog_errno(status);
4191ccd979bdSMark Fasheh 
4192ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
4193ccd979bdSMark Fasheh unqueue:
4194d680efe9SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
4195ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
4196ccd979bdSMark Fasheh 	} else
4197ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
4198ccd979bdSMark Fasheh 
41999b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
4200d680efe9SMark Fasheh 	     ctl.requeue ? "yes" : "no");
4201ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
4202ccd979bdSMark Fasheh 
4203d680efe9SMark Fasheh 	if (ctl.unblock_action != UNBLOCK_CONTINUE
4204d680efe9SMark Fasheh 	    && lockres->l_ops->post_unlock)
4205d680efe9SMark Fasheh 		lockres->l_ops->post_unlock(osb, lockres);
4206ccd979bdSMark Fasheh }
4207ccd979bdSMark Fasheh 
4208ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
4209ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
4210ccd979bdSMark Fasheh {
4211a75e9ccaSSrinivas Eeda 	unsigned long flags;
4212a75e9ccaSSrinivas Eeda 
4213ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
4214ccd979bdSMark Fasheh 
4215ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
4216ccd979bdSMark Fasheh 		/* Do not schedule a lock for downconvert when it's on
4217ccd979bdSMark Fasheh 		 * the way to destruction - any nodes wanting access
4218ccd979bdSMark Fasheh 		 * to the resource will get it soon. */
42199b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
4220ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags);
4221ccd979bdSMark Fasheh 		return;
4222ccd979bdSMark Fasheh 	}
4223ccd979bdSMark Fasheh 
4224ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
4225ccd979bdSMark Fasheh 
4226a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
4227ccd979bdSMark Fasheh 	if (list_empty(&lockres->l_blocked_list)) {
4228ccd979bdSMark Fasheh 		list_add_tail(&lockres->l_blocked_list,
4229ccd979bdSMark Fasheh 			      &osb->blocked_lock_list);
4230ccd979bdSMark Fasheh 		osb->blocked_lock_count++;
4231ccd979bdSMark Fasheh 	}
4232a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
4233ccd979bdSMark Fasheh }
423434d024f8SMark Fasheh 
423534d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
423634d024f8SMark Fasheh {
423734d024f8SMark Fasheh 	unsigned long processed;
4238a75e9ccaSSrinivas Eeda 	unsigned long flags;
423934d024f8SMark Fasheh 	struct ocfs2_lock_res *lockres;
424034d024f8SMark Fasheh 
4241a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
424234d024f8SMark Fasheh 	/* grab this early so we know to try again if a state change and
424334d024f8SMark Fasheh 	 * wake happens part-way through our work  */
424434d024f8SMark Fasheh 	osb->dc_work_sequence = osb->dc_wake_sequence;
424534d024f8SMark Fasheh 
424634d024f8SMark Fasheh 	processed = osb->blocked_lock_count;
4247209f7512SJoseph Qi 	/*
4248209f7512SJoseph Qi 	 * blocked lock processing in this loop might call iput which can
4249209f7512SJoseph Qi 	 * remove items off osb->blocked_lock_list. Downconvert up to
4250209f7512SJoseph Qi 	 * 'processed' number of locks, but stop short if we had some
4251209f7512SJoseph Qi 	 * removed in ocfs2_mark_lockres_freeing when downconverting.
4252209f7512SJoseph Qi 	 */
4253209f7512SJoseph Qi 	while (processed && !list_empty(&osb->blocked_lock_list)) {
425434d024f8SMark Fasheh 		lockres = list_entry(osb->blocked_lock_list.next,
425534d024f8SMark Fasheh 				     struct ocfs2_lock_res, l_blocked_list);
425634d024f8SMark Fasheh 		list_del_init(&lockres->l_blocked_list);
425734d024f8SMark Fasheh 		osb->blocked_lock_count--;
4258a75e9ccaSSrinivas Eeda 		spin_unlock_irqrestore(&osb->dc_task_lock, flags);
425934d024f8SMark Fasheh 
426034d024f8SMark Fasheh 		BUG_ON(!processed);
426134d024f8SMark Fasheh 		processed--;
426234d024f8SMark Fasheh 
426334d024f8SMark Fasheh 		ocfs2_process_blocked_lock(osb, lockres);
426434d024f8SMark Fasheh 
4265a75e9ccaSSrinivas Eeda 		spin_lock_irqsave(&osb->dc_task_lock, flags);
426634d024f8SMark Fasheh 	}
4267a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
426834d024f8SMark Fasheh }
426934d024f8SMark Fasheh 
427034d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
427134d024f8SMark Fasheh {
427234d024f8SMark Fasheh 	int empty = 0;
4273a75e9ccaSSrinivas Eeda 	unsigned long flags;
427434d024f8SMark Fasheh 
4275a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
427634d024f8SMark Fasheh 	if (list_empty(&osb->blocked_lock_list))
427734d024f8SMark Fasheh 		empty = 1;
427834d024f8SMark Fasheh 
4279a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
428034d024f8SMark Fasheh 	return empty;
428134d024f8SMark Fasheh }
428234d024f8SMark Fasheh 
428334d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
428434d024f8SMark Fasheh {
428534d024f8SMark Fasheh 	int should_wake = 0;
4286a75e9ccaSSrinivas Eeda 	unsigned long flags;
428734d024f8SMark Fasheh 
4288a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
428934d024f8SMark Fasheh 	if (osb->dc_work_sequence != osb->dc_wake_sequence)
429034d024f8SMark Fasheh 		should_wake = 1;
4291a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
429234d024f8SMark Fasheh 
429334d024f8SMark Fasheh 	return should_wake;
429434d024f8SMark Fasheh }
429534d024f8SMark Fasheh 
4296200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg)
429734d024f8SMark Fasheh {
429834d024f8SMark Fasheh 	int status = 0;
429934d024f8SMark Fasheh 	struct ocfs2_super *osb = arg;
430034d024f8SMark Fasheh 
430134d024f8SMark Fasheh 	/* only quit once we've been asked to stop and there is no more
430234d024f8SMark Fasheh 	 * work available */
430334d024f8SMark Fasheh 	while (!(kthread_should_stop() &&
430434d024f8SMark Fasheh 		ocfs2_downconvert_thread_lists_empty(osb))) {
430534d024f8SMark Fasheh 
430634d024f8SMark Fasheh 		wait_event_interruptible(osb->dc_event,
430734d024f8SMark Fasheh 					 ocfs2_downconvert_thread_should_wake(osb) ||
430834d024f8SMark Fasheh 					 kthread_should_stop());
430934d024f8SMark Fasheh 
431034d024f8SMark Fasheh 		mlog(0, "downconvert_thread: awoken\n");
431134d024f8SMark Fasheh 
431234d024f8SMark Fasheh 		ocfs2_downconvert_thread_do_work(osb);
431334d024f8SMark Fasheh 	}
431434d024f8SMark Fasheh 
431534d024f8SMark Fasheh 	osb->dc_task = NULL;
431634d024f8SMark Fasheh 	return status;
431734d024f8SMark Fasheh }
431834d024f8SMark Fasheh 
431934d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
432034d024f8SMark Fasheh {
4321a75e9ccaSSrinivas Eeda 	unsigned long flags;
4322a75e9ccaSSrinivas Eeda 
4323a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
432434d024f8SMark Fasheh 	/* make sure the voting thread gets a swipe at whatever changes
432534d024f8SMark Fasheh 	 * the caller may have made to the voting state */
432634d024f8SMark Fasheh 	osb->dc_wake_sequence++;
4327a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
432834d024f8SMark Fasheh 	wake_up(&osb->dc_event);
432934d024f8SMark Fasheh }
4330