xref: /openbmc/linux/fs/ocfs2/dlmglue.c (revision a455589f)
1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*-
2ccd979bdSMark Fasheh  * vim: noexpandtab sw=8 ts=8 sts=0:
3ccd979bdSMark Fasheh  *
4ccd979bdSMark Fasheh  * dlmglue.c
5ccd979bdSMark Fasheh  *
6ccd979bdSMark Fasheh  * Code which implements an OCFS2 specific interface to our DLM.
7ccd979bdSMark Fasheh  *
8ccd979bdSMark Fasheh  * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
9ccd979bdSMark Fasheh  *
10ccd979bdSMark Fasheh  * This program is free software; you can redistribute it and/or
11ccd979bdSMark Fasheh  * modify it under the terms of the GNU General Public
12ccd979bdSMark Fasheh  * License as published by the Free Software Foundation; either
13ccd979bdSMark Fasheh  * version 2 of the License, or (at your option) any later version.
14ccd979bdSMark Fasheh  *
15ccd979bdSMark Fasheh  * This program is distributed in the hope that it will be useful,
16ccd979bdSMark Fasheh  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17ccd979bdSMark Fasheh  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18ccd979bdSMark Fasheh  * General Public License for more details.
19ccd979bdSMark Fasheh  *
20ccd979bdSMark Fasheh  * You should have received a copy of the GNU General Public
21ccd979bdSMark Fasheh  * License along with this program; if not, write to the
22ccd979bdSMark Fasheh  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh  */
25ccd979bdSMark Fasheh 
26ccd979bdSMark Fasheh #include <linux/types.h>
27ccd979bdSMark Fasheh #include <linux/slab.h>
28ccd979bdSMark Fasheh #include <linux/highmem.h>
29ccd979bdSMark Fasheh #include <linux/mm.h>
30ccd979bdSMark Fasheh #include <linux/kthread.h>
31ccd979bdSMark Fasheh #include <linux/pagemap.h>
32ccd979bdSMark Fasheh #include <linux/debugfs.h>
33ccd979bdSMark Fasheh #include <linux/seq_file.h>
348ddb7b00SSunil Mushran #include <linux/time.h>
359e33d69fSJan Kara #include <linux/quotaops.h>
36ccd979bdSMark Fasheh 
37ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE
38ccd979bdSMark Fasheh #include <cluster/masklog.h>
39ccd979bdSMark Fasheh 
40ccd979bdSMark Fasheh #include "ocfs2.h"
41d24fbcdaSJoel Becker #include "ocfs2_lockingver.h"
42ccd979bdSMark Fasheh 
43ccd979bdSMark Fasheh #include "alloc.h"
44d680efe9SMark Fasheh #include "dcache.h"
45ccd979bdSMark Fasheh #include "dlmglue.h"
46ccd979bdSMark Fasheh #include "extent_map.h"
477f1a37e3STiger Yang #include "file.h"
48ccd979bdSMark Fasheh #include "heartbeat.h"
49ccd979bdSMark Fasheh #include "inode.h"
50ccd979bdSMark Fasheh #include "journal.h"
5124ef1815SJoel Becker #include "stackglue.h"
52ccd979bdSMark Fasheh #include "slot_map.h"
53ccd979bdSMark Fasheh #include "super.h"
54ccd979bdSMark Fasheh #include "uptodate.h"
559e33d69fSJan Kara #include "quota.h"
568dec98edSTao Ma #include "refcounttree.h"
57ccd979bdSMark Fasheh 
58ccd979bdSMark Fasheh #include "buffer_head_io.h"
59ccd979bdSMark Fasheh 
60ccd979bdSMark Fasheh struct ocfs2_mask_waiter {
61ccd979bdSMark Fasheh 	struct list_head	mw_item;
62ccd979bdSMark Fasheh 	int			mw_status;
63ccd979bdSMark Fasheh 	struct completion	mw_complete;
64ccd979bdSMark Fasheh 	unsigned long		mw_mask;
65ccd979bdSMark Fasheh 	unsigned long		mw_goal;
668ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
675bc970e8SSunil Mushran 	ktime_t			mw_lock_start;
688ddb7b00SSunil Mushran #endif
69ccd979bdSMark Fasheh };
70ccd979bdSMark Fasheh 
/* ->get_osb() implementations; the definitions appear later in this file. */
static struct ocfs2_super *
ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
75ccd979bdSMark Fasheh 
/*
 * Return value from ->downconvert_worker functions.
 *
 * These control the precise actions of ocfs2_unblock_lock()
 * and ocfs2_process_blocked_lock()
 */
enum ocfs2_unblock_action {
	/* Continue downconvert */
	UNBLOCK_CONTINUE	= 0,
	/* Continue downconvert, fire ->post_unlock callback */
	UNBLOCK_CONTINUE_POST	= 1,
	/* Do not downconvert, fire ->post_unlock() callback. */
	UNBLOCK_STOP_POST	= 2,
};
90d680efe9SMark Fasheh 
91d680efe9SMark Fasheh struct ocfs2_unblock_ctl {
92d680efe9SMark Fasheh 	int requeue;
93d680efe9SMark Fasheh 	enum ocfs2_unblock_action unblock_action;
94d680efe9SMark Fasheh };
95d680efe9SMark Fasheh 
/*
 * Lockdep class keys, one per OCFS2 lock type.
 *
 * The array is indexed by lock type when a lockres' lockdep map is set up
 * in ocfs2_lock_res_init_common(), which only happens under
 * CONFIG_DEBUG_LOCK_ALLOC. Keep it file-local (the name is far too
 * generic for a kernel-wide symbol) and compile it out otherwise so it
 * does not trigger an unused-variable warning.
 *
 * NOTE(review): confirm no other translation unit declares this extern.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
#endif
98cb25797dSJan Kara 
/*
 * Lock-type specific callbacks referenced by the ocfs2_lock_res_ops
 * tables in this file.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level);
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);

static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking);

static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking);
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres);

static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
					    int new_level);
static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
					 int blocking);
1188dec98edSTao Ma 
/*
 * Dump the meta LVB of __lockres at mlog mask __level, tagging the
 * output with the calling function and line.
 */
#define mlog_meta_lvb(__level, __lockres)				\
	ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__,		\
				 __LINE__, __lockres)
1206cb129f5SAdrian Bunk 
1216cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. */
1226cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level,
1236cb129f5SAdrian Bunk 				     const char *function,
1246cb129f5SAdrian Bunk 				     unsigned int line,
1256cb129f5SAdrian Bunk 				     struct ocfs2_lock_res *lockres)
1266cb129f5SAdrian Bunk {
127a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1286cb129f5SAdrian Bunk 
1296cb129f5SAdrian Bunk 	mlog(level, "LVB information for %s (called from %s:%u):\n",
1306cb129f5SAdrian Bunk 	     lockres->l_name, function, line);
1316cb129f5SAdrian Bunk 	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
1326cb129f5SAdrian Bunk 	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
1336cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_igeneration));
1346cb129f5SAdrian Bunk 	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
1356cb129f5SAdrian Bunk 	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
1366cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
1376cb129f5SAdrian Bunk 	     be16_to_cpu(lvb->lvb_imode));
1386cb129f5SAdrian Bunk 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
1396cb129f5SAdrian Bunk 	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
1406cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
1416cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
1426cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
1436cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iattr));
1446cb129f5SAdrian Bunk }
1456cb129f5SAdrian Bunk 
1466cb129f5SAdrian Bunk 
/*
 * OCFS2 Lock Resource Operations
 *
 * Per lock type hooks and flags that fine tune the generic dlmglue
 * locking infrastructure.
 *
 * The simplest lock types point ->l_priv at their struct ocfs2_super and
 * let the default actions manage everything.
 *
 * For now each lock type must also provide an init function plus trivial
 * lock/unlock wrappers. Call ocfs2_simple_drop_lockres() once the lock is
 * no longer needed (i.e., when the owning object is destroyed).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Map an ocfs2_lock_res * to its ocfs2_super *. Only needed when
	 * ->l_priv is not itself an ocfs2_super pointer.
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optional. Invoked from the downconvert thread after a
	 * successful downconvert. The lockres is not referenced again
	 * once this callback has been called, so freeing memory etc. is
	 * safe here.
	 *
	 * Exactly when this is called is decided by
	 * ->downconvert_worker().
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Lets a lock type add its own checks on whether a downconvert
	 * is currently safe. Return 0 to re-queue the downconvert for
	 * later, nonzero to go ahead.
	 *
	 * Most locks need nothing beyond the default check that there
	 * are no incompatible holders.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Lets a lock type fill in the lock value block. Called on
	 * downconvert and when a lock is dropped.
	 *
	 * Lock types using this should set LOCK_TYPE_USES_LVB in the
	 * flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Called from the downconvert thread once it has been decided
	 * that a lock will be downconverted. No locks are held at this
	 * point, so the function may do work that schedules (syncing
	 * out data, etc).
	 *
	 * Must return one of the ocfs2_unblock_action values to tell the
	 * thread what to do next.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* flags describing the specific requirements of a
	 * lock type. Each flag is described at its definition.
	 */
	int flags;
};
218ccd979bdSMark Fasheh 
/*
 * Some lock types want to "refresh" potentially stale data the first
 * time a meaningful (PRMODE or EXMODE) level is obtained. With this flag
 * set, the ast function sets OCFS2_LOCK_NEEDS_REFRESH in the individual
 * lockres' l_flags; the locking wrapper is expected to clear
 * OCFS2_LOCK_NEEDS_REFRESH again when it is done.
 */
#define LOCK_TYPE_REQUIRES_REFRESH	0x1

/*
 * The lock type makes use of the lock value block. The ->set_lvb lock
 * type callback must be defined.
 */
#define LOCK_TYPE_USES_LVB		0x2
234b80fc012SMark Fasheh 
235ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
23654a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
237f625c979SMark Fasheh 	.flags		= 0,
238ccd979bdSMark Fasheh };
239ccd979bdSMark Fasheh 
240e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
24154a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
242810d5aebSMark Fasheh 	.check_downconvert = ocfs2_check_meta_downconvert,
243810d5aebSMark Fasheh 	.set_lvb	= ocfs2_set_meta_lvb,
244f1f54068SMark Fasheh 	.downconvert_worker = ocfs2_data_convert_worker,
245b80fc012SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
246ccd979bdSMark Fasheh };
247ccd979bdSMark Fasheh 
248ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = {
249f625c979SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
250ccd979bdSMark Fasheh };
251ccd979bdSMark Fasheh 
252ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
253f625c979SMark Fasheh 	.flags		= 0,
254ccd979bdSMark Fasheh };
255ccd979bdSMark Fasheh 
2566ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
2576ca497a8Swengang wang 	.flags		= 0,
2586ca497a8Swengang wang };
2596ca497a8Swengang wang 
26083273932SSrinivas Eeda static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
26183273932SSrinivas Eeda 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
26283273932SSrinivas Eeda };
26383273932SSrinivas Eeda 
264d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
26554a7e755SMark Fasheh 	.get_osb	= ocfs2_get_dentry_osb,
266d680efe9SMark Fasheh 	.post_unlock	= ocfs2_dentry_post_unlock,
267cc567d89SMark Fasheh 	.downconvert_worker = ocfs2_dentry_convert_worker,
268f625c979SMark Fasheh 	.flags		= 0,
269d680efe9SMark Fasheh };
270d680efe9SMark Fasheh 
27150008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
27250008630STiger Yang 	.get_osb	= ocfs2_get_inode_osb,
27350008630STiger Yang 	.flags		= 0,
27450008630STiger Yang };
27550008630STiger Yang 
276cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
277cf8e06f1SMark Fasheh 	.get_osb	= ocfs2_get_file_osb,
278cf8e06f1SMark Fasheh 	.flags		= 0,
279cf8e06f1SMark Fasheh };
280cf8e06f1SMark Fasheh 
2819e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
2829e33d69fSJan Kara 	.set_lvb	= ocfs2_set_qinfo_lvb,
2839e33d69fSJan Kara 	.get_osb	= ocfs2_get_qinfo_osb,
2849e33d69fSJan Kara 	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
2859e33d69fSJan Kara };
2869e33d69fSJan Kara 
2878dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
2888dec98edSTao Ma 	.check_downconvert = ocfs2_check_refcount_downconvert,
2898dec98edSTao Ma 	.downconvert_worker = ocfs2_refcount_convert_worker,
2908dec98edSTao Ma 	.flags		= 0,
2918dec98edSTao Ma };
2928dec98edSTao Ma 
293ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
294ccd979bdSMark Fasheh {
295ccd979bdSMark Fasheh 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
29650008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_RW ||
29750008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
298ccd979bdSMark Fasheh }
299ccd979bdSMark Fasheh 
300c0e41338SJoel Becker static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
301a796d286SJoel Becker {
302a796d286SJoel Becker 	return container_of(lksb, struct ocfs2_lock_res, l_lksb);
303a796d286SJoel Becker }
304a796d286SJoel Becker 
305ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
306ccd979bdSMark Fasheh {
307ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_is_inode_lock(lockres));
308ccd979bdSMark Fasheh 
309ccd979bdSMark Fasheh 	return (struct inode *) lockres->l_priv;
310ccd979bdSMark Fasheh }
311ccd979bdSMark Fasheh 
312d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
313d680efe9SMark Fasheh {
314d680efe9SMark Fasheh 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
315d680efe9SMark Fasheh 
316d680efe9SMark Fasheh 	return (struct ocfs2_dentry_lock *)lockres->l_priv;
317d680efe9SMark Fasheh }
318d680efe9SMark Fasheh 
3199e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
3209e33d69fSJan Kara {
3219e33d69fSJan Kara 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
3229e33d69fSJan Kara 
3239e33d69fSJan Kara 	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
3249e33d69fSJan Kara }
3259e33d69fSJan Kara 
3268dec98edSTao Ma static inline struct ocfs2_refcount_tree *
3278dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
3288dec98edSTao Ma {
3298dec98edSTao Ma 	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
3308dec98edSTao Ma }
3318dec98edSTao Ma 
33254a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
33354a7e755SMark Fasheh {
33454a7e755SMark Fasheh 	if (lockres->l_ops->get_osb)
33554a7e755SMark Fasheh 		return lockres->l_ops->get_osb(lockres);
33654a7e755SMark Fasheh 
33754a7e755SMark Fasheh 	return (struct ocfs2_super *)lockres->l_priv;
33854a7e755SMark Fasheh }
33954a7e755SMark Fasheh 
340ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
341ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
342ccd979bdSMark Fasheh 			     int level,
343bd3e7610SJoel Becker 			     u32 dlm_flags);
344ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
345ccd979bdSMark Fasheh 						     int wanted);
346cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
347ccd979bdSMark Fasheh 				   struct ocfs2_lock_res *lockres,
348cb25797dSJan Kara 				   int level, unsigned long caller_ip);
349cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
350cb25797dSJan Kara 					struct ocfs2_lock_res *lockres,
351cb25797dSJan Kara 					int level)
352cb25797dSJan Kara {
353cb25797dSJan Kara 	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
354cb25797dSJan Kara }
355cb25797dSJan Kara 
/* Forward declarations for helpers whose definitions are not shown here. */
static inline void
ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
				     int level);
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static inline void
ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, int convert);
/*
 * Log a DLM failure at ML_ERROR. @_func is the name of the DLM call that
 * failed, @_err its status and @_lockres the lock resource involved.
 *
 * Dentry lock names carry the inode number as a binary value (see
 * ocfs2_get_dentry_lock_ino()), so for those only the first
 * OCFS2_DENTRY_LOCK_INO_START - 1 characters are printed as text and the
 * inode number is appended in hex.
 *
 * Every use of @_lockres is parenthesized so that any pointer-valued
 * expression can be passed. Note that @_lockres is evaluated more than
 * once.
 */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     _err, _func, (_lockres)->l_name);					\
	else										\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name,	\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
} while (0)
/* Forward declarations for helpers whose definitions are not shown here. */
static int ocfs2_downconvert_thread(void *arg);
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_inode_lock_update(struct inode *inode,
				   struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
static unsigned int
ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, int new_level);
static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
				  struct ocfs2_lock_res *lockres,
				  int new_level, int lvb,
				  unsigned int generation);
static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_cancel_convert(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres);
391cf8e06f1SMark Fasheh 
392ccd979bdSMark Fasheh 
393ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
394ccd979bdSMark Fasheh 				  u64 blkno,
395ccd979bdSMark Fasheh 				  u32 generation,
396ccd979bdSMark Fasheh 				  char *name)
397ccd979bdSMark Fasheh {
398ccd979bdSMark Fasheh 	int len;
399ccd979bdSMark Fasheh 
400ccd979bdSMark Fasheh 	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
401ccd979bdSMark Fasheh 
402b0697053SMark Fasheh 	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
403b0697053SMark Fasheh 		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
404b0697053SMark Fasheh 		       (long long)blkno, generation);
405ccd979bdSMark Fasheh 
406ccd979bdSMark Fasheh 	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
407ccd979bdSMark Fasheh 
408ccd979bdSMark Fasheh 	mlog(0, "built lock resource with name: %s\n", name);
409ccd979bdSMark Fasheh }
410ccd979bdSMark Fasheh 
/*
 * Held while a lockres' l_debug_list entry is added to or removed from
 * a d_lockres_tracking list.
 */
static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
412ccd979bdSMark Fasheh 
413ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
414ccd979bdSMark Fasheh 				       struct ocfs2_dlm_debug *dlm_debug)
415ccd979bdSMark Fasheh {
416ccd979bdSMark Fasheh 	mlog(0, "Add tracking for lockres %s\n", res->l_name);
417ccd979bdSMark Fasheh 
418ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
419ccd979bdSMark Fasheh 	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
420ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
421ccd979bdSMark Fasheh }
422ccd979bdSMark Fasheh 
423ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
424ccd979bdSMark Fasheh {
425ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
426ccd979bdSMark Fasheh 	if (!list_empty(&res->l_debug_list))
427ccd979bdSMark Fasheh 		list_del_init(&res->l_debug_list);
428ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
429ccd979bdSMark Fasheh }
430ccd979bdSMark Fasheh 
4318ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
4328ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
4338ddb7b00SSunil Mushran {
4348ddb7b00SSunil Mushran 	res->l_lock_refresh = 0;
4355bc970e8SSunil Mushran 	memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
4365bc970e8SSunil Mushran 	memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
4378ddb7b00SSunil Mushran }
4388ddb7b00SSunil Mushran 
4398ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
4408ddb7b00SSunil Mushran 				    struct ocfs2_mask_waiter *mw, int ret)
4418ddb7b00SSunil Mushran {
4425bc970e8SSunil Mushran 	u32 usec;
4435bc970e8SSunil Mushran 	ktime_t kt;
4445bc970e8SSunil Mushran 	struct ocfs2_lock_stats *stats;
4458ddb7b00SSunil Mushran 
4465bc970e8SSunil Mushran 	if (level == LKM_PRMODE)
4475bc970e8SSunil Mushran 		stats = &res->l_lock_prmode;
4485bc970e8SSunil Mushran 	else if (level == LKM_EXMODE)
4495bc970e8SSunil Mushran 		stats = &res->l_lock_exmode;
4505bc970e8SSunil Mushran 	else
4518ddb7b00SSunil Mushran 		return;
4528ddb7b00SSunil Mushran 
4535bc970e8SSunil Mushran 	kt = ktime_sub(ktime_get(), mw->mw_lock_start);
4545bc970e8SSunil Mushran 	usec = ktime_to_us(kt);
4555bc970e8SSunil Mushran 
4565bc970e8SSunil Mushran 	stats->ls_gets++;
4575bc970e8SSunil Mushran 	stats->ls_total += ktime_to_ns(kt);
4585bc970e8SSunil Mushran 	/* overflow */
45916865b7cSroel 	if (unlikely(stats->ls_gets == 0)) {
4605bc970e8SSunil Mushran 		stats->ls_gets++;
4615bc970e8SSunil Mushran 		stats->ls_total = ktime_to_ns(kt);
4625bc970e8SSunil Mushran 	}
4635bc970e8SSunil Mushran 
4645bc970e8SSunil Mushran 	if (stats->ls_max < usec)
4655bc970e8SSunil Mushran 		stats->ls_max = usec;
4665bc970e8SSunil Mushran 
4678ddb7b00SSunil Mushran 	if (ret)
4685bc970e8SSunil Mushran 		stats->ls_fail++;
4698ddb7b00SSunil Mushran }
4708ddb7b00SSunil Mushran 
4718ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
4728ddb7b00SSunil Mushran {
4738ddb7b00SSunil Mushran 	lockres->l_lock_refresh++;
4748ddb7b00SSunil Mushran }
4758ddb7b00SSunil Mushran 
4768ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
4778ddb7b00SSunil Mushran {
4785bc970e8SSunil Mushran 	mw->mw_lock_start = ktime_get();
4798ddb7b00SSunil Mushran }
4808ddb7b00SSunil Mushran #else
/* Lock statistics are compiled out: every helper below is a no-op. */
static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
}

static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
					   int level,
					   struct ocfs2_mask_waiter *mw,
					   int ret)
{
}

static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
}

static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
}
4948ddb7b00SSunil Mushran #endif
4958ddb7b00SSunil Mushran 
496ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
497ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *res,
498ccd979bdSMark Fasheh 				       enum ocfs2_lock_type type,
499ccd979bdSMark Fasheh 				       struct ocfs2_lock_res_ops *ops,
500ccd979bdSMark Fasheh 				       void *priv)
501ccd979bdSMark Fasheh {
502ccd979bdSMark Fasheh 	res->l_type          = type;
503ccd979bdSMark Fasheh 	res->l_ops           = ops;
504ccd979bdSMark Fasheh 	res->l_priv          = priv;
505ccd979bdSMark Fasheh 
506bd3e7610SJoel Becker 	res->l_level         = DLM_LOCK_IV;
507bd3e7610SJoel Becker 	res->l_requested     = DLM_LOCK_IV;
508bd3e7610SJoel Becker 	res->l_blocking      = DLM_LOCK_IV;
509ccd979bdSMark Fasheh 	res->l_action        = OCFS2_AST_INVALID;
510ccd979bdSMark Fasheh 	res->l_unlock_action = OCFS2_UNLOCK_INVALID;
511ccd979bdSMark Fasheh 
512ccd979bdSMark Fasheh 	res->l_flags         = OCFS2_LOCK_INITIALIZED;
513ccd979bdSMark Fasheh 
514ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
5158ddb7b00SSunil Mushran 
5168ddb7b00SSunil Mushran 	ocfs2_init_lock_stats(res);
517cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
518cb25797dSJan Kara 	if (type != OCFS2_LOCK_TYPE_OPEN)
519cb25797dSJan Kara 		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
520cb25797dSJan Kara 				 &lockdep_keys[type], 0);
521cb25797dSJan Kara 	else
522cb25797dSJan Kara 		res->l_lockdep_map.key = NULL;
523cb25797dSJan Kara #endif
524ccd979bdSMark Fasheh }
525ccd979bdSMark Fasheh 
526ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
527ccd979bdSMark Fasheh {
528ccd979bdSMark Fasheh 	/* This also clears out the lock status block */
529ccd979bdSMark Fasheh 	memset(res, 0, sizeof(struct ocfs2_lock_res));
530ccd979bdSMark Fasheh 	spin_lock_init(&res->l_lock);
531ccd979bdSMark Fasheh 	init_waitqueue_head(&res->l_event);
532ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_blocked_list);
533ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_mask_waiters);
534ccd979bdSMark Fasheh }
535ccd979bdSMark Fasheh 
536ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
537ccd979bdSMark Fasheh 			       enum ocfs2_lock_type type,
53824c19ef4SMark Fasheh 			       unsigned int generation,
539ccd979bdSMark Fasheh 			       struct inode *inode)
540ccd979bdSMark Fasheh {
541ccd979bdSMark Fasheh 	struct ocfs2_lock_res_ops *ops;
542ccd979bdSMark Fasheh 
543ccd979bdSMark Fasheh 	switch(type) {
544ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_RW:
545ccd979bdSMark Fasheh 			ops = &ocfs2_inode_rw_lops;
546ccd979bdSMark Fasheh 			break;
547ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_META:
548e63aecb6SMark Fasheh 			ops = &ocfs2_inode_inode_lops;
549ccd979bdSMark Fasheh 			break;
55050008630STiger Yang 		case OCFS2_LOCK_TYPE_OPEN:
55150008630STiger Yang 			ops = &ocfs2_inode_open_lops;
55250008630STiger Yang 			break;
553ccd979bdSMark Fasheh 		default:
554ccd979bdSMark Fasheh 			mlog_bug_on_msg(1, "type: %d\n", type);
555ccd979bdSMark Fasheh 			ops = NULL; /* thanks, gcc */
556ccd979bdSMark Fasheh 			break;
557ccd979bdSMark Fasheh 	};
558ccd979bdSMark Fasheh 
559d680efe9SMark Fasheh 	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
56024c19ef4SMark Fasheh 			      generation, res->l_name);
561d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
562d680efe9SMark Fasheh }
563d680efe9SMark Fasheh 
56454a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
56554a7e755SMark Fasheh {
56654a7e755SMark Fasheh 	struct inode *inode = ocfs2_lock_res_inode(lockres);
56754a7e755SMark Fasheh 
56854a7e755SMark Fasheh 	return OCFS2_SB(inode->i_sb);
56954a7e755SMark Fasheh }
57054a7e755SMark Fasheh 
5719e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
5729e33d69fSJan Kara {
5739e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *info = lockres->l_priv;
5749e33d69fSJan Kara 
5759e33d69fSJan Kara 	return OCFS2_SB(info->dqi_gi.dqi_sb);
5769e33d69fSJan Kara }
5779e33d69fSJan Kara 
578cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
579cf8e06f1SMark Fasheh {
580cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = lockres->l_priv;
581cf8e06f1SMark Fasheh 
582cf8e06f1SMark Fasheh 	return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
583cf8e06f1SMark Fasheh }
584cf8e06f1SMark Fasheh 
585d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
586d680efe9SMark Fasheh {
587d680efe9SMark Fasheh 	__be64 inode_blkno_be;
588d680efe9SMark Fasheh 
589d680efe9SMark Fasheh 	memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
590d680efe9SMark Fasheh 	       sizeof(__be64));
591d680efe9SMark Fasheh 
592d680efe9SMark Fasheh 	return be64_to_cpu(inode_blkno_be);
593d680efe9SMark Fasheh }
594d680efe9SMark Fasheh 
59554a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
59654a7e755SMark Fasheh {
59754a7e755SMark Fasheh 	struct ocfs2_dentry_lock *dl = lockres->l_priv;
59854a7e755SMark Fasheh 
59954a7e755SMark Fasheh 	return OCFS2_SB(dl->dl_inode->i_sb);
60054a7e755SMark Fasheh }
60154a7e755SMark Fasheh 
602d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
603d680efe9SMark Fasheh 				u64 parent, struct inode *inode)
604d680efe9SMark Fasheh {
605d680efe9SMark Fasheh 	int len;
606d680efe9SMark Fasheh 	u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
607d680efe9SMark Fasheh 	__be64 inode_blkno_be = cpu_to_be64(inode_blkno);
608d680efe9SMark Fasheh 	struct ocfs2_lock_res *lockres = &dl->dl_lockres;
609d680efe9SMark Fasheh 
610d680efe9SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
611d680efe9SMark Fasheh 
612d680efe9SMark Fasheh 	/*
613d680efe9SMark Fasheh 	 * Unfortunately, the standard lock naming scheme won't work
614d680efe9SMark Fasheh 	 * here because we have two 16 byte values to use. Instead,
615d680efe9SMark Fasheh 	 * we'll stuff the inode number as a binary value. We still
616d680efe9SMark Fasheh 	 * want error prints to show something without garbling the
617d680efe9SMark Fasheh 	 * display, so drop a null byte in there before the inode
618d680efe9SMark Fasheh 	 * number. A future version of OCFS2 will likely use all
619d680efe9SMark Fasheh 	 * binary lock names. The stringified names have been a
620d680efe9SMark Fasheh 	 * tremendous aid in debugging, but now that the debugfs
621d680efe9SMark Fasheh 	 * interface exists, we can mangle things there if need be.
622d680efe9SMark Fasheh 	 *
623d680efe9SMark Fasheh 	 * NOTE: We also drop the standard "pad" value (the total lock
624d680efe9SMark Fasheh 	 * name size stays the same though - the last part is all
625d680efe9SMark Fasheh 	 * zeros due to the memset in ocfs2_lock_res_init_once()
626d680efe9SMark Fasheh 	 */
627d680efe9SMark Fasheh 	len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
628d680efe9SMark Fasheh 		       "%c%016llx",
629d680efe9SMark Fasheh 		       ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
630d680efe9SMark Fasheh 		       (long long)parent);
631d680efe9SMark Fasheh 
632d680efe9SMark Fasheh 	BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
633d680efe9SMark Fasheh 
634d680efe9SMark Fasheh 	memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
635d680efe9SMark Fasheh 	       sizeof(__be64));
636d680efe9SMark Fasheh 
637d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
638d680efe9SMark Fasheh 				   OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
639d680efe9SMark Fasheh 				   dl);
640ccd979bdSMark Fasheh }
641ccd979bdSMark Fasheh 
642ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
643ccd979bdSMark Fasheh 				      struct ocfs2_super *osb)
644ccd979bdSMark Fasheh {
645ccd979bdSMark Fasheh 	/* Superblock lockres doesn't come from a slab so we call init
646ccd979bdSMark Fasheh 	 * once on it manually.  */
647ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
648d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
649d680efe9SMark Fasheh 			      0, res->l_name);
650ccd979bdSMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
651ccd979bdSMark Fasheh 				   &ocfs2_super_lops, osb);
652ccd979bdSMark Fasheh }
653ccd979bdSMark Fasheh 
654ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
655ccd979bdSMark Fasheh 				       struct ocfs2_super *osb)
656ccd979bdSMark Fasheh {
657ccd979bdSMark Fasheh 	/* Rename lockres doesn't come from a slab so we call init
658ccd979bdSMark Fasheh 	 * once on it manually.  */
659ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
660d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
661d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
662ccd979bdSMark Fasheh 				   &ocfs2_rename_lops, osb);
663ccd979bdSMark Fasheh }
664ccd979bdSMark Fasheh 
6656ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
6666ca497a8Swengang wang 					 struct ocfs2_super *osb)
6676ca497a8Swengang wang {
6686ca497a8Swengang wang 	/* nfs_sync lockres doesn't come from a slab so we call init
6696ca497a8Swengang wang 	 * once on it manually.  */
6706ca497a8Swengang wang 	ocfs2_lock_res_init_once(res);
6716ca497a8Swengang wang 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
6726ca497a8Swengang wang 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
6736ca497a8Swengang wang 				   &ocfs2_nfs_sync_lops, osb);
6746ca497a8Swengang wang }
6756ca497a8Swengang wang 
67683273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
67783273932SSrinivas Eeda 					    struct ocfs2_super *osb)
67883273932SSrinivas Eeda {
67983273932SSrinivas Eeda 	ocfs2_lock_res_init_once(res);
68083273932SSrinivas Eeda 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
68183273932SSrinivas Eeda 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
68283273932SSrinivas Eeda 				   &ocfs2_orphan_scan_lops, osb);
68383273932SSrinivas Eeda }
68483273932SSrinivas Eeda 
685cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
686cf8e06f1SMark Fasheh 			      struct ocfs2_file_private *fp)
687cf8e06f1SMark Fasheh {
688cf8e06f1SMark Fasheh 	struct inode *inode = fp->fp_file->f_mapping->host;
689cf8e06f1SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
690cf8e06f1SMark Fasheh 
691cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
692cf8e06f1SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
693cf8e06f1SMark Fasheh 			      inode->i_generation, lockres->l_name);
694cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
695cf8e06f1SMark Fasheh 				   OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
696cf8e06f1SMark Fasheh 				   fp);
697cf8e06f1SMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_NOCACHE;
698cf8e06f1SMark Fasheh }
699cf8e06f1SMark Fasheh 
7009e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
7019e33d69fSJan Kara 			       struct ocfs2_mem_dqinfo *info)
7029e33d69fSJan Kara {
7039e33d69fSJan Kara 	ocfs2_lock_res_init_once(lockres);
7049e33d69fSJan Kara 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
7059e33d69fSJan Kara 			      0, lockres->l_name);
7069e33d69fSJan Kara 	ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
7079e33d69fSJan Kara 				   OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
7089e33d69fSJan Kara 				   info);
7099e33d69fSJan Kara }
7109e33d69fSJan Kara 
7118dec98edSTao Ma void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
7128dec98edSTao Ma 				  struct ocfs2_super *osb, u64 ref_blkno,
7138dec98edSTao Ma 				  unsigned int generation)
7148dec98edSTao Ma {
7158dec98edSTao Ma 	ocfs2_lock_res_init_once(lockres);
7168dec98edSTao Ma 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
7178dec98edSTao Ma 			      generation, lockres->l_name);
7188dec98edSTao Ma 	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
7198dec98edSTao Ma 				   &ocfs2_refcount_block_lops, osb);
7208dec98edSTao Ma }
7218dec98edSTao Ma 
722ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
723ccd979bdSMark Fasheh {
724ccd979bdSMark Fasheh 	if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
725ccd979bdSMark Fasheh 		return;
726ccd979bdSMark Fasheh 
727ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
728ccd979bdSMark Fasheh 
729ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
730ccd979bdSMark Fasheh 			"Lockres %s is on the blocked list\n",
731ccd979bdSMark Fasheh 			res->l_name);
732ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
733ccd979bdSMark Fasheh 			"Lockres %s has mask waiters pending\n",
734ccd979bdSMark Fasheh 			res->l_name);
735ccd979bdSMark Fasheh 	mlog_bug_on_msg(spin_is_locked(&res->l_lock),
736ccd979bdSMark Fasheh 			"Lockres %s is locked\n",
737ccd979bdSMark Fasheh 			res->l_name);
738ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ro_holders,
739ccd979bdSMark Fasheh 			"Lockres %s has %u ro holders\n",
740ccd979bdSMark Fasheh 			res->l_name, res->l_ro_holders);
741ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ex_holders,
742ccd979bdSMark Fasheh 			"Lockres %s has %u ex holders\n",
743ccd979bdSMark Fasheh 			res->l_name, res->l_ex_holders);
744ccd979bdSMark Fasheh 
745ccd979bdSMark Fasheh 	/* Need to clear out the lock status block for the dlm */
746ccd979bdSMark Fasheh 	memset(&res->l_lksb, 0, sizeof(res->l_lksb));
747ccd979bdSMark Fasheh 
748ccd979bdSMark Fasheh 	res->l_flags = 0UL;
749ccd979bdSMark Fasheh }
750ccd979bdSMark Fasheh 
751ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
752ccd979bdSMark Fasheh 				     int level)
753ccd979bdSMark Fasheh {
754ccd979bdSMark Fasheh 	BUG_ON(!lockres);
755ccd979bdSMark Fasheh 
756ccd979bdSMark Fasheh 	switch(level) {
757bd3e7610SJoel Becker 	case DLM_LOCK_EX:
758ccd979bdSMark Fasheh 		lockres->l_ex_holders++;
759ccd979bdSMark Fasheh 		break;
760bd3e7610SJoel Becker 	case DLM_LOCK_PR:
761ccd979bdSMark Fasheh 		lockres->l_ro_holders++;
762ccd979bdSMark Fasheh 		break;
763ccd979bdSMark Fasheh 	default:
764ccd979bdSMark Fasheh 		BUG();
765ccd979bdSMark Fasheh 	}
766ccd979bdSMark Fasheh }
767ccd979bdSMark Fasheh 
768ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
769ccd979bdSMark Fasheh 				     int level)
770ccd979bdSMark Fasheh {
771ccd979bdSMark Fasheh 	BUG_ON(!lockres);
772ccd979bdSMark Fasheh 
773ccd979bdSMark Fasheh 	switch(level) {
774bd3e7610SJoel Becker 	case DLM_LOCK_EX:
775ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ex_holders);
776ccd979bdSMark Fasheh 		lockres->l_ex_holders--;
777ccd979bdSMark Fasheh 		break;
778bd3e7610SJoel Becker 	case DLM_LOCK_PR:
779ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ro_holders);
780ccd979bdSMark Fasheh 		lockres->l_ro_holders--;
781ccd979bdSMark Fasheh 		break;
782ccd979bdSMark Fasheh 	default:
783ccd979bdSMark Fasheh 		BUG();
784ccd979bdSMark Fasheh 	}
785ccd979bdSMark Fasheh }
786ccd979bdSMark Fasheh 
787ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock
788ccd979bdSMark Fasheh  * levels are EX, PR, and NL. It *will* have to be adjusted when more
789ccd979bdSMark Fasheh  * lock types are added. */
790ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level)
791ccd979bdSMark Fasheh {
792bd3e7610SJoel Becker 	int new_level = DLM_LOCK_EX;
793ccd979bdSMark Fasheh 
794bd3e7610SJoel Becker 	if (level == DLM_LOCK_EX)
795bd3e7610SJoel Becker 		new_level = DLM_LOCK_NL;
796bd3e7610SJoel Becker 	else if (level == DLM_LOCK_PR)
797bd3e7610SJoel Becker 		new_level = DLM_LOCK_PR;
798ccd979bdSMark Fasheh 	return new_level;
799ccd979bdSMark Fasheh }
800ccd979bdSMark Fasheh 
801ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres,
802ccd979bdSMark Fasheh 			      unsigned long newflags)
803ccd979bdSMark Fasheh {
804800deef3SChristoph Hellwig 	struct ocfs2_mask_waiter *mw, *tmp;
805ccd979bdSMark Fasheh 
806ccd979bdSMark Fasheh  	assert_spin_locked(&lockres->l_lock);
807ccd979bdSMark Fasheh 
808ccd979bdSMark Fasheh 	lockres->l_flags = newflags;
809ccd979bdSMark Fasheh 
810800deef3SChristoph Hellwig 	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
811ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
812ccd979bdSMark Fasheh 			continue;
813ccd979bdSMark Fasheh 
814ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
815ccd979bdSMark Fasheh 		mw->mw_status = 0;
816ccd979bdSMark Fasheh 		complete(&mw->mw_complete);
817ccd979bdSMark Fasheh 	}
818ccd979bdSMark Fasheh }
819ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
820ccd979bdSMark Fasheh {
821ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags | or);
822ccd979bdSMark Fasheh }
823ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
824ccd979bdSMark Fasheh 				unsigned long clear)
825ccd979bdSMark Fasheh {
826ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags & ~clear);
827ccd979bdSMark Fasheh }
828ccd979bdSMark Fasheh 
829ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
830ccd979bdSMark Fasheh {
831ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
832ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
833ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
834bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
835ccd979bdSMark Fasheh 
836ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
837ccd979bdSMark Fasheh 	if (lockres->l_level <=
838ccd979bdSMark Fasheh 	    ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
839bd3e7610SJoel Becker 		lockres->l_blocking = DLM_LOCK_NL;
840ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
841ccd979bdSMark Fasheh 	}
842ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
843ccd979bdSMark Fasheh }
844ccd979bdSMark Fasheh 
845ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
846ccd979bdSMark Fasheh {
847ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
848ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
849ccd979bdSMark Fasheh 
850ccd979bdSMark Fasheh 	/* Convert from RO to EX doesn't really need anything as our
851ccd979bdSMark Fasheh 	 * information is already up to data. Convert from NL to
852ccd979bdSMark Fasheh 	 * *anything* however should mark ourselves as needing an
853ccd979bdSMark Fasheh 	 * update */
854bd3e7610SJoel Becker 	if (lockres->l_level == DLM_LOCK_NL &&
855f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
856ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
857ccd979bdSMark Fasheh 
858ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
859a1912826SSunil Mushran 
860a1912826SSunil Mushran 	/*
861a1912826SSunil Mushran 	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
862a1912826SSunil Mushran 	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
863a1912826SSunil Mushran 	 * downconverting the lock before the upconvert has fully completed.
864a1912826SSunil Mushran 	 */
865a1912826SSunil Mushran 	lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
866a1912826SSunil Mushran 
867ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
868ccd979bdSMark Fasheh }
869ccd979bdSMark Fasheh 
870ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
871ccd979bdSMark Fasheh {
8723cf0c507SRoel Kluin 	BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
873ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
874ccd979bdSMark Fasheh 
875bd3e7610SJoel Becker 	if (lockres->l_requested > DLM_LOCK_NL &&
876f625c979SMark Fasheh 	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
877f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
878ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
879ccd979bdSMark Fasheh 
880ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
881ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
882ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
883ccd979bdSMark Fasheh }
884ccd979bdSMark Fasheh 
885ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
886ccd979bdSMark Fasheh 				     int level)
887ccd979bdSMark Fasheh {
888ccd979bdSMark Fasheh 	int needs_downconvert = 0;
889ccd979bdSMark Fasheh 
890ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
891ccd979bdSMark Fasheh 
892ccd979bdSMark Fasheh 	if (level > lockres->l_blocking) {
893ccd979bdSMark Fasheh 		/* only schedule a downconvert if we haven't already scheduled
894ccd979bdSMark Fasheh 		 * one that goes low enough to satisfy the level we're
895ccd979bdSMark Fasheh 		 * blocking.  this also catches the case where we get
896ccd979bdSMark Fasheh 		 * duplicate BASTs */
897ccd979bdSMark Fasheh 		if (ocfs2_highest_compat_lock_level(level) <
898ccd979bdSMark Fasheh 		    ocfs2_highest_compat_lock_level(lockres->l_blocking))
899ccd979bdSMark Fasheh 			needs_downconvert = 1;
900ccd979bdSMark Fasheh 
901ccd979bdSMark Fasheh 		lockres->l_blocking = level;
902ccd979bdSMark Fasheh 	}
903ccd979bdSMark Fasheh 
9049b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n",
9059b915181SSunil Mushran 	     lockres->l_name, level, lockres->l_level, lockres->l_blocking,
9069b915181SSunil Mushran 	     needs_downconvert);
9079b915181SSunil Mushran 
9080b94a909SWengang Wang 	if (needs_downconvert)
9090b94a909SWengang Wang 		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
910c1e8d35eSTao Ma 	mlog(0, "needs_downconvert = %d\n", needs_downconvert);
911ccd979bdSMark Fasheh 	return needs_downconvert;
912ccd979bdSMark Fasheh }
913ccd979bdSMark Fasheh 
914de551246SJoel Becker /*
915de551246SJoel Becker  * OCFS2_LOCK_PENDING and l_pending_gen.
916de551246SJoel Becker  *
917de551246SJoel Becker  * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting
918de551246SJoel Becker  * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock()
919de551246SJoel Becker  * for more details on the race.
920de551246SJoel Becker  *
921de551246SJoel Becker  * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces
922de551246SJoel Becker  * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock()
923de551246SJoel Becker  * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
924de551246SJoel Becker  * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
925de551246SJoel Becker  * the caller is going to try to clear PENDING again.  If nothing else is
926de551246SJoel Becker  * happening, __lockres_clear_pending() sees PENDING is unset and does
927de551246SJoel Becker  * nothing.
928de551246SJoel Becker  *
929de551246SJoel Becker  * But what if another path (eg downconvert thread) has just started a
930de551246SJoel Becker  * new locking action?  The other path has re-set PENDING.  Our path
931de551246SJoel Becker  * cannot clear PENDING, because that will re-open the original race
932de551246SJoel Becker  * window.
933de551246SJoel Becker  *
934de551246SJoel Becker  * [Example]
935de551246SJoel Becker  *
936de551246SJoel Becker  * ocfs2_meta_lock()
937de551246SJoel Becker  *  ocfs2_cluster_lock()
938de551246SJoel Becker  *   set BUSY
939de551246SJoel Becker  *   set PENDING
940de551246SJoel Becker  *   drop l_lock
941de551246SJoel Becker  *   ocfs2_dlm_lock()
942de551246SJoel Becker  *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
943de551246SJoel Becker  *     clear PENDING			 ocfs2_unblock_lock()
 *					  take l_lock
945de551246SJoel Becker  *					  !BUSY
946de551246SJoel Becker  *					  ocfs2_prepare_downconvert()
947de551246SJoel Becker  *					   set BUSY
948de551246SJoel Becker  *					   set PENDING
949de551246SJoel Becker  *					  drop l_lock
950de551246SJoel Becker  *   take l_lock
951de551246SJoel Becker  *   clear PENDING
952de551246SJoel Becker  *   drop l_lock
953de551246SJoel Becker  *			<window>
954de551246SJoel Becker  *					  ocfs2_dlm_lock()
955de551246SJoel Becker  *
956de551246SJoel Becker  * So as you can see, we now have a window where l_lock is not held,
957de551246SJoel Becker  * PENDING is not set, and ocfs2_dlm_lock() has not been called.
958de551246SJoel Becker  *
959de551246SJoel Becker  * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
960de551246SJoel Becker  * set by ocfs2_prepare_downconvert().  That wasn't nice.
961de551246SJoel Becker  *
962de551246SJoel Becker  * To solve this we introduce l_pending_gen.  A call to
963de551246SJoel Becker  * lockres_clear_pending() will only do so when it is passed a generation
964de551246SJoel Becker  * number that matches the lockres.  lockres_set_pending() will return the
965de551246SJoel Becker  * current generation number.  When ocfs2_cluster_lock() goes to clear
966de551246SJoel Becker  * PENDING, it passes the generation it got from set_pending().  In our
967de551246SJoel Becker  * example above, the generation numbers will *not* match.  Thus,
968de551246SJoel Becker  * ocfs2_cluster_lock() will not clear the PENDING set by
969de551246SJoel Becker  * ocfs2_prepare_downconvert().
970de551246SJoel Becker  */
971de551246SJoel Becker 
972de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */
973de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
974de551246SJoel Becker 				    unsigned int generation,
975de551246SJoel Becker 				    struct ocfs2_super *osb)
976de551246SJoel Becker {
977de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
978de551246SJoel Becker 
979de551246SJoel Becker 	/*
980de551246SJoel Becker 	 * The ast and locking functions can race us here.  The winner
981de551246SJoel Becker 	 * will clear pending, the loser will not.
982de551246SJoel Becker 	 */
983de551246SJoel Becker 	if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
984de551246SJoel Becker 	    (lockres->l_pending_gen != generation))
985de551246SJoel Becker 		return;
986de551246SJoel Becker 
987de551246SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
988de551246SJoel Becker 	lockres->l_pending_gen++;
989de551246SJoel Becker 
990de551246SJoel Becker 	/*
991de551246SJoel Becker 	 * The downconvert thread may have skipped us because we
992de551246SJoel Becker 	 * were PENDING.  Wake it up.
993de551246SJoel Becker 	 */
994de551246SJoel Becker 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
995de551246SJoel Becker 		ocfs2_wake_downconvert_thread(osb);
996de551246SJoel Becker }
997de551246SJoel Becker 
998de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */
999de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
1000de551246SJoel Becker 				  unsigned int generation,
1001de551246SJoel Becker 				  struct ocfs2_super *osb)
1002de551246SJoel Becker {
1003de551246SJoel Becker 	unsigned long flags;
1004de551246SJoel Becker 
1005de551246SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1006de551246SJoel Becker 	__lockres_clear_pending(lockres, generation, osb);
1007de551246SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1008de551246SJoel Becker }
1009de551246SJoel Becker 
1010de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
1011de551246SJoel Becker {
1012de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
1013de551246SJoel Becker 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
1014de551246SJoel Becker 
1015de551246SJoel Becker 	lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
1016de551246SJoel Becker 
1017de551246SJoel Becker 	return lockres->l_pending_gen;
1018de551246SJoel Becker }
1019de551246SJoel Becker 
1020c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
1021ccd979bdSMark Fasheh {
1022a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1023aa2623adSMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1024ccd979bdSMark Fasheh 	int needs_downconvert;
1025ccd979bdSMark Fasheh 	unsigned long flags;
1026ccd979bdSMark Fasheh 
1027bd3e7610SJoel Becker 	BUG_ON(level <= DLM_LOCK_NL);
1028ccd979bdSMark Fasheh 
10299b915181SSunil Mushran 	mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, "
10309b915181SSunil Mushran 	     "type %s\n", lockres->l_name, level, lockres->l_level,
1031aa2623adSMark Fasheh 	     ocfs2_lock_type_string(lockres->l_type));
1032aa2623adSMark Fasheh 
1033cf8e06f1SMark Fasheh 	/*
1034cf8e06f1SMark Fasheh 	 * We can skip the bast for locks which don't enable caching -
1035cf8e06f1SMark Fasheh 	 * they'll be dropped at the earliest possible time anyway.
1036cf8e06f1SMark Fasheh 	 */
1037cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
1038cf8e06f1SMark Fasheh 		return;
1039cf8e06f1SMark Fasheh 
1040ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1041ccd979bdSMark Fasheh 	needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
1042ccd979bdSMark Fasheh 	if (needs_downconvert)
1043ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
1044ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1045ccd979bdSMark Fasheh 
1046d680efe9SMark Fasheh 	wake_up(&lockres->l_event);
1047d680efe9SMark Fasheh 
104834d024f8SMark Fasheh 	ocfs2_wake_downconvert_thread(osb);
1049ccd979bdSMark Fasheh }
1050ccd979bdSMark Fasheh 
1051c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
1052ccd979bdSMark Fasheh {
1053a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1054de551246SJoel Becker 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1055ccd979bdSMark Fasheh 	unsigned long flags;
10561693a5c0SDavid Teigland 	int status;
1057ccd979bdSMark Fasheh 
1058ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1059ccd979bdSMark Fasheh 
10601693a5c0SDavid Teigland 	status = ocfs2_dlm_lock_status(&lockres->l_lksb);
10611693a5c0SDavid Teigland 
10621693a5c0SDavid Teigland 	if (status == -EAGAIN) {
10631693a5c0SDavid Teigland 		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
10641693a5c0SDavid Teigland 		goto out;
10651693a5c0SDavid Teigland 	}
10661693a5c0SDavid Teigland 
10671693a5c0SDavid Teigland 	if (status) {
10688f2c9c1bSJoel Becker 		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
10691693a5c0SDavid Teigland 		     lockres->l_name, status);
1070ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1071ccd979bdSMark Fasheh 		return;
1072ccd979bdSMark Fasheh 	}
1073ccd979bdSMark Fasheh 
10749b915181SSunil Mushran 	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
10759b915181SSunil Mushran 	     "level %d => %d\n", lockres->l_name, lockres->l_action,
10769b915181SSunil Mushran 	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);
10779b915181SSunil Mushran 
1078ccd979bdSMark Fasheh 	switch(lockres->l_action) {
1079ccd979bdSMark Fasheh 	case OCFS2_AST_ATTACH:
1080ccd979bdSMark Fasheh 		ocfs2_generic_handle_attach_action(lockres);
1081e92d57dfSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
1082ccd979bdSMark Fasheh 		break;
1083ccd979bdSMark Fasheh 	case OCFS2_AST_CONVERT:
1084ccd979bdSMark Fasheh 		ocfs2_generic_handle_convert_action(lockres);
1085ccd979bdSMark Fasheh 		break;
1086ccd979bdSMark Fasheh 	case OCFS2_AST_DOWNCONVERT:
1087ccd979bdSMark Fasheh 		ocfs2_generic_handle_downconvert_action(lockres);
1088ccd979bdSMark Fasheh 		break;
1089ccd979bdSMark Fasheh 	default:
10909b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
10919b915181SSunil Mushran 		     "flags 0x%lx, unlock: %u\n",
1092e92d57dfSMark Fasheh 		     lockres->l_name, lockres->l_action, lockres->l_flags,
1093e92d57dfSMark Fasheh 		     lockres->l_unlock_action);
1094ccd979bdSMark Fasheh 		BUG();
1095ccd979bdSMark Fasheh 	}
10961693a5c0SDavid Teigland out:
1097ccd979bdSMark Fasheh 	/* set it to something invalid so if we get called again we
1098ccd979bdSMark Fasheh 	 * can catch it. */
1099ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_INVALID;
1100ccd979bdSMark Fasheh 
1101de551246SJoel Becker 	/* Did we try to cancel this lock?  Clear that state */
1102de551246SJoel Becker 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
1103de551246SJoel Becker 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1104de551246SJoel Becker 
1105de551246SJoel Becker 	/*
1106de551246SJoel Becker 	 * We may have beaten the locking functions here.  We certainly
1107de551246SJoel Becker 	 * know that dlm_lock() has been called :-)
1108de551246SJoel Becker 	 * Because we can't have two lock calls in flight at once, we
1109de551246SJoel Becker 	 * can use lockres->l_pending_gen.
1110de551246SJoel Becker 	 */
1111de551246SJoel Becker 	__lockres_clear_pending(lockres, lockres->l_pending_gen,  osb);
1112de551246SJoel Becker 
1113ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1114d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1115ccd979bdSMark Fasheh }
1116ccd979bdSMark Fasheh 
1117553b5eb9SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1118553b5eb9SJoel Becker {
1119553b5eb9SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1120553b5eb9SJoel Becker 	unsigned long flags;
1121553b5eb9SJoel Becker 
11229b915181SSunil Mushran 	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
11239b915181SSunil Mushran 	     lockres->l_name, lockres->l_unlock_action);
1124553b5eb9SJoel Becker 
1125553b5eb9SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1126553b5eb9SJoel Becker 	if (error) {
1127553b5eb9SJoel Becker 		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
1128553b5eb9SJoel Becker 		     "unlock_action %d\n", error, lockres->l_name,
1129553b5eb9SJoel Becker 		     lockres->l_unlock_action);
1130553b5eb9SJoel Becker 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1131553b5eb9SJoel Becker 		return;
1132553b5eb9SJoel Becker 	}
1133553b5eb9SJoel Becker 
1134553b5eb9SJoel Becker 	switch(lockres->l_unlock_action) {
1135553b5eb9SJoel Becker 	case OCFS2_UNLOCK_CANCEL_CONVERT:
1136553b5eb9SJoel Becker 		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
1137553b5eb9SJoel Becker 		lockres->l_action = OCFS2_AST_INVALID;
1138553b5eb9SJoel Becker 		/* Downconvert thread may have requeued this lock, we
1139553b5eb9SJoel Becker 		 * need to wake it. */
1140553b5eb9SJoel Becker 		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1141553b5eb9SJoel Becker 			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
1142553b5eb9SJoel Becker 		break;
1143553b5eb9SJoel Becker 	case OCFS2_UNLOCK_DROP_LOCK:
1144553b5eb9SJoel Becker 		lockres->l_level = DLM_LOCK_IV;
1145553b5eb9SJoel Becker 		break;
1146553b5eb9SJoel Becker 	default:
1147553b5eb9SJoel Becker 		BUG();
1148553b5eb9SJoel Becker 	}
1149553b5eb9SJoel Becker 
1150553b5eb9SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1151553b5eb9SJoel Becker 	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1152553b5eb9SJoel Becker 	wake_up(&lockres->l_event);
1153553b5eb9SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1154553b5eb9SJoel Becker }
1155553b5eb9SJoel Becker 
1156553b5eb9SJoel Becker /*
1157553b5eb9SJoel Becker  * This is the filesystem locking protocol.  It provides the lock handling
1158553b5eb9SJoel Becker  * hooks for the underlying DLM.  It has a maximum version number.
1159553b5eb9SJoel Becker  * The version number allows interoperability with systems running at
1160553b5eb9SJoel Becker  * the same major number and an equal or smaller minor number.
1161553b5eb9SJoel Becker  *
1162553b5eb9SJoel Becker  * Whenever the filesystem does new things with locks (adds or removes a
1163553b5eb9SJoel Becker  * lock, orders them differently, does different things underneath a lock),
1164553b5eb9SJoel Becker  * the version must be changed.  The protocol is negotiated when joining
1165553b5eb9SJoel Becker  * the dlm domain.  A node may join the domain if its major version is
1166553b5eb9SJoel Becker  * identical to all other nodes and its minor version is greater than
1167553b5eb9SJoel Becker  * or equal to all other nodes.  When its minor version is greater than
1168553b5eb9SJoel Becker  * the other nodes, it will run at the minor version specified by the
1169553b5eb9SJoel Becker  * other nodes.
1170553b5eb9SJoel Becker  *
1171553b5eb9SJoel Becker  * If a locking change is made that will not be compatible with older
1172553b5eb9SJoel Becker  * versions, the major number must be increased and the minor version set
1173553b5eb9SJoel Becker  * to zero.  If a change merely adds a behavior that can be disabled when
1174553b5eb9SJoel Becker  * speaking to older versions, the minor version must be increased.  If a
1175553b5eb9SJoel Becker  * change adds a fully backwards compatible change (eg, LVB changes that
1176553b5eb9SJoel Becker  * are just ignored by older versions), the version does not need to be
1177553b5eb9SJoel Becker  * updated.
1178553b5eb9SJoel Becker  */
1179553b5eb9SJoel Becker static struct ocfs2_locking_protocol lproto = {
1180553b5eb9SJoel Becker 	.lp_max_version = {
1181553b5eb9SJoel Becker 		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
1182553b5eb9SJoel Becker 		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
1183553b5eb9SJoel Becker 	},
1184553b5eb9SJoel Becker 	.lp_lock_ast		= ocfs2_locking_ast,
1185553b5eb9SJoel Becker 	.lp_blocking_ast	= ocfs2_blocking_ast,
1186553b5eb9SJoel Becker 	.lp_unlock_ast		= ocfs2_unlock_ast,
1187553b5eb9SJoel Becker };
1188553b5eb9SJoel Becker 
1189553b5eb9SJoel Becker void ocfs2_set_locking_protocol(void)
1190553b5eb9SJoel Becker {
1191553b5eb9SJoel Becker 	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
1192553b5eb9SJoel Becker }
1193553b5eb9SJoel Becker 
1194ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1195ccd979bdSMark Fasheh 						int convert)
1196ccd979bdSMark Fasheh {
1197ccd979bdSMark Fasheh 	unsigned long flags;
1198ccd979bdSMark Fasheh 
1199ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1200ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1201a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1202ccd979bdSMark Fasheh 	if (convert)
1203ccd979bdSMark Fasheh 		lockres->l_action = OCFS2_AST_INVALID;
1204ccd979bdSMark Fasheh 	else
1205ccd979bdSMark Fasheh 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1206ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1207ccd979bdSMark Fasheh 
1208ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1209ccd979bdSMark Fasheh }
1210ccd979bdSMark Fasheh 
1211ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e.,
1212ccd979bdSMark Fasheh  * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
1213ccd979bdSMark Fasheh  * to do the right thing in that case.
1214ccd979bdSMark Fasheh  */
1215ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
1216ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
1217ccd979bdSMark Fasheh 			     int level,
1218bd3e7610SJoel Becker 			     u32 dlm_flags)
1219ccd979bdSMark Fasheh {
1220ccd979bdSMark Fasheh 	int ret = 0;
1221ccd979bdSMark Fasheh 	unsigned long flags;
1222de551246SJoel Becker 	unsigned int gen;
1223ccd979bdSMark Fasheh 
1224bd3e7610SJoel Becker 	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
1225ccd979bdSMark Fasheh 	     dlm_flags);
1226ccd979bdSMark Fasheh 
1227ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1228ccd979bdSMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
1229ccd979bdSMark Fasheh 	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
1230ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1231ccd979bdSMark Fasheh 		goto bail;
1232ccd979bdSMark Fasheh 	}
1233ccd979bdSMark Fasheh 
1234ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_ATTACH;
1235ccd979bdSMark Fasheh 	lockres->l_requested = level;
1236ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1237de551246SJoel Becker 	gen = lockres_set_pending(lockres);
1238ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1239ccd979bdSMark Fasheh 
12404670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
1241ccd979bdSMark Fasheh 			     level,
1242ccd979bdSMark Fasheh 			     &lockres->l_lksb,
1243ccd979bdSMark Fasheh 			     dlm_flags,
1244ccd979bdSMark Fasheh 			     lockres->l_name,
1245a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
1246de551246SJoel Becker 	lockres_clear_pending(lockres, gen, osb);
12477431cd7eSJoel Becker 	if (ret) {
12487431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1249ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
1250ccd979bdSMark Fasheh 	}
1251ccd979bdSMark Fasheh 
12527431cd7eSJoel Becker 	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
1253ccd979bdSMark Fasheh 
1254ccd979bdSMark Fasheh bail:
1255ccd979bdSMark Fasheh 	return ret;
1256ccd979bdSMark Fasheh }
1257ccd979bdSMark Fasheh 
1258ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
1259ccd979bdSMark Fasheh 					int flag)
1260ccd979bdSMark Fasheh {
1261ccd979bdSMark Fasheh 	unsigned long flags;
1262ccd979bdSMark Fasheh 	int ret;
1263ccd979bdSMark Fasheh 
1264ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1265ccd979bdSMark Fasheh 	ret = lockres->l_flags & flag;
1266ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1267ccd979bdSMark Fasheh 
1268ccd979bdSMark Fasheh 	return ret;
1269ccd979bdSMark Fasheh }
1270ccd979bdSMark Fasheh 
1271ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)
1272ccd979bdSMark Fasheh 
1273ccd979bdSMark Fasheh {
1274ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1275ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
1276ccd979bdSMark Fasheh }
1277ccd979bdSMark Fasheh 
1278ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)
1279ccd979bdSMark Fasheh 
1280ccd979bdSMark Fasheh {
1281ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1282ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
1283ccd979bdSMark Fasheh }
1284ccd979bdSMark Fasheh 
1285ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf
1286ccd979bdSMark Fasheh  * of another node, and return true if the currently wanted
1287ccd979bdSMark Fasheh  * level will be compatible with it. */
1288ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
1289ccd979bdSMark Fasheh 						     int wanted)
1290ccd979bdSMark Fasheh {
1291ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
1292ccd979bdSMark Fasheh 
1293ccd979bdSMark Fasheh 	return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
1294ccd979bdSMark Fasheh }
1295ccd979bdSMark Fasheh 
1296ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
1297ccd979bdSMark Fasheh {
1298ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&mw->mw_item);
1299ccd979bdSMark Fasheh 	init_completion(&mw->mw_complete);
13008ddb7b00SSunil Mushran 	ocfs2_init_start_time(mw);
1301ccd979bdSMark Fasheh }
1302ccd979bdSMark Fasheh 
1303ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
1304ccd979bdSMark Fasheh {
1305ccd979bdSMark Fasheh 	wait_for_completion(&mw->mw_complete);
1306ccd979bdSMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
130716735d02SWolfram Sang 	reinit_completion(&mw->mw_complete);
1308ccd979bdSMark Fasheh 	return mw->mw_status;
1309ccd979bdSMark Fasheh }
1310ccd979bdSMark Fasheh 
1311ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
1312ccd979bdSMark Fasheh 				    struct ocfs2_mask_waiter *mw,
1313ccd979bdSMark Fasheh 				    unsigned long mask,
1314ccd979bdSMark Fasheh 				    unsigned long goal)
1315ccd979bdSMark Fasheh {
1316ccd979bdSMark Fasheh 	BUG_ON(!list_empty(&mw->mw_item));
1317ccd979bdSMark Fasheh 
1318ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
1319ccd979bdSMark Fasheh 
1320ccd979bdSMark Fasheh 	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
1321ccd979bdSMark Fasheh 	mw->mw_mask = mask;
1322ccd979bdSMark Fasheh 	mw->mw_goal = goal;
1323ccd979bdSMark Fasheh }
1324ccd979bdSMark Fasheh 
1325ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY
1326ccd979bdSMark Fasheh  * if the mask still hadn't reached its goal */
1327ccd979bdSMark Fasheh static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1328ccd979bdSMark Fasheh 				      struct ocfs2_mask_waiter *mw)
1329ccd979bdSMark Fasheh {
1330ccd979bdSMark Fasheh 	unsigned long flags;
1331ccd979bdSMark Fasheh 	int ret = 0;
1332ccd979bdSMark Fasheh 
1333ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1334ccd979bdSMark Fasheh 	if (!list_empty(&mw->mw_item)) {
1335ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1336ccd979bdSMark Fasheh 			ret = -EBUSY;
1337ccd979bdSMark Fasheh 
1338ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
1339ccd979bdSMark Fasheh 		init_completion(&mw->mw_complete);
1340ccd979bdSMark Fasheh 	}
1341ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1342ccd979bdSMark Fasheh 
1343ccd979bdSMark Fasheh 	return ret;
1344ccd979bdSMark Fasheh 
1345ccd979bdSMark Fasheh }
1346ccd979bdSMark Fasheh 
1347cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1348cf8e06f1SMark Fasheh 					     struct ocfs2_lock_res *lockres)
1349cf8e06f1SMark Fasheh {
1350cf8e06f1SMark Fasheh 	int ret;
1351cf8e06f1SMark Fasheh 
1352cf8e06f1SMark Fasheh 	ret = wait_for_completion_interruptible(&mw->mw_complete);
1353cf8e06f1SMark Fasheh 	if (ret)
1354cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, mw);
1355cf8e06f1SMark Fasheh 	else
1356cf8e06f1SMark Fasheh 		ret = mw->mw_status;
1357cf8e06f1SMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
135816735d02SWolfram Sang 	reinit_completion(&mw->mw_complete);
1359cf8e06f1SMark Fasheh 	return ret;
1360cf8e06f1SMark Fasheh }
1361cf8e06f1SMark Fasheh 
/*
 * Acquire the cluster lock described by @lockres at @level, looping until
 * the lock is held at a sufficient level or an error is hit.
 *
 * @lkm_flags:  flags passed to ocfs2_dlm_lock().  DLM_LKF_VALBLK is added
 *              when the lock type uses an LVB, and DLM_LKF_CONVERT is set
 *              or cleared here depending on whether the lock is attached.
 * @arg_flags:  OCFS2_LOCK_NONBLOCK makes the function back off with
 *              -EAGAIN rather than sleep on a BUSY/BLOCKED lockres;
 *              OCFS2_META_LOCK_NOQUEUE is only consulted for the lockdep
 *              annotation.
 * @l_subclass, @caller_ip: used only for the lockdep annotation under
 *              CONFIG_DEBUG_LOCK_ALLOC.
 *
 * Each pass through the "again" loop, under l_lock:
 *   - a pending signal (unless the mount has OCFS2_MOUNT_NOINTR, or a DLM
 *     request has already been issued) ends the call with -ERESTARTSYS;
 *   - a BUSY lockres that is below @level, or a BLOCKED lockres @level is
 *     not compatible with, queues a mask waiter and waits for that flag
 *     to clear;
 *   - if @level is above the granted level, an attach or convert request
 *     is sent through ocfs2_dlm_lock() and the loop restarts;
 *   - otherwise the holder count is incremented and 0 is returned.
 *
 * Returns 0 on success, or a negative value: -ERESTARTSYS, -EAGAIN, the
 * error returned by ocfs2_dlm_lock(), or the status of the mask waiter.
 * ocfs2_update_lock_stats() is called with the final result on every exit.
 */
1362cb25797dSJan Kara static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1363ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres,
1364ccd979bdSMark Fasheh 				int level,
1365bd3e7610SJoel Becker 				u32 lkm_flags,
1366cb25797dSJan Kara 				int arg_flags,
1367cb25797dSJan Kara 				int l_subclass,
1368cb25797dSJan Kara 				unsigned long caller_ip)
1369ccd979bdSMark Fasheh {
1370ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
1371ccd979bdSMark Fasheh 	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1372ccd979bdSMark Fasheh 	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1373ccd979bdSMark Fasheh 	unsigned long flags;
1374de551246SJoel Becker 	unsigned int gen;
13751693a5c0SDavid Teigland 	int noqueue_attempted = 0;
1376ccd979bdSMark Fasheh 
1377ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1378ccd979bdSMark Fasheh 
1379b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
1380bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
1381b80fc012SMark Fasheh 
1382ccd979bdSMark Fasheh again:
1383ccd979bdSMark Fasheh 	wait = 0;
1384ccd979bdSMark Fasheh 
1385a1912826SSunil Mushran 	spin_lock_irqsave(&lockres->l_lock, flags);
1386a1912826SSunil Mushran 
1387ccd979bdSMark Fasheh 	if (catch_signals && signal_pending(current)) {
1388ccd979bdSMark Fasheh 		ret = -ERESTARTSYS;
1389a1912826SSunil Mushran 		goto unlock;
1390ccd979bdSMark Fasheh 	}
1391ccd979bdSMark Fasheh 
1392ccd979bdSMark Fasheh 	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1393ccd979bdSMark Fasheh 			"Cluster lock called on freeing lockres %s! flags "
1394ccd979bdSMark Fasheh 			"0x%lx\n", lockres->l_name, lockres->l_flags);
1395ccd979bdSMark Fasheh 
1396ccd979bdSMark Fasheh 	/* We only compare against the currently granted level
1397ccd979bdSMark Fasheh 	 * here. If the lock is blocked waiting on a downconvert,
1398ccd979bdSMark Fasheh 	 * we'll get caught below. */
1399ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
1400ccd979bdSMark Fasheh 	    level > lockres->l_level) {
1401ccd979bdSMark Fasheh 		/* is someone sitting in dlm_lock? If so, wait on
1402ccd979bdSMark Fasheh 		 * them. */
1403ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1404ccd979bdSMark Fasheh 		wait = 1;
1405ccd979bdSMark Fasheh 		goto unlock;
1406ccd979bdSMark Fasheh 	}
1407ccd979bdSMark Fasheh 
1408a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1409a1912826SSunil Mushran 		/*
1410a1912826SSunil Mushran 		 * We've upconverted. If the lock now has a level we can
1411a1912826SSunil Mushran 		 * work with, we take it. If, however, the lock is not at the
1412a1912826SSunil Mushran 		 * required level, we go thru the full cycle. One way this could
1413a1912826SSunil Mushran 		 * happen is if a process requesting an upconvert to PR is
1414a1912826SSunil Mushran 		 * closely followed by another requesting upconvert to an EX.
1415a1912826SSunil Mushran 		 * If the process requesting EX lands here, we want it to
1416a1912826SSunil Mushran 		 * continue attempting to upconvert and let the process
1417a1912826SSunil Mushran 		 * requesting PR take the lock.
1418a1912826SSunil Mushran 		 * If multiple processes request upconvert to PR, the first one
1419a1912826SSunil Mushran 		 * here will take the lock. The others will have to go thru the
1420a1912826SSunil Mushran 		 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1421a1912826SSunil Mushran 		 * downconvert request.
1422a1912826SSunil Mushran 		 */
1423a1912826SSunil Mushran 		if (level <= lockres->l_level)
1424a1912826SSunil Mushran 			goto update_holders;
1425a1912826SSunil Mushran 	}
1426a1912826SSunil Mushran 
1427ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1428ccd979bdSMark Fasheh 	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1429ccd979bdSMark Fasheh 		/* is the lock is currently blocked on behalf of
1430ccd979bdSMark Fasheh 		 * another node */
1431ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
1432ccd979bdSMark Fasheh 		wait = 1;
1433ccd979bdSMark Fasheh 		goto unlock;
1434ccd979bdSMark Fasheh 	}
1435ccd979bdSMark Fasheh 
1436ccd979bdSMark Fasheh 	if (level > lockres->l_level) {
		/* A DLM_LKF_NOQUEUE request has already been issued once and
		 * the granted level is still too low: give up with -EAGAIN
		 * instead of asking the DLM again. */
14371693a5c0SDavid Teigland 		if (noqueue_attempted > 0) {
14381693a5c0SDavid Teigland 			ret = -EAGAIN;
14391693a5c0SDavid Teigland 			goto unlock;
14401693a5c0SDavid Teigland 		}
14411693a5c0SDavid Teigland 		if (lkm_flags & DLM_LKF_NOQUEUE)
14421693a5c0SDavid Teigland 			noqueue_attempted = 1;
14431693a5c0SDavid Teigland 
1444ccd979bdSMark Fasheh 		if (lockres->l_action != OCFS2_AST_INVALID)
1445ccd979bdSMark Fasheh 			mlog(ML_ERROR, "lockres %s has action %u pending\n",
1446ccd979bdSMark Fasheh 			     lockres->l_name, lockres->l_action);
1447ccd979bdSMark Fasheh 
		/* Not attached yet: this is a fresh attach.  Otherwise we
		 * convert the lock we already have. */
1448019d1b22SMark Fasheh 		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1449019d1b22SMark Fasheh 			lockres->l_action = OCFS2_AST_ATTACH;
1450bd3e7610SJoel Becker 			lkm_flags &= ~DLM_LKF_CONVERT;
1451019d1b22SMark Fasheh 		} else {
1452ccd979bdSMark Fasheh 			lockres->l_action = OCFS2_AST_CONVERT;
1453bd3e7610SJoel Becker 			lkm_flags |= DLM_LKF_CONVERT;
1454019d1b22SMark Fasheh 		}
1455019d1b22SMark Fasheh 
1456ccd979bdSMark Fasheh 		lockres->l_requested = level;
1457ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1458de551246SJoel Becker 		gen = lockres_set_pending(lockres);
1459ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1460ccd979bdSMark Fasheh 
1461bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_IV);
1462bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_NL);
1463ccd979bdSMark Fasheh 
14649b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
1465ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_level, level);
1466ccd979bdSMark Fasheh 
1467ccd979bdSMark Fasheh 		/* call dlm_lock to upgrade lock now */
14684670c46dSJoel Becker 		ret = ocfs2_dlm_lock(osb->cconn,
1469ccd979bdSMark Fasheh 				     level,
1470ccd979bdSMark Fasheh 				     &lockres->l_lksb,
1471019d1b22SMark Fasheh 				     lkm_flags,
1472ccd979bdSMark Fasheh 				     lockres->l_name,
1473a796d286SJoel Becker 				     OCFS2_LOCK_ID_MAX_LEN - 1);
1474de551246SJoel Becker 		lockres_clear_pending(lockres, gen, osb);
14757431cd7eSJoel Becker 		if (ret) {
			/* -EAGAIN from a NOQUEUE request is not logged as
			 * a DLM error. */
14767431cd7eSJoel Becker 			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
14777431cd7eSJoel Becker 			    (ret != -EAGAIN)) {
147824ef1815SJoel Becker 				ocfs2_log_dlm_error("ocfs2_dlm_lock",
14797431cd7eSJoel Becker 						    ret, lockres);
1480ccd979bdSMark Fasheh 			}
1481ccd979bdSMark Fasheh 			ocfs2_recover_from_dlm_error(lockres, 1);
1482ccd979bdSMark Fasheh 			goto out;
1483ccd979bdSMark Fasheh 		}
1484ccd979bdSMark Fasheh 
148573ac36eaSColy Li 		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
1486ccd979bdSMark Fasheh 		     lockres->l_name);
1487ccd979bdSMark Fasheh 
1488ccd979bdSMark Fasheh 		/* At this point we've gone inside the dlm and need to
1489ccd979bdSMark Fasheh 		 * complete our work regardless. */
1490ccd979bdSMark Fasheh 		catch_signals = 0;
1491ccd979bdSMark Fasheh 
1492ccd979bdSMark Fasheh 		/* wait for busy to clear and carry on */
1493ccd979bdSMark Fasheh 		goto again;
1494ccd979bdSMark Fasheh 	}
1495ccd979bdSMark Fasheh 
1496a1912826SSunil Mushran update_holders:
1497ccd979bdSMark Fasheh 	/* Ok, if we get here then we're good to go. */
1498ccd979bdSMark Fasheh 	ocfs2_inc_holders(lockres, level);
1499ccd979bdSMark Fasheh 
1500ccd979bdSMark Fasheh 	ret = 0;
1501ccd979bdSMark Fasheh unlock:
1502a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1503a1912826SSunil Mushran 
1504ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1505ccd979bdSMark Fasheh out:
1506ccd979bdSMark Fasheh 	/*
1507ccd979bdSMark Fasheh 	 * This is helping work around a lock inversion between the page lock
1508ccd979bdSMark Fasheh 	 * and dlm locks.  One path holds the page lock while calling aops
1509ccd979bdSMark Fasheh 	 * which block acquiring dlm locks.  The voting thread holds dlm
1510ccd979bdSMark Fasheh 	 * locks while acquiring page locks while down converting data locks.
1511ccd979bdSMark Fasheh 	 * This block is helping an aop path notice the inversion and back
1512ccd979bdSMark Fasheh 	 * off to unlock its page lock before trying the dlm lock again.
1513ccd979bdSMark Fasheh 	 */
1514ccd979bdSMark Fasheh 	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
1515ccd979bdSMark Fasheh 	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
1516ccd979bdSMark Fasheh 		wait = 0;
		/* Non-zero (-EBUSY) means the awaited flag is still set:
		 * report -EAGAIN.  Zero means the condition was already
		 * satisfied, so simply retry. */
1517ccd979bdSMark Fasheh 		if (lockres_remove_mask_waiter(lockres, &mw))
1518ccd979bdSMark Fasheh 			ret = -EAGAIN;
1519ccd979bdSMark Fasheh 		else
1520ccd979bdSMark Fasheh 			goto again;
1521ccd979bdSMark Fasheh 	}
1522ccd979bdSMark Fasheh 	if (wait) {
1523ccd979bdSMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
1524ccd979bdSMark Fasheh 		if (ret == 0)
1525ccd979bdSMark Fasheh 			goto again;
1526ccd979bdSMark Fasheh 		mlog_errno(ret);
1527ccd979bdSMark Fasheh 	}
15288ddb7b00SSunil Mushran 	ocfs2_update_lock_stats(lockres, level, &mw, ret);
1529ccd979bdSMark Fasheh 
1530cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* On success, record the acquisition with lockdep: PR is annotated
	 * as a read acquire, every other level as a write acquire. */
1531cb25797dSJan Kara 	if (!ret && lockres->l_lockdep_map.key != NULL) {
1532cb25797dSJan Kara 		if (level == DLM_LOCK_PR)
1533cb25797dSJan Kara 			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1534cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1535cb25797dSJan Kara 				caller_ip);
1536cb25797dSJan Kara 		else
1537cb25797dSJan Kara 			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1538cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1539cb25797dSJan Kara 				caller_ip);
1540cb25797dSJan Kara 	}
1541cb25797dSJan Kara #endif
1542ccd979bdSMark Fasheh 	return ret;
1543ccd979bdSMark Fasheh }
1544ccd979bdSMark Fasheh 
1545cb25797dSJan Kara static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1546ccd979bdSMark Fasheh 				     struct ocfs2_lock_res *lockres,
1547cb25797dSJan Kara 				     int level,
1548cb25797dSJan Kara 				     u32 lkm_flags,
1549cb25797dSJan Kara 				     int arg_flags)
1550cb25797dSJan Kara {
1551cb25797dSJan Kara 	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1552cb25797dSJan Kara 				    0, _RET_IP_);
1553cb25797dSJan Kara }
1554cb25797dSJan Kara 
1555cb25797dSJan Kara 
1556cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1557cb25797dSJan Kara 				   struct ocfs2_lock_res *lockres,
1558cb25797dSJan Kara 				   int level,
1559cb25797dSJan Kara 				   unsigned long caller_ip)
1560ccd979bdSMark Fasheh {
1561ccd979bdSMark Fasheh 	unsigned long flags;
1562ccd979bdSMark Fasheh 
1563ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1564ccd979bdSMark Fasheh 	ocfs2_dec_holders(lockres, level);
156534d024f8SMark Fasheh 	ocfs2_downconvert_on_unlock(osb, lockres);
1566ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1567cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
1568cb25797dSJan Kara 	if (lockres->l_lockdep_map.key != NULL)
1569cb25797dSJan Kara 		rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1570cb25797dSJan Kara #endif
1571ccd979bdSMark Fasheh }
1572ccd979bdSMark Fasheh 
1573da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1574d680efe9SMark Fasheh 				 struct ocfs2_lock_res *lockres,
157524c19ef4SMark Fasheh 				 int ex,
157624c19ef4SMark Fasheh 				 int local)
1577ccd979bdSMark Fasheh {
1578bd3e7610SJoel Becker 	int level =  ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1579ccd979bdSMark Fasheh 	unsigned long flags;
1580bd3e7610SJoel Becker 	u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1581ccd979bdSMark Fasheh 
1582ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1583ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1584ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1585ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1586ccd979bdSMark Fasheh 
158724c19ef4SMark Fasheh 	return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1588ccd979bdSMark Fasheh }
1589ccd979bdSMark Fasheh 
1590ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping
1591ccd979bdSMark Fasheh  * the normal cluster directory lookup. Use this ONLY on newly created
1592ccd979bdSMark Fasheh  * inodes which other nodes can't possibly see, and which haven't been
1593ccd979bdSMark Fasheh  * hashed in the inode hash yet. This can give us a good performance
1594ccd979bdSMark Fasheh  * increase as it'll skip the network broadcast normally associated
1595ccd979bdSMark Fasheh  * with creating a new lock resource. */
1596ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode)
1597ccd979bdSMark Fasheh {
1598ccd979bdSMark Fasheh 	int ret;
1599d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1600ccd979bdSMark Fasheh 
1601ccd979bdSMark Fasheh 	BUG_ON(!inode);
1602ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_inode_is_new(inode));
1603ccd979bdSMark Fasheh 
1604b0697053SMark Fasheh 	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1605ccd979bdSMark Fasheh 
1606ccd979bdSMark Fasheh 	/* NOTE: That we don't increment any of the holder counts, nor
1607ccd979bdSMark Fasheh 	 * do we add anything to a journal handle. Since this is
1608ccd979bdSMark Fasheh 	 * supposed to be a new inode which the cluster doesn't know
1609ccd979bdSMark Fasheh 	 * about yet, there is no need to.  As far as the LVB handling
1610ccd979bdSMark Fasheh 	 * is concerned, this is basically like acquiring an EX lock
1611ccd979bdSMark Fasheh 	 * on a resource which has an invalid one -- we'll set it
1612ccd979bdSMark Fasheh 	 * valid when we release the EX. */
1613ccd979bdSMark Fasheh 
161424c19ef4SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1615ccd979bdSMark Fasheh 	if (ret) {
1616ccd979bdSMark Fasheh 		mlog_errno(ret);
1617ccd979bdSMark Fasheh 		goto bail;
1618ccd979bdSMark Fasheh 	}
1619ccd979bdSMark Fasheh 
162024c19ef4SMark Fasheh 	/*
1621bd3e7610SJoel Becker 	 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
162224c19ef4SMark Fasheh 	 * don't use a generation in their lock names.
162324c19ef4SMark Fasheh 	 */
1624e63aecb6SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
1625ccd979bdSMark Fasheh 	if (ret) {
1626ccd979bdSMark Fasheh 		mlog_errno(ret);
1627ccd979bdSMark Fasheh 		goto bail;
1628ccd979bdSMark Fasheh 	}
1629ccd979bdSMark Fasheh 
163050008630STiger Yang 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
163150008630STiger Yang 	if (ret) {
163250008630STiger Yang 		mlog_errno(ret);
163350008630STiger Yang 		goto bail;
163450008630STiger Yang 	}
163550008630STiger Yang 
1636ccd979bdSMark Fasheh bail:
1637ccd979bdSMark Fasheh 	return ret;
1638ccd979bdSMark Fasheh }
1639ccd979bdSMark Fasheh 
1640ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write)
1641ccd979bdSMark Fasheh {
1642ccd979bdSMark Fasheh 	int status, level;
1643ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres;
1644c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1645ccd979bdSMark Fasheh 
1646ccd979bdSMark Fasheh 	BUG_ON(!inode);
1647ccd979bdSMark Fasheh 
1648b0697053SMark Fasheh 	mlog(0, "inode %llu take %s RW lock\n",
1649b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1650ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1651ccd979bdSMark Fasheh 
1652c1e8d35eSTao Ma 	if (ocfs2_mount_local(osb))
1653c271c5c2SSunil Mushran 		return 0;
1654c271c5c2SSunil Mushran 
1655ccd979bdSMark Fasheh 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
1656ccd979bdSMark Fasheh 
1657bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1658ccd979bdSMark Fasheh 
1659ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1660ccd979bdSMark Fasheh 				    0);
1661ccd979bdSMark Fasheh 	if (status < 0)
1662ccd979bdSMark Fasheh 		mlog_errno(status);
1663ccd979bdSMark Fasheh 
1664ccd979bdSMark Fasheh 	return status;
1665ccd979bdSMark Fasheh }
1666ccd979bdSMark Fasheh 
1667ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write)
1668ccd979bdSMark Fasheh {
1669bd3e7610SJoel Becker 	int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1670ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1671c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1672ccd979bdSMark Fasheh 
1673b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s RW lock\n",
1674b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1675ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1676ccd979bdSMark Fasheh 
1677c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
1678ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1679ccd979bdSMark Fasheh }
1680ccd979bdSMark Fasheh 
168150008630STiger Yang /*
168250008630STiger Yang  * ocfs2_open_lock always get PR mode lock.
168350008630STiger Yang  */
168450008630STiger Yang int ocfs2_open_lock(struct inode *inode)
168550008630STiger Yang {
168650008630STiger Yang 	int status = 0;
168750008630STiger Yang 	struct ocfs2_lock_res *lockres;
168850008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
168950008630STiger Yang 
169050008630STiger Yang 	BUG_ON(!inode);
169150008630STiger Yang 
169250008630STiger Yang 	mlog(0, "inode %llu take PRMODE open lock\n",
169350008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
169450008630STiger Yang 
169503efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
169650008630STiger Yang 		goto out;
169750008630STiger Yang 
169850008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
169950008630STiger Yang 
170050008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1701bd3e7610SJoel Becker 				    DLM_LOCK_PR, 0, 0);
170250008630STiger Yang 	if (status < 0)
170350008630STiger Yang 		mlog_errno(status);
170450008630STiger Yang 
170550008630STiger Yang out:
170650008630STiger Yang 	return status;
170750008630STiger Yang }
170850008630STiger Yang 
170950008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write)
171050008630STiger Yang {
171150008630STiger Yang 	int status = 0, level;
171250008630STiger Yang 	struct ocfs2_lock_res *lockres;
171350008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
171450008630STiger Yang 
171550008630STiger Yang 	BUG_ON(!inode);
171650008630STiger Yang 
171750008630STiger Yang 	mlog(0, "inode %llu try to take %s open lock\n",
171850008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
171950008630STiger Yang 	     write ? "EXMODE" : "PRMODE");
172050008630STiger Yang 
172103efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
172203efed8aSTiger Yang 		if (write)
172303efed8aSTiger Yang 			status = -EROFS;
172403efed8aSTiger Yang 		goto out;
172503efed8aSTiger Yang 	}
172603efed8aSTiger Yang 
172750008630STiger Yang 	if (ocfs2_mount_local(osb))
172850008630STiger Yang 		goto out;
172950008630STiger Yang 
173050008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
173150008630STiger Yang 
1732bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
173350008630STiger Yang 
173450008630STiger Yang 	/*
173550008630STiger Yang 	 * The file system may already holding a PRMODE/EXMODE open lock.
1736bd3e7610SJoel Becker 	 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
173750008630STiger Yang 	 * other nodes and the -EAGAIN will indicate to the caller that
173850008630STiger Yang 	 * this inode is still in use.
173950008630STiger Yang 	 */
174050008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1741bd3e7610SJoel Becker 				    level, DLM_LKF_NOQUEUE, 0);
174250008630STiger Yang 
174350008630STiger Yang out:
174450008630STiger Yang 	return status;
174550008630STiger Yang }
174650008630STiger Yang 
174750008630STiger Yang /*
174850008630STiger Yang  * ocfs2_open_unlock unlock PR and EX mode open locks.
174950008630STiger Yang  */
175050008630STiger Yang void ocfs2_open_unlock(struct inode *inode)
175150008630STiger Yang {
175250008630STiger Yang 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
175350008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
175450008630STiger Yang 
175550008630STiger Yang 	mlog(0, "inode %llu drop open lock\n",
175650008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
175750008630STiger Yang 
175850008630STiger Yang 	if (ocfs2_mount_local(osb))
175950008630STiger Yang 		goto out;
176050008630STiger Yang 
176150008630STiger Yang 	if(lockres->l_ro_holders)
176250008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1763bd3e7610SJoel Becker 				     DLM_LOCK_PR);
176450008630STiger Yang 	if(lockres->l_ex_holders)
176550008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1766bd3e7610SJoel Becker 				     DLM_LOCK_EX);
176750008630STiger Yang 
176850008630STiger Yang out:
1769c1e8d35eSTao Ma 	return;
177050008630STiger Yang }
177150008630STiger Yang 
1772cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1773cf8e06f1SMark Fasheh 				     int level)
1774cf8e06f1SMark Fasheh {
1775cf8e06f1SMark Fasheh 	int ret;
1776cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1777cf8e06f1SMark Fasheh 	unsigned long flags;
1778cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1779cf8e06f1SMark Fasheh 
1780cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1781cf8e06f1SMark Fasheh 
1782cf8e06f1SMark Fasheh retry_cancel:
1783cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1784cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
1785cf8e06f1SMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
1786cf8e06f1SMark Fasheh 		if (ret) {
1787cf8e06f1SMark Fasheh 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1788cf8e06f1SMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
1789cf8e06f1SMark Fasheh 			if (ret < 0) {
1790cf8e06f1SMark Fasheh 				mlog_errno(ret);
1791cf8e06f1SMark Fasheh 				goto out;
1792cf8e06f1SMark Fasheh 			}
1793cf8e06f1SMark Fasheh 			goto retry_cancel;
1794cf8e06f1SMark Fasheh 		}
1795cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1796cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1797cf8e06f1SMark Fasheh 
1798cf8e06f1SMark Fasheh 		ocfs2_wait_for_mask(&mw);
1799cf8e06f1SMark Fasheh 		goto retry_cancel;
1800cf8e06f1SMark Fasheh 	}
1801cf8e06f1SMark Fasheh 
1802cf8e06f1SMark Fasheh 	ret = -ERESTARTSYS;
1803cf8e06f1SMark Fasheh 	/*
1804cf8e06f1SMark Fasheh 	 * We may still have gotten the lock, in which case there's no
1805cf8e06f1SMark Fasheh 	 * point to restarting the syscall.
1806cf8e06f1SMark Fasheh 	 */
1807cf8e06f1SMark Fasheh 	if (lockres->l_level == level)
1808cf8e06f1SMark Fasheh 		ret = 0;
1809cf8e06f1SMark Fasheh 
1810cf8e06f1SMark Fasheh 	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1811cf8e06f1SMark Fasheh 	     lockres->l_flags, lockres->l_level, lockres->l_action);
1812cf8e06f1SMark Fasheh 
1813cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1814cf8e06f1SMark Fasheh 
1815cf8e06f1SMark Fasheh out:
1816cf8e06f1SMark Fasheh 	return ret;
1817cf8e06f1SMark Fasheh }
1818cf8e06f1SMark Fasheh 
1819cf8e06f1SMark Fasheh /*
1820cf8e06f1SMark Fasheh  * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1821cf8e06f1SMark Fasheh  * flock() calls. The locking approach this requires is sufficiently
1822cf8e06f1SMark Fasheh  * different from all other cluster lock types that we implement a
18233ad2f3fbSDaniel Mack  * separate path to the "low-level" dlm calls. In particular:
1824cf8e06f1SMark Fasheh  *
1825cf8e06f1SMark Fasheh  * - No optimization of lock levels is done - we take at exactly
1826cf8e06f1SMark Fasheh  *   what's been requested.
1827cf8e06f1SMark Fasheh  *
1828cf8e06f1SMark Fasheh  * - No lock caching is employed. We immediately downconvert to
1829cf8e06f1SMark Fasheh  *   no-lock at unlock time. This also means flock locks never go on
1830cf8e06f1SMark Fasheh  *   the blocking list).
1831cf8e06f1SMark Fasheh  *
1832cf8e06f1SMark Fasheh  * - Since userspace can trivially deadlock itself with flock, we make
1833cf8e06f1SMark Fasheh  *   sure to allow cancellation of a misbehaving applications flock()
1834cf8e06f1SMark Fasheh  *   request.
1835cf8e06f1SMark Fasheh  *
1836cf8e06f1SMark Fasheh  * - Access to any flock lockres doesn't require concurrency, so we
1837cf8e06f1SMark Fasheh  *   can simplify the code by requiring the caller to guarantee
1838cf8e06f1SMark Fasheh  *   serialization of dlmglue flock calls.
1839cf8e06f1SMark Fasheh  */
1840cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock)
1841cf8e06f1SMark Fasheh {
1842e988cf1cSMark Fasheh 	int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1843e988cf1cSMark Fasheh 	unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
1844cf8e06f1SMark Fasheh 	unsigned long flags;
1845cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
1846cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
1847cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1848cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1849cf8e06f1SMark Fasheh 
1850cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1851cf8e06f1SMark Fasheh 
1852cf8e06f1SMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
1853bd3e7610SJoel Becker 	    (lockres->l_level > DLM_LOCK_NL)) {
1854cf8e06f1SMark Fasheh 		mlog(ML_ERROR,
1855cf8e06f1SMark Fasheh 		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1856cf8e06f1SMark Fasheh 		     "level: %u\n", lockres->l_name, lockres->l_flags,
1857cf8e06f1SMark Fasheh 		     lockres->l_level);
1858cf8e06f1SMark Fasheh 		return -EINVAL;
1859cf8e06f1SMark Fasheh 	}
1860cf8e06f1SMark Fasheh 
1861cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1862cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1863cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1864cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1865cf8e06f1SMark Fasheh 
1866cf8e06f1SMark Fasheh 		/*
1867cf8e06f1SMark Fasheh 		 * Get the lock at NLMODE to start - that way we
1868cf8e06f1SMark Fasheh 		 * can cancel the upconvert request if need be.
1869cf8e06f1SMark Fasheh 		 */
1870e988cf1cSMark Fasheh 		ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
1871cf8e06f1SMark Fasheh 		if (ret < 0) {
1872cf8e06f1SMark Fasheh 			mlog_errno(ret);
1873cf8e06f1SMark Fasheh 			goto out;
1874cf8e06f1SMark Fasheh 		}
1875cf8e06f1SMark Fasheh 
1876cf8e06f1SMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
1877cf8e06f1SMark Fasheh 		if (ret) {
1878cf8e06f1SMark Fasheh 			mlog_errno(ret);
1879cf8e06f1SMark Fasheh 			goto out;
1880cf8e06f1SMark Fasheh 		}
1881cf8e06f1SMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
1882cf8e06f1SMark Fasheh 	}
1883cf8e06f1SMark Fasheh 
1884cf8e06f1SMark Fasheh 	lockres->l_action = OCFS2_AST_CONVERT;
1885e988cf1cSMark Fasheh 	lkm_flags |= DLM_LKF_CONVERT;
1886cf8e06f1SMark Fasheh 	lockres->l_requested = level;
1887cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1888cf8e06f1SMark Fasheh 
1889cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1890cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1891cf8e06f1SMark Fasheh 
18924670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
1893a796d286SJoel Becker 			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1);
18947431cd7eSJoel Becker 	if (ret) {
18957431cd7eSJoel Becker 		if (!trylock || (ret != -EAGAIN)) {
189624ef1815SJoel Becker 			ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1897cf8e06f1SMark Fasheh 			ret = -EINVAL;
1898cf8e06f1SMark Fasheh 		}
1899cf8e06f1SMark Fasheh 
1900cf8e06f1SMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
1901cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, &mw);
1902cf8e06f1SMark Fasheh 		goto out;
1903cf8e06f1SMark Fasheh 	}
1904cf8e06f1SMark Fasheh 
1905cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
1906cf8e06f1SMark Fasheh 	if (ret == -ERESTARTSYS) {
1907cf8e06f1SMark Fasheh 		/*
1908cf8e06f1SMark Fasheh 		 * Userspace can cause deadlock itself with
1909cf8e06f1SMark Fasheh 		 * flock(). Current behavior locally is to allow the
1910cf8e06f1SMark Fasheh 		 * deadlock, but abort the system call if a signal is
1911cf8e06f1SMark Fasheh 		 * received. We follow this example, otherwise a
1912cf8e06f1SMark Fasheh 		 * poorly written program could sit in kernel until
1913cf8e06f1SMark Fasheh 		 * reboot.
1914cf8e06f1SMark Fasheh 		 *
1915cf8e06f1SMark Fasheh 		 * Handling this is a bit more complicated for Ocfs2
1916cf8e06f1SMark Fasheh 		 * though. We can't exit this function with an
1917cf8e06f1SMark Fasheh 		 * outstanding lock request, so a cancel convert is
1918cf8e06f1SMark Fasheh 		 * required. We intentionally overwrite 'ret' - if the
1919cf8e06f1SMark Fasheh 		 * cancel fails and the lock was granted, it's easier
1920af901ca1SAndré Goddard Rosa 		 * to just bubble success back up to the user.
1921cf8e06f1SMark Fasheh 		 */
1922cf8e06f1SMark Fasheh 		ret = ocfs2_flock_handle_signal(lockres, level);
19231693a5c0SDavid Teigland 	} else if (!ret && (level > lockres->l_level)) {
19241693a5c0SDavid Teigland 		/* Trylock failed asynchronously */
19251693a5c0SDavid Teigland 		BUG_ON(!trylock);
19261693a5c0SDavid Teigland 		ret = -EAGAIN;
1927cf8e06f1SMark Fasheh 	}
1928cf8e06f1SMark Fasheh 
1929cf8e06f1SMark Fasheh out:
1930cf8e06f1SMark Fasheh 
1931cf8e06f1SMark Fasheh 	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
1932cf8e06f1SMark Fasheh 	     lockres->l_name, ex, trylock, ret);
1933cf8e06f1SMark Fasheh 	return ret;
1934cf8e06f1SMark Fasheh }
1935cf8e06f1SMark Fasheh 
1936cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file)
1937cf8e06f1SMark Fasheh {
1938cf8e06f1SMark Fasheh 	int ret;
1939de551246SJoel Becker 	unsigned int gen;
1940cf8e06f1SMark Fasheh 	unsigned long flags;
1941cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
1942cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
1943cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1944cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1945cf8e06f1SMark Fasheh 
1946cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1947cf8e06f1SMark Fasheh 
1948cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
1949cf8e06f1SMark Fasheh 		return;
1950cf8e06f1SMark Fasheh 
1951e988cf1cSMark Fasheh 	if (lockres->l_level == DLM_LOCK_NL)
1952cf8e06f1SMark Fasheh 		return;
1953cf8e06f1SMark Fasheh 
1954cf8e06f1SMark Fasheh 	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
1955cf8e06f1SMark Fasheh 	     lockres->l_name, lockres->l_flags, lockres->l_level,
1956cf8e06f1SMark Fasheh 	     lockres->l_action);
1957cf8e06f1SMark Fasheh 
1958cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1959cf8e06f1SMark Fasheh 	/*
1960cf8e06f1SMark Fasheh 	 * Fake a blocking ast for the downconvert code.
1961cf8e06f1SMark Fasheh 	 */
1962cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
1963bd3e7610SJoel Becker 	lockres->l_blocking = DLM_LOCK_EX;
1964cf8e06f1SMark Fasheh 
1965e988cf1cSMark Fasheh 	gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
1966cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1967cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1968cf8e06f1SMark Fasheh 
1969e988cf1cSMark Fasheh 	ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
1970cf8e06f1SMark Fasheh 	if (ret) {
1971cf8e06f1SMark Fasheh 		mlog_errno(ret);
1972cf8e06f1SMark Fasheh 		return;
1973cf8e06f1SMark Fasheh 	}
1974cf8e06f1SMark Fasheh 
1975cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask(&mw);
1976cf8e06f1SMark Fasheh 	if (ret)
1977cf8e06f1SMark Fasheh 		mlog_errno(ret);
1978cf8e06f1SMark Fasheh }
1979cf8e06f1SMark Fasheh 
198034d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
1981ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
1982ccd979bdSMark Fasheh {
1983ccd979bdSMark Fasheh 	int kick = 0;
1984ccd979bdSMark Fasheh 
1985ccd979bdSMark Fasheh 	/* If we know that another node is waiting on our lock, kick
198634d024f8SMark Fasheh 	 * the downconvert thread * pre-emptively when we reach a release
1987ccd979bdSMark Fasheh 	 * condition. */
1988ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
1989ccd979bdSMark Fasheh 		switch(lockres->l_blocking) {
1990bd3e7610SJoel Becker 		case DLM_LOCK_EX:
1991ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders && !lockres->l_ro_holders)
1992ccd979bdSMark Fasheh 				kick = 1;
1993ccd979bdSMark Fasheh 			break;
1994bd3e7610SJoel Becker 		case DLM_LOCK_PR:
1995ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders)
1996ccd979bdSMark Fasheh 				kick = 1;
1997ccd979bdSMark Fasheh 			break;
1998ccd979bdSMark Fasheh 		default:
1999ccd979bdSMark Fasheh 			BUG();
2000ccd979bdSMark Fasheh 		}
2001ccd979bdSMark Fasheh 	}
2002ccd979bdSMark Fasheh 
2003ccd979bdSMark Fasheh 	if (kick)
200434d024f8SMark Fasheh 		ocfs2_wake_downconvert_thread(osb);
2005ccd979bdSMark Fasheh }
2006ccd979bdSMark Fasheh 
2007ccd979bdSMark Fasheh #define OCFS2_SEC_BITS   34
2008ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT  (64 - 34)
2009ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
2010ccd979bdSMark Fasheh 
2011ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for
2012ccd979bdSMark Fasheh  * now. */
2013ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec)
2014ccd979bdSMark Fasheh {
2015ccd979bdSMark Fasheh 	u64 res;
2016ccd979bdSMark Fasheh 	u64 sec = spec->tv_sec;
2017ccd979bdSMark Fasheh 	u32 nsec = spec->tv_nsec;
2018ccd979bdSMark Fasheh 
2019ccd979bdSMark Fasheh 	res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
2020ccd979bdSMark Fasheh 
2021ccd979bdSMark Fasheh 	return res;
2022ccd979bdSMark Fasheh }
2023ccd979bdSMark Fasheh 
2024ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't
2025ccd979bdSMark Fasheh  * need ip_lock in this function as anyone who would be changing those
2026e63aecb6SMark Fasheh  * values is supposed to be blocked in ocfs2_inode_lock right now. */
2027ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2028ccd979bdSMark Fasheh {
2029ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2030e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2031ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2032ccd979bdSMark Fasheh 
2033a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2034ccd979bdSMark Fasheh 
203524c19ef4SMark Fasheh 	/*
203624c19ef4SMark Fasheh 	 * Invalidate the LVB of a deleted inode - this way other
203724c19ef4SMark Fasheh 	 * nodes are forced to go to disk and discover the new inode
203824c19ef4SMark Fasheh 	 * status.
203924c19ef4SMark Fasheh 	 */
204024c19ef4SMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
204124c19ef4SMark Fasheh 		lvb->lvb_version = 0;
204224c19ef4SMark Fasheh 		goto out;
204324c19ef4SMark Fasheh 	}
204424c19ef4SMark Fasheh 
20454d3b83f7SMark Fasheh 	lvb->lvb_version   = OCFS2_LVB_VERSION;
2046ccd979bdSMark Fasheh 	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode));
2047ccd979bdSMark Fasheh 	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
204803ab30f7SEric W. Biederman 	lvb->lvb_iuid      = cpu_to_be32(i_uid_read(inode));
204903ab30f7SEric W. Biederman 	lvb->lvb_igid      = cpu_to_be32(i_gid_read(inode));
2050ccd979bdSMark Fasheh 	lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
2051ccd979bdSMark Fasheh 	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
2052ccd979bdSMark Fasheh 	lvb->lvb_iatime_packed  =
2053ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
2054ccd979bdSMark Fasheh 	lvb->lvb_ictime_packed =
2055ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
2056ccd979bdSMark Fasheh 	lvb->lvb_imtime_packed =
2057ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
2058ca4d147eSHerbert Poetzl 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
205915b1e36bSMark Fasheh 	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
2060f9e2d82eSMark Fasheh 	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
2061ccd979bdSMark Fasheh 
206224c19ef4SMark Fasheh out:
2063ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2064ccd979bdSMark Fasheh }
2065ccd979bdSMark Fasheh 
2066ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec,
2067ccd979bdSMark Fasheh 				  u64 packed_time)
2068ccd979bdSMark Fasheh {
2069ccd979bdSMark Fasheh 	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
2070ccd979bdSMark Fasheh 	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
2071ccd979bdSMark Fasheh }
2072ccd979bdSMark Fasheh 
2073ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2074ccd979bdSMark Fasheh {
2075ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2076e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2077ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2078ccd979bdSMark Fasheh 
2079ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2080ccd979bdSMark Fasheh 
2081a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2082ccd979bdSMark Fasheh 
2083ccd979bdSMark Fasheh 	/* We're safe here without the lockres lock... */
2084ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2085ccd979bdSMark Fasheh 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
2086ccd979bdSMark Fasheh 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
2087ccd979bdSMark Fasheh 
2088ca4d147eSHerbert Poetzl 	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
208915b1e36bSMark Fasheh 	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
2090ca4d147eSHerbert Poetzl 	ocfs2_set_inode_flags(inode);
2091ca4d147eSHerbert Poetzl 
2092ccd979bdSMark Fasheh 	/* fast-symlinks are a special case */
2093ccd979bdSMark Fasheh 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
2094ccd979bdSMark Fasheh 		inode->i_blocks = 0;
2095ccd979bdSMark Fasheh 	else
20968110b073SMark Fasheh 		inode->i_blocks = ocfs2_inode_sector_count(inode);
2097ccd979bdSMark Fasheh 
209803ab30f7SEric W. Biederman 	i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
209903ab30f7SEric W. Biederman 	i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
2100ccd979bdSMark Fasheh 	inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
2101bfe86848SMiklos Szeredi 	set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2102ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_atime,
2103ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_iatime_packed));
2104ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_mtime,
2105ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_imtime_packed));
2106ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_ctime,
2107ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_ictime_packed));
2108ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2109ccd979bdSMark Fasheh }
2110ccd979bdSMark Fasheh 
2111f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
2112f9e2d82eSMark Fasheh 					      struct ocfs2_lock_res *lockres)
2113ccd979bdSMark Fasheh {
2114a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2115ccd979bdSMark Fasheh 
21161c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
21171c520dfbSJoel Becker 	    && lvb->lvb_version == OCFS2_LVB_VERSION
2118f9e2d82eSMark Fasheh 	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
2119ccd979bdSMark Fasheh 		return 1;
2120ccd979bdSMark Fasheh 	return 0;
2121ccd979bdSMark Fasheh }
2122ccd979bdSMark Fasheh 
2123ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and
2124ccd979bdSMark Fasheh  * arbitrate who gets to refresh it.
2125ccd979bdSMark Fasheh  *
2126ccd979bdSMark Fasheh  *   0 means no refresh needed.
2127ccd979bdSMark Fasheh  *
2128ccd979bdSMark Fasheh  *   > 0 means you need to refresh this and you MUST call
2129ccd979bdSMark Fasheh  *   ocfs2_complete_lock_res_refresh afterwards. */
2130ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2131ccd979bdSMark Fasheh {
2132ccd979bdSMark Fasheh 	unsigned long flags;
2133ccd979bdSMark Fasheh 	int status = 0;
2134ccd979bdSMark Fasheh 
2135ccd979bdSMark Fasheh refresh_check:
2136ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2137ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2138ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2139ccd979bdSMark Fasheh 		goto bail;
2140ccd979bdSMark Fasheh 	}
2141ccd979bdSMark Fasheh 
2142ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2143ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2144ccd979bdSMark Fasheh 
2145ccd979bdSMark Fasheh 		ocfs2_wait_on_refreshing_lock(lockres);
2146ccd979bdSMark Fasheh 		goto refresh_check;
2147ccd979bdSMark Fasheh 	}
2148ccd979bdSMark Fasheh 
2149ccd979bdSMark Fasheh 	/* Ok, I'll be the one to refresh this lock. */
2150ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
2151ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2152ccd979bdSMark Fasheh 
2153ccd979bdSMark Fasheh 	status = 1;
2154ccd979bdSMark Fasheh bail:
2155c1e8d35eSTao Ma 	mlog(0, "status %d\n", status);
2156ccd979bdSMark Fasheh 	return status;
2157ccd979bdSMark Fasheh }
2158ccd979bdSMark Fasheh 
2159ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh
2160ccd979bdSMark Fasheh  * anymroe, but i won't clear the needs refresh flag. */
2161ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
2162ccd979bdSMark Fasheh 						   int status)
2163ccd979bdSMark Fasheh {
2164ccd979bdSMark Fasheh 	unsigned long flags;
2165ccd979bdSMark Fasheh 
2166ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2167ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
2168ccd979bdSMark Fasheh 	if (!status)
2169ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
2170ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2171ccd979bdSMark Fasheh 
2172ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
2173ccd979bdSMark Fasheh }
2174ccd979bdSMark Fasheh 
2175ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. */
2176e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode,
2177ccd979bdSMark Fasheh 				  struct buffer_head **bh)
2178ccd979bdSMark Fasheh {
2179ccd979bdSMark Fasheh 	int status = 0;
2180ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2181e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2182ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
2183c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2184ccd979bdSMark Fasheh 
2185be9e986bSMark Fasheh 	if (ocfs2_mount_local(osb))
2186be9e986bSMark Fasheh 		goto bail;
2187be9e986bSMark Fasheh 
2188ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2189ccd979bdSMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
2190b0697053SMark Fasheh 		mlog(0, "Orphaned inode %llu was deleted while we "
2191ccd979bdSMark Fasheh 		     "were waiting on a lock. ip_flags = 0x%x\n",
2192b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
2193ccd979bdSMark Fasheh 		spin_unlock(&oi->ip_lock);
2194ccd979bdSMark Fasheh 		status = -ENOENT;
2195ccd979bdSMark Fasheh 		goto bail;
2196ccd979bdSMark Fasheh 	}
2197ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2198ccd979bdSMark Fasheh 
2199ccd979bdSMark Fasheh 	if (!ocfs2_should_refresh_lock_res(lockres))
2200ccd979bdSMark Fasheh 		goto bail;
2201ccd979bdSMark Fasheh 
2202ccd979bdSMark Fasheh 	/* This will discard any caching information we might have had
2203ccd979bdSMark Fasheh 	 * for the inode metadata. */
22048cb471e8SJoel Becker 	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
2205ccd979bdSMark Fasheh 
220683418978SMark Fasheh 	ocfs2_extent_map_trunc(inode, 0);
220783418978SMark Fasheh 
2208be9e986bSMark Fasheh 	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
2209b0697053SMark Fasheh 		mlog(0, "Trusting LVB on inode %llu\n",
2210b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno);
2211ccd979bdSMark Fasheh 		ocfs2_refresh_inode_from_lvb(inode);
2212ccd979bdSMark Fasheh 	} else {
2213ccd979bdSMark Fasheh 		/* Boo, we have to go to disk. */
2214ccd979bdSMark Fasheh 		/* read bh, cast, ocfs2_refresh_inode */
2215b657c95cSJoel Becker 		status = ocfs2_read_inode_block(inode, bh);
2216ccd979bdSMark Fasheh 		if (status < 0) {
2217ccd979bdSMark Fasheh 			mlog_errno(status);
2218ccd979bdSMark Fasheh 			goto bail_refresh;
2219ccd979bdSMark Fasheh 		}
2220ccd979bdSMark Fasheh 		fe = (struct ocfs2_dinode *) (*bh)->b_data;
2221ccd979bdSMark Fasheh 
2222ccd979bdSMark Fasheh 		/* This is a good chance to make sure we're not
2223b657c95cSJoel Becker 		 * locking an invalid object.  ocfs2_read_inode_block()
2224b657c95cSJoel Becker 		 * already checked that the inode block is sane.
2225ccd979bdSMark Fasheh 		 *
2226ccd979bdSMark Fasheh 		 * We bug on a stale inode here because we checked
2227ccd979bdSMark Fasheh 		 * above whether it was wiped from disk. The wiping
2228ccd979bdSMark Fasheh 		 * node provides a guarantee that we receive that
2229ccd979bdSMark Fasheh 		 * message and can mark the inode before dropping any
2230ccd979bdSMark Fasheh 		 * locks associated with it. */
2231ccd979bdSMark Fasheh 		mlog_bug_on_msg(inode->i_generation !=
2232ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_generation),
2233b0697053SMark Fasheh 				"Invalid dinode %llu disk generation: %u "
2234ccd979bdSMark Fasheh 				"inode->i_generation: %u\n",
2235b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2236b0697053SMark Fasheh 				le32_to_cpu(fe->i_generation),
2237ccd979bdSMark Fasheh 				inode->i_generation);
2238ccd979bdSMark Fasheh 		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
2239ccd979bdSMark Fasheh 				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
2240b0697053SMark Fasheh 				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
2241b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2242b0697053SMark Fasheh 				(unsigned long long)le64_to_cpu(fe->i_dtime),
2243ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_flags));
2244ccd979bdSMark Fasheh 
2245ccd979bdSMark Fasheh 		ocfs2_refresh_inode(inode, fe);
22468ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2247ccd979bdSMark Fasheh 	}
2248ccd979bdSMark Fasheh 
2249ccd979bdSMark Fasheh 	status = 0;
2250ccd979bdSMark Fasheh bail_refresh:
2251ccd979bdSMark Fasheh 	ocfs2_complete_lock_res_refresh(lockres, status);
2252ccd979bdSMark Fasheh bail:
2253ccd979bdSMark Fasheh 	return status;
2254ccd979bdSMark Fasheh }
2255ccd979bdSMark Fasheh 
/*
 * Hand the caller a buffer head for @inode's inode block in *@ret_bh.
 *
 * If @passed_bh is non-NULL it is reused, with an extra reference taken
 * through get_bh(), and 0 is returned.  Otherwise the block is read via
 * ocfs2_read_inode_block() and that call's status is returned (negative
 * values are logged).
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (!passed_bh) {
		status = ocfs2_read_inode_block(inode, ret_bh);
		if (status < 0)
			mlog_errno(status);

		return status;
	}

	/* The update already went to disk for us; reuse that bh. */
	get_bh(passed_bh);
	*ret_bh = passed_bh;

	return 0;
}
2277ccd979bdSMark Fasheh 
2278ccd979bdSMark Fasheh /*
2279ccd979bdSMark Fasheh  * returns < 0 error if the callback will never be called, otherwise
2280ccd979bdSMark Fasheh  * the result of the lock will be communicated via the callback.
2281ccd979bdSMark Fasheh  */
2282cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode,
2283ccd979bdSMark Fasheh 				 struct buffer_head **ret_bh,
2284ccd979bdSMark Fasheh 				 int ex,
2285cb25797dSJan Kara 				 int arg_flags,
2286cb25797dSJan Kara 				 int subclass)
2287ccd979bdSMark Fasheh {
2288bd3e7610SJoel Becker 	int status, level, acquired;
2289bd3e7610SJoel Becker 	u32 dlm_flags;
2290c271c5c2SSunil Mushran 	struct ocfs2_lock_res *lockres = NULL;
2291ccd979bdSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2292ccd979bdSMark Fasheh 	struct buffer_head *local_bh = NULL;
2293ccd979bdSMark Fasheh 
2294ccd979bdSMark Fasheh 	BUG_ON(!inode);
2295ccd979bdSMark Fasheh 
2296b0697053SMark Fasheh 	mlog(0, "inode %llu, take %s META lock\n",
2297b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2298ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2299ccd979bdSMark Fasheh 
2300ccd979bdSMark Fasheh 	status = 0;
2301ccd979bdSMark Fasheh 	acquired = 0;
2302ccd979bdSMark Fasheh 	/* We'll allow faking a readonly metadata lock for
2303ccd979bdSMark Fasheh 	 * rodevices. */
2304ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb)) {
2305ccd979bdSMark Fasheh 		if (ex)
2306ccd979bdSMark Fasheh 			status = -EROFS;
230703efed8aSTiger Yang 		goto getbh;
2308ccd979bdSMark Fasheh 	}
2309ccd979bdSMark Fasheh 
2310c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2311c271c5c2SSunil Mushran 		goto local;
2312c271c5c2SSunil Mushran 
2313ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2314553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2315ccd979bdSMark Fasheh 
2316e63aecb6SMark Fasheh 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
2317bd3e7610SJoel Becker 	level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2318ccd979bdSMark Fasheh 	dlm_flags = 0;
2319ccd979bdSMark Fasheh 	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2320bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_NOQUEUE;
2321ccd979bdSMark Fasheh 
2322cb25797dSJan Kara 	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2323cb25797dSJan Kara 				      arg_flags, subclass, _RET_IP_);
2324ccd979bdSMark Fasheh 	if (status < 0) {
232541003a7bSZach Brown 		if (status != -EAGAIN)
2326ccd979bdSMark Fasheh 			mlog_errno(status);
2327ccd979bdSMark Fasheh 		goto bail;
2328ccd979bdSMark Fasheh 	}
2329ccd979bdSMark Fasheh 
2330ccd979bdSMark Fasheh 	/* Notify the error cleanup path to drop the cluster lock. */
2331ccd979bdSMark Fasheh 	acquired = 1;
2332ccd979bdSMark Fasheh 
2333ccd979bdSMark Fasheh 	/* We wait twice because a node may have died while we were in
2334ccd979bdSMark Fasheh 	 * the lower dlm layers. The second time though, we've
2335ccd979bdSMark Fasheh 	 * committed to owning this lock so we don't allow signals to
2336ccd979bdSMark Fasheh 	 * abort the operation. */
2337ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2338553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2339ccd979bdSMark Fasheh 
2340c271c5c2SSunil Mushran local:
234124c19ef4SMark Fasheh 	/*
234224c19ef4SMark Fasheh 	 * We only see this flag if we're being called from
234324c19ef4SMark Fasheh 	 * ocfs2_read_locked_inode(). It means we're locking an inode
234424c19ef4SMark Fasheh 	 * which hasn't been populated yet, so clear the refresh flag
234524c19ef4SMark Fasheh 	 * and let the caller handle it.
234624c19ef4SMark Fasheh 	 */
234724c19ef4SMark Fasheh 	if (inode->i_state & I_NEW) {
234824c19ef4SMark Fasheh 		status = 0;
2349c271c5c2SSunil Mushran 		if (lockres)
235024c19ef4SMark Fasheh 			ocfs2_complete_lock_res_refresh(lockres, 0);
235124c19ef4SMark Fasheh 		goto bail;
235224c19ef4SMark Fasheh 	}
235324c19ef4SMark Fasheh 
2354ccd979bdSMark Fasheh 	/* This is fun. The caller may want a bh back, or it may
2355e63aecb6SMark Fasheh 	 * not. ocfs2_inode_lock_update definitely wants one in, but
2356ccd979bdSMark Fasheh 	 * may or may not read one, depending on what's in the
2357ccd979bdSMark Fasheh 	 * LVB. The result of all of this is that we've *only* gone to
2358ccd979bdSMark Fasheh 	 * disk if we have to, so the complexity is worthwhile. */
2359e63aecb6SMark Fasheh 	status = ocfs2_inode_lock_update(inode, &local_bh);
2360ccd979bdSMark Fasheh 	if (status < 0) {
2361ccd979bdSMark Fasheh 		if (status != -ENOENT)
2362ccd979bdSMark Fasheh 			mlog_errno(status);
2363ccd979bdSMark Fasheh 		goto bail;
2364ccd979bdSMark Fasheh 	}
236503efed8aSTiger Yang getbh:
2366ccd979bdSMark Fasheh 	if (ret_bh) {
2367ccd979bdSMark Fasheh 		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2368ccd979bdSMark Fasheh 		if (status < 0) {
2369ccd979bdSMark Fasheh 			mlog_errno(status);
2370ccd979bdSMark Fasheh 			goto bail;
2371ccd979bdSMark Fasheh 		}
2372ccd979bdSMark Fasheh 	}
2373ccd979bdSMark Fasheh 
2374ccd979bdSMark Fasheh bail:
2375ccd979bdSMark Fasheh 	if (status < 0) {
2376ccd979bdSMark Fasheh 		if (ret_bh && (*ret_bh)) {
2377ccd979bdSMark Fasheh 			brelse(*ret_bh);
2378ccd979bdSMark Fasheh 			*ret_bh = NULL;
2379ccd979bdSMark Fasheh 		}
2380ccd979bdSMark Fasheh 		if (acquired)
2381e63aecb6SMark Fasheh 			ocfs2_inode_unlock(inode, ex);
2382ccd979bdSMark Fasheh 	}
2383ccd979bdSMark Fasheh 
2384ccd979bdSMark Fasheh 	if (local_bh)
2385ccd979bdSMark Fasheh 		brelse(local_bh);
2386ccd979bdSMark Fasheh 
2387ccd979bdSMark Fasheh 	return status;
2388ccd979bdSMark Fasheh }
2389ccd979bdSMark Fasheh 
2390ccd979bdSMark Fasheh /*
239134d024f8SMark Fasheh  * This is working around a lock inversion between tasks acquiring DLM
239234d024f8SMark Fasheh  * locks while holding a page lock and the downconvert thread which
239334d024f8SMark Fasheh  * blocks dlm lock acquiry while acquiring page locks.
2394ccd979bdSMark Fasheh  *
2395ccd979bdSMark Fasheh  * ** These _with_page variantes are only intended to be called from aop
2396ccd979bdSMark Fasheh  * methods that hold page locks and return a very specific *positive* error
2397ccd979bdSMark Fasheh  * code that aop methods pass up to the VFS -- test for errors with != 0. **
2398ccd979bdSMark Fasheh  *
239934d024f8SMark Fasheh  * The DLM is called such that it returns -EAGAIN if it would have
240034d024f8SMark Fasheh  * blocked waiting for the downconvert thread.  In that case we unlock
240134d024f8SMark Fasheh  * our page so the downconvert thread can make progress.  Once we've
240234d024f8SMark Fasheh  * done this we have to return AOP_TRUNCATED_PAGE so the aop method
240334d024f8SMark Fasheh  * that called us can bubble that back up into the VFS who will then
240434d024f8SMark Fasheh  * immediately retry the aop call.
2405ccd979bdSMark Fasheh  *
2406ccd979bdSMark Fasheh  * We do a blocking lock and immediate unlock before returning, though, so that
2407ccd979bdSMark Fasheh  * the lock has a great chance of being cached on this node by the time the VFS
2408ccd979bdSMark Fasheh  * calls back to retry the aop.    This has a potential to livelock as nodes
2409ccd979bdSMark Fasheh  * ping locks back and forth, but that's a risk we're willing to take to avoid
2410ccd979bdSMark Fasheh  * the lock inversion simply.
2411ccd979bdSMark Fasheh  */
2412e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode,
2413ccd979bdSMark Fasheh 			      struct buffer_head **ret_bh,
2414ccd979bdSMark Fasheh 			      int ex,
2415ccd979bdSMark Fasheh 			      struct page *page)
2416ccd979bdSMark Fasheh {
2417ccd979bdSMark Fasheh 	int ret;
2418ccd979bdSMark Fasheh 
2419e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2420ccd979bdSMark Fasheh 	if (ret == -EAGAIN) {
2421ccd979bdSMark Fasheh 		unlock_page(page);
2422e63aecb6SMark Fasheh 		if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
2423e63aecb6SMark Fasheh 			ocfs2_inode_unlock(inode, ex);
2424ccd979bdSMark Fasheh 		ret = AOP_TRUNCATED_PAGE;
2425ccd979bdSMark Fasheh 	}
2426ccd979bdSMark Fasheh 
2427ccd979bdSMark Fasheh 	return ret;
2428ccd979bdSMark Fasheh }
2429ccd979bdSMark Fasheh 
2430e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode,
24317f1a37e3STiger Yang 			  struct vfsmount *vfsmnt,
24327f1a37e3STiger Yang 			  int *level)
24337f1a37e3STiger Yang {
24347f1a37e3STiger Yang 	int ret;
24357f1a37e3STiger Yang 
2436e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock(inode, NULL, 0);
24377f1a37e3STiger Yang 	if (ret < 0) {
24387f1a37e3STiger Yang 		mlog_errno(ret);
24397f1a37e3STiger Yang 		return ret;
24407f1a37e3STiger Yang 	}
24417f1a37e3STiger Yang 
24427f1a37e3STiger Yang 	/*
24437f1a37e3STiger Yang 	 * If we should update atime, we will get EX lock,
24447f1a37e3STiger Yang 	 * otherwise we just get PR lock.
24457f1a37e3STiger Yang 	 */
24467f1a37e3STiger Yang 	if (ocfs2_should_update_atime(inode, vfsmnt)) {
24477f1a37e3STiger Yang 		struct buffer_head *bh = NULL;
24487f1a37e3STiger Yang 
2449e63aecb6SMark Fasheh 		ocfs2_inode_unlock(inode, 0);
2450e63aecb6SMark Fasheh 		ret = ocfs2_inode_lock(inode, &bh, 1);
24517f1a37e3STiger Yang 		if (ret < 0) {
24527f1a37e3STiger Yang 			mlog_errno(ret);
24537f1a37e3STiger Yang 			return ret;
24547f1a37e3STiger Yang 		}
24557f1a37e3STiger Yang 		*level = 1;
24567f1a37e3STiger Yang 		if (ocfs2_should_update_atime(inode, vfsmnt))
24577f1a37e3STiger Yang 			ocfs2_update_inode_atime(inode, bh);
24587f1a37e3STiger Yang 		if (bh)
24597f1a37e3STiger Yang 			brelse(bh);
24607f1a37e3STiger Yang 	} else
24617f1a37e3STiger Yang 		*level = 0;
24627f1a37e3STiger Yang 
24637f1a37e3STiger Yang 	return ret;
24647f1a37e3STiger Yang }
24657f1a37e3STiger Yang 
2466e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode,
2467ccd979bdSMark Fasheh 		       int ex)
2468ccd979bdSMark Fasheh {
2469bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2470e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2471c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2472ccd979bdSMark Fasheh 
2473b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s META lock\n",
2474b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2475ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2476ccd979bdSMark Fasheh 
2477c271c5c2SSunil Mushran 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
2478c271c5c2SSunil Mushran 	    !ocfs2_mount_local(osb))
2479ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
2480ccd979bdSMark Fasheh }
2481ccd979bdSMark Fasheh 
2482df152c24SSunil Mushran int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
248383273932SSrinivas Eeda {
248483273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
248583273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
248683273932SSrinivas Eeda 	int status = 0;
248783273932SSrinivas Eeda 
2488df152c24SSunil Mushran 	if (ocfs2_is_hard_readonly(osb))
2489df152c24SSunil Mushran 		return -EROFS;
2490df152c24SSunil Mushran 
2491df152c24SSunil Mushran 	if (ocfs2_mount_local(osb))
2492df152c24SSunil Mushran 		return 0;
2493df152c24SSunil Mushran 
249483273932SSrinivas Eeda 	lockres = &osb->osb_orphan_scan.os_lockres;
2495df152c24SSunil Mushran 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
249683273932SSrinivas Eeda 	if (status < 0)
249783273932SSrinivas Eeda 		return status;
249883273932SSrinivas Eeda 
249983273932SSrinivas Eeda 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
25001c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
25011c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
250283273932SSrinivas Eeda 		*seqno = be32_to_cpu(lvb->lvb_os_seqno);
25033211949fSSunil Mushran 	else
25043211949fSSunil Mushran 		*seqno = osb->osb_orphan_scan.os_seqno + 1;
25053211949fSSunil Mushran 
250683273932SSrinivas Eeda 	return status;
250783273932SSrinivas Eeda }
250883273932SSrinivas Eeda 
2509df152c24SSunil Mushran void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
251083273932SSrinivas Eeda {
251183273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
251283273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
251383273932SSrinivas Eeda 
2514df152c24SSunil Mushran 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
251583273932SSrinivas Eeda 		lockres = &osb->osb_orphan_scan.os_lockres;
251683273932SSrinivas Eeda 		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
251783273932SSrinivas Eeda 		lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
251883273932SSrinivas Eeda 		lvb->lvb_os_seqno = cpu_to_be32(seqno);
2519df152c24SSunil Mushran 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2520df152c24SSunil Mushran 	}
252183273932SSrinivas Eeda }
252283273932SSrinivas Eeda 
2523ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb,
2524ccd979bdSMark Fasheh 		     int ex)
2525ccd979bdSMark Fasheh {
2526c271c5c2SSunil Mushran 	int status = 0;
2527bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2528ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2529ccd979bdSMark Fasheh 
2530ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2531ccd979bdSMark Fasheh 		return -EROFS;
2532ccd979bdSMark Fasheh 
2533c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2534c271c5c2SSunil Mushran 		goto bail;
2535c271c5c2SSunil Mushran 
2536ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2537ccd979bdSMark Fasheh 	if (status < 0) {
2538ccd979bdSMark Fasheh 		mlog_errno(status);
2539ccd979bdSMark Fasheh 		goto bail;
2540ccd979bdSMark Fasheh 	}
2541ccd979bdSMark Fasheh 
2542ccd979bdSMark Fasheh 	/* The super block lock path is really in the best position to
2543ccd979bdSMark Fasheh 	 * know when resources covered by the lock need to be
2544ccd979bdSMark Fasheh 	 * refreshed, so we do it here. Of course, making sense of
2545ccd979bdSMark Fasheh 	 * everything is up to the caller :) */
2546ccd979bdSMark Fasheh 	status = ocfs2_should_refresh_lock_res(lockres);
2547ccd979bdSMark Fasheh 	if (status) {
25488e8a4603SMark Fasheh 		status = ocfs2_refresh_slot_info(osb);
2549ccd979bdSMark Fasheh 
2550ccd979bdSMark Fasheh 		ocfs2_complete_lock_res_refresh(lockres, status);
2551ccd979bdSMark Fasheh 
25523278bb74SJunxiao Bi 		if (status < 0) {
25533278bb74SJunxiao Bi 			ocfs2_cluster_unlock(osb, lockres, level);
2554ccd979bdSMark Fasheh 			mlog_errno(status);
25553278bb74SJunxiao Bi 		}
25568ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2557ccd979bdSMark Fasheh 	}
2558ccd979bdSMark Fasheh bail:
2559ccd979bdSMark Fasheh 	return status;
2560ccd979bdSMark Fasheh }
2561ccd979bdSMark Fasheh 
2562ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb,
2563ccd979bdSMark Fasheh 			int ex)
2564ccd979bdSMark Fasheh {
2565bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2566ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2567ccd979bdSMark Fasheh 
2568c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2569ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(osb, lockres, level);
2570ccd979bdSMark Fasheh }
2571ccd979bdSMark Fasheh 
2572ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb)
2573ccd979bdSMark Fasheh {
2574ccd979bdSMark Fasheh 	int status;
2575ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2576ccd979bdSMark Fasheh 
2577ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2578ccd979bdSMark Fasheh 		return -EROFS;
2579ccd979bdSMark Fasheh 
2580c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2581c271c5c2SSunil Mushran 		return 0;
2582c271c5c2SSunil Mushran 
2583bd3e7610SJoel Becker 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2584ccd979bdSMark Fasheh 	if (status < 0)
2585ccd979bdSMark Fasheh 		mlog_errno(status);
2586ccd979bdSMark Fasheh 
2587ccd979bdSMark Fasheh 	return status;
2588ccd979bdSMark Fasheh }
2589ccd979bdSMark Fasheh 
2590ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb)
2591ccd979bdSMark Fasheh {
2592ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2593ccd979bdSMark Fasheh 
2594c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2595bd3e7610SJoel Becker 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2596ccd979bdSMark Fasheh }
2597ccd979bdSMark Fasheh 
25986ca497a8Swengang wang int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
25996ca497a8Swengang wang {
26006ca497a8Swengang wang 	int status;
26016ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
26026ca497a8Swengang wang 
26036ca497a8Swengang wang 	if (ocfs2_is_hard_readonly(osb))
26046ca497a8Swengang wang 		return -EROFS;
26056ca497a8Swengang wang 
26066ca497a8Swengang wang 	if (ocfs2_mount_local(osb))
26076ca497a8Swengang wang 		return 0;
26086ca497a8Swengang wang 
26096ca497a8Swengang wang 	status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
26106ca497a8Swengang wang 				    0, 0);
26116ca497a8Swengang wang 	if (status < 0)
26126ca497a8Swengang wang 		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
26136ca497a8Swengang wang 
26146ca497a8Swengang wang 	return status;
26156ca497a8Swengang wang }
26166ca497a8Swengang wang 
26176ca497a8Swengang wang void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
26186ca497a8Swengang wang {
26196ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
26206ca497a8Swengang wang 
26216ca497a8Swengang wang 	if (!ocfs2_mount_local(osb))
26226ca497a8Swengang wang 		ocfs2_cluster_unlock(osb, lockres,
26236ca497a8Swengang wang 				     ex ? LKM_EXMODE : LKM_PRMODE);
26246ca497a8Swengang wang }
26256ca497a8Swengang wang 
2626d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2627d680efe9SMark Fasheh {
2628d680efe9SMark Fasheh 	int ret;
2629bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2630d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2631d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2632d680efe9SMark Fasheh 
2633d680efe9SMark Fasheh 	BUG_ON(!dl);
2634d680efe9SMark Fasheh 
263503efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
263603efed8aSTiger Yang 		if (ex)
2637d680efe9SMark Fasheh 			return -EROFS;
263803efed8aSTiger Yang 		return 0;
263903efed8aSTiger Yang 	}
2640d680efe9SMark Fasheh 
2641c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2642c271c5c2SSunil Mushran 		return 0;
2643c271c5c2SSunil Mushran 
2644d680efe9SMark Fasheh 	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2645d680efe9SMark Fasheh 	if (ret < 0)
2646d680efe9SMark Fasheh 		mlog_errno(ret);
2647d680efe9SMark Fasheh 
2648d680efe9SMark Fasheh 	return ret;
2649d680efe9SMark Fasheh }
2650d680efe9SMark Fasheh 
2651d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2652d680efe9SMark Fasheh {
2653bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2654d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2655d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2656d680efe9SMark Fasheh 
265703efed8aSTiger Yang 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
2658d680efe9SMark Fasheh 		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
2659d680efe9SMark Fasheh }
2660d680efe9SMark Fasheh 
2661ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because
2662ccd979bdSMark Fasheh  * open references on the debug inodes can live on after a mount, so
2663ccd979bdSMark Fasheh  * we can't rely on the ocfs2_super to always exist. */
2664ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref)
2665ccd979bdSMark Fasheh {
2666ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2667ccd979bdSMark Fasheh 
2668ccd979bdSMark Fasheh 	dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
2669ccd979bdSMark Fasheh 
2670ccd979bdSMark Fasheh 	kfree(dlm_debug);
2671ccd979bdSMark Fasheh }
2672ccd979bdSMark Fasheh 
2673ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
2674ccd979bdSMark Fasheh {
2675ccd979bdSMark Fasheh 	if (dlm_debug)
2676ccd979bdSMark Fasheh 		kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
2677ccd979bdSMark Fasheh }
2678ccd979bdSMark Fasheh 
2679ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
2680ccd979bdSMark Fasheh {
2681ccd979bdSMark Fasheh 	kref_get(&debug->d_refcnt);
2682ccd979bdSMark Fasheh }
2683ccd979bdSMark Fasheh 
2684ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
2685ccd979bdSMark Fasheh {
2686ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2687ccd979bdSMark Fasheh 
2688ccd979bdSMark Fasheh 	dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
2689ccd979bdSMark Fasheh 	if (!dlm_debug) {
2690ccd979bdSMark Fasheh 		mlog_errno(-ENOMEM);
2691ccd979bdSMark Fasheh 		goto out;
2692ccd979bdSMark Fasheh 	}
2693ccd979bdSMark Fasheh 
2694ccd979bdSMark Fasheh 	kref_init(&dlm_debug->d_refcnt);
2695ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
2696ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = NULL;
2697ccd979bdSMark Fasheh out:
2698ccd979bdSMark Fasheh 	return dlm_debug;
2699ccd979bdSMark Fasheh }
2700ccd979bdSMark Fasheh 
2701ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. */
2702ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv {
2703ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *p_dlm_debug;
2704ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_iter_res;
2705ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_tmp_res;
2706ccd979bdSMark Fasheh };
2707ccd979bdSMark Fasheh 
2708ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
2709ccd979bdSMark Fasheh 						 struct ocfs2_dlm_seq_priv *priv)
2710ccd979bdSMark Fasheh {
2711ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter, *ret = NULL;
2712ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
2713ccd979bdSMark Fasheh 
2714ccd979bdSMark Fasheh 	assert_spin_locked(&ocfs2_dlm_tracking_lock);
2715ccd979bdSMark Fasheh 
2716ccd979bdSMark Fasheh 	list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
2717ccd979bdSMark Fasheh 		/* discover the head of the list */
2718ccd979bdSMark Fasheh 		if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
2719ccd979bdSMark Fasheh 			mlog(0, "End of list found, %p\n", ret);
2720ccd979bdSMark Fasheh 			break;
2721ccd979bdSMark Fasheh 		}
2722ccd979bdSMark Fasheh 
2723ccd979bdSMark Fasheh 		/* We track our "dummy" iteration lockres' by a NULL
2724ccd979bdSMark Fasheh 		 * l_ops field. */
2725ccd979bdSMark Fasheh 		if (iter->l_ops != NULL) {
2726ccd979bdSMark Fasheh 			ret = iter;
2727ccd979bdSMark Fasheh 			break;
2728ccd979bdSMark Fasheh 		}
2729ccd979bdSMark Fasheh 	}
2730ccd979bdSMark Fasheh 
2731ccd979bdSMark Fasheh 	return ret;
2732ccd979bdSMark Fasheh }
2733ccd979bdSMark Fasheh 
2734ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
2735ccd979bdSMark Fasheh {
2736ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
2737ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter;
2738ccd979bdSMark Fasheh 
2739ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
2740ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
2741ccd979bdSMark Fasheh 	if (iter) {
2742ccd979bdSMark Fasheh 		/* Since lockres' have the lifetime of their container
2743ccd979bdSMark Fasheh 		 * (which can be inodes, ocfs2_supers, etc) we want to
2744ccd979bdSMark Fasheh 		 * copy this out to a temporary lockres while still
2745ccd979bdSMark Fasheh 		 * under the spinlock. Obviously after this we can't
2746ccd979bdSMark Fasheh 		 * trust any pointers on the copy returned, but that's
2747ccd979bdSMark Fasheh 		 * ok as the information we want isn't typically held
2748ccd979bdSMark Fasheh 		 * in them. */
2749ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
2750ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
2751ccd979bdSMark Fasheh 	}
2752ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
2753ccd979bdSMark Fasheh 
2754ccd979bdSMark Fasheh 	return iter;
2755ccd979bdSMark Fasheh }
2756ccd979bdSMark Fasheh 
/*
 * seq_file ->stop: intentionally empty.  ->start and ->next release
 * ocfs2_dlm_tracking_lock themselves before returning.
 */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
}
2760ccd979bdSMark Fasheh 
2761ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
2762ccd979bdSMark Fasheh {
2763ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
2764ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter = v;
2765ccd979bdSMark Fasheh 	struct ocfs2_lock_res *dummy = &priv->p_iter_res;
2766ccd979bdSMark Fasheh 
2767ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
2768ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(iter, priv);
2769ccd979bdSMark Fasheh 	list_del_init(&dummy->l_debug_list);
2770ccd979bdSMark Fasheh 	if (iter) {
2771ccd979bdSMark Fasheh 		list_add(&dummy->l_debug_list, &iter->l_debug_list);
2772ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
2773ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
2774ccd979bdSMark Fasheh 	}
2775ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
2776ccd979bdSMark Fasheh 
2777ccd979bdSMark Fasheh 	return iter;
2778ccd979bdSMark Fasheh }
2779ccd979bdSMark Fasheh 
27805bc970e8SSunil Mushran /*
27815bc970e8SSunil Mushran  * Version is used by debugfs.ocfs2 to determine the format being used
27825bc970e8SSunil Mushran  *
27835bc970e8SSunil Mushran  * New in version 2
27845bc970e8SSunil Mushran  *	- Lock stats printed
27855bc970e8SSunil Mushran  * New in version 3
27865bc970e8SSunil Mushran  *	- Max time in lock stats is in usecs (instead of nsecs)
27875bc970e8SSunil Mushran  */
27885bc970e8SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 3
2789ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2790ccd979bdSMark Fasheh {
2791ccd979bdSMark Fasheh 	int i;
2792ccd979bdSMark Fasheh 	char *lvb;
2793ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = v;
2794ccd979bdSMark Fasheh 
2795ccd979bdSMark Fasheh 	if (!lockres)
2796ccd979bdSMark Fasheh 		return -EINVAL;
2797ccd979bdSMark Fasheh 
2798d680efe9SMark Fasheh 	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
2799d680efe9SMark Fasheh 
2800d680efe9SMark Fasheh 	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
2801d680efe9SMark Fasheh 		seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
2802d680efe9SMark Fasheh 			   lockres->l_name,
2803d680efe9SMark Fasheh 			   (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
2804d680efe9SMark Fasheh 	else
2805d680efe9SMark Fasheh 		seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
2806d680efe9SMark Fasheh 
2807d680efe9SMark Fasheh 	seq_printf(m, "%d\t"
2808ccd979bdSMark Fasheh 		   "0x%lx\t"
2809ccd979bdSMark Fasheh 		   "0x%x\t"
2810ccd979bdSMark Fasheh 		   "0x%x\t"
2811ccd979bdSMark Fasheh 		   "%u\t"
2812ccd979bdSMark Fasheh 		   "%u\t"
2813ccd979bdSMark Fasheh 		   "%d\t"
2814ccd979bdSMark Fasheh 		   "%d\t",
2815ccd979bdSMark Fasheh 		   lockres->l_level,
2816ccd979bdSMark Fasheh 		   lockres->l_flags,
2817ccd979bdSMark Fasheh 		   lockres->l_action,
2818ccd979bdSMark Fasheh 		   lockres->l_unlock_action,
2819ccd979bdSMark Fasheh 		   lockres->l_ro_holders,
2820ccd979bdSMark Fasheh 		   lockres->l_ex_holders,
2821ccd979bdSMark Fasheh 		   lockres->l_requested,
2822ccd979bdSMark Fasheh 		   lockres->l_blocking);
2823ccd979bdSMark Fasheh 
2824ccd979bdSMark Fasheh 	/* Dump the raw LVB */
28258f2c9c1bSJoel Becker 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2826ccd979bdSMark Fasheh 	for(i = 0; i < DLM_LVB_LEN; i++)
2827ccd979bdSMark Fasheh 		seq_printf(m, "0x%x\t", lvb[i]);
2828ccd979bdSMark Fasheh 
28298ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
28305bc970e8SSunil Mushran # define lock_num_prmode(_l)		((_l)->l_lock_prmode.ls_gets)
28315bc970e8SSunil Mushran # define lock_num_exmode(_l)		((_l)->l_lock_exmode.ls_gets)
28325bc970e8SSunil Mushran # define lock_num_prmode_failed(_l)	((_l)->l_lock_prmode.ls_fail)
28335bc970e8SSunil Mushran # define lock_num_exmode_failed(_l)	((_l)->l_lock_exmode.ls_fail)
28345bc970e8SSunil Mushran # define lock_total_prmode(_l)		((_l)->l_lock_prmode.ls_total)
28355bc970e8SSunil Mushran # define lock_total_exmode(_l)		((_l)->l_lock_exmode.ls_total)
28365bc970e8SSunil Mushran # define lock_max_prmode(_l)		((_l)->l_lock_prmode.ls_max)
28375bc970e8SSunil Mushran # define lock_max_exmode(_l)		((_l)->l_lock_exmode.ls_max)
28385bc970e8SSunil Mushran # define lock_refresh(_l)		((_l)->l_lock_refresh)
28398ddb7b00SSunil Mushran #else
28405bc970e8SSunil Mushran # define lock_num_prmode(_l)		(0)
28415bc970e8SSunil Mushran # define lock_num_exmode(_l)		(0)
28428ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l)	(0)
28438ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l)	(0)
2844dd25e55eSRandy Dunlap # define lock_total_prmode(_l)		(0ULL)
2845dd25e55eSRandy Dunlap # define lock_total_exmode(_l)		(0ULL)
28468ddb7b00SSunil Mushran # define lock_max_prmode(_l)		(0)
28478ddb7b00SSunil Mushran # define lock_max_exmode(_l)		(0)
28488ddb7b00SSunil Mushran # define lock_refresh(_l)		(0)
28498ddb7b00SSunil Mushran #endif
28508ddb7b00SSunil Mushran 	/* The following seq_print was added in version 2 of this output */
28515bc970e8SSunil Mushran 	seq_printf(m, "%u\t"
28525bc970e8SSunil Mushran 		   "%u\t"
28538ddb7b00SSunil Mushran 		   "%u\t"
28548ddb7b00SSunil Mushran 		   "%u\t"
28558ddb7b00SSunil Mushran 		   "%llu\t"
28568ddb7b00SSunil Mushran 		   "%llu\t"
28578ddb7b00SSunil Mushran 		   "%u\t"
28588ddb7b00SSunil Mushran 		   "%u\t"
28598ddb7b00SSunil Mushran 		   "%u\t",
28608ddb7b00SSunil Mushran 		   lock_num_prmode(lockres),
28618ddb7b00SSunil Mushran 		   lock_num_exmode(lockres),
28628ddb7b00SSunil Mushran 		   lock_num_prmode_failed(lockres),
28638ddb7b00SSunil Mushran 		   lock_num_exmode_failed(lockres),
28648ddb7b00SSunil Mushran 		   lock_total_prmode(lockres),
28658ddb7b00SSunil Mushran 		   lock_total_exmode(lockres),
28668ddb7b00SSunil Mushran 		   lock_max_prmode(lockres),
28678ddb7b00SSunil Mushran 		   lock_max_exmode(lockres),
28688ddb7b00SSunil Mushran 		   lock_refresh(lockres));
28698ddb7b00SSunil Mushran 
2870ccd979bdSMark Fasheh 	/* End the line */
2871ccd979bdSMark Fasheh 	seq_printf(m, "\n");
2872ccd979bdSMark Fasheh 	return 0;
2873ccd979bdSMark Fasheh }
2874ccd979bdSMark Fasheh 
287590d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = {
2876ccd979bdSMark Fasheh 	.start =	ocfs2_dlm_seq_start,
2877ccd979bdSMark Fasheh 	.stop =		ocfs2_dlm_seq_stop,
2878ccd979bdSMark Fasheh 	.next =		ocfs2_dlm_seq_next,
2879ccd979bdSMark Fasheh 	.show =		ocfs2_dlm_seq_show,
2880ccd979bdSMark Fasheh };
2881ccd979bdSMark Fasheh 
2882ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
2883ccd979bdSMark Fasheh {
288433fa1d90SJoe Perches 	struct seq_file *seq = file->private_data;
2885ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = seq->private;
2886ccd979bdSMark Fasheh 	struct ocfs2_lock_res *res = &priv->p_iter_res;
2887ccd979bdSMark Fasheh 
2888ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
2889ccd979bdSMark Fasheh 	ocfs2_put_dlm_debug(priv->p_dlm_debug);
2890ccd979bdSMark Fasheh 	return seq_release_private(inode, file);
2891ccd979bdSMark Fasheh }
2892ccd979bdSMark Fasheh 
2893ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
2894ccd979bdSMark Fasheh {
2895ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv;
2896ccd979bdSMark Fasheh 	struct ocfs2_super *osb;
2897ccd979bdSMark Fasheh 
28981848cb55SRob Jones 	priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv));
2899ccd979bdSMark Fasheh 	if (!priv) {
29001848cb55SRob Jones 		mlog_errno(-ENOMEM);
29011848cb55SRob Jones 		return -ENOMEM;
2902ccd979bdSMark Fasheh 	}
29031848cb55SRob Jones 
29048e18e294STheodore Ts'o 	osb = inode->i_private;
2905ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(osb->osb_dlm_debug);
2906ccd979bdSMark Fasheh 	priv->p_dlm_debug = osb->osb_dlm_debug;
2907ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
2908ccd979bdSMark Fasheh 
2909ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(&priv->p_iter_res,
2910ccd979bdSMark Fasheh 				   priv->p_dlm_debug);
2911ccd979bdSMark Fasheh 
29121848cb55SRob Jones 	return 0;
2913ccd979bdSMark Fasheh }
2914ccd979bdSMark Fasheh 
29154b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = {
2916ccd979bdSMark Fasheh 	.open =		ocfs2_dlm_debug_open,
2917ccd979bdSMark Fasheh 	.release =	ocfs2_dlm_debug_release,
2918ccd979bdSMark Fasheh 	.read =		seq_read,
2919ccd979bdSMark Fasheh 	.llseek =	seq_lseek,
2920ccd979bdSMark Fasheh };
2921ccd979bdSMark Fasheh 
2922ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
2923ccd979bdSMark Fasheh {
2924ccd979bdSMark Fasheh 	int ret = 0;
2925ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2926ccd979bdSMark Fasheh 
2927ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = debugfs_create_file("locking_state",
2928ccd979bdSMark Fasheh 							 S_IFREG|S_IRUSR,
2929ccd979bdSMark Fasheh 							 osb->osb_debug_root,
2930ccd979bdSMark Fasheh 							 osb,
2931ccd979bdSMark Fasheh 							 &ocfs2_dlm_debug_fops);
2932ccd979bdSMark Fasheh 	if (!dlm_debug->d_locking_state) {
2933ccd979bdSMark Fasheh 		ret = -EINVAL;
2934ccd979bdSMark Fasheh 		mlog(ML_ERROR,
2935ccd979bdSMark Fasheh 		     "Unable to create locking state debugfs file.\n");
2936ccd979bdSMark Fasheh 		goto out;
2937ccd979bdSMark Fasheh 	}
2938ccd979bdSMark Fasheh 
2939ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(dlm_debug);
2940ccd979bdSMark Fasheh out:
2941ccd979bdSMark Fasheh 	return ret;
2942ccd979bdSMark Fasheh }
2943ccd979bdSMark Fasheh 
2944ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
2945ccd979bdSMark Fasheh {
2946ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2947ccd979bdSMark Fasheh 
2948ccd979bdSMark Fasheh 	if (dlm_debug) {
2949ccd979bdSMark Fasheh 		debugfs_remove(dlm_debug->d_locking_state);
2950ccd979bdSMark Fasheh 		ocfs2_put_dlm_debug(dlm_debug);
2951ccd979bdSMark Fasheh 	}
2952ccd979bdSMark Fasheh }
2953ccd979bdSMark Fasheh 
2954ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb)
2955ccd979bdSMark Fasheh {
2956c271c5c2SSunil Mushran 	int status = 0;
29574670c46dSJoel Becker 	struct ocfs2_cluster_connection *conn = NULL;
2958ccd979bdSMark Fasheh 
29590abd6d18SMark Fasheh 	if (ocfs2_mount_local(osb)) {
29600abd6d18SMark Fasheh 		osb->node_num = 0;
2961c271c5c2SSunil Mushran 		goto local;
29620abd6d18SMark Fasheh 	}
2963c271c5c2SSunil Mushran 
2964ccd979bdSMark Fasheh 	status = ocfs2_dlm_init_debug(osb);
2965ccd979bdSMark Fasheh 	if (status < 0) {
2966ccd979bdSMark Fasheh 		mlog_errno(status);
2967ccd979bdSMark Fasheh 		goto bail;
2968ccd979bdSMark Fasheh 	}
2969ccd979bdSMark Fasheh 
297034d024f8SMark Fasheh 	/* launch downconvert thread */
297134d024f8SMark Fasheh 	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc");
297234d024f8SMark Fasheh 	if (IS_ERR(osb->dc_task)) {
297334d024f8SMark Fasheh 		status = PTR_ERR(osb->dc_task);
297434d024f8SMark Fasheh 		osb->dc_task = NULL;
2975ccd979bdSMark Fasheh 		mlog_errno(status);
2976ccd979bdSMark Fasheh 		goto bail;
2977ccd979bdSMark Fasheh 	}
2978ccd979bdSMark Fasheh 
2979ccd979bdSMark Fasheh 	/* for now, uuid == domain */
29809c6c877cSJoel Becker 	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
2981c74a3bddSGoldwyn Rodrigues 				       osb->osb_cluster_name,
2982c74a3bddSGoldwyn Rodrigues 				       strlen(osb->osb_cluster_name),
29839c6c877cSJoel Becker 				       osb->uuid_str,
29844670c46dSJoel Becker 				       strlen(osb->uuid_str),
2985553b5eb9SJoel Becker 				       &lproto, ocfs2_do_node_down, osb,
29864670c46dSJoel Becker 				       &conn);
29874670c46dSJoel Becker 	if (status) {
2988ccd979bdSMark Fasheh 		mlog_errno(status);
2989ccd979bdSMark Fasheh 		goto bail;
2990ccd979bdSMark Fasheh 	}
2991ccd979bdSMark Fasheh 
29923e834151SGoldwyn Rodrigues 	status = ocfs2_cluster_this_node(conn, &osb->node_num);
29930abd6d18SMark Fasheh 	if (status < 0) {
29940abd6d18SMark Fasheh 		mlog_errno(status);
29950abd6d18SMark Fasheh 		mlog(ML_ERROR,
29960abd6d18SMark Fasheh 		     "could not find this host's node number\n");
2997286eaa95SJoel Becker 		ocfs2_cluster_disconnect(conn, 0);
29980abd6d18SMark Fasheh 		goto bail;
29990abd6d18SMark Fasheh 	}
30000abd6d18SMark Fasheh 
3001c271c5c2SSunil Mushran local:
3002ccd979bdSMark Fasheh 	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
3003ccd979bdSMark Fasheh 	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
30046ca497a8Swengang wang 	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
300583273932SSrinivas Eeda 	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
3006ccd979bdSMark Fasheh 
30074670c46dSJoel Becker 	osb->cconn = conn;
3008ccd979bdSMark Fasheh 
3009ccd979bdSMark Fasheh 	status = 0;
3010ccd979bdSMark Fasheh bail:
3011ccd979bdSMark Fasheh 	if (status < 0) {
3012ccd979bdSMark Fasheh 		ocfs2_dlm_shutdown_debug(osb);
301334d024f8SMark Fasheh 		if (osb->dc_task)
301434d024f8SMark Fasheh 			kthread_stop(osb->dc_task);
3015ccd979bdSMark Fasheh 	}
3016ccd979bdSMark Fasheh 
3017ccd979bdSMark Fasheh 	return status;
3018ccd979bdSMark Fasheh }
3019ccd979bdSMark Fasheh 
3020286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
3021286eaa95SJoel Becker 			int hangup_pending)
3022ccd979bdSMark Fasheh {
3023ccd979bdSMark Fasheh 	ocfs2_drop_osb_locks(osb);
3024ccd979bdSMark Fasheh 
30254670c46dSJoel Becker 	/*
30264670c46dSJoel Becker 	 * Now that we have dropped all locks and ocfs2_dismount_volume()
30274670c46dSJoel Becker 	 * has disabled recovery, the DLM won't be talking to us.  It's
30284670c46dSJoel Becker 	 * safe to tear things down before disconnecting the cluster.
30294670c46dSJoel Becker 	 */
30304670c46dSJoel Becker 
303134d024f8SMark Fasheh 	if (osb->dc_task) {
303234d024f8SMark Fasheh 		kthread_stop(osb->dc_task);
303334d024f8SMark Fasheh 		osb->dc_task = NULL;
3034ccd979bdSMark Fasheh 	}
3035ccd979bdSMark Fasheh 
3036ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_super_lockres);
3037ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_rename_lockres);
30386ca497a8Swengang wang 	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
303983273932SSrinivas Eeda 	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
3040ccd979bdSMark Fasheh 
3041286eaa95SJoel Becker 	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
30424670c46dSJoel Becker 	osb->cconn = NULL;
3043ccd979bdSMark Fasheh 
3044ccd979bdSMark Fasheh 	ocfs2_dlm_shutdown_debug(osb);
3045ccd979bdSMark Fasheh }
3046ccd979bdSMark Fasheh 
3047ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb,
30480d5dc6c2SMark Fasheh 			   struct ocfs2_lock_res *lockres)
3049ccd979bdSMark Fasheh {
30507431cd7eSJoel Becker 	int ret;
3051ccd979bdSMark Fasheh 	unsigned long flags;
3052bd3e7610SJoel Becker 	u32 lkm_flags = 0;
3053ccd979bdSMark Fasheh 
3054ccd979bdSMark Fasheh 	/* We didn't get anywhere near actually using this lockres. */
3055ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
3056ccd979bdSMark Fasheh 		goto out;
3057ccd979bdSMark Fasheh 
3058b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3059bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
3060b80fc012SMark Fasheh 
3061ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3062ccd979bdSMark Fasheh 
3063ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
3064ccd979bdSMark Fasheh 			"lockres %s, flags 0x%lx\n",
3065ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3066ccd979bdSMark Fasheh 
3067ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_BUSY) {
3068ccd979bdSMark Fasheh 		mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
3069ccd979bdSMark Fasheh 		     "%u, unlock_action = %u\n",
3070ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags, lockres->l_action,
3071ccd979bdSMark Fasheh 		     lockres->l_unlock_action);
3072ccd979bdSMark Fasheh 
3073ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3074ccd979bdSMark Fasheh 
3075ccd979bdSMark Fasheh 		/* XXX: Today we just wait on any busy
3076ccd979bdSMark Fasheh 		 * locks... Perhaps we need to cancel converts in the
3077ccd979bdSMark Fasheh 		 * future? */
3078ccd979bdSMark Fasheh 		ocfs2_wait_on_busy_lock(lockres);
3079ccd979bdSMark Fasheh 
3080ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3081ccd979bdSMark Fasheh 	}
3082ccd979bdSMark Fasheh 
30830d5dc6c2SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
30840d5dc6c2SMark Fasheh 		if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
3085bd3e7610SJoel Becker 		    lockres->l_level == DLM_LOCK_EX &&
30860d5dc6c2SMark Fasheh 		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
30870d5dc6c2SMark Fasheh 			lockres->l_ops->set_lvb(lockres);
30880d5dc6c2SMark Fasheh 	}
3089ccd979bdSMark Fasheh 
3090ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY)
3091ccd979bdSMark Fasheh 		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
3092ccd979bdSMark Fasheh 		     lockres->l_name);
3093ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3094ccd979bdSMark Fasheh 		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
3095ccd979bdSMark Fasheh 
3096ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
3097ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3098ccd979bdSMark Fasheh 		goto out;
3099ccd979bdSMark Fasheh 	}
3100ccd979bdSMark Fasheh 
3101ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
3102ccd979bdSMark Fasheh 
3103ccd979bdSMark Fasheh 	/* make sure we never get here while waiting for an ast to
3104ccd979bdSMark Fasheh 	 * fire. */
3105ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
3106ccd979bdSMark Fasheh 
3107ccd979bdSMark Fasheh 	/* is this necessary? */
3108ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3109ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
3110ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3111ccd979bdSMark Fasheh 
3112ccd979bdSMark Fasheh 	mlog(0, "lock %s\n", lockres->l_name);
3113ccd979bdSMark Fasheh 
3114a796d286SJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
31157431cd7eSJoel Becker 	if (ret) {
31167431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3117ccd979bdSMark Fasheh 		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
3118cf0acdcdSJoel Becker 		ocfs2_dlm_dump_lksb(&lockres->l_lksb);
3119ccd979bdSMark Fasheh 		BUG();
3120ccd979bdSMark Fasheh 	}
312173ac36eaSColy Li 	mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
3122ccd979bdSMark Fasheh 	     lockres->l_name);
3123ccd979bdSMark Fasheh 
3124ccd979bdSMark Fasheh 	ocfs2_wait_on_busy_lock(lockres);
3125ccd979bdSMark Fasheh out:
3126ccd979bdSMark Fasheh 	return 0;
3127ccd979bdSMark Fasheh }
3128ccd979bdSMark Fasheh 
312984d86f83SJan Kara static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
313084d86f83SJan Kara 				       struct ocfs2_lock_res *lockres);
313184d86f83SJan Kara 
3132ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be
3133ccd979bdSMark Fasheh  * queued if blocking, but we still may have to wait on it
313434d024f8SMark Fasheh  * being dequeued from the downconvert thread before we can consider
3135ccd979bdSMark Fasheh  * it safe to drop.
3136ccd979bdSMark Fasheh  *
3137ccd979bdSMark Fasheh  * You can *not* attempt to call cluster_lock on this lockres anymore. */
313884d86f83SJan Kara void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
313984d86f83SJan Kara 				struct ocfs2_lock_res *lockres)
3140ccd979bdSMark Fasheh {
3141ccd979bdSMark Fasheh 	int status;
3142ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
314384d86f83SJan Kara 	unsigned long flags, flags2;
3144ccd979bdSMark Fasheh 
3145ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
3146ccd979bdSMark Fasheh 
3147ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3148ccd979bdSMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_FREEING;
314984d86f83SJan Kara 	if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
315084d86f83SJan Kara 		/*
315184d86f83SJan Kara 		 * We know the downconvert is queued but not in progress
315284d86f83SJan Kara 		 * because we are the downconvert thread and processing
315384d86f83SJan Kara 		 * different lock. So we can just remove the lock from the
315484d86f83SJan Kara 		 * queue. This is not only an optimization but also a way
315584d86f83SJan Kara 		 * to avoid the following deadlock:
315684d86f83SJan Kara 		 *   ocfs2_dentry_post_unlock()
315784d86f83SJan Kara 		 *     ocfs2_dentry_lock_put()
315884d86f83SJan Kara 		 *       ocfs2_drop_dentry_lock()
315984d86f83SJan Kara 		 *         iput()
316084d86f83SJan Kara 		 *           ocfs2_evict_inode()
316184d86f83SJan Kara 		 *             ocfs2_clear_inode()
316284d86f83SJan Kara 		 *               ocfs2_mark_lockres_freeing()
316384d86f83SJan Kara 		 *                 ... blocks waiting for OCFS2_LOCK_QUEUED
316484d86f83SJan Kara 		 *                 since we are the downconvert thread which
316584d86f83SJan Kara 		 *                 should clear the flag.
316684d86f83SJan Kara 		 */
316784d86f83SJan Kara 		spin_unlock_irqrestore(&lockres->l_lock, flags);
316884d86f83SJan Kara 		spin_lock_irqsave(&osb->dc_task_lock, flags2);
316984d86f83SJan Kara 		list_del_init(&lockres->l_blocked_list);
317084d86f83SJan Kara 		osb->blocked_lock_count--;
317184d86f83SJan Kara 		spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
317284d86f83SJan Kara 		/*
317384d86f83SJan Kara 		 * Warn if we recurse into another post_unlock call.  Strictly
317484d86f83SJan Kara 		 * speaking it isn't a problem but we need to be careful if
317584d86f83SJan Kara 		 * that happens (stack overflow, deadlocks, ...) so warn if
317684d86f83SJan Kara 		 * ocfs2 grows a path for which this can happen.
317784d86f83SJan Kara 		 */
317884d86f83SJan Kara 		WARN_ON_ONCE(lockres->l_ops->post_unlock);
317984d86f83SJan Kara 		/* Since the lock is freeing we don't do much in the fn below */
318084d86f83SJan Kara 		ocfs2_process_blocked_lock(osb, lockres);
318184d86f83SJan Kara 		return;
318284d86f83SJan Kara 	}
3183ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3184ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3185ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3186ccd979bdSMark Fasheh 
3187ccd979bdSMark Fasheh 		mlog(0, "Waiting on lockres %s\n", lockres->l_name);
3188ccd979bdSMark Fasheh 
3189ccd979bdSMark Fasheh 		status = ocfs2_wait_for_mask(&mw);
3190ccd979bdSMark Fasheh 		if (status)
3191ccd979bdSMark Fasheh 			mlog_errno(status);
3192ccd979bdSMark Fasheh 
3193ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3194ccd979bdSMark Fasheh 	}
3195ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3196ccd979bdSMark Fasheh }
3197ccd979bdSMark Fasheh 
/*
 * Convenience wrapper: mark @lockres as freeing and then drop its DLM
 * lock.  A non-zero result from ocfs2_drop_lock() is only logged.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int err;

	ocfs2_mark_lockres_freeing(osb, lockres);

	err = ocfs2_drop_lock(osb, lockres);
	if (err)
		mlog_errno(err);
}
3208d680efe9SMark Fasheh 
3209ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3210ccd979bdSMark Fasheh {
3211d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3212d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
32136ca497a8Swengang wang 	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
321483273932SSrinivas Eeda 	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3215ccd979bdSMark Fasheh }
3216ccd979bdSMark Fasheh 
3217ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode)
3218ccd979bdSMark Fasheh {
3219ccd979bdSMark Fasheh 	int status, err;
3220ccd979bdSMark Fasheh 
3221ccd979bdSMark Fasheh 	/* No need to call ocfs2_mark_lockres_freeing here -
3222ccd979bdSMark Fasheh 	 * ocfs2_clear_inode has done it for us. */
3223ccd979bdSMark Fasheh 
3224ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
322550008630STiger Yang 			      &OCFS2_I(inode)->ip_open_lockres);
3226ccd979bdSMark Fasheh 	if (err < 0)
3227ccd979bdSMark Fasheh 		mlog_errno(err);
3228ccd979bdSMark Fasheh 
3229ccd979bdSMark Fasheh 	status = err;
3230ccd979bdSMark Fasheh 
3231ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3232e63aecb6SMark Fasheh 			      &OCFS2_I(inode)->ip_inode_lockres);
3233ccd979bdSMark Fasheh 	if (err < 0)
3234ccd979bdSMark Fasheh 		mlog_errno(err);
3235ccd979bdSMark Fasheh 	if (err < 0 && !status)
3236ccd979bdSMark Fasheh 		status = err;
3237ccd979bdSMark Fasheh 
3238ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
32390d5dc6c2SMark Fasheh 			      &OCFS2_I(inode)->ip_rw_lockres);
3240ccd979bdSMark Fasheh 	if (err < 0)
3241ccd979bdSMark Fasheh 		mlog_errno(err);
3242ccd979bdSMark Fasheh 	if (err < 0 && !status)
3243ccd979bdSMark Fasheh 		status = err;
3244ccd979bdSMark Fasheh 
3245ccd979bdSMark Fasheh 	return status;
3246ccd979bdSMark Fasheh }
3247ccd979bdSMark Fasheh 
3248de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3249ccd979bdSMark Fasheh 					      int new_level)
3250ccd979bdSMark Fasheh {
3251ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3252ccd979bdSMark Fasheh 
3253bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
3254ccd979bdSMark Fasheh 
3255ccd979bdSMark Fasheh 	if (lockres->l_level <= new_level) {
32569b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
32579b915181SSunil Mushran 		     "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
32589b915181SSunil Mushran 		     "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
32599b915181SSunil Mushran 		     new_level, list_empty(&lockres->l_blocked_list),
32609b915181SSunil Mushran 		     list_empty(&lockres->l_mask_waiters), lockres->l_type,
32619b915181SSunil Mushran 		     lockres->l_flags, lockres->l_ro_holders,
32629b915181SSunil Mushran 		     lockres->l_ex_holders, lockres->l_action,
32639b915181SSunil Mushran 		     lockres->l_unlock_action, lockres->l_requested,
32649b915181SSunil Mushran 		     lockres->l_blocking, lockres->l_pending_gen);
3265ccd979bdSMark Fasheh 		BUG();
3266ccd979bdSMark Fasheh 	}
3267ccd979bdSMark Fasheh 
32689b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
32699b915181SSunil Mushran 	     lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);
3270ccd979bdSMark Fasheh 
3271ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_DOWNCONVERT;
3272ccd979bdSMark Fasheh 	lockres->l_requested = new_level;
3273ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3274de551246SJoel Becker 	return lockres_set_pending(lockres);
3275ccd979bdSMark Fasheh }
3276ccd979bdSMark Fasheh 
3277ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3278ccd979bdSMark Fasheh 				  struct ocfs2_lock_res *lockres,
3279ccd979bdSMark Fasheh 				  int new_level,
3280de551246SJoel Becker 				  int lvb,
3281de551246SJoel Becker 				  unsigned int generation)
3282ccd979bdSMark Fasheh {
3283bd3e7610SJoel Becker 	int ret;
3284bd3e7610SJoel Becker 	u32 dlm_flags = DLM_LKF_CONVERT;
3285ccd979bdSMark Fasheh 
32869b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
32879b915181SSunil Mushran 	     lockres->l_level, new_level);
32889b915181SSunil Mushran 
3289ccd979bdSMark Fasheh 	if (lvb)
3290bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_VALBLK;
3291ccd979bdSMark Fasheh 
32924670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
3293ccd979bdSMark Fasheh 			     new_level,
3294ccd979bdSMark Fasheh 			     &lockres->l_lksb,
3295ccd979bdSMark Fasheh 			     dlm_flags,
3296ccd979bdSMark Fasheh 			     lockres->l_name,
3297a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
3298de551246SJoel Becker 	lockres_clear_pending(lockres, generation, osb);
32997431cd7eSJoel Becker 	if (ret) {
33007431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
3301ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
3302ccd979bdSMark Fasheh 		goto bail;
3303ccd979bdSMark Fasheh 	}
3304ccd979bdSMark Fasheh 
3305ccd979bdSMark Fasheh 	ret = 0;
3306ccd979bdSMark Fasheh bail:
3307ccd979bdSMark Fasheh 	return ret;
3308ccd979bdSMark Fasheh }
3309ccd979bdSMark Fasheh 
331024ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
3311ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3312ccd979bdSMark Fasheh 				        struct ocfs2_lock_res *lockres)
3313ccd979bdSMark Fasheh {
3314ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3315ccd979bdSMark Fasheh 
3316ccd979bdSMark Fasheh 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3317ccd979bdSMark Fasheh 		/* If we're already trying to cancel a lock conversion
3318ccd979bdSMark Fasheh 		 * then just drop the spinlock and allow the caller to
3319ccd979bdSMark Fasheh 		 * requeue this lock. */
33209b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
3321ccd979bdSMark Fasheh 		return 0;
3322ccd979bdSMark Fasheh 	}
3323ccd979bdSMark Fasheh 
3324ccd979bdSMark Fasheh 	/* were we in a convert when we got the bast fire? */
3325ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
3326ccd979bdSMark Fasheh 	       lockres->l_action != OCFS2_AST_DOWNCONVERT);
3327ccd979bdSMark Fasheh 	/* set things up for the unlockast to know to just
3328ccd979bdSMark Fasheh 	 * clear out the ast_action and unset busy, etc. */
3329ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
3330ccd979bdSMark Fasheh 
3331ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
3332ccd979bdSMark Fasheh 			"lock %s, invalid flags: 0x%lx\n",
3333ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3334ccd979bdSMark Fasheh 
33359b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
33369b915181SSunil Mushran 
3337ccd979bdSMark Fasheh 	return 1;
3338ccd979bdSMark Fasheh }
3339ccd979bdSMark Fasheh 
3340ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3341ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres)
3342ccd979bdSMark Fasheh {
3343ccd979bdSMark Fasheh 	int ret;
3344ccd979bdSMark Fasheh 
33454670c46dSJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3346a796d286SJoel Becker 			       DLM_LKF_CANCEL);
33477431cd7eSJoel Becker 	if (ret) {
33487431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3349ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 0);
3350ccd979bdSMark Fasheh 	}
3351ccd979bdSMark Fasheh 
33529b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
3353ccd979bdSMark Fasheh 
3354ccd979bdSMark Fasheh 	return ret;
3355ccd979bdSMark Fasheh }
3356ccd979bdSMark Fasheh 
3357b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3358ccd979bdSMark Fasheh 			      struct ocfs2_lock_res *lockres,
3359cc567d89SMark Fasheh 			      struct ocfs2_unblock_ctl *ctl)
3360ccd979bdSMark Fasheh {
3361ccd979bdSMark Fasheh 	unsigned long flags;
3362ccd979bdSMark Fasheh 	int blocking;
3363ccd979bdSMark Fasheh 	int new_level;
3364079b8057SSunil Mushran 	int level;
3365ccd979bdSMark Fasheh 	int ret = 0;
33665ef0d4eaSMark Fasheh 	int set_lvb = 0;
3367de551246SJoel Becker 	unsigned int gen;
3368ccd979bdSMark Fasheh 
3369ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3370ccd979bdSMark Fasheh 
3371ccd979bdSMark Fasheh recheck:
3372db0f6ce6SSunil Mushran 	/*
3373db0f6ce6SSunil Mushran 	 * Is it still blocking? If not, we have no more work to do.
3374db0f6ce6SSunil Mushran 	 */
3375db0f6ce6SSunil Mushran 	if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
3376db0f6ce6SSunil Mushran 		BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
3377db0f6ce6SSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3378db0f6ce6SSunil Mushran 		ret = 0;
3379db0f6ce6SSunil Mushran 		goto leave;
3380db0f6ce6SSunil Mushran 	}
3381db0f6ce6SSunil Mushran 
3382ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3383de551246SJoel Becker 		/* XXX
3384de551246SJoel Becker 		 * This is a *big* race.  The OCFS2_LOCK_PENDING flag
3385de551246SJoel Becker 		 * exists entirely for one reason - another thread has set
3386de551246SJoel Becker 		 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3387de551246SJoel Becker 		 *
3388de551246SJoel Becker 		 * If we do ocfs2_cancel_convert() before the other thread
3389de551246SJoel Becker 		 * calls dlm_lock(), our cancel will do nothing.  We will
3390de551246SJoel Becker 		 * get no ast, and we will have no way of knowing the
3391de551246SJoel Becker 		 * cancel failed.  Meanwhile, the other thread will call
3392de551246SJoel Becker 		 * into dlm_lock() and wait...forever.
3393de551246SJoel Becker 		 *
3394de551246SJoel Becker 		 * Why forever?  Because another node has asked for the
3395de551246SJoel Becker 		 * lock first; that's why we're here in unblock_lock().
3396de551246SJoel Becker 		 *
3397de551246SJoel Becker 		 * The solution is OCFS2_LOCK_PENDING.  When PENDING is
3398de551246SJoel Becker 		 * set, we just requeue the unblock.  Only when the other
3399de551246SJoel Becker 		 * thread has called dlm_lock() and cleared PENDING will
3400de551246SJoel Becker 		 * we then cancel their request.
3401de551246SJoel Becker 		 *
3402de551246SJoel Becker 		 * All callers of dlm_lock() must set OCFS2_DLM_PENDING
3403de551246SJoel Becker 		 * at the same time they set OCFS2_DLM_BUSY.  They must
3404de551246SJoel Becker 		 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
3405de551246SJoel Becker 		 */
34069b915181SSunil Mushran 		if (lockres->l_flags & OCFS2_LOCK_PENDING) {
34079b915181SSunil Mushran 			mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
34089b915181SSunil Mushran 			     lockres->l_name);
3409de551246SJoel Becker 			goto leave_requeue;
34109b915181SSunil Mushran 		}
3411de551246SJoel Becker 
3412d680efe9SMark Fasheh 		ctl->requeue = 1;
3413ccd979bdSMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
3414ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3415ccd979bdSMark Fasheh 		if (ret) {
3416ccd979bdSMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
3417ccd979bdSMark Fasheh 			if (ret < 0)
3418ccd979bdSMark Fasheh 				mlog_errno(ret);
3419ccd979bdSMark Fasheh 		}
3420ccd979bdSMark Fasheh 		goto leave;
3421ccd979bdSMark Fasheh 	}
3422ccd979bdSMark Fasheh 
3423a1912826SSunil Mushran 	/*
3424a1912826SSunil Mushran 	 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3425a1912826SSunil Mushran 	 * set when the ast is received for an upconvert just before the
3426a1912826SSunil Mushran 	 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3427a1912826SSunil Mushran 	 * on the heels of the ast, we want to delay the downconvert just
3428a1912826SSunil Mushran 	 * enough to allow the up requestor to do its task. Because this
3429a1912826SSunil Mushran 	 * lock is in the blocked queue, the lock will be downconverted
3430a1912826SSunil Mushran 	 * as soon as the requestor is done with the lock.
3431a1912826SSunil Mushran 	 */
3432a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3433a1912826SSunil Mushran 		goto leave_requeue;
3434a1912826SSunil Mushran 
34350d74125aSSunil Mushran 	/*
34360d74125aSSunil Mushran 	 * How can we block and yet be at NL?  We were trying to upconvert
34370d74125aSSunil Mushran 	 * from NL and got canceled.  The code comes back here, and now
34380d74125aSSunil Mushran 	 * we notice and clear BLOCKING.
34390d74125aSSunil Mushran 	 */
34400d74125aSSunil Mushran 	if (lockres->l_level == DLM_LOCK_NL) {
34410d74125aSSunil Mushran 		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
34429b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
34430d74125aSSunil Mushran 		lockres->l_blocking = DLM_LOCK_NL;
34440d74125aSSunil Mushran 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
34450d74125aSSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
34460d74125aSSunil Mushran 		goto leave;
34470d74125aSSunil Mushran 	}
34480d74125aSSunil Mushran 
3449ccd979bdSMark Fasheh 	/* if we're blocking an exclusive and we have *any* holders,
3450ccd979bdSMark Fasheh 	 * then requeue. */
3451bd3e7610SJoel Becker 	if ((lockres->l_blocking == DLM_LOCK_EX)
34529b915181SSunil Mushran 	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
34539b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
34549b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders,
34559b915181SSunil Mushran 		     lockres->l_ro_holders);
3456f7fbfdd1SMark Fasheh 		goto leave_requeue;
34579b915181SSunil Mushran 	}
3458ccd979bdSMark Fasheh 
3459ccd979bdSMark Fasheh 	/* If it's a PR we're blocking, then only
3460ccd979bdSMark Fasheh 	 * requeue if we've got any EX holders */
3461bd3e7610SJoel Becker 	if (lockres->l_blocking == DLM_LOCK_PR &&
34629b915181SSunil Mushran 	    lockres->l_ex_holders) {
34639b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
34649b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders);
3465f7fbfdd1SMark Fasheh 		goto leave_requeue;
34669b915181SSunil Mushran 	}
3467f7fbfdd1SMark Fasheh 
3468f7fbfdd1SMark Fasheh 	/*
3469f7fbfdd1SMark Fasheh 	 * Can we get a lock in this state if the holder counts are
3470f7fbfdd1SMark Fasheh 	 * zero? The meta data unblock code used to check this.
3471f7fbfdd1SMark Fasheh 	 */
3472f7fbfdd1SMark Fasheh 	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
34739b915181SSunil Mushran 	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
34749b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
34759b915181SSunil Mushran 		     lockres->l_name);
3476f7fbfdd1SMark Fasheh 		goto leave_requeue;
34779b915181SSunil Mushran 	}
3478ccd979bdSMark Fasheh 
347916d5b956SMark Fasheh 	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
348016d5b956SMark Fasheh 
348116d5b956SMark Fasheh 	if (lockres->l_ops->check_downconvert
34829b915181SSunil Mushran 	    && !lockres->l_ops->check_downconvert(lockres, new_level)) {
34839b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
34849b915181SSunil Mushran 		     lockres->l_name);
348516d5b956SMark Fasheh 		goto leave_requeue;
34869b915181SSunil Mushran 	}
348716d5b956SMark Fasheh 
3488ccd979bdSMark Fasheh 	/* If we get here, then we know that there are no more
3489ccd979bdSMark Fasheh 	 * incompatible holders (and anyone asking for an incompatible
3490ccd979bdSMark Fasheh 	 * lock is blocked). We can now downconvert the lock */
3491cc567d89SMark Fasheh 	if (!lockres->l_ops->downconvert_worker)
3492ccd979bdSMark Fasheh 		goto downconvert;
3493ccd979bdSMark Fasheh 
3494ccd979bdSMark Fasheh 	/* Some lockres types want to do a bit of work before
3495ccd979bdSMark Fasheh 	 * downconverting a lock. Allow that here. The worker function
3496ccd979bdSMark Fasheh 	 * may sleep, so we save off a copy of what we're blocking as
3497ccd979bdSMark Fasheh 	 * it may change while we're not holding the spin lock. */
3498ccd979bdSMark Fasheh 	blocking = lockres->l_blocking;
3499079b8057SSunil Mushran 	level = lockres->l_level;
3500ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3501ccd979bdSMark Fasheh 
3502cc567d89SMark Fasheh 	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3503d680efe9SMark Fasheh 
35049b915181SSunil Mushran 	if (ctl->unblock_action == UNBLOCK_STOP_POST) {
35059b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
35069b915181SSunil Mushran 		     lockres->l_name);
3507d680efe9SMark Fasheh 		goto leave;
35089b915181SSunil Mushran 	}
3509ccd979bdSMark Fasheh 
3510ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3511079b8057SSunil Mushran 	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
3512ccd979bdSMark Fasheh 		/* If this changed underneath us, then we can't drop
3513ccd979bdSMark Fasheh 		 * it just yet. */
35149b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
35159b915181SSunil Mushran 		     "Recheck\n", lockres->l_name, blocking,
35169b915181SSunil Mushran 		     lockres->l_blocking, level, lockres->l_level);
3517ccd979bdSMark Fasheh 		goto recheck;
3518ccd979bdSMark Fasheh 	}
3519ccd979bdSMark Fasheh 
3520ccd979bdSMark Fasheh downconvert:
3521d680efe9SMark Fasheh 	ctl->requeue = 0;
3522ccd979bdSMark Fasheh 
35235ef0d4eaSMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3524bd3e7610SJoel Becker 		if (lockres->l_level == DLM_LOCK_EX)
35255ef0d4eaSMark Fasheh 			set_lvb = 1;
35265ef0d4eaSMark Fasheh 
35275ef0d4eaSMark Fasheh 		/*
35285ef0d4eaSMark Fasheh 		 * We only set the lvb if the lock has been fully
35295ef0d4eaSMark Fasheh 		 * refreshed - otherwise we risk setting stale
35305ef0d4eaSMark Fasheh 		 * data. Otherwise, there's no need to actually clear
35315ef0d4eaSMark Fasheh 		 * out the lvb here as it's value is still valid.
35325ef0d4eaSMark Fasheh 		 */
35335ef0d4eaSMark Fasheh 		if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
35345ef0d4eaSMark Fasheh 			lockres->l_ops->set_lvb(lockres);
35355ef0d4eaSMark Fasheh 	}
35365ef0d4eaSMark Fasheh 
3537de551246SJoel Becker 	gen = ocfs2_prepare_downconvert(lockres, new_level);
3538ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3539de551246SJoel Becker 	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3540de551246SJoel Becker 				     gen);
3541de551246SJoel Becker 
3542ccd979bdSMark Fasheh leave:
3543c1e8d35eSTao Ma 	if (ret)
3544c1e8d35eSTao Ma 		mlog_errno(ret);
3545ccd979bdSMark Fasheh 	return ret;
3546f7fbfdd1SMark Fasheh 
3547f7fbfdd1SMark Fasheh leave_requeue:
3548f7fbfdd1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3549f7fbfdd1SMark Fasheh 	ctl->requeue = 1;
3550f7fbfdd1SMark Fasheh 
3551f7fbfdd1SMark Fasheh 	return 0;
3552ccd979bdSMark Fasheh }
3553ccd979bdSMark Fasheh 
3554d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3555ccd979bdSMark Fasheh 				     int blocking)
3556ccd979bdSMark Fasheh {
3557ccd979bdSMark Fasheh 	struct inode *inode;
3558ccd979bdSMark Fasheh 	struct address_space *mapping;
35595e98d492SGoldwyn Rodrigues 	struct ocfs2_inode_info *oi;
3560ccd979bdSMark Fasheh 
3561ccd979bdSMark Fasheh        	inode = ocfs2_lock_res_inode(lockres);
3562ccd979bdSMark Fasheh 	mapping = inode->i_mapping;
3563ccd979bdSMark Fasheh 
35645e98d492SGoldwyn Rodrigues 	if (S_ISDIR(inode->i_mode)) {
35655e98d492SGoldwyn Rodrigues 		oi = OCFS2_I(inode);
35665e98d492SGoldwyn Rodrigues 		oi->ip_dir_lock_gen++;
35675e98d492SGoldwyn Rodrigues 		mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
35685e98d492SGoldwyn Rodrigues 		goto out;
35695e98d492SGoldwyn Rodrigues 	}
35705e98d492SGoldwyn Rodrigues 
35711044e401SMark Fasheh 	if (!S_ISREG(inode->i_mode))
3572f1f54068SMark Fasheh 		goto out;
3573f1f54068SMark Fasheh 
35747f4a2a97SMark Fasheh 	/*
35757f4a2a97SMark Fasheh 	 * We need this before the filemap_fdatawrite() so that it can
35767f4a2a97SMark Fasheh 	 * transfer the dirty bit from the PTE to the
35777f4a2a97SMark Fasheh 	 * page. Unfortunately this means that even for EX->PR
35787f4a2a97SMark Fasheh 	 * downconverts, we'll lose our mappings and have to build
35797f4a2a97SMark Fasheh 	 * them up again.
35807f4a2a97SMark Fasheh 	 */
35817f4a2a97SMark Fasheh 	unmap_mapping_range(mapping, 0, 0, 0);
35827f4a2a97SMark Fasheh 
3583ccd979bdSMark Fasheh 	if (filemap_fdatawrite(mapping)) {
3584b0697053SMark Fasheh 		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
3585b0697053SMark Fasheh 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
3586ccd979bdSMark Fasheh 	}
3587ccd979bdSMark Fasheh 	sync_mapping_buffers(mapping);
3588bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_EX) {
3589ccd979bdSMark Fasheh 		truncate_inode_pages(mapping, 0);
3590ccd979bdSMark Fasheh 	} else {
3591ccd979bdSMark Fasheh 		/* We only need to wait on the I/O if we're not also
3592ccd979bdSMark Fasheh 		 * truncating pages because truncate_inode_pages waits
3593ccd979bdSMark Fasheh 		 * for us above. We don't truncate pages if we're
3594ccd979bdSMark Fasheh 		 * blocking anything < EXMODE because we want to keep
3595ccd979bdSMark Fasheh 		 * them around in that case. */
3596ccd979bdSMark Fasheh 		filemap_fdatawait(mapping);
3597ccd979bdSMark Fasheh 	}
3598ccd979bdSMark Fasheh 
3599f1f54068SMark Fasheh out:
3600d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE;
3601ccd979bdSMark Fasheh }
3602ccd979bdSMark Fasheh 
3603a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
3604a4338481STao Ma 				 struct ocfs2_lock_res *lockres,
3605810d5aebSMark Fasheh 				 int new_level)
3606810d5aebSMark Fasheh {
3607a4338481STao Ma 	int checkpointed = ocfs2_ci_fully_checkpointed(ci);
3608810d5aebSMark Fasheh 
3609bd3e7610SJoel Becker 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3610bd3e7610SJoel Becker 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
3611810d5aebSMark Fasheh 
3612810d5aebSMark Fasheh 	if (checkpointed)
3613810d5aebSMark Fasheh 		return 1;
3614810d5aebSMark Fasheh 
3615a4338481STao Ma 	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
3616810d5aebSMark Fasheh 	return 0;
3617810d5aebSMark Fasheh }
3618810d5aebSMark Fasheh 
/*
 * Downconvert check for inode metadata locks: defers to
 * ocfs2_ci_checkpointed() on the metadata cache of the inode behind
 * @lockres and returns its result.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	struct ocfs2_caching_info *ci;

	ci = INODE_CACHE(ocfs2_lock_res_inode(lockres));

	return ocfs2_ci_checkpointed(ci, lockres, new_level);
}
3626a4338481STao Ma 
/*
 * Hand the inode behind @lockres to __ocfs2_stuff_meta_lvb().
 */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
3633810d5aebSMark Fasheh 
/*
 * Drops the final reference on our dentry lock. For now this runs in
 * the downconvert thread; a simpler dlmglue API could hand these off to
 * the ocfs2_wq instead at some point.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
3645d680efe9SMark Fasheh 
3646d680efe9SMark Fasheh /*
3647d680efe9SMark Fasheh  * d_delete() matching dentries before the lock downconvert.
3648d680efe9SMark Fasheh  *
3649d680efe9SMark Fasheh  * At this point, any process waiting to destroy the
3650d680efe9SMark Fasheh  * dentry_lock due to last ref count is stopped by the
3651d680efe9SMark Fasheh  * OCFS2_LOCK_QUEUED flag.
3652d680efe9SMark Fasheh  *
3653d680efe9SMark Fasheh  * We have two potential problems
3654d680efe9SMark Fasheh  *
3655d680efe9SMark Fasheh  * 1) If we do the last reference drop on our dentry_lock (via dput)
3656d680efe9SMark Fasheh  *    we'll wind up in ocfs2_release_dentry_lock(), waiting on
3657d680efe9SMark Fasheh  *    the downconvert to finish. Instead we take an elevated
3658d680efe9SMark Fasheh  *    reference and push the drop until after we've completed our
3659d680efe9SMark Fasheh  *    unblock processing.
3660d680efe9SMark Fasheh  *
3661d680efe9SMark Fasheh  * 2) There might be another process with a final reference,
3662d680efe9SMark Fasheh  *    waiting on us to finish processing. If this is the case, we
3663d680efe9SMark Fasheh  *    detect it and exit out - there's no more dentries anyway.
3664d680efe9SMark Fasheh  */
3665d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3666d680efe9SMark Fasheh 				       int blocking)
3667d680efe9SMark Fasheh {
3668d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
3669d680efe9SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
3670d680efe9SMark Fasheh 	struct dentry *dentry;
3671d680efe9SMark Fasheh 	unsigned long flags;
3672d680efe9SMark Fasheh 	int extra_ref = 0;
3673d680efe9SMark Fasheh 
3674d680efe9SMark Fasheh 	/*
3675d680efe9SMark Fasheh 	 * This node is blocking another node from getting a read
3676d680efe9SMark Fasheh 	 * lock. This happens when we've renamed within a
3677d680efe9SMark Fasheh 	 * directory. We've forced the other nodes to d_delete(), but
3678d680efe9SMark Fasheh 	 * we never actually dropped our lock because it's still
3679d680efe9SMark Fasheh 	 * valid. The downconvert code will retain a PR for this node,
3680d680efe9SMark Fasheh 	 * so there's no further work to do.
3681d680efe9SMark Fasheh 	 */
3682bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_PR)
3683d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
3684d680efe9SMark Fasheh 
3685d680efe9SMark Fasheh 	/*
3686d680efe9SMark Fasheh 	 * Mark this inode as potentially orphaned. The code in
3687d680efe9SMark Fasheh 	 * ocfs2_delete_inode() will figure out whether it actually
3688d680efe9SMark Fasheh 	 * needs to be freed or not.
3689d680efe9SMark Fasheh 	 */
3690d680efe9SMark Fasheh 	spin_lock(&oi->ip_lock);
3691d680efe9SMark Fasheh 	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
3692d680efe9SMark Fasheh 	spin_unlock(&oi->ip_lock);
3693d680efe9SMark Fasheh 
3694d680efe9SMark Fasheh 	/*
3695d680efe9SMark Fasheh 	 * Yuck. We need to make sure however that the check of
3696d680efe9SMark Fasheh 	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
3697d680efe9SMark Fasheh 	 * respect to a reference decrement or the setting of that
3698d680efe9SMark Fasheh 	 * flag.
3699d680efe9SMark Fasheh 	 */
3700d680efe9SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3701d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
3702d680efe9SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
3703d680efe9SMark Fasheh 	    && dl->dl_count) {
3704d680efe9SMark Fasheh 		dl->dl_count++;
3705d680efe9SMark Fasheh 		extra_ref = 1;
3706d680efe9SMark Fasheh 	}
3707d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
3708d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3709d680efe9SMark Fasheh 
3710d680efe9SMark Fasheh 	mlog(0, "extra_ref = %d\n", extra_ref);
3711d680efe9SMark Fasheh 
3712d680efe9SMark Fasheh 	/*
3713d680efe9SMark Fasheh 	 * We have a process waiting on us in ocfs2_dentry_iput(),
3714d680efe9SMark Fasheh 	 * which means we can't have any more outstanding
3715d680efe9SMark Fasheh 	 * aliases. There's no need to do any more work.
3716d680efe9SMark Fasheh 	 */
3717d680efe9SMark Fasheh 	if (!extra_ref)
3718d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
3719d680efe9SMark Fasheh 
3720d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
3721d680efe9SMark Fasheh 	while (1) {
3722d680efe9SMark Fasheh 		dentry = ocfs2_find_local_alias(dl->dl_inode,
3723d680efe9SMark Fasheh 						dl->dl_parent_blkno, 1);
3724d680efe9SMark Fasheh 		if (!dentry)
3725d680efe9SMark Fasheh 			break;
3726d680efe9SMark Fasheh 		spin_unlock(&dentry_attach_lock);
3727d680efe9SMark Fasheh 
3728a455589fSAl Viro 		mlog(0, "d_delete(%pd);\n", dentry);
3729d680efe9SMark Fasheh 
3730d680efe9SMark Fasheh 		/*
3731d680efe9SMark Fasheh 		 * The following dcache calls may do an
3732d680efe9SMark Fasheh 		 * iput(). Normally we don't want that from the
3733d680efe9SMark Fasheh 		 * downconverting thread, but in this case it's ok
3734d680efe9SMark Fasheh 		 * because the requesting node already has an
3735d680efe9SMark Fasheh 		 * exclusive lock on the inode, so it can't be queued
3736d680efe9SMark Fasheh 		 * for a downconvert.
3737d680efe9SMark Fasheh 		 */
3738d680efe9SMark Fasheh 		d_delete(dentry);
3739d680efe9SMark Fasheh 		dput(dentry);
3740d680efe9SMark Fasheh 
3741d680efe9SMark Fasheh 		spin_lock(&dentry_attach_lock);
3742d680efe9SMark Fasheh 	}
3743d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
3744d680efe9SMark Fasheh 
3745d680efe9SMark Fasheh 	/*
3746d680efe9SMark Fasheh 	 * If we are the last holder of this dentry lock, there is no
3747d680efe9SMark Fasheh 	 * reason to downconvert so skip straight to the unlock.
3748d680efe9SMark Fasheh 	 */
3749d680efe9SMark Fasheh 	if (dl->dl_count == 1)
3750d680efe9SMark Fasheh 		return UNBLOCK_STOP_POST;
3751d680efe9SMark Fasheh 
3752d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE_POST;
3753d680efe9SMark Fasheh }
3754d680efe9SMark Fasheh 
37558dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
37568dec98edSTao Ma 					    int new_level)
37578dec98edSTao Ma {
37588dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
37598dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
37608dec98edSTao Ma 
37618dec98edSTao Ma 	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
37628dec98edSTao Ma }
37638dec98edSTao Ma 
37648dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
37658dec98edSTao Ma 					 int blocking)
37668dec98edSTao Ma {
37678dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
37688dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
37698dec98edSTao Ma 
37708dec98edSTao Ma 	ocfs2_metadata_cache_purge(&tree->rf_ci);
37718dec98edSTao Ma 
37728dec98edSTao Ma 	return UNBLOCK_CONTINUE;
37738dec98edSTao Ma }
37748dec98edSTao Ma 
37759e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
37769e33d69fSJan Kara {
37779e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb;
37789e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
37799e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
37809e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
37819e33d69fSJan Kara 
3782a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
37839e33d69fSJan Kara 	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
37849e33d69fSJan Kara 	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
37859e33d69fSJan Kara 	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
37869e33d69fSJan Kara 	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
37879e33d69fSJan Kara 	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
37889e33d69fSJan Kara 	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
37899e33d69fSJan Kara 	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
37909e33d69fSJan Kara }
37919e33d69fSJan Kara 
37929e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
37939e33d69fSJan Kara {
37949e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
37959e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
37969e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
37979e33d69fSJan Kara 
37989e33d69fSJan Kara 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
37999e33d69fSJan Kara 		ocfs2_cluster_unlock(osb, lockres, level);
38009e33d69fSJan Kara }
38019e33d69fSJan Kara 
38029e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
38039e33d69fSJan Kara {
38049e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
38059e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
38069e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
38079e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
380885eb8b73SJoel Becker 	struct buffer_head *bh = NULL;
38099e33d69fSJan Kara 	struct ocfs2_global_disk_dqinfo *gdinfo;
38109e33d69fSJan Kara 	int status = 0;
38119e33d69fSJan Kara 
38121c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
38131c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
38149e33d69fSJan Kara 		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
38159e33d69fSJan Kara 		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
38169e33d69fSJan Kara 		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
38179e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
38189e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
38199e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
38209e33d69fSJan Kara 					be32_to_cpu(lvb->lvb_free_entry);
38219e33d69fSJan Kara 	} else {
3822ae4f6ef1SJan Kara 		status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
3823ae4f6ef1SJan Kara 						     oinfo->dqi_giblk, &bh);
382485eb8b73SJoel Becker 		if (status) {
38259e33d69fSJan Kara 			mlog_errno(status);
38269e33d69fSJan Kara 			goto bail;
38279e33d69fSJan Kara 		}
38289e33d69fSJan Kara 		gdinfo = (struct ocfs2_global_disk_dqinfo *)
38299e33d69fSJan Kara 					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
38309e33d69fSJan Kara 		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
38319e33d69fSJan Kara 		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
38329e33d69fSJan Kara 		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
38339e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
38349e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
38359e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
38369e33d69fSJan Kara 					le32_to_cpu(gdinfo->dqi_free_entry);
38379e33d69fSJan Kara 		brelse(bh);
38389e33d69fSJan Kara 		ocfs2_track_lock_refresh(lockres);
38399e33d69fSJan Kara 	}
38409e33d69fSJan Kara 
38419e33d69fSJan Kara bail:
38429e33d69fSJan Kara 	return status;
38439e33d69fSJan Kara }
38449e33d69fSJan Kara 
38459e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file
38469e33d69fSJan Kara  * so that we can safely refresh quota info from disk. */
38479e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
38489e33d69fSJan Kara {
38499e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
38509e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
38519e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
38529e33d69fSJan Kara 	int status = 0;
38539e33d69fSJan Kara 
38549e33d69fSJan Kara 	/* On RO devices, locking really isn't needed... */
38559e33d69fSJan Kara 	if (ocfs2_is_hard_readonly(osb)) {
38569e33d69fSJan Kara 		if (ex)
38579e33d69fSJan Kara 			status = -EROFS;
38589e33d69fSJan Kara 		goto bail;
38599e33d69fSJan Kara 	}
38609e33d69fSJan Kara 	if (ocfs2_mount_local(osb))
38619e33d69fSJan Kara 		goto bail;
38629e33d69fSJan Kara 
38639e33d69fSJan Kara 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
38649e33d69fSJan Kara 	if (status < 0) {
38659e33d69fSJan Kara 		mlog_errno(status);
38669e33d69fSJan Kara 		goto bail;
38679e33d69fSJan Kara 	}
38689e33d69fSJan Kara 	if (!ocfs2_should_refresh_lock_res(lockres))
38699e33d69fSJan Kara 		goto bail;
38709e33d69fSJan Kara 	/* OK, we have the lock but we need to refresh the quota info */
38719e33d69fSJan Kara 	status = ocfs2_refresh_qinfo(oinfo);
38729e33d69fSJan Kara 	if (status)
38739e33d69fSJan Kara 		ocfs2_qinfo_unlock(oinfo, ex);
38749e33d69fSJan Kara 	ocfs2_complete_lock_res_refresh(lockres, status);
38759e33d69fSJan Kara bail:
38769e33d69fSJan Kara 	return status;
38779e33d69fSJan Kara }
38789e33d69fSJan Kara 
38798dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
38808dec98edSTao Ma {
38818dec98edSTao Ma 	int status;
38828dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
38838dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
38848dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
38858dec98edSTao Ma 
38868dec98edSTao Ma 
38878dec98edSTao Ma 	if (ocfs2_is_hard_readonly(osb))
38888dec98edSTao Ma 		return -EROFS;
38898dec98edSTao Ma 
38908dec98edSTao Ma 	if (ocfs2_mount_local(osb))
38918dec98edSTao Ma 		return 0;
38928dec98edSTao Ma 
38938dec98edSTao Ma 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
38948dec98edSTao Ma 	if (status < 0)
38958dec98edSTao Ma 		mlog_errno(status);
38968dec98edSTao Ma 
38978dec98edSTao Ma 	return status;
38988dec98edSTao Ma }
38998dec98edSTao Ma 
39008dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
39018dec98edSTao Ma {
39028dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
39038dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
39048dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
39058dec98edSTao Ma 
39068dec98edSTao Ma 	if (!ocfs2_mount_local(osb))
39078dec98edSTao Ma 		ocfs2_cluster_unlock(osb, lockres, level);
39088dec98edSTao Ma }
39098dec98edSTao Ma 
391000600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
3911ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *lockres)
3912ccd979bdSMark Fasheh {
3913ccd979bdSMark Fasheh 	int status;
3914d680efe9SMark Fasheh 	struct ocfs2_unblock_ctl ctl = {0, 0,};
3915ccd979bdSMark Fasheh 	unsigned long flags;
3916ccd979bdSMark Fasheh 
3917ccd979bdSMark Fasheh 	/* Our reference to the lockres in this function can be
3918ccd979bdSMark Fasheh 	 * considered valid until we remove the OCFS2_LOCK_QUEUED
3919ccd979bdSMark Fasheh 	 * flag. */
3920ccd979bdSMark Fasheh 
3921ccd979bdSMark Fasheh 	BUG_ON(!lockres);
3922ccd979bdSMark Fasheh 	BUG_ON(!lockres->l_ops);
3923ccd979bdSMark Fasheh 
39249b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);
3925ccd979bdSMark Fasheh 
3926ccd979bdSMark Fasheh 	/* Detect whether a lock has been marked as going away while
392734d024f8SMark Fasheh 	 * the downconvert thread was processing other things. A lock can
3928ccd979bdSMark Fasheh 	 * still be marked with OCFS2_LOCK_FREEING after this check,
3929ccd979bdSMark Fasheh 	 * but short circuiting here will still save us some
3930ccd979bdSMark Fasheh 	 * performance. */
3931ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3932ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING)
3933ccd979bdSMark Fasheh 		goto unqueue;
3934ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3935ccd979bdSMark Fasheh 
3936b5e500e2SMark Fasheh 	status = ocfs2_unblock_lock(osb, lockres, &ctl);
3937ccd979bdSMark Fasheh 	if (status < 0)
3938ccd979bdSMark Fasheh 		mlog_errno(status);
3939ccd979bdSMark Fasheh 
3940ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3941ccd979bdSMark Fasheh unqueue:
3942d680efe9SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
3943ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
3944ccd979bdSMark Fasheh 	} else
3945ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
3946ccd979bdSMark Fasheh 
39479b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
3948d680efe9SMark Fasheh 	     ctl.requeue ? "yes" : "no");
3949ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3950ccd979bdSMark Fasheh 
3951d680efe9SMark Fasheh 	if (ctl.unblock_action != UNBLOCK_CONTINUE
3952d680efe9SMark Fasheh 	    && lockres->l_ops->post_unlock)
3953d680efe9SMark Fasheh 		lockres->l_ops->post_unlock(osb, lockres);
3954ccd979bdSMark Fasheh }
3955ccd979bdSMark Fasheh 
3956ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
3957ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
3958ccd979bdSMark Fasheh {
3959a75e9ccaSSrinivas Eeda 	unsigned long flags;
3960a75e9ccaSSrinivas Eeda 
3961ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3962ccd979bdSMark Fasheh 
3963ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
3964ccd979bdSMark Fasheh 		/* Do not schedule a lock for downconvert when it's on
3965ccd979bdSMark Fasheh 		 * the way to destruction - any nodes wanting access
3966ccd979bdSMark Fasheh 		 * to the resource will get it soon. */
39679b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
3968ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags);
3969ccd979bdSMark Fasheh 		return;
3970ccd979bdSMark Fasheh 	}
3971ccd979bdSMark Fasheh 
3972ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
3973ccd979bdSMark Fasheh 
3974a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
3975ccd979bdSMark Fasheh 	if (list_empty(&lockres->l_blocked_list)) {
3976ccd979bdSMark Fasheh 		list_add_tail(&lockres->l_blocked_list,
3977ccd979bdSMark Fasheh 			      &osb->blocked_lock_list);
3978ccd979bdSMark Fasheh 		osb->blocked_lock_count++;
3979ccd979bdSMark Fasheh 	}
3980a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
3981ccd979bdSMark Fasheh }
398234d024f8SMark Fasheh 
398334d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
398434d024f8SMark Fasheh {
398534d024f8SMark Fasheh 	unsigned long processed;
3986a75e9ccaSSrinivas Eeda 	unsigned long flags;
398734d024f8SMark Fasheh 	struct ocfs2_lock_res *lockres;
398834d024f8SMark Fasheh 
3989a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
399034d024f8SMark Fasheh 	/* grab this early so we know to try again if a state change and
399134d024f8SMark Fasheh 	 * wake happens part-way through our work  */
399234d024f8SMark Fasheh 	osb->dc_work_sequence = osb->dc_wake_sequence;
399334d024f8SMark Fasheh 
399434d024f8SMark Fasheh 	processed = osb->blocked_lock_count;
399534d024f8SMark Fasheh 	while (processed) {
399634d024f8SMark Fasheh 		BUG_ON(list_empty(&osb->blocked_lock_list));
399734d024f8SMark Fasheh 
399834d024f8SMark Fasheh 		lockres = list_entry(osb->blocked_lock_list.next,
399934d024f8SMark Fasheh 				     struct ocfs2_lock_res, l_blocked_list);
400034d024f8SMark Fasheh 		list_del_init(&lockres->l_blocked_list);
400134d024f8SMark Fasheh 		osb->blocked_lock_count--;
4002a75e9ccaSSrinivas Eeda 		spin_unlock_irqrestore(&osb->dc_task_lock, flags);
400334d024f8SMark Fasheh 
400434d024f8SMark Fasheh 		BUG_ON(!processed);
400534d024f8SMark Fasheh 		processed--;
400634d024f8SMark Fasheh 
400734d024f8SMark Fasheh 		ocfs2_process_blocked_lock(osb, lockres);
400834d024f8SMark Fasheh 
4009a75e9ccaSSrinivas Eeda 		spin_lock_irqsave(&osb->dc_task_lock, flags);
401034d024f8SMark Fasheh 	}
4011a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
401234d024f8SMark Fasheh }
401334d024f8SMark Fasheh 
401434d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
401534d024f8SMark Fasheh {
401634d024f8SMark Fasheh 	int empty = 0;
4017a75e9ccaSSrinivas Eeda 	unsigned long flags;
401834d024f8SMark Fasheh 
4019a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
402034d024f8SMark Fasheh 	if (list_empty(&osb->blocked_lock_list))
402134d024f8SMark Fasheh 		empty = 1;
402234d024f8SMark Fasheh 
4023a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
402434d024f8SMark Fasheh 	return empty;
402534d024f8SMark Fasheh }
402634d024f8SMark Fasheh 
402734d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
402834d024f8SMark Fasheh {
402934d024f8SMark Fasheh 	int should_wake = 0;
4030a75e9ccaSSrinivas Eeda 	unsigned long flags;
403134d024f8SMark Fasheh 
4032a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
403334d024f8SMark Fasheh 	if (osb->dc_work_sequence != osb->dc_wake_sequence)
403434d024f8SMark Fasheh 		should_wake = 1;
4035a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
403634d024f8SMark Fasheh 
403734d024f8SMark Fasheh 	return should_wake;
403834d024f8SMark Fasheh }
403934d024f8SMark Fasheh 
4040200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg)
404134d024f8SMark Fasheh {
404234d024f8SMark Fasheh 	int status = 0;
404334d024f8SMark Fasheh 	struct ocfs2_super *osb = arg;
404434d024f8SMark Fasheh 
404534d024f8SMark Fasheh 	/* only quit once we've been asked to stop and there is no more
404634d024f8SMark Fasheh 	 * work available */
404734d024f8SMark Fasheh 	while (!(kthread_should_stop() &&
404834d024f8SMark Fasheh 		ocfs2_downconvert_thread_lists_empty(osb))) {
404934d024f8SMark Fasheh 
405034d024f8SMark Fasheh 		wait_event_interruptible(osb->dc_event,
405134d024f8SMark Fasheh 					 ocfs2_downconvert_thread_should_wake(osb) ||
405234d024f8SMark Fasheh 					 kthread_should_stop());
405334d024f8SMark Fasheh 
405434d024f8SMark Fasheh 		mlog(0, "downconvert_thread: awoken\n");
405534d024f8SMark Fasheh 
405634d024f8SMark Fasheh 		ocfs2_downconvert_thread_do_work(osb);
405734d024f8SMark Fasheh 	}
405834d024f8SMark Fasheh 
405934d024f8SMark Fasheh 	osb->dc_task = NULL;
406034d024f8SMark Fasheh 	return status;
406134d024f8SMark Fasheh }
406234d024f8SMark Fasheh 
406334d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
406434d024f8SMark Fasheh {
4065a75e9ccaSSrinivas Eeda 	unsigned long flags;
4066a75e9ccaSSrinivas Eeda 
4067a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
406834d024f8SMark Fasheh 	/* make sure the voting thread gets a swipe at whatever changes
406934d024f8SMark Fasheh 	 * the caller may have made to the voting state */
407034d024f8SMark Fasheh 	osb->dc_wake_sequence++;
4071a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
407234d024f8SMark Fasheh 	wake_up(&osb->dc_event);
407334d024f8SMark Fasheh }
4074