xref: /openbmc/linux/fs/ocfs2/dlmglue.c (revision b1b1e15e)
1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*-
2ccd979bdSMark Fasheh  * vim: noexpandtab sw=8 ts=8 sts=0:
3ccd979bdSMark Fasheh  *
4ccd979bdSMark Fasheh  * dlmglue.c
5ccd979bdSMark Fasheh  *
6ccd979bdSMark Fasheh  * Code which implements an OCFS2 specific interface to our DLM.
7ccd979bdSMark Fasheh  *
8ccd979bdSMark Fasheh  * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
9ccd979bdSMark Fasheh  *
10ccd979bdSMark Fasheh  * This program is free software; you can redistribute it and/or
11ccd979bdSMark Fasheh  * modify it under the terms of the GNU General Public
12ccd979bdSMark Fasheh  * License as published by the Free Software Foundation; either
13ccd979bdSMark Fasheh  * version 2 of the License, or (at your option) any later version.
14ccd979bdSMark Fasheh  *
15ccd979bdSMark Fasheh  * This program is distributed in the hope that it will be useful,
16ccd979bdSMark Fasheh  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17ccd979bdSMark Fasheh  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18ccd979bdSMark Fasheh  * General Public License for more details.
19ccd979bdSMark Fasheh  *
20ccd979bdSMark Fasheh  * You should have received a copy of the GNU General Public
21ccd979bdSMark Fasheh  * License along with this program; if not, write to the
22ccd979bdSMark Fasheh  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23ccd979bdSMark Fasheh  * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh  */
25ccd979bdSMark Fasheh 
26ccd979bdSMark Fasheh #include <linux/types.h>
27ccd979bdSMark Fasheh #include <linux/slab.h>
28ccd979bdSMark Fasheh #include <linux/highmem.h>
29ccd979bdSMark Fasheh #include <linux/mm.h>
30ccd979bdSMark Fasheh #include <linux/kthread.h>
31ccd979bdSMark Fasheh #include <linux/pagemap.h>
32ccd979bdSMark Fasheh #include <linux/debugfs.h>
33ccd979bdSMark Fasheh #include <linux/seq_file.h>
348ddb7b00SSunil Mushran #include <linux/time.h>
359e33d69fSJan Kara #include <linux/quotaops.h>
36ccd979bdSMark Fasheh 
37ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE
38ccd979bdSMark Fasheh #include <cluster/masklog.h>
39ccd979bdSMark Fasheh 
40ccd979bdSMark Fasheh #include "ocfs2.h"
41d24fbcdaSJoel Becker #include "ocfs2_lockingver.h"
42ccd979bdSMark Fasheh 
43ccd979bdSMark Fasheh #include "alloc.h"
44d680efe9SMark Fasheh #include "dcache.h"
45ccd979bdSMark Fasheh #include "dlmglue.h"
46ccd979bdSMark Fasheh #include "extent_map.h"
477f1a37e3STiger Yang #include "file.h"
48ccd979bdSMark Fasheh #include "heartbeat.h"
49ccd979bdSMark Fasheh #include "inode.h"
50ccd979bdSMark Fasheh #include "journal.h"
5124ef1815SJoel Becker #include "stackglue.h"
52ccd979bdSMark Fasheh #include "slot_map.h"
53ccd979bdSMark Fasheh #include "super.h"
54ccd979bdSMark Fasheh #include "uptodate.h"
559e33d69fSJan Kara #include "quota.h"
568dec98edSTao Ma #include "refcounttree.h"
57ccd979bdSMark Fasheh 
58ccd979bdSMark Fasheh #include "buffer_head_io.h"
59ccd979bdSMark Fasheh 
/*
 * Per-waiter bookkeeping record.
 *
 * NOTE(review): the code that queues, wakes and completes these
 * waiters is not visible in this part of the file.  Judging by the
 * field names this presumably describes one task waiting for
 * (lockres->l_flags & mw_mask) to equal mw_goal -- confirm against the
 * users before relying on that.
 */
struct ocfs2_mask_waiter {
	struct list_head	mw_item;	/* presumably linked on a lockres' l_mask_waiters list */
	int			mw_status;
	struct completion	mw_complete;
	unsigned long		mw_mask;
	unsigned long		mw_goal;
#ifdef CONFIG_OCFS2_FS_STATS
	/*
	 * Timestamp taken by ocfs2_init_start_time();
	 * ocfs2_update_lock_stats() subtracts it from the current time
	 * to get the elapsed time it accounts.
	 */
	ktime_t			mw_lock_start;
#endif
};
70ccd979bdSMark Fasheh 
7154a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
7254a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
73cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
749e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
75ccd979bdSMark Fasheh 
/*
 * Return values from ->downconvert_worker() functions.
 *
 * These control the precise actions of ocfs2_unblock_lock()
 * and ocfs2_process_blocked_lock().
 */
enum ocfs2_unblock_action {
	UNBLOCK_CONTINUE	= 0, /* Continue downconvert */
	UNBLOCK_CONTINUE_POST	= 1, /* Continue downconvert, fire
				      * ->post_unlock() callback. */
	UNBLOCK_STOP_POST	= 2, /* Do not downconvert, fire
				      * ->post_unlock() callback. */
};
90d680efe9SMark Fasheh 
/*
 * Pairs a requeue flag with an ocfs2_unblock_action value.
 *
 * NOTE(review): no user of this structure is visible in this part of
 * the file; presumably it carries the outcome of an unblock attempt
 * back to the downconvert thread -- confirm against the callers.
 */
struct ocfs2_unblock_ctl {
	int requeue;
	enum ocfs2_unblock_action unblock_action;
};
95d680efe9SMark Fasheh 
/*
 * Lockdep class keys, one per lock type.
 *
 * The array is indexed by lock type when a lock resource's lockdep map
 * is initialised in ocfs2_lock_res_init_common(), which only happens
 * under CONFIG_DEBUG_LOCK_ALLOC.  Keep the generically named array
 * private to this file and out of builds without lockdep support.
 * NOTE(review): assumes no other translation unit references
 * lockdep_keys -- confirm before merging.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
#endif
98cb25797dSJan Kara 
99810d5aebSMark Fasheh static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
100810d5aebSMark Fasheh 					int new_level);
101810d5aebSMark Fasheh static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
102810d5aebSMark Fasheh 
103cc567d89SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
104cc567d89SMark Fasheh 				     int blocking);
105cc567d89SMark Fasheh 
106cc567d89SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
107cc567d89SMark Fasheh 				       int blocking);
108d680efe9SMark Fasheh 
109d680efe9SMark Fasheh static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
110d680efe9SMark Fasheh 				     struct ocfs2_lock_res *lockres);
111ccd979bdSMark Fasheh 
1129e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
1136cb129f5SAdrian Bunk 
1148dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
1158dec98edSTao Ma 					    int new_level);
1168dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
1178dec98edSTao Ma 					 int blocking);
1188dec98edSTao Ma 
/*
 * Dump the meta LVB of __lockres at mlog mask __level, passing the
 * name and line of the calling function along for the log header.
 */
#define mlog_meta_lvb(__level, __lockres)				\
	ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__,		\
				 __LINE__, __lockres)
1206cb129f5SAdrian Bunk 
1216cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. */
1226cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level,
1236cb129f5SAdrian Bunk 				     const char *function,
1246cb129f5SAdrian Bunk 				     unsigned int line,
1256cb129f5SAdrian Bunk 				     struct ocfs2_lock_res *lockres)
1266cb129f5SAdrian Bunk {
127a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1286cb129f5SAdrian Bunk 
1296cb129f5SAdrian Bunk 	mlog(level, "LVB information for %s (called from %s:%u):\n",
1306cb129f5SAdrian Bunk 	     lockres->l_name, function, line);
1316cb129f5SAdrian Bunk 	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
1326cb129f5SAdrian Bunk 	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
1336cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_igeneration));
1346cb129f5SAdrian Bunk 	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
1356cb129f5SAdrian Bunk 	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
1366cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
1376cb129f5SAdrian Bunk 	     be16_to_cpu(lvb->lvb_imode));
1386cb129f5SAdrian Bunk 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
1396cb129f5SAdrian Bunk 	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
1406cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
1416cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
1426cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
1436cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iattr));
1446cb129f5SAdrian Bunk }
1456cb129f5SAdrian Bunk 
1466cb129f5SAdrian Bunk 
147f625c979SMark Fasheh /*
148f625c979SMark Fasheh  * OCFS2 Lock Resource Operations
149f625c979SMark Fasheh  *
150f625c979SMark Fasheh  * These fine tune the behavior of the generic dlmglue locking infrastructure.
1510d5dc6c2SMark Fasheh  *
1520d5dc6c2SMark Fasheh  * The most basic of lock types can point ->l_priv to their respective
1530d5dc6c2SMark Fasheh  * struct ocfs2_super and allow the default actions to manage things.
1540d5dc6c2SMark Fasheh  *
1550d5dc6c2SMark Fasheh  * Right now, each lock type also needs to implement an init function,
1560d5dc6c2SMark Fasheh  * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
1570d5dc6c2SMark Fasheh  * should be called when the lock is no longer needed (i.e., object
1580d5dc6c2SMark Fasheh  * destruction time).
159f625c979SMark Fasheh  */
struct ocfs2_lock_res_ops {
	/*
	 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
	 * this callback if ->l_priv is not an ocfs2_super pointer.
	 * When it is left NULL, ocfs2_get_lockres_osb() casts ->l_priv
	 * to an ocfs2_super pointer instead.
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optionally called in the downconvert thread after a
	 * successful downconvert. The lockres will not be referenced
	 * after this callback is called, so it is safe to free
	 * memory, etc.
	 *
	 * The exact semantics of when this is called are controlled
	 * by ->downconvert_worker()
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Allow a lock type to add checks to determine whether it is
	 * safe to downconvert a lock. Return 0 to re-queue the
	 * downconvert at a later time, nonzero to continue.
	 *
	 * The int argument is named new_level in the prototypes of the
	 * implementations in this file.
	 *
	 * For most locks, the default checks that there are no
	 * incompatible holders are sufficient.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Allows a lock type to populate the lock value block. This
	 * is called on downconvert, and when we drop a lock.
	 *
	 * Locks that want to use this should set LOCK_TYPE_USES_LVB
	 * in the flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Called from the downconvert thread when it is determined
	 * that a lock will be downconverted. This is called without
	 * any locks held so the function can do work that might
	 * schedule (syncing out data, etc).
	 *
	 * The int argument is named blocking in the prototypes of the
	 * implementations in this file.
	 *
	 * This should return any one of the ocfs2_unblock_action
	 * values, depending on what it wants the thread to do.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* flags which describe the specific requirements
	 * of a lock type. Descriptions of each individual flag follow.
	 */
	int flags;
};
218ccd979bdSMark Fasheh 
/*
 * Some locks want to "refresh" potentially stale data when a
 * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
 * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
 * individual lockres l_flags member from the ast function. It is
 * expected that the locking wrapper will clear the
 * OCFS2_LOCK_NEEDS_REFRESH flag when done.
 *
 * This is a bit value for ocfs2_lock_res_ops.flags and may be OR'ed
 * with the other LOCK_TYPE_* flags.
 */
#define LOCK_TYPE_REQUIRES_REFRESH 0x1

/*
 * Indicate that a lock type makes use of the lock value block. The
 * ->set_lvb lock type callback must be defined.
 *
 * This is a bit value for ocfs2_lock_res_ops.flags and may be OR'ed
 * with the other LOCK_TYPE_* flags.
 */
#define LOCK_TYPE_USES_LVB		0x2
234b80fc012SMark Fasheh 
235ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
23654a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
237f625c979SMark Fasheh 	.flags		= 0,
238ccd979bdSMark Fasheh };
239ccd979bdSMark Fasheh 
240e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
24154a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
242810d5aebSMark Fasheh 	.check_downconvert = ocfs2_check_meta_downconvert,
243810d5aebSMark Fasheh 	.set_lvb	= ocfs2_set_meta_lvb,
244f1f54068SMark Fasheh 	.downconvert_worker = ocfs2_data_convert_worker,
245b80fc012SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
246ccd979bdSMark Fasheh };
247ccd979bdSMark Fasheh 
248ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = {
249f625c979SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
250ccd979bdSMark Fasheh };
251ccd979bdSMark Fasheh 
252ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
253f625c979SMark Fasheh 	.flags		= 0,
254ccd979bdSMark Fasheh };
255ccd979bdSMark Fasheh 
2566ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
2576ca497a8Swengang wang 	.flags		= 0,
2586ca497a8Swengang wang };
2596ca497a8Swengang wang 
26083273932SSrinivas Eeda static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
26183273932SSrinivas Eeda 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
26283273932SSrinivas Eeda };
26383273932SSrinivas Eeda 
264d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
26554a7e755SMark Fasheh 	.get_osb	= ocfs2_get_dentry_osb,
266d680efe9SMark Fasheh 	.post_unlock	= ocfs2_dentry_post_unlock,
267cc567d89SMark Fasheh 	.downconvert_worker = ocfs2_dentry_convert_worker,
268f625c979SMark Fasheh 	.flags		= 0,
269d680efe9SMark Fasheh };
270d680efe9SMark Fasheh 
27150008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
27250008630STiger Yang 	.get_osb	= ocfs2_get_inode_osb,
27350008630STiger Yang 	.flags		= 0,
27450008630STiger Yang };
27550008630STiger Yang 
276cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
277cf8e06f1SMark Fasheh 	.get_osb	= ocfs2_get_file_osb,
278cf8e06f1SMark Fasheh 	.flags		= 0,
279cf8e06f1SMark Fasheh };
280cf8e06f1SMark Fasheh 
2819e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
2829e33d69fSJan Kara 	.set_lvb	= ocfs2_set_qinfo_lvb,
2839e33d69fSJan Kara 	.get_osb	= ocfs2_get_qinfo_osb,
2849e33d69fSJan Kara 	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
2859e33d69fSJan Kara };
2869e33d69fSJan Kara 
2878dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
2888dec98edSTao Ma 	.check_downconvert = ocfs2_check_refcount_downconvert,
2898dec98edSTao Ma 	.downconvert_worker = ocfs2_refcount_convert_worker,
2908dec98edSTao Ma 	.flags		= 0,
2918dec98edSTao Ma };
2928dec98edSTao Ma 
293ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
294ccd979bdSMark Fasheh {
295ccd979bdSMark Fasheh 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
29650008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_RW ||
29750008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
298ccd979bdSMark Fasheh }
299ccd979bdSMark Fasheh 
300c0e41338SJoel Becker static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
301a796d286SJoel Becker {
302a796d286SJoel Becker 	return container_of(lksb, struct ocfs2_lock_res, l_lksb);
303a796d286SJoel Becker }
304a796d286SJoel Becker 
305ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
306ccd979bdSMark Fasheh {
307ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_is_inode_lock(lockres));
308ccd979bdSMark Fasheh 
309ccd979bdSMark Fasheh 	return (struct inode *) lockres->l_priv;
310ccd979bdSMark Fasheh }
311ccd979bdSMark Fasheh 
312d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
313d680efe9SMark Fasheh {
314d680efe9SMark Fasheh 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
315d680efe9SMark Fasheh 
316d680efe9SMark Fasheh 	return (struct ocfs2_dentry_lock *)lockres->l_priv;
317d680efe9SMark Fasheh }
318d680efe9SMark Fasheh 
3199e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
3209e33d69fSJan Kara {
3219e33d69fSJan Kara 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
3229e33d69fSJan Kara 
3239e33d69fSJan Kara 	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
3249e33d69fSJan Kara }
3259e33d69fSJan Kara 
3268dec98edSTao Ma static inline struct ocfs2_refcount_tree *
3278dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
3288dec98edSTao Ma {
3298dec98edSTao Ma 	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
3308dec98edSTao Ma }
3318dec98edSTao Ma 
33254a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
33354a7e755SMark Fasheh {
33454a7e755SMark Fasheh 	if (lockres->l_ops->get_osb)
33554a7e755SMark Fasheh 		return lockres->l_ops->get_osb(lockres);
33654a7e755SMark Fasheh 
33754a7e755SMark Fasheh 	return (struct ocfs2_super *)lockres->l_priv;
33854a7e755SMark Fasheh }
33954a7e755SMark Fasheh 
340ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
341ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
342ccd979bdSMark Fasheh 			     int level,
343bd3e7610SJoel Becker 			     u32 dlm_flags);
344ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
345ccd979bdSMark Fasheh 						     int wanted);
346cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
347ccd979bdSMark Fasheh 				   struct ocfs2_lock_res *lockres,
348cb25797dSJan Kara 				   int level, unsigned long caller_ip);
349cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
350cb25797dSJan Kara 					struct ocfs2_lock_res *lockres,
351cb25797dSJan Kara 					int level)
352cb25797dSJan Kara {
353cb25797dSJan Kara 	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
354cb25797dSJan Kara }
355cb25797dSJan Kara 
356ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
357ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
358ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
359ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
360ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
361ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres);
362ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
363ccd979bdSMark Fasheh 						int convert);
/*
 * Log, at ML_ERROR, the DLM error code _err returned by the call named
 * by the string _func on lock resource _lockres.
 *
 * For OCFS2_LOCK_TYPE_DENTRY locks the name holds the inode number in
 * binary form starting at OCFS2_DENTRY_LOCK_INO_START, so only the
 * first OCFS2_DENTRY_LOCK_INO_START - 1 characters of the name are
 * printed, followed by the inode number from
 * ocfs2_get_dentry_lock_ino() in hex.
 *
 * Every use of _lockres is parenthesized so that any pointer
 * expression may be passed.  _lockres is evaluated more than once, so
 * it must not have side effects.
 */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     (_err), (_func), (_lockres)->l_name);				\
	else										\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     (_err), (_func), OCFS2_DENTRY_LOCK_INO_START - 1,			\
		     (_lockres)->l_name,						\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
} while (0)
37334d024f8SMark Fasheh static int ocfs2_downconvert_thread(void *arg);
37434d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
375ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres);
376e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode,
377ccd979bdSMark Fasheh 				  struct buffer_head **bh);
378ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
379ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level);
380de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
381cf8e06f1SMark Fasheh 					      int new_level);
382cf8e06f1SMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
383cf8e06f1SMark Fasheh 				  struct ocfs2_lock_res *lockres,
384cf8e06f1SMark Fasheh 				  int new_level,
385de551246SJoel Becker 				  int lvb,
386de551246SJoel Becker 				  unsigned int generation);
387cf8e06f1SMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
388cf8e06f1SMark Fasheh 				        struct ocfs2_lock_res *lockres);
389cf8e06f1SMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb,
390cf8e06f1SMark Fasheh 				struct ocfs2_lock_res *lockres);
391cf8e06f1SMark Fasheh 
392ccd979bdSMark Fasheh 
393ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
394ccd979bdSMark Fasheh 				  u64 blkno,
395ccd979bdSMark Fasheh 				  u32 generation,
396ccd979bdSMark Fasheh 				  char *name)
397ccd979bdSMark Fasheh {
398ccd979bdSMark Fasheh 	int len;
399ccd979bdSMark Fasheh 
400ccd979bdSMark Fasheh 	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
401ccd979bdSMark Fasheh 
402b0697053SMark Fasheh 	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
403b0697053SMark Fasheh 		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
404b0697053SMark Fasheh 		       (long long)blkno, generation);
405ccd979bdSMark Fasheh 
406ccd979bdSMark Fasheh 	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
407ccd979bdSMark Fasheh 
408ccd979bdSMark Fasheh 	mlog(0, "built lock resource with name: %s\n", name);
409ccd979bdSMark Fasheh }
410ccd979bdSMark Fasheh 
/*
 * Taken around the insertion of a lockres' l_debug_list entry into a
 * d_lockres_tracking list and around its removal again.
 */
static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
412ccd979bdSMark Fasheh 
413ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
414ccd979bdSMark Fasheh 				       struct ocfs2_dlm_debug *dlm_debug)
415ccd979bdSMark Fasheh {
416ccd979bdSMark Fasheh 	mlog(0, "Add tracking for lockres %s\n", res->l_name);
417ccd979bdSMark Fasheh 
418ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
419ccd979bdSMark Fasheh 	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
420ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
421ccd979bdSMark Fasheh }
422ccd979bdSMark Fasheh 
423ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
424ccd979bdSMark Fasheh {
425ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
426ccd979bdSMark Fasheh 	if (!list_empty(&res->l_debug_list))
427ccd979bdSMark Fasheh 		list_del_init(&res->l_debug_list);
428ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
429ccd979bdSMark Fasheh }
430ccd979bdSMark Fasheh 
4318ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
4328ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
4338ddb7b00SSunil Mushran {
4348ddb7b00SSunil Mushran 	res->l_lock_refresh = 0;
4355bc970e8SSunil Mushran 	memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
4365bc970e8SSunil Mushran 	memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
4378ddb7b00SSunil Mushran }
4388ddb7b00SSunil Mushran 
4398ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
4408ddb7b00SSunil Mushran 				    struct ocfs2_mask_waiter *mw, int ret)
4418ddb7b00SSunil Mushran {
4425bc970e8SSunil Mushran 	u32 usec;
4435bc970e8SSunil Mushran 	ktime_t kt;
4445bc970e8SSunil Mushran 	struct ocfs2_lock_stats *stats;
4458ddb7b00SSunil Mushran 
4465bc970e8SSunil Mushran 	if (level == LKM_PRMODE)
4475bc970e8SSunil Mushran 		stats = &res->l_lock_prmode;
4485bc970e8SSunil Mushran 	else if (level == LKM_EXMODE)
4495bc970e8SSunil Mushran 		stats = &res->l_lock_exmode;
4505bc970e8SSunil Mushran 	else
4518ddb7b00SSunil Mushran 		return;
4528ddb7b00SSunil Mushran 
4535bc970e8SSunil Mushran 	kt = ktime_sub(ktime_get(), mw->mw_lock_start);
4545bc970e8SSunil Mushran 	usec = ktime_to_us(kt);
4555bc970e8SSunil Mushran 
4565bc970e8SSunil Mushran 	stats->ls_gets++;
4575bc970e8SSunil Mushran 	stats->ls_total += ktime_to_ns(kt);
4585bc970e8SSunil Mushran 	/* overflow */
45916865b7cSroel 	if (unlikely(stats->ls_gets == 0)) {
4605bc970e8SSunil Mushran 		stats->ls_gets++;
4615bc970e8SSunil Mushran 		stats->ls_total = ktime_to_ns(kt);
4625bc970e8SSunil Mushran 	}
4635bc970e8SSunil Mushran 
4645bc970e8SSunil Mushran 	if (stats->ls_max < usec)
4655bc970e8SSunil Mushran 		stats->ls_max = usec;
4665bc970e8SSunil Mushran 
4678ddb7b00SSunil Mushran 	if (ret)
4685bc970e8SSunil Mushran 		stats->ls_fail++;
4698ddb7b00SSunil Mushran }
4708ddb7b00SSunil Mushran 
4718ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
4728ddb7b00SSunil Mushran {
4738ddb7b00SSunil Mushran 	lockres->l_lock_refresh++;
4748ddb7b00SSunil Mushran }
4758ddb7b00SSunil Mushran 
4768ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
4778ddb7b00SSunil Mushran {
4785bc970e8SSunil Mushran 	mw->mw_lock_start = ktime_get();
4798ddb7b00SSunil Mushran }
4808ddb7b00SSunil Mushran #else
/*
 * Without CONFIG_OCFS2_FS_STATS the lock statistics hooks are empty
 * inline functions.
 */
static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) { }

static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
					   int level,
					   struct ocfs2_mask_waiter *mw,
					   int ret) { }

static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) { }

static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) { }
4948ddb7b00SSunil Mushran #endif
4958ddb7b00SSunil Mushran 
496ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
497ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *res,
498ccd979bdSMark Fasheh 				       enum ocfs2_lock_type type,
499ccd979bdSMark Fasheh 				       struct ocfs2_lock_res_ops *ops,
500ccd979bdSMark Fasheh 				       void *priv)
501ccd979bdSMark Fasheh {
502ccd979bdSMark Fasheh 	res->l_type          = type;
503ccd979bdSMark Fasheh 	res->l_ops           = ops;
504ccd979bdSMark Fasheh 	res->l_priv          = priv;
505ccd979bdSMark Fasheh 
506bd3e7610SJoel Becker 	res->l_level         = DLM_LOCK_IV;
507bd3e7610SJoel Becker 	res->l_requested     = DLM_LOCK_IV;
508bd3e7610SJoel Becker 	res->l_blocking      = DLM_LOCK_IV;
509ccd979bdSMark Fasheh 	res->l_action        = OCFS2_AST_INVALID;
510ccd979bdSMark Fasheh 	res->l_unlock_action = OCFS2_UNLOCK_INVALID;
511ccd979bdSMark Fasheh 
512ccd979bdSMark Fasheh 	res->l_flags         = OCFS2_LOCK_INITIALIZED;
513ccd979bdSMark Fasheh 
514ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
5158ddb7b00SSunil Mushran 
5168ddb7b00SSunil Mushran 	ocfs2_init_lock_stats(res);
517cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
518cb25797dSJan Kara 	if (type != OCFS2_LOCK_TYPE_OPEN)
519cb25797dSJan Kara 		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
520cb25797dSJan Kara 				 &lockdep_keys[type], 0);
521cb25797dSJan Kara 	else
522cb25797dSJan Kara 		res->l_lockdep_map.key = NULL;
523cb25797dSJan Kara #endif
524ccd979bdSMark Fasheh }
525ccd979bdSMark Fasheh 
526ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
527ccd979bdSMark Fasheh {
528ccd979bdSMark Fasheh 	/* This also clears out the lock status block */
529ccd979bdSMark Fasheh 	memset(res, 0, sizeof(struct ocfs2_lock_res));
530ccd979bdSMark Fasheh 	spin_lock_init(&res->l_lock);
531ccd979bdSMark Fasheh 	init_waitqueue_head(&res->l_event);
532ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_blocked_list);
533ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_mask_waiters);
534ccd979bdSMark Fasheh }
535ccd979bdSMark Fasheh 
536ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
537ccd979bdSMark Fasheh 			       enum ocfs2_lock_type type,
53824c19ef4SMark Fasheh 			       unsigned int generation,
539ccd979bdSMark Fasheh 			       struct inode *inode)
540ccd979bdSMark Fasheh {
541ccd979bdSMark Fasheh 	struct ocfs2_lock_res_ops *ops;
542ccd979bdSMark Fasheh 
543ccd979bdSMark Fasheh 	switch(type) {
544ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_RW:
545ccd979bdSMark Fasheh 			ops = &ocfs2_inode_rw_lops;
546ccd979bdSMark Fasheh 			break;
547ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_META:
548e63aecb6SMark Fasheh 			ops = &ocfs2_inode_inode_lops;
549ccd979bdSMark Fasheh 			break;
55050008630STiger Yang 		case OCFS2_LOCK_TYPE_OPEN:
55150008630STiger Yang 			ops = &ocfs2_inode_open_lops;
55250008630STiger Yang 			break;
553ccd979bdSMark Fasheh 		default:
554ccd979bdSMark Fasheh 			mlog_bug_on_msg(1, "type: %d\n", type);
555ccd979bdSMark Fasheh 			ops = NULL; /* thanks, gcc */
556ccd979bdSMark Fasheh 			break;
557ccd979bdSMark Fasheh 	};
558ccd979bdSMark Fasheh 
559d680efe9SMark Fasheh 	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
56024c19ef4SMark Fasheh 			      generation, res->l_name);
561d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
562d680efe9SMark Fasheh }
563d680efe9SMark Fasheh 
56454a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
56554a7e755SMark Fasheh {
56654a7e755SMark Fasheh 	struct inode *inode = ocfs2_lock_res_inode(lockres);
56754a7e755SMark Fasheh 
56854a7e755SMark Fasheh 	return OCFS2_SB(inode->i_sb);
56954a7e755SMark Fasheh }
57054a7e755SMark Fasheh 
5719e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
5729e33d69fSJan Kara {
5739e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *info = lockres->l_priv;
5749e33d69fSJan Kara 
5759e33d69fSJan Kara 	return OCFS2_SB(info->dqi_gi.dqi_sb);
5769e33d69fSJan Kara }
5779e33d69fSJan Kara 
578cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
579cf8e06f1SMark Fasheh {
580cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = lockres->l_priv;
581cf8e06f1SMark Fasheh 
582cf8e06f1SMark Fasheh 	return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
583cf8e06f1SMark Fasheh }
584cf8e06f1SMark Fasheh 
585d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
586d680efe9SMark Fasheh {
587d680efe9SMark Fasheh 	__be64 inode_blkno_be;
588d680efe9SMark Fasheh 
589d680efe9SMark Fasheh 	memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
590d680efe9SMark Fasheh 	       sizeof(__be64));
591d680efe9SMark Fasheh 
592d680efe9SMark Fasheh 	return be64_to_cpu(inode_blkno_be);
593d680efe9SMark Fasheh }
594d680efe9SMark Fasheh 
59554a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
59654a7e755SMark Fasheh {
59754a7e755SMark Fasheh 	struct ocfs2_dentry_lock *dl = lockres->l_priv;
59854a7e755SMark Fasheh 
59954a7e755SMark Fasheh 	return OCFS2_SB(dl->dl_inode->i_sb);
60054a7e755SMark Fasheh }
60154a7e755SMark Fasheh 
602d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
603d680efe9SMark Fasheh 				u64 parent, struct inode *inode)
604d680efe9SMark Fasheh {
605d680efe9SMark Fasheh 	int len;
606d680efe9SMark Fasheh 	u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
607d680efe9SMark Fasheh 	__be64 inode_blkno_be = cpu_to_be64(inode_blkno);
608d680efe9SMark Fasheh 	struct ocfs2_lock_res *lockres = &dl->dl_lockres;
609d680efe9SMark Fasheh 
610d680efe9SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
611d680efe9SMark Fasheh 
612d680efe9SMark Fasheh 	/*
613d680efe9SMark Fasheh 	 * Unfortunately, the standard lock naming scheme won't work
614d680efe9SMark Fasheh 	 * here because we have two 16 byte values to use. Instead,
615d680efe9SMark Fasheh 	 * we'll stuff the inode number as a binary value. We still
616d680efe9SMark Fasheh 	 * want error prints to show something without garbling the
617d680efe9SMark Fasheh 	 * display, so drop a null byte in there before the inode
618d680efe9SMark Fasheh 	 * number. A future version of OCFS2 will likely use all
619d680efe9SMark Fasheh 	 * binary lock names. The stringified names have been a
620d680efe9SMark Fasheh 	 * tremendous aid in debugging, but now that the debugfs
621d680efe9SMark Fasheh 	 * interface exists, we can mangle things there if need be.
622d680efe9SMark Fasheh 	 *
623d680efe9SMark Fasheh 	 * NOTE: We also drop the standard "pad" value (the total lock
624d680efe9SMark Fasheh 	 * name size stays the same though - the last part is all
625d680efe9SMark Fasheh 	 * zeros due to the memset in ocfs2_lock_res_init_once()
626d680efe9SMark Fasheh 	 */
627d680efe9SMark Fasheh 	len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
628d680efe9SMark Fasheh 		       "%c%016llx",
629d680efe9SMark Fasheh 		       ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
630d680efe9SMark Fasheh 		       (long long)parent);
631d680efe9SMark Fasheh 
632d680efe9SMark Fasheh 	BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
633d680efe9SMark Fasheh 
634d680efe9SMark Fasheh 	memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
635d680efe9SMark Fasheh 	       sizeof(__be64));
636d680efe9SMark Fasheh 
637d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
638d680efe9SMark Fasheh 				   OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
639d680efe9SMark Fasheh 				   dl);
640ccd979bdSMark Fasheh }
641ccd979bdSMark Fasheh 
642ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
643ccd979bdSMark Fasheh 				      struct ocfs2_super *osb)
644ccd979bdSMark Fasheh {
645ccd979bdSMark Fasheh 	/* Superblock lockres doesn't come from a slab so we call init
646ccd979bdSMark Fasheh 	 * once on it manually.  */
647ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
648d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
649d680efe9SMark Fasheh 			      0, res->l_name);
650ccd979bdSMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
651ccd979bdSMark Fasheh 				   &ocfs2_super_lops, osb);
652ccd979bdSMark Fasheh }
653ccd979bdSMark Fasheh 
654ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
655ccd979bdSMark Fasheh 				       struct ocfs2_super *osb)
656ccd979bdSMark Fasheh {
657ccd979bdSMark Fasheh 	/* Rename lockres doesn't come from a slab so we call init
658ccd979bdSMark Fasheh 	 * once on it manually.  */
659ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
660d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
661d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
662ccd979bdSMark Fasheh 				   &ocfs2_rename_lops, osb);
663ccd979bdSMark Fasheh }
664ccd979bdSMark Fasheh 
6656ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
6666ca497a8Swengang wang 					 struct ocfs2_super *osb)
6676ca497a8Swengang wang {
6686ca497a8Swengang wang 	/* nfs_sync lockres doesn't come from a slab so we call init
6696ca497a8Swengang wang 	 * once on it manually.  */
6706ca497a8Swengang wang 	ocfs2_lock_res_init_once(res);
6716ca497a8Swengang wang 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
6726ca497a8Swengang wang 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
6736ca497a8Swengang wang 				   &ocfs2_nfs_sync_lops, osb);
6746ca497a8Swengang wang }
6756ca497a8Swengang wang 
67683273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
67783273932SSrinivas Eeda 					    struct ocfs2_super *osb)
67883273932SSrinivas Eeda {
67983273932SSrinivas Eeda 	ocfs2_lock_res_init_once(res);
68083273932SSrinivas Eeda 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
68183273932SSrinivas Eeda 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
68283273932SSrinivas Eeda 				   &ocfs2_orphan_scan_lops, osb);
68383273932SSrinivas Eeda }
68483273932SSrinivas Eeda 
685cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
686cf8e06f1SMark Fasheh 			      struct ocfs2_file_private *fp)
687cf8e06f1SMark Fasheh {
688cf8e06f1SMark Fasheh 	struct inode *inode = fp->fp_file->f_mapping->host;
689cf8e06f1SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
690cf8e06f1SMark Fasheh 
691cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
692cf8e06f1SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
693cf8e06f1SMark Fasheh 			      inode->i_generation, lockres->l_name);
694cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
695cf8e06f1SMark Fasheh 				   OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
696cf8e06f1SMark Fasheh 				   fp);
697cf8e06f1SMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_NOCACHE;
698cf8e06f1SMark Fasheh }
699cf8e06f1SMark Fasheh 
7009e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
7019e33d69fSJan Kara 			       struct ocfs2_mem_dqinfo *info)
7029e33d69fSJan Kara {
7039e33d69fSJan Kara 	ocfs2_lock_res_init_once(lockres);
7049e33d69fSJan Kara 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
7059e33d69fSJan Kara 			      0, lockres->l_name);
7069e33d69fSJan Kara 	ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
7079e33d69fSJan Kara 				   OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
7089e33d69fSJan Kara 				   info);
7099e33d69fSJan Kara }
7109e33d69fSJan Kara 
7118dec98edSTao Ma void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
7128dec98edSTao Ma 				  struct ocfs2_super *osb, u64 ref_blkno,
7138dec98edSTao Ma 				  unsigned int generation)
7148dec98edSTao Ma {
7158dec98edSTao Ma 	ocfs2_lock_res_init_once(lockres);
7168dec98edSTao Ma 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
7178dec98edSTao Ma 			      generation, lockres->l_name);
7188dec98edSTao Ma 	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
7198dec98edSTao Ma 				   &ocfs2_refcount_block_lops, osb);
7208dec98edSTao Ma }
7218dec98edSTao Ma 
722ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
723ccd979bdSMark Fasheh {
724ccd979bdSMark Fasheh 	if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
725ccd979bdSMark Fasheh 		return;
726ccd979bdSMark Fasheh 
727ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
728ccd979bdSMark Fasheh 
729ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
730ccd979bdSMark Fasheh 			"Lockres %s is on the blocked list\n",
731ccd979bdSMark Fasheh 			res->l_name);
732ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
733ccd979bdSMark Fasheh 			"Lockres %s has mask waiters pending\n",
734ccd979bdSMark Fasheh 			res->l_name);
735ccd979bdSMark Fasheh 	mlog_bug_on_msg(spin_is_locked(&res->l_lock),
736ccd979bdSMark Fasheh 			"Lockres %s is locked\n",
737ccd979bdSMark Fasheh 			res->l_name);
738ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ro_holders,
739ccd979bdSMark Fasheh 			"Lockres %s has %u ro holders\n",
740ccd979bdSMark Fasheh 			res->l_name, res->l_ro_holders);
741ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ex_holders,
742ccd979bdSMark Fasheh 			"Lockres %s has %u ex holders\n",
743ccd979bdSMark Fasheh 			res->l_name, res->l_ex_holders);
744ccd979bdSMark Fasheh 
745ccd979bdSMark Fasheh 	/* Need to clear out the lock status block for the dlm */
746ccd979bdSMark Fasheh 	memset(&res->l_lksb, 0, sizeof(res->l_lksb));
747ccd979bdSMark Fasheh 
748ccd979bdSMark Fasheh 	res->l_flags = 0UL;
749ccd979bdSMark Fasheh }
750ccd979bdSMark Fasheh 
751ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
752ccd979bdSMark Fasheh 				     int level)
753ccd979bdSMark Fasheh {
754ccd979bdSMark Fasheh 	BUG_ON(!lockres);
755ccd979bdSMark Fasheh 
756ccd979bdSMark Fasheh 	switch(level) {
757bd3e7610SJoel Becker 	case DLM_LOCK_EX:
758ccd979bdSMark Fasheh 		lockres->l_ex_holders++;
759ccd979bdSMark Fasheh 		break;
760bd3e7610SJoel Becker 	case DLM_LOCK_PR:
761ccd979bdSMark Fasheh 		lockres->l_ro_holders++;
762ccd979bdSMark Fasheh 		break;
763ccd979bdSMark Fasheh 	default:
764ccd979bdSMark Fasheh 		BUG();
765ccd979bdSMark Fasheh 	}
766ccd979bdSMark Fasheh }
767ccd979bdSMark Fasheh 
768ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
769ccd979bdSMark Fasheh 				     int level)
770ccd979bdSMark Fasheh {
771ccd979bdSMark Fasheh 	BUG_ON(!lockres);
772ccd979bdSMark Fasheh 
773ccd979bdSMark Fasheh 	switch(level) {
774bd3e7610SJoel Becker 	case DLM_LOCK_EX:
775ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ex_holders);
776ccd979bdSMark Fasheh 		lockres->l_ex_holders--;
777ccd979bdSMark Fasheh 		break;
778bd3e7610SJoel Becker 	case DLM_LOCK_PR:
779ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ro_holders);
780ccd979bdSMark Fasheh 		lockres->l_ro_holders--;
781ccd979bdSMark Fasheh 		break;
782ccd979bdSMark Fasheh 	default:
783ccd979bdSMark Fasheh 		BUG();
784ccd979bdSMark Fasheh 	}
785ccd979bdSMark Fasheh }
786ccd979bdSMark Fasheh 
787ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock
788ccd979bdSMark Fasheh  * levels are EX, PR, and NL. It *will* have to be adjusted when more
789ccd979bdSMark Fasheh  * lock types are added. */
790ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level)
791ccd979bdSMark Fasheh {
792bd3e7610SJoel Becker 	int new_level = DLM_LOCK_EX;
793ccd979bdSMark Fasheh 
794bd3e7610SJoel Becker 	if (level == DLM_LOCK_EX)
795bd3e7610SJoel Becker 		new_level = DLM_LOCK_NL;
796bd3e7610SJoel Becker 	else if (level == DLM_LOCK_PR)
797bd3e7610SJoel Becker 		new_level = DLM_LOCK_PR;
798ccd979bdSMark Fasheh 	return new_level;
799ccd979bdSMark Fasheh }
800ccd979bdSMark Fasheh 
801ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres,
802ccd979bdSMark Fasheh 			      unsigned long newflags)
803ccd979bdSMark Fasheh {
804800deef3SChristoph Hellwig 	struct ocfs2_mask_waiter *mw, *tmp;
805ccd979bdSMark Fasheh 
806ccd979bdSMark Fasheh  	assert_spin_locked(&lockres->l_lock);
807ccd979bdSMark Fasheh 
808ccd979bdSMark Fasheh 	lockres->l_flags = newflags;
809ccd979bdSMark Fasheh 
810800deef3SChristoph Hellwig 	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
811ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
812ccd979bdSMark Fasheh 			continue;
813ccd979bdSMark Fasheh 
814ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
815ccd979bdSMark Fasheh 		mw->mw_status = 0;
816ccd979bdSMark Fasheh 		complete(&mw->mw_complete);
817ccd979bdSMark Fasheh 	}
818ccd979bdSMark Fasheh }
819ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
820ccd979bdSMark Fasheh {
821ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags | or);
822ccd979bdSMark Fasheh }
823ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
824ccd979bdSMark Fasheh 				unsigned long clear)
825ccd979bdSMark Fasheh {
826ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags & ~clear);
827ccd979bdSMark Fasheh }
828ccd979bdSMark Fasheh 
829ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
830ccd979bdSMark Fasheh {
831ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
832ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
833ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
834bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
835ccd979bdSMark Fasheh 
836ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
837ccd979bdSMark Fasheh 	if (lockres->l_level <=
838ccd979bdSMark Fasheh 	    ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
839bd3e7610SJoel Becker 		lockres->l_blocking = DLM_LOCK_NL;
840ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
841ccd979bdSMark Fasheh 	}
842ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
843ccd979bdSMark Fasheh }
844ccd979bdSMark Fasheh 
845ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
846ccd979bdSMark Fasheh {
847ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
848ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
849ccd979bdSMark Fasheh 
850ccd979bdSMark Fasheh 	/* Convert from RO to EX doesn't really need anything as our
851ccd979bdSMark Fasheh 	 * information is already up to data. Convert from NL to
852ccd979bdSMark Fasheh 	 * *anything* however should mark ourselves as needing an
853ccd979bdSMark Fasheh 	 * update */
854bd3e7610SJoel Becker 	if (lockres->l_level == DLM_LOCK_NL &&
855f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
856ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
857ccd979bdSMark Fasheh 
858ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
859a1912826SSunil Mushran 
860a1912826SSunil Mushran 	/*
861a1912826SSunil Mushran 	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
862a1912826SSunil Mushran 	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
863a1912826SSunil Mushran 	 * downconverting the lock before the upconvert has fully completed.
864d1e78238SXue jiufei 	 * Do not prevent the dc thread from downconverting if NONBLOCK lock
865d1e78238SXue jiufei 	 * had already returned.
866a1912826SSunil Mushran 	 */
867d1e78238SXue jiufei 	if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED))
868a1912826SSunil Mushran 		lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
869d1e78238SXue jiufei 	else
870d1e78238SXue jiufei 		lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED);
871a1912826SSunil Mushran 
872ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
873ccd979bdSMark Fasheh }
874ccd979bdSMark Fasheh 
875ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
876ccd979bdSMark Fasheh {
8773cf0c507SRoel Kluin 	BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
878ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
879ccd979bdSMark Fasheh 
880bd3e7610SJoel Becker 	if (lockres->l_requested > DLM_LOCK_NL &&
881f625c979SMark Fasheh 	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
882f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
883ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
884ccd979bdSMark Fasheh 
885ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
886ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
887ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
888ccd979bdSMark Fasheh }
889ccd979bdSMark Fasheh 
890ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
891ccd979bdSMark Fasheh 				     int level)
892ccd979bdSMark Fasheh {
893ccd979bdSMark Fasheh 	int needs_downconvert = 0;
894ccd979bdSMark Fasheh 
895ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
896ccd979bdSMark Fasheh 
897ccd979bdSMark Fasheh 	if (level > lockres->l_blocking) {
898ccd979bdSMark Fasheh 		/* only schedule a downconvert if we haven't already scheduled
899ccd979bdSMark Fasheh 		 * one that goes low enough to satisfy the level we're
900ccd979bdSMark Fasheh 		 * blocking.  this also catches the case where we get
901ccd979bdSMark Fasheh 		 * duplicate BASTs */
902ccd979bdSMark Fasheh 		if (ocfs2_highest_compat_lock_level(level) <
903ccd979bdSMark Fasheh 		    ocfs2_highest_compat_lock_level(lockres->l_blocking))
904ccd979bdSMark Fasheh 			needs_downconvert = 1;
905ccd979bdSMark Fasheh 
906ccd979bdSMark Fasheh 		lockres->l_blocking = level;
907ccd979bdSMark Fasheh 	}
908ccd979bdSMark Fasheh 
9099b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n",
9109b915181SSunil Mushran 	     lockres->l_name, level, lockres->l_level, lockres->l_blocking,
9119b915181SSunil Mushran 	     needs_downconvert);
9129b915181SSunil Mushran 
9130b94a909SWengang Wang 	if (needs_downconvert)
9140b94a909SWengang Wang 		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
915c1e8d35eSTao Ma 	mlog(0, "needs_downconvert = %d\n", needs_downconvert);
916ccd979bdSMark Fasheh 	return needs_downconvert;
917ccd979bdSMark Fasheh }
918ccd979bdSMark Fasheh 
919de551246SJoel Becker /*
920de551246SJoel Becker  * OCFS2_LOCK_PENDING and l_pending_gen.
921de551246SJoel Becker  *
922de551246SJoel Becker  * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting
923de551246SJoel Becker  * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock()
924de551246SJoel Becker  * for more details on the race.
925de551246SJoel Becker  *
926de551246SJoel Becker  * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces
927de551246SJoel Becker  * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock()
928de551246SJoel Becker  * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
929de551246SJoel Becker  * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
930de551246SJoel Becker  * the caller is going to try to clear PENDING again.  If nothing else is
931de551246SJoel Becker  * happening, __lockres_clear_pending() sees PENDING is unset and does
932de551246SJoel Becker  * nothing.
933de551246SJoel Becker  *
934de551246SJoel Becker  * But what if another path (eg downconvert thread) has just started a
935de551246SJoel Becker  * new locking action?  The other path has re-set PENDING.  Our path
936de551246SJoel Becker  * cannot clear PENDING, because that will re-open the original race
937de551246SJoel Becker  * window.
938de551246SJoel Becker  *
939de551246SJoel Becker  * [Example]
940de551246SJoel Becker  *
941de551246SJoel Becker  * ocfs2_meta_lock()
942de551246SJoel Becker  *  ocfs2_cluster_lock()
943de551246SJoel Becker  *   set BUSY
944de551246SJoel Becker  *   set PENDING
945de551246SJoel Becker  *   drop l_lock
946de551246SJoel Becker  *   ocfs2_dlm_lock()
947de551246SJoel Becker  *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
948de551246SJoel Becker  *     clear PENDING			 ocfs2_unblock_lock()
 *					  take l_lock
950de551246SJoel Becker  *					  !BUSY
951de551246SJoel Becker  *					  ocfs2_prepare_downconvert()
952de551246SJoel Becker  *					   set BUSY
953de551246SJoel Becker  *					   set PENDING
954de551246SJoel Becker  *					  drop l_lock
955de551246SJoel Becker  *   take l_lock
956de551246SJoel Becker  *   clear PENDING
957de551246SJoel Becker  *   drop l_lock
958de551246SJoel Becker  *			<window>
959de551246SJoel Becker  *					  ocfs2_dlm_lock()
960de551246SJoel Becker  *
961de551246SJoel Becker  * So as you can see, we now have a window where l_lock is not held,
962de551246SJoel Becker  * PENDING is not set, and ocfs2_dlm_lock() has not been called.
963de551246SJoel Becker  *
964de551246SJoel Becker  * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
965de551246SJoel Becker  * set by ocfs2_prepare_downconvert().  That wasn't nice.
966de551246SJoel Becker  *
967de551246SJoel Becker  * To solve this we introduce l_pending_gen.  A call to
968de551246SJoel Becker  * lockres_clear_pending() will only do so when it is passed a generation
969de551246SJoel Becker  * number that matches the lockres.  lockres_set_pending() will return the
970de551246SJoel Becker  * current generation number.  When ocfs2_cluster_lock() goes to clear
971de551246SJoel Becker  * PENDING, it passes the generation it got from set_pending().  In our
972de551246SJoel Becker  * example above, the generation numbers will *not* match.  Thus,
973de551246SJoel Becker  * ocfs2_cluster_lock() will not clear the PENDING set by
974de551246SJoel Becker  * ocfs2_prepare_downconvert().
975de551246SJoel Becker  */
976de551246SJoel Becker 
977de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */
978de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
979de551246SJoel Becker 				    unsigned int generation,
980de551246SJoel Becker 				    struct ocfs2_super *osb)
981de551246SJoel Becker {
982de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
983de551246SJoel Becker 
984de551246SJoel Becker 	/*
985de551246SJoel Becker 	 * The ast and locking functions can race us here.  The winner
986de551246SJoel Becker 	 * will clear pending, the loser will not.
987de551246SJoel Becker 	 */
988de551246SJoel Becker 	if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
989de551246SJoel Becker 	    (lockres->l_pending_gen != generation))
990de551246SJoel Becker 		return;
991de551246SJoel Becker 
992de551246SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
993de551246SJoel Becker 	lockres->l_pending_gen++;
994de551246SJoel Becker 
995de551246SJoel Becker 	/*
996de551246SJoel Becker 	 * The downconvert thread may have skipped us because we
997de551246SJoel Becker 	 * were PENDING.  Wake it up.
998de551246SJoel Becker 	 */
999de551246SJoel Becker 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1000de551246SJoel Becker 		ocfs2_wake_downconvert_thread(osb);
1001de551246SJoel Becker }
1002de551246SJoel Becker 
1003de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */
1004de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
1005de551246SJoel Becker 				  unsigned int generation,
1006de551246SJoel Becker 				  struct ocfs2_super *osb)
1007de551246SJoel Becker {
1008de551246SJoel Becker 	unsigned long flags;
1009de551246SJoel Becker 
1010de551246SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1011de551246SJoel Becker 	__lockres_clear_pending(lockres, generation, osb);
1012de551246SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1013de551246SJoel Becker }
1014de551246SJoel Becker 
1015de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
1016de551246SJoel Becker {
1017de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
1018de551246SJoel Becker 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
1019de551246SJoel Becker 
1020de551246SJoel Becker 	lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
1021de551246SJoel Becker 
1022de551246SJoel Becker 	return lockres->l_pending_gen;
1023de551246SJoel Becker }
1024de551246SJoel Becker 
1025c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
1026ccd979bdSMark Fasheh {
1027a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1028aa2623adSMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1029ccd979bdSMark Fasheh 	int needs_downconvert;
1030ccd979bdSMark Fasheh 	unsigned long flags;
1031ccd979bdSMark Fasheh 
1032bd3e7610SJoel Becker 	BUG_ON(level <= DLM_LOCK_NL);
1033ccd979bdSMark Fasheh 
10349b915181SSunil Mushran 	mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, "
10359b915181SSunil Mushran 	     "type %s\n", lockres->l_name, level, lockres->l_level,
1036aa2623adSMark Fasheh 	     ocfs2_lock_type_string(lockres->l_type));
1037aa2623adSMark Fasheh 
1038cf8e06f1SMark Fasheh 	/*
1039cf8e06f1SMark Fasheh 	 * We can skip the bast for locks which don't enable caching -
1040cf8e06f1SMark Fasheh 	 * they'll be dropped at the earliest possible time anyway.
1041cf8e06f1SMark Fasheh 	 */
1042cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
1043cf8e06f1SMark Fasheh 		return;
1044cf8e06f1SMark Fasheh 
1045ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1046ccd979bdSMark Fasheh 	needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
1047ccd979bdSMark Fasheh 	if (needs_downconvert)
1048ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
1049ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1050ccd979bdSMark Fasheh 
1051d680efe9SMark Fasheh 	wake_up(&lockres->l_event);
1052d680efe9SMark Fasheh 
105334d024f8SMark Fasheh 	ocfs2_wake_downconvert_thread(osb);
1054ccd979bdSMark Fasheh }
1055ccd979bdSMark Fasheh 
1056c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
1057ccd979bdSMark Fasheh {
1058a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1059de551246SJoel Becker 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1060ccd979bdSMark Fasheh 	unsigned long flags;
10611693a5c0SDavid Teigland 	int status;
1062ccd979bdSMark Fasheh 
1063ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1064ccd979bdSMark Fasheh 
10651693a5c0SDavid Teigland 	status = ocfs2_dlm_lock_status(&lockres->l_lksb);
10661693a5c0SDavid Teigland 
10671693a5c0SDavid Teigland 	if (status == -EAGAIN) {
10681693a5c0SDavid Teigland 		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
10691693a5c0SDavid Teigland 		goto out;
10701693a5c0SDavid Teigland 	}
10711693a5c0SDavid Teigland 
10721693a5c0SDavid Teigland 	if (status) {
10738f2c9c1bSJoel Becker 		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
10741693a5c0SDavid Teigland 		     lockres->l_name, status);
1075ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1076ccd979bdSMark Fasheh 		return;
1077ccd979bdSMark Fasheh 	}
1078ccd979bdSMark Fasheh 
10799b915181SSunil Mushran 	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
10809b915181SSunil Mushran 	     "level %d => %d\n", lockres->l_name, lockres->l_action,
10819b915181SSunil Mushran 	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);
10829b915181SSunil Mushran 
1083ccd979bdSMark Fasheh 	switch(lockres->l_action) {
1084ccd979bdSMark Fasheh 	case OCFS2_AST_ATTACH:
1085ccd979bdSMark Fasheh 		ocfs2_generic_handle_attach_action(lockres);
1086e92d57dfSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
1087ccd979bdSMark Fasheh 		break;
1088ccd979bdSMark Fasheh 	case OCFS2_AST_CONVERT:
1089ccd979bdSMark Fasheh 		ocfs2_generic_handle_convert_action(lockres);
1090ccd979bdSMark Fasheh 		break;
1091ccd979bdSMark Fasheh 	case OCFS2_AST_DOWNCONVERT:
1092ccd979bdSMark Fasheh 		ocfs2_generic_handle_downconvert_action(lockres);
1093ccd979bdSMark Fasheh 		break;
1094ccd979bdSMark Fasheh 	default:
10959b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
10969b915181SSunil Mushran 		     "flags 0x%lx, unlock: %u\n",
1097e92d57dfSMark Fasheh 		     lockres->l_name, lockres->l_action, lockres->l_flags,
1098e92d57dfSMark Fasheh 		     lockres->l_unlock_action);
1099ccd979bdSMark Fasheh 		BUG();
1100ccd979bdSMark Fasheh 	}
11011693a5c0SDavid Teigland out:
1102ccd979bdSMark Fasheh 	/* set it to something invalid so if we get called again we
1103ccd979bdSMark Fasheh 	 * can catch it. */
1104ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_INVALID;
1105ccd979bdSMark Fasheh 
1106de551246SJoel Becker 	/* Did we try to cancel this lock?  Clear that state */
1107de551246SJoel Becker 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
1108de551246SJoel Becker 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1109de551246SJoel Becker 
1110de551246SJoel Becker 	/*
1111de551246SJoel Becker 	 * We may have beaten the locking functions here.  We certainly
1112de551246SJoel Becker 	 * know that dlm_lock() has been called :-)
1113de551246SJoel Becker 	 * Because we can't have two lock calls in flight at once, we
1114de551246SJoel Becker 	 * can use lockres->l_pending_gen.
1115de551246SJoel Becker 	 */
1116de551246SJoel Becker 	__lockres_clear_pending(lockres, lockres->l_pending_gen,  osb);
1117de551246SJoel Becker 
1118ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1119d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1120ccd979bdSMark Fasheh }
1121ccd979bdSMark Fasheh 
1122553b5eb9SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1123553b5eb9SJoel Becker {
1124553b5eb9SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1125553b5eb9SJoel Becker 	unsigned long flags;
1126553b5eb9SJoel Becker 
11279b915181SSunil Mushran 	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
11289b915181SSunil Mushran 	     lockres->l_name, lockres->l_unlock_action);
1129553b5eb9SJoel Becker 
1130553b5eb9SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1131553b5eb9SJoel Becker 	if (error) {
1132553b5eb9SJoel Becker 		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
1133553b5eb9SJoel Becker 		     "unlock_action %d\n", error, lockres->l_name,
1134553b5eb9SJoel Becker 		     lockres->l_unlock_action);
1135553b5eb9SJoel Becker 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1136553b5eb9SJoel Becker 		return;
1137553b5eb9SJoel Becker 	}
1138553b5eb9SJoel Becker 
1139553b5eb9SJoel Becker 	switch(lockres->l_unlock_action) {
1140553b5eb9SJoel Becker 	case OCFS2_UNLOCK_CANCEL_CONVERT:
1141553b5eb9SJoel Becker 		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
1142553b5eb9SJoel Becker 		lockres->l_action = OCFS2_AST_INVALID;
1143553b5eb9SJoel Becker 		/* Downconvert thread may have requeued this lock, we
1144553b5eb9SJoel Becker 		 * need to wake it. */
1145553b5eb9SJoel Becker 		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1146553b5eb9SJoel Becker 			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
1147553b5eb9SJoel Becker 		break;
1148553b5eb9SJoel Becker 	case OCFS2_UNLOCK_DROP_LOCK:
1149553b5eb9SJoel Becker 		lockres->l_level = DLM_LOCK_IV;
1150553b5eb9SJoel Becker 		break;
1151553b5eb9SJoel Becker 	default:
1152553b5eb9SJoel Becker 		BUG();
1153553b5eb9SJoel Becker 	}
1154553b5eb9SJoel Becker 
1155553b5eb9SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1156553b5eb9SJoel Becker 	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1157553b5eb9SJoel Becker 	wake_up(&lockres->l_event);
1158553b5eb9SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1159553b5eb9SJoel Becker }
1160553b5eb9SJoel Becker 
1161553b5eb9SJoel Becker /*
1162553b5eb9SJoel Becker  * This is the filesystem locking protocol.  It provides the lock handling
1163553b5eb9SJoel Becker  * hooks for the underlying DLM.  It has a maximum version number.
1164553b5eb9SJoel Becker  * The version number allows interoperability with systems running at
1165553b5eb9SJoel Becker  * the same major number and an equal or smaller minor number.
1166553b5eb9SJoel Becker  *
1167553b5eb9SJoel Becker  * Whenever the filesystem does new things with locks (adds or removes a
1168553b5eb9SJoel Becker  * lock, orders them differently, does different things underneath a lock),
1169553b5eb9SJoel Becker  * the version must be changed.  The protocol is negotiated when joining
1170553b5eb9SJoel Becker  * the dlm domain.  A node may join the domain if its major version is
1171553b5eb9SJoel Becker  * identical to all other nodes and its minor version is greater than
1172553b5eb9SJoel Becker  * or equal to all other nodes.  When its minor version is greater than
1173553b5eb9SJoel Becker  * the other nodes, it will run at the minor version specified by the
1174553b5eb9SJoel Becker  * other nodes.
1175553b5eb9SJoel Becker  *
1176553b5eb9SJoel Becker  * If a locking change is made that will not be compatible with older
1177553b5eb9SJoel Becker  * versions, the major number must be increased and the minor version set
1178553b5eb9SJoel Becker  * to zero.  If a change merely adds a behavior that can be disabled when
1179553b5eb9SJoel Becker  * speaking to older versions, the minor version must be increased.  If a
1180553b5eb9SJoel Becker  * change adds a fully backwards compatible change (eg, LVB changes that
1181553b5eb9SJoel Becker  * are just ignored by older versions), the version does not need to be
1182553b5eb9SJoel Becker  * updated.
1183553b5eb9SJoel Becker  */
1184553b5eb9SJoel Becker static struct ocfs2_locking_protocol lproto = {
1185553b5eb9SJoel Becker 	.lp_max_version = {
1186553b5eb9SJoel Becker 		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
1187553b5eb9SJoel Becker 		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
1188553b5eb9SJoel Becker 	},
1189553b5eb9SJoel Becker 	.lp_lock_ast		= ocfs2_locking_ast,
1190553b5eb9SJoel Becker 	.lp_blocking_ast	= ocfs2_blocking_ast,
1191553b5eb9SJoel Becker 	.lp_unlock_ast		= ocfs2_unlock_ast,
1192553b5eb9SJoel Becker };
1193553b5eb9SJoel Becker 
1194553b5eb9SJoel Becker void ocfs2_set_locking_protocol(void)
1195553b5eb9SJoel Becker {
1196553b5eb9SJoel Becker 	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
1197553b5eb9SJoel Becker }
1198553b5eb9SJoel Becker 
1199ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1200ccd979bdSMark Fasheh 						int convert)
1201ccd979bdSMark Fasheh {
1202ccd979bdSMark Fasheh 	unsigned long flags;
1203ccd979bdSMark Fasheh 
1204ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1205ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1206a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1207ccd979bdSMark Fasheh 	if (convert)
1208ccd979bdSMark Fasheh 		lockres->l_action = OCFS2_AST_INVALID;
1209ccd979bdSMark Fasheh 	else
1210ccd979bdSMark Fasheh 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1211ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1212ccd979bdSMark Fasheh 
1213ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1214ccd979bdSMark Fasheh }
1215ccd979bdSMark Fasheh 
1216ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e.,
1217ccd979bdSMark Fasheh  * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
1218ccd979bdSMark Fasheh  * to do the right thing in that case.
1219ccd979bdSMark Fasheh  */
1220ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
1221ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
1222ccd979bdSMark Fasheh 			     int level,
1223bd3e7610SJoel Becker 			     u32 dlm_flags)
1224ccd979bdSMark Fasheh {
1225ccd979bdSMark Fasheh 	int ret = 0;
1226ccd979bdSMark Fasheh 	unsigned long flags;
1227de551246SJoel Becker 	unsigned int gen;
1228ccd979bdSMark Fasheh 
1229bd3e7610SJoel Becker 	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
1230ccd979bdSMark Fasheh 	     dlm_flags);
1231ccd979bdSMark Fasheh 
1232ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1233ccd979bdSMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
1234ccd979bdSMark Fasheh 	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
1235ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1236ccd979bdSMark Fasheh 		goto bail;
1237ccd979bdSMark Fasheh 	}
1238ccd979bdSMark Fasheh 
1239ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_ATTACH;
1240ccd979bdSMark Fasheh 	lockres->l_requested = level;
1241ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1242de551246SJoel Becker 	gen = lockres_set_pending(lockres);
1243ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1244ccd979bdSMark Fasheh 
12454670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
1246ccd979bdSMark Fasheh 			     level,
1247ccd979bdSMark Fasheh 			     &lockres->l_lksb,
1248ccd979bdSMark Fasheh 			     dlm_flags,
1249ccd979bdSMark Fasheh 			     lockres->l_name,
1250a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
1251de551246SJoel Becker 	lockres_clear_pending(lockres, gen, osb);
12527431cd7eSJoel Becker 	if (ret) {
12537431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1254ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
1255ccd979bdSMark Fasheh 	}
1256ccd979bdSMark Fasheh 
12577431cd7eSJoel Becker 	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
1258ccd979bdSMark Fasheh 
1259ccd979bdSMark Fasheh bail:
1260ccd979bdSMark Fasheh 	return ret;
1261ccd979bdSMark Fasheh }
1262ccd979bdSMark Fasheh 
1263ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
1264ccd979bdSMark Fasheh 					int flag)
1265ccd979bdSMark Fasheh {
1266ccd979bdSMark Fasheh 	unsigned long flags;
1267ccd979bdSMark Fasheh 	int ret;
1268ccd979bdSMark Fasheh 
1269ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1270ccd979bdSMark Fasheh 	ret = lockres->l_flags & flag;
1271ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1272ccd979bdSMark Fasheh 
1273ccd979bdSMark Fasheh 	return ret;
1274ccd979bdSMark Fasheh }
1275ccd979bdSMark Fasheh 
1276ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)
1277ccd979bdSMark Fasheh 
1278ccd979bdSMark Fasheh {
1279ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1280ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
1281ccd979bdSMark Fasheh }
1282ccd979bdSMark Fasheh 
1283ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)
1284ccd979bdSMark Fasheh 
1285ccd979bdSMark Fasheh {
1286ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1287ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
1288ccd979bdSMark Fasheh }
1289ccd979bdSMark Fasheh 
1290ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf
1291ccd979bdSMark Fasheh  * of another node, and return true if the currently wanted
1292ccd979bdSMark Fasheh  * level will be compatible with it. */
1293ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
1294ccd979bdSMark Fasheh 						     int wanted)
1295ccd979bdSMark Fasheh {
1296ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
1297ccd979bdSMark Fasheh 
1298ccd979bdSMark Fasheh 	return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
1299ccd979bdSMark Fasheh }
1300ccd979bdSMark Fasheh 
1301ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
1302ccd979bdSMark Fasheh {
1303ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&mw->mw_item);
1304ccd979bdSMark Fasheh 	init_completion(&mw->mw_complete);
13058ddb7b00SSunil Mushran 	ocfs2_init_start_time(mw);
1306ccd979bdSMark Fasheh }
1307ccd979bdSMark Fasheh 
1308ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
1309ccd979bdSMark Fasheh {
1310ccd979bdSMark Fasheh 	wait_for_completion(&mw->mw_complete);
1311ccd979bdSMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
131216735d02SWolfram Sang 	reinit_completion(&mw->mw_complete);
1313ccd979bdSMark Fasheh 	return mw->mw_status;
1314ccd979bdSMark Fasheh }
1315ccd979bdSMark Fasheh 
1316ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
1317ccd979bdSMark Fasheh 				    struct ocfs2_mask_waiter *mw,
1318ccd979bdSMark Fasheh 				    unsigned long mask,
1319ccd979bdSMark Fasheh 				    unsigned long goal)
1320ccd979bdSMark Fasheh {
1321ccd979bdSMark Fasheh 	BUG_ON(!list_empty(&mw->mw_item));
1322ccd979bdSMark Fasheh 
1323ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
1324ccd979bdSMark Fasheh 
1325ccd979bdSMark Fasheh 	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
1326ccd979bdSMark Fasheh 	mw->mw_mask = mask;
1327ccd979bdSMark Fasheh 	mw->mw_goal = goal;
1328ccd979bdSMark Fasheh }
1329ccd979bdSMark Fasheh 
1330ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY
1331ccd979bdSMark Fasheh  * if the mask still hadn't reached its goal */
1332d1e78238SXue jiufei static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1333ccd979bdSMark Fasheh 				      struct ocfs2_mask_waiter *mw)
1334ccd979bdSMark Fasheh {
1335ccd979bdSMark Fasheh 	int ret = 0;
1336ccd979bdSMark Fasheh 
1337d1e78238SXue jiufei 	assert_spin_locked(&lockres->l_lock);
1338ccd979bdSMark Fasheh 	if (!list_empty(&mw->mw_item)) {
1339ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1340ccd979bdSMark Fasheh 			ret = -EBUSY;
1341ccd979bdSMark Fasheh 
1342ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
1343ccd979bdSMark Fasheh 		init_completion(&mw->mw_complete);
1344ccd979bdSMark Fasheh 	}
1345d1e78238SXue jiufei 
1346d1e78238SXue jiufei 	return ret;
1347d1e78238SXue jiufei }
1348d1e78238SXue jiufei 
1349d1e78238SXue jiufei static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1350d1e78238SXue jiufei 				      struct ocfs2_mask_waiter *mw)
1351d1e78238SXue jiufei {
1352d1e78238SXue jiufei 	unsigned long flags;
1353d1e78238SXue jiufei 	int ret = 0;
1354d1e78238SXue jiufei 
1355d1e78238SXue jiufei 	spin_lock_irqsave(&lockres->l_lock, flags);
1356d1e78238SXue jiufei 	ret = __lockres_remove_mask_waiter(lockres, mw);
1357ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1358ccd979bdSMark Fasheh 
1359ccd979bdSMark Fasheh 	return ret;
1360ccd979bdSMark Fasheh 
1361ccd979bdSMark Fasheh }
1362ccd979bdSMark Fasheh 
1363cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1364cf8e06f1SMark Fasheh 					     struct ocfs2_lock_res *lockres)
1365cf8e06f1SMark Fasheh {
1366cf8e06f1SMark Fasheh 	int ret;
1367cf8e06f1SMark Fasheh 
1368cf8e06f1SMark Fasheh 	ret = wait_for_completion_interruptible(&mw->mw_complete);
1369cf8e06f1SMark Fasheh 	if (ret)
1370cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, mw);
1371cf8e06f1SMark Fasheh 	else
1372cf8e06f1SMark Fasheh 		ret = mw->mw_status;
1373cf8e06f1SMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
137416735d02SWolfram Sang 	reinit_completion(&mw->mw_complete);
1375cf8e06f1SMark Fasheh 	return ret;
1376cf8e06f1SMark Fasheh }
1377cf8e06f1SMark Fasheh 
1378cb25797dSJan Kara static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1379ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres,
1380ccd979bdSMark Fasheh 				int level,
1381bd3e7610SJoel Becker 				u32 lkm_flags,
1382cb25797dSJan Kara 				int arg_flags,
1383cb25797dSJan Kara 				int l_subclass,
1384cb25797dSJan Kara 				unsigned long caller_ip)
1385ccd979bdSMark Fasheh {
1386ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
1387ccd979bdSMark Fasheh 	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1388ccd979bdSMark Fasheh 	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1389ccd979bdSMark Fasheh 	unsigned long flags;
1390de551246SJoel Becker 	unsigned int gen;
13911693a5c0SDavid Teigland 	int noqueue_attempted = 0;
1392d1e78238SXue jiufei 	int dlm_locked = 0;
1393b1b1e15eSTariq Saeed 	int kick_dc = 0;
1394ccd979bdSMark Fasheh 
13952f2eca20Salex chen 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
13962f2eca20Salex chen 		mlog_errno(-EINVAL);
13972f2eca20Salex chen 		return -EINVAL;
13982f2eca20Salex chen 	}
13992f2eca20Salex chen 
1400ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1401ccd979bdSMark Fasheh 
1402b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
1403bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
1404b80fc012SMark Fasheh 
1405ccd979bdSMark Fasheh again:
1406ccd979bdSMark Fasheh 	wait = 0;
1407ccd979bdSMark Fasheh 
1408a1912826SSunil Mushran 	spin_lock_irqsave(&lockres->l_lock, flags);
1409a1912826SSunil Mushran 
1410ccd979bdSMark Fasheh 	if (catch_signals && signal_pending(current)) {
1411ccd979bdSMark Fasheh 		ret = -ERESTARTSYS;
1412a1912826SSunil Mushran 		goto unlock;
1413ccd979bdSMark Fasheh 	}
1414ccd979bdSMark Fasheh 
1415ccd979bdSMark Fasheh 	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1416ccd979bdSMark Fasheh 			"Cluster lock called on freeing lockres %s! flags "
1417ccd979bdSMark Fasheh 			"0x%lx\n", lockres->l_name, lockres->l_flags);
1418ccd979bdSMark Fasheh 
1419ccd979bdSMark Fasheh 	/* We only compare against the currently granted level
1420ccd979bdSMark Fasheh 	 * here. If the lock is blocked waiting on a downconvert,
1421ccd979bdSMark Fasheh 	 * we'll get caught below. */
1422ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
1423ccd979bdSMark Fasheh 	    level > lockres->l_level) {
1424ccd979bdSMark Fasheh 		/* is someone sitting in dlm_lock? If so, wait on
1425ccd979bdSMark Fasheh 		 * them. */
1426ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1427ccd979bdSMark Fasheh 		wait = 1;
1428ccd979bdSMark Fasheh 		goto unlock;
1429ccd979bdSMark Fasheh 	}
1430ccd979bdSMark Fasheh 
1431a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1432a1912826SSunil Mushran 		/*
1433a1912826SSunil Mushran 		 * We've upconverted. If the lock now has a level we can
1434a1912826SSunil Mushran 		 * work with, we take it. If, however, the lock is not at the
1435a1912826SSunil Mushran 		 * required level, we go thru the full cycle. One way this could
1436a1912826SSunil Mushran 		 * happen is if a process requesting an upconvert to PR is
1437a1912826SSunil Mushran 		 * closely followed by another requesting upconvert to an EX.
1438a1912826SSunil Mushran 		 * If the process requesting EX lands here, we want it to
1439a1912826SSunil Mushran 		 * continue attempting to upconvert and let the process
1440a1912826SSunil Mushran 		 * requesting PR take the lock.
1441a1912826SSunil Mushran 		 * If multiple processes request upconvert to PR, the first one
1442a1912826SSunil Mushran 		 * here will take the lock. The others will have to go thru the
1443a1912826SSunil Mushran 		 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1444a1912826SSunil Mushran 		 * downconvert request.
1445a1912826SSunil Mushran 		 */
1446a1912826SSunil Mushran 		if (level <= lockres->l_level)
1447a1912826SSunil Mushran 			goto update_holders;
1448a1912826SSunil Mushran 	}
1449a1912826SSunil Mushran 
1450ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1451ccd979bdSMark Fasheh 	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1452ccd979bdSMark Fasheh 		/* is the lock is currently blocked on behalf of
1453ccd979bdSMark Fasheh 		 * another node */
1454ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
1455ccd979bdSMark Fasheh 		wait = 1;
1456ccd979bdSMark Fasheh 		goto unlock;
1457ccd979bdSMark Fasheh 	}
1458ccd979bdSMark Fasheh 
1459ccd979bdSMark Fasheh 	if (level > lockres->l_level) {
14601693a5c0SDavid Teigland 		if (noqueue_attempted > 0) {
14611693a5c0SDavid Teigland 			ret = -EAGAIN;
14621693a5c0SDavid Teigland 			goto unlock;
14631693a5c0SDavid Teigland 		}
14641693a5c0SDavid Teigland 		if (lkm_flags & DLM_LKF_NOQUEUE)
14651693a5c0SDavid Teigland 			noqueue_attempted = 1;
14661693a5c0SDavid Teigland 
1467ccd979bdSMark Fasheh 		if (lockres->l_action != OCFS2_AST_INVALID)
1468ccd979bdSMark Fasheh 			mlog(ML_ERROR, "lockres %s has action %u pending\n",
1469ccd979bdSMark Fasheh 			     lockres->l_name, lockres->l_action);
1470ccd979bdSMark Fasheh 
1471019d1b22SMark Fasheh 		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1472019d1b22SMark Fasheh 			lockres->l_action = OCFS2_AST_ATTACH;
1473bd3e7610SJoel Becker 			lkm_flags &= ~DLM_LKF_CONVERT;
1474019d1b22SMark Fasheh 		} else {
1475ccd979bdSMark Fasheh 			lockres->l_action = OCFS2_AST_CONVERT;
1476bd3e7610SJoel Becker 			lkm_flags |= DLM_LKF_CONVERT;
1477019d1b22SMark Fasheh 		}
1478019d1b22SMark Fasheh 
1479ccd979bdSMark Fasheh 		lockres->l_requested = level;
1480ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1481de551246SJoel Becker 		gen = lockres_set_pending(lockres);
1482ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1483ccd979bdSMark Fasheh 
1484bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_IV);
1485bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_NL);
1486ccd979bdSMark Fasheh 
14879b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
1488ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_level, level);
1489ccd979bdSMark Fasheh 
1490ccd979bdSMark Fasheh 		/* call dlm_lock to upgrade lock now */
14914670c46dSJoel Becker 		ret = ocfs2_dlm_lock(osb->cconn,
1492ccd979bdSMark Fasheh 				     level,
1493ccd979bdSMark Fasheh 				     &lockres->l_lksb,
1494019d1b22SMark Fasheh 				     lkm_flags,
1495ccd979bdSMark Fasheh 				     lockres->l_name,
1496a796d286SJoel Becker 				     OCFS2_LOCK_ID_MAX_LEN - 1);
1497de551246SJoel Becker 		lockres_clear_pending(lockres, gen, osb);
14987431cd7eSJoel Becker 		if (ret) {
14997431cd7eSJoel Becker 			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
15007431cd7eSJoel Becker 			    (ret != -EAGAIN)) {
150124ef1815SJoel Becker 				ocfs2_log_dlm_error("ocfs2_dlm_lock",
15027431cd7eSJoel Becker 						    ret, lockres);
1503ccd979bdSMark Fasheh 			}
1504ccd979bdSMark Fasheh 			ocfs2_recover_from_dlm_error(lockres, 1);
1505ccd979bdSMark Fasheh 			goto out;
1506ccd979bdSMark Fasheh 		}
1507d1e78238SXue jiufei 		dlm_locked = 1;
1508ccd979bdSMark Fasheh 
150973ac36eaSColy Li 		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
1510ccd979bdSMark Fasheh 		     lockres->l_name);
1511ccd979bdSMark Fasheh 
1512ccd979bdSMark Fasheh 		/* At this point we've gone inside the dlm and need to
1513ccd979bdSMark Fasheh 		 * complete our work regardless. */
1514ccd979bdSMark Fasheh 		catch_signals = 0;
1515ccd979bdSMark Fasheh 
1516ccd979bdSMark Fasheh 		/* wait for busy to clear and carry on */
1517ccd979bdSMark Fasheh 		goto again;
1518ccd979bdSMark Fasheh 	}
1519ccd979bdSMark Fasheh 
1520a1912826SSunil Mushran update_holders:
1521ccd979bdSMark Fasheh 	/* Ok, if we get here then we're good to go. */
1522ccd979bdSMark Fasheh 	ocfs2_inc_holders(lockres, level);
1523ccd979bdSMark Fasheh 
1524ccd979bdSMark Fasheh 	ret = 0;
1525ccd979bdSMark Fasheh unlock:
1526a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1527a1912826SSunil Mushran 
1528b1b1e15eSTariq Saeed 	/* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
1529b1b1e15eSTariq Saeed 	kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
1530b1b1e15eSTariq Saeed 
1531ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1532b1b1e15eSTariq Saeed 	if (kick_dc)
1533b1b1e15eSTariq Saeed 		ocfs2_wake_downconvert_thread(osb);
1534ccd979bdSMark Fasheh out:
1535ccd979bdSMark Fasheh 	/*
1536ccd979bdSMark Fasheh 	 * This is helping work around a lock inversion between the page lock
1537ccd979bdSMark Fasheh 	 * and dlm locks.  One path holds the page lock while calling aops
1538ccd979bdSMark Fasheh 	 * which block acquiring dlm locks.  The voting thread holds dlm
1539ccd979bdSMark Fasheh 	 * locks while acquiring page locks while down converting data locks.
1540ccd979bdSMark Fasheh 	 * This block is helping an aop path notice the inversion and back
1541ccd979bdSMark Fasheh 	 * off to unlock its page lock before trying the dlm lock again.
1542ccd979bdSMark Fasheh 	 */
1543ccd979bdSMark Fasheh 	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
1544ccd979bdSMark Fasheh 	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
1545ccd979bdSMark Fasheh 		wait = 0;
1546d1e78238SXue jiufei 		spin_lock_irqsave(&lockres->l_lock, flags);
1547d1e78238SXue jiufei 		if (__lockres_remove_mask_waiter(lockres, &mw)) {
1548d1e78238SXue jiufei 			if (dlm_locked)
1549d1e78238SXue jiufei 				lockres_or_flags(lockres,
1550d1e78238SXue jiufei 					OCFS2_LOCK_NONBLOCK_FINISHED);
1551d1e78238SXue jiufei 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1552ccd979bdSMark Fasheh 			ret = -EAGAIN;
1553d1e78238SXue jiufei 		} else {
1554d1e78238SXue jiufei 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1555ccd979bdSMark Fasheh 			goto again;
1556ccd979bdSMark Fasheh 		}
1557d1e78238SXue jiufei 	}
1558ccd979bdSMark Fasheh 	if (wait) {
1559ccd979bdSMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
1560ccd979bdSMark Fasheh 		if (ret == 0)
1561ccd979bdSMark Fasheh 			goto again;
1562ccd979bdSMark Fasheh 		mlog_errno(ret);
1563ccd979bdSMark Fasheh 	}
15648ddb7b00SSunil Mushran 	ocfs2_update_lock_stats(lockres, level, &mw, ret);
1565ccd979bdSMark Fasheh 
1566cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
1567cb25797dSJan Kara 	if (!ret && lockres->l_lockdep_map.key != NULL) {
1568cb25797dSJan Kara 		if (level == DLM_LOCK_PR)
1569cb25797dSJan Kara 			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1570cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1571cb25797dSJan Kara 				caller_ip);
1572cb25797dSJan Kara 		else
1573cb25797dSJan Kara 			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1574cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1575cb25797dSJan Kara 				caller_ip);
1576cb25797dSJan Kara 	}
1577cb25797dSJan Kara #endif
1578ccd979bdSMark Fasheh 	return ret;
1579ccd979bdSMark Fasheh }
1580ccd979bdSMark Fasheh 
1581cb25797dSJan Kara static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1582ccd979bdSMark Fasheh 				     struct ocfs2_lock_res *lockres,
1583cb25797dSJan Kara 				     int level,
1584cb25797dSJan Kara 				     u32 lkm_flags,
1585cb25797dSJan Kara 				     int arg_flags)
1586cb25797dSJan Kara {
1587cb25797dSJan Kara 	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1588cb25797dSJan Kara 				    0, _RET_IP_);
1589cb25797dSJan Kara }
1590cb25797dSJan Kara 
1591cb25797dSJan Kara 
1592cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1593cb25797dSJan Kara 				   struct ocfs2_lock_res *lockres,
1594cb25797dSJan Kara 				   int level,
1595cb25797dSJan Kara 				   unsigned long caller_ip)
1596ccd979bdSMark Fasheh {
1597ccd979bdSMark Fasheh 	unsigned long flags;
1598ccd979bdSMark Fasheh 
1599ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1600ccd979bdSMark Fasheh 	ocfs2_dec_holders(lockres, level);
160134d024f8SMark Fasheh 	ocfs2_downconvert_on_unlock(osb, lockres);
1602ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1603cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
1604cb25797dSJan Kara 	if (lockres->l_lockdep_map.key != NULL)
1605cb25797dSJan Kara 		rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1606cb25797dSJan Kara #endif
1607ccd979bdSMark Fasheh }
1608ccd979bdSMark Fasheh 
1609da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1610d680efe9SMark Fasheh 				 struct ocfs2_lock_res *lockres,
161124c19ef4SMark Fasheh 				 int ex,
161224c19ef4SMark Fasheh 				 int local)
1613ccd979bdSMark Fasheh {
1614bd3e7610SJoel Becker 	int level =  ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1615ccd979bdSMark Fasheh 	unsigned long flags;
1616bd3e7610SJoel Becker 	u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1617ccd979bdSMark Fasheh 
1618ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1619ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1620ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1621ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1622ccd979bdSMark Fasheh 
162324c19ef4SMark Fasheh 	return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1624ccd979bdSMark Fasheh }
1625ccd979bdSMark Fasheh 
1626ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping
1627ccd979bdSMark Fasheh  * the normal cluster directory lookup. Use this ONLY on newly created
1628ccd979bdSMark Fasheh  * inodes which other nodes can't possibly see, and which haven't been
1629ccd979bdSMark Fasheh  * hashed in the inode hash yet. This can give us a good performance
1630ccd979bdSMark Fasheh  * increase as it'll skip the network broadcast normally associated
1631ccd979bdSMark Fasheh  * with creating a new lock resource. */
1632ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode)
1633ccd979bdSMark Fasheh {
1634ccd979bdSMark Fasheh 	int ret;
1635d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1636ccd979bdSMark Fasheh 
1637ccd979bdSMark Fasheh 	BUG_ON(!inode);
1638ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_inode_is_new(inode));
1639ccd979bdSMark Fasheh 
1640b0697053SMark Fasheh 	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1641ccd979bdSMark Fasheh 
1642ccd979bdSMark Fasheh 	/* NOTE: That we don't increment any of the holder counts, nor
1643ccd979bdSMark Fasheh 	 * do we add anything to a journal handle. Since this is
1644ccd979bdSMark Fasheh 	 * supposed to be a new inode which the cluster doesn't know
1645ccd979bdSMark Fasheh 	 * about yet, there is no need to.  As far as the LVB handling
1646ccd979bdSMark Fasheh 	 * is concerned, this is basically like acquiring an EX lock
1647ccd979bdSMark Fasheh 	 * on a resource which has an invalid one -- we'll set it
1648ccd979bdSMark Fasheh 	 * valid when we release the EX. */
1649ccd979bdSMark Fasheh 
165024c19ef4SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1651ccd979bdSMark Fasheh 	if (ret) {
1652ccd979bdSMark Fasheh 		mlog_errno(ret);
1653ccd979bdSMark Fasheh 		goto bail;
1654ccd979bdSMark Fasheh 	}
1655ccd979bdSMark Fasheh 
165624c19ef4SMark Fasheh 	/*
1657bd3e7610SJoel Becker 	 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
165824c19ef4SMark Fasheh 	 * don't use a generation in their lock names.
165924c19ef4SMark Fasheh 	 */
1660e63aecb6SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
1661ccd979bdSMark Fasheh 	if (ret) {
1662ccd979bdSMark Fasheh 		mlog_errno(ret);
1663ccd979bdSMark Fasheh 		goto bail;
1664ccd979bdSMark Fasheh 	}
1665ccd979bdSMark Fasheh 
166650008630STiger Yang 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
166750008630STiger Yang 	if (ret) {
166850008630STiger Yang 		mlog_errno(ret);
166950008630STiger Yang 		goto bail;
167050008630STiger Yang 	}
167150008630STiger Yang 
1672ccd979bdSMark Fasheh bail:
1673ccd979bdSMark Fasheh 	return ret;
1674ccd979bdSMark Fasheh }
1675ccd979bdSMark Fasheh 
1676ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write)
1677ccd979bdSMark Fasheh {
1678ccd979bdSMark Fasheh 	int status, level;
1679ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres;
1680c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1681ccd979bdSMark Fasheh 
1682ccd979bdSMark Fasheh 	BUG_ON(!inode);
1683ccd979bdSMark Fasheh 
1684b0697053SMark Fasheh 	mlog(0, "inode %llu take %s RW lock\n",
1685b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1686ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1687ccd979bdSMark Fasheh 
1688c1e8d35eSTao Ma 	if (ocfs2_mount_local(osb))
1689c271c5c2SSunil Mushran 		return 0;
1690c271c5c2SSunil Mushran 
1691ccd979bdSMark Fasheh 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
1692ccd979bdSMark Fasheh 
1693bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1694ccd979bdSMark Fasheh 
1695ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1696ccd979bdSMark Fasheh 				    0);
1697ccd979bdSMark Fasheh 	if (status < 0)
1698ccd979bdSMark Fasheh 		mlog_errno(status);
1699ccd979bdSMark Fasheh 
1700ccd979bdSMark Fasheh 	return status;
1701ccd979bdSMark Fasheh }
1702ccd979bdSMark Fasheh 
1703ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write)
1704ccd979bdSMark Fasheh {
1705bd3e7610SJoel Becker 	int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1706ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1707c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1708ccd979bdSMark Fasheh 
1709b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s RW lock\n",
1710b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1711ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1712ccd979bdSMark Fasheh 
1713c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
1714ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1715ccd979bdSMark Fasheh }
1716ccd979bdSMark Fasheh 
171750008630STiger Yang /*
171850008630STiger Yang  * ocfs2_open_lock always get PR mode lock.
171950008630STiger Yang  */
172050008630STiger Yang int ocfs2_open_lock(struct inode *inode)
172150008630STiger Yang {
172250008630STiger Yang 	int status = 0;
172350008630STiger Yang 	struct ocfs2_lock_res *lockres;
172450008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
172550008630STiger Yang 
172650008630STiger Yang 	BUG_ON(!inode);
172750008630STiger Yang 
172850008630STiger Yang 	mlog(0, "inode %llu take PRMODE open lock\n",
172950008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
173050008630STiger Yang 
173103efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
173250008630STiger Yang 		goto out;
173350008630STiger Yang 
173450008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
173550008630STiger Yang 
173650008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1737bd3e7610SJoel Becker 				    DLM_LOCK_PR, 0, 0);
173850008630STiger Yang 	if (status < 0)
173950008630STiger Yang 		mlog_errno(status);
174050008630STiger Yang 
174150008630STiger Yang out:
174250008630STiger Yang 	return status;
174350008630STiger Yang }
174450008630STiger Yang 
174550008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write)
174650008630STiger Yang {
174750008630STiger Yang 	int status = 0, level;
174850008630STiger Yang 	struct ocfs2_lock_res *lockres;
174950008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
175050008630STiger Yang 
175150008630STiger Yang 	BUG_ON(!inode);
175250008630STiger Yang 
175350008630STiger Yang 	mlog(0, "inode %llu try to take %s open lock\n",
175450008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
175550008630STiger Yang 	     write ? "EXMODE" : "PRMODE");
175650008630STiger Yang 
175703efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
175803efed8aSTiger Yang 		if (write)
175903efed8aSTiger Yang 			status = -EROFS;
176003efed8aSTiger Yang 		goto out;
176103efed8aSTiger Yang 	}
176203efed8aSTiger Yang 
176350008630STiger Yang 	if (ocfs2_mount_local(osb))
176450008630STiger Yang 		goto out;
176550008630STiger Yang 
176650008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
176750008630STiger Yang 
1768bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
176950008630STiger Yang 
177050008630STiger Yang 	/*
177150008630STiger Yang 	 * The file system may already holding a PRMODE/EXMODE open lock.
1772bd3e7610SJoel Becker 	 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
177350008630STiger Yang 	 * other nodes and the -EAGAIN will indicate to the caller that
177450008630STiger Yang 	 * this inode is still in use.
177550008630STiger Yang 	 */
177650008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1777bd3e7610SJoel Becker 				    level, DLM_LKF_NOQUEUE, 0);
177850008630STiger Yang 
177950008630STiger Yang out:
178050008630STiger Yang 	return status;
178150008630STiger Yang }
178250008630STiger Yang 
178350008630STiger Yang /*
178450008630STiger Yang  * ocfs2_open_unlock unlock PR and EX mode open locks.
178550008630STiger Yang  */
178650008630STiger Yang void ocfs2_open_unlock(struct inode *inode)
178750008630STiger Yang {
178850008630STiger Yang 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
178950008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
179050008630STiger Yang 
179150008630STiger Yang 	mlog(0, "inode %llu drop open lock\n",
179250008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
179350008630STiger Yang 
179450008630STiger Yang 	if (ocfs2_mount_local(osb))
179550008630STiger Yang 		goto out;
179650008630STiger Yang 
179750008630STiger Yang 	if(lockres->l_ro_holders)
179850008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1799bd3e7610SJoel Becker 				     DLM_LOCK_PR);
180050008630STiger Yang 	if(lockres->l_ex_holders)
180150008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1802bd3e7610SJoel Becker 				     DLM_LOCK_EX);
180350008630STiger Yang 
180450008630STiger Yang out:
1805c1e8d35eSTao Ma 	return;
180650008630STiger Yang }
180750008630STiger Yang 
1808cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1809cf8e06f1SMark Fasheh 				     int level)
1810cf8e06f1SMark Fasheh {
1811cf8e06f1SMark Fasheh 	int ret;
1812cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1813cf8e06f1SMark Fasheh 	unsigned long flags;
1814cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1815cf8e06f1SMark Fasheh 
1816cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1817cf8e06f1SMark Fasheh 
1818cf8e06f1SMark Fasheh retry_cancel:
1819cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1820cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
1821cf8e06f1SMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
1822cf8e06f1SMark Fasheh 		if (ret) {
1823cf8e06f1SMark Fasheh 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1824cf8e06f1SMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
1825cf8e06f1SMark Fasheh 			if (ret < 0) {
1826cf8e06f1SMark Fasheh 				mlog_errno(ret);
1827cf8e06f1SMark Fasheh 				goto out;
1828cf8e06f1SMark Fasheh 			}
1829cf8e06f1SMark Fasheh 			goto retry_cancel;
1830cf8e06f1SMark Fasheh 		}
1831cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1832cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1833cf8e06f1SMark Fasheh 
1834cf8e06f1SMark Fasheh 		ocfs2_wait_for_mask(&mw);
1835cf8e06f1SMark Fasheh 		goto retry_cancel;
1836cf8e06f1SMark Fasheh 	}
1837cf8e06f1SMark Fasheh 
1838cf8e06f1SMark Fasheh 	ret = -ERESTARTSYS;
1839cf8e06f1SMark Fasheh 	/*
1840cf8e06f1SMark Fasheh 	 * We may still have gotten the lock, in which case there's no
1841cf8e06f1SMark Fasheh 	 * point to restarting the syscall.
1842cf8e06f1SMark Fasheh 	 */
1843cf8e06f1SMark Fasheh 	if (lockres->l_level == level)
1844cf8e06f1SMark Fasheh 		ret = 0;
1845cf8e06f1SMark Fasheh 
1846cf8e06f1SMark Fasheh 	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1847cf8e06f1SMark Fasheh 	     lockres->l_flags, lockres->l_level, lockres->l_action);
1848cf8e06f1SMark Fasheh 
1849cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1850cf8e06f1SMark Fasheh 
1851cf8e06f1SMark Fasheh out:
1852cf8e06f1SMark Fasheh 	return ret;
1853cf8e06f1SMark Fasheh }
1854cf8e06f1SMark Fasheh 
1855cf8e06f1SMark Fasheh /*
1856cf8e06f1SMark Fasheh  * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1857cf8e06f1SMark Fasheh  * flock() calls. The locking approach this requires is sufficiently
1858cf8e06f1SMark Fasheh  * different from all other cluster lock types that we implement a
18593ad2f3fbSDaniel Mack  * separate path to the "low-level" dlm calls. In particular:
1860cf8e06f1SMark Fasheh  *
1861cf8e06f1SMark Fasheh  * - No optimization of lock levels is done - we take at exactly
1862cf8e06f1SMark Fasheh  *   what's been requested.
1863cf8e06f1SMark Fasheh  *
1864cf8e06f1SMark Fasheh  * - No lock caching is employed. We immediately downconvert to
1865cf8e06f1SMark Fasheh  *   no-lock at unlock time. This also means flock locks never go on
1866cf8e06f1SMark Fasheh  *   the blocking list).
1867cf8e06f1SMark Fasheh  *
1868cf8e06f1SMark Fasheh  * - Since userspace can trivially deadlock itself with flock, we make
1869cf8e06f1SMark Fasheh  *   sure to allow cancellation of a misbehaving applications flock()
1870cf8e06f1SMark Fasheh  *   request.
1871cf8e06f1SMark Fasheh  *
1872cf8e06f1SMark Fasheh  * - Access to any flock lockres doesn't require concurrency, so we
1873cf8e06f1SMark Fasheh  *   can simplify the code by requiring the caller to guarantee
1874cf8e06f1SMark Fasheh  *   serialization of dlmglue flock calls.
1875cf8e06f1SMark Fasheh  */
1876cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock)
1877cf8e06f1SMark Fasheh {
1878e988cf1cSMark Fasheh 	int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1879e988cf1cSMark Fasheh 	unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
1880cf8e06f1SMark Fasheh 	unsigned long flags;
1881cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
1882cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
1883cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1884cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1885cf8e06f1SMark Fasheh 
1886cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1887cf8e06f1SMark Fasheh 
1888cf8e06f1SMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
1889bd3e7610SJoel Becker 	    (lockres->l_level > DLM_LOCK_NL)) {
1890cf8e06f1SMark Fasheh 		mlog(ML_ERROR,
1891cf8e06f1SMark Fasheh 		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1892cf8e06f1SMark Fasheh 		     "level: %u\n", lockres->l_name, lockres->l_flags,
1893cf8e06f1SMark Fasheh 		     lockres->l_level);
1894cf8e06f1SMark Fasheh 		return -EINVAL;
1895cf8e06f1SMark Fasheh 	}
1896cf8e06f1SMark Fasheh 
1897cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1898cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1899cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1900cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1901cf8e06f1SMark Fasheh 
1902cf8e06f1SMark Fasheh 		/*
1903cf8e06f1SMark Fasheh 		 * Get the lock at NLMODE to start - that way we
1904cf8e06f1SMark Fasheh 		 * can cancel the upconvert request if need be.
1905cf8e06f1SMark Fasheh 		 */
1906e988cf1cSMark Fasheh 		ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
1907cf8e06f1SMark Fasheh 		if (ret < 0) {
1908cf8e06f1SMark Fasheh 			mlog_errno(ret);
1909cf8e06f1SMark Fasheh 			goto out;
1910cf8e06f1SMark Fasheh 		}
1911cf8e06f1SMark Fasheh 
1912cf8e06f1SMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
1913cf8e06f1SMark Fasheh 		if (ret) {
1914cf8e06f1SMark Fasheh 			mlog_errno(ret);
1915cf8e06f1SMark Fasheh 			goto out;
1916cf8e06f1SMark Fasheh 		}
1917cf8e06f1SMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
1918cf8e06f1SMark Fasheh 	}
1919cf8e06f1SMark Fasheh 
1920cf8e06f1SMark Fasheh 	lockres->l_action = OCFS2_AST_CONVERT;
1921e988cf1cSMark Fasheh 	lkm_flags |= DLM_LKF_CONVERT;
1922cf8e06f1SMark Fasheh 	lockres->l_requested = level;
1923cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1924cf8e06f1SMark Fasheh 
1925cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1926cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1927cf8e06f1SMark Fasheh 
19284670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
1929a796d286SJoel Becker 			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1);
19307431cd7eSJoel Becker 	if (ret) {
19317431cd7eSJoel Becker 		if (!trylock || (ret != -EAGAIN)) {
193224ef1815SJoel Becker 			ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1933cf8e06f1SMark Fasheh 			ret = -EINVAL;
1934cf8e06f1SMark Fasheh 		}
1935cf8e06f1SMark Fasheh 
1936cf8e06f1SMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
1937cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, &mw);
1938cf8e06f1SMark Fasheh 		goto out;
1939cf8e06f1SMark Fasheh 	}
1940cf8e06f1SMark Fasheh 
1941cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
1942cf8e06f1SMark Fasheh 	if (ret == -ERESTARTSYS) {
1943cf8e06f1SMark Fasheh 		/*
1944cf8e06f1SMark Fasheh 		 * Userspace can cause deadlock itself with
1945cf8e06f1SMark Fasheh 		 * flock(). Current behavior locally is to allow the
1946cf8e06f1SMark Fasheh 		 * deadlock, but abort the system call if a signal is
1947cf8e06f1SMark Fasheh 		 * received. We follow this example, otherwise a
1948cf8e06f1SMark Fasheh 		 * poorly written program could sit in kernel until
1949cf8e06f1SMark Fasheh 		 * reboot.
1950cf8e06f1SMark Fasheh 		 *
1951cf8e06f1SMark Fasheh 		 * Handling this is a bit more complicated for Ocfs2
1952cf8e06f1SMark Fasheh 		 * though. We can't exit this function with an
1953cf8e06f1SMark Fasheh 		 * outstanding lock request, so a cancel convert is
1954cf8e06f1SMark Fasheh 		 * required. We intentionally overwrite 'ret' - if the
1955cf8e06f1SMark Fasheh 		 * cancel fails and the lock was granted, it's easier
1956af901ca1SAndré Goddard Rosa 		 * to just bubble success back up to the user.
1957cf8e06f1SMark Fasheh 		 */
1958cf8e06f1SMark Fasheh 		ret = ocfs2_flock_handle_signal(lockres, level);
19591693a5c0SDavid Teigland 	} else if (!ret && (level > lockres->l_level)) {
19601693a5c0SDavid Teigland 		/* Trylock failed asynchronously */
19611693a5c0SDavid Teigland 		BUG_ON(!trylock);
19621693a5c0SDavid Teigland 		ret = -EAGAIN;
1963cf8e06f1SMark Fasheh 	}
1964cf8e06f1SMark Fasheh 
1965cf8e06f1SMark Fasheh out:
1966cf8e06f1SMark Fasheh 
1967cf8e06f1SMark Fasheh 	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
1968cf8e06f1SMark Fasheh 	     lockres->l_name, ex, trylock, ret);
1969cf8e06f1SMark Fasheh 	return ret;
1970cf8e06f1SMark Fasheh }
1971cf8e06f1SMark Fasheh 
1972cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file)
1973cf8e06f1SMark Fasheh {
1974cf8e06f1SMark Fasheh 	int ret;
1975de551246SJoel Becker 	unsigned int gen;
1976cf8e06f1SMark Fasheh 	unsigned long flags;
1977cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
1978cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
1979cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1980cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1981cf8e06f1SMark Fasheh 
1982cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1983cf8e06f1SMark Fasheh 
1984cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
1985cf8e06f1SMark Fasheh 		return;
1986cf8e06f1SMark Fasheh 
1987e988cf1cSMark Fasheh 	if (lockres->l_level == DLM_LOCK_NL)
1988cf8e06f1SMark Fasheh 		return;
1989cf8e06f1SMark Fasheh 
1990cf8e06f1SMark Fasheh 	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
1991cf8e06f1SMark Fasheh 	     lockres->l_name, lockres->l_flags, lockres->l_level,
1992cf8e06f1SMark Fasheh 	     lockres->l_action);
1993cf8e06f1SMark Fasheh 
1994cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1995cf8e06f1SMark Fasheh 	/*
1996cf8e06f1SMark Fasheh 	 * Fake a blocking ast for the downconvert code.
1997cf8e06f1SMark Fasheh 	 */
1998cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
1999bd3e7610SJoel Becker 	lockres->l_blocking = DLM_LOCK_EX;
2000cf8e06f1SMark Fasheh 
2001e988cf1cSMark Fasheh 	gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
2002cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
2003cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2004cf8e06f1SMark Fasheh 
2005e988cf1cSMark Fasheh 	ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
2006cf8e06f1SMark Fasheh 	if (ret) {
2007cf8e06f1SMark Fasheh 		mlog_errno(ret);
2008cf8e06f1SMark Fasheh 		return;
2009cf8e06f1SMark Fasheh 	}
2010cf8e06f1SMark Fasheh 
2011cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask(&mw);
2012cf8e06f1SMark Fasheh 	if (ret)
2013cf8e06f1SMark Fasheh 		mlog_errno(ret);
2014cf8e06f1SMark Fasheh }
2015cf8e06f1SMark Fasheh 
201634d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
2017ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
2018ccd979bdSMark Fasheh {
2019ccd979bdSMark Fasheh 	int kick = 0;
2020ccd979bdSMark Fasheh 
2021ccd979bdSMark Fasheh 	/* If we know that another node is waiting on our lock, kick
202234d024f8SMark Fasheh 	 * the downconvert thread * pre-emptively when we reach a release
2023ccd979bdSMark Fasheh 	 * condition. */
2024ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
2025ccd979bdSMark Fasheh 		switch(lockres->l_blocking) {
2026bd3e7610SJoel Becker 		case DLM_LOCK_EX:
2027ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders && !lockres->l_ro_holders)
2028ccd979bdSMark Fasheh 				kick = 1;
2029ccd979bdSMark Fasheh 			break;
2030bd3e7610SJoel Becker 		case DLM_LOCK_PR:
2031ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders)
2032ccd979bdSMark Fasheh 				kick = 1;
2033ccd979bdSMark Fasheh 			break;
2034ccd979bdSMark Fasheh 		default:
2035ccd979bdSMark Fasheh 			BUG();
2036ccd979bdSMark Fasheh 		}
2037ccd979bdSMark Fasheh 	}
2038ccd979bdSMark Fasheh 
2039ccd979bdSMark Fasheh 	if (kick)
204034d024f8SMark Fasheh 		ocfs2_wake_downconvert_thread(osb);
2041ccd979bdSMark Fasheh }
2042ccd979bdSMark Fasheh 
/*
 * Bit layout of a timestamp packed into 64 bits: the seconds occupy
 * the top OCFS2_SEC_BITS bits and the nanoseconds the remaining low
 * OCFS2_SEC_SHIFT bits. The shift is derived from OCFS2_SEC_BITS so
 * the two cannot drift apart.
 */
#define OCFS2_SEC_BITS   34
#define OCFS2_SEC_SHIFT  (64 - OCFS2_SEC_BITS)
#define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
2046ccd979bdSMark Fasheh 
2047ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for
2048ccd979bdSMark Fasheh  * now. */
2049ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec)
2050ccd979bdSMark Fasheh {
2051ccd979bdSMark Fasheh 	u64 res;
2052ccd979bdSMark Fasheh 	u64 sec = spec->tv_sec;
2053ccd979bdSMark Fasheh 	u32 nsec = spec->tv_nsec;
2054ccd979bdSMark Fasheh 
2055ccd979bdSMark Fasheh 	res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
2056ccd979bdSMark Fasheh 
2057ccd979bdSMark Fasheh 	return res;
2058ccd979bdSMark Fasheh }
2059ccd979bdSMark Fasheh 
2060ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't
2061ccd979bdSMark Fasheh  * need ip_lock in this function as anyone who would be changing those
2062e63aecb6SMark Fasheh  * values is supposed to be blocked in ocfs2_inode_lock right now. */
2063ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2064ccd979bdSMark Fasheh {
2065ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2066e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2067ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2068ccd979bdSMark Fasheh 
2069a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2070ccd979bdSMark Fasheh 
207124c19ef4SMark Fasheh 	/*
207224c19ef4SMark Fasheh 	 * Invalidate the LVB of a deleted inode - this way other
207324c19ef4SMark Fasheh 	 * nodes are forced to go to disk and discover the new inode
207424c19ef4SMark Fasheh 	 * status.
207524c19ef4SMark Fasheh 	 */
207624c19ef4SMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
207724c19ef4SMark Fasheh 		lvb->lvb_version = 0;
207824c19ef4SMark Fasheh 		goto out;
207924c19ef4SMark Fasheh 	}
208024c19ef4SMark Fasheh 
20814d3b83f7SMark Fasheh 	lvb->lvb_version   = OCFS2_LVB_VERSION;
2082ccd979bdSMark Fasheh 	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode));
2083ccd979bdSMark Fasheh 	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
208403ab30f7SEric W. Biederman 	lvb->lvb_iuid      = cpu_to_be32(i_uid_read(inode));
208503ab30f7SEric W. Biederman 	lvb->lvb_igid      = cpu_to_be32(i_gid_read(inode));
2086ccd979bdSMark Fasheh 	lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
2087ccd979bdSMark Fasheh 	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
2088ccd979bdSMark Fasheh 	lvb->lvb_iatime_packed  =
2089ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
2090ccd979bdSMark Fasheh 	lvb->lvb_ictime_packed =
2091ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
2092ccd979bdSMark Fasheh 	lvb->lvb_imtime_packed =
2093ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
2094ca4d147eSHerbert Poetzl 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
209515b1e36bSMark Fasheh 	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
2096f9e2d82eSMark Fasheh 	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
2097ccd979bdSMark Fasheh 
209824c19ef4SMark Fasheh out:
2099ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2100ccd979bdSMark Fasheh }
2101ccd979bdSMark Fasheh 
2102ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec,
2103ccd979bdSMark Fasheh 				  u64 packed_time)
2104ccd979bdSMark Fasheh {
2105ccd979bdSMark Fasheh 	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
2106ccd979bdSMark Fasheh 	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
2107ccd979bdSMark Fasheh }
2108ccd979bdSMark Fasheh 
2109ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2110ccd979bdSMark Fasheh {
2111ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2112e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2113ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2114ccd979bdSMark Fasheh 
2115ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2116ccd979bdSMark Fasheh 
2117a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2118ccd979bdSMark Fasheh 
2119ccd979bdSMark Fasheh 	/* We're safe here without the lockres lock... */
2120ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2121ccd979bdSMark Fasheh 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
2122ccd979bdSMark Fasheh 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
2123ccd979bdSMark Fasheh 
2124ca4d147eSHerbert Poetzl 	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
212515b1e36bSMark Fasheh 	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
2126ca4d147eSHerbert Poetzl 	ocfs2_set_inode_flags(inode);
2127ca4d147eSHerbert Poetzl 
2128ccd979bdSMark Fasheh 	/* fast-symlinks are a special case */
2129ccd979bdSMark Fasheh 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
2130ccd979bdSMark Fasheh 		inode->i_blocks = 0;
2131ccd979bdSMark Fasheh 	else
21328110b073SMark Fasheh 		inode->i_blocks = ocfs2_inode_sector_count(inode);
2133ccd979bdSMark Fasheh 
213403ab30f7SEric W. Biederman 	i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
213503ab30f7SEric W. Biederman 	i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
2136ccd979bdSMark Fasheh 	inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
2137bfe86848SMiklos Szeredi 	set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2138ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_atime,
2139ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_iatime_packed));
2140ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_mtime,
2141ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_imtime_packed));
2142ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_ctime,
2143ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_ictime_packed));
2144ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2145ccd979bdSMark Fasheh }
2146ccd979bdSMark Fasheh 
2147f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
2148f9e2d82eSMark Fasheh 					      struct ocfs2_lock_res *lockres)
2149ccd979bdSMark Fasheh {
2150a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2151ccd979bdSMark Fasheh 
21521c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
21531c520dfbSJoel Becker 	    && lvb->lvb_version == OCFS2_LVB_VERSION
2154f9e2d82eSMark Fasheh 	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
2155ccd979bdSMark Fasheh 		return 1;
2156ccd979bdSMark Fasheh 	return 0;
2157ccd979bdSMark Fasheh }
2158ccd979bdSMark Fasheh 
2159ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and
2160ccd979bdSMark Fasheh  * arbitrate who gets to refresh it.
2161ccd979bdSMark Fasheh  *
2162ccd979bdSMark Fasheh  *   0 means no refresh needed.
2163ccd979bdSMark Fasheh  *
2164ccd979bdSMark Fasheh  *   > 0 means you need to refresh this and you MUST call
2165ccd979bdSMark Fasheh  *   ocfs2_complete_lock_res_refresh afterwards. */
2166ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2167ccd979bdSMark Fasheh {
2168ccd979bdSMark Fasheh 	unsigned long flags;
2169ccd979bdSMark Fasheh 	int status = 0;
2170ccd979bdSMark Fasheh 
2171ccd979bdSMark Fasheh refresh_check:
2172ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2173ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2174ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2175ccd979bdSMark Fasheh 		goto bail;
2176ccd979bdSMark Fasheh 	}
2177ccd979bdSMark Fasheh 
2178ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2179ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2180ccd979bdSMark Fasheh 
2181ccd979bdSMark Fasheh 		ocfs2_wait_on_refreshing_lock(lockres);
2182ccd979bdSMark Fasheh 		goto refresh_check;
2183ccd979bdSMark Fasheh 	}
2184ccd979bdSMark Fasheh 
2185ccd979bdSMark Fasheh 	/* Ok, I'll be the one to refresh this lock. */
2186ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
2187ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2188ccd979bdSMark Fasheh 
2189ccd979bdSMark Fasheh 	status = 1;
2190ccd979bdSMark Fasheh bail:
2191c1e8d35eSTao Ma 	mlog(0, "status %d\n", status);
2192ccd979bdSMark Fasheh 	return status;
2193ccd979bdSMark Fasheh }
2194ccd979bdSMark Fasheh 
2195ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh
2196ccd979bdSMark Fasheh  * anymroe, but i won't clear the needs refresh flag. */
2197ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
2198ccd979bdSMark Fasheh 						   int status)
2199ccd979bdSMark Fasheh {
2200ccd979bdSMark Fasheh 	unsigned long flags;
2201ccd979bdSMark Fasheh 
2202ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2203ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
2204ccd979bdSMark Fasheh 	if (!status)
2205ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
2206ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2207ccd979bdSMark Fasheh 
2208ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
2209ccd979bdSMark Fasheh }
2210ccd979bdSMark Fasheh 
2211ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. */
2212e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode,
2213ccd979bdSMark Fasheh 				  struct buffer_head **bh)
2214ccd979bdSMark Fasheh {
2215ccd979bdSMark Fasheh 	int status = 0;
2216ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2217e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2218ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
2219c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2220ccd979bdSMark Fasheh 
2221be9e986bSMark Fasheh 	if (ocfs2_mount_local(osb))
2222be9e986bSMark Fasheh 		goto bail;
2223be9e986bSMark Fasheh 
2224ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2225ccd979bdSMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
2226b0697053SMark Fasheh 		mlog(0, "Orphaned inode %llu was deleted while we "
2227ccd979bdSMark Fasheh 		     "were waiting on a lock. ip_flags = 0x%x\n",
2228b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
2229ccd979bdSMark Fasheh 		spin_unlock(&oi->ip_lock);
2230ccd979bdSMark Fasheh 		status = -ENOENT;
2231ccd979bdSMark Fasheh 		goto bail;
2232ccd979bdSMark Fasheh 	}
2233ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2234ccd979bdSMark Fasheh 
2235ccd979bdSMark Fasheh 	if (!ocfs2_should_refresh_lock_res(lockres))
2236ccd979bdSMark Fasheh 		goto bail;
2237ccd979bdSMark Fasheh 
2238ccd979bdSMark Fasheh 	/* This will discard any caching information we might have had
2239ccd979bdSMark Fasheh 	 * for the inode metadata. */
22408cb471e8SJoel Becker 	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
2241ccd979bdSMark Fasheh 
224283418978SMark Fasheh 	ocfs2_extent_map_trunc(inode, 0);
224383418978SMark Fasheh 
2244be9e986bSMark Fasheh 	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
2245b0697053SMark Fasheh 		mlog(0, "Trusting LVB on inode %llu\n",
2246b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno);
2247ccd979bdSMark Fasheh 		ocfs2_refresh_inode_from_lvb(inode);
2248ccd979bdSMark Fasheh 	} else {
2249ccd979bdSMark Fasheh 		/* Boo, we have to go to disk. */
2250ccd979bdSMark Fasheh 		/* read bh, cast, ocfs2_refresh_inode */
2251b657c95cSJoel Becker 		status = ocfs2_read_inode_block(inode, bh);
2252ccd979bdSMark Fasheh 		if (status < 0) {
2253ccd979bdSMark Fasheh 			mlog_errno(status);
2254ccd979bdSMark Fasheh 			goto bail_refresh;
2255ccd979bdSMark Fasheh 		}
2256ccd979bdSMark Fasheh 		fe = (struct ocfs2_dinode *) (*bh)->b_data;
2257ccd979bdSMark Fasheh 
2258ccd979bdSMark Fasheh 		/* This is a good chance to make sure we're not
2259b657c95cSJoel Becker 		 * locking an invalid object.  ocfs2_read_inode_block()
2260b657c95cSJoel Becker 		 * already checked that the inode block is sane.
2261ccd979bdSMark Fasheh 		 *
2262ccd979bdSMark Fasheh 		 * We bug on a stale inode here because we checked
2263ccd979bdSMark Fasheh 		 * above whether it was wiped from disk. The wiping
2264ccd979bdSMark Fasheh 		 * node provides a guarantee that we receive that
2265ccd979bdSMark Fasheh 		 * message and can mark the inode before dropping any
2266ccd979bdSMark Fasheh 		 * locks associated with it. */
2267ccd979bdSMark Fasheh 		mlog_bug_on_msg(inode->i_generation !=
2268ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_generation),
2269b0697053SMark Fasheh 				"Invalid dinode %llu disk generation: %u "
2270ccd979bdSMark Fasheh 				"inode->i_generation: %u\n",
2271b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2272b0697053SMark Fasheh 				le32_to_cpu(fe->i_generation),
2273ccd979bdSMark Fasheh 				inode->i_generation);
2274ccd979bdSMark Fasheh 		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
2275ccd979bdSMark Fasheh 				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
2276b0697053SMark Fasheh 				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
2277b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2278b0697053SMark Fasheh 				(unsigned long long)le64_to_cpu(fe->i_dtime),
2279ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_flags));
2280ccd979bdSMark Fasheh 
2281ccd979bdSMark Fasheh 		ocfs2_refresh_inode(inode, fe);
22828ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2283ccd979bdSMark Fasheh 	}
2284ccd979bdSMark Fasheh 
2285ccd979bdSMark Fasheh 	status = 0;
2286ccd979bdSMark Fasheh bail_refresh:
2287ccd979bdSMark Fasheh 	ocfs2_complete_lock_res_refresh(lockres, status);
2288ccd979bdSMark Fasheh bail:
2289ccd979bdSMark Fasheh 	return status;
2290ccd979bdSMark Fasheh }
2291ccd979bdSMark Fasheh 
/*
 * Hand an inode buffer head back through @ret_bh.
 *
 * If @passed_bh is non-NULL it is reused and an extra reference is
 * taken with get_bh(); otherwise the inode block is read via
 * ocfs2_read_inode_block(). Returns 0 or the negative read error.
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (!passed_bh) {
		status = ocfs2_read_inode_block(inode, ret_bh);
		if (status < 0)
			mlog_errno(status);

		return status;
	}

	/* The update already went to disk for us; use the bh it returned. */
	*ret_bh = passed_bh;
	get_bh(passed_bh);

	return 0;
}
2313ccd979bdSMark Fasheh 
2314ccd979bdSMark Fasheh /*
2315ccd979bdSMark Fasheh  * returns < 0 error if the callback will never be called, otherwise
2316ccd979bdSMark Fasheh  * the result of the lock will be communicated via the callback.
2317ccd979bdSMark Fasheh  */
2318cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode,
2319ccd979bdSMark Fasheh 				 struct buffer_head **ret_bh,
2320ccd979bdSMark Fasheh 				 int ex,
2321cb25797dSJan Kara 				 int arg_flags,
2322cb25797dSJan Kara 				 int subclass)
2323ccd979bdSMark Fasheh {
2324bd3e7610SJoel Becker 	int status, level, acquired;
2325bd3e7610SJoel Becker 	u32 dlm_flags;
2326c271c5c2SSunil Mushran 	struct ocfs2_lock_res *lockres = NULL;
2327ccd979bdSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2328ccd979bdSMark Fasheh 	struct buffer_head *local_bh = NULL;
2329ccd979bdSMark Fasheh 
2330ccd979bdSMark Fasheh 	BUG_ON(!inode);
2331ccd979bdSMark Fasheh 
2332b0697053SMark Fasheh 	mlog(0, "inode %llu, take %s META lock\n",
2333b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2334ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2335ccd979bdSMark Fasheh 
2336ccd979bdSMark Fasheh 	status = 0;
2337ccd979bdSMark Fasheh 	acquired = 0;
2338ccd979bdSMark Fasheh 	/* We'll allow faking a readonly metadata lock for
2339ccd979bdSMark Fasheh 	 * rodevices. */
2340ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb)) {
2341ccd979bdSMark Fasheh 		if (ex)
2342ccd979bdSMark Fasheh 			status = -EROFS;
234303efed8aSTiger Yang 		goto getbh;
2344ccd979bdSMark Fasheh 	}
2345ccd979bdSMark Fasheh 
2346c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2347c271c5c2SSunil Mushran 		goto local;
2348c271c5c2SSunil Mushran 
2349ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2350553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2351ccd979bdSMark Fasheh 
2352e63aecb6SMark Fasheh 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
2353bd3e7610SJoel Becker 	level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2354ccd979bdSMark Fasheh 	dlm_flags = 0;
2355ccd979bdSMark Fasheh 	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2356bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_NOQUEUE;
2357ccd979bdSMark Fasheh 
2358cb25797dSJan Kara 	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2359cb25797dSJan Kara 				      arg_flags, subclass, _RET_IP_);
2360ccd979bdSMark Fasheh 	if (status < 0) {
236141003a7bSZach Brown 		if (status != -EAGAIN)
2362ccd979bdSMark Fasheh 			mlog_errno(status);
2363ccd979bdSMark Fasheh 		goto bail;
2364ccd979bdSMark Fasheh 	}
2365ccd979bdSMark Fasheh 
2366ccd979bdSMark Fasheh 	/* Notify the error cleanup path to drop the cluster lock. */
2367ccd979bdSMark Fasheh 	acquired = 1;
2368ccd979bdSMark Fasheh 
2369ccd979bdSMark Fasheh 	/* We wait twice because a node may have died while we were in
2370ccd979bdSMark Fasheh 	 * the lower dlm layers. The second time though, we've
2371ccd979bdSMark Fasheh 	 * committed to owning this lock so we don't allow signals to
2372ccd979bdSMark Fasheh 	 * abort the operation. */
2373ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2374553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2375ccd979bdSMark Fasheh 
2376c271c5c2SSunil Mushran local:
237724c19ef4SMark Fasheh 	/*
237824c19ef4SMark Fasheh 	 * We only see this flag if we're being called from
237924c19ef4SMark Fasheh 	 * ocfs2_read_locked_inode(). It means we're locking an inode
238024c19ef4SMark Fasheh 	 * which hasn't been populated yet, so clear the refresh flag
238124c19ef4SMark Fasheh 	 * and let the caller handle it.
238224c19ef4SMark Fasheh 	 */
238324c19ef4SMark Fasheh 	if (inode->i_state & I_NEW) {
238424c19ef4SMark Fasheh 		status = 0;
2385c271c5c2SSunil Mushran 		if (lockres)
238624c19ef4SMark Fasheh 			ocfs2_complete_lock_res_refresh(lockres, 0);
238724c19ef4SMark Fasheh 		goto bail;
238824c19ef4SMark Fasheh 	}
238924c19ef4SMark Fasheh 
2390ccd979bdSMark Fasheh 	/* This is fun. The caller may want a bh back, or it may
2391e63aecb6SMark Fasheh 	 * not. ocfs2_inode_lock_update definitely wants one in, but
2392ccd979bdSMark Fasheh 	 * may or may not read one, depending on what's in the
2393ccd979bdSMark Fasheh 	 * LVB. The result of all of this is that we've *only* gone to
2394ccd979bdSMark Fasheh 	 * disk if we have to, so the complexity is worthwhile. */
2395e63aecb6SMark Fasheh 	status = ocfs2_inode_lock_update(inode, &local_bh);
2396ccd979bdSMark Fasheh 	if (status < 0) {
2397ccd979bdSMark Fasheh 		if (status != -ENOENT)
2398ccd979bdSMark Fasheh 			mlog_errno(status);
2399ccd979bdSMark Fasheh 		goto bail;
2400ccd979bdSMark Fasheh 	}
240103efed8aSTiger Yang getbh:
2402ccd979bdSMark Fasheh 	if (ret_bh) {
2403ccd979bdSMark Fasheh 		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2404ccd979bdSMark Fasheh 		if (status < 0) {
2405ccd979bdSMark Fasheh 			mlog_errno(status);
2406ccd979bdSMark Fasheh 			goto bail;
2407ccd979bdSMark Fasheh 		}
2408ccd979bdSMark Fasheh 	}
2409ccd979bdSMark Fasheh 
2410ccd979bdSMark Fasheh bail:
2411ccd979bdSMark Fasheh 	if (status < 0) {
2412ccd979bdSMark Fasheh 		if (ret_bh && (*ret_bh)) {
2413ccd979bdSMark Fasheh 			brelse(*ret_bh);
2414ccd979bdSMark Fasheh 			*ret_bh = NULL;
2415ccd979bdSMark Fasheh 		}
2416ccd979bdSMark Fasheh 		if (acquired)
2417e63aecb6SMark Fasheh 			ocfs2_inode_unlock(inode, ex);
2418ccd979bdSMark Fasheh 	}
2419ccd979bdSMark Fasheh 
2420ccd979bdSMark Fasheh 	if (local_bh)
2421ccd979bdSMark Fasheh 		brelse(local_bh);
2422ccd979bdSMark Fasheh 
2423ccd979bdSMark Fasheh 	return status;
2424ccd979bdSMark Fasheh }
2425ccd979bdSMark Fasheh 
2426ccd979bdSMark Fasheh /*
242734d024f8SMark Fasheh  * This is working around a lock inversion between tasks acquiring DLM
242834d024f8SMark Fasheh  * locks while holding a page lock and the downconvert thread which
242934d024f8SMark Fasheh  * blocks dlm lock acquiry while acquiring page locks.
2430ccd979bdSMark Fasheh  *
2431ccd979bdSMark Fasheh  * ** These _with_page variantes are only intended to be called from aop
2432ccd979bdSMark Fasheh  * methods that hold page locks and return a very specific *positive* error
2433ccd979bdSMark Fasheh  * code that aop methods pass up to the VFS -- test for errors with != 0. **
2434ccd979bdSMark Fasheh  *
243534d024f8SMark Fasheh  * The DLM is called such that it returns -EAGAIN if it would have
243634d024f8SMark Fasheh  * blocked waiting for the downconvert thread.  In that case we unlock
243734d024f8SMark Fasheh  * our page so the downconvert thread can make progress.  Once we've
243834d024f8SMark Fasheh  * done this we have to return AOP_TRUNCATED_PAGE so the aop method
243934d024f8SMark Fasheh  * that called us can bubble that back up into the VFS who will then
244034d024f8SMark Fasheh  * immediately retry the aop call.
2441ccd979bdSMark Fasheh  */
2442e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode,
2443ccd979bdSMark Fasheh 			      struct buffer_head **ret_bh,
2444ccd979bdSMark Fasheh 			      int ex,
2445ccd979bdSMark Fasheh 			      struct page *page)
2446ccd979bdSMark Fasheh {
2447ccd979bdSMark Fasheh 	int ret;
2448ccd979bdSMark Fasheh 
2449e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2450ccd979bdSMark Fasheh 	if (ret == -EAGAIN) {
2451ccd979bdSMark Fasheh 		unlock_page(page);
2452ccd979bdSMark Fasheh 		ret = AOP_TRUNCATED_PAGE;
2453ccd979bdSMark Fasheh 	}
2454ccd979bdSMark Fasheh 
2455ccd979bdSMark Fasheh 	return ret;
2456ccd979bdSMark Fasheh }
2457ccd979bdSMark Fasheh 
2458e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode,
24597f1a37e3STiger Yang 			  struct vfsmount *vfsmnt,
24607f1a37e3STiger Yang 			  int *level)
24617f1a37e3STiger Yang {
24627f1a37e3STiger Yang 	int ret;
24637f1a37e3STiger Yang 
2464e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock(inode, NULL, 0);
24657f1a37e3STiger Yang 	if (ret < 0) {
24667f1a37e3STiger Yang 		mlog_errno(ret);
24677f1a37e3STiger Yang 		return ret;
24687f1a37e3STiger Yang 	}
24697f1a37e3STiger Yang 
24707f1a37e3STiger Yang 	/*
24717f1a37e3STiger Yang 	 * If we should update atime, we will get EX lock,
24727f1a37e3STiger Yang 	 * otherwise we just get PR lock.
24737f1a37e3STiger Yang 	 */
24747f1a37e3STiger Yang 	if (ocfs2_should_update_atime(inode, vfsmnt)) {
24757f1a37e3STiger Yang 		struct buffer_head *bh = NULL;
24767f1a37e3STiger Yang 
2477e63aecb6SMark Fasheh 		ocfs2_inode_unlock(inode, 0);
2478e63aecb6SMark Fasheh 		ret = ocfs2_inode_lock(inode, &bh, 1);
24797f1a37e3STiger Yang 		if (ret < 0) {
24807f1a37e3STiger Yang 			mlog_errno(ret);
24817f1a37e3STiger Yang 			return ret;
24827f1a37e3STiger Yang 		}
24837f1a37e3STiger Yang 		*level = 1;
24847f1a37e3STiger Yang 		if (ocfs2_should_update_atime(inode, vfsmnt))
24857f1a37e3STiger Yang 			ocfs2_update_inode_atime(inode, bh);
24867f1a37e3STiger Yang 		if (bh)
24877f1a37e3STiger Yang 			brelse(bh);
24887f1a37e3STiger Yang 	} else
24897f1a37e3STiger Yang 		*level = 0;
24907f1a37e3STiger Yang 
24917f1a37e3STiger Yang 	return ret;
24927f1a37e3STiger Yang }
24937f1a37e3STiger Yang 
2494e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode,
2495ccd979bdSMark Fasheh 		       int ex)
2496ccd979bdSMark Fasheh {
2497bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2498e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2499c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2500ccd979bdSMark Fasheh 
2501b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s META lock\n",
2502b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2503ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2504ccd979bdSMark Fasheh 
2505c271c5c2SSunil Mushran 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
2506c271c5c2SSunil Mushran 	    !ocfs2_mount_local(osb))
2507ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
2508ccd979bdSMark Fasheh }
2509ccd979bdSMark Fasheh 
2510df152c24SSunil Mushran int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
251183273932SSrinivas Eeda {
251283273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
251383273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
251483273932SSrinivas Eeda 	int status = 0;
251583273932SSrinivas Eeda 
2516df152c24SSunil Mushran 	if (ocfs2_is_hard_readonly(osb))
2517df152c24SSunil Mushran 		return -EROFS;
2518df152c24SSunil Mushran 
2519df152c24SSunil Mushran 	if (ocfs2_mount_local(osb))
2520df152c24SSunil Mushran 		return 0;
2521df152c24SSunil Mushran 
252283273932SSrinivas Eeda 	lockres = &osb->osb_orphan_scan.os_lockres;
2523df152c24SSunil Mushran 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
252483273932SSrinivas Eeda 	if (status < 0)
252583273932SSrinivas Eeda 		return status;
252683273932SSrinivas Eeda 
252783273932SSrinivas Eeda 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
25281c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
25291c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
253083273932SSrinivas Eeda 		*seqno = be32_to_cpu(lvb->lvb_os_seqno);
25313211949fSSunil Mushran 	else
25323211949fSSunil Mushran 		*seqno = osb->osb_orphan_scan.os_seqno + 1;
25333211949fSSunil Mushran 
253483273932SSrinivas Eeda 	return status;
253583273932SSrinivas Eeda }
253683273932SSrinivas Eeda 
2537df152c24SSunil Mushran void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
253883273932SSrinivas Eeda {
253983273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
254083273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
254183273932SSrinivas Eeda 
2542df152c24SSunil Mushran 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
254383273932SSrinivas Eeda 		lockres = &osb->osb_orphan_scan.os_lockres;
254483273932SSrinivas Eeda 		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
254583273932SSrinivas Eeda 		lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
254683273932SSrinivas Eeda 		lvb->lvb_os_seqno = cpu_to_be32(seqno);
2547df152c24SSunil Mushran 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2548df152c24SSunil Mushran 	}
254983273932SSrinivas Eeda }
255083273932SSrinivas Eeda 
2551ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb,
2552ccd979bdSMark Fasheh 		     int ex)
2553ccd979bdSMark Fasheh {
2554c271c5c2SSunil Mushran 	int status = 0;
2555bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2556ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2557ccd979bdSMark Fasheh 
2558ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2559ccd979bdSMark Fasheh 		return -EROFS;
2560ccd979bdSMark Fasheh 
2561c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2562c271c5c2SSunil Mushran 		goto bail;
2563c271c5c2SSunil Mushran 
2564ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2565ccd979bdSMark Fasheh 	if (status < 0) {
2566ccd979bdSMark Fasheh 		mlog_errno(status);
2567ccd979bdSMark Fasheh 		goto bail;
2568ccd979bdSMark Fasheh 	}
2569ccd979bdSMark Fasheh 
2570ccd979bdSMark Fasheh 	/* The super block lock path is really in the best position to
2571ccd979bdSMark Fasheh 	 * know when resources covered by the lock need to be
2572ccd979bdSMark Fasheh 	 * refreshed, so we do it here. Of course, making sense of
2573ccd979bdSMark Fasheh 	 * everything is up to the caller :) */
2574ccd979bdSMark Fasheh 	status = ocfs2_should_refresh_lock_res(lockres);
2575ccd979bdSMark Fasheh 	if (status) {
25768e8a4603SMark Fasheh 		status = ocfs2_refresh_slot_info(osb);
2577ccd979bdSMark Fasheh 
2578ccd979bdSMark Fasheh 		ocfs2_complete_lock_res_refresh(lockres, status);
2579ccd979bdSMark Fasheh 
25803278bb74SJunxiao Bi 		if (status < 0) {
25813278bb74SJunxiao Bi 			ocfs2_cluster_unlock(osb, lockres, level);
2582ccd979bdSMark Fasheh 			mlog_errno(status);
25833278bb74SJunxiao Bi 		}
25848ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2585ccd979bdSMark Fasheh 	}
2586ccd979bdSMark Fasheh bail:
2587ccd979bdSMark Fasheh 	return status;
2588ccd979bdSMark Fasheh }
2589ccd979bdSMark Fasheh 
2590ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb,
2591ccd979bdSMark Fasheh 			int ex)
2592ccd979bdSMark Fasheh {
2593bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2594ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2595ccd979bdSMark Fasheh 
2596c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2597ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(osb, lockres, level);
2598ccd979bdSMark Fasheh }
2599ccd979bdSMark Fasheh 
2600ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb)
2601ccd979bdSMark Fasheh {
2602ccd979bdSMark Fasheh 	int status;
2603ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2604ccd979bdSMark Fasheh 
2605ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2606ccd979bdSMark Fasheh 		return -EROFS;
2607ccd979bdSMark Fasheh 
2608c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2609c271c5c2SSunil Mushran 		return 0;
2610c271c5c2SSunil Mushran 
2611bd3e7610SJoel Becker 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2612ccd979bdSMark Fasheh 	if (status < 0)
2613ccd979bdSMark Fasheh 		mlog_errno(status);
2614ccd979bdSMark Fasheh 
2615ccd979bdSMark Fasheh 	return status;
2616ccd979bdSMark Fasheh }
2617ccd979bdSMark Fasheh 
2618ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb)
2619ccd979bdSMark Fasheh {
2620ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2621ccd979bdSMark Fasheh 
2622c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2623bd3e7610SJoel Becker 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2624ccd979bdSMark Fasheh }
2625ccd979bdSMark Fasheh 
26266ca497a8Swengang wang int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
26276ca497a8Swengang wang {
26286ca497a8Swengang wang 	int status;
26296ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
26306ca497a8Swengang wang 
26316ca497a8Swengang wang 	if (ocfs2_is_hard_readonly(osb))
26326ca497a8Swengang wang 		return -EROFS;
26336ca497a8Swengang wang 
26346ca497a8Swengang wang 	if (ocfs2_mount_local(osb))
26356ca497a8Swengang wang 		return 0;
26366ca497a8Swengang wang 
26376ca497a8Swengang wang 	status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
26386ca497a8Swengang wang 				    0, 0);
26396ca497a8Swengang wang 	if (status < 0)
26406ca497a8Swengang wang 		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
26416ca497a8Swengang wang 
26426ca497a8Swengang wang 	return status;
26436ca497a8Swengang wang }
26446ca497a8Swengang wang 
26456ca497a8Swengang wang void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
26466ca497a8Swengang wang {
26476ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
26486ca497a8Swengang wang 
26496ca497a8Swengang wang 	if (!ocfs2_mount_local(osb))
26506ca497a8Swengang wang 		ocfs2_cluster_unlock(osb, lockres,
26516ca497a8Swengang wang 				     ex ? LKM_EXMODE : LKM_PRMODE);
26526ca497a8Swengang wang }
26536ca497a8Swengang wang 
2654d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2655d680efe9SMark Fasheh {
2656d680efe9SMark Fasheh 	int ret;
2657bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2658d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2659d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2660d680efe9SMark Fasheh 
2661d680efe9SMark Fasheh 	BUG_ON(!dl);
2662d680efe9SMark Fasheh 
266303efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
266403efed8aSTiger Yang 		if (ex)
2665d680efe9SMark Fasheh 			return -EROFS;
266603efed8aSTiger Yang 		return 0;
266703efed8aSTiger Yang 	}
2668d680efe9SMark Fasheh 
2669c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2670c271c5c2SSunil Mushran 		return 0;
2671c271c5c2SSunil Mushran 
2672d680efe9SMark Fasheh 	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2673d680efe9SMark Fasheh 	if (ret < 0)
2674d680efe9SMark Fasheh 		mlog_errno(ret);
2675d680efe9SMark Fasheh 
2676d680efe9SMark Fasheh 	return ret;
2677d680efe9SMark Fasheh }
2678d680efe9SMark Fasheh 
2679d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2680d680efe9SMark Fasheh {
2681bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2682d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2683d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2684d680efe9SMark Fasheh 
268503efed8aSTiger Yang 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
2686d680efe9SMark Fasheh 		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
2687d680efe9SMark Fasheh }
2688d680efe9SMark Fasheh 
2689ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because
2690ccd979bdSMark Fasheh  * open references on the debug inodes can live on after a mount, so
2691ccd979bdSMark Fasheh  * we can't rely on the ocfs2_super to always exist. */
2692ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref)
2693ccd979bdSMark Fasheh {
2694ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2695ccd979bdSMark Fasheh 
2696ccd979bdSMark Fasheh 	dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
2697ccd979bdSMark Fasheh 
2698ccd979bdSMark Fasheh 	kfree(dlm_debug);
2699ccd979bdSMark Fasheh }
2700ccd979bdSMark Fasheh 
2701ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
2702ccd979bdSMark Fasheh {
2703ccd979bdSMark Fasheh 	if (dlm_debug)
2704ccd979bdSMark Fasheh 		kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
2705ccd979bdSMark Fasheh }
2706ccd979bdSMark Fasheh 
2707ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
2708ccd979bdSMark Fasheh {
2709ccd979bdSMark Fasheh 	kref_get(&debug->d_refcnt);
2710ccd979bdSMark Fasheh }
2711ccd979bdSMark Fasheh 
2712ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
2713ccd979bdSMark Fasheh {
2714ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2715ccd979bdSMark Fasheh 
2716ccd979bdSMark Fasheh 	dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
2717ccd979bdSMark Fasheh 	if (!dlm_debug) {
2718ccd979bdSMark Fasheh 		mlog_errno(-ENOMEM);
2719ccd979bdSMark Fasheh 		goto out;
2720ccd979bdSMark Fasheh 	}
2721ccd979bdSMark Fasheh 
2722ccd979bdSMark Fasheh 	kref_init(&dlm_debug->d_refcnt);
2723ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
2724ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = NULL;
2725ccd979bdSMark Fasheh out:
2726ccd979bdSMark Fasheh 	return dlm_debug;
2727ccd979bdSMark Fasheh }
2728ccd979bdSMark Fasheh 
2729ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. */
2730ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv {
2731ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *p_dlm_debug;
2732ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_iter_res;
2733ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_tmp_res;
2734ccd979bdSMark Fasheh };
2735ccd979bdSMark Fasheh 
2736ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
2737ccd979bdSMark Fasheh 						 struct ocfs2_dlm_seq_priv *priv)
2738ccd979bdSMark Fasheh {
2739ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter, *ret = NULL;
2740ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
2741ccd979bdSMark Fasheh 
2742ccd979bdSMark Fasheh 	assert_spin_locked(&ocfs2_dlm_tracking_lock);
2743ccd979bdSMark Fasheh 
2744ccd979bdSMark Fasheh 	list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
2745ccd979bdSMark Fasheh 		/* discover the head of the list */
2746ccd979bdSMark Fasheh 		if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
2747ccd979bdSMark Fasheh 			mlog(0, "End of list found, %p\n", ret);
2748ccd979bdSMark Fasheh 			break;
2749ccd979bdSMark Fasheh 		}
2750ccd979bdSMark Fasheh 
2751ccd979bdSMark Fasheh 		/* We track our "dummy" iteration lockres' by a NULL
2752ccd979bdSMark Fasheh 		 * l_ops field. */
2753ccd979bdSMark Fasheh 		if (iter->l_ops != NULL) {
2754ccd979bdSMark Fasheh 			ret = iter;
2755ccd979bdSMark Fasheh 			break;
2756ccd979bdSMark Fasheh 		}
2757ccd979bdSMark Fasheh 	}
2758ccd979bdSMark Fasheh 
2759ccd979bdSMark Fasheh 	return ret;
2760ccd979bdSMark Fasheh }
2761ccd979bdSMark Fasheh 
2762ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
2763ccd979bdSMark Fasheh {
2764ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
2765ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter;
2766ccd979bdSMark Fasheh 
2767ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
2768ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
2769ccd979bdSMark Fasheh 	if (iter) {
2770ccd979bdSMark Fasheh 		/* Since lockres' have the lifetime of their container
2771ccd979bdSMark Fasheh 		 * (which can be inodes, ocfs2_supers, etc) we want to
2772ccd979bdSMark Fasheh 		 * copy this out to a temporary lockres while still
2773ccd979bdSMark Fasheh 		 * under the spinlock. Obviously after this we can't
2774ccd979bdSMark Fasheh 		 * trust any pointers on the copy returned, but that's
2775ccd979bdSMark Fasheh 		 * ok as the information we want isn't typically held
2776ccd979bdSMark Fasheh 		 * in them. */
2777ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
2778ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
2779ccd979bdSMark Fasheh 	}
2780ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
2781ccd979bdSMark Fasheh 
2782ccd979bdSMark Fasheh 	return iter;
2783ccd979bdSMark Fasheh }
2784ccd979bdSMark Fasheh 
/* seq_file ->stop: intentionally empty, ->start leaves nothing held. */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
	/* Nothing to undo. */
}
2788ccd979bdSMark Fasheh 
2789ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
2790ccd979bdSMark Fasheh {
2791ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
2792ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter = v;
2793ccd979bdSMark Fasheh 	struct ocfs2_lock_res *dummy = &priv->p_iter_res;
2794ccd979bdSMark Fasheh 
2795ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
2796ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(iter, priv);
2797ccd979bdSMark Fasheh 	list_del_init(&dummy->l_debug_list);
2798ccd979bdSMark Fasheh 	if (iter) {
2799ccd979bdSMark Fasheh 		list_add(&dummy->l_debug_list, &iter->l_debug_list);
2800ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
2801ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
2802ccd979bdSMark Fasheh 	}
2803ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
2804ccd979bdSMark Fasheh 
2805ccd979bdSMark Fasheh 	return iter;
2806ccd979bdSMark Fasheh }
2807ccd979bdSMark Fasheh 
28085bc970e8SSunil Mushran /*
28095bc970e8SSunil Mushran  * Version is used by debugfs.ocfs2 to determine the format being used
28105bc970e8SSunil Mushran  *
28115bc970e8SSunil Mushran  * New in version 2
28125bc970e8SSunil Mushran  *	- Lock stats printed
28135bc970e8SSunil Mushran  * New in version 3
28145bc970e8SSunil Mushran  *	- Max time in lock stats is in usecs (instead of nsecs)
28155bc970e8SSunil Mushran  */
28165bc970e8SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 3
2817ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2818ccd979bdSMark Fasheh {
2819ccd979bdSMark Fasheh 	int i;
2820ccd979bdSMark Fasheh 	char *lvb;
2821ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = v;
2822ccd979bdSMark Fasheh 
2823ccd979bdSMark Fasheh 	if (!lockres)
2824ccd979bdSMark Fasheh 		return -EINVAL;
2825ccd979bdSMark Fasheh 
2826d680efe9SMark Fasheh 	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
2827d680efe9SMark Fasheh 
2828d680efe9SMark Fasheh 	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
2829d680efe9SMark Fasheh 		seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
2830d680efe9SMark Fasheh 			   lockres->l_name,
2831d680efe9SMark Fasheh 			   (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
2832d680efe9SMark Fasheh 	else
2833d680efe9SMark Fasheh 		seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
2834d680efe9SMark Fasheh 
2835d680efe9SMark Fasheh 	seq_printf(m, "%d\t"
2836ccd979bdSMark Fasheh 		   "0x%lx\t"
2837ccd979bdSMark Fasheh 		   "0x%x\t"
2838ccd979bdSMark Fasheh 		   "0x%x\t"
2839ccd979bdSMark Fasheh 		   "%u\t"
2840ccd979bdSMark Fasheh 		   "%u\t"
2841ccd979bdSMark Fasheh 		   "%d\t"
2842ccd979bdSMark Fasheh 		   "%d\t",
2843ccd979bdSMark Fasheh 		   lockres->l_level,
2844ccd979bdSMark Fasheh 		   lockres->l_flags,
2845ccd979bdSMark Fasheh 		   lockres->l_action,
2846ccd979bdSMark Fasheh 		   lockres->l_unlock_action,
2847ccd979bdSMark Fasheh 		   lockres->l_ro_holders,
2848ccd979bdSMark Fasheh 		   lockres->l_ex_holders,
2849ccd979bdSMark Fasheh 		   lockres->l_requested,
2850ccd979bdSMark Fasheh 		   lockres->l_blocking);
2851ccd979bdSMark Fasheh 
2852ccd979bdSMark Fasheh 	/* Dump the raw LVB */
28538f2c9c1bSJoel Becker 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2854ccd979bdSMark Fasheh 	for(i = 0; i < DLM_LVB_LEN; i++)
2855ccd979bdSMark Fasheh 		seq_printf(m, "0x%x\t", lvb[i]);
2856ccd979bdSMark Fasheh 
28578ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
28585bc970e8SSunil Mushran # define lock_num_prmode(_l)		((_l)->l_lock_prmode.ls_gets)
28595bc970e8SSunil Mushran # define lock_num_exmode(_l)		((_l)->l_lock_exmode.ls_gets)
28605bc970e8SSunil Mushran # define lock_num_prmode_failed(_l)	((_l)->l_lock_prmode.ls_fail)
28615bc970e8SSunil Mushran # define lock_num_exmode_failed(_l)	((_l)->l_lock_exmode.ls_fail)
28625bc970e8SSunil Mushran # define lock_total_prmode(_l)		((_l)->l_lock_prmode.ls_total)
28635bc970e8SSunil Mushran # define lock_total_exmode(_l)		((_l)->l_lock_exmode.ls_total)
28645bc970e8SSunil Mushran # define lock_max_prmode(_l)		((_l)->l_lock_prmode.ls_max)
28655bc970e8SSunil Mushran # define lock_max_exmode(_l)		((_l)->l_lock_exmode.ls_max)
28665bc970e8SSunil Mushran # define lock_refresh(_l)		((_l)->l_lock_refresh)
28678ddb7b00SSunil Mushran #else
28685bc970e8SSunil Mushran # define lock_num_prmode(_l)		(0)
28695bc970e8SSunil Mushran # define lock_num_exmode(_l)		(0)
28708ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l)	(0)
28718ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l)	(0)
2872dd25e55eSRandy Dunlap # define lock_total_prmode(_l)		(0ULL)
2873dd25e55eSRandy Dunlap # define lock_total_exmode(_l)		(0ULL)
28748ddb7b00SSunil Mushran # define lock_max_prmode(_l)		(0)
28758ddb7b00SSunil Mushran # define lock_max_exmode(_l)		(0)
28768ddb7b00SSunil Mushran # define lock_refresh(_l)		(0)
28778ddb7b00SSunil Mushran #endif
28788ddb7b00SSunil Mushran 	/* The following seq_print was added in version 2 of this output */
28795bc970e8SSunil Mushran 	seq_printf(m, "%u\t"
28805bc970e8SSunil Mushran 		   "%u\t"
28818ddb7b00SSunil Mushran 		   "%u\t"
28828ddb7b00SSunil Mushran 		   "%u\t"
28838ddb7b00SSunil Mushran 		   "%llu\t"
28848ddb7b00SSunil Mushran 		   "%llu\t"
28858ddb7b00SSunil Mushran 		   "%u\t"
28868ddb7b00SSunil Mushran 		   "%u\t"
28878ddb7b00SSunil Mushran 		   "%u\t",
28888ddb7b00SSunil Mushran 		   lock_num_prmode(lockres),
28898ddb7b00SSunil Mushran 		   lock_num_exmode(lockres),
28908ddb7b00SSunil Mushran 		   lock_num_prmode_failed(lockres),
28918ddb7b00SSunil Mushran 		   lock_num_exmode_failed(lockres),
28928ddb7b00SSunil Mushran 		   lock_total_prmode(lockres),
28938ddb7b00SSunil Mushran 		   lock_total_exmode(lockres),
28948ddb7b00SSunil Mushran 		   lock_max_prmode(lockres),
28958ddb7b00SSunil Mushran 		   lock_max_exmode(lockres),
28968ddb7b00SSunil Mushran 		   lock_refresh(lockres));
28978ddb7b00SSunil Mushran 
2898ccd979bdSMark Fasheh 	/* End the line */
2899ccd979bdSMark Fasheh 	seq_printf(m, "\n");
2900ccd979bdSMark Fasheh 	return 0;
2901ccd979bdSMark Fasheh }
2902ccd979bdSMark Fasheh 
290390d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = {
2904ccd979bdSMark Fasheh 	.start =	ocfs2_dlm_seq_start,
2905ccd979bdSMark Fasheh 	.stop =		ocfs2_dlm_seq_stop,
2906ccd979bdSMark Fasheh 	.next =		ocfs2_dlm_seq_next,
2907ccd979bdSMark Fasheh 	.show =		ocfs2_dlm_seq_show,
2908ccd979bdSMark Fasheh };
2909ccd979bdSMark Fasheh 
2910ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
2911ccd979bdSMark Fasheh {
291233fa1d90SJoe Perches 	struct seq_file *seq = file->private_data;
2913ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = seq->private;
2914ccd979bdSMark Fasheh 	struct ocfs2_lock_res *res = &priv->p_iter_res;
2915ccd979bdSMark Fasheh 
2916ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
2917ccd979bdSMark Fasheh 	ocfs2_put_dlm_debug(priv->p_dlm_debug);
2918ccd979bdSMark Fasheh 	return seq_release_private(inode, file);
2919ccd979bdSMark Fasheh }
2920ccd979bdSMark Fasheh 
2921ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
2922ccd979bdSMark Fasheh {
2923ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv;
2924ccd979bdSMark Fasheh 	struct ocfs2_super *osb;
2925ccd979bdSMark Fasheh 
29261848cb55SRob Jones 	priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv));
2927ccd979bdSMark Fasheh 	if (!priv) {
29281848cb55SRob Jones 		mlog_errno(-ENOMEM);
29291848cb55SRob Jones 		return -ENOMEM;
2930ccd979bdSMark Fasheh 	}
29311848cb55SRob Jones 
29328e18e294STheodore Ts'o 	osb = inode->i_private;
2933ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(osb->osb_dlm_debug);
2934ccd979bdSMark Fasheh 	priv->p_dlm_debug = osb->osb_dlm_debug;
2935ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
2936ccd979bdSMark Fasheh 
2937ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(&priv->p_iter_res,
2938ccd979bdSMark Fasheh 				   priv->p_dlm_debug);
2939ccd979bdSMark Fasheh 
29401848cb55SRob Jones 	return 0;
2941ccd979bdSMark Fasheh }
2942ccd979bdSMark Fasheh 
29434b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = {
2944ccd979bdSMark Fasheh 	.open =		ocfs2_dlm_debug_open,
2945ccd979bdSMark Fasheh 	.release =	ocfs2_dlm_debug_release,
2946ccd979bdSMark Fasheh 	.read =		seq_read,
2947ccd979bdSMark Fasheh 	.llseek =	seq_lseek,
2948ccd979bdSMark Fasheh };
2949ccd979bdSMark Fasheh 
2950ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
2951ccd979bdSMark Fasheh {
2952ccd979bdSMark Fasheh 	int ret = 0;
2953ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2954ccd979bdSMark Fasheh 
2955ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = debugfs_create_file("locking_state",
2956ccd979bdSMark Fasheh 							 S_IFREG|S_IRUSR,
2957ccd979bdSMark Fasheh 							 osb->osb_debug_root,
2958ccd979bdSMark Fasheh 							 osb,
2959ccd979bdSMark Fasheh 							 &ocfs2_dlm_debug_fops);
29608f443e23SLinus Torvalds 	if (!dlm_debug->d_locking_state) {
2961ccd979bdSMark Fasheh 		ret = -EINVAL;
2962ccd979bdSMark Fasheh 		mlog(ML_ERROR,
2963ccd979bdSMark Fasheh 		     "Unable to create locking state debugfs file.\n");
2964ccd979bdSMark Fasheh 		goto out;
2965ccd979bdSMark Fasheh 	}
2966ccd979bdSMark Fasheh 
2967ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(dlm_debug);
2968ccd979bdSMark Fasheh out:
2969ccd979bdSMark Fasheh 	return ret;
2970ccd979bdSMark Fasheh }
2971ccd979bdSMark Fasheh 
2972ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
2973ccd979bdSMark Fasheh {
2974ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2975ccd979bdSMark Fasheh 
2976ccd979bdSMark Fasheh 	if (dlm_debug) {
2977ccd979bdSMark Fasheh 		debugfs_remove(dlm_debug->d_locking_state);
2978ccd979bdSMark Fasheh 		ocfs2_put_dlm_debug(dlm_debug);
2979ccd979bdSMark Fasheh 	}
2980ccd979bdSMark Fasheh }
2981ccd979bdSMark Fasheh 
2982ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb)
2983ccd979bdSMark Fasheh {
2984c271c5c2SSunil Mushran 	int status = 0;
29854670c46dSJoel Becker 	struct ocfs2_cluster_connection *conn = NULL;
2986ccd979bdSMark Fasheh 
29870abd6d18SMark Fasheh 	if (ocfs2_mount_local(osb)) {
29880abd6d18SMark Fasheh 		osb->node_num = 0;
2989c271c5c2SSunil Mushran 		goto local;
29900abd6d18SMark Fasheh 	}
2991c271c5c2SSunil Mushran 
2992ccd979bdSMark Fasheh 	status = ocfs2_dlm_init_debug(osb);
2993ccd979bdSMark Fasheh 	if (status < 0) {
2994ccd979bdSMark Fasheh 		mlog_errno(status);
2995ccd979bdSMark Fasheh 		goto bail;
2996ccd979bdSMark Fasheh 	}
2997ccd979bdSMark Fasheh 
299834d024f8SMark Fasheh 	/* launch downconvert thread */
29995afc44e2SJoseph Qi 	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s",
30005afc44e2SJoseph Qi 			osb->uuid_str);
300134d024f8SMark Fasheh 	if (IS_ERR(osb->dc_task)) {
300234d024f8SMark Fasheh 		status = PTR_ERR(osb->dc_task);
300334d024f8SMark Fasheh 		osb->dc_task = NULL;
3004ccd979bdSMark Fasheh 		mlog_errno(status);
3005ccd979bdSMark Fasheh 		goto bail;
3006ccd979bdSMark Fasheh 	}
3007ccd979bdSMark Fasheh 
3008ccd979bdSMark Fasheh 	/* for now, uuid == domain */
30099c6c877cSJoel Becker 	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
3010c74a3bddSGoldwyn Rodrigues 				       osb->osb_cluster_name,
3011c74a3bddSGoldwyn Rodrigues 				       strlen(osb->osb_cluster_name),
30129c6c877cSJoel Becker 				       osb->uuid_str,
30134670c46dSJoel Becker 				       strlen(osb->uuid_str),
3014553b5eb9SJoel Becker 				       &lproto, ocfs2_do_node_down, osb,
30154670c46dSJoel Becker 				       &conn);
30164670c46dSJoel Becker 	if (status) {
3017ccd979bdSMark Fasheh 		mlog_errno(status);
3018ccd979bdSMark Fasheh 		goto bail;
3019ccd979bdSMark Fasheh 	}
3020ccd979bdSMark Fasheh 
30213e834151SGoldwyn Rodrigues 	status = ocfs2_cluster_this_node(conn, &osb->node_num);
30220abd6d18SMark Fasheh 	if (status < 0) {
30230abd6d18SMark Fasheh 		mlog_errno(status);
30240abd6d18SMark Fasheh 		mlog(ML_ERROR,
30250abd6d18SMark Fasheh 		     "could not find this host's node number\n");
3026286eaa95SJoel Becker 		ocfs2_cluster_disconnect(conn, 0);
30270abd6d18SMark Fasheh 		goto bail;
30280abd6d18SMark Fasheh 	}
30290abd6d18SMark Fasheh 
3030c271c5c2SSunil Mushran local:
3031ccd979bdSMark Fasheh 	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
3032ccd979bdSMark Fasheh 	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
30336ca497a8Swengang wang 	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
303483273932SSrinivas Eeda 	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
3035ccd979bdSMark Fasheh 
30364670c46dSJoel Becker 	osb->cconn = conn;
3037ccd979bdSMark Fasheh bail:
3038ccd979bdSMark Fasheh 	if (status < 0) {
3039ccd979bdSMark Fasheh 		ocfs2_dlm_shutdown_debug(osb);
304034d024f8SMark Fasheh 		if (osb->dc_task)
304134d024f8SMark Fasheh 			kthread_stop(osb->dc_task);
3042ccd979bdSMark Fasheh 	}
3043ccd979bdSMark Fasheh 
3044ccd979bdSMark Fasheh 	return status;
3045ccd979bdSMark Fasheh }
3046ccd979bdSMark Fasheh 
3047286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
3048286eaa95SJoel Becker 			int hangup_pending)
3049ccd979bdSMark Fasheh {
3050ccd979bdSMark Fasheh 	ocfs2_drop_osb_locks(osb);
3051ccd979bdSMark Fasheh 
30524670c46dSJoel Becker 	/*
30534670c46dSJoel Becker 	 * Now that we have dropped all locks and ocfs2_dismount_volume()
30544670c46dSJoel Becker 	 * has disabled recovery, the DLM won't be talking to us.  It's
30554670c46dSJoel Becker 	 * safe to tear things down before disconnecting the cluster.
30564670c46dSJoel Becker 	 */
30574670c46dSJoel Becker 
305834d024f8SMark Fasheh 	if (osb->dc_task) {
305934d024f8SMark Fasheh 		kthread_stop(osb->dc_task);
306034d024f8SMark Fasheh 		osb->dc_task = NULL;
3061ccd979bdSMark Fasheh 	}
3062ccd979bdSMark Fasheh 
3063ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_super_lockres);
3064ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_rename_lockres);
30656ca497a8Swengang wang 	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
306683273932SSrinivas Eeda 	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
3067ccd979bdSMark Fasheh 
3068286eaa95SJoel Becker 	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
30694670c46dSJoel Becker 	osb->cconn = NULL;
3070ccd979bdSMark Fasheh 
3071ccd979bdSMark Fasheh 	ocfs2_dlm_shutdown_debug(osb);
3072ccd979bdSMark Fasheh }
3073ccd979bdSMark Fasheh 
3074ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb,
30750d5dc6c2SMark Fasheh 			   struct ocfs2_lock_res *lockres)
3076ccd979bdSMark Fasheh {
30777431cd7eSJoel Becker 	int ret;
3078ccd979bdSMark Fasheh 	unsigned long flags;
3079bd3e7610SJoel Becker 	u32 lkm_flags = 0;
3080ccd979bdSMark Fasheh 
3081ccd979bdSMark Fasheh 	/* We didn't get anywhere near actually using this lockres. */
3082ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
3083ccd979bdSMark Fasheh 		goto out;
3084ccd979bdSMark Fasheh 
3085b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3086bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
3087b80fc012SMark Fasheh 
3088ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3089ccd979bdSMark Fasheh 
3090ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
3091ccd979bdSMark Fasheh 			"lockres %s, flags 0x%lx\n",
3092ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3093ccd979bdSMark Fasheh 
3094ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_BUSY) {
3095ccd979bdSMark Fasheh 		mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
3096ccd979bdSMark Fasheh 		     "%u, unlock_action = %u\n",
3097ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags, lockres->l_action,
3098ccd979bdSMark Fasheh 		     lockres->l_unlock_action);
3099ccd979bdSMark Fasheh 
3100ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3101ccd979bdSMark Fasheh 
3102ccd979bdSMark Fasheh 		/* XXX: Today we just wait on any busy
3103ccd979bdSMark Fasheh 		 * locks... Perhaps we need to cancel converts in the
3104ccd979bdSMark Fasheh 		 * future? */
3105ccd979bdSMark Fasheh 		ocfs2_wait_on_busy_lock(lockres);
3106ccd979bdSMark Fasheh 
3107ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3108ccd979bdSMark Fasheh 	}
3109ccd979bdSMark Fasheh 
31100d5dc6c2SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
31110d5dc6c2SMark Fasheh 		if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
3112bd3e7610SJoel Becker 		    lockres->l_level == DLM_LOCK_EX &&
31130d5dc6c2SMark Fasheh 		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
31140d5dc6c2SMark Fasheh 			lockres->l_ops->set_lvb(lockres);
31150d5dc6c2SMark Fasheh 	}
3116ccd979bdSMark Fasheh 
3117ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY)
3118ccd979bdSMark Fasheh 		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
3119ccd979bdSMark Fasheh 		     lockres->l_name);
3120ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3121ccd979bdSMark Fasheh 		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
3122ccd979bdSMark Fasheh 
3123ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
3124ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3125ccd979bdSMark Fasheh 		goto out;
3126ccd979bdSMark Fasheh 	}
3127ccd979bdSMark Fasheh 
3128ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
3129ccd979bdSMark Fasheh 
3130ccd979bdSMark Fasheh 	/* make sure we never get here while waiting for an ast to
3131ccd979bdSMark Fasheh 	 * fire. */
3132ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
3133ccd979bdSMark Fasheh 
3134ccd979bdSMark Fasheh 	/* is this necessary? */
3135ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3136ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
3137ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3138ccd979bdSMark Fasheh 
3139ccd979bdSMark Fasheh 	mlog(0, "lock %s\n", lockres->l_name);
3140ccd979bdSMark Fasheh 
3141a796d286SJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
31427431cd7eSJoel Becker 	if (ret) {
31437431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3144ccd979bdSMark Fasheh 		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
3145cf0acdcdSJoel Becker 		ocfs2_dlm_dump_lksb(&lockres->l_lksb);
3146ccd979bdSMark Fasheh 		BUG();
3147ccd979bdSMark Fasheh 	}
314873ac36eaSColy Li 	mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
3149ccd979bdSMark Fasheh 	     lockres->l_name);
3150ccd979bdSMark Fasheh 
3151ccd979bdSMark Fasheh 	ocfs2_wait_on_busy_lock(lockres);
3152ccd979bdSMark Fasheh out:
3153ccd979bdSMark Fasheh 	return 0;
3154ccd979bdSMark Fasheh }
3155ccd979bdSMark Fasheh 
315684d86f83SJan Kara static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
315784d86f83SJan Kara 				       struct ocfs2_lock_res *lockres);
315884d86f83SJan Kara 
3159ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be
3160ccd979bdSMark Fasheh  * queued if blocking, but we still may have to wait on it
316134d024f8SMark Fasheh  * being dequeued from the downconvert thread before we can consider
3162ccd979bdSMark Fasheh  * it safe to drop.
3163ccd979bdSMark Fasheh  *
3164ccd979bdSMark Fasheh  * You can *not* attempt to call cluster_lock on this lockres anymore. */
316584d86f83SJan Kara void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
316684d86f83SJan Kara 				struct ocfs2_lock_res *lockres)
3167ccd979bdSMark Fasheh {
3168ccd979bdSMark Fasheh 	int status;
3169ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
317084d86f83SJan Kara 	unsigned long flags, flags2;
3171ccd979bdSMark Fasheh 
3172ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
3173ccd979bdSMark Fasheh 
3174ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3175ccd979bdSMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_FREEING;
317684d86f83SJan Kara 	if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
317784d86f83SJan Kara 		/*
317884d86f83SJan Kara 		 * We know the downconvert is queued but not in progress
317984d86f83SJan Kara 		 * because we are the downconvert thread and processing
318084d86f83SJan Kara 		 * different lock. So we can just remove the lock from the
318184d86f83SJan Kara 		 * queue. This is not only an optimization but also a way
318284d86f83SJan Kara 		 * to avoid the following deadlock:
318384d86f83SJan Kara 		 *   ocfs2_dentry_post_unlock()
318484d86f83SJan Kara 		 *     ocfs2_dentry_lock_put()
318584d86f83SJan Kara 		 *       ocfs2_drop_dentry_lock()
318684d86f83SJan Kara 		 *         iput()
318784d86f83SJan Kara 		 *           ocfs2_evict_inode()
318884d86f83SJan Kara 		 *             ocfs2_clear_inode()
318984d86f83SJan Kara 		 *               ocfs2_mark_lockres_freeing()
319084d86f83SJan Kara 		 *                 ... blocks waiting for OCFS2_LOCK_QUEUED
319184d86f83SJan Kara 		 *                 since we are the downconvert thread which
319284d86f83SJan Kara 		 *                 should clear the flag.
319384d86f83SJan Kara 		 */
319484d86f83SJan Kara 		spin_unlock_irqrestore(&lockres->l_lock, flags);
319584d86f83SJan Kara 		spin_lock_irqsave(&osb->dc_task_lock, flags2);
319684d86f83SJan Kara 		list_del_init(&lockres->l_blocked_list);
319784d86f83SJan Kara 		osb->blocked_lock_count--;
319884d86f83SJan Kara 		spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
319984d86f83SJan Kara 		/*
320084d86f83SJan Kara 		 * Warn if we recurse into another post_unlock call.  Strictly
320184d86f83SJan Kara 		 * speaking it isn't a problem but we need to be careful if
320284d86f83SJan Kara 		 * that happens (stack overflow, deadlocks, ...) so warn if
320384d86f83SJan Kara 		 * ocfs2 grows a path for which this can happen.
320484d86f83SJan Kara 		 */
320584d86f83SJan Kara 		WARN_ON_ONCE(lockres->l_ops->post_unlock);
320684d86f83SJan Kara 		/* Since the lock is freeing we don't do much in the fn below */
320784d86f83SJan Kara 		ocfs2_process_blocked_lock(osb, lockres);
320884d86f83SJan Kara 		return;
320984d86f83SJan Kara 	}
3210ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3211ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3212ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3213ccd979bdSMark Fasheh 
3214ccd979bdSMark Fasheh 		mlog(0, "Waiting on lockres %s\n", lockres->l_name);
3215ccd979bdSMark Fasheh 
3216ccd979bdSMark Fasheh 		status = ocfs2_wait_for_mask(&mw);
3217ccd979bdSMark Fasheh 		if (status)
3218ccd979bdSMark Fasheh 			mlog_errno(status);
3219ccd979bdSMark Fasheh 
3220ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3221ccd979bdSMark Fasheh 	}
3222ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3223ccd979bdSMark Fasheh }
3224ccd979bdSMark Fasheh 
/*
 * Convenience wrapper: mark @lockres as freeing and then drop its DLM
 * lock.  A non-zero result from ocfs2_drop_lock() is logged, not
 * returned.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int err;

	ocfs2_mark_lockres_freeing(osb, lockres);

	err = ocfs2_drop_lock(osb, lockres);
	if (err)
		mlog_errno(err);
}
3235d680efe9SMark Fasheh 
3236ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3237ccd979bdSMark Fasheh {
3238d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3239d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
32406ca497a8Swengang wang 	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
324183273932SSrinivas Eeda 	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3242ccd979bdSMark Fasheh }
3243ccd979bdSMark Fasheh 
3244ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode)
3245ccd979bdSMark Fasheh {
3246ccd979bdSMark Fasheh 	int status, err;
3247ccd979bdSMark Fasheh 
3248ccd979bdSMark Fasheh 	/* No need to call ocfs2_mark_lockres_freeing here -
3249ccd979bdSMark Fasheh 	 * ocfs2_clear_inode has done it for us. */
3250ccd979bdSMark Fasheh 
3251ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
325250008630STiger Yang 			      &OCFS2_I(inode)->ip_open_lockres);
3253ccd979bdSMark Fasheh 	if (err < 0)
3254ccd979bdSMark Fasheh 		mlog_errno(err);
3255ccd979bdSMark Fasheh 
3256ccd979bdSMark Fasheh 	status = err;
3257ccd979bdSMark Fasheh 
3258ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3259e63aecb6SMark Fasheh 			      &OCFS2_I(inode)->ip_inode_lockres);
3260ccd979bdSMark Fasheh 	if (err < 0)
3261ccd979bdSMark Fasheh 		mlog_errno(err);
3262ccd979bdSMark Fasheh 	if (err < 0 && !status)
3263ccd979bdSMark Fasheh 		status = err;
3264ccd979bdSMark Fasheh 
3265ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
32660d5dc6c2SMark Fasheh 			      &OCFS2_I(inode)->ip_rw_lockres);
3267ccd979bdSMark Fasheh 	if (err < 0)
3268ccd979bdSMark Fasheh 		mlog_errno(err);
3269ccd979bdSMark Fasheh 	if (err < 0 && !status)
3270ccd979bdSMark Fasheh 		status = err;
3271ccd979bdSMark Fasheh 
3272ccd979bdSMark Fasheh 	return status;
3273ccd979bdSMark Fasheh }
3274ccd979bdSMark Fasheh 
3275de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3276ccd979bdSMark Fasheh 					      int new_level)
3277ccd979bdSMark Fasheh {
3278ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3279ccd979bdSMark Fasheh 
3280bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
3281ccd979bdSMark Fasheh 
3282ccd979bdSMark Fasheh 	if (lockres->l_level <= new_level) {
32839b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
32849b915181SSunil Mushran 		     "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
32859b915181SSunil Mushran 		     "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
32869b915181SSunil Mushran 		     new_level, list_empty(&lockres->l_blocked_list),
32879b915181SSunil Mushran 		     list_empty(&lockres->l_mask_waiters), lockres->l_type,
32889b915181SSunil Mushran 		     lockres->l_flags, lockres->l_ro_holders,
32899b915181SSunil Mushran 		     lockres->l_ex_holders, lockres->l_action,
32909b915181SSunil Mushran 		     lockres->l_unlock_action, lockres->l_requested,
32919b915181SSunil Mushran 		     lockres->l_blocking, lockres->l_pending_gen);
3292ccd979bdSMark Fasheh 		BUG();
3293ccd979bdSMark Fasheh 	}
3294ccd979bdSMark Fasheh 
32959b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
32969b915181SSunil Mushran 	     lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);
3297ccd979bdSMark Fasheh 
3298ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_DOWNCONVERT;
3299ccd979bdSMark Fasheh 	lockres->l_requested = new_level;
3300ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3301de551246SJoel Becker 	return lockres_set_pending(lockres);
3302ccd979bdSMark Fasheh }
3303ccd979bdSMark Fasheh 
3304ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3305ccd979bdSMark Fasheh 				  struct ocfs2_lock_res *lockres,
3306ccd979bdSMark Fasheh 				  int new_level,
3307de551246SJoel Becker 				  int lvb,
3308de551246SJoel Becker 				  unsigned int generation)
3309ccd979bdSMark Fasheh {
3310bd3e7610SJoel Becker 	int ret;
3311bd3e7610SJoel Becker 	u32 dlm_flags = DLM_LKF_CONVERT;
3312ccd979bdSMark Fasheh 
33139b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
33149b915181SSunil Mushran 	     lockres->l_level, new_level);
33159b915181SSunil Mushran 
3316ccd979bdSMark Fasheh 	if (lvb)
3317bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_VALBLK;
3318ccd979bdSMark Fasheh 
33194670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
3320ccd979bdSMark Fasheh 			     new_level,
3321ccd979bdSMark Fasheh 			     &lockres->l_lksb,
3322ccd979bdSMark Fasheh 			     dlm_flags,
3323ccd979bdSMark Fasheh 			     lockres->l_name,
3324a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
3325de551246SJoel Becker 	lockres_clear_pending(lockres, generation, osb);
33267431cd7eSJoel Becker 	if (ret) {
33277431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
3328ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
3329ccd979bdSMark Fasheh 		goto bail;
3330ccd979bdSMark Fasheh 	}
3331ccd979bdSMark Fasheh 
3332ccd979bdSMark Fasheh 	ret = 0;
3333ccd979bdSMark Fasheh bail:
3334ccd979bdSMark Fasheh 	return ret;
3335ccd979bdSMark Fasheh }
3336ccd979bdSMark Fasheh 
333724ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
3338ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3339ccd979bdSMark Fasheh 				        struct ocfs2_lock_res *lockres)
3340ccd979bdSMark Fasheh {
3341ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3342ccd979bdSMark Fasheh 
3343ccd979bdSMark Fasheh 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3344ccd979bdSMark Fasheh 		/* If we're already trying to cancel a lock conversion
3345ccd979bdSMark Fasheh 		 * then just drop the spinlock and allow the caller to
3346ccd979bdSMark Fasheh 		 * requeue this lock. */
33479b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
3348ccd979bdSMark Fasheh 		return 0;
3349ccd979bdSMark Fasheh 	}
3350ccd979bdSMark Fasheh 
3351ccd979bdSMark Fasheh 	/* were we in a convert when we got the bast fire? */
3352ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
3353ccd979bdSMark Fasheh 	       lockres->l_action != OCFS2_AST_DOWNCONVERT);
3354ccd979bdSMark Fasheh 	/* set things up for the unlockast to know to just
3355ccd979bdSMark Fasheh 	 * clear out the ast_action and unset busy, etc. */
3356ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
3357ccd979bdSMark Fasheh 
3358ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
3359ccd979bdSMark Fasheh 			"lock %s, invalid flags: 0x%lx\n",
3360ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3361ccd979bdSMark Fasheh 
33629b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
33639b915181SSunil Mushran 
3364ccd979bdSMark Fasheh 	return 1;
3365ccd979bdSMark Fasheh }
3366ccd979bdSMark Fasheh 
3367ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3368ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres)
3369ccd979bdSMark Fasheh {
3370ccd979bdSMark Fasheh 	int ret;
3371ccd979bdSMark Fasheh 
33724670c46dSJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3373a796d286SJoel Becker 			       DLM_LKF_CANCEL);
33747431cd7eSJoel Becker 	if (ret) {
33757431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3376ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 0);
3377ccd979bdSMark Fasheh 	}
3378ccd979bdSMark Fasheh 
33799b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
3380ccd979bdSMark Fasheh 
3381ccd979bdSMark Fasheh 	return ret;
3382ccd979bdSMark Fasheh }
3383ccd979bdSMark Fasheh 
/*
 * Try to resolve a lockres that is flagged OCFS2_LOCK_BLOCKED.
 *
 * Outcomes, as reported through @ctl and the return value:
 *  - lockres no longer blocked, or sitting at NL: nothing to convert,
 *    returns 0 without touching ctl->requeue.
 *  - lockres busy: ctl->requeue = 1; unless OCFS2_LOCK_PENDING is set a
 *    cancel of the in-flight convert is attempted and its result returned.
 *  - upconvert finishing, incompatible holders, refresh in progress, or
 *    check_downconvert() refusing: ctl->requeue = 1, returns 0.
 *  - otherwise the optional downconvert_worker() is run with l_lock
 *    dropped (its result is stored in ctl->unblock_action), state is
 *    rechecked, ctl->requeue is set to 0 and the downconvert is issued;
 *    the result of ocfs2_downconvert_lock() is returned.
 */
3384b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3385ccd979bdSMark Fasheh 			      struct ocfs2_lock_res *lockres,
3386cc567d89SMark Fasheh 			      struct ocfs2_unblock_ctl *ctl)
3387ccd979bdSMark Fasheh {
3388ccd979bdSMark Fasheh 	unsigned long flags;
3389ccd979bdSMark Fasheh 	int blocking;
3390ccd979bdSMark Fasheh 	int new_level;
3391079b8057SSunil Mushran 	int level;
3392ccd979bdSMark Fasheh 	int ret = 0;
33935ef0d4eaSMark Fasheh 	int set_lvb = 0;
3394de551246SJoel Becker 	unsigned int gen;
3395ccd979bdSMark Fasheh 
3396ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3397ccd979bdSMark Fasheh 
3398ccd979bdSMark Fasheh recheck:
3399db0f6ce6SSunil Mushran 	/*
3400db0f6ce6SSunil Mushran 	 * Is it still blocking? If not, we have no more work to do.
3401db0f6ce6SSunil Mushran 	 */
3402db0f6ce6SSunil Mushran 	if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
3403db0f6ce6SSunil Mushran 		BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
3404db0f6ce6SSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3405db0f6ce6SSunil Mushran 		ret = 0;
3406db0f6ce6SSunil Mushran 		goto leave;
3407db0f6ce6SSunil Mushran 	}
3408db0f6ce6SSunil Mushran 
3409ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3410de551246SJoel Becker 		/* XXX
3411de551246SJoel Becker 		 * This is a *big* race.  The OCFS2_LOCK_PENDING flag
3412de551246SJoel Becker 		 * exists entirely for one reason - another thread has set
3413de551246SJoel Becker 		 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3414de551246SJoel Becker 		 *
3415de551246SJoel Becker 		 * If we do ocfs2_cancel_convert() before the other thread
3416de551246SJoel Becker 		 * calls dlm_lock(), our cancel will do nothing.  We will
3417de551246SJoel Becker 		 * get no ast, and we will have no way of knowing the
3418de551246SJoel Becker 		 * cancel failed.  Meanwhile, the other thread will call
3419de551246SJoel Becker 		 * into dlm_lock() and wait...forever.
3420de551246SJoel Becker 		 *
3421de551246SJoel Becker 		 * Why forever?  Because another node has asked for the
3422de551246SJoel Becker 		 * lock first; that's why we're here in unblock_lock().
3423de551246SJoel Becker 		 *
3424de551246SJoel Becker 		 * The solution is OCFS2_LOCK_PENDING.  When PENDING is
3425de551246SJoel Becker 		 * set, we just requeue the unblock.  Only when the other
3426de551246SJoel Becker 		 * thread has called dlm_lock() and cleared PENDING will
3427de551246SJoel Becker 		 * we then cancel their request.
3428de551246SJoel Becker 		 *
3429de551246SJoel Becker 		 * All callers of dlm_lock() must set OCFS2_LOCK_PENDING
3430de551246SJoel Becker 		 * at the same time they set OCFS2_LOCK_BUSY.  They must
3431de551246SJoel Becker 		 * clear OCFS2_LOCK_PENDING after dlm_lock() returns.
3432de551246SJoel Becker 		 */
34339b915181SSunil Mushran 		if (lockres->l_flags & OCFS2_LOCK_PENDING) {
34349b915181SSunil Mushran 			mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
34359b915181SSunil Mushran 			     lockres->l_name);
3436de551246SJoel Becker 			goto leave_requeue;
34379b915181SSunil Mushran 		}
3438de551246SJoel Becker 
		/*
		 * Busy but not pending: ask for a requeue and, if no cancel
		 * is already outstanding, cancel the in-flight convert.
		 */
3439d680efe9SMark Fasheh 		ctl->requeue = 1;
3440ccd979bdSMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
3441ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3442ccd979bdSMark Fasheh 		if (ret) {
3443ccd979bdSMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
3444ccd979bdSMark Fasheh 			if (ret < 0)
3445ccd979bdSMark Fasheh 				mlog_errno(ret);
3446ccd979bdSMark Fasheh 		}
3447ccd979bdSMark Fasheh 		goto leave;
3448ccd979bdSMark Fasheh 	}
3449ccd979bdSMark Fasheh 
3450a1912826SSunil Mushran 	/*
3451a1912826SSunil Mushran 	 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3452a1912826SSunil Mushran 	 * set when the ast is received for an upconvert just before the
3453a1912826SSunil Mushran 	 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3454a1912826SSunil Mushran 	 * on the heels of the ast, we want to delay the downconvert just
3455a1912826SSunil Mushran 	 * enough to allow the up requestor to do its task. Because this
3456a1912826SSunil Mushran 	 * lock is in the blocked queue, the lock will be downconverted
3457a1912826SSunil Mushran 	 * as soon as the requestor is done with the lock.
3458a1912826SSunil Mushran 	 */
3459a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3460a1912826SSunil Mushran 		goto leave_requeue;
3461a1912826SSunil Mushran 
34620d74125aSSunil Mushran 	/*
34630d74125aSSunil Mushran 	 * How can we block and yet be at NL?  We were trying to upconvert
34640d74125aSSunil Mushran 	 * from NL and got canceled.  The code comes back here, and now
34650d74125aSSunil Mushran 	 * we notice and clear BLOCKING.
34660d74125aSSunil Mushran 	 */
34670d74125aSSunil Mushran 	if (lockres->l_level == DLM_LOCK_NL) {
34680d74125aSSunil Mushran 		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
34699b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
34700d74125aSSunil Mushran 		lockres->l_blocking = DLM_LOCK_NL;
34710d74125aSSunil Mushran 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
34720d74125aSSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
34730d74125aSSunil Mushran 		goto leave;
34740d74125aSSunil Mushran 	}
34750d74125aSSunil Mushran 
3476ccd979bdSMark Fasheh 	/* if we're blocking an exclusive and we have *any* holders,
3477ccd979bdSMark Fasheh 	 * then requeue. */
3478bd3e7610SJoel Becker 	if ((lockres->l_blocking == DLM_LOCK_EX)
34799b915181SSunil Mushran 	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
34809b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
34819b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders,
34829b915181SSunil Mushran 		     lockres->l_ro_holders);
3483f7fbfdd1SMark Fasheh 		goto leave_requeue;
34849b915181SSunil Mushran 	}
3485ccd979bdSMark Fasheh 
3486ccd979bdSMark Fasheh 	/* If it's a PR we're blocking, then only
3487ccd979bdSMark Fasheh 	 * requeue if we've got any EX holders */
3488bd3e7610SJoel Becker 	if (lockres->l_blocking == DLM_LOCK_PR &&
34899b915181SSunil Mushran 	    lockres->l_ex_holders) {
34909b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
34919b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders);
3492f7fbfdd1SMark Fasheh 		goto leave_requeue;
34939b915181SSunil Mushran 	}
3494f7fbfdd1SMark Fasheh 
3495f7fbfdd1SMark Fasheh 	/*
3496f7fbfdd1SMark Fasheh 	 * Can we get a lock in this state if the holder counts are
3497f7fbfdd1SMark Fasheh 	 * zero? The meta data unblock code used to check this.
3498f7fbfdd1SMark Fasheh 	 */
3499f7fbfdd1SMark Fasheh 	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
35009b915181SSunil Mushran 	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
35019b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
35029b915181SSunil Mushran 		     lockres->l_name);
3503f7fbfdd1SMark Fasheh 		goto leave_requeue;
35049b915181SSunil Mushran 	}
3505ccd979bdSMark Fasheh 
	/* Target level is derived from what we are currently blocking. */
350616d5b956SMark Fasheh 	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
350716d5b956SMark Fasheh 
350816d5b956SMark Fasheh 	if (lockres->l_ops->check_downconvert
35099b915181SSunil Mushran 	    && !lockres->l_ops->check_downconvert(lockres, new_level)) {
35109b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
35119b915181SSunil Mushran 		     lockres->l_name);
351216d5b956SMark Fasheh 		goto leave_requeue;
35139b915181SSunil Mushran 	}
351416d5b956SMark Fasheh 
3515ccd979bdSMark Fasheh 	/* If we get here, then we know that there are no more
3516ccd979bdSMark Fasheh 	 * incompatible holders (and anyone asking for an incompatible
3517ccd979bdSMark Fasheh 	 * lock is blocked). We can now downconvert the lock */
3518cc567d89SMark Fasheh 	if (!lockres->l_ops->downconvert_worker)
3519ccd979bdSMark Fasheh 		goto downconvert;
3520ccd979bdSMark Fasheh 
3521ccd979bdSMark Fasheh 	/* Some lockres types want to do a bit of work before
3522ccd979bdSMark Fasheh 	 * downconverting a lock. Allow that here. The worker function
3523ccd979bdSMark Fasheh 	 * may sleep, so we save off a copy of what we're blocking as
3524ccd979bdSMark Fasheh 	 * it may change while we're not holding the spin lock. */
3525ccd979bdSMark Fasheh 	blocking = lockres->l_blocking;
3526079b8057SSunil Mushran 	level = lockres->l_level;
3527ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3528ccd979bdSMark Fasheh 
3529cc567d89SMark Fasheh 	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3530d680efe9SMark Fasheh 
35319b915181SSunil Mushran 	if (ctl->unblock_action == UNBLOCK_STOP_POST) {
35329b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
35339b915181SSunil Mushran 		     lockres->l_name);
3534d680efe9SMark Fasheh 		goto leave;
35359b915181SSunil Mushran 	}
3536ccd979bdSMark Fasheh 
3537ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3538079b8057SSunil Mushran 	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
3539ccd979bdSMark Fasheh 		/* If this changed underneath us, then we can't drop
3540ccd979bdSMark Fasheh 		 * it just yet. */
35419b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
35429b915181SSunil Mushran 		     "Recheck\n", lockres->l_name, blocking,
35439b915181SSunil Mushran 		     lockres->l_blocking, level, lockres->l_level);
3544ccd979bdSMark Fasheh 		goto recheck;
3545ccd979bdSMark Fasheh 	}
3546ccd979bdSMark Fasheh 
3547ccd979bdSMark Fasheh downconvert:
3548d680efe9SMark Fasheh 	ctl->requeue = 0;
3549ccd979bdSMark Fasheh 
35505ef0d4eaSMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3551bd3e7610SJoel Becker 		if (lockres->l_level == DLM_LOCK_EX)
35525ef0d4eaSMark Fasheh 			set_lvb = 1;
35535ef0d4eaSMark Fasheh 
35545ef0d4eaSMark Fasheh 		/*
35555ef0d4eaSMark Fasheh 		 * We only set the lvb if the lock has been fully
35565ef0d4eaSMark Fasheh 		 * refreshed - otherwise we risk setting stale
35575ef0d4eaSMark Fasheh 		 * data. Otherwise, there's no need to actually clear
35585ef0d4eaSMark Fasheh 		 * out the lvb here as its value is still valid.
35595ef0d4eaSMark Fasheh 		 */
35605ef0d4eaSMark Fasheh 		if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
35615ef0d4eaSMark Fasheh 			lockres->l_ops->set_lvb(lockres);
35625ef0d4eaSMark Fasheh 	}
35635ef0d4eaSMark Fasheh 
	/*
	 * Mark the lockres busy/pending under the spinlock, then issue the
	 * DLM request with the spinlock released.
	 */
3564de551246SJoel Becker 	gen = ocfs2_prepare_downconvert(lockres, new_level);
3565ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3566de551246SJoel Becker 	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3567de551246SJoel Becker 				     gen);
3568de551246SJoel Becker 
3569ccd979bdSMark Fasheh leave:
3570c1e8d35eSTao Ma 	if (ret)
3571c1e8d35eSTao Ma 		mlog_errno(ret);
3572ccd979bdSMark Fasheh 	return ret;
3573f7fbfdd1SMark Fasheh 
	/* Reached with l_lock still held: release it and ask for a retry. */
3574f7fbfdd1SMark Fasheh leave_requeue:
3575f7fbfdd1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3576f7fbfdd1SMark Fasheh 	ctl->requeue = 1;
3577f7fbfdd1SMark Fasheh 
3578f7fbfdd1SMark Fasheh 	return 0;
3579ccd979bdSMark Fasheh }
3580ccd979bdSMark Fasheh 
3581d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3582ccd979bdSMark Fasheh 				     int blocking)
3583ccd979bdSMark Fasheh {
3584ccd979bdSMark Fasheh 	struct inode *inode;
3585ccd979bdSMark Fasheh 	struct address_space *mapping;
35865e98d492SGoldwyn Rodrigues 	struct ocfs2_inode_info *oi;
3587ccd979bdSMark Fasheh 
3588ccd979bdSMark Fasheh        	inode = ocfs2_lock_res_inode(lockres);
3589ccd979bdSMark Fasheh 	mapping = inode->i_mapping;
3590ccd979bdSMark Fasheh 
35915e98d492SGoldwyn Rodrigues 	if (S_ISDIR(inode->i_mode)) {
35925e98d492SGoldwyn Rodrigues 		oi = OCFS2_I(inode);
35935e98d492SGoldwyn Rodrigues 		oi->ip_dir_lock_gen++;
35945e98d492SGoldwyn Rodrigues 		mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
35955e98d492SGoldwyn Rodrigues 		goto out;
35965e98d492SGoldwyn Rodrigues 	}
35975e98d492SGoldwyn Rodrigues 
35981044e401SMark Fasheh 	if (!S_ISREG(inode->i_mode))
3599f1f54068SMark Fasheh 		goto out;
3600f1f54068SMark Fasheh 
36017f4a2a97SMark Fasheh 	/*
36027f4a2a97SMark Fasheh 	 * We need this before the filemap_fdatawrite() so that it can
36037f4a2a97SMark Fasheh 	 * transfer the dirty bit from the PTE to the
36047f4a2a97SMark Fasheh 	 * page. Unfortunately this means that even for EX->PR
36057f4a2a97SMark Fasheh 	 * downconverts, we'll lose our mappings and have to build
36067f4a2a97SMark Fasheh 	 * them up again.
36077f4a2a97SMark Fasheh 	 */
36087f4a2a97SMark Fasheh 	unmap_mapping_range(mapping, 0, 0, 0);
36097f4a2a97SMark Fasheh 
3610ccd979bdSMark Fasheh 	if (filemap_fdatawrite(mapping)) {
3611b0697053SMark Fasheh 		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
3612b0697053SMark Fasheh 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
3613ccd979bdSMark Fasheh 	}
3614ccd979bdSMark Fasheh 	sync_mapping_buffers(mapping);
3615bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_EX) {
3616ccd979bdSMark Fasheh 		truncate_inode_pages(mapping, 0);
3617ccd979bdSMark Fasheh 	} else {
3618ccd979bdSMark Fasheh 		/* We only need to wait on the I/O if we're not also
3619ccd979bdSMark Fasheh 		 * truncating pages because truncate_inode_pages waits
3620ccd979bdSMark Fasheh 		 * for us above. We don't truncate pages if we're
3621ccd979bdSMark Fasheh 		 * blocking anything < EXMODE because we want to keep
3622ccd979bdSMark Fasheh 		 * them around in that case. */
3623ccd979bdSMark Fasheh 		filemap_fdatawait(mapping);
3624ccd979bdSMark Fasheh 	}
3625ccd979bdSMark Fasheh 
3626f1f54068SMark Fasheh out:
3627d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE;
3628ccd979bdSMark Fasheh }
3629ccd979bdSMark Fasheh 
3630a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
3631a4338481STao Ma 				 struct ocfs2_lock_res *lockres,
3632810d5aebSMark Fasheh 				 int new_level)
3633810d5aebSMark Fasheh {
3634a4338481STao Ma 	int checkpointed = ocfs2_ci_fully_checkpointed(ci);
3635810d5aebSMark Fasheh 
3636bd3e7610SJoel Becker 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3637bd3e7610SJoel Becker 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
3638810d5aebSMark Fasheh 
3639810d5aebSMark Fasheh 	if (checkpointed)
3640810d5aebSMark Fasheh 		return 1;
3641810d5aebSMark Fasheh 
3642a4338481STao Ma 	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
3643810d5aebSMark Fasheh 	return 0;
3644810d5aebSMark Fasheh }
3645810d5aebSMark Fasheh 
/*
 * Downconvert check for an inode lock: defers to
 * ocfs2_ci_checkpointed() on the metadata cache of the inode that
 * @lockres belongs to.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	struct ocfs2_caching_info *ci;

	ci = INODE_CACHE(ocfs2_lock_res_inode(lockres));

	return ocfs2_ci_checkpointed(ci, lockres, new_level);
}
3653a4338481STao Ma 
/*
 * set_lvb hook for an inode lock: hands the inode behind @lockres to
 * __ocfs2_stuff_meta_lvb().
 */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
3660810d5aebSMark Fasheh 
/*
 * Drops the final reference on our dentry lock. For now this runs in
 * the downconvert thread; the dlmglue API could be simplified in the
 * future by pushing these drops off to the ocfs2_wq instead.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
3672d680efe9SMark Fasheh 
3673d680efe9SMark Fasheh /*
3674d680efe9SMark Fasheh  * d_delete() matching dentries before the lock downconvert.
3675d680efe9SMark Fasheh  *
3676d680efe9SMark Fasheh  * At this point, any process waiting to destroy the
3677d680efe9SMark Fasheh  * dentry_lock due to last ref count is stopped by the
3678d680efe9SMark Fasheh  * OCFS2_LOCK_QUEUED flag.
3679d680efe9SMark Fasheh  *
3680d680efe9SMark Fasheh  * We have two potential problems
3681d680efe9SMark Fasheh  *
3682d680efe9SMark Fasheh  * 1) If we do the last reference drop on our dentry_lock (via dput)
3683d680efe9SMark Fasheh  *    we'll wind up in ocfs2_release_dentry_lock(), waiting on
3684d680efe9SMark Fasheh  *    the downconvert to finish. Instead we take an elevated
3685d680efe9SMark Fasheh  *    reference and push the drop until after we've completed our
3686d680efe9SMark Fasheh  *    unblock processing.
3687d680efe9SMark Fasheh  *
3688d680efe9SMark Fasheh  * 2) There might be another process with a final reference,
3689d680efe9SMark Fasheh  *    waiting on us to finish processing. If this is the case, we
3690d680efe9SMark Fasheh  *    detect it and exit out - there's no more dentries anyway.
3691d680efe9SMark Fasheh  */
3692d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3693d680efe9SMark Fasheh 				       int blocking)
3694d680efe9SMark Fasheh {
3695d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
3696d680efe9SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
3697d680efe9SMark Fasheh 	struct dentry *dentry;
3698d680efe9SMark Fasheh 	unsigned long flags;
3699d680efe9SMark Fasheh 	int extra_ref = 0;
3700d680efe9SMark Fasheh 
3701d680efe9SMark Fasheh 	/*
3702d680efe9SMark Fasheh 	 * This node is blocking another node from getting a read
3703d680efe9SMark Fasheh 	 * lock. This happens when we've renamed within a
3704d680efe9SMark Fasheh 	 * directory. We've forced the other nodes to d_delete(), but
3705d680efe9SMark Fasheh 	 * we never actually dropped our lock because it's still
3706d680efe9SMark Fasheh 	 * valid. The downconvert code will retain a PR for this node,
3707d680efe9SMark Fasheh 	 * so there's no further work to do.
3708d680efe9SMark Fasheh 	 */
3709bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_PR)
3710d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
3711d680efe9SMark Fasheh 
3712d680efe9SMark Fasheh 	/*
3713d680efe9SMark Fasheh 	 * Mark this inode as potentially orphaned. The code in
3714d680efe9SMark Fasheh 	 * ocfs2_delete_inode() will figure out whether it actually
3715d680efe9SMark Fasheh 	 * needs to be freed or not.
3716d680efe9SMark Fasheh 	 */
3717d680efe9SMark Fasheh 	spin_lock(&oi->ip_lock);
3718d680efe9SMark Fasheh 	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
3719d680efe9SMark Fasheh 	spin_unlock(&oi->ip_lock);
3720d680efe9SMark Fasheh 
3721d680efe9SMark Fasheh 	/*
3722d680efe9SMark Fasheh 	 * Yuck. We need to make sure however that the check of
3723d680efe9SMark Fasheh 	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
3724d680efe9SMark Fasheh 	 * respect to a reference decrement or the setting of that
3725d680efe9SMark Fasheh 	 * flag.
3726d680efe9SMark Fasheh 	 */
3727d680efe9SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3728d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
3729d680efe9SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
3730d680efe9SMark Fasheh 	    && dl->dl_count) {
3731d680efe9SMark Fasheh 		dl->dl_count++;
3732d680efe9SMark Fasheh 		extra_ref = 1;
3733d680efe9SMark Fasheh 	}
3734d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
3735d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3736d680efe9SMark Fasheh 
3737d680efe9SMark Fasheh 	mlog(0, "extra_ref = %d\n", extra_ref);
3738d680efe9SMark Fasheh 
3739d680efe9SMark Fasheh 	/*
3740d680efe9SMark Fasheh 	 * We have a process waiting on us in ocfs2_dentry_iput(),
3741d680efe9SMark Fasheh 	 * which means we can't have any more outstanding
3742d680efe9SMark Fasheh 	 * aliases. There's no need to do any more work.
3743d680efe9SMark Fasheh 	 */
3744d680efe9SMark Fasheh 	if (!extra_ref)
3745d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
3746d680efe9SMark Fasheh 
3747d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
3748d680efe9SMark Fasheh 	while (1) {
3749d680efe9SMark Fasheh 		dentry = ocfs2_find_local_alias(dl->dl_inode,
3750d680efe9SMark Fasheh 						dl->dl_parent_blkno, 1);
3751d680efe9SMark Fasheh 		if (!dentry)
3752d680efe9SMark Fasheh 			break;
3753d680efe9SMark Fasheh 		spin_unlock(&dentry_attach_lock);
3754d680efe9SMark Fasheh 
375510ab8811Salex chen 		if (S_ISDIR(dl->dl_inode->i_mode))
375610ab8811Salex chen 			shrink_dcache_parent(dentry);
375710ab8811Salex chen 
3758a455589fSAl Viro 		mlog(0, "d_delete(%pd);\n", dentry);
3759d680efe9SMark Fasheh 
3760d680efe9SMark Fasheh 		/*
3761d680efe9SMark Fasheh 		 * The following dcache calls may do an
3762d680efe9SMark Fasheh 		 * iput(). Normally we don't want that from the
3763d680efe9SMark Fasheh 		 * downconverting thread, but in this case it's ok
3764d680efe9SMark Fasheh 		 * because the requesting node already has an
3765d680efe9SMark Fasheh 		 * exclusive lock on the inode, so it can't be queued
3766d680efe9SMark Fasheh 		 * for a downconvert.
3767d680efe9SMark Fasheh 		 */
3768d680efe9SMark Fasheh 		d_delete(dentry);
3769d680efe9SMark Fasheh 		dput(dentry);
3770d680efe9SMark Fasheh 
3771d680efe9SMark Fasheh 		spin_lock(&dentry_attach_lock);
3772d680efe9SMark Fasheh 	}
3773d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
3774d680efe9SMark Fasheh 
3775d680efe9SMark Fasheh 	/*
3776d680efe9SMark Fasheh 	 * If we are the last holder of this dentry lock, there is no
3777d680efe9SMark Fasheh 	 * reason to downconvert so skip straight to the unlock.
3778d680efe9SMark Fasheh 	 */
3779d680efe9SMark Fasheh 	if (dl->dl_count == 1)
3780d680efe9SMark Fasheh 		return UNBLOCK_STOP_POST;
3781d680efe9SMark Fasheh 
3782d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE_POST;
3783d680efe9SMark Fasheh }
3784d680efe9SMark Fasheh 
37858dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
37868dec98edSTao Ma 					    int new_level)
37878dec98edSTao Ma {
37888dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
37898dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
37908dec98edSTao Ma 
37918dec98edSTao Ma 	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
37928dec98edSTao Ma }
37938dec98edSTao Ma 
37948dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
37958dec98edSTao Ma 					 int blocking)
37968dec98edSTao Ma {
37978dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
37988dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
37998dec98edSTao Ma 
38008dec98edSTao Ma 	ocfs2_metadata_cache_purge(&tree->rf_ci);
38018dec98edSTao Ma 
38028dec98edSTao Ma 	return UNBLOCK_CONTINUE;
38038dec98edSTao Ma }
38048dec98edSTao Ma 
38059e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
38069e33d69fSJan Kara {
38079e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb;
38089e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
38099e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
38109e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
38119e33d69fSJan Kara 
3812a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
38139e33d69fSJan Kara 	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
38149e33d69fSJan Kara 	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
38159e33d69fSJan Kara 	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
38169e33d69fSJan Kara 	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
38179e33d69fSJan Kara 	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
38189e33d69fSJan Kara 	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
38199e33d69fSJan Kara 	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
38209e33d69fSJan Kara }
38219e33d69fSJan Kara 
38229e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
38239e33d69fSJan Kara {
38249e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
38259e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
38269e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
38279e33d69fSJan Kara 
38289e33d69fSJan Kara 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
38299e33d69fSJan Kara 		ocfs2_cluster_unlock(osb, lockres, level);
38309e33d69fSJan Kara }
38319e33d69fSJan Kara 
38329e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
38339e33d69fSJan Kara {
38349e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
38359e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
38369e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
38379e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
383885eb8b73SJoel Becker 	struct buffer_head *bh = NULL;
38399e33d69fSJan Kara 	struct ocfs2_global_disk_dqinfo *gdinfo;
38409e33d69fSJan Kara 	int status = 0;
38419e33d69fSJan Kara 
38421c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
38431c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
38449e33d69fSJan Kara 		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
38459e33d69fSJan Kara 		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
38469e33d69fSJan Kara 		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
38479e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
38489e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
38499e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
38509e33d69fSJan Kara 					be32_to_cpu(lvb->lvb_free_entry);
38519e33d69fSJan Kara 	} else {
3852ae4f6ef1SJan Kara 		status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
3853ae4f6ef1SJan Kara 						     oinfo->dqi_giblk, &bh);
385485eb8b73SJoel Becker 		if (status) {
38559e33d69fSJan Kara 			mlog_errno(status);
38569e33d69fSJan Kara 			goto bail;
38579e33d69fSJan Kara 		}
38589e33d69fSJan Kara 		gdinfo = (struct ocfs2_global_disk_dqinfo *)
38599e33d69fSJan Kara 					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
38609e33d69fSJan Kara 		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
38619e33d69fSJan Kara 		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
38629e33d69fSJan Kara 		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
38639e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
38649e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
38659e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
38669e33d69fSJan Kara 					le32_to_cpu(gdinfo->dqi_free_entry);
38679e33d69fSJan Kara 		brelse(bh);
38689e33d69fSJan Kara 		ocfs2_track_lock_refresh(lockres);
38699e33d69fSJan Kara 	}
38709e33d69fSJan Kara 
38719e33d69fSJan Kara bail:
38729e33d69fSJan Kara 	return status;
38739e33d69fSJan Kara }
38749e33d69fSJan Kara 
38759e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file
38769e33d69fSJan Kara  * so that we can safely refresh quota info from disk. */
38779e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
38789e33d69fSJan Kara {
38799e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
38809e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
38819e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
38829e33d69fSJan Kara 	int status = 0;
38839e33d69fSJan Kara 
38849e33d69fSJan Kara 	/* On RO devices, locking really isn't needed... */
38859e33d69fSJan Kara 	if (ocfs2_is_hard_readonly(osb)) {
38869e33d69fSJan Kara 		if (ex)
38879e33d69fSJan Kara 			status = -EROFS;
38889e33d69fSJan Kara 		goto bail;
38899e33d69fSJan Kara 	}
38909e33d69fSJan Kara 	if (ocfs2_mount_local(osb))
38919e33d69fSJan Kara 		goto bail;
38929e33d69fSJan Kara 
38939e33d69fSJan Kara 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
38949e33d69fSJan Kara 	if (status < 0) {
38959e33d69fSJan Kara 		mlog_errno(status);
38969e33d69fSJan Kara 		goto bail;
38979e33d69fSJan Kara 	}
38989e33d69fSJan Kara 	if (!ocfs2_should_refresh_lock_res(lockres))
38999e33d69fSJan Kara 		goto bail;
39009e33d69fSJan Kara 	/* OK, we have the lock but we need to refresh the quota info */
39019e33d69fSJan Kara 	status = ocfs2_refresh_qinfo(oinfo);
39029e33d69fSJan Kara 	if (status)
39039e33d69fSJan Kara 		ocfs2_qinfo_unlock(oinfo, ex);
39049e33d69fSJan Kara 	ocfs2_complete_lock_res_refresh(lockres, status);
39059e33d69fSJan Kara bail:
39069e33d69fSJan Kara 	return status;
39079e33d69fSJan Kara }
39089e33d69fSJan Kara 
39098dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
39108dec98edSTao Ma {
39118dec98edSTao Ma 	int status;
39128dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
39138dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
39148dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
39158dec98edSTao Ma 
39168dec98edSTao Ma 
39178dec98edSTao Ma 	if (ocfs2_is_hard_readonly(osb))
39188dec98edSTao Ma 		return -EROFS;
39198dec98edSTao Ma 
39208dec98edSTao Ma 	if (ocfs2_mount_local(osb))
39218dec98edSTao Ma 		return 0;
39228dec98edSTao Ma 
39238dec98edSTao Ma 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
39248dec98edSTao Ma 	if (status < 0)
39258dec98edSTao Ma 		mlog_errno(status);
39268dec98edSTao Ma 
39278dec98edSTao Ma 	return status;
39288dec98edSTao Ma }
39298dec98edSTao Ma 
39308dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
39318dec98edSTao Ma {
39328dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
39338dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
39348dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
39358dec98edSTao Ma 
39368dec98edSTao Ma 	if (!ocfs2_mount_local(osb))
39378dec98edSTao Ma 		ocfs2_cluster_unlock(osb, lockres, level);
39388dec98edSTao Ma }
39398dec98edSTao Ma 
394000600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
3941ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *lockres)
3942ccd979bdSMark Fasheh {
3943ccd979bdSMark Fasheh 	int status;
3944d680efe9SMark Fasheh 	struct ocfs2_unblock_ctl ctl = {0, 0,};
3945ccd979bdSMark Fasheh 	unsigned long flags;
3946ccd979bdSMark Fasheh 
3947ccd979bdSMark Fasheh 	/* Our reference to the lockres in this function can be
3948ccd979bdSMark Fasheh 	 * considered valid until we remove the OCFS2_LOCK_QUEUED
3949ccd979bdSMark Fasheh 	 * flag. */
3950ccd979bdSMark Fasheh 
3951ccd979bdSMark Fasheh 	BUG_ON(!lockres);
3952ccd979bdSMark Fasheh 	BUG_ON(!lockres->l_ops);
3953ccd979bdSMark Fasheh 
39549b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);
3955ccd979bdSMark Fasheh 
3956ccd979bdSMark Fasheh 	/* Detect whether a lock has been marked as going away while
395734d024f8SMark Fasheh 	 * the downconvert thread was processing other things. A lock can
3958ccd979bdSMark Fasheh 	 * still be marked with OCFS2_LOCK_FREEING after this check,
3959ccd979bdSMark Fasheh 	 * but short circuiting here will still save us some
3960ccd979bdSMark Fasheh 	 * performance. */
3961ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3962ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING)
3963ccd979bdSMark Fasheh 		goto unqueue;
3964ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3965ccd979bdSMark Fasheh 
3966b5e500e2SMark Fasheh 	status = ocfs2_unblock_lock(osb, lockres, &ctl);
3967ccd979bdSMark Fasheh 	if (status < 0)
3968ccd979bdSMark Fasheh 		mlog_errno(status);
3969ccd979bdSMark Fasheh 
3970ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3971ccd979bdSMark Fasheh unqueue:
3972d680efe9SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
3973ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
3974ccd979bdSMark Fasheh 	} else
3975ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
3976ccd979bdSMark Fasheh 
39779b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
3978d680efe9SMark Fasheh 	     ctl.requeue ? "yes" : "no");
3979ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3980ccd979bdSMark Fasheh 
3981d680efe9SMark Fasheh 	if (ctl.unblock_action != UNBLOCK_CONTINUE
3982d680efe9SMark Fasheh 	    && lockres->l_ops->post_unlock)
3983d680efe9SMark Fasheh 		lockres->l_ops->post_unlock(osb, lockres);
3984ccd979bdSMark Fasheh }
3985ccd979bdSMark Fasheh 
3986ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
3987ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
3988ccd979bdSMark Fasheh {
3989a75e9ccaSSrinivas Eeda 	unsigned long flags;
3990a75e9ccaSSrinivas Eeda 
3991ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3992ccd979bdSMark Fasheh 
3993ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
3994ccd979bdSMark Fasheh 		/* Do not schedule a lock for downconvert when it's on
3995ccd979bdSMark Fasheh 		 * the way to destruction - any nodes wanting access
3996ccd979bdSMark Fasheh 		 * to the resource will get it soon. */
39979b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
3998ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags);
3999ccd979bdSMark Fasheh 		return;
4000ccd979bdSMark Fasheh 	}
4001ccd979bdSMark Fasheh 
4002ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
4003ccd979bdSMark Fasheh 
4004a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
4005ccd979bdSMark Fasheh 	if (list_empty(&lockres->l_blocked_list)) {
4006ccd979bdSMark Fasheh 		list_add_tail(&lockres->l_blocked_list,
4007ccd979bdSMark Fasheh 			      &osb->blocked_lock_list);
4008ccd979bdSMark Fasheh 		osb->blocked_lock_count++;
4009ccd979bdSMark Fasheh 	}
4010a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
4011ccd979bdSMark Fasheh }
401234d024f8SMark Fasheh 
401334d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
401434d024f8SMark Fasheh {
401534d024f8SMark Fasheh 	unsigned long processed;
4016a75e9ccaSSrinivas Eeda 	unsigned long flags;
401734d024f8SMark Fasheh 	struct ocfs2_lock_res *lockres;
401834d024f8SMark Fasheh 
4019a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
402034d024f8SMark Fasheh 	/* grab this early so we know to try again if a state change and
402134d024f8SMark Fasheh 	 * wake happens part-way through our work  */
402234d024f8SMark Fasheh 	osb->dc_work_sequence = osb->dc_wake_sequence;
402334d024f8SMark Fasheh 
402434d024f8SMark Fasheh 	processed = osb->blocked_lock_count;
4025209f7512SJoseph Qi 	/*
4026209f7512SJoseph Qi 	 * blocked lock processing in this loop might call iput which can
4027209f7512SJoseph Qi 	 * remove items off osb->blocked_lock_list. Downconvert up to
4028209f7512SJoseph Qi 	 * 'processed' number of locks, but stop short if we had some
4029209f7512SJoseph Qi 	 * removed in ocfs2_mark_lockres_freeing when downconverting.
4030209f7512SJoseph Qi 	 */
4031209f7512SJoseph Qi 	while (processed && !list_empty(&osb->blocked_lock_list)) {
403234d024f8SMark Fasheh 		lockres = list_entry(osb->blocked_lock_list.next,
403334d024f8SMark Fasheh 				     struct ocfs2_lock_res, l_blocked_list);
403434d024f8SMark Fasheh 		list_del_init(&lockres->l_blocked_list);
403534d024f8SMark Fasheh 		osb->blocked_lock_count--;
4036a75e9ccaSSrinivas Eeda 		spin_unlock_irqrestore(&osb->dc_task_lock, flags);
403734d024f8SMark Fasheh 
403834d024f8SMark Fasheh 		BUG_ON(!processed);
403934d024f8SMark Fasheh 		processed--;
404034d024f8SMark Fasheh 
404134d024f8SMark Fasheh 		ocfs2_process_blocked_lock(osb, lockres);
404234d024f8SMark Fasheh 
4043a75e9ccaSSrinivas Eeda 		spin_lock_irqsave(&osb->dc_task_lock, flags);
404434d024f8SMark Fasheh 	}
4045a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
404634d024f8SMark Fasheh }
404734d024f8SMark Fasheh 
404834d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
404934d024f8SMark Fasheh {
405034d024f8SMark Fasheh 	int empty = 0;
4051a75e9ccaSSrinivas Eeda 	unsigned long flags;
405234d024f8SMark Fasheh 
4053a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
405434d024f8SMark Fasheh 	if (list_empty(&osb->blocked_lock_list))
405534d024f8SMark Fasheh 		empty = 1;
405634d024f8SMark Fasheh 
4057a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
405834d024f8SMark Fasheh 	return empty;
405934d024f8SMark Fasheh }
406034d024f8SMark Fasheh 
406134d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
406234d024f8SMark Fasheh {
406334d024f8SMark Fasheh 	int should_wake = 0;
4064a75e9ccaSSrinivas Eeda 	unsigned long flags;
406534d024f8SMark Fasheh 
4066a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
406734d024f8SMark Fasheh 	if (osb->dc_work_sequence != osb->dc_wake_sequence)
406834d024f8SMark Fasheh 		should_wake = 1;
4069a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
407034d024f8SMark Fasheh 
407134d024f8SMark Fasheh 	return should_wake;
407234d024f8SMark Fasheh }
407334d024f8SMark Fasheh 
4074200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg)
407534d024f8SMark Fasheh {
407634d024f8SMark Fasheh 	int status = 0;
407734d024f8SMark Fasheh 	struct ocfs2_super *osb = arg;
407834d024f8SMark Fasheh 
407934d024f8SMark Fasheh 	/* only quit once we've been asked to stop and there is no more
408034d024f8SMark Fasheh 	 * work available */
408134d024f8SMark Fasheh 	while (!(kthread_should_stop() &&
408234d024f8SMark Fasheh 		ocfs2_downconvert_thread_lists_empty(osb))) {
408334d024f8SMark Fasheh 
408434d024f8SMark Fasheh 		wait_event_interruptible(osb->dc_event,
408534d024f8SMark Fasheh 					 ocfs2_downconvert_thread_should_wake(osb) ||
408634d024f8SMark Fasheh 					 kthread_should_stop());
408734d024f8SMark Fasheh 
408834d024f8SMark Fasheh 		mlog(0, "downconvert_thread: awoken\n");
408934d024f8SMark Fasheh 
409034d024f8SMark Fasheh 		ocfs2_downconvert_thread_do_work(osb);
409134d024f8SMark Fasheh 	}
409234d024f8SMark Fasheh 
409334d024f8SMark Fasheh 	osb->dc_task = NULL;
409434d024f8SMark Fasheh 	return status;
409534d024f8SMark Fasheh }
409634d024f8SMark Fasheh 
409734d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
409834d024f8SMark Fasheh {
4099a75e9ccaSSrinivas Eeda 	unsigned long flags;
4100a75e9ccaSSrinivas Eeda 
4101a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
410234d024f8SMark Fasheh 	/* make sure the voting thread gets a swipe at whatever changes
410334d024f8SMark Fasheh 	 * the caller may have made to the voting state */
410434d024f8SMark Fasheh 	osb->dc_wake_sequence++;
4105a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
410634d024f8SMark Fasheh 	wake_up(&osb->dc_event);
410734d024f8SMark Fasheh }
4108