xref: /openbmc/linux/fs/ocfs2/dlmglue.c (revision 03ab30f7)
1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*-
2ccd979bdSMark Fasheh  * vim: noexpandtab sw=8 ts=8 sts=0:
3ccd979bdSMark Fasheh  *
4ccd979bdSMark Fasheh  * dlmglue.c
5ccd979bdSMark Fasheh  *
6ccd979bdSMark Fasheh  * Code which implements an OCFS2 specific interface to our DLM.
7ccd979bdSMark Fasheh  *
8ccd979bdSMark Fasheh  * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
9ccd979bdSMark Fasheh  *
10ccd979bdSMark Fasheh  * This program is free software; you can redistribute it and/or
11ccd979bdSMark Fasheh  * modify it under the terms of the GNU General Public
12ccd979bdSMark Fasheh  * License as published by the Free Software Foundation; either
13ccd979bdSMark Fasheh  * version 2 of the License, or (at your option) any later version.
14ccd979bdSMark Fasheh  *
15ccd979bdSMark Fasheh  * This program is distributed in the hope that it will be useful,
16ccd979bdSMark Fasheh  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17ccd979bdSMark Fasheh  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18ccd979bdSMark Fasheh  * General Public License for more details.
19ccd979bdSMark Fasheh  *
20ccd979bdSMark Fasheh  * You should have received a copy of the GNU General Public
21ccd979bdSMark Fasheh  * License along with this program; if not, write to the
22ccd979bdSMark Fasheh  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23ccd979bdSMark Fasheh  * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh  */
25ccd979bdSMark Fasheh 
26ccd979bdSMark Fasheh #include <linux/types.h>
27ccd979bdSMark Fasheh #include <linux/slab.h>
28ccd979bdSMark Fasheh #include <linux/highmem.h>
29ccd979bdSMark Fasheh #include <linux/mm.h>
30ccd979bdSMark Fasheh #include <linux/kthread.h>
31ccd979bdSMark Fasheh #include <linux/pagemap.h>
32ccd979bdSMark Fasheh #include <linux/debugfs.h>
33ccd979bdSMark Fasheh #include <linux/seq_file.h>
348ddb7b00SSunil Mushran #include <linux/time.h>
359e33d69fSJan Kara #include <linux/quotaops.h>
36ccd979bdSMark Fasheh 
37ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE
38ccd979bdSMark Fasheh #include <cluster/masklog.h>
39ccd979bdSMark Fasheh 
40ccd979bdSMark Fasheh #include "ocfs2.h"
41d24fbcdaSJoel Becker #include "ocfs2_lockingver.h"
42ccd979bdSMark Fasheh 
43ccd979bdSMark Fasheh #include "alloc.h"
44d680efe9SMark Fasheh #include "dcache.h"
45ccd979bdSMark Fasheh #include "dlmglue.h"
46ccd979bdSMark Fasheh #include "extent_map.h"
477f1a37e3STiger Yang #include "file.h"
48ccd979bdSMark Fasheh #include "heartbeat.h"
49ccd979bdSMark Fasheh #include "inode.h"
50ccd979bdSMark Fasheh #include "journal.h"
5124ef1815SJoel Becker #include "stackglue.h"
52ccd979bdSMark Fasheh #include "slot_map.h"
53ccd979bdSMark Fasheh #include "super.h"
54ccd979bdSMark Fasheh #include "uptodate.h"
559e33d69fSJan Kara #include "quota.h"
568dec98edSTao Ma #include "refcounttree.h"
57ccd979bdSMark Fasheh 
58ccd979bdSMark Fasheh #include "buffer_head_io.h"
59ccd979bdSMark Fasheh 
60ccd979bdSMark Fasheh struct ocfs2_mask_waiter {
61ccd979bdSMark Fasheh 	struct list_head	mw_item;
62ccd979bdSMark Fasheh 	int			mw_status;
63ccd979bdSMark Fasheh 	struct completion	mw_complete;
64ccd979bdSMark Fasheh 	unsigned long		mw_mask;
65ccd979bdSMark Fasheh 	unsigned long		mw_goal;
668ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
675bc970e8SSunil Mushran 	ktime_t			mw_lock_start;
688ddb7b00SSunil Mushran #endif
69ccd979bdSMark Fasheh };
70ccd979bdSMark Fasheh 
/*
 * ->get_osb() callbacks for lock types whose ->l_priv does not point
 * straight at the ocfs2_super. The definitions come later in the file.
 */
static struct ocfs2_super *
ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
75ccd979bdSMark Fasheh 
/*
 * Values a ->downconvert_worker() callback may return.
 *
 * They steer the precise actions taken by ocfs2_unblock_lock() and
 * ocfs2_process_blocked_lock().
 */
enum ocfs2_unblock_action {
	/* Continue downconvert */
	UNBLOCK_CONTINUE = 0,
	/* Continue downconvert, fire ->post_unlock callback */
	UNBLOCK_CONTINUE_POST = 1,
	/* Do not downconvert, fire ->post_unlock() callback. */
	UNBLOCK_STOP_POST = 2,
};
90d680efe9SMark Fasheh 
91d680efe9SMark Fasheh struct ocfs2_unblock_ctl {
92d680efe9SMark Fasheh 	int requeue;
93d680efe9SMark Fasheh 	enum ocfs2_unblock_action unblock_action;
94d680efe9SMark Fasheh };
95d680efe9SMark Fasheh 
/*
 * Lockdep class keys, one per lock type.
 *
 * In the visible code these are consumed only by
 * ocfs2_lock_res_init_common(), and only when CONFIG_DEBUG_LOCK_ALLOC
 * is set. Keep the array file-local so the generic name does not leak
 * into the kernel's global namespace, and compile it out when lockdep
 * map support is disabled.
 *
 * NOTE(review): confirm nothing outside this file, or outside a
 * CONFIG_DEBUG_LOCK_ALLOC section, references lockdep_keys.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
#endif
98cb25797dSJan Kara 
/*
 * Forward declarations of the per-lock-type callbacks that are wired
 * into the ocfs2_lock_res_ops tables below.
 */

/* Inode (meta) lock callbacks. */
static int
ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level);
static void
ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
static int
ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, int blocking);

/* Dentry lock callbacks. */
static int
ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, int blocking);
static void
ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
			 struct ocfs2_lock_res *lockres);

/* Quota info lock callback. */
static void
ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

/* Refcount block lock callbacks. */
static int
ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
				 int new_level);
static int
ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres, int blocking);
1188dec98edSTao Ma 
/*
 * Dump the meta LVB of @res at mlog level @lvl, tagging the output with
 * the function name and line number of the call site.
 */
#define mlog_meta_lvb(lvl, res)						\
	ocfs2_dump_meta_lvb_info(lvl, __PRETTY_FUNCTION__, __LINE__, res)
1206cb129f5SAdrian Bunk 
1216cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. */
1226cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level,
1236cb129f5SAdrian Bunk 				     const char *function,
1246cb129f5SAdrian Bunk 				     unsigned int line,
1256cb129f5SAdrian Bunk 				     struct ocfs2_lock_res *lockres)
1266cb129f5SAdrian Bunk {
127a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1286cb129f5SAdrian Bunk 
1296cb129f5SAdrian Bunk 	mlog(level, "LVB information for %s (called from %s:%u):\n",
1306cb129f5SAdrian Bunk 	     lockres->l_name, function, line);
1316cb129f5SAdrian Bunk 	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
1326cb129f5SAdrian Bunk 	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
1336cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_igeneration));
1346cb129f5SAdrian Bunk 	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
1356cb129f5SAdrian Bunk 	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
1366cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
1376cb129f5SAdrian Bunk 	     be16_to_cpu(lvb->lvb_imode));
1386cb129f5SAdrian Bunk 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
1396cb129f5SAdrian Bunk 	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
1406cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
1416cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
1426cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
1436cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iattr));
1446cb129f5SAdrian Bunk }
1456cb129f5SAdrian Bunk 
1466cb129f5SAdrian Bunk 
/*
 * OCFS2 Lock Resource Operations
 *
 * Per-lock-type hooks and flags that fine tune the behavior of the
 * generic dlmglue locking infrastructure.
 *
 * The most basic lock types can point ->l_priv at their struct
 * ocfs2_super and let the default actions manage things.
 *
 * For now every lock type must also implement an init function and
 * trivial lock/unlock wrappers. ocfs2_simple_drop_lockres() should be
 * called when the lock is no longer needed (i.e., at object
 * destruction time).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Translate an ocfs2_lock_res * into an ocfs2_super *. Supply
	 * this callback when ->l_priv is not an ocfs2_super pointer.
	 */
	struct ocfs2_super *(*get_osb)(struct ocfs2_lock_res *lockres);

	/*
	 * Optional. Invoked in the downconvert thread after a
	 * successful downconvert. The lockres is not referenced again
	 * once this callback has been called, so freeing memory etc.
	 * from here is safe.
	 *
	 * Exactly when this fires is decided by the return value of
	 * ->downconvert_worker().
	 */
	void (*post_unlock)(struct ocfs2_super *osb,
			    struct ocfs2_lock_res *lockres);

	/*
	 * Lets a lock type add its own checks for whether a
	 * downconvert is currently safe. Return 0 to have the
	 * downconvert re-queued for later, nonzero to go ahead.
	 *
	 * Most locks need nothing beyond the default check that no
	 * incompatible holders exist.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *lockres,
				 int new_level);

	/*
	 * Lets a lock type fill in the lock value block. Invoked on
	 * downconvert and when the lock is dropped.
	 *
	 * Lock types relying on this should set LOCK_TYPE_USES_LVB in
	 * the flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *lockres);

	/*
	 * Invoked from the downconvert thread once it has been decided
	 * that the lock will be downconverted. No locks are held at
	 * that point, so the callback may do work that can schedule
	 * (syncing out data, etc).
	 *
	 * Must return one of the ocfs2_unblock_action values, chosen
	 * according to what the thread should do next.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *lockres,
				  int blocking);

	/*
	 * LOCK_TYPE_* flags describing the specific requirements of a
	 * lock type. Each flag is described at its definition.
	 */
	int flags;
};
218ccd979bdSMark Fasheh 
/*
 * The lock type wants a chance to "refresh" potentially stale data
 * whenever a meaningful (PRMODE or EXMODE) lock level is first
 * obtained. With this flag set, the ast function sets
 * OCFS2_LOCK_NEEDS_REFRESH in the individual lockres' l_flags; the
 * locking wrapper is expected to clear OCFS2_LOCK_NEEDS_REFRESH again
 * once it is done.
 */
#define LOCK_TYPE_REQUIRES_REFRESH	0x1

/*
 * The lock type makes use of the lock value block. A ->set_lvb
 * callback must be defined for it.
 */
#define LOCK_TYPE_USES_LVB		0x2
234b80fc012SMark Fasheh 
235ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
23654a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
237f625c979SMark Fasheh 	.flags		= 0,
238ccd979bdSMark Fasheh };
239ccd979bdSMark Fasheh 
240e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
24154a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
242810d5aebSMark Fasheh 	.check_downconvert = ocfs2_check_meta_downconvert,
243810d5aebSMark Fasheh 	.set_lvb	= ocfs2_set_meta_lvb,
244f1f54068SMark Fasheh 	.downconvert_worker = ocfs2_data_convert_worker,
245b80fc012SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
246ccd979bdSMark Fasheh };
247ccd979bdSMark Fasheh 
248ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = {
249f625c979SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
250ccd979bdSMark Fasheh };
251ccd979bdSMark Fasheh 
252ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
253f625c979SMark Fasheh 	.flags		= 0,
254ccd979bdSMark Fasheh };
255ccd979bdSMark Fasheh 
2566ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
2576ca497a8Swengang wang 	.flags		= 0,
2586ca497a8Swengang wang };
2596ca497a8Swengang wang 
26083273932SSrinivas Eeda static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
26183273932SSrinivas Eeda 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
26283273932SSrinivas Eeda };
26383273932SSrinivas Eeda 
264d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
26554a7e755SMark Fasheh 	.get_osb	= ocfs2_get_dentry_osb,
266d680efe9SMark Fasheh 	.post_unlock	= ocfs2_dentry_post_unlock,
267cc567d89SMark Fasheh 	.downconvert_worker = ocfs2_dentry_convert_worker,
268f625c979SMark Fasheh 	.flags		= 0,
269d680efe9SMark Fasheh };
270d680efe9SMark Fasheh 
27150008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
27250008630STiger Yang 	.get_osb	= ocfs2_get_inode_osb,
27350008630STiger Yang 	.flags		= 0,
27450008630STiger Yang };
27550008630STiger Yang 
276cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
277cf8e06f1SMark Fasheh 	.get_osb	= ocfs2_get_file_osb,
278cf8e06f1SMark Fasheh 	.flags		= 0,
279cf8e06f1SMark Fasheh };
280cf8e06f1SMark Fasheh 
2819e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
2829e33d69fSJan Kara 	.set_lvb	= ocfs2_set_qinfo_lvb,
2839e33d69fSJan Kara 	.get_osb	= ocfs2_get_qinfo_osb,
2849e33d69fSJan Kara 	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
2859e33d69fSJan Kara };
2869e33d69fSJan Kara 
2878dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
2888dec98edSTao Ma 	.check_downconvert = ocfs2_check_refcount_downconvert,
2898dec98edSTao Ma 	.downconvert_worker = ocfs2_refcount_convert_worker,
2908dec98edSTao Ma 	.flags		= 0,
2918dec98edSTao Ma };
2928dec98edSTao Ma 
293ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
294ccd979bdSMark Fasheh {
295ccd979bdSMark Fasheh 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
29650008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_RW ||
29750008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
298ccd979bdSMark Fasheh }
299ccd979bdSMark Fasheh 
300c0e41338SJoel Becker static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
301a796d286SJoel Becker {
302a796d286SJoel Becker 	return container_of(lksb, struct ocfs2_lock_res, l_lksb);
303a796d286SJoel Becker }
304a796d286SJoel Becker 
305ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
306ccd979bdSMark Fasheh {
307ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_is_inode_lock(lockres));
308ccd979bdSMark Fasheh 
309ccd979bdSMark Fasheh 	return (struct inode *) lockres->l_priv;
310ccd979bdSMark Fasheh }
311ccd979bdSMark Fasheh 
312d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
313d680efe9SMark Fasheh {
314d680efe9SMark Fasheh 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
315d680efe9SMark Fasheh 
316d680efe9SMark Fasheh 	return (struct ocfs2_dentry_lock *)lockres->l_priv;
317d680efe9SMark Fasheh }
318d680efe9SMark Fasheh 
3199e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
3209e33d69fSJan Kara {
3219e33d69fSJan Kara 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
3229e33d69fSJan Kara 
3239e33d69fSJan Kara 	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
3249e33d69fSJan Kara }
3259e33d69fSJan Kara 
3268dec98edSTao Ma static inline struct ocfs2_refcount_tree *
3278dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
3288dec98edSTao Ma {
3298dec98edSTao Ma 	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
3308dec98edSTao Ma }
3318dec98edSTao Ma 
33254a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
33354a7e755SMark Fasheh {
33454a7e755SMark Fasheh 	if (lockres->l_ops->get_osb)
33554a7e755SMark Fasheh 		return lockres->l_ops->get_osb(lockres);
33654a7e755SMark Fasheh 
33754a7e755SMark Fasheh 	return (struct ocfs2_super *)lockres->l_priv;
33854a7e755SMark Fasheh }
33954a7e755SMark Fasheh 
340ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
341ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
342ccd979bdSMark Fasheh 			     int level,
343bd3e7610SJoel Becker 			     u32 dlm_flags);
344ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
345ccd979bdSMark Fasheh 						     int wanted);
346cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
347ccd979bdSMark Fasheh 				   struct ocfs2_lock_res *lockres,
348cb25797dSJan Kara 				   int level, unsigned long caller_ip);
349cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
350cb25797dSJan Kara 					struct ocfs2_lock_res *lockres,
351cb25797dSJan Kara 					int level)
352cb25797dSJan Kara {
353cb25797dSJan Kara 	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
354cb25797dSJan Kara }
355cb25797dSJan Kara 
/* Forward declarations of the generic lockres state handlers. */
static inline void
ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int
ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
static void
ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
			    struct ocfs2_lock_res *lockres);
static inline void
ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, int convert);
/*
 * Log DLM error @_err returned by the call named @_func on @_lockres.
 *
 * Dentry lock names hold the inode number as raw binary starting at
 * OCFS2_DENTRY_LOCK_INO_START. For those locks only the leading
 * printable part of the name is printed as a string, and the inode
 * number is appended in hex.
 *
 * @_lockres is expanded more than once, so pass an expression without
 * side effects. Every expansion is parenthesized so that non-trivial
 * pointer expressions bind correctly.
 */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) {				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     _err, _func, (_lockres)->l_name);					\
	} else {									\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name,	\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
	}										\
} while (0)
/* Forward declarations; the definitions are further down in the file. */
static int
ocfs2_downconvert_thread(void *arg);
static void
ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
			    struct ocfs2_lock_res *lockres);
static int
ocfs2_inode_lock_update(struct inode *inode, struct buffer_head **bh);
static void
ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int
ocfs2_highest_compat_lock_level(int level);
static unsigned int
ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, int new_level);
static int
ocfs2_downconvert_lock(struct ocfs2_super *osb,
		       struct ocfs2_lock_res *lockres,
		       int new_level, int lvb, unsigned int generation);
static int
ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
			     struct ocfs2_lock_res *lockres);
static int
ocfs2_cancel_convert(struct ocfs2_super *osb,
		     struct ocfs2_lock_res *lockres);
391cf8e06f1SMark Fasheh 
392ccd979bdSMark Fasheh 
393ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
394ccd979bdSMark Fasheh 				  u64 blkno,
395ccd979bdSMark Fasheh 				  u32 generation,
396ccd979bdSMark Fasheh 				  char *name)
397ccd979bdSMark Fasheh {
398ccd979bdSMark Fasheh 	int len;
399ccd979bdSMark Fasheh 
400ccd979bdSMark Fasheh 	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
401ccd979bdSMark Fasheh 
402b0697053SMark Fasheh 	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
403b0697053SMark Fasheh 		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
404b0697053SMark Fasheh 		       (long long)blkno, generation);
405ccd979bdSMark Fasheh 
406ccd979bdSMark Fasheh 	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
407ccd979bdSMark Fasheh 
408ccd979bdSMark Fasheh 	mlog(0, "built lock resource with name: %s\n", name);
409ccd979bdSMark Fasheh }
410ccd979bdSMark Fasheh 
/*
 * Taken around every insertion into and removal from a
 * d_lockres_tracking debug list; see ocfs2_add_lockres_tracking() and
 * ocfs2_remove_lockres_tracking().
 */
static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
412ccd979bdSMark Fasheh 
413ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
414ccd979bdSMark Fasheh 				       struct ocfs2_dlm_debug *dlm_debug)
415ccd979bdSMark Fasheh {
416ccd979bdSMark Fasheh 	mlog(0, "Add tracking for lockres %s\n", res->l_name);
417ccd979bdSMark Fasheh 
418ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
419ccd979bdSMark Fasheh 	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
420ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
421ccd979bdSMark Fasheh }
422ccd979bdSMark Fasheh 
423ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
424ccd979bdSMark Fasheh {
425ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
426ccd979bdSMark Fasheh 	if (!list_empty(&res->l_debug_list))
427ccd979bdSMark Fasheh 		list_del_init(&res->l_debug_list);
428ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
429ccd979bdSMark Fasheh }
430ccd979bdSMark Fasheh 
4318ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
4328ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
4338ddb7b00SSunil Mushran {
4348ddb7b00SSunil Mushran 	res->l_lock_refresh = 0;
4355bc970e8SSunil Mushran 	memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
4365bc970e8SSunil Mushran 	memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
4378ddb7b00SSunil Mushran }
4388ddb7b00SSunil Mushran 
4398ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
4408ddb7b00SSunil Mushran 				    struct ocfs2_mask_waiter *mw, int ret)
4418ddb7b00SSunil Mushran {
4425bc970e8SSunil Mushran 	u32 usec;
4435bc970e8SSunil Mushran 	ktime_t kt;
4445bc970e8SSunil Mushran 	struct ocfs2_lock_stats *stats;
4458ddb7b00SSunil Mushran 
4465bc970e8SSunil Mushran 	if (level == LKM_PRMODE)
4475bc970e8SSunil Mushran 		stats = &res->l_lock_prmode;
4485bc970e8SSunil Mushran 	else if (level == LKM_EXMODE)
4495bc970e8SSunil Mushran 		stats = &res->l_lock_exmode;
4505bc970e8SSunil Mushran 	else
4518ddb7b00SSunil Mushran 		return;
4528ddb7b00SSunil Mushran 
4535bc970e8SSunil Mushran 	kt = ktime_sub(ktime_get(), mw->mw_lock_start);
4545bc970e8SSunil Mushran 	usec = ktime_to_us(kt);
4555bc970e8SSunil Mushran 
4565bc970e8SSunil Mushran 	stats->ls_gets++;
4575bc970e8SSunil Mushran 	stats->ls_total += ktime_to_ns(kt);
4585bc970e8SSunil Mushran 	/* overflow */
45916865b7cSroel 	if (unlikely(stats->ls_gets == 0)) {
4605bc970e8SSunil Mushran 		stats->ls_gets++;
4615bc970e8SSunil Mushran 		stats->ls_total = ktime_to_ns(kt);
4625bc970e8SSunil Mushran 	}
4635bc970e8SSunil Mushran 
4645bc970e8SSunil Mushran 	if (stats->ls_max < usec)
4655bc970e8SSunil Mushran 		stats->ls_max = usec;
4665bc970e8SSunil Mushran 
4678ddb7b00SSunil Mushran 	if (ret)
4685bc970e8SSunil Mushran 		stats->ls_fail++;
4698ddb7b00SSunil Mushran }
4708ddb7b00SSunil Mushran 
4718ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
4728ddb7b00SSunil Mushran {
4738ddb7b00SSunil Mushran 	lockres->l_lock_refresh++;
4748ddb7b00SSunil Mushran }
4758ddb7b00SSunil Mushran 
4768ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
4778ddb7b00SSunil Mushran {
4785bc970e8SSunil Mushran 	mw->mw_lock_start = ktime_get();
4798ddb7b00SSunil Mushran }
4808ddb7b00SSunil Mushran #else
/*
 * Lock statistics are compiled out without CONFIG_OCFS2_FS_STATS;
 * these empty stubs let callers stay free of #ifdefs.
 */
static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
	/* nothing to do */
}

static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
					   int level,
					   struct ocfs2_mask_waiter *mw,
					   int ret)
{
	/* nothing to do */
}

static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
	/* nothing to do */
}

static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
	/* nothing to do */
}
4948ddb7b00SSunil Mushran #endif
4958ddb7b00SSunil Mushran 
496ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
497ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *res,
498ccd979bdSMark Fasheh 				       enum ocfs2_lock_type type,
499ccd979bdSMark Fasheh 				       struct ocfs2_lock_res_ops *ops,
500ccd979bdSMark Fasheh 				       void *priv)
501ccd979bdSMark Fasheh {
502ccd979bdSMark Fasheh 	res->l_type          = type;
503ccd979bdSMark Fasheh 	res->l_ops           = ops;
504ccd979bdSMark Fasheh 	res->l_priv          = priv;
505ccd979bdSMark Fasheh 
506bd3e7610SJoel Becker 	res->l_level         = DLM_LOCK_IV;
507bd3e7610SJoel Becker 	res->l_requested     = DLM_LOCK_IV;
508bd3e7610SJoel Becker 	res->l_blocking      = DLM_LOCK_IV;
509ccd979bdSMark Fasheh 	res->l_action        = OCFS2_AST_INVALID;
510ccd979bdSMark Fasheh 	res->l_unlock_action = OCFS2_UNLOCK_INVALID;
511ccd979bdSMark Fasheh 
512ccd979bdSMark Fasheh 	res->l_flags         = OCFS2_LOCK_INITIALIZED;
513ccd979bdSMark Fasheh 
514ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
5158ddb7b00SSunil Mushran 
5168ddb7b00SSunil Mushran 	ocfs2_init_lock_stats(res);
517cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
518cb25797dSJan Kara 	if (type != OCFS2_LOCK_TYPE_OPEN)
519cb25797dSJan Kara 		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
520cb25797dSJan Kara 				 &lockdep_keys[type], 0);
521cb25797dSJan Kara 	else
522cb25797dSJan Kara 		res->l_lockdep_map.key = NULL;
523cb25797dSJan Kara #endif
524ccd979bdSMark Fasheh }
525ccd979bdSMark Fasheh 
526ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
527ccd979bdSMark Fasheh {
528ccd979bdSMark Fasheh 	/* This also clears out the lock status block */
529ccd979bdSMark Fasheh 	memset(res, 0, sizeof(struct ocfs2_lock_res));
530ccd979bdSMark Fasheh 	spin_lock_init(&res->l_lock);
531ccd979bdSMark Fasheh 	init_waitqueue_head(&res->l_event);
532ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_blocked_list);
533ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_mask_waiters);
534ccd979bdSMark Fasheh }
535ccd979bdSMark Fasheh 
536ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
537ccd979bdSMark Fasheh 			       enum ocfs2_lock_type type,
53824c19ef4SMark Fasheh 			       unsigned int generation,
539ccd979bdSMark Fasheh 			       struct inode *inode)
540ccd979bdSMark Fasheh {
541ccd979bdSMark Fasheh 	struct ocfs2_lock_res_ops *ops;
542ccd979bdSMark Fasheh 
543ccd979bdSMark Fasheh 	switch(type) {
544ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_RW:
545ccd979bdSMark Fasheh 			ops = &ocfs2_inode_rw_lops;
546ccd979bdSMark Fasheh 			break;
547ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_META:
548e63aecb6SMark Fasheh 			ops = &ocfs2_inode_inode_lops;
549ccd979bdSMark Fasheh 			break;
55050008630STiger Yang 		case OCFS2_LOCK_TYPE_OPEN:
55150008630STiger Yang 			ops = &ocfs2_inode_open_lops;
55250008630STiger Yang 			break;
553ccd979bdSMark Fasheh 		default:
554ccd979bdSMark Fasheh 			mlog_bug_on_msg(1, "type: %d\n", type);
555ccd979bdSMark Fasheh 			ops = NULL; /* thanks, gcc */
556ccd979bdSMark Fasheh 			break;
557ccd979bdSMark Fasheh 	};
558ccd979bdSMark Fasheh 
559d680efe9SMark Fasheh 	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
56024c19ef4SMark Fasheh 			      generation, res->l_name);
561d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
562d680efe9SMark Fasheh }
563d680efe9SMark Fasheh 
56454a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
56554a7e755SMark Fasheh {
56654a7e755SMark Fasheh 	struct inode *inode = ocfs2_lock_res_inode(lockres);
56754a7e755SMark Fasheh 
56854a7e755SMark Fasheh 	return OCFS2_SB(inode->i_sb);
56954a7e755SMark Fasheh }
57054a7e755SMark Fasheh 
5719e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
5729e33d69fSJan Kara {
5739e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *info = lockres->l_priv;
5749e33d69fSJan Kara 
5759e33d69fSJan Kara 	return OCFS2_SB(info->dqi_gi.dqi_sb);
5769e33d69fSJan Kara }
5779e33d69fSJan Kara 
578cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
579cf8e06f1SMark Fasheh {
580cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = lockres->l_priv;
581cf8e06f1SMark Fasheh 
582cf8e06f1SMark Fasheh 	return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
583cf8e06f1SMark Fasheh }
584cf8e06f1SMark Fasheh 
585d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
586d680efe9SMark Fasheh {
587d680efe9SMark Fasheh 	__be64 inode_blkno_be;
588d680efe9SMark Fasheh 
589d680efe9SMark Fasheh 	memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
590d680efe9SMark Fasheh 	       sizeof(__be64));
591d680efe9SMark Fasheh 
592d680efe9SMark Fasheh 	return be64_to_cpu(inode_blkno_be);
593d680efe9SMark Fasheh }
594d680efe9SMark Fasheh 
59554a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
59654a7e755SMark Fasheh {
59754a7e755SMark Fasheh 	struct ocfs2_dentry_lock *dl = lockres->l_priv;
59854a7e755SMark Fasheh 
59954a7e755SMark Fasheh 	return OCFS2_SB(dl->dl_inode->i_sb);
60054a7e755SMark Fasheh }
60154a7e755SMark Fasheh 
602d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
603d680efe9SMark Fasheh 				u64 parent, struct inode *inode)
604d680efe9SMark Fasheh {
605d680efe9SMark Fasheh 	int len;
606d680efe9SMark Fasheh 	u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
607d680efe9SMark Fasheh 	__be64 inode_blkno_be = cpu_to_be64(inode_blkno);
608d680efe9SMark Fasheh 	struct ocfs2_lock_res *lockres = &dl->dl_lockres;
609d680efe9SMark Fasheh 
610d680efe9SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
611d680efe9SMark Fasheh 
612d680efe9SMark Fasheh 	/*
613d680efe9SMark Fasheh 	 * Unfortunately, the standard lock naming scheme won't work
614d680efe9SMark Fasheh 	 * here because we have two 16 byte values to use. Instead,
615d680efe9SMark Fasheh 	 * we'll stuff the inode number as a binary value. We still
616d680efe9SMark Fasheh 	 * want error prints to show something without garbling the
617d680efe9SMark Fasheh 	 * display, so drop a null byte in there before the inode
618d680efe9SMark Fasheh 	 * number. A future version of OCFS2 will likely use all
619d680efe9SMark Fasheh 	 * binary lock names. The stringified names have been a
620d680efe9SMark Fasheh 	 * tremendous aid in debugging, but now that the debugfs
621d680efe9SMark Fasheh 	 * interface exists, we can mangle things there if need be.
622d680efe9SMark Fasheh 	 *
623d680efe9SMark Fasheh 	 * NOTE: We also drop the standard "pad" value (the total lock
624d680efe9SMark Fasheh 	 * name size stays the same though - the last part is all
625d680efe9SMark Fasheh 	 * zeros due to the memset in ocfs2_lock_res_init_once()
626d680efe9SMark Fasheh 	 */
627d680efe9SMark Fasheh 	len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
628d680efe9SMark Fasheh 		       "%c%016llx",
629d680efe9SMark Fasheh 		       ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
630d680efe9SMark Fasheh 		       (long long)parent);
631d680efe9SMark Fasheh 
632d680efe9SMark Fasheh 	BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
633d680efe9SMark Fasheh 
634d680efe9SMark Fasheh 	memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
635d680efe9SMark Fasheh 	       sizeof(__be64));
636d680efe9SMark Fasheh 
637d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
638d680efe9SMark Fasheh 				   OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
639d680efe9SMark Fasheh 				   dl);
640ccd979bdSMark Fasheh }
641ccd979bdSMark Fasheh 
642ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
643ccd979bdSMark Fasheh 				      struct ocfs2_super *osb)
644ccd979bdSMark Fasheh {
645ccd979bdSMark Fasheh 	/* Superblock lockres doesn't come from a slab so we call init
646ccd979bdSMark Fasheh 	 * once on it manually.  */
647ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
648d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
649d680efe9SMark Fasheh 			      0, res->l_name);
650ccd979bdSMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
651ccd979bdSMark Fasheh 				   &ocfs2_super_lops, osb);
652ccd979bdSMark Fasheh }
653ccd979bdSMark Fasheh 
654ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
655ccd979bdSMark Fasheh 				       struct ocfs2_super *osb)
656ccd979bdSMark Fasheh {
657ccd979bdSMark Fasheh 	/* Rename lockres doesn't come from a slab so we call init
658ccd979bdSMark Fasheh 	 * once on it manually.  */
659ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
660d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
661d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
662ccd979bdSMark Fasheh 				   &ocfs2_rename_lops, osb);
663ccd979bdSMark Fasheh }
664ccd979bdSMark Fasheh 
6656ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
6666ca497a8Swengang wang 					 struct ocfs2_super *osb)
6676ca497a8Swengang wang {
6686ca497a8Swengang wang 	/* nfs_sync lockres doesn't come from a slab so we call init
6696ca497a8Swengang wang 	 * once on it manually.  */
6706ca497a8Swengang wang 	ocfs2_lock_res_init_once(res);
6716ca497a8Swengang wang 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
6726ca497a8Swengang wang 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
6736ca497a8Swengang wang 				   &ocfs2_nfs_sync_lops, osb);
6746ca497a8Swengang wang }
6756ca497a8Swengang wang 
67683273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
67783273932SSrinivas Eeda 					    struct ocfs2_super *osb)
67883273932SSrinivas Eeda {
67983273932SSrinivas Eeda 	ocfs2_lock_res_init_once(res);
68083273932SSrinivas Eeda 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
68183273932SSrinivas Eeda 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
68283273932SSrinivas Eeda 				   &ocfs2_orphan_scan_lops, osb);
68383273932SSrinivas Eeda }
68483273932SSrinivas Eeda 
685cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
686cf8e06f1SMark Fasheh 			      struct ocfs2_file_private *fp)
687cf8e06f1SMark Fasheh {
688cf8e06f1SMark Fasheh 	struct inode *inode = fp->fp_file->f_mapping->host;
689cf8e06f1SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
690cf8e06f1SMark Fasheh 
691cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
692cf8e06f1SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
693cf8e06f1SMark Fasheh 			      inode->i_generation, lockres->l_name);
694cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
695cf8e06f1SMark Fasheh 				   OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
696cf8e06f1SMark Fasheh 				   fp);
697cf8e06f1SMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_NOCACHE;
698cf8e06f1SMark Fasheh }
699cf8e06f1SMark Fasheh 
7009e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
7019e33d69fSJan Kara 			       struct ocfs2_mem_dqinfo *info)
7029e33d69fSJan Kara {
7039e33d69fSJan Kara 	ocfs2_lock_res_init_once(lockres);
7049e33d69fSJan Kara 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
7059e33d69fSJan Kara 			      0, lockres->l_name);
7069e33d69fSJan Kara 	ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
7079e33d69fSJan Kara 				   OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
7089e33d69fSJan Kara 				   info);
7099e33d69fSJan Kara }
7109e33d69fSJan Kara 
7118dec98edSTao Ma void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
7128dec98edSTao Ma 				  struct ocfs2_super *osb, u64 ref_blkno,
7138dec98edSTao Ma 				  unsigned int generation)
7148dec98edSTao Ma {
7158dec98edSTao Ma 	ocfs2_lock_res_init_once(lockres);
7168dec98edSTao Ma 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
7178dec98edSTao Ma 			      generation, lockres->l_name);
7188dec98edSTao Ma 	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
7198dec98edSTao Ma 				   &ocfs2_refcount_block_lops, osb);
7208dec98edSTao Ma }
7218dec98edSTao Ma 
722ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
723ccd979bdSMark Fasheh {
724ccd979bdSMark Fasheh 	if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
725ccd979bdSMark Fasheh 		return;
726ccd979bdSMark Fasheh 
727ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
728ccd979bdSMark Fasheh 
729ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
730ccd979bdSMark Fasheh 			"Lockres %s is on the blocked list\n",
731ccd979bdSMark Fasheh 			res->l_name);
732ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
733ccd979bdSMark Fasheh 			"Lockres %s has mask waiters pending\n",
734ccd979bdSMark Fasheh 			res->l_name);
735ccd979bdSMark Fasheh 	mlog_bug_on_msg(spin_is_locked(&res->l_lock),
736ccd979bdSMark Fasheh 			"Lockres %s is locked\n",
737ccd979bdSMark Fasheh 			res->l_name);
738ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ro_holders,
739ccd979bdSMark Fasheh 			"Lockres %s has %u ro holders\n",
740ccd979bdSMark Fasheh 			res->l_name, res->l_ro_holders);
741ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ex_holders,
742ccd979bdSMark Fasheh 			"Lockres %s has %u ex holders\n",
743ccd979bdSMark Fasheh 			res->l_name, res->l_ex_holders);
744ccd979bdSMark Fasheh 
745ccd979bdSMark Fasheh 	/* Need to clear out the lock status block for the dlm */
746ccd979bdSMark Fasheh 	memset(&res->l_lksb, 0, sizeof(res->l_lksb));
747ccd979bdSMark Fasheh 
748ccd979bdSMark Fasheh 	res->l_flags = 0UL;
749ccd979bdSMark Fasheh }
750ccd979bdSMark Fasheh 
751ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
752ccd979bdSMark Fasheh 				     int level)
753ccd979bdSMark Fasheh {
754ccd979bdSMark Fasheh 	BUG_ON(!lockres);
755ccd979bdSMark Fasheh 
756ccd979bdSMark Fasheh 	switch(level) {
757bd3e7610SJoel Becker 	case DLM_LOCK_EX:
758ccd979bdSMark Fasheh 		lockres->l_ex_holders++;
759ccd979bdSMark Fasheh 		break;
760bd3e7610SJoel Becker 	case DLM_LOCK_PR:
761ccd979bdSMark Fasheh 		lockres->l_ro_holders++;
762ccd979bdSMark Fasheh 		break;
763ccd979bdSMark Fasheh 	default:
764ccd979bdSMark Fasheh 		BUG();
765ccd979bdSMark Fasheh 	}
766ccd979bdSMark Fasheh }
767ccd979bdSMark Fasheh 
768ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
769ccd979bdSMark Fasheh 				     int level)
770ccd979bdSMark Fasheh {
771ccd979bdSMark Fasheh 	BUG_ON(!lockres);
772ccd979bdSMark Fasheh 
773ccd979bdSMark Fasheh 	switch(level) {
774bd3e7610SJoel Becker 	case DLM_LOCK_EX:
775ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ex_holders);
776ccd979bdSMark Fasheh 		lockres->l_ex_holders--;
777ccd979bdSMark Fasheh 		break;
778bd3e7610SJoel Becker 	case DLM_LOCK_PR:
779ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ro_holders);
780ccd979bdSMark Fasheh 		lockres->l_ro_holders--;
781ccd979bdSMark Fasheh 		break;
782ccd979bdSMark Fasheh 	default:
783ccd979bdSMark Fasheh 		BUG();
784ccd979bdSMark Fasheh 	}
785ccd979bdSMark Fasheh }
786ccd979bdSMark Fasheh 
787ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock
788ccd979bdSMark Fasheh  * levels are EX, PR, and NL. It *will* have to be adjusted when more
789ccd979bdSMark Fasheh  * lock types are added. */
790ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level)
791ccd979bdSMark Fasheh {
792bd3e7610SJoel Becker 	int new_level = DLM_LOCK_EX;
793ccd979bdSMark Fasheh 
794bd3e7610SJoel Becker 	if (level == DLM_LOCK_EX)
795bd3e7610SJoel Becker 		new_level = DLM_LOCK_NL;
796bd3e7610SJoel Becker 	else if (level == DLM_LOCK_PR)
797bd3e7610SJoel Becker 		new_level = DLM_LOCK_PR;
798ccd979bdSMark Fasheh 	return new_level;
799ccd979bdSMark Fasheh }
800ccd979bdSMark Fasheh 
801ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres,
802ccd979bdSMark Fasheh 			      unsigned long newflags)
803ccd979bdSMark Fasheh {
804800deef3SChristoph Hellwig 	struct ocfs2_mask_waiter *mw, *tmp;
805ccd979bdSMark Fasheh 
806ccd979bdSMark Fasheh  	assert_spin_locked(&lockres->l_lock);
807ccd979bdSMark Fasheh 
808ccd979bdSMark Fasheh 	lockres->l_flags = newflags;
809ccd979bdSMark Fasheh 
810800deef3SChristoph Hellwig 	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
811ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
812ccd979bdSMark Fasheh 			continue;
813ccd979bdSMark Fasheh 
814ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
815ccd979bdSMark Fasheh 		mw->mw_status = 0;
816ccd979bdSMark Fasheh 		complete(&mw->mw_complete);
817ccd979bdSMark Fasheh 	}
818ccd979bdSMark Fasheh }
819ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
820ccd979bdSMark Fasheh {
821ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags | or);
822ccd979bdSMark Fasheh }
823ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
824ccd979bdSMark Fasheh 				unsigned long clear)
825ccd979bdSMark Fasheh {
826ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags & ~clear);
827ccd979bdSMark Fasheh }
828ccd979bdSMark Fasheh 
829ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
830ccd979bdSMark Fasheh {
831ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
832ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
833ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
834bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
835ccd979bdSMark Fasheh 
836ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
837ccd979bdSMark Fasheh 	if (lockres->l_level <=
838ccd979bdSMark Fasheh 	    ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
839bd3e7610SJoel Becker 		lockres->l_blocking = DLM_LOCK_NL;
840ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
841ccd979bdSMark Fasheh 	}
842ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
843ccd979bdSMark Fasheh }
844ccd979bdSMark Fasheh 
845ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
846ccd979bdSMark Fasheh {
847ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
848ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
849ccd979bdSMark Fasheh 
850ccd979bdSMark Fasheh 	/* Convert from RO to EX doesn't really need anything as our
851ccd979bdSMark Fasheh 	 * information is already up to data. Convert from NL to
852ccd979bdSMark Fasheh 	 * *anything* however should mark ourselves as needing an
853ccd979bdSMark Fasheh 	 * update */
854bd3e7610SJoel Becker 	if (lockres->l_level == DLM_LOCK_NL &&
855f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
856ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
857ccd979bdSMark Fasheh 
858ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
859a1912826SSunil Mushran 
860a1912826SSunil Mushran 	/*
861a1912826SSunil Mushran 	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
862a1912826SSunil Mushran 	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
863a1912826SSunil Mushran 	 * downconverting the lock before the upconvert has fully completed.
864a1912826SSunil Mushran 	 */
865a1912826SSunil Mushran 	lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
866a1912826SSunil Mushran 
867ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
868ccd979bdSMark Fasheh }
869ccd979bdSMark Fasheh 
870ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
871ccd979bdSMark Fasheh {
8723cf0c507SRoel Kluin 	BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
873ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
874ccd979bdSMark Fasheh 
875bd3e7610SJoel Becker 	if (lockres->l_requested > DLM_LOCK_NL &&
876f625c979SMark Fasheh 	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
877f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
878ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
879ccd979bdSMark Fasheh 
880ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
881ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
882ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
883ccd979bdSMark Fasheh }
884ccd979bdSMark Fasheh 
885ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
886ccd979bdSMark Fasheh 				     int level)
887ccd979bdSMark Fasheh {
888ccd979bdSMark Fasheh 	int needs_downconvert = 0;
889ccd979bdSMark Fasheh 
890ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
891ccd979bdSMark Fasheh 
892ccd979bdSMark Fasheh 	if (level > lockres->l_blocking) {
893ccd979bdSMark Fasheh 		/* only schedule a downconvert if we haven't already scheduled
894ccd979bdSMark Fasheh 		 * one that goes low enough to satisfy the level we're
895ccd979bdSMark Fasheh 		 * blocking.  this also catches the case where we get
896ccd979bdSMark Fasheh 		 * duplicate BASTs */
897ccd979bdSMark Fasheh 		if (ocfs2_highest_compat_lock_level(level) <
898ccd979bdSMark Fasheh 		    ocfs2_highest_compat_lock_level(lockres->l_blocking))
899ccd979bdSMark Fasheh 			needs_downconvert = 1;
900ccd979bdSMark Fasheh 
901ccd979bdSMark Fasheh 		lockres->l_blocking = level;
902ccd979bdSMark Fasheh 	}
903ccd979bdSMark Fasheh 
9049b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, block %d, level %d, l_block %d, dwn %d\n",
9059b915181SSunil Mushran 	     lockres->l_name, level, lockres->l_level, lockres->l_blocking,
9069b915181SSunil Mushran 	     needs_downconvert);
9079b915181SSunil Mushran 
9080b94a909SWengang Wang 	if (needs_downconvert)
9090b94a909SWengang Wang 		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
910c1e8d35eSTao Ma 	mlog(0, "needs_downconvert = %d\n", needs_downconvert);
911ccd979bdSMark Fasheh 	return needs_downconvert;
912ccd979bdSMark Fasheh }
913ccd979bdSMark Fasheh 
914de551246SJoel Becker /*
915de551246SJoel Becker  * OCFS2_LOCK_PENDING and l_pending_gen.
916de551246SJoel Becker  *
917de551246SJoel Becker  * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting
918de551246SJoel Becker  * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock()
919de551246SJoel Becker  * for more details on the race.
920de551246SJoel Becker  *
921de551246SJoel Becker  * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces
922de551246SJoel Becker  * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock()
923de551246SJoel Becker  * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
924de551246SJoel Becker  * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
925de551246SJoel Becker  * the caller is going to try to clear PENDING again.  If nothing else is
926de551246SJoel Becker  * happening, __lockres_clear_pending() sees PENDING is unset and does
927de551246SJoel Becker  * nothing.
928de551246SJoel Becker  *
929de551246SJoel Becker  * But what if another path (eg downconvert thread) has just started a
930de551246SJoel Becker  * new locking action?  The other path has re-set PENDING.  Our path
931de551246SJoel Becker  * cannot clear PENDING, because that will re-open the original race
932de551246SJoel Becker  * window.
933de551246SJoel Becker  *
934de551246SJoel Becker  * [Example]
935de551246SJoel Becker  *
936de551246SJoel Becker  * ocfs2_meta_lock()
937de551246SJoel Becker  *  ocfs2_cluster_lock()
938de551246SJoel Becker  *   set BUSY
939de551246SJoel Becker  *   set PENDING
940de551246SJoel Becker  *   drop l_lock
941de551246SJoel Becker  *   ocfs2_dlm_lock()
942de551246SJoel Becker  *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
943de551246SJoel Becker  *     clear PENDING			 ocfs2_unblock_lock()
944de551246SJoel Becker  *					  take_l_lock
945de551246SJoel Becker  *					  !BUSY
946de551246SJoel Becker  *					  ocfs2_prepare_downconvert()
947de551246SJoel Becker  *					   set BUSY
948de551246SJoel Becker  *					   set PENDING
949de551246SJoel Becker  *					  drop l_lock
950de551246SJoel Becker  *   take l_lock
951de551246SJoel Becker  *   clear PENDING
952de551246SJoel Becker  *   drop l_lock
953de551246SJoel Becker  *			<window>
954de551246SJoel Becker  *					  ocfs2_dlm_lock()
955de551246SJoel Becker  *
956de551246SJoel Becker  * So as you can see, we now have a window where l_lock is not held,
957de551246SJoel Becker  * PENDING is not set, and ocfs2_dlm_lock() has not been called.
958de551246SJoel Becker  *
959de551246SJoel Becker  * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
960de551246SJoel Becker  * set by ocfs2_prepare_downconvert().  That wasn't nice.
961de551246SJoel Becker  *
962de551246SJoel Becker  * To solve this we introduce l_pending_gen.  A call to
963de551246SJoel Becker  * lockres_clear_pending() will only do so when it is passed a generation
964de551246SJoel Becker  * number that matches the lockres.  lockres_set_pending() will return the
965de551246SJoel Becker  * current generation number.  When ocfs2_cluster_lock() goes to clear
966de551246SJoel Becker  * PENDING, it passes the generation it got from set_pending().  In our
967de551246SJoel Becker  * example above, the generation numbers will *not* match.  Thus,
968de551246SJoel Becker  * ocfs2_cluster_lock() will not clear the PENDING set by
969de551246SJoel Becker  * ocfs2_prepare_downconvert().
970de551246SJoel Becker  */
971de551246SJoel Becker 
972de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */
973de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
974de551246SJoel Becker 				    unsigned int generation,
975de551246SJoel Becker 				    struct ocfs2_super *osb)
976de551246SJoel Becker {
977de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
978de551246SJoel Becker 
979de551246SJoel Becker 	/*
980de551246SJoel Becker 	 * The ast and locking functions can race us here.  The winner
981de551246SJoel Becker 	 * will clear pending, the loser will not.
982de551246SJoel Becker 	 */
983de551246SJoel Becker 	if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
984de551246SJoel Becker 	    (lockres->l_pending_gen != generation))
985de551246SJoel Becker 		return;
986de551246SJoel Becker 
987de551246SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
988de551246SJoel Becker 	lockres->l_pending_gen++;
989de551246SJoel Becker 
990de551246SJoel Becker 	/*
991de551246SJoel Becker 	 * The downconvert thread may have skipped us because we
992de551246SJoel Becker 	 * were PENDING.  Wake it up.
993de551246SJoel Becker 	 */
994de551246SJoel Becker 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
995de551246SJoel Becker 		ocfs2_wake_downconvert_thread(osb);
996de551246SJoel Becker }
997de551246SJoel Becker 
998de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */
999de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
1000de551246SJoel Becker 				  unsigned int generation,
1001de551246SJoel Becker 				  struct ocfs2_super *osb)
1002de551246SJoel Becker {
1003de551246SJoel Becker 	unsigned long flags;
1004de551246SJoel Becker 
1005de551246SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1006de551246SJoel Becker 	__lockres_clear_pending(lockres, generation, osb);
1007de551246SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1008de551246SJoel Becker }
1009de551246SJoel Becker 
1010de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
1011de551246SJoel Becker {
1012de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
1013de551246SJoel Becker 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
1014de551246SJoel Becker 
1015de551246SJoel Becker 	lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
1016de551246SJoel Becker 
1017de551246SJoel Becker 	return lockres->l_pending_gen;
1018de551246SJoel Becker }
1019de551246SJoel Becker 
1020c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
1021ccd979bdSMark Fasheh {
1022a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1023aa2623adSMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1024ccd979bdSMark Fasheh 	int needs_downconvert;
1025ccd979bdSMark Fasheh 	unsigned long flags;
1026ccd979bdSMark Fasheh 
1027bd3e7610SJoel Becker 	BUG_ON(level <= DLM_LOCK_NL);
1028ccd979bdSMark Fasheh 
10299b915181SSunil Mushran 	mlog(ML_BASTS, "BAST fired for lockres %s, blocking %d, level %d, "
10309b915181SSunil Mushran 	     "type %s\n", lockres->l_name, level, lockres->l_level,
1031aa2623adSMark Fasheh 	     ocfs2_lock_type_string(lockres->l_type));
1032aa2623adSMark Fasheh 
1033cf8e06f1SMark Fasheh 	/*
1034cf8e06f1SMark Fasheh 	 * We can skip the bast for locks which don't enable caching -
1035cf8e06f1SMark Fasheh 	 * they'll be dropped at the earliest possible time anyway.
1036cf8e06f1SMark Fasheh 	 */
1037cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
1038cf8e06f1SMark Fasheh 		return;
1039cf8e06f1SMark Fasheh 
1040ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1041ccd979bdSMark Fasheh 	needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
1042ccd979bdSMark Fasheh 	if (needs_downconvert)
1043ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
1044ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1045ccd979bdSMark Fasheh 
1046d680efe9SMark Fasheh 	wake_up(&lockres->l_event);
1047d680efe9SMark Fasheh 
104834d024f8SMark Fasheh 	ocfs2_wake_downconvert_thread(osb);
1049ccd979bdSMark Fasheh }
1050ccd979bdSMark Fasheh 
1051c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
1052ccd979bdSMark Fasheh {
1053a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1054de551246SJoel Becker 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1055ccd979bdSMark Fasheh 	unsigned long flags;
10561693a5c0SDavid Teigland 	int status;
1057ccd979bdSMark Fasheh 
1058ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1059ccd979bdSMark Fasheh 
10601693a5c0SDavid Teigland 	status = ocfs2_dlm_lock_status(&lockres->l_lksb);
10611693a5c0SDavid Teigland 
10621693a5c0SDavid Teigland 	if (status == -EAGAIN) {
10631693a5c0SDavid Teigland 		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
10641693a5c0SDavid Teigland 		goto out;
10651693a5c0SDavid Teigland 	}
10661693a5c0SDavid Teigland 
10671693a5c0SDavid Teigland 	if (status) {
10688f2c9c1bSJoel Becker 		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
10691693a5c0SDavid Teigland 		     lockres->l_name, status);
1070ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1071ccd979bdSMark Fasheh 		return;
1072ccd979bdSMark Fasheh 	}
1073ccd979bdSMark Fasheh 
10749b915181SSunil Mushran 	mlog(ML_BASTS, "AST fired for lockres %s, action %d, unlock %d, "
10759b915181SSunil Mushran 	     "level %d => %d\n", lockres->l_name, lockres->l_action,
10769b915181SSunil Mushran 	     lockres->l_unlock_action, lockres->l_level, lockres->l_requested);
10779b915181SSunil Mushran 
1078ccd979bdSMark Fasheh 	switch(lockres->l_action) {
1079ccd979bdSMark Fasheh 	case OCFS2_AST_ATTACH:
1080ccd979bdSMark Fasheh 		ocfs2_generic_handle_attach_action(lockres);
1081e92d57dfSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
1082ccd979bdSMark Fasheh 		break;
1083ccd979bdSMark Fasheh 	case OCFS2_AST_CONVERT:
1084ccd979bdSMark Fasheh 		ocfs2_generic_handle_convert_action(lockres);
1085ccd979bdSMark Fasheh 		break;
1086ccd979bdSMark Fasheh 	case OCFS2_AST_DOWNCONVERT:
1087ccd979bdSMark Fasheh 		ocfs2_generic_handle_downconvert_action(lockres);
1088ccd979bdSMark Fasheh 		break;
1089ccd979bdSMark Fasheh 	default:
10909b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s: AST fired with invalid action: %u, "
10919b915181SSunil Mushran 		     "flags 0x%lx, unlock: %u\n",
1092e92d57dfSMark Fasheh 		     lockres->l_name, lockres->l_action, lockres->l_flags,
1093e92d57dfSMark Fasheh 		     lockres->l_unlock_action);
1094ccd979bdSMark Fasheh 		BUG();
1095ccd979bdSMark Fasheh 	}
10961693a5c0SDavid Teigland out:
1097ccd979bdSMark Fasheh 	/* set it to something invalid so if we get called again we
1098ccd979bdSMark Fasheh 	 * can catch it. */
1099ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_INVALID;
1100ccd979bdSMark Fasheh 
1101de551246SJoel Becker 	/* Did we try to cancel this lock?  Clear that state */
1102de551246SJoel Becker 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
1103de551246SJoel Becker 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1104de551246SJoel Becker 
1105de551246SJoel Becker 	/*
1106de551246SJoel Becker 	 * We may have beaten the locking functions here.  We certainly
1107de551246SJoel Becker 	 * know that dlm_lock() has been called :-)
1108de551246SJoel Becker 	 * Because we can't have two lock calls in flight at once, we
1109de551246SJoel Becker 	 * can use lockres->l_pending_gen.
1110de551246SJoel Becker 	 */
1111de551246SJoel Becker 	__lockres_clear_pending(lockres, lockres->l_pending_gen,  osb);
1112de551246SJoel Becker 
1113ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1114d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1115ccd979bdSMark Fasheh }
1116ccd979bdSMark Fasheh 
1117553b5eb9SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
1118553b5eb9SJoel Becker {
1119553b5eb9SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1120553b5eb9SJoel Becker 	unsigned long flags;
1121553b5eb9SJoel Becker 
11229b915181SSunil Mushran 	mlog(ML_BASTS, "UNLOCK AST fired for lockres %s, action = %d\n",
11239b915181SSunil Mushran 	     lockres->l_name, lockres->l_unlock_action);
1124553b5eb9SJoel Becker 
1125553b5eb9SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1126553b5eb9SJoel Becker 	if (error) {
1127553b5eb9SJoel Becker 		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
1128553b5eb9SJoel Becker 		     "unlock_action %d\n", error, lockres->l_name,
1129553b5eb9SJoel Becker 		     lockres->l_unlock_action);
1130553b5eb9SJoel Becker 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1131553b5eb9SJoel Becker 		return;
1132553b5eb9SJoel Becker 	}
1133553b5eb9SJoel Becker 
1134553b5eb9SJoel Becker 	switch(lockres->l_unlock_action) {
1135553b5eb9SJoel Becker 	case OCFS2_UNLOCK_CANCEL_CONVERT:
1136553b5eb9SJoel Becker 		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
1137553b5eb9SJoel Becker 		lockres->l_action = OCFS2_AST_INVALID;
1138553b5eb9SJoel Becker 		/* Downconvert thread may have requeued this lock, we
1139553b5eb9SJoel Becker 		 * need to wake it. */
1140553b5eb9SJoel Becker 		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1141553b5eb9SJoel Becker 			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
1142553b5eb9SJoel Becker 		break;
1143553b5eb9SJoel Becker 	case OCFS2_UNLOCK_DROP_LOCK:
1144553b5eb9SJoel Becker 		lockres->l_level = DLM_LOCK_IV;
1145553b5eb9SJoel Becker 		break;
1146553b5eb9SJoel Becker 	default:
1147553b5eb9SJoel Becker 		BUG();
1148553b5eb9SJoel Becker 	}
1149553b5eb9SJoel Becker 
1150553b5eb9SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1151553b5eb9SJoel Becker 	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1152553b5eb9SJoel Becker 	wake_up(&lockres->l_event);
1153553b5eb9SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1154553b5eb9SJoel Becker }
1155553b5eb9SJoel Becker 
1156553b5eb9SJoel Becker /*
1157553b5eb9SJoel Becker  * This is the filesystem locking protocol.  It provides the lock handling
1158553b5eb9SJoel Becker  * hooks for the underlying DLM.  It has a maximum version number.
1159553b5eb9SJoel Becker  * The version number allows interoperability with systems running at
1160553b5eb9SJoel Becker  * the same major number and an equal or smaller minor number.
1161553b5eb9SJoel Becker  *
1162553b5eb9SJoel Becker  * Whenever the filesystem does new things with locks (adds or removes a
1163553b5eb9SJoel Becker  * lock, orders them differently, does different things underneath a lock),
1164553b5eb9SJoel Becker  * the version must be changed.  The protocol is negotiated when joining
1165553b5eb9SJoel Becker  * the dlm domain.  A node may join the domain if its major version is
1166553b5eb9SJoel Becker  * identical to all other nodes and its minor version is greater than
1167553b5eb9SJoel Becker  * or equal to all other nodes.  When its minor version is greater than
1168553b5eb9SJoel Becker  * the other nodes, it will run at the minor version specified by the
1169553b5eb9SJoel Becker  * other nodes.
1170553b5eb9SJoel Becker  *
1171553b5eb9SJoel Becker  * If a locking change is made that will not be compatible with older
1172553b5eb9SJoel Becker  * versions, the major number must be increased and the minor version set
1173553b5eb9SJoel Becker  * to zero.  If a change merely adds a behavior that can be disabled when
1174553b5eb9SJoel Becker  * speaking to older versions, the minor version must be increased.  If a
1175553b5eb9SJoel Becker  * change adds a fully backwards compatible change (eg, LVB changes that
1176553b5eb9SJoel Becker  * are just ignored by older versions), the version does not need to be
1177553b5eb9SJoel Becker  * updated.
1178553b5eb9SJoel Becker  */
1179553b5eb9SJoel Becker static struct ocfs2_locking_protocol lproto = {
	/* Highest version we speak; published by ocfs2_set_locking_protocol(). */
1180553b5eb9SJoel Becker 	.lp_max_version = {
1181553b5eb9SJoel Becker 		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
1182553b5eb9SJoel Becker 		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
1183553b5eb9SJoel Becker 	},
	/* Lock handling hooks for the underlying DLM. */
1184553b5eb9SJoel Becker 	.lp_lock_ast		= ocfs2_locking_ast,
1185553b5eb9SJoel Becker 	.lp_blocking_ast	= ocfs2_blocking_ast,
1186553b5eb9SJoel Becker 	.lp_unlock_ast		= ocfs2_unlock_ast,
1187553b5eb9SJoel Becker };
1188553b5eb9SJoel Becker 
1189553b5eb9SJoel Becker void ocfs2_set_locking_protocol(void)
1190553b5eb9SJoel Becker {
1191553b5eb9SJoel Becker 	ocfs2_stack_glue_set_max_proto_version(&lproto.lp_max_version);
1192553b5eb9SJoel Becker }
1193553b5eb9SJoel Becker 
1194ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1195ccd979bdSMark Fasheh 						int convert)
1196ccd979bdSMark Fasheh {
1197ccd979bdSMark Fasheh 	unsigned long flags;
1198ccd979bdSMark Fasheh 
1199ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1200ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1201a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1202ccd979bdSMark Fasheh 	if (convert)
1203ccd979bdSMark Fasheh 		lockres->l_action = OCFS2_AST_INVALID;
1204ccd979bdSMark Fasheh 	else
1205ccd979bdSMark Fasheh 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1206ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1207ccd979bdSMark Fasheh 
1208ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1209ccd979bdSMark Fasheh }
1210ccd979bdSMark Fasheh 
1211ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e.,
1212ccd979bdSMark Fasheh  * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
1213ccd979bdSMark Fasheh  * to do the right thing in that case.
1214ccd979bdSMark Fasheh  */
1215ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
1216ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
1217ccd979bdSMark Fasheh 			     int level,
1218bd3e7610SJoel Becker 			     u32 dlm_flags)
1219ccd979bdSMark Fasheh {
1220ccd979bdSMark Fasheh 	int ret = 0;
1221ccd979bdSMark Fasheh 	unsigned long flags;
1222de551246SJoel Becker 	unsigned int gen;
1223ccd979bdSMark Fasheh 
1224bd3e7610SJoel Becker 	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
1225ccd979bdSMark Fasheh 	     dlm_flags);
1226ccd979bdSMark Fasheh 
1227ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1228ccd979bdSMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
1229ccd979bdSMark Fasheh 	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
1230ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1231ccd979bdSMark Fasheh 		goto bail;
1232ccd979bdSMark Fasheh 	}
1233ccd979bdSMark Fasheh 
1234ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_ATTACH;
1235ccd979bdSMark Fasheh 	lockres->l_requested = level;
1236ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1237de551246SJoel Becker 	gen = lockres_set_pending(lockres);
1238ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1239ccd979bdSMark Fasheh 
12404670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
1241ccd979bdSMark Fasheh 			     level,
1242ccd979bdSMark Fasheh 			     &lockres->l_lksb,
1243ccd979bdSMark Fasheh 			     dlm_flags,
1244ccd979bdSMark Fasheh 			     lockres->l_name,
1245a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
1246de551246SJoel Becker 	lockres_clear_pending(lockres, gen, osb);
12477431cd7eSJoel Becker 	if (ret) {
12487431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1249ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
1250ccd979bdSMark Fasheh 	}
1251ccd979bdSMark Fasheh 
12527431cd7eSJoel Becker 	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
1253ccd979bdSMark Fasheh 
1254ccd979bdSMark Fasheh bail:
1255ccd979bdSMark Fasheh 	return ret;
1256ccd979bdSMark Fasheh }
1257ccd979bdSMark Fasheh 
1258ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
1259ccd979bdSMark Fasheh 					int flag)
1260ccd979bdSMark Fasheh {
1261ccd979bdSMark Fasheh 	unsigned long flags;
1262ccd979bdSMark Fasheh 	int ret;
1263ccd979bdSMark Fasheh 
1264ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1265ccd979bdSMark Fasheh 	ret = lockres->l_flags & flag;
1266ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1267ccd979bdSMark Fasheh 
1268ccd979bdSMark Fasheh 	return ret;
1269ccd979bdSMark Fasheh }
1270ccd979bdSMark Fasheh 
1271ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)
1272ccd979bdSMark Fasheh 
1273ccd979bdSMark Fasheh {
1274ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1275ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
1276ccd979bdSMark Fasheh }
1277ccd979bdSMark Fasheh 
1278ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)
1279ccd979bdSMark Fasheh 
1280ccd979bdSMark Fasheh {
1281ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1282ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
1283ccd979bdSMark Fasheh }
1284ccd979bdSMark Fasheh 
1285ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf
1286ccd979bdSMark Fasheh  * of another node, and return true if the currently wanted
1287ccd979bdSMark Fasheh  * level will be compatible with it. */
1288ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
1289ccd979bdSMark Fasheh 						     int wanted)
1290ccd979bdSMark Fasheh {
1291ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
1292ccd979bdSMark Fasheh 
1293ccd979bdSMark Fasheh 	return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
1294ccd979bdSMark Fasheh }
1295ccd979bdSMark Fasheh 
1296ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
1297ccd979bdSMark Fasheh {
1298ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&mw->mw_item);
1299ccd979bdSMark Fasheh 	init_completion(&mw->mw_complete);
13008ddb7b00SSunil Mushran 	ocfs2_init_start_time(mw);
1301ccd979bdSMark Fasheh }
1302ccd979bdSMark Fasheh 
1303ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
1304ccd979bdSMark Fasheh {
1305ccd979bdSMark Fasheh 	wait_for_completion(&mw->mw_complete);
1306ccd979bdSMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
1307ccd979bdSMark Fasheh 	INIT_COMPLETION(mw->mw_complete);
1308ccd979bdSMark Fasheh 	return mw->mw_status;
1309ccd979bdSMark Fasheh }
1310ccd979bdSMark Fasheh 
1311ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
1312ccd979bdSMark Fasheh 				    struct ocfs2_mask_waiter *mw,
1313ccd979bdSMark Fasheh 				    unsigned long mask,
1314ccd979bdSMark Fasheh 				    unsigned long goal)
1315ccd979bdSMark Fasheh {
1316ccd979bdSMark Fasheh 	BUG_ON(!list_empty(&mw->mw_item));
1317ccd979bdSMark Fasheh 
1318ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
1319ccd979bdSMark Fasheh 
1320ccd979bdSMark Fasheh 	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
1321ccd979bdSMark Fasheh 	mw->mw_mask = mask;
1322ccd979bdSMark Fasheh 	mw->mw_goal = goal;
1323ccd979bdSMark Fasheh }
1324ccd979bdSMark Fasheh 
1325ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY
1326ccd979bdSMark Fasheh  * if the mask still hadn't reached its goal */
1327ccd979bdSMark Fasheh static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1328ccd979bdSMark Fasheh 				      struct ocfs2_mask_waiter *mw)
1329ccd979bdSMark Fasheh {
1330ccd979bdSMark Fasheh 	unsigned long flags;
1331ccd979bdSMark Fasheh 	int ret = 0;
1332ccd979bdSMark Fasheh 
1333ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1334ccd979bdSMark Fasheh 	if (!list_empty(&mw->mw_item)) {
1335ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1336ccd979bdSMark Fasheh 			ret = -EBUSY;
1337ccd979bdSMark Fasheh 
1338ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
1339ccd979bdSMark Fasheh 		init_completion(&mw->mw_complete);
1340ccd979bdSMark Fasheh 	}
1341ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1342ccd979bdSMark Fasheh 
1343ccd979bdSMark Fasheh 	return ret;
1344ccd979bdSMark Fasheh 
1345ccd979bdSMark Fasheh }
1346ccd979bdSMark Fasheh 
1347cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1348cf8e06f1SMark Fasheh 					     struct ocfs2_lock_res *lockres)
1349cf8e06f1SMark Fasheh {
1350cf8e06f1SMark Fasheh 	int ret;
1351cf8e06f1SMark Fasheh 
1352cf8e06f1SMark Fasheh 	ret = wait_for_completion_interruptible(&mw->mw_complete);
1353cf8e06f1SMark Fasheh 	if (ret)
1354cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, mw);
1355cf8e06f1SMark Fasheh 	else
1356cf8e06f1SMark Fasheh 		ret = mw->mw_status;
1357cf8e06f1SMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
1358cf8e06f1SMark Fasheh 	INIT_COMPLETION(mw->mw_complete);
1359cf8e06f1SMark Fasheh 	return ret;
1360cf8e06f1SMark Fasheh }
1361cf8e06f1SMark Fasheh 
/*
 * Acquire a cluster lock on @lockres at @level, retrying from the "again"
 * label until the request is satisfied or fails.
 *
 * DLM_LKF_VALBLK is added to @lkm_flags when the lock type is marked
 * LOCK_TYPE_USES_LVB, and DLM_LKF_CONVERT is set or cleared here depending
 * on whether the lockres is already OCFS2_LOCK_ATTACHED.  With
 * OCFS2_LOCK_NONBLOCK in @arg_flags the function does not sleep on a
 * BUSY/BLOCKED lockres and reports -EAGAIN instead.  @l_subclass and
 * @caller_ip are only consumed by the lockdep annotations under
 * CONFIG_DEBUG_LOCK_ALLOC.
 *
 * Returns 0 after bumping the holder count for @level, -ERESTARTSYS if a
 * signal is pending while signals are still being honoured, -EAGAIN for a
 * failed NOQUEUE/NONBLOCK attempt, or the error reported by
 * ocfs2_dlm_lock() or by the mask waiter.
 */
1362cb25797dSJan Kara static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1363ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres,
1364ccd979bdSMark Fasheh 				int level,
1365bd3e7610SJoel Becker 				u32 lkm_flags,
1366cb25797dSJan Kara 				int arg_flags,
1367cb25797dSJan Kara 				int l_subclass,
1368cb25797dSJan Kara 				unsigned long caller_ip)
1369ccd979bdSMark Fasheh {
1370ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
	/* Signals are honoured unless the mount has OCFS2_MOUNT_NOINTR set. */
1371ccd979bdSMark Fasheh 	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1372ccd979bdSMark Fasheh 	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1373ccd979bdSMark Fasheh 	unsigned long flags;
1374de551246SJoel Becker 	unsigned int gen;
13751693a5c0SDavid Teigland 	int noqueue_attempted = 0;
1376ccd979bdSMark Fasheh 
1377ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1378ccd979bdSMark Fasheh 
1379b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
1380bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
1381b80fc012SMark Fasheh 
1382ccd979bdSMark Fasheh again:
	/* Each pass decides afresh whether we have to sleep on mw. */
1383ccd979bdSMark Fasheh 	wait = 0;
1384ccd979bdSMark Fasheh 
1385a1912826SSunil Mushran 	spin_lock_irqsave(&lockres->l_lock, flags);
1386a1912826SSunil Mushran 
1387ccd979bdSMark Fasheh 	if (catch_signals && signal_pending(current)) {
1388ccd979bdSMark Fasheh 		ret = -ERESTARTSYS;
1389a1912826SSunil Mushran 		goto unlock;
1390ccd979bdSMark Fasheh 	}
1391ccd979bdSMark Fasheh 
1392ccd979bdSMark Fasheh 	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1393ccd979bdSMark Fasheh 			"Cluster lock called on freeing lockres %s! flags "
1394ccd979bdSMark Fasheh 			"0x%lx\n", lockres->l_name, lockres->l_flags);
1395ccd979bdSMark Fasheh 
1396ccd979bdSMark Fasheh 	/* We only compare against the currently granted level
1397ccd979bdSMark Fasheh 	 * here. If the lock is blocked waiting on a downconvert,
1398ccd979bdSMark Fasheh 	 * we'll get caught below. */
1399ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
1400ccd979bdSMark Fasheh 	    level > lockres->l_level) {
1401ccd979bdSMark Fasheh 		/* is someone sitting in dlm_lock? If so, wait on
1402ccd979bdSMark Fasheh 		 * them. */
1403ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1404ccd979bdSMark Fasheh 		wait = 1;
1405ccd979bdSMark Fasheh 		goto unlock;
1406ccd979bdSMark Fasheh 	}
1407ccd979bdSMark Fasheh 
1408a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1409a1912826SSunil Mushran 		/*
1410a1912826SSunil Mushran 		 * We've upconverted. If the lock now has a level we can
1411a1912826SSunil Mushran 		 * work with, we take it. If, however, the lock is not at the
1412a1912826SSunil Mushran 		 * required level, we go thru the full cycle. One way this could
1413a1912826SSunil Mushran 		 * happen is if a process requesting an upconvert to PR is
1414a1912826SSunil Mushran 		 * closely followed by another requesting upconvert to an EX.
1415a1912826SSunil Mushran 		 * If the process requesting EX lands here, we want it to
1416a1912826SSunil Mushran 		 * continue attempting to upconvert and let the process
1417a1912826SSunil Mushran 		 * requesting PR take the lock.
1418a1912826SSunil Mushran 		 * If multiple processes request upconvert to PR, the first one
1419a1912826SSunil Mushran 		 * here will take the lock. The others will have to go thru the
1420a1912826SSunil Mushran 		 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1421a1912826SSunil Mushran 		 * downconvert request.
1422a1912826SSunil Mushran 		 */
1423a1912826SSunil Mushran 		if (level <= lockres->l_level)
1424a1912826SSunil Mushran 			goto update_holders;
1425a1912826SSunil Mushran 	}
1426a1912826SSunil Mushran 
1427ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1428ccd979bdSMark Fasheh 	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1429ccd979bdSMark Fasheh 		/* is the lock is currently blocked on behalf of
1430ccd979bdSMark Fasheh 		 * another node */
1431ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
1432ccd979bdSMark Fasheh 		wait = 1;
1433ccd979bdSMark Fasheh 		goto unlock;
1434ccd979bdSMark Fasheh 	}
1435ccd979bdSMark Fasheh 
1436ccd979bdSMark Fasheh 	if (level > lockres->l_level) {
		/* A NOQUEUE request gets a single trip into the DLM; if the
		 * level is still short on the following pass, give up. */
14371693a5c0SDavid Teigland 		if (noqueue_attempted > 0) {
14381693a5c0SDavid Teigland 			ret = -EAGAIN;
14391693a5c0SDavid Teigland 			goto unlock;
14401693a5c0SDavid Teigland 		}
14411693a5c0SDavid Teigland 		if (lkm_flags & DLM_LKF_NOQUEUE)
14421693a5c0SDavid Teigland 			noqueue_attempted = 1;
14431693a5c0SDavid Teigland 
1444ccd979bdSMark Fasheh 		if (lockres->l_action != OCFS2_AST_INVALID)
1445ccd979bdSMark Fasheh 			mlog(ML_ERROR, "lockres %s has action %u pending\n",
1446ccd979bdSMark Fasheh 			     lockres->l_name, lockres->l_action);
1447ccd979bdSMark Fasheh 
		/* Not attached yet: issue a plain lock request rather than
		 * a convert. */
1448019d1b22SMark Fasheh 		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1449019d1b22SMark Fasheh 			lockres->l_action = OCFS2_AST_ATTACH;
1450bd3e7610SJoel Becker 			lkm_flags &= ~DLM_LKF_CONVERT;
1451019d1b22SMark Fasheh 		} else {
1452ccd979bdSMark Fasheh 			lockres->l_action = OCFS2_AST_CONVERT;
1453bd3e7610SJoel Becker 			lkm_flags |= DLM_LKF_CONVERT;
1454019d1b22SMark Fasheh 		}
1455019d1b22SMark Fasheh 
1456ccd979bdSMark Fasheh 		lockres->l_requested = level;
1457ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1458de551246SJoel Becker 		gen = lockres_set_pending(lockres);
1459ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1460ccd979bdSMark Fasheh 
1461bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_IV);
1462bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_NL);
1463ccd979bdSMark Fasheh 
14649b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, convert from %d to %d\n",
1465ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_level, level);
1466ccd979bdSMark Fasheh 
1467ccd979bdSMark Fasheh 		/* call dlm_lock to upgrade lock now */
14684670c46dSJoel Becker 		ret = ocfs2_dlm_lock(osb->cconn,
1469ccd979bdSMark Fasheh 				     level,
1470ccd979bdSMark Fasheh 				     &lockres->l_lksb,
1471019d1b22SMark Fasheh 				     lkm_flags,
1472ccd979bdSMark Fasheh 				     lockres->l_name,
1473a796d286SJoel Becker 				     OCFS2_LOCK_ID_MAX_LEN - 1);
1474de551246SJoel Becker 		lockres_clear_pending(lockres, gen, osb);
14757431cd7eSJoel Becker 		if (ret) {
			/* -EAGAIN from a NOQUEUE request is not logged. */
14767431cd7eSJoel Becker 			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
14777431cd7eSJoel Becker 			    (ret != -EAGAIN)) {
147824ef1815SJoel Becker 				ocfs2_log_dlm_error("ocfs2_dlm_lock",
14797431cd7eSJoel Becker 						    ret, lockres);
1480ccd979bdSMark Fasheh 			}
1481ccd979bdSMark Fasheh 			ocfs2_recover_from_dlm_error(lockres, 1);
			/* l_lock was dropped above, so bypass the unlock label. */
1482ccd979bdSMark Fasheh 			goto out;
1483ccd979bdSMark Fasheh 		}
1484ccd979bdSMark Fasheh 
148573ac36eaSColy Li 		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
1486ccd979bdSMark Fasheh 		     lockres->l_name);
1487ccd979bdSMark Fasheh 
1488ccd979bdSMark Fasheh 		/* At this point we've gone inside the dlm and need to
1489ccd979bdSMark Fasheh 		 * complete our work regardless. */
1490ccd979bdSMark Fasheh 		catch_signals = 0;
1491ccd979bdSMark Fasheh 
1492ccd979bdSMark Fasheh 		/* wait for busy to clear and carry on */
1493ccd979bdSMark Fasheh 		goto again;
1494ccd979bdSMark Fasheh 	}
1495ccd979bdSMark Fasheh 
1496a1912826SSunil Mushran update_holders:
1497ccd979bdSMark Fasheh 	/* Ok, if we get here then we're good to go. */
1498ccd979bdSMark Fasheh 	ocfs2_inc_holders(lockres, level);
1499ccd979bdSMark Fasheh 
1500ccd979bdSMark Fasheh 	ret = 0;
1501ccd979bdSMark Fasheh unlock:
	/* Every exit that still holds l_lock drops UPCONVERT_FINISHING. */
1502a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1503a1912826SSunil Mushran 
1504ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1505ccd979bdSMark Fasheh out:
1506ccd979bdSMark Fasheh 	/*
1507ccd979bdSMark Fasheh 	 * This is helping work around a lock inversion between the page lock
1508ccd979bdSMark Fasheh 	 * and dlm locks.  One path holds the page lock while calling aops
1509ccd979bdSMark Fasheh 	 * which block acquiring dlm locks.  The voting thread holds dlm
1510ccd979bdSMark Fasheh 	 * locks while acquiring page locks while down converting data locks.
1511ccd979bdSMark Fasheh 	 * This block is helping an aop path notice the inversion and back
1512ccd979bdSMark Fasheh 	 * off to unlock its page lock before trying the dlm lock again.
1513ccd979bdSMark Fasheh 	 */
1514ccd979bdSMark Fasheh 	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
1515ccd979bdSMark Fasheh 	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
1516ccd979bdSMark Fasheh 		wait = 0;
		/* A zero return means the waiter no longer needs to wait
		 * (goal met or already dequeued), so simply retry. */
1517ccd979bdSMark Fasheh 		if (lockres_remove_mask_waiter(lockres, &mw))
1518ccd979bdSMark Fasheh 			ret = -EAGAIN;
1519ccd979bdSMark Fasheh 		else
1520ccd979bdSMark Fasheh 			goto again;
1521ccd979bdSMark Fasheh 	}
1522ccd979bdSMark Fasheh 	if (wait) {
1523ccd979bdSMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
1524ccd979bdSMark Fasheh 		if (ret == 0)
1525ccd979bdSMark Fasheh 			goto again;
1526ccd979bdSMark Fasheh 		mlog_errno(ret);
1527ccd979bdSMark Fasheh 	}
15288ddb7b00SSunil Mushran 	ocfs2_update_lock_stats(lockres, level, &mw, ret);
1529ccd979bdSMark Fasheh 
1530cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* Tell lockdep about the acquisition: read side for PR, write
	 * side for everything else. */
1531cb25797dSJan Kara 	if (!ret && lockres->l_lockdep_map.key != NULL) {
1532cb25797dSJan Kara 		if (level == DLM_LOCK_PR)
1533cb25797dSJan Kara 			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1534cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1535cb25797dSJan Kara 				caller_ip);
1536cb25797dSJan Kara 		else
1537cb25797dSJan Kara 			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1538cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1539cb25797dSJan Kara 				caller_ip);
1540cb25797dSJan Kara 	}
1541cb25797dSJan Kara #endif
1542ccd979bdSMark Fasheh 	return ret;
1543ccd979bdSMark Fasheh }
1544ccd979bdSMark Fasheh 
1545cb25797dSJan Kara static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1546ccd979bdSMark Fasheh 				     struct ocfs2_lock_res *lockres,
1547cb25797dSJan Kara 				     int level,
1548cb25797dSJan Kara 				     u32 lkm_flags,
1549cb25797dSJan Kara 				     int arg_flags)
1550cb25797dSJan Kara {
1551cb25797dSJan Kara 	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1552cb25797dSJan Kara 				    0, _RET_IP_);
1553cb25797dSJan Kara }
1554cb25797dSJan Kara 
1555cb25797dSJan Kara 
1556cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1557cb25797dSJan Kara 				   struct ocfs2_lock_res *lockres,
1558cb25797dSJan Kara 				   int level,
1559cb25797dSJan Kara 				   unsigned long caller_ip)
1560ccd979bdSMark Fasheh {
1561ccd979bdSMark Fasheh 	unsigned long flags;
1562ccd979bdSMark Fasheh 
1563ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1564ccd979bdSMark Fasheh 	ocfs2_dec_holders(lockres, level);
156534d024f8SMark Fasheh 	ocfs2_downconvert_on_unlock(osb, lockres);
1566ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1567cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
1568cb25797dSJan Kara 	if (lockres->l_lockdep_map.key != NULL)
1569cb25797dSJan Kara 		rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1570cb25797dSJan Kara #endif
1571ccd979bdSMark Fasheh }
1572ccd979bdSMark Fasheh 
1573da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1574d680efe9SMark Fasheh 				 struct ocfs2_lock_res *lockres,
157524c19ef4SMark Fasheh 				 int ex,
157624c19ef4SMark Fasheh 				 int local)
1577ccd979bdSMark Fasheh {
1578bd3e7610SJoel Becker 	int level =  ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1579ccd979bdSMark Fasheh 	unsigned long flags;
1580bd3e7610SJoel Becker 	u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1581ccd979bdSMark Fasheh 
1582ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1583ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1584ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1585ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1586ccd979bdSMark Fasheh 
158724c19ef4SMark Fasheh 	return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1588ccd979bdSMark Fasheh }
1589ccd979bdSMark Fasheh 
1590ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping
1591ccd979bdSMark Fasheh  * the normal cluster directory lookup. Use this ONLY on newly created
1592ccd979bdSMark Fasheh  * inodes which other nodes can't possibly see, and which haven't been
1593ccd979bdSMark Fasheh  * hashed in the inode hash yet. This can give us a good performance
1594ccd979bdSMark Fasheh  * increase as it'll skip the network broadcast normally associated
1595ccd979bdSMark Fasheh  * with creating a new lock resource. */
1596ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode)
1597ccd979bdSMark Fasheh {
1598ccd979bdSMark Fasheh 	int ret;
1599d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1600ccd979bdSMark Fasheh 
1601ccd979bdSMark Fasheh 	BUG_ON(!inode);
1602ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_inode_is_new(inode));
1603ccd979bdSMark Fasheh 
1604b0697053SMark Fasheh 	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1605ccd979bdSMark Fasheh 
1606ccd979bdSMark Fasheh 	/* NOTE: That we don't increment any of the holder counts, nor
1607ccd979bdSMark Fasheh 	 * do we add anything to a journal handle. Since this is
1608ccd979bdSMark Fasheh 	 * supposed to be a new inode which the cluster doesn't know
1609ccd979bdSMark Fasheh 	 * about yet, there is no need to.  As far as the LVB handling
1610ccd979bdSMark Fasheh 	 * is concerned, this is basically like acquiring an EX lock
1611ccd979bdSMark Fasheh 	 * on a resource which has an invalid one -- we'll set it
1612ccd979bdSMark Fasheh 	 * valid when we release the EX. */
1613ccd979bdSMark Fasheh 
161424c19ef4SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1615ccd979bdSMark Fasheh 	if (ret) {
1616ccd979bdSMark Fasheh 		mlog_errno(ret);
1617ccd979bdSMark Fasheh 		goto bail;
1618ccd979bdSMark Fasheh 	}
1619ccd979bdSMark Fasheh 
162024c19ef4SMark Fasheh 	/*
1621bd3e7610SJoel Becker 	 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
162224c19ef4SMark Fasheh 	 * don't use a generation in their lock names.
162324c19ef4SMark Fasheh 	 */
1624e63aecb6SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
1625ccd979bdSMark Fasheh 	if (ret) {
1626ccd979bdSMark Fasheh 		mlog_errno(ret);
1627ccd979bdSMark Fasheh 		goto bail;
1628ccd979bdSMark Fasheh 	}
1629ccd979bdSMark Fasheh 
163050008630STiger Yang 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
163150008630STiger Yang 	if (ret) {
163250008630STiger Yang 		mlog_errno(ret);
163350008630STiger Yang 		goto bail;
163450008630STiger Yang 	}
163550008630STiger Yang 
1636ccd979bdSMark Fasheh bail:
1637ccd979bdSMark Fasheh 	return ret;
1638ccd979bdSMark Fasheh }
1639ccd979bdSMark Fasheh 
1640ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write)
1641ccd979bdSMark Fasheh {
1642ccd979bdSMark Fasheh 	int status, level;
1643ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres;
1644c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1645ccd979bdSMark Fasheh 
1646ccd979bdSMark Fasheh 	BUG_ON(!inode);
1647ccd979bdSMark Fasheh 
1648b0697053SMark Fasheh 	mlog(0, "inode %llu take %s RW lock\n",
1649b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1650ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1651ccd979bdSMark Fasheh 
1652c1e8d35eSTao Ma 	if (ocfs2_mount_local(osb))
1653c271c5c2SSunil Mushran 		return 0;
1654c271c5c2SSunil Mushran 
1655ccd979bdSMark Fasheh 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
1656ccd979bdSMark Fasheh 
1657bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1658ccd979bdSMark Fasheh 
1659ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1660ccd979bdSMark Fasheh 				    0);
1661ccd979bdSMark Fasheh 	if (status < 0)
1662ccd979bdSMark Fasheh 		mlog_errno(status);
1663ccd979bdSMark Fasheh 
1664ccd979bdSMark Fasheh 	return status;
1665ccd979bdSMark Fasheh }
1666ccd979bdSMark Fasheh 
1667ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write)
1668ccd979bdSMark Fasheh {
1669bd3e7610SJoel Becker 	int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1670ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1671c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1672ccd979bdSMark Fasheh 
1673b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s RW lock\n",
1674b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1675ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1676ccd979bdSMark Fasheh 
1677c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
1678ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1679ccd979bdSMark Fasheh }
1680ccd979bdSMark Fasheh 
168150008630STiger Yang /*
168250008630STiger Yang  * ocfs2_open_lock always get PR mode lock.
168350008630STiger Yang  */
168450008630STiger Yang int ocfs2_open_lock(struct inode *inode)
168550008630STiger Yang {
168650008630STiger Yang 	int status = 0;
168750008630STiger Yang 	struct ocfs2_lock_res *lockres;
168850008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
168950008630STiger Yang 
169050008630STiger Yang 	BUG_ON(!inode);
169150008630STiger Yang 
169250008630STiger Yang 	mlog(0, "inode %llu take PRMODE open lock\n",
169350008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
169450008630STiger Yang 
169503efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
169650008630STiger Yang 		goto out;
169750008630STiger Yang 
169850008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
169950008630STiger Yang 
170050008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1701bd3e7610SJoel Becker 				    DLM_LOCK_PR, 0, 0);
170250008630STiger Yang 	if (status < 0)
170350008630STiger Yang 		mlog_errno(status);
170450008630STiger Yang 
170550008630STiger Yang out:
170650008630STiger Yang 	return status;
170750008630STiger Yang }
170850008630STiger Yang 
170950008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write)
171050008630STiger Yang {
171150008630STiger Yang 	int status = 0, level;
171250008630STiger Yang 	struct ocfs2_lock_res *lockres;
171350008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
171450008630STiger Yang 
171550008630STiger Yang 	BUG_ON(!inode);
171650008630STiger Yang 
171750008630STiger Yang 	mlog(0, "inode %llu try to take %s open lock\n",
171850008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
171950008630STiger Yang 	     write ? "EXMODE" : "PRMODE");
172050008630STiger Yang 
172103efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
172203efed8aSTiger Yang 		if (write)
172303efed8aSTiger Yang 			status = -EROFS;
172403efed8aSTiger Yang 		goto out;
172503efed8aSTiger Yang 	}
172603efed8aSTiger Yang 
172750008630STiger Yang 	if (ocfs2_mount_local(osb))
172850008630STiger Yang 		goto out;
172950008630STiger Yang 
173050008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
173150008630STiger Yang 
1732bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
173350008630STiger Yang 
173450008630STiger Yang 	/*
173550008630STiger Yang 	 * The file system may already holding a PRMODE/EXMODE open lock.
1736bd3e7610SJoel Becker 	 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
173750008630STiger Yang 	 * other nodes and the -EAGAIN will indicate to the caller that
173850008630STiger Yang 	 * this inode is still in use.
173950008630STiger Yang 	 */
174050008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1741bd3e7610SJoel Becker 				    level, DLM_LKF_NOQUEUE, 0);
174250008630STiger Yang 
174350008630STiger Yang out:
174450008630STiger Yang 	return status;
174550008630STiger Yang }
174650008630STiger Yang 
174750008630STiger Yang /*
174850008630STiger Yang  * ocfs2_open_unlock unlock PR and EX mode open locks.
174950008630STiger Yang  */
175050008630STiger Yang void ocfs2_open_unlock(struct inode *inode)
175150008630STiger Yang {
175250008630STiger Yang 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
175350008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
175450008630STiger Yang 
175550008630STiger Yang 	mlog(0, "inode %llu drop open lock\n",
175650008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
175750008630STiger Yang 
175850008630STiger Yang 	if (ocfs2_mount_local(osb))
175950008630STiger Yang 		goto out;
176050008630STiger Yang 
176150008630STiger Yang 	if(lockres->l_ro_holders)
176250008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1763bd3e7610SJoel Becker 				     DLM_LOCK_PR);
176450008630STiger Yang 	if(lockres->l_ex_holders)
176550008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1766bd3e7610SJoel Becker 				     DLM_LOCK_EX);
176750008630STiger Yang 
176850008630STiger Yang out:
1769c1e8d35eSTao Ma 	return;
177050008630STiger Yang }
177150008630STiger Yang 
1772cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1773cf8e06f1SMark Fasheh 				     int level)
1774cf8e06f1SMark Fasheh {
1775cf8e06f1SMark Fasheh 	int ret;
1776cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1777cf8e06f1SMark Fasheh 	unsigned long flags;
1778cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1779cf8e06f1SMark Fasheh 
1780cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1781cf8e06f1SMark Fasheh 
1782cf8e06f1SMark Fasheh retry_cancel:
1783cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1784cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
1785cf8e06f1SMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
1786cf8e06f1SMark Fasheh 		if (ret) {
1787cf8e06f1SMark Fasheh 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1788cf8e06f1SMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
1789cf8e06f1SMark Fasheh 			if (ret < 0) {
1790cf8e06f1SMark Fasheh 				mlog_errno(ret);
1791cf8e06f1SMark Fasheh 				goto out;
1792cf8e06f1SMark Fasheh 			}
1793cf8e06f1SMark Fasheh 			goto retry_cancel;
1794cf8e06f1SMark Fasheh 		}
1795cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1796cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1797cf8e06f1SMark Fasheh 
1798cf8e06f1SMark Fasheh 		ocfs2_wait_for_mask(&mw);
1799cf8e06f1SMark Fasheh 		goto retry_cancel;
1800cf8e06f1SMark Fasheh 	}
1801cf8e06f1SMark Fasheh 
1802cf8e06f1SMark Fasheh 	ret = -ERESTARTSYS;
1803cf8e06f1SMark Fasheh 	/*
1804cf8e06f1SMark Fasheh 	 * We may still have gotten the lock, in which case there's no
1805cf8e06f1SMark Fasheh 	 * point to restarting the syscall.
1806cf8e06f1SMark Fasheh 	 */
1807cf8e06f1SMark Fasheh 	if (lockres->l_level == level)
1808cf8e06f1SMark Fasheh 		ret = 0;
1809cf8e06f1SMark Fasheh 
1810cf8e06f1SMark Fasheh 	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1811cf8e06f1SMark Fasheh 	     lockres->l_flags, lockres->l_level, lockres->l_action);
1812cf8e06f1SMark Fasheh 
1813cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1814cf8e06f1SMark Fasheh 
1815cf8e06f1SMark Fasheh out:
1816cf8e06f1SMark Fasheh 	return ret;
1817cf8e06f1SMark Fasheh }
1818cf8e06f1SMark Fasheh 
/*
 * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
 * flock() calls. The locking approach this requires is sufficiently
 * different from all other cluster lock types that we implement a
 * separate path to the "low-level" dlm calls. In particular:
 *
 * - No optimization of lock levels is done - we take exactly the
 *   level that has been requested.
 *
 * - No lock caching is employed. We immediately downconvert to
 *   no-lock at unlock time. (This also means flock locks never go on
 *   the blocking list.)
 *
 * - Since userspace can trivially deadlock itself with flock, we make
 *   sure to allow cancellation of a misbehaving application's flock()
 *   request.
 *
 * - Access to any flock lockres doesn't require concurrency, so we
 *   can simplify the code by requiring the caller to guarantee
 *   serialization of dlmglue flock calls.
 *
 * @file:    open file whose private data carries the flock lockres
 * @ex:      non-zero requests DLM_LOCK_EX, zero requests DLM_LOCK_PR
 * @trylock: non-zero adds DLM_LKF_NOQUEUE to the dlm request
 *
 * Returns 0 on success. Returns -EINVAL if the lockres is already busy
 * or above NL, or if the dlm call fails (other than -EAGAIN on a
 * trylock); -EAGAIN when a trylock is not granted; otherwise the result
 * of the lock-create, wait, or signal-handling helpers.
 */
int ocfs2_file_lock(struct file *file, int ex, int trylock)
{
	int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
	unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
	unsigned long flags;
	struct ocfs2_file_private *fp = file->private_data;
	struct ocfs2_lock_res *lockres = &fp->fp_flock;
	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
	struct ocfs2_mask_waiter mw;

	ocfs2_init_mask_waiter(&mw);

	/* A flock lockres must be idle and at NL (or lower) on entry. */
	if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
	    (lockres->l_level > DLM_LOCK_NL)) {
		mlog(ML_ERROR,
		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
		     "level: %u\n", lockres->l_name, lockres->l_flags,
		     lockres->l_level);
		return -EINVAL;
	}

	spin_lock_irqsave(&lockres->l_lock, flags);
	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
		/* Register the waiter before creating the lock, then drop
		 * l_lock for the create and the wait. */
		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
		spin_unlock_irqrestore(&lockres->l_lock, flags);

		/*
		 * Get the lock at NLMODE to start - that way we
		 * can cancel the upconvert request if need be.
		 */
		ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_wait_for_mask(&mw);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		/* Retake l_lock; the convert setup below expects it held. */
		spin_lock_irqsave(&lockres->l_lock, flags);
	}

	/* Set up the convert to the requested level and mark the
	 * lockres busy. */
	lockres->l_action = OCFS2_AST_CONVERT;
	lkm_flags |= DLM_LKF_CONVERT;
	lockres->l_requested = level;
	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);

	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1);
	if (ret) {
		/* -EAGAIN on a trylock is passed through unchanged and
		 * unlogged; every other failure is logged and reported
		 * as -EINVAL. */
		if (!trylock || (ret != -EAGAIN)) {
			ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
			ret = -EINVAL;
		}

		ocfs2_recover_from_dlm_error(lockres, 1);
		lockres_remove_mask_waiter(lockres, &mw);
		goto out;
	}

	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
	if (ret == -ERESTARTSYS) {
		/*
		 * Userspace can cause deadlock itself with
		 * flock(). Current behavior locally is to allow the
		 * deadlock, but abort the system call if a signal is
		 * received. We follow this example, otherwise a
		 * poorly written program could sit in kernel until
		 * reboot.
		 *
		 * Handling this is a bit more complicated for Ocfs2
		 * though. We can't exit this function with an
		 * outstanding lock request, so a cancel convert is
		 * required. We intentionally overwrite 'ret' - if the
		 * cancel fails and the lock was granted, it's easier
		 * to just bubble success back up to the user.
		 */
		ret = ocfs2_flock_handle_signal(lockres, level);
	} else if (!ret && (level > lockres->l_level)) {
		/* Trylock failed asynchronously */
		BUG_ON(!trylock);
		ret = -EAGAIN;
	}

out:

	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
	     lockres->l_name, ex, trylock, ret);
	return ret;
}
1935cf8e06f1SMark Fasheh 
1936cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file)
1937cf8e06f1SMark Fasheh {
1938cf8e06f1SMark Fasheh 	int ret;
1939de551246SJoel Becker 	unsigned int gen;
1940cf8e06f1SMark Fasheh 	unsigned long flags;
1941cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
1942cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
1943cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1944cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1945cf8e06f1SMark Fasheh 
1946cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1947cf8e06f1SMark Fasheh 
1948cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
1949cf8e06f1SMark Fasheh 		return;
1950cf8e06f1SMark Fasheh 
1951e988cf1cSMark Fasheh 	if (lockres->l_level == DLM_LOCK_NL)
1952cf8e06f1SMark Fasheh 		return;
1953cf8e06f1SMark Fasheh 
1954cf8e06f1SMark Fasheh 	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
1955cf8e06f1SMark Fasheh 	     lockres->l_name, lockres->l_flags, lockres->l_level,
1956cf8e06f1SMark Fasheh 	     lockres->l_action);
1957cf8e06f1SMark Fasheh 
1958cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1959cf8e06f1SMark Fasheh 	/*
1960cf8e06f1SMark Fasheh 	 * Fake a blocking ast for the downconvert code.
1961cf8e06f1SMark Fasheh 	 */
1962cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
1963bd3e7610SJoel Becker 	lockres->l_blocking = DLM_LOCK_EX;
1964cf8e06f1SMark Fasheh 
1965e988cf1cSMark Fasheh 	gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
1966cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1967cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1968cf8e06f1SMark Fasheh 
1969e988cf1cSMark Fasheh 	ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
1970cf8e06f1SMark Fasheh 	if (ret) {
1971cf8e06f1SMark Fasheh 		mlog_errno(ret);
1972cf8e06f1SMark Fasheh 		return;
1973cf8e06f1SMark Fasheh 	}
1974cf8e06f1SMark Fasheh 
1975cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask(&mw);
1976cf8e06f1SMark Fasheh 	if (ret)
1977cf8e06f1SMark Fasheh 		mlog_errno(ret);
1978cf8e06f1SMark Fasheh }
1979cf8e06f1SMark Fasheh 
198034d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
1981ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
1982ccd979bdSMark Fasheh {
1983ccd979bdSMark Fasheh 	int kick = 0;
1984ccd979bdSMark Fasheh 
1985ccd979bdSMark Fasheh 	/* If we know that another node is waiting on our lock, kick
198634d024f8SMark Fasheh 	 * the downconvert thread * pre-emptively when we reach a release
1987ccd979bdSMark Fasheh 	 * condition. */
1988ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
1989ccd979bdSMark Fasheh 		switch(lockres->l_blocking) {
1990bd3e7610SJoel Becker 		case DLM_LOCK_EX:
1991ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders && !lockres->l_ro_holders)
1992ccd979bdSMark Fasheh 				kick = 1;
1993ccd979bdSMark Fasheh 			break;
1994bd3e7610SJoel Becker 		case DLM_LOCK_PR:
1995ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders)
1996ccd979bdSMark Fasheh 				kick = 1;
1997ccd979bdSMark Fasheh 			break;
1998ccd979bdSMark Fasheh 		default:
1999ccd979bdSMark Fasheh 			BUG();
2000ccd979bdSMark Fasheh 		}
2001ccd979bdSMark Fasheh 	}
2002ccd979bdSMark Fasheh 
2003ccd979bdSMark Fasheh 	if (kick)
200434d024f8SMark Fasheh 		ocfs2_wake_downconvert_thread(osb);
2005ccd979bdSMark Fasheh }
2006ccd979bdSMark Fasheh 
2007ccd979bdSMark Fasheh #define OCFS2_SEC_BITS   34
2008ccd979bdSMark Fasheh #define OCFS2_SEC_SHIFT  (64 - 34)
2009ccd979bdSMark Fasheh #define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
2010ccd979bdSMark Fasheh 
2011ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for
2012ccd979bdSMark Fasheh  * now. */
2013ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec)
2014ccd979bdSMark Fasheh {
2015ccd979bdSMark Fasheh 	u64 res;
2016ccd979bdSMark Fasheh 	u64 sec = spec->tv_sec;
2017ccd979bdSMark Fasheh 	u32 nsec = spec->tv_nsec;
2018ccd979bdSMark Fasheh 
2019ccd979bdSMark Fasheh 	res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
2020ccd979bdSMark Fasheh 
2021ccd979bdSMark Fasheh 	return res;
2022ccd979bdSMark Fasheh }
2023ccd979bdSMark Fasheh 
2024ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't
2025ccd979bdSMark Fasheh  * need ip_lock in this function as anyone who would be changing those
2026e63aecb6SMark Fasheh  * values is supposed to be blocked in ocfs2_inode_lock right now. */
2027ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2028ccd979bdSMark Fasheh {
2029ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2030e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2031ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2032ccd979bdSMark Fasheh 
2033a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2034ccd979bdSMark Fasheh 
203524c19ef4SMark Fasheh 	/*
203624c19ef4SMark Fasheh 	 * Invalidate the LVB of a deleted inode - this way other
203724c19ef4SMark Fasheh 	 * nodes are forced to go to disk and discover the new inode
203824c19ef4SMark Fasheh 	 * status.
203924c19ef4SMark Fasheh 	 */
204024c19ef4SMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
204124c19ef4SMark Fasheh 		lvb->lvb_version = 0;
204224c19ef4SMark Fasheh 		goto out;
204324c19ef4SMark Fasheh 	}
204424c19ef4SMark Fasheh 
20454d3b83f7SMark Fasheh 	lvb->lvb_version   = OCFS2_LVB_VERSION;
2046ccd979bdSMark Fasheh 	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode));
2047ccd979bdSMark Fasheh 	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
204803ab30f7SEric W. Biederman 	lvb->lvb_iuid      = cpu_to_be32(i_uid_read(inode));
204903ab30f7SEric W. Biederman 	lvb->lvb_igid      = cpu_to_be32(i_gid_read(inode));
2050ccd979bdSMark Fasheh 	lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
2051ccd979bdSMark Fasheh 	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
2052ccd979bdSMark Fasheh 	lvb->lvb_iatime_packed  =
2053ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
2054ccd979bdSMark Fasheh 	lvb->lvb_ictime_packed =
2055ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
2056ccd979bdSMark Fasheh 	lvb->lvb_imtime_packed =
2057ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
2058ca4d147eSHerbert Poetzl 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
205915b1e36bSMark Fasheh 	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
2060f9e2d82eSMark Fasheh 	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
2061ccd979bdSMark Fasheh 
206224c19ef4SMark Fasheh out:
2063ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2064ccd979bdSMark Fasheh }
2065ccd979bdSMark Fasheh 
2066ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec,
2067ccd979bdSMark Fasheh 				  u64 packed_time)
2068ccd979bdSMark Fasheh {
2069ccd979bdSMark Fasheh 	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
2070ccd979bdSMark Fasheh 	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
2071ccd979bdSMark Fasheh }
2072ccd979bdSMark Fasheh 
2073ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2074ccd979bdSMark Fasheh {
2075ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2076e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2077ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2078ccd979bdSMark Fasheh 
2079ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2080ccd979bdSMark Fasheh 
2081a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2082ccd979bdSMark Fasheh 
2083ccd979bdSMark Fasheh 	/* We're safe here without the lockres lock... */
2084ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2085ccd979bdSMark Fasheh 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
2086ccd979bdSMark Fasheh 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
2087ccd979bdSMark Fasheh 
2088ca4d147eSHerbert Poetzl 	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
208915b1e36bSMark Fasheh 	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
2090ca4d147eSHerbert Poetzl 	ocfs2_set_inode_flags(inode);
2091ca4d147eSHerbert Poetzl 
2092ccd979bdSMark Fasheh 	/* fast-symlinks are a special case */
2093ccd979bdSMark Fasheh 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
2094ccd979bdSMark Fasheh 		inode->i_blocks = 0;
2095ccd979bdSMark Fasheh 	else
20968110b073SMark Fasheh 		inode->i_blocks = ocfs2_inode_sector_count(inode);
2097ccd979bdSMark Fasheh 
209803ab30f7SEric W. Biederman 	i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
209903ab30f7SEric W. Biederman 	i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
2100ccd979bdSMark Fasheh 	inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
2101bfe86848SMiklos Szeredi 	set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2102ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_atime,
2103ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_iatime_packed));
2104ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_mtime,
2105ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_imtime_packed));
2106ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_ctime,
2107ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_ictime_packed));
2108ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2109ccd979bdSMark Fasheh }
2110ccd979bdSMark Fasheh 
2111f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
2112f9e2d82eSMark Fasheh 					      struct ocfs2_lock_res *lockres)
2113ccd979bdSMark Fasheh {
2114a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2115ccd979bdSMark Fasheh 
21161c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
21171c520dfbSJoel Becker 	    && lvb->lvb_version == OCFS2_LVB_VERSION
2118f9e2d82eSMark Fasheh 	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
2119ccd979bdSMark Fasheh 		return 1;
2120ccd979bdSMark Fasheh 	return 0;
2121ccd979bdSMark Fasheh }
2122ccd979bdSMark Fasheh 
2123ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and
2124ccd979bdSMark Fasheh  * arbitrate who gets to refresh it.
2125ccd979bdSMark Fasheh  *
2126ccd979bdSMark Fasheh  *   0 means no refresh needed.
2127ccd979bdSMark Fasheh  *
2128ccd979bdSMark Fasheh  *   > 0 means you need to refresh this and you MUST call
2129ccd979bdSMark Fasheh  *   ocfs2_complete_lock_res_refresh afterwards. */
2130ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2131ccd979bdSMark Fasheh {
2132ccd979bdSMark Fasheh 	unsigned long flags;
2133ccd979bdSMark Fasheh 	int status = 0;
2134ccd979bdSMark Fasheh 
2135ccd979bdSMark Fasheh refresh_check:
2136ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2137ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2138ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2139ccd979bdSMark Fasheh 		goto bail;
2140ccd979bdSMark Fasheh 	}
2141ccd979bdSMark Fasheh 
2142ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2143ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2144ccd979bdSMark Fasheh 
2145ccd979bdSMark Fasheh 		ocfs2_wait_on_refreshing_lock(lockres);
2146ccd979bdSMark Fasheh 		goto refresh_check;
2147ccd979bdSMark Fasheh 	}
2148ccd979bdSMark Fasheh 
2149ccd979bdSMark Fasheh 	/* Ok, I'll be the one to refresh this lock. */
2150ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
2151ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2152ccd979bdSMark Fasheh 
2153ccd979bdSMark Fasheh 	status = 1;
2154ccd979bdSMark Fasheh bail:
2155c1e8d35eSTao Ma 	mlog(0, "status %d\n", status);
2156ccd979bdSMark Fasheh 	return status;
2157ccd979bdSMark Fasheh }
2158ccd979bdSMark Fasheh 
2159ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh
2160ccd979bdSMark Fasheh  * anymroe, but i won't clear the needs refresh flag. */
2161ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
2162ccd979bdSMark Fasheh 						   int status)
2163ccd979bdSMark Fasheh {
2164ccd979bdSMark Fasheh 	unsigned long flags;
2165ccd979bdSMark Fasheh 
2166ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2167ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
2168ccd979bdSMark Fasheh 	if (!status)
2169ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
2170ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2171ccd979bdSMark Fasheh 
2172ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
2173ccd979bdSMark Fasheh }
2174ccd979bdSMark Fasheh 
/*
 * Bring the in-memory inode up to date after its inode lock was taken.
 *
 * Returns immediately on a locally mounted file system, returns -ENOENT
 * if the inode is flagged OCFS2_INODE_DELETED, and does nothing if
 * ocfs2_should_refresh_lock_res() says no refresh is needed. Otherwise
 * the cached metadata and extent map are dropped and the inode is
 * refreshed from the LVB when it is trustable, or from disk when not.
 *
 * @bh may or may not be filled in: a buffer head is only read into it
 * on the path that goes to disk.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_inode_lock_update(struct inode *inode,
				  struct buffer_head **bh)
{
	int status = 0;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
	struct ocfs2_dinode *fe;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	if (ocfs2_mount_local(osb))
		goto bail;

	spin_lock(&oi->ip_lock);
	if (oi->ip_flags & OCFS2_INODE_DELETED) {
		mlog(0, "Orphaned inode %llu was deleted while we "
		     "were waiting on a lock. ip_flags = 0x%x\n",
		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
		spin_unlock(&oi->ip_lock);
		status = -ENOENT;
		goto bail;
	}
	spin_unlock(&oi->ip_lock);

	/* From here on we own the refresh and must complete it via
	 * bail_refresh. */
	if (!ocfs2_should_refresh_lock_res(lockres))
		goto bail;

	/* This will discard any caching information we might have had
	 * for the inode metadata. */
	ocfs2_metadata_cache_purge(INODE_CACHE(inode));

	ocfs2_extent_map_trunc(inode, 0);

	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
		mlog(0, "Trusting LVB on inode %llu\n",
		     (unsigned long long)oi->ip_blkno);
		ocfs2_refresh_inode_from_lvb(inode);
	} else {
		/* Boo, we have to go to disk. */
		/* read bh, cast, ocfs2_refresh_inode */
		status = ocfs2_read_inode_block(inode, bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail_refresh;
		}
		fe = (struct ocfs2_dinode *) (*bh)->b_data;

		/* This is a good chance to make sure we're not
		 * locking an invalid object.  ocfs2_read_inode_block()
		 * already checked that the inode block is sane.
		 *
		 * We bug on a stale inode here because we checked
		 * above whether it was wiped from disk. The wiping
		 * node provides a guarantee that we receive that
		 * message and can mark the inode before dropping any
		 * locks associated with it. */
		mlog_bug_on_msg(inode->i_generation !=
				le32_to_cpu(fe->i_generation),
				"Invalid dinode %llu disk generation: %u "
				"inode->i_generation: %u\n",
				(unsigned long long)oi->ip_blkno,
				le32_to_cpu(fe->i_generation),
				inode->i_generation);
		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
				(unsigned long long)oi->ip_blkno,
				(unsigned long long)le64_to_cpu(fe->i_dtime),
				le32_to_cpu(fe->i_flags));

		ocfs2_refresh_inode(inode, fe);
		ocfs2_track_lock_refresh(lockres);
	}

	status = 0;
bail_refresh:
	/* A non-zero status leaves the needs-refresh flag set. */
	ocfs2_complete_lock_res_refresh(lockres, status);
bail:
	return status;
}
2255ccd979bdSMark Fasheh 
/*
 * Hand the caller a buffer head for the inode block in @ret_bh.
 *
 * If @passed_bh is set, an extra reference is taken on it and it is
 * returned as-is. Otherwise the inode block is read into @ret_bh.
 *
 * Returns 0 on success or the negative status of
 * ocfs2_read_inode_block().
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int status;

	if (!passed_bh) {
		status = ocfs2_read_inode_block(inode, ret_bh);
		if (status < 0)
			mlog_errno(status);

		return status;
	}

	/* Ok, the update went to disk for us, use the returned bh. */
	get_bh(passed_bh);
	*ret_bh = passed_bh;

	return 0;
}
2277ccd979bdSMark Fasheh 
2278ccd979bdSMark Fasheh /*
2279ccd979bdSMark Fasheh  * returns < 0 error if the callback will never be called, otherwise
2280ccd979bdSMark Fasheh  * the result of the lock will be communicated via the callback.
2281ccd979bdSMark Fasheh  */
2282cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode,
2283ccd979bdSMark Fasheh 				 struct buffer_head **ret_bh,
2284ccd979bdSMark Fasheh 				 int ex,
2285cb25797dSJan Kara 				 int arg_flags,
2286cb25797dSJan Kara 				 int subclass)
2287ccd979bdSMark Fasheh {
2288bd3e7610SJoel Becker 	int status, level, acquired;
2289bd3e7610SJoel Becker 	u32 dlm_flags;
2290c271c5c2SSunil Mushran 	struct ocfs2_lock_res *lockres = NULL;
2291ccd979bdSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2292ccd979bdSMark Fasheh 	struct buffer_head *local_bh = NULL;
2293ccd979bdSMark Fasheh 
2294ccd979bdSMark Fasheh 	BUG_ON(!inode);
2295ccd979bdSMark Fasheh 
2296b0697053SMark Fasheh 	mlog(0, "inode %llu, take %s META lock\n",
2297b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2298ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2299ccd979bdSMark Fasheh 
2300ccd979bdSMark Fasheh 	status = 0;
2301ccd979bdSMark Fasheh 	acquired = 0;
2302ccd979bdSMark Fasheh 	/* We'll allow faking a readonly metadata lock for
2303ccd979bdSMark Fasheh 	 * rodevices. */
2304ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb)) {
2305ccd979bdSMark Fasheh 		if (ex)
2306ccd979bdSMark Fasheh 			status = -EROFS;
230703efed8aSTiger Yang 		goto getbh;
2308ccd979bdSMark Fasheh 	}
2309ccd979bdSMark Fasheh 
2310c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2311c271c5c2SSunil Mushran 		goto local;
2312c271c5c2SSunil Mushran 
2313ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2314553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2315ccd979bdSMark Fasheh 
2316e63aecb6SMark Fasheh 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
2317bd3e7610SJoel Becker 	level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2318ccd979bdSMark Fasheh 	dlm_flags = 0;
2319ccd979bdSMark Fasheh 	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2320bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_NOQUEUE;
2321ccd979bdSMark Fasheh 
2322cb25797dSJan Kara 	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2323cb25797dSJan Kara 				      arg_flags, subclass, _RET_IP_);
2324ccd979bdSMark Fasheh 	if (status < 0) {
2325ccd979bdSMark Fasheh 		if (status != -EAGAIN && status != -EIOCBRETRY)
2326ccd979bdSMark Fasheh 			mlog_errno(status);
2327ccd979bdSMark Fasheh 		goto bail;
2328ccd979bdSMark Fasheh 	}
2329ccd979bdSMark Fasheh 
2330ccd979bdSMark Fasheh 	/* Notify the error cleanup path to drop the cluster lock. */
2331ccd979bdSMark Fasheh 	acquired = 1;
2332ccd979bdSMark Fasheh 
2333ccd979bdSMark Fasheh 	/* We wait twice because a node may have died while we were in
2334ccd979bdSMark Fasheh 	 * the lower dlm layers. The second time though, we've
2335ccd979bdSMark Fasheh 	 * committed to owning this lock so we don't allow signals to
2336ccd979bdSMark Fasheh 	 * abort the operation. */
2337ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2338553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2339ccd979bdSMark Fasheh 
2340c271c5c2SSunil Mushran local:
234124c19ef4SMark Fasheh 	/*
234224c19ef4SMark Fasheh 	 * We only see this flag if we're being called from
234324c19ef4SMark Fasheh 	 * ocfs2_read_locked_inode(). It means we're locking an inode
234424c19ef4SMark Fasheh 	 * which hasn't been populated yet, so clear the refresh flag
234524c19ef4SMark Fasheh 	 * and let the caller handle it.
234624c19ef4SMark Fasheh 	 */
234724c19ef4SMark Fasheh 	if (inode->i_state & I_NEW) {
234824c19ef4SMark Fasheh 		status = 0;
2349c271c5c2SSunil Mushran 		if (lockres)
235024c19ef4SMark Fasheh 			ocfs2_complete_lock_res_refresh(lockres, 0);
235124c19ef4SMark Fasheh 		goto bail;
235224c19ef4SMark Fasheh 	}
235324c19ef4SMark Fasheh 
2354ccd979bdSMark Fasheh 	/* This is fun. The caller may want a bh back, or it may
2355e63aecb6SMark Fasheh 	 * not. ocfs2_inode_lock_update definitely wants one in, but
2356ccd979bdSMark Fasheh 	 * may or may not read one, depending on what's in the
2357ccd979bdSMark Fasheh 	 * LVB. The result of all of this is that we've *only* gone to
2358ccd979bdSMark Fasheh 	 * disk if we have to, so the complexity is worthwhile. */
2359e63aecb6SMark Fasheh 	status = ocfs2_inode_lock_update(inode, &local_bh);
2360ccd979bdSMark Fasheh 	if (status < 0) {
2361ccd979bdSMark Fasheh 		if (status != -ENOENT)
2362ccd979bdSMark Fasheh 			mlog_errno(status);
2363ccd979bdSMark Fasheh 		goto bail;
2364ccd979bdSMark Fasheh 	}
236503efed8aSTiger Yang getbh:
2366ccd979bdSMark Fasheh 	if (ret_bh) {
2367ccd979bdSMark Fasheh 		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2368ccd979bdSMark Fasheh 		if (status < 0) {
2369ccd979bdSMark Fasheh 			mlog_errno(status);
2370ccd979bdSMark Fasheh 			goto bail;
2371ccd979bdSMark Fasheh 		}
2372ccd979bdSMark Fasheh 	}
2373ccd979bdSMark Fasheh 
2374ccd979bdSMark Fasheh bail:
2375ccd979bdSMark Fasheh 	if (status < 0) {
2376ccd979bdSMark Fasheh 		if (ret_bh && (*ret_bh)) {
2377ccd979bdSMark Fasheh 			brelse(*ret_bh);
2378ccd979bdSMark Fasheh 			*ret_bh = NULL;
2379ccd979bdSMark Fasheh 		}
2380ccd979bdSMark Fasheh 		if (acquired)
2381e63aecb6SMark Fasheh 			ocfs2_inode_unlock(inode, ex);
2382ccd979bdSMark Fasheh 	}
2383ccd979bdSMark Fasheh 
2384ccd979bdSMark Fasheh 	if (local_bh)
2385ccd979bdSMark Fasheh 		brelse(local_bh);
2386ccd979bdSMark Fasheh 
2387ccd979bdSMark Fasheh 	return status;
2388ccd979bdSMark Fasheh }
2389ccd979bdSMark Fasheh 
2390ccd979bdSMark Fasheh /*
239134d024f8SMark Fasheh  * This is working around a lock inversion between tasks acquiring DLM
239234d024f8SMark Fasheh  * locks while holding a page lock and the downconvert thread which
239334d024f8SMark Fasheh  * blocks dlm lock acquiry while acquiring page locks.
2394ccd979bdSMark Fasheh  *
2395ccd979bdSMark Fasheh  * ** These _with_page variantes are only intended to be called from aop
2396ccd979bdSMark Fasheh  * methods that hold page locks and return a very specific *positive* error
2397ccd979bdSMark Fasheh  * code that aop methods pass up to the VFS -- test for errors with != 0. **
2398ccd979bdSMark Fasheh  *
239934d024f8SMark Fasheh  * The DLM is called such that it returns -EAGAIN if it would have
240034d024f8SMark Fasheh  * blocked waiting for the downconvert thread.  In that case we unlock
240134d024f8SMark Fasheh  * our page so the downconvert thread can make progress.  Once we've
240234d024f8SMark Fasheh  * done this we have to return AOP_TRUNCATED_PAGE so the aop method
240334d024f8SMark Fasheh  * that called us can bubble that back up into the VFS who will then
240434d024f8SMark Fasheh  * immediately retry the aop call.
2405ccd979bdSMark Fasheh  *
2406ccd979bdSMark Fasheh  * We do a blocking lock and immediate unlock before returning, though, so that
2407ccd979bdSMark Fasheh  * the lock has a great chance of being cached on this node by the time the VFS
2408ccd979bdSMark Fasheh  * calls back to retry the aop.    This has a potential to livelock as nodes
2409ccd979bdSMark Fasheh  * ping locks back and forth, but that's a risk we're willing to take to avoid
2410ccd979bdSMark Fasheh  * the lock inversion simply.
2411ccd979bdSMark Fasheh  */
2412e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode,
2413ccd979bdSMark Fasheh 			      struct buffer_head **ret_bh,
2414ccd979bdSMark Fasheh 			      int ex,
2415ccd979bdSMark Fasheh 			      struct page *page)
2416ccd979bdSMark Fasheh {
2417ccd979bdSMark Fasheh 	int ret;
2418ccd979bdSMark Fasheh 
2419e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2420ccd979bdSMark Fasheh 	if (ret == -EAGAIN) {
2421ccd979bdSMark Fasheh 		unlock_page(page);
2422e63aecb6SMark Fasheh 		if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
2423e63aecb6SMark Fasheh 			ocfs2_inode_unlock(inode, ex);
2424ccd979bdSMark Fasheh 		ret = AOP_TRUNCATED_PAGE;
2425ccd979bdSMark Fasheh 	}
2426ccd979bdSMark Fasheh 
2427ccd979bdSMark Fasheh 	return ret;
2428ccd979bdSMark Fasheh }
2429ccd979bdSMark Fasheh 
2430e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode,
24317f1a37e3STiger Yang 			  struct vfsmount *vfsmnt,
24327f1a37e3STiger Yang 			  int *level)
24337f1a37e3STiger Yang {
24347f1a37e3STiger Yang 	int ret;
24357f1a37e3STiger Yang 
2436e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock(inode, NULL, 0);
24377f1a37e3STiger Yang 	if (ret < 0) {
24387f1a37e3STiger Yang 		mlog_errno(ret);
24397f1a37e3STiger Yang 		return ret;
24407f1a37e3STiger Yang 	}
24417f1a37e3STiger Yang 
24427f1a37e3STiger Yang 	/*
24437f1a37e3STiger Yang 	 * If we should update atime, we will get EX lock,
24447f1a37e3STiger Yang 	 * otherwise we just get PR lock.
24457f1a37e3STiger Yang 	 */
24467f1a37e3STiger Yang 	if (ocfs2_should_update_atime(inode, vfsmnt)) {
24477f1a37e3STiger Yang 		struct buffer_head *bh = NULL;
24487f1a37e3STiger Yang 
2449e63aecb6SMark Fasheh 		ocfs2_inode_unlock(inode, 0);
2450e63aecb6SMark Fasheh 		ret = ocfs2_inode_lock(inode, &bh, 1);
24517f1a37e3STiger Yang 		if (ret < 0) {
24527f1a37e3STiger Yang 			mlog_errno(ret);
24537f1a37e3STiger Yang 			return ret;
24547f1a37e3STiger Yang 		}
24557f1a37e3STiger Yang 		*level = 1;
24567f1a37e3STiger Yang 		if (ocfs2_should_update_atime(inode, vfsmnt))
24577f1a37e3STiger Yang 			ocfs2_update_inode_atime(inode, bh);
24587f1a37e3STiger Yang 		if (bh)
24597f1a37e3STiger Yang 			brelse(bh);
24607f1a37e3STiger Yang 	} else
24617f1a37e3STiger Yang 		*level = 0;
24627f1a37e3STiger Yang 
24637f1a37e3STiger Yang 	return ret;
24647f1a37e3STiger Yang }
24657f1a37e3STiger Yang 
2466e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode,
2467ccd979bdSMark Fasheh 		       int ex)
2468ccd979bdSMark Fasheh {
2469bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2470e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2471c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2472ccd979bdSMark Fasheh 
2473b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s META lock\n",
2474b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2475ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2476ccd979bdSMark Fasheh 
2477c271c5c2SSunil Mushran 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
2478c271c5c2SSunil Mushran 	    !ocfs2_mount_local(osb))
2479ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
2480ccd979bdSMark Fasheh }
2481ccd979bdSMark Fasheh 
2482df152c24SSunil Mushran int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
248383273932SSrinivas Eeda {
248483273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
248583273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
248683273932SSrinivas Eeda 	int status = 0;
248783273932SSrinivas Eeda 
2488df152c24SSunil Mushran 	if (ocfs2_is_hard_readonly(osb))
2489df152c24SSunil Mushran 		return -EROFS;
2490df152c24SSunil Mushran 
2491df152c24SSunil Mushran 	if (ocfs2_mount_local(osb))
2492df152c24SSunil Mushran 		return 0;
2493df152c24SSunil Mushran 
249483273932SSrinivas Eeda 	lockres = &osb->osb_orphan_scan.os_lockres;
2495df152c24SSunil Mushran 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
249683273932SSrinivas Eeda 	if (status < 0)
249783273932SSrinivas Eeda 		return status;
249883273932SSrinivas Eeda 
249983273932SSrinivas Eeda 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
25001c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
25011c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
250283273932SSrinivas Eeda 		*seqno = be32_to_cpu(lvb->lvb_os_seqno);
25033211949fSSunil Mushran 	else
25043211949fSSunil Mushran 		*seqno = osb->osb_orphan_scan.os_seqno + 1;
25053211949fSSunil Mushran 
250683273932SSrinivas Eeda 	return status;
250783273932SSrinivas Eeda }
250883273932SSrinivas Eeda 
2509df152c24SSunil Mushran void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
251083273932SSrinivas Eeda {
251183273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
251283273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
251383273932SSrinivas Eeda 
2514df152c24SSunil Mushran 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
251583273932SSrinivas Eeda 		lockres = &osb->osb_orphan_scan.os_lockres;
251683273932SSrinivas Eeda 		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
251783273932SSrinivas Eeda 		lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
251883273932SSrinivas Eeda 		lvb->lvb_os_seqno = cpu_to_be32(seqno);
2519df152c24SSunil Mushran 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2520df152c24SSunil Mushran 	}
252183273932SSrinivas Eeda }
252283273932SSrinivas Eeda 
2523ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb,
2524ccd979bdSMark Fasheh 		     int ex)
2525ccd979bdSMark Fasheh {
2526c271c5c2SSunil Mushran 	int status = 0;
2527bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2528ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2529ccd979bdSMark Fasheh 
2530ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2531ccd979bdSMark Fasheh 		return -EROFS;
2532ccd979bdSMark Fasheh 
2533c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2534c271c5c2SSunil Mushran 		goto bail;
2535c271c5c2SSunil Mushran 
2536ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2537ccd979bdSMark Fasheh 	if (status < 0) {
2538ccd979bdSMark Fasheh 		mlog_errno(status);
2539ccd979bdSMark Fasheh 		goto bail;
2540ccd979bdSMark Fasheh 	}
2541ccd979bdSMark Fasheh 
2542ccd979bdSMark Fasheh 	/* The super block lock path is really in the best position to
2543ccd979bdSMark Fasheh 	 * know when resources covered by the lock need to be
2544ccd979bdSMark Fasheh 	 * refreshed, so we do it here. Of course, making sense of
2545ccd979bdSMark Fasheh 	 * everything is up to the caller :) */
2546ccd979bdSMark Fasheh 	status = ocfs2_should_refresh_lock_res(lockres);
2547ccd979bdSMark Fasheh 	if (status < 0) {
2548ccd979bdSMark Fasheh 		mlog_errno(status);
2549ccd979bdSMark Fasheh 		goto bail;
2550ccd979bdSMark Fasheh 	}
2551ccd979bdSMark Fasheh 	if (status) {
25528e8a4603SMark Fasheh 		status = ocfs2_refresh_slot_info(osb);
2553ccd979bdSMark Fasheh 
2554ccd979bdSMark Fasheh 		ocfs2_complete_lock_res_refresh(lockres, status);
2555ccd979bdSMark Fasheh 
2556ccd979bdSMark Fasheh 		if (status < 0)
2557ccd979bdSMark Fasheh 			mlog_errno(status);
25588ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2559ccd979bdSMark Fasheh 	}
2560ccd979bdSMark Fasheh bail:
2561ccd979bdSMark Fasheh 	return status;
2562ccd979bdSMark Fasheh }
2563ccd979bdSMark Fasheh 
2564ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb,
2565ccd979bdSMark Fasheh 			int ex)
2566ccd979bdSMark Fasheh {
2567bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2568ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2569ccd979bdSMark Fasheh 
2570c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2571ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(osb, lockres, level);
2572ccd979bdSMark Fasheh }
2573ccd979bdSMark Fasheh 
2574ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb)
2575ccd979bdSMark Fasheh {
2576ccd979bdSMark Fasheh 	int status;
2577ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2578ccd979bdSMark Fasheh 
2579ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2580ccd979bdSMark Fasheh 		return -EROFS;
2581ccd979bdSMark Fasheh 
2582c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2583c271c5c2SSunil Mushran 		return 0;
2584c271c5c2SSunil Mushran 
2585bd3e7610SJoel Becker 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2586ccd979bdSMark Fasheh 	if (status < 0)
2587ccd979bdSMark Fasheh 		mlog_errno(status);
2588ccd979bdSMark Fasheh 
2589ccd979bdSMark Fasheh 	return status;
2590ccd979bdSMark Fasheh }
2591ccd979bdSMark Fasheh 
2592ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb)
2593ccd979bdSMark Fasheh {
2594ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2595ccd979bdSMark Fasheh 
2596c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2597bd3e7610SJoel Becker 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2598ccd979bdSMark Fasheh }
2599ccd979bdSMark Fasheh 
26006ca497a8Swengang wang int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
26016ca497a8Swengang wang {
26026ca497a8Swengang wang 	int status;
26036ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
26046ca497a8Swengang wang 
26056ca497a8Swengang wang 	if (ocfs2_is_hard_readonly(osb))
26066ca497a8Swengang wang 		return -EROFS;
26076ca497a8Swengang wang 
26086ca497a8Swengang wang 	if (ocfs2_mount_local(osb))
26096ca497a8Swengang wang 		return 0;
26106ca497a8Swengang wang 
26116ca497a8Swengang wang 	status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
26126ca497a8Swengang wang 				    0, 0);
26136ca497a8Swengang wang 	if (status < 0)
26146ca497a8Swengang wang 		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
26156ca497a8Swengang wang 
26166ca497a8Swengang wang 	return status;
26176ca497a8Swengang wang }
26186ca497a8Swengang wang 
26196ca497a8Swengang wang void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
26206ca497a8Swengang wang {
26216ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
26226ca497a8Swengang wang 
26236ca497a8Swengang wang 	if (!ocfs2_mount_local(osb))
26246ca497a8Swengang wang 		ocfs2_cluster_unlock(osb, lockres,
26256ca497a8Swengang wang 				     ex ? LKM_EXMODE : LKM_PRMODE);
26266ca497a8Swengang wang }
26276ca497a8Swengang wang 
2628d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2629d680efe9SMark Fasheh {
2630d680efe9SMark Fasheh 	int ret;
2631bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2632d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2633d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2634d680efe9SMark Fasheh 
2635d680efe9SMark Fasheh 	BUG_ON(!dl);
2636d680efe9SMark Fasheh 
263703efed8aSTiger Yang 	if (ocfs2_is_hard_readonly(osb)) {
263803efed8aSTiger Yang 		if (ex)
2639d680efe9SMark Fasheh 			return -EROFS;
264003efed8aSTiger Yang 		return 0;
264103efed8aSTiger Yang 	}
2642d680efe9SMark Fasheh 
2643c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2644c271c5c2SSunil Mushran 		return 0;
2645c271c5c2SSunil Mushran 
2646d680efe9SMark Fasheh 	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2647d680efe9SMark Fasheh 	if (ret < 0)
2648d680efe9SMark Fasheh 		mlog_errno(ret);
2649d680efe9SMark Fasheh 
2650d680efe9SMark Fasheh 	return ret;
2651d680efe9SMark Fasheh }
2652d680efe9SMark Fasheh 
2653d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2654d680efe9SMark Fasheh {
2655bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2656d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2657d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2658d680efe9SMark Fasheh 
265903efed8aSTiger Yang 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
2660d680efe9SMark Fasheh 		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
2661d680efe9SMark Fasheh }
2662d680efe9SMark Fasheh 
2663ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because
2664ccd979bdSMark Fasheh  * open references on the debug inodes can live on after a mount, so
2665ccd979bdSMark Fasheh  * we can't rely on the ocfs2_super to always exist. */
2666ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref)
2667ccd979bdSMark Fasheh {
2668ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2669ccd979bdSMark Fasheh 
2670ccd979bdSMark Fasheh 	dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
2671ccd979bdSMark Fasheh 
2672ccd979bdSMark Fasheh 	kfree(dlm_debug);
2673ccd979bdSMark Fasheh }
2674ccd979bdSMark Fasheh 
2675ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
2676ccd979bdSMark Fasheh {
2677ccd979bdSMark Fasheh 	if (dlm_debug)
2678ccd979bdSMark Fasheh 		kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
2679ccd979bdSMark Fasheh }
2680ccd979bdSMark Fasheh 
2681ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
2682ccd979bdSMark Fasheh {
2683ccd979bdSMark Fasheh 	kref_get(&debug->d_refcnt);
2684ccd979bdSMark Fasheh }
2685ccd979bdSMark Fasheh 
2686ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
2687ccd979bdSMark Fasheh {
2688ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2689ccd979bdSMark Fasheh 
2690ccd979bdSMark Fasheh 	dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
2691ccd979bdSMark Fasheh 	if (!dlm_debug) {
2692ccd979bdSMark Fasheh 		mlog_errno(-ENOMEM);
2693ccd979bdSMark Fasheh 		goto out;
2694ccd979bdSMark Fasheh 	}
2695ccd979bdSMark Fasheh 
2696ccd979bdSMark Fasheh 	kref_init(&dlm_debug->d_refcnt);
2697ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
2698ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = NULL;
2699ccd979bdSMark Fasheh out:
2700ccd979bdSMark Fasheh 	return dlm_debug;
2701ccd979bdSMark Fasheh }
2702ccd979bdSMark Fasheh 
2703ccd979bdSMark Fasheh /* Access to this is arbitrated for us via seq_file->sem. */
2704ccd979bdSMark Fasheh struct ocfs2_dlm_seq_priv {
2705ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *p_dlm_debug;
2706ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_iter_res;
2707ccd979bdSMark Fasheh 	struct ocfs2_lock_res p_tmp_res;
2708ccd979bdSMark Fasheh };
2709ccd979bdSMark Fasheh 
2710ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
2711ccd979bdSMark Fasheh 						 struct ocfs2_dlm_seq_priv *priv)
2712ccd979bdSMark Fasheh {
2713ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter, *ret = NULL;
2714ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
2715ccd979bdSMark Fasheh 
2716ccd979bdSMark Fasheh 	assert_spin_locked(&ocfs2_dlm_tracking_lock);
2717ccd979bdSMark Fasheh 
2718ccd979bdSMark Fasheh 	list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
2719ccd979bdSMark Fasheh 		/* discover the head of the list */
2720ccd979bdSMark Fasheh 		if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
2721ccd979bdSMark Fasheh 			mlog(0, "End of list found, %p\n", ret);
2722ccd979bdSMark Fasheh 			break;
2723ccd979bdSMark Fasheh 		}
2724ccd979bdSMark Fasheh 
2725ccd979bdSMark Fasheh 		/* We track our "dummy" iteration lockres' by a NULL
2726ccd979bdSMark Fasheh 		 * l_ops field. */
2727ccd979bdSMark Fasheh 		if (iter->l_ops != NULL) {
2728ccd979bdSMark Fasheh 			ret = iter;
2729ccd979bdSMark Fasheh 			break;
2730ccd979bdSMark Fasheh 		}
2731ccd979bdSMark Fasheh 	}
2732ccd979bdSMark Fasheh 
2733ccd979bdSMark Fasheh 	return ret;
2734ccd979bdSMark Fasheh }
2735ccd979bdSMark Fasheh 
2736ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
2737ccd979bdSMark Fasheh {
2738ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
2739ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter;
2740ccd979bdSMark Fasheh 
2741ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
2742ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
2743ccd979bdSMark Fasheh 	if (iter) {
2744ccd979bdSMark Fasheh 		/* Since lockres' have the lifetime of their container
2745ccd979bdSMark Fasheh 		 * (which can be inodes, ocfs2_supers, etc) we want to
2746ccd979bdSMark Fasheh 		 * copy this out to a temporary lockres while still
2747ccd979bdSMark Fasheh 		 * under the spinlock. Obviously after this we can't
2748ccd979bdSMark Fasheh 		 * trust any pointers on the copy returned, but that's
2749ccd979bdSMark Fasheh 		 * ok as the information we want isn't typically held
2750ccd979bdSMark Fasheh 		 * in them. */
2751ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
2752ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
2753ccd979bdSMark Fasheh 	}
2754ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
2755ccd979bdSMark Fasheh 
2756ccd979bdSMark Fasheh 	return iter;
2757ccd979bdSMark Fasheh }
2758ccd979bdSMark Fasheh 
/* seq_file ->stop: nothing to release, ->start and ->next drop the lock. */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
	return;
}
2762ccd979bdSMark Fasheh 
2763ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
2764ccd979bdSMark Fasheh {
2765ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
2766ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter = v;
2767ccd979bdSMark Fasheh 	struct ocfs2_lock_res *dummy = &priv->p_iter_res;
2768ccd979bdSMark Fasheh 
2769ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
2770ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(iter, priv);
2771ccd979bdSMark Fasheh 	list_del_init(&dummy->l_debug_list);
2772ccd979bdSMark Fasheh 	if (iter) {
2773ccd979bdSMark Fasheh 		list_add(&dummy->l_debug_list, &iter->l_debug_list);
2774ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
2775ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
2776ccd979bdSMark Fasheh 	}
2777ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
2778ccd979bdSMark Fasheh 
2779ccd979bdSMark Fasheh 	return iter;
2780ccd979bdSMark Fasheh }
2781ccd979bdSMark Fasheh 
27825bc970e8SSunil Mushran /*
27835bc970e8SSunil Mushran  * Version is used by debugfs.ocfs2 to determine the format being used
27845bc970e8SSunil Mushran  *
27855bc970e8SSunil Mushran  * New in version 2
27865bc970e8SSunil Mushran  *	- Lock stats printed
27875bc970e8SSunil Mushran  * New in version 3
27885bc970e8SSunil Mushran  *	- Max time in lock stats is in usecs (instead of nsecs)
27895bc970e8SSunil Mushran  */
27905bc970e8SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 3
2791ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2792ccd979bdSMark Fasheh {
2793ccd979bdSMark Fasheh 	int i;
2794ccd979bdSMark Fasheh 	char *lvb;
2795ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = v;
2796ccd979bdSMark Fasheh 
2797ccd979bdSMark Fasheh 	if (!lockres)
2798ccd979bdSMark Fasheh 		return -EINVAL;
2799ccd979bdSMark Fasheh 
2800d680efe9SMark Fasheh 	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
2801d680efe9SMark Fasheh 
2802d680efe9SMark Fasheh 	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
2803d680efe9SMark Fasheh 		seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
2804d680efe9SMark Fasheh 			   lockres->l_name,
2805d680efe9SMark Fasheh 			   (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
2806d680efe9SMark Fasheh 	else
2807d680efe9SMark Fasheh 		seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
2808d680efe9SMark Fasheh 
2809d680efe9SMark Fasheh 	seq_printf(m, "%d\t"
2810ccd979bdSMark Fasheh 		   "0x%lx\t"
2811ccd979bdSMark Fasheh 		   "0x%x\t"
2812ccd979bdSMark Fasheh 		   "0x%x\t"
2813ccd979bdSMark Fasheh 		   "%u\t"
2814ccd979bdSMark Fasheh 		   "%u\t"
2815ccd979bdSMark Fasheh 		   "%d\t"
2816ccd979bdSMark Fasheh 		   "%d\t",
2817ccd979bdSMark Fasheh 		   lockres->l_level,
2818ccd979bdSMark Fasheh 		   lockres->l_flags,
2819ccd979bdSMark Fasheh 		   lockres->l_action,
2820ccd979bdSMark Fasheh 		   lockres->l_unlock_action,
2821ccd979bdSMark Fasheh 		   lockres->l_ro_holders,
2822ccd979bdSMark Fasheh 		   lockres->l_ex_holders,
2823ccd979bdSMark Fasheh 		   lockres->l_requested,
2824ccd979bdSMark Fasheh 		   lockres->l_blocking);
2825ccd979bdSMark Fasheh 
2826ccd979bdSMark Fasheh 	/* Dump the raw LVB */
28278f2c9c1bSJoel Becker 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2828ccd979bdSMark Fasheh 	for(i = 0; i < DLM_LVB_LEN; i++)
2829ccd979bdSMark Fasheh 		seq_printf(m, "0x%x\t", lvb[i]);
2830ccd979bdSMark Fasheh 
28318ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
28325bc970e8SSunil Mushran # define lock_num_prmode(_l)		((_l)->l_lock_prmode.ls_gets)
28335bc970e8SSunil Mushran # define lock_num_exmode(_l)		((_l)->l_lock_exmode.ls_gets)
28345bc970e8SSunil Mushran # define lock_num_prmode_failed(_l)	((_l)->l_lock_prmode.ls_fail)
28355bc970e8SSunil Mushran # define lock_num_exmode_failed(_l)	((_l)->l_lock_exmode.ls_fail)
28365bc970e8SSunil Mushran # define lock_total_prmode(_l)		((_l)->l_lock_prmode.ls_total)
28375bc970e8SSunil Mushran # define lock_total_exmode(_l)		((_l)->l_lock_exmode.ls_total)
28385bc970e8SSunil Mushran # define lock_max_prmode(_l)		((_l)->l_lock_prmode.ls_max)
28395bc970e8SSunil Mushran # define lock_max_exmode(_l)		((_l)->l_lock_exmode.ls_max)
28405bc970e8SSunil Mushran # define lock_refresh(_l)		((_l)->l_lock_refresh)
28418ddb7b00SSunil Mushran #else
28425bc970e8SSunil Mushran # define lock_num_prmode(_l)		(0)
28435bc970e8SSunil Mushran # define lock_num_exmode(_l)		(0)
28448ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l)	(0)
28458ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l)	(0)
2846dd25e55eSRandy Dunlap # define lock_total_prmode(_l)		(0ULL)
2847dd25e55eSRandy Dunlap # define lock_total_exmode(_l)		(0ULL)
28488ddb7b00SSunil Mushran # define lock_max_prmode(_l)		(0)
28498ddb7b00SSunil Mushran # define lock_max_exmode(_l)		(0)
28508ddb7b00SSunil Mushran # define lock_refresh(_l)		(0)
28518ddb7b00SSunil Mushran #endif
28528ddb7b00SSunil Mushran 	/* The following seq_print was added in version 2 of this output */
28535bc970e8SSunil Mushran 	seq_printf(m, "%u\t"
28545bc970e8SSunil Mushran 		   "%u\t"
28558ddb7b00SSunil Mushran 		   "%u\t"
28568ddb7b00SSunil Mushran 		   "%u\t"
28578ddb7b00SSunil Mushran 		   "%llu\t"
28588ddb7b00SSunil Mushran 		   "%llu\t"
28598ddb7b00SSunil Mushran 		   "%u\t"
28608ddb7b00SSunil Mushran 		   "%u\t"
28618ddb7b00SSunil Mushran 		   "%u\t",
28628ddb7b00SSunil Mushran 		   lock_num_prmode(lockres),
28638ddb7b00SSunil Mushran 		   lock_num_exmode(lockres),
28648ddb7b00SSunil Mushran 		   lock_num_prmode_failed(lockres),
28658ddb7b00SSunil Mushran 		   lock_num_exmode_failed(lockres),
28668ddb7b00SSunil Mushran 		   lock_total_prmode(lockres),
28678ddb7b00SSunil Mushran 		   lock_total_exmode(lockres),
28688ddb7b00SSunil Mushran 		   lock_max_prmode(lockres),
28698ddb7b00SSunil Mushran 		   lock_max_exmode(lockres),
28708ddb7b00SSunil Mushran 		   lock_refresh(lockres));
28718ddb7b00SSunil Mushran 
2872ccd979bdSMark Fasheh 	/* End the line */
2873ccd979bdSMark Fasheh 	seq_printf(m, "\n");
2874ccd979bdSMark Fasheh 	return 0;
2875ccd979bdSMark Fasheh }
2876ccd979bdSMark Fasheh 
287790d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = {
2878ccd979bdSMark Fasheh 	.start =	ocfs2_dlm_seq_start,
2879ccd979bdSMark Fasheh 	.stop =		ocfs2_dlm_seq_stop,
2880ccd979bdSMark Fasheh 	.next =		ocfs2_dlm_seq_next,
2881ccd979bdSMark Fasheh 	.show =		ocfs2_dlm_seq_show,
2882ccd979bdSMark Fasheh };
2883ccd979bdSMark Fasheh 
2884ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
2885ccd979bdSMark Fasheh {
288633fa1d90SJoe Perches 	struct seq_file *seq = file->private_data;
2887ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = seq->private;
2888ccd979bdSMark Fasheh 	struct ocfs2_lock_res *res = &priv->p_iter_res;
2889ccd979bdSMark Fasheh 
2890ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
2891ccd979bdSMark Fasheh 	ocfs2_put_dlm_debug(priv->p_dlm_debug);
2892ccd979bdSMark Fasheh 	return seq_release_private(inode, file);
2893ccd979bdSMark Fasheh }
2894ccd979bdSMark Fasheh 
2895ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
2896ccd979bdSMark Fasheh {
2897ccd979bdSMark Fasheh 	int ret;
2898ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv;
2899ccd979bdSMark Fasheh 	struct seq_file *seq;
2900ccd979bdSMark Fasheh 	struct ocfs2_super *osb;
2901ccd979bdSMark Fasheh 
2902ccd979bdSMark Fasheh 	priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL);
2903ccd979bdSMark Fasheh 	if (!priv) {
2904ccd979bdSMark Fasheh 		ret = -ENOMEM;
2905ccd979bdSMark Fasheh 		mlog_errno(ret);
2906ccd979bdSMark Fasheh 		goto out;
2907ccd979bdSMark Fasheh 	}
29088e18e294STheodore Ts'o 	osb = inode->i_private;
2909ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(osb->osb_dlm_debug);
2910ccd979bdSMark Fasheh 	priv->p_dlm_debug = osb->osb_dlm_debug;
2911ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
2912ccd979bdSMark Fasheh 
2913ccd979bdSMark Fasheh 	ret = seq_open(file, &ocfs2_dlm_seq_ops);
2914ccd979bdSMark Fasheh 	if (ret) {
2915ccd979bdSMark Fasheh 		kfree(priv);
2916ccd979bdSMark Fasheh 		mlog_errno(ret);
2917ccd979bdSMark Fasheh 		goto out;
2918ccd979bdSMark Fasheh 	}
2919ccd979bdSMark Fasheh 
292033fa1d90SJoe Perches 	seq = file->private_data;
2921ccd979bdSMark Fasheh 	seq->private = priv;
2922ccd979bdSMark Fasheh 
2923ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(&priv->p_iter_res,
2924ccd979bdSMark Fasheh 				   priv->p_dlm_debug);
2925ccd979bdSMark Fasheh 
2926ccd979bdSMark Fasheh out:
2927ccd979bdSMark Fasheh 	return ret;
2928ccd979bdSMark Fasheh }
2929ccd979bdSMark Fasheh 
29304b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = {
2931ccd979bdSMark Fasheh 	.open =		ocfs2_dlm_debug_open,
2932ccd979bdSMark Fasheh 	.release =	ocfs2_dlm_debug_release,
2933ccd979bdSMark Fasheh 	.read =		seq_read,
2934ccd979bdSMark Fasheh 	.llseek =	seq_lseek,
2935ccd979bdSMark Fasheh };
2936ccd979bdSMark Fasheh 
2937ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
2938ccd979bdSMark Fasheh {
2939ccd979bdSMark Fasheh 	int ret = 0;
2940ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2941ccd979bdSMark Fasheh 
2942ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = debugfs_create_file("locking_state",
2943ccd979bdSMark Fasheh 							 S_IFREG|S_IRUSR,
2944ccd979bdSMark Fasheh 							 osb->osb_debug_root,
2945ccd979bdSMark Fasheh 							 osb,
2946ccd979bdSMark Fasheh 							 &ocfs2_dlm_debug_fops);
2947ccd979bdSMark Fasheh 	if (!dlm_debug->d_locking_state) {
2948ccd979bdSMark Fasheh 		ret = -EINVAL;
2949ccd979bdSMark Fasheh 		mlog(ML_ERROR,
2950ccd979bdSMark Fasheh 		     "Unable to create locking state debugfs file.\n");
2951ccd979bdSMark Fasheh 		goto out;
2952ccd979bdSMark Fasheh 	}
2953ccd979bdSMark Fasheh 
2954ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(dlm_debug);
2955ccd979bdSMark Fasheh out:
2956ccd979bdSMark Fasheh 	return ret;
2957ccd979bdSMark Fasheh }
2958ccd979bdSMark Fasheh 
2959ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
2960ccd979bdSMark Fasheh {
2961ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2962ccd979bdSMark Fasheh 
2963ccd979bdSMark Fasheh 	if (dlm_debug) {
2964ccd979bdSMark Fasheh 		debugfs_remove(dlm_debug->d_locking_state);
2965ccd979bdSMark Fasheh 		ocfs2_put_dlm_debug(dlm_debug);
2966ccd979bdSMark Fasheh 	}
2967ccd979bdSMark Fasheh }
2968ccd979bdSMark Fasheh 
2969ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb)
2970ccd979bdSMark Fasheh {
2971c271c5c2SSunil Mushran 	int status = 0;
29724670c46dSJoel Becker 	struct ocfs2_cluster_connection *conn = NULL;
2973ccd979bdSMark Fasheh 
29740abd6d18SMark Fasheh 	if (ocfs2_mount_local(osb)) {
29750abd6d18SMark Fasheh 		osb->node_num = 0;
2976c271c5c2SSunil Mushran 		goto local;
29770abd6d18SMark Fasheh 	}
2978c271c5c2SSunil Mushran 
2979ccd979bdSMark Fasheh 	status = ocfs2_dlm_init_debug(osb);
2980ccd979bdSMark Fasheh 	if (status < 0) {
2981ccd979bdSMark Fasheh 		mlog_errno(status);
2982ccd979bdSMark Fasheh 		goto bail;
2983ccd979bdSMark Fasheh 	}
2984ccd979bdSMark Fasheh 
298534d024f8SMark Fasheh 	/* launch downconvert thread */
298634d024f8SMark Fasheh 	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc");
298734d024f8SMark Fasheh 	if (IS_ERR(osb->dc_task)) {
298834d024f8SMark Fasheh 		status = PTR_ERR(osb->dc_task);
298934d024f8SMark Fasheh 		osb->dc_task = NULL;
2990ccd979bdSMark Fasheh 		mlog_errno(status);
2991ccd979bdSMark Fasheh 		goto bail;
2992ccd979bdSMark Fasheh 	}
2993ccd979bdSMark Fasheh 
2994ccd979bdSMark Fasheh 	/* for now, uuid == domain */
29959c6c877cSJoel Becker 	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
29969c6c877cSJoel Becker 				       osb->uuid_str,
29974670c46dSJoel Becker 				       strlen(osb->uuid_str),
2998553b5eb9SJoel Becker 				       &lproto, ocfs2_do_node_down, osb,
29994670c46dSJoel Becker 				       &conn);
30004670c46dSJoel Becker 	if (status) {
3001ccd979bdSMark Fasheh 		mlog_errno(status);
3002ccd979bdSMark Fasheh 		goto bail;
3003ccd979bdSMark Fasheh 	}
3004ccd979bdSMark Fasheh 
30050abd6d18SMark Fasheh 	status = ocfs2_cluster_this_node(&osb->node_num);
30060abd6d18SMark Fasheh 	if (status < 0) {
30070abd6d18SMark Fasheh 		mlog_errno(status);
30080abd6d18SMark Fasheh 		mlog(ML_ERROR,
30090abd6d18SMark Fasheh 		     "could not find this host's node number\n");
3010286eaa95SJoel Becker 		ocfs2_cluster_disconnect(conn, 0);
30110abd6d18SMark Fasheh 		goto bail;
30120abd6d18SMark Fasheh 	}
30130abd6d18SMark Fasheh 
3014c271c5c2SSunil Mushran local:
3015ccd979bdSMark Fasheh 	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
3016ccd979bdSMark Fasheh 	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
30176ca497a8Swengang wang 	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
301883273932SSrinivas Eeda 	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
3019ccd979bdSMark Fasheh 
30204670c46dSJoel Becker 	osb->cconn = conn;
3021ccd979bdSMark Fasheh 
3022ccd979bdSMark Fasheh 	status = 0;
3023ccd979bdSMark Fasheh bail:
3024ccd979bdSMark Fasheh 	if (status < 0) {
3025ccd979bdSMark Fasheh 		ocfs2_dlm_shutdown_debug(osb);
302634d024f8SMark Fasheh 		if (osb->dc_task)
302734d024f8SMark Fasheh 			kthread_stop(osb->dc_task);
3028ccd979bdSMark Fasheh 	}
3029ccd979bdSMark Fasheh 
3030ccd979bdSMark Fasheh 	return status;
3031ccd979bdSMark Fasheh }
3032ccd979bdSMark Fasheh 
3033286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
3034286eaa95SJoel Becker 			int hangup_pending)
3035ccd979bdSMark Fasheh {
3036ccd979bdSMark Fasheh 	ocfs2_drop_osb_locks(osb);
3037ccd979bdSMark Fasheh 
30384670c46dSJoel Becker 	/*
30394670c46dSJoel Becker 	 * Now that we have dropped all locks and ocfs2_dismount_volume()
30404670c46dSJoel Becker 	 * has disabled recovery, the DLM won't be talking to us.  It's
30414670c46dSJoel Becker 	 * safe to tear things down before disconnecting the cluster.
30424670c46dSJoel Becker 	 */
30434670c46dSJoel Becker 
304434d024f8SMark Fasheh 	if (osb->dc_task) {
304534d024f8SMark Fasheh 		kthread_stop(osb->dc_task);
304634d024f8SMark Fasheh 		osb->dc_task = NULL;
3047ccd979bdSMark Fasheh 	}
3048ccd979bdSMark Fasheh 
3049ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_super_lockres);
3050ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_rename_lockres);
30516ca497a8Swengang wang 	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
305283273932SSrinivas Eeda 	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
3053ccd979bdSMark Fasheh 
3054286eaa95SJoel Becker 	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
30554670c46dSJoel Becker 	osb->cconn = NULL;
3056ccd979bdSMark Fasheh 
3057ccd979bdSMark Fasheh 	ocfs2_dlm_shutdown_debug(osb);
3058ccd979bdSMark Fasheh }
3059ccd979bdSMark Fasheh 
/*
 * Release the DLM lock behind a lockres that is being freed.
 *
 * The lockres must already carry OCFS2_LOCK_FREEING; anything else trips
 * the mlog_bug_on_msg() check below.  A lockres that was never initialized
 * (no OCFS2_LOCK_INITIALIZED) or that is not attached to a DLM lock (no
 * OCFS2_LOCK_ATTACHED) is left alone.
 *
 * Otherwise this waits while OCFS2_LOCK_BUSY is set, calls the lock type's
 * set_lvb() when the type uses an LVB and we hold an attached EX lock that
 * does not need a refresh, then issues ocfs2_dlm_unlock() and calls
 * ocfs2_wait_on_busy_lock() once more (presumably until the unlock has
 * completed - confirm against the unlock ast).  A failing
 * ocfs2_dlm_unlock() is treated as fatal (BUG()).
 *
 * Always returns 0.
 */
3060ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb,
30610d5dc6c2SMark Fasheh 			   struct ocfs2_lock_res *lockres)
3062ccd979bdSMark Fasheh {
30637431cd7eSJoel Becker 	int ret;
3064ccd979bdSMark Fasheh 	unsigned long flags;
3065bd3e7610SJoel Becker 	u32 lkm_flags = 0;
3066ccd979bdSMark Fasheh 
3067ccd979bdSMark Fasheh 	/* We didn't get anywhere near actually using this lockres. */
3068ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
3069ccd979bdSMark Fasheh 		goto out;
3070ccd979bdSMark Fasheh 
	/* Lock types that use an LVB pass DLM_LKF_VALBLK to the unlock. */
3071b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3072bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
3073b80fc012SMark Fasheh 
3074ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3075ccd979bdSMark Fasheh 
3076ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
3077ccd979bdSMark Fasheh 			"lockres %s, flags 0x%lx\n",
3078ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3079ccd979bdSMark Fasheh 
	/* The spinlock is dropped around each wait and retaken before the
	 * flag is tested again. */
3080ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_BUSY) {
3081ccd979bdSMark Fasheh 		mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
3082ccd979bdSMark Fasheh 		     "%u, unlock_action = %u\n",
3083ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags, lockres->l_action,
3084ccd979bdSMark Fasheh 		     lockres->l_unlock_action);
3085ccd979bdSMark Fasheh 
3086ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3087ccd979bdSMark Fasheh 
3088ccd979bdSMark Fasheh 		/* XXX: Today we just wait on any busy
3089ccd979bdSMark Fasheh 		 * locks... Perhaps we need to cancel converts in the
3090ccd979bdSMark Fasheh 		 * future? */
3091ccd979bdSMark Fasheh 		ocfs2_wait_on_busy_lock(lockres);
3092ccd979bdSMark Fasheh 
3093ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3094ccd979bdSMark Fasheh 	}
3095ccd979bdSMark Fasheh 
	/* Only publish an LVB from an attached EX lock that does not need a
	 * refresh. */
30960d5dc6c2SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
30970d5dc6c2SMark Fasheh 		if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
3098bd3e7610SJoel Becker 		    lockres->l_level == DLM_LOCK_EX &&
30990d5dc6c2SMark Fasheh 		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
31000d5dc6c2SMark Fasheh 			lockres->l_ops->set_lvb(lockres);
31010d5dc6c2SMark Fasheh 	}
3102ccd979bdSMark Fasheh 
	/* The loop above exits with l_lock held and BUSY clear, so the BUSY
	 * report here is purely defensive. */
3103ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY)
3104ccd979bdSMark Fasheh 		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
3105ccd979bdSMark Fasheh 		     lockres->l_name);
3106ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3107ccd979bdSMark Fasheh 		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
3108ccd979bdSMark Fasheh 
	/* Nothing to unlock if no DLM lock is attached. */
3109ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
3110ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3111ccd979bdSMark Fasheh 		goto out;
3112ccd979bdSMark Fasheh 	}
3113ccd979bdSMark Fasheh 
3114ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
3115ccd979bdSMark Fasheh 
3116ccd979bdSMark Fasheh 	/* make sure we never get here while waiting for an ast to
3117ccd979bdSMark Fasheh 	 * fire. */
3118ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
3119ccd979bdSMark Fasheh 
3120ccd979bdSMark Fasheh 	/* is this necessary? */
3121ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3122ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
3123ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3124ccd979bdSMark Fasheh 
3125ccd979bdSMark Fasheh 	mlog(0, "lock %s\n", lockres->l_name);
3126ccd979bdSMark Fasheh 
3127a796d286SJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
31287431cd7eSJoel Becker 	if (ret) {
		/* There is no recovery from a failed unlock: dump state and
		 * BUG(). */
31297431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3130ccd979bdSMark Fasheh 		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
3131cf0acdcdSJoel Becker 		ocfs2_dlm_dump_lksb(&lockres->l_lksb);
3132ccd979bdSMark Fasheh 		BUG();
3133ccd979bdSMark Fasheh 	}
313473ac36eaSColy Li 	mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
3135ccd979bdSMark Fasheh 	     lockres->l_name);
3136ccd979bdSMark Fasheh 
3137ccd979bdSMark Fasheh 	ocfs2_wait_on_busy_lock(lockres);
3138ccd979bdSMark Fasheh out:
3139ccd979bdSMark Fasheh 	return 0;
3140ccd979bdSMark Fasheh }
3141ccd979bdSMark Fasheh 
3142ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be
3143ccd979bdSMark Fasheh  * queued if blocking, but we still may have to wait on it
314434d024f8SMark Fasheh  * being dequeued from the downconvert thread before we can consider
3145ccd979bdSMark Fasheh  * it safe to drop.
3146ccd979bdSMark Fasheh  *
3147ccd979bdSMark Fasheh  * You can *not* attempt to call cluster_lock on this lockres anymore. */
3148ccd979bdSMark Fasheh void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
3149ccd979bdSMark Fasheh {
3150ccd979bdSMark Fasheh 	int status;
3151ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
3152ccd979bdSMark Fasheh 	unsigned long flags;
3153ccd979bdSMark Fasheh 
3154ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
3155ccd979bdSMark Fasheh 
3156ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3157ccd979bdSMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_FREEING;
3158ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3159ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3160ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3161ccd979bdSMark Fasheh 
3162ccd979bdSMark Fasheh 		mlog(0, "Waiting on lockres %s\n", lockres->l_name);
3163ccd979bdSMark Fasheh 
3164ccd979bdSMark Fasheh 		status = ocfs2_wait_for_mask(&mw);
3165ccd979bdSMark Fasheh 		if (status)
3166ccd979bdSMark Fasheh 			mlog_errno(status);
3167ccd979bdSMark Fasheh 
3168ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3169ccd979bdSMark Fasheh 	}
3170ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3171ccd979bdSMark Fasheh }
3172ccd979bdSMark Fasheh 
/*
 * Convenience wrapper: mark the lockres as freeing and then drop its DLM
 * lock.  A non-zero result from ocfs2_drop_lock() is logged, not returned.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int err;

	ocfs2_mark_lockres_freeing(lockres);

	err = ocfs2_drop_lock(osb, lockres);
	if (!err)
		return;

	mlog_errno(err);
}
3183d680efe9SMark Fasheh 
3184ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3185ccd979bdSMark Fasheh {
3186d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3187d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
31886ca497a8Swengang wang 	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
318983273932SSrinivas Eeda 	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3190ccd979bdSMark Fasheh }
3191ccd979bdSMark Fasheh 
3192ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode)
3193ccd979bdSMark Fasheh {
3194ccd979bdSMark Fasheh 	int status, err;
3195ccd979bdSMark Fasheh 
3196ccd979bdSMark Fasheh 	/* No need to call ocfs2_mark_lockres_freeing here -
3197ccd979bdSMark Fasheh 	 * ocfs2_clear_inode has done it for us. */
3198ccd979bdSMark Fasheh 
3199ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
320050008630STiger Yang 			      &OCFS2_I(inode)->ip_open_lockres);
3201ccd979bdSMark Fasheh 	if (err < 0)
3202ccd979bdSMark Fasheh 		mlog_errno(err);
3203ccd979bdSMark Fasheh 
3204ccd979bdSMark Fasheh 	status = err;
3205ccd979bdSMark Fasheh 
3206ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3207e63aecb6SMark Fasheh 			      &OCFS2_I(inode)->ip_inode_lockres);
3208ccd979bdSMark Fasheh 	if (err < 0)
3209ccd979bdSMark Fasheh 		mlog_errno(err);
3210ccd979bdSMark Fasheh 	if (err < 0 && !status)
3211ccd979bdSMark Fasheh 		status = err;
3212ccd979bdSMark Fasheh 
3213ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
32140d5dc6c2SMark Fasheh 			      &OCFS2_I(inode)->ip_rw_lockres);
3215ccd979bdSMark Fasheh 	if (err < 0)
3216ccd979bdSMark Fasheh 		mlog_errno(err);
3217ccd979bdSMark Fasheh 	if (err < 0 && !status)
3218ccd979bdSMark Fasheh 		status = err;
3219ccd979bdSMark Fasheh 
3220ccd979bdSMark Fasheh 	return status;
3221ccd979bdSMark Fasheh }
3222ccd979bdSMark Fasheh 
3223de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3224ccd979bdSMark Fasheh 					      int new_level)
3225ccd979bdSMark Fasheh {
3226ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3227ccd979bdSMark Fasheh 
3228bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
3229ccd979bdSMark Fasheh 
3230ccd979bdSMark Fasheh 	if (lockres->l_level <= new_level) {
32319b915181SSunil Mushran 		mlog(ML_ERROR, "lockres %s, lvl %d <= %d, blcklst %d, mask %d, "
32329b915181SSunil Mushran 		     "type %d, flags 0x%lx, hold %d %d, act %d %d, req %d, "
32339b915181SSunil Mushran 		     "block %d, pgen %d\n", lockres->l_name, lockres->l_level,
32349b915181SSunil Mushran 		     new_level, list_empty(&lockres->l_blocked_list),
32359b915181SSunil Mushran 		     list_empty(&lockres->l_mask_waiters), lockres->l_type,
32369b915181SSunil Mushran 		     lockres->l_flags, lockres->l_ro_holders,
32379b915181SSunil Mushran 		     lockres->l_ex_holders, lockres->l_action,
32389b915181SSunil Mushran 		     lockres->l_unlock_action, lockres->l_requested,
32399b915181SSunil Mushran 		     lockres->l_blocking, lockres->l_pending_gen);
3240ccd979bdSMark Fasheh 		BUG();
3241ccd979bdSMark Fasheh 	}
3242ccd979bdSMark Fasheh 
32439b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d, blocking %d\n",
32449b915181SSunil Mushran 	     lockres->l_name, lockres->l_level, new_level, lockres->l_blocking);
3245ccd979bdSMark Fasheh 
3246ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_DOWNCONVERT;
3247ccd979bdSMark Fasheh 	lockres->l_requested = new_level;
3248ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3249de551246SJoel Becker 	return lockres_set_pending(lockres);
3250ccd979bdSMark Fasheh }
3251ccd979bdSMark Fasheh 
3252ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3253ccd979bdSMark Fasheh 				  struct ocfs2_lock_res *lockres,
3254ccd979bdSMark Fasheh 				  int new_level,
3255de551246SJoel Becker 				  int lvb,
3256de551246SJoel Becker 				  unsigned int generation)
3257ccd979bdSMark Fasheh {
3258bd3e7610SJoel Becker 	int ret;
3259bd3e7610SJoel Becker 	u32 dlm_flags = DLM_LKF_CONVERT;
3260ccd979bdSMark Fasheh 
32619b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
32629b915181SSunil Mushran 	     lockres->l_level, new_level);
32639b915181SSunil Mushran 
3264ccd979bdSMark Fasheh 	if (lvb)
3265bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_VALBLK;
3266ccd979bdSMark Fasheh 
32674670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
3268ccd979bdSMark Fasheh 			     new_level,
3269ccd979bdSMark Fasheh 			     &lockres->l_lksb,
3270ccd979bdSMark Fasheh 			     dlm_flags,
3271ccd979bdSMark Fasheh 			     lockres->l_name,
3272a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
3273de551246SJoel Becker 	lockres_clear_pending(lockres, generation, osb);
32747431cd7eSJoel Becker 	if (ret) {
32757431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
3276ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
3277ccd979bdSMark Fasheh 		goto bail;
3278ccd979bdSMark Fasheh 	}
3279ccd979bdSMark Fasheh 
3280ccd979bdSMark Fasheh 	ret = 0;
3281ccd979bdSMark Fasheh bail:
3282ccd979bdSMark Fasheh 	return ret;
3283ccd979bdSMark Fasheh }
3284ccd979bdSMark Fasheh 
328524ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
3286ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3287ccd979bdSMark Fasheh 				        struct ocfs2_lock_res *lockres)
3288ccd979bdSMark Fasheh {
3289ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3290ccd979bdSMark Fasheh 
3291ccd979bdSMark Fasheh 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3292ccd979bdSMark Fasheh 		/* If we're already trying to cancel a lock conversion
3293ccd979bdSMark Fasheh 		 * then just drop the spinlock and allow the caller to
3294ccd979bdSMark Fasheh 		 * requeue this lock. */
32959b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, skip convert\n", lockres->l_name);
3296ccd979bdSMark Fasheh 		return 0;
3297ccd979bdSMark Fasheh 	}
3298ccd979bdSMark Fasheh 
3299ccd979bdSMark Fasheh 	/* were we in a convert when we got the bast fire? */
3300ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
3301ccd979bdSMark Fasheh 	       lockres->l_action != OCFS2_AST_DOWNCONVERT);
3302ccd979bdSMark Fasheh 	/* set things up for the unlockast to know to just
3303ccd979bdSMark Fasheh 	 * clear out the ast_action and unset busy, etc. */
3304ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
3305ccd979bdSMark Fasheh 
3306ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
3307ccd979bdSMark Fasheh 			"lock %s, invalid flags: 0x%lx\n",
3308ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3309ccd979bdSMark Fasheh 
33109b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
33119b915181SSunil Mushran 
3312ccd979bdSMark Fasheh 	return 1;
3313ccd979bdSMark Fasheh }
3314ccd979bdSMark Fasheh 
3315ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3316ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres)
3317ccd979bdSMark Fasheh {
3318ccd979bdSMark Fasheh 	int ret;
3319ccd979bdSMark Fasheh 
33204670c46dSJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3321a796d286SJoel Becker 			       DLM_LKF_CANCEL);
33227431cd7eSJoel Becker 	if (ret) {
33237431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3324ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 0);
3325ccd979bdSMark Fasheh 	}
3326ccd979bdSMark Fasheh 
33279b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s\n", lockres->l_name);
3328ccd979bdSMark Fasheh 
3329ccd979bdSMark Fasheh 	return ret;
3330ccd979bdSMark Fasheh }
3331ccd979bdSMark Fasheh 
/*
 * Try to downconvert a lock that is blocking another request.
 *
 * On return ctl tells the caller what to do next:
 *   - ctl->requeue is set to 1 when the downconvert could not be started
 *     yet (a dlm_lock() call is still pending, the lock is busy and its
 *     convert is being cancelled, an upconvert is finishing, incompatible
 *     holders remain, the lock is being refreshed, or the lock type's
 *     check_downconvert() refused), and to 0 once the downconvert request
 *     is about to be issued.
 *   - ctl->unblock_action receives the result of the lock type's
 *     downconvert_worker(), when it has one.
 * The early exits for "no longer blocked", "already at NL" and
 * UNBLOCK_STOP_POST leave ctl->requeue untouched.
 *
 * Returns 0, or the error from ocfs2_cancel_convert() /
 * ocfs2_downconvert_lock().
 */
3332b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3333ccd979bdSMark Fasheh 			      struct ocfs2_lock_res *lockres,
3334cc567d89SMark Fasheh 			      struct ocfs2_unblock_ctl *ctl)
3335ccd979bdSMark Fasheh {
3336ccd979bdSMark Fasheh 	unsigned long flags;
3337ccd979bdSMark Fasheh 	int blocking;
3338ccd979bdSMark Fasheh 	int new_level;
3339079b8057SSunil Mushran 	int level;
3340ccd979bdSMark Fasheh 	int ret = 0;
33415ef0d4eaSMark Fasheh 	int set_lvb = 0;
3342de551246SJoel Becker 	unsigned int gen;
3343ccd979bdSMark Fasheh 
3344ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3345ccd979bdSMark Fasheh 
	/* Every pass through recheck runs with l_lock held. */
3346ccd979bdSMark Fasheh recheck:
3347db0f6ce6SSunil Mushran 	/*
3348db0f6ce6SSunil Mushran 	 * Is it still blocking? If not, we have no more work to do.
3349db0f6ce6SSunil Mushran 	 */
3350db0f6ce6SSunil Mushran 	if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
3351db0f6ce6SSunil Mushran 		BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
3352db0f6ce6SSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3353db0f6ce6SSunil Mushran 		ret = 0;
3354db0f6ce6SSunil Mushran 		goto leave;
3355db0f6ce6SSunil Mushran 	}
3356db0f6ce6SSunil Mushran 
3357ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3358de551246SJoel Becker 		/* XXX
3359de551246SJoel Becker 		 * This is a *big* race.  The OCFS2_LOCK_PENDING flag
3360de551246SJoel Becker 		 * exists entirely for one reason - another thread has set
3361de551246SJoel Becker 		 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3362de551246SJoel Becker 		 *
3363de551246SJoel Becker 		 * If we do ocfs2_cancel_convert() before the other thread
3364de551246SJoel Becker 		 * calls dlm_lock(), our cancel will do nothing.  We will
3365de551246SJoel Becker 		 * get no ast, and we will have no way of knowing the
3366de551246SJoel Becker 		 * cancel failed.  Meanwhile, the other thread will call
3367de551246SJoel Becker 		 * into dlm_lock() and wait...forever.
3368de551246SJoel Becker 		 *
3369de551246SJoel Becker 		 * Why forever?  Because another node has asked for the
3370de551246SJoel Becker 		 * lock first; that's why we're here in unblock_lock().
3371de551246SJoel Becker 		 *
3372de551246SJoel Becker 		 * The solution is OCFS2_LOCK_PENDING.  When PENDING is
3373de551246SJoel Becker 		 * set, we just requeue the unblock.  Only when the other
3374de551246SJoel Becker 		 * thread has called dlm_lock() and cleared PENDING will
3375de551246SJoel Becker 		 * we then cancel their request.
3376de551246SJoel Becker 		 *
3377de551246SJoel Becker 		 * All callers of dlm_lock() must set OCFS2_LOCK_PENDING
3378de551246SJoel Becker 		 * at the same time they set OCFS2_LOCK_BUSY.  They must
3379de551246SJoel Becker 		 * clear OCFS2_LOCK_PENDING after dlm_lock() returns.
3380de551246SJoel Becker 		 */
33819b915181SSunil Mushran 		if (lockres->l_flags & OCFS2_LOCK_PENDING) {
33829b915181SSunil Mushran 			mlog(ML_BASTS, "lockres %s, ReQ: Pending\n",
33839b915181SSunil Mushran 			     lockres->l_name);
3384de551246SJoel Becker 			goto leave_requeue;
33859b915181SSunil Mushran 		}
3386de551246SJoel Becker 
		/* Busy but not pending: requeue, and cancel the convert
		 * unless a cancel is already in progress (prepare returns
		 * 0 in that case). */
3387d680efe9SMark Fasheh 		ctl->requeue = 1;
3388ccd979bdSMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
3389ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3390ccd979bdSMark Fasheh 		if (ret) {
3391ccd979bdSMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
3392ccd979bdSMark Fasheh 			if (ret < 0)
3393ccd979bdSMark Fasheh 				mlog_errno(ret);
3394ccd979bdSMark Fasheh 		}
3395ccd979bdSMark Fasheh 		goto leave;
3396ccd979bdSMark Fasheh 	}
3397ccd979bdSMark Fasheh 
3398a1912826SSunil Mushran 	/*
3399a1912826SSunil Mushran 	 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3400a1912826SSunil Mushran 	 * set when the ast is received for an upconvert just before the
3401a1912826SSunil Mushran 	 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3402a1912826SSunil Mushran 	 * on the heels of the ast, we want to delay the downconvert just
3403a1912826SSunil Mushran 	 * enough to allow the up requestor to do its task. Because this
3404a1912826SSunil Mushran 	 * lock is in the blocked queue, the lock will be downconverted
3405a1912826SSunil Mushran 	 * as soon as the requestor is done with the lock.
3406a1912826SSunil Mushran 	 */
3407a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3408a1912826SSunil Mushran 		goto leave_requeue;
3409a1912826SSunil Mushran 
34100d74125aSSunil Mushran 	/*
34110d74125aSSunil Mushran 	 * How can we block and yet be at NL?  We were trying to upconvert
34120d74125aSSunil Mushran 	 * from NL and got canceled.  The code comes back here, and now
34130d74125aSSunil Mushran 	 * we notice and clear BLOCKING.
34140d74125aSSunil Mushran 	 */
34150d74125aSSunil Mushran 	if (lockres->l_level == DLM_LOCK_NL) {
34160d74125aSSunil Mushran 		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
34179b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, Aborting dc\n", lockres->l_name);
34180d74125aSSunil Mushran 		lockres->l_blocking = DLM_LOCK_NL;
34190d74125aSSunil Mushran 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
34200d74125aSSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
34210d74125aSSunil Mushran 		goto leave;
34220d74125aSSunil Mushran 	}
34230d74125aSSunil Mushran 
3424ccd979bdSMark Fasheh 	/* if we're blocking an exclusive and we have *any* holders,
3425ccd979bdSMark Fasheh 	 * then requeue. */
3426bd3e7610SJoel Becker 	if ((lockres->l_blocking == DLM_LOCK_EX)
34279b915181SSunil Mushran 	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
34289b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX/PR Holders %u,%u\n",
34299b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders,
34309b915181SSunil Mushran 		     lockres->l_ro_holders);
3431f7fbfdd1SMark Fasheh 		goto leave_requeue;
34329b915181SSunil Mushran 	}
3433ccd979bdSMark Fasheh 
3434ccd979bdSMark Fasheh 	/* If it's a PR we're blocking, then only
3435ccd979bdSMark Fasheh 	 * requeue if we've got any EX holders */
3436bd3e7610SJoel Becker 	if (lockres->l_blocking == DLM_LOCK_PR &&
34379b915181SSunil Mushran 	    lockres->l_ex_holders) {
34389b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: EX Holders %u\n",
34399b915181SSunil Mushran 		     lockres->l_name, lockres->l_ex_holders);
3440f7fbfdd1SMark Fasheh 		goto leave_requeue;
34419b915181SSunil Mushran 	}
3442f7fbfdd1SMark Fasheh 
3443f7fbfdd1SMark Fasheh 	/*
3444f7fbfdd1SMark Fasheh 	 * Can we get a lock in this state if the holder counts are
3445f7fbfdd1SMark Fasheh 	 * zero? The meta data unblock code used to check this.
3446f7fbfdd1SMark Fasheh 	 */
3447f7fbfdd1SMark Fasheh 	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
34489b915181SSunil Mushran 	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING)) {
34499b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Lock Refreshing\n",
34509b915181SSunil Mushran 		     lockres->l_name);
3451f7fbfdd1SMark Fasheh 		goto leave_requeue;
34529b915181SSunil Mushran 	}
3453ccd979bdSMark Fasheh 
	/* Target level for the downconvert, derived from what we block. */
345416d5b956SMark Fasheh 	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
345516d5b956SMark Fasheh 
	/* An optional per-type veto; a zero result means "not yet". */
345616d5b956SMark Fasheh 	if (lockres->l_ops->check_downconvert
34579b915181SSunil Mushran 	    && !lockres->l_ops->check_downconvert(lockres, new_level)) {
34589b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, ReQ: Checkpointing\n",
34599b915181SSunil Mushran 		     lockres->l_name);
346016d5b956SMark Fasheh 		goto leave_requeue;
34619b915181SSunil Mushran 	}
346216d5b956SMark Fasheh 
3463ccd979bdSMark Fasheh 	/* If we get here, then we know that there are no more
3464ccd979bdSMark Fasheh 	 * incompatible holders (and anyone asking for an incompatible
3465ccd979bdSMark Fasheh 	 * lock is blocked). We can now downconvert the lock */
3466cc567d89SMark Fasheh 	if (!lockres->l_ops->downconvert_worker)
3467ccd979bdSMark Fasheh 		goto downconvert;
3468ccd979bdSMark Fasheh 
3469ccd979bdSMark Fasheh 	/* Some lockres types want to do a bit of work before
3470ccd979bdSMark Fasheh 	 * downconverting a lock. Allow that here. The worker function
3471ccd979bdSMark Fasheh 	 * may sleep, so we save off a copy of what we're blocking as
3472ccd979bdSMark Fasheh 	 * it may change while we're not holding the spin lock. */
3473ccd979bdSMark Fasheh 	blocking = lockres->l_blocking;
3474079b8057SSunil Mushran 	level = lockres->l_level;
3475ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3476ccd979bdSMark Fasheh 
3477cc567d89SMark Fasheh 	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3478d680efe9SMark Fasheh 
	/* l_lock was released before the worker ran, so this exit must not
	 * unlock again. */
34799b915181SSunil Mushran 	if (ctl->unblock_action == UNBLOCK_STOP_POST) {
34809b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, UNBLOCK_STOP_POST\n",
34819b915181SSunil Mushran 		     lockres->l_name);
3482d680efe9SMark Fasheh 		goto leave;
34839b915181SSunil Mushran 	}
3484ccd979bdSMark Fasheh 
3485ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3486079b8057SSunil Mushran 	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
3487ccd979bdSMark Fasheh 		/* If this changed underneath us, then we can't drop
3488ccd979bdSMark Fasheh 		 * it just yet. */
34899b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s, block=%d:%d, level=%d:%d, "
34909b915181SSunil Mushran 		     "Recheck\n", lockres->l_name, blocking,
34919b915181SSunil Mushran 		     lockres->l_blocking, level, lockres->l_level);
3492ccd979bdSMark Fasheh 		goto recheck;
3493ccd979bdSMark Fasheh 	}
3494ccd979bdSMark Fasheh 
	/* Reached with l_lock held, from either path above. */
3495ccd979bdSMark Fasheh downconvert:
3496d680efe9SMark Fasheh 	ctl->requeue = 0;
3497ccd979bdSMark Fasheh 
34985ef0d4eaSMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3499bd3e7610SJoel Becker 		if (lockres->l_level == DLM_LOCK_EX)
35005ef0d4eaSMark Fasheh 			set_lvb = 1;
35015ef0d4eaSMark Fasheh 
35025ef0d4eaSMark Fasheh 		/*
35035ef0d4eaSMark Fasheh 		 * We only set the lvb if the lock has been fully
35045ef0d4eaSMark Fasheh 		 * refreshed - otherwise we risk setting stale
35055ef0d4eaSMark Fasheh 		 * data. Otherwise, there's no need to actually clear
35065ef0d4eaSMark Fasheh 		 * out the lvb here as its value is still valid.
35075ef0d4eaSMark Fasheh 		 */
35085ef0d4eaSMark Fasheh 		if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
35095ef0d4eaSMark Fasheh 			lockres->l_ops->set_lvb(lockres);
35105ef0d4eaSMark Fasheh 	}
35115ef0d4eaSMark Fasheh 
	/* Mark the lockres busy under the lock; the DLM call itself is made
	 * after the lock has been dropped. */
3512de551246SJoel Becker 	gen = ocfs2_prepare_downconvert(lockres, new_level);
3513ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3514de551246SJoel Becker 	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3515de551246SJoel Becker 				     gen);
3516de551246SJoel Becker 
	/* Every jump to leave happens with l_lock already released. */
3517ccd979bdSMark Fasheh leave:
3518c1e8d35eSTao Ma 	if (ret)
3519c1e8d35eSTao Ma 		mlog_errno(ret);
3520ccd979bdSMark Fasheh 	return ret;
3521f7fbfdd1SMark Fasheh 
	/* Every jump to leave_requeue happens with l_lock still held. */
3522f7fbfdd1SMark Fasheh leave_requeue:
3523f7fbfdd1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3524f7fbfdd1SMark Fasheh 	ctl->requeue = 1;
3525f7fbfdd1SMark Fasheh 
3526f7fbfdd1SMark Fasheh 	return 0;
3527ccd979bdSMark Fasheh }
3528ccd979bdSMark Fasheh 
3529d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3530ccd979bdSMark Fasheh 				     int blocking)
3531ccd979bdSMark Fasheh {
3532ccd979bdSMark Fasheh 	struct inode *inode;
3533ccd979bdSMark Fasheh 	struct address_space *mapping;
35345e98d492SGoldwyn Rodrigues 	struct ocfs2_inode_info *oi;
3535ccd979bdSMark Fasheh 
3536ccd979bdSMark Fasheh        	inode = ocfs2_lock_res_inode(lockres);
3537ccd979bdSMark Fasheh 	mapping = inode->i_mapping;
3538ccd979bdSMark Fasheh 
35395e98d492SGoldwyn Rodrigues 	if (S_ISDIR(inode->i_mode)) {
35405e98d492SGoldwyn Rodrigues 		oi = OCFS2_I(inode);
35415e98d492SGoldwyn Rodrigues 		oi->ip_dir_lock_gen++;
35425e98d492SGoldwyn Rodrigues 		mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
35435e98d492SGoldwyn Rodrigues 		goto out;
35445e98d492SGoldwyn Rodrigues 	}
35455e98d492SGoldwyn Rodrigues 
35461044e401SMark Fasheh 	if (!S_ISREG(inode->i_mode))
3547f1f54068SMark Fasheh 		goto out;
3548f1f54068SMark Fasheh 
35497f4a2a97SMark Fasheh 	/*
35507f4a2a97SMark Fasheh 	 * We need this before the filemap_fdatawrite() so that it can
35517f4a2a97SMark Fasheh 	 * transfer the dirty bit from the PTE to the
35527f4a2a97SMark Fasheh 	 * page. Unfortunately this means that even for EX->PR
35537f4a2a97SMark Fasheh 	 * downconverts, we'll lose our mappings and have to build
35547f4a2a97SMark Fasheh 	 * them up again.
35557f4a2a97SMark Fasheh 	 */
35567f4a2a97SMark Fasheh 	unmap_mapping_range(mapping, 0, 0, 0);
35577f4a2a97SMark Fasheh 
3558ccd979bdSMark Fasheh 	if (filemap_fdatawrite(mapping)) {
3559b0697053SMark Fasheh 		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
3560b0697053SMark Fasheh 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
3561ccd979bdSMark Fasheh 	}
3562ccd979bdSMark Fasheh 	sync_mapping_buffers(mapping);
3563bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_EX) {
3564ccd979bdSMark Fasheh 		truncate_inode_pages(mapping, 0);
3565ccd979bdSMark Fasheh 	} else {
3566ccd979bdSMark Fasheh 		/* We only need to wait on the I/O if we're not also
3567ccd979bdSMark Fasheh 		 * truncating pages because truncate_inode_pages waits
3568ccd979bdSMark Fasheh 		 * for us above. We don't truncate pages if we're
3569ccd979bdSMark Fasheh 		 * blocking anything < EXMODE because we want to keep
3570ccd979bdSMark Fasheh 		 * them around in that case. */
3571ccd979bdSMark Fasheh 		filemap_fdatawait(mapping);
3572ccd979bdSMark Fasheh 	}
3573ccd979bdSMark Fasheh 
3574f1f54068SMark Fasheh out:
3575d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE;
3576ccd979bdSMark Fasheh }
3577ccd979bdSMark Fasheh 
3578a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
3579a4338481STao Ma 				 struct ocfs2_lock_res *lockres,
3580810d5aebSMark Fasheh 				 int new_level)
3581810d5aebSMark Fasheh {
3582a4338481STao Ma 	int checkpointed = ocfs2_ci_fully_checkpointed(ci);
3583810d5aebSMark Fasheh 
3584bd3e7610SJoel Becker 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3585bd3e7610SJoel Becker 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
3586810d5aebSMark Fasheh 
3587810d5aebSMark Fasheh 	if (checkpointed)
3588810d5aebSMark Fasheh 		return 1;
3589810d5aebSMark Fasheh 
3590a4338481STao Ma 	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
3591810d5aebSMark Fasheh 	return 0;
3592810d5aebSMark Fasheh }
3593810d5aebSMark Fasheh 
/*
 * check_downconvert-style helper for a lockres that maps to an inode: run
 * ocfs2_ci_checkpointed() on that inode's metadata cache.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	struct ocfs2_caching_info *ci;

	ci = INODE_CACHE(ocfs2_lock_res_inode(lockres));

	return ocfs2_ci_checkpointed(ci, lockres, new_level);
}
3601a4338481STao Ma 
/*
 * Fill the lock value block of a metadata lock by handing the inode
 * behind @lockres to __ocfs2_stuff_meta_lvb().
 */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
3608810d5aebSMark Fasheh 
/*
 * Drop the final reference on our dentry lock.  This currently runs in
 * the downconvert thread; the dlmglue API could be simplified in the
 * future by pushing these drops off to the ocfs2_wq instead.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
3620d680efe9SMark Fasheh 
3621d680efe9SMark Fasheh /*
3622d680efe9SMark Fasheh  * d_delete() matching dentries before the lock downconvert.
3623d680efe9SMark Fasheh  *
3624d680efe9SMark Fasheh  * At this point, any process waiting to destroy the
3625d680efe9SMark Fasheh  * dentry_lock due to last ref count is stopped by the
3626d680efe9SMark Fasheh  * OCFS2_LOCK_QUEUED flag.
3627d680efe9SMark Fasheh  *
3628d680efe9SMark Fasheh  * We have two potential problems
3629d680efe9SMark Fasheh  *
3630d680efe9SMark Fasheh  * 1) If we do the last reference drop on our dentry_lock (via dput)
3631d680efe9SMark Fasheh  *    we'll wind up in ocfs2_release_dentry_lock(), waiting on
3632d680efe9SMark Fasheh  *    the downconvert to finish. Instead we take an elevated
3633d680efe9SMark Fasheh  *    reference and push the drop until after we've completed our
3634d680efe9SMark Fasheh  *    unblock processing.
3635d680efe9SMark Fasheh  *
3636d680efe9SMark Fasheh  * 2) There might be another process with a final reference,
3637d680efe9SMark Fasheh  *    waiting on us to finish processing. If this is the case, we
3638d680efe9SMark Fasheh  *    detect it and exit out - there's no more dentries anyway.
3639d680efe9SMark Fasheh  */
3640d680efe9SMark Fasheh static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3641d680efe9SMark Fasheh 				       int blocking)
3642d680efe9SMark Fasheh {
3643d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
3644d680efe9SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
3645d680efe9SMark Fasheh 	struct dentry *dentry;
3646d680efe9SMark Fasheh 	unsigned long flags;
3647d680efe9SMark Fasheh 	int extra_ref = 0;
3648d680efe9SMark Fasheh 
3649d680efe9SMark Fasheh 	/*
3650d680efe9SMark Fasheh 	 * This node is blocking another node from getting a read
3651d680efe9SMark Fasheh 	 * lock. This happens when we've renamed within a
3652d680efe9SMark Fasheh 	 * directory. We've forced the other nodes to d_delete(), but
3653d680efe9SMark Fasheh 	 * we never actually dropped our lock because it's still
3654d680efe9SMark Fasheh 	 * valid. The downconvert code will retain a PR for this node,
3655d680efe9SMark Fasheh 	 * so there's no further work to do.
3656d680efe9SMark Fasheh 	 */
3657bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_PR)
3658d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
3659d680efe9SMark Fasheh 
3660d680efe9SMark Fasheh 	/*
3661d680efe9SMark Fasheh 	 * Mark this inode as potentially orphaned. The code in
3662d680efe9SMark Fasheh 	 * ocfs2_delete_inode() will figure out whether it actually
3663d680efe9SMark Fasheh 	 * needs to be freed or not.
3664d680efe9SMark Fasheh 	 */
3665d680efe9SMark Fasheh 	spin_lock(&oi->ip_lock);
3666d680efe9SMark Fasheh 	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
3667d680efe9SMark Fasheh 	spin_unlock(&oi->ip_lock);
3668d680efe9SMark Fasheh 
3669d680efe9SMark Fasheh 	/*
3670d680efe9SMark Fasheh 	 * Yuck. We need to make sure however that the check of
3671d680efe9SMark Fasheh 	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
3672d680efe9SMark Fasheh 	 * respect to a reference decrement or the setting of that
3673d680efe9SMark Fasheh 	 * flag.
3674d680efe9SMark Fasheh 	 */
3675d680efe9SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3676d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
3677d680efe9SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
3678d680efe9SMark Fasheh 	    && dl->dl_count) {
3679d680efe9SMark Fasheh 		dl->dl_count++;
3680d680efe9SMark Fasheh 		extra_ref = 1;
3681d680efe9SMark Fasheh 	}
3682d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
3683d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3684d680efe9SMark Fasheh 
3685d680efe9SMark Fasheh 	mlog(0, "extra_ref = %d\n", extra_ref);
3686d680efe9SMark Fasheh 
3687d680efe9SMark Fasheh 	/*
3688d680efe9SMark Fasheh 	 * We have a process waiting on us in ocfs2_dentry_iput(),
3689d680efe9SMark Fasheh 	 * which means we can't have any more outstanding
3690d680efe9SMark Fasheh 	 * aliases. There's no need to do any more work.
3691d680efe9SMark Fasheh 	 */
3692d680efe9SMark Fasheh 	if (!extra_ref)
3693d680efe9SMark Fasheh 		return UNBLOCK_CONTINUE;
3694d680efe9SMark Fasheh 
3695d680efe9SMark Fasheh 	spin_lock(&dentry_attach_lock);
3696d680efe9SMark Fasheh 	while (1) {
3697d680efe9SMark Fasheh 		dentry = ocfs2_find_local_alias(dl->dl_inode,
3698d680efe9SMark Fasheh 						dl->dl_parent_blkno, 1);
3699d680efe9SMark Fasheh 		if (!dentry)
3700d680efe9SMark Fasheh 			break;
3701d680efe9SMark Fasheh 		spin_unlock(&dentry_attach_lock);
3702d680efe9SMark Fasheh 
3703d680efe9SMark Fasheh 		mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
3704d680efe9SMark Fasheh 		     dentry->d_name.name);
3705d680efe9SMark Fasheh 
3706d680efe9SMark Fasheh 		/*
3707d680efe9SMark Fasheh 		 * The following dcache calls may do an
3708d680efe9SMark Fasheh 		 * iput(). Normally we don't want that from the
3709d680efe9SMark Fasheh 		 * downconverting thread, but in this case it's ok
3710d680efe9SMark Fasheh 		 * because the requesting node already has an
3711d680efe9SMark Fasheh 		 * exclusive lock on the inode, so it can't be queued
3712d680efe9SMark Fasheh 		 * for a downconvert.
3713d680efe9SMark Fasheh 		 */
3714d680efe9SMark Fasheh 		d_delete(dentry);
3715d680efe9SMark Fasheh 		dput(dentry);
3716d680efe9SMark Fasheh 
3717d680efe9SMark Fasheh 		spin_lock(&dentry_attach_lock);
3718d680efe9SMark Fasheh 	}
3719d680efe9SMark Fasheh 	spin_unlock(&dentry_attach_lock);
3720d680efe9SMark Fasheh 
3721d680efe9SMark Fasheh 	/*
3722d680efe9SMark Fasheh 	 * If we are the last holder of this dentry lock, there is no
3723d680efe9SMark Fasheh 	 * reason to downconvert so skip straight to the unlock.
3724d680efe9SMark Fasheh 	 */
3725d680efe9SMark Fasheh 	if (dl->dl_count == 1)
3726d680efe9SMark Fasheh 		return UNBLOCK_STOP_POST;
3727d680efe9SMark Fasheh 
3728d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE_POST;
3729d680efe9SMark Fasheh }
3730d680efe9SMark Fasheh 
37318dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
37328dec98edSTao Ma 					    int new_level)
37338dec98edSTao Ma {
37348dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
37358dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
37368dec98edSTao Ma 
37378dec98edSTao Ma 	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
37388dec98edSTao Ma }
37398dec98edSTao Ma 
37408dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
37418dec98edSTao Ma 					 int blocking)
37428dec98edSTao Ma {
37438dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
37448dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
37458dec98edSTao Ma 
37468dec98edSTao Ma 	ocfs2_metadata_cache_purge(&tree->rf_ci);
37478dec98edSTao Ma 
37488dec98edSTao Ma 	return UNBLOCK_CONTINUE;
37498dec98edSTao Ma }
37508dec98edSTao Ma 
37519e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
37529e33d69fSJan Kara {
37539e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb;
37549e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
37559e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
37569e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
37579e33d69fSJan Kara 
3758a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
37599e33d69fSJan Kara 	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
37609e33d69fSJan Kara 	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
37619e33d69fSJan Kara 	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
37629e33d69fSJan Kara 	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
37639e33d69fSJan Kara 	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
37649e33d69fSJan Kara 	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
37659e33d69fSJan Kara 	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
37669e33d69fSJan Kara }
37679e33d69fSJan Kara 
37689e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
37699e33d69fSJan Kara {
37709e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
37719e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
37729e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
37739e33d69fSJan Kara 
37749e33d69fSJan Kara 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
37759e33d69fSJan Kara 		ocfs2_cluster_unlock(osb, lockres, level);
37769e33d69fSJan Kara }
37779e33d69fSJan Kara 
37789e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
37799e33d69fSJan Kara {
37809e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
37819e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
37829e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
37839e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
378485eb8b73SJoel Becker 	struct buffer_head *bh = NULL;
37859e33d69fSJan Kara 	struct ocfs2_global_disk_dqinfo *gdinfo;
37869e33d69fSJan Kara 	int status = 0;
37879e33d69fSJan Kara 
37881c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
37891c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
37909e33d69fSJan Kara 		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
37919e33d69fSJan Kara 		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
37929e33d69fSJan Kara 		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
37939e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
37949e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
37959e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
37969e33d69fSJan Kara 					be32_to_cpu(lvb->lvb_free_entry);
37979e33d69fSJan Kara 	} else {
3798ae4f6ef1SJan Kara 		status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
3799ae4f6ef1SJan Kara 						     oinfo->dqi_giblk, &bh);
380085eb8b73SJoel Becker 		if (status) {
38019e33d69fSJan Kara 			mlog_errno(status);
38029e33d69fSJan Kara 			goto bail;
38039e33d69fSJan Kara 		}
38049e33d69fSJan Kara 		gdinfo = (struct ocfs2_global_disk_dqinfo *)
38059e33d69fSJan Kara 					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
38069e33d69fSJan Kara 		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
38079e33d69fSJan Kara 		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
38089e33d69fSJan Kara 		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
38099e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
38109e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
38119e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
38129e33d69fSJan Kara 					le32_to_cpu(gdinfo->dqi_free_entry);
38139e33d69fSJan Kara 		brelse(bh);
38149e33d69fSJan Kara 		ocfs2_track_lock_refresh(lockres);
38159e33d69fSJan Kara 	}
38169e33d69fSJan Kara 
38179e33d69fSJan Kara bail:
38189e33d69fSJan Kara 	return status;
38199e33d69fSJan Kara }
38209e33d69fSJan Kara 
38219e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file
38229e33d69fSJan Kara  * so that we can safely refresh quota info from disk. */
38239e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
38249e33d69fSJan Kara {
38259e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
38269e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
38279e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
38289e33d69fSJan Kara 	int status = 0;
38299e33d69fSJan Kara 
38309e33d69fSJan Kara 	/* On RO devices, locking really isn't needed... */
38319e33d69fSJan Kara 	if (ocfs2_is_hard_readonly(osb)) {
38329e33d69fSJan Kara 		if (ex)
38339e33d69fSJan Kara 			status = -EROFS;
38349e33d69fSJan Kara 		goto bail;
38359e33d69fSJan Kara 	}
38369e33d69fSJan Kara 	if (ocfs2_mount_local(osb))
38379e33d69fSJan Kara 		goto bail;
38389e33d69fSJan Kara 
38399e33d69fSJan Kara 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
38409e33d69fSJan Kara 	if (status < 0) {
38419e33d69fSJan Kara 		mlog_errno(status);
38429e33d69fSJan Kara 		goto bail;
38439e33d69fSJan Kara 	}
38449e33d69fSJan Kara 	if (!ocfs2_should_refresh_lock_res(lockres))
38459e33d69fSJan Kara 		goto bail;
38469e33d69fSJan Kara 	/* OK, we have the lock but we need to refresh the quota info */
38479e33d69fSJan Kara 	status = ocfs2_refresh_qinfo(oinfo);
38489e33d69fSJan Kara 	if (status)
38499e33d69fSJan Kara 		ocfs2_qinfo_unlock(oinfo, ex);
38509e33d69fSJan Kara 	ocfs2_complete_lock_res_refresh(lockres, status);
38519e33d69fSJan Kara bail:
38529e33d69fSJan Kara 	return status;
38539e33d69fSJan Kara }
38549e33d69fSJan Kara 
38558dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
38568dec98edSTao Ma {
38578dec98edSTao Ma 	int status;
38588dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
38598dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
38608dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
38618dec98edSTao Ma 
38628dec98edSTao Ma 
38638dec98edSTao Ma 	if (ocfs2_is_hard_readonly(osb))
38648dec98edSTao Ma 		return -EROFS;
38658dec98edSTao Ma 
38668dec98edSTao Ma 	if (ocfs2_mount_local(osb))
38678dec98edSTao Ma 		return 0;
38688dec98edSTao Ma 
38698dec98edSTao Ma 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
38708dec98edSTao Ma 	if (status < 0)
38718dec98edSTao Ma 		mlog_errno(status);
38728dec98edSTao Ma 
38738dec98edSTao Ma 	return status;
38748dec98edSTao Ma }
38758dec98edSTao Ma 
38768dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
38778dec98edSTao Ma {
38788dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
38798dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
38808dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
38818dec98edSTao Ma 
38828dec98edSTao Ma 	if (!ocfs2_mount_local(osb))
38838dec98edSTao Ma 		ocfs2_cluster_unlock(osb, lockres, level);
38848dec98edSTao Ma }
38858dec98edSTao Ma 
388600600056SAdrian Bunk static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
3887ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *lockres)
3888ccd979bdSMark Fasheh {
3889ccd979bdSMark Fasheh 	int status;
3890d680efe9SMark Fasheh 	struct ocfs2_unblock_ctl ctl = {0, 0,};
3891ccd979bdSMark Fasheh 	unsigned long flags;
3892ccd979bdSMark Fasheh 
3893ccd979bdSMark Fasheh 	/* Our reference to the lockres in this function can be
3894ccd979bdSMark Fasheh 	 * considered valid until we remove the OCFS2_LOCK_QUEUED
3895ccd979bdSMark Fasheh 	 * flag. */
3896ccd979bdSMark Fasheh 
3897ccd979bdSMark Fasheh 	BUG_ON(!lockres);
3898ccd979bdSMark Fasheh 	BUG_ON(!lockres->l_ops);
3899ccd979bdSMark Fasheh 
39009b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s blocked\n", lockres->l_name);
3901ccd979bdSMark Fasheh 
3902ccd979bdSMark Fasheh 	/* Detect whether a lock has been marked as going away while
390334d024f8SMark Fasheh 	 * the downconvert thread was processing other things. A lock can
3904ccd979bdSMark Fasheh 	 * still be marked with OCFS2_LOCK_FREEING after this check,
3905ccd979bdSMark Fasheh 	 * but short circuiting here will still save us some
3906ccd979bdSMark Fasheh 	 * performance. */
3907ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3908ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING)
3909ccd979bdSMark Fasheh 		goto unqueue;
3910ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3911ccd979bdSMark Fasheh 
3912b5e500e2SMark Fasheh 	status = ocfs2_unblock_lock(osb, lockres, &ctl);
3913ccd979bdSMark Fasheh 	if (status < 0)
3914ccd979bdSMark Fasheh 		mlog_errno(status);
3915ccd979bdSMark Fasheh 
3916ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3917ccd979bdSMark Fasheh unqueue:
3918d680efe9SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
3919ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
3920ccd979bdSMark Fasheh 	} else
3921ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
3922ccd979bdSMark Fasheh 
39239b915181SSunil Mushran 	mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
3924d680efe9SMark Fasheh 	     ctl.requeue ? "yes" : "no");
3925ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3926ccd979bdSMark Fasheh 
3927d680efe9SMark Fasheh 	if (ctl.unblock_action != UNBLOCK_CONTINUE
3928d680efe9SMark Fasheh 	    && lockres->l_ops->post_unlock)
3929d680efe9SMark Fasheh 		lockres->l_ops->post_unlock(osb, lockres);
3930ccd979bdSMark Fasheh }
3931ccd979bdSMark Fasheh 
3932ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
3933ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
3934ccd979bdSMark Fasheh {
3935a75e9ccaSSrinivas Eeda 	unsigned long flags;
3936a75e9ccaSSrinivas Eeda 
3937ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3938ccd979bdSMark Fasheh 
3939ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
3940ccd979bdSMark Fasheh 		/* Do not schedule a lock for downconvert when it's on
3941ccd979bdSMark Fasheh 		 * the way to destruction - any nodes wanting access
3942ccd979bdSMark Fasheh 		 * to the resource will get it soon. */
39439b915181SSunil Mushran 		mlog(ML_BASTS, "lockres %s won't be scheduled: flags 0x%lx\n",
3944ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags);
3945ccd979bdSMark Fasheh 		return;
3946ccd979bdSMark Fasheh 	}
3947ccd979bdSMark Fasheh 
3948ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
3949ccd979bdSMark Fasheh 
3950a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
3951ccd979bdSMark Fasheh 	if (list_empty(&lockres->l_blocked_list)) {
3952ccd979bdSMark Fasheh 		list_add_tail(&lockres->l_blocked_list,
3953ccd979bdSMark Fasheh 			      &osb->blocked_lock_list);
3954ccd979bdSMark Fasheh 		osb->blocked_lock_count++;
3955ccd979bdSMark Fasheh 	}
3956a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
3957ccd979bdSMark Fasheh }
395834d024f8SMark Fasheh 
395934d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
396034d024f8SMark Fasheh {
396134d024f8SMark Fasheh 	unsigned long processed;
3962a75e9ccaSSrinivas Eeda 	unsigned long flags;
396334d024f8SMark Fasheh 	struct ocfs2_lock_res *lockres;
396434d024f8SMark Fasheh 
3965a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
396634d024f8SMark Fasheh 	/* grab this early so we know to try again if a state change and
396734d024f8SMark Fasheh 	 * wake happens part-way through our work  */
396834d024f8SMark Fasheh 	osb->dc_work_sequence = osb->dc_wake_sequence;
396934d024f8SMark Fasheh 
397034d024f8SMark Fasheh 	processed = osb->blocked_lock_count;
397134d024f8SMark Fasheh 	while (processed) {
397234d024f8SMark Fasheh 		BUG_ON(list_empty(&osb->blocked_lock_list));
397334d024f8SMark Fasheh 
397434d024f8SMark Fasheh 		lockres = list_entry(osb->blocked_lock_list.next,
397534d024f8SMark Fasheh 				     struct ocfs2_lock_res, l_blocked_list);
397634d024f8SMark Fasheh 		list_del_init(&lockres->l_blocked_list);
397734d024f8SMark Fasheh 		osb->blocked_lock_count--;
3978a75e9ccaSSrinivas Eeda 		spin_unlock_irqrestore(&osb->dc_task_lock, flags);
397934d024f8SMark Fasheh 
398034d024f8SMark Fasheh 		BUG_ON(!processed);
398134d024f8SMark Fasheh 		processed--;
398234d024f8SMark Fasheh 
398334d024f8SMark Fasheh 		ocfs2_process_blocked_lock(osb, lockres);
398434d024f8SMark Fasheh 
3985a75e9ccaSSrinivas Eeda 		spin_lock_irqsave(&osb->dc_task_lock, flags);
398634d024f8SMark Fasheh 	}
3987a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
398834d024f8SMark Fasheh }
398934d024f8SMark Fasheh 
399034d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
399134d024f8SMark Fasheh {
399234d024f8SMark Fasheh 	int empty = 0;
3993a75e9ccaSSrinivas Eeda 	unsigned long flags;
399434d024f8SMark Fasheh 
3995a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
399634d024f8SMark Fasheh 	if (list_empty(&osb->blocked_lock_list))
399734d024f8SMark Fasheh 		empty = 1;
399834d024f8SMark Fasheh 
3999a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
400034d024f8SMark Fasheh 	return empty;
400134d024f8SMark Fasheh }
400234d024f8SMark Fasheh 
400334d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
400434d024f8SMark Fasheh {
400534d024f8SMark Fasheh 	int should_wake = 0;
4006a75e9ccaSSrinivas Eeda 	unsigned long flags;
400734d024f8SMark Fasheh 
4008a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
400934d024f8SMark Fasheh 	if (osb->dc_work_sequence != osb->dc_wake_sequence)
401034d024f8SMark Fasheh 		should_wake = 1;
4011a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
401234d024f8SMark Fasheh 
401334d024f8SMark Fasheh 	return should_wake;
401434d024f8SMark Fasheh }
401534d024f8SMark Fasheh 
4016200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg)
401734d024f8SMark Fasheh {
401834d024f8SMark Fasheh 	int status = 0;
401934d024f8SMark Fasheh 	struct ocfs2_super *osb = arg;
402034d024f8SMark Fasheh 
402134d024f8SMark Fasheh 	/* only quit once we've been asked to stop and there is no more
402234d024f8SMark Fasheh 	 * work available */
402334d024f8SMark Fasheh 	while (!(kthread_should_stop() &&
402434d024f8SMark Fasheh 		ocfs2_downconvert_thread_lists_empty(osb))) {
402534d024f8SMark Fasheh 
402634d024f8SMark Fasheh 		wait_event_interruptible(osb->dc_event,
402734d024f8SMark Fasheh 					 ocfs2_downconvert_thread_should_wake(osb) ||
402834d024f8SMark Fasheh 					 kthread_should_stop());
402934d024f8SMark Fasheh 
403034d024f8SMark Fasheh 		mlog(0, "downconvert_thread: awoken\n");
403134d024f8SMark Fasheh 
403234d024f8SMark Fasheh 		ocfs2_downconvert_thread_do_work(osb);
403334d024f8SMark Fasheh 	}
403434d024f8SMark Fasheh 
403534d024f8SMark Fasheh 	osb->dc_task = NULL;
403634d024f8SMark Fasheh 	return status;
403734d024f8SMark Fasheh }
403834d024f8SMark Fasheh 
403934d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
404034d024f8SMark Fasheh {
4041a75e9ccaSSrinivas Eeda 	unsigned long flags;
4042a75e9ccaSSrinivas Eeda 
4043a75e9ccaSSrinivas Eeda 	spin_lock_irqsave(&osb->dc_task_lock, flags);
404434d024f8SMark Fasheh 	/* make sure the voting thread gets a swipe at whatever changes
404534d024f8SMark Fasheh 	 * the caller may have made to the voting state */
404634d024f8SMark Fasheh 	osb->dc_wake_sequence++;
4047a75e9ccaSSrinivas Eeda 	spin_unlock_irqrestore(&osb->dc_task_lock, flags);
404834d024f8SMark Fasheh 	wake_up(&osb->dc_event);
404934d024f8SMark Fasheh }
4050