xref: /openbmc/linux/fs/ocfs2/dlmglue.c (revision c0e41338)
1ccd979bdSMark Fasheh /* -*- mode: c; c-basic-offset: 8; -*-
2ccd979bdSMark Fasheh  * vim: noexpandtab sw=8 ts=8 sts=0:
3ccd979bdSMark Fasheh  *
4ccd979bdSMark Fasheh  * dlmglue.c
5ccd979bdSMark Fasheh  *
6ccd979bdSMark Fasheh  * Code which implements an OCFS2 specific interface to our DLM.
7ccd979bdSMark Fasheh  *
8ccd979bdSMark Fasheh  * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
9ccd979bdSMark Fasheh  *
10ccd979bdSMark Fasheh  * This program is free software; you can redistribute it and/or
11ccd979bdSMark Fasheh  * modify it under the terms of the GNU General Public
12ccd979bdSMark Fasheh  * License as published by the Free Software Foundation; either
13ccd979bdSMark Fasheh  * version 2 of the License, or (at your option) any later version.
14ccd979bdSMark Fasheh  *
15ccd979bdSMark Fasheh  * This program is distributed in the hope that it will be useful,
16ccd979bdSMark Fasheh  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17ccd979bdSMark Fasheh  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18ccd979bdSMark Fasheh  * General Public License for more details.
19ccd979bdSMark Fasheh  *
20ccd979bdSMark Fasheh  * You should have received a copy of the GNU General Public
21ccd979bdSMark Fasheh  * License along with this program; if not, write to the
22ccd979bdSMark Fasheh  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23ccd979bdSMark Fasheh  * Boston, MA 02111-1307, USA.
24ccd979bdSMark Fasheh  */
25ccd979bdSMark Fasheh 
26ccd979bdSMark Fasheh #include <linux/types.h>
27ccd979bdSMark Fasheh #include <linux/slab.h>
28ccd979bdSMark Fasheh #include <linux/highmem.h>
29ccd979bdSMark Fasheh #include <linux/mm.h>
30ccd979bdSMark Fasheh #include <linux/kthread.h>
31ccd979bdSMark Fasheh #include <linux/pagemap.h>
32ccd979bdSMark Fasheh #include <linux/debugfs.h>
33ccd979bdSMark Fasheh #include <linux/seq_file.h>
348ddb7b00SSunil Mushran #include <linux/time.h>
359e33d69fSJan Kara #include <linux/quotaops.h>
36ccd979bdSMark Fasheh 
37ccd979bdSMark Fasheh #define MLOG_MASK_PREFIX ML_DLM_GLUE
38ccd979bdSMark Fasheh #include <cluster/masklog.h>
39ccd979bdSMark Fasheh 
40ccd979bdSMark Fasheh #include "ocfs2.h"
41d24fbcdaSJoel Becker #include "ocfs2_lockingver.h"
42ccd979bdSMark Fasheh 
43ccd979bdSMark Fasheh #include "alloc.h"
44d680efe9SMark Fasheh #include "dcache.h"
45ccd979bdSMark Fasheh #include "dlmglue.h"
46ccd979bdSMark Fasheh #include "extent_map.h"
477f1a37e3STiger Yang #include "file.h"
48ccd979bdSMark Fasheh #include "heartbeat.h"
49ccd979bdSMark Fasheh #include "inode.h"
50ccd979bdSMark Fasheh #include "journal.h"
5124ef1815SJoel Becker #include "stackglue.h"
52ccd979bdSMark Fasheh #include "slot_map.h"
53ccd979bdSMark Fasheh #include "super.h"
54ccd979bdSMark Fasheh #include "uptodate.h"
559e33d69fSJan Kara #include "quota.h"
568dec98edSTao Ma #include "refcounttree.h"
57ccd979bdSMark Fasheh 
58ccd979bdSMark Fasheh #include "buffer_head_io.h"
59ccd979bdSMark Fasheh 
60ccd979bdSMark Fasheh struct ocfs2_mask_waiter {
61ccd979bdSMark Fasheh 	struct list_head	mw_item;
62ccd979bdSMark Fasheh 	int			mw_status;
63ccd979bdSMark Fasheh 	struct completion	mw_complete;
64ccd979bdSMark Fasheh 	unsigned long		mw_mask;
65ccd979bdSMark Fasheh 	unsigned long		mw_goal;
668ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
678ddb7b00SSunil Mushran 	unsigned long long 	mw_lock_start;
688ddb7b00SSunil Mushran #endif
69ccd979bdSMark Fasheh };
70ccd979bdSMark Fasheh 
/*
 * ->get_osb() callbacks for lock types whose ->l_priv is not an
 * ocfs2_super pointer; defined further down in this file.
 */
static struct ocfs2_super *
ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *
ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
75ccd979bdSMark Fasheh 
/*
 * Values returned by ->downconvert_worker() callbacks.
 *
 * They steer what ocfs2_unblock_lock() and
 * ocfs2_process_blocked_lock() do next.
 */
enum ocfs2_unblock_action {
	/* Go ahead with the downconvert. */
	UNBLOCK_CONTINUE	= 0,
	/* Go ahead with the downconvert, then fire ->post_unlock(). */
	UNBLOCK_CONTINUE_POST	= 1,
	/* Skip the downconvert, but still fire ->post_unlock(). */
	UNBLOCK_STOP_POST	= 2,
};
90d680efe9SMark Fasheh 
91d680efe9SMark Fasheh struct ocfs2_unblock_ctl {
92d680efe9SMark Fasheh 	int requeue;
93d680efe9SMark Fasheh 	enum ocfs2_unblock_action unblock_action;
94d680efe9SMark Fasheh };
95d680efe9SMark Fasheh 
/*
 * Lockdep class keys, one per cluster lock type.
 *
 * Only ocfs2_lock_res_init_common() uses these, and only when
 * CONFIG_DEBUG_LOCK_ALLOC is set, so keep the array file-local and
 * compile it out otherwise instead of exporting a generically named
 * global symbol.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
#endif
98cb25797dSJan Kara 
/*
 * Forward declarations of the per-lock-type callbacks referenced by
 * the ocfs2_lock_res_ops tables below.
 */

/* Metadata (inode) lock callbacks. */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level);
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
				     int blocking);

/* Dentry lock callbacks. */
static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking);
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres);

/* Quota info lock callback. */
static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);

/* Refcount tree lock callbacks. */
static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
					    int new_level);
static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
					 int blocking);
1188dec98edSTao Ma 
1196cb129f5SAdrian Bunk #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
1206cb129f5SAdrian Bunk 
1216cb129f5SAdrian Bunk /* This aids in debugging situations where a bad LVB might be involved. */
1226cb129f5SAdrian Bunk static void ocfs2_dump_meta_lvb_info(u64 level,
1236cb129f5SAdrian Bunk 				     const char *function,
1246cb129f5SAdrian Bunk 				     unsigned int line,
1256cb129f5SAdrian Bunk 				     struct ocfs2_lock_res *lockres)
1266cb129f5SAdrian Bunk {
127a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1286cb129f5SAdrian Bunk 
1296cb129f5SAdrian Bunk 	mlog(level, "LVB information for %s (called from %s:%u):\n",
1306cb129f5SAdrian Bunk 	     lockres->l_name, function, line);
1316cb129f5SAdrian Bunk 	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
1326cb129f5SAdrian Bunk 	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
1336cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_igeneration));
1346cb129f5SAdrian Bunk 	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
1356cb129f5SAdrian Bunk 	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
1366cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
1376cb129f5SAdrian Bunk 	     be16_to_cpu(lvb->lvb_imode));
1386cb129f5SAdrian Bunk 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
1396cb129f5SAdrian Bunk 	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
1406cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
1416cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
1426cb129f5SAdrian Bunk 	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
1436cb129f5SAdrian Bunk 	     be32_to_cpu(lvb->lvb_iattr));
1446cb129f5SAdrian Bunk }
1456cb129f5SAdrian Bunk 
1466cb129f5SAdrian Bunk 
/*
 * OCFS2 Lock Resource Operations
 *
 * Per-lock-type hooks that fine tune the generic dlmglue locking
 * infrastructure.
 *
 * The simplest lock types point ->l_priv at their struct ocfs2_super
 * and leave every callback unset, letting the default actions manage
 * things.
 *
 * For now each lock type must also provide an init function and
 * trivial lock/unlock wrappers. Call ocfs2_simple_drop_lockres() once
 * the lock is no longer needed (i.e., at object destruction time).
 */
struct ocfs2_lock_res_ops {
	/*
	 * Map an ocfs2_lock_res * to its ocfs2_super *. Only needed
	 * when ->l_priv is not itself an ocfs2_super pointer.
	 */
	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);

	/*
	 * Optional. Runs in the downconvert thread after a successful
	 * downconvert. The lockres is not referenced again once this
	 * has been called, so freeing memory etc. is safe here.
	 *
	 * Exactly when it is invoked is decided by the value
	 * ->downconvert_worker() returns.
	 */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);

	/*
	 * Extra, lock type specific test of whether downconverting is
	 * safe. Return 0 to have the downconvert re-queued for later,
	 * nonzero to let it proceed.
	 *
	 * Most locks are fine with the default check that no
	 * incompatible holders exist.
	 *
	 * Called with the lockres spinlock held.
	 */
	int (*check_downconvert)(struct ocfs2_lock_res *, int);

	/*
	 * Fill in the lock value block. Invoked on downconvert and
	 * when a lock is dropped.
	 *
	 * Lock types using this must set LOCK_TYPE_USES_LVB in the
	 * flags field.
	 *
	 * Called with the lockres spinlock held.
	 */
	void (*set_lvb)(struct ocfs2_lock_res *);

	/*
	 * Runs in the downconvert thread once it has been decided
	 * that a lock will be downconverted. No locks are held at
	 * that point, so work that may schedule (syncing out data,
	 * etc) is allowed.
	 *
	 * Must return one of the ocfs2_unblock_action values to tell
	 * the thread how to proceed.
	 */
	int (*downconvert_worker)(struct ocfs2_lock_res *, int);

	/*
	 * LOCK_TYPE_* bits describing the specific requirements of
	 * this lock type; each flag is described at its definition.
	 */
	int flags;
};
218ccd979bdSMark Fasheh 
/*
 * The lock type wants to "refresh" possibly stale data the first time
 * a meaningful (PRMODE or EXMODE) level is obtained. With this flag
 * set, the ast function sets OCFS2_LOCK_NEEDS_REFRESH in the lockres'
 * l_flags; the locking wrapper is expected to clear
 * OCFS2_LOCK_NEEDS_REFRESH again once it is done.
 */
#define LOCK_TYPE_REQUIRES_REFRESH	0x1

/*
 * The lock type makes use of the lock value block. Its ->set_lvb
 * callback must be defined.
 */
#define LOCK_TYPE_USES_LVB		0x2
234b80fc012SMark Fasheh 
235ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
23654a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
237f625c979SMark Fasheh 	.flags		= 0,
238ccd979bdSMark Fasheh };
239ccd979bdSMark Fasheh 
240e63aecb6SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
24154a7e755SMark Fasheh 	.get_osb	= ocfs2_get_inode_osb,
242810d5aebSMark Fasheh 	.check_downconvert = ocfs2_check_meta_downconvert,
243810d5aebSMark Fasheh 	.set_lvb	= ocfs2_set_meta_lvb,
244f1f54068SMark Fasheh 	.downconvert_worker = ocfs2_data_convert_worker,
245b80fc012SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
246ccd979bdSMark Fasheh };
247ccd979bdSMark Fasheh 
248ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_super_lops = {
249f625c979SMark Fasheh 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
250ccd979bdSMark Fasheh };
251ccd979bdSMark Fasheh 
252ccd979bdSMark Fasheh static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
253f625c979SMark Fasheh 	.flags		= 0,
254ccd979bdSMark Fasheh };
255ccd979bdSMark Fasheh 
2566ca497a8Swengang wang static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
2576ca497a8Swengang wang 	.flags		= 0,
2586ca497a8Swengang wang };
2596ca497a8Swengang wang 
26083273932SSrinivas Eeda static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
26183273932SSrinivas Eeda 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
26283273932SSrinivas Eeda };
26383273932SSrinivas Eeda 
264d680efe9SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
26554a7e755SMark Fasheh 	.get_osb	= ocfs2_get_dentry_osb,
266d680efe9SMark Fasheh 	.post_unlock	= ocfs2_dentry_post_unlock,
267cc567d89SMark Fasheh 	.downconvert_worker = ocfs2_dentry_convert_worker,
268f625c979SMark Fasheh 	.flags		= 0,
269d680efe9SMark Fasheh };
270d680efe9SMark Fasheh 
27150008630STiger Yang static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
27250008630STiger Yang 	.get_osb	= ocfs2_get_inode_osb,
27350008630STiger Yang 	.flags		= 0,
27450008630STiger Yang };
27550008630STiger Yang 
276cf8e06f1SMark Fasheh static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
277cf8e06f1SMark Fasheh 	.get_osb	= ocfs2_get_file_osb,
278cf8e06f1SMark Fasheh 	.flags		= 0,
279cf8e06f1SMark Fasheh };
280cf8e06f1SMark Fasheh 
2819e33d69fSJan Kara static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
2829e33d69fSJan Kara 	.set_lvb	= ocfs2_set_qinfo_lvb,
2839e33d69fSJan Kara 	.get_osb	= ocfs2_get_qinfo_osb,
2849e33d69fSJan Kara 	.flags		= LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
2859e33d69fSJan Kara };
2869e33d69fSJan Kara 
2878dec98edSTao Ma static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
2888dec98edSTao Ma 	.check_downconvert = ocfs2_check_refcount_downconvert,
2898dec98edSTao Ma 	.downconvert_worker = ocfs2_refcount_convert_worker,
2908dec98edSTao Ma 	.flags		= 0,
2918dec98edSTao Ma };
2928dec98edSTao Ma 
293ccd979bdSMark Fasheh static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
294ccd979bdSMark Fasheh {
295ccd979bdSMark Fasheh 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
29650008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_RW ||
29750008630STiger Yang 		lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
298ccd979bdSMark Fasheh }
299ccd979bdSMark Fasheh 
300c0e41338SJoel Becker static inline struct ocfs2_lock_res *ocfs2_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
301a796d286SJoel Becker {
302a796d286SJoel Becker 	return container_of(lksb, struct ocfs2_lock_res, l_lksb);
303a796d286SJoel Becker }
304a796d286SJoel Becker 
305ccd979bdSMark Fasheh static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
306ccd979bdSMark Fasheh {
307ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_is_inode_lock(lockres));
308ccd979bdSMark Fasheh 
309ccd979bdSMark Fasheh 	return (struct inode *) lockres->l_priv;
310ccd979bdSMark Fasheh }
311ccd979bdSMark Fasheh 
312d680efe9SMark Fasheh static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
313d680efe9SMark Fasheh {
314d680efe9SMark Fasheh 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
315d680efe9SMark Fasheh 
316d680efe9SMark Fasheh 	return (struct ocfs2_dentry_lock *)lockres->l_priv;
317d680efe9SMark Fasheh }
318d680efe9SMark Fasheh 
3199e33d69fSJan Kara static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
3209e33d69fSJan Kara {
3219e33d69fSJan Kara 	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
3229e33d69fSJan Kara 
3239e33d69fSJan Kara 	return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
3249e33d69fSJan Kara }
3259e33d69fSJan Kara 
3268dec98edSTao Ma static inline struct ocfs2_refcount_tree *
3278dec98edSTao Ma ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
3288dec98edSTao Ma {
3298dec98edSTao Ma 	return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
3308dec98edSTao Ma }
3318dec98edSTao Ma 
33254a7e755SMark Fasheh static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
33354a7e755SMark Fasheh {
33454a7e755SMark Fasheh 	if (lockres->l_ops->get_osb)
33554a7e755SMark Fasheh 		return lockres->l_ops->get_osb(lockres);
33654a7e755SMark Fasheh 
33754a7e755SMark Fasheh 	return (struct ocfs2_super *)lockres->l_priv;
33854a7e755SMark Fasheh }
33954a7e755SMark Fasheh 
340ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
341ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
342ccd979bdSMark Fasheh 			     int level,
343bd3e7610SJoel Becker 			     u32 dlm_flags);
344ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
345ccd979bdSMark Fasheh 						     int wanted);
346cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
347ccd979bdSMark Fasheh 				   struct ocfs2_lock_res *lockres,
348cb25797dSJan Kara 				   int level, unsigned long caller_ip);
349cb25797dSJan Kara static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
350cb25797dSJan Kara 					struct ocfs2_lock_res *lockres,
351cb25797dSJan Kara 					int level)
352cb25797dSJan Kara {
353cb25797dSJan Kara 	__ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
354cb25797dSJan Kara }
355cb25797dSJan Kara 
/* Forward declarations of the generic AST/BAST handling helpers. */
static inline void
ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void
ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
				     int level);
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static inline void
ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, int convert);
/*
 * Log a DLM error for a lock resource at ML_ERROR level.
 *
 * @_func:    name of the DLM call that failed (a string)
 * @_err:     error code it returned
 * @_lockres: pointer to the affected struct ocfs2_lock_res
 *
 * Dentry lock names are printed specially: only the first
 * OCFS2_DENTRY_LOCK_INO_START - 1 characters of l_name are shown as
 * text, followed by the value of ocfs2_get_dentry_lock_ino() in hex.
 *
 * Every argument is parenthesized so that expression arguments
 * expand correctly. Note that @_lockres is evaluated more than once.
 */
#define ocfs2_log_dlm_error(_func, _err, _lockres) do {					\
	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)				\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n",	\
		     (_err), (_func), (_lockres)->l_name);				\
	else										\
		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n",	\
		     (_err), (_func), OCFS2_DENTRY_LOCK_INO_START - 1,			\
		     (_lockres)->l_name,						\
		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));		\
} while (0)
/* Forward declarations for the downconvert / cancel-convert machinery. */
static int ocfs2_downconvert_thread(void *arg);
static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_inode_lock_update(struct inode *inode,
				   struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
static unsigned int
ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, int new_level);
static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
				  struct ocfs2_lock_res *lockres,
				  int new_level, int lvb,
				  unsigned int generation);
static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
					struct ocfs2_lock_res *lockres);
static int ocfs2_cancel_convert(struct ocfs2_super *osb,
				struct ocfs2_lock_res *lockres);
391cf8e06f1SMark Fasheh 
392ccd979bdSMark Fasheh 
393ccd979bdSMark Fasheh static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
394ccd979bdSMark Fasheh 				  u64 blkno,
395ccd979bdSMark Fasheh 				  u32 generation,
396ccd979bdSMark Fasheh 				  char *name)
397ccd979bdSMark Fasheh {
398ccd979bdSMark Fasheh 	int len;
399ccd979bdSMark Fasheh 
400ccd979bdSMark Fasheh 	mlog_entry_void();
401ccd979bdSMark Fasheh 
402ccd979bdSMark Fasheh 	BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
403ccd979bdSMark Fasheh 
404b0697053SMark Fasheh 	len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
405b0697053SMark Fasheh 		       ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
406b0697053SMark Fasheh 		       (long long)blkno, generation);
407ccd979bdSMark Fasheh 
408ccd979bdSMark Fasheh 	BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
409ccd979bdSMark Fasheh 
410ccd979bdSMark Fasheh 	mlog(0, "built lock resource with name: %s\n", name);
411ccd979bdSMark Fasheh 
412ccd979bdSMark Fasheh 	mlog_exit_void();
413ccd979bdSMark Fasheh }
414ccd979bdSMark Fasheh 
41534af946aSIngo Molnar static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
416ccd979bdSMark Fasheh 
417ccd979bdSMark Fasheh static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
418ccd979bdSMark Fasheh 				       struct ocfs2_dlm_debug *dlm_debug)
419ccd979bdSMark Fasheh {
420ccd979bdSMark Fasheh 	mlog(0, "Add tracking for lockres %s\n", res->l_name);
421ccd979bdSMark Fasheh 
422ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
423ccd979bdSMark Fasheh 	list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
424ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
425ccd979bdSMark Fasheh }
426ccd979bdSMark Fasheh 
427ccd979bdSMark Fasheh static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
428ccd979bdSMark Fasheh {
429ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
430ccd979bdSMark Fasheh 	if (!list_empty(&res->l_debug_list))
431ccd979bdSMark Fasheh 		list_del_init(&res->l_debug_list);
432ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
433ccd979bdSMark Fasheh }
434ccd979bdSMark Fasheh 
4358ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
4368ddb7b00SSunil Mushran static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
4378ddb7b00SSunil Mushran {
4388ddb7b00SSunil Mushran 	res->l_lock_num_prmode = 0;
4398ddb7b00SSunil Mushran 	res->l_lock_num_prmode_failed = 0;
4408ddb7b00SSunil Mushran 	res->l_lock_total_prmode = 0;
4418ddb7b00SSunil Mushran 	res->l_lock_max_prmode = 0;
4428ddb7b00SSunil Mushran 	res->l_lock_num_exmode = 0;
4438ddb7b00SSunil Mushran 	res->l_lock_num_exmode_failed = 0;
4448ddb7b00SSunil Mushran 	res->l_lock_total_exmode = 0;
4458ddb7b00SSunil Mushran 	res->l_lock_max_exmode = 0;
4468ddb7b00SSunil Mushran 	res->l_lock_refresh = 0;
4478ddb7b00SSunil Mushran }
4488ddb7b00SSunil Mushran 
4498ddb7b00SSunil Mushran static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
4508ddb7b00SSunil Mushran 				    struct ocfs2_mask_waiter *mw, int ret)
4518ddb7b00SSunil Mushran {
4528ddb7b00SSunil Mushran 	unsigned long long *num, *sum;
4538ddb7b00SSunil Mushran 	unsigned int *max, *failed;
4548ddb7b00SSunil Mushran 	struct timespec ts = current_kernel_time();
4558ddb7b00SSunil Mushran 	unsigned long long time = timespec_to_ns(&ts) - mw->mw_lock_start;
4568ddb7b00SSunil Mushran 
4578ddb7b00SSunil Mushran 	if (level == LKM_PRMODE) {
4588ddb7b00SSunil Mushran 		num = &res->l_lock_num_prmode;
4598ddb7b00SSunil Mushran 		sum = &res->l_lock_total_prmode;
4608ddb7b00SSunil Mushran 		max = &res->l_lock_max_prmode;
4618ddb7b00SSunil Mushran 		failed = &res->l_lock_num_prmode_failed;
4628ddb7b00SSunil Mushran 	} else if (level == LKM_EXMODE) {
4638ddb7b00SSunil Mushran 		num = &res->l_lock_num_exmode;
4648ddb7b00SSunil Mushran 		sum = &res->l_lock_total_exmode;
4658ddb7b00SSunil Mushran 		max = &res->l_lock_max_exmode;
4668ddb7b00SSunil Mushran 		failed = &res->l_lock_num_exmode_failed;
4678ddb7b00SSunil Mushran 	} else
4688ddb7b00SSunil Mushran 		return;
4698ddb7b00SSunil Mushran 
4708ddb7b00SSunil Mushran 	(*num)++;
4718ddb7b00SSunil Mushran 	(*sum) += time;
4728ddb7b00SSunil Mushran 	if (time > *max)
4738ddb7b00SSunil Mushran 		*max = time;
4748ddb7b00SSunil Mushran 	if (ret)
4758ddb7b00SSunil Mushran 		(*failed)++;
4768ddb7b00SSunil Mushran }
4778ddb7b00SSunil Mushran 
4788ddb7b00SSunil Mushran static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
4798ddb7b00SSunil Mushran {
4808ddb7b00SSunil Mushran 	lockres->l_lock_refresh++;
4818ddb7b00SSunil Mushran }
4828ddb7b00SSunil Mushran 
4838ddb7b00SSunil Mushran static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
4848ddb7b00SSunil Mushran {
4858ddb7b00SSunil Mushran 	struct timespec ts = current_kernel_time();
4868ddb7b00SSunil Mushran 	mw->mw_lock_start = timespec_to_ns(&ts);
4878ddb7b00SSunil Mushran }
4888ddb7b00SSunil Mushran #else
/*
 * Without CONFIG_OCFS2_FS_STATS the statistics hooks are empty
 * inlines, so callers need no #ifdefs of their own.
 */
static inline void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
	/* statistics disabled */
}

static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
					   int level,
					   struct ocfs2_mask_waiter *mw,
					   int ret)
{
	/* statistics disabled */
}

static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
	/* statistics disabled */
}

static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
	/* statistics disabled */
}
5028ddb7b00SSunil Mushran #endif
5038ddb7b00SSunil Mushran 
504ccd979bdSMark Fasheh static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
505ccd979bdSMark Fasheh 				       struct ocfs2_lock_res *res,
506ccd979bdSMark Fasheh 				       enum ocfs2_lock_type type,
507ccd979bdSMark Fasheh 				       struct ocfs2_lock_res_ops *ops,
508ccd979bdSMark Fasheh 				       void *priv)
509ccd979bdSMark Fasheh {
510ccd979bdSMark Fasheh 	res->l_type          = type;
511ccd979bdSMark Fasheh 	res->l_ops           = ops;
512ccd979bdSMark Fasheh 	res->l_priv          = priv;
513ccd979bdSMark Fasheh 
514bd3e7610SJoel Becker 	res->l_level         = DLM_LOCK_IV;
515bd3e7610SJoel Becker 	res->l_requested     = DLM_LOCK_IV;
516bd3e7610SJoel Becker 	res->l_blocking      = DLM_LOCK_IV;
517ccd979bdSMark Fasheh 	res->l_action        = OCFS2_AST_INVALID;
518ccd979bdSMark Fasheh 	res->l_unlock_action = OCFS2_UNLOCK_INVALID;
519ccd979bdSMark Fasheh 
520ccd979bdSMark Fasheh 	res->l_flags         = OCFS2_LOCK_INITIALIZED;
521ccd979bdSMark Fasheh 
522ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
5238ddb7b00SSunil Mushran 
5248ddb7b00SSunil Mushran 	ocfs2_init_lock_stats(res);
525cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
526cb25797dSJan Kara 	if (type != OCFS2_LOCK_TYPE_OPEN)
527cb25797dSJan Kara 		lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
528cb25797dSJan Kara 				 &lockdep_keys[type], 0);
529cb25797dSJan Kara 	else
530cb25797dSJan Kara 		res->l_lockdep_map.key = NULL;
531cb25797dSJan Kara #endif
532ccd979bdSMark Fasheh }
533ccd979bdSMark Fasheh 
534ccd979bdSMark Fasheh void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
535ccd979bdSMark Fasheh {
536ccd979bdSMark Fasheh 	/* This also clears out the lock status block */
537ccd979bdSMark Fasheh 	memset(res, 0, sizeof(struct ocfs2_lock_res));
538ccd979bdSMark Fasheh 	spin_lock_init(&res->l_lock);
539ccd979bdSMark Fasheh 	init_waitqueue_head(&res->l_event);
540ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_blocked_list);
541ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&res->l_mask_waiters);
542ccd979bdSMark Fasheh }
543ccd979bdSMark Fasheh 
544ccd979bdSMark Fasheh void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
545ccd979bdSMark Fasheh 			       enum ocfs2_lock_type type,
54624c19ef4SMark Fasheh 			       unsigned int generation,
547ccd979bdSMark Fasheh 			       struct inode *inode)
548ccd979bdSMark Fasheh {
549ccd979bdSMark Fasheh 	struct ocfs2_lock_res_ops *ops;
550ccd979bdSMark Fasheh 
551ccd979bdSMark Fasheh 	switch(type) {
552ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_RW:
553ccd979bdSMark Fasheh 			ops = &ocfs2_inode_rw_lops;
554ccd979bdSMark Fasheh 			break;
555ccd979bdSMark Fasheh 		case OCFS2_LOCK_TYPE_META:
556e63aecb6SMark Fasheh 			ops = &ocfs2_inode_inode_lops;
557ccd979bdSMark Fasheh 			break;
55850008630STiger Yang 		case OCFS2_LOCK_TYPE_OPEN:
55950008630STiger Yang 			ops = &ocfs2_inode_open_lops;
56050008630STiger Yang 			break;
561ccd979bdSMark Fasheh 		default:
562ccd979bdSMark Fasheh 			mlog_bug_on_msg(1, "type: %d\n", type);
563ccd979bdSMark Fasheh 			ops = NULL; /* thanks, gcc */
564ccd979bdSMark Fasheh 			break;
565ccd979bdSMark Fasheh 	};
566ccd979bdSMark Fasheh 
567d680efe9SMark Fasheh 	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
56824c19ef4SMark Fasheh 			      generation, res->l_name);
569d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
570d680efe9SMark Fasheh }
571d680efe9SMark Fasheh 
57254a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
57354a7e755SMark Fasheh {
57454a7e755SMark Fasheh 	struct inode *inode = ocfs2_lock_res_inode(lockres);
57554a7e755SMark Fasheh 
57654a7e755SMark Fasheh 	return OCFS2_SB(inode->i_sb);
57754a7e755SMark Fasheh }
57854a7e755SMark Fasheh 
5799e33d69fSJan Kara static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
5809e33d69fSJan Kara {
5819e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *info = lockres->l_priv;
5829e33d69fSJan Kara 
5839e33d69fSJan Kara 	return OCFS2_SB(info->dqi_gi.dqi_sb);
5849e33d69fSJan Kara }
5859e33d69fSJan Kara 
586cf8e06f1SMark Fasheh static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
587cf8e06f1SMark Fasheh {
588cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = lockres->l_priv;
589cf8e06f1SMark Fasheh 
590cf8e06f1SMark Fasheh 	return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb);
591cf8e06f1SMark Fasheh }
592cf8e06f1SMark Fasheh 
593d680efe9SMark Fasheh static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
594d680efe9SMark Fasheh {
595d680efe9SMark Fasheh 	__be64 inode_blkno_be;
596d680efe9SMark Fasheh 
597d680efe9SMark Fasheh 	memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
598d680efe9SMark Fasheh 	       sizeof(__be64));
599d680efe9SMark Fasheh 
600d680efe9SMark Fasheh 	return be64_to_cpu(inode_blkno_be);
601d680efe9SMark Fasheh }
602d680efe9SMark Fasheh 
60354a7e755SMark Fasheh static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
60454a7e755SMark Fasheh {
60554a7e755SMark Fasheh 	struct ocfs2_dentry_lock *dl = lockres->l_priv;
60654a7e755SMark Fasheh 
60754a7e755SMark Fasheh 	return OCFS2_SB(dl->dl_inode->i_sb);
60854a7e755SMark Fasheh }
60954a7e755SMark Fasheh 
610d680efe9SMark Fasheh void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
611d680efe9SMark Fasheh 				u64 parent, struct inode *inode)
612d680efe9SMark Fasheh {
613d680efe9SMark Fasheh 	int len;
614d680efe9SMark Fasheh 	u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
615d680efe9SMark Fasheh 	__be64 inode_blkno_be = cpu_to_be64(inode_blkno);
616d680efe9SMark Fasheh 	struct ocfs2_lock_res *lockres = &dl->dl_lockres;
617d680efe9SMark Fasheh 
618d680efe9SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
619d680efe9SMark Fasheh 
620d680efe9SMark Fasheh 	/*
621d680efe9SMark Fasheh 	 * Unfortunately, the standard lock naming scheme won't work
622d680efe9SMark Fasheh 	 * here because we have two 16 byte values to use. Instead,
623d680efe9SMark Fasheh 	 * we'll stuff the inode number as a binary value. We still
624d680efe9SMark Fasheh 	 * want error prints to show something without garbling the
625d680efe9SMark Fasheh 	 * display, so drop a null byte in there before the inode
626d680efe9SMark Fasheh 	 * number. A future version of OCFS2 will likely use all
627d680efe9SMark Fasheh 	 * binary lock names. The stringified names have been a
628d680efe9SMark Fasheh 	 * tremendous aid in debugging, but now that the debugfs
629d680efe9SMark Fasheh 	 * interface exists, we can mangle things there if need be.
630d680efe9SMark Fasheh 	 *
631d680efe9SMark Fasheh 	 * NOTE: We also drop the standard "pad" value (the total lock
632d680efe9SMark Fasheh 	 * name size stays the same though - the last part is all
633d680efe9SMark Fasheh 	 * zeros due to the memset in ocfs2_lock_res_init_once()
634d680efe9SMark Fasheh 	 */
635d680efe9SMark Fasheh 	len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
636d680efe9SMark Fasheh 		       "%c%016llx",
637d680efe9SMark Fasheh 		       ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
638d680efe9SMark Fasheh 		       (long long)parent);
639d680efe9SMark Fasheh 
640d680efe9SMark Fasheh 	BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
641d680efe9SMark Fasheh 
642d680efe9SMark Fasheh 	memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
643d680efe9SMark Fasheh 	       sizeof(__be64));
644d680efe9SMark Fasheh 
645d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
646d680efe9SMark Fasheh 				   OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
647d680efe9SMark Fasheh 				   dl);
648ccd979bdSMark Fasheh }
649ccd979bdSMark Fasheh 
650ccd979bdSMark Fasheh static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
651ccd979bdSMark Fasheh 				      struct ocfs2_super *osb)
652ccd979bdSMark Fasheh {
653ccd979bdSMark Fasheh 	/* Superblock lockres doesn't come from a slab so we call init
654ccd979bdSMark Fasheh 	 * once on it manually.  */
655ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
656d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
657d680efe9SMark Fasheh 			      0, res->l_name);
658ccd979bdSMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
659ccd979bdSMark Fasheh 				   &ocfs2_super_lops, osb);
660ccd979bdSMark Fasheh }
661ccd979bdSMark Fasheh 
662ccd979bdSMark Fasheh static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
663ccd979bdSMark Fasheh 				       struct ocfs2_super *osb)
664ccd979bdSMark Fasheh {
665ccd979bdSMark Fasheh 	/* Rename lockres doesn't come from a slab so we call init
666ccd979bdSMark Fasheh 	 * once on it manually.  */
667ccd979bdSMark Fasheh 	ocfs2_lock_res_init_once(res);
668d680efe9SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
669d680efe9SMark Fasheh 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
670ccd979bdSMark Fasheh 				   &ocfs2_rename_lops, osb);
671ccd979bdSMark Fasheh }
672ccd979bdSMark Fasheh 
6736ca497a8Swengang wang static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
6746ca497a8Swengang wang 					 struct ocfs2_super *osb)
6756ca497a8Swengang wang {
6766ca497a8Swengang wang 	/* nfs_sync lockres doesn't come from a slab so we call init
6776ca497a8Swengang wang 	 * once on it manually.  */
6786ca497a8Swengang wang 	ocfs2_lock_res_init_once(res);
6796ca497a8Swengang wang 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_NFS_SYNC, 0, 0, res->l_name);
6806ca497a8Swengang wang 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_NFS_SYNC,
6816ca497a8Swengang wang 				   &ocfs2_nfs_sync_lops, osb);
6826ca497a8Swengang wang }
6836ca497a8Swengang wang 
68483273932SSrinivas Eeda static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
68583273932SSrinivas Eeda 					    struct ocfs2_super *osb)
68683273932SSrinivas Eeda {
68783273932SSrinivas Eeda 	ocfs2_lock_res_init_once(res);
68883273932SSrinivas Eeda 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
68983273932SSrinivas Eeda 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
69083273932SSrinivas Eeda 				   &ocfs2_orphan_scan_lops, osb);
69183273932SSrinivas Eeda }
69283273932SSrinivas Eeda 
693cf8e06f1SMark Fasheh void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
694cf8e06f1SMark Fasheh 			      struct ocfs2_file_private *fp)
695cf8e06f1SMark Fasheh {
696cf8e06f1SMark Fasheh 	struct inode *inode = fp->fp_file->f_mapping->host;
697cf8e06f1SMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
698cf8e06f1SMark Fasheh 
699cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_once(lockres);
700cf8e06f1SMark Fasheh 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno,
701cf8e06f1SMark Fasheh 			      inode->i_generation, lockres->l_name);
702cf8e06f1SMark Fasheh 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
703cf8e06f1SMark Fasheh 				   OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops,
704cf8e06f1SMark Fasheh 				   fp);
705cf8e06f1SMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_NOCACHE;
706cf8e06f1SMark Fasheh }
707cf8e06f1SMark Fasheh 
7089e33d69fSJan Kara void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
7099e33d69fSJan Kara 			       struct ocfs2_mem_dqinfo *info)
7109e33d69fSJan Kara {
7119e33d69fSJan Kara 	ocfs2_lock_res_init_once(lockres);
7129e33d69fSJan Kara 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
7139e33d69fSJan Kara 			      0, lockres->l_name);
7149e33d69fSJan Kara 	ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
7159e33d69fSJan Kara 				   OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
7169e33d69fSJan Kara 				   info);
7179e33d69fSJan Kara }
7189e33d69fSJan Kara 
7198dec98edSTao Ma void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
7208dec98edSTao Ma 				  struct ocfs2_super *osb, u64 ref_blkno,
7218dec98edSTao Ma 				  unsigned int generation)
7228dec98edSTao Ma {
7238dec98edSTao Ma 	ocfs2_lock_res_init_once(lockres);
7248dec98edSTao Ma 	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
7258dec98edSTao Ma 			      generation, lockres->l_name);
7268dec98edSTao Ma 	ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
7278dec98edSTao Ma 				   &ocfs2_refcount_block_lops, osb);
7288dec98edSTao Ma }
7298dec98edSTao Ma 
730ccd979bdSMark Fasheh void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
731ccd979bdSMark Fasheh {
732ccd979bdSMark Fasheh 	mlog_entry_void();
733ccd979bdSMark Fasheh 
734ccd979bdSMark Fasheh 	if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
735ccd979bdSMark Fasheh 		return;
736ccd979bdSMark Fasheh 
737ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
738ccd979bdSMark Fasheh 
739ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
740ccd979bdSMark Fasheh 			"Lockres %s is on the blocked list\n",
741ccd979bdSMark Fasheh 			res->l_name);
742ccd979bdSMark Fasheh 	mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
743ccd979bdSMark Fasheh 			"Lockres %s has mask waiters pending\n",
744ccd979bdSMark Fasheh 			res->l_name);
745ccd979bdSMark Fasheh 	mlog_bug_on_msg(spin_is_locked(&res->l_lock),
746ccd979bdSMark Fasheh 			"Lockres %s is locked\n",
747ccd979bdSMark Fasheh 			res->l_name);
748ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ro_holders,
749ccd979bdSMark Fasheh 			"Lockres %s has %u ro holders\n",
750ccd979bdSMark Fasheh 			res->l_name, res->l_ro_holders);
751ccd979bdSMark Fasheh 	mlog_bug_on_msg(res->l_ex_holders,
752ccd979bdSMark Fasheh 			"Lockres %s has %u ex holders\n",
753ccd979bdSMark Fasheh 			res->l_name, res->l_ex_holders);
754ccd979bdSMark Fasheh 
755ccd979bdSMark Fasheh 	/* Need to clear out the lock status block for the dlm */
756ccd979bdSMark Fasheh 	memset(&res->l_lksb, 0, sizeof(res->l_lksb));
757ccd979bdSMark Fasheh 
758ccd979bdSMark Fasheh 	res->l_flags = 0UL;
759ccd979bdSMark Fasheh 	mlog_exit_void();
760ccd979bdSMark Fasheh }
761ccd979bdSMark Fasheh 
762ccd979bdSMark Fasheh static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
763ccd979bdSMark Fasheh 				     int level)
764ccd979bdSMark Fasheh {
765ccd979bdSMark Fasheh 	mlog_entry_void();
766ccd979bdSMark Fasheh 
767ccd979bdSMark Fasheh 	BUG_ON(!lockres);
768ccd979bdSMark Fasheh 
769ccd979bdSMark Fasheh 	switch(level) {
770bd3e7610SJoel Becker 	case DLM_LOCK_EX:
771ccd979bdSMark Fasheh 		lockres->l_ex_holders++;
772ccd979bdSMark Fasheh 		break;
773bd3e7610SJoel Becker 	case DLM_LOCK_PR:
774ccd979bdSMark Fasheh 		lockres->l_ro_holders++;
775ccd979bdSMark Fasheh 		break;
776ccd979bdSMark Fasheh 	default:
777ccd979bdSMark Fasheh 		BUG();
778ccd979bdSMark Fasheh 	}
779ccd979bdSMark Fasheh 
780ccd979bdSMark Fasheh 	mlog_exit_void();
781ccd979bdSMark Fasheh }
782ccd979bdSMark Fasheh 
783ccd979bdSMark Fasheh static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
784ccd979bdSMark Fasheh 				     int level)
785ccd979bdSMark Fasheh {
786ccd979bdSMark Fasheh 	mlog_entry_void();
787ccd979bdSMark Fasheh 
788ccd979bdSMark Fasheh 	BUG_ON(!lockres);
789ccd979bdSMark Fasheh 
790ccd979bdSMark Fasheh 	switch(level) {
791bd3e7610SJoel Becker 	case DLM_LOCK_EX:
792ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ex_holders);
793ccd979bdSMark Fasheh 		lockres->l_ex_holders--;
794ccd979bdSMark Fasheh 		break;
795bd3e7610SJoel Becker 	case DLM_LOCK_PR:
796ccd979bdSMark Fasheh 		BUG_ON(!lockres->l_ro_holders);
797ccd979bdSMark Fasheh 		lockres->l_ro_holders--;
798ccd979bdSMark Fasheh 		break;
799ccd979bdSMark Fasheh 	default:
800ccd979bdSMark Fasheh 		BUG();
801ccd979bdSMark Fasheh 	}
802ccd979bdSMark Fasheh 	mlog_exit_void();
803ccd979bdSMark Fasheh }
804ccd979bdSMark Fasheh 
805ccd979bdSMark Fasheh /* WARNING: This function lives in a world where the only three lock
806ccd979bdSMark Fasheh  * levels are EX, PR, and NL. It *will* have to be adjusted when more
807ccd979bdSMark Fasheh  * lock types are added. */
808ccd979bdSMark Fasheh static inline int ocfs2_highest_compat_lock_level(int level)
809ccd979bdSMark Fasheh {
810bd3e7610SJoel Becker 	int new_level = DLM_LOCK_EX;
811ccd979bdSMark Fasheh 
812bd3e7610SJoel Becker 	if (level == DLM_LOCK_EX)
813bd3e7610SJoel Becker 		new_level = DLM_LOCK_NL;
814bd3e7610SJoel Becker 	else if (level == DLM_LOCK_PR)
815bd3e7610SJoel Becker 		new_level = DLM_LOCK_PR;
816ccd979bdSMark Fasheh 	return new_level;
817ccd979bdSMark Fasheh }
818ccd979bdSMark Fasheh 
819ccd979bdSMark Fasheh static void lockres_set_flags(struct ocfs2_lock_res *lockres,
820ccd979bdSMark Fasheh 			      unsigned long newflags)
821ccd979bdSMark Fasheh {
822800deef3SChristoph Hellwig 	struct ocfs2_mask_waiter *mw, *tmp;
823ccd979bdSMark Fasheh 
824ccd979bdSMark Fasheh  	assert_spin_locked(&lockres->l_lock);
825ccd979bdSMark Fasheh 
826ccd979bdSMark Fasheh 	lockres->l_flags = newflags;
827ccd979bdSMark Fasheh 
828800deef3SChristoph Hellwig 	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
829ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
830ccd979bdSMark Fasheh 			continue;
831ccd979bdSMark Fasheh 
832ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
833ccd979bdSMark Fasheh 		mw->mw_status = 0;
834ccd979bdSMark Fasheh 		complete(&mw->mw_complete);
835ccd979bdSMark Fasheh 	}
836ccd979bdSMark Fasheh }
837ccd979bdSMark Fasheh static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
838ccd979bdSMark Fasheh {
839ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags | or);
840ccd979bdSMark Fasheh }
841ccd979bdSMark Fasheh static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
842ccd979bdSMark Fasheh 				unsigned long clear)
843ccd979bdSMark Fasheh {
844ccd979bdSMark Fasheh 	lockres_set_flags(lockres, lockres->l_flags & ~clear);
845ccd979bdSMark Fasheh }
846ccd979bdSMark Fasheh 
847ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
848ccd979bdSMark Fasheh {
849ccd979bdSMark Fasheh 	mlog_entry_void();
850ccd979bdSMark Fasheh 
851ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
852ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
853ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
854bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
855ccd979bdSMark Fasheh 
856ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
857ccd979bdSMark Fasheh 	if (lockres->l_level <=
858ccd979bdSMark Fasheh 	    ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
859bd3e7610SJoel Becker 		lockres->l_blocking = DLM_LOCK_NL;
860ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
861ccd979bdSMark Fasheh 	}
862ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
863ccd979bdSMark Fasheh 
864ccd979bdSMark Fasheh 	mlog_exit_void();
865ccd979bdSMark Fasheh }
866ccd979bdSMark Fasheh 
867ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
868ccd979bdSMark Fasheh {
869ccd979bdSMark Fasheh 	mlog_entry_void();
870ccd979bdSMark Fasheh 
871ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
872ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
873ccd979bdSMark Fasheh 
874ccd979bdSMark Fasheh 	/* Convert from RO to EX doesn't really need anything as our
875ccd979bdSMark Fasheh 	 * information is already up to data. Convert from NL to
876ccd979bdSMark Fasheh 	 * *anything* however should mark ourselves as needing an
877ccd979bdSMark Fasheh 	 * update */
878bd3e7610SJoel Becker 	if (lockres->l_level == DLM_LOCK_NL &&
879f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
880ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
881ccd979bdSMark Fasheh 
882ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
883a1912826SSunil Mushran 
884a1912826SSunil Mushran 	/*
885a1912826SSunil Mushran 	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
886a1912826SSunil Mushran 	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
887a1912826SSunil Mushran 	 * downconverting the lock before the upconvert has fully completed.
888a1912826SSunil Mushran 	 */
889a1912826SSunil Mushran 	lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
890a1912826SSunil Mushran 
891ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
892ccd979bdSMark Fasheh 
893ccd979bdSMark Fasheh 	mlog_exit_void();
894ccd979bdSMark Fasheh }
895ccd979bdSMark Fasheh 
896ccd979bdSMark Fasheh static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
897ccd979bdSMark Fasheh {
898ccd979bdSMark Fasheh 	mlog_entry_void();
899ccd979bdSMark Fasheh 
9003cf0c507SRoel Kluin 	BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
901ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
902ccd979bdSMark Fasheh 
903bd3e7610SJoel Becker 	if (lockres->l_requested > DLM_LOCK_NL &&
904f625c979SMark Fasheh 	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
905f625c979SMark Fasheh 	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
906ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
907ccd979bdSMark Fasheh 
908ccd979bdSMark Fasheh 	lockres->l_level = lockres->l_requested;
909ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
910ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
911ccd979bdSMark Fasheh 
912ccd979bdSMark Fasheh 	mlog_exit_void();
913ccd979bdSMark Fasheh }
914ccd979bdSMark Fasheh 
915ccd979bdSMark Fasheh static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
916ccd979bdSMark Fasheh 				     int level)
917ccd979bdSMark Fasheh {
918ccd979bdSMark Fasheh 	int needs_downconvert = 0;
919ccd979bdSMark Fasheh 	mlog_entry_void();
920ccd979bdSMark Fasheh 
921ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
922ccd979bdSMark Fasheh 
923ccd979bdSMark Fasheh 	if (level > lockres->l_blocking) {
924ccd979bdSMark Fasheh 		/* only schedule a downconvert if we haven't already scheduled
925ccd979bdSMark Fasheh 		 * one that goes low enough to satisfy the level we're
926ccd979bdSMark Fasheh 		 * blocking.  this also catches the case where we get
927ccd979bdSMark Fasheh 		 * duplicate BASTs */
928ccd979bdSMark Fasheh 		if (ocfs2_highest_compat_lock_level(level) <
929ccd979bdSMark Fasheh 		    ocfs2_highest_compat_lock_level(lockres->l_blocking))
930ccd979bdSMark Fasheh 			needs_downconvert = 1;
931ccd979bdSMark Fasheh 
932ccd979bdSMark Fasheh 		lockres->l_blocking = level;
933ccd979bdSMark Fasheh 	}
934ccd979bdSMark Fasheh 
9350b94a909SWengang Wang 	if (needs_downconvert)
9360b94a909SWengang Wang 		lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
9370b94a909SWengang Wang 
938ccd979bdSMark Fasheh 	mlog_exit(needs_downconvert);
939ccd979bdSMark Fasheh 	return needs_downconvert;
940ccd979bdSMark Fasheh }
941ccd979bdSMark Fasheh 
942de551246SJoel Becker /*
943de551246SJoel Becker  * OCFS2_LOCK_PENDING and l_pending_gen.
944de551246SJoel Becker  *
945de551246SJoel Becker  * Why does OCFS2_LOCK_PENDING exist?  To close a race between setting
946de551246SJoel Becker  * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock().  See ocfs2_unblock_lock()
947de551246SJoel Becker  * for more details on the race.
948de551246SJoel Becker  *
949de551246SJoel Becker  * OCFS2_LOCK_PENDING closes the race quite nicely.  However, it introduces
950de551246SJoel Becker  * a race on itself.  In o2dlm, we can get the ast before ocfs2_dlm_lock()
951de551246SJoel Becker  * returns.  The ast clears OCFS2_LOCK_BUSY, and must therefore clear
952de551246SJoel Becker  * OCFS2_LOCK_PENDING at the same time.  When ocfs2_dlm_lock() returns,
953de551246SJoel Becker  * the caller is going to try to clear PENDING again.  If nothing else is
954de551246SJoel Becker  * happening, __lockres_clear_pending() sees PENDING is unset and does
955de551246SJoel Becker  * nothing.
956de551246SJoel Becker  *
957de551246SJoel Becker  * But what if another path (eg downconvert thread) has just started a
958de551246SJoel Becker  * new locking action?  The other path has re-set PENDING.  Our path
959de551246SJoel Becker  * cannot clear PENDING, because that will re-open the original race
960de551246SJoel Becker  * window.
961de551246SJoel Becker  *
962de551246SJoel Becker  * [Example]
963de551246SJoel Becker  *
964de551246SJoel Becker  * ocfs2_meta_lock()
965de551246SJoel Becker  *  ocfs2_cluster_lock()
966de551246SJoel Becker  *   set BUSY
967de551246SJoel Becker  *   set PENDING
968de551246SJoel Becker  *   drop l_lock
969de551246SJoel Becker  *   ocfs2_dlm_lock()
970de551246SJoel Becker  *    ocfs2_locking_ast()		ocfs2_downconvert_thread()
971de551246SJoel Becker  *     clear PENDING			 ocfs2_unblock_lock()
972de551246SJoel Becker  *					  take_l_lock
973de551246SJoel Becker  *					  !BUSY
974de551246SJoel Becker  *					  ocfs2_prepare_downconvert()
975de551246SJoel Becker  *					   set BUSY
976de551246SJoel Becker  *					   set PENDING
977de551246SJoel Becker  *					  drop l_lock
978de551246SJoel Becker  *   take l_lock
979de551246SJoel Becker  *   clear PENDING
980de551246SJoel Becker  *   drop l_lock
981de551246SJoel Becker  *			<window>
982de551246SJoel Becker  *					  ocfs2_dlm_lock()
983de551246SJoel Becker  *
984de551246SJoel Becker  * So as you can see, we now have a window where l_lock is not held,
985de551246SJoel Becker  * PENDING is not set, and ocfs2_dlm_lock() has not been called.
986de551246SJoel Becker  *
987de551246SJoel Becker  * The core problem is that ocfs2_cluster_lock() has cleared the PENDING
988de551246SJoel Becker  * set by ocfs2_prepare_downconvert().  That wasn't nice.
989de551246SJoel Becker  *
990de551246SJoel Becker  * To solve this we introduce l_pending_gen.  A call to
991de551246SJoel Becker  * lockres_clear_pending() will only do so when it is passed a generation
992de551246SJoel Becker  * number that matches the lockres.  lockres_set_pending() will return the
993de551246SJoel Becker  * current generation number.  When ocfs2_cluster_lock() goes to clear
994de551246SJoel Becker  * PENDING, it passes the generation it got from set_pending().  In our
995de551246SJoel Becker  * example above, the generation numbers will *not* match.  Thus,
996de551246SJoel Becker  * ocfs2_cluster_lock() will not clear the PENDING set by
997de551246SJoel Becker  * ocfs2_prepare_downconvert().
998de551246SJoel Becker  */
999de551246SJoel Becker 
1000de551246SJoel Becker /* Unlocked version for ocfs2_locking_ast() */
1001de551246SJoel Becker static void __lockres_clear_pending(struct ocfs2_lock_res *lockres,
1002de551246SJoel Becker 				    unsigned int generation,
1003de551246SJoel Becker 				    struct ocfs2_super *osb)
1004de551246SJoel Becker {
1005de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
1006de551246SJoel Becker 
1007de551246SJoel Becker 	/*
1008de551246SJoel Becker 	 * The ast and locking functions can race us here.  The winner
1009de551246SJoel Becker 	 * will clear pending, the loser will not.
1010de551246SJoel Becker 	 */
1011de551246SJoel Becker 	if (!(lockres->l_flags & OCFS2_LOCK_PENDING) ||
1012de551246SJoel Becker 	    (lockres->l_pending_gen != generation))
1013de551246SJoel Becker 		return;
1014de551246SJoel Becker 
1015de551246SJoel Becker 	lockres_clear_flags(lockres, OCFS2_LOCK_PENDING);
1016de551246SJoel Becker 	lockres->l_pending_gen++;
1017de551246SJoel Becker 
1018de551246SJoel Becker 	/*
1019de551246SJoel Becker 	 * The downconvert thread may have skipped us because we
1020de551246SJoel Becker 	 * were PENDING.  Wake it up.
1021de551246SJoel Becker 	 */
1022de551246SJoel Becker 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
1023de551246SJoel Becker 		ocfs2_wake_downconvert_thread(osb);
1024de551246SJoel Becker }
1025de551246SJoel Becker 
1026de551246SJoel Becker /* Locked version for callers of ocfs2_dlm_lock() */
1027de551246SJoel Becker static void lockres_clear_pending(struct ocfs2_lock_res *lockres,
1028de551246SJoel Becker 				  unsigned int generation,
1029de551246SJoel Becker 				  struct ocfs2_super *osb)
1030de551246SJoel Becker {
1031de551246SJoel Becker 	unsigned long flags;
1032de551246SJoel Becker 
1033de551246SJoel Becker 	spin_lock_irqsave(&lockres->l_lock, flags);
1034de551246SJoel Becker 	__lockres_clear_pending(lockres, generation, osb);
1035de551246SJoel Becker 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1036de551246SJoel Becker }
1037de551246SJoel Becker 
1038de551246SJoel Becker static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres)
1039de551246SJoel Becker {
1040de551246SJoel Becker 	assert_spin_locked(&lockres->l_lock);
1041de551246SJoel Becker 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
1042de551246SJoel Becker 
1043de551246SJoel Becker 	lockres_or_flags(lockres, OCFS2_LOCK_PENDING);
1044de551246SJoel Becker 
1045de551246SJoel Becker 	return lockres->l_pending_gen;
1046de551246SJoel Becker }
1047de551246SJoel Becker 
1048de551246SJoel Becker 
1049c0e41338SJoel Becker static void ocfs2_blocking_ast(struct ocfs2_dlm_lksb *lksb, int level)
1050ccd979bdSMark Fasheh {
1051a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1052aa2623adSMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1053ccd979bdSMark Fasheh 	int needs_downconvert;
1054ccd979bdSMark Fasheh 	unsigned long flags;
1055ccd979bdSMark Fasheh 
1056bd3e7610SJoel Becker 	BUG_ON(level <= DLM_LOCK_NL);
1057ccd979bdSMark Fasheh 
1058aa2623adSMark Fasheh 	mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
1059aa2623adSMark Fasheh 	     lockres->l_name, level, lockres->l_level,
1060aa2623adSMark Fasheh 	     ocfs2_lock_type_string(lockres->l_type));
1061aa2623adSMark Fasheh 
1062cf8e06f1SMark Fasheh 	/*
1063cf8e06f1SMark Fasheh 	 * We can skip the bast for locks which don't enable caching -
1064cf8e06f1SMark Fasheh 	 * they'll be dropped at the earliest possible time anyway.
1065cf8e06f1SMark Fasheh 	 */
1066cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_NOCACHE)
1067cf8e06f1SMark Fasheh 		return;
1068cf8e06f1SMark Fasheh 
1069ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1070ccd979bdSMark Fasheh 	needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
1071ccd979bdSMark Fasheh 	if (needs_downconvert)
1072ccd979bdSMark Fasheh 		ocfs2_schedule_blocked_lock(osb, lockres);
1073ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1074ccd979bdSMark Fasheh 
1075d680efe9SMark Fasheh 	wake_up(&lockres->l_event);
1076d680efe9SMark Fasheh 
107734d024f8SMark Fasheh 	ocfs2_wake_downconvert_thread(osb);
1078ccd979bdSMark Fasheh }
1079ccd979bdSMark Fasheh 
1080c0e41338SJoel Becker static void ocfs2_locking_ast(struct ocfs2_dlm_lksb *lksb)
1081ccd979bdSMark Fasheh {
1082a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
1083de551246SJoel Becker 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1084ccd979bdSMark Fasheh 	unsigned long flags;
10851693a5c0SDavid Teigland 	int status;
1086ccd979bdSMark Fasheh 
1087ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1088ccd979bdSMark Fasheh 
10891693a5c0SDavid Teigland 	status = ocfs2_dlm_lock_status(&lockres->l_lksb);
10901693a5c0SDavid Teigland 
10911693a5c0SDavid Teigland 	if (status == -EAGAIN) {
10921693a5c0SDavid Teigland 		lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
10931693a5c0SDavid Teigland 		goto out;
10941693a5c0SDavid Teigland 	}
10951693a5c0SDavid Teigland 
10961693a5c0SDavid Teigland 	if (status) {
10978f2c9c1bSJoel Becker 		mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n",
10981693a5c0SDavid Teigland 		     lockres->l_name, status);
1099ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1100ccd979bdSMark Fasheh 		return;
1101ccd979bdSMark Fasheh 	}
1102ccd979bdSMark Fasheh 
1103ccd979bdSMark Fasheh 	switch(lockres->l_action) {
1104ccd979bdSMark Fasheh 	case OCFS2_AST_ATTACH:
1105ccd979bdSMark Fasheh 		ocfs2_generic_handle_attach_action(lockres);
1106e92d57dfSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
1107ccd979bdSMark Fasheh 		break;
1108ccd979bdSMark Fasheh 	case OCFS2_AST_CONVERT:
1109ccd979bdSMark Fasheh 		ocfs2_generic_handle_convert_action(lockres);
1110ccd979bdSMark Fasheh 		break;
1111ccd979bdSMark Fasheh 	case OCFS2_AST_DOWNCONVERT:
1112ccd979bdSMark Fasheh 		ocfs2_generic_handle_downconvert_action(lockres);
1113ccd979bdSMark Fasheh 		break;
1114ccd979bdSMark Fasheh 	default:
1115e92d57dfSMark Fasheh 		mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
1116e92d57dfSMark Fasheh 		     "lockres flags = 0x%lx, unlock action: %u\n",
1117e92d57dfSMark Fasheh 		     lockres->l_name, lockres->l_action, lockres->l_flags,
1118e92d57dfSMark Fasheh 		     lockres->l_unlock_action);
1119ccd979bdSMark Fasheh 		BUG();
1120ccd979bdSMark Fasheh 	}
11211693a5c0SDavid Teigland out:
1122ccd979bdSMark Fasheh 	/* set it to something invalid so if we get called again we
1123ccd979bdSMark Fasheh 	 * can catch it. */
1124ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_INVALID;
1125ccd979bdSMark Fasheh 
1126de551246SJoel Becker 	/* Did we try to cancel this lock?  Clear that state */
1127de551246SJoel Becker 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT)
1128de551246SJoel Becker 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1129de551246SJoel Becker 
1130de551246SJoel Becker 	/*
1131de551246SJoel Becker 	 * We may have beaten the locking functions here.  We certainly
1132de551246SJoel Becker 	 * know that dlm_lock() has been called :-)
1133de551246SJoel Becker 	 * Because we can't have two lock calls in flight at once, we
1134de551246SJoel Becker 	 * can use lockres->l_pending_gen.
1135de551246SJoel Becker 	 */
1136de551246SJoel Becker 	__lockres_clear_pending(lockres, lockres->l_pending_gen,  osb);
1137de551246SJoel Becker 
1138ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1139d680efe9SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1140ccd979bdSMark Fasheh }
1141ccd979bdSMark Fasheh 
1142ccd979bdSMark Fasheh static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
1143ccd979bdSMark Fasheh 						int convert)
1144ccd979bdSMark Fasheh {
1145ccd979bdSMark Fasheh 	unsigned long flags;
1146ccd979bdSMark Fasheh 
1147ccd979bdSMark Fasheh 	mlog_entry_void();
1148ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1149ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
1150a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1151ccd979bdSMark Fasheh 	if (convert)
1152ccd979bdSMark Fasheh 		lockres->l_action = OCFS2_AST_INVALID;
1153ccd979bdSMark Fasheh 	else
1154ccd979bdSMark Fasheh 		lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
1155ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1156ccd979bdSMark Fasheh 
1157ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
1158ccd979bdSMark Fasheh 	mlog_exit_void();
1159ccd979bdSMark Fasheh }
1160ccd979bdSMark Fasheh 
1161ccd979bdSMark Fasheh /* Note: If we detect another process working on the lock (i.e.,
1162ccd979bdSMark Fasheh  * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
1163ccd979bdSMark Fasheh  * to do the right thing in that case.
1164ccd979bdSMark Fasheh  */
1165ccd979bdSMark Fasheh static int ocfs2_lock_create(struct ocfs2_super *osb,
1166ccd979bdSMark Fasheh 			     struct ocfs2_lock_res *lockres,
1167ccd979bdSMark Fasheh 			     int level,
1168bd3e7610SJoel Becker 			     u32 dlm_flags)
1169ccd979bdSMark Fasheh {
1170ccd979bdSMark Fasheh 	int ret = 0;
1171ccd979bdSMark Fasheh 	unsigned long flags;
1172de551246SJoel Becker 	unsigned int gen;
1173ccd979bdSMark Fasheh 
1174ccd979bdSMark Fasheh 	mlog_entry_void();
1175ccd979bdSMark Fasheh 
1176bd3e7610SJoel Becker 	mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level,
1177ccd979bdSMark Fasheh 	     dlm_flags);
1178ccd979bdSMark Fasheh 
1179ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1180ccd979bdSMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
1181ccd979bdSMark Fasheh 	    (lockres->l_flags & OCFS2_LOCK_BUSY)) {
1182ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1183ccd979bdSMark Fasheh 		goto bail;
1184ccd979bdSMark Fasheh 	}
1185ccd979bdSMark Fasheh 
1186ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_ATTACH;
1187ccd979bdSMark Fasheh 	lockres->l_requested = level;
1188ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1189de551246SJoel Becker 	gen = lockres_set_pending(lockres);
1190ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1191ccd979bdSMark Fasheh 
11924670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
1193ccd979bdSMark Fasheh 			     level,
1194ccd979bdSMark Fasheh 			     &lockres->l_lksb,
1195ccd979bdSMark Fasheh 			     dlm_flags,
1196ccd979bdSMark Fasheh 			     lockres->l_name,
1197a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
1198de551246SJoel Becker 	lockres_clear_pending(lockres, gen, osb);
11997431cd7eSJoel Becker 	if (ret) {
12007431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1201ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
1202ccd979bdSMark Fasheh 	}
1203ccd979bdSMark Fasheh 
12047431cd7eSJoel Becker 	mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name);
1205ccd979bdSMark Fasheh 
1206ccd979bdSMark Fasheh bail:
1207ccd979bdSMark Fasheh 	mlog_exit(ret);
1208ccd979bdSMark Fasheh 	return ret;
1209ccd979bdSMark Fasheh }
1210ccd979bdSMark Fasheh 
1211ccd979bdSMark Fasheh static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
1212ccd979bdSMark Fasheh 					int flag)
1213ccd979bdSMark Fasheh {
1214ccd979bdSMark Fasheh 	unsigned long flags;
1215ccd979bdSMark Fasheh 	int ret;
1216ccd979bdSMark Fasheh 
1217ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1218ccd979bdSMark Fasheh 	ret = lockres->l_flags & flag;
1219ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1220ccd979bdSMark Fasheh 
1221ccd979bdSMark Fasheh 	return ret;
1222ccd979bdSMark Fasheh }
1223ccd979bdSMark Fasheh 
1224ccd979bdSMark Fasheh static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)
1225ccd979bdSMark Fasheh 
1226ccd979bdSMark Fasheh {
1227ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1228ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
1229ccd979bdSMark Fasheh }
1230ccd979bdSMark Fasheh 
1231ccd979bdSMark Fasheh static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)
1232ccd979bdSMark Fasheh 
1233ccd979bdSMark Fasheh {
1234ccd979bdSMark Fasheh 	wait_event(lockres->l_event,
1235ccd979bdSMark Fasheh 		   !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
1236ccd979bdSMark Fasheh }
1237ccd979bdSMark Fasheh 
1238ccd979bdSMark Fasheh /* predict what lock level we'll be dropping down to on behalf
1239ccd979bdSMark Fasheh  * of another node, and return true if the currently wanted
1240ccd979bdSMark Fasheh  * level will be compatible with it. */
1241ccd979bdSMark Fasheh static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
1242ccd979bdSMark Fasheh 						     int wanted)
1243ccd979bdSMark Fasheh {
1244ccd979bdSMark Fasheh 	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
1245ccd979bdSMark Fasheh 
1246ccd979bdSMark Fasheh 	return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
1247ccd979bdSMark Fasheh }
1248ccd979bdSMark Fasheh 
1249ccd979bdSMark Fasheh static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
1250ccd979bdSMark Fasheh {
1251ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&mw->mw_item);
1252ccd979bdSMark Fasheh 	init_completion(&mw->mw_complete);
12538ddb7b00SSunil Mushran 	ocfs2_init_start_time(mw);
1254ccd979bdSMark Fasheh }
1255ccd979bdSMark Fasheh 
1256ccd979bdSMark Fasheh static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
1257ccd979bdSMark Fasheh {
1258ccd979bdSMark Fasheh 	wait_for_completion(&mw->mw_complete);
1259ccd979bdSMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
1260ccd979bdSMark Fasheh 	INIT_COMPLETION(mw->mw_complete);
1261ccd979bdSMark Fasheh 	return mw->mw_status;
1262ccd979bdSMark Fasheh }
1263ccd979bdSMark Fasheh 
1264ccd979bdSMark Fasheh static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
1265ccd979bdSMark Fasheh 				    struct ocfs2_mask_waiter *mw,
1266ccd979bdSMark Fasheh 				    unsigned long mask,
1267ccd979bdSMark Fasheh 				    unsigned long goal)
1268ccd979bdSMark Fasheh {
1269ccd979bdSMark Fasheh 	BUG_ON(!list_empty(&mw->mw_item));
1270ccd979bdSMark Fasheh 
1271ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
1272ccd979bdSMark Fasheh 
1273ccd979bdSMark Fasheh 	list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
1274ccd979bdSMark Fasheh 	mw->mw_mask = mask;
1275ccd979bdSMark Fasheh 	mw->mw_goal = goal;
1276ccd979bdSMark Fasheh }
1277ccd979bdSMark Fasheh 
1278ccd979bdSMark Fasheh /* returns 0 if the mw that was removed was already satisfied, -EBUSY
1279ccd979bdSMark Fasheh  * if the mask still hadn't reached its goal */
1280ccd979bdSMark Fasheh static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
1281ccd979bdSMark Fasheh 				      struct ocfs2_mask_waiter *mw)
1282ccd979bdSMark Fasheh {
1283ccd979bdSMark Fasheh 	unsigned long flags;
1284ccd979bdSMark Fasheh 	int ret = 0;
1285ccd979bdSMark Fasheh 
1286ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1287ccd979bdSMark Fasheh 	if (!list_empty(&mw->mw_item)) {
1288ccd979bdSMark Fasheh 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1289ccd979bdSMark Fasheh 			ret = -EBUSY;
1290ccd979bdSMark Fasheh 
1291ccd979bdSMark Fasheh 		list_del_init(&mw->mw_item);
1292ccd979bdSMark Fasheh 		init_completion(&mw->mw_complete);
1293ccd979bdSMark Fasheh 	}
1294ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1295ccd979bdSMark Fasheh 
1296ccd979bdSMark Fasheh 	return ret;
1297ccd979bdSMark Fasheh 
1298ccd979bdSMark Fasheh }
1299ccd979bdSMark Fasheh 
1300cf8e06f1SMark Fasheh static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1301cf8e06f1SMark Fasheh 					     struct ocfs2_lock_res *lockres)
1302cf8e06f1SMark Fasheh {
1303cf8e06f1SMark Fasheh 	int ret;
1304cf8e06f1SMark Fasheh 
1305cf8e06f1SMark Fasheh 	ret = wait_for_completion_interruptible(&mw->mw_complete);
1306cf8e06f1SMark Fasheh 	if (ret)
1307cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, mw);
1308cf8e06f1SMark Fasheh 	else
1309cf8e06f1SMark Fasheh 		ret = mw->mw_status;
1310cf8e06f1SMark Fasheh 	/* Re-arm the completion in case we want to wait on it again */
1311cf8e06f1SMark Fasheh 	INIT_COMPLETION(mw->mw_complete);
1312cf8e06f1SMark Fasheh 	return ret;
1313cf8e06f1SMark Fasheh }
1314cf8e06f1SMark Fasheh 
1315cb25797dSJan Kara static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1316ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres,
1317ccd979bdSMark Fasheh 				int level,
1318bd3e7610SJoel Becker 				u32 lkm_flags,
1319cb25797dSJan Kara 				int arg_flags,
1320cb25797dSJan Kara 				int l_subclass,
1321cb25797dSJan Kara 				unsigned long caller_ip)
1322ccd979bdSMark Fasheh {
1323ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
1324ccd979bdSMark Fasheh 	int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1325ccd979bdSMark Fasheh 	int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1326ccd979bdSMark Fasheh 	unsigned long flags;
1327de551246SJoel Becker 	unsigned int gen;
13281693a5c0SDavid Teigland 	int noqueue_attempted = 0;
1329ccd979bdSMark Fasheh 
1330ccd979bdSMark Fasheh 	mlog_entry_void();
1331ccd979bdSMark Fasheh 
1332ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1333ccd979bdSMark Fasheh 
1334b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
1335bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
1336b80fc012SMark Fasheh 
1337ccd979bdSMark Fasheh again:
1338ccd979bdSMark Fasheh 	wait = 0;
1339ccd979bdSMark Fasheh 
1340a1912826SSunil Mushran 	spin_lock_irqsave(&lockres->l_lock, flags);
1341a1912826SSunil Mushran 
1342ccd979bdSMark Fasheh 	if (catch_signals && signal_pending(current)) {
1343ccd979bdSMark Fasheh 		ret = -ERESTARTSYS;
1344a1912826SSunil Mushran 		goto unlock;
1345ccd979bdSMark Fasheh 	}
1346ccd979bdSMark Fasheh 
1347ccd979bdSMark Fasheh 	mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1348ccd979bdSMark Fasheh 			"Cluster lock called on freeing lockres %s! flags "
1349ccd979bdSMark Fasheh 			"0x%lx\n", lockres->l_name, lockres->l_flags);
1350ccd979bdSMark Fasheh 
1351ccd979bdSMark Fasheh 	/* We only compare against the currently granted level
1352ccd979bdSMark Fasheh 	 * here. If the lock is blocked waiting on a downconvert,
1353ccd979bdSMark Fasheh 	 * we'll get caught below. */
1354ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY &&
1355ccd979bdSMark Fasheh 	    level > lockres->l_level) {
1356ccd979bdSMark Fasheh 		/* is someone sitting in dlm_lock? If so, wait on
1357ccd979bdSMark Fasheh 		 * them. */
1358ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1359ccd979bdSMark Fasheh 		wait = 1;
1360ccd979bdSMark Fasheh 		goto unlock;
1361ccd979bdSMark Fasheh 	}
1362ccd979bdSMark Fasheh 
1363a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
1364a1912826SSunil Mushran 		/*
1365a1912826SSunil Mushran 		 * We've upconverted. If the lock now has a level we can
1366a1912826SSunil Mushran 		 * work with, we take it. If, however, the lock is not at the
1367a1912826SSunil Mushran 		 * required level, we go thru the full cycle. One way this could
1368a1912826SSunil Mushran 		 * happen is if a process requesting an upconvert to PR is
1369a1912826SSunil Mushran 		 * closely followed by another requesting upconvert to an EX.
1370a1912826SSunil Mushran 		 * If the process requesting EX lands here, we want it to
1371a1912826SSunil Mushran 		 * continue attempting to upconvert and let the process
1372a1912826SSunil Mushran 		 * requesting PR take the lock.
1373a1912826SSunil Mushran 		 * If multiple processes request upconvert to PR, the first one
1374a1912826SSunil Mushran 		 * here will take the lock. The others will have to go thru the
1375a1912826SSunil Mushran 		 * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
1376a1912826SSunil Mushran 		 * downconvert request.
1377a1912826SSunil Mushran 		 */
1378a1912826SSunil Mushran 		if (level <= lockres->l_level)
1379a1912826SSunil Mushran 			goto update_holders;
1380a1912826SSunil Mushran 	}
1381a1912826SSunil Mushran 
1382ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1383ccd979bdSMark Fasheh 	    !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1384ccd979bdSMark Fasheh 		/* is the lock is currently blocked on behalf of
1385ccd979bdSMark Fasheh 		 * another node */
1386ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
1387ccd979bdSMark Fasheh 		wait = 1;
1388ccd979bdSMark Fasheh 		goto unlock;
1389ccd979bdSMark Fasheh 	}
1390ccd979bdSMark Fasheh 
1391ccd979bdSMark Fasheh 	if (level > lockres->l_level) {
13921693a5c0SDavid Teigland 		if (noqueue_attempted > 0) {
13931693a5c0SDavid Teigland 			ret = -EAGAIN;
13941693a5c0SDavid Teigland 			goto unlock;
13951693a5c0SDavid Teigland 		}
13961693a5c0SDavid Teigland 		if (lkm_flags & DLM_LKF_NOQUEUE)
13971693a5c0SDavid Teigland 			noqueue_attempted = 1;
13981693a5c0SDavid Teigland 
1399ccd979bdSMark Fasheh 		if (lockres->l_action != OCFS2_AST_INVALID)
1400ccd979bdSMark Fasheh 			mlog(ML_ERROR, "lockres %s has action %u pending\n",
1401ccd979bdSMark Fasheh 			     lockres->l_name, lockres->l_action);
1402ccd979bdSMark Fasheh 
1403019d1b22SMark Fasheh 		if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1404019d1b22SMark Fasheh 			lockres->l_action = OCFS2_AST_ATTACH;
1405bd3e7610SJoel Becker 			lkm_flags &= ~DLM_LKF_CONVERT;
1406019d1b22SMark Fasheh 		} else {
1407ccd979bdSMark Fasheh 			lockres->l_action = OCFS2_AST_CONVERT;
1408bd3e7610SJoel Becker 			lkm_flags |= DLM_LKF_CONVERT;
1409019d1b22SMark Fasheh 		}
1410019d1b22SMark Fasheh 
1411ccd979bdSMark Fasheh 		lockres->l_requested = level;
1412ccd979bdSMark Fasheh 		lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1413de551246SJoel Becker 		gen = lockres_set_pending(lockres);
1414ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1415ccd979bdSMark Fasheh 
1416bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_IV);
1417bd3e7610SJoel Becker 		BUG_ON(level == DLM_LOCK_NL);
1418ccd979bdSMark Fasheh 
1419ccd979bdSMark Fasheh 		mlog(0, "lock %s, convert from %d to level = %d\n",
1420ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_level, level);
1421ccd979bdSMark Fasheh 
1422ccd979bdSMark Fasheh 		/* call dlm_lock to upgrade lock now */
14234670c46dSJoel Becker 		ret = ocfs2_dlm_lock(osb->cconn,
1424ccd979bdSMark Fasheh 				     level,
1425ccd979bdSMark Fasheh 				     &lockres->l_lksb,
1426019d1b22SMark Fasheh 				     lkm_flags,
1427ccd979bdSMark Fasheh 				     lockres->l_name,
1428a796d286SJoel Becker 				     OCFS2_LOCK_ID_MAX_LEN - 1);
1429de551246SJoel Becker 		lockres_clear_pending(lockres, gen, osb);
14307431cd7eSJoel Becker 		if (ret) {
14317431cd7eSJoel Becker 			if (!(lkm_flags & DLM_LKF_NOQUEUE) ||
14327431cd7eSJoel Becker 			    (ret != -EAGAIN)) {
143324ef1815SJoel Becker 				ocfs2_log_dlm_error("ocfs2_dlm_lock",
14347431cd7eSJoel Becker 						    ret, lockres);
1435ccd979bdSMark Fasheh 			}
1436ccd979bdSMark Fasheh 			ocfs2_recover_from_dlm_error(lockres, 1);
1437ccd979bdSMark Fasheh 			goto out;
1438ccd979bdSMark Fasheh 		}
1439ccd979bdSMark Fasheh 
144073ac36eaSColy Li 		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
1441ccd979bdSMark Fasheh 		     lockres->l_name);
1442ccd979bdSMark Fasheh 
1443ccd979bdSMark Fasheh 		/* At this point we've gone inside the dlm and need to
1444ccd979bdSMark Fasheh 		 * complete our work regardless. */
1445ccd979bdSMark Fasheh 		catch_signals = 0;
1446ccd979bdSMark Fasheh 
1447ccd979bdSMark Fasheh 		/* wait for busy to clear and carry on */
1448ccd979bdSMark Fasheh 		goto again;
1449ccd979bdSMark Fasheh 	}
1450ccd979bdSMark Fasheh 
1451a1912826SSunil Mushran update_holders:
1452ccd979bdSMark Fasheh 	/* Ok, if we get here then we're good to go. */
1453ccd979bdSMark Fasheh 	ocfs2_inc_holders(lockres, level);
1454ccd979bdSMark Fasheh 
1455ccd979bdSMark Fasheh 	ret = 0;
1456ccd979bdSMark Fasheh unlock:
1457a1912826SSunil Mushran 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
1458a1912826SSunil Mushran 
1459ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1460ccd979bdSMark Fasheh out:
1461ccd979bdSMark Fasheh 	/*
1462ccd979bdSMark Fasheh 	 * This is helping work around a lock inversion between the page lock
1463ccd979bdSMark Fasheh 	 * and dlm locks.  One path holds the page lock while calling aops
1464ccd979bdSMark Fasheh 	 * which block acquiring dlm locks.  The voting thread holds dlm
1465ccd979bdSMark Fasheh 	 * locks while acquiring page locks while down converting data locks.
1466ccd979bdSMark Fasheh 	 * This block is helping an aop path notice the inversion and back
1467ccd979bdSMark Fasheh 	 * off to unlock its page lock before trying the dlm lock again.
1468ccd979bdSMark Fasheh 	 */
1469ccd979bdSMark Fasheh 	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
1470ccd979bdSMark Fasheh 	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
1471ccd979bdSMark Fasheh 		wait = 0;
1472ccd979bdSMark Fasheh 		if (lockres_remove_mask_waiter(lockres, &mw))
1473ccd979bdSMark Fasheh 			ret = -EAGAIN;
1474ccd979bdSMark Fasheh 		else
1475ccd979bdSMark Fasheh 			goto again;
1476ccd979bdSMark Fasheh 	}
1477ccd979bdSMark Fasheh 	if (wait) {
1478ccd979bdSMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
1479ccd979bdSMark Fasheh 		if (ret == 0)
1480ccd979bdSMark Fasheh 			goto again;
1481ccd979bdSMark Fasheh 		mlog_errno(ret);
1482ccd979bdSMark Fasheh 	}
14838ddb7b00SSunil Mushran 	ocfs2_update_lock_stats(lockres, level, &mw, ret);
1484ccd979bdSMark Fasheh 
1485cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
1486cb25797dSJan Kara 	if (!ret && lockres->l_lockdep_map.key != NULL) {
1487cb25797dSJan Kara 		if (level == DLM_LOCK_PR)
1488cb25797dSJan Kara 			rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1489cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1490cb25797dSJan Kara 				caller_ip);
1491cb25797dSJan Kara 		else
1492cb25797dSJan Kara 			rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1493cb25797dSJan Kara 				!!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1494cb25797dSJan Kara 				caller_ip);
1495cb25797dSJan Kara 	}
1496cb25797dSJan Kara #endif
1497ccd979bdSMark Fasheh 	mlog_exit(ret);
1498ccd979bdSMark Fasheh 	return ret;
1499ccd979bdSMark Fasheh }
1500ccd979bdSMark Fasheh 
1501cb25797dSJan Kara static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1502ccd979bdSMark Fasheh 				     struct ocfs2_lock_res *lockres,
1503cb25797dSJan Kara 				     int level,
1504cb25797dSJan Kara 				     u32 lkm_flags,
1505cb25797dSJan Kara 				     int arg_flags)
1506cb25797dSJan Kara {
1507cb25797dSJan Kara 	return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1508cb25797dSJan Kara 				    0, _RET_IP_);
1509cb25797dSJan Kara }
1510cb25797dSJan Kara 
1511cb25797dSJan Kara 
1512cb25797dSJan Kara static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1513cb25797dSJan Kara 				   struct ocfs2_lock_res *lockres,
1514cb25797dSJan Kara 				   int level,
1515cb25797dSJan Kara 				   unsigned long caller_ip)
1516ccd979bdSMark Fasheh {
1517ccd979bdSMark Fasheh 	unsigned long flags;
1518ccd979bdSMark Fasheh 
1519ccd979bdSMark Fasheh 	mlog_entry_void();
1520ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1521ccd979bdSMark Fasheh 	ocfs2_dec_holders(lockres, level);
152234d024f8SMark Fasheh 	ocfs2_downconvert_on_unlock(osb, lockres);
1523ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1524cb25797dSJan Kara #ifdef CONFIG_DEBUG_LOCK_ALLOC
1525cb25797dSJan Kara 	if (lockres->l_lockdep_map.key != NULL)
1526cb25797dSJan Kara 		rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1527cb25797dSJan Kara #endif
1528ccd979bdSMark Fasheh 	mlog_exit_void();
1529ccd979bdSMark Fasheh }
1530ccd979bdSMark Fasheh 
1531da66116eSAdrian Bunk static int ocfs2_create_new_lock(struct ocfs2_super *osb,
1532d680efe9SMark Fasheh 				 struct ocfs2_lock_res *lockres,
153324c19ef4SMark Fasheh 				 int ex,
153424c19ef4SMark Fasheh 				 int local)
1535ccd979bdSMark Fasheh {
1536bd3e7610SJoel Becker 	int level =  ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1537ccd979bdSMark Fasheh 	unsigned long flags;
1538bd3e7610SJoel Becker 	u32 lkm_flags = local ? DLM_LKF_LOCAL : 0;
1539ccd979bdSMark Fasheh 
1540ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1541ccd979bdSMark Fasheh 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1542ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1543ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1544ccd979bdSMark Fasheh 
154524c19ef4SMark Fasheh 	return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1546ccd979bdSMark Fasheh }
1547ccd979bdSMark Fasheh 
1548ccd979bdSMark Fasheh /* Grants us an EX lock on the data and metadata resources, skipping
1549ccd979bdSMark Fasheh  * the normal cluster directory lookup. Use this ONLY on newly created
1550ccd979bdSMark Fasheh  * inodes which other nodes can't possibly see, and which haven't been
1551ccd979bdSMark Fasheh  * hashed in the inode hash yet. This can give us a good performance
1552ccd979bdSMark Fasheh  * increase as it'll skip the network broadcast normally associated
1553ccd979bdSMark Fasheh  * with creating a new lock resource. */
1554ccd979bdSMark Fasheh int ocfs2_create_new_inode_locks(struct inode *inode)
1555ccd979bdSMark Fasheh {
1556ccd979bdSMark Fasheh 	int ret;
1557d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1558ccd979bdSMark Fasheh 
1559ccd979bdSMark Fasheh 	BUG_ON(!inode);
1560ccd979bdSMark Fasheh 	BUG_ON(!ocfs2_inode_is_new(inode));
1561ccd979bdSMark Fasheh 
1562ccd979bdSMark Fasheh 	mlog_entry_void();
1563ccd979bdSMark Fasheh 
1564b0697053SMark Fasheh 	mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
1565ccd979bdSMark Fasheh 
1566ccd979bdSMark Fasheh 	/* NOTE: That we don't increment any of the holder counts, nor
1567ccd979bdSMark Fasheh 	 * do we add anything to a journal handle. Since this is
1568ccd979bdSMark Fasheh 	 * supposed to be a new inode which the cluster doesn't know
1569ccd979bdSMark Fasheh 	 * about yet, there is no need to.  As far as the LVB handling
1570ccd979bdSMark Fasheh 	 * is concerned, this is basically like acquiring an EX lock
1571ccd979bdSMark Fasheh 	 * on a resource which has an invalid one -- we'll set it
1572ccd979bdSMark Fasheh 	 * valid when we release the EX. */
1573ccd979bdSMark Fasheh 
157424c19ef4SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1575ccd979bdSMark Fasheh 	if (ret) {
1576ccd979bdSMark Fasheh 		mlog_errno(ret);
1577ccd979bdSMark Fasheh 		goto bail;
1578ccd979bdSMark Fasheh 	}
1579ccd979bdSMark Fasheh 
158024c19ef4SMark Fasheh 	/*
1581bd3e7610SJoel Becker 	 * We don't want to use DLM_LKF_LOCAL on a meta data lock as they
158224c19ef4SMark Fasheh 	 * don't use a generation in their lock names.
158324c19ef4SMark Fasheh 	 */
1584e63aecb6SMark Fasheh 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0);
1585ccd979bdSMark Fasheh 	if (ret) {
1586ccd979bdSMark Fasheh 		mlog_errno(ret);
1587ccd979bdSMark Fasheh 		goto bail;
1588ccd979bdSMark Fasheh 	}
1589ccd979bdSMark Fasheh 
159050008630STiger Yang 	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
159150008630STiger Yang 	if (ret) {
159250008630STiger Yang 		mlog_errno(ret);
159350008630STiger Yang 		goto bail;
159450008630STiger Yang 	}
159550008630STiger Yang 
1596ccd979bdSMark Fasheh bail:
1597ccd979bdSMark Fasheh 	mlog_exit(ret);
1598ccd979bdSMark Fasheh 	return ret;
1599ccd979bdSMark Fasheh }
1600ccd979bdSMark Fasheh 
1601ccd979bdSMark Fasheh int ocfs2_rw_lock(struct inode *inode, int write)
1602ccd979bdSMark Fasheh {
1603ccd979bdSMark Fasheh 	int status, level;
1604ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres;
1605c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1606ccd979bdSMark Fasheh 
1607ccd979bdSMark Fasheh 	BUG_ON(!inode);
1608ccd979bdSMark Fasheh 
1609ccd979bdSMark Fasheh 	mlog_entry_void();
1610ccd979bdSMark Fasheh 
1611b0697053SMark Fasheh 	mlog(0, "inode %llu take %s RW lock\n",
1612b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1613ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1614ccd979bdSMark Fasheh 
1615d92bc512SColy Li 	if (ocfs2_mount_local(osb)) {
1616d92bc512SColy Li 		mlog_exit(0);
1617c271c5c2SSunil Mushran 		return 0;
1618d92bc512SColy Li 	}
1619c271c5c2SSunil Mushran 
1620ccd979bdSMark Fasheh 	lockres = &OCFS2_I(inode)->ip_rw_lockres;
1621ccd979bdSMark Fasheh 
1622bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1623ccd979bdSMark Fasheh 
1624ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1625ccd979bdSMark Fasheh 				    0);
1626ccd979bdSMark Fasheh 	if (status < 0)
1627ccd979bdSMark Fasheh 		mlog_errno(status);
1628ccd979bdSMark Fasheh 
1629ccd979bdSMark Fasheh 	mlog_exit(status);
1630ccd979bdSMark Fasheh 	return status;
1631ccd979bdSMark Fasheh }
1632ccd979bdSMark Fasheh 
1633ccd979bdSMark Fasheh void ocfs2_rw_unlock(struct inode *inode, int write)
1634ccd979bdSMark Fasheh {
1635bd3e7610SJoel Becker 	int level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
1636ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1637c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1638ccd979bdSMark Fasheh 
1639ccd979bdSMark Fasheh 	mlog_entry_void();
1640ccd979bdSMark Fasheh 
1641b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s RW lock\n",
1642b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
1643ccd979bdSMark Fasheh 	     write ? "EXMODE" : "PRMODE");
1644ccd979bdSMark Fasheh 
1645c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
1646ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1647ccd979bdSMark Fasheh 
1648ccd979bdSMark Fasheh 	mlog_exit_void();
1649ccd979bdSMark Fasheh }
1650ccd979bdSMark Fasheh 
165150008630STiger Yang /*
165250008630STiger Yang  * ocfs2_open_lock always get PR mode lock.
165350008630STiger Yang  */
165450008630STiger Yang int ocfs2_open_lock(struct inode *inode)
165550008630STiger Yang {
165650008630STiger Yang 	int status = 0;
165750008630STiger Yang 	struct ocfs2_lock_res *lockres;
165850008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
165950008630STiger Yang 
166050008630STiger Yang 	BUG_ON(!inode);
166150008630STiger Yang 
166250008630STiger Yang 	mlog_entry_void();
166350008630STiger Yang 
166450008630STiger Yang 	mlog(0, "inode %llu take PRMODE open lock\n",
166550008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
166650008630STiger Yang 
166750008630STiger Yang 	if (ocfs2_mount_local(osb))
166850008630STiger Yang 		goto out;
166950008630STiger Yang 
167050008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
167150008630STiger Yang 
167250008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1673bd3e7610SJoel Becker 				    DLM_LOCK_PR, 0, 0);
167450008630STiger Yang 	if (status < 0)
167550008630STiger Yang 		mlog_errno(status);
167650008630STiger Yang 
167750008630STiger Yang out:
167850008630STiger Yang 	mlog_exit(status);
167950008630STiger Yang 	return status;
168050008630STiger Yang }
168150008630STiger Yang 
168250008630STiger Yang int ocfs2_try_open_lock(struct inode *inode, int write)
168350008630STiger Yang {
168450008630STiger Yang 	int status = 0, level;
168550008630STiger Yang 	struct ocfs2_lock_res *lockres;
168650008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
168750008630STiger Yang 
168850008630STiger Yang 	BUG_ON(!inode);
168950008630STiger Yang 
169050008630STiger Yang 	mlog_entry_void();
169150008630STiger Yang 
169250008630STiger Yang 	mlog(0, "inode %llu try to take %s open lock\n",
169350008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
169450008630STiger Yang 	     write ? "EXMODE" : "PRMODE");
169550008630STiger Yang 
169650008630STiger Yang 	if (ocfs2_mount_local(osb))
169750008630STiger Yang 		goto out;
169850008630STiger Yang 
169950008630STiger Yang 	lockres = &OCFS2_I(inode)->ip_open_lockres;
170050008630STiger Yang 
1701bd3e7610SJoel Becker 	level = write ? DLM_LOCK_EX : DLM_LOCK_PR;
170250008630STiger Yang 
170350008630STiger Yang 	/*
170450008630STiger Yang 	 * The file system may already holding a PRMODE/EXMODE open lock.
1705bd3e7610SJoel Becker 	 * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on
170650008630STiger Yang 	 * other nodes and the -EAGAIN will indicate to the caller that
170750008630STiger Yang 	 * this inode is still in use.
170850008630STiger Yang 	 */
170950008630STiger Yang 	status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1710bd3e7610SJoel Becker 				    level, DLM_LKF_NOQUEUE, 0);
171150008630STiger Yang 
171250008630STiger Yang out:
171350008630STiger Yang 	mlog_exit(status);
171450008630STiger Yang 	return status;
171550008630STiger Yang }
171650008630STiger Yang 
171750008630STiger Yang /*
171850008630STiger Yang  * ocfs2_open_unlock unlock PR and EX mode open locks.
171950008630STiger Yang  */
172050008630STiger Yang void ocfs2_open_unlock(struct inode *inode)
172150008630STiger Yang {
172250008630STiger Yang 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
172350008630STiger Yang 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
172450008630STiger Yang 
172550008630STiger Yang 	mlog_entry_void();
172650008630STiger Yang 
172750008630STiger Yang 	mlog(0, "inode %llu drop open lock\n",
172850008630STiger Yang 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
172950008630STiger Yang 
173050008630STiger Yang 	if (ocfs2_mount_local(osb))
173150008630STiger Yang 		goto out;
173250008630STiger Yang 
173350008630STiger Yang 	if(lockres->l_ro_holders)
173450008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1735bd3e7610SJoel Becker 				     DLM_LOCK_PR);
173650008630STiger Yang 	if(lockres->l_ex_holders)
173750008630STiger Yang 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1738bd3e7610SJoel Becker 				     DLM_LOCK_EX);
173950008630STiger Yang 
174050008630STiger Yang out:
174150008630STiger Yang 	mlog_exit_void();
174250008630STiger Yang }
174350008630STiger Yang 
1744cf8e06f1SMark Fasheh static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres,
1745cf8e06f1SMark Fasheh 				     int level)
1746cf8e06f1SMark Fasheh {
1747cf8e06f1SMark Fasheh 	int ret;
1748cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
1749cf8e06f1SMark Fasheh 	unsigned long flags;
1750cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1751cf8e06f1SMark Fasheh 
1752cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1753cf8e06f1SMark Fasheh 
1754cf8e06f1SMark Fasheh retry_cancel:
1755cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1756cf8e06f1SMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
1757cf8e06f1SMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
1758cf8e06f1SMark Fasheh 		if (ret) {
1759cf8e06f1SMark Fasheh 			spin_unlock_irqrestore(&lockres->l_lock, flags);
1760cf8e06f1SMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
1761cf8e06f1SMark Fasheh 			if (ret < 0) {
1762cf8e06f1SMark Fasheh 				mlog_errno(ret);
1763cf8e06f1SMark Fasheh 				goto out;
1764cf8e06f1SMark Fasheh 			}
1765cf8e06f1SMark Fasheh 			goto retry_cancel;
1766cf8e06f1SMark Fasheh 		}
1767cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1768cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1769cf8e06f1SMark Fasheh 
1770cf8e06f1SMark Fasheh 		ocfs2_wait_for_mask(&mw);
1771cf8e06f1SMark Fasheh 		goto retry_cancel;
1772cf8e06f1SMark Fasheh 	}
1773cf8e06f1SMark Fasheh 
1774cf8e06f1SMark Fasheh 	ret = -ERESTARTSYS;
1775cf8e06f1SMark Fasheh 	/*
1776cf8e06f1SMark Fasheh 	 * We may still have gotten the lock, in which case there's no
1777cf8e06f1SMark Fasheh 	 * point to restarting the syscall.
1778cf8e06f1SMark Fasheh 	 */
1779cf8e06f1SMark Fasheh 	if (lockres->l_level == level)
1780cf8e06f1SMark Fasheh 		ret = 0;
1781cf8e06f1SMark Fasheh 
1782cf8e06f1SMark Fasheh 	mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret,
1783cf8e06f1SMark Fasheh 	     lockres->l_flags, lockres->l_level, lockres->l_action);
1784cf8e06f1SMark Fasheh 
1785cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1786cf8e06f1SMark Fasheh 
1787cf8e06f1SMark Fasheh out:
1788cf8e06f1SMark Fasheh 	return ret;
1789cf8e06f1SMark Fasheh }
1790cf8e06f1SMark Fasheh 
1791cf8e06f1SMark Fasheh /*
1792cf8e06f1SMark Fasheh  * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
1793cf8e06f1SMark Fasheh  * flock() calls. The locking approach this requires is sufficiently
1794cf8e06f1SMark Fasheh  * different from all other cluster lock types that we implement a
1795cf8e06f1SMark Fasheh  * separate path to the "low-level" dlm calls. In particular:
1796cf8e06f1SMark Fasheh  *
1797cf8e06f1SMark Fasheh  * - No optimization of lock levels is done - we take at exactly
1798cf8e06f1SMark Fasheh  *   what's been requested.
1799cf8e06f1SMark Fasheh  *
1800cf8e06f1SMark Fasheh  * - No lock caching is employed. We immediately downconvert to
1801cf8e06f1SMark Fasheh  *   no-lock at unlock time. This also means flock locks never go on
1802cf8e06f1SMark Fasheh  *   the blocking list.
1803cf8e06f1SMark Fasheh  *
1804cf8e06f1SMark Fasheh  * - Since userspace can trivially deadlock itself with flock, we make
1805cf8e06f1SMark Fasheh  *   sure to allow cancellation of a misbehaving applications flock()
1806cf8e06f1SMark Fasheh  *   request.
1807cf8e06f1SMark Fasheh  *
1808cf8e06f1SMark Fasheh  * - Access to any flock lockres doesn't require concurrency, so we
1809cf8e06f1SMark Fasheh  *   can simplify the code by requiring the caller to guarantee
1810cf8e06f1SMark Fasheh  *   serialization of dlmglue flock calls.
1811cf8e06f1SMark Fasheh  */
1812cf8e06f1SMark Fasheh int ocfs2_file_lock(struct file *file, int ex, int trylock)
1813cf8e06f1SMark Fasheh {
1814e988cf1cSMark Fasheh 	int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
1815e988cf1cSMark Fasheh 	unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
1816cf8e06f1SMark Fasheh 	unsigned long flags;
1817cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
1818cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
1819cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1820cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1821cf8e06f1SMark Fasheh 
1822cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1823cf8e06f1SMark Fasheh 
1824cf8e06f1SMark Fasheh 	if ((lockres->l_flags & OCFS2_LOCK_BUSY) ||
1825bd3e7610SJoel Becker 	    (lockres->l_level > DLM_LOCK_NL)) {
1826cf8e06f1SMark Fasheh 		mlog(ML_ERROR,
1827cf8e06f1SMark Fasheh 		     "File lock \"%s\" has busy or locked state: flags: 0x%lx, "
1828cf8e06f1SMark Fasheh 		     "level: %u\n", lockres->l_name, lockres->l_flags,
1829cf8e06f1SMark Fasheh 		     lockres->l_level);
1830cf8e06f1SMark Fasheh 		return -EINVAL;
1831cf8e06f1SMark Fasheh 	}
1832cf8e06f1SMark Fasheh 
1833cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1834cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1835cf8e06f1SMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1836cf8e06f1SMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
1837cf8e06f1SMark Fasheh 
1838cf8e06f1SMark Fasheh 		/*
1839cf8e06f1SMark Fasheh 		 * Get the lock at NLMODE to start - that way we
1840cf8e06f1SMark Fasheh 		 * can cancel the upconvert request if need be.
1841cf8e06f1SMark Fasheh 		 */
1842e988cf1cSMark Fasheh 		ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
1843cf8e06f1SMark Fasheh 		if (ret < 0) {
1844cf8e06f1SMark Fasheh 			mlog_errno(ret);
1845cf8e06f1SMark Fasheh 			goto out;
1846cf8e06f1SMark Fasheh 		}
1847cf8e06f1SMark Fasheh 
1848cf8e06f1SMark Fasheh 		ret = ocfs2_wait_for_mask(&mw);
1849cf8e06f1SMark Fasheh 		if (ret) {
1850cf8e06f1SMark Fasheh 			mlog_errno(ret);
1851cf8e06f1SMark Fasheh 			goto out;
1852cf8e06f1SMark Fasheh 		}
1853cf8e06f1SMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
1854cf8e06f1SMark Fasheh 	}
1855cf8e06f1SMark Fasheh 
1856cf8e06f1SMark Fasheh 	lockres->l_action = OCFS2_AST_CONVERT;
1857e988cf1cSMark Fasheh 	lkm_flags |= DLM_LKF_CONVERT;
1858cf8e06f1SMark Fasheh 	lockres->l_requested = level;
1859cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1860cf8e06f1SMark Fasheh 
1861cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1862cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1863cf8e06f1SMark Fasheh 
18644670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags,
1865a796d286SJoel Becker 			     lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1);
18667431cd7eSJoel Becker 	if (ret) {
18677431cd7eSJoel Becker 		if (!trylock || (ret != -EAGAIN)) {
186824ef1815SJoel Becker 			ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
1869cf8e06f1SMark Fasheh 			ret = -EINVAL;
1870cf8e06f1SMark Fasheh 		}
1871cf8e06f1SMark Fasheh 
1872cf8e06f1SMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
1873cf8e06f1SMark Fasheh 		lockres_remove_mask_waiter(lockres, &mw);
1874cf8e06f1SMark Fasheh 		goto out;
1875cf8e06f1SMark Fasheh 	}
1876cf8e06f1SMark Fasheh 
1877cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask_interruptible(&mw, lockres);
1878cf8e06f1SMark Fasheh 	if (ret == -ERESTARTSYS) {
1879cf8e06f1SMark Fasheh 		/*
1880cf8e06f1SMark Fasheh 		 * Userspace can cause deadlock itself with
1881cf8e06f1SMark Fasheh 		 * flock(). Current behavior locally is to allow the
1882cf8e06f1SMark Fasheh 		 * deadlock, but abort the system call if a signal is
1883cf8e06f1SMark Fasheh 		 * received. We follow this example, otherwise a
1884cf8e06f1SMark Fasheh 		 * poorly written program could sit in kernel until
1885cf8e06f1SMark Fasheh 		 * reboot.
1886cf8e06f1SMark Fasheh 		 *
1887cf8e06f1SMark Fasheh 		 * Handling this is a bit more complicated for Ocfs2
1888cf8e06f1SMark Fasheh 		 * though. We can't exit this function with an
1889cf8e06f1SMark Fasheh 		 * outstanding lock request, so a cancel convert is
1890cf8e06f1SMark Fasheh 		 * required. We intentionally overwrite 'ret' - if the
1891cf8e06f1SMark Fasheh 		 * cancel fails and the lock was granted, it's easier
1892af901ca1SAndré Goddard Rosa 		 * to just bubble success back up to the user.
1893cf8e06f1SMark Fasheh 		 */
1894cf8e06f1SMark Fasheh 		ret = ocfs2_flock_handle_signal(lockres, level);
18951693a5c0SDavid Teigland 	} else if (!ret && (level > lockres->l_level)) {
18961693a5c0SDavid Teigland 		/* Trylock failed asynchronously */
18971693a5c0SDavid Teigland 		BUG_ON(!trylock);
18981693a5c0SDavid Teigland 		ret = -EAGAIN;
1899cf8e06f1SMark Fasheh 	}
1900cf8e06f1SMark Fasheh 
1901cf8e06f1SMark Fasheh out:
1902cf8e06f1SMark Fasheh 
1903cf8e06f1SMark Fasheh 	mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n",
1904cf8e06f1SMark Fasheh 	     lockres->l_name, ex, trylock, ret);
1905cf8e06f1SMark Fasheh 	return ret;
1906cf8e06f1SMark Fasheh }
1907cf8e06f1SMark Fasheh 
1908cf8e06f1SMark Fasheh void ocfs2_file_unlock(struct file *file)
1909cf8e06f1SMark Fasheh {
1910cf8e06f1SMark Fasheh 	int ret;
1911de551246SJoel Becker 	unsigned int gen;
1912cf8e06f1SMark Fasheh 	unsigned long flags;
1913cf8e06f1SMark Fasheh 	struct ocfs2_file_private *fp = file->private_data;
1914cf8e06f1SMark Fasheh 	struct ocfs2_lock_res *lockres = &fp->fp_flock;
1915cf8e06f1SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb);
1916cf8e06f1SMark Fasheh 	struct ocfs2_mask_waiter mw;
1917cf8e06f1SMark Fasheh 
1918cf8e06f1SMark Fasheh 	ocfs2_init_mask_waiter(&mw);
1919cf8e06f1SMark Fasheh 
1920cf8e06f1SMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
1921cf8e06f1SMark Fasheh 		return;
1922cf8e06f1SMark Fasheh 
1923e988cf1cSMark Fasheh 	if (lockres->l_level == DLM_LOCK_NL)
1924cf8e06f1SMark Fasheh 		return;
1925cf8e06f1SMark Fasheh 
1926cf8e06f1SMark Fasheh 	mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
1927cf8e06f1SMark Fasheh 	     lockres->l_name, lockres->l_flags, lockres->l_level,
1928cf8e06f1SMark Fasheh 	     lockres->l_action);
1929cf8e06f1SMark Fasheh 
1930cf8e06f1SMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
1931cf8e06f1SMark Fasheh 	/*
1932cf8e06f1SMark Fasheh 	 * Fake a blocking ast for the downconvert code.
1933cf8e06f1SMark Fasheh 	 */
1934cf8e06f1SMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
1935bd3e7610SJoel Becker 	lockres->l_blocking = DLM_LOCK_EX;
1936cf8e06f1SMark Fasheh 
1937e988cf1cSMark Fasheh 	gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
1938cf8e06f1SMark Fasheh 	lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1939cf8e06f1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
1940cf8e06f1SMark Fasheh 
1941e988cf1cSMark Fasheh 	ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
1942cf8e06f1SMark Fasheh 	if (ret) {
1943cf8e06f1SMark Fasheh 		mlog_errno(ret);
1944cf8e06f1SMark Fasheh 		return;
1945cf8e06f1SMark Fasheh 	}
1946cf8e06f1SMark Fasheh 
1947cf8e06f1SMark Fasheh 	ret = ocfs2_wait_for_mask(&mw);
1948cf8e06f1SMark Fasheh 	if (ret)
1949cf8e06f1SMark Fasheh 		mlog_errno(ret);
1950cf8e06f1SMark Fasheh }
1951cf8e06f1SMark Fasheh 
195234d024f8SMark Fasheh static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
1953ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
1954ccd979bdSMark Fasheh {
1955ccd979bdSMark Fasheh 	int kick = 0;
1956ccd979bdSMark Fasheh 
1957ccd979bdSMark Fasheh 	mlog_entry_void();
1958ccd979bdSMark Fasheh 
1959ccd979bdSMark Fasheh 	/* If we know that another node is waiting on our lock, kick
196034d024f8SMark Fasheh 	 * the downconvert thread * pre-emptively when we reach a release
1961ccd979bdSMark Fasheh 	 * condition. */
1962ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
1963ccd979bdSMark Fasheh 		switch(lockres->l_blocking) {
1964bd3e7610SJoel Becker 		case DLM_LOCK_EX:
1965ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders && !lockres->l_ro_holders)
1966ccd979bdSMark Fasheh 				kick = 1;
1967ccd979bdSMark Fasheh 			break;
1968bd3e7610SJoel Becker 		case DLM_LOCK_PR:
1969ccd979bdSMark Fasheh 			if (!lockres->l_ex_holders)
1970ccd979bdSMark Fasheh 				kick = 1;
1971ccd979bdSMark Fasheh 			break;
1972ccd979bdSMark Fasheh 		default:
1973ccd979bdSMark Fasheh 			BUG();
1974ccd979bdSMark Fasheh 		}
1975ccd979bdSMark Fasheh 	}
1976ccd979bdSMark Fasheh 
1977ccd979bdSMark Fasheh 	if (kick)
197834d024f8SMark Fasheh 		ocfs2_wake_downconvert_thread(osb);
1979ccd979bdSMark Fasheh 
1980ccd979bdSMark Fasheh 	mlog_exit_void();
1981ccd979bdSMark Fasheh }
1982ccd979bdSMark Fasheh 
/*
 * Layout of a timestamp packed into 64 bits: the seconds occupy the
 * upper OCFS2_SEC_BITS bits and the nanoseconds the remaining low
 * bits. The shift is derived from OCFS2_SEC_BITS so the two cannot
 * drift apart.
 */
#define OCFS2_SEC_BITS   34
#define OCFS2_SEC_SHIFT  (64 - OCFS2_SEC_BITS)
#define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
1986ccd979bdSMark Fasheh 
1987ccd979bdSMark Fasheh /* LVB only has room for 64 bits of time here so we pack it for
1988ccd979bdSMark Fasheh  * now. */
1989ccd979bdSMark Fasheh static u64 ocfs2_pack_timespec(struct timespec *spec)
1990ccd979bdSMark Fasheh {
1991ccd979bdSMark Fasheh 	u64 res;
1992ccd979bdSMark Fasheh 	u64 sec = spec->tv_sec;
1993ccd979bdSMark Fasheh 	u32 nsec = spec->tv_nsec;
1994ccd979bdSMark Fasheh 
1995ccd979bdSMark Fasheh 	res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
1996ccd979bdSMark Fasheh 
1997ccd979bdSMark Fasheh 	return res;
1998ccd979bdSMark Fasheh }
1999ccd979bdSMark Fasheh 
2000ccd979bdSMark Fasheh /* Call this with the lockres locked. I am reasonably sure we don't
2001ccd979bdSMark Fasheh  * need ip_lock in this function as anyone who would be changing those
2002e63aecb6SMark Fasheh  * values is supposed to be blocked in ocfs2_inode_lock right now. */
2003ccd979bdSMark Fasheh static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2004ccd979bdSMark Fasheh {
2005ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2006e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2007ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2008ccd979bdSMark Fasheh 
2009ccd979bdSMark Fasheh 	mlog_entry_void();
2010ccd979bdSMark Fasheh 
2011a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2012ccd979bdSMark Fasheh 
201324c19ef4SMark Fasheh 	/*
201424c19ef4SMark Fasheh 	 * Invalidate the LVB of a deleted inode - this way other
201524c19ef4SMark Fasheh 	 * nodes are forced to go to disk and discover the new inode
201624c19ef4SMark Fasheh 	 * status.
201724c19ef4SMark Fasheh 	 */
201824c19ef4SMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
201924c19ef4SMark Fasheh 		lvb->lvb_version = 0;
202024c19ef4SMark Fasheh 		goto out;
202124c19ef4SMark Fasheh 	}
202224c19ef4SMark Fasheh 
20234d3b83f7SMark Fasheh 	lvb->lvb_version   = OCFS2_LVB_VERSION;
2024ccd979bdSMark Fasheh 	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode));
2025ccd979bdSMark Fasheh 	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
2026ccd979bdSMark Fasheh 	lvb->lvb_iuid      = cpu_to_be32(inode->i_uid);
2027ccd979bdSMark Fasheh 	lvb->lvb_igid      = cpu_to_be32(inode->i_gid);
2028ccd979bdSMark Fasheh 	lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
2029ccd979bdSMark Fasheh 	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
2030ccd979bdSMark Fasheh 	lvb->lvb_iatime_packed  =
2031ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
2032ccd979bdSMark Fasheh 	lvb->lvb_ictime_packed =
2033ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
2034ccd979bdSMark Fasheh 	lvb->lvb_imtime_packed =
2035ccd979bdSMark Fasheh 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
2036ca4d147eSHerbert Poetzl 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
203715b1e36bSMark Fasheh 	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
2038f9e2d82eSMark Fasheh 	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
2039ccd979bdSMark Fasheh 
204024c19ef4SMark Fasheh out:
2041ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2042ccd979bdSMark Fasheh 
2043ccd979bdSMark Fasheh 	mlog_exit_void();
2044ccd979bdSMark Fasheh }
2045ccd979bdSMark Fasheh 
2046ccd979bdSMark Fasheh static void ocfs2_unpack_timespec(struct timespec *spec,
2047ccd979bdSMark Fasheh 				  u64 packed_time)
2048ccd979bdSMark Fasheh {
2049ccd979bdSMark Fasheh 	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
2050ccd979bdSMark Fasheh 	spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
2051ccd979bdSMark Fasheh }
2052ccd979bdSMark Fasheh 
2053ccd979bdSMark Fasheh static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2054ccd979bdSMark Fasheh {
2055ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2056e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2057ccd979bdSMark Fasheh 	struct ocfs2_meta_lvb *lvb;
2058ccd979bdSMark Fasheh 
2059ccd979bdSMark Fasheh 	mlog_entry_void();
2060ccd979bdSMark Fasheh 
2061ccd979bdSMark Fasheh 	mlog_meta_lvb(0, lockres);
2062ccd979bdSMark Fasheh 
2063a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2064ccd979bdSMark Fasheh 
2065ccd979bdSMark Fasheh 	/* We're safe here without the lockres lock... */
2066ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2067ccd979bdSMark Fasheh 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
2068ccd979bdSMark Fasheh 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
2069ccd979bdSMark Fasheh 
2070ca4d147eSHerbert Poetzl 	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
207115b1e36bSMark Fasheh 	oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
2072ca4d147eSHerbert Poetzl 	ocfs2_set_inode_flags(inode);
2073ca4d147eSHerbert Poetzl 
2074ccd979bdSMark Fasheh 	/* fast-symlinks are a special case */
2075ccd979bdSMark Fasheh 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
2076ccd979bdSMark Fasheh 		inode->i_blocks = 0;
2077ccd979bdSMark Fasheh 	else
20788110b073SMark Fasheh 		inode->i_blocks = ocfs2_inode_sector_count(inode);
2079ccd979bdSMark Fasheh 
2080ccd979bdSMark Fasheh 	inode->i_uid     = be32_to_cpu(lvb->lvb_iuid);
2081ccd979bdSMark Fasheh 	inode->i_gid     = be32_to_cpu(lvb->lvb_igid);
2082ccd979bdSMark Fasheh 	inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
2083ccd979bdSMark Fasheh 	inode->i_nlink   = be16_to_cpu(lvb->lvb_inlink);
2084ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_atime,
2085ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_iatime_packed));
2086ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_mtime,
2087ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_imtime_packed));
2088ccd979bdSMark Fasheh 	ocfs2_unpack_timespec(&inode->i_ctime,
2089ccd979bdSMark Fasheh 			      be64_to_cpu(lvb->lvb_ictime_packed));
2090ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2091ccd979bdSMark Fasheh 
2092ccd979bdSMark Fasheh 	mlog_exit_void();
2093ccd979bdSMark Fasheh }
2094ccd979bdSMark Fasheh 
2095f9e2d82eSMark Fasheh static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
2096f9e2d82eSMark Fasheh 					      struct ocfs2_lock_res *lockres)
2097ccd979bdSMark Fasheh {
2098a641dc2aSMark Fasheh 	struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2099ccd979bdSMark Fasheh 
21001c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
21011c520dfbSJoel Becker 	    && lvb->lvb_version == OCFS2_LVB_VERSION
2102f9e2d82eSMark Fasheh 	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
2103ccd979bdSMark Fasheh 		return 1;
2104ccd979bdSMark Fasheh 	return 0;
2105ccd979bdSMark Fasheh }
2106ccd979bdSMark Fasheh 
2107ccd979bdSMark Fasheh /* Determine whether a lock resource needs to be refreshed, and
2108ccd979bdSMark Fasheh  * arbitrate who gets to refresh it.
2109ccd979bdSMark Fasheh  *
2110ccd979bdSMark Fasheh  *   0 means no refresh needed.
2111ccd979bdSMark Fasheh  *
2112ccd979bdSMark Fasheh  *   > 0 means you need to refresh this and you MUST call
2113ccd979bdSMark Fasheh  *   ocfs2_complete_lock_res_refresh afterwards. */
2114ccd979bdSMark Fasheh static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
2115ccd979bdSMark Fasheh {
2116ccd979bdSMark Fasheh 	unsigned long flags;
2117ccd979bdSMark Fasheh 	int status = 0;
2118ccd979bdSMark Fasheh 
2119ccd979bdSMark Fasheh 	mlog_entry_void();
2120ccd979bdSMark Fasheh 
2121ccd979bdSMark Fasheh refresh_check:
2122ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2123ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2124ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2125ccd979bdSMark Fasheh 		goto bail;
2126ccd979bdSMark Fasheh 	}
2127ccd979bdSMark Fasheh 
2128ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2129ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
2130ccd979bdSMark Fasheh 
2131ccd979bdSMark Fasheh 		ocfs2_wait_on_refreshing_lock(lockres);
2132ccd979bdSMark Fasheh 		goto refresh_check;
2133ccd979bdSMark Fasheh 	}
2134ccd979bdSMark Fasheh 
2135ccd979bdSMark Fasheh 	/* Ok, I'll be the one to refresh this lock. */
2136ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
2137ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2138ccd979bdSMark Fasheh 
2139ccd979bdSMark Fasheh 	status = 1;
2140ccd979bdSMark Fasheh bail:
2141ccd979bdSMark Fasheh 	mlog_exit(status);
2142ccd979bdSMark Fasheh 	return status;
2143ccd979bdSMark Fasheh }
2144ccd979bdSMark Fasheh 
2145ccd979bdSMark Fasheh /* If status is non zero, I'll mark it as not being in refresh
2146ccd979bdSMark Fasheh  * anymroe, but i won't clear the needs refresh flag. */
2147ccd979bdSMark Fasheh static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
2148ccd979bdSMark Fasheh 						   int status)
2149ccd979bdSMark Fasheh {
2150ccd979bdSMark Fasheh 	unsigned long flags;
2151ccd979bdSMark Fasheh 	mlog_entry_void();
2152ccd979bdSMark Fasheh 
2153ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
2154ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
2155ccd979bdSMark Fasheh 	if (!status)
2156ccd979bdSMark Fasheh 		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
2157ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
2158ccd979bdSMark Fasheh 
2159ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
2160ccd979bdSMark Fasheh 
2161ccd979bdSMark Fasheh 	mlog_exit_void();
2162ccd979bdSMark Fasheh }
2163ccd979bdSMark Fasheh 
2164ccd979bdSMark Fasheh /* may or may not return a bh if it went to disk. */
2165e63aecb6SMark Fasheh static int ocfs2_inode_lock_update(struct inode *inode,
2166ccd979bdSMark Fasheh 				  struct buffer_head **bh)
2167ccd979bdSMark Fasheh {
2168ccd979bdSMark Fasheh 	int status = 0;
2169ccd979bdSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2170e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
2171ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe;
2172c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2173ccd979bdSMark Fasheh 
2174ccd979bdSMark Fasheh 	mlog_entry_void();
2175ccd979bdSMark Fasheh 
2176be9e986bSMark Fasheh 	if (ocfs2_mount_local(osb))
2177be9e986bSMark Fasheh 		goto bail;
2178be9e986bSMark Fasheh 
2179ccd979bdSMark Fasheh 	spin_lock(&oi->ip_lock);
2180ccd979bdSMark Fasheh 	if (oi->ip_flags & OCFS2_INODE_DELETED) {
2181b0697053SMark Fasheh 		mlog(0, "Orphaned inode %llu was deleted while we "
2182ccd979bdSMark Fasheh 		     "were waiting on a lock. ip_flags = 0x%x\n",
2183b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno, oi->ip_flags);
2184ccd979bdSMark Fasheh 		spin_unlock(&oi->ip_lock);
2185ccd979bdSMark Fasheh 		status = -ENOENT;
2186ccd979bdSMark Fasheh 		goto bail;
2187ccd979bdSMark Fasheh 	}
2188ccd979bdSMark Fasheh 	spin_unlock(&oi->ip_lock);
2189ccd979bdSMark Fasheh 
2190ccd979bdSMark Fasheh 	if (!ocfs2_should_refresh_lock_res(lockres))
2191ccd979bdSMark Fasheh 		goto bail;
2192ccd979bdSMark Fasheh 
2193ccd979bdSMark Fasheh 	/* This will discard any caching information we might have had
2194ccd979bdSMark Fasheh 	 * for the inode metadata. */
21958cb471e8SJoel Becker 	ocfs2_metadata_cache_purge(INODE_CACHE(inode));
2196ccd979bdSMark Fasheh 
219783418978SMark Fasheh 	ocfs2_extent_map_trunc(inode, 0);
219883418978SMark Fasheh 
2199be9e986bSMark Fasheh 	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
2200b0697053SMark Fasheh 		mlog(0, "Trusting LVB on inode %llu\n",
2201b0697053SMark Fasheh 		     (unsigned long long)oi->ip_blkno);
2202ccd979bdSMark Fasheh 		ocfs2_refresh_inode_from_lvb(inode);
2203ccd979bdSMark Fasheh 	} else {
2204ccd979bdSMark Fasheh 		/* Boo, we have to go to disk. */
2205ccd979bdSMark Fasheh 		/* read bh, cast, ocfs2_refresh_inode */
2206b657c95cSJoel Becker 		status = ocfs2_read_inode_block(inode, bh);
2207ccd979bdSMark Fasheh 		if (status < 0) {
2208ccd979bdSMark Fasheh 			mlog_errno(status);
2209ccd979bdSMark Fasheh 			goto bail_refresh;
2210ccd979bdSMark Fasheh 		}
2211ccd979bdSMark Fasheh 		fe = (struct ocfs2_dinode *) (*bh)->b_data;
2212ccd979bdSMark Fasheh 
2213ccd979bdSMark Fasheh 		/* This is a good chance to make sure we're not
2214b657c95cSJoel Becker 		 * locking an invalid object.  ocfs2_read_inode_block()
2215b657c95cSJoel Becker 		 * already checked that the inode block is sane.
2216ccd979bdSMark Fasheh 		 *
2217ccd979bdSMark Fasheh 		 * We bug on a stale inode here because we checked
2218ccd979bdSMark Fasheh 		 * above whether it was wiped from disk. The wiping
2219ccd979bdSMark Fasheh 		 * node provides a guarantee that we receive that
2220ccd979bdSMark Fasheh 		 * message and can mark the inode before dropping any
2221ccd979bdSMark Fasheh 		 * locks associated with it. */
2222ccd979bdSMark Fasheh 		mlog_bug_on_msg(inode->i_generation !=
2223ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_generation),
2224b0697053SMark Fasheh 				"Invalid dinode %llu disk generation: %u "
2225ccd979bdSMark Fasheh 				"inode->i_generation: %u\n",
2226b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2227b0697053SMark Fasheh 				le32_to_cpu(fe->i_generation),
2228ccd979bdSMark Fasheh 				inode->i_generation);
2229ccd979bdSMark Fasheh 		mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
2230ccd979bdSMark Fasheh 				!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
2231b0697053SMark Fasheh 				"Stale dinode %llu dtime: %llu flags: 0x%x\n",
2232b0697053SMark Fasheh 				(unsigned long long)oi->ip_blkno,
2233b0697053SMark Fasheh 				(unsigned long long)le64_to_cpu(fe->i_dtime),
2234ccd979bdSMark Fasheh 				le32_to_cpu(fe->i_flags));
2235ccd979bdSMark Fasheh 
2236ccd979bdSMark Fasheh 		ocfs2_refresh_inode(inode, fe);
22378ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2238ccd979bdSMark Fasheh 	}
2239ccd979bdSMark Fasheh 
2240ccd979bdSMark Fasheh 	status = 0;
2241ccd979bdSMark Fasheh bail_refresh:
2242ccd979bdSMark Fasheh 	ocfs2_complete_lock_res_refresh(lockres, status);
2243ccd979bdSMark Fasheh bail:
2244ccd979bdSMark Fasheh 	mlog_exit(status);
2245ccd979bdSMark Fasheh 	return status;
2246ccd979bdSMark Fasheh }
2247ccd979bdSMark Fasheh 
/*
 * Hand the caller a buffer head for the inode block.
 *
 * If @passed_bh is set (the lock update already went to disk), an
 * extra reference is taken on it and it is stored in *@ret_bh.
 * Otherwise the inode block is read into *@ret_bh.
 *
 * Returns 0 when @passed_bh was used, else the result of
 * ocfs2_read_inode_block().
 */
static int ocfs2_assign_bh(struct inode *inode,
			   struct buffer_head **ret_bh,
			   struct buffer_head *passed_bh)
{
	int err;

	if (!passed_bh) {
		err = ocfs2_read_inode_block(inode, ret_bh);
		if (err < 0)
			mlog_errno(err);

		return err;
	}

	/* Ok, the update went to disk for us, use the returned bh. */
	get_bh(passed_bh);
	*ret_bh = passed_bh;

	return 0;
}
2269ccd979bdSMark Fasheh 
2270ccd979bdSMark Fasheh /*
2271ccd979bdSMark Fasheh  * returns < 0 error if the callback will never be called, otherwise
2272ccd979bdSMark Fasheh  * the result of the lock will be communicated via the callback.
2273ccd979bdSMark Fasheh  */
2274cb25797dSJan Kara int ocfs2_inode_lock_full_nested(struct inode *inode,
2275ccd979bdSMark Fasheh 				 struct buffer_head **ret_bh,
2276ccd979bdSMark Fasheh 				 int ex,
2277cb25797dSJan Kara 				 int arg_flags,
2278cb25797dSJan Kara 				 int subclass)
2279ccd979bdSMark Fasheh {
2280bd3e7610SJoel Becker 	int status, level, acquired;
2281bd3e7610SJoel Becker 	u32 dlm_flags;
2282c271c5c2SSunil Mushran 	struct ocfs2_lock_res *lockres = NULL;
2283ccd979bdSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2284ccd979bdSMark Fasheh 	struct buffer_head *local_bh = NULL;
2285ccd979bdSMark Fasheh 
2286ccd979bdSMark Fasheh 	BUG_ON(!inode);
2287ccd979bdSMark Fasheh 
2288ccd979bdSMark Fasheh 	mlog_entry_void();
2289ccd979bdSMark Fasheh 
2290b0697053SMark Fasheh 	mlog(0, "inode %llu, take %s META lock\n",
2291b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2292ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2293ccd979bdSMark Fasheh 
2294ccd979bdSMark Fasheh 	status = 0;
2295ccd979bdSMark Fasheh 	acquired = 0;
2296ccd979bdSMark Fasheh 	/* We'll allow faking a readonly metadata lock for
2297ccd979bdSMark Fasheh 	 * rodevices. */
2298ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb)) {
2299ccd979bdSMark Fasheh 		if (ex)
2300ccd979bdSMark Fasheh 			status = -EROFS;
2301ccd979bdSMark Fasheh 		goto bail;
2302ccd979bdSMark Fasheh 	}
2303ccd979bdSMark Fasheh 
2304c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2305c271c5c2SSunil Mushran 		goto local;
2306c271c5c2SSunil Mushran 
2307ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2308553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2309ccd979bdSMark Fasheh 
2310e63aecb6SMark Fasheh 	lockres = &OCFS2_I(inode)->ip_inode_lockres;
2311bd3e7610SJoel Becker 	level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2312ccd979bdSMark Fasheh 	dlm_flags = 0;
2313ccd979bdSMark Fasheh 	if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2314bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_NOQUEUE;
2315ccd979bdSMark Fasheh 
2316cb25797dSJan Kara 	status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2317cb25797dSJan Kara 				      arg_flags, subclass, _RET_IP_);
2318ccd979bdSMark Fasheh 	if (status < 0) {
2319ccd979bdSMark Fasheh 		if (status != -EAGAIN && status != -EIOCBRETRY)
2320ccd979bdSMark Fasheh 			mlog_errno(status);
2321ccd979bdSMark Fasheh 		goto bail;
2322ccd979bdSMark Fasheh 	}
2323ccd979bdSMark Fasheh 
2324ccd979bdSMark Fasheh 	/* Notify the error cleanup path to drop the cluster lock. */
2325ccd979bdSMark Fasheh 	acquired = 1;
2326ccd979bdSMark Fasheh 
2327ccd979bdSMark Fasheh 	/* We wait twice because a node may have died while we were in
2328ccd979bdSMark Fasheh 	 * the lower dlm layers. The second time though, we've
2329ccd979bdSMark Fasheh 	 * committed to owning this lock so we don't allow signals to
2330ccd979bdSMark Fasheh 	 * abort the operation. */
2331ccd979bdSMark Fasheh 	if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
2332553abd04SJoel Becker 		ocfs2_wait_for_recovery(osb);
2333ccd979bdSMark Fasheh 
2334c271c5c2SSunil Mushran local:
233524c19ef4SMark Fasheh 	/*
233624c19ef4SMark Fasheh 	 * We only see this flag if we're being called from
233724c19ef4SMark Fasheh 	 * ocfs2_read_locked_inode(). It means we're locking an inode
233824c19ef4SMark Fasheh 	 * which hasn't been populated yet, so clear the refresh flag
233924c19ef4SMark Fasheh 	 * and let the caller handle it.
234024c19ef4SMark Fasheh 	 */
234124c19ef4SMark Fasheh 	if (inode->i_state & I_NEW) {
234224c19ef4SMark Fasheh 		status = 0;
2343c271c5c2SSunil Mushran 		if (lockres)
234424c19ef4SMark Fasheh 			ocfs2_complete_lock_res_refresh(lockres, 0);
234524c19ef4SMark Fasheh 		goto bail;
234624c19ef4SMark Fasheh 	}
234724c19ef4SMark Fasheh 
2348ccd979bdSMark Fasheh 	/* This is fun. The caller may want a bh back, or it may
2349e63aecb6SMark Fasheh 	 * not. ocfs2_inode_lock_update definitely wants one in, but
2350ccd979bdSMark Fasheh 	 * may or may not read one, depending on what's in the
2351ccd979bdSMark Fasheh 	 * LVB. The result of all of this is that we've *only* gone to
2352ccd979bdSMark Fasheh 	 * disk if we have to, so the complexity is worthwhile. */
2353e63aecb6SMark Fasheh 	status = ocfs2_inode_lock_update(inode, &local_bh);
2354ccd979bdSMark Fasheh 	if (status < 0) {
2355ccd979bdSMark Fasheh 		if (status != -ENOENT)
2356ccd979bdSMark Fasheh 			mlog_errno(status);
2357ccd979bdSMark Fasheh 		goto bail;
2358ccd979bdSMark Fasheh 	}
2359ccd979bdSMark Fasheh 
2360ccd979bdSMark Fasheh 	if (ret_bh) {
2361ccd979bdSMark Fasheh 		status = ocfs2_assign_bh(inode, ret_bh, local_bh);
2362ccd979bdSMark Fasheh 		if (status < 0) {
2363ccd979bdSMark Fasheh 			mlog_errno(status);
2364ccd979bdSMark Fasheh 			goto bail;
2365ccd979bdSMark Fasheh 		}
2366ccd979bdSMark Fasheh 	}
2367ccd979bdSMark Fasheh 
2368ccd979bdSMark Fasheh bail:
2369ccd979bdSMark Fasheh 	if (status < 0) {
2370ccd979bdSMark Fasheh 		if (ret_bh && (*ret_bh)) {
2371ccd979bdSMark Fasheh 			brelse(*ret_bh);
2372ccd979bdSMark Fasheh 			*ret_bh = NULL;
2373ccd979bdSMark Fasheh 		}
2374ccd979bdSMark Fasheh 		if (acquired)
2375e63aecb6SMark Fasheh 			ocfs2_inode_unlock(inode, ex);
2376ccd979bdSMark Fasheh 	}
2377ccd979bdSMark Fasheh 
2378ccd979bdSMark Fasheh 	if (local_bh)
2379ccd979bdSMark Fasheh 		brelse(local_bh);
2380ccd979bdSMark Fasheh 
2381ccd979bdSMark Fasheh 	mlog_exit(status);
2382ccd979bdSMark Fasheh 	return status;
2383ccd979bdSMark Fasheh }
2384ccd979bdSMark Fasheh 
2385ccd979bdSMark Fasheh /*
238634d024f8SMark Fasheh  * This is working around a lock inversion between tasks acquiring DLM
238734d024f8SMark Fasheh  * locks while holding a page lock and the downconvert thread which
238834d024f8SMark Fasheh  * blocks dlm lock acquiry while acquiring page locks.
2389ccd979bdSMark Fasheh  *
2390ccd979bdSMark Fasheh  * ** These _with_page variantes are only intended to be called from aop
2391ccd979bdSMark Fasheh  * methods that hold page locks and return a very specific *positive* error
2392ccd979bdSMark Fasheh  * code that aop methods pass up to the VFS -- test for errors with != 0. **
2393ccd979bdSMark Fasheh  *
239434d024f8SMark Fasheh  * The DLM is called such that it returns -EAGAIN if it would have
239534d024f8SMark Fasheh  * blocked waiting for the downconvert thread.  In that case we unlock
239634d024f8SMark Fasheh  * our page so the downconvert thread can make progress.  Once we've
239734d024f8SMark Fasheh  * done this we have to return AOP_TRUNCATED_PAGE so the aop method
239834d024f8SMark Fasheh  * that called us can bubble that back up into the VFS who will then
239934d024f8SMark Fasheh  * immediately retry the aop call.
2400ccd979bdSMark Fasheh  *
2401ccd979bdSMark Fasheh  * We do a blocking lock and immediate unlock before returning, though, so that
2402ccd979bdSMark Fasheh  * the lock has a great chance of being cached on this node by the time the VFS
2403ccd979bdSMark Fasheh  * calls back to retry the aop.    This has a potential to livelock as nodes
2404ccd979bdSMark Fasheh  * ping locks back and forth, but that's a risk we're willing to take to avoid
2405ccd979bdSMark Fasheh  * the lock inversion simply.
2406ccd979bdSMark Fasheh  */
2407e63aecb6SMark Fasheh int ocfs2_inode_lock_with_page(struct inode *inode,
2408ccd979bdSMark Fasheh 			      struct buffer_head **ret_bh,
2409ccd979bdSMark Fasheh 			      int ex,
2410ccd979bdSMark Fasheh 			      struct page *page)
2411ccd979bdSMark Fasheh {
2412ccd979bdSMark Fasheh 	int ret;
2413ccd979bdSMark Fasheh 
2414e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
2415ccd979bdSMark Fasheh 	if (ret == -EAGAIN) {
2416ccd979bdSMark Fasheh 		unlock_page(page);
2417e63aecb6SMark Fasheh 		if (ocfs2_inode_lock(inode, ret_bh, ex) == 0)
2418e63aecb6SMark Fasheh 			ocfs2_inode_unlock(inode, ex);
2419ccd979bdSMark Fasheh 		ret = AOP_TRUNCATED_PAGE;
2420ccd979bdSMark Fasheh 	}
2421ccd979bdSMark Fasheh 
2422ccd979bdSMark Fasheh 	return ret;
2423ccd979bdSMark Fasheh }
2424ccd979bdSMark Fasheh 
2425e63aecb6SMark Fasheh int ocfs2_inode_lock_atime(struct inode *inode,
24267f1a37e3STiger Yang 			  struct vfsmount *vfsmnt,
24277f1a37e3STiger Yang 			  int *level)
24287f1a37e3STiger Yang {
24297f1a37e3STiger Yang 	int ret;
24307f1a37e3STiger Yang 
24317f1a37e3STiger Yang 	mlog_entry_void();
2432e63aecb6SMark Fasheh 	ret = ocfs2_inode_lock(inode, NULL, 0);
24337f1a37e3STiger Yang 	if (ret < 0) {
24347f1a37e3STiger Yang 		mlog_errno(ret);
24357f1a37e3STiger Yang 		return ret;
24367f1a37e3STiger Yang 	}
24377f1a37e3STiger Yang 
24387f1a37e3STiger Yang 	/*
24397f1a37e3STiger Yang 	 * If we should update atime, we will get EX lock,
24407f1a37e3STiger Yang 	 * otherwise we just get PR lock.
24417f1a37e3STiger Yang 	 */
24427f1a37e3STiger Yang 	if (ocfs2_should_update_atime(inode, vfsmnt)) {
24437f1a37e3STiger Yang 		struct buffer_head *bh = NULL;
24447f1a37e3STiger Yang 
2445e63aecb6SMark Fasheh 		ocfs2_inode_unlock(inode, 0);
2446e63aecb6SMark Fasheh 		ret = ocfs2_inode_lock(inode, &bh, 1);
24477f1a37e3STiger Yang 		if (ret < 0) {
24487f1a37e3STiger Yang 			mlog_errno(ret);
24497f1a37e3STiger Yang 			return ret;
24507f1a37e3STiger Yang 		}
24517f1a37e3STiger Yang 		*level = 1;
24527f1a37e3STiger Yang 		if (ocfs2_should_update_atime(inode, vfsmnt))
24537f1a37e3STiger Yang 			ocfs2_update_inode_atime(inode, bh);
24547f1a37e3STiger Yang 		if (bh)
24557f1a37e3STiger Yang 			brelse(bh);
24567f1a37e3STiger Yang 	} else
24577f1a37e3STiger Yang 		*level = 0;
24587f1a37e3STiger Yang 
24597f1a37e3STiger Yang 	mlog_exit(ret);
24607f1a37e3STiger Yang 	return ret;
24617f1a37e3STiger Yang }
24627f1a37e3STiger Yang 
2463e63aecb6SMark Fasheh void ocfs2_inode_unlock(struct inode *inode,
2464ccd979bdSMark Fasheh 		       int ex)
2465ccd979bdSMark Fasheh {
2466bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2467e63aecb6SMark Fasheh 	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres;
2468c271c5c2SSunil Mushran 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2469ccd979bdSMark Fasheh 
2470ccd979bdSMark Fasheh 	mlog_entry_void();
2471ccd979bdSMark Fasheh 
2472b0697053SMark Fasheh 	mlog(0, "inode %llu drop %s META lock\n",
2473b0697053SMark Fasheh 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
2474ccd979bdSMark Fasheh 	     ex ? "EXMODE" : "PRMODE");
2475ccd979bdSMark Fasheh 
2476c271c5c2SSunil Mushran 	if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
2477c271c5c2SSunil Mushran 	    !ocfs2_mount_local(osb))
2478ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
2479ccd979bdSMark Fasheh 
2480ccd979bdSMark Fasheh 	mlog_exit_void();
2481ccd979bdSMark Fasheh }
2482ccd979bdSMark Fasheh 
2483df152c24SSunil Mushran int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
248483273932SSrinivas Eeda {
248583273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
248683273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
248783273932SSrinivas Eeda 	int status = 0;
248883273932SSrinivas Eeda 
2489df152c24SSunil Mushran 	if (ocfs2_is_hard_readonly(osb))
2490df152c24SSunil Mushran 		return -EROFS;
2491df152c24SSunil Mushran 
2492df152c24SSunil Mushran 	if (ocfs2_mount_local(osb))
2493df152c24SSunil Mushran 		return 0;
2494df152c24SSunil Mushran 
249583273932SSrinivas Eeda 	lockres = &osb->osb_orphan_scan.os_lockres;
2496df152c24SSunil Mushran 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
249783273932SSrinivas Eeda 	if (status < 0)
249883273932SSrinivas Eeda 		return status;
249983273932SSrinivas Eeda 
250083273932SSrinivas Eeda 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
25011c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
25021c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
250383273932SSrinivas Eeda 		*seqno = be32_to_cpu(lvb->lvb_os_seqno);
25043211949fSSunil Mushran 	else
25053211949fSSunil Mushran 		*seqno = osb->osb_orphan_scan.os_seqno + 1;
25063211949fSSunil Mushran 
250783273932SSrinivas Eeda 	return status;
250883273932SSrinivas Eeda }
250983273932SSrinivas Eeda 
2510df152c24SSunil Mushran void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
251183273932SSrinivas Eeda {
251283273932SSrinivas Eeda 	struct ocfs2_lock_res *lockres;
251383273932SSrinivas Eeda 	struct ocfs2_orphan_scan_lvb *lvb;
251483273932SSrinivas Eeda 
2515df152c24SSunil Mushran 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
251683273932SSrinivas Eeda 		lockres = &osb->osb_orphan_scan.os_lockres;
251783273932SSrinivas Eeda 		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
251883273932SSrinivas Eeda 		lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
251983273932SSrinivas Eeda 		lvb->lvb_os_seqno = cpu_to_be32(seqno);
2520df152c24SSunil Mushran 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2521df152c24SSunil Mushran 	}
252283273932SSrinivas Eeda }
252383273932SSrinivas Eeda 
2524ccd979bdSMark Fasheh int ocfs2_super_lock(struct ocfs2_super *osb,
2525ccd979bdSMark Fasheh 		     int ex)
2526ccd979bdSMark Fasheh {
2527c271c5c2SSunil Mushran 	int status = 0;
2528bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2529ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2530ccd979bdSMark Fasheh 
2531ccd979bdSMark Fasheh 	mlog_entry_void();
2532ccd979bdSMark Fasheh 
2533ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2534ccd979bdSMark Fasheh 		return -EROFS;
2535ccd979bdSMark Fasheh 
2536c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2537c271c5c2SSunil Mushran 		goto bail;
2538c271c5c2SSunil Mushran 
2539ccd979bdSMark Fasheh 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
2540ccd979bdSMark Fasheh 	if (status < 0) {
2541ccd979bdSMark Fasheh 		mlog_errno(status);
2542ccd979bdSMark Fasheh 		goto bail;
2543ccd979bdSMark Fasheh 	}
2544ccd979bdSMark Fasheh 
2545ccd979bdSMark Fasheh 	/* The super block lock path is really in the best position to
2546ccd979bdSMark Fasheh 	 * know when resources covered by the lock need to be
2547ccd979bdSMark Fasheh 	 * refreshed, so we do it here. Of course, making sense of
2548ccd979bdSMark Fasheh 	 * everything is up to the caller :) */
2549ccd979bdSMark Fasheh 	status = ocfs2_should_refresh_lock_res(lockres);
2550ccd979bdSMark Fasheh 	if (status < 0) {
2551ccd979bdSMark Fasheh 		mlog_errno(status);
2552ccd979bdSMark Fasheh 		goto bail;
2553ccd979bdSMark Fasheh 	}
2554ccd979bdSMark Fasheh 	if (status) {
25558e8a4603SMark Fasheh 		status = ocfs2_refresh_slot_info(osb);
2556ccd979bdSMark Fasheh 
2557ccd979bdSMark Fasheh 		ocfs2_complete_lock_res_refresh(lockres, status);
2558ccd979bdSMark Fasheh 
2559ccd979bdSMark Fasheh 		if (status < 0)
2560ccd979bdSMark Fasheh 			mlog_errno(status);
25618ddb7b00SSunil Mushran 		ocfs2_track_lock_refresh(lockres);
2562ccd979bdSMark Fasheh 	}
2563ccd979bdSMark Fasheh bail:
2564ccd979bdSMark Fasheh 	mlog_exit(status);
2565ccd979bdSMark Fasheh 	return status;
2566ccd979bdSMark Fasheh }
2567ccd979bdSMark Fasheh 
2568ccd979bdSMark Fasheh void ocfs2_super_unlock(struct ocfs2_super *osb,
2569ccd979bdSMark Fasheh 			int ex)
2570ccd979bdSMark Fasheh {
2571bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2572ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
2573ccd979bdSMark Fasheh 
2574c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2575ccd979bdSMark Fasheh 		ocfs2_cluster_unlock(osb, lockres, level);
2576ccd979bdSMark Fasheh }
2577ccd979bdSMark Fasheh 
2578ccd979bdSMark Fasheh int ocfs2_rename_lock(struct ocfs2_super *osb)
2579ccd979bdSMark Fasheh {
2580ccd979bdSMark Fasheh 	int status;
2581ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2582ccd979bdSMark Fasheh 
2583ccd979bdSMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2584ccd979bdSMark Fasheh 		return -EROFS;
2585ccd979bdSMark Fasheh 
2586c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2587c271c5c2SSunil Mushran 		return 0;
2588c271c5c2SSunil Mushran 
2589bd3e7610SJoel Becker 	status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2590ccd979bdSMark Fasheh 	if (status < 0)
2591ccd979bdSMark Fasheh 		mlog_errno(status);
2592ccd979bdSMark Fasheh 
2593ccd979bdSMark Fasheh 	return status;
2594ccd979bdSMark Fasheh }
2595ccd979bdSMark Fasheh 
2596ccd979bdSMark Fasheh void ocfs2_rename_unlock(struct ocfs2_super *osb)
2597ccd979bdSMark Fasheh {
2598ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
2599ccd979bdSMark Fasheh 
2600c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2601bd3e7610SJoel Becker 		ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2602ccd979bdSMark Fasheh }
2603ccd979bdSMark Fasheh 
26046ca497a8Swengang wang int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex)
26056ca497a8Swengang wang {
26066ca497a8Swengang wang 	int status;
26076ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
26086ca497a8Swengang wang 
26096ca497a8Swengang wang 	if (ocfs2_is_hard_readonly(osb))
26106ca497a8Swengang wang 		return -EROFS;
26116ca497a8Swengang wang 
26126ca497a8Swengang wang 	if (ocfs2_mount_local(osb))
26136ca497a8Swengang wang 		return 0;
26146ca497a8Swengang wang 
26156ca497a8Swengang wang 	status = ocfs2_cluster_lock(osb, lockres, ex ? LKM_EXMODE : LKM_PRMODE,
26166ca497a8Swengang wang 				    0, 0);
26176ca497a8Swengang wang 	if (status < 0)
26186ca497a8Swengang wang 		mlog(ML_ERROR, "lock on nfs sync lock failed %d\n", status);
26196ca497a8Swengang wang 
26206ca497a8Swengang wang 	return status;
26216ca497a8Swengang wang }
26226ca497a8Swengang wang 
26236ca497a8Swengang wang void ocfs2_nfs_sync_unlock(struct ocfs2_super *osb, int ex)
26246ca497a8Swengang wang {
26256ca497a8Swengang wang 	struct ocfs2_lock_res *lockres = &osb->osb_nfs_sync_lockres;
26266ca497a8Swengang wang 
26276ca497a8Swengang wang 	if (!ocfs2_mount_local(osb))
26286ca497a8Swengang wang 		ocfs2_cluster_unlock(osb, lockres,
26296ca497a8Swengang wang 				     ex ? LKM_EXMODE : LKM_PRMODE);
26306ca497a8Swengang wang }
26316ca497a8Swengang wang 
2632d680efe9SMark Fasheh int ocfs2_dentry_lock(struct dentry *dentry, int ex)
2633d680efe9SMark Fasheh {
2634d680efe9SMark Fasheh 	int ret;
2635bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2636d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2637d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2638d680efe9SMark Fasheh 
2639d680efe9SMark Fasheh 	BUG_ON(!dl);
2640d680efe9SMark Fasheh 
2641d680efe9SMark Fasheh 	if (ocfs2_is_hard_readonly(osb))
2642d680efe9SMark Fasheh 		return -EROFS;
2643d680efe9SMark Fasheh 
2644c271c5c2SSunil Mushran 	if (ocfs2_mount_local(osb))
2645c271c5c2SSunil Mushran 		return 0;
2646c271c5c2SSunil Mushran 
2647d680efe9SMark Fasheh 	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
2648d680efe9SMark Fasheh 	if (ret < 0)
2649d680efe9SMark Fasheh 		mlog_errno(ret);
2650d680efe9SMark Fasheh 
2651d680efe9SMark Fasheh 	return ret;
2652d680efe9SMark Fasheh }
2653d680efe9SMark Fasheh 
2654d680efe9SMark Fasheh void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
2655d680efe9SMark Fasheh {
2656bd3e7610SJoel Becker 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2657d680efe9SMark Fasheh 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
2658d680efe9SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
2659d680efe9SMark Fasheh 
2660c271c5c2SSunil Mushran 	if (!ocfs2_mount_local(osb))
2661d680efe9SMark Fasheh 		ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
2662d680efe9SMark Fasheh }
2663d680efe9SMark Fasheh 
2664ccd979bdSMark Fasheh /* Reference counting of the dlm debug structure. We want this because
2665ccd979bdSMark Fasheh  * open references on the debug inodes can live on after a mount, so
2666ccd979bdSMark Fasheh  * we can't rely on the ocfs2_super to always exist. */
2667ccd979bdSMark Fasheh static void ocfs2_dlm_debug_free(struct kref *kref)
2668ccd979bdSMark Fasheh {
2669ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2670ccd979bdSMark Fasheh 
2671ccd979bdSMark Fasheh 	dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
2672ccd979bdSMark Fasheh 
2673ccd979bdSMark Fasheh 	kfree(dlm_debug);
2674ccd979bdSMark Fasheh }
2675ccd979bdSMark Fasheh 
2676ccd979bdSMark Fasheh void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
2677ccd979bdSMark Fasheh {
2678ccd979bdSMark Fasheh 	if (dlm_debug)
2679ccd979bdSMark Fasheh 		kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
2680ccd979bdSMark Fasheh }
2681ccd979bdSMark Fasheh 
2682ccd979bdSMark Fasheh static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
2683ccd979bdSMark Fasheh {
2684ccd979bdSMark Fasheh 	kref_get(&debug->d_refcnt);
2685ccd979bdSMark Fasheh }
2686ccd979bdSMark Fasheh 
2687ccd979bdSMark Fasheh struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
2688ccd979bdSMark Fasheh {
2689ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug;
2690ccd979bdSMark Fasheh 
2691ccd979bdSMark Fasheh 	dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
2692ccd979bdSMark Fasheh 	if (!dlm_debug) {
2693ccd979bdSMark Fasheh 		mlog_errno(-ENOMEM);
2694ccd979bdSMark Fasheh 		goto out;
2695ccd979bdSMark Fasheh 	}
2696ccd979bdSMark Fasheh 
2697ccd979bdSMark Fasheh 	kref_init(&dlm_debug->d_refcnt);
2698ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
2699ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = NULL;
2700ccd979bdSMark Fasheh out:
2701ccd979bdSMark Fasheh 	return dlm_debug;
2702ccd979bdSMark Fasheh }
2703ccd979bdSMark Fasheh 
/* Per-open state of the locking_state debugfs file.
 * Access to this is arbitrated for us via seq_file->sem. */
struct ocfs2_dlm_seq_priv {
	/* debug structure being walked; a reference is held while open */
	struct ocfs2_dlm_debug *p_dlm_debug;
	/* dummy lockres (NULL l_ops) linked into the tracking list to mark
	 * our position in it */
	struct ocfs2_lock_res p_iter_res;
	/* private copy of the lockres currently being shown */
	struct ocfs2_lock_res p_tmp_res;
};
2710ccd979bdSMark Fasheh 
2711ccd979bdSMark Fasheh static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
2712ccd979bdSMark Fasheh 						 struct ocfs2_dlm_seq_priv *priv)
2713ccd979bdSMark Fasheh {
2714ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter, *ret = NULL;
2715ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
2716ccd979bdSMark Fasheh 
2717ccd979bdSMark Fasheh 	assert_spin_locked(&ocfs2_dlm_tracking_lock);
2718ccd979bdSMark Fasheh 
2719ccd979bdSMark Fasheh 	list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
2720ccd979bdSMark Fasheh 		/* discover the head of the list */
2721ccd979bdSMark Fasheh 		if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
2722ccd979bdSMark Fasheh 			mlog(0, "End of list found, %p\n", ret);
2723ccd979bdSMark Fasheh 			break;
2724ccd979bdSMark Fasheh 		}
2725ccd979bdSMark Fasheh 
2726ccd979bdSMark Fasheh 		/* We track our "dummy" iteration lockres' by a NULL
2727ccd979bdSMark Fasheh 		 * l_ops field. */
2728ccd979bdSMark Fasheh 		if (iter->l_ops != NULL) {
2729ccd979bdSMark Fasheh 			ret = iter;
2730ccd979bdSMark Fasheh 			break;
2731ccd979bdSMark Fasheh 		}
2732ccd979bdSMark Fasheh 	}
2733ccd979bdSMark Fasheh 
2734ccd979bdSMark Fasheh 	return ret;
2735ccd979bdSMark Fasheh }
2736ccd979bdSMark Fasheh 
2737ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
2738ccd979bdSMark Fasheh {
2739ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
2740ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter;
2741ccd979bdSMark Fasheh 
2742ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
2743ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
2744ccd979bdSMark Fasheh 	if (iter) {
2745ccd979bdSMark Fasheh 		/* Since lockres' have the lifetime of their container
2746ccd979bdSMark Fasheh 		 * (which can be inodes, ocfs2_supers, etc) we want to
2747ccd979bdSMark Fasheh 		 * copy this out to a temporary lockres while still
2748ccd979bdSMark Fasheh 		 * under the spinlock. Obviously after this we can't
2749ccd979bdSMark Fasheh 		 * trust any pointers on the copy returned, but that's
2750ccd979bdSMark Fasheh 		 * ok as the information we want isn't typically held
2751ccd979bdSMark Fasheh 		 * in them. */
2752ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
2753ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
2754ccd979bdSMark Fasheh 	}
2755ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
2756ccd979bdSMark Fasheh 
2757ccd979bdSMark Fasheh 	return iter;
2758ccd979bdSMark Fasheh }
2759ccd979bdSMark Fasheh 
/* seq_file ->stop: intentionally empty.  ->start and ->next release the
 * tracking spinlock themselves before returning, so there is nothing left
 * to undo here. */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
}
2763ccd979bdSMark Fasheh 
2764ccd979bdSMark Fasheh static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
2765ccd979bdSMark Fasheh {
2766ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = m->private;
2767ccd979bdSMark Fasheh 	struct ocfs2_lock_res *iter = v;
2768ccd979bdSMark Fasheh 	struct ocfs2_lock_res *dummy = &priv->p_iter_res;
2769ccd979bdSMark Fasheh 
2770ccd979bdSMark Fasheh 	spin_lock(&ocfs2_dlm_tracking_lock);
2771ccd979bdSMark Fasheh 	iter = ocfs2_dlm_next_res(iter, priv);
2772ccd979bdSMark Fasheh 	list_del_init(&dummy->l_debug_list);
2773ccd979bdSMark Fasheh 	if (iter) {
2774ccd979bdSMark Fasheh 		list_add(&dummy->l_debug_list, &iter->l_debug_list);
2775ccd979bdSMark Fasheh 		priv->p_tmp_res = *iter;
2776ccd979bdSMark Fasheh 		iter = &priv->p_tmp_res;
2777ccd979bdSMark Fasheh 	}
2778ccd979bdSMark Fasheh 	spin_unlock(&ocfs2_dlm_tracking_lock);
2779ccd979bdSMark Fasheh 
2780ccd979bdSMark Fasheh 	return iter;
2781ccd979bdSMark Fasheh }
2782ccd979bdSMark Fasheh 
2783ccd979bdSMark Fasheh /* So that debugfs.ocfs2 can determine which format is being used */
27848ddb7b00SSunil Mushran #define OCFS2_DLM_DEBUG_STR_VERSION 2
2785ccd979bdSMark Fasheh static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2786ccd979bdSMark Fasheh {
2787ccd979bdSMark Fasheh 	int i;
2788ccd979bdSMark Fasheh 	char *lvb;
2789ccd979bdSMark Fasheh 	struct ocfs2_lock_res *lockres = v;
2790ccd979bdSMark Fasheh 
2791ccd979bdSMark Fasheh 	if (!lockres)
2792ccd979bdSMark Fasheh 		return -EINVAL;
2793ccd979bdSMark Fasheh 
2794d680efe9SMark Fasheh 	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
2795d680efe9SMark Fasheh 
2796d680efe9SMark Fasheh 	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
2797d680efe9SMark Fasheh 		seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
2798d680efe9SMark Fasheh 			   lockres->l_name,
2799d680efe9SMark Fasheh 			   (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
2800d680efe9SMark Fasheh 	else
2801d680efe9SMark Fasheh 		seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
2802d680efe9SMark Fasheh 
2803d680efe9SMark Fasheh 	seq_printf(m, "%d\t"
2804ccd979bdSMark Fasheh 		   "0x%lx\t"
2805ccd979bdSMark Fasheh 		   "0x%x\t"
2806ccd979bdSMark Fasheh 		   "0x%x\t"
2807ccd979bdSMark Fasheh 		   "%u\t"
2808ccd979bdSMark Fasheh 		   "%u\t"
2809ccd979bdSMark Fasheh 		   "%d\t"
2810ccd979bdSMark Fasheh 		   "%d\t",
2811ccd979bdSMark Fasheh 		   lockres->l_level,
2812ccd979bdSMark Fasheh 		   lockres->l_flags,
2813ccd979bdSMark Fasheh 		   lockres->l_action,
2814ccd979bdSMark Fasheh 		   lockres->l_unlock_action,
2815ccd979bdSMark Fasheh 		   lockres->l_ro_holders,
2816ccd979bdSMark Fasheh 		   lockres->l_ex_holders,
2817ccd979bdSMark Fasheh 		   lockres->l_requested,
2818ccd979bdSMark Fasheh 		   lockres->l_blocking);
2819ccd979bdSMark Fasheh 
2820ccd979bdSMark Fasheh 	/* Dump the raw LVB */
28218f2c9c1bSJoel Becker 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2822ccd979bdSMark Fasheh 	for(i = 0; i < DLM_LVB_LEN; i++)
2823ccd979bdSMark Fasheh 		seq_printf(m, "0x%x\t", lvb[i]);
2824ccd979bdSMark Fasheh 
28258ddb7b00SSunil Mushran #ifdef CONFIG_OCFS2_FS_STATS
28268ddb7b00SSunil Mushran # define lock_num_prmode(_l)		(_l)->l_lock_num_prmode
28278ddb7b00SSunil Mushran # define lock_num_exmode(_l)		(_l)->l_lock_num_exmode
28288ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l)	(_l)->l_lock_num_prmode_failed
28298ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l)	(_l)->l_lock_num_exmode_failed
28308ddb7b00SSunil Mushran # define lock_total_prmode(_l)		(_l)->l_lock_total_prmode
28318ddb7b00SSunil Mushran # define lock_total_exmode(_l)		(_l)->l_lock_total_exmode
28328ddb7b00SSunil Mushran # define lock_max_prmode(_l)		(_l)->l_lock_max_prmode
28338ddb7b00SSunil Mushran # define lock_max_exmode(_l)		(_l)->l_lock_max_exmode
28348ddb7b00SSunil Mushran # define lock_refresh(_l)		(_l)->l_lock_refresh
28358ddb7b00SSunil Mushran #else
2836dd25e55eSRandy Dunlap # define lock_num_prmode(_l)		(0ULL)
2837dd25e55eSRandy Dunlap # define lock_num_exmode(_l)		(0ULL)
28388ddb7b00SSunil Mushran # define lock_num_prmode_failed(_l)	(0)
28398ddb7b00SSunil Mushran # define lock_num_exmode_failed(_l)	(0)
2840dd25e55eSRandy Dunlap # define lock_total_prmode(_l)		(0ULL)
2841dd25e55eSRandy Dunlap # define lock_total_exmode(_l)		(0ULL)
28428ddb7b00SSunil Mushran # define lock_max_prmode(_l)		(0)
28438ddb7b00SSunil Mushran # define lock_max_exmode(_l)		(0)
28448ddb7b00SSunil Mushran # define lock_refresh(_l)		(0)
28458ddb7b00SSunil Mushran #endif
28468ddb7b00SSunil Mushran 	/* The following seq_print was added in version 2 of this output */
28478ddb7b00SSunil Mushran 	seq_printf(m, "%llu\t"
28488ddb7b00SSunil Mushran 		   "%llu\t"
28498ddb7b00SSunil Mushran 		   "%u\t"
28508ddb7b00SSunil Mushran 		   "%u\t"
28518ddb7b00SSunil Mushran 		   "%llu\t"
28528ddb7b00SSunil Mushran 		   "%llu\t"
28538ddb7b00SSunil Mushran 		   "%u\t"
28548ddb7b00SSunil Mushran 		   "%u\t"
28558ddb7b00SSunil Mushran 		   "%u\t",
28568ddb7b00SSunil Mushran 		   lock_num_prmode(lockres),
28578ddb7b00SSunil Mushran 		   lock_num_exmode(lockres),
28588ddb7b00SSunil Mushran 		   lock_num_prmode_failed(lockres),
28598ddb7b00SSunil Mushran 		   lock_num_exmode_failed(lockres),
28608ddb7b00SSunil Mushran 		   lock_total_prmode(lockres),
28618ddb7b00SSunil Mushran 		   lock_total_exmode(lockres),
28628ddb7b00SSunil Mushran 		   lock_max_prmode(lockres),
28638ddb7b00SSunil Mushran 		   lock_max_exmode(lockres),
28648ddb7b00SSunil Mushran 		   lock_refresh(lockres));
28658ddb7b00SSunil Mushran 
2866ccd979bdSMark Fasheh 	/* End the line */
2867ccd979bdSMark Fasheh 	seq_printf(m, "\n");
2868ccd979bdSMark Fasheh 	return 0;
2869ccd979bdSMark Fasheh }
2870ccd979bdSMark Fasheh 
287190d99779SJan Engelhardt static const struct seq_operations ocfs2_dlm_seq_ops = {
2872ccd979bdSMark Fasheh 	.start =	ocfs2_dlm_seq_start,
2873ccd979bdSMark Fasheh 	.stop =		ocfs2_dlm_seq_stop,
2874ccd979bdSMark Fasheh 	.next =		ocfs2_dlm_seq_next,
2875ccd979bdSMark Fasheh 	.show =		ocfs2_dlm_seq_show,
2876ccd979bdSMark Fasheh };
2877ccd979bdSMark Fasheh 
2878ccd979bdSMark Fasheh static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
2879ccd979bdSMark Fasheh {
2880ccd979bdSMark Fasheh 	struct seq_file *seq = (struct seq_file *) file->private_data;
2881ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv = seq->private;
2882ccd979bdSMark Fasheh 	struct ocfs2_lock_res *res = &priv->p_iter_res;
2883ccd979bdSMark Fasheh 
2884ccd979bdSMark Fasheh 	ocfs2_remove_lockres_tracking(res);
2885ccd979bdSMark Fasheh 	ocfs2_put_dlm_debug(priv->p_dlm_debug);
2886ccd979bdSMark Fasheh 	return seq_release_private(inode, file);
2887ccd979bdSMark Fasheh }
2888ccd979bdSMark Fasheh 
2889ccd979bdSMark Fasheh static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
2890ccd979bdSMark Fasheh {
2891ccd979bdSMark Fasheh 	int ret;
2892ccd979bdSMark Fasheh 	struct ocfs2_dlm_seq_priv *priv;
2893ccd979bdSMark Fasheh 	struct seq_file *seq;
2894ccd979bdSMark Fasheh 	struct ocfs2_super *osb;
2895ccd979bdSMark Fasheh 
2896ccd979bdSMark Fasheh 	priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL);
2897ccd979bdSMark Fasheh 	if (!priv) {
2898ccd979bdSMark Fasheh 		ret = -ENOMEM;
2899ccd979bdSMark Fasheh 		mlog_errno(ret);
2900ccd979bdSMark Fasheh 		goto out;
2901ccd979bdSMark Fasheh 	}
29028e18e294STheodore Ts'o 	osb = inode->i_private;
2903ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(osb->osb_dlm_debug);
2904ccd979bdSMark Fasheh 	priv->p_dlm_debug = osb->osb_dlm_debug;
2905ccd979bdSMark Fasheh 	INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
2906ccd979bdSMark Fasheh 
2907ccd979bdSMark Fasheh 	ret = seq_open(file, &ocfs2_dlm_seq_ops);
2908ccd979bdSMark Fasheh 	if (ret) {
2909ccd979bdSMark Fasheh 		kfree(priv);
2910ccd979bdSMark Fasheh 		mlog_errno(ret);
2911ccd979bdSMark Fasheh 		goto out;
2912ccd979bdSMark Fasheh 	}
2913ccd979bdSMark Fasheh 
2914ccd979bdSMark Fasheh 	seq = (struct seq_file *) file->private_data;
2915ccd979bdSMark Fasheh 	seq->private = priv;
2916ccd979bdSMark Fasheh 
2917ccd979bdSMark Fasheh 	ocfs2_add_lockres_tracking(&priv->p_iter_res,
2918ccd979bdSMark Fasheh 				   priv->p_dlm_debug);
2919ccd979bdSMark Fasheh 
2920ccd979bdSMark Fasheh out:
2921ccd979bdSMark Fasheh 	return ret;
2922ccd979bdSMark Fasheh }
2923ccd979bdSMark Fasheh 
29244b6f5d20SArjan van de Ven static const struct file_operations ocfs2_dlm_debug_fops = {
2925ccd979bdSMark Fasheh 	.open =		ocfs2_dlm_debug_open,
2926ccd979bdSMark Fasheh 	.release =	ocfs2_dlm_debug_release,
2927ccd979bdSMark Fasheh 	.read =		seq_read,
2928ccd979bdSMark Fasheh 	.llseek =	seq_lseek,
2929ccd979bdSMark Fasheh };
2930ccd979bdSMark Fasheh 
2931ccd979bdSMark Fasheh static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
2932ccd979bdSMark Fasheh {
2933ccd979bdSMark Fasheh 	int ret = 0;
2934ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2935ccd979bdSMark Fasheh 
2936ccd979bdSMark Fasheh 	dlm_debug->d_locking_state = debugfs_create_file("locking_state",
2937ccd979bdSMark Fasheh 							 S_IFREG|S_IRUSR,
2938ccd979bdSMark Fasheh 							 osb->osb_debug_root,
2939ccd979bdSMark Fasheh 							 osb,
2940ccd979bdSMark Fasheh 							 &ocfs2_dlm_debug_fops);
2941ccd979bdSMark Fasheh 	if (!dlm_debug->d_locking_state) {
2942ccd979bdSMark Fasheh 		ret = -EINVAL;
2943ccd979bdSMark Fasheh 		mlog(ML_ERROR,
2944ccd979bdSMark Fasheh 		     "Unable to create locking state debugfs file.\n");
2945ccd979bdSMark Fasheh 		goto out;
2946ccd979bdSMark Fasheh 	}
2947ccd979bdSMark Fasheh 
2948ccd979bdSMark Fasheh 	ocfs2_get_dlm_debug(dlm_debug);
2949ccd979bdSMark Fasheh out:
2950ccd979bdSMark Fasheh 	return ret;
2951ccd979bdSMark Fasheh }
2952ccd979bdSMark Fasheh 
2953ccd979bdSMark Fasheh static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
2954ccd979bdSMark Fasheh {
2955ccd979bdSMark Fasheh 	struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2956ccd979bdSMark Fasheh 
2957ccd979bdSMark Fasheh 	if (dlm_debug) {
2958ccd979bdSMark Fasheh 		debugfs_remove(dlm_debug->d_locking_state);
2959ccd979bdSMark Fasheh 		ocfs2_put_dlm_debug(dlm_debug);
2960ccd979bdSMark Fasheh 	}
2961ccd979bdSMark Fasheh }
2962ccd979bdSMark Fasheh 
2963ccd979bdSMark Fasheh int ocfs2_dlm_init(struct ocfs2_super *osb)
2964ccd979bdSMark Fasheh {
2965c271c5c2SSunil Mushran 	int status = 0;
29664670c46dSJoel Becker 	struct ocfs2_cluster_connection *conn = NULL;
2967ccd979bdSMark Fasheh 
2968ccd979bdSMark Fasheh 	mlog_entry_void();
2969ccd979bdSMark Fasheh 
29700abd6d18SMark Fasheh 	if (ocfs2_mount_local(osb)) {
29710abd6d18SMark Fasheh 		osb->node_num = 0;
2972c271c5c2SSunil Mushran 		goto local;
29730abd6d18SMark Fasheh 	}
2974c271c5c2SSunil Mushran 
2975ccd979bdSMark Fasheh 	status = ocfs2_dlm_init_debug(osb);
2976ccd979bdSMark Fasheh 	if (status < 0) {
2977ccd979bdSMark Fasheh 		mlog_errno(status);
2978ccd979bdSMark Fasheh 		goto bail;
2979ccd979bdSMark Fasheh 	}
2980ccd979bdSMark Fasheh 
298134d024f8SMark Fasheh 	/* launch downconvert thread */
298234d024f8SMark Fasheh 	osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc");
298334d024f8SMark Fasheh 	if (IS_ERR(osb->dc_task)) {
298434d024f8SMark Fasheh 		status = PTR_ERR(osb->dc_task);
298534d024f8SMark Fasheh 		osb->dc_task = NULL;
2986ccd979bdSMark Fasheh 		mlog_errno(status);
2987ccd979bdSMark Fasheh 		goto bail;
2988ccd979bdSMark Fasheh 	}
2989ccd979bdSMark Fasheh 
2990ccd979bdSMark Fasheh 	/* for now, uuid == domain */
29919c6c877cSJoel Becker 	status = ocfs2_cluster_connect(osb->osb_cluster_stack,
29929c6c877cSJoel Becker 				       osb->uuid_str,
29934670c46dSJoel Becker 				       strlen(osb->uuid_str),
29944670c46dSJoel Becker 				       ocfs2_do_node_down, osb,
29954670c46dSJoel Becker 				       &conn);
29964670c46dSJoel Becker 	if (status) {
2997ccd979bdSMark Fasheh 		mlog_errno(status);
2998ccd979bdSMark Fasheh 		goto bail;
2999ccd979bdSMark Fasheh 	}
3000ccd979bdSMark Fasheh 
30010abd6d18SMark Fasheh 	status = ocfs2_cluster_this_node(&osb->node_num);
30020abd6d18SMark Fasheh 	if (status < 0) {
30030abd6d18SMark Fasheh 		mlog_errno(status);
30040abd6d18SMark Fasheh 		mlog(ML_ERROR,
30050abd6d18SMark Fasheh 		     "could not find this host's node number\n");
3006286eaa95SJoel Becker 		ocfs2_cluster_disconnect(conn, 0);
30070abd6d18SMark Fasheh 		goto bail;
30080abd6d18SMark Fasheh 	}
30090abd6d18SMark Fasheh 
3010c271c5c2SSunil Mushran local:
3011ccd979bdSMark Fasheh 	ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
3012ccd979bdSMark Fasheh 	ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
30136ca497a8Swengang wang 	ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
301483273932SSrinivas Eeda 	ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
3015ccd979bdSMark Fasheh 
30164670c46dSJoel Becker 	osb->cconn = conn;
3017ccd979bdSMark Fasheh 
3018ccd979bdSMark Fasheh 	status = 0;
3019ccd979bdSMark Fasheh bail:
3020ccd979bdSMark Fasheh 	if (status < 0) {
3021ccd979bdSMark Fasheh 		ocfs2_dlm_shutdown_debug(osb);
302234d024f8SMark Fasheh 		if (osb->dc_task)
302334d024f8SMark Fasheh 			kthread_stop(osb->dc_task);
3024ccd979bdSMark Fasheh 	}
3025ccd979bdSMark Fasheh 
3026ccd979bdSMark Fasheh 	mlog_exit(status);
3027ccd979bdSMark Fasheh 	return status;
3028ccd979bdSMark Fasheh }
3029ccd979bdSMark Fasheh 
3030286eaa95SJoel Becker void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
3031286eaa95SJoel Becker 			int hangup_pending)
3032ccd979bdSMark Fasheh {
3033ccd979bdSMark Fasheh 	mlog_entry_void();
3034ccd979bdSMark Fasheh 
3035ccd979bdSMark Fasheh 	ocfs2_drop_osb_locks(osb);
3036ccd979bdSMark Fasheh 
30374670c46dSJoel Becker 	/*
30384670c46dSJoel Becker 	 * Now that we have dropped all locks and ocfs2_dismount_volume()
30394670c46dSJoel Becker 	 * has disabled recovery, the DLM won't be talking to us.  It's
30404670c46dSJoel Becker 	 * safe to tear things down before disconnecting the cluster.
30414670c46dSJoel Becker 	 */
30424670c46dSJoel Becker 
304334d024f8SMark Fasheh 	if (osb->dc_task) {
304434d024f8SMark Fasheh 		kthread_stop(osb->dc_task);
304534d024f8SMark Fasheh 		osb->dc_task = NULL;
3046ccd979bdSMark Fasheh 	}
3047ccd979bdSMark Fasheh 
3048ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_super_lockres);
3049ccd979bdSMark Fasheh 	ocfs2_lock_res_free(&osb->osb_rename_lockres);
30506ca497a8Swengang wang 	ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
305183273932SSrinivas Eeda 	ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
3052ccd979bdSMark Fasheh 
3053286eaa95SJoel Becker 	ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
30544670c46dSJoel Becker 	osb->cconn = NULL;
3055ccd979bdSMark Fasheh 
3056ccd979bdSMark Fasheh 	ocfs2_dlm_shutdown_debug(osb);
3057ccd979bdSMark Fasheh 
3058ccd979bdSMark Fasheh 	mlog_exit_void();
3059ccd979bdSMark Fasheh }
3060ccd979bdSMark Fasheh 
3061c0e41338SJoel Becker static void ocfs2_unlock_ast(struct ocfs2_dlm_lksb *lksb, int error)
3062ccd979bdSMark Fasheh {
3063a796d286SJoel Becker 	struct ocfs2_lock_res *lockres = ocfs2_lksb_to_lock_res(lksb);
3064ccd979bdSMark Fasheh 	unsigned long flags;
3065ccd979bdSMark Fasheh 
3066ccd979bdSMark Fasheh 	mlog_entry_void();
3067ccd979bdSMark Fasheh 
3068ccd979bdSMark Fasheh 	mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name,
3069ccd979bdSMark Fasheh 	     lockres->l_unlock_action);
3070ccd979bdSMark Fasheh 
3071ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3072de551246SJoel Becker 	if (error) {
30737431cd7eSJoel Becker 		mlog(ML_ERROR, "Dlm passes error %d for lock %s, "
30747431cd7eSJoel Becker 		     "unlock_action %d\n", error, lockres->l_name,
3075ccd979bdSMark Fasheh 		     lockres->l_unlock_action);
3076ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3077d92bc512SColy Li 		mlog_exit_void();
3078ccd979bdSMark Fasheh 		return;
3079ccd979bdSMark Fasheh 	}
3080ccd979bdSMark Fasheh 
3081ccd979bdSMark Fasheh 	switch(lockres->l_unlock_action) {
3082ccd979bdSMark Fasheh 	case OCFS2_UNLOCK_CANCEL_CONVERT:
3083ccd979bdSMark Fasheh 		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
3084ccd979bdSMark Fasheh 		lockres->l_action = OCFS2_AST_INVALID;
3085a4b91965SSunil Mushran 		/* Downconvert thread may have requeued this lock, we
3086a4b91965SSunil Mushran 		 * need to wake it. */
3087a4b91965SSunil Mushran 		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3088a4b91965SSunil Mushran 			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
3089ccd979bdSMark Fasheh 		break;
3090ccd979bdSMark Fasheh 	case OCFS2_UNLOCK_DROP_LOCK:
3091bd3e7610SJoel Becker 		lockres->l_level = DLM_LOCK_IV;
3092ccd979bdSMark Fasheh 		break;
3093ccd979bdSMark Fasheh 	default:
3094ccd979bdSMark Fasheh 		BUG();
3095ccd979bdSMark Fasheh 	}
3096ccd979bdSMark Fasheh 
3097ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
3098ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
3099ccd979bdSMark Fasheh 	wake_up(&lockres->l_event);
310007f9eebcSDavid Teigland 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3101ccd979bdSMark Fasheh 
3102ccd979bdSMark Fasheh 	mlog_exit_void();
3103ccd979bdSMark Fasheh }
3104ccd979bdSMark Fasheh 
3105ccd979bdSMark Fasheh static int ocfs2_drop_lock(struct ocfs2_super *osb,
31060d5dc6c2SMark Fasheh 			   struct ocfs2_lock_res *lockres)
3107ccd979bdSMark Fasheh {
31087431cd7eSJoel Becker 	int ret;
3109ccd979bdSMark Fasheh 	unsigned long flags;
3110bd3e7610SJoel Becker 	u32 lkm_flags = 0;
3111ccd979bdSMark Fasheh 
3112ccd979bdSMark Fasheh 	/* We didn't get anywhere near actually using this lockres. */
3113ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
3114ccd979bdSMark Fasheh 		goto out;
3115ccd979bdSMark Fasheh 
3116b80fc012SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
3117bd3e7610SJoel Becker 		lkm_flags |= DLM_LKF_VALBLK;
3118b80fc012SMark Fasheh 
3119ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3120ccd979bdSMark Fasheh 
3121ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
3122ccd979bdSMark Fasheh 			"lockres %s, flags 0x%lx\n",
3123ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3124ccd979bdSMark Fasheh 
3125ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_BUSY) {
3126ccd979bdSMark Fasheh 		mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
3127ccd979bdSMark Fasheh 		     "%u, unlock_action = %u\n",
3128ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags, lockres->l_action,
3129ccd979bdSMark Fasheh 		     lockres->l_unlock_action);
3130ccd979bdSMark Fasheh 
3131ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3132ccd979bdSMark Fasheh 
3133ccd979bdSMark Fasheh 		/* XXX: Today we just wait on any busy
3134ccd979bdSMark Fasheh 		 * locks... Perhaps we need to cancel converts in the
3135ccd979bdSMark Fasheh 		 * future? */
3136ccd979bdSMark Fasheh 		ocfs2_wait_on_busy_lock(lockres);
3137ccd979bdSMark Fasheh 
3138ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3139ccd979bdSMark Fasheh 	}
3140ccd979bdSMark Fasheh 
31410d5dc6c2SMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
31420d5dc6c2SMark Fasheh 		if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
3143bd3e7610SJoel Becker 		    lockres->l_level == DLM_LOCK_EX &&
31440d5dc6c2SMark Fasheh 		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
31450d5dc6c2SMark Fasheh 			lockres->l_ops->set_lvb(lockres);
31460d5dc6c2SMark Fasheh 	}
3147ccd979bdSMark Fasheh 
3148ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY)
3149ccd979bdSMark Fasheh 		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
3150ccd979bdSMark Fasheh 		     lockres->l_name);
3151ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
3152ccd979bdSMark Fasheh 		mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
3153ccd979bdSMark Fasheh 
3154ccd979bdSMark Fasheh 	if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
3155ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3156ccd979bdSMark Fasheh 		goto out;
3157ccd979bdSMark Fasheh 	}
3158ccd979bdSMark Fasheh 
3159ccd979bdSMark Fasheh 	lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
3160ccd979bdSMark Fasheh 
3161ccd979bdSMark Fasheh 	/* make sure we never get here while waiting for an ast to
3162ccd979bdSMark Fasheh 	 * fire. */
3163ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
3164ccd979bdSMark Fasheh 
3165ccd979bdSMark Fasheh 	/* is this necessary? */
3166ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3167ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
3168ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3169ccd979bdSMark Fasheh 
3170ccd979bdSMark Fasheh 	mlog(0, "lock %s\n", lockres->l_name);
3171ccd979bdSMark Fasheh 
3172a796d286SJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags);
31737431cd7eSJoel Becker 	if (ret) {
31747431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3175ccd979bdSMark Fasheh 		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
3176cf0acdcdSJoel Becker 		ocfs2_dlm_dump_lksb(&lockres->l_lksb);
3177ccd979bdSMark Fasheh 		BUG();
3178ccd979bdSMark Fasheh 	}
317973ac36eaSColy Li 	mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
3180ccd979bdSMark Fasheh 	     lockres->l_name);
3181ccd979bdSMark Fasheh 
3182ccd979bdSMark Fasheh 	ocfs2_wait_on_busy_lock(lockres);
3183ccd979bdSMark Fasheh out:
3184ccd979bdSMark Fasheh 	mlog_exit(0);
3185ccd979bdSMark Fasheh 	return 0;
3186ccd979bdSMark Fasheh }
3187ccd979bdSMark Fasheh 
3188ccd979bdSMark Fasheh /* Mark the lockres as being dropped. It will no longer be
3189ccd979bdSMark Fasheh  * queued if blocking, but we still may have to wait on it
319034d024f8SMark Fasheh  * being dequeued from the downconvert thread before we can consider
3191ccd979bdSMark Fasheh  * it safe to drop.
3192ccd979bdSMark Fasheh  *
3193ccd979bdSMark Fasheh  * You can *not* attempt to call cluster_lock on this lockres anymore. */
3194ccd979bdSMark Fasheh void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
3195ccd979bdSMark Fasheh {
3196ccd979bdSMark Fasheh 	int status;
3197ccd979bdSMark Fasheh 	struct ocfs2_mask_waiter mw;
3198ccd979bdSMark Fasheh 	unsigned long flags;
3199ccd979bdSMark Fasheh 
3200ccd979bdSMark Fasheh 	ocfs2_init_mask_waiter(&mw);
3201ccd979bdSMark Fasheh 
3202ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3203ccd979bdSMark Fasheh 	lockres->l_flags |= OCFS2_LOCK_FREEING;
3204ccd979bdSMark Fasheh 	while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
3205ccd979bdSMark Fasheh 		lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
3206ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3207ccd979bdSMark Fasheh 
3208ccd979bdSMark Fasheh 		mlog(0, "Waiting on lockres %s\n", lockres->l_name);
3209ccd979bdSMark Fasheh 
3210ccd979bdSMark Fasheh 		status = ocfs2_wait_for_mask(&mw);
3211ccd979bdSMark Fasheh 		if (status)
3212ccd979bdSMark Fasheh 			mlog_errno(status);
3213ccd979bdSMark Fasheh 
3214ccd979bdSMark Fasheh 		spin_lock_irqsave(&lockres->l_lock, flags);
3215ccd979bdSMark Fasheh 	}
3216ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3217ccd979bdSMark Fasheh }
3218ccd979bdSMark Fasheh 
/*
 * Convenience wrapper: mark @lockres as freeing and then drop its DLM
 * lock. A non-zero result from the drop is only logged.
 */
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
			       struct ocfs2_lock_res *lockres)
{
	int err;

	ocfs2_mark_lockres_freeing(lockres);

	err = ocfs2_drop_lock(osb, lockres);
	if (err != 0)
		mlog_errno(err);
}
3229d680efe9SMark Fasheh 
3230ccd979bdSMark Fasheh static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
3231ccd979bdSMark Fasheh {
3232d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
3233d680efe9SMark Fasheh 	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
32346ca497a8Swengang wang 	ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
323583273932SSrinivas Eeda 	ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
3236ccd979bdSMark Fasheh }
3237ccd979bdSMark Fasheh 
3238ccd979bdSMark Fasheh int ocfs2_drop_inode_locks(struct inode *inode)
3239ccd979bdSMark Fasheh {
3240ccd979bdSMark Fasheh 	int status, err;
3241ccd979bdSMark Fasheh 
3242ccd979bdSMark Fasheh 	mlog_entry_void();
3243ccd979bdSMark Fasheh 
3244ccd979bdSMark Fasheh 	/* No need to call ocfs2_mark_lockres_freeing here -
3245ccd979bdSMark Fasheh 	 * ocfs2_clear_inode has done it for us. */
3246ccd979bdSMark Fasheh 
3247ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
324850008630STiger Yang 			      &OCFS2_I(inode)->ip_open_lockres);
3249ccd979bdSMark Fasheh 	if (err < 0)
3250ccd979bdSMark Fasheh 		mlog_errno(err);
3251ccd979bdSMark Fasheh 
3252ccd979bdSMark Fasheh 	status = err;
3253ccd979bdSMark Fasheh 
3254ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
3255e63aecb6SMark Fasheh 			      &OCFS2_I(inode)->ip_inode_lockres);
3256ccd979bdSMark Fasheh 	if (err < 0)
3257ccd979bdSMark Fasheh 		mlog_errno(err);
3258ccd979bdSMark Fasheh 	if (err < 0 && !status)
3259ccd979bdSMark Fasheh 		status = err;
3260ccd979bdSMark Fasheh 
3261ccd979bdSMark Fasheh 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
32620d5dc6c2SMark Fasheh 			      &OCFS2_I(inode)->ip_rw_lockres);
3263ccd979bdSMark Fasheh 	if (err < 0)
3264ccd979bdSMark Fasheh 		mlog_errno(err);
3265ccd979bdSMark Fasheh 	if (err < 0 && !status)
3266ccd979bdSMark Fasheh 		status = err;
3267ccd979bdSMark Fasheh 
3268ccd979bdSMark Fasheh 	mlog_exit(status);
3269ccd979bdSMark Fasheh 	return status;
3270ccd979bdSMark Fasheh }
3271ccd979bdSMark Fasheh 
3272de551246SJoel Becker static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
3273ccd979bdSMark Fasheh 					      int new_level)
3274ccd979bdSMark Fasheh {
3275ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3276ccd979bdSMark Fasheh 
3277bd3e7610SJoel Becker 	BUG_ON(lockres->l_blocking <= DLM_LOCK_NL);
3278ccd979bdSMark Fasheh 
3279ccd979bdSMark Fasheh 	if (lockres->l_level <= new_level) {
3280bd3e7610SJoel Becker 		mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n",
3281ccd979bdSMark Fasheh 		     lockres->l_level, new_level);
3282ccd979bdSMark Fasheh 		BUG();
3283ccd979bdSMark Fasheh 	}
3284ccd979bdSMark Fasheh 
3285ccd979bdSMark Fasheh 	mlog(0, "lock %s, new_level = %d, l_blocking = %d\n",
3286ccd979bdSMark Fasheh 	     lockres->l_name, new_level, lockres->l_blocking);
3287ccd979bdSMark Fasheh 
3288ccd979bdSMark Fasheh 	lockres->l_action = OCFS2_AST_DOWNCONVERT;
3289ccd979bdSMark Fasheh 	lockres->l_requested = new_level;
3290ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
3291de551246SJoel Becker 	return lockres_set_pending(lockres);
3292ccd979bdSMark Fasheh }
3293ccd979bdSMark Fasheh 
3294ccd979bdSMark Fasheh static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
3295ccd979bdSMark Fasheh 				  struct ocfs2_lock_res *lockres,
3296ccd979bdSMark Fasheh 				  int new_level,
3297de551246SJoel Becker 				  int lvb,
3298de551246SJoel Becker 				  unsigned int generation)
3299ccd979bdSMark Fasheh {
3300bd3e7610SJoel Becker 	int ret;
3301bd3e7610SJoel Becker 	u32 dlm_flags = DLM_LKF_CONVERT;
3302ccd979bdSMark Fasheh 
3303ccd979bdSMark Fasheh 	mlog_entry_void();
3304ccd979bdSMark Fasheh 
3305ccd979bdSMark Fasheh 	if (lvb)
3306bd3e7610SJoel Becker 		dlm_flags |= DLM_LKF_VALBLK;
3307ccd979bdSMark Fasheh 
33084670c46dSJoel Becker 	ret = ocfs2_dlm_lock(osb->cconn,
3309ccd979bdSMark Fasheh 			     new_level,
3310ccd979bdSMark Fasheh 			     &lockres->l_lksb,
3311ccd979bdSMark Fasheh 			     dlm_flags,
3312ccd979bdSMark Fasheh 			     lockres->l_name,
3313a796d286SJoel Becker 			     OCFS2_LOCK_ID_MAX_LEN - 1);
3314de551246SJoel Becker 	lockres_clear_pending(lockres, generation, osb);
33157431cd7eSJoel Becker 	if (ret) {
33167431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres);
3317ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 1);
3318ccd979bdSMark Fasheh 		goto bail;
3319ccd979bdSMark Fasheh 	}
3320ccd979bdSMark Fasheh 
3321ccd979bdSMark Fasheh 	ret = 0;
3322ccd979bdSMark Fasheh bail:
3323ccd979bdSMark Fasheh 	mlog_exit(ret);
3324ccd979bdSMark Fasheh 	return ret;
3325ccd979bdSMark Fasheh }
3326ccd979bdSMark Fasheh 
332724ef1815SJoel Becker /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */
3328ccd979bdSMark Fasheh static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
3329ccd979bdSMark Fasheh 				        struct ocfs2_lock_res *lockres)
3330ccd979bdSMark Fasheh {
3331ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
3332ccd979bdSMark Fasheh 
3333ccd979bdSMark Fasheh 	mlog_entry_void();
3334ccd979bdSMark Fasheh 	mlog(0, "lock %s\n", lockres->l_name);
3335ccd979bdSMark Fasheh 
3336ccd979bdSMark Fasheh 	if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
3337ccd979bdSMark Fasheh 		/* If we're already trying to cancel a lock conversion
3338ccd979bdSMark Fasheh 		 * then just drop the spinlock and allow the caller to
3339ccd979bdSMark Fasheh 		 * requeue this lock. */
3340ccd979bdSMark Fasheh 
3341ccd979bdSMark Fasheh 		mlog(0, "Lockres %s, skip convert\n", lockres->l_name);
3342ccd979bdSMark Fasheh 		return 0;
3343ccd979bdSMark Fasheh 	}
3344ccd979bdSMark Fasheh 
3345ccd979bdSMark Fasheh 	/* were we in a convert when we got the bast fire? */
3346ccd979bdSMark Fasheh 	BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
3347ccd979bdSMark Fasheh 	       lockres->l_action != OCFS2_AST_DOWNCONVERT);
3348ccd979bdSMark Fasheh 	/* set things up for the unlockast to know to just
3349ccd979bdSMark Fasheh 	 * clear out the ast_action and unset busy, etc. */
3350ccd979bdSMark Fasheh 	lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
3351ccd979bdSMark Fasheh 
3352ccd979bdSMark Fasheh 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
3353ccd979bdSMark Fasheh 			"lock %s, invalid flags: 0x%lx\n",
3354ccd979bdSMark Fasheh 			lockres->l_name, lockres->l_flags);
3355ccd979bdSMark Fasheh 
3356ccd979bdSMark Fasheh 	return 1;
3357ccd979bdSMark Fasheh }
3358ccd979bdSMark Fasheh 
3359ccd979bdSMark Fasheh static int ocfs2_cancel_convert(struct ocfs2_super *osb,
3360ccd979bdSMark Fasheh 				struct ocfs2_lock_res *lockres)
3361ccd979bdSMark Fasheh {
3362ccd979bdSMark Fasheh 	int ret;
3363ccd979bdSMark Fasheh 
3364ccd979bdSMark Fasheh 	mlog_entry_void();
3365ccd979bdSMark Fasheh 	mlog(0, "lock %s\n", lockres->l_name);
3366ccd979bdSMark Fasheh 
33674670c46dSJoel Becker 	ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb,
3368a796d286SJoel Becker 			       DLM_LKF_CANCEL);
33697431cd7eSJoel Becker 	if (ret) {
33707431cd7eSJoel Becker 		ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres);
3371ccd979bdSMark Fasheh 		ocfs2_recover_from_dlm_error(lockres, 0);
3372ccd979bdSMark Fasheh 	}
3373ccd979bdSMark Fasheh 
337424ef1815SJoel Becker 	mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name);
3375ccd979bdSMark Fasheh 
3376ccd979bdSMark Fasheh 	mlog_exit(ret);
3377ccd979bdSMark Fasheh 	return ret;
3378ccd979bdSMark Fasheh }
3379ccd979bdSMark Fasheh 
3380b5e500e2SMark Fasheh static int ocfs2_unblock_lock(struct ocfs2_super *osb,
3381ccd979bdSMark Fasheh 			      struct ocfs2_lock_res *lockres,
3382cc567d89SMark Fasheh 			      struct ocfs2_unblock_ctl *ctl)
3383ccd979bdSMark Fasheh {
3384ccd979bdSMark Fasheh 	unsigned long flags;
3385ccd979bdSMark Fasheh 	int blocking;
3386ccd979bdSMark Fasheh 	int new_level;
3387079b8057SSunil Mushran 	int level;
3388ccd979bdSMark Fasheh 	int ret = 0;
33895ef0d4eaSMark Fasheh 	int set_lvb = 0;
3390de551246SJoel Becker 	unsigned int gen;
3391ccd979bdSMark Fasheh 
3392ccd979bdSMark Fasheh 	mlog_entry_void();
3393ccd979bdSMark Fasheh 
3394ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3395ccd979bdSMark Fasheh 
3396ccd979bdSMark Fasheh recheck:
3397db0f6ce6SSunil Mushran 	/*
3398db0f6ce6SSunil Mushran 	 * Is it still blocking? If not, we have no more work to do.
3399db0f6ce6SSunil Mushran 	 */
3400db0f6ce6SSunil Mushran 	if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
3401db0f6ce6SSunil Mushran 		BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
3402db0f6ce6SSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3403db0f6ce6SSunil Mushran 		ret = 0;
3404db0f6ce6SSunil Mushran 		goto leave;
3405db0f6ce6SSunil Mushran 	}
3406db0f6ce6SSunil Mushran 
3407ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
3408de551246SJoel Becker 		/* XXX
3409de551246SJoel Becker 		 * This is a *big* race.  The OCFS2_LOCK_PENDING flag
3410de551246SJoel Becker 		 * exists entirely for one reason - another thread has set
3411de551246SJoel Becker 		 * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock().
3412de551246SJoel Becker 		 *
3413de551246SJoel Becker 		 * If we do ocfs2_cancel_convert() before the other thread
3414de551246SJoel Becker 		 * calls dlm_lock(), our cancel will do nothing.  We will
3415de551246SJoel Becker 		 * get no ast, and we will have no way of knowing the
3416de551246SJoel Becker 		 * cancel failed.  Meanwhile, the other thread will call
3417de551246SJoel Becker 		 * into dlm_lock() and wait...forever.
3418de551246SJoel Becker 		 *
3419de551246SJoel Becker 		 * Why forever?  Because another node has asked for the
3420de551246SJoel Becker 		 * lock first; that's why we're here in unblock_lock().
3421de551246SJoel Becker 		 *
3422de551246SJoel Becker 		 * The solution is OCFS2_LOCK_PENDING.  When PENDING is
3423de551246SJoel Becker 		 * set, we just requeue the unblock.  Only when the other
3424de551246SJoel Becker 		 * thread has called dlm_lock() and cleared PENDING will
3425de551246SJoel Becker 		 * we then cancel their request.
3426de551246SJoel Becker 		 *
3427de551246SJoel Becker 		 * All callers of dlm_lock() must set OCFS2_DLM_PENDING
3428de551246SJoel Becker 		 * at the same time they set OCFS2_DLM_BUSY.  They must
3429de551246SJoel Becker 		 * clear OCFS2_DLM_PENDING after dlm_lock() returns.
3430de551246SJoel Becker 		 */
3431de551246SJoel Becker 		if (lockres->l_flags & OCFS2_LOCK_PENDING)
3432de551246SJoel Becker 			goto leave_requeue;
3433de551246SJoel Becker 
3434d680efe9SMark Fasheh 		ctl->requeue = 1;
3435ccd979bdSMark Fasheh 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
3436ccd979bdSMark Fasheh 		spin_unlock_irqrestore(&lockres->l_lock, flags);
3437ccd979bdSMark Fasheh 		if (ret) {
3438ccd979bdSMark Fasheh 			ret = ocfs2_cancel_convert(osb, lockres);
3439ccd979bdSMark Fasheh 			if (ret < 0)
3440ccd979bdSMark Fasheh 				mlog_errno(ret);
3441ccd979bdSMark Fasheh 		}
3442ccd979bdSMark Fasheh 		goto leave;
3443ccd979bdSMark Fasheh 	}
3444ccd979bdSMark Fasheh 
3445a1912826SSunil Mushran 	/*
3446a1912826SSunil Mushran 	 * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
3447a1912826SSunil Mushran 	 * set when the ast is received for an upconvert just before the
3448a1912826SSunil Mushran 	 * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
3449a1912826SSunil Mushran 	 * on the heels of the ast, we want to delay the downconvert just
3450a1912826SSunil Mushran 	 * enough to allow the up requestor to do its task. Because this
3451a1912826SSunil Mushran 	 * lock is in the blocked queue, the lock will be downconverted
3452a1912826SSunil Mushran 	 * as soon as the requestor is done with the lock.
3453a1912826SSunil Mushran 	 */
3454a1912826SSunil Mushran 	if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
3455a1912826SSunil Mushran 		goto leave_requeue;
3456a1912826SSunil Mushran 
34570d74125aSSunil Mushran 	/*
34580d74125aSSunil Mushran 	 * How can we block and yet be at NL?  We were trying to upconvert
34590d74125aSSunil Mushran 	 * from NL and got canceled.  The code comes back here, and now
34600d74125aSSunil Mushran 	 * we notice and clear BLOCKING.
34610d74125aSSunil Mushran 	 */
34620d74125aSSunil Mushran 	if (lockres->l_level == DLM_LOCK_NL) {
34630d74125aSSunil Mushran 		BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
34640d74125aSSunil Mushran 		lockres->l_blocking = DLM_LOCK_NL;
34650d74125aSSunil Mushran 		lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
34660d74125aSSunil Mushran 		spin_unlock_irqrestore(&lockres->l_lock, flags);
34670d74125aSSunil Mushran 		goto leave;
34680d74125aSSunil Mushran 	}
34690d74125aSSunil Mushran 
3470ccd979bdSMark Fasheh 	/* if we're blocking an exclusive and we have *any* holders,
3471ccd979bdSMark Fasheh 	 * then requeue. */
3472bd3e7610SJoel Becker 	if ((lockres->l_blocking == DLM_LOCK_EX)
3473f7fbfdd1SMark Fasheh 	    && (lockres->l_ex_holders || lockres->l_ro_holders))
3474f7fbfdd1SMark Fasheh 		goto leave_requeue;
3475ccd979bdSMark Fasheh 
3476ccd979bdSMark Fasheh 	/* If it's a PR we're blocking, then only
3477ccd979bdSMark Fasheh 	 * requeue if we've got any EX holders */
3478bd3e7610SJoel Becker 	if (lockres->l_blocking == DLM_LOCK_PR &&
3479f7fbfdd1SMark Fasheh 	    lockres->l_ex_holders)
3480f7fbfdd1SMark Fasheh 		goto leave_requeue;
3481f7fbfdd1SMark Fasheh 
3482f7fbfdd1SMark Fasheh 	/*
3483f7fbfdd1SMark Fasheh 	 * Can we get a lock in this state if the holder counts are
3484f7fbfdd1SMark Fasheh 	 * zero? The meta data unblock code used to check this.
3485f7fbfdd1SMark Fasheh 	 */
3486f7fbfdd1SMark Fasheh 	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
3487f7fbfdd1SMark Fasheh 	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING))
3488f7fbfdd1SMark Fasheh 		goto leave_requeue;
3489ccd979bdSMark Fasheh 
349016d5b956SMark Fasheh 	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
349116d5b956SMark Fasheh 
349216d5b956SMark Fasheh 	if (lockres->l_ops->check_downconvert
349316d5b956SMark Fasheh 	    && !lockres->l_ops->check_downconvert(lockres, new_level))
349416d5b956SMark Fasheh 		goto leave_requeue;
349516d5b956SMark Fasheh 
3496ccd979bdSMark Fasheh 	/* If we get here, then we know that there are no more
3497ccd979bdSMark Fasheh 	 * incompatible holders (and anyone asking for an incompatible
3498ccd979bdSMark Fasheh 	 * lock is blocked). We can now downconvert the lock */
3499cc567d89SMark Fasheh 	if (!lockres->l_ops->downconvert_worker)
3500ccd979bdSMark Fasheh 		goto downconvert;
3501ccd979bdSMark Fasheh 
3502ccd979bdSMark Fasheh 	/* Some lockres types want to do a bit of work before
3503ccd979bdSMark Fasheh 	 * downconverting a lock. Allow that here. The worker function
3504ccd979bdSMark Fasheh 	 * may sleep, so we save off a copy of what we're blocking as
3505ccd979bdSMark Fasheh 	 * it may change while we're not holding the spin lock. */
3506ccd979bdSMark Fasheh 	blocking = lockres->l_blocking;
3507079b8057SSunil Mushran 	level = lockres->l_level;
3508ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3509ccd979bdSMark Fasheh 
3510cc567d89SMark Fasheh 	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
3511d680efe9SMark Fasheh 
3512d680efe9SMark Fasheh 	if (ctl->unblock_action == UNBLOCK_STOP_POST)
3513d680efe9SMark Fasheh 		goto leave;
3514ccd979bdSMark Fasheh 
3515ccd979bdSMark Fasheh 	spin_lock_irqsave(&lockres->l_lock, flags);
3516079b8057SSunil Mushran 	if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
3517ccd979bdSMark Fasheh 		/* If this changed underneath us, then we can't drop
3518ccd979bdSMark Fasheh 		 * it just yet. */
3519ccd979bdSMark Fasheh 		goto recheck;
3520ccd979bdSMark Fasheh 	}
3521ccd979bdSMark Fasheh 
3522ccd979bdSMark Fasheh downconvert:
3523d680efe9SMark Fasheh 	ctl->requeue = 0;
3524ccd979bdSMark Fasheh 
35255ef0d4eaSMark Fasheh 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
3526bd3e7610SJoel Becker 		if (lockres->l_level == DLM_LOCK_EX)
35275ef0d4eaSMark Fasheh 			set_lvb = 1;
35285ef0d4eaSMark Fasheh 
35295ef0d4eaSMark Fasheh 		/*
35305ef0d4eaSMark Fasheh 		 * We only set the lvb if the lock has been fully
35315ef0d4eaSMark Fasheh 		 * refreshed - otherwise we risk setting stale
35325ef0d4eaSMark Fasheh 		 * data. Otherwise, there's no need to actually clear
35335ef0d4eaSMark Fasheh 		 * out the lvb here as it's value is still valid.
35345ef0d4eaSMark Fasheh 		 */
35355ef0d4eaSMark Fasheh 		if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
35365ef0d4eaSMark Fasheh 			lockres->l_ops->set_lvb(lockres);
35375ef0d4eaSMark Fasheh 	}
35385ef0d4eaSMark Fasheh 
3539de551246SJoel Becker 	gen = ocfs2_prepare_downconvert(lockres, new_level);
3540ccd979bdSMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3541de551246SJoel Becker 	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb,
3542de551246SJoel Becker 				     gen);
3543de551246SJoel Becker 
3544ccd979bdSMark Fasheh leave:
3545ccd979bdSMark Fasheh 	mlog_exit(ret);
3546ccd979bdSMark Fasheh 	return ret;
3547f7fbfdd1SMark Fasheh 
3548f7fbfdd1SMark Fasheh leave_requeue:
3549f7fbfdd1SMark Fasheh 	spin_unlock_irqrestore(&lockres->l_lock, flags);
3550f7fbfdd1SMark Fasheh 	ctl->requeue = 1;
3551f7fbfdd1SMark Fasheh 
3552f7fbfdd1SMark Fasheh 	mlog_exit(0);
3553f7fbfdd1SMark Fasheh 	return 0;
3554ccd979bdSMark Fasheh }
3555ccd979bdSMark Fasheh 
3556d680efe9SMark Fasheh static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3557ccd979bdSMark Fasheh 				     int blocking)
3558ccd979bdSMark Fasheh {
3559ccd979bdSMark Fasheh 	struct inode *inode;
3560ccd979bdSMark Fasheh 	struct address_space *mapping;
3561ccd979bdSMark Fasheh 
3562ccd979bdSMark Fasheh        	inode = ocfs2_lock_res_inode(lockres);
3563ccd979bdSMark Fasheh 	mapping = inode->i_mapping;
3564ccd979bdSMark Fasheh 
35651044e401SMark Fasheh 	if (!S_ISREG(inode->i_mode))
3566f1f54068SMark Fasheh 		goto out;
3567f1f54068SMark Fasheh 
35687f4a2a97SMark Fasheh 	/*
35697f4a2a97SMark Fasheh 	 * We need this before the filemap_fdatawrite() so that it can
35707f4a2a97SMark Fasheh 	 * transfer the dirty bit from the PTE to the
35717f4a2a97SMark Fasheh 	 * page. Unfortunately this means that even for EX->PR
35727f4a2a97SMark Fasheh 	 * downconverts, we'll lose our mappings and have to build
35737f4a2a97SMark Fasheh 	 * them up again.
35747f4a2a97SMark Fasheh 	 */
35757f4a2a97SMark Fasheh 	unmap_mapping_range(mapping, 0, 0, 0);
35767f4a2a97SMark Fasheh 
3577ccd979bdSMark Fasheh 	if (filemap_fdatawrite(mapping)) {
3578b0697053SMark Fasheh 		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
3579b0697053SMark Fasheh 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
3580ccd979bdSMark Fasheh 	}
3581ccd979bdSMark Fasheh 	sync_mapping_buffers(mapping);
3582bd3e7610SJoel Becker 	if (blocking == DLM_LOCK_EX) {
3583ccd979bdSMark Fasheh 		truncate_inode_pages(mapping, 0);
3584ccd979bdSMark Fasheh 	} else {
3585ccd979bdSMark Fasheh 		/* We only need to wait on the I/O if we're not also
3586ccd979bdSMark Fasheh 		 * truncating pages because truncate_inode_pages waits
3587ccd979bdSMark Fasheh 		 * for us above. We don't truncate pages if we're
3588ccd979bdSMark Fasheh 		 * blocking anything < EXMODE because we want to keep
3589ccd979bdSMark Fasheh 		 * them around in that case. */
3590ccd979bdSMark Fasheh 		filemap_fdatawait(mapping);
3591ccd979bdSMark Fasheh 	}
3592ccd979bdSMark Fasheh 
3593f1f54068SMark Fasheh out:
3594d680efe9SMark Fasheh 	return UNBLOCK_CONTINUE;
3595ccd979bdSMark Fasheh }
3596ccd979bdSMark Fasheh 
3597a4338481STao Ma static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
3598a4338481STao Ma 				 struct ocfs2_lock_res *lockres,
3599810d5aebSMark Fasheh 				 int new_level)
3600810d5aebSMark Fasheh {
3601a4338481STao Ma 	int checkpointed = ocfs2_ci_fully_checkpointed(ci);
3602810d5aebSMark Fasheh 
3603bd3e7610SJoel Becker 	BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3604bd3e7610SJoel Becker 	BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
3605810d5aebSMark Fasheh 
3606810d5aebSMark Fasheh 	if (checkpointed)
3607810d5aebSMark Fasheh 		return 1;
3608810d5aebSMark Fasheh 
3609a4338481STao Ma 	ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
3610810d5aebSMark Fasheh 	return 0;
3611810d5aebSMark Fasheh }
3612810d5aebSMark Fasheh 
/*
 * check_downconvert-style callback for an inode-backed lock resource:
 * applies ocfs2_ci_checkpointed() to the metadata cache of the inode
 * behind @lockres and returns its result.
 */
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
					int new_level)
{
	struct ocfs2_caching_info *ci;

	ci = INODE_CACHE(ocfs2_lock_res_inode(lockres));

	return ocfs2_ci_checkpointed(ci, lockres, new_level);
}
3620a4338481STao Ma 
/*
 * set_lvb-style callback for an inode-backed lock resource: hands the
 * inode behind @lockres to __ocfs2_stuff_meta_lvb().
 */
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
{
	__ocfs2_stuff_meta_lvb(ocfs2_lock_res_inode(lockres));
}
3627810d5aebSMark Fasheh 
/*
 * Performs the final reference drop on our dentry lock. Today this runs
 * in the downconvert thread, although the dlmglue API could be
 * simplified in the future by pushing these drops off to the ocfs2_wq.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
3639d680efe9SMark Fasheh 
/*
 * d_delete() matching dentries before the lock downconvert.
 *
 * At this point, any process waiting to destroy the
 * dentry_lock due to last ref count is stopped by the
 * OCFS2_LOCK_QUEUED flag.
 *
 * We have two potential problems
 *
 * 1) If we do the last reference drop on our dentry_lock (via dput)
 *    we'll wind up in ocfs2_release_dentry_lock(), waiting on
 *    the downconvert to finish. Instead we take an elevated
 *    reference and push the drop until after we've completed our
 *    unblock processing.
 *
 * 2) There might be another process with a final reference,
 *    waiting on us to finish processing. If this is the case, we
 *    detect it and exit out - there's no more dentries anyway.
 *
 * @lockres:  the dentry lock resource being downconverted
 * @blocking: the DLM level we are blocking (only DLM_LOCK_PR is
 *            special-cased below)
 *
 * Returns UNBLOCK_CONTINUE when no extra reference was taken,
 * UNBLOCK_STOP_POST when the extra reference is the only one left, and
 * UNBLOCK_CONTINUE_POST otherwise.
 * NOTE(review): the *_POST values presumably make the caller invoke the
 * post_unlock hook, which drops the extra reference - confirm against
 * ocfs2_unblock_lock().
 */
static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
				       int blocking)
{
	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
	struct dentry *dentry;
	unsigned long flags;
	int extra_ref = 0;

	/*
	 * This node is blocking another node from getting a read
	 * lock. This happens when we've renamed within a
	 * directory. We've forced the other nodes to d_delete(), but
	 * we never actually dropped our lock because it's still
	 * valid. The downconvert code will retain a PR for this node,
	 * so there's no further work to do.
	 */
	if (blocking == DLM_LOCK_PR)
		return UNBLOCK_CONTINUE;

	/*
	 * Mark this inode as potentially orphaned. The code in
	 * ocfs2_delete_inode() will figure out whether it actually
	 * needs to be freed or not.
	 */
	spin_lock(&oi->ip_lock);
	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
	spin_unlock(&oi->ip_lock);

	/*
	 * Yuck. We need to make sure however that the check of
	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
	 * respect to a reference decrement or the setting of that
	 * flag.
	 */
	spin_lock_irqsave(&lockres->l_lock, flags);
	spin_lock(&dentry_attach_lock);
	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
	    && dl->dl_count) {
		dl->dl_count++;
		extra_ref = 1;
	}
	spin_unlock(&dentry_attach_lock);
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	mlog(0, "extra_ref = %d\n", extra_ref);

	/*
	 * We have a process waiting on us in ocfs2_dentry_iput(),
	 * which means we can't have any more outstanding
	 * aliases. There's no need to do any more work.
	 */
	if (!extra_ref)
		return UNBLOCK_CONTINUE;

	/*
	 * Walk every local alias of this inode under the parent block
	 * number. dentry_attach_lock is held for the lookup only and is
	 * dropped around the dcache calls.
	 */
	spin_lock(&dentry_attach_lock);
	while (1) {
		dentry = ocfs2_find_local_alias(dl->dl_inode,
						dl->dl_parent_blkno, 1);
		if (!dentry)
			break;
		spin_unlock(&dentry_attach_lock);

		mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
		     dentry->d_name.name);

		/*
		 * The following dcache calls may do an
		 * iput(). Normally we don't want that from the
		 * downconverting thread, but in this case it's ok
		 * because the requesting node already has an
		 * exclusive lock on the inode, so it can't be queued
		 * for a downconvert.
		 */
		d_delete(dentry);
		dput(dentry);

		spin_lock(&dentry_attach_lock);
	}
	spin_unlock(&dentry_attach_lock);

	/*
	 * If we are the last holder of this dentry lock, there is no
	 * reason to downconvert so skip straight to the unlock.
	 */
	if (dl->dl_count == 1)
		return UNBLOCK_STOP_POST;

	return UNBLOCK_CONTINUE_POST;
}
3749d680efe9SMark Fasheh 
37508dec98edSTao Ma static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
37518dec98edSTao Ma 					    int new_level)
37528dec98edSTao Ma {
37538dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
37548dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
37558dec98edSTao Ma 
37568dec98edSTao Ma 	return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
37578dec98edSTao Ma }
37588dec98edSTao Ma 
37598dec98edSTao Ma static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
37608dec98edSTao Ma 					 int blocking)
37618dec98edSTao Ma {
37628dec98edSTao Ma 	struct ocfs2_refcount_tree *tree =
37638dec98edSTao Ma 				ocfs2_lock_res_refcount_tree(lockres);
37648dec98edSTao Ma 
37658dec98edSTao Ma 	ocfs2_metadata_cache_purge(&tree->rf_ci);
37668dec98edSTao Ma 
37678dec98edSTao Ma 	return UNBLOCK_CONTINUE;
37688dec98edSTao Ma }
37698dec98edSTao Ma 
37709e33d69fSJan Kara static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
37719e33d69fSJan Kara {
37729e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb;
37739e33d69fSJan Kara 	struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
37749e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
37759e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
37769e33d69fSJan Kara 
37779e33d69fSJan Kara 	mlog_entry_void();
37789e33d69fSJan Kara 
3779a641dc2aSMark Fasheh 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
37809e33d69fSJan Kara 	lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
37819e33d69fSJan Kara 	lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
37829e33d69fSJan Kara 	lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
37839e33d69fSJan Kara 	lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
37849e33d69fSJan Kara 	lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
37859e33d69fSJan Kara 	lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
37869e33d69fSJan Kara 	lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
37879e33d69fSJan Kara 
37889e33d69fSJan Kara 	mlog_exit_void();
37899e33d69fSJan Kara }
37909e33d69fSJan Kara 
37919e33d69fSJan Kara void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
37929e33d69fSJan Kara {
37939e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
37949e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
37959e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
37969e33d69fSJan Kara 
37979e33d69fSJan Kara 	mlog_entry_void();
37989e33d69fSJan Kara 	if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
37999e33d69fSJan Kara 		ocfs2_cluster_unlock(osb, lockres, level);
38009e33d69fSJan Kara 	mlog_exit_void();
38019e33d69fSJan Kara }
38029e33d69fSJan Kara 
38039e33d69fSJan Kara static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
38049e33d69fSJan Kara {
38059e33d69fSJan Kara 	struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
38069e33d69fSJan Kara 					    oinfo->dqi_gi.dqi_type);
38079e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
38089e33d69fSJan Kara 	struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
380985eb8b73SJoel Becker 	struct buffer_head *bh = NULL;
38109e33d69fSJan Kara 	struct ocfs2_global_disk_dqinfo *gdinfo;
38119e33d69fSJan Kara 	int status = 0;
38129e33d69fSJan Kara 
38131c520dfbSJoel Becker 	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
38141c520dfbSJoel Becker 	    lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
38159e33d69fSJan Kara 		info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
38169e33d69fSJan Kara 		info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
38179e33d69fSJan Kara 		oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
38189e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
38199e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
38209e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
38219e33d69fSJan Kara 					be32_to_cpu(lvb->lvb_free_entry);
38229e33d69fSJan Kara 	} else {
382385eb8b73SJoel Becker 		status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh);
382485eb8b73SJoel Becker 		if (status) {
38259e33d69fSJan Kara 			mlog_errno(status);
38269e33d69fSJan Kara 			goto bail;
38279e33d69fSJan Kara 		}
38289e33d69fSJan Kara 		gdinfo = (struct ocfs2_global_disk_dqinfo *)
38299e33d69fSJan Kara 					(bh->b_data + OCFS2_GLOBAL_INFO_OFF);
38309e33d69fSJan Kara 		info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
38319e33d69fSJan Kara 		info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
38329e33d69fSJan Kara 		oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
38339e33d69fSJan Kara 		oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
38349e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
38359e33d69fSJan Kara 		oinfo->dqi_gi.dqi_free_entry =
38369e33d69fSJan Kara 					le32_to_cpu(gdinfo->dqi_free_entry);
38379e33d69fSJan Kara 		brelse(bh);
38389e33d69fSJan Kara 		ocfs2_track_lock_refresh(lockres);
38399e33d69fSJan Kara 	}
38409e33d69fSJan Kara 
38419e33d69fSJan Kara bail:
38429e33d69fSJan Kara 	return status;
38439e33d69fSJan Kara }
38449e33d69fSJan Kara 
38459e33d69fSJan Kara /* Lock quota info, this function expects at least shared lock on the quota file
38469e33d69fSJan Kara  * so that we can safely refresh quota info from disk. */
38479e33d69fSJan Kara int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
38489e33d69fSJan Kara {
38499e33d69fSJan Kara 	struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
38509e33d69fSJan Kara 	struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
38519e33d69fSJan Kara 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
38529e33d69fSJan Kara 	int status = 0;
38539e33d69fSJan Kara 
38549e33d69fSJan Kara 	mlog_entry_void();
38559e33d69fSJan Kara 
38569e33d69fSJan Kara 	/* On RO devices, locking really isn't needed... */
38579e33d69fSJan Kara 	if (ocfs2_is_hard_readonly(osb)) {
38589e33d69fSJan Kara 		if (ex)
38599e33d69fSJan Kara 			status = -EROFS;
38609e33d69fSJan Kara 		goto bail;
38619e33d69fSJan Kara 	}
38629e33d69fSJan Kara 	if (ocfs2_mount_local(osb))
38639e33d69fSJan Kara 		goto bail;
38649e33d69fSJan Kara 
38659e33d69fSJan Kara 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
38669e33d69fSJan Kara 	if (status < 0) {
38679e33d69fSJan Kara 		mlog_errno(status);
38689e33d69fSJan Kara 		goto bail;
38699e33d69fSJan Kara 	}
38709e33d69fSJan Kara 	if (!ocfs2_should_refresh_lock_res(lockres))
38719e33d69fSJan Kara 		goto bail;
38729e33d69fSJan Kara 	/* OK, we have the lock but we need to refresh the quota info */
38739e33d69fSJan Kara 	status = ocfs2_refresh_qinfo(oinfo);
38749e33d69fSJan Kara 	if (status)
38759e33d69fSJan Kara 		ocfs2_qinfo_unlock(oinfo, ex);
38769e33d69fSJan Kara 	ocfs2_complete_lock_res_refresh(lockres, status);
38779e33d69fSJan Kara bail:
38789e33d69fSJan Kara 	mlog_exit(status);
38799e33d69fSJan Kara 	return status;
38809e33d69fSJan Kara }
38819e33d69fSJan Kara 
38828dec98edSTao Ma int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
38838dec98edSTao Ma {
38848dec98edSTao Ma 	int status;
38858dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
38868dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
38878dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
38888dec98edSTao Ma 
38898dec98edSTao Ma 
38908dec98edSTao Ma 	if (ocfs2_is_hard_readonly(osb))
38918dec98edSTao Ma 		return -EROFS;
38928dec98edSTao Ma 
38938dec98edSTao Ma 	if (ocfs2_mount_local(osb))
38948dec98edSTao Ma 		return 0;
38958dec98edSTao Ma 
38968dec98edSTao Ma 	status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
38978dec98edSTao Ma 	if (status < 0)
38988dec98edSTao Ma 		mlog_errno(status);
38998dec98edSTao Ma 
39008dec98edSTao Ma 	return status;
39018dec98edSTao Ma }
39028dec98edSTao Ma 
39038dec98edSTao Ma void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
39048dec98edSTao Ma {
39058dec98edSTao Ma 	int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
39068dec98edSTao Ma 	struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
39078dec98edSTao Ma 	struct ocfs2_super *osb = lockres->l_priv;
39088dec98edSTao Ma 
39098dec98edSTao Ma 	if (!ocfs2_mount_local(osb))
39108dec98edSTao Ma 		ocfs2_cluster_unlock(osb, lockres, level);
39118dec98edSTao Ma }
39128dec98edSTao Ma 
/*
 * This is the filesystem locking protocol.  It provides the lock handling
 * hooks for the underlying DLM.  It has a maximum version number.
 * The version number allows interoperability with systems running at
 * the same major number and an equal or smaller minor number.
 *
 * Whenever the filesystem does new things with locks (adds or removes a
 * lock, orders them differently, does different things underneath a lock),
 * the version must be changed.  The protocol is negotiated when joining
 * the dlm domain.  A node may join the domain if its major version is
 * identical to all other nodes and its minor version is greater than
 * or equal to all other nodes.  When its minor version is greater than
 * the other nodes, it will run at the minor version specified by the
 * other nodes.
 *
 * If a locking change is made that will not be compatible with older
 * versions, the major number must be increased and the minor version set
 * to zero.  If a change merely adds a behavior that can be disabled when
 * speaking to older versions, the minor version must be increased.  If a
 * change adds a fully backwards compatible change (eg, LVB changes that
 * are just ignored by older versions), the version does not need to be
 * updated.
 *
 * This table is handed to the stack glue by ocfs2_set_locking_protocol().
 */
static struct ocfs2_locking_protocol lproto = {
	/* Highest protocol version this filesystem code speaks. */
	.lp_max_version = {
		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
	},
	/* AST callbacks for lock, blocking and unlock events. */
	.lp_lock_ast		= ocfs2_locking_ast,
	.lp_blocking_ast	= ocfs2_blocking_ast,
	.lp_unlock_ast		= ocfs2_unlock_ast,
};
394524ef1815SJoel Becker 
394663e0c48aSJoel Becker void ocfs2_set_locking_protocol(void)
394724ef1815SJoel Becker {
394863e0c48aSJoel Becker 	ocfs2_stack_glue_set_locking_protocol(&lproto);
394924ef1815SJoel Becker }
395024ef1815SJoel Becker 
395124ef1815SJoel Becker 
/*
 * Handle one lock resource taken off the blocked list by the
 * downconvert thread.
 *
 * Unless the lockres is already marked OCFS2_LOCK_FREEING, it is passed
 * to ocfs2_unblock_lock(). Afterwards the lockres is either requeued
 * (when ctl.requeue is set and it is not being freed) or has its
 * OCFS2_LOCK_QUEUED flag cleared. Finally, if the unblock action is
 * anything other than UNBLOCK_CONTINUE and the lock type provides a
 * post_unlock hook, that hook is called with no spinlock held.
 */
static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
				       struct ocfs2_lock_res *lockres)
{
	int status;
	/* Zero-initialised, so the FREEING shortcut sees requeue == 0. */
	struct ocfs2_unblock_ctl ctl = {0, 0,};
	unsigned long flags;

	/* Our reference to the lockres in this function can be
	 * considered valid until we remove the OCFS2_LOCK_QUEUED
	 * flag. */

	mlog_entry_void();

	BUG_ON(!lockres);
	BUG_ON(!lockres->l_ops);

	mlog(0, "lockres %s blocked.\n", lockres->l_name);

	/* Detect whether a lock has been marked as going away while
	 * the downconvert thread was processing other things. A lock can
	 * still be marked with OCFS2_LOCK_FREEING after this check,
	 * but short circuiting here will still save us some
	 * performance. */
	spin_lock_irqsave(&lockres->l_lock, flags);
	if (lockres->l_flags & OCFS2_LOCK_FREEING)
		goto unqueue;
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	status = ocfs2_unblock_lock(osb, lockres, &ctl);
	if (status < 0)
		mlog_errno(status);

	/* Both paths reach "unqueue" with l_lock held. */
	spin_lock_irqsave(&lockres->l_lock, flags);
unqueue:
	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
	} else
		ocfs2_schedule_blocked_lock(osb, lockres);

	mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name,
	     ctl.requeue ? "yes" : "no");
	spin_unlock_irqrestore(&lockres->l_lock, flags);

	/* The hook is optional and runs outside of l_lock. */
	if (ctl.unblock_action != UNBLOCK_CONTINUE
	    && lockres->l_ops->post_unlock)
		lockres->l_ops->post_unlock(osb, lockres);

	mlog_exit_void();
}
4001ccd979bdSMark Fasheh 
4002ccd979bdSMark Fasheh static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
4003ccd979bdSMark Fasheh 					struct ocfs2_lock_res *lockres)
4004ccd979bdSMark Fasheh {
4005ccd979bdSMark Fasheh 	mlog_entry_void();
4006ccd979bdSMark Fasheh 
4007ccd979bdSMark Fasheh 	assert_spin_locked(&lockres->l_lock);
4008ccd979bdSMark Fasheh 
4009ccd979bdSMark Fasheh 	if (lockres->l_flags & OCFS2_LOCK_FREEING) {
4010ccd979bdSMark Fasheh 		/* Do not schedule a lock for downconvert when it's on
4011ccd979bdSMark Fasheh 		 * the way to destruction - any nodes wanting access
4012ccd979bdSMark Fasheh 		 * to the resource will get it soon. */
4013ccd979bdSMark Fasheh 		mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n",
4014ccd979bdSMark Fasheh 		     lockres->l_name, lockres->l_flags);
4015ccd979bdSMark Fasheh 		return;
4016ccd979bdSMark Fasheh 	}
4017ccd979bdSMark Fasheh 
4018ccd979bdSMark Fasheh 	lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
4019ccd979bdSMark Fasheh 
402034d024f8SMark Fasheh 	spin_lock(&osb->dc_task_lock);
4021ccd979bdSMark Fasheh 	if (list_empty(&lockres->l_blocked_list)) {
4022ccd979bdSMark Fasheh 		list_add_tail(&lockres->l_blocked_list,
4023ccd979bdSMark Fasheh 			      &osb->blocked_lock_list);
4024ccd979bdSMark Fasheh 		osb->blocked_lock_count++;
4025ccd979bdSMark Fasheh 	}
402634d024f8SMark Fasheh 	spin_unlock(&osb->dc_task_lock);
4027ccd979bdSMark Fasheh 
4028ccd979bdSMark Fasheh 	mlog_exit_void();
4029ccd979bdSMark Fasheh }
403034d024f8SMark Fasheh 
403134d024f8SMark Fasheh static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
403234d024f8SMark Fasheh {
403334d024f8SMark Fasheh 	unsigned long processed;
403434d024f8SMark Fasheh 	struct ocfs2_lock_res *lockres;
403534d024f8SMark Fasheh 
403634d024f8SMark Fasheh 	mlog_entry_void();
403734d024f8SMark Fasheh 
403834d024f8SMark Fasheh 	spin_lock(&osb->dc_task_lock);
403934d024f8SMark Fasheh 	/* grab this early so we know to try again if a state change and
404034d024f8SMark Fasheh 	 * wake happens part-way through our work  */
404134d024f8SMark Fasheh 	osb->dc_work_sequence = osb->dc_wake_sequence;
404234d024f8SMark Fasheh 
404334d024f8SMark Fasheh 	processed = osb->blocked_lock_count;
404434d024f8SMark Fasheh 	while (processed) {
404534d024f8SMark Fasheh 		BUG_ON(list_empty(&osb->blocked_lock_list));
404634d024f8SMark Fasheh 
404734d024f8SMark Fasheh 		lockres = list_entry(osb->blocked_lock_list.next,
404834d024f8SMark Fasheh 				     struct ocfs2_lock_res, l_blocked_list);
404934d024f8SMark Fasheh 		list_del_init(&lockres->l_blocked_list);
405034d024f8SMark Fasheh 		osb->blocked_lock_count--;
405134d024f8SMark Fasheh 		spin_unlock(&osb->dc_task_lock);
405234d024f8SMark Fasheh 
405334d024f8SMark Fasheh 		BUG_ON(!processed);
405434d024f8SMark Fasheh 		processed--;
405534d024f8SMark Fasheh 
405634d024f8SMark Fasheh 		ocfs2_process_blocked_lock(osb, lockres);
405734d024f8SMark Fasheh 
405834d024f8SMark Fasheh 		spin_lock(&osb->dc_task_lock);
405934d024f8SMark Fasheh 	}
406034d024f8SMark Fasheh 	spin_unlock(&osb->dc_task_lock);
406134d024f8SMark Fasheh 
406234d024f8SMark Fasheh 	mlog_exit_void();
406334d024f8SMark Fasheh }
406434d024f8SMark Fasheh 
406534d024f8SMark Fasheh static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
406634d024f8SMark Fasheh {
406734d024f8SMark Fasheh 	int empty = 0;
406834d024f8SMark Fasheh 
406934d024f8SMark Fasheh 	spin_lock(&osb->dc_task_lock);
407034d024f8SMark Fasheh 	if (list_empty(&osb->blocked_lock_list))
407134d024f8SMark Fasheh 		empty = 1;
407234d024f8SMark Fasheh 
407334d024f8SMark Fasheh 	spin_unlock(&osb->dc_task_lock);
407434d024f8SMark Fasheh 	return empty;
407534d024f8SMark Fasheh }
407634d024f8SMark Fasheh 
407734d024f8SMark Fasheh static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
407834d024f8SMark Fasheh {
407934d024f8SMark Fasheh 	int should_wake = 0;
408034d024f8SMark Fasheh 
408134d024f8SMark Fasheh 	spin_lock(&osb->dc_task_lock);
408234d024f8SMark Fasheh 	if (osb->dc_work_sequence != osb->dc_wake_sequence)
408334d024f8SMark Fasheh 		should_wake = 1;
408434d024f8SMark Fasheh 	spin_unlock(&osb->dc_task_lock);
408534d024f8SMark Fasheh 
408634d024f8SMark Fasheh 	return should_wake;
408734d024f8SMark Fasheh }
408834d024f8SMark Fasheh 
4089200bfae3SAdrian Bunk static int ocfs2_downconvert_thread(void *arg)
409034d024f8SMark Fasheh {
409134d024f8SMark Fasheh 	int status = 0;
409234d024f8SMark Fasheh 	struct ocfs2_super *osb = arg;
409334d024f8SMark Fasheh 
409434d024f8SMark Fasheh 	/* only quit once we've been asked to stop and there is no more
409534d024f8SMark Fasheh 	 * work available */
409634d024f8SMark Fasheh 	while (!(kthread_should_stop() &&
409734d024f8SMark Fasheh 		ocfs2_downconvert_thread_lists_empty(osb))) {
409834d024f8SMark Fasheh 
409934d024f8SMark Fasheh 		wait_event_interruptible(osb->dc_event,
410034d024f8SMark Fasheh 					 ocfs2_downconvert_thread_should_wake(osb) ||
410134d024f8SMark Fasheh 					 kthread_should_stop());
410234d024f8SMark Fasheh 
410334d024f8SMark Fasheh 		mlog(0, "downconvert_thread: awoken\n");
410434d024f8SMark Fasheh 
410534d024f8SMark Fasheh 		ocfs2_downconvert_thread_do_work(osb);
410634d024f8SMark Fasheh 	}
410734d024f8SMark Fasheh 
410834d024f8SMark Fasheh 	osb->dc_task = NULL;
410934d024f8SMark Fasheh 	return status;
411034d024f8SMark Fasheh }
411134d024f8SMark Fasheh 
411234d024f8SMark Fasheh void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
411334d024f8SMark Fasheh {
411434d024f8SMark Fasheh 	spin_lock(&osb->dc_task_lock);
411534d024f8SMark Fasheh 	/* make sure the voting thread gets a swipe at whatever changes
411634d024f8SMark Fasheh 	 * the caller may have made to the voting state */
411734d024f8SMark Fasheh 	osb->dc_wake_sequence++;
411834d024f8SMark Fasheh 	spin_unlock(&osb->dc_task_lock);
411934d024f8SMark Fasheh 	wake_up(&osb->dc_event);
412034d024f8SMark Fasheh }
4121