xref: /openbmc/linux/fs/nfs/pnfs.c (revision 55e43d6abd078ed6d219902ce8cb4d68e3c993ba)
/*
 *  pNFS functions to call and manage layout drivers.
 *
 *  Copyright (c) 2002 [year of first publication]
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement.  The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */

3085e174baSRicardo Labiaga #include <linux/nfs_fs.h>
31493292ddSTrond Myklebust #include <linux/nfs_page.h>
32143cb494SPaul Gortmaker #include <linux/module.h>
33ca440c38SJeff Layton #include <linux/sort.h>
34974cec8cSAndy Adamson #include "internal.h"
3585e174baSRicardo Labiaga #include "pnfs.h"
3664419a9bSAndy Adamson #include "iostat.h"
37cc668ab3STrond Myklebust #include "nfs4trace.h"
3840dd4b7aSTrond Myklebust #include "delegation.h"
398733408dSPeng Tao #include "nfs42.h"
401b146fcfSFred Isaman #include "nfs4_fs.h"
4185e174baSRicardo Labiaga 
4285e174baSRicardo Labiaga #define NFSDBG_FACILITY		NFSDBG_PNFS
4325c75333STrond Myklebust #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
4485e174baSRicardo Labiaga 
/*
 * Locking:
 *
 * pnfs_spinlock:
 *      protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules.
 *
 * Entries are struct pnfs_layoutdriver_type, linked through their
 * pnfs_tblid member.
 */
static LIST_HEAD(pnfs_modules_tbl);

5713c13a6aSTrond Myklebust static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
5868f74479STrond Myklebust static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
5968f74479STrond Myklebust 		struct list_head *free_me,
6068f74479STrond Myklebust 		const struct pnfs_layout_range *range,
6168f74479STrond Myklebust 		u32 seq);
62fe1cf946STrond Myklebust static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
63fe1cf946STrond Myklebust 		                struct list_head *tmp_list);
64aa1e0e3aSPeng Tao 
6502c35fcaSFred Isaman /* Return the registered pnfs layout driver module matching given id */
6602c35fcaSFred Isaman static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)6702c35fcaSFred Isaman find_pnfs_driver_locked(u32 id)
6802c35fcaSFred Isaman {
6902c35fcaSFred Isaman 	struct pnfs_layoutdriver_type *local;
7002c35fcaSFred Isaman 
7102c35fcaSFred Isaman 	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
7202c35fcaSFred Isaman 		if (local->id == id)
7302c35fcaSFred Isaman 			goto out;
7402c35fcaSFred Isaman 	local = NULL;
7502c35fcaSFred Isaman out:
7602c35fcaSFred Isaman 	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
7702c35fcaSFred Isaman 	return local;
7802c35fcaSFred Isaman }
7902c35fcaSFred Isaman 
8085e174baSRicardo Labiaga static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)8185e174baSRicardo Labiaga find_pnfs_driver(u32 id)
8285e174baSRicardo Labiaga {
8302c35fcaSFred Isaman 	struct pnfs_layoutdriver_type *local;
8402c35fcaSFred Isaman 
8502c35fcaSFred Isaman 	spin_lock(&pnfs_spinlock);
8602c35fcaSFred Isaman 	local = find_pnfs_driver_locked(id);
870a9c63faSTrond Myklebust 	if (local != NULL && !try_module_get(local->owner)) {
880a9c63faSTrond Myklebust 		dprintk("%s: Could not grab reference on module\n", __func__);
890a9c63faSTrond Myklebust 		local = NULL;
900a9c63faSTrond Myklebust 	}
9102c35fcaSFred Isaman 	spin_unlock(&pnfs_spinlock);
9202c35fcaSFred Isaman 	return local;
9385e174baSRicardo Labiaga }
9485e174baSRicardo Labiaga 
/*
 * Non-static wrapper around find_pnfs_driver(): returns the driver
 * registered for @id with a module reference held, or NULL.
 */
const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id)
{
	const struct pnfs_layoutdriver_type *ld = find_pnfs_driver(id);

	return ld;
}

pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type * ld)1007c9d845fSTrond Myklebust void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld)
1017c9d845fSTrond Myklebust {
1027c9d845fSTrond Myklebust 	if (ld)
1037c9d845fSTrond Myklebust 		module_put(ld->owner);
1047c9d845fSTrond Myklebust }
1057c9d845fSTrond Myklebust 
10685e174baSRicardo Labiaga void
unset_pnfs_layoutdriver(struct nfs_server * nfss)10785e174baSRicardo Labiaga unset_pnfs_layoutdriver(struct nfs_server *nfss)
10885e174baSRicardo Labiaga {
109738fd0f3SBenny Halevy 	if (nfss->pnfs_curr_ld) {
110738fd0f3SBenny Halevy 		if (nfss->pnfs_curr_ld->clear_layoutdriver)
111738fd0f3SBenny Halevy 			nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
1122a4c8994STrond Myklebust 		/* Decrement the MDS count. Purge the deviceid cache if zero */
1132a4c8994STrond Myklebust 		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
1142a4c8994STrond Myklebust 			nfs4_deviceid_purge_client(nfss->nfs_client);
11502c35fcaSFred Isaman 		module_put(nfss->pnfs_curr_ld->owner);
116738fd0f3SBenny Halevy 	}
11785e174baSRicardo Labiaga 	nfss->pnfs_curr_ld = NULL;
11885e174baSRicardo Labiaga }
11985e174baSRicardo Labiaga 
12085e174baSRicardo Labiaga /*
121ca440c38SJeff Layton  * When the server sends a list of layout types, we choose one in the order
122ca440c38SJeff Layton  * given in the list below.
123ca440c38SJeff Layton  *
124ca440c38SJeff Layton  * FIXME: should this list be configurable in some fashion? module param?
125ca440c38SJeff Layton  * 	  mount option? something else?
126ca440c38SJeff Layton  */
127ca440c38SJeff Layton static const u32 ld_prefs[] = {
128ca440c38SJeff Layton 	LAYOUT_SCSI,
129ca440c38SJeff Layton 	LAYOUT_BLOCK_VOLUME,
130ca440c38SJeff Layton 	LAYOUT_OSD2_OBJECTS,
131ca440c38SJeff Layton 	LAYOUT_FLEX_FILES,
132ca440c38SJeff Layton 	LAYOUT_NFSV4_1_FILES,
133ca440c38SJeff Layton 	0
134ca440c38SJeff Layton };
135ca440c38SJeff Layton 
136ca440c38SJeff Layton static int
ld_cmp(const void * e1,const void * e2)137ca440c38SJeff Layton ld_cmp(const void *e1, const void *e2)
138ca440c38SJeff Layton {
139ca440c38SJeff Layton 	u32 ld1 = *((u32 *)e1);
140ca440c38SJeff Layton 	u32 ld2 = *((u32 *)e2);
141ca440c38SJeff Layton 	int i;
142ca440c38SJeff Layton 
143ca440c38SJeff Layton 	for (i = 0; ld_prefs[i] != 0; i++) {
144ca440c38SJeff Layton 		if (ld1 == ld_prefs[i])
145ca440c38SJeff Layton 			return -1;
146ca440c38SJeff Layton 
147ca440c38SJeff Layton 		if (ld2 == ld_prefs[i])
148ca440c38SJeff Layton 			return 1;
149ca440c38SJeff Layton 	}
150ca440c38SJeff Layton 	return 0;
151ca440c38SJeff Layton }
152ca440c38SJeff Layton 
153ca440c38SJeff Layton /*
15485e174baSRicardo Labiaga  * Try to set the server's pnfs module to the pnfs layout type specified by id.
15585e174baSRicardo Labiaga  * Currently only one pNFS layout driver per filesystem is supported.
15685e174baSRicardo Labiaga  *
1573132e49eSJeff Layton  * @ids array of layout types supported by MDS.
15885e174baSRicardo Labiaga  */
15985e174baSRicardo Labiaga void
set_pnfs_layoutdriver(struct nfs_server * server,const struct nfs_fh * mntfh,struct nfs_fsinfo * fsinfo)160738fd0f3SBenny Halevy set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
161ca440c38SJeff Layton 		      struct nfs_fsinfo *fsinfo)
16285e174baSRicardo Labiaga {
16385e174baSRicardo Labiaga 	struct pnfs_layoutdriver_type *ld_type = NULL;
1643132e49eSJeff Layton 	u32 id;
165ca440c38SJeff Layton 	int i;
16685e174baSRicardo Labiaga 
16719274716SAnna Schumaker 	if (fsinfo->nlayouttypes == 0)
16819274716SAnna Schumaker 		goto out_no_driver;
16985e174baSRicardo Labiaga 	if (!(server->nfs_client->cl_exchange_flags &
17085e174baSRicardo Labiaga 		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
1713132e49eSJeff Layton 		printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n",
1723132e49eSJeff Layton 			__func__, server->nfs_client->cl_exchange_flags);
17385e174baSRicardo Labiaga 		goto out_no_driver;
17485e174baSRicardo Labiaga 	}
1753132e49eSJeff Layton 
176ca440c38SJeff Layton 	sort(fsinfo->layouttype, fsinfo->nlayouttypes,
177ca440c38SJeff Layton 		sizeof(*fsinfo->layouttype), ld_cmp, NULL);
1783132e49eSJeff Layton 
179ca440c38SJeff Layton 	for (i = 0; i < fsinfo->nlayouttypes; i++) {
180ca440c38SJeff Layton 		id = fsinfo->layouttype[i];
18185e174baSRicardo Labiaga 		ld_type = find_pnfs_driver(id);
18285e174baSRicardo Labiaga 		if (!ld_type) {
183ca440c38SJeff Layton 			request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX,
184ca440c38SJeff Layton 					id);
18585e174baSRicardo Labiaga 			ld_type = find_pnfs_driver(id);
1863132e49eSJeff Layton 		}
187ca440c38SJeff Layton 		if (ld_type)
188ca440c38SJeff Layton 			break;
189ca440c38SJeff Layton 	}
1903132e49eSJeff Layton 
19185e174baSRicardo Labiaga 	if (!ld_type) {
192ca440c38SJeff Layton 		dprintk("%s: No pNFS module found!\n", __func__);
19385e174baSRicardo Labiaga 		goto out_no_driver;
19485e174baSRicardo Labiaga 	}
1953132e49eSJeff Layton 
19685e174baSRicardo Labiaga 	server->pnfs_curr_ld = ld_type;
197738fd0f3SBenny Halevy 	if (ld_type->set_layoutdriver
198738fd0f3SBenny Halevy 	    && ld_type->set_layoutdriver(server, mntfh)) {
199a030889aSWeston Andros Adamson 		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
200a030889aSWeston Andros Adamson 			"driver %u.\n", __func__, id);
201738fd0f3SBenny Halevy 		module_put(ld_type->owner);
202738fd0f3SBenny Halevy 		goto out_no_driver;
203738fd0f3SBenny Halevy 	}
2042a4c8994STrond Myklebust 	/* Bump the MDS count */
2052a4c8994STrond Myklebust 	atomic_inc(&server->nfs_client->cl_mds_count);
206ea8eecddSChristoph Hellwig 
20785e174baSRicardo Labiaga 	dprintk("%s: pNFS module for %u set\n", __func__, id);
20885e174baSRicardo Labiaga 	return;
20985e174baSRicardo Labiaga 
21085e174baSRicardo Labiaga out_no_driver:
21185e174baSRicardo Labiaga 	dprintk("%s: Using NFSv4 I/O\n", __func__);
21285e174baSRicardo Labiaga 	server->pnfs_curr_ld = NULL;
21385e174baSRicardo Labiaga }
21402c35fcaSFred Isaman 
21502c35fcaSFred Isaman int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type * ld_type)21602c35fcaSFred Isaman pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
21702c35fcaSFred Isaman {
21802c35fcaSFred Isaman 	int status = -EINVAL;
21902c35fcaSFred Isaman 	struct pnfs_layoutdriver_type *tmp;
22002c35fcaSFred Isaman 
22102c35fcaSFred Isaman 	if (ld_type->id == 0) {
222a030889aSWeston Andros Adamson 		printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
22302c35fcaSFred Isaman 		return status;
22402c35fcaSFred Isaman 	}
225b1f69b75SAndy Adamson 	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
226a030889aSWeston Andros Adamson 		printk(KERN_ERR "NFS: %s Layout driver must provide "
227b1f69b75SAndy Adamson 		       "alloc_lseg and free_lseg.\n", __func__);
228b1f69b75SAndy Adamson 		return status;
229b1f69b75SAndy Adamson 	}
23002c35fcaSFred Isaman 
23102c35fcaSFred Isaman 	spin_lock(&pnfs_spinlock);
23202c35fcaSFred Isaman 	tmp = find_pnfs_driver_locked(ld_type->id);
23302c35fcaSFred Isaman 	if (!tmp) {
23402c35fcaSFred Isaman 		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
23502c35fcaSFred Isaman 		status = 0;
23602c35fcaSFred Isaman 		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
23702c35fcaSFred Isaman 			ld_type->name);
23802c35fcaSFred Isaman 	} else {
239a030889aSWeston Andros Adamson 		printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
24002c35fcaSFred Isaman 			__func__, ld_type->id);
24102c35fcaSFred Isaman 	}
24202c35fcaSFred Isaman 	spin_unlock(&pnfs_spinlock);
24302c35fcaSFred Isaman 
24402c35fcaSFred Isaman 	return status;
24502c35fcaSFred Isaman }
24602c35fcaSFred Isaman EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
24702c35fcaSFred Isaman 
24802c35fcaSFred Isaman void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type * ld_type)24902c35fcaSFred Isaman pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
25002c35fcaSFred Isaman {
25102c35fcaSFred Isaman 	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
25202c35fcaSFred Isaman 	spin_lock(&pnfs_spinlock);
25302c35fcaSFred Isaman 	list_del(&ld_type->pnfs_tblid);
25402c35fcaSFred Isaman 	spin_unlock(&pnfs_spinlock);
25502c35fcaSFred Isaman }
25602c35fcaSFred Isaman EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
257e5e94017SBenny Halevy 
258b1f69b75SAndy Adamson /*
259b1f69b75SAndy Adamson  * pNFS client layout cache
260b1f69b75SAndy Adamson  */
261b1f69b75SAndy Adamson 
262cc6e5340SFred Isaman /* Need to hold i_lock if caller does not already hold reference */
26343f1b3daSFred Isaman void
pnfs_get_layout_hdr(struct pnfs_layout_hdr * lo)26470c3bd2bSTrond Myklebust pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
265e5e94017SBenny Halevy {
2662b28a7beSElena Reshetova 	refcount_inc(&lo->plh_refcount);
267e5e94017SBenny Halevy }
268e5e94017SBenny Halevy 
269636fb9c8SBenny Halevy static struct pnfs_layout_hdr *
pnfs_alloc_layout_hdr(struct inode * ino,gfp_t gfp_flags)270636fb9c8SBenny Halevy pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
271636fb9c8SBenny Halevy {
272636fb9c8SBenny Halevy 	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
27357934278STrond Myklebust 	return ld->alloc_layout_hdr(ino, gfp_flags);
274636fb9c8SBenny Halevy }
275636fb9c8SBenny Halevy 
276636fb9c8SBenny Halevy static void
pnfs_free_layout_hdr(struct pnfs_layout_hdr * lo)277636fb9c8SBenny Halevy pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
278636fb9c8SBenny Halevy {
2799c626381STrond Myklebust 	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
2809c626381STrond Myklebust 	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
2819c626381STrond Myklebust 
282cf6605d1STrond Myklebust 	if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) {
2839c626381STrond Myklebust 		struct nfs_client *clp = server->nfs_client;
2849c626381STrond Myklebust 
2859c626381STrond Myklebust 		spin_lock(&clp->cl_lock);
286cf6605d1STrond Myklebust 		list_del_rcu(&lo->plh_layouts);
2879c626381STrond Myklebust 		spin_unlock(&clp->cl_lock);
2889c626381STrond Myklebust 	}
289a52458b4SNeilBrown 	put_cred(lo->plh_lc_cred);
29057934278STrond Myklebust 	return ld->free_layout_hdr(lo);
291636fb9c8SBenny Halevy }
292636fb9c8SBenny Halevy 
293e5e94017SBenny Halevy static void
pnfs_detach_layout_hdr(struct pnfs_layout_hdr * lo)2946622c3eaSTrond Myklebust pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
295e5e94017SBenny Halevy {
296bb346f63STrond Myklebust 	struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
297e5e94017SBenny Halevy 	dprintk("%s: freeing layout cache %p\n", __func__, lo);
298bb346f63STrond Myklebust 	nfsi->layout = NULL;
299bb346f63STrond Myklebust 	/* Reset MDS Threshold I/O counters */
300bb346f63STrond Myklebust 	nfsi->write_io = 0;
301bb346f63STrond Myklebust 	nfsi->read_io = 0;
302e5e94017SBenny Halevy }
303e5e94017SBenny Halevy 
304b1f69b75SAndy Adamson void
pnfs_put_layout_hdr(struct pnfs_layout_hdr * lo)30570c3bd2bSTrond Myklebust pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
306974cec8cSAndy Adamson {
3079c6376ebSTrond Myklebust 	struct inode *inode;
308b6d49ecdSTrond Myklebust 	unsigned long i_state;
309cc6e5340SFred Isaman 
3109c6376ebSTrond Myklebust 	if (!lo)
3119c6376ebSTrond Myklebust 		return;
3129c6376ebSTrond Myklebust 	inode = lo->plh_inode;
31313c13a6aSTrond Myklebust 	pnfs_layoutreturn_before_put_layout_hdr(lo);
31413c13a6aSTrond Myklebust 
3152b28a7beSElena Reshetova 	if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
316566f8737SPeng Tao 		if (!list_empty(&lo->plh_segs))
317566f8737SPeng Tao 			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
3186622c3eaSTrond Myklebust 		pnfs_detach_layout_hdr(lo);
319b6d49ecdSTrond Myklebust 		i_state = inode->i_state;
320974cec8cSAndy Adamson 		spin_unlock(&inode->i_lock);
3216622c3eaSTrond Myklebust 		pnfs_free_layout_hdr(lo);
322b6d49ecdSTrond Myklebust 		/* Notify pnfs_destroy_layout_final() that we're done */
323b6d49ecdSTrond Myklebust 		if (i_state & (I_FREEING | I_CLEAR))
324b6d49ecdSTrond Myklebust 			wake_up_var(lo);
325974cec8cSAndy Adamson 	}
326cc6e5340SFred Isaman }
327974cec8cSAndy Adamson 
328b5fdf841STrond Myklebust static struct inode *
pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr * lo)329b5fdf841STrond Myklebust pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo)
330b5fdf841STrond Myklebust {
331b5fdf841STrond Myklebust 	struct inode *inode = igrab(lo->plh_inode);
332b5fdf841STrond Myklebust 	if (inode)
333b5fdf841STrond Myklebust 		return inode;
334b5fdf841STrond Myklebust 	set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
335b5fdf841STrond Myklebust 	return NULL;
336b5fdf841STrond Myklebust }
337b5fdf841STrond Myklebust 
3381bcf34fdSTrond Myklebust /*
3391bcf34fdSTrond Myklebust  * Compare 2 layout stateid sequence ids, to see which is newer,
3401bcf34fdSTrond Myklebust  * taking into account wraparound issues.
3411bcf34fdSTrond Myklebust  */
pnfs_seqid_is_newer(u32 s1,u32 s2)3421bcf34fdSTrond Myklebust static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
3431bcf34fdSTrond Myklebust {
3441bcf34fdSTrond Myklebust 	return (s32)(s1 - s2) > 0;
3451bcf34fdSTrond Myklebust }
3461bcf34fdSTrond Myklebust 
/*
 * Advance lo->plh_barrier to @newseq when the barrier is currently zero
 * or @newseq is newer than it; otherwise leave it alone.
 */
static void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq)
{
	if (!lo->plh_barrier || pnfs_seqid_is_newer(newseq, lo->plh_barrier))
		lo->plh_barrier = newseq;
}

353ae5a459dSTrond Myklebust static void
pnfs_set_plh_return_info(struct pnfs_layout_hdr * lo,enum pnfs_iomode iomode,u32 seq)3544aab9732STrond Myklebust pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
3554aab9732STrond Myklebust 			 u32 seq)
3564aab9732STrond Myklebust {
3574aab9732STrond Myklebust 	if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode)
3584aab9732STrond Myklebust 		iomode = IOMODE_ANY;
3594aab9732STrond Myklebust 	lo->plh_return_iomode = iomode;
3604aab9732STrond Myklebust 	set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
361e20772cbSTrond Myklebust 	/*
362e20772cbSTrond Myklebust 	 * We must set lo->plh_return_seq to avoid livelocks with
363e20772cbSTrond Myklebust 	 * pnfs_layout_need_return()
364e20772cbSTrond Myklebust 	 */
365e20772cbSTrond Myklebust 	if (seq == 0)
366e20772cbSTrond Myklebust 		seq = be32_to_cpu(lo->plh_stateid.seqid);
367e20772cbSTrond Myklebust 	if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq))
3684aab9732STrond Myklebust 		lo->plh_return_seq = seq;
3691bcf34fdSTrond Myklebust 	pnfs_barrier_update(lo, seq);
3704aab9732STrond Myklebust }
3714aab9732STrond Myklebust 
3724aab9732STrond Myklebust static void
pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr * lo)373ae5a459dSTrond Myklebust pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
374ae5a459dSTrond Myklebust {
3755466d214STrond Myklebust 	struct pnfs_layout_segment *lseg;
376ae5a459dSTrond Myklebust 	lo->plh_return_iomode = 0;
377ae5a459dSTrond Myklebust 	lo->plh_return_seq = 0;
378ae5a459dSTrond Myklebust 	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
3795466d214STrond Myklebust 	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
3805466d214STrond Myklebust 		if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
3815466d214STrond Myklebust 			continue;
3825466d214STrond Myklebust 		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
3835466d214STrond Myklebust 	}
384ae5a459dSTrond Myklebust }
385ae5a459dSTrond Myklebust 
pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr * lo)386362fb578STrond Myklebust static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
387362fb578STrond Myklebust {
388362fb578STrond Myklebust 	clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
389362fb578STrond Myklebust 	clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags);
390362fb578STrond Myklebust 	smp_mb__after_atomic();
391362fb578STrond Myklebust 	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
392362fb578STrond Myklebust 	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
393362fb578STrond Myklebust }
394362fb578STrond Myklebust 
395fe1cf946STrond Myklebust static void
pnfs_clear_lseg_state(struct pnfs_layout_segment * lseg,struct list_head * free_me)396fe1cf946STrond Myklebust pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
397fe1cf946STrond Myklebust 		struct list_head *free_me)
398fe1cf946STrond Myklebust {
399fe1cf946STrond Myklebust 	clear_bit(NFS_LSEG_ROC, &lseg->pls_flags);
400fe1cf946STrond Myklebust 	clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
401fe1cf946STrond Myklebust 	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags))
402fe1cf946STrond Myklebust 		pnfs_lseg_dec_and_remove_zero(lseg, free_me);
403fe1cf946STrond Myklebust 	if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
404fe1cf946STrond Myklebust 		pnfs_lseg_dec_and_remove_zero(lseg, free_me);
405fe1cf946STrond Myklebust }
406fe1cf946STrond Myklebust 
4072454dfeaSTrond Myklebust /*
40830cb3ee2STrond Myklebust  * Update the seqid of a layout stateid after receiving
40930cb3ee2STrond Myklebust  * NFS4ERR_OLD_STATEID
4107380020eSTrond Myklebust  */
nfs4_layout_refresh_old_stateid(nfs4_stateid * dst,struct pnfs_layout_range * dst_range,struct inode * inode)41130cb3ee2STrond Myklebust bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
412ecf84026STrond Myklebust 		struct pnfs_layout_range *dst_range,
413ecf84026STrond Myklebust 		struct inode *inode)
4147380020eSTrond Myklebust {
4157380020eSTrond Myklebust 	struct pnfs_layout_hdr *lo;
416c16467dcSTrond Myklebust 	struct pnfs_layout_range range = {
417c16467dcSTrond Myklebust 		.iomode = IOMODE_ANY,
418c16467dcSTrond Myklebust 		.offset = 0,
419c16467dcSTrond Myklebust 		.length = NFS4_MAX_UINT64,
420c16467dcSTrond Myklebust 	};
4217380020eSTrond Myklebust 	bool ret = false;
422c16467dcSTrond Myklebust 	LIST_HEAD(head);
423c16467dcSTrond Myklebust 	int err;
4247380020eSTrond Myklebust 
4257380020eSTrond Myklebust 	spin_lock(&inode->i_lock);
4267380020eSTrond Myklebust 	lo = NFS_I(inode)->layout;
42730cb3ee2STrond Myklebust 	if (lo &&  pnfs_layout_is_valid(lo) &&
42830cb3ee2STrond Myklebust 	    nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
42930cb3ee2STrond Myklebust 		/* Is our call using the most recent seqid? If so, bump it */
43030cb3ee2STrond Myklebust 		if (!nfs4_stateid_is_newer(&lo->plh_stateid, dst)) {
43130cb3ee2STrond Myklebust 			nfs4_stateid_seqid_inc(dst);
43230cb3ee2STrond Myklebust 			ret = true;
43330cb3ee2STrond Myklebust 			goto out;
43430cb3ee2STrond Myklebust 		}
43530cb3ee2STrond Myklebust 		/* Try to update the seqid to the most recent */
436c16467dcSTrond Myklebust 		err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
437c16467dcSTrond Myklebust 		if (err != -EBUSY) {
4387380020eSTrond Myklebust 			dst->seqid = lo->plh_stateid.seqid;
439ecf84026STrond Myklebust 			*dst_range = range;
4407380020eSTrond Myklebust 			ret = true;
4417380020eSTrond Myklebust 		}
442c16467dcSTrond Myklebust 	}
44330cb3ee2STrond Myklebust out:
4447380020eSTrond Myklebust 	spin_unlock(&inode->i_lock);
445c16467dcSTrond Myklebust 	pnfs_free_lseg_list(&head);
4467380020eSTrond Myklebust 	return ret;
4477380020eSTrond Myklebust }
4487380020eSTrond Myklebust 
4497380020eSTrond Myklebust /*
4502454dfeaSTrond Myklebust  * Mark a pnfs_layout_hdr and all associated layout segments as invalid
4512454dfeaSTrond Myklebust  *
4522454dfeaSTrond Myklebust  * In order to continue using the pnfs_layout_hdr, a full recovery
4532454dfeaSTrond Myklebust  * is required.
4542454dfeaSTrond Myklebust  * Note that caller must hold inode->i_lock.
4552454dfeaSTrond Myklebust  */
4565f46be04STrond Myklebust int
pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr * lo,struct list_head * lseg_list)4572454dfeaSTrond Myklebust pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
4582454dfeaSTrond Myklebust 		struct list_head *lseg_list)
4592454dfeaSTrond Myklebust {
4602454dfeaSTrond Myklebust 	struct pnfs_layout_range range = {
4612454dfeaSTrond Myklebust 		.iomode = IOMODE_ANY,
4622454dfeaSTrond Myklebust 		.offset = 0,
4632454dfeaSTrond Myklebust 		.length = NFS4_MAX_UINT64,
4642454dfeaSTrond Myklebust 	};
465fe1cf946STrond Myklebust 	struct pnfs_layout_segment *lseg, *next;
4662454dfeaSTrond Myklebust 
4672454dfeaSTrond Myklebust 	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
468fe1cf946STrond Myklebust 	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
469fe1cf946STrond Myklebust 		pnfs_clear_lseg_state(lseg, lseg_list);
4705466d214STrond Myklebust 	pnfs_clear_layoutreturn_info(lo);
47168f74479STrond Myklebust 	pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
472880265c7STrond Myklebust 	set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
473362fb578STrond Myklebust 	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
474362fb578STrond Myklebust 	    !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
475362fb578STrond Myklebust 		pnfs_clear_layoutreturn_waitbit(lo);
476fe1cf946STrond Myklebust 	return !list_empty(&lo->plh_segs);
4772454dfeaSTrond Myklebust }
4782454dfeaSTrond Myklebust 
479b9e028fdSTrond Myklebust static int
pnfs_iomode_to_fail_bit(u32 iomode)480b9e028fdSTrond Myklebust pnfs_iomode_to_fail_bit(u32 iomode)
481b9e028fdSTrond Myklebust {
482b9e028fdSTrond Myklebust 	return iomode == IOMODE_RW ?
483b9e028fdSTrond Myklebust 		NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
484b9e028fdSTrond Myklebust }
485b9e028fdSTrond Myklebust 
486b9e028fdSTrond Myklebust static void
pnfs_layout_set_fail_bit(struct pnfs_layout_hdr * lo,int fail_bit)4873e621214STrond Myklebust pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
488b9e028fdSTrond Myklebust {
48925c75333STrond Myklebust 	lo->plh_retry_timestamp = jiffies;
49039e88fcfSYanchuan Nian 	if (!test_and_set_bit(fail_bit, &lo->plh_flags))
4912b28a7beSElena Reshetova 		refcount_inc(&lo->plh_refcount);
4923e621214STrond Myklebust }
4933e621214STrond Myklebust 
4943e621214STrond Myklebust static void
pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr * lo,int fail_bit)4953e621214STrond Myklebust pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
4963e621214STrond Myklebust {
4973e621214STrond Myklebust 	if (test_and_clear_bit(fail_bit, &lo->plh_flags))
4982b28a7beSElena Reshetova 		refcount_dec(&lo->plh_refcount);
4993e621214STrond Myklebust }
5003e621214STrond Myklebust 
5013e621214STrond Myklebust static void
pnfs_layout_io_set_failed(struct pnfs_layout_hdr * lo,u32 iomode)5023e621214STrond Myklebust pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
5033e621214STrond Myklebust {
5043e621214STrond Myklebust 	struct inode *inode = lo->plh_inode;
505115ce575STrond Myklebust 	struct pnfs_layout_range range = {
506115ce575STrond Myklebust 		.iomode = iomode,
507115ce575STrond Myklebust 		.offset = 0,
508115ce575STrond Myklebust 		.length = NFS4_MAX_UINT64,
509115ce575STrond Myklebust 	};
510115ce575STrond Myklebust 	LIST_HEAD(head);
5113e621214STrond Myklebust 
5123e621214STrond Myklebust 	spin_lock(&inode->i_lock);
5133e621214STrond Myklebust 	pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
51428d4411fSOlga Kornievskaia 	pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
5153e621214STrond Myklebust 	spin_unlock(&inode->i_lock);
516115ce575STrond Myklebust 	pnfs_free_lseg_list(&head);
517b9e028fdSTrond Myklebust 	dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
518b9e028fdSTrond Myklebust 			iomode == IOMODE_RW ?  "RW" : "READ");
519b9e028fdSTrond Myklebust }
520b9e028fdSTrond Myklebust 
521b9e028fdSTrond Myklebust static bool
pnfs_layout_io_test_failed(struct pnfs_layout_hdr * lo,u32 iomode)522b9e028fdSTrond Myklebust pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
523b9e028fdSTrond Myklebust {
52425c75333STrond Myklebust 	unsigned long start, end;
5253e621214STrond Myklebust 	int fail_bit = pnfs_iomode_to_fail_bit(iomode);
5263e621214STrond Myklebust 
5273e621214STrond Myklebust 	if (test_bit(fail_bit, &lo->plh_flags) == 0)
52825c75333STrond Myklebust 		return false;
52925c75333STrond Myklebust 	end = jiffies;
53025c75333STrond Myklebust 	start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT;
53125c75333STrond Myklebust 	if (!time_in_range(lo->plh_retry_timestamp, start, end)) {
53225c75333STrond Myklebust 		/* It is time to retry the failed layoutgets */
5333e621214STrond Myklebust 		pnfs_layout_clear_fail_bit(lo, fail_bit);
53425c75333STrond Myklebust 		return false;
53525c75333STrond Myklebust 	}
53625c75333STrond Myklebust 	return true;
537b9e028fdSTrond Myklebust }
538b9e028fdSTrond Myklebust 
539974cec8cSAndy Adamson static void
pnfs_init_lseg(struct pnfs_layout_hdr * lo,struct pnfs_layout_segment * lseg,const struct pnfs_layout_range * range,const nfs4_stateid * stateid)540119cef97STrond Myklebust pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg,
541119cef97STrond Myklebust 		const struct pnfs_layout_range *range,
542119cef97STrond Myklebust 		const nfs4_stateid *stateid)
543974cec8cSAndy Adamson {
544566052c5SFred Isaman 	INIT_LIST_HEAD(&lseg->pls_list);
545a9bae566SPeng Tao 	INIT_LIST_HEAD(&lseg->pls_lc_list);
546a9901899STrond Myklebust 	INIT_LIST_HEAD(&lseg->pls_commits);
547eba6dd69SElena Reshetova 	refcount_set(&lseg->pls_refcount, 1);
5484541d16cSFred Isaman 	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
549566052c5SFred Isaman 	lseg->pls_layout = lo;
550119cef97STrond Myklebust 	lseg->pls_range = *range;
551119cef97STrond Myklebust 	lseg->pls_seq = be32_to_cpu(stateid->seqid);
552974cec8cSAndy Adamson }
553974cec8cSAndy Adamson 
pnfs_free_lseg(struct pnfs_layout_segment * lseg)554905ca191STrond Myklebust static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
555974cec8cSAndy Adamson {
55668f74479STrond Myklebust 	if (lseg != NULL) {
55768f74479STrond Myklebust 		struct inode *inode = lseg->pls_layout->plh_inode;
55868f74479STrond Myklebust 		NFS_SERVER(inode)->pnfs_curr_ld->free_lseg(lseg);
55968f74479STrond Myklebust 	}
560974cec8cSAndy Adamson }
561974cec8cSAndy Adamson 
562d684d2aeSFred Isaman static void
pnfs_layout_remove_lseg(struct pnfs_layout_hdr * lo,struct pnfs_layout_segment * lseg)56357036a37STrond Myklebust pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
56457036a37STrond Myklebust 		struct pnfs_layout_segment *lseg)
565974cec8cSAndy Adamson {
566d20581aaSBenny Halevy 	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
567d684d2aeSFred Isaman 	list_del_init(&lseg->pls_list);
5688f0d27dcSTrond Myklebust 	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
5692b28a7beSElena Reshetova 	refcount_dec(&lo->plh_refcount);
570abb3e1c8STrond Myklebust 	if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
571abb3e1c8STrond Myklebust 		return;
5727b650994STrond Myklebust 	if (list_empty(&lo->plh_segs) &&
5737b650994STrond Myklebust 	    !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) &&
5747b650994STrond Myklebust 	    !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
575334a8f37STrond Myklebust 		if (atomic_read(&lo->plh_outstanding) == 0)
5762d148c7eSTrond Myklebust 			set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
577173f77e9STrond Myklebust 		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
5782d148c7eSTrond Myklebust 	}
5794541d16cSFred Isaman }
580d684d2aeSFred Isaman 
58168f74479STrond Myklebust static bool
pnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr * lo,struct pnfs_layout_segment * lseg)58268f74479STrond Myklebust pnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr *lo,
58368f74479STrond Myklebust 		struct pnfs_layout_segment *lseg)
58468f74479STrond Myklebust {
58568f74479STrond Myklebust 	if (test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
58668f74479STrond Myklebust 	    pnfs_layout_is_valid(lo)) {
5874aab9732STrond Myklebust 		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
58868f74479STrond Myklebust 		list_move_tail(&lseg->pls_list, &lo->plh_return_segs);
58968f74479STrond Myklebust 		return true;
59068f74479STrond Myklebust 	}
59168f74479STrond Myklebust 	return false;
59268f74479STrond Myklebust }
59368f74479STrond Myklebust 
594bae724efSFred Isaman void
pnfs_put_lseg(struct pnfs_layout_segment * lseg)5959369a431STrond Myklebust pnfs_put_lseg(struct pnfs_layout_segment *lseg)
596d684d2aeSFred Isaman {
59757036a37STrond Myklebust 	struct pnfs_layout_hdr *lo;
598d684d2aeSFred Isaman 	struct inode *inode;
599d684d2aeSFred Isaman 
600d684d2aeSFred Isaman 	if (!lseg)
601d684d2aeSFred Isaman 		return;
602d684d2aeSFred Isaman 
603d684d2aeSFred Isaman 	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
604eba6dd69SElena Reshetova 		refcount_read(&lseg->pls_refcount),
605d684d2aeSFred Isaman 		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
6064ef2e4f8STrond Myklebust 
60757036a37STrond Myklebust 	lo = lseg->pls_layout;
60857036a37STrond Myklebust 	inode = lo->plh_inode;
6094ef2e4f8STrond Myklebust 
610eba6dd69SElena Reshetova 	if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
6118f0d27dcSTrond Myklebust 		pnfs_get_layout_hdr(lo);
61257036a37STrond Myklebust 		pnfs_layout_remove_lseg(lo, lseg);
61368f74479STrond Myklebust 		if (pnfs_cache_lseg_for_layoutreturn(lo, lseg))
61468f74479STrond Myklebust 			lseg = NULL;
615d684d2aeSFred Isaman 		spin_unlock(&inode->i_lock);
616905ca191STrond Myklebust 		pnfs_free_lseg(lseg);
6178f0d27dcSTrond Myklebust 		pnfs_put_layout_hdr(lo);
618d684d2aeSFred Isaman 	}
619974cec8cSAndy Adamson }
6209369a431STrond Myklebust EXPORT_SYMBOL_GPL(pnfs_put_lseg);
621974cec8cSAndy Adamson 
622fb3296ebSBenny Halevy /*
623fb3296ebSBenny Halevy  * is l2 fully contained in l1?
624fb3296ebSBenny Halevy  *   start1                             end1
625fb3296ebSBenny Halevy  *   [----------------------------------)
626fb3296ebSBenny Halevy  *           start2           end2
627fb3296ebSBenny Halevy  *           [----------------)
628fb3296ebSBenny Halevy  */
6293cb2df17STrond Myklebust static bool
pnfs_lseg_range_contained(const struct pnfs_layout_range * l1,const struct pnfs_layout_range * l2)6307dc0ac70STrond Myklebust pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
6313cb2df17STrond Myklebust 		 const struct pnfs_layout_range *l2)
632fb3296ebSBenny Halevy {
633fb3296ebSBenny Halevy 	u64 start1 = l1->offset;
63417822b20STrond Myklebust 	u64 end1 = pnfs_end_offset(start1, l1->length);
635fb3296ebSBenny Halevy 	u64 start2 = l2->offset;
63617822b20STrond Myklebust 	u64 end2 = pnfs_end_offset(start2, l2->length);
637fb3296ebSBenny Halevy 
638fb3296ebSBenny Halevy 	return (start1 <= start2) && (end1 >= end2);
639fb3296ebSBenny Halevy }
640fb3296ebSBenny Halevy 
pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment * lseg,struct list_head * tmp_list)64124956804STrond Myklebust static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
64224956804STrond Myklebust 		struct list_head *tmp_list)
64324956804STrond Myklebust {
644eba6dd69SElena Reshetova 	if (!refcount_dec_and_test(&lseg->pls_refcount))
64524956804STrond Myklebust 		return false;
64624956804STrond Myklebust 	pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
64724956804STrond Myklebust 	list_add(&lseg->pls_list, tmp_list);
64824956804STrond Myklebust 	return true;
64924956804STrond Myklebust }
65024956804STrond Myklebust 
6514541d16cSFred Isaman /* Returns 1 if lseg is removed from list, 0 otherwise */
mark_lseg_invalid(struct pnfs_layout_segment * lseg,struct list_head * tmp_list)6524541d16cSFred Isaman static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
6534541d16cSFred Isaman 			     struct list_head *tmp_list)
6544541d16cSFred Isaman {
6554541d16cSFred Isaman 	int rv = 0;
6564541d16cSFred Isaman 
6574541d16cSFred Isaman 	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
6584541d16cSFred Isaman 		/* Remove the reference keeping the lseg in the
6594541d16cSFred Isaman 		 * list.  It will now be removed when all
6604541d16cSFred Isaman 		 * outstanding io is finished.
6614541d16cSFred Isaman 		 */
662d684d2aeSFred Isaman 		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
663eba6dd69SElena Reshetova 			refcount_read(&lseg->pls_refcount));
66424956804STrond Myklebust 		if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
665d684d2aeSFred Isaman 			rv = 1;
666d684d2aeSFred Isaman 	}
6674541d16cSFred Isaman 	return rv;
6684541d16cSFred Isaman }
6694541d16cSFred Isaman 
670e036f464STrond Myklebust static bool
pnfs_should_free_range(const struct pnfs_layout_range * lseg_range,const struct pnfs_layout_range * recall_range)671e036f464STrond Myklebust pnfs_should_free_range(const struct pnfs_layout_range *lseg_range,
672e036f464STrond Myklebust 		 const struct pnfs_layout_range *recall_range)
673e036f464STrond Myklebust {
674e036f464STrond Myklebust 	return (recall_range->iomode == IOMODE_ANY ||
675e036f464STrond Myklebust 		lseg_range->iomode == recall_range->iomode) &&
676e036f464STrond Myklebust 	       pnfs_lseg_range_intersecting(lseg_range, recall_range);
677e036f464STrond Myklebust }
678e036f464STrond Myklebust 
679e036f464STrond Myklebust static bool
pnfs_match_lseg_recall(const struct pnfs_layout_segment * lseg,const struct pnfs_layout_range * recall_range,u32 seq)680e036f464STrond Myklebust pnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg,
681e036f464STrond Myklebust 		const struct pnfs_layout_range *recall_range,
682e036f464STrond Myklebust 		u32 seq)
683e036f464STrond Myklebust {
684e036f464STrond Myklebust 	if (seq != 0 && pnfs_seqid_is_newer(lseg->pls_seq, seq))
685e036f464STrond Myklebust 		return false;
686e036f464STrond Myklebust 	if (recall_range == NULL)
687e036f464STrond Myklebust 		return true;
688e036f464STrond Myklebust 	return pnfs_should_free_range(&lseg->pls_range, recall_range);
689e036f464STrond Myklebust }
690e036f464STrond Myklebust 
6916d597e17SJeff Layton /**
6926d597e17SJeff Layton  * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later
6936d597e17SJeff Layton  * @lo: layout header containing the lsegs
6946d597e17SJeff Layton  * @tmp_list: list head where doomed lsegs should go
6956d597e17SJeff Layton  * @recall_range: optional recall range argument to match (may be NULL)
6966d597e17SJeff Layton  * @seq: only invalidate lsegs obtained prior to this sequence (may be 0)
6976d597e17SJeff Layton  *
6986d597e17SJeff Layton  * Walk the list of lsegs in the layout header, and tear down any that should
6996d597e17SJeff Layton  * be destroyed. If "recall_range" is specified then the segment must match
7006d597e17SJeff Layton  * that range. If "seq" is non-zero, then only match segments that were handed
7016d597e17SJeff Layton  * out at or before that sequence.
7026d597e17SJeff Layton  *
7036d597e17SJeff Layton  * Returns number of matching invalid lsegs remaining in list after scanning
7046d597e17SJeff Layton  * it and purging them.
7054541d16cSFred Isaman  */
70643f1b3daSFred Isaman int
pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr * lo,struct list_head * tmp_list,const struct pnfs_layout_range * recall_range,u32 seq)70749a85061STrond Myklebust pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
7084541d16cSFred Isaman 			    struct list_head *tmp_list,
7096d597e17SJeff Layton 			    const struct pnfs_layout_range *recall_range,
7106d597e17SJeff Layton 			    u32 seq)
711974cec8cSAndy Adamson {
712974cec8cSAndy Adamson 	struct pnfs_layout_segment *lseg, *next;
713b739a5bdSTrond Myklebust 	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
71471b39854STrond Myklebust 	int remaining = 0;
715974cec8cSAndy Adamson 
716974cec8cSAndy Adamson 	dprintk("%s:Begin lo %p\n", __func__, lo);
717974cec8cSAndy Adamson 
7188006bfbaSTrond Myklebust 	if (list_empty(&lo->plh_segs))
71938511722SFred Isaman 		return 0;
7204541d16cSFred Isaman 	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
721e036f464STrond Myklebust 		if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {
7226d597e17SJeff Layton 			dprintk("%s: freeing lseg %p iomode %d seq %u "
7234541d16cSFred Isaman 				"offset %llu length %llu\n", __func__,
7246d597e17SJeff Layton 				lseg, lseg->pls_range.iomode, lseg->pls_seq,
7256d597e17SJeff Layton 				lseg->pls_range.offset, lseg->pls_range.length);
726b739a5bdSTrond Myklebust 			if (mark_lseg_invalid(lseg, tmp_list))
727b739a5bdSTrond Myklebust 				continue;
72871b39854STrond Myklebust 			remaining++;
729b739a5bdSTrond Myklebust 			pnfs_lseg_cancel_io(server, lseg);
730974cec8cSAndy Adamson 		}
73171b39854STrond Myklebust 	dprintk("%s:Return %i\n", __func__, remaining);
73271b39854STrond Myklebust 	return remaining;
733974cec8cSAndy Adamson }
734974cec8cSAndy Adamson 
73568f74479STrond Myklebust static void
pnfs_free_returned_lsegs(struct pnfs_layout_hdr * lo,struct list_head * free_me,const struct pnfs_layout_range * range,u32 seq)73668f74479STrond Myklebust pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
73768f74479STrond Myklebust 		struct list_head *free_me,
73868f74479STrond Myklebust 		const struct pnfs_layout_range *range,
73968f74479STrond Myklebust 		u32 seq)
74068f74479STrond Myklebust {
74168f74479STrond Myklebust 	struct pnfs_layout_segment *lseg, *next;
74268f74479STrond Myklebust 
74368f74479STrond Myklebust 	list_for_each_entry_safe(lseg, next, &lo->plh_return_segs, pls_list) {
74468f74479STrond Myklebust 		if (pnfs_match_lseg_recall(lseg, range, seq))
74568f74479STrond Myklebust 			list_move_tail(&lseg->pls_list, free_me);
74668f74479STrond Myklebust 	}
74768f74479STrond Myklebust }
74868f74479STrond Myklebust 
749f49f9baaSFred Isaman /* note free_me must contain lsegs from a single layout_hdr */
75043f1b3daSFred Isaman void
pnfs_free_lseg_list(struct list_head * free_me)7514541d16cSFred Isaman pnfs_free_lseg_list(struct list_head *free_me)
752974cec8cSAndy Adamson {
7534541d16cSFred Isaman 	struct pnfs_layout_segment *lseg, *tmp;
754974cec8cSAndy Adamson 
755f49f9baaSFred Isaman 	if (list_empty(free_me))
756f49f9baaSFred Isaman 		return;
757f49f9baaSFred Isaman 
7584541d16cSFred Isaman 	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
759566052c5SFred Isaman 		list_del(&lseg->pls_list);
760905ca191STrond Myklebust 		pnfs_free_lseg(lseg);
761974cec8cSAndy Adamson 	}
762974cec8cSAndy Adamson }
763974cec8cSAndy Adamson 
__pnfs_destroy_layout(struct nfs_inode * nfsi)764b6d49ecdSTrond Myklebust static struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi)
765e5e94017SBenny Halevy {
766e5e94017SBenny Halevy 	struct pnfs_layout_hdr *lo;
767974cec8cSAndy Adamson 	LIST_HEAD(tmp_list);
768e5e94017SBenny Halevy 
769e5e94017SBenny Halevy 	spin_lock(&nfsi->vfs_inode.i_lock);
770e5e94017SBenny Halevy 	lo = nfsi->layout;
771e5e94017SBenny Halevy 	if (lo) {
7723e621214STrond Myklebust 		pnfs_get_layout_hdr(lo);
7732454dfeaSTrond Myklebust 		pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
7743e621214STrond Myklebust 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
7753e621214STrond Myklebust 		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
776e5e94017SBenny Halevy 		spin_unlock(&nfsi->vfs_inode.i_lock);
777974cec8cSAndy Adamson 		pnfs_free_lseg_list(&tmp_list);
7781f18b82cSTrond Myklebust 		nfs_commit_inode(&nfsi->vfs_inode, 0);
7793e621214STrond Myklebust 		pnfs_put_layout_hdr(lo);
7803e621214STrond Myklebust 	} else
7813e621214STrond Myklebust 		spin_unlock(&nfsi->vfs_inode.i_lock);
782b6d49ecdSTrond Myklebust 	return lo;
783b6d49ecdSTrond Myklebust }
784b6d49ecdSTrond Myklebust 
/*
 * Invalidate the layout of @nfsi without waiting for the layout header to
 * be detached from the inode (compare pnfs_destroy_layout_final).
 */
void pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	/* The header pointer is only of interest to the _final variant */
	(void)__pnfs_destroy_layout(nfsi);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
790e5e94017SBenny Halevy 
pnfs_layout_removed(struct nfs_inode * nfsi,struct pnfs_layout_hdr * lo)791b6d49ecdSTrond Myklebust static bool pnfs_layout_removed(struct nfs_inode *nfsi,
792b6d49ecdSTrond Myklebust 				struct pnfs_layout_hdr *lo)
793b6d49ecdSTrond Myklebust {
794b6d49ecdSTrond Myklebust 	bool ret;
795b6d49ecdSTrond Myklebust 
796b6d49ecdSTrond Myklebust 	spin_lock(&nfsi->vfs_inode.i_lock);
797b6d49ecdSTrond Myklebust 	ret = nfsi->layout != lo;
798b6d49ecdSTrond Myklebust 	spin_unlock(&nfsi->vfs_inode.i_lock);
799b6d49ecdSTrond Myklebust 	return ret;
800b6d49ecdSTrond Myklebust }
801b6d49ecdSTrond Myklebust 
/*
 * Invalidate the layout of @nfsi and then wait until the layout header
 * found has been detached from the inode.  Returns immediately when the
 * inode had no layout.
 */
void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi);

	if (!lo)
		return;

	/* @lo is only the wait key and comparison value; it is not dereferenced here */
	wait_var_event(lo, pnfs_layout_removed(nfsi, lo));
}
809b6d49ecdSTrond Myklebust 
810fd9a8d71STrond Myklebust static bool
pnfs_layout_add_bulk_destroy_list(struct inode * inode,struct list_head * layout_list)811fd9a8d71STrond Myklebust pnfs_layout_add_bulk_destroy_list(struct inode *inode,
812fd9a8d71STrond Myklebust 		struct list_head *layout_list)
813fd9a8d71STrond Myklebust {
814fd9a8d71STrond Myklebust 	struct pnfs_layout_hdr *lo;
815fd9a8d71STrond Myklebust 	bool ret = false;
816fd9a8d71STrond Myklebust 
817fd9a8d71STrond Myklebust 	spin_lock(&inode->i_lock);
818fd9a8d71STrond Myklebust 	lo = NFS_I(inode)->layout;
819fd9a8d71STrond Myklebust 	if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
820fd9a8d71STrond Myklebust 		pnfs_get_layout_hdr(lo);
821fd9a8d71STrond Myklebust 		list_add(&lo->plh_bulk_destroy, layout_list);
822fd9a8d71STrond Myklebust 		ret = true;
823fd9a8d71STrond Myklebust 	}
824fd9a8d71STrond Myklebust 	spin_unlock(&inode->i_lock);
825fd9a8d71STrond Myklebust 	return ret;
826fd9a8d71STrond Myklebust }
827fd9a8d71STrond Myklebust 
828fd9a8d71STrond Myklebust /* Caller must hold rcu_read_lock and clp->cl_lock */
829fd9a8d71STrond Myklebust static int
pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client * clp,struct nfs_server * server,struct list_head * layout_list)830fd9a8d71STrond Myklebust pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
831fd9a8d71STrond Myklebust 		struct nfs_server *server,
832fd9a8d71STrond Myklebust 		struct list_head *layout_list)
8335085607dSTrond Myklebust 	__must_hold(&clp->cl_lock)
8345085607dSTrond Myklebust 	__must_hold(RCU)
835fd9a8d71STrond Myklebust {
836fd9a8d71STrond Myklebust 	struct pnfs_layout_hdr *lo, *next;
837fd9a8d71STrond Myklebust 	struct inode *inode;
838fd9a8d71STrond Myklebust 
839fd9a8d71STrond Myklebust 	list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
8405085607dSTrond Myklebust 		if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
8415085607dSTrond Myklebust 		    test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) ||
8425085607dSTrond Myklebust 		    !list_empty(&lo->plh_bulk_destroy))
843b85f5620STrond Myklebust 			continue;
8445085607dSTrond Myklebust 		/* If the sb is being destroyed, just bail */
8455085607dSTrond Myklebust 		if (!nfs_sb_active(server->super))
8465085607dSTrond Myklebust 			break;
847b5fdf841STrond Myklebust 		inode = pnfs_grab_inode_layout_hdr(lo);
8485085607dSTrond Myklebust 		if (inode != NULL) {
849cf6605d1STrond Myklebust 			if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags))
850cf6605d1STrond Myklebust 				list_del_rcu(&lo->plh_layouts);
8515085607dSTrond Myklebust 			if (pnfs_layout_add_bulk_destroy_list(inode,
8525085607dSTrond Myklebust 						layout_list))
853fd9a8d71STrond Myklebust 				continue;
854fd9a8d71STrond Myklebust 			rcu_read_unlock();
855fd9a8d71STrond Myklebust 			spin_unlock(&clp->cl_lock);
856fd9a8d71STrond Myklebust 			iput(inode);
8575085607dSTrond Myklebust 		} else {
8585085607dSTrond Myklebust 			rcu_read_unlock();
8595085607dSTrond Myklebust 			spin_unlock(&clp->cl_lock);
8605085607dSTrond Myklebust 		}
8615085607dSTrond Myklebust 		nfs_sb_deactive(server->super);
862fd9a8d71STrond Myklebust 		spin_lock(&clp->cl_lock);
863fd9a8d71STrond Myklebust 		rcu_read_lock();
864fd9a8d71STrond Myklebust 		return -EAGAIN;
865fd9a8d71STrond Myklebust 	}
866fd9a8d71STrond Myklebust 	return 0;
867fd9a8d71STrond Myklebust }
868fd9a8d71STrond Myklebust 
869fd9a8d71STrond Myklebust static int
pnfs_layout_free_bulk_destroy_list(struct list_head * layout_list,bool is_bulk_recall)870fd9a8d71STrond Myklebust pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
871fd9a8d71STrond Myklebust 		bool is_bulk_recall)
872fd9a8d71STrond Myklebust {
873fd9a8d71STrond Myklebust 	struct pnfs_layout_hdr *lo;
874fd9a8d71STrond Myklebust 	struct inode *inode;
875fd9a8d71STrond Myklebust 	LIST_HEAD(lseg_list);
876fd9a8d71STrond Myklebust 	int ret = 0;
877fd9a8d71STrond Myklebust 
878fd9a8d71STrond Myklebust 	while (!list_empty(layout_list)) {
879fd9a8d71STrond Myklebust 		lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
880fd9a8d71STrond Myklebust 				plh_bulk_destroy);
881fd9a8d71STrond Myklebust 		dprintk("%s freeing layout for inode %lu\n", __func__,
882fd9a8d71STrond Myklebust 			lo->plh_inode->i_ino);
883fd9a8d71STrond Myklebust 		inode = lo->plh_inode;
8847c5d1875SChristoph Hellwig 
8857c5d1875SChristoph Hellwig 		pnfs_layoutcommit_inode(inode, false);
8867c5d1875SChristoph Hellwig 
887fd9a8d71STrond Myklebust 		spin_lock(&inode->i_lock);
888fd9a8d71STrond Myklebust 		list_del_init(&lo->plh_bulk_destroy);
8899fd4b9fcSTrond Myklebust 		if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
890fd9a8d71STrond Myklebust 			if (is_bulk_recall)
891fd9a8d71STrond Myklebust 				set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
892fd9a8d71STrond Myklebust 			ret = -EAGAIN;
8939fd4b9fcSTrond Myklebust 		}
894fd9a8d71STrond Myklebust 		spin_unlock(&inode->i_lock);
895fd9a8d71STrond Myklebust 		pnfs_free_lseg_list(&lseg_list);
896b20135d0STrond Myklebust 		/* Free all lsegs that are attached to commit buckets */
897b20135d0STrond Myklebust 		nfs_commit_inode(inode, 0);
898fd9a8d71STrond Myklebust 		pnfs_put_layout_hdr(lo);
8995085607dSTrond Myklebust 		nfs_iput_and_deactive(inode);
900fd9a8d71STrond Myklebust 	}
901fd9a8d71STrond Myklebust 	return ret;
902fd9a8d71STrond Myklebust }
903fd9a8d71STrond Myklebust 
904fd9a8d71STrond Myklebust int
pnfs_destroy_layouts_byfsid(struct nfs_client * clp,struct nfs_fsid * fsid,bool is_recall)905fd9a8d71STrond Myklebust pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
906fd9a8d71STrond Myklebust 		struct nfs_fsid *fsid,
907fd9a8d71STrond Myklebust 		bool is_recall)
908fd9a8d71STrond Myklebust {
909fd9a8d71STrond Myklebust 	struct nfs_server *server;
910fd9a8d71STrond Myklebust 	LIST_HEAD(layout_list);
911fd9a8d71STrond Myklebust 
912fd9a8d71STrond Myklebust 	spin_lock(&clp->cl_lock);
913fd9a8d71STrond Myklebust 	rcu_read_lock();
914fd9a8d71STrond Myklebust restart:
915fd9a8d71STrond Myklebust 	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
916fd9a8d71STrond Myklebust 		if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
917fd9a8d71STrond Myklebust 			continue;
918fd9a8d71STrond Myklebust 		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
919fd9a8d71STrond Myklebust 				server,
920fd9a8d71STrond Myklebust 				&layout_list) != 0)
921fd9a8d71STrond Myklebust 			goto restart;
922fd9a8d71STrond Myklebust 	}
923fd9a8d71STrond Myklebust 	rcu_read_unlock();
924fd9a8d71STrond Myklebust 	spin_unlock(&clp->cl_lock);
925fd9a8d71STrond Myklebust 
926fd9a8d71STrond Myklebust 	if (list_empty(&layout_list))
927fd9a8d71STrond Myklebust 		return 0;
928fd9a8d71STrond Myklebust 	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
929fd9a8d71STrond Myklebust }
930fd9a8d71STrond Myklebust 
931fd9a8d71STrond Myklebust int
pnfs_destroy_layouts_byclid(struct nfs_client * clp,bool is_recall)932fd9a8d71STrond Myklebust pnfs_destroy_layouts_byclid(struct nfs_client *clp,
933fd9a8d71STrond Myklebust 		bool is_recall)
934fd9a8d71STrond Myklebust {
935fd9a8d71STrond Myklebust 	struct nfs_server *server;
936fd9a8d71STrond Myklebust 	LIST_HEAD(layout_list);
937fd9a8d71STrond Myklebust 
938fd9a8d71STrond Myklebust 	spin_lock(&clp->cl_lock);
939fd9a8d71STrond Myklebust 	rcu_read_lock();
940fd9a8d71STrond Myklebust restart:
941fd9a8d71STrond Myklebust 	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
942fd9a8d71STrond Myklebust 		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
943fd9a8d71STrond Myklebust 					server,
944fd9a8d71STrond Myklebust 					&layout_list) != 0)
945fd9a8d71STrond Myklebust 			goto restart;
946fd9a8d71STrond Myklebust 	}
947fd9a8d71STrond Myklebust 	rcu_read_unlock();
948fd9a8d71STrond Myklebust 	spin_unlock(&clp->cl_lock);
949fd9a8d71STrond Myklebust 
950fd9a8d71STrond Myklebust 	if (list_empty(&layout_list))
951fd9a8d71STrond Myklebust 		return 0;
952fd9a8d71STrond Myklebust 	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
953fd9a8d71STrond Myklebust }
954fd9a8d71STrond Myklebust 
955974cec8cSAndy Adamson /*
9569f266451SWang Qing  * Called by the state manager to remove all layouts established under an
957974cec8cSAndy Adamson  * expired lease.
958974cec8cSAndy Adamson  */
959974cec8cSAndy Adamson void
pnfs_destroy_all_layouts(struct nfs_client * clp)960974cec8cSAndy Adamson pnfs_destroy_all_layouts(struct nfs_client *clp)
961974cec8cSAndy Adamson {
962c47abcf8SAndy Adamson 	nfs4_deviceid_mark_client_invalid(clp);
963c47abcf8SAndy Adamson 	nfs4_deviceid_purge_client(clp);
964c47abcf8SAndy Adamson 
965fd9a8d71STrond Myklebust 	pnfs_destroy_layouts_byclid(clp, false);
966974cec8cSAndy Adamson }
967974cec8cSAndy Adamson 
96859b56394STrond Myklebust static void
pnfs_set_layout_cred(struct pnfs_layout_hdr * lo,const struct cred * cred)96959b56394STrond Myklebust pnfs_set_layout_cred(struct pnfs_layout_hdr *lo, const struct cred *cred)
97059b56394STrond Myklebust {
97159b56394STrond Myklebust 	const struct cred *old;
97259b56394STrond Myklebust 
97359b56394STrond Myklebust 	if (cred && cred_fscmp(lo->plh_lc_cred, cred) != 0) {
97459b56394STrond Myklebust 		old = xchg(&lo->plh_lc_cred, get_cred(cred));
97559b56394STrond Myklebust 		put_cred(old);
97659b56394STrond Myklebust 	}
97759b56394STrond Myklebust }
97859b56394STrond Myklebust 
979fd6002e9SFred Isaman /* update lo->plh_stateid with new if is more recent */
98043f1b3daSFred Isaman void
pnfs_set_layout_stateid(struct pnfs_layout_hdr * lo,const nfs4_stateid * new,const struct cred * cred,bool update_barrier)98143f1b3daSFred Isaman pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
98259b56394STrond Myklebust 			const struct cred *cred, bool update_barrier)
983b1f69b75SAndy Adamson {
984aa95edf3STrond Myklebust 	u32 oldseq = be32_to_cpu(lo->plh_stateid.seqid);
985aa95edf3STrond Myklebust 	u32 newseq = be32_to_cpu(new->seqid);
9862a59a041STrond Myklebust 
9872a59a041STrond Myklebust 	if (!pnfs_layout_is_valid(lo)) {
98859b56394STrond Myklebust 		pnfs_set_layout_cred(lo, cred);
9892a59a041STrond Myklebust 		nfs4_stateid_copy(&lo->plh_stateid, new);
9902a59a041STrond Myklebust 		lo->plh_barrier = newseq;
9912a59a041STrond Myklebust 		pnfs_clear_layoutreturn_info(lo);
9922a59a041STrond Myklebust 		clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
9932a59a041STrond Myklebust 		return;
9942a59a041STrond Myklebust 	}
995aa95edf3STrond Myklebust 
996aa95edf3STrond Myklebust 	if (pnfs_seqid_is_newer(newseq, oldseq))
997f597c537STrond Myklebust 		nfs4_stateid_copy(&lo->plh_stateid, new);
998aa95edf3STrond Myklebust 
999aa95edf3STrond Myklebust 	if (update_barrier) {
1000aa95edf3STrond Myklebust 		pnfs_barrier_update(lo, newseq);
1001aa95edf3STrond Myklebust 		return;
1002aa95edf3STrond Myklebust 	}
1003ecebb80bSTrond Myklebust 	/*
1004ecebb80bSTrond Myklebust 	 * Because of wraparound, we want to keep the barrier
1005aa95edf3STrond Myklebust 	 * "close" to the current seqids. We really only want to
1006aa95edf3STrond Myklebust 	 * get here from a layoutget call.
100743f1b3daSFred Isaman 	 */
1008aa95edf3STrond Myklebust 	if (atomic_read(&lo->plh_outstanding) == 1)
1009aa95edf3STrond Myklebust 		 pnfs_barrier_update(lo, be32_to_cpu(lo->plh_stateid.seqid));
101043f1b3daSFred Isaman }
1011b1f69b75SAndy Adamson 
1012cf7d63f1SFred Isaman static bool
pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr * lo,const nfs4_stateid * stateid)101319c54abaSTrond Myklebust pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
101419c54abaSTrond Myklebust 		const nfs4_stateid *stateid)
1015cf7d63f1SFred Isaman {
101625a1a621STrond Myklebust 	u32 seqid = be32_to_cpu(stateid->seqid);
101725a1a621STrond Myklebust 
1018d6236a98STrond Myklebust 	return lo->plh_barrier && pnfs_seqid_is_newer(lo->plh_barrier, seqid);
101925a1a621STrond Myklebust }
102019c54abaSTrond Myklebust 
102119c54abaSTrond Myklebust /* lget is set to 1 if called from inside send_layoutget call chain */
102219c54abaSTrond Myklebust static bool
pnfs_layoutgets_blocked(const struct pnfs_layout_hdr * lo)1023e1c06f80STrond Myklebust pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
102419c54abaSTrond Myklebust {
1025f7e8917aSFred Isaman 	return lo->plh_block_lgets ||
1026e1c06f80STrond Myklebust 		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
1027cf7d63f1SFred Isaman }
1028cf7d63f1SFred Isaman 
10295e36e2a9SFred Isaman static struct nfs_server *
pnfs_find_server(struct inode * inode,struct nfs_open_context * ctx)10305e36e2a9SFred Isaman pnfs_find_server(struct inode *inode, struct nfs_open_context *ctx)
10315e36e2a9SFred Isaman {
10325e36e2a9SFred Isaman 	struct nfs_server *server;
10335e36e2a9SFred Isaman 
103478746a38SFred Isaman 	if (inode) {
10355e36e2a9SFred Isaman 		server = NFS_SERVER(inode);
103678746a38SFred Isaman 	} else {
10375e36e2a9SFred Isaman 		struct dentry *parent_dir = dget_parent(ctx->dentry);
10385e36e2a9SFred Isaman 		server = NFS_SERVER(parent_dir->d_inode);
10395e36e2a9SFred Isaman 		dput(parent_dir);
10405e36e2a9SFred Isaman 	}
10415e36e2a9SFred Isaman 	return server;
10425e36e2a9SFred Isaman }
10435e36e2a9SFred Isaman 
/*
 * Free a page array built by nfs4_alloc_pages().
 *
 * @pages: array of page pointers, may be NULL (then nothing is done)
 * @size:  number of slots in @pages
 *
 * Pages are released from slot 0 upwards; the walk stops at the first NULL
 * slot or after @size slots, whichever comes first.  The array itself is
 * then freed.
 */
static void nfs4_free_pages(struct page **pages, size_t size)
{
	size_t i;	/* same type as @size: no signed/unsigned mismatch */

	if (!pages)
		return;

	for (i = 0; i < size; i++) {
		if (!pages[i])
			break;
		__free_page(pages[i]);
	}
	kfree(pages);
}
105829a8bfe5STrond Myklebust 
/*
 * Allocate an array of @size pages.
 *
 * @size:      number of pages wanted
 * @gfp_flags: allocation flags used for both the array and the pages
 *
 * Returns the array of page pointers, or NULL on allocation failure.  On
 * failure every page obtained so far and the array itself are released.
 */
static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
{
	struct page **pages;
	size_t i;	/* same type as @size: no signed/unsigned mismatch */

	pages = kmalloc_array(size, sizeof(*pages), gfp_flags);
	if (!pages) {
		dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
		return NULL;
	}

	for (i = 0; i < size; i++) {
		pages[i] = alloc_page(gfp_flags);
		if (!pages[i]) {
			dprintk("%s: failed to allocate page\n", __func__);
			/* Only slots [0, i) hold pages; the rest is uninitialised */
			nfs4_free_pages(pages, i);
			return NULL;
		}
	}

	return pages;
}
108129a8bfe5STrond Myklebust 
1082587f03deSFred Isaman static struct nfs4_layoutget *
pnfs_alloc_init_layoutget_args(struct inode * ino,struct nfs_open_context * ctx,const nfs4_stateid * stateid,const struct pnfs_layout_range * range,gfp_t gfp_flags)10835e36e2a9SFred Isaman pnfs_alloc_init_layoutget_args(struct inode *ino,
1084e5e94017SBenny Halevy 	   struct nfs_open_context *ctx,
10852409a976SFred Isaman 	   const nfs4_stateid *stateid,
1086e144e539STrond Myklebust 	   const struct pnfs_layout_range *range,
1087587f03deSFred Isaman 	   gfp_t gfp_flags)
1088e5e94017SBenny Halevy {
10895e36e2a9SFred Isaman 	struct nfs_server *server = pnfs_find_server(ino, ctx);
109028ced9a8STrond Myklebust 	size_t max_reply_sz = server->pnfs_curr_ld->max_layoutget_response;
1091dacb452dSFred Isaman 	size_t max_pages = max_response_pages(server);
1092b1f69b75SAndy Adamson 	struct nfs4_layoutget *lgp;
1093e5e94017SBenny Halevy 
1094b1f69b75SAndy Adamson 	dprintk("--> %s\n", __func__);
1095b1f69b75SAndy Adamson 
1096a75b9df9STrond Myklebust 	lgp = kzalloc(sizeof(*lgp), gfp_flags);
1097cf7d63f1SFred Isaman 	if (lgp == NULL)
1098587f03deSFred Isaman 		return NULL;
109935124a09SWeston Andros Adamson 
110028ced9a8STrond Myklebust 	if (max_reply_sz) {
110128ced9a8STrond Myklebust 		size_t npages = (max_reply_sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
110228ced9a8STrond Myklebust 		if (npages < max_pages)
110328ced9a8STrond Myklebust 			max_pages = npages;
110428ced9a8STrond Myklebust 	}
110528ced9a8STrond Myklebust 
1106dacb452dSFred Isaman 	lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
1107dacb452dSFred Isaman 	if (!lgp->args.layout.pages) {
1108dacb452dSFred Isaman 		kfree(lgp);
1109dacb452dSFred Isaman 		return NULL;
1110dacb452dSFred Isaman 	}
1111dacb452dSFred Isaman 	lgp->args.layout.pglen = max_pages * PAGE_SIZE;
1112dacb452dSFred Isaman 	lgp->res.layoutp = &lgp->args.layout;
1113dacb452dSFred Isaman 
1114d49e0d5bSTrond Myklebust 	/* Don't confuse uninitialised result and success */
1115d49e0d5bSTrond Myklebust 	lgp->res.status = -NFS4ERR_DELAY;
11162d89a1d3STrond Myklebust 
111709cbfeafSKirill A. Shutemov 	lgp->args.minlength = PAGE_SIZE;
1118fb3296ebSBenny Halevy 	if (lgp->args.minlength > range->length)
1119fb3296ebSBenny Halevy 		lgp->args.minlength = range->length;
11205e36e2a9SFred Isaman 	if (ino) {
11215e36e2a9SFred Isaman 		loff_t i_size = i_size_read(ino);
11225e36e2a9SFred Isaman 
11232d89a1d3STrond Myklebust 		if (range->iomode == IOMODE_READ) {
11242d89a1d3STrond Myklebust 			if (range->offset >= i_size)
11252d89a1d3STrond Myklebust 				lgp->args.minlength = 0;
11262d89a1d3STrond Myklebust 			else if (i_size - range->offset < lgp->args.minlength)
11272d89a1d3STrond Myklebust 				lgp->args.minlength = i_size - range->offset;
11282d89a1d3STrond Myklebust 		}
11295e36e2a9SFred Isaman 	}
1130b1f69b75SAndy Adamson 	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
1131e144e539STrond Myklebust 	pnfs_copy_range(&lgp->args.range, range);
1132b1f69b75SAndy Adamson 	lgp->args.type = server->pnfs_curr_ld->id;
1133b1f69b75SAndy Adamson 	lgp->args.inode = ino;
1134b1f69b75SAndy Adamson 	lgp->args.ctx = get_nfs_open_context(ctx);
1135183d9e7bSJeff Layton 	nfs4_stateid_copy(&lgp->args.stateid, stateid);
1136a75b9df9STrond Myklebust 	lgp->gfp_flags = gfp_flags;
113763ec2b69STrond Myklebust 	lgp->cred = ctx->cred;
1138587f03deSFred Isaman 	return lgp;
1139974cec8cSAndy Adamson }
1140974cec8cSAndy Adamson 
pnfs_layoutget_free(struct nfs4_layoutget * lgp)114129a8bfe5STrond Myklebust void pnfs_layoutget_free(struct nfs4_layoutget *lgp)
114229a8bfe5STrond Myklebust {
114329a8bfe5STrond Myklebust 	size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE;
114429a8bfe5STrond Myklebust 
114529a8bfe5STrond Myklebust 	nfs4_free_pages(lgp->args.layout.pages, max_pages);
1146b4e89bcbSTrond Myklebust 	pnfs_put_layout_hdr(lgp->lo);
114729a8bfe5STrond Myklebust 	put_nfs_open_context(lgp->args.ctx);
114829a8bfe5STrond Myklebust 	kfree(lgp);
114929a8bfe5STrond Myklebust }
115029a8bfe5STrond Myklebust 
pnfs_clear_layoutcommit(struct inode * inode,struct list_head * head)115124956804STrond Myklebust static void pnfs_clear_layoutcommit(struct inode *inode,
115224956804STrond Myklebust 		struct list_head *head)
115324956804STrond Myklebust {
115424956804STrond Myklebust 	struct nfs_inode *nfsi = NFS_I(inode);
115524956804STrond Myklebust 	struct pnfs_layout_segment *lseg, *tmp;
115624956804STrond Myklebust 
115724956804STrond Myklebust 	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
115824956804STrond Myklebust 		return;
115924956804STrond Myklebust 	list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
116024956804STrond Myklebust 		if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
116124956804STrond Myklebust 			continue;
116224956804STrond Myklebust 		pnfs_lseg_dec_and_remove_zero(lseg, head);
116324956804STrond Myklebust 	}
116424956804STrond Myklebust }
116524956804STrond Myklebust 
pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr * lo,const nfs4_stateid * arg_stateid,const struct pnfs_layout_range * range,const nfs4_stateid * stateid)116668f74479STrond Myklebust void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
11672a974425STrond Myklebust 		const nfs4_stateid *arg_stateid,
116868f74479STrond Myklebust 		const struct pnfs_layout_range *range,
116968f74479STrond Myklebust 		const nfs4_stateid *stateid)
117068f74479STrond Myklebust {
117168f74479STrond Myklebust 	struct inode *inode = lo->plh_inode;
117268f74479STrond Myklebust 	LIST_HEAD(freeme);
117368f74479STrond Myklebust 
117468f74479STrond Myklebust 	spin_lock(&inode->i_lock);
1175d8a7055fSTrond Myklebust 	if (!nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
11762a974425STrond Myklebust 		goto out_unlock;
1177d8a7055fSTrond Myklebust 	if (stateid && pnfs_layout_is_valid(lo)) {
11782a974425STrond Myklebust 		u32 seq = be32_to_cpu(arg_stateid->seqid);
11792a974425STrond Myklebust 
118068f74479STrond Myklebust 		pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
118168f74479STrond Myklebust 		pnfs_free_returned_lsegs(lo, &freeme, range, seq);
118259b56394STrond Myklebust 		pnfs_set_layout_stateid(lo, stateid, NULL, true);
118368f74479STrond Myklebust 	} else
118468f74479STrond Myklebust 		pnfs_mark_layout_stateid_invalid(lo, &freeme);
11852a974425STrond Myklebust out_unlock:
118668f74479STrond Myklebust 	pnfs_clear_layoutreturn_waitbit(lo);
118768f74479STrond Myklebust 	spin_unlock(&inode->i_lock);
118868f74479STrond Myklebust 	pnfs_free_lseg_list(&freeme);
118968f74479STrond Myklebust 
119068f74479STrond Myklebust }
119168f74479STrond Myklebust 
119213c13a6aSTrond Myklebust static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr * lo,nfs4_stateid * stateid,const struct cred ** cred,enum pnfs_iomode * iomode)1193e5fd1904STrond Myklebust pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
1194e5fd1904STrond Myklebust 		nfs4_stateid *stateid,
119544ea8dfcSTrond Myklebust 		const struct cred **cred,
1196e5fd1904STrond Myklebust 		enum pnfs_iomode *iomode)
119713c13a6aSTrond Myklebust {
1198bf0291ddSTrond Myklebust 	/* Serialise LAYOUTGET/LAYOUTRETURN */
1199*96c9ff35STrond Myklebust 	if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0)
1200bf0291ddSTrond Myklebust 		return false;
12016604b203STrond Myklebust 	if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
120213c13a6aSTrond Myklebust 		return false;
12036604b203STrond Myklebust 	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
120413c13a6aSTrond Myklebust 	pnfs_get_layout_hdr(lo);
1205e5fd1904STrond Myklebust 	nfs4_stateid_copy(stateid, &lo->plh_stateid);
120644ea8dfcSTrond Myklebust 	*cred = get_cred(lo->plh_lc_cred);
12071bcf34fdSTrond Myklebust 	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) {
1208e5fd1904STrond Myklebust 		if (lo->plh_return_seq != 0)
1209e5fd1904STrond Myklebust 			stateid->seqid = cpu_to_be32(lo->plh_return_seq);
1210e5fd1904STrond Myklebust 		if (iomode != NULL)
1211e5fd1904STrond Myklebust 			*iomode = lo->plh_return_iomode;
12128e0acf90STrond Myklebust 		pnfs_clear_layoutreturn_info(lo);
12131bcf34fdSTrond Myklebust 	} else if (iomode != NULL)
1214e5fd1904STrond Myklebust 		*iomode = IOMODE_ANY;
12151bcf34fdSTrond Myklebust 	pnfs_barrier_update(lo, be32_to_cpu(stateid->seqid));
121613c13a6aSTrond Myklebust 	return true;
121713c13a6aSTrond Myklebust }
121813c13a6aSTrond Myklebust 
1219828ed9ecSTrond Myklebust static void
pnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args * args,struct pnfs_layout_hdr * lo,const nfs4_stateid * stateid,enum pnfs_iomode iomode)1220828ed9ecSTrond Myklebust pnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args *args,
1221828ed9ecSTrond Myklebust 		struct pnfs_layout_hdr *lo,
1222828ed9ecSTrond Myklebust 		const nfs4_stateid *stateid,
1223828ed9ecSTrond Myklebust 		enum pnfs_iomode iomode)
1224828ed9ecSTrond Myklebust {
1225828ed9ecSTrond Myklebust 	struct inode *inode = lo->plh_inode;
1226828ed9ecSTrond Myklebust 
1227828ed9ecSTrond Myklebust 	args->layout_type = NFS_SERVER(inode)->pnfs_curr_ld->id;
1228828ed9ecSTrond Myklebust 	args->inode = inode;
1229828ed9ecSTrond Myklebust 	args->range.iomode = iomode;
1230828ed9ecSTrond Myklebust 	args->range.offset = 0;
1231828ed9ecSTrond Myklebust 	args->range.length = NFS4_MAX_UINT64;
1232828ed9ecSTrond Myklebust 	args->layout = lo;
1233828ed9ecSTrond Myklebust 	nfs4_stateid_copy(&args->stateid, stateid);
1234828ed9ecSTrond Myklebust }
1235828ed9ecSTrond Myklebust 
1236f40eb5d0SPeng Tao static int
pnfs_send_layoutreturn(struct pnfs_layout_hdr * lo,const nfs4_stateid * stateid,const struct cred ** pcred,enum pnfs_iomode iomode,bool sync)123744ea8dfcSTrond Myklebust pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
123844ea8dfcSTrond Myklebust 		       const nfs4_stateid *stateid,
123944ea8dfcSTrond Myklebust 		       const struct cred **pcred,
124044ea8dfcSTrond Myklebust 		       enum pnfs_iomode iomode,
124144ea8dfcSTrond Myklebust 		       bool sync)
1242f40eb5d0SPeng Tao {
1243f40eb5d0SPeng Tao 	struct inode *ino = lo->plh_inode;
1244287bd3e9STrond Myklebust 	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
1245f40eb5d0SPeng Tao 	struct nfs4_layoutreturn *lrp;
124644ea8dfcSTrond Myklebust 	const struct cred *cred = *pcred;
1247f40eb5d0SPeng Tao 	int status = 0;
1248f40eb5d0SPeng Tao 
124944ea8dfcSTrond Myklebust 	*pcred = NULL;
125063d8a41bSTrond Myklebust 	lrp = kzalloc(sizeof(*lrp), nfs_io_gfp_mask());
1251f40eb5d0SPeng Tao 	if (unlikely(lrp == NULL)) {
1252f40eb5d0SPeng Tao 		status = -ENOMEM;
1253f40eb5d0SPeng Tao 		spin_lock(&ino->i_lock);
1254d67ae825STom Haynes 		pnfs_clear_layoutreturn_waitbit(lo);
1255f40eb5d0SPeng Tao 		spin_unlock(&ino->i_lock);
125644ea8dfcSTrond Myklebust 		put_cred(cred);
1257f40eb5d0SPeng Tao 		pnfs_put_layout_hdr(lo);
1258f40eb5d0SPeng Tao 		goto out;
1259f40eb5d0SPeng Tao 	}
1260f40eb5d0SPeng Tao 
1261828ed9ecSTrond Myklebust 	pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode);
12624d796d75STrond Myklebust 	lrp->args.ld_private = &lrp->ld_private;
1263f40eb5d0SPeng Tao 	lrp->clp = NFS_SERVER(ino)->nfs_client;
126444ea8dfcSTrond Myklebust 	lrp->cred = cred;
1265287bd3e9STrond Myklebust 	if (ld->prepare_layoutreturn)
1266287bd3e9STrond Myklebust 		ld->prepare_layoutreturn(&lrp->args);
1267f40eb5d0SPeng Tao 
12686c16605dSPeng Tao 	status = nfs4_proc_layoutreturn(lrp, sync);
1269f40eb5d0SPeng Tao out:
1270f40eb5d0SPeng Tao 	dprintk("<-- %s status: %d\n", __func__, status);
1271f40eb5d0SPeng Tao 	return status;
1272f40eb5d0SPeng Tao }
1273f40eb5d0SPeng Tao 
1274d474f961STrond Myklebust static bool
pnfs_layout_segments_returnable(struct pnfs_layout_hdr * lo,enum pnfs_iomode iomode,u32 seq)1275d474f961STrond Myklebust pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo,
1276d474f961STrond Myklebust 				enum pnfs_iomode iomode,
1277d474f961STrond Myklebust 				u32 seq)
1278d474f961STrond Myklebust {
1279d474f961STrond Myklebust 	struct pnfs_layout_range recall_range = {
1280d474f961STrond Myklebust 		.length = NFS4_MAX_UINT64,
1281d474f961STrond Myklebust 		.iomode = iomode,
1282d474f961STrond Myklebust 	};
1283d474f961STrond Myklebust 	return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
1284d474f961STrond Myklebust 					       &recall_range, seq) != -EBUSY;
1285d474f961STrond Myklebust }
1286d474f961STrond Myklebust 
128713c13a6aSTrond Myklebust /* Return true if layoutreturn is needed */
128813c13a6aSTrond Myklebust static bool
pnfs_layout_need_return(struct pnfs_layout_hdr * lo)128913c13a6aSTrond Myklebust pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
129013c13a6aSTrond Myklebust {
12912370abdaSTrond Myklebust 	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
129213c13a6aSTrond Myklebust 		return false;
1293d474f961STrond Myklebust 	return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode,
1294d474f961STrond Myklebust 					       lo->plh_return_seq);
129513c13a6aSTrond Myklebust }
129613c13a6aSTrond Myklebust 
pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr * lo)129713c13a6aSTrond Myklebust static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
129813c13a6aSTrond Myklebust {
129913c13a6aSTrond Myklebust 	struct inode *inode= lo->plh_inode;
130013c13a6aSTrond Myklebust 
13012370abdaSTrond Myklebust 	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
130213c13a6aSTrond Myklebust 		return;
130313c13a6aSTrond Myklebust 	spin_lock(&inode->i_lock);
130413c13a6aSTrond Myklebust 	if (pnfs_layout_need_return(lo)) {
130544ea8dfcSTrond Myklebust 		const struct cred *cred;
130613c13a6aSTrond Myklebust 		nfs4_stateid stateid;
130713c13a6aSTrond Myklebust 		enum pnfs_iomode iomode;
130813c13a6aSTrond Myklebust 		bool send;
130913c13a6aSTrond Myklebust 
131044ea8dfcSTrond Myklebust 		send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
131113c13a6aSTrond Myklebust 		spin_unlock(&inode->i_lock);
131213c13a6aSTrond Myklebust 		if (send) {
131313c13a6aSTrond Myklebust 			/* Send an async layoutreturn so we dont deadlock */
131444ea8dfcSTrond Myklebust 			pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
131513c13a6aSTrond Myklebust 		}
131613c13a6aSTrond Myklebust 	} else
131713c13a6aSTrond Myklebust 		spin_unlock(&inode->i_lock);
131813c13a6aSTrond Myklebust }
131913c13a6aSTrond Myklebust 
1320293b3b06SAndy Adamson /*
1321293b3b06SAndy Adamson  * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
1322293b3b06SAndy Adamson  * when the layout segment list is empty.
1323293b3b06SAndy Adamson  *
1324293b3b06SAndy Adamson  * Note that a pnfs_layout_hdr can exist with an empty layout segment
1325293b3b06SAndy Adamson  * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the
1326293b3b06SAndy Adamson  * deviceid is marked invalid.
1327293b3b06SAndy Adamson  */
1328cbe82603SBenny Halevy int
_pnfs_return_layout(struct inode * ino)1329cbe82603SBenny Halevy _pnfs_return_layout(struct inode *ino)
1330cbe82603SBenny Halevy {
1331cbe82603SBenny Halevy 	struct pnfs_layout_hdr *lo = NULL;
1332cbe82603SBenny Halevy 	struct nfs_inode *nfsi = NFS_I(ino);
1333a421d218SAnna Schumaker 	struct pnfs_layout_range range = {
1334a421d218SAnna Schumaker 		.iomode		= IOMODE_ANY,
1335a421d218SAnna Schumaker 		.offset		= 0,
1336a421d218SAnna Schumaker 		.length		= NFS4_MAX_UINT64,
1337a421d218SAnna Schumaker 	};
1338cbe82603SBenny Halevy 	LIST_HEAD(tmp_list);
133944ea8dfcSTrond Myklebust 	const struct cred *cred;
1340cbe82603SBenny Halevy 	nfs4_stateid stateid;
134124408f52STrond Myklebust 	int status = 0;
134293b7f7adSOlga Kornievskaia 	bool send, valid_layout;
1343cbe82603SBenny Halevy 
1344366d5052SAndy Adamson 	dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
1345cbe82603SBenny Halevy 
1346cbe82603SBenny Halevy 	spin_lock(&ino->i_lock);
1347cbe82603SBenny Halevy 	lo = nfsi->layout;
1348e5929f3cSTrond Myklebust 	if (!lo) {
1349cbe82603SBenny Halevy 		spin_unlock(&ino->i_lock);
1350293b3b06SAndy Adamson 		dprintk("NFS: %s no layout to return\n", __func__);
1351293b3b06SAndy Adamson 		goto out;
1352cbe82603SBenny Halevy 	}
1353cbe82603SBenny Halevy 	/* Reference matched in nfs4_layoutreturn_release */
135470c3bd2bSTrond Myklebust 	pnfs_get_layout_hdr(lo);
135524408f52STrond Myklebust 	/* Is there an outstanding layoutreturn ? */
135624408f52STrond Myklebust 	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
135724408f52STrond Myklebust 		spin_unlock(&ino->i_lock);
135824408f52STrond Myklebust 		if (wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
135924408f52STrond Myklebust 					TASK_UNINTERRUPTIBLE))
136024408f52STrond Myklebust 			goto out_put_layout_hdr;
136124408f52STrond Myklebust 		spin_lock(&ino->i_lock);
136224408f52STrond Myklebust 	}
136393b7f7adSOlga Kornievskaia 	valid_layout = pnfs_layout_is_valid(lo);
136424956804STrond Myklebust 	pnfs_clear_layoutcommit(ino, &tmp_list);
1365a421d218SAnna Schumaker 	pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
1366c88953d8SChristoph Hellwig 
1367a421d218SAnna Schumaker 	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
1368c88953d8SChristoph Hellwig 		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
1369c88953d8SChristoph Hellwig 
1370293b3b06SAndy Adamson 	/* Don't send a LAYOUTRETURN if list was initially empty */
137193b7f7adSOlga Kornievskaia 	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
137293b7f7adSOlga Kornievskaia 			!valid_layout) {
1373293b3b06SAndy Adamson 		spin_unlock(&ino->i_lock);
1374293b3b06SAndy Adamson 		dprintk("NFS: %s no layout segments to return\n", __func__);
13757bcc1058STrond Myklebust 		goto out_wait_layoutreturn;
1376293b3b06SAndy Adamson 	}
137747abadefSChristoph Hellwig 
137844ea8dfcSTrond Myklebust 	send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL);
1379cbe82603SBenny Halevy 	spin_unlock(&ino->i_lock);
13807f27392cSTrond Myklebust 	if (send)
138144ea8dfcSTrond Myklebust 		status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true);
13827bcc1058STrond Myklebust out_wait_layoutreturn:
13837bcc1058STrond Myklebust 	wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE);
13847f27392cSTrond Myklebust out_put_layout_hdr:
1385ee6625a9STrond Myklebust 	pnfs_free_lseg_list(&tmp_list);
13867f27392cSTrond Myklebust 	pnfs_put_layout_hdr(lo);
1387cbe82603SBenny Halevy out:
1388cbe82603SBenny Halevy 	dprintk("<-- %s status: %d\n", __func__, status);
1389cbe82603SBenny Halevy 	return status;
1390cbe82603SBenny Halevy }
1391cbe82603SBenny Halevy 
139224028672STrond Myklebust int
pnfs_commit_and_return_layout(struct inode * inode)139324028672STrond Myklebust pnfs_commit_and_return_layout(struct inode *inode)
139424028672STrond Myklebust {
139524028672STrond Myklebust 	struct pnfs_layout_hdr *lo;
139624028672STrond Myklebust 	int ret;
139724028672STrond Myklebust 
139824028672STrond Myklebust 	spin_lock(&inode->i_lock);
139924028672STrond Myklebust 	lo = NFS_I(inode)->layout;
140024028672STrond Myklebust 	if (lo == NULL) {
140124028672STrond Myklebust 		spin_unlock(&inode->i_lock);
140224028672STrond Myklebust 		return 0;
140324028672STrond Myklebust 	}
140424028672STrond Myklebust 	pnfs_get_layout_hdr(lo);
140524028672STrond Myklebust 	/* Block new layoutgets and read/write to ds */
140624028672STrond Myklebust 	lo->plh_block_lgets++;
140724028672STrond Myklebust 	spin_unlock(&inode->i_lock);
140824028672STrond Myklebust 	filemap_fdatawait(inode->i_mapping);
140924028672STrond Myklebust 	ret = pnfs_layoutcommit_inode(inode, true);
141024028672STrond Myklebust 	if (ret == 0)
141124028672STrond Myklebust 		ret = _pnfs_return_layout(inode);
141224028672STrond Myklebust 	spin_lock(&inode->i_lock);
141324028672STrond Myklebust 	lo->plh_block_lgets--;
141424028672STrond Myklebust 	spin_unlock(&inode->i_lock);
141524028672STrond Myklebust 	pnfs_put_layout_hdr(lo);
141624028672STrond Myklebust 	return ret;
141724028672STrond Myklebust }
141824028672STrond Myklebust 
pnfs_roc(struct inode * ino,struct nfs4_layoutreturn_args * args,struct nfs4_layoutreturn_res * res,const struct cred * cred)14191c5bd76dSTrond Myklebust bool pnfs_roc(struct inode *ino,
14201c5bd76dSTrond Myklebust 		struct nfs4_layoutreturn_args *args,
14211c5bd76dSTrond Myklebust 		struct nfs4_layoutreturn_res *res,
1422a52458b4SNeilBrown 		const struct cred *cred)
1423f7e8917aSFred Isaman {
142440dd4b7aSTrond Myklebust 	struct nfs_inode *nfsi = NFS_I(ino);
142540dd4b7aSTrond Myklebust 	struct nfs_open_context *ctx;
142640dd4b7aSTrond Myklebust 	struct nfs4_state *state;
1427f7e8917aSFred Isaman 	struct pnfs_layout_hdr *lo;
14281c5bd76dSTrond Myklebust 	struct pnfs_layout_segment *lseg, *next;
142944ea8dfcSTrond Myklebust 	const struct cred *lc_cred;
1430193e3aa2SPeng Tao 	nfs4_stateid stateid;
14311c5bd76dSTrond Myklebust 	enum pnfs_iomode iomode = 0;
14321c5bd76dSTrond Myklebust 	bool layoutreturn = false, roc = false;
1433e71708d4STrond Myklebust 	bool skip_read = false;
1434f7e8917aSFred Isaman 
14351c5bd76dSTrond Myklebust 	if (!nfs_have_layout(ino))
14361c5bd76dSTrond Myklebust 		return false;
143729ade5dbSTrond Myklebust retry:
14380de43976STrond Myklebust 	rcu_read_lock();
1439f7e8917aSFred Isaman 	spin_lock(&ino->i_lock);
144040dd4b7aSTrond Myklebust 	lo = nfsi->layout;
14410cdc329eSTrond Myklebust 	if (!lo || !pnfs_layout_is_valid(lo) ||
14429c6376ebSTrond Myklebust 	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
14439c6376ebSTrond Myklebust 		lo = NULL;
144440dd4b7aSTrond Myklebust 		goto out_noroc;
14459c6376ebSTrond Myklebust 	}
144629ade5dbSTrond Myklebust 	pnfs_get_layout_hdr(lo);
14479c6376ebSTrond Myklebust 	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
144829ade5dbSTrond Myklebust 		spin_unlock(&ino->i_lock);
14490de43976STrond Myklebust 		rcu_read_unlock();
145029ade5dbSTrond Myklebust 		wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
145129ade5dbSTrond Myklebust 				TASK_UNINTERRUPTIBLE);
145229ade5dbSTrond Myklebust 		pnfs_put_layout_hdr(lo);
145329ade5dbSTrond Myklebust 		goto retry;
145429ade5dbSTrond Myklebust 	}
145540dd4b7aSTrond Myklebust 
1456e755d638SPeng Tao 	/* no roc if we hold a delegation */
1457e71708d4STrond Myklebust 	if (nfs4_check_delegation(ino, FMODE_READ)) {
1458e71708d4STrond Myklebust 		if (nfs4_check_delegation(ino, FMODE_WRITE))
145940dd4b7aSTrond Myklebust 			goto out_noroc;
1460e71708d4STrond Myklebust 		skip_read = true;
1461e71708d4STrond Myklebust 	}
146240dd4b7aSTrond Myklebust 
14630de43976STrond Myklebust 	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
146440dd4b7aSTrond Myklebust 		state = ctx->state;
1465e71708d4STrond Myklebust 		if (state == NULL)
1466e71708d4STrond Myklebust 			continue;
146740dd4b7aSTrond Myklebust 		/* Don't return layout if there is open file state */
1468e71708d4STrond Myklebust 		if (state->state & FMODE_WRITE)
146940dd4b7aSTrond Myklebust 			goto out_noroc;
1470e71708d4STrond Myklebust 		if (state->state & FMODE_READ)
1471e71708d4STrond Myklebust 			skip_read = true;
147240dd4b7aSTrond Myklebust 	}
147340dd4b7aSTrond Myklebust 
1474e755d638SPeng Tao 
14751c5bd76dSTrond Myklebust 	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) {
1476e71708d4STrond Myklebust 		if (skip_read && lseg->pls_range.iomode == IOMODE_READ)
1477e71708d4STrond Myklebust 			continue;
1478e755d638SPeng Tao 		/* If we are sending layoutreturn, invalidate all valid lsegs */
14791c5bd76dSTrond Myklebust 		if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags))
14801c5bd76dSTrond Myklebust 			continue;
14811c5bd76dSTrond Myklebust 		/*
14821c5bd76dSTrond Myklebust 		 * Note: mark lseg for return so pnfs_layout_remove_lseg
14831c5bd76dSTrond Myklebust 		 * doesn't invalidate the layout for us.
14841c5bd76dSTrond Myklebust 		 */
14851c5bd76dSTrond Myklebust 		set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
14861c5bd76dSTrond Myklebust 		if (!mark_lseg_invalid(lseg, &lo->plh_return_segs))
14871c5bd76dSTrond Myklebust 			continue;
14881c5bd76dSTrond Myklebust 		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
148969820d22STrond Myklebust 	}
149069820d22STrond Myklebust 
14911c5bd76dSTrond Myklebust 	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
149269820d22STrond Myklebust 		goto out_noroc;
149369820d22STrond Myklebust 
1494500d701fSPeng Tao 	/* ROC in two conditions:
1495e755d638SPeng Tao 	 * 1. there are ROC lsegs
1496e755d638SPeng Tao 	 * 2. we don't send layoutreturn
1497e755d638SPeng Tao 	 */
1498500d701fSPeng Tao 	/* lo ref dropped in pnfs_roc_release() */
149944ea8dfcSTrond Myklebust 	layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &lc_cred, &iomode);
15001c5bd76dSTrond Myklebust 	/* If the creds don't match, we can't compound the layoutreturn */
15014d8948c7STrond Myklebust 	if (!layoutreturn || cred_fscmp(cred, lc_cred) != 0)
15021c5bd76dSTrond Myklebust 		goto out_noroc;
15031c5bd76dSTrond Myklebust 
15041c5bd76dSTrond Myklebust 	roc = layoutreturn;
15051c5bd76dSTrond Myklebust 	pnfs_init_layoutreturn_args(args, lo, &stateid, iomode);
15061c5bd76dSTrond Myklebust 	res->lrs_present = 0;
15071c5bd76dSTrond Myklebust 	layoutreturn = false;
150844ea8dfcSTrond Myklebust 	put_cred(lc_cred);
15094d8948c7STrond Myklebust 
1510e755d638SPeng Tao out_noroc:
1511f7e8917aSFred Isaman 	spin_unlock(&ino->i_lock);
15120de43976STrond Myklebust 	rcu_read_unlock();
15137140171eSTrond Myklebust 	pnfs_layoutcommit_inode(ino, true);
1514287bd3e9STrond Myklebust 	if (roc) {
1515287bd3e9STrond Myklebust 		struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
1516287bd3e9STrond Myklebust 		if (ld->prepare_layoutreturn)
1517287bd3e9STrond Myklebust 			ld->prepare_layoutreturn(args);
15189c6376ebSTrond Myklebust 		pnfs_put_layout_hdr(lo);
1519287bd3e9STrond Myklebust 		return true;
1520287bd3e9STrond Myklebust 	}
1521e755d638SPeng Tao 	if (layoutreturn)
152244ea8dfcSTrond Myklebust 		pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true);
15239c6376ebSTrond Myklebust 	pnfs_put_layout_hdr(lo);
1524287bd3e9STrond Myklebust 	return false;
1525f7e8917aSFred Isaman }
1526f7e8917aSFred Isaman 
pnfs_roc_done(struct rpc_task * task,struct nfs4_layoutreturn_args ** argpp,struct nfs4_layoutreturn_res ** respp,int * ret)1527078000d0STrond Myklebust int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
1528078000d0STrond Myklebust 		  struct nfs4_layoutreturn_res **respp, int *ret)
1529287a9c55STrond Myklebust {
1530287a9c55STrond Myklebust 	struct nfs4_layoutreturn_args *arg = *argpp;
1531287a9c55STrond Myklebust 	int retval = -EAGAIN;
1532287a9c55STrond Myklebust 
1533287a9c55STrond Myklebust 	if (!arg)
1534287a9c55STrond Myklebust 		return 0;
1535287a9c55STrond Myklebust 	/* Handle Layoutreturn errors */
1536287a9c55STrond Myklebust 	switch (*ret) {
1537287a9c55STrond Myklebust 	case 0:
1538287a9c55STrond Myklebust 		retval = 0;
1539287a9c55STrond Myklebust 		break;
15406109bcf7STrond Myklebust 	case -NFS4ERR_NOMATCHING_LAYOUT:
15416109bcf7STrond Myklebust 		/* Was there an RPC level error? If not, retry */
15426109bcf7STrond Myklebust 		if (task->tk_rpc_status == 0)
15436109bcf7STrond Myklebust 			break;
15446109bcf7STrond Myklebust 		/* If the call was not sent, let caller handle it */
15456109bcf7STrond Myklebust 		if (!RPC_WAS_SENT(task))
15466109bcf7STrond Myklebust 			return 0;
15476109bcf7STrond Myklebust 		/*
15486109bcf7STrond Myklebust 		 * Otherwise, assume the call succeeded and
15496109bcf7STrond Myklebust 		 * that we need to release the layout
15506109bcf7STrond Myklebust 		 */
15516109bcf7STrond Myklebust 		*ret = 0;
15526109bcf7STrond Myklebust 		(*respp)->lrs_present = 0;
15536109bcf7STrond Myklebust 		retval = 0;
15546109bcf7STrond Myklebust 		break;
1555078a432dSTrond Myklebust 	case -NFS4ERR_DELAY:
1556078a432dSTrond Myklebust 		/* Let the caller handle the retry */
1557078a432dSTrond Myklebust 		*ret = -NFS4ERR_NOMATCHING_LAYOUT;
1558078a432dSTrond Myklebust 		return 0;
1559287a9c55STrond Myklebust 	case -NFS4ERR_OLD_STATEID:
156030cb3ee2STrond Myklebust 		if (!nfs4_layout_refresh_old_stateid(&arg->stateid,
1561078000d0STrond Myklebust 						     &arg->range, arg->inode))
1562287a9c55STrond Myklebust 			break;
1563287a9c55STrond Myklebust 		*ret = -NFS4ERR_NOMATCHING_LAYOUT;
1564287a9c55STrond Myklebust 		return -EAGAIN;
1565287a9c55STrond Myklebust 	}
1566287a9c55STrond Myklebust 	*argpp = NULL;
1567287a9c55STrond Myklebust 	*respp = NULL;
1568287a9c55STrond Myklebust 	return retval;
1569287a9c55STrond Myklebust }
1570287a9c55STrond Myklebust 
pnfs_roc_release(struct nfs4_layoutreturn_args * args,struct nfs4_layoutreturn_res * res,int ret)15711c5bd76dSTrond Myklebust void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
15721c5bd76dSTrond Myklebust 		struct nfs4_layoutreturn_res *res,
15731c5bd76dSTrond Myklebust 		int ret)
1574f7e8917aSFred Isaman {
15751c5bd76dSTrond Myklebust 	struct pnfs_layout_hdr *lo = args->layout;
157667bbceedSTrond Myklebust 	struct inode *inode = args->inode;
15771c5bd76dSTrond Myklebust 	const nfs4_stateid *res_stateid = NULL;
1578287bd3e9STrond Myklebust 	struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
1579f7e8917aSFred Isaman 
15809c47b18cSTrond Myklebust 	switch (ret) {
15819c47b18cSTrond Myklebust 	case -NFS4ERR_NOMATCHING_LAYOUT:
158267bbceedSTrond Myklebust 		spin_lock(&inode->i_lock);
158367bbceedSTrond Myklebust 		if (pnfs_layout_is_valid(lo) &&
158467bbceedSTrond Myklebust 		    nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid))
158567bbceedSTrond Myklebust 			pnfs_set_plh_return_info(lo, args->range.iomode, 0);
1586c18d1e17STrond Myklebust 		pnfs_clear_layoutreturn_waitbit(lo);
158767bbceedSTrond Myklebust 		spin_unlock(&inode->i_lock);
15889c47b18cSTrond Myklebust 		break;
15899c47b18cSTrond Myklebust 	case 0:
15901c5bd76dSTrond Myklebust 		if (res->lrs_present)
15911c5bd76dSTrond Myklebust 			res_stateid = &res->stateid;
1592df561f66SGustavo A. R. Silva 		fallthrough;
15939c47b18cSTrond Myklebust 	default:
1594c18d1e17STrond Myklebust 		pnfs_layoutreturn_free_lsegs(lo, &args->stateid, &args->range,
1595c18d1e17STrond Myklebust 					     res_stateid);
1596f7e8917aSFred Isaman 	}
1597a19b4785STrond Myklebust 	trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret);
1598287bd3e9STrond Myklebust 	if (ld_private && ld_private->ops && ld_private->ops->free)
1599287bd3e9STrond Myklebust 		ld_private->ops->free(ld_private);
16001c5bd76dSTrond Myklebust 	pnfs_put_layout_hdr(lo);
1601f7e8917aSFred Isaman }
1602f7e8917aSFred Isaman 
pnfs_wait_on_layoutreturn(struct inode * ino,struct rpc_task * task)1603500d701fSPeng Tao bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
1604500d701fSPeng Tao {
1605500d701fSPeng Tao 	struct nfs_inode *nfsi = NFS_I(ino);
1606500d701fSPeng Tao         struct pnfs_layout_hdr *lo;
1607500d701fSPeng Tao         bool sleep = false;
1608500d701fSPeng Tao 
1609500d701fSPeng Tao 	/* we might not have grabbed lo reference. so need to check under
1610500d701fSPeng Tao 	 * i_lock */
1611500d701fSPeng Tao         spin_lock(&ino->i_lock);
1612500d701fSPeng Tao         lo = nfsi->layout;
1613ee284e35STrond Myklebust         if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
1614500d701fSPeng Tao                 rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
1615ee284e35STrond Myklebust                 sleep = true;
1616ee284e35STrond Myklebust 	}
1617ee284e35STrond Myklebust         spin_unlock(&ino->i_lock);
1618500d701fSPeng Tao         return sleep;
1619500d701fSPeng Tao }
1620500d701fSPeng Tao 
1621b1f69b75SAndy Adamson /*
1622b1f69b75SAndy Adamson  * Compare two layout segments for sorting into layout cache.
1623b1f69b75SAndy Adamson  * We want to preferentially return RW over RO layouts, so ensure those
1624b1f69b75SAndy Adamson  * are seen first.
1625b1f69b75SAndy Adamson  */
1626b1f69b75SAndy Adamson static s64
pnfs_lseg_range_cmp(const struct pnfs_layout_range * l1,const struct pnfs_layout_range * l2)16277dc0ac70STrond Myklebust pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1,
16283cb2df17STrond Myklebust 	   const struct pnfs_layout_range *l2)
1629b1f69b75SAndy Adamson {
1630fb3296ebSBenny Halevy 	s64 d;
1631fb3296ebSBenny Halevy 
1632fb3296ebSBenny Halevy 	/* high offset > low offset */
1633fb3296ebSBenny Halevy 	d = l1->offset - l2->offset;
1634fb3296ebSBenny Halevy 	if (d)
1635fb3296ebSBenny Halevy 		return d;
1636fb3296ebSBenny Halevy 
1637fb3296ebSBenny Halevy 	/* short length > long length */
1638fb3296ebSBenny Halevy 	d = l2->length - l1->length;
1639fb3296ebSBenny Halevy 	if (d)
1640fb3296ebSBenny Halevy 		return d;
1641fb3296ebSBenny Halevy 
1642b1f69b75SAndy Adamson 	/* read > read/write */
1643fb3296ebSBenny Halevy 	return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
1644b1f69b75SAndy Adamson }
1645b1f69b75SAndy Adamson 
164603772d2fSTrond Myklebust static bool
pnfs_lseg_range_is_after(const struct pnfs_layout_range * l1,const struct pnfs_layout_range * l2)164703772d2fSTrond Myklebust pnfs_lseg_range_is_after(const struct pnfs_layout_range *l1,
164803772d2fSTrond Myklebust 		const struct pnfs_layout_range *l2)
1649974cec8cSAndy Adamson {
165003772d2fSTrond Myklebust 	return pnfs_lseg_range_cmp(l1, l2) > 0;
165103772d2fSTrond Myklebust }
165203772d2fSTrond Myklebust 
165303772d2fSTrond Myklebust static bool
pnfs_lseg_no_merge(struct pnfs_layout_segment * lseg,struct pnfs_layout_segment * old)165403772d2fSTrond Myklebust pnfs_lseg_no_merge(struct pnfs_layout_segment *lseg,
165503772d2fSTrond Myklebust 		struct pnfs_layout_segment *old)
165603772d2fSTrond Myklebust {
165703772d2fSTrond Myklebust 	return false;
165803772d2fSTrond Myklebust }
165903772d2fSTrond Myklebust 
166003772d2fSTrond Myklebust void
pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr * lo,struct pnfs_layout_segment * lseg,bool (* is_after)(const struct pnfs_layout_range *,const struct pnfs_layout_range *),bool (* do_merge)(struct pnfs_layout_segment *,struct pnfs_layout_segment *),struct list_head * free_me)166103772d2fSTrond Myklebust pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo,
166203772d2fSTrond Myklebust 		   struct pnfs_layout_segment *lseg,
166303772d2fSTrond Myklebust 		   bool (*is_after)(const struct pnfs_layout_range *,
166403772d2fSTrond Myklebust 			   const struct pnfs_layout_range *),
166503772d2fSTrond Myklebust 		   bool (*do_merge)(struct pnfs_layout_segment *,
166603772d2fSTrond Myklebust 			   struct pnfs_layout_segment *),
166703772d2fSTrond Myklebust 		   struct list_head *free_me)
166803772d2fSTrond Myklebust {
166903772d2fSTrond Myklebust 	struct pnfs_layout_segment *lp, *tmp;
1670b1f69b75SAndy Adamson 
1671974cec8cSAndy Adamson 	dprintk("%s:Begin\n", __func__);
1672974cec8cSAndy Adamson 
167303772d2fSTrond Myklebust 	list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) {
167403772d2fSTrond Myklebust 		if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0)
167503772d2fSTrond Myklebust 			continue;
167603772d2fSTrond Myklebust 		if (do_merge(lseg, lp)) {
167703772d2fSTrond Myklebust 			mark_lseg_invalid(lp, free_me);
167803772d2fSTrond Myklebust 			continue;
167903772d2fSTrond Myklebust 		}
168003772d2fSTrond Myklebust 		if (is_after(&lseg->pls_range, &lp->pls_range))
1681b1f69b75SAndy Adamson 			continue;
1682566052c5SFred Isaman 		list_add_tail(&lseg->pls_list, &lp->pls_list);
1683b1f69b75SAndy Adamson 		dprintk("%s: inserted lseg %p "
1684b1f69b75SAndy Adamson 			"iomode %d offset %llu length %llu before "
1685b1f69b75SAndy Adamson 			"lp %p iomode %d offset %llu length %llu\n",
1686566052c5SFred Isaman 			__func__, lseg, lseg->pls_range.iomode,
1687566052c5SFred Isaman 			lseg->pls_range.offset, lseg->pls_range.length,
1688566052c5SFred Isaman 			lp, lp->pls_range.iomode, lp->pls_range.offset,
1689566052c5SFred Isaman 			lp->pls_range.length);
1690fb3296ebSBenny Halevy 		goto out;
1691974cec8cSAndy Adamson 	}
1692b7edfaa1SFred Isaman 	list_add_tail(&lseg->pls_list, &lo->plh_segs);
1693b1f69b75SAndy Adamson 	dprintk("%s: inserted lseg %p "
1694b1f69b75SAndy Adamson 		"iomode %d offset %llu length %llu at tail\n",
1695566052c5SFred Isaman 		__func__, lseg, lseg->pls_range.iomode,
1696566052c5SFred Isaman 		lseg->pls_range.offset, lseg->pls_range.length);
1697fb3296ebSBenny Halevy out:
169870c3bd2bSTrond Myklebust 	pnfs_get_layout_hdr(lo);
1699974cec8cSAndy Adamson 
1700974cec8cSAndy Adamson 	dprintk("%s:Return\n", __func__);
1701974cec8cSAndy Adamson }
170203772d2fSTrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg);
170303772d2fSTrond Myklebust 
170403772d2fSTrond Myklebust static void
pnfs_layout_insert_lseg(struct pnfs_layout_hdr * lo,struct pnfs_layout_segment * lseg,struct list_head * free_me)170503772d2fSTrond Myklebust pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
170603772d2fSTrond Myklebust 		   struct pnfs_layout_segment *lseg,
170703772d2fSTrond Myklebust 		   struct list_head *free_me)
170803772d2fSTrond Myklebust {
170903772d2fSTrond Myklebust 	struct inode *inode = lo->plh_inode;
171003772d2fSTrond Myklebust 	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
171103772d2fSTrond Myklebust 
171203772d2fSTrond Myklebust 	if (ld->add_lseg != NULL)
171303772d2fSTrond Myklebust 		ld->add_lseg(lo, lseg, free_me);
171403772d2fSTrond Myklebust 	else
171503772d2fSTrond Myklebust 		pnfs_generic_layout_insert_lseg(lo, lseg,
171603772d2fSTrond Myklebust 				pnfs_lseg_range_is_after,
171703772d2fSTrond Myklebust 				pnfs_lseg_no_merge,
171803772d2fSTrond Myklebust 				free_me);
171903772d2fSTrond Myklebust }
1720e5e94017SBenny Halevy 
1721e5e94017SBenny Halevy static struct pnfs_layout_hdr *
alloc_init_layout_hdr(struct inode * ino,struct nfs_open_context * ctx,gfp_t gfp_flags)17229fa40758SPeng Tao alloc_init_layout_hdr(struct inode *ino,
17239fa40758SPeng Tao 		      struct nfs_open_context *ctx,
17249fa40758SPeng Tao 		      gfp_t gfp_flags)
1725e5e94017SBenny Halevy {
1726e5e94017SBenny Halevy 	struct pnfs_layout_hdr *lo;
1727e5e94017SBenny Halevy 
1728636fb9c8SBenny Halevy 	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
1729e5e94017SBenny Halevy 	if (!lo)
1730e5e94017SBenny Halevy 		return NULL;
17312b28a7beSElena Reshetova 	refcount_set(&lo->plh_refcount, 1);
1732b7edfaa1SFred Isaman 	INIT_LIST_HEAD(&lo->plh_layouts);
1733b7edfaa1SFred Isaman 	INIT_LIST_HEAD(&lo->plh_segs);
173468f74479STrond Myklebust 	INIT_LIST_HEAD(&lo->plh_return_segs);
1735fd9a8d71STrond Myklebust 	INIT_LIST_HEAD(&lo->plh_bulk_destroy);
1736b7edfaa1SFred Isaman 	lo->plh_inode = ino;
1737a52458b4SNeilBrown 	lo->plh_lc_cred = get_cred(ctx->cred);
173867a3b721STrond Myklebust 	lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;
1739e5e94017SBenny Halevy 	return lo;
1740e5e94017SBenny Halevy }
1741e5e94017SBenny Halevy 
1742e5e94017SBenny Halevy static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode * ino,struct nfs_open_context * ctx,gfp_t gfp_flags)17439fa40758SPeng Tao pnfs_find_alloc_layout(struct inode *ino,
17449fa40758SPeng Tao 		       struct nfs_open_context *ctx,
17459fa40758SPeng Tao 		       gfp_t gfp_flags)
1746e5241e43STrond Myklebust 	__releases(&ino->i_lock)
1747e5241e43STrond Myklebust 	__acquires(&ino->i_lock)
1748e5e94017SBenny Halevy {
1749e5e94017SBenny Halevy 	struct nfs_inode *nfsi = NFS_I(ino);
1750e5e94017SBenny Halevy 	struct pnfs_layout_hdr *new = NULL;
1751e5e94017SBenny Halevy 
1752e5e94017SBenny Halevy 	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
1753e5e94017SBenny Halevy 
1754251ec410STrond Myklebust 	if (nfsi->layout != NULL)
1755251ec410STrond Myklebust 		goto out_existing;
1756e5e94017SBenny Halevy 	spin_unlock(&ino->i_lock);
17579fa40758SPeng Tao 	new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
1758e5e94017SBenny Halevy 	spin_lock(&ino->i_lock);
1759e5e94017SBenny Halevy 
1760251ec410STrond Myklebust 	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
1761e5e94017SBenny Halevy 		nfsi->layout = new;
1762251ec410STrond Myklebust 		return new;
17637175fe90SYanchuan Nian 	} else if (new != NULL)
1764636fb9c8SBenny Halevy 		pnfs_free_layout_hdr(new);
1765251ec410STrond Myklebust out_existing:
1766251ec410STrond Myklebust 	pnfs_get_layout_hdr(nfsi->layout);
1767e5e94017SBenny Halevy 	return nfsi->layout;
1768e5e94017SBenny Halevy }
1769e5e94017SBenny Halevy 
1770b1f69b75SAndy Adamson /*
1771b1f69b75SAndy Adamson  * iomode matching rules:
1772c7d73af2STom Haynes  * iomode	lseg	strict match
1773c7d73af2STom Haynes  *                      iomode
1774c7d73af2STom Haynes  * -----	-----	------ -----
1775c7d73af2STom Haynes  * ANY		READ	N/A    true
1776c7d73af2STom Haynes  * ANY		RW	N/A    true
1777c7d73af2STom Haynes  * RW		READ	N/A    false
1778c7d73af2STom Haynes  * RW		RW	N/A    true
1779c7d73af2STom Haynes  * READ		READ	N/A    true
1780c7d73af2STom Haynes  * READ		RW	true   false
1781c7d73af2STom Haynes  * READ		RW	false  true
1782b1f69b75SAndy Adamson  */
17833cb2df17STrond Myklebust static bool
pnfs_lseg_range_match(const struct pnfs_layout_range * ls_range,const struct pnfs_layout_range * range,bool strict_iomode)17847dc0ac70STrond Myklebust pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
1785c7d73af2STom Haynes 		 const struct pnfs_layout_range *range,
1786c7d73af2STom Haynes 		 bool strict_iomode)
1787b1f69b75SAndy Adamson {
1788fb3296ebSBenny Halevy 	struct pnfs_layout_range range1;
1789fb3296ebSBenny Halevy 
1790fb3296ebSBenny Halevy 	if ((range->iomode == IOMODE_RW &&
1791fb3296ebSBenny Halevy 	     ls_range->iomode != IOMODE_RW) ||
1792c7d73af2STom Haynes 	    (range->iomode != ls_range->iomode &&
17936089dd0dSThomas Meyer 	     strict_iomode) ||
17947dc0ac70STrond Myklebust 	    !pnfs_lseg_range_intersecting(ls_range, range))
179510db5b7aSGustavo A. R. Silva 		return false;
1796fb3296ebSBenny Halevy 
1797fb3296ebSBenny Halevy 	/* range1 covers only the first byte in the range */
1798fb3296ebSBenny Halevy 	range1 = *range;
1799fb3296ebSBenny Halevy 	range1.length = 1;
18007dc0ac70STrond Myklebust 	return pnfs_lseg_range_contained(ls_range, &range1);
1801b1f69b75SAndy Adamson }
1802b1f69b75SAndy Adamson 
1803b1f69b75SAndy Adamson /*
1804b1f69b75SAndy Adamson  * lookup range in layout
1805b1f69b75SAndy Adamson  */
1806e5e94017SBenny Halevy static struct pnfs_layout_segment *
pnfs_find_lseg(struct pnfs_layout_hdr * lo,struct pnfs_layout_range * range,bool strict_iomode)1807fb3296ebSBenny Halevy pnfs_find_lseg(struct pnfs_layout_hdr *lo,
1808c7d73af2STom Haynes 		struct pnfs_layout_range *range,
1809c7d73af2STom Haynes 		bool strict_iomode)
1810e5e94017SBenny Halevy {
1811b1f69b75SAndy Adamson 	struct pnfs_layout_segment *lseg, *ret = NULL;
1812b1f69b75SAndy Adamson 
1813b1f69b75SAndy Adamson 	dprintk("%s:Begin\n", __func__);
1814b1f69b75SAndy Adamson 
1815b7edfaa1SFred Isaman 	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
18164541d16cSFred Isaman 		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
1817c7d73af2STom Haynes 		    pnfs_lseg_range_match(&lseg->pls_range, range,
1818c7d73af2STom Haynes 					  strict_iomode)) {
18199369a431STrond Myklebust 			ret = pnfs_get_lseg(lseg);
1820b1f69b75SAndy Adamson 			break;
1821b1f69b75SAndy Adamson 		}
1822b1f69b75SAndy Adamson 	}
1823b1f69b75SAndy Adamson 
1824b1f69b75SAndy Adamson 	dprintk("%s:Return lseg %p ref %d\n",
1825eba6dd69SElena Reshetova 		__func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0);
1826b1f69b75SAndy Adamson 	return ret;
1827e5e94017SBenny Halevy }
1828e5e94017SBenny Halevy 
1829e5e94017SBenny Halevy /*
1830d23d61c8SAndy Adamson  * Use mdsthreshold hints set at each OPEN to determine if I/O should go
1831d23d61c8SAndy Adamson  * to the MDS or over pNFS
1832d23d61c8SAndy Adamson  *
1833d23d61c8SAndy Adamson  * The nfs_inode read_io and write_io fields are cumulative counters reset
1834d23d61c8SAndy Adamson  * when there are no layout segments. Note that in pnfs_update_layout iomode
1835d23d61c8SAndy Adamson  * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
1836d23d61c8SAndy Adamson  * WRITE request.
1837d23d61c8SAndy Adamson  *
1838d23d61c8SAndy Adamson  * A return of true means use MDS I/O.
1839d23d61c8SAndy Adamson  *
1840d23d61c8SAndy Adamson  * From rfc 5661:
1841d23d61c8SAndy Adamson  * If a file's size is smaller than the file size threshold, data accesses
1842d23d61c8SAndy Adamson  * SHOULD be sent to the metadata server.  If an I/O request has a length that
1843d23d61c8SAndy Adamson  * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
1844d23d61c8SAndy Adamson  * server.  If both file size and I/O size are provided, the client SHOULD
1845d23d61c8SAndy Adamson  * reach or exceed  both thresholds before sending its read or write
1846d23d61c8SAndy Adamson  * requests to the data server.
1847d23d61c8SAndy Adamson  */
pnfs_within_mdsthreshold(struct nfs_open_context * ctx,struct inode * ino,int iomode)1848d23d61c8SAndy Adamson static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
1849d23d61c8SAndy Adamson 				     struct inode *ino, int iomode)
1850d23d61c8SAndy Adamson {
1851d23d61c8SAndy Adamson 	struct nfs4_threshold *t = ctx->mdsthreshold;
1852d23d61c8SAndy Adamson 	struct nfs_inode *nfsi = NFS_I(ino);
1853d23d61c8SAndy Adamson 	loff_t fsize = i_size_read(ino);
1854d23d61c8SAndy Adamson 	bool size = false, size_set = false, io = false, io_set = false, ret = false;
1855d23d61c8SAndy Adamson 
1856d23d61c8SAndy Adamson 	if (t == NULL)
1857d23d61c8SAndy Adamson 		return ret;
1858d23d61c8SAndy Adamson 
1859d23d61c8SAndy Adamson 	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
1860d23d61c8SAndy Adamson 		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
1861d23d61c8SAndy Adamson 
1862d23d61c8SAndy Adamson 	switch (iomode) {
1863d23d61c8SAndy Adamson 	case IOMODE_READ:
1864d23d61c8SAndy Adamson 		if (t->bm & THRESHOLD_RD) {
1865d23d61c8SAndy Adamson 			dprintk("%s fsize %llu\n", __func__, fsize);
1866d23d61c8SAndy Adamson 			size_set = true;
1867d23d61c8SAndy Adamson 			if (fsize < t->rd_sz)
1868d23d61c8SAndy Adamson 				size = true;
1869d23d61c8SAndy Adamson 		}
1870d23d61c8SAndy Adamson 		if (t->bm & THRESHOLD_RD_IO) {
1871d23d61c8SAndy Adamson 			dprintk("%s nfsi->read_io %llu\n", __func__,
1872d23d61c8SAndy Adamson 				nfsi->read_io);
1873d23d61c8SAndy Adamson 			io_set = true;
1874d23d61c8SAndy Adamson 			if (nfsi->read_io < t->rd_io_sz)
1875d23d61c8SAndy Adamson 				io = true;
1876d23d61c8SAndy Adamson 		}
1877d23d61c8SAndy Adamson 		break;
1878d23d61c8SAndy Adamson 	case IOMODE_RW:
1879d23d61c8SAndy Adamson 		if (t->bm & THRESHOLD_WR) {
1880d23d61c8SAndy Adamson 			dprintk("%s fsize %llu\n", __func__, fsize);
1881d23d61c8SAndy Adamson 			size_set = true;
1882d23d61c8SAndy Adamson 			if (fsize < t->wr_sz)
1883d23d61c8SAndy Adamson 				size = true;
1884d23d61c8SAndy Adamson 		}
1885d23d61c8SAndy Adamson 		if (t->bm & THRESHOLD_WR_IO) {
1886d23d61c8SAndy Adamson 			dprintk("%s nfsi->write_io %llu\n", __func__,
1887d23d61c8SAndy Adamson 				nfsi->write_io);
1888d23d61c8SAndy Adamson 			io_set = true;
1889d23d61c8SAndy Adamson 			if (nfsi->write_io < t->wr_io_sz)
1890d23d61c8SAndy Adamson 				io = true;
1891d23d61c8SAndy Adamson 		}
1892d23d61c8SAndy Adamson 		break;
1893d23d61c8SAndy Adamson 	}
1894d23d61c8SAndy Adamson 	if (size_set && io_set) {
1895d23d61c8SAndy Adamson 		if (size && io)
1896d23d61c8SAndy Adamson 			ret = true;
1897d23d61c8SAndy Adamson 	} else if (size || io)
1898d23d61c8SAndy Adamson 		ret = true;
1899d23d61c8SAndy Adamson 
1900d23d61c8SAndy Adamson 	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
1901d23d61c8SAndy Adamson 	return ret;
1902d23d61c8SAndy Adamson }
1903d23d61c8SAndy Adamson 
pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr * lo)1904d03360aaSTrond Myklebust static int pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
1905aa8a45eeSPeng Tao {
1906aa8a45eeSPeng Tao 	/*
1907aa8a45eeSPeng Tao 	 * send layoutcommit as it can hold up layoutreturn due to lseg
1908aa8a45eeSPeng Tao 	 * reference
1909aa8a45eeSPeng Tao 	 */
1910aa8a45eeSPeng Tao 	pnfs_layoutcommit_inode(lo->plh_inode, false);
1911d03360aaSTrond Myklebust 	return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
19122e5b29f0STrond Myklebust 				   nfs_wait_bit_killable,
1913f5d39b02SPeter Zijlstra 				   TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
1914aa8a45eeSPeng Tao }
1915aa8a45eeSPeng Tao 
nfs_layoutget_begin(struct pnfs_layout_hdr * lo)1916411ae722STrond Myklebust static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
1917411ae722STrond Myklebust {
1918411ae722STrond Myklebust 	atomic_inc(&lo->plh_outstanding);
1919411ae722STrond Myklebust }
1920411ae722STrond Myklebust 
nfs_layoutget_end(struct pnfs_layout_hdr * lo)1921411ae722STrond Myklebust static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
1922411ae722STrond Myklebust {
1923880265c7STrond Myklebust 	if (atomic_dec_and_test(&lo->plh_outstanding) &&
1924880265c7STrond Myklebust 	    test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
1925880265c7STrond Myklebust 		wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
1926411ae722STrond Myklebust }
1927411ae722STrond Myklebust 
pnfs_is_first_layoutget(struct pnfs_layout_hdr * lo)1928d29b468dSTrond Myklebust static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
1929d29b468dSTrond Myklebust {
1930d29b468dSTrond Myklebust 	return test_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags);
1931d29b468dSTrond Myklebust }
1932d29b468dSTrond Myklebust 
pnfs_clear_first_layoutget(struct pnfs_layout_hdr * lo)1933d67ae825STom Haynes static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
1934d67ae825STom Haynes {
1935d67ae825STom Haynes 	unsigned long *bitlock = &lo->plh_flags;
1936d67ae825STom Haynes 
1937d67ae825STom Haynes 	clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock);
1938d67ae825STom Haynes 	smp_mb__after_atomic();
1939d67ae825STom Haynes 	wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET);
1940d67ae825STom Haynes }
1941d67ae825STom Haynes 
_add_to_server_list(struct pnfs_layout_hdr * lo,struct nfs_server * server)194278746a38SFred Isaman static void _add_to_server_list(struct pnfs_layout_hdr *lo,
194378746a38SFred Isaman 				struct nfs_server *server)
194478746a38SFred Isaman {
1945cf6605d1STrond Myklebust 	if (!test_and_set_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) {
194678746a38SFred Isaman 		struct nfs_client *clp = server->nfs_client;
194778746a38SFred Isaman 
194878746a38SFred Isaman 		/* The lo must be on the clp list if there is any
194978746a38SFred Isaman 		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
195078746a38SFred Isaman 		 */
195178746a38SFred Isaman 		spin_lock(&clp->cl_lock);
1952cf6605d1STrond Myklebust 		list_add_tail_rcu(&lo->plh_layouts, &server->layouts);
195378746a38SFred Isaman 		spin_unlock(&clp->cl_lock);
195478746a38SFred Isaman 	}
195578746a38SFred Isaman }
195678746a38SFred Isaman 
1957d23d61c8SAndy Adamson /*
1958e5e94017SBenny Halevy  * Layout segment is retreived from the server if not cached.
1959e5e94017SBenny Halevy  * The appropriate layout segment is referenced and returned to the caller.
1960e5e94017SBenny Halevy  */
19617c24d948SAndy Adamson struct pnfs_layout_segment *
pnfs_update_layout(struct inode * ino,struct nfs_open_context * ctx,loff_t pos,u64 count,enum pnfs_iomode iomode,bool strict_iomode,gfp_t gfp_flags)1962e5e94017SBenny Halevy pnfs_update_layout(struct inode *ino,
1963e5e94017SBenny Halevy 		   struct nfs_open_context *ctx,
1964fb3296ebSBenny Halevy 		   loff_t pos,
1965fb3296ebSBenny Halevy 		   u64 count,
1966a75b9df9STrond Myklebust 		   enum pnfs_iomode iomode,
1967c7d73af2STom Haynes 		   bool strict_iomode,
1968a75b9df9STrond Myklebust 		   gfp_t gfp_flags)
1969e5e94017SBenny Halevy {
1970fb3296ebSBenny Halevy 	struct pnfs_layout_range arg = {
1971fb3296ebSBenny Halevy 		.iomode = iomode,
1972fb3296ebSBenny Halevy 		.offset = pos,
1973fb3296ebSBenny Halevy 		.length = count,
1974fb3296ebSBenny Halevy 	};
197570d2f7b1STrond Myklebust 	unsigned pg_offset;
19766382a441SWeston Andros Adamson 	struct nfs_server *server = NFS_SERVER(ino);
19776382a441SWeston Andros Adamson 	struct nfs_client *clp = server->nfs_client;
1978183d9e7bSJeff Layton 	struct pnfs_layout_hdr *lo = NULL;
1979e5e94017SBenny Halevy 	struct pnfs_layout_segment *lseg = NULL;
1980587f03deSFred Isaman 	struct nfs4_layoutget *lgp;
1981183d9e7bSJeff Layton 	nfs4_stateid stateid;
1982183d9e7bSJeff Layton 	long timeout = 0;
198366b53f32STrond Myklebust 	unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
198430005121SWeston Andros Adamson 	bool first;
1985e5e94017SBenny Halevy 
19869a4bf31dSJeff Layton 	if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
1987183d9e7bSJeff Layton 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
19889a4bf31dSJeff Layton 				 PNFS_UPDATE_LAYOUT_NO_PNFS);
1989f86bbcf8STrond Myklebust 		goto out;
19909a4bf31dSJeff Layton 	}
1991d23d61c8SAndy Adamson 
19929a4bf31dSJeff Layton 	if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
1993183d9e7bSJeff Layton 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
19949a4bf31dSJeff Layton 				 PNFS_UPDATE_LAYOUT_MDSTHRESH);
1995f86bbcf8STrond Myklebust 		goto out;
19969a4bf31dSJeff Layton 	}
1997d23d61c8SAndy Adamson 
19989bf87482SPeng Tao lookup_again:
1999fbc63fb1SNeilBrown 	if (!nfs4_valid_open_stateid(ctx->state)) {
2000fbc63fb1SNeilBrown 		trace_pnfs_update_layout(ino, pos, count,
2001fbc63fb1SNeilBrown 					 iomode, lo, lseg,
2002fbc63fb1SNeilBrown 					 PNFS_UPDATE_LAYOUT_INVALID_OPEN);
2003fbc63fb1SNeilBrown 		lseg = ERR_PTR(-EIO);
2004fbc63fb1SNeilBrown 		goto out;
2005fbc63fb1SNeilBrown 	}
2006fbc63fb1SNeilBrown 
2007d03360aaSTrond Myklebust 	lseg = ERR_PTR(nfs4_client_recover_expired_lease(clp));
2008d03360aaSTrond Myklebust 	if (IS_ERR(lseg))
2009d03360aaSTrond Myklebust 		goto out;
20109bf87482SPeng Tao 	first = false;
2011e5e94017SBenny Halevy 	spin_lock(&ino->i_lock);
20129fa40758SPeng Tao 	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
2013830ffb56STrond Myklebust 	if (lo == NULL) {
2014830ffb56STrond Myklebust 		spin_unlock(&ino->i_lock);
20153764a17eSTrond Myklebust 		lseg = ERR_PTR(-ENOMEM);
2016183d9e7bSJeff Layton 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20179a4bf31dSJeff Layton 				 PNFS_UPDATE_LAYOUT_NOMEM);
2018830ffb56STrond Myklebust 		goto out;
2019830ffb56STrond Myklebust 	}
2020e5e94017SBenny Halevy 
202143f1b3daSFred Isaman 	/* Do we even need to bother with this? */
2022a59c30acSTrond Myklebust 	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
2023183d9e7bSJeff Layton 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20249a4bf31dSJeff Layton 				 PNFS_UPDATE_LAYOUT_BULK_RECALL);
202543f1b3daSFred Isaman 		dprintk("%s matches recall, use MDS\n", __func__);
2026e5e94017SBenny Halevy 		goto out_unlock;
2027e5e94017SBenny Halevy 	}
2028e5e94017SBenny Halevy 
2029e5e94017SBenny Halevy 	/* if LAYOUTGET already failed once we don't try again */
20302e5b29f0STrond Myklebust 	if (pnfs_layout_io_test_failed(lo, iomode)) {
2031183d9e7bSJeff Layton 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20329a4bf31dSJeff Layton 				 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
2033e5e94017SBenny Halevy 		goto out_unlock;
20349a4bf31dSJeff Layton 	}
2035e5e94017SBenny Halevy 
2036411ae722STrond Myklebust 	/*
2037411ae722STrond Myklebust 	 * If the layout segment list is empty, but there are outstanding
2038411ae722STrond Myklebust 	 * layoutget calls, then they might be subject to a layoutrecall.
2039411ae722STrond Myklebust 	 */
2040880265c7STrond Myklebust 	if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
2041411ae722STrond Myklebust 	    atomic_read(&lo->plh_outstanding) != 0) {
2042411ae722STrond Myklebust 		spin_unlock(&ino->i_lock);
2043880265c7STrond Myklebust 		lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN,
2044880265c7STrond Myklebust 					   TASK_KILLABLE));
204558bbeab4STrond Myklebust 		if (IS_ERR(lseg))
2046411ae722STrond Myklebust 			goto out_put_layout_hdr;
2047411ae722STrond Myklebust 		pnfs_put_layout_hdr(lo);
2048411ae722STrond Myklebust 		goto lookup_again;
2049411ae722STrond Myklebust 	}
2050411ae722STrond Myklebust 
20512c8d5fc3STrond Myklebust 	/*
20522c8d5fc3STrond Myklebust 	 * Because we free lsegs when sending LAYOUTRETURN, we need to wait
20532c8d5fc3STrond Myklebust 	 * for LAYOUTRETURN.
20542c8d5fc3STrond Myklebust 	 */
20552c8d5fc3STrond Myklebust 	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
20562c8d5fc3STrond Myklebust 		spin_unlock(&ino->i_lock);
20572c8d5fc3STrond Myklebust 		dprintk("%s wait for layoutreturn\n", __func__);
20582c8d5fc3STrond Myklebust 		lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo));
20592c8d5fc3STrond Myklebust 		if (!IS_ERR(lseg)) {
20602c8d5fc3STrond Myklebust 			pnfs_put_layout_hdr(lo);
20612c8d5fc3STrond Myklebust 			dprintk("%s retrying\n", __func__);
20622c8d5fc3STrond Myklebust 			trace_pnfs_update_layout(ino, pos, count, iomode, lo,
20632c8d5fc3STrond Myklebust 						 lseg,
20642c8d5fc3STrond Myklebust 						 PNFS_UPDATE_LAYOUT_RETRY);
20652c8d5fc3STrond Myklebust 			goto lookup_again;
20662c8d5fc3STrond Myklebust 		}
20672c8d5fc3STrond Myklebust 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20682c8d5fc3STrond Myklebust 					 PNFS_UPDATE_LAYOUT_RETURN);
20692c8d5fc3STrond Myklebust 		goto out_put_layout_hdr;
20702c8d5fc3STrond Myklebust 	}
20712c8d5fc3STrond Myklebust 
2072c7d73af2STom Haynes 	lseg = pnfs_find_lseg(lo, &arg, strict_iomode);
2073183d9e7bSJeff Layton 	if (lseg) {
2074183d9e7bSJeff Layton 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
2075183d9e7bSJeff Layton 				PNFS_UPDATE_LAYOUT_FOUND_CACHED);
2076183d9e7bSJeff Layton 		goto out_unlock;
2077183d9e7bSJeff Layton 	}
2078183d9e7bSJeff Layton 
2079183d9e7bSJeff Layton 	/*
2080183d9e7bSJeff Layton 	 * Choose a stateid for the LAYOUTGET. If we don't have a layout
2081183d9e7bSJeff Layton 	 * stateid, or it has been invalidated, then we must use the open
2082183d9e7bSJeff Layton 	 * stateid.
2083183d9e7bSJeff Layton 	 */
208467a3b721STrond Myklebust 	if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
2085d9aba2b4STrond Myklebust 		int status;
2086183d9e7bSJeff Layton 
2087183d9e7bSJeff Layton 		/*
2088183d9e7bSJeff Layton 		 * The first layoutget for the file. Need to serialize per
20899bf87482SPeng Tao 		 * RFC 5661 Errata 3208.
20909bf87482SPeng Tao 		 */
20919bf87482SPeng Tao 		if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET,
20929bf87482SPeng Tao 				     &lo->plh_flags)) {
20939bf87482SPeng Tao 			spin_unlock(&ino->i_lock);
2094d03360aaSTrond Myklebust 			lseg = ERR_PTR(wait_on_bit(&lo->plh_flags,
2095d03360aaSTrond Myklebust 						NFS_LAYOUT_FIRST_LAYOUTGET,
2096d03360aaSTrond Myklebust 						TASK_KILLABLE));
2097d03360aaSTrond Myklebust 			if (IS_ERR(lseg))
2098d03360aaSTrond Myklebust 				goto out_put_layout_hdr;
20999bf87482SPeng Tao 			pnfs_put_layout_hdr(lo);
2100183d9e7bSJeff Layton 			dprintk("%s retrying\n", __func__);
21019bf87482SPeng Tao 			goto lookup_again;
21029bf87482SPeng Tao 		}
2103183d9e7bSJeff Layton 
2104fbf4bcc9STrond Myklebust 		spin_unlock(&ino->i_lock);
2105183d9e7bSJeff Layton 		first = true;
2106d9aba2b4STrond Myklebust 		status = nfs4_select_rw_stateid(ctx->state,
210770d2f7b1STrond Myklebust 					iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ,
2108d9aba2b4STrond Myklebust 					NULL, &stateid, NULL);
2109d9aba2b4STrond Myklebust 		if (status != 0) {
2110731c74ddSTrond Myklebust 			lseg = ERR_PTR(status);
211170d2f7b1STrond Myklebust 			trace_pnfs_update_layout(ino, pos, count,
211270d2f7b1STrond Myklebust 					iomode, lo, lseg,
211370d2f7b1STrond Myklebust 					PNFS_UPDATE_LAYOUT_INVALID_OPEN);
2114d9aba2b4STrond Myklebust 			nfs4_schedule_stateid_recovery(server, ctx->state);
2115d9aba2b4STrond Myklebust 			pnfs_clear_first_layoutget(lo);
2116d9aba2b4STrond Myklebust 			pnfs_put_layout_hdr(lo);
2117d9aba2b4STrond Myklebust 			goto lookup_again;
211870d2f7b1STrond Myklebust 		}
2119fbf4bcc9STrond Myklebust 		spin_lock(&ino->i_lock);
21209bf87482SPeng Tao 	} else {
2121183d9e7bSJeff Layton 		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
21229a4bf31dSJeff Layton 	}
2123568e8c49SAndy Adamson 
21249a4bf31dSJeff Layton 	if (pnfs_layoutgets_blocked(lo)) {
2125183d9e7bSJeff Layton 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
21269a4bf31dSJeff Layton 				PNFS_UPDATE_LAYOUT_BLOCKED);
2127cf7d63f1SFred Isaman 		goto out_unlock;
21289a4bf31dSJeff Layton 	}
2129411ae722STrond Myklebust 	nfs_layoutget_begin(lo);
2130f49f9baaSFred Isaman 	spin_unlock(&ino->i_lock);
213130005121SWeston Andros Adamson 
213278746a38SFred Isaman 	_add_to_server_list(lo, server);
2133e5e94017SBenny Halevy 
213409cbfeafSKirill A. Shutemov 	pg_offset = arg.offset & ~PAGE_MASK;
2135707ed5fdSBenny Halevy 	if (pg_offset) {
2136707ed5fdSBenny Halevy 		arg.offset -= pg_offset;
2137707ed5fdSBenny Halevy 		arg.length += pg_offset;
2138707ed5fdSBenny Halevy 	}
21397c24d948SAndy Adamson 	if (arg.length != NFS4_MAX_UINT64)
214009cbfeafSKirill A. Shutemov 		arg.length = PAGE_ALIGN(arg.length);
2141707ed5fdSBenny Halevy 
21425e36e2a9SFred Isaman 	lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags);
2143587f03deSFred Isaman 	if (!lgp) {
21443764a17eSTrond Myklebust 		lseg = ERR_PTR(-ENOMEM);
2145587f03deSFred Isaman 		trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL,
2146587f03deSFred Isaman 					 PNFS_UPDATE_LAYOUT_NOMEM);
2147411ae722STrond Myklebust 		nfs_layoutget_end(lo);
2148587f03deSFred Isaman 		goto out_put_layout_hdr;
2149587f03deSFred Isaman 	}
2150587f03deSFred Isaman 
2151b4e89bcbSTrond Myklebust 	lgp->lo = lo;
2152b4e89bcbSTrond Myklebust 	pnfs_get_layout_hdr(lo);
2153b4e89bcbSTrond Myklebust 
2154dacb452dSFred Isaman 	lseg = nfs4_proc_layoutget(lgp, &timeout);
2155183d9e7bSJeff Layton 	trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
2156183d9e7bSJeff Layton 				 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
2157411ae722STrond Myklebust 	nfs_layoutget_end(lo);
215883026d80SJeff Layton 	if (IS_ERR(lseg)) {
2159183d9e7bSJeff Layton 		switch(PTR_ERR(lseg)) {
2160e85d7ee4STrond Myklebust 		case -EBUSY:
2161183d9e7bSJeff Layton 			if (time_after(jiffies, giveup))
2162183d9e7bSJeff Layton 				lseg = NULL;
216366b53f32STrond Myklebust 			break;
216466b53f32STrond Myklebust 		case -ERECALLCONFLICT:
2165183d9e7bSJeff Layton 		case -EAGAIN:
216656b38a1fSTrond Myklebust 			break;
2167fe44fb23STrond Myklebust 		case -ENODATA:
2168fe44fb23STrond Myklebust 			/* The server returned NFS4ERR_LAYOUTUNAVAILABLE */
2169fe44fb23STrond Myklebust 			pnfs_layout_set_fail_bit(
2170fe44fb23STrond Myklebust 				lo, pnfs_iomode_to_fail_bit(iomode));
2171fe44fb23STrond Myklebust 			lseg = NULL;
2172fe44fb23STrond Myklebust 			goto out_put_layout_hdr;
2173183d9e7bSJeff Layton 		default:
217483026d80SJeff Layton 			if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
217583026d80SJeff Layton 				pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
217683026d80SJeff Layton 				lseg = NULL;
217783026d80SJeff Layton 			}
217856b38a1fSTrond Myklebust 			goto out_put_layout_hdr;
217956b38a1fSTrond Myklebust 		}
218056b38a1fSTrond Myklebust 		if (lseg) {
218156b38a1fSTrond Myklebust 			if (first)
218256b38a1fSTrond Myklebust 				pnfs_clear_first_layoutget(lo);
218356b38a1fSTrond Myklebust 			trace_pnfs_update_layout(ino, pos, count,
218456b38a1fSTrond Myklebust 				iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
218556b38a1fSTrond Myklebust 			pnfs_put_layout_hdr(lo);
218656b38a1fSTrond Myklebust 			goto lookup_again;
2187183d9e7bSJeff Layton 		}
218883026d80SJeff Layton 	} else {
218983026d80SJeff Layton 		pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
219083026d80SJeff Layton 	}
219183026d80SJeff Layton 
2192830ffb56STrond Myklebust out_put_layout_hdr:
2193d67ae825STom Haynes 	if (first)
2194d67ae825STom Haynes 		pnfs_clear_first_layoutget(lo);
2195d5b9216fSTrond Myklebust 	trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
2196d5b9216fSTrond Myklebust 				 PNFS_UPDATE_LAYOUT_EXIT);
219770c3bd2bSTrond Myklebust 	pnfs_put_layout_hdr(lo);
2198e5e94017SBenny Halevy out:
2199f86bbcf8STrond Myklebust 	dprintk("%s: inode %s/%llu pNFS layout segment %s for "
2200f86bbcf8STrond Myklebust 			"(%s, offset: %llu, length: %llu)\n",
2201f86bbcf8STrond Myklebust 			__func__, ino->i_sb->s_id,
2202f86bbcf8STrond Myklebust 			(unsigned long long)NFS_FILEID(ino),
2203d600ad1fSPeng Tao 			IS_ERR_OR_NULL(lseg) ? "not found" : "found",
2204f86bbcf8STrond Myklebust 			iomode==IOMODE_RW ?  "read/write" : "read-only",
2205f86bbcf8STrond Myklebust 			(unsigned long long)pos,
2206f86bbcf8STrond Myklebust 			(unsigned long long)count);
2207e5e94017SBenny Halevy 	return lseg;
2208e5e94017SBenny Halevy out_unlock:
2209e5e94017SBenny Halevy 	spin_unlock(&ino->i_lock);
2210830ffb56STrond Myklebust 	goto out_put_layout_hdr;
2211e5e94017SBenny Halevy }
22127c24d948SAndy Adamson EXPORT_SYMBOL_GPL(pnfs_update_layout);
2213b1f69b75SAndy Adamson 
2214540d9864STrond Myklebust static bool
pnfs_sanity_check_layout_range(struct pnfs_layout_range * range)2215540d9864STrond Myklebust pnfs_sanity_check_layout_range(struct pnfs_layout_range *range)
2216540d9864STrond Myklebust {
2217540d9864STrond Myklebust 	switch (range->iomode) {
2218540d9864STrond Myklebust 	case IOMODE_READ:
2219540d9864STrond Myklebust 	case IOMODE_RW:
2220540d9864STrond Myklebust 		break;
2221540d9864STrond Myklebust 	default:
2222540d9864STrond Myklebust 		return false;
2223540d9864STrond Myklebust 	}
2224540d9864STrond Myklebust 	if (range->offset == NFS4_MAX_UINT64)
2225540d9864STrond Myklebust 		return false;
2226540d9864STrond Myklebust 	if (range->length == 0)
2227540d9864STrond Myklebust 		return false;
2228540d9864STrond Myklebust 	if (range->length != NFS4_MAX_UINT64 &&
2229540d9864STrond Myklebust 	    range->length > NFS4_MAX_UINT64 - range->offset)
2230540d9864STrond Myklebust 		return false;
2231540d9864STrond Myklebust 	return true;
2232540d9864STrond Myklebust }
2233540d9864STrond Myklebust 
223478746a38SFred Isaman static struct pnfs_layout_hdr *
_pnfs_grab_empty_layout(struct inode * ino,struct nfs_open_context * ctx)223578746a38SFred Isaman _pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx)
223678746a38SFred Isaman {
223778746a38SFred Isaman 	struct pnfs_layout_hdr *lo;
223878746a38SFred Isaman 
223978746a38SFred Isaman 	spin_lock(&ino->i_lock);
224063d8a41bSTrond Myklebust 	lo = pnfs_find_alloc_layout(ino, ctx, nfs_io_gfp_mask());
224178746a38SFred Isaman 	if (!lo)
224278746a38SFred Isaman 		goto out_unlock;
224378746a38SFred Isaman 	if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
224478746a38SFred Isaman 		goto out_unlock;
224578746a38SFred Isaman 	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
224678746a38SFred Isaman 		goto out_unlock;
224778746a38SFred Isaman 	if (pnfs_layoutgets_blocked(lo))
224878746a38SFred Isaman 		goto out_unlock;
224978746a38SFred Isaman 	if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags))
225078746a38SFred Isaman 		goto out_unlock;
2251411ae722STrond Myklebust 	nfs_layoutget_begin(lo);
225278746a38SFred Isaman 	spin_unlock(&ino->i_lock);
225378746a38SFred Isaman 	_add_to_server_list(lo, NFS_SERVER(ino));
225478746a38SFred Isaman 	return lo;
225578746a38SFred Isaman 
225678746a38SFred Isaman out_unlock:
225778746a38SFred Isaman 	spin_unlock(&ino->i_lock);
225878746a38SFred Isaman 	pnfs_put_layout_hdr(lo);
225978746a38SFred Isaman 	return NULL;
226078746a38SFred Isaman }
226178746a38SFred Isaman 
_lgopen_prepare_attached(struct nfs4_opendata * data,struct nfs_open_context * ctx)22622409a976SFred Isaman static void _lgopen_prepare_attached(struct nfs4_opendata *data,
22632409a976SFred Isaman 				     struct nfs_open_context *ctx)
22642409a976SFred Isaman {
226578746a38SFred Isaman 	struct inode *ino = data->dentry->d_inode;
226678746a38SFred Isaman 	struct pnfs_layout_range rng = {
226778746a38SFred Isaman 		.iomode = (data->o_arg.fmode & FMODE_WRITE) ?
226878746a38SFred Isaman 			  IOMODE_RW: IOMODE_READ,
226978746a38SFred Isaman 		.offset = 0,
227078746a38SFred Isaman 		.length = NFS4_MAX_UINT64,
227178746a38SFred Isaman 	};
227278746a38SFred Isaman 	struct nfs4_layoutget *lgp;
227378746a38SFred Isaman 	struct pnfs_layout_hdr *lo;
227478746a38SFred Isaman 
227564294b08STrond Myklebust 	/* Heuristic: don't send layoutget if we have cached data */
227664294b08STrond Myklebust 	if (rng.iomode == IOMODE_READ &&
227764294b08STrond Myklebust 	   (i_size_read(ino) == 0 || ino->i_mapping->nrpages != 0))
227864294b08STrond Myklebust 		return;
227964294b08STrond Myklebust 
228078746a38SFred Isaman 	lo = _pnfs_grab_empty_layout(ino, ctx);
228178746a38SFred Isaman 	if (!lo)
228278746a38SFred Isaman 		return;
228363d8a41bSTrond Myklebust 	lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, &rng,
228463d8a41bSTrond Myklebust 					     nfs_io_gfp_mask());
228578746a38SFred Isaman 	if (!lgp) {
228678746a38SFred Isaman 		pnfs_clear_first_layoutget(lo);
2287cb2856c5STrond Myklebust 		nfs_layoutget_end(lo);
228878746a38SFred Isaman 		pnfs_put_layout_hdr(lo);
228978746a38SFred Isaman 		return;
229078746a38SFred Isaman 	}
2291b4e89bcbSTrond Myklebust 	lgp->lo = lo;
229278746a38SFred Isaman 	data->lgp = lgp;
229378746a38SFred Isaman 	data->o_arg.lg_args = &lgp->args;
229478746a38SFred Isaman 	data->o_res.lg_res = &lgp->res;
22952409a976SFred Isaman }
22962409a976SFred Isaman 
_lgopen_prepare_floating(struct nfs4_opendata * data,struct nfs_open_context * ctx)22972409a976SFred Isaman static void _lgopen_prepare_floating(struct nfs4_opendata *data,
22982409a976SFred Isaman 				     struct nfs_open_context *ctx)
22992409a976SFred Isaman {
2300b4e89bcbSTrond Myklebust 	struct inode *ino = data->dentry->d_inode;
23012409a976SFred Isaman 	struct pnfs_layout_range rng = {
23022409a976SFred Isaman 		.iomode = (data->o_arg.fmode & FMODE_WRITE) ?
23032409a976SFred Isaman 			  IOMODE_RW: IOMODE_READ,
23042409a976SFred Isaman 		.offset = 0,
23052409a976SFred Isaman 		.length = NFS4_MAX_UINT64,
23062409a976SFred Isaman 	};
23072409a976SFred Isaman 	struct nfs4_layoutget *lgp;
23082409a976SFred Isaman 
230963d8a41bSTrond Myklebust 	lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, &rng,
231063d8a41bSTrond Myklebust 					     nfs_io_gfp_mask());
23112409a976SFred Isaman 	if (!lgp)
23122409a976SFred Isaman 		return;
23132409a976SFred Isaman 	data->lgp = lgp;
23142409a976SFred Isaman 	data->o_arg.lg_args = &lgp->args;
23152409a976SFred Isaman 	data->o_res.lg_res = &lgp->res;
23162409a976SFred Isaman }
23172409a976SFred Isaman 
pnfs_lgopen_prepare(struct nfs4_opendata * data,struct nfs_open_context * ctx)23182409a976SFred Isaman void pnfs_lgopen_prepare(struct nfs4_opendata *data,
23192409a976SFred Isaman 			 struct nfs_open_context *ctx)
23202409a976SFred Isaman {
23212409a976SFred Isaman 	struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
23222409a976SFred Isaman 
23232409a976SFred Isaman 	if (!(pnfs_enabled_sb(server) &&
23242409a976SFred Isaman 	      server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN))
23252409a976SFred Isaman 		return;
23262409a976SFred Isaman 	/* Could check on max_ops, but currently hardcoded high enough */
23276e01260cSFred Isaman 	if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN))
23286e01260cSFred Isaman 		return;
2329b4e89bcbSTrond Myklebust 	if (data->lgp)
2330b4e89bcbSTrond Myklebust 		return;
23312409a976SFred Isaman 	if (data->state)
23322409a976SFred Isaman 		_lgopen_prepare_attached(data, ctx);
23332409a976SFred Isaman 	else
23342409a976SFred Isaman 		_lgopen_prepare_floating(data, ctx);
23352409a976SFred Isaman }
23362409a976SFred Isaman 
pnfs_parse_lgopen(struct inode * ino,struct nfs4_layoutget * lgp,struct nfs_open_context * ctx)23372409a976SFred Isaman void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
23382409a976SFred Isaman 		       struct nfs_open_context *ctx)
23392409a976SFred Isaman {
23402409a976SFred Isaman 	struct pnfs_layout_hdr *lo;
23412409a976SFred Isaman 	struct pnfs_layout_segment *lseg;
2342c49b5209SFred Isaman 	struct nfs_server *srv = NFS_SERVER(ino);
23432409a976SFred Isaman 	u32 iomode;
23442409a976SFred Isaman 
23456e01260cSFred Isaman 	if (!lgp)
23462409a976SFred Isaman 		return;
23476e01260cSFred Isaman 	dprintk("%s: entered with status %i\n", __func__, lgp->res.status);
23486e01260cSFred Isaman 	if (lgp->res.status) {
23496e01260cSFred Isaman 		switch (lgp->res.status) {
23506e01260cSFred Isaman 		default:
23518dc96566STrond Myklebust 			break;
23528dc96566STrond Myklebust 		/*
23538dc96566STrond Myklebust 		 * Halt lgopen attempts if the server doesn't recognise
23548dc96566STrond Myklebust 		 * the "current stateid" value, the layout type, or the
23558dc96566STrond Myklebust 		 * layoutget operation as being valid.
23568dc96566STrond Myklebust 		 * Also if it complains about too many ops in the compound
23578dc96566STrond Myklebust 		 * or of the request/reply being too big.
23586e01260cSFred Isaman 		 */
23598dc96566STrond Myklebust 		case -NFS4ERR_BAD_STATEID:
23608dc96566STrond Myklebust 		case -NFS4ERR_NOTSUPP:
23618dc96566STrond Myklebust 		case -NFS4ERR_REP_TOO_BIG:
23628dc96566STrond Myklebust 		case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
23638dc96566STrond Myklebust 		case -NFS4ERR_REQ_TOO_BIG:
23648dc96566STrond Myklebust 		case -NFS4ERR_TOO_MANY_OPS:
23658dc96566STrond Myklebust 		case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
2366c49b5209SFred Isaman 			srv->caps &= ~NFS_CAP_LGOPEN;
23676e01260cSFred Isaman 		}
23686e01260cSFred Isaman 		return;
23696e01260cSFred Isaman 	}
2370b4e89bcbSTrond Myklebust 	if (!lgp->lo) {
237178746a38SFred Isaman 		lo = _pnfs_grab_empty_layout(ino, ctx);
237278746a38SFred Isaman 		if (!lo)
237378746a38SFred Isaman 			return;
2374b4e89bcbSTrond Myklebust 		lgp->lo = lo;
23752409a976SFred Isaman 	} else
2376b4e89bcbSTrond Myklebust 		lo = lgp->lo;
23772409a976SFred Isaman 
23782409a976SFred Isaman 	lseg = pnfs_layout_process(lgp);
237932f1c28fSTrond Myklebust 	if (!IS_ERR(lseg)) {
23802409a976SFred Isaman 		iomode = lgp->args.range.iomode;
23812409a976SFred Isaman 		pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
23822409a976SFred Isaman 		pnfs_put_lseg(lseg);
23832409a976SFred Isaman 	}
238430ae2412SFred Isaman }
238530ae2412SFred Isaman 
nfs4_lgopen_release(struct nfs4_layoutget * lgp)238630ae2412SFred Isaman void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
238730ae2412SFred Isaman {
238830ae2412SFred Isaman 	if (lgp != NULL) {
2389b4e89bcbSTrond Myklebust 		if (lgp->lo) {
2390b4e89bcbSTrond Myklebust 			pnfs_clear_first_layoutget(lgp->lo);
2391b4e89bcbSTrond Myklebust 			nfs_layoutget_end(lgp->lo);
239230ae2412SFred Isaman 		}
239330ae2412SFred Isaman 		pnfs_layoutget_free(lgp);
239430ae2412SFred Isaman 	}
23952409a976SFred Isaman }
23962409a976SFred Isaman 
2397a0b0a6e3STrond Myklebust struct pnfs_layout_segment *
pnfs_layout_process(struct nfs4_layoutget * lgp)2398b1f69b75SAndy Adamson pnfs_layout_process(struct nfs4_layoutget *lgp)
2399b1f69b75SAndy Adamson {
2400b4e89bcbSTrond Myklebust 	struct pnfs_layout_hdr *lo = lgp->lo;
2401b1f69b75SAndy Adamson 	struct nfs4_layoutget_res *res = &lgp->res;
2402b1f69b75SAndy Adamson 	struct pnfs_layout_segment *lseg;
2403b7edfaa1SFred Isaman 	struct inode *ino = lo->plh_inode;
240478096ccaSTrond Myklebust 	LIST_HEAD(free_me);
2405540d9864STrond Myklebust 
2406540d9864STrond Myklebust 	if (!pnfs_sanity_check_layout_range(&res->range))
24071b3c6d07SJeff Layton 		return ERR_PTR(-EINVAL);
2408b1f69b75SAndy Adamson 
2409b1f69b75SAndy Adamson 	/* Inject layout blob into I/O device driver */
2410a75b9df9STrond Myklebust 	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
24111b3c6d07SJeff Layton 	if (IS_ERR_OR_NULL(lseg)) {
2412b1f69b75SAndy Adamson 		if (!lseg)
24131b3c6d07SJeff Layton 			lseg = ERR_PTR(-ENOMEM);
24141b3c6d07SJeff Layton 
24151b3c6d07SJeff Layton 		dprintk("%s: Could not allocate layout: error %ld\n",
24161b3c6d07SJeff Layton 		       __func__, PTR_ERR(lseg));
24171b3c6d07SJeff Layton 		return lseg;
2418b1f69b75SAndy Adamson 	}
2419b1f69b75SAndy Adamson 
2420119cef97STrond Myklebust 	pnfs_init_lseg(lo, lseg, &res->range, &res->stateid);
24211013df61SChristoph Hellwig 
2422b1f69b75SAndy Adamson 	spin_lock(&ino->i_lock);
2423e1c06f80STrond Myklebust 	if (pnfs_layoutgets_blocked(lo)) {
242443f1b3daSFred Isaman 		dprintk("%s forget reply due to state\n", __func__);
24251b3c6d07SJeff Layton 		goto out_forget;
242643f1b3daSFred Isaman 	}
2427038d6493STrond Myklebust 
2428880265c7STrond Myklebust 	if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
2429880265c7STrond Myklebust 	    !pnfs_is_first_layoutget(lo))
24300b77f97aSTrond Myklebust 		goto out_forget;
24310b77f97aSTrond Myklebust 
2432d29b468dSTrond Myklebust 	if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
2433362f7474SChristoph Hellwig 		/* existing state ID, make sure the sequence number matches. */
2434362f7474SChristoph Hellwig 		if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
24350b77f97aSTrond Myklebust 			if (!pnfs_layout_is_valid(lo))
2436d29b468dSTrond Myklebust 				lo->plh_barrier = 0;
2437362f7474SChristoph Hellwig 			dprintk("%s forget reply due to sequence\n", __func__);
24381b3c6d07SJeff Layton 			goto out_forget;
2439362f7474SChristoph Hellwig 		}
244059b56394STrond Myklebust 		pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, false);
2441d29b468dSTrond Myklebust 	} else if (pnfs_layout_is_valid(lo)) {
2442362f7474SChristoph Hellwig 		/*
2443362f7474SChristoph Hellwig 		 * We got an entirely new state ID.  Mark all segments for the
24449888d837STrond Myklebust 		 * inode invalid, and retry the layoutget
2445362f7474SChristoph Hellwig 		 */
244608bd8dbeSTrond Myklebust 		struct pnfs_layout_range range = {
244708bd8dbeSTrond Myklebust 			.iomode = IOMODE_ANY,
244808bd8dbeSTrond Myklebust 			.length = NFS4_MAX_UINT64,
244908bd8dbeSTrond Myklebust 		};
2450fb700ef0STrond Myklebust 		pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0);
24519888d837STrond Myklebust 		goto out_forget;
2452d29b468dSTrond Myklebust 	} else {
2453d29b468dSTrond Myklebust 		/* We have a completely new layout */
2454d29b468dSTrond Myklebust 		pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true);
2455362f7474SChristoph Hellwig 	}
2456038d6493STrond Myklebust 
24579369a431STrond Myklebust 	pnfs_get_lseg(lseg);
245803772d2fSTrond Myklebust 	pnfs_layout_insert_lseg(lo, lseg, &free_me);
24598e0acf90STrond Myklebust 
2460b1f69b75SAndy Adamson 
24613976143bSPeng Tao 	if (res->return_on_close)
2462f7e8917aSFred Isaman 		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
2463f7e8917aSFred Isaman 
2464b1f69b75SAndy Adamson 	spin_unlock(&ino->i_lock);
246578096ccaSTrond Myklebust 	pnfs_free_lseg_list(&free_me);
2466a0b0a6e3STrond Myklebust 	return lseg;
246743f1b3daSFred Isaman 
24681b3c6d07SJeff Layton out_forget:
246943f1b3daSFred Isaman 	spin_unlock(&ino->i_lock);
247043f1b3daSFred Isaman 	lseg->pls_layout = lo;
247143f1b3daSFred Isaman 	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
24721b3c6d07SJeff Layton 	return ERR_PTR(-EAGAIN);
2473b1f69b75SAndy Adamson }
2474b1f69b75SAndy Adamson 
24752f215968STrond Myklebust /**
24762f215968STrond Myklebust  * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
24772f215968STrond Myklebust  * @lo: pointer to layout header
24782f215968STrond Myklebust  * @tmp_list: list header to be used with pnfs_free_lseg_list()
24792f215968STrond Myklebust  * @return_range: describe layout segment ranges to be returned
2480e0b7d420STrond Myklebust  * @seq: stateid seqid to match
24812f215968STrond Myklebust  *
24822f215968STrond Myklebust  * This function is mainly intended for use by layoutrecall. It attempts
24832f215968STrond Myklebust  * to free the layout segment immediately, or else to mark it for return
24842f215968STrond Myklebust  * as soon as its reference count drops to zero.
2485e0b7d420STrond Myklebust  *
2486e0b7d420STrond Myklebust  * Returns
2487e0b7d420STrond Myklebust  * - 0: a layoutreturn needs to be scheduled.
2488e0b7d420STrond Myklebust  * - EBUSY: there are layout segment that are still in use.
2489e0b7d420STrond Myklebust  * - ENOENT: there are no layout segments that need to be returned.
24902f215968STrond Myklebust  */
249110335556STrond Myklebust int
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr * lo,struct list_head * tmp_list,const struct pnfs_layout_range * return_range,u32 seq)2492016256dfSPeng Tao pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
2493016256dfSPeng Tao 				struct list_head *tmp_list,
24946d597e17SJeff Layton 				const struct pnfs_layout_range *return_range,
24956d597e17SJeff Layton 				u32 seq)
2496016256dfSPeng Tao {
2497016256dfSPeng Tao 	struct pnfs_layout_segment *lseg, *next;
2498b739a5bdSTrond Myklebust 	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
249910335556STrond Myklebust 	int remaining = 0;
2500016256dfSPeng Tao 
2501016256dfSPeng Tao 	dprintk("%s:Begin lo %p\n", __func__, lo);
2502016256dfSPeng Tao 
2503fc7ff367STrond Myklebust 	assert_spin_locked(&lo->plh_inode->i_lock);
2504016256dfSPeng Tao 
250539fd0186STrond Myklebust 	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
250639fd0186STrond Myklebust 		tmp_list = &lo->plh_return_segs;
250739fd0186STrond Myklebust 
2508016256dfSPeng Tao 	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
2509e036f464STrond Myklebust 		if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
2510016256dfSPeng Tao 			dprintk("%s: marking lseg %p iomode %d "
2511016256dfSPeng Tao 				"offset %llu length %llu\n", __func__,
2512016256dfSPeng Tao 				lseg, lseg->pls_range.iomode,
2513016256dfSPeng Tao 				lseg->pls_range.offset,
2514016256dfSPeng Tao 				lseg->pls_range.length);
251539fd0186STrond Myklebust 			if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
251639fd0186STrond Myklebust 				tmp_list = &lo->plh_return_segs;
2517ff041727STrond Myklebust 			if (mark_lseg_invalid(lseg, tmp_list))
25182f215968STrond Myklebust 				continue;
25192f215968STrond Myklebust 			remaining++;
2520016256dfSPeng Tao 			set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
2521b739a5bdSTrond Myklebust 			pnfs_lseg_cancel_io(server, lseg);
2522016256dfSPeng Tao 		}
25236d597e17SJeff Layton 
2524e0b7d420STrond Myklebust 	if (remaining) {
25256d597e17SJeff Layton 		pnfs_set_plh_return_info(lo, return_range->iomode, seq);
2526e0b7d420STrond Myklebust 		return -EBUSY;
2527e0b7d420STrond Myklebust 	}
25286d597e17SJeff Layton 
2529e0b7d420STrond Myklebust 	if (!list_empty(&lo->plh_return_segs)) {
2530e0b7d420STrond Myklebust 		pnfs_set_plh_return_info(lo, return_range->iomode, seq);
2531e0b7d420STrond Myklebust 		return 0;
2532e0b7d420STrond Myklebust 	}
2533e0b7d420STrond Myklebust 
2534e0b7d420STrond Myklebust 	return -ENOENT;
2535016256dfSPeng Tao }
2536016256dfSPeng Tao 
2537b5fdf841STrond Myklebust static void
pnfs_mark_layout_for_return(struct inode * inode,const struct pnfs_layout_range * range)2538b5fdf841STrond Myklebust pnfs_mark_layout_for_return(struct inode *inode,
2539b5fdf841STrond Myklebust 			    const struct pnfs_layout_range *range)
2540016256dfSPeng Tao {
2541b5fdf841STrond Myklebust 	struct pnfs_layout_hdr *lo;
254210335556STrond Myklebust 	bool return_now = false;
2543016256dfSPeng Tao 
2544016256dfSPeng Tao 	spin_lock(&inode->i_lock);
2545b5fdf841STrond Myklebust 	lo = NFS_I(inode)->layout;
2546bdebfccdSTrond Myklebust 	if (!pnfs_layout_is_valid(lo)) {
2547bdebfccdSTrond Myklebust 		spin_unlock(&inode->i_lock);
2548bdebfccdSTrond Myklebust 		return;
2549bdebfccdSTrond Myklebust 	}
2550b5fdf841STrond Myklebust 	pnfs_set_plh_return_info(lo, range->iomode, 0);
2551016256dfSPeng Tao 	/*
2552016256dfSPeng Tao 	 * mark all matching lsegs so that we are sure to have no live
2553016256dfSPeng Tao 	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
2554016256dfSPeng Tao 	 * for how it works.
2555016256dfSPeng Tao 	 */
2556b5fdf841STrond Myklebust 	if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) {
255744ea8dfcSTrond Myklebust 		const struct cred *cred;
255810335556STrond Myklebust 		nfs4_stateid stateid;
2559e5fd1904STrond Myklebust 		enum pnfs_iomode iomode;
256010335556STrond Myklebust 
256144ea8dfcSTrond Myklebust 		return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
2562016256dfSPeng Tao 		spin_unlock(&inode->i_lock);
256310335556STrond Myklebust 		if (return_now)
256444ea8dfcSTrond Myklebust 			pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
256510335556STrond Myklebust 	} else {
256610335556STrond Myklebust 		spin_unlock(&inode->i_lock);
2567b20135d0STrond Myklebust 		nfs_commit_inode(inode, 0);
2568016256dfSPeng Tao 	}
2569016256dfSPeng Tao }
2570b5fdf841STrond Myklebust 
pnfs_error_mark_layout_for_return(struct inode * inode,struct pnfs_layout_segment * lseg)2571b5fdf841STrond Myklebust void pnfs_error_mark_layout_for_return(struct inode *inode,
2572b5fdf841STrond Myklebust 				       struct pnfs_layout_segment *lseg)
2573b5fdf841STrond Myklebust {
2574b5fdf841STrond Myklebust 	struct pnfs_layout_range range = {
2575b5fdf841STrond Myklebust 		.iomode = lseg->pls_range.iomode,
2576b5fdf841STrond Myklebust 		.offset = 0,
2577b5fdf841STrond Myklebust 		.length = NFS4_MAX_UINT64,
2578b5fdf841STrond Myklebust 	};
2579b5fdf841STrond Myklebust 
2580b5fdf841STrond Myklebust 	pnfs_mark_layout_for_return(inode, &range);
2581b5fdf841STrond Myklebust }
2582016256dfSPeng Tao EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
2583016256dfSPeng Tao 
2584b5fdf841STrond Myklebust static bool
pnfs_layout_can_be_returned(struct pnfs_layout_hdr * lo)2585b5fdf841STrond Myklebust pnfs_layout_can_be_returned(struct pnfs_layout_hdr *lo)
2586b5fdf841STrond Myklebust {
2587b5fdf841STrond Myklebust 	return pnfs_layout_is_valid(lo) &&
2588b5fdf841STrond Myklebust 		!test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) &&
2589b5fdf841STrond Myklebust 		!test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
2590b5fdf841STrond Myklebust }
2591b5fdf841STrond Myklebust 
2592b5fdf841STrond Myklebust static struct pnfs_layout_segment *
pnfs_find_first_lseg(struct pnfs_layout_hdr * lo,const struct pnfs_layout_range * range,enum pnfs_iomode iomode)2593b5fdf841STrond Myklebust pnfs_find_first_lseg(struct pnfs_layout_hdr *lo,
2594b5fdf841STrond Myklebust 		     const struct pnfs_layout_range *range,
2595b5fdf841STrond Myklebust 		     enum pnfs_iomode iomode)
2596b5fdf841STrond Myklebust {
2597b5fdf841STrond Myklebust 	struct pnfs_layout_segment *lseg;
2598b5fdf841STrond Myklebust 
2599b5fdf841STrond Myklebust 	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
2600b5fdf841STrond Myklebust 		if (!test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
2601b5fdf841STrond Myklebust 			continue;
2602b5fdf841STrond Myklebust 		if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
2603b5fdf841STrond Myklebust 			continue;
2604b5fdf841STrond Myklebust 		if (lseg->pls_range.iomode != iomode && iomode != IOMODE_ANY)
2605b5fdf841STrond Myklebust 			continue;
2606b5fdf841STrond Myklebust 		if (pnfs_lseg_range_intersecting(&lseg->pls_range, range))
2607b5fdf841STrond Myklebust 			return lseg;
2608b5fdf841STrond Myklebust 	}
2609b5fdf841STrond Myklebust 	return NULL;
2610b5fdf841STrond Myklebust }
2611b5fdf841STrond Myklebust 
2612b5fdf841STrond Myklebust /* Find open file states whose mode matches that of the range */
2613b5fdf841STrond Myklebust static bool
pnfs_should_return_unused_layout(struct pnfs_layout_hdr * lo,const struct pnfs_layout_range * range)2614b5fdf841STrond Myklebust pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
2615b5fdf841STrond Myklebust 				 const struct pnfs_layout_range *range)
2616b5fdf841STrond Myklebust {
2617b5fdf841STrond Myklebust 	struct list_head *head;
2618b5fdf841STrond Myklebust 	struct nfs_open_context *ctx;
2619b5fdf841STrond Myklebust 	fmode_t mode = 0;
2620b5fdf841STrond Myklebust 
2621b5fdf841STrond Myklebust 	if (!pnfs_layout_can_be_returned(lo) ||
2622b5fdf841STrond Myklebust 	    !pnfs_find_first_lseg(lo, range, range->iomode))
2623b5fdf841STrond Myklebust 		return false;
2624b5fdf841STrond Myklebust 
2625b5fdf841STrond Myklebust 	head = &NFS_I(lo->plh_inode)->open_files;
2626b5fdf841STrond Myklebust 	list_for_each_entry_rcu(ctx, head, list) {
2627b5fdf841STrond Myklebust 		if (ctx->state)
2628b5fdf841STrond Myklebust 			mode |= ctx->state->state & (FMODE_READ|FMODE_WRITE);
2629b5fdf841STrond Myklebust 	}
2630b5fdf841STrond Myklebust 
2631b5fdf841STrond Myklebust 	switch (range->iomode) {
2632b5fdf841STrond Myklebust 	default:
2633b5fdf841STrond Myklebust 		break;
2634b5fdf841STrond Myklebust 	case IOMODE_READ:
2635b5fdf841STrond Myklebust 		mode &= ~FMODE_WRITE;
2636b5fdf841STrond Myklebust 		break;
2637b5fdf841STrond Myklebust 	case IOMODE_RW:
2638b5fdf841STrond Myklebust 		if (pnfs_find_first_lseg(lo, range, IOMODE_READ))
2639b5fdf841STrond Myklebust 			mode &= ~FMODE_READ;
2640b5fdf841STrond Myklebust 	}
2641b5fdf841STrond Myklebust 	return mode == 0;
2642b5fdf841STrond Myklebust }
2643b5fdf841STrond Myklebust 
pnfs_layout_return_unused_byserver(struct nfs_server * server,void * data)2644f6395572STrond Myklebust static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
2645f6395572STrond Myklebust 					      void *data)
2646b5fdf841STrond Myklebust {
2647b5fdf841STrond Myklebust 	const struct pnfs_layout_range *range = data;
2648f6395572STrond Myklebust 	const struct cred *cred;
2649b5fdf841STrond Myklebust 	struct pnfs_layout_hdr *lo;
2650b5fdf841STrond Myklebust 	struct inode *inode;
2651f6395572STrond Myklebust 	nfs4_stateid stateid;
2652f6395572STrond Myklebust 	enum pnfs_iomode iomode;
2653f6395572STrond Myklebust 
2654b5fdf841STrond Myklebust restart:
2655b5fdf841STrond Myklebust 	rcu_read_lock();
2656b5fdf841STrond Myklebust 	list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
2657f6395572STrond Myklebust 		inode = lo->plh_inode;
2658f6395572STrond Myklebust 		if (!inode || !pnfs_layout_can_be_returned(lo) ||
2659b5fdf841STrond Myklebust 		    test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
2660b5fdf841STrond Myklebust 			continue;
2661b5fdf841STrond Myklebust 		spin_lock(&inode->i_lock);
2662f6395572STrond Myklebust 		if (!lo->plh_inode ||
2663f6395572STrond Myklebust 		    !pnfs_should_return_unused_layout(lo, range)) {
2664b5fdf841STrond Myklebust 			spin_unlock(&inode->i_lock);
2665b5fdf841STrond Myklebust 			continue;
2666b5fdf841STrond Myklebust 		}
2667f6395572STrond Myklebust 		pnfs_get_layout_hdr(lo);
2668f6395572STrond Myklebust 		pnfs_set_plh_return_info(lo, range->iomode, 0);
2669f6395572STrond Myklebust 		if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
2670f6395572STrond Myklebust 						    range, 0) != 0 ||
2671f6395572STrond Myklebust 		    !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
2672b5fdf841STrond Myklebust 			spin_unlock(&inode->i_lock);
2673b5fdf841STrond Myklebust 			rcu_read_unlock();
2674f6395572STrond Myklebust 			pnfs_put_layout_hdr(lo);
2675f6395572STrond Myklebust 			cond_resched();
2676f6395572STrond Myklebust 			goto restart;
2677f6395572STrond Myklebust 		}
2678f6395572STrond Myklebust 		spin_unlock(&inode->i_lock);
2679f6395572STrond Myklebust 		rcu_read_unlock();
2680f6395572STrond Myklebust 		pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
2681f6395572STrond Myklebust 		pnfs_put_layout_hdr(lo);
2682b5fdf841STrond Myklebust 		cond_resched();
2683b5fdf841STrond Myklebust 		goto restart;
2684b5fdf841STrond Myklebust 	}
2685b5fdf841STrond Myklebust 	rcu_read_unlock();
2686b5fdf841STrond Myklebust 	return 0;
2687b5fdf841STrond Myklebust }
2688b5fdf841STrond Myklebust 
2689b5fdf841STrond Myklebust void
pnfs_layout_return_unused_byclid(struct nfs_client * clp,enum pnfs_iomode iomode)2690b5fdf841STrond Myklebust pnfs_layout_return_unused_byclid(struct nfs_client *clp,
2691b5fdf841STrond Myklebust 				 enum pnfs_iomode iomode)
2692b5fdf841STrond Myklebust {
2693b5fdf841STrond Myklebust 	struct pnfs_layout_range range = {
2694b5fdf841STrond Myklebust 		.iomode = iomode,
2695b5fdf841STrond Myklebust 		.offset = 0,
2696b5fdf841STrond Myklebust 		.length = NFS4_MAX_UINT64,
2697b5fdf841STrond Myklebust 	};
2698b5fdf841STrond Myklebust 
2699b5fdf841STrond Myklebust 	nfs_client_for_each_server(clp, pnfs_layout_return_unused_byserver,
2700b5fdf841STrond Myklebust 			&range);
2701b5fdf841STrond Myklebust }
2702b5fdf841STrond Myklebust 
2703d8007d4dSTrond Myklebust void
pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor * pgio)2704b3230e80STrond Myklebust pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
2705b3230e80STrond Myklebust {
2706b3230e80STrond Myklebust 	if (pgio->pg_lseg == NULL ||
2707b3230e80STrond Myklebust 	    test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
2708b3230e80STrond Myklebust 		return;
2709b3230e80STrond Myklebust 	pnfs_put_lseg(pgio->pg_lseg);
2710b3230e80STrond Myklebust 	pgio->pg_lseg = NULL;
2711b3230e80STrond Myklebust }
2712b3230e80STrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
2713b3230e80STrond Myklebust 
271408cb5b0fSBenjamin Coddington /*
271508cb5b0fSBenjamin Coddington  * Check for any intersection between the request and the pgio->pg_lseg,
271608cb5b0fSBenjamin Coddington  * and if none, put this pgio->pg_lseg away.
271708cb5b0fSBenjamin Coddington  */
2718e1e54ab7STrond Myklebust void
pnfs_generic_pg_check_range(struct nfs_pageio_descriptor * pgio,struct nfs_page * req)271908cb5b0fSBenjamin Coddington pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
272008cb5b0fSBenjamin Coddington {
272108cb5b0fSBenjamin Coddington 	if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
272208cb5b0fSBenjamin Coddington 		pnfs_put_lseg(pgio->pg_lseg);
272308cb5b0fSBenjamin Coddington 		pgio->pg_lseg = NULL;
272408cb5b0fSBenjamin Coddington 	}
272508cb5b0fSBenjamin Coddington }
2726e1e54ab7STrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range);
272708cb5b0fSBenjamin Coddington 
2728b3230e80STrond Myklebust void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor * pgio,struct nfs_page * req)2729d8007d4dSTrond Myklebust pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
2730d8007d4dSTrond Myklebust {
2731d1d97395SYang Li 	u64 rd_size;
27321fd937bdSPeng Tao 
2733b3230e80STrond Myklebust 	pnfs_generic_pg_check_layout(pgio);
273408cb5b0fSBenjamin Coddington 	pnfs_generic_pg_check_range(pgio, req);
2735cb5d04bcSPeng Tao 	if (pgio->pg_lseg == NULL) {
27361fd937bdSPeng Tao 		if (pgio->pg_dreq == NULL)
27371fd937bdSPeng Tao 			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
27381fd937bdSPeng Tao 		else
273975aa038dSTrond Myklebust 			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq,
274075aa038dSTrond Myklebust 						      req_offset(req));
27411fd937bdSPeng Tao 
274263d8a41bSTrond Myklebust 		pgio->pg_lseg =
274363d8a41bSTrond Myklebust 			pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
274463d8a41bSTrond Myklebust 					   req_offset(req), rd_size,
274563d8a41bSTrond Myklebust 					   IOMODE_READ, false,
274663d8a41bSTrond Myklebust 					   nfs_io_gfp_mask());
2747d600ad1fSPeng Tao 		if (IS_ERR(pgio->pg_lseg)) {
2748d600ad1fSPeng Tao 			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
2749d600ad1fSPeng Tao 			pgio->pg_lseg = NULL;
2750d600ad1fSPeng Tao 			return;
2751d600ad1fSPeng Tao 		}
2752cb5d04bcSPeng Tao 	}
2753e885de1aSTrond Myklebust 	/* If no lseg, fall back to read through mds */
2754e885de1aSTrond Myklebust 	if (pgio->pg_lseg == NULL)
27551f945357STrond Myklebust 		nfs_pageio_reset_read_mds(pgio);
2756e885de1aSTrond Myklebust 
2757d8007d4dSTrond Myklebust }
2758d8007d4dSTrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
2759d8007d4dSTrond Myklebust 
2760d8007d4dSTrond Myklebust void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor * pgio,struct nfs_page * req,u64 wb_size)27616296556fSPeng Tao pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
27626296556fSPeng Tao 			   struct nfs_page *req, u64 wb_size)
2763d8007d4dSTrond Myklebust {
2764b3230e80STrond Myklebust 	pnfs_generic_pg_check_layout(pgio);
276508cb5b0fSBenjamin Coddington 	pnfs_generic_pg_check_range(pgio, req);
2766d600ad1fSPeng Tao 	if (pgio->pg_lseg == NULL) {
276763d8a41bSTrond Myklebust 		pgio->pg_lseg =
276863d8a41bSTrond Myklebust 			pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
276963d8a41bSTrond Myklebust 					   req_offset(req), wb_size, IOMODE_RW,
277063d8a41bSTrond Myklebust 					   false, nfs_io_gfp_mask());
2771d600ad1fSPeng Tao 		if (IS_ERR(pgio->pg_lseg)) {
2772d600ad1fSPeng Tao 			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
2773d600ad1fSPeng Tao 			pgio->pg_lseg = NULL;
2774d600ad1fSPeng Tao 			return;
2775d600ad1fSPeng Tao 		}
2776d600ad1fSPeng Tao 	}
2777e885de1aSTrond Myklebust 	/* If no lseg, fall back to write through mds */
2778e885de1aSTrond Myklebust 	if (pgio->pg_lseg == NULL)
27791f945357STrond Myklebust 		nfs_pageio_reset_write_mds(pgio);
2780d8007d4dSTrond Myklebust }
2781d8007d4dSTrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
2782d8007d4dSTrond Myklebust 
2783180bb5ecSWeston Andros Adamson void
pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor * desc)2784180bb5ecSWeston Andros Adamson pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
2785180bb5ecSWeston Andros Adamson {
2786180bb5ecSWeston Andros Adamson 	if (desc->pg_lseg) {
2787180bb5ecSWeston Andros Adamson 		pnfs_put_lseg(desc->pg_lseg);
2788180bb5ecSWeston Andros Adamson 		desc->pg_lseg = NULL;
2789180bb5ecSWeston Andros Adamson 	}
2790180bb5ecSWeston Andros Adamson }
2791180bb5ecSWeston Andros Adamson EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);
2792180bb5ecSWeston Andros Adamson 
2793b4fdac1aSWeston Andros Adamson /*
2794b4fdac1aSWeston Andros Adamson  * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
2795b4fdac1aSWeston Andros Adamson  * of bytes (maximum @req->wb_bytes) that can be coalesced.
2796b4fdac1aSWeston Andros Adamson  */
2797b4fdac1aSWeston Andros Adamson size_t
pnfs_generic_pg_test(struct nfs_pageio_descriptor * pgio,struct nfs_page * prev,struct nfs_page * req)2798a7d42ddbSWeston Andros Adamson pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
2799a7d42ddbSWeston Andros Adamson 		     struct nfs_page *prev, struct nfs_page *req)
2800bae724efSFred Isaman {
28010f9c429eSWeston Andros Adamson 	unsigned int size;
2802c5e20cb7SWeston Andros Adamson 	u64 seg_end, req_start, seg_left;
28030f9c429eSWeston Andros Adamson 
28040f9c429eSWeston Andros Adamson 	size = nfs_generic_pg_test(pgio, prev, req);
28050f9c429eSWeston Andros Adamson 	if (!size)
28060f9c429eSWeston Andros Adamson 		return 0;
2807bae724efSFred Isaman 
280819982ba8STrond Myklebust 	/*
2809c5e20cb7SWeston Andros Adamson 	 * 'size' contains the number of bytes left in the current page (up
2810c5e20cb7SWeston Andros Adamson 	 * to the original size asked for in @req->wb_bytes).
2811c5e20cb7SWeston Andros Adamson 	 *
2812c5e20cb7SWeston Andros Adamson 	 * Calculate how many bytes are left in the layout segment
2813c5e20cb7SWeston Andros Adamson 	 * and if there are less bytes than 'size', return that instead.
281419982ba8STrond Myklebust 	 *
281519982ba8STrond Myklebust 	 * Please also note that 'end_offset' is actually the offset of the
281619982ba8STrond Myklebust 	 * first byte that lies outside the pnfs_layout_range. FIXME?
281719982ba8STrond Myklebust 	 *
281819982ba8STrond Myklebust 	 */
281919b54848SWeston Andros Adamson 	if (pgio->pg_lseg) {
282017822b20STrond Myklebust 		seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset,
282119982ba8STrond Myklebust 				     pgio->pg_lseg->pls_range.length);
2822c5e20cb7SWeston Andros Adamson 		req_start = req_offset(req);
282308cb5b0fSBenjamin Coddington 
2824c5e20cb7SWeston Andros Adamson 		/* start of request is past the last byte of this segment */
282508cb5b0fSBenjamin Coddington 		if (req_start >= seg_end)
2826b4fdac1aSWeston Andros Adamson 			return 0;
2827c5e20cb7SWeston Andros Adamson 
2828c5e20cb7SWeston Andros Adamson 		/* adjust 'size' iff there are fewer bytes left in the
2829c5e20cb7SWeston Andros Adamson 		 * segment than what nfs_generic_pg_test returned */
2830c5e20cb7SWeston Andros Adamson 		seg_left = seg_end - req_start;
2831c5e20cb7SWeston Andros Adamson 		if (seg_left < size)
2832c5e20cb7SWeston Andros Adamson 			size = (unsigned int)seg_left;
283319b54848SWeston Andros Adamson 	}
28340f9c429eSWeston Andros Adamson 
283519b54848SWeston Andros Adamson 	return size;
2836bae724efSFred Isaman }
283789a58e32SBenny Halevy EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
2838bae724efSFred Isaman 
pnfs_write_done_resend_to_mds(struct nfs_pgio_header * hdr)283953113ad3SWeston Andros Adamson int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
2840e2fecb21STrond Myklebust {
2841e2fecb21STrond Myklebust 	struct nfs_pageio_descriptor pgio;
2842e2fecb21STrond Myklebust 
2843e2fecb21STrond Myklebust 	/* Resend all requests through the MDS */
284453113ad3SWeston Andros Adamson 	nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
284553113ad3SWeston Andros Adamson 			      hdr->completion_ops);
284653113ad3SWeston Andros Adamson 	return nfs_pageio_resend(&pgio, hdr);
2847e2fecb21STrond Myklebust }
2848e7dd79afSAndy Adamson EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
2849e2fecb21STrond Myklebust 
pnfs_ld_handle_write_error(struct nfs_pgio_header * hdr)2850d45f60c6SWeston Andros Adamson static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
28511acbbb4eSFred Isaman {
2852cd841605SFred Isaman 
2853cd841605SFred Isaman 	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
2854cd841605SFred Isaman 	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
28551acbbb4eSFred Isaman 	    PNFS_LAYOUTRET_ON_ERROR) {
2856cd841605SFred Isaman 		pnfs_return_layout(hdr->inode);
28571acbbb4eSFred Isaman 	}
28586c75dc0dSFred Isaman 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
285953113ad3SWeston Andros Adamson 		hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
28601acbbb4eSFred Isaman }
28611acbbb4eSFred Isaman 
2862d20581aaSBenny Halevy /*
2863d20581aaSBenny Halevy  * Called by non rpc-based layout drivers
2864d20581aaSBenny Halevy  */
pnfs_ld_write_done(struct nfs_pgio_header * hdr)2865d45f60c6SWeston Andros Adamson void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
286694ad1c80SFred Isaman {
2867f8417b48SKinglong Mee 	if (likely(!hdr->pnfs_error)) {
286867af7611STrond Myklebust 		pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
286967af7611STrond Myklebust 				hdr->mds_offset + hdr->res.count);
2870d45f60c6SWeston Andros Adamson 		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
2871f8417b48SKinglong Mee 	}
2872f8417b48SKinglong Mee 	trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
2873f8417b48SKinglong Mee 	if (unlikely(hdr->pnfs_error))
2874d45f60c6SWeston Andros Adamson 		pnfs_ld_handle_write_error(hdr);
2875d45f60c6SWeston Andros Adamson 	hdr->mds_ops->rpc_release(hdr);
287644b83799SFred Isaman }
2877d20581aaSBenny Halevy EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
287844b83799SFred Isaman 
2879dce81290STrond Myklebust static void
pnfs_write_through_mds(struct nfs_pageio_descriptor * desc,struct nfs_pgio_header * hdr)2880dce81290STrond Myklebust pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
2881d45f60c6SWeston Andros Adamson 		struct nfs_pgio_header *hdr)
2882dce81290STrond Myklebust {
288348d635f1SPeng Tao 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
2884a7d42ddbSWeston Andros Adamson 
28856c75dc0dSFred Isaman 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
2886a7d42ddbSWeston Andros Adamson 		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
2887dce81290STrond Myklebust 		nfs_pageio_reset_write_mds(desc);
2888a7d42ddbSWeston Andros Adamson 		mirror->pg_recoalesce = 1;
28896c75dc0dSFred Isaman 	}
2890ba4a76f7SScott Mayhew 	hdr->completion_ops->completion(hdr);
2891dce81290STrond Myklebust }
2892dce81290STrond Myklebust 
2893dce81290STrond Myklebust static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_pgio_header * hdr,const struct rpc_call_ops * call_ops,struct pnfs_layout_segment * lseg,int how)2894d45f60c6SWeston Andros Adamson pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
2895dce81290STrond Myklebust 			const struct rpc_call_ops *call_ops,
2896dce81290STrond Myklebust 			struct pnfs_layout_segment *lseg,
2897dce81290STrond Myklebust 			int how)
28980382b744SAndy Adamson {
2899cd841605SFred Isaman 	struct inode *inode = hdr->inode;
29000382b744SAndy Adamson 	enum pnfs_try_status trypnfs;
29010382b744SAndy Adamson 	struct nfs_server *nfss = NFS_SERVER(inode);
29020382b744SAndy Adamson 
2903cd841605SFred Isaman 	hdr->mds_ops = call_ops;
29040382b744SAndy Adamson 
29050382b744SAndy Adamson 	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
2906d45f60c6SWeston Andros Adamson 		inode->i_ino, hdr->args.count, hdr->args.offset, how);
2907d45f60c6SWeston Andros Adamson 	trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
29086c75dc0dSFred Isaman 	if (trypnfs != PNFS_NOT_ATTEMPTED)
29090382b744SAndy Adamson 		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
29100382b744SAndy Adamson 	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
29110382b744SAndy Adamson 	return trypnfs;
29120382b744SAndy Adamson }
29130382b744SAndy Adamson 
2914dce81290STrond Myklebust static void
pnfs_do_write(struct nfs_pageio_descriptor * desc,struct nfs_pgio_header * hdr,int how)29157f714720SWeston Andros Adamson pnfs_do_write(struct nfs_pageio_descriptor *desc,
29167f714720SWeston Andros Adamson 	      struct nfs_pgio_header *hdr, int how)
2917dce81290STrond Myklebust {
2918dce81290STrond Myklebust 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
2919dce81290STrond Myklebust 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
2920dce81290STrond Myklebust 	enum pnfs_try_status trypnfs;
2921dce81290STrond Myklebust 
2922d45f60c6SWeston Andros Adamson 	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
292337f8aa16STrond Myklebust 	switch (trypnfs) {
292437f8aa16STrond Myklebust 	case PNFS_NOT_ATTEMPTED:
2925d45f60c6SWeston Andros Adamson 		pnfs_write_through_mds(desc, hdr);
2926ffb81717SGustavo A. R. Silva 		break;
292737f8aa16STrond Myklebust 	case PNFS_ATTEMPTED:
292837f8aa16STrond Myklebust 		break;
292937f8aa16STrond Myklebust 	case PNFS_TRY_AGAIN:
293037f8aa16STrond Myklebust 		/* cleanup hdr and prepare to redo pnfs */
293137f8aa16STrond Myklebust 		if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
293237f8aa16STrond Myklebust 			struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
293337f8aa16STrond Myklebust 			list_splice_init(&hdr->pages, &mirror->pg_list);
293437f8aa16STrond Myklebust 			mirror->pg_recoalesce = 1;
293537f8aa16STrond Myklebust 		}
293637f8aa16STrond Myklebust 		hdr->mds_ops->rpc_release(hdr);
293737f8aa16STrond Myklebust 	}
2938dce81290STrond Myklebust }
2939dce81290STrond Myklebust 
pnfs_writehdr_free(struct nfs_pgio_header * hdr)29406c75dc0dSFred Isaman static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
29416c75dc0dSFred Isaman {
29429369a431STrond Myklebust 	pnfs_put_lseg(hdr->lseg);
29431e7f3a48SWeston Andros Adamson 	nfs_pgio_header_free(hdr);
29446c75dc0dSFred Isaman }
29456c75dc0dSFred Isaman 
2946dce81290STrond Myklebust int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor * desc)2947dce81290STrond Myklebust pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
2948dce81290STrond Myklebust {
29496c75dc0dSFred Isaman 	struct nfs_pgio_header *hdr;
2950dce81290STrond Myklebust 	int ret;
2951dce81290STrond Myklebust 
29521e7f3a48SWeston Andros Adamson 	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
29531e7f3a48SWeston Andros Adamson 	if (!hdr) {
29542bff2288SPeng Tao 		desc->pg_error = -ENOMEM;
29552bff2288SPeng Tao 		return desc->pg_error;
29566c75dc0dSFred Isaman 	}
29576c75dc0dSFred Isaman 	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
2958180bb5ecSWeston Andros Adamson 
29599369a431STrond Myklebust 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
2960ef2c488cSAnna Schumaker 	ret = nfs_generic_pgio(desc, hdr);
2961180bb5ecSWeston Andros Adamson 	if (!ret)
29627f714720SWeston Andros Adamson 		pnfs_do_write(desc, hdr, desc->pg_ioflags);
2963a7d42ddbSWeston Andros Adamson 
2964dce81290STrond Myklebust 	return ret;
2965dce81290STrond Myklebust }
2966dce81290STrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
2967dce81290STrond Myklebust 
pnfs_read_done_resend_to_mds(struct nfs_pgio_header * hdr)296853113ad3SWeston Andros Adamson int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
296962e4a769STrond Myklebust {
297062e4a769STrond Myklebust 	struct nfs_pageio_descriptor pgio;
297162e4a769STrond Myklebust 
29721acbbb4eSFred Isaman 	/* Resend all requests through the MDS */
297353113ad3SWeston Andros Adamson 	nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
297453113ad3SWeston Andros Adamson 	return nfs_pageio_resend(&pgio, hdr);
29751acbbb4eSFred Isaman }
2976e7dd79afSAndy Adamson EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
29771acbbb4eSFred Isaman 
pnfs_ld_handle_read_error(struct nfs_pgio_header * hdr)2978d45f60c6SWeston Andros Adamson static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
29791acbbb4eSFred Isaman {
2980cd841605SFred Isaman 	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
2981cd841605SFred Isaman 	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
29821acbbb4eSFred Isaman 	    PNFS_LAYOUTRET_ON_ERROR) {
2983cd841605SFred Isaman 		pnfs_return_layout(hdr->inode);
29841acbbb4eSFred Isaman 	}
29854db6e0b7SFred Isaman 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
298653113ad3SWeston Andros Adamson 		hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
298762e4a769STrond Myklebust }
298862e4a769STrond Myklebust 
2989b1f69b75SAndy Adamson /*
2990d20581aaSBenny Halevy  * Called by non rpc-based layout drivers
2991d20581aaSBenny Halevy  */
pnfs_ld_read_done(struct nfs_pgio_header * hdr)2992d45f60c6SWeston Andros Adamson void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
2993d20581aaSBenny Halevy {
2994bfc505deSTrond Myklebust 	if (likely(!hdr->pnfs_error))
2995d45f60c6SWeston Andros Adamson 		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
2996f8417b48SKinglong Mee 	trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
2997f8417b48SKinglong Mee 	if (unlikely(hdr->pnfs_error))
2998d45f60c6SWeston Andros Adamson 		pnfs_ld_handle_read_error(hdr);
2999d45f60c6SWeston Andros Adamson 	hdr->mds_ops->rpc_release(hdr);
3000d20581aaSBenny Halevy }
3001d20581aaSBenny Halevy EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
3002d20581aaSBenny Halevy 
3003493292ddSTrond Myklebust static void
pnfs_read_through_mds(struct nfs_pageio_descriptor * desc,struct nfs_pgio_header * hdr)3004493292ddSTrond Myklebust pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
3005d45f60c6SWeston Andros Adamson 		struct nfs_pgio_header *hdr)
3006493292ddSTrond Myklebust {
300748d635f1SPeng Tao 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
3008a7d42ddbSWeston Andros Adamson 
30094db6e0b7SFred Isaman 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
3010a7d42ddbSWeston Andros Adamson 		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
3011493292ddSTrond Myklebust 		nfs_pageio_reset_read_mds(desc);
3012a7d42ddbSWeston Andros Adamson 		mirror->pg_recoalesce = 1;
30134db6e0b7SFred Isaman 	}
3014ba4a76f7SScott Mayhew 	hdr->completion_ops->completion(hdr);
3015493292ddSTrond Myklebust }
3016493292ddSTrond Myklebust 
3017d20581aaSBenny Halevy /*
301864419a9bSAndy Adamson  * Call the appropriate parallel I/O subsystem read function.
301964419a9bSAndy Adamson  */
3020493292ddSTrond Myklebust static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_pgio_header * hdr,const struct rpc_call_ops * call_ops,struct pnfs_layout_segment * lseg)3021d45f60c6SWeston Andros Adamson pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
3022493292ddSTrond Myklebust 		       const struct rpc_call_ops *call_ops,
3023493292ddSTrond Myklebust 		       struct pnfs_layout_segment *lseg)
302464419a9bSAndy Adamson {
3025cd841605SFred Isaman 	struct inode *inode = hdr->inode;
302664419a9bSAndy Adamson 	struct nfs_server *nfss = NFS_SERVER(inode);
302764419a9bSAndy Adamson 	enum pnfs_try_status trypnfs;
302864419a9bSAndy Adamson 
3029cd841605SFred Isaman 	hdr->mds_ops = call_ops;
303064419a9bSAndy Adamson 
303164419a9bSAndy Adamson 	dprintk("%s: Reading ino:%lu %u@%llu\n",
3032d45f60c6SWeston Andros Adamson 		__func__, inode->i_ino, hdr->args.count, hdr->args.offset);
303364419a9bSAndy Adamson 
3034d45f60c6SWeston Andros Adamson 	trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
30354db6e0b7SFred Isaman 	if (trypnfs != PNFS_NOT_ATTEMPTED)
303664419a9bSAndy Adamson 		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
303764419a9bSAndy Adamson 	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
303864419a9bSAndy Adamson 	return trypnfs;
303964419a9bSAndy Adamson }
3040863a3c6cSAndy Adamson 
3041ceb11e13SPeng Tao /* Resend all requests through pnfs. */
pnfs_read_resend_pnfs(struct nfs_pgio_header * hdr,unsigned int mirror_idx)3042563c53e7STrond Myklebust void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr,
3043563c53e7STrond Myklebust 			   unsigned int mirror_idx)
3044ceb11e13SPeng Tao {
3045ceb11e13SPeng Tao 	struct nfs_pageio_descriptor pgio;
3046ceb11e13SPeng Tao 
30471b1bc66bSWeston Andros Adamson 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
304854e4a0dfSTrond Myklebust 		/* Prevent deadlocks with layoutreturn! */
304954e4a0dfSTrond Myklebust 		pnfs_put_lseg(hdr->lseg);
305054e4a0dfSTrond Myklebust 		hdr->lseg = NULL;
305154e4a0dfSTrond Myklebust 
30521b1bc66bSWeston Andros Adamson 		nfs_pageio_init_read(&pgio, hdr->inode, false,
30531b1bc66bSWeston Andros Adamson 					hdr->completion_ops);
3054563c53e7STrond Myklebust 		pgio.pg_mirror_idx = mirror_idx;
30551b1bc66bSWeston Andros Adamson 		hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
30561b1bc66bSWeston Andros Adamson 	}
3057ceb11e13SPeng Tao }
3058ceb11e13SPeng Tao EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs);
3059ceb11e13SPeng Tao 
3060493292ddSTrond Myklebust static void
pnfs_do_read(struct nfs_pageio_descriptor * desc,struct nfs_pgio_header * hdr)30617f714720SWeston Andros Adamson pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
3062493292ddSTrond Myklebust {
3063493292ddSTrond Myklebust 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
3064493292ddSTrond Myklebust 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
3065493292ddSTrond Myklebust 	enum pnfs_try_status trypnfs;
3066493292ddSTrond Myklebust 
3067d45f60c6SWeston Andros Adamson 	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
30686aeafd05STrond Myklebust 	switch (trypnfs) {
30696aeafd05STrond Myklebust 	case PNFS_NOT_ATTEMPTED:
3070d45f60c6SWeston Andros Adamson 		pnfs_read_through_mds(desc, hdr);
3071ffb81717SGustavo A. R. Silva 		break;
30726aeafd05STrond Myklebust 	case PNFS_ATTEMPTED:
30736aeafd05STrond Myklebust 		break;
30746aeafd05STrond Myklebust 	case PNFS_TRY_AGAIN:
30756aeafd05STrond Myklebust 		/* cleanup hdr and prepare to redo pnfs */
30766aeafd05STrond Myklebust 		if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
30776aeafd05STrond Myklebust 			struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
30786aeafd05STrond Myklebust 			list_splice_init(&hdr->pages, &mirror->pg_list);
30796aeafd05STrond Myklebust 			mirror->pg_recoalesce = 1;
30806aeafd05STrond Myklebust 		}
30816aeafd05STrond Myklebust 		hdr->mds_ops->rpc_release(hdr);
30826aeafd05STrond Myklebust 	}
3083493292ddSTrond Myklebust }
3084493292ddSTrond Myklebust 
pnfs_readhdr_free(struct nfs_pgio_header * hdr)30854db6e0b7SFred Isaman static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
30864db6e0b7SFred Isaman {
30879369a431STrond Myklebust 	pnfs_put_lseg(hdr->lseg);
30881e7f3a48SWeston Andros Adamson 	nfs_pgio_header_free(hdr);
30894db6e0b7SFred Isaman }
30904db6e0b7SFred Isaman 
3091493292ddSTrond Myklebust int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor * desc)3092493292ddSTrond Myklebust pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
3093493292ddSTrond Myklebust {
30944db6e0b7SFred Isaman 	struct nfs_pgio_header *hdr;
3095493292ddSTrond Myklebust 	int ret;
3096493292ddSTrond Myklebust 
30971e7f3a48SWeston Andros Adamson 	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
30981e7f3a48SWeston Andros Adamson 	if (!hdr) {
30992bff2288SPeng Tao 		desc->pg_error = -ENOMEM;
31002bff2288SPeng Tao 		return desc->pg_error;
3101493292ddSTrond Myklebust 	}
31024db6e0b7SFred Isaman 	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
31039369a431STrond Myklebust 	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
3104ef2c488cSAnna Schumaker 	ret = nfs_generic_pgio(desc, hdr);
3105180bb5ecSWeston Andros Adamson 	if (!ret)
31067f714720SWeston Andros Adamson 		pnfs_do_read(desc, hdr);
31074db6e0b7SFred Isaman 	return ret;
3108493292ddSTrond Myklebust }
3109493292ddSTrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
3110493292ddSTrond Myklebust 
pnfs_clear_layoutcommitting(struct inode * inode)311171244d9bSTrond Myklebust static void pnfs_clear_layoutcommitting(struct inode *inode)
311271244d9bSTrond Myklebust {
311371244d9bSTrond Myklebust 	unsigned long *bitlock = &NFS_I(inode)->flags;
311471244d9bSTrond Myklebust 
311571244d9bSTrond Myklebust 	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
31164e857c58SPeter Zijlstra 	smp_mb__after_atomic();
311771244d9bSTrond Myklebust 	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
311871244d9bSTrond Myklebust }
311971244d9bSTrond Myklebust 
3120863a3c6cSAndy Adamson /*
3121a9bae566SPeng Tao  * There can be multiple RW segments.
3122863a3c6cSAndy Adamson  */
pnfs_list_write_lseg(struct inode * inode,struct list_head * listp)3123a9bae566SPeng Tao static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
3124863a3c6cSAndy Adamson {
3125a9bae566SPeng Tao 	struct pnfs_layout_segment *lseg;
3126863a3c6cSAndy Adamson 
3127a9bae566SPeng Tao 	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
3128a9bae566SPeng Tao 		if (lseg->pls_range.iomode == IOMODE_RW &&
3129a073dbffSTrond Myklebust 		    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
3130a9bae566SPeng Tao 			list_add(&lseg->pls_lc_list, listp);
3131a9bae566SPeng Tao 	}
3132863a3c6cSAndy Adamson }
3133863a3c6cSAndy Adamson 
pnfs_list_write_lseg_done(struct inode * inode,struct list_head * listp)3134a073dbffSTrond Myklebust static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
3135a073dbffSTrond Myklebust {
3136a073dbffSTrond Myklebust 	struct pnfs_layout_segment *lseg, *tmp;
3137a073dbffSTrond Myklebust 
3138a073dbffSTrond Myklebust 	/* Matched by references in pnfs_set_layoutcommit */
3139a073dbffSTrond Myklebust 	list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
3140a073dbffSTrond Myklebust 		list_del_init(&lseg->pls_lc_list);
3141a073dbffSTrond Myklebust 		pnfs_put_lseg(lseg);
3142a073dbffSTrond Myklebust 	}
3143a073dbffSTrond Myklebust 
314471244d9bSTrond Myklebust 	pnfs_clear_layoutcommitting(inode);
3145a073dbffSTrond Myklebust }
3146a073dbffSTrond Myklebust 
pnfs_set_lo_fail(struct pnfs_layout_segment * lseg)31471b0ae068SPeng Tao void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
31481b0ae068SPeng Tao {
3149b9e028fdSTrond Myklebust 	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
31501b0ae068SPeng Tao }
31511b0ae068SPeng Tao EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
31521b0ae068SPeng Tao 
3153863a3c6cSAndy Adamson void
pnfs_set_layoutcommit(struct inode * inode,struct pnfs_layout_segment * lseg,loff_t end_pos)315467af7611STrond Myklebust pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
315567af7611STrond Myklebust 		loff_t end_pos)
3156863a3c6cSAndy Adamson {
3157cd841605SFred Isaman 	struct nfs_inode *nfsi = NFS_I(inode);
315879a48a1fSWeston Andros Adamson 	bool mark_as_dirty = false;
3159863a3c6cSAndy Adamson 
3160cd841605SFred Isaman 	spin_lock(&inode->i_lock);
3161863a3c6cSAndy Adamson 	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
316229559b11STrond Myklebust 		nfsi->layout->plh_lwb = end_pos;
316379a48a1fSWeston Andros Adamson 		mark_as_dirty = true;
3164863a3c6cSAndy Adamson 		dprintk("%s: Set layoutcommit for inode %lu ",
3165cd841605SFred Isaman 			__func__, inode->i_ino);
316629559b11STrond Myklebust 	} else if (end_pos > nfsi->layout->plh_lwb)
316729559b11STrond Myklebust 		nfsi->layout->plh_lwb = end_pos;
316867af7611STrond Myklebust 	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
3169a9bae566SPeng Tao 		/* references matched in nfs4_layoutcommit_release */
317067af7611STrond Myklebust 		pnfs_get_lseg(lseg);
3171a9bae566SPeng Tao 	}
3172cd841605SFred Isaman 	spin_unlock(&inode->i_lock);
3173acff5880SPeng Tao 	dprintk("%s: lseg %p end_pos %llu\n",
317467af7611STrond Myklebust 		__func__, lseg, nfsi->layout->plh_lwb);
317579a48a1fSWeston Andros Adamson 
317679a48a1fSWeston Andros Adamson 	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
317779a48a1fSWeston Andros Adamson 	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
317879a48a1fSWeston Andros Adamson 	if (mark_as_dirty)
3179cd841605SFred Isaman 		mark_inode_dirty_sync(inode);
3180863a3c6cSAndy Adamson }
3181863a3c6cSAndy Adamson EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
3182863a3c6cSAndy Adamson 
pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data * data)3183db29c089SAndy Adamson void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
3184db29c089SAndy Adamson {
3185db29c089SAndy Adamson 	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
3186db29c089SAndy Adamson 
3187db29c089SAndy Adamson 	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
3188db29c089SAndy Adamson 		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
3189a073dbffSTrond Myklebust 	pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
3190db29c089SAndy Adamson }
3191db29c089SAndy Adamson 
3192de4b15c7SAndy Adamson /*
3193de4b15c7SAndy Adamson  * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
3194de4b15c7SAndy Adamson  * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
3195de4b15c7SAndy Adamson  * data to disk to allow the server to recover the data if it crashes.
3196de4b15c7SAndy Adamson  * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
3197de4b15c7SAndy Adamson  * is off, and a COMMIT is sent to a data server, or
3198de4b15c7SAndy Adamson  * if WRITEs to a data server return NFS_DATA_SYNC.
3199de4b15c7SAndy Adamson  */
3200863a3c6cSAndy Adamson int
pnfs_layoutcommit_inode(struct inode * inode,bool sync)3201ef311537SAndy Adamson pnfs_layoutcommit_inode(struct inode *inode, bool sync)
3202863a3c6cSAndy Adamson {
32035f919c9fSChristoph Hellwig 	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
3204863a3c6cSAndy Adamson 	struct nfs4_layoutcommit_data *data;
3205863a3c6cSAndy Adamson 	struct nfs_inode *nfsi = NFS_I(inode);
3206863a3c6cSAndy Adamson 	loff_t end_pos;
320771244d9bSTrond Myklebust 	int status;
320871244d9bSTrond Myklebust 
320971244d9bSTrond Myklebust 	if (!pnfs_layoutcommit_outstanding(inode))
321071244d9bSTrond Myklebust 		return 0;
3211863a3c6cSAndy Adamson 
3212863a3c6cSAndy Adamson 	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
3213863a3c6cSAndy Adamson 
321471244d9bSTrond Myklebust 	status = -EAGAIN;
321571244d9bSTrond Myklebust 	if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
321671244d9bSTrond Myklebust 		if (!sync)
321771244d9bSTrond Myklebust 			goto out;
321874316201SNeilBrown 		status = wait_on_bit_lock_action(&nfsi->flags,
321971244d9bSTrond Myklebust 				NFS_INO_LAYOUTCOMMITTING,
322071244d9bSTrond Myklebust 				nfs_wait_bit_killable,
3221f5d39b02SPeter Zijlstra 				TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
322271244d9bSTrond Myklebust 		if (status)
3223de4b15c7SAndy Adamson 			goto out;
3224de4b15c7SAndy Adamson 	}
3225863a3c6cSAndy Adamson 
322671244d9bSTrond Myklebust 	status = -ENOMEM;
322771244d9bSTrond Myklebust 	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
322863d8a41bSTrond Myklebust 	data = kzalloc(sizeof(*data), nfs_io_gfp_mask());
322971244d9bSTrond Myklebust 	if (!data)
323071244d9bSTrond Myklebust 		goto clear_layoutcommitting;
323192407e75SPeng Tao 
323271244d9bSTrond Myklebust 	status = 0;
323371244d9bSTrond Myklebust 	spin_lock(&inode->i_lock);
323471244d9bSTrond Myklebust 	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
323571244d9bSTrond Myklebust 		goto out_unlock;
323692407e75SPeng Tao 
3237a9bae566SPeng Tao 	INIT_LIST_HEAD(&data->lseg_list);
3238a9bae566SPeng Tao 	pnfs_list_write_lseg(inode, &data->lseg_list);
3239863a3c6cSAndy Adamson 
3240acff5880SPeng Tao 	end_pos = nfsi->layout->plh_lwb;
3241863a3c6cSAndy Adamson 
3242f597c537STrond Myklebust 	nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
324397a728f5STrond Myklebust 	data->cred = get_cred(nfsi->layout->plh_lc_cred);
3244863a3c6cSAndy Adamson 	spin_unlock(&inode->i_lock);
3245863a3c6cSAndy Adamson 
3246863a3c6cSAndy Adamson 	data->args.inode = inode;
3247863a3c6cSAndy Adamson 	nfs_fattr_init(&data->fattr);
3248863a3c6cSAndy Adamson 	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
3249863a3c6cSAndy Adamson 	data->res.fattr = &data->fattr;
32502e18d4d8STrond Myklebust 	if (end_pos != 0)
3251863a3c6cSAndy Adamson 		data->args.lastbytewritten = end_pos - 1;
32522e18d4d8STrond Myklebust 	else
32532e18d4d8STrond Myklebust 		data->args.lastbytewritten = U64_MAX;
3254863a3c6cSAndy Adamson 	data->res.server = NFS_SERVER(inode);
3255863a3c6cSAndy Adamson 
32565f919c9fSChristoph Hellwig 	if (ld->prepare_layoutcommit) {
32575f919c9fSChristoph Hellwig 		status = ld->prepare_layoutcommit(&data->args);
32585f919c9fSChristoph Hellwig 		if (status) {
3259a52458b4SNeilBrown 			put_cred(data->cred);
32605f919c9fSChristoph Hellwig 			spin_lock(&inode->i_lock);
326129559b11STrond Myklebust 			set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
326229559b11STrond Myklebust 			if (end_pos > nfsi->layout->plh_lwb)
32635f919c9fSChristoph Hellwig 				nfsi->layout->plh_lwb = end_pos;
32643471648aSJeff Layton 			goto out_unlock;
32655f919c9fSChristoph Hellwig 		}
32665f919c9fSChristoph Hellwig 	}
32675f919c9fSChristoph Hellwig 
32685f919c9fSChristoph Hellwig 
3269863a3c6cSAndy Adamson 	status = nfs4_proc_layoutcommit(data, sync);
3270863a3c6cSAndy Adamson out:
327192407e75SPeng Tao 	if (status)
327292407e75SPeng Tao 		mark_inode_dirty_sync(inode);
3273863a3c6cSAndy Adamson 	dprintk("<-- %s status %d\n", __func__, status);
3274863a3c6cSAndy Adamson 	return status;
327571244d9bSTrond Myklebust out_unlock:
327671244d9bSTrond Myklebust 	spin_unlock(&inode->i_lock);
327792407e75SPeng Tao 	kfree(data);
327871244d9bSTrond Myklebust clear_layoutcommitting:
327971244d9bSTrond Myklebust 	pnfs_clear_layoutcommitting(inode);
328092407e75SPeng Tao 	goto out;
3281863a3c6cSAndy Adamson }
328272cff449SPeng Tao EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);
328382be417aSAndy Adamson 
32845bb89b47STrond Myklebust int
pnfs_generic_sync(struct inode * inode,bool datasync)32855bb89b47STrond Myklebust pnfs_generic_sync(struct inode *inode, bool datasync)
32865bb89b47STrond Myklebust {
32875bb89b47STrond Myklebust 	return pnfs_layoutcommit_inode(inode, true);
32885bb89b47STrond Myklebust }
32895bb89b47STrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_sync);
32905bb89b47STrond Myklebust 
pnfs_mdsthreshold_alloc(void)329182be417aSAndy Adamson struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
329282be417aSAndy Adamson {
329382be417aSAndy Adamson 	struct nfs4_threshold *thp;
329482be417aSAndy Adamson 
329563d8a41bSTrond Myklebust 	thp = kzalloc(sizeof(*thp), nfs_io_gfp_mask());
329682be417aSAndy Adamson 	if (!thp) {
329782be417aSAndy Adamson 		dprintk("%s mdsthreshold allocation failed\n", __func__);
329882be417aSAndy Adamson 		return NULL;
329982be417aSAndy Adamson 	}
330082be417aSAndy Adamson 	return thp;
330182be417aSAndy Adamson }
33028733408dSPeng Tao 
3303865a7ecbSPeng Tao #if IS_ENABLED(CONFIG_NFS_V4_2)
33048733408dSPeng Tao int
pnfs_report_layoutstat(struct inode * inode,gfp_t gfp_flags)3305c8ad8894STrond Myklebust pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
33068733408dSPeng Tao {
33078733408dSPeng Tao 	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
33088733408dSPeng Tao 	struct nfs_server *server = NFS_SERVER(inode);
33091bfe3b25SPeng Tao 	struct nfs_inode *nfsi = NFS_I(inode);
33108733408dSPeng Tao 	struct nfs42_layoutstat_data *data;
33118733408dSPeng Tao 	struct pnfs_layout_hdr *hdr;
33128733408dSPeng Tao 	int status = 0;
33138733408dSPeng Tao 
33148733408dSPeng Tao 	if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
33158733408dSPeng Tao 		goto out;
33168733408dSPeng Tao 
33176c5a0d89STrond Myklebust 	if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
33186c5a0d89STrond Myklebust 		goto out;
33196c5a0d89STrond Myklebust 
33201bfe3b25SPeng Tao 	if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
33211bfe3b25SPeng Tao 		goto out;
33221bfe3b25SPeng Tao 
33238733408dSPeng Tao 	spin_lock(&inode->i_lock);
33248733408dSPeng Tao 	if (!NFS_I(inode)->layout) {
33258733408dSPeng Tao 		spin_unlock(&inode->i_lock);
3326f538d0baSTrond Myklebust 		goto out_clear_layoutstats;
33278733408dSPeng Tao 	}
33288733408dSPeng Tao 	hdr = NFS_I(inode)->layout;
33298733408dSPeng Tao 	pnfs_get_layout_hdr(hdr);
33308733408dSPeng Tao 	spin_unlock(&inode->i_lock);
33318733408dSPeng Tao 
3332c8ad8894STrond Myklebust 	data = kzalloc(sizeof(*data), gfp_flags);
33338733408dSPeng Tao 	if (!data) {
33348733408dSPeng Tao 		status = -ENOMEM;
33358733408dSPeng Tao 		goto out_put;
33368733408dSPeng Tao 	}
33378733408dSPeng Tao 
33388733408dSPeng Tao 	data->args.fh = NFS_FH(inode);
33398733408dSPeng Tao 	data->args.inode = inode;
33408733408dSPeng Tao 	status = ld->prepare_layoutstats(&data->args);
33418733408dSPeng Tao 	if (status)
33428733408dSPeng Tao 		goto out_free;
33438733408dSPeng Tao 
33448733408dSPeng Tao 	status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);
33458733408dSPeng Tao 
33468733408dSPeng Tao out:
33478733408dSPeng Tao 	dprintk("%s returns %d\n", __func__, status);
33488733408dSPeng Tao 	return status;
33498733408dSPeng Tao 
33508733408dSPeng Tao out_free:
33518733408dSPeng Tao 	kfree(data);
33528733408dSPeng Tao out_put:
33538733408dSPeng Tao 	pnfs_put_layout_hdr(hdr);
3354f538d0baSTrond Myklebust out_clear_layoutstats:
33551bfe3b25SPeng Tao 	smp_mb__before_atomic();
33561bfe3b25SPeng Tao 	clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
33571bfe3b25SPeng Tao 	smp_mb__after_atomic();
33588733408dSPeng Tao 	goto out;
33598733408dSPeng Tao }
33608733408dSPeng Tao EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
3361865a7ecbSPeng Tao #endif
3362bbf58bf3STrond Myklebust 
3363bbf58bf3STrond Myklebust unsigned int layoutstats_timer;
3364bbf58bf3STrond Myklebust module_param(layoutstats_timer, uint, 0644);
3365bbf58bf3STrond Myklebust EXPORT_SYMBOL_GPL(layoutstats_timer);
3366