/*
 *  pNFS functions to call and manage layout drivers.
 *
 *  Copyright (c) 2002 [year of first publication]
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement.  The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */
2985e174baSRicardo Labiaga
3085e174baSRicardo Labiaga #include <linux/nfs_fs.h>
31493292ddSTrond Myklebust #include <linux/nfs_page.h>
32143cb494SPaul Gortmaker #include <linux/module.h>
33ca440c38SJeff Layton #include <linux/sort.h>
34974cec8cSAndy Adamson #include "internal.h"
3585e174baSRicardo Labiaga #include "pnfs.h"
3664419a9bSAndy Adamson #include "iostat.h"
37cc668ab3STrond Myklebust #include "nfs4trace.h"
3840dd4b7aSTrond Myklebust #include "delegation.h"
398733408dSPeng Tao #include "nfs42.h"
401b146fcfSFred Isaman #include "nfs4_fs.h"
4185e174baSRicardo Labiaga
/* Debug facility for this file (presumably consumed by dprintk(); confirm) */
#define NFSDBG_FACILITY		NFSDBG_PNFS

/*
 * Window, in jiffies, after a failed layoutget during which the failure
 * bit stays set; once it has elapsed the layoutget is retried.
 */
#define PNFS_LAYOUTGET_RETRY_TIMEOUT	(120 * HZ)
4485e174baSRicardo Labiaga
/*
 * Locking:
 *
 * pnfs_spinlock protects pnfs_modules_tbl, the list that holds every
 * registered pnfs layout driver module.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);
static LIST_HEAD(pnfs_modules_tbl);
5602c35fcaSFred Isaman
5713c13a6aSTrond Myklebust static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
5868f74479STrond Myklebust static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
5968f74479STrond Myklebust struct list_head *free_me,
6068f74479STrond Myklebust const struct pnfs_layout_range *range,
6168f74479STrond Myklebust u32 seq);
62fe1cf946STrond Myklebust static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
63fe1cf946STrond Myklebust struct list_head *tmp_list);
64aa1e0e3aSPeng Tao
6502c35fcaSFred Isaman /* Return the registered pnfs layout driver module matching given id */
6602c35fcaSFred Isaman static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)6702c35fcaSFred Isaman find_pnfs_driver_locked(u32 id)
6802c35fcaSFred Isaman {
6902c35fcaSFred Isaman struct pnfs_layoutdriver_type *local;
7002c35fcaSFred Isaman
7102c35fcaSFred Isaman list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
7202c35fcaSFred Isaman if (local->id == id)
7302c35fcaSFred Isaman goto out;
7402c35fcaSFred Isaman local = NULL;
7502c35fcaSFred Isaman out:
7602c35fcaSFred Isaman dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
7702c35fcaSFred Isaman return local;
7802c35fcaSFred Isaman }
7902c35fcaSFred Isaman
8085e174baSRicardo Labiaga static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)8185e174baSRicardo Labiaga find_pnfs_driver(u32 id)
8285e174baSRicardo Labiaga {
8302c35fcaSFred Isaman struct pnfs_layoutdriver_type *local;
8402c35fcaSFred Isaman
8502c35fcaSFred Isaman spin_lock(&pnfs_spinlock);
8602c35fcaSFred Isaman local = find_pnfs_driver_locked(id);
870a9c63faSTrond Myklebust if (local != NULL && !try_module_get(local->owner)) {
880a9c63faSTrond Myklebust dprintk("%s: Could not grab reference on module\n", __func__);
890a9c63faSTrond Myklebust local = NULL;
900a9c63faSTrond Myklebust }
9102c35fcaSFred Isaman spin_unlock(&pnfs_spinlock);
9202c35fcaSFred Isaman return local;
9385e174baSRicardo Labiaga }
9485e174baSRicardo Labiaga
/*
 * Non-static wrapper around find_pnfs_driver(): returns the driver for
 * @id with a module reference held, or NULL.
 */
const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id)
{
	const struct pnfs_layoutdriver_type *ld = find_pnfs_driver(id);

	return ld;
}
997c9d845fSTrond Myklebust
pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type * ld)1007c9d845fSTrond Myklebust void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld)
1017c9d845fSTrond Myklebust {
1027c9d845fSTrond Myklebust if (ld)
1037c9d845fSTrond Myklebust module_put(ld->owner);
1047c9d845fSTrond Myklebust }
1057c9d845fSTrond Myklebust
10685e174baSRicardo Labiaga void
unset_pnfs_layoutdriver(struct nfs_server * nfss)10785e174baSRicardo Labiaga unset_pnfs_layoutdriver(struct nfs_server *nfss)
10885e174baSRicardo Labiaga {
109738fd0f3SBenny Halevy if (nfss->pnfs_curr_ld) {
110738fd0f3SBenny Halevy if (nfss->pnfs_curr_ld->clear_layoutdriver)
111738fd0f3SBenny Halevy nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
1122a4c8994STrond Myklebust /* Decrement the MDS count. Purge the deviceid cache if zero */
1132a4c8994STrond Myklebust if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
1142a4c8994STrond Myklebust nfs4_deviceid_purge_client(nfss->nfs_client);
11502c35fcaSFred Isaman module_put(nfss->pnfs_curr_ld->owner);
116738fd0f3SBenny Halevy }
11785e174baSRicardo Labiaga nfss->pnfs_curr_ld = NULL;
11885e174baSRicardo Labiaga }
11985e174baSRicardo Labiaga
12085e174baSRicardo Labiaga /*
121ca440c38SJeff Layton * When the server sends a list of layout types, we choose one in the order
122ca440c38SJeff Layton * given in the list below.
123ca440c38SJeff Layton *
124ca440c38SJeff Layton * FIXME: should this list be configurable in some fashion? module param?
125ca440c38SJeff Layton * mount option? something else?
126ca440c38SJeff Layton */
127ca440c38SJeff Layton static const u32 ld_prefs[] = {
128ca440c38SJeff Layton LAYOUT_SCSI,
129ca440c38SJeff Layton LAYOUT_BLOCK_VOLUME,
130ca440c38SJeff Layton LAYOUT_OSD2_OBJECTS,
131ca440c38SJeff Layton LAYOUT_FLEX_FILES,
132ca440c38SJeff Layton LAYOUT_NFSV4_1_FILES,
133ca440c38SJeff Layton 0
134ca440c38SJeff Layton };
135ca440c38SJeff Layton
136ca440c38SJeff Layton static int
ld_cmp(const void * e1,const void * e2)137ca440c38SJeff Layton ld_cmp(const void *e1, const void *e2)
138ca440c38SJeff Layton {
139ca440c38SJeff Layton u32 ld1 = *((u32 *)e1);
140ca440c38SJeff Layton u32 ld2 = *((u32 *)e2);
141ca440c38SJeff Layton int i;
142ca440c38SJeff Layton
143ca440c38SJeff Layton for (i = 0; ld_prefs[i] != 0; i++) {
144ca440c38SJeff Layton if (ld1 == ld_prefs[i])
145ca440c38SJeff Layton return -1;
146ca440c38SJeff Layton
147ca440c38SJeff Layton if (ld2 == ld_prefs[i])
148ca440c38SJeff Layton return 1;
149ca440c38SJeff Layton }
150ca440c38SJeff Layton return 0;
151ca440c38SJeff Layton }
152ca440c38SJeff Layton
153ca440c38SJeff Layton /*
15485e174baSRicardo Labiaga * Try to set the server's pnfs module to the pnfs layout type specified by id.
15585e174baSRicardo Labiaga * Currently only one pNFS layout driver per filesystem is supported.
15685e174baSRicardo Labiaga *
1573132e49eSJeff Layton * @ids array of layout types supported by MDS.
15885e174baSRicardo Labiaga */
15985e174baSRicardo Labiaga void
set_pnfs_layoutdriver(struct nfs_server * server,const struct nfs_fh * mntfh,struct nfs_fsinfo * fsinfo)160738fd0f3SBenny Halevy set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
161ca440c38SJeff Layton struct nfs_fsinfo *fsinfo)
16285e174baSRicardo Labiaga {
16385e174baSRicardo Labiaga struct pnfs_layoutdriver_type *ld_type = NULL;
1643132e49eSJeff Layton u32 id;
165ca440c38SJeff Layton int i;
16685e174baSRicardo Labiaga
16719274716SAnna Schumaker if (fsinfo->nlayouttypes == 0)
16819274716SAnna Schumaker goto out_no_driver;
16985e174baSRicardo Labiaga if (!(server->nfs_client->cl_exchange_flags &
17085e174baSRicardo Labiaga (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
1713132e49eSJeff Layton printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n",
1723132e49eSJeff Layton __func__, server->nfs_client->cl_exchange_flags);
17385e174baSRicardo Labiaga goto out_no_driver;
17485e174baSRicardo Labiaga }
1753132e49eSJeff Layton
176ca440c38SJeff Layton sort(fsinfo->layouttype, fsinfo->nlayouttypes,
177ca440c38SJeff Layton sizeof(*fsinfo->layouttype), ld_cmp, NULL);
1783132e49eSJeff Layton
179ca440c38SJeff Layton for (i = 0; i < fsinfo->nlayouttypes; i++) {
180ca440c38SJeff Layton id = fsinfo->layouttype[i];
18185e174baSRicardo Labiaga ld_type = find_pnfs_driver(id);
18285e174baSRicardo Labiaga if (!ld_type) {
183ca440c38SJeff Layton request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX,
184ca440c38SJeff Layton id);
18585e174baSRicardo Labiaga ld_type = find_pnfs_driver(id);
1863132e49eSJeff Layton }
187ca440c38SJeff Layton if (ld_type)
188ca440c38SJeff Layton break;
189ca440c38SJeff Layton }
1903132e49eSJeff Layton
19185e174baSRicardo Labiaga if (!ld_type) {
192ca440c38SJeff Layton dprintk("%s: No pNFS module found!\n", __func__);
19385e174baSRicardo Labiaga goto out_no_driver;
19485e174baSRicardo Labiaga }
1953132e49eSJeff Layton
19685e174baSRicardo Labiaga server->pnfs_curr_ld = ld_type;
197738fd0f3SBenny Halevy if (ld_type->set_layoutdriver
198738fd0f3SBenny Halevy && ld_type->set_layoutdriver(server, mntfh)) {
199a030889aSWeston Andros Adamson printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
200a030889aSWeston Andros Adamson "driver %u.\n", __func__, id);
201738fd0f3SBenny Halevy module_put(ld_type->owner);
202738fd0f3SBenny Halevy goto out_no_driver;
203738fd0f3SBenny Halevy }
2042a4c8994STrond Myklebust /* Bump the MDS count */
2052a4c8994STrond Myklebust atomic_inc(&server->nfs_client->cl_mds_count);
206ea8eecddSChristoph Hellwig
20785e174baSRicardo Labiaga dprintk("%s: pNFS module for %u set\n", __func__, id);
20885e174baSRicardo Labiaga return;
20985e174baSRicardo Labiaga
21085e174baSRicardo Labiaga out_no_driver:
21185e174baSRicardo Labiaga dprintk("%s: Using NFSv4 I/O\n", __func__);
21285e174baSRicardo Labiaga server->pnfs_curr_ld = NULL;
21385e174baSRicardo Labiaga }
21402c35fcaSFred Isaman
21502c35fcaSFred Isaman int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type * ld_type)21602c35fcaSFred Isaman pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
21702c35fcaSFred Isaman {
21802c35fcaSFred Isaman int status = -EINVAL;
21902c35fcaSFred Isaman struct pnfs_layoutdriver_type *tmp;
22002c35fcaSFred Isaman
22102c35fcaSFred Isaman if (ld_type->id == 0) {
222a030889aSWeston Andros Adamson printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
22302c35fcaSFred Isaman return status;
22402c35fcaSFred Isaman }
225b1f69b75SAndy Adamson if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
226a030889aSWeston Andros Adamson printk(KERN_ERR "NFS: %s Layout driver must provide "
227b1f69b75SAndy Adamson "alloc_lseg and free_lseg.\n", __func__);
228b1f69b75SAndy Adamson return status;
229b1f69b75SAndy Adamson }
23002c35fcaSFred Isaman
23102c35fcaSFred Isaman spin_lock(&pnfs_spinlock);
23202c35fcaSFred Isaman tmp = find_pnfs_driver_locked(ld_type->id);
23302c35fcaSFred Isaman if (!tmp) {
23402c35fcaSFred Isaman list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
23502c35fcaSFred Isaman status = 0;
23602c35fcaSFred Isaman dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
23702c35fcaSFred Isaman ld_type->name);
23802c35fcaSFred Isaman } else {
239a030889aSWeston Andros Adamson printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
24002c35fcaSFred Isaman __func__, ld_type->id);
24102c35fcaSFred Isaman }
24202c35fcaSFred Isaman spin_unlock(&pnfs_spinlock);
24302c35fcaSFred Isaman
24402c35fcaSFred Isaman return status;
24502c35fcaSFred Isaman }
24602c35fcaSFred Isaman EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
24702c35fcaSFred Isaman
24802c35fcaSFred Isaman void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type * ld_type)24902c35fcaSFred Isaman pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
25002c35fcaSFred Isaman {
25102c35fcaSFred Isaman dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
25202c35fcaSFred Isaman spin_lock(&pnfs_spinlock);
25302c35fcaSFred Isaman list_del(&ld_type->pnfs_tblid);
25402c35fcaSFred Isaman spin_unlock(&pnfs_spinlock);
25502c35fcaSFred Isaman }
25602c35fcaSFred Isaman EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
257e5e94017SBenny Halevy
258b1f69b75SAndy Adamson /*
259b1f69b75SAndy Adamson * pNFS client layout cache
260b1f69b75SAndy Adamson */
261b1f69b75SAndy Adamson
262cc6e5340SFred Isaman /* Need to hold i_lock if caller does not already hold reference */
26343f1b3daSFred Isaman void
pnfs_get_layout_hdr(struct pnfs_layout_hdr * lo)26470c3bd2bSTrond Myklebust pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
265e5e94017SBenny Halevy {
2662b28a7beSElena Reshetova refcount_inc(&lo->plh_refcount);
267e5e94017SBenny Halevy }
268e5e94017SBenny Halevy
269636fb9c8SBenny Halevy static struct pnfs_layout_hdr *
pnfs_alloc_layout_hdr(struct inode * ino,gfp_t gfp_flags)270636fb9c8SBenny Halevy pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
271636fb9c8SBenny Halevy {
272636fb9c8SBenny Halevy struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
27357934278STrond Myklebust return ld->alloc_layout_hdr(ino, gfp_flags);
274636fb9c8SBenny Halevy }
275636fb9c8SBenny Halevy
276636fb9c8SBenny Halevy static void
pnfs_free_layout_hdr(struct pnfs_layout_hdr * lo)277636fb9c8SBenny Halevy pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
278636fb9c8SBenny Halevy {
2799c626381STrond Myklebust struct nfs_server *server = NFS_SERVER(lo->plh_inode);
2809c626381STrond Myklebust struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
2819c626381STrond Myklebust
282cf6605d1STrond Myklebust if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) {
2839c626381STrond Myklebust struct nfs_client *clp = server->nfs_client;
2849c626381STrond Myklebust
2859c626381STrond Myklebust spin_lock(&clp->cl_lock);
286cf6605d1STrond Myklebust list_del_rcu(&lo->plh_layouts);
2879c626381STrond Myklebust spin_unlock(&clp->cl_lock);
2889c626381STrond Myklebust }
289a52458b4SNeilBrown put_cred(lo->plh_lc_cred);
29057934278STrond Myklebust return ld->free_layout_hdr(lo);
291636fb9c8SBenny Halevy }
292636fb9c8SBenny Halevy
293e5e94017SBenny Halevy static void
pnfs_detach_layout_hdr(struct pnfs_layout_hdr * lo)2946622c3eaSTrond Myklebust pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
295e5e94017SBenny Halevy {
296bb346f63STrond Myklebust struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
297e5e94017SBenny Halevy dprintk("%s: freeing layout cache %p\n", __func__, lo);
298bb346f63STrond Myklebust nfsi->layout = NULL;
299bb346f63STrond Myklebust /* Reset MDS Threshold I/O counters */
300bb346f63STrond Myklebust nfsi->write_io = 0;
301bb346f63STrond Myklebust nfsi->read_io = 0;
302e5e94017SBenny Halevy }
303e5e94017SBenny Halevy
304b1f69b75SAndy Adamson void
pnfs_put_layout_hdr(struct pnfs_layout_hdr * lo)30570c3bd2bSTrond Myklebust pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
306974cec8cSAndy Adamson {
3079c6376ebSTrond Myklebust struct inode *inode;
308b6d49ecdSTrond Myklebust unsigned long i_state;
309cc6e5340SFred Isaman
3109c6376ebSTrond Myklebust if (!lo)
3119c6376ebSTrond Myklebust return;
3129c6376ebSTrond Myklebust inode = lo->plh_inode;
31313c13a6aSTrond Myklebust pnfs_layoutreturn_before_put_layout_hdr(lo);
31413c13a6aSTrond Myklebust
3152b28a7beSElena Reshetova if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
316566f8737SPeng Tao if (!list_empty(&lo->plh_segs))
317566f8737SPeng Tao WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
3186622c3eaSTrond Myklebust pnfs_detach_layout_hdr(lo);
319b6d49ecdSTrond Myklebust i_state = inode->i_state;
320974cec8cSAndy Adamson spin_unlock(&inode->i_lock);
3216622c3eaSTrond Myklebust pnfs_free_layout_hdr(lo);
322b6d49ecdSTrond Myklebust /* Notify pnfs_destroy_layout_final() that we're done */
323b6d49ecdSTrond Myklebust if (i_state & (I_FREEING | I_CLEAR))
324b6d49ecdSTrond Myklebust wake_up_var(lo);
325974cec8cSAndy Adamson }
326cc6e5340SFred Isaman }
327974cec8cSAndy Adamson
328b5fdf841STrond Myklebust static struct inode *
pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr * lo)329b5fdf841STrond Myklebust pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo)
330b5fdf841STrond Myklebust {
331b5fdf841STrond Myklebust struct inode *inode = igrab(lo->plh_inode);
332b5fdf841STrond Myklebust if (inode)
333b5fdf841STrond Myklebust return inode;
334b5fdf841STrond Myklebust set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
335b5fdf841STrond Myklebust return NULL;
336b5fdf841STrond Myklebust }
337b5fdf841STrond Myklebust
3381bcf34fdSTrond Myklebust /*
3391bcf34fdSTrond Myklebust * Compare 2 layout stateid sequence ids, to see which is newer,
3401bcf34fdSTrond Myklebust * taking into account wraparound issues.
3411bcf34fdSTrond Myklebust */
pnfs_seqid_is_newer(u32 s1,u32 s2)3421bcf34fdSTrond Myklebust static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
3431bcf34fdSTrond Myklebust {
3441bcf34fdSTrond Myklebust return (s32)(s1 - s2) > 0;
3451bcf34fdSTrond Myklebust }
3461bcf34fdSTrond Myklebust
/*
 * Advance lo->plh_barrier to @newseq if the barrier is currently unset (0)
 * or @newseq is newer than it; otherwise leave it alone.
 */
static void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq)
{
	if (lo->plh_barrier != 0 &&
	    !pnfs_seqid_is_newer(newseq, lo->plh_barrier))
		return;
	lo->plh_barrier = newseq;
}
3521bcf34fdSTrond Myklebust
353ae5a459dSTrond Myklebust static void
pnfs_set_plh_return_info(struct pnfs_layout_hdr * lo,enum pnfs_iomode iomode,u32 seq)3544aab9732STrond Myklebust pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
3554aab9732STrond Myklebust u32 seq)
3564aab9732STrond Myklebust {
3574aab9732STrond Myklebust if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode)
3584aab9732STrond Myklebust iomode = IOMODE_ANY;
3594aab9732STrond Myklebust lo->plh_return_iomode = iomode;
3604aab9732STrond Myklebust set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
361e20772cbSTrond Myklebust /*
362e20772cbSTrond Myklebust * We must set lo->plh_return_seq to avoid livelocks with
363e20772cbSTrond Myklebust * pnfs_layout_need_return()
364e20772cbSTrond Myklebust */
365e20772cbSTrond Myklebust if (seq == 0)
366e20772cbSTrond Myklebust seq = be32_to_cpu(lo->plh_stateid.seqid);
367e20772cbSTrond Myklebust if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq))
3684aab9732STrond Myklebust lo->plh_return_seq = seq;
3691bcf34fdSTrond Myklebust pnfs_barrier_update(lo, seq);
3704aab9732STrond Myklebust }
3714aab9732STrond Myklebust
3724aab9732STrond Myklebust static void
pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr * lo)373ae5a459dSTrond Myklebust pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
374ae5a459dSTrond Myklebust {
3755466d214STrond Myklebust struct pnfs_layout_segment *lseg;
376ae5a459dSTrond Myklebust lo->plh_return_iomode = 0;
377ae5a459dSTrond Myklebust lo->plh_return_seq = 0;
378ae5a459dSTrond Myklebust clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
3795466d214STrond Myklebust list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
3805466d214STrond Myklebust if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
3815466d214STrond Myklebust continue;
3825466d214STrond Myklebust pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
3835466d214STrond Myklebust }
384ae5a459dSTrond Myklebust }
385ae5a459dSTrond Myklebust
pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr * lo)386362fb578STrond Myklebust static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
387362fb578STrond Myklebust {
388362fb578STrond Myklebust clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
389362fb578STrond Myklebust clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags);
390362fb578STrond Myklebust smp_mb__after_atomic();
391362fb578STrond Myklebust wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
392362fb578STrond Myklebust rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
393362fb578STrond Myklebust }
394362fb578STrond Myklebust
395fe1cf946STrond Myklebust static void
pnfs_clear_lseg_state(struct pnfs_layout_segment * lseg,struct list_head * free_me)396fe1cf946STrond Myklebust pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
397fe1cf946STrond Myklebust struct list_head *free_me)
398fe1cf946STrond Myklebust {
399fe1cf946STrond Myklebust clear_bit(NFS_LSEG_ROC, &lseg->pls_flags);
400fe1cf946STrond Myklebust clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
401fe1cf946STrond Myklebust if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags))
402fe1cf946STrond Myklebust pnfs_lseg_dec_and_remove_zero(lseg, free_me);
403fe1cf946STrond Myklebust if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
404fe1cf946STrond Myklebust pnfs_lseg_dec_and_remove_zero(lseg, free_me);
405fe1cf946STrond Myklebust }
406fe1cf946STrond Myklebust
4072454dfeaSTrond Myklebust /*
40830cb3ee2STrond Myklebust * Update the seqid of a layout stateid after receiving
40930cb3ee2STrond Myklebust * NFS4ERR_OLD_STATEID
4107380020eSTrond Myklebust */
nfs4_layout_refresh_old_stateid(nfs4_stateid * dst,struct pnfs_layout_range * dst_range,struct inode * inode)41130cb3ee2STrond Myklebust bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
412ecf84026STrond Myklebust struct pnfs_layout_range *dst_range,
413ecf84026STrond Myklebust struct inode *inode)
4147380020eSTrond Myklebust {
4157380020eSTrond Myklebust struct pnfs_layout_hdr *lo;
416c16467dcSTrond Myklebust struct pnfs_layout_range range = {
417c16467dcSTrond Myklebust .iomode = IOMODE_ANY,
418c16467dcSTrond Myklebust .offset = 0,
419c16467dcSTrond Myklebust .length = NFS4_MAX_UINT64,
420c16467dcSTrond Myklebust };
4217380020eSTrond Myklebust bool ret = false;
422c16467dcSTrond Myklebust LIST_HEAD(head);
423c16467dcSTrond Myklebust int err;
4247380020eSTrond Myklebust
4257380020eSTrond Myklebust spin_lock(&inode->i_lock);
4267380020eSTrond Myklebust lo = NFS_I(inode)->layout;
42730cb3ee2STrond Myklebust if (lo && pnfs_layout_is_valid(lo) &&
42830cb3ee2STrond Myklebust nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
42930cb3ee2STrond Myklebust /* Is our call using the most recent seqid? If so, bump it */
43030cb3ee2STrond Myklebust if (!nfs4_stateid_is_newer(&lo->plh_stateid, dst)) {
43130cb3ee2STrond Myklebust nfs4_stateid_seqid_inc(dst);
43230cb3ee2STrond Myklebust ret = true;
43330cb3ee2STrond Myklebust goto out;
43430cb3ee2STrond Myklebust }
43530cb3ee2STrond Myklebust /* Try to update the seqid to the most recent */
436c16467dcSTrond Myklebust err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
437c16467dcSTrond Myklebust if (err != -EBUSY) {
4387380020eSTrond Myklebust dst->seqid = lo->plh_stateid.seqid;
439ecf84026STrond Myklebust *dst_range = range;
4407380020eSTrond Myklebust ret = true;
4417380020eSTrond Myklebust }
442c16467dcSTrond Myklebust }
44330cb3ee2STrond Myklebust out:
4447380020eSTrond Myklebust spin_unlock(&inode->i_lock);
445c16467dcSTrond Myklebust pnfs_free_lseg_list(&head);
4467380020eSTrond Myklebust return ret;
4477380020eSTrond Myklebust }
4487380020eSTrond Myklebust
4497380020eSTrond Myklebust /*
4502454dfeaSTrond Myklebust * Mark a pnfs_layout_hdr and all associated layout segments as invalid
4512454dfeaSTrond Myklebust *
4522454dfeaSTrond Myklebust * In order to continue using the pnfs_layout_hdr, a full recovery
4532454dfeaSTrond Myklebust * is required.
4542454dfeaSTrond Myklebust * Note that caller must hold inode->i_lock.
4552454dfeaSTrond Myklebust */
4565f46be04STrond Myklebust int
pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr * lo,struct list_head * lseg_list)4572454dfeaSTrond Myklebust pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
4582454dfeaSTrond Myklebust struct list_head *lseg_list)
4592454dfeaSTrond Myklebust {
4602454dfeaSTrond Myklebust struct pnfs_layout_range range = {
4612454dfeaSTrond Myklebust .iomode = IOMODE_ANY,
4622454dfeaSTrond Myklebust .offset = 0,
4632454dfeaSTrond Myklebust .length = NFS4_MAX_UINT64,
4642454dfeaSTrond Myklebust };
465fe1cf946STrond Myklebust struct pnfs_layout_segment *lseg, *next;
4662454dfeaSTrond Myklebust
4672454dfeaSTrond Myklebust set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
468fe1cf946STrond Myklebust list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
469fe1cf946STrond Myklebust pnfs_clear_lseg_state(lseg, lseg_list);
4705466d214STrond Myklebust pnfs_clear_layoutreturn_info(lo);
47168f74479STrond Myklebust pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
472880265c7STrond Myklebust set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
473362fb578STrond Myklebust if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
474362fb578STrond Myklebust !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
475362fb578STrond Myklebust pnfs_clear_layoutreturn_waitbit(lo);
476fe1cf946STrond Myklebust return !list_empty(&lo->plh_segs);
4772454dfeaSTrond Myklebust }
4782454dfeaSTrond Myklebust
479b9e028fdSTrond Myklebust static int
pnfs_iomode_to_fail_bit(u32 iomode)480b9e028fdSTrond Myklebust pnfs_iomode_to_fail_bit(u32 iomode)
481b9e028fdSTrond Myklebust {
482b9e028fdSTrond Myklebust return iomode == IOMODE_RW ?
483b9e028fdSTrond Myklebust NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
484b9e028fdSTrond Myklebust }
485b9e028fdSTrond Myklebust
486b9e028fdSTrond Myklebust static void
pnfs_layout_set_fail_bit(struct pnfs_layout_hdr * lo,int fail_bit)4873e621214STrond Myklebust pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
488b9e028fdSTrond Myklebust {
48925c75333STrond Myklebust lo->plh_retry_timestamp = jiffies;
49039e88fcfSYanchuan Nian if (!test_and_set_bit(fail_bit, &lo->plh_flags))
4912b28a7beSElena Reshetova refcount_inc(&lo->plh_refcount);
4923e621214STrond Myklebust }
4933e621214STrond Myklebust
4943e621214STrond Myklebust static void
pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr * lo,int fail_bit)4953e621214STrond Myklebust pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
4963e621214STrond Myklebust {
4973e621214STrond Myklebust if (test_and_clear_bit(fail_bit, &lo->plh_flags))
4982b28a7beSElena Reshetova refcount_dec(&lo->plh_refcount);
4993e621214STrond Myklebust }
5003e621214STrond Myklebust
5013e621214STrond Myklebust static void
pnfs_layout_io_set_failed(struct pnfs_layout_hdr * lo,u32 iomode)5023e621214STrond Myklebust pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
5033e621214STrond Myklebust {
5043e621214STrond Myklebust struct inode *inode = lo->plh_inode;
505115ce575STrond Myklebust struct pnfs_layout_range range = {
506115ce575STrond Myklebust .iomode = iomode,
507115ce575STrond Myklebust .offset = 0,
508115ce575STrond Myklebust .length = NFS4_MAX_UINT64,
509115ce575STrond Myklebust };
510115ce575STrond Myklebust LIST_HEAD(head);
5113e621214STrond Myklebust
5123e621214STrond Myklebust spin_lock(&inode->i_lock);
5133e621214STrond Myklebust pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
51428d4411fSOlga Kornievskaia pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
5153e621214STrond Myklebust spin_unlock(&inode->i_lock);
516115ce575STrond Myklebust pnfs_free_lseg_list(&head);
517b9e028fdSTrond Myklebust dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
518b9e028fdSTrond Myklebust iomode == IOMODE_RW ? "RW" : "READ");
519b9e028fdSTrond Myklebust }
520b9e028fdSTrond Myklebust
521b9e028fdSTrond Myklebust static bool
pnfs_layout_io_test_failed(struct pnfs_layout_hdr * lo,u32 iomode)522b9e028fdSTrond Myklebust pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
523b9e028fdSTrond Myklebust {
52425c75333STrond Myklebust unsigned long start, end;
5253e621214STrond Myklebust int fail_bit = pnfs_iomode_to_fail_bit(iomode);
5263e621214STrond Myklebust
5273e621214STrond Myklebust if (test_bit(fail_bit, &lo->plh_flags) == 0)
52825c75333STrond Myklebust return false;
52925c75333STrond Myklebust end = jiffies;
53025c75333STrond Myklebust start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT;
53125c75333STrond Myklebust if (!time_in_range(lo->plh_retry_timestamp, start, end)) {
53225c75333STrond Myklebust /* It is time to retry the failed layoutgets */
5333e621214STrond Myklebust pnfs_layout_clear_fail_bit(lo, fail_bit);
53425c75333STrond Myklebust return false;
53525c75333STrond Myklebust }
53625c75333STrond Myklebust return true;
537b9e028fdSTrond Myklebust }
538b9e028fdSTrond Myklebust
539974cec8cSAndy Adamson static void
pnfs_init_lseg(struct pnfs_layout_hdr * lo,struct pnfs_layout_segment * lseg,const struct pnfs_layout_range * range,const nfs4_stateid * stateid)540119cef97STrond Myklebust pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg,
541119cef97STrond Myklebust const struct pnfs_layout_range *range,
542119cef97STrond Myklebust const nfs4_stateid *stateid)
543974cec8cSAndy Adamson {
544566052c5SFred Isaman INIT_LIST_HEAD(&lseg->pls_list);
545a9bae566SPeng Tao INIT_LIST_HEAD(&lseg->pls_lc_list);
546a9901899STrond Myklebust INIT_LIST_HEAD(&lseg->pls_commits);
547eba6dd69SElena Reshetova refcount_set(&lseg->pls_refcount, 1);
5484541d16cSFred Isaman set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
549566052c5SFred Isaman lseg->pls_layout = lo;
550119cef97STrond Myklebust lseg->pls_range = *range;
551119cef97STrond Myklebust lseg->pls_seq = be32_to_cpu(stateid->seqid);
552974cec8cSAndy Adamson }
553974cec8cSAndy Adamson
pnfs_free_lseg(struct pnfs_layout_segment * lseg)554905ca191STrond Myklebust static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
555974cec8cSAndy Adamson {
55668f74479STrond Myklebust if (lseg != NULL) {
55768f74479STrond Myklebust struct inode *inode = lseg->pls_layout->plh_inode;
55868f74479STrond Myklebust NFS_SERVER(inode)->pnfs_curr_ld->free_lseg(lseg);
55968f74479STrond Myklebust }
560974cec8cSAndy Adamson }
561974cec8cSAndy Adamson
562d684d2aeSFred Isaman static void
pnfs_layout_remove_lseg(struct pnfs_layout_hdr * lo,struct pnfs_layout_segment * lseg)56357036a37STrond Myklebust pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
56457036a37STrond Myklebust struct pnfs_layout_segment *lseg)
565974cec8cSAndy Adamson {
566d20581aaSBenny Halevy WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
567d684d2aeSFred Isaman list_del_init(&lseg->pls_list);
5688f0d27dcSTrond Myklebust /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
5692b28a7beSElena Reshetova refcount_dec(&lo->plh_refcount);
570abb3e1c8STrond Myklebust if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
571abb3e1c8STrond Myklebust return;
5727b650994STrond Myklebust if (list_empty(&lo->plh_segs) &&
5737b650994STrond Myklebust !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) &&
5747b650994STrond Myklebust !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
575334a8f37STrond Myklebust if (atomic_read(&lo->plh_outstanding) == 0)
5762d148c7eSTrond Myklebust set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
577173f77e9STrond Myklebust clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
5782d148c7eSTrond Myklebust }
5794541d16cSFred Isaman }
580d684d2aeSFred Isaman
58168f74479STrond Myklebust static bool
pnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr * lo,struct pnfs_layout_segment * lseg)58268f74479STrond Myklebust pnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr *lo,
58368f74479STrond Myklebust struct pnfs_layout_segment *lseg)
58468f74479STrond Myklebust {
58568f74479STrond Myklebust if (test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
58668f74479STrond Myklebust pnfs_layout_is_valid(lo)) {
5874aab9732STrond Myklebust pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
58868f74479STrond Myklebust list_move_tail(&lseg->pls_list, &lo->plh_return_segs);
58968f74479STrond Myklebust return true;
59068f74479STrond Myklebust }
59168f74479STrond Myklebust return false;
59268f74479STrond Myklebust }
59368f74479STrond Myklebust
594bae724efSFred Isaman void
pnfs_put_lseg(struct pnfs_layout_segment * lseg)5959369a431STrond Myklebust pnfs_put_lseg(struct pnfs_layout_segment *lseg)
596d684d2aeSFred Isaman {
59757036a37STrond Myklebust struct pnfs_layout_hdr *lo;
598d684d2aeSFred Isaman struct inode *inode;
599d684d2aeSFred Isaman
600d684d2aeSFred Isaman if (!lseg)
601d684d2aeSFred Isaman return;
602d684d2aeSFred Isaman
603d684d2aeSFred Isaman dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
604eba6dd69SElena Reshetova refcount_read(&lseg->pls_refcount),
605d684d2aeSFred Isaman test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
6064ef2e4f8STrond Myklebust
60757036a37STrond Myklebust lo = lseg->pls_layout;
60857036a37STrond Myklebust inode = lo->plh_inode;
6094ef2e4f8STrond Myklebust
610eba6dd69SElena Reshetova if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
6118f0d27dcSTrond Myklebust pnfs_get_layout_hdr(lo);
61257036a37STrond Myklebust pnfs_layout_remove_lseg(lo, lseg);
61368f74479STrond Myklebust if (pnfs_cache_lseg_for_layoutreturn(lo, lseg))
61468f74479STrond Myklebust lseg = NULL;
615d684d2aeSFred Isaman spin_unlock(&inode->i_lock);
616905ca191STrond Myklebust pnfs_free_lseg(lseg);
6178f0d27dcSTrond Myklebust pnfs_put_layout_hdr(lo);
618d684d2aeSFred Isaman }
619974cec8cSAndy Adamson }
6209369a431STrond Myklebust EXPORT_SYMBOL_GPL(pnfs_put_lseg);
621974cec8cSAndy Adamson
622fb3296ebSBenny Halevy /*
623fb3296ebSBenny Halevy * is l2 fully contained in l1?
624fb3296ebSBenny Halevy * start1 end1
625fb3296ebSBenny Halevy * [----------------------------------)
626fb3296ebSBenny Halevy * start2 end2
627fb3296ebSBenny Halevy * [----------------)
628fb3296ebSBenny Halevy */
6293cb2df17STrond Myklebust static bool
pnfs_lseg_range_contained(const struct pnfs_layout_range * l1,const struct pnfs_layout_range * l2)6307dc0ac70STrond Myklebust pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
6313cb2df17STrond Myklebust const struct pnfs_layout_range *l2)
632fb3296ebSBenny Halevy {
633fb3296ebSBenny Halevy u64 start1 = l1->offset;
63417822b20STrond Myklebust u64 end1 = pnfs_end_offset(start1, l1->length);
635fb3296ebSBenny Halevy u64 start2 = l2->offset;
63617822b20STrond Myklebust u64 end2 = pnfs_end_offset(start2, l2->length);
637fb3296ebSBenny Halevy
638fb3296ebSBenny Halevy return (start1 <= start2) && (end1 >= end2);
639fb3296ebSBenny Halevy }
640fb3296ebSBenny Halevy
pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment * lseg,struct list_head * tmp_list)64124956804STrond Myklebust static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
64224956804STrond Myklebust struct list_head *tmp_list)
64324956804STrond Myklebust {
644eba6dd69SElena Reshetova if (!refcount_dec_and_test(&lseg->pls_refcount))
64524956804STrond Myklebust return false;
64624956804STrond Myklebust pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
64724956804STrond Myklebust list_add(&lseg->pls_list, tmp_list);
64824956804STrond Myklebust return true;
64924956804STrond Myklebust }
65024956804STrond Myklebust
6514541d16cSFred Isaman /* Returns 1 if lseg is removed from list, 0 otherwise */
mark_lseg_invalid(struct pnfs_layout_segment * lseg,struct list_head * tmp_list)6524541d16cSFred Isaman static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
6534541d16cSFred Isaman struct list_head *tmp_list)
6544541d16cSFred Isaman {
6554541d16cSFred Isaman int rv = 0;
6564541d16cSFred Isaman
6574541d16cSFred Isaman if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
6584541d16cSFred Isaman /* Remove the reference keeping the lseg in the
6594541d16cSFred Isaman * list. It will now be removed when all
6604541d16cSFred Isaman * outstanding io is finished.
6614541d16cSFred Isaman */
662d684d2aeSFred Isaman dprintk("%s: lseg %p ref %d\n", __func__, lseg,
663eba6dd69SElena Reshetova refcount_read(&lseg->pls_refcount));
66424956804STrond Myklebust if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
665d684d2aeSFred Isaman rv = 1;
666d684d2aeSFred Isaman }
6674541d16cSFred Isaman return rv;
6684541d16cSFred Isaman }
6694541d16cSFred Isaman
670e036f464STrond Myklebust static bool
pnfs_should_free_range(const struct pnfs_layout_range * lseg_range,const struct pnfs_layout_range * recall_range)671e036f464STrond Myklebust pnfs_should_free_range(const struct pnfs_layout_range *lseg_range,
672e036f464STrond Myklebust const struct pnfs_layout_range *recall_range)
673e036f464STrond Myklebust {
674e036f464STrond Myklebust return (recall_range->iomode == IOMODE_ANY ||
675e036f464STrond Myklebust lseg_range->iomode == recall_range->iomode) &&
676e036f464STrond Myklebust pnfs_lseg_range_intersecting(lseg_range, recall_range);
677e036f464STrond Myklebust }
678e036f464STrond Myklebust
679e036f464STrond Myklebust static bool
pnfs_match_lseg_recall(const struct pnfs_layout_segment * lseg,const struct pnfs_layout_range * recall_range,u32 seq)680e036f464STrond Myklebust pnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg,
681e036f464STrond Myklebust const struct pnfs_layout_range *recall_range,
682e036f464STrond Myklebust u32 seq)
683e036f464STrond Myklebust {
684e036f464STrond Myklebust if (seq != 0 && pnfs_seqid_is_newer(lseg->pls_seq, seq))
685e036f464STrond Myklebust return false;
686e036f464STrond Myklebust if (recall_range == NULL)
687e036f464STrond Myklebust return true;
688e036f464STrond Myklebust return pnfs_should_free_range(&lseg->pls_range, recall_range);
689e036f464STrond Myklebust }
690e036f464STrond Myklebust
6916d597e17SJeff Layton /**
6926d597e17SJeff Layton * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later
6936d597e17SJeff Layton * @lo: layout header containing the lsegs
6946d597e17SJeff Layton * @tmp_list: list head where doomed lsegs should go
6956d597e17SJeff Layton * @recall_range: optional recall range argument to match (may be NULL)
6966d597e17SJeff Layton * @seq: only invalidate lsegs obtained prior to this sequence (may be 0)
6976d597e17SJeff Layton *
6986d597e17SJeff Layton * Walk the list of lsegs in the layout header, and tear down any that should
6996d597e17SJeff Layton * be destroyed. If "recall_range" is specified then the segment must match
7006d597e17SJeff Layton * that range. If "seq" is non-zero, then only match segments that were handed
7016d597e17SJeff Layton * out at or before that sequence.
7026d597e17SJeff Layton *
7036d597e17SJeff Layton * Returns number of matching invalid lsegs remaining in list after scanning
7046d597e17SJeff Layton * it and purging them.
7054541d16cSFred Isaman */
70643f1b3daSFred Isaman int
pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr * lo,struct list_head * tmp_list,const struct pnfs_layout_range * recall_range,u32 seq)70749a85061STrond Myklebust pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
7084541d16cSFred Isaman struct list_head *tmp_list,
7096d597e17SJeff Layton const struct pnfs_layout_range *recall_range,
7106d597e17SJeff Layton u32 seq)
711974cec8cSAndy Adamson {
712974cec8cSAndy Adamson struct pnfs_layout_segment *lseg, *next;
713b739a5bdSTrond Myklebust struct nfs_server *server = NFS_SERVER(lo->plh_inode);
71471b39854STrond Myklebust int remaining = 0;
715974cec8cSAndy Adamson
716974cec8cSAndy Adamson dprintk("%s:Begin lo %p\n", __func__, lo);
717974cec8cSAndy Adamson
7188006bfbaSTrond Myklebust if (list_empty(&lo->plh_segs))
71938511722SFred Isaman return 0;
7204541d16cSFred Isaman list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
721e036f464STrond Myklebust if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {
7226d597e17SJeff Layton dprintk("%s: freeing lseg %p iomode %d seq %u "
7234541d16cSFred Isaman "offset %llu length %llu\n", __func__,
7246d597e17SJeff Layton lseg, lseg->pls_range.iomode, lseg->pls_seq,
7256d597e17SJeff Layton lseg->pls_range.offset, lseg->pls_range.length);
726b739a5bdSTrond Myklebust if (mark_lseg_invalid(lseg, tmp_list))
727b739a5bdSTrond Myklebust continue;
72871b39854STrond Myklebust remaining++;
729b739a5bdSTrond Myklebust pnfs_lseg_cancel_io(server, lseg);
730974cec8cSAndy Adamson }
73171b39854STrond Myklebust dprintk("%s:Return %i\n", __func__, remaining);
73271b39854STrond Myklebust return remaining;
733974cec8cSAndy Adamson }
734974cec8cSAndy Adamson
73568f74479STrond Myklebust static void
pnfs_free_returned_lsegs(struct pnfs_layout_hdr * lo,struct list_head * free_me,const struct pnfs_layout_range * range,u32 seq)73668f74479STrond Myklebust pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
73768f74479STrond Myklebust struct list_head *free_me,
73868f74479STrond Myklebust const struct pnfs_layout_range *range,
73968f74479STrond Myklebust u32 seq)
74068f74479STrond Myklebust {
74168f74479STrond Myklebust struct pnfs_layout_segment *lseg, *next;
74268f74479STrond Myklebust
74368f74479STrond Myklebust list_for_each_entry_safe(lseg, next, &lo->plh_return_segs, pls_list) {
74468f74479STrond Myklebust if (pnfs_match_lseg_recall(lseg, range, seq))
74568f74479STrond Myklebust list_move_tail(&lseg->pls_list, free_me);
74668f74479STrond Myklebust }
74768f74479STrond Myklebust }
74868f74479STrond Myklebust
749f49f9baaSFred Isaman /* note free_me must contain lsegs from a single layout_hdr */
75043f1b3daSFred Isaman void
pnfs_free_lseg_list(struct list_head * free_me)7514541d16cSFred Isaman pnfs_free_lseg_list(struct list_head *free_me)
752974cec8cSAndy Adamson {
7534541d16cSFred Isaman struct pnfs_layout_segment *lseg, *tmp;
754974cec8cSAndy Adamson
755f49f9baaSFred Isaman if (list_empty(free_me))
756f49f9baaSFred Isaman return;
757f49f9baaSFred Isaman
7584541d16cSFred Isaman list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
759566052c5SFred Isaman list_del(&lseg->pls_list);
760905ca191STrond Myklebust pnfs_free_lseg(lseg);
761974cec8cSAndy Adamson }
762974cec8cSAndy Adamson }
763974cec8cSAndy Adamson
__pnfs_destroy_layout(struct nfs_inode * nfsi)764b6d49ecdSTrond Myklebust static struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi)
765e5e94017SBenny Halevy {
766e5e94017SBenny Halevy struct pnfs_layout_hdr *lo;
767974cec8cSAndy Adamson LIST_HEAD(tmp_list);
768e5e94017SBenny Halevy
769e5e94017SBenny Halevy spin_lock(&nfsi->vfs_inode.i_lock);
770e5e94017SBenny Halevy lo = nfsi->layout;
771e5e94017SBenny Halevy if (lo) {
7723e621214STrond Myklebust pnfs_get_layout_hdr(lo);
7732454dfeaSTrond Myklebust pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
7743e621214STrond Myklebust pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
7753e621214STrond Myklebust pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
776e5e94017SBenny Halevy spin_unlock(&nfsi->vfs_inode.i_lock);
777974cec8cSAndy Adamson pnfs_free_lseg_list(&tmp_list);
7781f18b82cSTrond Myklebust nfs_commit_inode(&nfsi->vfs_inode, 0);
7793e621214STrond Myklebust pnfs_put_layout_hdr(lo);
7803e621214STrond Myklebust } else
7813e621214STrond Myklebust spin_unlock(&nfsi->vfs_inode.i_lock);
782b6d49ecdSTrond Myklebust return lo;
783b6d49ecdSTrond Myklebust }
784b6d49ecdSTrond Myklebust
/*
 * Exported entry point: destroy the layout attached to @nfsi without
 * waiting for the layout header to be detached from the inode.
 */
void pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	/* The returned header pointer is of no interest here */
	(void)__pnfs_destroy_layout(nfsi);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
790e5e94017SBenny Halevy
pnfs_layout_removed(struct nfs_inode * nfsi,struct pnfs_layout_hdr * lo)791b6d49ecdSTrond Myklebust static bool pnfs_layout_removed(struct nfs_inode *nfsi,
792b6d49ecdSTrond Myklebust struct pnfs_layout_hdr *lo)
793b6d49ecdSTrond Myklebust {
794b6d49ecdSTrond Myklebust bool ret;
795b6d49ecdSTrond Myklebust
796b6d49ecdSTrond Myklebust spin_lock(&nfsi->vfs_inode.i_lock);
797b6d49ecdSTrond Myklebust ret = nfsi->layout != lo;
798b6d49ecdSTrond Myklebust spin_unlock(&nfsi->vfs_inode.i_lock);
799b6d49ecdSTrond Myklebust return ret;
800b6d49ecdSTrond Myklebust }
801b6d49ecdSTrond Myklebust
/*
 * Destroy the layout attached to @nfsi and, if there was one, block until
 * the inode no longer references that layout header.
 */
void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *hdr = __pnfs_destroy_layout(nfsi);

	if (!hdr)
		return;

	wait_var_event(hdr, pnfs_layout_removed(nfsi, hdr));
}
809b6d49ecdSTrond Myklebust
810fd9a8d71STrond Myklebust static bool
pnfs_layout_add_bulk_destroy_list(struct inode * inode,struct list_head * layout_list)811fd9a8d71STrond Myklebust pnfs_layout_add_bulk_destroy_list(struct inode *inode,
812fd9a8d71STrond Myklebust struct list_head *layout_list)
813fd9a8d71STrond Myklebust {
814fd9a8d71STrond Myklebust struct pnfs_layout_hdr *lo;
815fd9a8d71STrond Myklebust bool ret = false;
816fd9a8d71STrond Myklebust
817fd9a8d71STrond Myklebust spin_lock(&inode->i_lock);
818fd9a8d71STrond Myklebust lo = NFS_I(inode)->layout;
819fd9a8d71STrond Myklebust if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
820fd9a8d71STrond Myklebust pnfs_get_layout_hdr(lo);
821fd9a8d71STrond Myklebust list_add(&lo->plh_bulk_destroy, layout_list);
822fd9a8d71STrond Myklebust ret = true;
823fd9a8d71STrond Myklebust }
824fd9a8d71STrond Myklebust spin_unlock(&inode->i_lock);
825fd9a8d71STrond Myklebust return ret;
826fd9a8d71STrond Myklebust }
827fd9a8d71STrond Myklebust
828fd9a8d71STrond Myklebust /* Caller must hold rcu_read_lock and clp->cl_lock */
829fd9a8d71STrond Myklebust static int
pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client * clp,struct nfs_server * server,struct list_head * layout_list)830fd9a8d71STrond Myklebust pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
831fd9a8d71STrond Myklebust struct nfs_server *server,
832fd9a8d71STrond Myklebust struct list_head *layout_list)
8335085607dSTrond Myklebust __must_hold(&clp->cl_lock)
8345085607dSTrond Myklebust __must_hold(RCU)
835fd9a8d71STrond Myklebust {
836fd9a8d71STrond Myklebust struct pnfs_layout_hdr *lo, *next;
837fd9a8d71STrond Myklebust struct inode *inode;
838fd9a8d71STrond Myklebust
839fd9a8d71STrond Myklebust list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
8405085607dSTrond Myklebust if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
8415085607dSTrond Myklebust test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) ||
8425085607dSTrond Myklebust !list_empty(&lo->plh_bulk_destroy))
843b85f5620STrond Myklebust continue;
8445085607dSTrond Myklebust /* If the sb is being destroyed, just bail */
8455085607dSTrond Myklebust if (!nfs_sb_active(server->super))
8465085607dSTrond Myklebust break;
847b5fdf841STrond Myklebust inode = pnfs_grab_inode_layout_hdr(lo);
8485085607dSTrond Myklebust if (inode != NULL) {
849cf6605d1STrond Myklebust if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags))
850cf6605d1STrond Myklebust list_del_rcu(&lo->plh_layouts);
8515085607dSTrond Myklebust if (pnfs_layout_add_bulk_destroy_list(inode,
8525085607dSTrond Myklebust layout_list))
853fd9a8d71STrond Myklebust continue;
854fd9a8d71STrond Myklebust rcu_read_unlock();
855fd9a8d71STrond Myklebust spin_unlock(&clp->cl_lock);
856fd9a8d71STrond Myklebust iput(inode);
8575085607dSTrond Myklebust } else {
8585085607dSTrond Myklebust rcu_read_unlock();
8595085607dSTrond Myklebust spin_unlock(&clp->cl_lock);
8605085607dSTrond Myklebust }
8615085607dSTrond Myklebust nfs_sb_deactive(server->super);
862fd9a8d71STrond Myklebust spin_lock(&clp->cl_lock);
863fd9a8d71STrond Myklebust rcu_read_lock();
864fd9a8d71STrond Myklebust return -EAGAIN;
865fd9a8d71STrond Myklebust }
866fd9a8d71STrond Myklebust return 0;
867fd9a8d71STrond Myklebust }
868fd9a8d71STrond Myklebust
869fd9a8d71STrond Myklebust static int
pnfs_layout_free_bulk_destroy_list(struct list_head * layout_list,bool is_bulk_recall)870fd9a8d71STrond Myklebust pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
871fd9a8d71STrond Myklebust bool is_bulk_recall)
872fd9a8d71STrond Myklebust {
873fd9a8d71STrond Myklebust struct pnfs_layout_hdr *lo;
874fd9a8d71STrond Myklebust struct inode *inode;
875fd9a8d71STrond Myklebust LIST_HEAD(lseg_list);
876fd9a8d71STrond Myklebust int ret = 0;
877fd9a8d71STrond Myklebust
878fd9a8d71STrond Myklebust while (!list_empty(layout_list)) {
879fd9a8d71STrond Myklebust lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
880fd9a8d71STrond Myklebust plh_bulk_destroy);
881fd9a8d71STrond Myklebust dprintk("%s freeing layout for inode %lu\n", __func__,
882fd9a8d71STrond Myklebust lo->plh_inode->i_ino);
883fd9a8d71STrond Myklebust inode = lo->plh_inode;
8847c5d1875SChristoph Hellwig
8857c5d1875SChristoph Hellwig pnfs_layoutcommit_inode(inode, false);
8867c5d1875SChristoph Hellwig
887fd9a8d71STrond Myklebust spin_lock(&inode->i_lock);
888fd9a8d71STrond Myklebust list_del_init(&lo->plh_bulk_destroy);
8899fd4b9fcSTrond Myklebust if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
890fd9a8d71STrond Myklebust if (is_bulk_recall)
891fd9a8d71STrond Myklebust set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
892fd9a8d71STrond Myklebust ret = -EAGAIN;
8939fd4b9fcSTrond Myklebust }
894fd9a8d71STrond Myklebust spin_unlock(&inode->i_lock);
895fd9a8d71STrond Myklebust pnfs_free_lseg_list(&lseg_list);
896b20135d0STrond Myklebust /* Free all lsegs that are attached to commit buckets */
897b20135d0STrond Myklebust nfs_commit_inode(inode, 0);
898fd9a8d71STrond Myklebust pnfs_put_layout_hdr(lo);
8995085607dSTrond Myklebust nfs_iput_and_deactive(inode);
900fd9a8d71STrond Myklebust }
901fd9a8d71STrond Myklebust return ret;
902fd9a8d71STrond Myklebust }
903fd9a8d71STrond Myklebust
904fd9a8d71STrond Myklebust int
pnfs_destroy_layouts_byfsid(struct nfs_client * clp,struct nfs_fsid * fsid,bool is_recall)905fd9a8d71STrond Myklebust pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
906fd9a8d71STrond Myklebust struct nfs_fsid *fsid,
907fd9a8d71STrond Myklebust bool is_recall)
908fd9a8d71STrond Myklebust {
909fd9a8d71STrond Myklebust struct nfs_server *server;
910fd9a8d71STrond Myklebust LIST_HEAD(layout_list);
911fd9a8d71STrond Myklebust
912fd9a8d71STrond Myklebust spin_lock(&clp->cl_lock);
913fd9a8d71STrond Myklebust rcu_read_lock();
914fd9a8d71STrond Myklebust restart:
915fd9a8d71STrond Myklebust list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
916fd9a8d71STrond Myklebust if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
917fd9a8d71STrond Myklebust continue;
918fd9a8d71STrond Myklebust if (pnfs_layout_bulk_destroy_byserver_locked(clp,
919fd9a8d71STrond Myklebust server,
920fd9a8d71STrond Myklebust &layout_list) != 0)
921fd9a8d71STrond Myklebust goto restart;
922fd9a8d71STrond Myklebust }
923fd9a8d71STrond Myklebust rcu_read_unlock();
924fd9a8d71STrond Myklebust spin_unlock(&clp->cl_lock);
925fd9a8d71STrond Myklebust
926fd9a8d71STrond Myklebust if (list_empty(&layout_list))
927fd9a8d71STrond Myklebust return 0;
928fd9a8d71STrond Myklebust return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
929fd9a8d71STrond Myklebust }
930fd9a8d71STrond Myklebust
931fd9a8d71STrond Myklebust int
pnfs_destroy_layouts_byclid(struct nfs_client * clp,bool is_recall)932fd9a8d71STrond Myklebust pnfs_destroy_layouts_byclid(struct nfs_client *clp,
933fd9a8d71STrond Myklebust bool is_recall)
934fd9a8d71STrond Myklebust {
935fd9a8d71STrond Myklebust struct nfs_server *server;
936fd9a8d71STrond Myklebust LIST_HEAD(layout_list);
937fd9a8d71STrond Myklebust
938fd9a8d71STrond Myklebust spin_lock(&clp->cl_lock);
939fd9a8d71STrond Myklebust rcu_read_lock();
940fd9a8d71STrond Myklebust restart:
941fd9a8d71STrond Myklebust list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
942fd9a8d71STrond Myklebust if (pnfs_layout_bulk_destroy_byserver_locked(clp,
943fd9a8d71STrond Myklebust server,
944fd9a8d71STrond Myklebust &layout_list) != 0)
945fd9a8d71STrond Myklebust goto restart;
946fd9a8d71STrond Myklebust }
947fd9a8d71STrond Myklebust rcu_read_unlock();
948fd9a8d71STrond Myklebust spin_unlock(&clp->cl_lock);
949fd9a8d71STrond Myklebust
950fd9a8d71STrond Myklebust if (list_empty(&layout_list))
951fd9a8d71STrond Myklebust return 0;
952fd9a8d71STrond Myklebust return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
953fd9a8d71STrond Myklebust }
954fd9a8d71STrond Myklebust
955974cec8cSAndy Adamson /*
9569f266451SWang Qing * Called by the state manager to remove all layouts established under an
957974cec8cSAndy Adamson * expired lease.
958974cec8cSAndy Adamson */
959974cec8cSAndy Adamson void
pnfs_destroy_all_layouts(struct nfs_client * clp)960974cec8cSAndy Adamson pnfs_destroy_all_layouts(struct nfs_client *clp)
961974cec8cSAndy Adamson {
962c47abcf8SAndy Adamson nfs4_deviceid_mark_client_invalid(clp);
963c47abcf8SAndy Adamson nfs4_deviceid_purge_client(clp);
964c47abcf8SAndy Adamson
965fd9a8d71STrond Myklebust pnfs_destroy_layouts_byclid(clp, false);
966974cec8cSAndy Adamson }
967974cec8cSAndy Adamson
96859b56394STrond Myklebust static void
pnfs_set_layout_cred(struct pnfs_layout_hdr * lo,const struct cred * cred)96959b56394STrond Myklebust pnfs_set_layout_cred(struct pnfs_layout_hdr *lo, const struct cred *cred)
97059b56394STrond Myklebust {
97159b56394STrond Myklebust const struct cred *old;
97259b56394STrond Myklebust
97359b56394STrond Myklebust if (cred && cred_fscmp(lo->plh_lc_cred, cred) != 0) {
97459b56394STrond Myklebust old = xchg(&lo->plh_lc_cred, get_cred(cred));
97559b56394STrond Myklebust put_cred(old);
97659b56394STrond Myklebust }
97759b56394STrond Myklebust }
97859b56394STrond Myklebust
979fd6002e9SFred Isaman /* update lo->plh_stateid with new if is more recent */
98043f1b3daSFred Isaman void
pnfs_set_layout_stateid(struct pnfs_layout_hdr * lo,const nfs4_stateid * new,const struct cred * cred,bool update_barrier)98143f1b3daSFred Isaman pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
98259b56394STrond Myklebust const struct cred *cred, bool update_barrier)
983b1f69b75SAndy Adamson {
984aa95edf3STrond Myklebust u32 oldseq = be32_to_cpu(lo->plh_stateid.seqid);
985aa95edf3STrond Myklebust u32 newseq = be32_to_cpu(new->seqid);
9862a59a041STrond Myklebust
9872a59a041STrond Myklebust if (!pnfs_layout_is_valid(lo)) {
98859b56394STrond Myklebust pnfs_set_layout_cred(lo, cred);
9892a59a041STrond Myklebust nfs4_stateid_copy(&lo->plh_stateid, new);
9902a59a041STrond Myklebust lo->plh_barrier = newseq;
9912a59a041STrond Myklebust pnfs_clear_layoutreturn_info(lo);
9922a59a041STrond Myklebust clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
9932a59a041STrond Myklebust return;
9942a59a041STrond Myklebust }
995aa95edf3STrond Myklebust
996aa95edf3STrond Myklebust if (pnfs_seqid_is_newer(newseq, oldseq))
997f597c537STrond Myklebust nfs4_stateid_copy(&lo->plh_stateid, new);
998aa95edf3STrond Myklebust
999aa95edf3STrond Myklebust if (update_barrier) {
1000aa95edf3STrond Myklebust pnfs_barrier_update(lo, newseq);
1001aa95edf3STrond Myklebust return;
1002aa95edf3STrond Myklebust }
1003ecebb80bSTrond Myklebust /*
1004ecebb80bSTrond Myklebust * Because of wraparound, we want to keep the barrier
1005aa95edf3STrond Myklebust * "close" to the current seqids. We really only want to
1006aa95edf3STrond Myklebust * get here from a layoutget call.
100743f1b3daSFred Isaman */
1008aa95edf3STrond Myklebust if (atomic_read(&lo->plh_outstanding) == 1)
1009aa95edf3STrond Myklebust pnfs_barrier_update(lo, be32_to_cpu(lo->plh_stateid.seqid));
101043f1b3daSFred Isaman }
1011b1f69b75SAndy Adamson
1012cf7d63f1SFred Isaman static bool
pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr * lo,const nfs4_stateid * stateid)101319c54abaSTrond Myklebust pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
101419c54abaSTrond Myklebust const nfs4_stateid *stateid)
1015cf7d63f1SFred Isaman {
101625a1a621STrond Myklebust u32 seqid = be32_to_cpu(stateid->seqid);
101725a1a621STrond Myklebust
1018d6236a98STrond Myklebust return lo->plh_barrier && pnfs_seqid_is_newer(lo->plh_barrier, seqid);
101925a1a621STrond Myklebust }
102019c54abaSTrond Myklebust
102119c54abaSTrond Myklebust /* lget is set to 1 if called from inside send_layoutget call chain */
102219c54abaSTrond Myklebust static bool
pnfs_layoutgets_blocked(const struct pnfs_layout_hdr * lo)1023e1c06f80STrond Myklebust pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
102419c54abaSTrond Myklebust {
1025f7e8917aSFred Isaman return lo->plh_block_lgets ||
1026e1c06f80STrond Myklebust test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
1027cf7d63f1SFred Isaman }
1028cf7d63f1SFred Isaman
10295e36e2a9SFred Isaman static struct nfs_server *
pnfs_find_server(struct inode * inode,struct nfs_open_context * ctx)10305e36e2a9SFred Isaman pnfs_find_server(struct inode *inode, struct nfs_open_context *ctx)
10315e36e2a9SFred Isaman {
10325e36e2a9SFred Isaman struct nfs_server *server;
10335e36e2a9SFred Isaman
103478746a38SFred Isaman if (inode) {
10355e36e2a9SFred Isaman server = NFS_SERVER(inode);
103678746a38SFred Isaman } else {
10375e36e2a9SFred Isaman struct dentry *parent_dir = dget_parent(ctx->dentry);
10385e36e2a9SFred Isaman server = NFS_SERVER(parent_dir->d_inode);
10395e36e2a9SFred Isaman dput(parent_dir);
10405e36e2a9SFred Isaman }
10415e36e2a9SFred Isaman return server;
10425e36e2a9SFred Isaman }
10435e36e2a9SFred Isaman
/*
 * Free a page array built by nfs4_alloc_pages().
 *
 * @pages: array of page pointers, or NULL (in which case nothing is done)
 * @size:  number of slots in @pages that may be examined
 *
 * Pages are released from slot 0 upward, stopping early at the first NULL
 * slot; the array itself is then freed. The index is a size_t so that it
 * has the same type as @size and cannot overflow or be subject to a
 * signed/unsigned comparison.
 */
static void nfs4_free_pages(struct page **pages, size_t size)
{
	size_t i;

	if (!pages)
		return;

	for (i = 0; i < size; i++) {
		if (!pages[i])
			break;
		__free_page(pages[i]);
	}
	kfree(pages);
}
105829a8bfe5STrond Myklebust
/*
 * Allocate an array of @size pages.
 *
 * @size:      number of pages (and array slots) to allocate
 * @gfp_flags: allocation flags used for both the array and the pages
 *
 * Returns the array on success. On any allocation failure everything
 * allocated so far is released and NULL is returned. The index is a
 * size_t so that it matches @size and the count handed to
 * nfs4_free_pages() without any signed/unsigned conversion.
 */
static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
{
	struct page **pages;
	size_t i;

	pages = kmalloc_array(size, sizeof(struct page *), gfp_flags);
	if (!pages) {
		dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
		return NULL;
	}

	for (i = 0; i < size; i++) {
		pages[i] = alloc_page(gfp_flags);
		if (!pages[i]) {
			dprintk("%s: failed to allocate page\n", __func__);
			/* Only slots [0, i) hold pages; free exactly those */
			nfs4_free_pages(pages, i);
			return NULL;
		}
	}

	return pages;
}
108129a8bfe5STrond Myklebust
1082587f03deSFred Isaman static struct nfs4_layoutget *
pnfs_alloc_init_layoutget_args(struct inode * ino,struct nfs_open_context * ctx,const nfs4_stateid * stateid,const struct pnfs_layout_range * range,gfp_t gfp_flags)10835e36e2a9SFred Isaman pnfs_alloc_init_layoutget_args(struct inode *ino,
1084e5e94017SBenny Halevy struct nfs_open_context *ctx,
10852409a976SFred Isaman const nfs4_stateid *stateid,
1086e144e539STrond Myklebust const struct pnfs_layout_range *range,
1087587f03deSFred Isaman gfp_t gfp_flags)
1088e5e94017SBenny Halevy {
10895e36e2a9SFred Isaman struct nfs_server *server = pnfs_find_server(ino, ctx);
109028ced9a8STrond Myklebust size_t max_reply_sz = server->pnfs_curr_ld->max_layoutget_response;
1091dacb452dSFred Isaman size_t max_pages = max_response_pages(server);
1092b1f69b75SAndy Adamson struct nfs4_layoutget *lgp;
1093e5e94017SBenny Halevy
1094b1f69b75SAndy Adamson dprintk("--> %s\n", __func__);
1095b1f69b75SAndy Adamson
1096a75b9df9STrond Myklebust lgp = kzalloc(sizeof(*lgp), gfp_flags);
1097cf7d63f1SFred Isaman if (lgp == NULL)
1098587f03deSFred Isaman return NULL;
109935124a09SWeston Andros Adamson
110028ced9a8STrond Myklebust if (max_reply_sz) {
110128ced9a8STrond Myklebust size_t npages = (max_reply_sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
110228ced9a8STrond Myklebust if (npages < max_pages)
110328ced9a8STrond Myklebust max_pages = npages;
110428ced9a8STrond Myklebust }
110528ced9a8STrond Myklebust
1106dacb452dSFred Isaman lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
1107dacb452dSFred Isaman if (!lgp->args.layout.pages) {
1108dacb452dSFred Isaman kfree(lgp);
1109dacb452dSFred Isaman return NULL;
1110dacb452dSFred Isaman }
1111dacb452dSFred Isaman lgp->args.layout.pglen = max_pages * PAGE_SIZE;
1112dacb452dSFred Isaman lgp->res.layoutp = &lgp->args.layout;
1113dacb452dSFred Isaman
1114d49e0d5bSTrond Myklebust /* Don't confuse uninitialised result and success */
1115d49e0d5bSTrond Myklebust lgp->res.status = -NFS4ERR_DELAY;
11162d89a1d3STrond Myklebust
111709cbfeafSKirill A. Shutemov lgp->args.minlength = PAGE_SIZE;
1118fb3296ebSBenny Halevy if (lgp->args.minlength > range->length)
1119fb3296ebSBenny Halevy lgp->args.minlength = range->length;
11205e36e2a9SFred Isaman if (ino) {
11215e36e2a9SFred Isaman loff_t i_size = i_size_read(ino);
11225e36e2a9SFred Isaman
11232d89a1d3STrond Myklebust if (range->iomode == IOMODE_READ) {
11242d89a1d3STrond Myklebust if (range->offset >= i_size)
11252d89a1d3STrond Myklebust lgp->args.minlength = 0;
11262d89a1d3STrond Myklebust else if (i_size - range->offset < lgp->args.minlength)
11272d89a1d3STrond Myklebust lgp->args.minlength = i_size - range->offset;
11282d89a1d3STrond Myklebust }
11295e36e2a9SFred Isaman }
1130b1f69b75SAndy Adamson lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
1131e144e539STrond Myklebust pnfs_copy_range(&lgp->args.range, range);
1132b1f69b75SAndy Adamson lgp->args.type = server->pnfs_curr_ld->id;
1133b1f69b75SAndy Adamson lgp->args.inode = ino;
1134b1f69b75SAndy Adamson lgp->args.ctx = get_nfs_open_context(ctx);
1135183d9e7bSJeff Layton nfs4_stateid_copy(&lgp->args.stateid, stateid);
1136a75b9df9STrond Myklebust lgp->gfp_flags = gfp_flags;
113763ec2b69STrond Myklebust lgp->cred = ctx->cred;
1138587f03deSFred Isaman return lgp;
1139974cec8cSAndy Adamson }
1140974cec8cSAndy Adamson
pnfs_layoutget_free(struct nfs4_layoutget * lgp)114129a8bfe5STrond Myklebust void pnfs_layoutget_free(struct nfs4_layoutget *lgp)
114229a8bfe5STrond Myklebust {
114329a8bfe5STrond Myklebust size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE;
114429a8bfe5STrond Myklebust
114529a8bfe5STrond Myklebust nfs4_free_pages(lgp->args.layout.pages, max_pages);
1146b4e89bcbSTrond Myklebust pnfs_put_layout_hdr(lgp->lo);
114729a8bfe5STrond Myklebust put_nfs_open_context(lgp->args.ctx);
114829a8bfe5STrond Myklebust kfree(lgp);
114929a8bfe5STrond Myklebust }
115029a8bfe5STrond Myklebust
pnfs_clear_layoutcommit(struct inode * inode,struct list_head * head)115124956804STrond Myklebust static void pnfs_clear_layoutcommit(struct inode *inode,
115224956804STrond Myklebust struct list_head *head)
115324956804STrond Myklebust {
115424956804STrond Myklebust struct nfs_inode *nfsi = NFS_I(inode);
115524956804STrond Myklebust struct pnfs_layout_segment *lseg, *tmp;
115624956804STrond Myklebust
115724956804STrond Myklebust if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
115824956804STrond Myklebust return;
115924956804STrond Myklebust list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
116024956804STrond Myklebust if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
116124956804STrond Myklebust continue;
116224956804STrond Myklebust pnfs_lseg_dec_and_remove_zero(lseg, head);
116324956804STrond Myklebust }
116424956804STrond Myklebust }
116524956804STrond Myklebust
pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr * lo,const nfs4_stateid * arg_stateid,const struct pnfs_layout_range * range,const nfs4_stateid * stateid)116668f74479STrond Myklebust void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
11672a974425STrond Myklebust const nfs4_stateid *arg_stateid,
116868f74479STrond Myklebust const struct pnfs_layout_range *range,
116968f74479STrond Myklebust const nfs4_stateid *stateid)
117068f74479STrond Myklebust {
117168f74479STrond Myklebust struct inode *inode = lo->plh_inode;
117268f74479STrond Myklebust LIST_HEAD(freeme);
117368f74479STrond Myklebust
117468f74479STrond Myklebust spin_lock(&inode->i_lock);
1175d8a7055fSTrond Myklebust if (!nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
11762a974425STrond Myklebust goto out_unlock;
1177d8a7055fSTrond Myklebust if (stateid && pnfs_layout_is_valid(lo)) {
11782a974425STrond Myklebust u32 seq = be32_to_cpu(arg_stateid->seqid);
11792a974425STrond Myklebust
118068f74479STrond Myklebust pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
118168f74479STrond Myklebust pnfs_free_returned_lsegs(lo, &freeme, range, seq);
118259b56394STrond Myklebust pnfs_set_layout_stateid(lo, stateid, NULL, true);
118368f74479STrond Myklebust } else
118468f74479STrond Myklebust pnfs_mark_layout_stateid_invalid(lo, &freeme);
11852a974425STrond Myklebust out_unlock:
118668f74479STrond Myklebust pnfs_clear_layoutreturn_waitbit(lo);
118768f74479STrond Myklebust spin_unlock(&inode->i_lock);
118868f74479STrond Myklebust pnfs_free_lseg_list(&freeme);
118968f74479STrond Myklebust
119068f74479STrond Myklebust }
119168f74479STrond Myklebust
119213c13a6aSTrond Myklebust static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr * lo,nfs4_stateid * stateid,const struct cred ** cred,enum pnfs_iomode * iomode)1193e5fd1904STrond Myklebust pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
1194e5fd1904STrond Myklebust nfs4_stateid *stateid,
119544ea8dfcSTrond Myklebust const struct cred **cred,
1196e5fd1904STrond Myklebust enum pnfs_iomode *iomode)
119713c13a6aSTrond Myklebust {
1198bf0291ddSTrond Myklebust /* Serialise LAYOUTGET/LAYOUTRETURN */
1199*96c9ff35STrond Myklebust if (atomic_read(&lo->plh_outstanding) != 0 && lo->plh_return_seq == 0)
1200bf0291ddSTrond Myklebust return false;
12016604b203STrond Myklebust if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
120213c13a6aSTrond Myklebust return false;
12036604b203STrond Myklebust set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
120413c13a6aSTrond Myklebust pnfs_get_layout_hdr(lo);
1205e5fd1904STrond Myklebust nfs4_stateid_copy(stateid, &lo->plh_stateid);
120644ea8dfcSTrond Myklebust *cred = get_cred(lo->plh_lc_cred);
12071bcf34fdSTrond Myklebust if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) {
1208e5fd1904STrond Myklebust if (lo->plh_return_seq != 0)
1209e5fd1904STrond Myklebust stateid->seqid = cpu_to_be32(lo->plh_return_seq);
1210e5fd1904STrond Myklebust if (iomode != NULL)
1211e5fd1904STrond Myklebust *iomode = lo->plh_return_iomode;
12128e0acf90STrond Myklebust pnfs_clear_layoutreturn_info(lo);
12131bcf34fdSTrond Myklebust } else if (iomode != NULL)
1214e5fd1904STrond Myklebust *iomode = IOMODE_ANY;
12151bcf34fdSTrond Myklebust pnfs_barrier_update(lo, be32_to_cpu(stateid->seqid));
121613c13a6aSTrond Myklebust return true;
121713c13a6aSTrond Myklebust }
121813c13a6aSTrond Myklebust
1219828ed9ecSTrond Myklebust static void
pnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args * args,struct pnfs_layout_hdr * lo,const nfs4_stateid * stateid,enum pnfs_iomode iomode)1220828ed9ecSTrond Myklebust pnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args *args,
1221828ed9ecSTrond Myklebust struct pnfs_layout_hdr *lo,
1222828ed9ecSTrond Myklebust const nfs4_stateid *stateid,
1223828ed9ecSTrond Myklebust enum pnfs_iomode iomode)
1224828ed9ecSTrond Myklebust {
1225828ed9ecSTrond Myklebust struct inode *inode = lo->plh_inode;
1226828ed9ecSTrond Myklebust
1227828ed9ecSTrond Myklebust args->layout_type = NFS_SERVER(inode)->pnfs_curr_ld->id;
1228828ed9ecSTrond Myklebust args->inode = inode;
1229828ed9ecSTrond Myklebust args->range.iomode = iomode;
1230828ed9ecSTrond Myklebust args->range.offset = 0;
1231828ed9ecSTrond Myklebust args->range.length = NFS4_MAX_UINT64;
1232828ed9ecSTrond Myklebust args->layout = lo;
1233828ed9ecSTrond Myklebust nfs4_stateid_copy(&args->stateid, stateid);
1234828ed9ecSTrond Myklebust }
1235828ed9ecSTrond Myklebust
1236f40eb5d0SPeng Tao static int
pnfs_send_layoutreturn(struct pnfs_layout_hdr * lo,const nfs4_stateid * stateid,const struct cred ** pcred,enum pnfs_iomode iomode,bool sync)123744ea8dfcSTrond Myklebust pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
123844ea8dfcSTrond Myklebust const nfs4_stateid *stateid,
123944ea8dfcSTrond Myklebust const struct cred **pcred,
124044ea8dfcSTrond Myklebust enum pnfs_iomode iomode,
124144ea8dfcSTrond Myklebust bool sync)
1242f40eb5d0SPeng Tao {
1243f40eb5d0SPeng Tao struct inode *ino = lo->plh_inode;
1244287bd3e9STrond Myklebust struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
1245f40eb5d0SPeng Tao struct nfs4_layoutreturn *lrp;
124644ea8dfcSTrond Myklebust const struct cred *cred = *pcred;
1247f40eb5d0SPeng Tao int status = 0;
1248f40eb5d0SPeng Tao
124944ea8dfcSTrond Myklebust *pcred = NULL;
125063d8a41bSTrond Myklebust lrp = kzalloc(sizeof(*lrp), nfs_io_gfp_mask());
1251f40eb5d0SPeng Tao if (unlikely(lrp == NULL)) {
1252f40eb5d0SPeng Tao status = -ENOMEM;
1253f40eb5d0SPeng Tao spin_lock(&ino->i_lock);
1254d67ae825STom Haynes pnfs_clear_layoutreturn_waitbit(lo);
1255f40eb5d0SPeng Tao spin_unlock(&ino->i_lock);
125644ea8dfcSTrond Myklebust put_cred(cred);
1257f40eb5d0SPeng Tao pnfs_put_layout_hdr(lo);
1258f40eb5d0SPeng Tao goto out;
1259f40eb5d0SPeng Tao }
1260f40eb5d0SPeng Tao
1261828ed9ecSTrond Myklebust pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode);
12624d796d75STrond Myklebust lrp->args.ld_private = &lrp->ld_private;
1263f40eb5d0SPeng Tao lrp->clp = NFS_SERVER(ino)->nfs_client;
126444ea8dfcSTrond Myklebust lrp->cred = cred;
1265287bd3e9STrond Myklebust if (ld->prepare_layoutreturn)
1266287bd3e9STrond Myklebust ld->prepare_layoutreturn(&lrp->args);
1267f40eb5d0SPeng Tao
12686c16605dSPeng Tao status = nfs4_proc_layoutreturn(lrp, sync);
1269f40eb5d0SPeng Tao out:
1270f40eb5d0SPeng Tao dprintk("<-- %s status: %d\n", __func__, status);
1271f40eb5d0SPeng Tao return status;
1272f40eb5d0SPeng Tao }
1273f40eb5d0SPeng Tao
1274d474f961STrond Myklebust static bool
pnfs_layout_segments_returnable(struct pnfs_layout_hdr * lo,enum pnfs_iomode iomode,u32 seq)1275d474f961STrond Myklebust pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo,
1276d474f961STrond Myklebust enum pnfs_iomode iomode,
1277d474f961STrond Myklebust u32 seq)
1278d474f961STrond Myklebust {
1279d474f961STrond Myklebust struct pnfs_layout_range recall_range = {
1280d474f961STrond Myklebust .length = NFS4_MAX_UINT64,
1281d474f961STrond Myklebust .iomode = iomode,
1282d474f961STrond Myklebust };
1283d474f961STrond Myklebust return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
1284d474f961STrond Myklebust &recall_range, seq) != -EBUSY;
1285d474f961STrond Myklebust }
1286d474f961STrond Myklebust
128713c13a6aSTrond Myklebust /* Return true if layoutreturn is needed */
128813c13a6aSTrond Myklebust static bool
pnfs_layout_need_return(struct pnfs_layout_hdr * lo)128913c13a6aSTrond Myklebust pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
129013c13a6aSTrond Myklebust {
12912370abdaSTrond Myklebust if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
129213c13a6aSTrond Myklebust return false;
1293d474f961STrond Myklebust return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode,
1294d474f961STrond Myklebust lo->plh_return_seq);
129513c13a6aSTrond Myklebust }
129613c13a6aSTrond Myklebust
pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr * lo)129713c13a6aSTrond Myklebust static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
129813c13a6aSTrond Myklebust {
129913c13a6aSTrond Myklebust struct inode *inode= lo->plh_inode;
130013c13a6aSTrond Myklebust
13012370abdaSTrond Myklebust if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
130213c13a6aSTrond Myklebust return;
130313c13a6aSTrond Myklebust spin_lock(&inode->i_lock);
130413c13a6aSTrond Myklebust if (pnfs_layout_need_return(lo)) {
130544ea8dfcSTrond Myklebust const struct cred *cred;
130613c13a6aSTrond Myklebust nfs4_stateid stateid;
130713c13a6aSTrond Myklebust enum pnfs_iomode iomode;
130813c13a6aSTrond Myklebust bool send;
130913c13a6aSTrond Myklebust
131044ea8dfcSTrond Myklebust send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
131113c13a6aSTrond Myklebust spin_unlock(&inode->i_lock);
131213c13a6aSTrond Myklebust if (send) {
131313c13a6aSTrond Myklebust /* Send an async layoutreturn so we dont deadlock */
131444ea8dfcSTrond Myklebust pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
131513c13a6aSTrond Myklebust }
131613c13a6aSTrond Myklebust } else
131713c13a6aSTrond Myklebust spin_unlock(&inode->i_lock);
131813c13a6aSTrond Myklebust }
131913c13a6aSTrond Myklebust
1320293b3b06SAndy Adamson /*
1321293b3b06SAndy Adamson * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
1322293b3b06SAndy Adamson * when the layout segment list is empty.
1323293b3b06SAndy Adamson *
1324293b3b06SAndy Adamson * Note that a pnfs_layout_hdr can exist with an empty layout segment
1325293b3b06SAndy Adamson * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the
1326293b3b06SAndy Adamson * deviceid is marked invalid.
1327293b3b06SAndy Adamson */
1328cbe82603SBenny Halevy int
_pnfs_return_layout(struct inode * ino)1329cbe82603SBenny Halevy _pnfs_return_layout(struct inode *ino)
1330cbe82603SBenny Halevy {
1331cbe82603SBenny Halevy struct pnfs_layout_hdr *lo = NULL;
1332cbe82603SBenny Halevy struct nfs_inode *nfsi = NFS_I(ino);
1333a421d218SAnna Schumaker struct pnfs_layout_range range = {
1334a421d218SAnna Schumaker .iomode = IOMODE_ANY,
1335a421d218SAnna Schumaker .offset = 0,
1336a421d218SAnna Schumaker .length = NFS4_MAX_UINT64,
1337a421d218SAnna Schumaker };
1338cbe82603SBenny Halevy LIST_HEAD(tmp_list);
133944ea8dfcSTrond Myklebust const struct cred *cred;
1340cbe82603SBenny Halevy nfs4_stateid stateid;
134124408f52STrond Myklebust int status = 0;
134293b7f7adSOlga Kornievskaia bool send, valid_layout;
1343cbe82603SBenny Halevy
1344366d5052SAndy Adamson dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
1345cbe82603SBenny Halevy
1346cbe82603SBenny Halevy spin_lock(&ino->i_lock);
1347cbe82603SBenny Halevy lo = nfsi->layout;
1348e5929f3cSTrond Myklebust if (!lo) {
1349cbe82603SBenny Halevy spin_unlock(&ino->i_lock);
1350293b3b06SAndy Adamson dprintk("NFS: %s no layout to return\n", __func__);
1351293b3b06SAndy Adamson goto out;
1352cbe82603SBenny Halevy }
1353cbe82603SBenny Halevy /* Reference matched in nfs4_layoutreturn_release */
135470c3bd2bSTrond Myklebust pnfs_get_layout_hdr(lo);
135524408f52STrond Myklebust /* Is there an outstanding layoutreturn ? */
135624408f52STrond Myklebust if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
135724408f52STrond Myklebust spin_unlock(&ino->i_lock);
135824408f52STrond Myklebust if (wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
135924408f52STrond Myklebust TASK_UNINTERRUPTIBLE))
136024408f52STrond Myklebust goto out_put_layout_hdr;
136124408f52STrond Myklebust spin_lock(&ino->i_lock);
136224408f52STrond Myklebust }
136393b7f7adSOlga Kornievskaia valid_layout = pnfs_layout_is_valid(lo);
136424956804STrond Myklebust pnfs_clear_layoutcommit(ino, &tmp_list);
1365a421d218SAnna Schumaker pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
1366c88953d8SChristoph Hellwig
1367a421d218SAnna Schumaker if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
1368c88953d8SChristoph Hellwig NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
1369c88953d8SChristoph Hellwig
1370293b3b06SAndy Adamson /* Don't send a LAYOUTRETURN if list was initially empty */
137193b7f7adSOlga Kornievskaia if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
137293b7f7adSOlga Kornievskaia !valid_layout) {
1373293b3b06SAndy Adamson spin_unlock(&ino->i_lock);
1374293b3b06SAndy Adamson dprintk("NFS: %s no layout segments to return\n", __func__);
13757bcc1058STrond Myklebust goto out_wait_layoutreturn;
1376293b3b06SAndy Adamson }
137747abadefSChristoph Hellwig
137844ea8dfcSTrond Myklebust send = pnfs_prepare_layoutreturn(lo, &stateid, &cred, NULL);
1379cbe82603SBenny Halevy spin_unlock(&ino->i_lock);
13807f27392cSTrond Myklebust if (send)
138144ea8dfcSTrond Myklebust status = pnfs_send_layoutreturn(lo, &stateid, &cred, IOMODE_ANY, true);
13827bcc1058STrond Myklebust out_wait_layoutreturn:
13837bcc1058STrond Myklebust wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE);
13847f27392cSTrond Myklebust out_put_layout_hdr:
1385ee6625a9STrond Myklebust pnfs_free_lseg_list(&tmp_list);
13867f27392cSTrond Myklebust pnfs_put_layout_hdr(lo);
1387cbe82603SBenny Halevy out:
1388cbe82603SBenny Halevy dprintk("<-- %s status: %d\n", __func__, status);
1389cbe82603SBenny Halevy return status;
1390cbe82603SBenny Halevy }
1391cbe82603SBenny Halevy
139224028672STrond Myklebust int
pnfs_commit_and_return_layout(struct inode * inode)139324028672STrond Myklebust pnfs_commit_and_return_layout(struct inode *inode)
139424028672STrond Myklebust {
139524028672STrond Myklebust struct pnfs_layout_hdr *lo;
139624028672STrond Myklebust int ret;
139724028672STrond Myklebust
139824028672STrond Myklebust spin_lock(&inode->i_lock);
139924028672STrond Myklebust lo = NFS_I(inode)->layout;
140024028672STrond Myklebust if (lo == NULL) {
140124028672STrond Myklebust spin_unlock(&inode->i_lock);
140224028672STrond Myklebust return 0;
140324028672STrond Myklebust }
140424028672STrond Myklebust pnfs_get_layout_hdr(lo);
140524028672STrond Myklebust /* Block new layoutgets and read/write to ds */
140624028672STrond Myklebust lo->plh_block_lgets++;
140724028672STrond Myklebust spin_unlock(&inode->i_lock);
140824028672STrond Myklebust filemap_fdatawait(inode->i_mapping);
140924028672STrond Myklebust ret = pnfs_layoutcommit_inode(inode, true);
141024028672STrond Myklebust if (ret == 0)
141124028672STrond Myklebust ret = _pnfs_return_layout(inode);
141224028672STrond Myklebust spin_lock(&inode->i_lock);
141324028672STrond Myklebust lo->plh_block_lgets--;
141424028672STrond Myklebust spin_unlock(&inode->i_lock);
141524028672STrond Myklebust pnfs_put_layout_hdr(lo);
141624028672STrond Myklebust return ret;
141724028672STrond Myklebust }
141824028672STrond Myklebust
pnfs_roc(struct inode * ino,struct nfs4_layoutreturn_args * args,struct nfs4_layoutreturn_res * res,const struct cred * cred)14191c5bd76dSTrond Myklebust bool pnfs_roc(struct inode *ino,
14201c5bd76dSTrond Myklebust struct nfs4_layoutreturn_args *args,
14211c5bd76dSTrond Myklebust struct nfs4_layoutreturn_res *res,
1422a52458b4SNeilBrown const struct cred *cred)
1423f7e8917aSFred Isaman {
142440dd4b7aSTrond Myklebust struct nfs_inode *nfsi = NFS_I(ino);
142540dd4b7aSTrond Myklebust struct nfs_open_context *ctx;
142640dd4b7aSTrond Myklebust struct nfs4_state *state;
1427f7e8917aSFred Isaman struct pnfs_layout_hdr *lo;
14281c5bd76dSTrond Myklebust struct pnfs_layout_segment *lseg, *next;
142944ea8dfcSTrond Myklebust const struct cred *lc_cred;
1430193e3aa2SPeng Tao nfs4_stateid stateid;
14311c5bd76dSTrond Myklebust enum pnfs_iomode iomode = 0;
14321c5bd76dSTrond Myklebust bool layoutreturn = false, roc = false;
1433e71708d4STrond Myklebust bool skip_read = false;
1434f7e8917aSFred Isaman
14351c5bd76dSTrond Myklebust if (!nfs_have_layout(ino))
14361c5bd76dSTrond Myklebust return false;
143729ade5dbSTrond Myklebust retry:
14380de43976STrond Myklebust rcu_read_lock();
1439f7e8917aSFred Isaman spin_lock(&ino->i_lock);
144040dd4b7aSTrond Myklebust lo = nfsi->layout;
14410cdc329eSTrond Myklebust if (!lo || !pnfs_layout_is_valid(lo) ||
14429c6376ebSTrond Myklebust test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
14439c6376ebSTrond Myklebust lo = NULL;
144440dd4b7aSTrond Myklebust goto out_noroc;
14459c6376ebSTrond Myklebust }
144629ade5dbSTrond Myklebust pnfs_get_layout_hdr(lo);
14479c6376ebSTrond Myklebust if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
144829ade5dbSTrond Myklebust spin_unlock(&ino->i_lock);
14490de43976STrond Myklebust rcu_read_unlock();
145029ade5dbSTrond Myklebust wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
145129ade5dbSTrond Myklebust TASK_UNINTERRUPTIBLE);
145229ade5dbSTrond Myklebust pnfs_put_layout_hdr(lo);
145329ade5dbSTrond Myklebust goto retry;
145429ade5dbSTrond Myklebust }
145540dd4b7aSTrond Myklebust
1456e755d638SPeng Tao /* no roc if we hold a delegation */
1457e71708d4STrond Myklebust if (nfs4_check_delegation(ino, FMODE_READ)) {
1458e71708d4STrond Myklebust if (nfs4_check_delegation(ino, FMODE_WRITE))
145940dd4b7aSTrond Myklebust goto out_noroc;
1460e71708d4STrond Myklebust skip_read = true;
1461e71708d4STrond Myklebust }
146240dd4b7aSTrond Myklebust
14630de43976STrond Myklebust list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
146440dd4b7aSTrond Myklebust state = ctx->state;
1465e71708d4STrond Myklebust if (state == NULL)
1466e71708d4STrond Myklebust continue;
146740dd4b7aSTrond Myklebust /* Don't return layout if there is open file state */
1468e71708d4STrond Myklebust if (state->state & FMODE_WRITE)
146940dd4b7aSTrond Myklebust goto out_noroc;
1470e71708d4STrond Myklebust if (state->state & FMODE_READ)
1471e71708d4STrond Myklebust skip_read = true;
147240dd4b7aSTrond Myklebust }
147340dd4b7aSTrond Myklebust
1474e755d638SPeng Tao
14751c5bd76dSTrond Myklebust list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) {
1476e71708d4STrond Myklebust if (skip_read && lseg->pls_range.iomode == IOMODE_READ)
1477e71708d4STrond Myklebust continue;
1478e755d638SPeng Tao /* If we are sending layoutreturn, invalidate all valid lsegs */
14791c5bd76dSTrond Myklebust if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags))
14801c5bd76dSTrond Myklebust continue;
14811c5bd76dSTrond Myklebust /*
14821c5bd76dSTrond Myklebust * Note: mark lseg for return so pnfs_layout_remove_lseg
14831c5bd76dSTrond Myklebust * doesn't invalidate the layout for us.
14841c5bd76dSTrond Myklebust */
14851c5bd76dSTrond Myklebust set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
14861c5bd76dSTrond Myklebust if (!mark_lseg_invalid(lseg, &lo->plh_return_segs))
14871c5bd76dSTrond Myklebust continue;
14881c5bd76dSTrond Myklebust pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
148969820d22STrond Myklebust }
149069820d22STrond Myklebust
14911c5bd76dSTrond Myklebust if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
149269820d22STrond Myklebust goto out_noroc;
149369820d22STrond Myklebust
1494500d701fSPeng Tao /* ROC in two conditions:
1495e755d638SPeng Tao * 1. there are ROC lsegs
1496e755d638SPeng Tao * 2. we don't send layoutreturn
1497e755d638SPeng Tao */
1498500d701fSPeng Tao /* lo ref dropped in pnfs_roc_release() */
149944ea8dfcSTrond Myklebust layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &lc_cred, &iomode);
15001c5bd76dSTrond Myklebust /* If the creds don't match, we can't compound the layoutreturn */
15014d8948c7STrond Myklebust if (!layoutreturn || cred_fscmp(cred, lc_cred) != 0)
15021c5bd76dSTrond Myklebust goto out_noroc;
15031c5bd76dSTrond Myklebust
15041c5bd76dSTrond Myklebust roc = layoutreturn;
15051c5bd76dSTrond Myklebust pnfs_init_layoutreturn_args(args, lo, &stateid, iomode);
15061c5bd76dSTrond Myklebust res->lrs_present = 0;
15071c5bd76dSTrond Myklebust layoutreturn = false;
150844ea8dfcSTrond Myklebust put_cred(lc_cred);
15094d8948c7STrond Myklebust
1510e755d638SPeng Tao out_noroc:
1511f7e8917aSFred Isaman spin_unlock(&ino->i_lock);
15120de43976STrond Myklebust rcu_read_unlock();
15137140171eSTrond Myklebust pnfs_layoutcommit_inode(ino, true);
1514287bd3e9STrond Myklebust if (roc) {
1515287bd3e9STrond Myklebust struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
1516287bd3e9STrond Myklebust if (ld->prepare_layoutreturn)
1517287bd3e9STrond Myklebust ld->prepare_layoutreturn(args);
15189c6376ebSTrond Myklebust pnfs_put_layout_hdr(lo);
1519287bd3e9STrond Myklebust return true;
1520287bd3e9STrond Myklebust }
1521e755d638SPeng Tao if (layoutreturn)
152244ea8dfcSTrond Myklebust pnfs_send_layoutreturn(lo, &stateid, &lc_cred, iomode, true);
15239c6376ebSTrond Myklebust pnfs_put_layout_hdr(lo);
1524287bd3e9STrond Myklebust return false;
1525f7e8917aSFred Isaman }
1526f7e8917aSFred Isaman
pnfs_roc_done(struct rpc_task * task,struct nfs4_layoutreturn_args ** argpp,struct nfs4_layoutreturn_res ** respp,int * ret)1527078000d0STrond Myklebust int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
1528078000d0STrond Myklebust struct nfs4_layoutreturn_res **respp, int *ret)
1529287a9c55STrond Myklebust {
1530287a9c55STrond Myklebust struct nfs4_layoutreturn_args *arg = *argpp;
1531287a9c55STrond Myklebust int retval = -EAGAIN;
1532287a9c55STrond Myklebust
1533287a9c55STrond Myklebust if (!arg)
1534287a9c55STrond Myklebust return 0;
1535287a9c55STrond Myklebust /* Handle Layoutreturn errors */
1536287a9c55STrond Myklebust switch (*ret) {
1537287a9c55STrond Myklebust case 0:
1538287a9c55STrond Myklebust retval = 0;
1539287a9c55STrond Myklebust break;
15406109bcf7STrond Myklebust case -NFS4ERR_NOMATCHING_LAYOUT:
15416109bcf7STrond Myklebust /* Was there an RPC level error? If not, retry */
15426109bcf7STrond Myklebust if (task->tk_rpc_status == 0)
15436109bcf7STrond Myklebust break;
15446109bcf7STrond Myklebust /* If the call was not sent, let caller handle it */
15456109bcf7STrond Myklebust if (!RPC_WAS_SENT(task))
15466109bcf7STrond Myklebust return 0;
15476109bcf7STrond Myklebust /*
15486109bcf7STrond Myklebust * Otherwise, assume the call succeeded and
15496109bcf7STrond Myklebust * that we need to release the layout
15506109bcf7STrond Myklebust */
15516109bcf7STrond Myklebust *ret = 0;
15526109bcf7STrond Myklebust (*respp)->lrs_present = 0;
15536109bcf7STrond Myklebust retval = 0;
15546109bcf7STrond Myklebust break;
1555078a432dSTrond Myklebust case -NFS4ERR_DELAY:
1556078a432dSTrond Myklebust /* Let the caller handle the retry */
1557078a432dSTrond Myklebust *ret = -NFS4ERR_NOMATCHING_LAYOUT;
1558078a432dSTrond Myklebust return 0;
1559287a9c55STrond Myklebust case -NFS4ERR_OLD_STATEID:
156030cb3ee2STrond Myklebust if (!nfs4_layout_refresh_old_stateid(&arg->stateid,
1561078000d0STrond Myklebust &arg->range, arg->inode))
1562287a9c55STrond Myklebust break;
1563287a9c55STrond Myklebust *ret = -NFS4ERR_NOMATCHING_LAYOUT;
1564287a9c55STrond Myklebust return -EAGAIN;
1565287a9c55STrond Myklebust }
1566287a9c55STrond Myklebust *argpp = NULL;
1567287a9c55STrond Myklebust *respp = NULL;
1568287a9c55STrond Myklebust return retval;
1569287a9c55STrond Myklebust }
1570287a9c55STrond Myklebust
pnfs_roc_release(struct nfs4_layoutreturn_args * args,struct nfs4_layoutreturn_res * res,int ret)15711c5bd76dSTrond Myklebust void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
15721c5bd76dSTrond Myklebust struct nfs4_layoutreturn_res *res,
15731c5bd76dSTrond Myklebust int ret)
1574f7e8917aSFred Isaman {
15751c5bd76dSTrond Myklebust struct pnfs_layout_hdr *lo = args->layout;
157667bbceedSTrond Myklebust struct inode *inode = args->inode;
15771c5bd76dSTrond Myklebust const nfs4_stateid *res_stateid = NULL;
1578287bd3e9STrond Myklebust struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
1579f7e8917aSFred Isaman
15809c47b18cSTrond Myklebust switch (ret) {
15819c47b18cSTrond Myklebust case -NFS4ERR_NOMATCHING_LAYOUT:
158267bbceedSTrond Myklebust spin_lock(&inode->i_lock);
158367bbceedSTrond Myklebust if (pnfs_layout_is_valid(lo) &&
158467bbceedSTrond Myklebust nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid))
158567bbceedSTrond Myklebust pnfs_set_plh_return_info(lo, args->range.iomode, 0);
1586c18d1e17STrond Myklebust pnfs_clear_layoutreturn_waitbit(lo);
158767bbceedSTrond Myklebust spin_unlock(&inode->i_lock);
15889c47b18cSTrond Myklebust break;
15899c47b18cSTrond Myklebust case 0:
15901c5bd76dSTrond Myklebust if (res->lrs_present)
15911c5bd76dSTrond Myklebust res_stateid = &res->stateid;
1592df561f66SGustavo A. R. Silva fallthrough;
15939c47b18cSTrond Myklebust default:
1594c18d1e17STrond Myklebust pnfs_layoutreturn_free_lsegs(lo, &args->stateid, &args->range,
1595c18d1e17STrond Myklebust res_stateid);
1596f7e8917aSFred Isaman }
1597a19b4785STrond Myklebust trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret);
1598287bd3e9STrond Myklebust if (ld_private && ld_private->ops && ld_private->ops->free)
1599287bd3e9STrond Myklebust ld_private->ops->free(ld_private);
16001c5bd76dSTrond Myklebust pnfs_put_layout_hdr(lo);
1601f7e8917aSFred Isaman }
1602f7e8917aSFred Isaman
pnfs_wait_on_layoutreturn(struct inode * ino,struct rpc_task * task)1603500d701fSPeng Tao bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
1604500d701fSPeng Tao {
1605500d701fSPeng Tao struct nfs_inode *nfsi = NFS_I(ino);
1606500d701fSPeng Tao struct pnfs_layout_hdr *lo;
1607500d701fSPeng Tao bool sleep = false;
1608500d701fSPeng Tao
1609500d701fSPeng Tao /* we might not have grabbed lo reference. so need to check under
1610500d701fSPeng Tao * i_lock */
1611500d701fSPeng Tao spin_lock(&ino->i_lock);
1612500d701fSPeng Tao lo = nfsi->layout;
1613ee284e35STrond Myklebust if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
1614500d701fSPeng Tao rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
1615ee284e35STrond Myklebust sleep = true;
1616ee284e35STrond Myklebust }
1617ee284e35STrond Myklebust spin_unlock(&ino->i_lock);
1618500d701fSPeng Tao return sleep;
1619500d701fSPeng Tao }
1620500d701fSPeng Tao
1621b1f69b75SAndy Adamson /*
1622b1f69b75SAndy Adamson * Compare two layout segments for sorting into layout cache.
1623b1f69b75SAndy Adamson * We want to preferentially return RW over RO layouts, so ensure those
1624b1f69b75SAndy Adamson * are seen first.
1625b1f69b75SAndy Adamson */
1626b1f69b75SAndy Adamson static s64
pnfs_lseg_range_cmp(const struct pnfs_layout_range * l1,const struct pnfs_layout_range * l2)16277dc0ac70STrond Myklebust pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1,
16283cb2df17STrond Myklebust const struct pnfs_layout_range *l2)
1629b1f69b75SAndy Adamson {
1630fb3296ebSBenny Halevy s64 d;
1631fb3296ebSBenny Halevy
1632fb3296ebSBenny Halevy /* high offset > low offset */
1633fb3296ebSBenny Halevy d = l1->offset - l2->offset;
1634fb3296ebSBenny Halevy if (d)
1635fb3296ebSBenny Halevy return d;
1636fb3296ebSBenny Halevy
1637fb3296ebSBenny Halevy /* short length > long length */
1638fb3296ebSBenny Halevy d = l2->length - l1->length;
1639fb3296ebSBenny Halevy if (d)
1640fb3296ebSBenny Halevy return d;
1641fb3296ebSBenny Halevy
1642b1f69b75SAndy Adamson /* read > read/write */
1643fb3296ebSBenny Halevy return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
1644b1f69b75SAndy Adamson }
1645b1f69b75SAndy Adamson
164603772d2fSTrond Myklebust static bool
pnfs_lseg_range_is_after(const struct pnfs_layout_range * l1,const struct pnfs_layout_range * l2)164703772d2fSTrond Myklebust pnfs_lseg_range_is_after(const struct pnfs_layout_range *l1,
164803772d2fSTrond Myklebust const struct pnfs_layout_range *l2)
1649974cec8cSAndy Adamson {
165003772d2fSTrond Myklebust return pnfs_lseg_range_cmp(l1, l2) > 0;
165103772d2fSTrond Myklebust }
165203772d2fSTrond Myklebust
/*
 * Merge callback that never merges: always returns false, whatever
 * @lseg and @old are.  Used as the do_merge argument of
 * pnfs_generic_layout_insert_lseg() when the layout driver supplies no
 * add_lseg method.
 */
static bool
pnfs_lseg_no_merge(struct pnfs_layout_segment *lseg,
		struct pnfs_layout_segment *old)
{
	return false;
}
165903772d2fSTrond Myklebust
166003772d2fSTrond Myklebust void
pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr * lo,struct pnfs_layout_segment * lseg,bool (* is_after)(const struct pnfs_layout_range *,const struct pnfs_layout_range *),bool (* do_merge)(struct pnfs_layout_segment *,struct pnfs_layout_segment *),struct list_head * free_me)166103772d2fSTrond Myklebust pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo,
166203772d2fSTrond Myklebust struct pnfs_layout_segment *lseg,
166303772d2fSTrond Myklebust bool (*is_after)(const struct pnfs_layout_range *,
166403772d2fSTrond Myklebust const struct pnfs_layout_range *),
166503772d2fSTrond Myklebust bool (*do_merge)(struct pnfs_layout_segment *,
166603772d2fSTrond Myklebust struct pnfs_layout_segment *),
166703772d2fSTrond Myklebust struct list_head *free_me)
166803772d2fSTrond Myklebust {
166903772d2fSTrond Myklebust struct pnfs_layout_segment *lp, *tmp;
1670b1f69b75SAndy Adamson
1671974cec8cSAndy Adamson dprintk("%s:Begin\n", __func__);
1672974cec8cSAndy Adamson
167303772d2fSTrond Myklebust list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) {
167403772d2fSTrond Myklebust if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0)
167503772d2fSTrond Myklebust continue;
167603772d2fSTrond Myklebust if (do_merge(lseg, lp)) {
167703772d2fSTrond Myklebust mark_lseg_invalid(lp, free_me);
167803772d2fSTrond Myklebust continue;
167903772d2fSTrond Myklebust }
168003772d2fSTrond Myklebust if (is_after(&lseg->pls_range, &lp->pls_range))
1681b1f69b75SAndy Adamson continue;
1682566052c5SFred Isaman list_add_tail(&lseg->pls_list, &lp->pls_list);
1683b1f69b75SAndy Adamson dprintk("%s: inserted lseg %p "
1684b1f69b75SAndy Adamson "iomode %d offset %llu length %llu before "
1685b1f69b75SAndy Adamson "lp %p iomode %d offset %llu length %llu\n",
1686566052c5SFred Isaman __func__, lseg, lseg->pls_range.iomode,
1687566052c5SFred Isaman lseg->pls_range.offset, lseg->pls_range.length,
1688566052c5SFred Isaman lp, lp->pls_range.iomode, lp->pls_range.offset,
1689566052c5SFred Isaman lp->pls_range.length);
1690fb3296ebSBenny Halevy goto out;
1691974cec8cSAndy Adamson }
1692b7edfaa1SFred Isaman list_add_tail(&lseg->pls_list, &lo->plh_segs);
1693b1f69b75SAndy Adamson dprintk("%s: inserted lseg %p "
1694b1f69b75SAndy Adamson "iomode %d offset %llu length %llu at tail\n",
1695566052c5SFred Isaman __func__, lseg, lseg->pls_range.iomode,
1696566052c5SFred Isaman lseg->pls_range.offset, lseg->pls_range.length);
1697fb3296ebSBenny Halevy out:
169870c3bd2bSTrond Myklebust pnfs_get_layout_hdr(lo);
1699974cec8cSAndy Adamson
1700974cec8cSAndy Adamson dprintk("%s:Return\n", __func__);
1701974cec8cSAndy Adamson }
170203772d2fSTrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg);
170303772d2fSTrond Myklebust
170403772d2fSTrond Myklebust static void
pnfs_layout_insert_lseg(struct pnfs_layout_hdr * lo,struct pnfs_layout_segment * lseg,struct list_head * free_me)170503772d2fSTrond Myklebust pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
170603772d2fSTrond Myklebust struct pnfs_layout_segment *lseg,
170703772d2fSTrond Myklebust struct list_head *free_me)
170803772d2fSTrond Myklebust {
170903772d2fSTrond Myklebust struct inode *inode = lo->plh_inode;
171003772d2fSTrond Myklebust struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
171103772d2fSTrond Myklebust
171203772d2fSTrond Myklebust if (ld->add_lseg != NULL)
171303772d2fSTrond Myklebust ld->add_lseg(lo, lseg, free_me);
171403772d2fSTrond Myklebust else
171503772d2fSTrond Myklebust pnfs_generic_layout_insert_lseg(lo, lseg,
171603772d2fSTrond Myklebust pnfs_lseg_range_is_after,
171703772d2fSTrond Myklebust pnfs_lseg_no_merge,
171803772d2fSTrond Myklebust free_me);
171903772d2fSTrond Myklebust }
1720e5e94017SBenny Halevy
1721e5e94017SBenny Halevy static struct pnfs_layout_hdr *
alloc_init_layout_hdr(struct inode * ino,struct nfs_open_context * ctx,gfp_t gfp_flags)17229fa40758SPeng Tao alloc_init_layout_hdr(struct inode *ino,
17239fa40758SPeng Tao struct nfs_open_context *ctx,
17249fa40758SPeng Tao gfp_t gfp_flags)
1725e5e94017SBenny Halevy {
1726e5e94017SBenny Halevy struct pnfs_layout_hdr *lo;
1727e5e94017SBenny Halevy
1728636fb9c8SBenny Halevy lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
1729e5e94017SBenny Halevy if (!lo)
1730e5e94017SBenny Halevy return NULL;
17312b28a7beSElena Reshetova refcount_set(&lo->plh_refcount, 1);
1732b7edfaa1SFred Isaman INIT_LIST_HEAD(&lo->plh_layouts);
1733b7edfaa1SFred Isaman INIT_LIST_HEAD(&lo->plh_segs);
173468f74479STrond Myklebust INIT_LIST_HEAD(&lo->plh_return_segs);
1735fd9a8d71STrond Myklebust INIT_LIST_HEAD(&lo->plh_bulk_destroy);
1736b7edfaa1SFred Isaman lo->plh_inode = ino;
1737a52458b4SNeilBrown lo->plh_lc_cred = get_cred(ctx->cred);
173867a3b721STrond Myklebust lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;
1739e5e94017SBenny Halevy return lo;
1740e5e94017SBenny Halevy }
1741e5e94017SBenny Halevy
1742e5e94017SBenny Halevy static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode * ino,struct nfs_open_context * ctx,gfp_t gfp_flags)17439fa40758SPeng Tao pnfs_find_alloc_layout(struct inode *ino,
17449fa40758SPeng Tao struct nfs_open_context *ctx,
17459fa40758SPeng Tao gfp_t gfp_flags)
1746e5241e43STrond Myklebust __releases(&ino->i_lock)
1747e5241e43STrond Myklebust __acquires(&ino->i_lock)
1748e5e94017SBenny Halevy {
1749e5e94017SBenny Halevy struct nfs_inode *nfsi = NFS_I(ino);
1750e5e94017SBenny Halevy struct pnfs_layout_hdr *new = NULL;
1751e5e94017SBenny Halevy
1752e5e94017SBenny Halevy dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
1753e5e94017SBenny Halevy
1754251ec410STrond Myklebust if (nfsi->layout != NULL)
1755251ec410STrond Myklebust goto out_existing;
1756e5e94017SBenny Halevy spin_unlock(&ino->i_lock);
17579fa40758SPeng Tao new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
1758e5e94017SBenny Halevy spin_lock(&ino->i_lock);
1759e5e94017SBenny Halevy
1760251ec410STrond Myklebust if (likely(nfsi->layout == NULL)) { /* Won the race? */
1761e5e94017SBenny Halevy nfsi->layout = new;
1762251ec410STrond Myklebust return new;
17637175fe90SYanchuan Nian } else if (new != NULL)
1764636fb9c8SBenny Halevy pnfs_free_layout_hdr(new);
1765251ec410STrond Myklebust out_existing:
1766251ec410STrond Myklebust pnfs_get_layout_hdr(nfsi->layout);
1767e5e94017SBenny Halevy return nfsi->layout;
1768e5e94017SBenny Halevy }
1769e5e94017SBenny Halevy
1770b1f69b75SAndy Adamson /*
1771b1f69b75SAndy Adamson * iomode matching rules:
1772c7d73af2STom Haynes * iomode lseg strict match
1773c7d73af2STom Haynes * iomode
1774c7d73af2STom Haynes * ----- ----- ------ -----
1775c7d73af2STom Haynes * ANY READ N/A true
1776c7d73af2STom Haynes * ANY RW N/A true
1777c7d73af2STom Haynes * RW READ N/A false
1778c7d73af2STom Haynes * RW RW N/A true
1779c7d73af2STom Haynes * READ READ N/A true
1780c7d73af2STom Haynes * READ RW true false
1781c7d73af2STom Haynes * READ RW false true
1782b1f69b75SAndy Adamson */
17833cb2df17STrond Myklebust static bool
pnfs_lseg_range_match(const struct pnfs_layout_range * ls_range,const struct pnfs_layout_range * range,bool strict_iomode)17847dc0ac70STrond Myklebust pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
1785c7d73af2STom Haynes const struct pnfs_layout_range *range,
1786c7d73af2STom Haynes bool strict_iomode)
1787b1f69b75SAndy Adamson {
1788fb3296ebSBenny Halevy struct pnfs_layout_range range1;
1789fb3296ebSBenny Halevy
1790fb3296ebSBenny Halevy if ((range->iomode == IOMODE_RW &&
1791fb3296ebSBenny Halevy ls_range->iomode != IOMODE_RW) ||
1792c7d73af2STom Haynes (range->iomode != ls_range->iomode &&
17936089dd0dSThomas Meyer strict_iomode) ||
17947dc0ac70STrond Myklebust !pnfs_lseg_range_intersecting(ls_range, range))
179510db5b7aSGustavo A. R. Silva return false;
1796fb3296ebSBenny Halevy
1797fb3296ebSBenny Halevy /* range1 covers only the first byte in the range */
1798fb3296ebSBenny Halevy range1 = *range;
1799fb3296ebSBenny Halevy range1.length = 1;
18007dc0ac70STrond Myklebust return pnfs_lseg_range_contained(ls_range, &range1);
1801b1f69b75SAndy Adamson }
1802b1f69b75SAndy Adamson
1803b1f69b75SAndy Adamson /*
1804b1f69b75SAndy Adamson * lookup range in layout
1805b1f69b75SAndy Adamson */
1806e5e94017SBenny Halevy static struct pnfs_layout_segment *
pnfs_find_lseg(struct pnfs_layout_hdr * lo,struct pnfs_layout_range * range,bool strict_iomode)1807fb3296ebSBenny Halevy pnfs_find_lseg(struct pnfs_layout_hdr *lo,
1808c7d73af2STom Haynes struct pnfs_layout_range *range,
1809c7d73af2STom Haynes bool strict_iomode)
1810e5e94017SBenny Halevy {
1811b1f69b75SAndy Adamson struct pnfs_layout_segment *lseg, *ret = NULL;
1812b1f69b75SAndy Adamson
1813b1f69b75SAndy Adamson dprintk("%s:Begin\n", __func__);
1814b1f69b75SAndy Adamson
1815b7edfaa1SFred Isaman list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
18164541d16cSFred Isaman if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
1817c7d73af2STom Haynes pnfs_lseg_range_match(&lseg->pls_range, range,
1818c7d73af2STom Haynes strict_iomode)) {
18199369a431STrond Myklebust ret = pnfs_get_lseg(lseg);
1820b1f69b75SAndy Adamson break;
1821b1f69b75SAndy Adamson }
1822b1f69b75SAndy Adamson }
1823b1f69b75SAndy Adamson
1824b1f69b75SAndy Adamson dprintk("%s:Return lseg %p ref %d\n",
1825eba6dd69SElena Reshetova __func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0);
1826b1f69b75SAndy Adamson return ret;
1827e5e94017SBenny Halevy }
1828e5e94017SBenny Halevy
1829e5e94017SBenny Halevy /*
1830d23d61c8SAndy Adamson * Use mdsthreshold hints set at each OPEN to determine if I/O should go
1831d23d61c8SAndy Adamson * to the MDS or over pNFS
1832d23d61c8SAndy Adamson *
1833d23d61c8SAndy Adamson * The nfs_inode read_io and write_io fields are cumulative counters reset
1834d23d61c8SAndy Adamson * when there are no layout segments. Note that in pnfs_update_layout iomode
1835d23d61c8SAndy Adamson * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
1836d23d61c8SAndy Adamson * WRITE request.
1837d23d61c8SAndy Adamson *
1838d23d61c8SAndy Adamson * A return of true means use MDS I/O.
1839d23d61c8SAndy Adamson *
1840d23d61c8SAndy Adamson * From rfc 5661:
1841d23d61c8SAndy Adamson * If a file's size is smaller than the file size threshold, data accesses
1842d23d61c8SAndy Adamson * SHOULD be sent to the metadata server. If an I/O request has a length that
1843d23d61c8SAndy Adamson * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
1844d23d61c8SAndy Adamson * server. If both file size and I/O size are provided, the client SHOULD
1845d23d61c8SAndy Adamson * reach or exceed both thresholds before sending its read or write
1846d23d61c8SAndy Adamson * requests to the data server.
1847d23d61c8SAndy Adamson */
pnfs_within_mdsthreshold(struct nfs_open_context * ctx,struct inode * ino,int iomode)1848d23d61c8SAndy Adamson static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
1849d23d61c8SAndy Adamson struct inode *ino, int iomode)
1850d23d61c8SAndy Adamson {
1851d23d61c8SAndy Adamson struct nfs4_threshold *t = ctx->mdsthreshold;
1852d23d61c8SAndy Adamson struct nfs_inode *nfsi = NFS_I(ino);
1853d23d61c8SAndy Adamson loff_t fsize = i_size_read(ino);
1854d23d61c8SAndy Adamson bool size = false, size_set = false, io = false, io_set = false, ret = false;
1855d23d61c8SAndy Adamson
1856d23d61c8SAndy Adamson if (t == NULL)
1857d23d61c8SAndy Adamson return ret;
1858d23d61c8SAndy Adamson
1859d23d61c8SAndy Adamson dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
1860d23d61c8SAndy Adamson __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
1861d23d61c8SAndy Adamson
1862d23d61c8SAndy Adamson switch (iomode) {
1863d23d61c8SAndy Adamson case IOMODE_READ:
1864d23d61c8SAndy Adamson if (t->bm & THRESHOLD_RD) {
1865d23d61c8SAndy Adamson dprintk("%s fsize %llu\n", __func__, fsize);
1866d23d61c8SAndy Adamson size_set = true;
1867d23d61c8SAndy Adamson if (fsize < t->rd_sz)
1868d23d61c8SAndy Adamson size = true;
1869d23d61c8SAndy Adamson }
1870d23d61c8SAndy Adamson if (t->bm & THRESHOLD_RD_IO) {
1871d23d61c8SAndy Adamson dprintk("%s nfsi->read_io %llu\n", __func__,
1872d23d61c8SAndy Adamson nfsi->read_io);
1873d23d61c8SAndy Adamson io_set = true;
1874d23d61c8SAndy Adamson if (nfsi->read_io < t->rd_io_sz)
1875d23d61c8SAndy Adamson io = true;
1876d23d61c8SAndy Adamson }
1877d23d61c8SAndy Adamson break;
1878d23d61c8SAndy Adamson case IOMODE_RW:
1879d23d61c8SAndy Adamson if (t->bm & THRESHOLD_WR) {
1880d23d61c8SAndy Adamson dprintk("%s fsize %llu\n", __func__, fsize);
1881d23d61c8SAndy Adamson size_set = true;
1882d23d61c8SAndy Adamson if (fsize < t->wr_sz)
1883d23d61c8SAndy Adamson size = true;
1884d23d61c8SAndy Adamson }
1885d23d61c8SAndy Adamson if (t->bm & THRESHOLD_WR_IO) {
1886d23d61c8SAndy Adamson dprintk("%s nfsi->write_io %llu\n", __func__,
1887d23d61c8SAndy Adamson nfsi->write_io);
1888d23d61c8SAndy Adamson io_set = true;
1889d23d61c8SAndy Adamson if (nfsi->write_io < t->wr_io_sz)
1890d23d61c8SAndy Adamson io = true;
1891d23d61c8SAndy Adamson }
1892d23d61c8SAndy Adamson break;
1893d23d61c8SAndy Adamson }
1894d23d61c8SAndy Adamson if (size_set && io_set) {
1895d23d61c8SAndy Adamson if (size && io)
1896d23d61c8SAndy Adamson ret = true;
1897d23d61c8SAndy Adamson } else if (size || io)
1898d23d61c8SAndy Adamson ret = true;
1899d23d61c8SAndy Adamson
1900d23d61c8SAndy Adamson dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
1901d23d61c8SAndy Adamson return ret;
1902d23d61c8SAndy Adamson }
1903d23d61c8SAndy Adamson
pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr * lo)1904d03360aaSTrond Myklebust static int pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
1905aa8a45eeSPeng Tao {
1906aa8a45eeSPeng Tao /*
1907aa8a45eeSPeng Tao * send layoutcommit as it can hold up layoutreturn due to lseg
1908aa8a45eeSPeng Tao * reference
1909aa8a45eeSPeng Tao */
1910aa8a45eeSPeng Tao pnfs_layoutcommit_inode(lo->plh_inode, false);
1911d03360aaSTrond Myklebust return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
19122e5b29f0STrond Myklebust nfs_wait_bit_killable,
1913f5d39b02SPeter Zijlstra TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
1914aa8a45eeSPeng Tao }
1915aa8a45eeSPeng Tao
nfs_layoutget_begin(struct pnfs_layout_hdr * lo)1916411ae722STrond Myklebust static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
1917411ae722STrond Myklebust {
1918411ae722STrond Myklebust atomic_inc(&lo->plh_outstanding);
1919411ae722STrond Myklebust }
1920411ae722STrond Myklebust
nfs_layoutget_end(struct pnfs_layout_hdr * lo)1921411ae722STrond Myklebust static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
1922411ae722STrond Myklebust {
1923880265c7STrond Myklebust if (atomic_dec_and_test(&lo->plh_outstanding) &&
1924880265c7STrond Myklebust test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
1925880265c7STrond Myklebust wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
1926411ae722STrond Myklebust }
1927411ae722STrond Myklebust
pnfs_is_first_layoutget(struct pnfs_layout_hdr * lo)1928d29b468dSTrond Myklebust static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
1929d29b468dSTrond Myklebust {
1930d29b468dSTrond Myklebust return test_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags);
1931d29b468dSTrond Myklebust }
1932d29b468dSTrond Myklebust
pnfs_clear_first_layoutget(struct pnfs_layout_hdr * lo)1933d67ae825STom Haynes static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
1934d67ae825STom Haynes {
1935d67ae825STom Haynes unsigned long *bitlock = &lo->plh_flags;
1936d67ae825STom Haynes
1937d67ae825STom Haynes clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock);
1938d67ae825STom Haynes smp_mb__after_atomic();
1939d67ae825STom Haynes wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET);
1940d67ae825STom Haynes }
1941d67ae825STom Haynes
_add_to_server_list(struct pnfs_layout_hdr * lo,struct nfs_server * server)194278746a38SFred Isaman static void _add_to_server_list(struct pnfs_layout_hdr *lo,
194378746a38SFred Isaman struct nfs_server *server)
194478746a38SFred Isaman {
1945cf6605d1STrond Myklebust if (!test_and_set_bit(NFS_LAYOUT_HASHED, &lo->plh_flags)) {
194678746a38SFred Isaman struct nfs_client *clp = server->nfs_client;
194778746a38SFred Isaman
194878746a38SFred Isaman /* The lo must be on the clp list if there is any
194978746a38SFred Isaman * chance of a CB_LAYOUTRECALL(FILE) coming in.
195078746a38SFred Isaman */
195178746a38SFred Isaman spin_lock(&clp->cl_lock);
1952cf6605d1STrond Myklebust list_add_tail_rcu(&lo->plh_layouts, &server->layouts);
195378746a38SFred Isaman spin_unlock(&clp->cl_lock);
195478746a38SFred Isaman }
195578746a38SFred Isaman }
195678746a38SFred Isaman
1957d23d61c8SAndy Adamson /*
1958e5e94017SBenny Halevy * Layout segment is retreived from the server if not cached.
1959e5e94017SBenny Halevy * The appropriate layout segment is referenced and returned to the caller.
1960e5e94017SBenny Halevy */
19617c24d948SAndy Adamson struct pnfs_layout_segment *
pnfs_update_layout(struct inode * ino,struct nfs_open_context * ctx,loff_t pos,u64 count,enum pnfs_iomode iomode,bool strict_iomode,gfp_t gfp_flags)1962e5e94017SBenny Halevy pnfs_update_layout(struct inode *ino,
1963e5e94017SBenny Halevy struct nfs_open_context *ctx,
1964fb3296ebSBenny Halevy loff_t pos,
1965fb3296ebSBenny Halevy u64 count,
1966a75b9df9STrond Myklebust enum pnfs_iomode iomode,
1967c7d73af2STom Haynes bool strict_iomode,
1968a75b9df9STrond Myklebust gfp_t gfp_flags)
1969e5e94017SBenny Halevy {
1970fb3296ebSBenny Halevy struct pnfs_layout_range arg = {
1971fb3296ebSBenny Halevy .iomode = iomode,
1972fb3296ebSBenny Halevy .offset = pos,
1973fb3296ebSBenny Halevy .length = count,
1974fb3296ebSBenny Halevy };
197570d2f7b1STrond Myklebust unsigned pg_offset;
19766382a441SWeston Andros Adamson struct nfs_server *server = NFS_SERVER(ino);
19776382a441SWeston Andros Adamson struct nfs_client *clp = server->nfs_client;
1978183d9e7bSJeff Layton struct pnfs_layout_hdr *lo = NULL;
1979e5e94017SBenny Halevy struct pnfs_layout_segment *lseg = NULL;
1980587f03deSFred Isaman struct nfs4_layoutget *lgp;
1981183d9e7bSJeff Layton nfs4_stateid stateid;
1982183d9e7bSJeff Layton long timeout = 0;
198366b53f32STrond Myklebust unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
198430005121SWeston Andros Adamson bool first;
1985e5e94017SBenny Halevy
19869a4bf31dSJeff Layton if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
1987183d9e7bSJeff Layton trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
19889a4bf31dSJeff Layton PNFS_UPDATE_LAYOUT_NO_PNFS);
1989f86bbcf8STrond Myklebust goto out;
19909a4bf31dSJeff Layton }
1991d23d61c8SAndy Adamson
19929a4bf31dSJeff Layton if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
1993183d9e7bSJeff Layton trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
19949a4bf31dSJeff Layton PNFS_UPDATE_LAYOUT_MDSTHRESH);
1995f86bbcf8STrond Myklebust goto out;
19969a4bf31dSJeff Layton }
1997d23d61c8SAndy Adamson
19989bf87482SPeng Tao lookup_again:
1999fbc63fb1SNeilBrown if (!nfs4_valid_open_stateid(ctx->state)) {
2000fbc63fb1SNeilBrown trace_pnfs_update_layout(ino, pos, count,
2001fbc63fb1SNeilBrown iomode, lo, lseg,
2002fbc63fb1SNeilBrown PNFS_UPDATE_LAYOUT_INVALID_OPEN);
2003fbc63fb1SNeilBrown lseg = ERR_PTR(-EIO);
2004fbc63fb1SNeilBrown goto out;
2005fbc63fb1SNeilBrown }
2006fbc63fb1SNeilBrown
2007d03360aaSTrond Myklebust lseg = ERR_PTR(nfs4_client_recover_expired_lease(clp));
2008d03360aaSTrond Myklebust if (IS_ERR(lseg))
2009d03360aaSTrond Myklebust goto out;
20109bf87482SPeng Tao first = false;
2011e5e94017SBenny Halevy spin_lock(&ino->i_lock);
20129fa40758SPeng Tao lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
2013830ffb56STrond Myklebust if (lo == NULL) {
2014830ffb56STrond Myklebust spin_unlock(&ino->i_lock);
20153764a17eSTrond Myklebust lseg = ERR_PTR(-ENOMEM);
2016183d9e7bSJeff Layton trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20179a4bf31dSJeff Layton PNFS_UPDATE_LAYOUT_NOMEM);
2018830ffb56STrond Myklebust goto out;
2019830ffb56STrond Myklebust }
2020e5e94017SBenny Halevy
202143f1b3daSFred Isaman /* Do we even need to bother with this? */
2022a59c30acSTrond Myklebust if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
2023183d9e7bSJeff Layton trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20249a4bf31dSJeff Layton PNFS_UPDATE_LAYOUT_BULK_RECALL);
202543f1b3daSFred Isaman dprintk("%s matches recall, use MDS\n", __func__);
2026e5e94017SBenny Halevy goto out_unlock;
2027e5e94017SBenny Halevy }
2028e5e94017SBenny Halevy
2029e5e94017SBenny Halevy /* if LAYOUTGET already failed once we don't try again */
20302e5b29f0STrond Myklebust if (pnfs_layout_io_test_failed(lo, iomode)) {
2031183d9e7bSJeff Layton trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20329a4bf31dSJeff Layton PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
2033e5e94017SBenny Halevy goto out_unlock;
20349a4bf31dSJeff Layton }
2035e5e94017SBenny Halevy
2036411ae722STrond Myklebust /*
2037411ae722STrond Myklebust * If the layout segment list is empty, but there are outstanding
2038411ae722STrond Myklebust * layoutget calls, then they might be subject to a layoutrecall.
2039411ae722STrond Myklebust */
2040880265c7STrond Myklebust if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
2041411ae722STrond Myklebust atomic_read(&lo->plh_outstanding) != 0) {
2042411ae722STrond Myklebust spin_unlock(&ino->i_lock);
2043880265c7STrond Myklebust lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN,
2044880265c7STrond Myklebust TASK_KILLABLE));
204558bbeab4STrond Myklebust if (IS_ERR(lseg))
2046411ae722STrond Myklebust goto out_put_layout_hdr;
2047411ae722STrond Myklebust pnfs_put_layout_hdr(lo);
2048411ae722STrond Myklebust goto lookup_again;
2049411ae722STrond Myklebust }
2050411ae722STrond Myklebust
20512c8d5fc3STrond Myklebust /*
20522c8d5fc3STrond Myklebust * Because we free lsegs when sending LAYOUTRETURN, we need to wait
20532c8d5fc3STrond Myklebust * for LAYOUTRETURN.
20542c8d5fc3STrond Myklebust */
20552c8d5fc3STrond Myklebust if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
20562c8d5fc3STrond Myklebust spin_unlock(&ino->i_lock);
20572c8d5fc3STrond Myklebust dprintk("%s wait for layoutreturn\n", __func__);
20582c8d5fc3STrond Myklebust lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo));
20592c8d5fc3STrond Myklebust if (!IS_ERR(lseg)) {
20602c8d5fc3STrond Myklebust pnfs_put_layout_hdr(lo);
20612c8d5fc3STrond Myklebust dprintk("%s retrying\n", __func__);
20622c8d5fc3STrond Myklebust trace_pnfs_update_layout(ino, pos, count, iomode, lo,
20632c8d5fc3STrond Myklebust lseg,
20642c8d5fc3STrond Myklebust PNFS_UPDATE_LAYOUT_RETRY);
20652c8d5fc3STrond Myklebust goto lookup_again;
20662c8d5fc3STrond Myklebust }
20672c8d5fc3STrond Myklebust trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
20682c8d5fc3STrond Myklebust PNFS_UPDATE_LAYOUT_RETURN);
20692c8d5fc3STrond Myklebust goto out_put_layout_hdr;
20702c8d5fc3STrond Myklebust }
20712c8d5fc3STrond Myklebust
2072c7d73af2STom Haynes lseg = pnfs_find_lseg(lo, &arg, strict_iomode);
2073183d9e7bSJeff Layton if (lseg) {
2074183d9e7bSJeff Layton trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
2075183d9e7bSJeff Layton PNFS_UPDATE_LAYOUT_FOUND_CACHED);
2076183d9e7bSJeff Layton goto out_unlock;
2077183d9e7bSJeff Layton }
2078183d9e7bSJeff Layton
2079183d9e7bSJeff Layton /*
2080183d9e7bSJeff Layton * Choose a stateid for the LAYOUTGET. If we don't have a layout
2081183d9e7bSJeff Layton * stateid, or it has been invalidated, then we must use the open
2082183d9e7bSJeff Layton * stateid.
2083183d9e7bSJeff Layton */
208467a3b721STrond Myklebust if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
2085d9aba2b4STrond Myklebust int status;
2086183d9e7bSJeff Layton
2087183d9e7bSJeff Layton /*
2088183d9e7bSJeff Layton * The first layoutget for the file. Need to serialize per
20899bf87482SPeng Tao * RFC 5661 Errata 3208.
20909bf87482SPeng Tao */
20919bf87482SPeng Tao if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET,
20929bf87482SPeng Tao &lo->plh_flags)) {
20939bf87482SPeng Tao spin_unlock(&ino->i_lock);
2094d03360aaSTrond Myklebust lseg = ERR_PTR(wait_on_bit(&lo->plh_flags,
2095d03360aaSTrond Myklebust NFS_LAYOUT_FIRST_LAYOUTGET,
2096d03360aaSTrond Myklebust TASK_KILLABLE));
2097d03360aaSTrond Myklebust if (IS_ERR(lseg))
2098d03360aaSTrond Myklebust goto out_put_layout_hdr;
20999bf87482SPeng Tao pnfs_put_layout_hdr(lo);
2100183d9e7bSJeff Layton dprintk("%s retrying\n", __func__);
21019bf87482SPeng Tao goto lookup_again;
21029bf87482SPeng Tao }
2103183d9e7bSJeff Layton
2104fbf4bcc9STrond Myklebust spin_unlock(&ino->i_lock);
2105183d9e7bSJeff Layton first = true;
2106d9aba2b4STrond Myklebust status = nfs4_select_rw_stateid(ctx->state,
210770d2f7b1STrond Myklebust iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ,
2108d9aba2b4STrond Myklebust NULL, &stateid, NULL);
2109d9aba2b4STrond Myklebust if (status != 0) {
2110731c74ddSTrond Myklebust lseg = ERR_PTR(status);
211170d2f7b1STrond Myklebust trace_pnfs_update_layout(ino, pos, count,
211270d2f7b1STrond Myklebust iomode, lo, lseg,
211370d2f7b1STrond Myklebust PNFS_UPDATE_LAYOUT_INVALID_OPEN);
2114d9aba2b4STrond Myklebust nfs4_schedule_stateid_recovery(server, ctx->state);
2115d9aba2b4STrond Myklebust pnfs_clear_first_layoutget(lo);
2116d9aba2b4STrond Myklebust pnfs_put_layout_hdr(lo);
2117d9aba2b4STrond Myklebust goto lookup_again;
211870d2f7b1STrond Myklebust }
2119fbf4bcc9STrond Myklebust spin_lock(&ino->i_lock);
21209bf87482SPeng Tao } else {
2121183d9e7bSJeff Layton nfs4_stateid_copy(&stateid, &lo->plh_stateid);
21229a4bf31dSJeff Layton }
2123568e8c49SAndy Adamson
21249a4bf31dSJeff Layton if (pnfs_layoutgets_blocked(lo)) {
2125183d9e7bSJeff Layton trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
21269a4bf31dSJeff Layton PNFS_UPDATE_LAYOUT_BLOCKED);
2127cf7d63f1SFred Isaman goto out_unlock;
21289a4bf31dSJeff Layton }
2129411ae722STrond Myklebust nfs_layoutget_begin(lo);
2130f49f9baaSFred Isaman spin_unlock(&ino->i_lock);
213130005121SWeston Andros Adamson
213278746a38SFred Isaman _add_to_server_list(lo, server);
2133e5e94017SBenny Halevy
213409cbfeafSKirill A. Shutemov pg_offset = arg.offset & ~PAGE_MASK;
2135707ed5fdSBenny Halevy if (pg_offset) {
2136707ed5fdSBenny Halevy arg.offset -= pg_offset;
2137707ed5fdSBenny Halevy arg.length += pg_offset;
2138707ed5fdSBenny Halevy }
21397c24d948SAndy Adamson if (arg.length != NFS4_MAX_UINT64)
214009cbfeafSKirill A. Shutemov arg.length = PAGE_ALIGN(arg.length);
2141707ed5fdSBenny Halevy
21425e36e2a9SFred Isaman lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags);
2143587f03deSFred Isaman if (!lgp) {
21443764a17eSTrond Myklebust lseg = ERR_PTR(-ENOMEM);
2145587f03deSFred Isaman trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL,
2146587f03deSFred Isaman PNFS_UPDATE_LAYOUT_NOMEM);
2147411ae722STrond Myklebust nfs_layoutget_end(lo);
2148587f03deSFred Isaman goto out_put_layout_hdr;
2149587f03deSFred Isaman }
2150587f03deSFred Isaman
2151b4e89bcbSTrond Myklebust lgp->lo = lo;
2152b4e89bcbSTrond Myklebust pnfs_get_layout_hdr(lo);
2153b4e89bcbSTrond Myklebust
2154dacb452dSFred Isaman lseg = nfs4_proc_layoutget(lgp, &timeout);
2155183d9e7bSJeff Layton trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
2156183d9e7bSJeff Layton PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
2157411ae722STrond Myklebust nfs_layoutget_end(lo);
215883026d80SJeff Layton if (IS_ERR(lseg)) {
2159183d9e7bSJeff Layton switch(PTR_ERR(lseg)) {
2160e85d7ee4STrond Myklebust case -EBUSY:
2161183d9e7bSJeff Layton if (time_after(jiffies, giveup))
2162183d9e7bSJeff Layton lseg = NULL;
216366b53f32STrond Myklebust break;
216466b53f32STrond Myklebust case -ERECALLCONFLICT:
2165183d9e7bSJeff Layton case -EAGAIN:
216656b38a1fSTrond Myklebust break;
2167fe44fb23STrond Myklebust case -ENODATA:
2168fe44fb23STrond Myklebust /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */
2169fe44fb23STrond Myklebust pnfs_layout_set_fail_bit(
2170fe44fb23STrond Myklebust lo, pnfs_iomode_to_fail_bit(iomode));
2171fe44fb23STrond Myklebust lseg = NULL;
2172fe44fb23STrond Myklebust goto out_put_layout_hdr;
2173183d9e7bSJeff Layton default:
217483026d80SJeff Layton if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
217583026d80SJeff Layton pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
217683026d80SJeff Layton lseg = NULL;
217783026d80SJeff Layton }
217856b38a1fSTrond Myklebust goto out_put_layout_hdr;
217956b38a1fSTrond Myklebust }
218056b38a1fSTrond Myklebust if (lseg) {
218156b38a1fSTrond Myklebust if (first)
218256b38a1fSTrond Myklebust pnfs_clear_first_layoutget(lo);
218356b38a1fSTrond Myklebust trace_pnfs_update_layout(ino, pos, count,
218456b38a1fSTrond Myklebust iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
218556b38a1fSTrond Myklebust pnfs_put_layout_hdr(lo);
218656b38a1fSTrond Myklebust goto lookup_again;
2187183d9e7bSJeff Layton }
218883026d80SJeff Layton } else {
218983026d80SJeff Layton pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
219083026d80SJeff Layton }
219183026d80SJeff Layton
2192830ffb56STrond Myklebust out_put_layout_hdr:
2193d67ae825STom Haynes if (first)
2194d67ae825STom Haynes pnfs_clear_first_layoutget(lo);
2195d5b9216fSTrond Myklebust trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
2196d5b9216fSTrond Myklebust PNFS_UPDATE_LAYOUT_EXIT);
219770c3bd2bSTrond Myklebust pnfs_put_layout_hdr(lo);
2198e5e94017SBenny Halevy out:
2199f86bbcf8STrond Myklebust dprintk("%s: inode %s/%llu pNFS layout segment %s for "
2200f86bbcf8STrond Myklebust "(%s, offset: %llu, length: %llu)\n",
2201f86bbcf8STrond Myklebust __func__, ino->i_sb->s_id,
2202f86bbcf8STrond Myklebust (unsigned long long)NFS_FILEID(ino),
2203d600ad1fSPeng Tao IS_ERR_OR_NULL(lseg) ? "not found" : "found",
2204f86bbcf8STrond Myklebust iomode==IOMODE_RW ? "read/write" : "read-only",
2205f86bbcf8STrond Myklebust (unsigned long long)pos,
2206f86bbcf8STrond Myklebust (unsigned long long)count);
2207e5e94017SBenny Halevy return lseg;
2208e5e94017SBenny Halevy out_unlock:
2209e5e94017SBenny Halevy spin_unlock(&ino->i_lock);
2210830ffb56STrond Myklebust goto out_put_layout_hdr;
2211e5e94017SBenny Halevy }
22127c24d948SAndy Adamson EXPORT_SYMBOL_GPL(pnfs_update_layout);
2213b1f69b75SAndy Adamson
2214540d9864STrond Myklebust static bool
pnfs_sanity_check_layout_range(struct pnfs_layout_range * range)2215540d9864STrond Myklebust pnfs_sanity_check_layout_range(struct pnfs_layout_range *range)
2216540d9864STrond Myklebust {
2217540d9864STrond Myklebust switch (range->iomode) {
2218540d9864STrond Myklebust case IOMODE_READ:
2219540d9864STrond Myklebust case IOMODE_RW:
2220540d9864STrond Myklebust break;
2221540d9864STrond Myklebust default:
2222540d9864STrond Myklebust return false;
2223540d9864STrond Myklebust }
2224540d9864STrond Myklebust if (range->offset == NFS4_MAX_UINT64)
2225540d9864STrond Myklebust return false;
2226540d9864STrond Myklebust if (range->length == 0)
2227540d9864STrond Myklebust return false;
2228540d9864STrond Myklebust if (range->length != NFS4_MAX_UINT64 &&
2229540d9864STrond Myklebust range->length > NFS4_MAX_UINT64 - range->offset)
2230540d9864STrond Myklebust return false;
2231540d9864STrond Myklebust return true;
2232540d9864STrond Myklebust }
2233540d9864STrond Myklebust
223478746a38SFred Isaman static struct pnfs_layout_hdr *
_pnfs_grab_empty_layout(struct inode * ino,struct nfs_open_context * ctx)223578746a38SFred Isaman _pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx)
223678746a38SFred Isaman {
223778746a38SFred Isaman struct pnfs_layout_hdr *lo;
223878746a38SFred Isaman
223978746a38SFred Isaman spin_lock(&ino->i_lock);
224063d8a41bSTrond Myklebust lo = pnfs_find_alloc_layout(ino, ctx, nfs_io_gfp_mask());
224178746a38SFred Isaman if (!lo)
224278746a38SFred Isaman goto out_unlock;
224378746a38SFred Isaman if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
224478746a38SFred Isaman goto out_unlock;
224578746a38SFred Isaman if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
224678746a38SFred Isaman goto out_unlock;
224778746a38SFred Isaman if (pnfs_layoutgets_blocked(lo))
224878746a38SFred Isaman goto out_unlock;
224978746a38SFred Isaman if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags))
225078746a38SFred Isaman goto out_unlock;
2251411ae722STrond Myklebust nfs_layoutget_begin(lo);
225278746a38SFred Isaman spin_unlock(&ino->i_lock);
225378746a38SFred Isaman _add_to_server_list(lo, NFS_SERVER(ino));
225478746a38SFred Isaman return lo;
225578746a38SFred Isaman
225678746a38SFred Isaman out_unlock:
225778746a38SFred Isaman spin_unlock(&ino->i_lock);
225878746a38SFred Isaman pnfs_put_layout_hdr(lo);
225978746a38SFred Isaman return NULL;
226078746a38SFred Isaman }
226178746a38SFred Isaman
_lgopen_prepare_attached(struct nfs4_opendata * data,struct nfs_open_context * ctx)22622409a976SFred Isaman static void _lgopen_prepare_attached(struct nfs4_opendata *data,
22632409a976SFred Isaman struct nfs_open_context *ctx)
22642409a976SFred Isaman {
226578746a38SFred Isaman struct inode *ino = data->dentry->d_inode;
226678746a38SFred Isaman struct pnfs_layout_range rng = {
226778746a38SFred Isaman .iomode = (data->o_arg.fmode & FMODE_WRITE) ?
226878746a38SFred Isaman IOMODE_RW: IOMODE_READ,
226978746a38SFred Isaman .offset = 0,
227078746a38SFred Isaman .length = NFS4_MAX_UINT64,
227178746a38SFred Isaman };
227278746a38SFred Isaman struct nfs4_layoutget *lgp;
227378746a38SFred Isaman struct pnfs_layout_hdr *lo;
227478746a38SFred Isaman
227564294b08STrond Myklebust /* Heuristic: don't send layoutget if we have cached data */
227664294b08STrond Myklebust if (rng.iomode == IOMODE_READ &&
227764294b08STrond Myklebust (i_size_read(ino) == 0 || ino->i_mapping->nrpages != 0))
227864294b08STrond Myklebust return;
227964294b08STrond Myklebust
228078746a38SFred Isaman lo = _pnfs_grab_empty_layout(ino, ctx);
228178746a38SFred Isaman if (!lo)
228278746a38SFred Isaman return;
228363d8a41bSTrond Myklebust lgp = pnfs_alloc_init_layoutget_args(ino, ctx, ¤t_stateid, &rng,
228463d8a41bSTrond Myklebust nfs_io_gfp_mask());
228578746a38SFred Isaman if (!lgp) {
228678746a38SFred Isaman pnfs_clear_first_layoutget(lo);
2287cb2856c5STrond Myklebust nfs_layoutget_end(lo);
228878746a38SFred Isaman pnfs_put_layout_hdr(lo);
228978746a38SFred Isaman return;
229078746a38SFred Isaman }
2291b4e89bcbSTrond Myklebust lgp->lo = lo;
229278746a38SFred Isaman data->lgp = lgp;
229378746a38SFred Isaman data->o_arg.lg_args = &lgp->args;
229478746a38SFred Isaman data->o_res.lg_res = &lgp->res;
22952409a976SFred Isaman }
22962409a976SFred Isaman
_lgopen_prepare_floating(struct nfs4_opendata * data,struct nfs_open_context * ctx)22972409a976SFred Isaman static void _lgopen_prepare_floating(struct nfs4_opendata *data,
22982409a976SFred Isaman struct nfs_open_context *ctx)
22992409a976SFred Isaman {
2300b4e89bcbSTrond Myklebust struct inode *ino = data->dentry->d_inode;
23012409a976SFred Isaman struct pnfs_layout_range rng = {
23022409a976SFred Isaman .iomode = (data->o_arg.fmode & FMODE_WRITE) ?
23032409a976SFred Isaman IOMODE_RW: IOMODE_READ,
23042409a976SFred Isaman .offset = 0,
23052409a976SFred Isaman .length = NFS4_MAX_UINT64,
23062409a976SFred Isaman };
23072409a976SFred Isaman struct nfs4_layoutget *lgp;
23082409a976SFred Isaman
230963d8a41bSTrond Myklebust lgp = pnfs_alloc_init_layoutget_args(ino, ctx, ¤t_stateid, &rng,
231063d8a41bSTrond Myklebust nfs_io_gfp_mask());
23112409a976SFred Isaman if (!lgp)
23122409a976SFred Isaman return;
23132409a976SFred Isaman data->lgp = lgp;
23142409a976SFred Isaman data->o_arg.lg_args = &lgp->args;
23152409a976SFred Isaman data->o_res.lg_res = &lgp->res;
23162409a976SFred Isaman }
23172409a976SFred Isaman
pnfs_lgopen_prepare(struct nfs4_opendata * data,struct nfs_open_context * ctx)23182409a976SFred Isaman void pnfs_lgopen_prepare(struct nfs4_opendata *data,
23192409a976SFred Isaman struct nfs_open_context *ctx)
23202409a976SFred Isaman {
23212409a976SFred Isaman struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
23222409a976SFred Isaman
23232409a976SFred Isaman if (!(pnfs_enabled_sb(server) &&
23242409a976SFred Isaman server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN))
23252409a976SFred Isaman return;
23262409a976SFred Isaman /* Could check on max_ops, but currently hardcoded high enough */
23276e01260cSFred Isaman if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN))
23286e01260cSFred Isaman return;
2329b4e89bcbSTrond Myklebust if (data->lgp)
2330b4e89bcbSTrond Myklebust return;
23312409a976SFred Isaman if (data->state)
23322409a976SFred Isaman _lgopen_prepare_attached(data, ctx);
23332409a976SFred Isaman else
23342409a976SFred Isaman _lgopen_prepare_floating(data, ctx);
23352409a976SFred Isaman }
23362409a976SFred Isaman
pnfs_parse_lgopen(struct inode * ino,struct nfs4_layoutget * lgp,struct nfs_open_context * ctx)23372409a976SFred Isaman void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
23382409a976SFred Isaman struct nfs_open_context *ctx)
23392409a976SFred Isaman {
23402409a976SFred Isaman struct pnfs_layout_hdr *lo;
23412409a976SFred Isaman struct pnfs_layout_segment *lseg;
2342c49b5209SFred Isaman struct nfs_server *srv = NFS_SERVER(ino);
23432409a976SFred Isaman u32 iomode;
23442409a976SFred Isaman
23456e01260cSFred Isaman if (!lgp)
23462409a976SFred Isaman return;
23476e01260cSFred Isaman dprintk("%s: entered with status %i\n", __func__, lgp->res.status);
23486e01260cSFred Isaman if (lgp->res.status) {
23496e01260cSFred Isaman switch (lgp->res.status) {
23506e01260cSFred Isaman default:
23518dc96566STrond Myklebust break;
23528dc96566STrond Myklebust /*
23538dc96566STrond Myklebust * Halt lgopen attempts if the server doesn't recognise
23548dc96566STrond Myklebust * the "current stateid" value, the layout type, or the
23558dc96566STrond Myklebust * layoutget operation as being valid.
23568dc96566STrond Myklebust * Also if it complains about too many ops in the compound
23578dc96566STrond Myklebust * or of the request/reply being too big.
23586e01260cSFred Isaman */
23598dc96566STrond Myklebust case -NFS4ERR_BAD_STATEID:
23608dc96566STrond Myklebust case -NFS4ERR_NOTSUPP:
23618dc96566STrond Myklebust case -NFS4ERR_REP_TOO_BIG:
23628dc96566STrond Myklebust case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
23638dc96566STrond Myklebust case -NFS4ERR_REQ_TOO_BIG:
23648dc96566STrond Myklebust case -NFS4ERR_TOO_MANY_OPS:
23658dc96566STrond Myklebust case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
2366c49b5209SFred Isaman srv->caps &= ~NFS_CAP_LGOPEN;
23676e01260cSFred Isaman }
23686e01260cSFred Isaman return;
23696e01260cSFred Isaman }
2370b4e89bcbSTrond Myklebust if (!lgp->lo) {
237178746a38SFred Isaman lo = _pnfs_grab_empty_layout(ino, ctx);
237278746a38SFred Isaman if (!lo)
237378746a38SFred Isaman return;
2374b4e89bcbSTrond Myklebust lgp->lo = lo;
23752409a976SFred Isaman } else
2376b4e89bcbSTrond Myklebust lo = lgp->lo;
23772409a976SFred Isaman
23782409a976SFred Isaman lseg = pnfs_layout_process(lgp);
237932f1c28fSTrond Myklebust if (!IS_ERR(lseg)) {
23802409a976SFred Isaman iomode = lgp->args.range.iomode;
23812409a976SFred Isaman pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
23822409a976SFred Isaman pnfs_put_lseg(lseg);
23832409a976SFred Isaman }
238430ae2412SFred Isaman }
238530ae2412SFred Isaman
nfs4_lgopen_release(struct nfs4_layoutget * lgp)238630ae2412SFred Isaman void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
238730ae2412SFred Isaman {
238830ae2412SFred Isaman if (lgp != NULL) {
2389b4e89bcbSTrond Myklebust if (lgp->lo) {
2390b4e89bcbSTrond Myklebust pnfs_clear_first_layoutget(lgp->lo);
2391b4e89bcbSTrond Myklebust nfs_layoutget_end(lgp->lo);
239230ae2412SFred Isaman }
239330ae2412SFred Isaman pnfs_layoutget_free(lgp);
239430ae2412SFred Isaman }
23952409a976SFred Isaman }
23962409a976SFred Isaman
2397a0b0a6e3STrond Myklebust struct pnfs_layout_segment *
pnfs_layout_process(struct nfs4_layoutget * lgp)2398b1f69b75SAndy Adamson pnfs_layout_process(struct nfs4_layoutget *lgp)
2399b1f69b75SAndy Adamson {
2400b4e89bcbSTrond Myklebust struct pnfs_layout_hdr *lo = lgp->lo;
2401b1f69b75SAndy Adamson struct nfs4_layoutget_res *res = &lgp->res;
2402b1f69b75SAndy Adamson struct pnfs_layout_segment *lseg;
2403b7edfaa1SFred Isaman struct inode *ino = lo->plh_inode;
240478096ccaSTrond Myklebust LIST_HEAD(free_me);
2405540d9864STrond Myklebust
2406540d9864STrond Myklebust if (!pnfs_sanity_check_layout_range(&res->range))
24071b3c6d07SJeff Layton return ERR_PTR(-EINVAL);
2408b1f69b75SAndy Adamson
2409b1f69b75SAndy Adamson /* Inject layout blob into I/O device driver */
2410a75b9df9STrond Myklebust lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
24111b3c6d07SJeff Layton if (IS_ERR_OR_NULL(lseg)) {
2412b1f69b75SAndy Adamson if (!lseg)
24131b3c6d07SJeff Layton lseg = ERR_PTR(-ENOMEM);
24141b3c6d07SJeff Layton
24151b3c6d07SJeff Layton dprintk("%s: Could not allocate layout: error %ld\n",
24161b3c6d07SJeff Layton __func__, PTR_ERR(lseg));
24171b3c6d07SJeff Layton return lseg;
2418b1f69b75SAndy Adamson }
2419b1f69b75SAndy Adamson
2420119cef97STrond Myklebust pnfs_init_lseg(lo, lseg, &res->range, &res->stateid);
24211013df61SChristoph Hellwig
2422b1f69b75SAndy Adamson spin_lock(&ino->i_lock);
2423e1c06f80STrond Myklebust if (pnfs_layoutgets_blocked(lo)) {
242443f1b3daSFred Isaman dprintk("%s forget reply due to state\n", __func__);
24251b3c6d07SJeff Layton goto out_forget;
242643f1b3daSFred Isaman }
2427038d6493STrond Myklebust
2428880265c7STrond Myklebust if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
2429880265c7STrond Myklebust !pnfs_is_first_layoutget(lo))
24300b77f97aSTrond Myklebust goto out_forget;
24310b77f97aSTrond Myklebust
2432d29b468dSTrond Myklebust if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
2433362f7474SChristoph Hellwig /* existing state ID, make sure the sequence number matches. */
2434362f7474SChristoph Hellwig if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
24350b77f97aSTrond Myklebust if (!pnfs_layout_is_valid(lo))
2436d29b468dSTrond Myklebust lo->plh_barrier = 0;
2437362f7474SChristoph Hellwig dprintk("%s forget reply due to sequence\n", __func__);
24381b3c6d07SJeff Layton goto out_forget;
2439362f7474SChristoph Hellwig }
244059b56394STrond Myklebust pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, false);
2441d29b468dSTrond Myklebust } else if (pnfs_layout_is_valid(lo)) {
2442362f7474SChristoph Hellwig /*
2443362f7474SChristoph Hellwig * We got an entirely new state ID. Mark all segments for the
24449888d837STrond Myklebust * inode invalid, and retry the layoutget
2445362f7474SChristoph Hellwig */
244608bd8dbeSTrond Myklebust struct pnfs_layout_range range = {
244708bd8dbeSTrond Myklebust .iomode = IOMODE_ANY,
244808bd8dbeSTrond Myklebust .length = NFS4_MAX_UINT64,
244908bd8dbeSTrond Myklebust };
2450fb700ef0STrond Myklebust pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0);
24519888d837STrond Myklebust goto out_forget;
2452d29b468dSTrond Myklebust } else {
2453d29b468dSTrond Myklebust /* We have a completely new layout */
2454d29b468dSTrond Myklebust pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true);
2455362f7474SChristoph Hellwig }
2456038d6493STrond Myklebust
24579369a431STrond Myklebust pnfs_get_lseg(lseg);
245803772d2fSTrond Myklebust pnfs_layout_insert_lseg(lo, lseg, &free_me);
24598e0acf90STrond Myklebust
2460b1f69b75SAndy Adamson
24613976143bSPeng Tao if (res->return_on_close)
2462f7e8917aSFred Isaman set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
2463f7e8917aSFred Isaman
2464b1f69b75SAndy Adamson spin_unlock(&ino->i_lock);
246578096ccaSTrond Myklebust pnfs_free_lseg_list(&free_me);
2466a0b0a6e3STrond Myklebust return lseg;
246743f1b3daSFred Isaman
24681b3c6d07SJeff Layton out_forget:
246943f1b3daSFred Isaman spin_unlock(&ino->i_lock);
247043f1b3daSFred Isaman lseg->pls_layout = lo;
247143f1b3daSFred Isaman NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
24721b3c6d07SJeff Layton return ERR_PTR(-EAGAIN);
2473b1f69b75SAndy Adamson }
2474b1f69b75SAndy Adamson
24752f215968STrond Myklebust /**
24762f215968STrond Myklebust * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
24772f215968STrond Myklebust * @lo: pointer to layout header
24782f215968STrond Myklebust * @tmp_list: list header to be used with pnfs_free_lseg_list()
24792f215968STrond Myklebust * @return_range: describe layout segment ranges to be returned
2480e0b7d420STrond Myklebust * @seq: stateid seqid to match
24812f215968STrond Myklebust *
24822f215968STrond Myklebust * This function is mainly intended for use by layoutrecall. It attempts
24832f215968STrond Myklebust * to free the layout segment immediately, or else to mark it for return
24842f215968STrond Myklebust * as soon as its reference count drops to zero.
2485e0b7d420STrond Myklebust *
2486e0b7d420STrond Myklebust * Returns
2487e0b7d420STrond Myklebust * - 0: a layoutreturn needs to be scheduled.
2488e0b7d420STrond Myklebust * - EBUSY: there are layout segment that are still in use.
2489e0b7d420STrond Myklebust * - ENOENT: there are no layout segments that need to be returned.
24902f215968STrond Myklebust */
249110335556STrond Myklebust int
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr * lo,struct list_head * tmp_list,const struct pnfs_layout_range * return_range,u32 seq)2492016256dfSPeng Tao pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
2493016256dfSPeng Tao struct list_head *tmp_list,
24946d597e17SJeff Layton const struct pnfs_layout_range *return_range,
24956d597e17SJeff Layton u32 seq)
2496016256dfSPeng Tao {
2497016256dfSPeng Tao struct pnfs_layout_segment *lseg, *next;
2498b739a5bdSTrond Myklebust struct nfs_server *server = NFS_SERVER(lo->plh_inode);
249910335556STrond Myklebust int remaining = 0;
2500016256dfSPeng Tao
2501016256dfSPeng Tao dprintk("%s:Begin lo %p\n", __func__, lo);
2502016256dfSPeng Tao
2503fc7ff367STrond Myklebust assert_spin_locked(&lo->plh_inode->i_lock);
2504016256dfSPeng Tao
250539fd0186STrond Myklebust if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
250639fd0186STrond Myklebust tmp_list = &lo->plh_return_segs;
250739fd0186STrond Myklebust
2508016256dfSPeng Tao list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
2509e036f464STrond Myklebust if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
2510016256dfSPeng Tao dprintk("%s: marking lseg %p iomode %d "
2511016256dfSPeng Tao "offset %llu length %llu\n", __func__,
2512016256dfSPeng Tao lseg, lseg->pls_range.iomode,
2513016256dfSPeng Tao lseg->pls_range.offset,
2514016256dfSPeng Tao lseg->pls_range.length);
251539fd0186STrond Myklebust if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
251639fd0186STrond Myklebust tmp_list = &lo->plh_return_segs;
2517ff041727STrond Myklebust if (mark_lseg_invalid(lseg, tmp_list))
25182f215968STrond Myklebust continue;
25192f215968STrond Myklebust remaining++;
2520016256dfSPeng Tao set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
2521b739a5bdSTrond Myklebust pnfs_lseg_cancel_io(server, lseg);
2522016256dfSPeng Tao }
25236d597e17SJeff Layton
2524e0b7d420STrond Myklebust if (remaining) {
25256d597e17SJeff Layton pnfs_set_plh_return_info(lo, return_range->iomode, seq);
2526e0b7d420STrond Myklebust return -EBUSY;
2527e0b7d420STrond Myklebust }
25286d597e17SJeff Layton
2529e0b7d420STrond Myklebust if (!list_empty(&lo->plh_return_segs)) {
2530e0b7d420STrond Myklebust pnfs_set_plh_return_info(lo, return_range->iomode, seq);
2531e0b7d420STrond Myklebust return 0;
2532e0b7d420STrond Myklebust }
2533e0b7d420STrond Myklebust
2534e0b7d420STrond Myklebust return -ENOENT;
2535016256dfSPeng Tao }
2536016256dfSPeng Tao
2537b5fdf841STrond Myklebust static void
pnfs_mark_layout_for_return(struct inode * inode,const struct pnfs_layout_range * range)2538b5fdf841STrond Myklebust pnfs_mark_layout_for_return(struct inode *inode,
2539b5fdf841STrond Myklebust const struct pnfs_layout_range *range)
2540016256dfSPeng Tao {
2541b5fdf841STrond Myklebust struct pnfs_layout_hdr *lo;
254210335556STrond Myklebust bool return_now = false;
2543016256dfSPeng Tao
2544016256dfSPeng Tao spin_lock(&inode->i_lock);
2545b5fdf841STrond Myklebust lo = NFS_I(inode)->layout;
2546bdebfccdSTrond Myklebust if (!pnfs_layout_is_valid(lo)) {
2547bdebfccdSTrond Myklebust spin_unlock(&inode->i_lock);
2548bdebfccdSTrond Myklebust return;
2549bdebfccdSTrond Myklebust }
2550b5fdf841STrond Myklebust pnfs_set_plh_return_info(lo, range->iomode, 0);
2551016256dfSPeng Tao /*
2552016256dfSPeng Tao * mark all matching lsegs so that we are sure to have no live
2553016256dfSPeng Tao * segments at hand when sending layoutreturn. See pnfs_put_lseg()
2554016256dfSPeng Tao * for how it works.
2555016256dfSPeng Tao */
2556b5fdf841STrond Myklebust if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) {
255744ea8dfcSTrond Myklebust const struct cred *cred;
255810335556STrond Myklebust nfs4_stateid stateid;
2559e5fd1904STrond Myklebust enum pnfs_iomode iomode;
256010335556STrond Myklebust
256144ea8dfcSTrond Myklebust return_now = pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode);
2562016256dfSPeng Tao spin_unlock(&inode->i_lock);
256310335556STrond Myklebust if (return_now)
256444ea8dfcSTrond Myklebust pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
256510335556STrond Myklebust } else {
256610335556STrond Myklebust spin_unlock(&inode->i_lock);
2567b20135d0STrond Myklebust nfs_commit_inode(inode, 0);
2568016256dfSPeng Tao }
2569016256dfSPeng Tao }
2570b5fdf841STrond Myklebust
pnfs_error_mark_layout_for_return(struct inode * inode,struct pnfs_layout_segment * lseg)2571b5fdf841STrond Myklebust void pnfs_error_mark_layout_for_return(struct inode *inode,
2572b5fdf841STrond Myklebust struct pnfs_layout_segment *lseg)
2573b5fdf841STrond Myklebust {
2574b5fdf841STrond Myklebust struct pnfs_layout_range range = {
2575b5fdf841STrond Myklebust .iomode = lseg->pls_range.iomode,
2576b5fdf841STrond Myklebust .offset = 0,
2577b5fdf841STrond Myklebust .length = NFS4_MAX_UINT64,
2578b5fdf841STrond Myklebust };
2579b5fdf841STrond Myklebust
2580b5fdf841STrond Myklebust pnfs_mark_layout_for_return(inode, &range);
2581b5fdf841STrond Myklebust }
2582016256dfSPeng Tao EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
2583016256dfSPeng Tao
2584b5fdf841STrond Myklebust static bool
pnfs_layout_can_be_returned(struct pnfs_layout_hdr * lo)2585b5fdf841STrond Myklebust pnfs_layout_can_be_returned(struct pnfs_layout_hdr *lo)
2586b5fdf841STrond Myklebust {
2587b5fdf841STrond Myklebust return pnfs_layout_is_valid(lo) &&
2588b5fdf841STrond Myklebust !test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) &&
2589b5fdf841STrond Myklebust !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
2590b5fdf841STrond Myklebust }
2591b5fdf841STrond Myklebust
2592b5fdf841STrond Myklebust static struct pnfs_layout_segment *
pnfs_find_first_lseg(struct pnfs_layout_hdr * lo,const struct pnfs_layout_range * range,enum pnfs_iomode iomode)2593b5fdf841STrond Myklebust pnfs_find_first_lseg(struct pnfs_layout_hdr *lo,
2594b5fdf841STrond Myklebust const struct pnfs_layout_range *range,
2595b5fdf841STrond Myklebust enum pnfs_iomode iomode)
2596b5fdf841STrond Myklebust {
2597b5fdf841STrond Myklebust struct pnfs_layout_segment *lseg;
2598b5fdf841STrond Myklebust
2599b5fdf841STrond Myklebust list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
2600b5fdf841STrond Myklebust if (!test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
2601b5fdf841STrond Myklebust continue;
2602b5fdf841STrond Myklebust if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
2603b5fdf841STrond Myklebust continue;
2604b5fdf841STrond Myklebust if (lseg->pls_range.iomode != iomode && iomode != IOMODE_ANY)
2605b5fdf841STrond Myklebust continue;
2606b5fdf841STrond Myklebust if (pnfs_lseg_range_intersecting(&lseg->pls_range, range))
2607b5fdf841STrond Myklebust return lseg;
2608b5fdf841STrond Myklebust }
2609b5fdf841STrond Myklebust return NULL;
2610b5fdf841STrond Myklebust }
2611b5fdf841STrond Myklebust
2612b5fdf841STrond Myklebust /* Find open file states whose mode matches that of the range */
2613b5fdf841STrond Myklebust static bool
pnfs_should_return_unused_layout(struct pnfs_layout_hdr * lo,const struct pnfs_layout_range * range)2614b5fdf841STrond Myklebust pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
2615b5fdf841STrond Myklebust const struct pnfs_layout_range *range)
2616b5fdf841STrond Myklebust {
2617b5fdf841STrond Myklebust struct list_head *head;
2618b5fdf841STrond Myklebust struct nfs_open_context *ctx;
2619b5fdf841STrond Myklebust fmode_t mode = 0;
2620b5fdf841STrond Myklebust
2621b5fdf841STrond Myklebust if (!pnfs_layout_can_be_returned(lo) ||
2622b5fdf841STrond Myklebust !pnfs_find_first_lseg(lo, range, range->iomode))
2623b5fdf841STrond Myklebust return false;
2624b5fdf841STrond Myklebust
2625b5fdf841STrond Myklebust head = &NFS_I(lo->plh_inode)->open_files;
2626b5fdf841STrond Myklebust list_for_each_entry_rcu(ctx, head, list) {
2627b5fdf841STrond Myklebust if (ctx->state)
2628b5fdf841STrond Myklebust mode |= ctx->state->state & (FMODE_READ|FMODE_WRITE);
2629b5fdf841STrond Myklebust }
2630b5fdf841STrond Myklebust
2631b5fdf841STrond Myklebust switch (range->iomode) {
2632b5fdf841STrond Myklebust default:
2633b5fdf841STrond Myklebust break;
2634b5fdf841STrond Myklebust case IOMODE_READ:
2635b5fdf841STrond Myklebust mode &= ~FMODE_WRITE;
2636b5fdf841STrond Myklebust break;
2637b5fdf841STrond Myklebust case IOMODE_RW:
2638b5fdf841STrond Myklebust if (pnfs_find_first_lseg(lo, range, IOMODE_READ))
2639b5fdf841STrond Myklebust mode &= ~FMODE_READ;
2640b5fdf841STrond Myklebust }
2641b5fdf841STrond Myklebust return mode == 0;
2642b5fdf841STrond Myklebust }
2643b5fdf841STrond Myklebust
pnfs_layout_return_unused_byserver(struct nfs_server * server,void * data)2644f6395572STrond Myklebust static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
2645f6395572STrond Myklebust void *data)
2646b5fdf841STrond Myklebust {
2647b5fdf841STrond Myklebust const struct pnfs_layout_range *range = data;
2648f6395572STrond Myklebust const struct cred *cred;
2649b5fdf841STrond Myklebust struct pnfs_layout_hdr *lo;
2650b5fdf841STrond Myklebust struct inode *inode;
2651f6395572STrond Myklebust nfs4_stateid stateid;
2652f6395572STrond Myklebust enum pnfs_iomode iomode;
2653f6395572STrond Myklebust
2654b5fdf841STrond Myklebust restart:
2655b5fdf841STrond Myklebust rcu_read_lock();
2656b5fdf841STrond Myklebust list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
2657f6395572STrond Myklebust inode = lo->plh_inode;
2658f6395572STrond Myklebust if (!inode || !pnfs_layout_can_be_returned(lo) ||
2659b5fdf841STrond Myklebust test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
2660b5fdf841STrond Myklebust continue;
2661b5fdf841STrond Myklebust spin_lock(&inode->i_lock);
2662f6395572STrond Myklebust if (!lo->plh_inode ||
2663f6395572STrond Myklebust !pnfs_should_return_unused_layout(lo, range)) {
2664b5fdf841STrond Myklebust spin_unlock(&inode->i_lock);
2665b5fdf841STrond Myklebust continue;
2666b5fdf841STrond Myklebust }
2667f6395572STrond Myklebust pnfs_get_layout_hdr(lo);
2668f6395572STrond Myklebust pnfs_set_plh_return_info(lo, range->iomode, 0);
2669f6395572STrond Myklebust if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
2670f6395572STrond Myklebust range, 0) != 0 ||
2671f6395572STrond Myklebust !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
2672b5fdf841STrond Myklebust spin_unlock(&inode->i_lock);
2673b5fdf841STrond Myklebust rcu_read_unlock();
2674f6395572STrond Myklebust pnfs_put_layout_hdr(lo);
2675f6395572STrond Myklebust cond_resched();
2676f6395572STrond Myklebust goto restart;
2677f6395572STrond Myklebust }
2678f6395572STrond Myklebust spin_unlock(&inode->i_lock);
2679f6395572STrond Myklebust rcu_read_unlock();
2680f6395572STrond Myklebust pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
2681f6395572STrond Myklebust pnfs_put_layout_hdr(lo);
2682b5fdf841STrond Myklebust cond_resched();
2683b5fdf841STrond Myklebust goto restart;
2684b5fdf841STrond Myklebust }
2685b5fdf841STrond Myklebust rcu_read_unlock();
2686b5fdf841STrond Myklebust return 0;
2687b5fdf841STrond Myklebust }
2688b5fdf841STrond Myklebust
2689b5fdf841STrond Myklebust void
pnfs_layout_return_unused_byclid(struct nfs_client * clp,enum pnfs_iomode iomode)2690b5fdf841STrond Myklebust pnfs_layout_return_unused_byclid(struct nfs_client *clp,
2691b5fdf841STrond Myklebust enum pnfs_iomode iomode)
2692b5fdf841STrond Myklebust {
2693b5fdf841STrond Myklebust struct pnfs_layout_range range = {
2694b5fdf841STrond Myklebust .iomode = iomode,
2695b5fdf841STrond Myklebust .offset = 0,
2696b5fdf841STrond Myklebust .length = NFS4_MAX_UINT64,
2697b5fdf841STrond Myklebust };
2698b5fdf841STrond Myklebust
2699b5fdf841STrond Myklebust nfs_client_for_each_server(clp, pnfs_layout_return_unused_byserver,
2700b5fdf841STrond Myklebust &range);
2701b5fdf841STrond Myklebust }
2702b5fdf841STrond Myklebust
2703d8007d4dSTrond Myklebust void
pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor * pgio)2704b3230e80STrond Myklebust pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
2705b3230e80STrond Myklebust {
2706b3230e80STrond Myklebust if (pgio->pg_lseg == NULL ||
2707b3230e80STrond Myklebust test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
2708b3230e80STrond Myklebust return;
2709b3230e80STrond Myklebust pnfs_put_lseg(pgio->pg_lseg);
2710b3230e80STrond Myklebust pgio->pg_lseg = NULL;
2711b3230e80STrond Myklebust }
2712b3230e80STrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
2713b3230e80STrond Myklebust
271408cb5b0fSBenjamin Coddington /*
271508cb5b0fSBenjamin Coddington * Check for any intersection between the request and the pgio->pg_lseg,
271608cb5b0fSBenjamin Coddington * and if none, put this pgio->pg_lseg away.
271708cb5b0fSBenjamin Coddington */
2718e1e54ab7STrond Myklebust void
pnfs_generic_pg_check_range(struct nfs_pageio_descriptor * pgio,struct nfs_page * req)271908cb5b0fSBenjamin Coddington pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
272008cb5b0fSBenjamin Coddington {
272108cb5b0fSBenjamin Coddington if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
272208cb5b0fSBenjamin Coddington pnfs_put_lseg(pgio->pg_lseg);
272308cb5b0fSBenjamin Coddington pgio->pg_lseg = NULL;
272408cb5b0fSBenjamin Coddington }
272508cb5b0fSBenjamin Coddington }
2726e1e54ab7STrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range);
272708cb5b0fSBenjamin Coddington
2728b3230e80STrond Myklebust void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor * pgio,struct nfs_page * req)2729d8007d4dSTrond Myklebust pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
2730d8007d4dSTrond Myklebust {
2731d1d97395SYang Li u64 rd_size;
27321fd937bdSPeng Tao
2733b3230e80STrond Myklebust pnfs_generic_pg_check_layout(pgio);
273408cb5b0fSBenjamin Coddington pnfs_generic_pg_check_range(pgio, req);
2735cb5d04bcSPeng Tao if (pgio->pg_lseg == NULL) {
27361fd937bdSPeng Tao if (pgio->pg_dreq == NULL)
27371fd937bdSPeng Tao rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
27381fd937bdSPeng Tao else
273975aa038dSTrond Myklebust rd_size = nfs_dreq_bytes_left(pgio->pg_dreq,
274075aa038dSTrond Myklebust req_offset(req));
27411fd937bdSPeng Tao
274263d8a41bSTrond Myklebust pgio->pg_lseg =
274363d8a41bSTrond Myklebust pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
274463d8a41bSTrond Myklebust req_offset(req), rd_size,
274563d8a41bSTrond Myklebust IOMODE_READ, false,
274663d8a41bSTrond Myklebust nfs_io_gfp_mask());
2747d600ad1fSPeng Tao if (IS_ERR(pgio->pg_lseg)) {
2748d600ad1fSPeng Tao pgio->pg_error = PTR_ERR(pgio->pg_lseg);
2749d600ad1fSPeng Tao pgio->pg_lseg = NULL;
2750d600ad1fSPeng Tao return;
2751d600ad1fSPeng Tao }
2752cb5d04bcSPeng Tao }
2753e885de1aSTrond Myklebust /* If no lseg, fall back to read through mds */
2754e885de1aSTrond Myklebust if (pgio->pg_lseg == NULL)
27551f945357STrond Myklebust nfs_pageio_reset_read_mds(pgio);
2756e885de1aSTrond Myklebust
2757d8007d4dSTrond Myklebust }
2758d8007d4dSTrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
2759d8007d4dSTrond Myklebust
2760d8007d4dSTrond Myklebust void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor * pgio,struct nfs_page * req,u64 wb_size)27616296556fSPeng Tao pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
27626296556fSPeng Tao struct nfs_page *req, u64 wb_size)
2763d8007d4dSTrond Myklebust {
2764b3230e80STrond Myklebust pnfs_generic_pg_check_layout(pgio);
276508cb5b0fSBenjamin Coddington pnfs_generic_pg_check_range(pgio, req);
2766d600ad1fSPeng Tao if (pgio->pg_lseg == NULL) {
276763d8a41bSTrond Myklebust pgio->pg_lseg =
276863d8a41bSTrond Myklebust pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
276963d8a41bSTrond Myklebust req_offset(req), wb_size, IOMODE_RW,
277063d8a41bSTrond Myklebust false, nfs_io_gfp_mask());
2771d600ad1fSPeng Tao if (IS_ERR(pgio->pg_lseg)) {
2772d600ad1fSPeng Tao pgio->pg_error = PTR_ERR(pgio->pg_lseg);
2773d600ad1fSPeng Tao pgio->pg_lseg = NULL;
2774d600ad1fSPeng Tao return;
2775d600ad1fSPeng Tao }
2776d600ad1fSPeng Tao }
2777e885de1aSTrond Myklebust /* If no lseg, fall back to write through mds */
2778e885de1aSTrond Myklebust if (pgio->pg_lseg == NULL)
27791f945357STrond Myklebust nfs_pageio_reset_write_mds(pgio);
2780d8007d4dSTrond Myklebust }
2781d8007d4dSTrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
2782d8007d4dSTrond Myklebust
2783180bb5ecSWeston Andros Adamson void
pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor * desc)2784180bb5ecSWeston Andros Adamson pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
2785180bb5ecSWeston Andros Adamson {
2786180bb5ecSWeston Andros Adamson if (desc->pg_lseg) {
2787180bb5ecSWeston Andros Adamson pnfs_put_lseg(desc->pg_lseg);
2788180bb5ecSWeston Andros Adamson desc->pg_lseg = NULL;
2789180bb5ecSWeston Andros Adamson }
2790180bb5ecSWeston Andros Adamson }
2791180bb5ecSWeston Andros Adamson EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);
2792180bb5ecSWeston Andros Adamson
2793b4fdac1aSWeston Andros Adamson /*
2794b4fdac1aSWeston Andros Adamson * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
2795b4fdac1aSWeston Andros Adamson * of bytes (maximum @req->wb_bytes) that can be coalesced.
2796b4fdac1aSWeston Andros Adamson */
2797b4fdac1aSWeston Andros Adamson size_t
pnfs_generic_pg_test(struct nfs_pageio_descriptor * pgio,struct nfs_page * prev,struct nfs_page * req)2798a7d42ddbSWeston Andros Adamson pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
2799a7d42ddbSWeston Andros Adamson struct nfs_page *prev, struct nfs_page *req)
2800bae724efSFred Isaman {
28010f9c429eSWeston Andros Adamson unsigned int size;
2802c5e20cb7SWeston Andros Adamson u64 seg_end, req_start, seg_left;
28030f9c429eSWeston Andros Adamson
28040f9c429eSWeston Andros Adamson size = nfs_generic_pg_test(pgio, prev, req);
28050f9c429eSWeston Andros Adamson if (!size)
28060f9c429eSWeston Andros Adamson return 0;
2807bae724efSFred Isaman
280819982ba8STrond Myklebust /*
2809c5e20cb7SWeston Andros Adamson * 'size' contains the number of bytes left in the current page (up
2810c5e20cb7SWeston Andros Adamson * to the original size asked for in @req->wb_bytes).
2811c5e20cb7SWeston Andros Adamson *
2812c5e20cb7SWeston Andros Adamson * Calculate how many bytes are left in the layout segment
2813c5e20cb7SWeston Andros Adamson * and if there are less bytes than 'size', return that instead.
281419982ba8STrond Myklebust *
281519982ba8STrond Myklebust * Please also note that 'end_offset' is actually the offset of the
281619982ba8STrond Myklebust * first byte that lies outside the pnfs_layout_range. FIXME?
281719982ba8STrond Myklebust *
281819982ba8STrond Myklebust */
281919b54848SWeston Andros Adamson if (pgio->pg_lseg) {
282017822b20STrond Myklebust seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset,
282119982ba8STrond Myklebust pgio->pg_lseg->pls_range.length);
2822c5e20cb7SWeston Andros Adamson req_start = req_offset(req);
282308cb5b0fSBenjamin Coddington
2824c5e20cb7SWeston Andros Adamson /* start of request is past the last byte of this segment */
282508cb5b0fSBenjamin Coddington if (req_start >= seg_end)
2826b4fdac1aSWeston Andros Adamson return 0;
2827c5e20cb7SWeston Andros Adamson
2828c5e20cb7SWeston Andros Adamson /* adjust 'size' iff there are fewer bytes left in the
2829c5e20cb7SWeston Andros Adamson * segment than what nfs_generic_pg_test returned */
2830c5e20cb7SWeston Andros Adamson seg_left = seg_end - req_start;
2831c5e20cb7SWeston Andros Adamson if (seg_left < size)
2832c5e20cb7SWeston Andros Adamson size = (unsigned int)seg_left;
283319b54848SWeston Andros Adamson }
28340f9c429eSWeston Andros Adamson
283519b54848SWeston Andros Adamson return size;
2836bae724efSFred Isaman }
283789a58e32SBenny Halevy EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
2838bae724efSFred Isaman
pnfs_write_done_resend_to_mds(struct nfs_pgio_header * hdr)283953113ad3SWeston Andros Adamson int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
2840e2fecb21STrond Myklebust {
2841e2fecb21STrond Myklebust struct nfs_pageio_descriptor pgio;
2842e2fecb21STrond Myklebust
2843e2fecb21STrond Myklebust /* Resend all requests through the MDS */
284453113ad3SWeston Andros Adamson nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
284553113ad3SWeston Andros Adamson hdr->completion_ops);
284653113ad3SWeston Andros Adamson return nfs_pageio_resend(&pgio, hdr);
2847e2fecb21STrond Myklebust }
2848e7dd79afSAndy Adamson EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
2849e2fecb21STrond Myklebust
pnfs_ld_handle_write_error(struct nfs_pgio_header * hdr)2850d45f60c6SWeston Andros Adamson static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
28511acbbb4eSFred Isaman {
2852cd841605SFred Isaman
2853cd841605SFred Isaman dprintk("pnfs write error = %d\n", hdr->pnfs_error);
2854cd841605SFred Isaman if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
28551acbbb4eSFred Isaman PNFS_LAYOUTRET_ON_ERROR) {
2856cd841605SFred Isaman pnfs_return_layout(hdr->inode);
28571acbbb4eSFred Isaman }
28586c75dc0dSFred Isaman if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
285953113ad3SWeston Andros Adamson hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
28601acbbb4eSFred Isaman }
28611acbbb4eSFred Isaman
2862d20581aaSBenny Halevy /*
2863d20581aaSBenny Halevy * Called by non rpc-based layout drivers
2864d20581aaSBenny Halevy */
pnfs_ld_write_done(struct nfs_pgio_header * hdr)2865d45f60c6SWeston Andros Adamson void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
286694ad1c80SFred Isaman {
2867f8417b48SKinglong Mee if (likely(!hdr->pnfs_error)) {
286867af7611STrond Myklebust pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
286967af7611STrond Myklebust hdr->mds_offset + hdr->res.count);
2870d45f60c6SWeston Andros Adamson hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
2871f8417b48SKinglong Mee }
2872f8417b48SKinglong Mee trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
2873f8417b48SKinglong Mee if (unlikely(hdr->pnfs_error))
2874d45f60c6SWeston Andros Adamson pnfs_ld_handle_write_error(hdr);
2875d45f60c6SWeston Andros Adamson hdr->mds_ops->rpc_release(hdr);
287644b83799SFred Isaman }
2877d20581aaSBenny Halevy EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
287844b83799SFred Isaman
2879dce81290STrond Myklebust static void
pnfs_write_through_mds(struct nfs_pageio_descriptor * desc,struct nfs_pgio_header * hdr)2880dce81290STrond Myklebust pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
2881d45f60c6SWeston Andros Adamson struct nfs_pgio_header *hdr)
2882dce81290STrond Myklebust {
288348d635f1SPeng Tao struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
2884a7d42ddbSWeston Andros Adamson
28856c75dc0dSFred Isaman if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
2886a7d42ddbSWeston Andros Adamson list_splice_tail_init(&hdr->pages, &mirror->pg_list);
2887dce81290STrond Myklebust nfs_pageio_reset_write_mds(desc);
2888a7d42ddbSWeston Andros Adamson mirror->pg_recoalesce = 1;
28896c75dc0dSFred Isaman }
2890ba4a76f7SScott Mayhew hdr->completion_ops->completion(hdr);
2891dce81290STrond Myklebust }
2892dce81290STrond Myklebust
2893dce81290STrond Myklebust static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_pgio_header * hdr,const struct rpc_call_ops * call_ops,struct pnfs_layout_segment * lseg,int how)2894d45f60c6SWeston Andros Adamson pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
2895dce81290STrond Myklebust const struct rpc_call_ops *call_ops,
2896dce81290STrond Myklebust struct pnfs_layout_segment *lseg,
2897dce81290STrond Myklebust int how)
28980382b744SAndy Adamson {
2899cd841605SFred Isaman struct inode *inode = hdr->inode;
29000382b744SAndy Adamson enum pnfs_try_status trypnfs;
29010382b744SAndy Adamson struct nfs_server *nfss = NFS_SERVER(inode);
29020382b744SAndy Adamson
2903cd841605SFred Isaman hdr->mds_ops = call_ops;
29040382b744SAndy Adamson
29050382b744SAndy Adamson dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
2906d45f60c6SWeston Andros Adamson inode->i_ino, hdr->args.count, hdr->args.offset, how);
2907d45f60c6SWeston Andros Adamson trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
29086c75dc0dSFred Isaman if (trypnfs != PNFS_NOT_ATTEMPTED)
29090382b744SAndy Adamson nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
29100382b744SAndy Adamson dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
29110382b744SAndy Adamson return trypnfs;
29120382b744SAndy Adamson }
29130382b744SAndy Adamson
2914dce81290STrond Myklebust static void
pnfs_do_write(struct nfs_pageio_descriptor * desc,struct nfs_pgio_header * hdr,int how)29157f714720SWeston Andros Adamson pnfs_do_write(struct nfs_pageio_descriptor *desc,
29167f714720SWeston Andros Adamson struct nfs_pgio_header *hdr, int how)
2917dce81290STrond Myklebust {
2918dce81290STrond Myklebust const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
2919dce81290STrond Myklebust struct pnfs_layout_segment *lseg = desc->pg_lseg;
2920dce81290STrond Myklebust enum pnfs_try_status trypnfs;
2921dce81290STrond Myklebust
2922d45f60c6SWeston Andros Adamson trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
292337f8aa16STrond Myklebust switch (trypnfs) {
292437f8aa16STrond Myklebust case PNFS_NOT_ATTEMPTED:
2925d45f60c6SWeston Andros Adamson pnfs_write_through_mds(desc, hdr);
2926ffb81717SGustavo A. R. Silva break;
292737f8aa16STrond Myklebust case PNFS_ATTEMPTED:
292837f8aa16STrond Myklebust break;
292937f8aa16STrond Myklebust case PNFS_TRY_AGAIN:
293037f8aa16STrond Myklebust /* cleanup hdr and prepare to redo pnfs */
293137f8aa16STrond Myklebust if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
293237f8aa16STrond Myklebust struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
293337f8aa16STrond Myklebust list_splice_init(&hdr->pages, &mirror->pg_list);
293437f8aa16STrond Myklebust mirror->pg_recoalesce = 1;
293537f8aa16STrond Myklebust }
293637f8aa16STrond Myklebust hdr->mds_ops->rpc_release(hdr);
293737f8aa16STrond Myklebust }
2938dce81290STrond Myklebust }
2939dce81290STrond Myklebust
pnfs_writehdr_free(struct nfs_pgio_header * hdr)29406c75dc0dSFred Isaman static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
29416c75dc0dSFred Isaman {
29429369a431STrond Myklebust pnfs_put_lseg(hdr->lseg);
29431e7f3a48SWeston Andros Adamson nfs_pgio_header_free(hdr);
29446c75dc0dSFred Isaman }
29456c75dc0dSFred Isaman
2946dce81290STrond Myklebust int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor * desc)2947dce81290STrond Myklebust pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
2948dce81290STrond Myklebust {
29496c75dc0dSFred Isaman struct nfs_pgio_header *hdr;
2950dce81290STrond Myklebust int ret;
2951dce81290STrond Myklebust
29521e7f3a48SWeston Andros Adamson hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
29531e7f3a48SWeston Andros Adamson if (!hdr) {
29542bff2288SPeng Tao desc->pg_error = -ENOMEM;
29552bff2288SPeng Tao return desc->pg_error;
29566c75dc0dSFred Isaman }
29576c75dc0dSFred Isaman nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
2958180bb5ecSWeston Andros Adamson
29599369a431STrond Myklebust hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
2960ef2c488cSAnna Schumaker ret = nfs_generic_pgio(desc, hdr);
2961180bb5ecSWeston Andros Adamson if (!ret)
29627f714720SWeston Andros Adamson pnfs_do_write(desc, hdr, desc->pg_ioflags);
2963a7d42ddbSWeston Andros Adamson
2964dce81290STrond Myklebust return ret;
2965dce81290STrond Myklebust }
2966dce81290STrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
2967dce81290STrond Myklebust
pnfs_read_done_resend_to_mds(struct nfs_pgio_header * hdr)296853113ad3SWeston Andros Adamson int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
296962e4a769STrond Myklebust {
297062e4a769STrond Myklebust struct nfs_pageio_descriptor pgio;
297162e4a769STrond Myklebust
29721acbbb4eSFred Isaman /* Resend all requests through the MDS */
297353113ad3SWeston Andros Adamson nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
297453113ad3SWeston Andros Adamson return nfs_pageio_resend(&pgio, hdr);
29751acbbb4eSFred Isaman }
2976e7dd79afSAndy Adamson EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
29771acbbb4eSFred Isaman
pnfs_ld_handle_read_error(struct nfs_pgio_header * hdr)2978d45f60c6SWeston Andros Adamson static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
29791acbbb4eSFred Isaman {
2980cd841605SFred Isaman dprintk("pnfs read error = %d\n", hdr->pnfs_error);
2981cd841605SFred Isaman if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
29821acbbb4eSFred Isaman PNFS_LAYOUTRET_ON_ERROR) {
2983cd841605SFred Isaman pnfs_return_layout(hdr->inode);
29841acbbb4eSFred Isaman }
29854db6e0b7SFred Isaman if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
298653113ad3SWeston Andros Adamson hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
298762e4a769STrond Myklebust }
298862e4a769STrond Myklebust
2989b1f69b75SAndy Adamson /*
2990d20581aaSBenny Halevy * Called by non rpc-based layout drivers
2991d20581aaSBenny Halevy */
pnfs_ld_read_done(struct nfs_pgio_header * hdr)2992d45f60c6SWeston Andros Adamson void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
2993d20581aaSBenny Halevy {
2994bfc505deSTrond Myklebust if (likely(!hdr->pnfs_error))
2995d45f60c6SWeston Andros Adamson hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
2996f8417b48SKinglong Mee trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
2997f8417b48SKinglong Mee if (unlikely(hdr->pnfs_error))
2998d45f60c6SWeston Andros Adamson pnfs_ld_handle_read_error(hdr);
2999d45f60c6SWeston Andros Adamson hdr->mds_ops->rpc_release(hdr);
3000d20581aaSBenny Halevy }
3001d20581aaSBenny Halevy EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
3002d20581aaSBenny Halevy
3003493292ddSTrond Myklebust static void
pnfs_read_through_mds(struct nfs_pageio_descriptor * desc,struct nfs_pgio_header * hdr)3004493292ddSTrond Myklebust pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
3005d45f60c6SWeston Andros Adamson struct nfs_pgio_header *hdr)
3006493292ddSTrond Myklebust {
300748d635f1SPeng Tao struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
3008a7d42ddbSWeston Andros Adamson
30094db6e0b7SFred Isaman if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
3010a7d42ddbSWeston Andros Adamson list_splice_tail_init(&hdr->pages, &mirror->pg_list);
3011493292ddSTrond Myklebust nfs_pageio_reset_read_mds(desc);
3012a7d42ddbSWeston Andros Adamson mirror->pg_recoalesce = 1;
30134db6e0b7SFred Isaman }
3014ba4a76f7SScott Mayhew hdr->completion_ops->completion(hdr);
3015493292ddSTrond Myklebust }
3016493292ddSTrond Myklebust
3017d20581aaSBenny Halevy /*
301864419a9bSAndy Adamson * Call the appropriate parallel I/O subsystem read function.
301964419a9bSAndy Adamson */
3020493292ddSTrond Myklebust static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_pgio_header * hdr,const struct rpc_call_ops * call_ops,struct pnfs_layout_segment * lseg)3021d45f60c6SWeston Andros Adamson pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
3022493292ddSTrond Myklebust const struct rpc_call_ops *call_ops,
3023493292ddSTrond Myklebust struct pnfs_layout_segment *lseg)
302464419a9bSAndy Adamson {
3025cd841605SFred Isaman struct inode *inode = hdr->inode;
302664419a9bSAndy Adamson struct nfs_server *nfss = NFS_SERVER(inode);
302764419a9bSAndy Adamson enum pnfs_try_status trypnfs;
302864419a9bSAndy Adamson
3029cd841605SFred Isaman hdr->mds_ops = call_ops;
303064419a9bSAndy Adamson
303164419a9bSAndy Adamson dprintk("%s: Reading ino:%lu %u@%llu\n",
3032d45f60c6SWeston Andros Adamson __func__, inode->i_ino, hdr->args.count, hdr->args.offset);
303364419a9bSAndy Adamson
3034d45f60c6SWeston Andros Adamson trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
30354db6e0b7SFred Isaman if (trypnfs != PNFS_NOT_ATTEMPTED)
303664419a9bSAndy Adamson nfs_inc_stats(inode, NFSIOS_PNFS_READ);
303764419a9bSAndy Adamson dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
303864419a9bSAndy Adamson return trypnfs;
303964419a9bSAndy Adamson }
3040863a3c6cSAndy Adamson
3041ceb11e13SPeng Tao /* Resend all requests through pnfs. */
pnfs_read_resend_pnfs(struct nfs_pgio_header * hdr,unsigned int mirror_idx)3042563c53e7STrond Myklebust void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr,
3043563c53e7STrond Myklebust unsigned int mirror_idx)
3044ceb11e13SPeng Tao {
3045ceb11e13SPeng Tao struct nfs_pageio_descriptor pgio;
3046ceb11e13SPeng Tao
30471b1bc66bSWeston Andros Adamson if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
304854e4a0dfSTrond Myklebust /* Prevent deadlocks with layoutreturn! */
304954e4a0dfSTrond Myklebust pnfs_put_lseg(hdr->lseg);
305054e4a0dfSTrond Myklebust hdr->lseg = NULL;
305154e4a0dfSTrond Myklebust
30521b1bc66bSWeston Andros Adamson nfs_pageio_init_read(&pgio, hdr->inode, false,
30531b1bc66bSWeston Andros Adamson hdr->completion_ops);
3054563c53e7STrond Myklebust pgio.pg_mirror_idx = mirror_idx;
30551b1bc66bSWeston Andros Adamson hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
30561b1bc66bSWeston Andros Adamson }
3057ceb11e13SPeng Tao }
3058ceb11e13SPeng Tao EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs);
3059ceb11e13SPeng Tao
3060493292ddSTrond Myklebust static void
pnfs_do_read(struct nfs_pageio_descriptor * desc,struct nfs_pgio_header * hdr)30617f714720SWeston Andros Adamson pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
3062493292ddSTrond Myklebust {
3063493292ddSTrond Myklebust const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
3064493292ddSTrond Myklebust struct pnfs_layout_segment *lseg = desc->pg_lseg;
3065493292ddSTrond Myklebust enum pnfs_try_status trypnfs;
3066493292ddSTrond Myklebust
3067d45f60c6SWeston Andros Adamson trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
30686aeafd05STrond Myklebust switch (trypnfs) {
30696aeafd05STrond Myklebust case PNFS_NOT_ATTEMPTED:
3070d45f60c6SWeston Andros Adamson pnfs_read_through_mds(desc, hdr);
3071ffb81717SGustavo A. R. Silva break;
30726aeafd05STrond Myklebust case PNFS_ATTEMPTED:
30736aeafd05STrond Myklebust break;
30746aeafd05STrond Myklebust case PNFS_TRY_AGAIN:
30756aeafd05STrond Myklebust /* cleanup hdr and prepare to redo pnfs */
30766aeafd05STrond Myklebust if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
30776aeafd05STrond Myklebust struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
30786aeafd05STrond Myklebust list_splice_init(&hdr->pages, &mirror->pg_list);
30796aeafd05STrond Myklebust mirror->pg_recoalesce = 1;
30806aeafd05STrond Myklebust }
30816aeafd05STrond Myklebust hdr->mds_ops->rpc_release(hdr);
30826aeafd05STrond Myklebust }
3083493292ddSTrond Myklebust }
3084493292ddSTrond Myklebust
pnfs_readhdr_free(struct nfs_pgio_header * hdr)30854db6e0b7SFred Isaman static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
30864db6e0b7SFred Isaman {
30879369a431STrond Myklebust pnfs_put_lseg(hdr->lseg);
30881e7f3a48SWeston Andros Adamson nfs_pgio_header_free(hdr);
30894db6e0b7SFred Isaman }
30904db6e0b7SFred Isaman
3091493292ddSTrond Myklebust int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor * desc)3092493292ddSTrond Myklebust pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
3093493292ddSTrond Myklebust {
30944db6e0b7SFred Isaman struct nfs_pgio_header *hdr;
3095493292ddSTrond Myklebust int ret;
3096493292ddSTrond Myklebust
30971e7f3a48SWeston Andros Adamson hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
30981e7f3a48SWeston Andros Adamson if (!hdr) {
30992bff2288SPeng Tao desc->pg_error = -ENOMEM;
31002bff2288SPeng Tao return desc->pg_error;
3101493292ddSTrond Myklebust }
31024db6e0b7SFred Isaman nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
31039369a431STrond Myklebust hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
3104ef2c488cSAnna Schumaker ret = nfs_generic_pgio(desc, hdr);
3105180bb5ecSWeston Andros Adamson if (!ret)
31067f714720SWeston Andros Adamson pnfs_do_read(desc, hdr);
31074db6e0b7SFred Isaman return ret;
3108493292ddSTrond Myklebust }
3109493292ddSTrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
3110493292ddSTrond Myklebust
pnfs_clear_layoutcommitting(struct inode * inode)311171244d9bSTrond Myklebust static void pnfs_clear_layoutcommitting(struct inode *inode)
311271244d9bSTrond Myklebust {
311371244d9bSTrond Myklebust unsigned long *bitlock = &NFS_I(inode)->flags;
311471244d9bSTrond Myklebust
311571244d9bSTrond Myklebust clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
31164e857c58SPeter Zijlstra smp_mb__after_atomic();
311771244d9bSTrond Myklebust wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
311871244d9bSTrond Myklebust }
311971244d9bSTrond Myklebust
3120863a3c6cSAndy Adamson /*
3121a9bae566SPeng Tao * There can be multiple RW segments.
3122863a3c6cSAndy Adamson */
pnfs_list_write_lseg(struct inode * inode,struct list_head * listp)3123a9bae566SPeng Tao static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
3124863a3c6cSAndy Adamson {
3125a9bae566SPeng Tao struct pnfs_layout_segment *lseg;
3126863a3c6cSAndy Adamson
3127a9bae566SPeng Tao list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
3128a9bae566SPeng Tao if (lseg->pls_range.iomode == IOMODE_RW &&
3129a073dbffSTrond Myklebust test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
3130a9bae566SPeng Tao list_add(&lseg->pls_lc_list, listp);
3131a9bae566SPeng Tao }
3132863a3c6cSAndy Adamson }
3133863a3c6cSAndy Adamson
pnfs_list_write_lseg_done(struct inode * inode,struct list_head * listp)3134a073dbffSTrond Myklebust static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
3135a073dbffSTrond Myklebust {
3136a073dbffSTrond Myklebust struct pnfs_layout_segment *lseg, *tmp;
3137a073dbffSTrond Myklebust
3138a073dbffSTrond Myklebust /* Matched by references in pnfs_set_layoutcommit */
3139a073dbffSTrond Myklebust list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
3140a073dbffSTrond Myklebust list_del_init(&lseg->pls_lc_list);
3141a073dbffSTrond Myklebust pnfs_put_lseg(lseg);
3142a073dbffSTrond Myklebust }
3143a073dbffSTrond Myklebust
314471244d9bSTrond Myklebust pnfs_clear_layoutcommitting(inode);
3145a073dbffSTrond Myklebust }
3146a073dbffSTrond Myklebust
pnfs_set_lo_fail(struct pnfs_layout_segment * lseg)31471b0ae068SPeng Tao void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
31481b0ae068SPeng Tao {
3149b9e028fdSTrond Myklebust pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
31501b0ae068SPeng Tao }
31511b0ae068SPeng Tao EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
31521b0ae068SPeng Tao
3153863a3c6cSAndy Adamson void
pnfs_set_layoutcommit(struct inode * inode,struct pnfs_layout_segment * lseg,loff_t end_pos)315467af7611STrond Myklebust pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
315567af7611STrond Myklebust loff_t end_pos)
3156863a3c6cSAndy Adamson {
3157cd841605SFred Isaman struct nfs_inode *nfsi = NFS_I(inode);
315879a48a1fSWeston Andros Adamson bool mark_as_dirty = false;
3159863a3c6cSAndy Adamson
3160cd841605SFred Isaman spin_lock(&inode->i_lock);
3161863a3c6cSAndy Adamson if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
316229559b11STrond Myklebust nfsi->layout->plh_lwb = end_pos;
316379a48a1fSWeston Andros Adamson mark_as_dirty = true;
3164863a3c6cSAndy Adamson dprintk("%s: Set layoutcommit for inode %lu ",
3165cd841605SFred Isaman __func__, inode->i_ino);
316629559b11STrond Myklebust } else if (end_pos > nfsi->layout->plh_lwb)
316729559b11STrond Myklebust nfsi->layout->plh_lwb = end_pos;
316867af7611STrond Myklebust if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
3169a9bae566SPeng Tao /* references matched in nfs4_layoutcommit_release */
317067af7611STrond Myklebust pnfs_get_lseg(lseg);
3171a9bae566SPeng Tao }
3172cd841605SFred Isaman spin_unlock(&inode->i_lock);
3173acff5880SPeng Tao dprintk("%s: lseg %p end_pos %llu\n",
317467af7611STrond Myklebust __func__, lseg, nfsi->layout->plh_lwb);
317579a48a1fSWeston Andros Adamson
317679a48a1fSWeston Andros Adamson /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
317779a48a1fSWeston Andros Adamson * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
317879a48a1fSWeston Andros Adamson if (mark_as_dirty)
3179cd841605SFred Isaman mark_inode_dirty_sync(inode);
3180863a3c6cSAndy Adamson }
3181863a3c6cSAndy Adamson EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
3182863a3c6cSAndy Adamson
pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data * data)3183db29c089SAndy Adamson void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
3184db29c089SAndy Adamson {
3185db29c089SAndy Adamson struct nfs_server *nfss = NFS_SERVER(data->args.inode);
3186db29c089SAndy Adamson
3187db29c089SAndy Adamson if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
3188db29c089SAndy Adamson nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
3189a073dbffSTrond Myklebust pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
3190db29c089SAndy Adamson }
3191db29c089SAndy Adamson
3192de4b15c7SAndy Adamson /*
3193de4b15c7SAndy Adamson * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
3194de4b15c7SAndy Adamson * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
3195de4b15c7SAndy Adamson * data to disk to allow the server to recover the data if it crashes.
3196de4b15c7SAndy Adamson * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
3197de4b15c7SAndy Adamson * is off, and a COMMIT is sent to a data server, or
3198de4b15c7SAndy Adamson * if WRITEs to a data server return NFS_DATA_SYNC.
3199de4b15c7SAndy Adamson */
3200863a3c6cSAndy Adamson int
pnfs_layoutcommit_inode(struct inode * inode,bool sync)3201ef311537SAndy Adamson pnfs_layoutcommit_inode(struct inode *inode, bool sync)
3202863a3c6cSAndy Adamson {
32035f919c9fSChristoph Hellwig struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
3204863a3c6cSAndy Adamson struct nfs4_layoutcommit_data *data;
3205863a3c6cSAndy Adamson struct nfs_inode *nfsi = NFS_I(inode);
3206863a3c6cSAndy Adamson loff_t end_pos;
320771244d9bSTrond Myklebust int status;
320871244d9bSTrond Myklebust
320971244d9bSTrond Myklebust if (!pnfs_layoutcommit_outstanding(inode))
321071244d9bSTrond Myklebust return 0;
3211863a3c6cSAndy Adamson
3212863a3c6cSAndy Adamson dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
3213863a3c6cSAndy Adamson
321471244d9bSTrond Myklebust status = -EAGAIN;
321571244d9bSTrond Myklebust if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
321671244d9bSTrond Myklebust if (!sync)
321771244d9bSTrond Myklebust goto out;
321874316201SNeilBrown status = wait_on_bit_lock_action(&nfsi->flags,
321971244d9bSTrond Myklebust NFS_INO_LAYOUTCOMMITTING,
322071244d9bSTrond Myklebust nfs_wait_bit_killable,
3221f5d39b02SPeter Zijlstra TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
322271244d9bSTrond Myklebust if (status)
3223de4b15c7SAndy Adamson goto out;
3224de4b15c7SAndy Adamson }
3225863a3c6cSAndy Adamson
322671244d9bSTrond Myklebust status = -ENOMEM;
322771244d9bSTrond Myklebust /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
322863d8a41bSTrond Myklebust data = kzalloc(sizeof(*data), nfs_io_gfp_mask());
322971244d9bSTrond Myklebust if (!data)
323071244d9bSTrond Myklebust goto clear_layoutcommitting;
323192407e75SPeng Tao
323271244d9bSTrond Myklebust status = 0;
323371244d9bSTrond Myklebust spin_lock(&inode->i_lock);
323471244d9bSTrond Myklebust if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
323571244d9bSTrond Myklebust goto out_unlock;
323692407e75SPeng Tao
3237a9bae566SPeng Tao INIT_LIST_HEAD(&data->lseg_list);
3238a9bae566SPeng Tao pnfs_list_write_lseg(inode, &data->lseg_list);
3239863a3c6cSAndy Adamson
3240acff5880SPeng Tao end_pos = nfsi->layout->plh_lwb;
3241863a3c6cSAndy Adamson
3242f597c537STrond Myklebust nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
324397a728f5STrond Myklebust data->cred = get_cred(nfsi->layout->plh_lc_cred);
3244863a3c6cSAndy Adamson spin_unlock(&inode->i_lock);
3245863a3c6cSAndy Adamson
3246863a3c6cSAndy Adamson data->args.inode = inode;
3247863a3c6cSAndy Adamson nfs_fattr_init(&data->fattr);
3248863a3c6cSAndy Adamson data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
3249863a3c6cSAndy Adamson data->res.fattr = &data->fattr;
32502e18d4d8STrond Myklebust if (end_pos != 0)
3251863a3c6cSAndy Adamson data->args.lastbytewritten = end_pos - 1;
32522e18d4d8STrond Myklebust else
32532e18d4d8STrond Myklebust data->args.lastbytewritten = U64_MAX;
3254863a3c6cSAndy Adamson data->res.server = NFS_SERVER(inode);
3255863a3c6cSAndy Adamson
32565f919c9fSChristoph Hellwig if (ld->prepare_layoutcommit) {
32575f919c9fSChristoph Hellwig status = ld->prepare_layoutcommit(&data->args);
32585f919c9fSChristoph Hellwig if (status) {
3259a52458b4SNeilBrown put_cred(data->cred);
32605f919c9fSChristoph Hellwig spin_lock(&inode->i_lock);
326129559b11STrond Myklebust set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
326229559b11STrond Myklebust if (end_pos > nfsi->layout->plh_lwb)
32635f919c9fSChristoph Hellwig nfsi->layout->plh_lwb = end_pos;
32643471648aSJeff Layton goto out_unlock;
32655f919c9fSChristoph Hellwig }
32665f919c9fSChristoph Hellwig }
32675f919c9fSChristoph Hellwig
32685f919c9fSChristoph Hellwig
3269863a3c6cSAndy Adamson status = nfs4_proc_layoutcommit(data, sync);
3270863a3c6cSAndy Adamson out:
327192407e75SPeng Tao if (status)
327292407e75SPeng Tao mark_inode_dirty_sync(inode);
3273863a3c6cSAndy Adamson dprintk("<-- %s status %d\n", __func__, status);
3274863a3c6cSAndy Adamson return status;
327571244d9bSTrond Myklebust out_unlock:
327671244d9bSTrond Myklebust spin_unlock(&inode->i_lock);
327792407e75SPeng Tao kfree(data);
327871244d9bSTrond Myklebust clear_layoutcommitting:
327971244d9bSTrond Myklebust pnfs_clear_layoutcommitting(inode);
328092407e75SPeng Tao goto out;
3281863a3c6cSAndy Adamson }
328272cff449SPeng Tao EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);
328382be417aSAndy Adamson
32845bb89b47STrond Myklebust int
pnfs_generic_sync(struct inode * inode,bool datasync)32855bb89b47STrond Myklebust pnfs_generic_sync(struct inode *inode, bool datasync)
32865bb89b47STrond Myklebust {
32875bb89b47STrond Myklebust return pnfs_layoutcommit_inode(inode, true);
32885bb89b47STrond Myklebust }
32895bb89b47STrond Myklebust EXPORT_SYMBOL_GPL(pnfs_generic_sync);
32905bb89b47STrond Myklebust
pnfs_mdsthreshold_alloc(void)329182be417aSAndy Adamson struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
329282be417aSAndy Adamson {
329382be417aSAndy Adamson struct nfs4_threshold *thp;
329482be417aSAndy Adamson
329563d8a41bSTrond Myklebust thp = kzalloc(sizeof(*thp), nfs_io_gfp_mask());
329682be417aSAndy Adamson if (!thp) {
329782be417aSAndy Adamson dprintk("%s mdsthreshold allocation failed\n", __func__);
329882be417aSAndy Adamson return NULL;
329982be417aSAndy Adamson }
330082be417aSAndy Adamson return thp;
330182be417aSAndy Adamson }
33028733408dSPeng Tao
3303865a7ecbSPeng Tao #if IS_ENABLED(CONFIG_NFS_V4_2)
33048733408dSPeng Tao int
pnfs_report_layoutstat(struct inode * inode,gfp_t gfp_flags)3305c8ad8894STrond Myklebust pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
33068733408dSPeng Tao {
33078733408dSPeng Tao struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
33088733408dSPeng Tao struct nfs_server *server = NFS_SERVER(inode);
33091bfe3b25SPeng Tao struct nfs_inode *nfsi = NFS_I(inode);
33108733408dSPeng Tao struct nfs42_layoutstat_data *data;
33118733408dSPeng Tao struct pnfs_layout_hdr *hdr;
33128733408dSPeng Tao int status = 0;
33138733408dSPeng Tao
33148733408dSPeng Tao if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
33158733408dSPeng Tao goto out;
33168733408dSPeng Tao
33176c5a0d89STrond Myklebust if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
33186c5a0d89STrond Myklebust goto out;
33196c5a0d89STrond Myklebust
33201bfe3b25SPeng Tao if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
33211bfe3b25SPeng Tao goto out;
33221bfe3b25SPeng Tao
33238733408dSPeng Tao spin_lock(&inode->i_lock);
33248733408dSPeng Tao if (!NFS_I(inode)->layout) {
33258733408dSPeng Tao spin_unlock(&inode->i_lock);
3326f538d0baSTrond Myklebust goto out_clear_layoutstats;
33278733408dSPeng Tao }
33288733408dSPeng Tao hdr = NFS_I(inode)->layout;
33298733408dSPeng Tao pnfs_get_layout_hdr(hdr);
33308733408dSPeng Tao spin_unlock(&inode->i_lock);
33318733408dSPeng Tao
3332c8ad8894STrond Myklebust data = kzalloc(sizeof(*data), gfp_flags);
33338733408dSPeng Tao if (!data) {
33348733408dSPeng Tao status = -ENOMEM;
33358733408dSPeng Tao goto out_put;
33368733408dSPeng Tao }
33378733408dSPeng Tao
33388733408dSPeng Tao data->args.fh = NFS_FH(inode);
33398733408dSPeng Tao data->args.inode = inode;
33408733408dSPeng Tao status = ld->prepare_layoutstats(&data->args);
33418733408dSPeng Tao if (status)
33428733408dSPeng Tao goto out_free;
33438733408dSPeng Tao
33448733408dSPeng Tao status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);
33458733408dSPeng Tao
33468733408dSPeng Tao out:
33478733408dSPeng Tao dprintk("%s returns %d\n", __func__, status);
33488733408dSPeng Tao return status;
33498733408dSPeng Tao
33508733408dSPeng Tao out_free:
33518733408dSPeng Tao kfree(data);
33528733408dSPeng Tao out_put:
33538733408dSPeng Tao pnfs_put_layout_hdr(hdr);
3354f538d0baSTrond Myklebust out_clear_layoutstats:
33551bfe3b25SPeng Tao smp_mb__before_atomic();
33561bfe3b25SPeng Tao clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
33571bfe3b25SPeng Tao smp_mb__after_atomic();
33588733408dSPeng Tao goto out;
33598733408dSPeng Tao }
33608733408dSPeng Tao EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
3361865a7ecbSPeng Tao #endif
3362bbf58bf3STrond Myklebust
3363bbf58bf3STrond Myklebust unsigned int layoutstats_timer;
3364bbf58bf3STrond Myklebust module_param(layoutstats_timer, uint, 0644);
3365bbf58bf3STrond Myklebust EXPORT_SYMBOL_GPL(layoutstats_timer);
3366