// SPDX-License-Identifier: GPL-2.0
/*
 *  Device operations for the pnfs nfs4 file layout driver.
 *
 *  Copyright (c) 2014, Primary Data, Inc. All rights reserved.
 *
 *  Tao Peng <bergwolf@primarydata.com>
 */
9d67ae825STom Haynes
10d67ae825STom Haynes #include <linux/nfs_fs.h>
11d67ae825STom Haynes #include <linux/vmalloc.h>
12d67ae825STom Haynes #include <linux/module.h>
13d67ae825STom Haynes #include <linux/sunrpc/addr.h>
14d67ae825STom Haynes
15d67ae825STom Haynes #include "../internal.h"
16d67ae825STom Haynes #include "../nfs4session.h"
17d67ae825STom Haynes #include "flexfilelayout.h"
18d67ae825STom Haynes
19d67ae825STom Haynes #define NFSDBG_FACILITY NFSDBG_PNFS_LD
20d67ae825STom Haynes
2168f46159STrond Myklebust static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO;
2215d03055STrond Myklebust static unsigned int dataserver_retrans;
23d67ae825STom Haynes
2465990d1aSFred Isaman static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
2565990d1aSFred Isaman
nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds * mirror_ds)26d67ae825STom Haynes void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
27d67ae825STom Haynes {
2865990d1aSFred Isaman if (!IS_ERR_OR_NULL(mirror_ds))
29d67ae825STom Haynes nfs4_put_deviceid_node(&mirror_ds->id_node);
30d67ae825STom Haynes }
31d67ae825STom Haynes
nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds * mirror_ds)32d67ae825STom Haynes void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
33d67ae825STom Haynes {
34d67ae825STom Haynes nfs4_print_deviceid(&mirror_ds->id_node.deviceid);
35d67ae825STom Haynes nfs4_pnfs_ds_put(mirror_ds->ds);
361feb2616SWeston Andros Adamson kfree(mirror_ds->ds_versions);
3784a80f62STrond Myklebust kfree_rcu(mirror_ds, id_node.rcu);
38d67ae825STom Haynes }
39d67ae825STom Haynes
40d67ae825STom Haynes /* Decode opaque device data and construct new_ds using it */
41d67ae825STom Haynes struct nfs4_ff_layout_ds *
nfs4_ff_alloc_deviceid_node(struct nfs_server * server,struct pnfs_device * pdev,gfp_t gfp_flags)42d67ae825STom Haynes nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
43d67ae825STom Haynes gfp_t gfp_flags)
44d67ae825STom Haynes {
45d67ae825STom Haynes struct xdr_stream stream;
46d67ae825STom Haynes struct xdr_buf buf;
47d67ae825STom Haynes struct page *scratch;
48d67ae825STom Haynes struct list_head dsaddrs;
49d67ae825STom Haynes struct nfs4_pnfs_ds_addr *da;
50d67ae825STom Haynes struct nfs4_ff_layout_ds *new_ds = NULL;
51d67ae825STom Haynes struct nfs4_ff_ds_version *ds_versions = NULL;
52d67ae825STom Haynes u32 mp_count;
53d67ae825STom Haynes u32 version_count;
54d67ae825STom Haynes __be32 *p;
55d67ae825STom Haynes int i, ret = -ENOMEM;
56d67ae825STom Haynes
57d67ae825STom Haynes /* set up xdr stream */
58d67ae825STom Haynes scratch = alloc_page(gfp_flags);
59d67ae825STom Haynes if (!scratch)
60d67ae825STom Haynes goto out_err;
61d67ae825STom Haynes
62d67ae825STom Haynes new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags);
63d67ae825STom Haynes if (!new_ds)
64d67ae825STom Haynes goto out_scratch;
65d67ae825STom Haynes
66d67ae825STom Haynes nfs4_init_deviceid_node(&new_ds->id_node,
67d67ae825STom Haynes server,
68d67ae825STom Haynes &pdev->dev_id);
69d67ae825STom Haynes INIT_LIST_HEAD(&dsaddrs);
70d67ae825STom Haynes
71d67ae825STom Haynes xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
720ae4c3e8SChuck Lever xdr_set_scratch_page(&stream, scratch);
73d67ae825STom Haynes
74d67ae825STom Haynes /* multipath count */
75d67ae825STom Haynes p = xdr_inline_decode(&stream, 4);
76d67ae825STom Haynes if (unlikely(!p))
77d67ae825STom Haynes goto out_err_drain_dsaddrs;
78d67ae825STom Haynes mp_count = be32_to_cpup(p);
79d67ae825STom Haynes dprintk("%s: multipath ds count %d\n", __func__, mp_count);
80d67ae825STom Haynes
81d67ae825STom Haynes for (i = 0; i < mp_count; i++) {
82d67ae825STom Haynes /* multipath ds */
83d67ae825STom Haynes da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
84d67ae825STom Haynes &stream, gfp_flags);
85d67ae825STom Haynes if (da)
86d67ae825STom Haynes list_add_tail(&da->da_node, &dsaddrs);
87d67ae825STom Haynes }
88d67ae825STom Haynes if (list_empty(&dsaddrs)) {
89d67ae825STom Haynes dprintk("%s: no suitable DS addresses found\n",
90d67ae825STom Haynes __func__);
91d67ae825STom Haynes ret = -ENOMEDIUM;
92d67ae825STom Haynes goto out_err_drain_dsaddrs;
93d67ae825STom Haynes }
94d67ae825STom Haynes
95d67ae825STom Haynes /* version count */
96d67ae825STom Haynes p = xdr_inline_decode(&stream, 4);
97d67ae825STom Haynes if (unlikely(!p))
98d67ae825STom Haynes goto out_err_drain_dsaddrs;
99d67ae825STom Haynes version_count = be32_to_cpup(p);
100d67ae825STom Haynes dprintk("%s: version count %d\n", __func__, version_count);
101d67ae825STom Haynes
1026396bb22SKees Cook ds_versions = kcalloc(version_count,
1036396bb22SKees Cook sizeof(struct nfs4_ff_ds_version),
104d67ae825STom Haynes gfp_flags);
105d67ae825STom Haynes if (!ds_versions)
106d67ae825STom Haynes goto out_scratch;
107d67ae825STom Haynes
108d67ae825STom Haynes for (i = 0; i < version_count; i++) {
109d67ae825STom Haynes /* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) +
110d67ae825STom Haynes * tightly_coupled(4) */
111d67ae825STom Haynes p = xdr_inline_decode(&stream, 20);
112d67ae825STom Haynes if (unlikely(!p))
113d67ae825STom Haynes goto out_err_drain_dsaddrs;
114d67ae825STom Haynes ds_versions[i].version = be32_to_cpup(p++);
115d67ae825STom Haynes ds_versions[i].minor_version = be32_to_cpup(p++);
116*940261a1SAnna Schumaker ds_versions[i].rsize = nfs_io_size(be32_to_cpup(p++),
117*940261a1SAnna Schumaker server->nfs_client->cl_proto);
118*940261a1SAnna Schumaker ds_versions[i].wsize = nfs_io_size(be32_to_cpup(p++),
119*940261a1SAnna Schumaker server->nfs_client->cl_proto);
120d67ae825STom Haynes ds_versions[i].tightly_coupled = be32_to_cpup(p);
121d67ae825STom Haynes
122d67ae825STom Haynes if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE)
123d67ae825STom Haynes ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE;
124d67ae825STom Haynes if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE)
125d67ae825STom Haynes ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE;
126d67ae825STom Haynes
127a7878ca1STigran Mkrtchyan /*
128a7878ca1STigran Mkrtchyan * check for valid major/minor combination.
129a7878ca1STigran Mkrtchyan * currently we support dataserver which talk:
130a7878ca1STigran Mkrtchyan * v3, v4.0, v4.1, v4.2
131a7878ca1STigran Mkrtchyan */
132a7878ca1STigran Mkrtchyan if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) ||
133a7878ca1STigran Mkrtchyan (ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) {
134d67ae825STom Haynes dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__,
135d67ae825STom Haynes i, ds_versions[i].version,
136d67ae825STom Haynes ds_versions[i].minor_version);
137d67ae825STom Haynes ret = -EPROTONOSUPPORT;
138d67ae825STom Haynes goto out_err_drain_dsaddrs;
139d67ae825STom Haynes }
140d67ae825STom Haynes
141d67ae825STom Haynes dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n",
142d67ae825STom Haynes __func__, i, ds_versions[i].version,
143d67ae825STom Haynes ds_versions[i].minor_version,
144d67ae825STom Haynes ds_versions[i].rsize,
145d67ae825STom Haynes ds_versions[i].wsize,
146d67ae825STom Haynes ds_versions[i].tightly_coupled);
147d67ae825STom Haynes }
148d67ae825STom Haynes
149d67ae825STom Haynes new_ds->ds_versions = ds_versions;
150d67ae825STom Haynes new_ds->ds_versions_cnt = version_count;
151d67ae825STom Haynes
152d67ae825STom Haynes new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
153d67ae825STom Haynes if (!new_ds->ds)
154d67ae825STom Haynes goto out_err_drain_dsaddrs;
155d67ae825STom Haynes
156d67ae825STom Haynes /* If DS was already in cache, free ds addrs */
157d67ae825STom Haynes while (!list_empty(&dsaddrs)) {
158d67ae825STom Haynes da = list_first_entry(&dsaddrs,
159d67ae825STom Haynes struct nfs4_pnfs_ds_addr,
160d67ae825STom Haynes da_node);
161d67ae825STom Haynes list_del_init(&da->da_node);
162d67ae825STom Haynes kfree(da->da_remotestr);
163d67ae825STom Haynes kfree(da);
164d67ae825STom Haynes }
165d67ae825STom Haynes
166d67ae825STom Haynes __free_page(scratch);
167d67ae825STom Haynes return new_ds;
168d67ae825STom Haynes
169d67ae825STom Haynes out_err_drain_dsaddrs:
170d67ae825STom Haynes while (!list_empty(&dsaddrs)) {
171d67ae825STom Haynes da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
172d67ae825STom Haynes da_node);
173d67ae825STom Haynes list_del_init(&da->da_node);
174d67ae825STom Haynes kfree(da->da_remotestr);
175d67ae825STom Haynes kfree(da);
176d67ae825STom Haynes }
177d67ae825STom Haynes
178d67ae825STom Haynes kfree(ds_versions);
179d67ae825STom Haynes out_scratch:
180d67ae825STom Haynes __free_page(scratch);
181d67ae825STom Haynes out_err:
182d67ae825STom Haynes kfree(new_ds);
183d67ae825STom Haynes
184d67ae825STom Haynes dprintk("%s ERROR: returning %d\n", __func__, ret);
185d67ae825STom Haynes return NULL;
186d67ae825STom Haynes }
187d67ae825STom Haynes
/*
 * Widen the range stored in @err so that it also covers the range described
 * by @offset and @length.  The new start is the smaller of the two offsets
 * and the new end is the larger of the two pnfs_end_offset() values.
 */
static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
			    u64 offset, u64 length)
{
	u64 cur_end = pnfs_end_offset(err->offset, err->length);
	u64 new_end = pnfs_end_offset(offset, length);
	u64 start = min_t(u64, err->offset, offset);

	err->offset = start;
	err->length = max_t(u64, cur_end, new_end) - start;
}
198d67ae825STom Haynes
199b819ed4bSTrond Myklebust static int
ff_ds_error_match(const struct nfs4_ff_layout_ds_err * e1,const struct nfs4_ff_layout_ds_err * e2)200b819ed4bSTrond Myklebust ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
201b819ed4bSTrond Myklebust const struct nfs4_ff_layout_ds_err *e2)
202d67ae825STom Haynes {
203b819ed4bSTrond Myklebust int ret;
204b819ed4bSTrond Myklebust
205b819ed4bSTrond Myklebust if (e1->opnum != e2->opnum)
206b819ed4bSTrond Myklebust return e1->opnum < e2->opnum ? -1 : 1;
207b819ed4bSTrond Myklebust if (e1->status != e2->status)
208b819ed4bSTrond Myklebust return e1->status < e2->status ? -1 : 1;
20993b717fdSTrond Myklebust ret = memcmp(e1->stateid.data, e2->stateid.data,
21093b717fdSTrond Myklebust sizeof(e1->stateid.data));
211b819ed4bSTrond Myklebust if (ret != 0)
212b819ed4bSTrond Myklebust return ret;
213b819ed4bSTrond Myklebust ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
214b819ed4bSTrond Myklebust if (ret != 0)
215b819ed4bSTrond Myklebust return ret;
21617822b20STrond Myklebust if (pnfs_end_offset(e1->offset, e1->length) < e2->offset)
217b819ed4bSTrond Myklebust return -1;
21817822b20STrond Myklebust if (e1->offset > pnfs_end_offset(e2->offset, e2->length))
219b819ed4bSTrond Myklebust return 1;
220b819ed4bSTrond Myklebust /* If ranges overlap or are contiguous, they are the same */
221b819ed4bSTrond Myklebust return 0;
222d67ae825STom Haynes }
223d67ae825STom Haynes
224b819ed4bSTrond Myklebust static void
ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout * flo,struct nfs4_ff_layout_ds_err * dserr)225d67ae825STom Haynes ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
226d67ae825STom Haynes struct nfs4_ff_layout_ds_err *dserr)
227d67ae825STom Haynes {
228b819ed4bSTrond Myklebust struct nfs4_ff_layout_ds_err *err, *tmp;
229b819ed4bSTrond Myklebust struct list_head *head = &flo->error_list;
230b819ed4bSTrond Myklebust int match;
231d67ae825STom Haynes
232b819ed4bSTrond Myklebust /* Do insertion sort w/ merges */
233b819ed4bSTrond Myklebust list_for_each_entry_safe(err, tmp, &flo->error_list, list) {
234b819ed4bSTrond Myklebust match = ff_ds_error_match(err, dserr);
235b819ed4bSTrond Myklebust if (match < 0)
236b819ed4bSTrond Myklebust continue;
237b819ed4bSTrond Myklebust if (match > 0) {
238b819ed4bSTrond Myklebust /* Add entry "dserr" _before_ entry "err" */
239b819ed4bSTrond Myklebust head = &err->list;
240d67ae825STom Haynes break;
241d67ae825STom Haynes }
242b819ed4bSTrond Myklebust /* Entries match, so merge "err" into "dserr" */
243b819ed4bSTrond Myklebust extend_ds_error(dserr, err->offset, err->length);
244cb067935STrond Myklebust list_replace(&err->list, &dserr->list);
245b819ed4bSTrond Myklebust kfree(err);
246cb067935STrond Myklebust return;
247d67ae825STom Haynes }
248d67ae825STom Haynes
249b819ed4bSTrond Myklebust list_add_tail(&dserr->list, head);
250d67ae825STom Haynes }
251d67ae825STom Haynes
/*
 * Record a data server error against @flo so it can be reported later.
 *
 * @flo:       flexfile layout whose error list receives the record
 * @mirror:    mirror the error was seen on; may be NULL
 * @offset:    start of the affected byte range
 * @length:    length of the affected byte range
 * @status:    error status; 0 means "no error" and nothing is recorded
 * @opnum:     operation that failed
 * @gfp_flags: allocation flags for the new record
 *
 * Returns 0 if @status is 0 or the record was queued, -EINVAL if @mirror is
 * NULL or has no valid device attached, and -ENOMEM on allocation failure.
 */
int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
			     struct nfs4_ff_layout_mirror *mirror, u64 offset,
			     u64 length, int status, enum nfs_opnum4 opnum,
			     gfp_t gfp_flags)
{
	struct nfs4_ff_layout_ds_err *dserr;

	if (status == 0)
		return 0;

	/*
	 * nfs4_ff_layout_prepare_ds() can get here with a NULL mirror after
	 * ff_layout_init_mirror_ds() rejected it, so check before
	 * dereferencing.
	 */
	if (!mirror || IS_ERR_OR_NULL(mirror->mirror_ds))
		return -EINVAL;

	dserr = kmalloc(sizeof(*dserr), gfp_flags);
	if (!dserr)
		return -ENOMEM;

	INIT_LIST_HEAD(&dserr->list);
	dserr->offset = offset;
	dserr->length = length;
	dserr->status = status;
	dserr->opnum = opnum;
	nfs4_stateid_copy(&dserr->stateid, &mirror->stateid);
	memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid,
	       NFS4_DEVICEID4_SIZE);

	/* The error list is modified under the inode's i_lock */
	spin_lock(&flo->generic_hdr.plh_inode->i_lock);
	ff_layout_add_ds_error_locked(flo, dserr);
	spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
	return 0;
}
283d67ae825STom Haynes
284a52458b4SNeilBrown static const struct cred *
ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror * mirror,u32 iomode)28557f3f4c0SJeff Layton ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
28657f3f4c0SJeff Layton {
287a52458b4SNeilBrown const struct cred *cred, __rcu **pcred;
28857f3f4c0SJeff Layton
2893064b686SJeff Layton if (iomode == IOMODE_READ)
2903064b686SJeff Layton pcred = &mirror->ro_cred;
2913064b686SJeff Layton else
2923064b686SJeff Layton pcred = &mirror->rw_cred;
29357f3f4c0SJeff Layton
29457f3f4c0SJeff Layton rcu_read_lock();
29557f3f4c0SJeff Layton do {
29657f3f4c0SJeff Layton cred = rcu_dereference(*pcred);
29757f3f4c0SJeff Layton if (!cred)
29857f3f4c0SJeff Layton break;
29957f3f4c0SJeff Layton
300a52458b4SNeilBrown cred = get_cred_rcu(cred);
30157f3f4c0SJeff Layton } while(!cred);
30257f3f4c0SJeff Layton rcu_read_unlock();
30357f3f4c0SJeff Layton return cred;
30457f3f4c0SJeff Layton }
30557f3f4c0SJeff Layton
306d67ae825STom Haynes struct nfs_fh *
nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror * mirror)307749da527STrond Myklebust nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror)
308d67ae825STom Haynes {
309d67ae825STom Haynes /* FIXME: For now assume there is only 1 version available for the DS */
310749da527STrond Myklebust return &mirror->fh_versions[0];
311d67ae825STom Haynes }
312d67ae825STom Haynes
3134cbc8a57STrond Myklebust void
nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror * mirror,nfs4_stateid * stateid)3144cbc8a57STrond Myklebust nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror,
315bb21ce0aSTigran Mkrtchyan nfs4_stateid *stateid)
316bb21ce0aSTigran Mkrtchyan {
3174cbc8a57STrond Myklebust if (nfs4_ff_layout_ds_version(mirror) == 4)
318bb21ce0aSTigran Mkrtchyan nfs4_stateid_copy(stateid, &mirror->stateid);
319bb21ce0aSTigran Mkrtchyan }
320bb21ce0aSTigran Mkrtchyan
321cefa587aSTrond Myklebust static bool
ff_layout_init_mirror_ds(struct pnfs_layout_hdr * lo,struct nfs4_ff_layout_mirror * mirror)322cefa587aSTrond Myklebust ff_layout_init_mirror_ds(struct pnfs_layout_hdr *lo,
323cefa587aSTrond Myklebust struct nfs4_ff_layout_mirror *mirror)
324cefa587aSTrond Myklebust {
325cefa587aSTrond Myklebust if (mirror == NULL)
326cefa587aSTrond Myklebust goto outerr;
327cefa587aSTrond Myklebust if (mirror->mirror_ds == NULL) {
328cefa587aSTrond Myklebust struct nfs4_deviceid_node *node;
329cefa587aSTrond Myklebust struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV);
330cefa587aSTrond Myklebust
331cefa587aSTrond Myklebust node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode),
332cefa587aSTrond Myklebust &mirror->devid, lo->plh_lc_cred,
333cefa587aSTrond Myklebust GFP_KERNEL);
334cefa587aSTrond Myklebust if (node)
335cefa587aSTrond Myklebust mirror_ds = FF_LAYOUT_MIRROR_DS(node);
336cefa587aSTrond Myklebust
337cefa587aSTrond Myklebust /* check for race with another call to this function */
338cefa587aSTrond Myklebust if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) &&
339cefa587aSTrond Myklebust mirror_ds != ERR_PTR(-ENODEV))
340cefa587aSTrond Myklebust nfs4_put_deviceid_node(node);
341cefa587aSTrond Myklebust }
342cefa587aSTrond Myklebust
343cefa587aSTrond Myklebust if (IS_ERR(mirror->mirror_ds))
344cefa587aSTrond Myklebust goto outerr;
345cefa587aSTrond Myklebust
346cefa587aSTrond Myklebust return true;
347cefa587aSTrond Myklebust outerr:
348cefa587aSTrond Myklebust return false;
349cefa587aSTrond Myklebust }
350cefa587aSTrond Myklebust
35195e2b7e9SJeff Layton /**
35295e2b7e9SJeff Layton * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
35395e2b7e9SJeff Layton * @lseg: the layout segment we're operating on
3542444ff27STrond Myklebust * @mirror: layout mirror describing the DS to use
35595e2b7e9SJeff Layton * @fail_return: return layout on connect failure?
35695e2b7e9SJeff Layton *
35795e2b7e9SJeff Layton * Try to prepare a DS connection to accept an RPC call. This involves
35895e2b7e9SJeff Layton * selecting a mirror to use and connecting the client to it if it's not
35995e2b7e9SJeff Layton * already connected.
36095e2b7e9SJeff Layton *
36195e2b7e9SJeff Layton * Since we only need a single functioning mirror to satisfy a read, we don't
36295e2b7e9SJeff Layton * want to return the layout if there is one. For writes though, any down
36395e2b7e9SJeff Layton * mirror should result in a LAYOUTRETURN. @fail_return is how we distinguish
36495e2b7e9SJeff Layton * between the two cases.
36595e2b7e9SJeff Layton *
36695e2b7e9SJeff Layton * Returns a pointer to a connected DS object on success or NULL on failure.
36795e2b7e9SJeff Layton */
368d67ae825STom Haynes struct nfs4_pnfs_ds *
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment * lseg,struct nfs4_ff_layout_mirror * mirror,bool fail_return)3692444ff27STrond Myklebust nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
3702444ff27STrond Myklebust struct nfs4_ff_layout_mirror *mirror,
371d67ae825STom Haynes bool fail_return)
372d67ae825STom Haynes {
373d67ae825STom Haynes struct nfs4_pnfs_ds *ds = NULL;
374d67ae825STom Haynes struct inode *ino = lseg->pls_layout->plh_inode;
375d67ae825STom Haynes struct nfs_server *s = NFS_SERVER(ino);
376d67ae825STom Haynes unsigned int max_payload;
377a33e4b03SWeston Andros Adamson int status;
378d67ae825STom Haynes
379cefa587aSTrond Myklebust if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror))
3800a156dd5STrond Myklebust goto noconnect;
381d67ae825STom Haynes
382d67ae825STom Haynes ds = mirror->mirror_ds->ds;
383a2915fa0SBaptiste Lepers if (READ_ONCE(ds->ds_clp))
384a2915fa0SBaptiste Lepers goto out;
385d67ae825STom Haynes /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
386d67ae825STom Haynes smp_rmb();
387d67ae825STom Haynes
388d67ae825STom Haynes /* FIXME: For now we assume the server sent only one version of NFS
389d67ae825STom Haynes * to use for the DS.
390d67ae825STom Haynes */
3912444ff27STrond Myklebust status = nfs4_pnfs_ds_connect(s, ds, &mirror->mirror_ds->id_node,
3922444ff27STrond Myklebust dataserver_timeo, dataserver_retrans,
393d67ae825STom Haynes mirror->mirror_ds->ds_versions[0].version,
3947d38de3fSAnna Schumaker mirror->mirror_ds->ds_versions[0].minor_version);
395d67ae825STom Haynes
396d67ae825STom Haynes /* connect success, check rsize/wsize limit */
397260f32adSTrond Myklebust if (!status) {
398d67ae825STom Haynes max_payload =
399d67ae825STom Haynes nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
400d67ae825STom Haynes NULL);
401d67ae825STom Haynes if (mirror->mirror_ds->ds_versions[0].rsize > max_payload)
402d67ae825STom Haynes mirror->mirror_ds->ds_versions[0].rsize = max_payload;
403d67ae825STom Haynes if (mirror->mirror_ds->ds_versions[0].wsize > max_payload)
404d67ae825STom Haynes mirror->mirror_ds->ds_versions[0].wsize = max_payload;
4053dc14735STrond Myklebust goto out;
4063dc14735STrond Myklebust }
4070a156dd5STrond Myklebust noconnect:
408d67ae825STom Haynes ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
409d67ae825STom Haynes mirror, lseg->pls_range.offset,
410d67ae825STom Haynes lseg->pls_range.length, NFS4ERR_NXIO,
411d67ae825STom Haynes OP_ILLEGAL, GFP_NOIO);
412f0922a6cSTrond Myklebust ff_layout_send_layouterror(lseg);
413094069f1SJeff Layton if (fail_return || !ff_layout_has_available_ds(lseg))
4142e5b29f0STrond Myklebust pnfs_error_mark_layout_for_return(ino, lseg);
415849dc324SJeff Layton ds = NULL;
416d67ae825STom Haynes out:
417d67ae825STom Haynes return ds;
418d67ae825STom Haynes }
419d67ae825STom Haynes
420a52458b4SNeilBrown const struct cred *
ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror * mirror,const struct pnfs_layout_range * range,const struct cred * mdscred)421312cd4cbSTrond Myklebust ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror,
422312cd4cbSTrond Myklebust const struct pnfs_layout_range *range,
423a52458b4SNeilBrown const struct cred *mdscred)
424d67ae825STom Haynes {
425a52458b4SNeilBrown const struct cred *cred;
426d67ae825STom Haynes
42710ec57e4STigran Mkrtchyan if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) {
428312cd4cbSTrond Myklebust cred = ff_layout_get_mirror_cred(mirror, range->iomode);
42957f3f4c0SJeff Layton if (!cred)
430a52458b4SNeilBrown cred = get_cred(mdscred);
43157f3f4c0SJeff Layton } else {
432a52458b4SNeilBrown cred = get_cred(mdscred);
43357f3f4c0SJeff Layton }
434d67ae825STom Haynes return cred;
435d67ae825STom Haynes }
436d67ae825STom Haynes
437d67ae825STom Haynes /**
438302fad7bSTrond Myklebust * nfs4_ff_find_or_create_ds_client - Find or create a DS rpc client
439561d6f8aSTrond Myklebust * @mirror: pointer to the mirror
440302fad7bSTrond Myklebust * @ds_clp: nfs_client for the DS
441302fad7bSTrond Myklebust * @inode: pointer to inode
442302fad7bSTrond Myklebust *
443d67ae825STom Haynes * Find or create a DS rpc client with th MDS server rpc client auth flavor
444d67ae825STom Haynes * in the nfs_client cl_ds_clients list.
445d67ae825STom Haynes */
446d67ae825STom Haynes struct rpc_clnt *
nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror * mirror,struct nfs_client * ds_clp,struct inode * inode)447561d6f8aSTrond Myklebust nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror,
448d67ae825STom Haynes struct nfs_client *ds_clp, struct inode *inode)
449d67ae825STom Haynes {
450d67ae825STom Haynes switch (mirror->mirror_ds->ds_versions[0].version) {
451d67ae825STom Haynes case 3:
452d67ae825STom Haynes /* For NFSv3 DS, flavor is set when creating DS connections */
453d67ae825STom Haynes return ds_clp->cl_rpcclient;
454d67ae825STom Haynes case 4:
455d67ae825STom Haynes return nfs4_find_or_create_ds_client(ds_clp, inode);
456d67ae825STom Haynes default:
457d67ae825STom Haynes BUG();
458d67ae825STom Haynes }
459d67ae825STom Haynes }
460d67ae825STom Haynes
ff_layout_free_ds_ioerr(struct list_head * head)4615b9b3c85STrond Myklebust void ff_layout_free_ds_ioerr(struct list_head *head)
462d67ae825STom Haynes {
4635b9b3c85STrond Myklebust struct nfs4_ff_layout_ds_err *err;
4645b9b3c85STrond Myklebust
4655b9b3c85STrond Myklebust while (!list_empty(head)) {
4665b9b3c85STrond Myklebust err = list_first_entry(head,
4675b9b3c85STrond Myklebust struct nfs4_ff_layout_ds_err,
4685b9b3c85STrond Myklebust list);
4695b9b3c85STrond Myklebust list_del(&err->list);
4705b9b3c85STrond Myklebust kfree(err);
4715b9b3c85STrond Myklebust }
4725b9b3c85STrond Myklebust }
4735b9b3c85STrond Myklebust
4745b9b3c85STrond Myklebust /* called with inode i_lock held */
ff_layout_encode_ds_ioerr(struct xdr_stream * xdr,const struct list_head * head)4755b9b3c85STrond Myklebust int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head)
4765b9b3c85STrond Myklebust {
4775b9b3c85STrond Myklebust struct nfs4_ff_layout_ds_err *err;
478d67ae825STom Haynes __be32 *p;
479d67ae825STom Haynes
4805b9b3c85STrond Myklebust list_for_each_entry(err, head, list) {
481d67ae825STom Haynes /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE)
482d1354907STrond Myklebust * + array length + deviceid(NFS4_DEVICEID4_SIZE)
483d1354907STrond Myklebust * + status(4) + opnum(4)
484d67ae825STom Haynes */
485d67ae825STom Haynes p = xdr_reserve_space(xdr,
486d1354907STrond Myklebust 28 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE);
487d67ae825STom Haynes if (unlikely(!p))
488d67ae825STom Haynes return -ENOBUFS;
489d67ae825STom Haynes p = xdr_encode_hyper(p, err->offset);
490d67ae825STom Haynes p = xdr_encode_hyper(p, err->length);
491d67ae825STom Haynes p = xdr_encode_opaque_fixed(p, &err->stateid,
492d67ae825STom Haynes NFS4_STATEID_SIZE);
493d1354907STrond Myklebust /* Encode 1 error */
494d1354907STrond Myklebust *p++ = cpu_to_be32(1);
495d67ae825STom Haynes p = xdr_encode_opaque_fixed(p, &err->deviceid,
496d67ae825STom Haynes NFS4_DEVICEID4_SIZE);
497d67ae825STom Haynes *p++ = cpu_to_be32(err->status);
498d67ae825STom Haynes *p++ = cpu_to_be32(err->opnum);
4995b9b3c85STrond Myklebust dprintk("%s: offset %llu length %llu status %d op %d\n",
500d67ae825STom Haynes __func__, err->offset, err->length, err->status,
5015b9b3c85STrond Myklebust err->opnum);
502d67ae825STom Haynes }
503d67ae825STom Haynes
504d67ae825STom Haynes return 0;
505d67ae825STom Haynes }
506d67ae825STom Haynes
5075b9b3c85STrond Myklebust static
do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr * lo,const struct pnfs_layout_range * range,struct list_head * head,unsigned int maxnum)5085b9b3c85STrond Myklebust unsigned int do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
5095b9b3c85STrond Myklebust const struct pnfs_layout_range *range,
5105b9b3c85STrond Myklebust struct list_head *head,
5115b9b3c85STrond Myklebust unsigned int maxnum)
5125b9b3c85STrond Myklebust {
5135b9b3c85STrond Myklebust struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo);
5145b9b3c85STrond Myklebust struct inode *inode = lo->plh_inode;
5155b9b3c85STrond Myklebust struct nfs4_ff_layout_ds_err *err, *n;
5165b9b3c85STrond Myklebust unsigned int ret = 0;
5175b9b3c85STrond Myklebust
5185b9b3c85STrond Myklebust spin_lock(&inode->i_lock);
5195b9b3c85STrond Myklebust list_for_each_entry_safe(err, n, &flo->error_list, list) {
5205b9b3c85STrond Myklebust if (!pnfs_is_range_intersecting(err->offset,
5215b9b3c85STrond Myklebust pnfs_end_offset(err->offset, err->length),
5225b9b3c85STrond Myklebust range->offset,
5235b9b3c85STrond Myklebust pnfs_end_offset(range->offset, range->length)))
5245b9b3c85STrond Myklebust continue;
5255b9b3c85STrond Myklebust if (!maxnum)
5265b9b3c85STrond Myklebust break;
5275b9b3c85STrond Myklebust list_move(&err->list, head);
5285b9b3c85STrond Myklebust maxnum--;
5295b9b3c85STrond Myklebust ret++;
5305b9b3c85STrond Myklebust }
5315b9b3c85STrond Myklebust spin_unlock(&inode->i_lock);
5325b9b3c85STrond Myklebust return ret;
5335b9b3c85STrond Myklebust }
5345b9b3c85STrond Myklebust
ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr * lo,const struct pnfs_layout_range * range,struct list_head * head,unsigned int maxnum)5355b9b3c85STrond Myklebust unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
5365b9b3c85STrond Myklebust const struct pnfs_layout_range *range,
5375b9b3c85STrond Myklebust struct list_head *head,
5385b9b3c85STrond Myklebust unsigned int maxnum)
5395b9b3c85STrond Myklebust {
5405b9b3c85STrond Myklebust unsigned int ret;
5415b9b3c85STrond Myklebust
5425b9b3c85STrond Myklebust ret = do_layout_fetch_ds_ioerr(lo, range, head, maxnum);
5435b9b3c85STrond Myklebust /* If we're over the max, discard all remaining entries */
5445b9b3c85STrond Myklebust if (ret == maxnum) {
5455b9b3c85STrond Myklebust LIST_HEAD(discard);
5465b9b3c85STrond Myklebust do_layout_fetch_ds_ioerr(lo, range, &discard, -1);
5475b9b3c85STrond Myklebust ff_layout_free_ds_ioerr(&discard);
5485b9b3c85STrond Myklebust }
5495b9b3c85STrond Myklebust return ret;
5505b9b3c85STrond Myklebust }
5515b9b3c85STrond Myklebust
ff_read_layout_has_available_ds(struct pnfs_layout_segment * lseg)55281d6dc8bSTrond Myklebust static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg)
553d67ae825STom Haynes {
554d67ae825STom Haynes struct nfs4_ff_layout_mirror *mirror;
555d67ae825STom Haynes struct nfs4_deviceid_node *devid;
55681d6dc8bSTrond Myklebust u32 idx;
557d67ae825STom Haynes
558d67ae825STom Haynes for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) {
559d67ae825STom Haynes mirror = FF_LAYOUT_COMP(lseg, idx);
56065990d1aSFred Isaman if (mirror) {
56165990d1aSFred Isaman if (!mirror->mirror_ds)
56265990d1aSFred Isaman return true;
56365990d1aSFred Isaman if (IS_ERR(mirror->mirror_ds))
56465990d1aSFred Isaman continue;
565d67ae825STom Haynes devid = &mirror->mirror_ds->id_node;
56617aaec81STrond Myklebust if (!nfs4_test_deviceid_unavailable(devid))
567d67ae825STom Haynes return true;
568d67ae825STom Haynes }
569d67ae825STom Haynes }
570d67ae825STom Haynes
571d67ae825STom Haynes return false;
572d67ae825STom Haynes }
573d67ae825STom Haynes
ff_rw_layout_has_available_ds(struct pnfs_layout_segment * lseg)57481d6dc8bSTrond Myklebust static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg)
57581d6dc8bSTrond Myklebust {
57681d6dc8bSTrond Myklebust struct nfs4_ff_layout_mirror *mirror;
57781d6dc8bSTrond Myklebust struct nfs4_deviceid_node *devid;
57881d6dc8bSTrond Myklebust u32 idx;
57981d6dc8bSTrond Myklebust
58081d6dc8bSTrond Myklebust for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) {
58181d6dc8bSTrond Myklebust mirror = FF_LAYOUT_COMP(lseg, idx);
58265990d1aSFred Isaman if (!mirror || IS_ERR(mirror->mirror_ds))
58381d6dc8bSTrond Myklebust return false;
58465990d1aSFred Isaman if (!mirror->mirror_ds)
58565990d1aSFred Isaman continue;
58681d6dc8bSTrond Myklebust devid = &mirror->mirror_ds->id_node;
58717aaec81STrond Myklebust if (nfs4_test_deviceid_unavailable(devid))
58881d6dc8bSTrond Myklebust return false;
58981d6dc8bSTrond Myklebust }
59081d6dc8bSTrond Myklebust
59181d6dc8bSTrond Myklebust return FF_LAYOUT_MIRROR_COUNT(lseg) != 0;
59281d6dc8bSTrond Myklebust }
59381d6dc8bSTrond Myklebust
ff_layout_has_available_ds(struct pnfs_layout_segment * lseg)59465990d1aSFred Isaman static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg)
59581d6dc8bSTrond Myklebust {
59681d6dc8bSTrond Myklebust if (lseg->pls_range.iomode == IOMODE_READ)
59781d6dc8bSTrond Myklebust return ff_read_layout_has_available_ds(lseg);
59881d6dc8bSTrond Myklebust /* Note: RW layout needs all mirrors available */
59981d6dc8bSTrond Myklebust return ff_rw_layout_has_available_ds(lseg);
60081d6dc8bSTrond Myklebust }
60181d6dc8bSTrond Myklebust
ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment * lseg)6023b13b4b3STom Haynes bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg)
6033b13b4b3STom Haynes {
6043b13b4b3STom Haynes return ff_layout_no_fallback_to_mds(lseg) ||
6053b13b4b3STom Haynes ff_layout_has_available_ds(lseg);
6063b13b4b3STom Haynes }
6073b13b4b3STom Haynes
ff_layout_avoid_read_on_rw(struct pnfs_layout_segment * lseg)608fb1084e3STom Haynes bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg)
609fb1084e3STom Haynes {
610fb1084e3STom Haynes return lseg->pls_range.iomode == IOMODE_RW &&
611fb1084e3STom Haynes ff_layout_no_read_on_rw(lseg);
612fb1084e3STom Haynes }
613fb1084e3STom Haynes
614d67ae825STom Haynes module_param(dataserver_retrans, uint, 0644);
615d67ae825STom Haynes MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
616d67ae825STom Haynes "retries a request before it attempts further "
617d67ae825STom Haynes " recovery action.");
618d67ae825STom Haynes module_param(dataserver_timeo, uint, 0644);
619d67ae825STom Haynes MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
620d67ae825STom Haynes "NFSv4.1 client waits for a response from a "
621d67ae825STom Haynes " data server before it retries an NFS request.");
622