xref: /openbmc/linux/net/sunrpc/xprtrdma/frwr_ops.c (revision 7e24a55b2122746c2eef192296fc84624354f895)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2015, 2017 Oracle.  All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Registration Work
 * Requests (FRWR).
 *
 * FRWR features ordered asynchronous registration and invalidation
 * of arbitrarily-sized memory regions. This is the fastest and safest
 * but most complex memory registration mode.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA Read or Write using a FAST_REG
 * Work Request (frwr_map). When the RDMA operation is finished, this
 * Memory Region is invalidated using a LOCAL_INV Work Request
 * (frwr_unmap_async and frwr_unmap_sync).
 *
 * Typically FAST_REG Work Requests are not signaled, and neither are
 * RDMA Send Work Requests (with the exception of signaling occasionally
 * to prevent provider work queue overflows). This greatly reduces HCA
 * interrupt workload.
 */

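/* Illustration: an RPC Call with two registered chunks typically results
 * in a Send Queue sequence like the following (built by frwr_send and
 * the frwr_unmap_* helpers below):
 *
 *	FAST_REG(MR #1), FAST_REG(MR #2), SEND (RPC Call)
 *	... the RPC Reply arrives ...
 *	LOCAL_INV(MR #1), LOCAL_INV(MR #2)
 *
 * Completion of the last LOCAL_INV in the chain is what releases the
 * waiting RPC (frwr_wc_localinv_wake or frwr_wc_localinv_done).
 */
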
/* Transport recovery
 *
 * frwr_map and frwr_unmap_* cannot run at the same time the transport
 * connect worker is running. The connect worker holds the transport
 * send lock, just as ->send_request does. This prevents frwr_map and
 * the connect worker from running concurrently. When a connection is
 * closed, the Receive completion queue is drained before allowing
 * the connect worker to get control. This prevents frwr_unmap and the
 * connect worker from running concurrently.
 *
 * When the underlying transport disconnects, MRs that are in flight
 * are flushed and are likely unusable. Thus all MRs are destroyed.
 * New MRs are created on demand.
 */

#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

static void frwr_cid_init(struct rpcrdma_ep *ep,
			  struct rpcrdma_mr *mr)
{
	struct rpc_rdma_cid *cid = &mr->mr_cid;

	cid->ci_queue_id = ep->re_attr.send_cq->res.id;
	cid->ci_completion_id = mr->mr_ibmr->res.id;
}

static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
{
	if (mr->mr_device) {
		trace_xprtrdma_mr_unmap(mr);
		ib_dma_unmap_sg(mr->mr_device, mr->mr_sg, mr->mr_nents,
				mr->mr_dir);
		mr->mr_device = NULL;
	}
}

/**
 * frwr_mr_release - Destroy one MR
 * @mr: MR allocated by frwr_mr_init
 *
 */
void frwr_mr_release(struct rpcrdma_mr *mr)
{
	int rc;

	frwr_mr_unmap(mr->mr_xprt, mr);

	rc = ib_dereg_mr(mr->mr_ibmr);
	if (rc)
		trace_xprtrdma_frwr_dereg(mr, rc);
	kfree(mr->mr_sg);
	kfree(mr);
}

static void frwr_mr_put(struct rpcrdma_mr *mr)
{
	frwr_mr_unmap(mr->mr_xprt, mr);

	/* The MR is returned to the req's MR free list instead
	 * of to the xprt's MR free list. No spinlock is needed.
	 */
	rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}

/**
 * frwr_reset - Place MRs back on @req's free list
 * @req: request to reset
 *
 * Used after a failed marshal. For FRWR, this means the MRs
 * don't have to be fully released and recreated.
 *
 * NB: This is safe only as long as none of @req's MRs are
 * involved with an ongoing asynchronous FAST_REG or LOCAL_INV
 * Work Request.
 */
void frwr_reset(struct rpcrdma_req *req)
{
	struct rpcrdma_mr *mr;

	while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
		frwr_mr_put(mr);
}

/**
 * frwr_mr_init - Initialize one MR
 * @r_xprt: controlling transport instance
 * @mr: generic MR to prepare for FRWR
 *
 * Returns zero if successful. Otherwise a negative errno
 * is returned.
 */
int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
{
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	unsigned int depth = ep->re_max_fr_depth;
	struct scatterlist *sg;
	struct ib_mr *frmr;

	sg = kcalloc_node(depth, sizeof(*sg), XPRTRDMA_GFP_FLAGS,
			  ibdev_to_node(ep->re_id->device));
	if (!sg)
		return -ENOMEM;

	frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth);
	if (IS_ERR(frmr))
		goto out_mr_err;

	mr->mr_xprt = r_xprt;
	mr->mr_ibmr = frmr;
	mr->mr_device = NULL;
	INIT_LIST_HEAD(&mr->mr_list);
	init_completion(&mr->mr_linv_done);
	frwr_cid_init(ep, mr);

	sg_init_table(sg, depth);
	mr->mr_sg = sg;
	return 0;

out_mr_err:
	kfree(sg);
	trace_xprtrdma_frwr_alloc(mr, PTR_ERR(frmr));
	return PTR_ERR(frmr);
}

/**
 * frwr_query_device - Prepare a transport for use with FRWR
 * @ep: endpoint to fill in
 * @device: RDMA device to query
 *
 * On success, sets:
 *	ep->re_attr
 *	ep->re_max_requests
 *	ep->re_max_rdma_segs
 *	ep->re_max_fr_depth
 *	ep->re_mrtype
 *
 * Return values:
 *   On success, returns zero.
 *   %-EINVAL - the device does not support FRWR memory registration
 *   %-ENOMEM - the device is not sufficiently capable for NFS/RDMA
 */
int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)
{
	const struct ib_device_attr *attrs = &device->attrs;
	int max_qp_wr, depth, delta;
	unsigned int max_sge;

	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||
	    attrs->max_fast_reg_page_list_len == 0) {
		pr_err("rpcrdma: 'frwr' mode is not supported by device %s\n",
		       device->name);
		return -EINVAL;
	}

	max_sge = min_t(unsigned int, attrs->max_send_sge,
			RPCRDMA_MAX_SEND_SGES);
	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
		pr_err("rpcrdma: HCA provides only %u send SGEs\n", max_sge);
		return -ENOMEM;
	}
	ep->re_attr.cap.max_send_sge = max_sge;
	ep->re_attr.cap.max_recv_sge = 1;

	ep->re_mrtype = IB_MR_TYPE_MEM_REG;
	if (attrs->kernel_cap_flags & IBK_SG_GAPS_REG)
		ep->re_mrtype = IB_MR_TYPE_SG_GAPS;
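	/* An SG_GAPS MR can cover a scatterlist whose elements are not
	 * page-aligned, so frwr_map does not have to end a registration
	 * at the first sub-page-sized element on such a device.
	 */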

	/* Quirk: Some devices advertise a large max_fast_reg_page_list_len
	 * capability, but perform optimally when the MRs are not larger
	 * than a page.
	 */
	if (attrs->max_sge_rd > RPCRDMA_MAX_HDR_SEGS)
		ep->re_max_fr_depth = attrs->max_sge_rd;
	else
		ep->re_max_fr_depth = attrs->max_fast_reg_page_list_len;
	if (ep->re_max_fr_depth > RPCRDMA_MAX_DATA_SEGS)
		ep->re_max_fr_depth = RPCRDMA_MAX_DATA_SEGS;

	/* Add room for frwr register and invalidate WRs.
	 * 1. FRWR reg WR for head
	 * 2. FRWR invalidate WR for head
	 * 3. N FRWR reg WRs for pagelist
	 * 4. N FRWR invalidate WRs for pagelist
	 * 5. FRWR reg WR for tail
	 * 6. FRWR invalidate WR for tail
	 * 7. The RDMA_SEND WR
	 */
	depth = 7;

	/* Calculate N if the device max FRWR depth is smaller than
	 * RPCRDMA_MAX_DATA_SEGS.
	 */
	if (ep->re_max_fr_depth < RPCRDMA_MAX_DATA_SEGS) {
		delta = RPCRDMA_MAX_DATA_SEGS - ep->re_max_fr_depth;
		do {
			depth += 2; /* FRWR reg + invalidate */
			delta -= ep->re_max_fr_depth;
		} while (delta > 0);
	}
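	/* For example, if the device's FRWR depth were 16 and
	 * RPCRDMA_MAX_DATA_SEGS were 64, delta would start at 48 and
	 * the loop would run three times, adding six WRs for a total
	 * depth of 13.
	 */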

	max_qp_wr = attrs->max_qp_wr;
	max_qp_wr -= RPCRDMA_BACKWARD_WRS;
	max_qp_wr -= 1;
	if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
		return -ENOMEM;
	if (ep->re_max_requests > max_qp_wr)
		ep->re_max_requests = max_qp_wr;
	ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth;
	if (ep->re_attr.cap.max_send_wr > max_qp_wr) {
		ep->re_max_requests = max_qp_wr / depth;
		if (!ep->re_max_requests)
			return -ENOMEM;
		ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth;
	}
	ep->re_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
	ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
	ep->re_attr.cap.max_recv_wr = ep->re_max_requests;
	ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
	ep->re_attr.cap.max_recv_wr += RPCRDMA_MAX_RECV_BATCH;
	ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */

	ep->re_max_rdma_segs =
		DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ep->re_max_fr_depth);
	/* Reply chunks require segments for head and tail buffers */
	ep->re_max_rdma_segs += 2;
	if (ep->re_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS)
		ep->re_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS;

	/* Ensure the underlying device is capable of conveying the
	 * largest r/wsize NFS will ask for. This guarantees that
	 * failing over from one RDMA device to another will not
	 * break NFS I/O.
	 */
	if ((ep->re_max_rdma_segs * ep->re_max_fr_depth) < RPCRDMA_MAX_SEGS)
		return -ENOMEM;

	return 0;
}

/**
 * frwr_map - Register a memory region
 * @r_xprt: controlling transport
 * @seg: memory region co-ordinates
 * @nsegs: number of segments remaining
 * @writing: true when RDMA Write will be used
 * @xid: XID of RPC using the registered memory
 * @mr: MR to fill in
 *
 * Prepare a REG_MR Work Request to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 *
 * Returns the next segment or a negative errno pointer.
 * On success, @mr is filled in.
 */
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
				struct rpcrdma_mr_seg *seg,
				int nsegs, bool writing, __be32 xid,
				struct rpcrdma_mr *mr)
{
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct ib_reg_wr *reg_wr;
	int i, n, dma_nents;
	struct ib_mr *ibmr;
	u8 key;

	if (nsegs > ep->re_max_fr_depth)
		nsegs = ep->re_max_fr_depth;
	for (i = 0; i < nsegs;) {
		sg_set_page(&mr->mr_sg[i], seg->mr_page,
			    seg->mr_len, seg->mr_offset);

		++seg;
		++i;
		if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS)
			continue;
		if ((i < nsegs && seg->mr_offset) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mr->mr_dir = rpcrdma_data_dir(writing);
	mr->mr_nents = i;

	dma_nents = ib_dma_map_sg(ep->re_id->device, mr->mr_sg, mr->mr_nents,
				  mr->mr_dir);
	if (!dma_nents)
		goto out_dmamap_err;
	mr->mr_device = ep->re_id->device;

	ibmr = mr->mr_ibmr;
	n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
	if (n != dma_nents)
		goto out_mapmr_err;

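	/* Stash the RPC's XID in the upper 32 bits of the advertised MR
	 * offset so that MRs observed on the wire (for example in a
	 * packet capture) can be matched to the RPC transactions that
	 * own them.
	 */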
	ibmr->iova &= 0x00000000ffffffff;
	ibmr->iova |= ((u64)be32_to_cpu(xid)) << 32;
	key = (u8)(ibmr->rkey & 0x000000FF);
	ib_update_fast_reg_key(ibmr, ++key);

	reg_wr = &mr->mr_regwr;
	reg_wr->mr = ibmr;
	reg_wr->key = ibmr->rkey;
	reg_wr->access = writing ?
			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			 IB_ACCESS_REMOTE_READ;

	mr->mr_handle = ibmr->rkey;
	mr->mr_length = ibmr->length;
	mr->mr_offset = ibmr->iova;
	trace_xprtrdma_mr_map(mr);

	return seg;

out_dmamap_err:
	trace_xprtrdma_frwr_sgerr(mr, i);
	return ERR_PTR(-EIO);

out_mapmr_err:
	trace_xprtrdma_frwr_maperr(mr, n);
	return ERR_PTR(-EIO);
}

/**
 * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
 * @cq: completion queue
 * @wc: WCE for a completed FastReg WR
 *
 * Each flushed MR gets destroyed after the QP has drained.
 */
static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_fastreg(wc, &mr->mr_cid);

	rpcrdma_flush_disconnect(cq->cq_context, wc);
}

/**
 * frwr_send - post Send WRs containing the RPC Call message
 * @r_xprt: controlling transport instance
 * @req: prepared RPC Call
 *
 * For FRWR, chain any FastReg WRs to the Send WR. Only a
 * single ib_post_send call is needed to register memory
 * and then post the Send WR.
 *
 * Returns the return code from ib_post_send.
 *
 * Caller must hold the transport send lock to ensure that the
 * pointers to the transport's rdma_cm_id and QP are stable.
 */
int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *post_wr, *send_wr = &req->rl_wr;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rpcrdma_mr *mr;
	unsigned int num_wrs;
	int ret;

	num_wrs = 1;
	post_wr = send_wr;
	list_for_each_entry(mr, &req->rl_registered, mr_list) {
		trace_xprtrdma_mr_fastreg(mr);

		mr->mr_cqe.done = frwr_wc_fastreg;
		mr->mr_regwr.wr.next = post_wr;
		mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe;
		mr->mr_regwr.wr.num_sge = 0;
		mr->mr_regwr.wr.opcode = IB_WR_REG_MR;
		mr->mr_regwr.wr.send_flags = 0;
		post_wr = &mr->mr_regwr.wr;
		++num_wrs;
	}

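	/* Signal this Send if something else still holds a reference on
	 * the req, or if this chain would exceed the remaining unsignaled
	 * WR budget (re_send_count). Otherwise post the chain unsignaled
	 * and charge it against the budget, keeping completion interrupts
	 * infrequent.
	 */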
	if ((kref_read(&req->rl_kref) > 1) || num_wrs > ep->re_send_count) {
		send_wr->send_flags |= IB_SEND_SIGNALED;
		ep->re_send_count = min_t(unsigned int, ep->re_send_batch,
					  num_wrs - ep->re_send_count);
	} else {
		send_wr->send_flags &= ~IB_SEND_SIGNALED;
		ep->re_send_count -= num_wrs;
	}

	trace_xprtrdma_post_send(req);
	ret = ib_post_send(ep->re_id->qp, post_wr, NULL);
	if (ret)
		trace_xprtrdma_post_send_err(r_xprt, req, ret);
	return ret;
}

/**
 * frwr_reminv - handle a remotely invalidated mr on the @mrs list
 * @rep: Received reply
 * @mrs: list of MRs to check
 *
 */
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
{
	struct rpcrdma_mr *mr;

	list_for_each_entry(mr, mrs, mr_list)
		if (mr->mr_handle == rep->rr_inv_rkey) {
			list_del_init(&mr->mr_list);
			trace_xprtrdma_mr_reminv(mr);
			frwr_mr_put(mr);
			break;	/* only one invalidated MR per RPC */
		}
}

static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
{
	if (likely(wc->status == IB_WC_SUCCESS))
		frwr_mr_put(mr);
}

/**
 * frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC
 * @cq: completion queue
 * @wc: WCE for a completed LocalInv WR
 *
 */
static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_li(wc, &mr->mr_cid);
	frwr_mr_done(wc, mr);

	rpcrdma_flush_disconnect(cq->cq_context, wc);
}

/**
 * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC
 * @cq: completion queue
 * @wc: WCE for a completed LocalInv WR
 *
 * Awaken anyone waiting for an MR to finish being fenced.
 */
static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_li_wake(wc, &mr->mr_cid);
	frwr_mr_done(wc, mr);
	complete(&mr->mr_linv_done);

	rpcrdma_flush_disconnect(cq->cq_context, wc);
}

/**
 * frwr_unmap_sync - invalidate memory regions that were registered for @req
 * @r_xprt: controlling transport instance
 * @req: rpcrdma_req with a non-empty list of MRs to process
 *
 * Sleeps until it is safe for the host CPU to access the previously mapped
 * memory regions. This guarantees that registered MRs are properly fenced
 * from the server before the RPC consumer accesses the data in them. It
 * also ensures proper Send flow control: waking the next RPC waits until
 * this RPC has relinquished all its Send Queue entries.
 */
void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *first, **prev, *last;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	const struct ib_send_wr *bad_wr;
	struct rpcrdma_mr *mr;
	int rc;

	/* ORDER: Invalidate all of the MRs first
	 *
	 * Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	prev = &first;
	mr = rpcrdma_mr_pop(&req->rl_registered);
	do {
		trace_xprtrdma_mr_localinv(mr);
		r_xprt->rx_stats.local_inv_needed++;

		last = &mr->mr_invwr;
		last->next = NULL;
		last->wr_cqe = &mr->mr_cqe;
		last->sg_list = NULL;
		last->num_sge = 0;
		last->opcode = IB_WR_LOCAL_INV;
		last->send_flags = IB_SEND_SIGNALED;
		last->ex.invalidate_rkey = mr->mr_handle;

		last->wr_cqe->done = frwr_wc_localinv;

		*prev = last;
		prev = &last->next;
	} while ((mr = rpcrdma_mr_pop(&req->rl_registered)));

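	/* Recover the MR that owns the final LOCAL_INV WR in the chain;
	 * its completion is what the wait below is keyed on.
	 */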
	mr = container_of(last, struct rpcrdma_mr, mr_invwr);

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete.
	 */
	last->wr_cqe->done = frwr_wc_localinv_wake;
	reinit_completion(&mr->mr_linv_done);

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless re_id->qp is a valid pointer.
	 */
	bad_wr = NULL;
	rc = ib_post_send(ep->re_id->qp, first, &bad_wr);

	/* The final LOCAL_INV WR in the chain is supposed to
	 * do the wake. If it was never posted, the wake will
	 * not happen, so don't wait in that case.
	 */
	if (bad_wr != first)
		wait_for_completion(&mr->mr_linv_done);
	if (!rc)
		return;

	/* On error, the MRs get destroyed once the QP has drained. */
	trace_xprtrdma_post_linv_err(req, rc);

	/* Force a connection loss to ensure complete recovery.
	 */
	rpcrdma_force_disconnect(ep);
}

/**
 * frwr_wc_localinv_done - Invoked by RDMA provider for a signaled LOCAL_INV WC
 * @cq:	completion queue
 * @wc:	WCE for a completed LocalInv WR
 *
 */
static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
	struct rpcrdma_rep *rep;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_li_done(wc, &mr->mr_cid);

	/* Ensure that @rep is generated before the MR is released */
	rep = mr->mr_req->rl_reply;
	smp_rmb();

	if (wc->status != IB_WC_SUCCESS) {
		if (rep)
			rpcrdma_unpin_rqst(rep);
		rpcrdma_flush_disconnect(cq->cq_context, wc);
		return;
	}
	frwr_mr_put(mr);
	rpcrdma_complete_rqst(rep);
}

/**
 * frwr_unmap_async - invalidate memory regions that were registered for @req
 * @r_xprt: controlling transport instance
 * @req: rpcrdma_req with a non-empty list of MRs to process
 *
 * This guarantees that registered MRs are properly fenced from the
 * server before the RPC consumer accesses the data in them. It also
 * ensures proper Send flow control: waking the next RPC waits until
 * this RPC has relinquished all its Send Queue entries.
 */
void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *first, *last, **prev;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rpcrdma_mr *mr;
	int rc;

	/* Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	prev = &first;
	mr = rpcrdma_mr_pop(&req->rl_registered);
	do {
		trace_xprtrdma_mr_localinv(mr);
		r_xprt->rx_stats.local_inv_needed++;

		last = &mr->mr_invwr;
		last->next = NULL;
		last->wr_cqe = &mr->mr_cqe;
		last->sg_list = NULL;
		last->num_sge = 0;
		last->opcode = IB_WR_LOCAL_INV;
		last->send_flags = IB_SEND_SIGNALED;
		last->ex.invalidate_rkey = mr->mr_handle;

		last->wr_cqe->done = frwr_wc_localinv;

		*prev = last;
		prev = &last->next;
	} while ((mr = rpcrdma_mr_pop(&req->rl_registered)));

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete. The last completion will wake up the
	 * RPC waiter.
	 */
	last->wr_cqe->done = frwr_wc_localinv_done;

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless re_id->qp is a valid pointer.
	 */
	rc = ib_post_send(ep->re_id->qp, first, NULL);
	if (!rc)
		return;

	/* On error, the MRs get destroyed once the QP has drained. */
	trace_xprtrdma_post_linv_err(req, rc);

	/* The final LOCAL_INV WR in the chain is supposed to
	 * do the wake. If it was never posted, the wake does
	 * not happen. Unpin the rqst in preparation for its
	 * retransmission.
	 */
	rpcrdma_unpin_rqst(req->rl_reply);

	/* Force a connection loss to ensure complete recovery.
	 */
	rpcrdma_force_disconnect(ep);
}

/**
 * frwr_wp_create - Create an MR for padding Write chunks
 * @r_xprt: transport resources to use
 *
 * Return 0 on success, negative errno on failure.
 */
int frwr_wp_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rpcrdma_mr_seg seg;
	struct rpcrdma_mr *mr;

	mr = rpcrdma_mr_get(r_xprt);
	if (!mr)
		return -EAGAIN;
	mr->mr_req = NULL;
	ep->re_write_pad_mr = mr;

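	/* Map a single XDR_UNIT of the transport's pad buffer so the
	 * server has a registered target for the roundup bytes of a
	 * Write chunk whose payload is not XDR-aligned.
	 */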
	seg.mr_len = XDR_UNIT;
	seg.mr_page = virt_to_page(ep->re_write_pad);
	seg.mr_offset = offset_in_page(ep->re_write_pad);
	if (IS_ERR(frwr_map(r_xprt, &seg, 1, true, xdr_zero, mr)))
		return -EIO;
	trace_xprtrdma_mr_fastreg(mr);

	mr->mr_cqe.done = frwr_wc_fastreg;
	mr->mr_regwr.wr.next = NULL;
	mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe;
	mr->mr_regwr.wr.num_sge = 0;
	mr->mr_regwr.wr.opcode = IB_WR_REG_MR;
	mr->mr_regwr.wr.send_flags = 0;

	return ib_post_send(ep->re_id->qp, &mr->mr_regwr.wr, NULL);
}