1 /* 2 * Copyright (c) 2015 Oracle. All rights reserved. 3 * 4 * Support for backward direction RPCs on RPC/RDMA. 5 */ 6 7 #include <linux/module.h> 8 #include <linux/sunrpc/xprt.h> 9 #include <linux/sunrpc/svc.h> 10 #include <linux/sunrpc/svc_xprt.h> 11 12 #include "xprt_rdma.h" 13 14 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 15 # define RPCDBG_FACILITY RPCDBG_TRANS 16 #endif 17 18 #define RPCRDMA_BACKCHANNEL_DEBUG 19 20 static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, 21 struct rpc_rqst *rqst) 22 { 23 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 24 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 25 26 spin_lock(&buf->rb_reqslock); 27 list_del(&req->rl_all); 28 spin_unlock(&buf->rb_reqslock); 29 30 rpcrdma_destroy_req(&r_xprt->rx_ia, req); 31 32 kfree(rqst); 33 } 34 35 static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, 36 struct rpc_rqst *rqst) 37 { 38 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 39 struct rpcrdma_regbuf *rb; 40 struct rpcrdma_req *req; 41 struct xdr_buf *buf; 42 size_t size; 43 44 req = rpcrdma_create_req(r_xprt); 45 if (!req) 46 return -ENOMEM; 47 req->rl_backchannel = true; 48 49 size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); 50 rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); 51 if (IS_ERR(rb)) 52 goto out_fail; 53 req->rl_rdmabuf = rb; 54 55 size += RPCRDMA_INLINE_READ_THRESHOLD(rqst); 56 rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); 57 if (IS_ERR(rb)) 58 goto out_fail; 59 rb->rg_owner = req; 60 req->rl_sendbuf = rb; 61 /* so that rpcr_to_rdmar works when receiving a request */ 62 rqst->rq_buffer = (void *)req->rl_sendbuf->rg_base; 63 64 buf = &rqst->rq_snd_buf; 65 buf->head[0].iov_base = rqst->rq_buffer; 66 buf->head[0].iov_len = 0; 67 buf->tail[0].iov_base = NULL; 68 buf->tail[0].iov_len = 0; 69 buf->page_len = 0; 70 buf->len = 0; 71 buf->buflen = size; 72 73 return 0; 74 75 out_fail: 76 rpcrdma_bc_free_rqst(r_xprt, rqst); 77 return -ENOMEM; 78 } 79 80 /* Allocate and add receive buffers to the rpcrdma_buffer's 81 * existing list of rep's. These are released when the 82 * transport is destroyed. 83 */ 84 static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, 85 unsigned int count) 86 { 87 struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; 88 struct rpcrdma_rep *rep; 89 unsigned long flags; 90 int rc = 0; 91 92 while (count--) { 93 rep = rpcrdma_create_rep(r_xprt); 94 if (IS_ERR(rep)) { 95 pr_err("RPC: %s: reply buffer alloc failed\n", 96 __func__); 97 rc = PTR_ERR(rep); 98 break; 99 } 100 101 spin_lock_irqsave(&buffers->rb_lock, flags); 102 list_add(&rep->rr_list, &buffers->rb_recv_bufs); 103 spin_unlock_irqrestore(&buffers->rb_lock, flags); 104 } 105 106 return rc; 107 } 108 109 /** 110 * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests 111 * @xprt: transport associated with these backchannel resources 112 * @reqs: number of concurrent incoming requests to expect 113 * 114 * Returns 0 on success; otherwise a negative errno 115 */ 116 int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) 117 { 118 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 119 struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; 120 struct rpc_rqst *rqst; 121 unsigned int i; 122 int rc; 123 124 /* The backchannel reply path returns each rpc_rqst to the 125 * bc_pa_list _after_ the reply is sent. If the server is 126 * faster than the client, it can send another backward 127 * direction request before the rpc_rqst is returned to the 128 * list. The client rejects the request in this case. 129 * 130 * Twice as many rpc_rqsts are prepared to ensure there is 131 * always an rpc_rqst available as soon as a reply is sent. 132 */ 133 if (reqs > RPCRDMA_BACKWARD_WRS >> 1) 134 goto out_err; 135 136 for (i = 0; i < (reqs << 1); i++) { 137 rqst = kzalloc(sizeof(*rqst), GFP_KERNEL); 138 if (!rqst) { 139 pr_err("RPC: %s: Failed to create bc rpc_rqst\n", 140 __func__); 141 goto out_free; 142 } 143 144 rqst->rq_xprt = &r_xprt->rx_xprt; 145 INIT_LIST_HEAD(&rqst->rq_list); 146 INIT_LIST_HEAD(&rqst->rq_bc_list); 147 148 if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) 149 goto out_free; 150 151 spin_lock_bh(&xprt->bc_pa_lock); 152 list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); 153 spin_unlock_bh(&xprt->bc_pa_lock); 154 } 155 156 rc = rpcrdma_bc_setup_reps(r_xprt, reqs); 157 if (rc) 158 goto out_free; 159 160 rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs); 161 if (rc) 162 goto out_free; 163 164 buffer->rb_bc_srv_max_requests = reqs; 165 request_module("svcrdma"); 166 167 return 0; 168 169 out_free: 170 xprt_rdma_bc_destroy(xprt, reqs); 171 172 out_err: 173 pr_err("RPC: %s: setup backchannel transport failed\n", __func__); 174 return -ENOMEM; 175 } 176 177 /** 178 * xprt_rdma_bc_up - Create transport endpoint for backchannel service 179 * @serv: server endpoint 180 * @net: network namespace 181 * 182 * The "xprt" is an implied argument: it supplies the name of the 183 * backchannel transport class. 184 * 185 * Returns zero on success, negative errno on failure 186 */ 187 int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net) 188 { 189 int ret; 190 191 ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0); 192 if (ret < 0) 193 return ret; 194 return 0; 195 } 196 197 /** 198 * rpcrdma_bc_marshal_reply - Send backwards direction reply 199 * @rqst: buffer containing RPC reply data 200 * 201 * Returns zero on success. 202 */ 203 int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) 204 { 205 struct rpc_xprt *xprt = rqst->rq_xprt; 206 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 207 struct rpcrdma_req *req = rpcr_to_rdmar(rqst); 208 struct rpcrdma_msg *headerp; 209 size_t rpclen; 210 211 headerp = rdmab_to_msg(req->rl_rdmabuf); 212 headerp->rm_xid = rqst->rq_xid; 213 headerp->rm_vers = rpcrdma_version; 214 headerp->rm_credit = 215 cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests); 216 headerp->rm_type = rdma_msg; 217 headerp->rm_body.rm_chunks[0] = xdr_zero; 218 headerp->rm_body.rm_chunks[1] = xdr_zero; 219 headerp->rm_body.rm_chunks[2] = xdr_zero; 220 221 rpclen = rqst->rq_svec[0].iov_len; 222 223 pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", 224 __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); 225 pr_info("RPC: %s: RPC/RDMA: %*ph\n", 226 __func__, (int)RPCRDMA_HDRLEN_MIN, headerp); 227 pr_info("RPC: %s: RPC: %*ph\n", 228 __func__, (int)rpclen, rqst->rq_svec[0].iov_base); 229 230 req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); 231 req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; 232 req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); 233 234 req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); 235 req->rl_send_iov[1].length = rpclen; 236 req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); 237 238 req->rl_niovs = 2; 239 return 0; 240 } 241 242 /** 243 * xprt_rdma_bc_destroy - Release resources for handling backchannel requests 244 * @xprt: transport associated with these backchannel resources 245 * @reqs: number of incoming requests to destroy; ignored 246 */ 247 void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) 248 { 249 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 250 struct rpc_rqst *rqst, *tmp; 251 252 spin_lock_bh(&xprt->bc_pa_lock); 253 list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { 254 list_del(&rqst->rq_bc_pa_list); 255 spin_unlock_bh(&xprt->bc_pa_lock); 256 257 rpcrdma_bc_free_rqst(r_xprt, rqst); 258 259 spin_lock_bh(&xprt->bc_pa_lock); 260 } 261 spin_unlock_bh(&xprt->bc_pa_lock); 262 } 263 264 /** 265 * xprt_rdma_bc_free_rqst - Release a backchannel rqst 266 * @rqst: request to release 267 */ 268 void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) 269 { 270 struct rpc_xprt *xprt = rqst->rq_xprt; 271 272 smp_mb__before_atomic(); 273 WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); 274 clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); 275 smp_mb__after_atomic(); 276 277 spin_lock_bh(&xprt->bc_pa_lock); 278 list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); 279 spin_unlock_bh(&xprt->bc_pa_lock); 280 } 281 282 /** 283 * rpcrdma_bc_receive_call - Handle a backward direction call 284 * @xprt: transport receiving the call 285 * @rep: receive buffer containing the call 286 * 287 * Called in the RPC reply handler, which runs in a tasklet. 288 * Be quick about it. 289 * 290 * Operational assumptions: 291 * o Backchannel credits are ignored, just as the NFS server 292 * forechannel currently does 293 * o The ULP manages a replay cache (eg, NFSv4.1 sessions). 294 * No replay detection is done at the transport level 295 */ 296 void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, 297 struct rpcrdma_rep *rep) 298 { 299 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 300 struct rpcrdma_msg *headerp; 301 struct svc_serv *bc_serv; 302 struct rpcrdma_req *req; 303 struct rpc_rqst *rqst; 304 struct xdr_buf *buf; 305 size_t size; 306 __be32 *p; 307 308 headerp = rdmab_to_msg(rep->rr_rdmabuf); 309 #ifdef RPCRDMA_BACKCHANNEL_DEBUG 310 pr_info("RPC: %s: callback XID %08x, length=%u\n", 311 __func__, be32_to_cpu(headerp->rm_xid), rep->rr_len); 312 pr_info("RPC: %s: %*ph\n", __func__, rep->rr_len, headerp); 313 #endif 314 315 /* Sanity check: 316 * Need at least enough bytes for RPC/RDMA header, as code 317 * here references the header fields by array offset. Also, 318 * backward calls are always inline, so ensure there 319 * are some bytes beyond the RPC/RDMA header. 320 */ 321 if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24) 322 goto out_short; 323 p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN); 324 size = rep->rr_len - RPCRDMA_HDRLEN_MIN; 325 326 /* Grab a free bc rqst */ 327 spin_lock(&xprt->bc_pa_lock); 328 if (list_empty(&xprt->bc_pa_list)) { 329 spin_unlock(&xprt->bc_pa_lock); 330 goto out_overflow; 331 } 332 rqst = list_first_entry(&xprt->bc_pa_list, 333 struct rpc_rqst, rq_bc_pa_list); 334 list_del(&rqst->rq_bc_pa_list); 335 spin_unlock(&xprt->bc_pa_lock); 336 #ifdef RPCRDMA_BACKCHANNEL_DEBUG 337 pr_info("RPC: %s: using rqst %p\n", __func__, rqst); 338 #endif 339 340 /* Prepare rqst */ 341 rqst->rq_reply_bytes_recvd = 0; 342 rqst->rq_bytes_sent = 0; 343 rqst->rq_xid = headerp->rm_xid; 344 set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); 345 346 buf = &rqst->rq_rcv_buf; 347 memset(buf, 0, sizeof(*buf)); 348 buf->head[0].iov_base = p; 349 buf->head[0].iov_len = size; 350 buf->len = size; 351 352 /* The receive buffer has to be hooked to the rpcrdma_req 353 * so that it can be reposted after the server is done 354 * parsing it but just before sending the backward 355 * direction reply. 356 */ 357 req = rpcr_to_rdmar(rqst); 358 #ifdef RPCRDMA_BACKCHANNEL_DEBUG 359 pr_info("RPC: %s: attaching rep %p to req %p\n", 360 __func__, rep, req); 361 #endif 362 req->rl_reply = rep; 363 364 /* Defeat the retransmit detection logic in send_request */ 365 req->rl_connect_cookie = 0; 366 367 /* Queue rqst for ULP's callback service */ 368 bc_serv = xprt->bc_serv; 369 spin_lock(&bc_serv->sv_cb_lock); 370 list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list); 371 spin_unlock(&bc_serv->sv_cb_lock); 372 373 wake_up(&bc_serv->sv_cb_waitq); 374 375 r_xprt->rx_stats.bcall_count++; 376 return; 377 378 out_overflow: 379 pr_warn("RPC/RDMA backchannel overflow\n"); 380 xprt_disconnect_done(xprt); 381 /* This receive buffer gets reposted automatically 382 * when the connection is re-established. 383 */ 384 return; 385 386 out_short: 387 pr_warn("RPC/RDMA short backward direction call\n"); 388 389 if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) 390 xprt_disconnect_done(xprt); 391 else 392 pr_warn("RPC: %s: reposting rep %p\n", 393 __func__, rep); 394 } 395