1 /* 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. 3 * All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 35 #include "qib.h" 36 37 /* cut down ridiculously long IB macro names */ 38 #define OP(x) IB_OPCODE_UC_##x 39 40 /** 41 * qib_make_uc_req - construct a request packet (SEND, RDMA write) 42 * @qp: a pointer to the QP 43 * 44 * Assumes the s_lock is held. 45 * 46 * Return 1 if constructed; otherwise, return 0. 47 */ 48 int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) 49 { 50 struct qib_qp_priv *priv = qp->priv; 51 struct ib_other_headers *ohdr; 52 struct rvt_swqe *wqe; 53 u32 hwords; 54 u32 bth0; 55 u32 len; 56 u32 pmtu = qp->pmtu; 57 int ret = 0; 58 59 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { 60 if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) 61 goto bail; 62 /* We are in the error state, flush the work request. */ 63 if (qp->s_last == READ_ONCE(qp->s_head)) 64 goto bail; 65 /* If DMAs are in progress, we can't flush immediately. */ 66 if (atomic_read(&priv->s_dma_busy)) { 67 qp->s_flags |= RVT_S_WAIT_DMA; 68 goto bail; 69 } 70 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 71 rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 72 goto done; 73 } 74 75 ohdr = &priv->s_hdr->u.oth; 76 if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) 77 ohdr = &priv->s_hdr->u.l.oth; 78 79 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 80 hwords = 5; 81 bth0 = 0; 82 83 /* Get the next send request. */ 84 wqe = rvt_get_swqe_ptr(qp, qp->s_cur); 85 qp->s_wqe = NULL; 86 switch (qp->s_state) { 87 default: 88 if (!(ib_rvt_state_ops[qp->state] & 89 RVT_PROCESS_NEXT_SEND_OK)) 90 goto bail; 91 /* Check if send work queue is empty. */ 92 if (qp->s_cur == READ_ONCE(qp->s_head)) 93 goto bail; 94 /* 95 * Start a new request. 96 */ 97 qp->s_psn = wqe->psn; 98 qp->s_sge.sge = wqe->sg_list[0]; 99 qp->s_sge.sg_list = wqe->sg_list + 1; 100 qp->s_sge.num_sge = wqe->wr.num_sge; 101 qp->s_sge.total_len = wqe->length; 102 len = wqe->length; 103 qp->s_len = len; 104 switch (wqe->wr.opcode) { 105 case IB_WR_SEND: 106 case IB_WR_SEND_WITH_IMM: 107 if (len > pmtu) { 108 qp->s_state = OP(SEND_FIRST); 109 len = pmtu; 110 break; 111 } 112 if (wqe->wr.opcode == IB_WR_SEND) 113 qp->s_state = OP(SEND_ONLY); 114 else { 115 qp->s_state = 116 OP(SEND_ONLY_WITH_IMMEDIATE); 117 /* Immediate data comes after the BTH */ 118 ohdr->u.imm_data = wqe->wr.ex.imm_data; 119 hwords += 1; 120 } 121 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 122 bth0 |= IB_BTH_SOLICITED; 123 qp->s_wqe = wqe; 124 if (++qp->s_cur >= qp->s_size) 125 qp->s_cur = 0; 126 break; 127 128 case IB_WR_RDMA_WRITE: 129 case IB_WR_RDMA_WRITE_WITH_IMM: 130 ohdr->u.rc.reth.vaddr = 131 cpu_to_be64(wqe->rdma_wr.remote_addr); 132 ohdr->u.rc.reth.rkey = 133 cpu_to_be32(wqe->rdma_wr.rkey); 134 ohdr->u.rc.reth.length = cpu_to_be32(len); 135 hwords += sizeof(struct ib_reth) / 4; 136 if (len > pmtu) { 137 qp->s_state = OP(RDMA_WRITE_FIRST); 138 len = pmtu; 139 break; 140 } 141 if (wqe->wr.opcode == IB_WR_RDMA_WRITE) 142 qp->s_state = OP(RDMA_WRITE_ONLY); 143 else { 144 qp->s_state = 145 OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); 146 /* Immediate data comes after the RETH */ 147 ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; 148 hwords += 1; 149 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 150 bth0 |= IB_BTH_SOLICITED; 151 } 152 qp->s_wqe = wqe; 153 if (++qp->s_cur >= qp->s_size) 154 qp->s_cur = 0; 155 break; 156 157 default: 158 goto bail; 159 } 160 break; 161 162 case OP(SEND_FIRST): 163 qp->s_state = OP(SEND_MIDDLE); 164 fallthrough; 165 case OP(SEND_MIDDLE): 166 len = qp->s_len; 167 if (len > pmtu) { 168 len = pmtu; 169 break; 170 } 171 if (wqe->wr.opcode == IB_WR_SEND) 172 qp->s_state = OP(SEND_LAST); 173 else { 174 qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); 175 /* Immediate data comes after the BTH */ 176 ohdr->u.imm_data = wqe->wr.ex.imm_data; 177 hwords += 1; 178 } 179 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 180 bth0 |= IB_BTH_SOLICITED; 181 qp->s_wqe = wqe; 182 if (++qp->s_cur >= qp->s_size) 183 qp->s_cur = 0; 184 break; 185 186 case OP(RDMA_WRITE_FIRST): 187 qp->s_state = OP(RDMA_WRITE_MIDDLE); 188 fallthrough; 189 case OP(RDMA_WRITE_MIDDLE): 190 len = qp->s_len; 191 if (len > pmtu) { 192 len = pmtu; 193 break; 194 } 195 if (wqe->wr.opcode == IB_WR_RDMA_WRITE) 196 qp->s_state = OP(RDMA_WRITE_LAST); 197 else { 198 qp->s_state = 199 OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); 200 /* Immediate data comes after the BTH */ 201 ohdr->u.imm_data = wqe->wr.ex.imm_data; 202 hwords += 1; 203 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 204 bth0 |= IB_BTH_SOLICITED; 205 } 206 qp->s_wqe = wqe; 207 if (++qp->s_cur >= qp->s_size) 208 qp->s_cur = 0; 209 break; 210 } 211 qp->s_len -= len; 212 qp->s_hdrwords = hwords; 213 qp->s_cur_sge = &qp->s_sge; 214 qp->s_cur_size = len; 215 qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), 216 qp->s_psn++ & QIB_PSN_MASK); 217 done: 218 return 1; 219 bail: 220 qp->s_flags &= ~RVT_S_BUSY; 221 return ret; 222 } 223 224 /** 225 * qib_uc_rcv - handle an incoming UC packet 226 * @ibp: the port the packet came in on 227 * @hdr: the header of the packet 228 * @has_grh: true if the packet has a GRH 229 * @data: the packet data 230 * @tlen: the length of the packet 231 * @qp: the QP for this packet. 232 * 233 * This is called from qib_qp_rcv() to process an incoming UC packet 234 * for the given QP. 235 * Called at interrupt level. 236 */ 237 void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr, 238 int has_grh, void *data, u32 tlen, struct rvt_qp *qp) 239 { 240 struct ib_other_headers *ohdr; 241 u32 opcode; 242 u32 hdrsize; 243 u32 psn; 244 u32 pad; 245 struct ib_wc wc; 246 u32 pmtu = qp->pmtu; 247 struct ib_reth *reth; 248 int ret; 249 250 /* Check for GRH */ 251 if (!has_grh) { 252 ohdr = &hdr->u.oth; 253 hdrsize = 8 + 12; /* LRH + BTH */ 254 } else { 255 ohdr = &hdr->u.l.oth; 256 hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ 257 } 258 259 opcode = be32_to_cpu(ohdr->bth[0]); 260 if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) 261 return; 262 263 psn = be32_to_cpu(ohdr->bth[2]); 264 opcode >>= 24; 265 266 /* Compare the PSN verses the expected PSN. */ 267 if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) { 268 /* 269 * Handle a sequence error. 270 * Silently drop any current message. 271 */ 272 qp->r_psn = psn; 273 inv: 274 if (qp->r_state == OP(SEND_FIRST) || 275 qp->r_state == OP(SEND_MIDDLE)) { 276 set_bit(RVT_R_REWIND_SGE, &qp->r_aflags); 277 qp->r_sge.num_sge = 0; 278 } else 279 rvt_put_ss(&qp->r_sge); 280 qp->r_state = OP(SEND_LAST); 281 switch (opcode) { 282 case OP(SEND_FIRST): 283 case OP(SEND_ONLY): 284 case OP(SEND_ONLY_WITH_IMMEDIATE): 285 goto send_first; 286 287 case OP(RDMA_WRITE_FIRST): 288 case OP(RDMA_WRITE_ONLY): 289 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): 290 goto rdma_first; 291 292 default: 293 goto drop; 294 } 295 } 296 297 /* Check for opcode sequence errors. */ 298 switch (qp->r_state) { 299 case OP(SEND_FIRST): 300 case OP(SEND_MIDDLE): 301 if (opcode == OP(SEND_MIDDLE) || 302 opcode == OP(SEND_LAST) || 303 opcode == OP(SEND_LAST_WITH_IMMEDIATE)) 304 break; 305 goto inv; 306 307 case OP(RDMA_WRITE_FIRST): 308 case OP(RDMA_WRITE_MIDDLE): 309 if (opcode == OP(RDMA_WRITE_MIDDLE) || 310 opcode == OP(RDMA_WRITE_LAST) || 311 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) 312 break; 313 goto inv; 314 315 default: 316 if (opcode == OP(SEND_FIRST) || 317 opcode == OP(SEND_ONLY) || 318 opcode == OP(SEND_ONLY_WITH_IMMEDIATE) || 319 opcode == OP(RDMA_WRITE_FIRST) || 320 opcode == OP(RDMA_WRITE_ONLY) || 321 opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) 322 break; 323 goto inv; 324 } 325 326 if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) 327 rvt_comm_est(qp); 328 329 /* OK, process the packet. */ 330 switch (opcode) { 331 case OP(SEND_FIRST): 332 case OP(SEND_ONLY): 333 case OP(SEND_ONLY_WITH_IMMEDIATE): 334 send_first: 335 if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) 336 qp->r_sge = qp->s_rdma_read_sge; 337 else { 338 ret = rvt_get_rwqe(qp, false); 339 if (ret < 0) 340 goto op_err; 341 if (!ret) 342 goto drop; 343 /* 344 * qp->s_rdma_read_sge will be the owner 345 * of the mr references. 346 */ 347 qp->s_rdma_read_sge = qp->r_sge; 348 } 349 qp->r_rcv_len = 0; 350 if (opcode == OP(SEND_ONLY)) 351 goto no_immediate_data; 352 else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) 353 goto send_last_imm; 354 fallthrough; 355 case OP(SEND_MIDDLE): 356 /* Check for invalid length PMTU or posted rwqe len. */ 357 if (unlikely(tlen != (hdrsize + pmtu + 4))) 358 goto rewind; 359 qp->r_rcv_len += pmtu; 360 if (unlikely(qp->r_rcv_len > qp->r_len)) 361 goto rewind; 362 rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false); 363 break; 364 365 case OP(SEND_LAST_WITH_IMMEDIATE): 366 send_last_imm: 367 wc.ex.imm_data = ohdr->u.imm_data; 368 hdrsize += 4; 369 wc.wc_flags = IB_WC_WITH_IMM; 370 goto send_last; 371 case OP(SEND_LAST): 372 no_immediate_data: 373 wc.ex.imm_data = 0; 374 wc.wc_flags = 0; 375 send_last: 376 /* Get the number of bytes the message was padded by. */ 377 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 378 /* Check for invalid length. */ 379 /* XXX LAST len should be >= 1 */ 380 if (unlikely(tlen < (hdrsize + pad + 4))) 381 goto rewind; 382 /* Don't count the CRC. */ 383 tlen -= (hdrsize + pad + 4); 384 wc.byte_len = tlen + qp->r_rcv_len; 385 if (unlikely(wc.byte_len > qp->r_len)) 386 goto rewind; 387 wc.opcode = IB_WC_RECV; 388 rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false); 389 rvt_put_ss(&qp->s_rdma_read_sge); 390 last_imm: 391 wc.wr_id = qp->r_wr_id; 392 wc.status = IB_WC_SUCCESS; 393 wc.qp = &qp->ibqp; 394 wc.src_qp = qp->remote_qpn; 395 wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); 396 wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); 397 /* zero fields that are N/A */ 398 wc.vendor_err = 0; 399 wc.pkey_index = 0; 400 wc.dlid_path_bits = 0; 401 wc.port_num = 0; 402 /* Signal completion event if the solicited bit is set. */ 403 rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr)); 404 break; 405 406 case OP(RDMA_WRITE_FIRST): 407 case OP(RDMA_WRITE_ONLY): 408 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */ 409 rdma_first: 410 if (unlikely(!(qp->qp_access_flags & 411 IB_ACCESS_REMOTE_WRITE))) { 412 goto drop; 413 } 414 reth = &ohdr->u.rc.reth; 415 hdrsize += sizeof(*reth); 416 qp->r_len = be32_to_cpu(reth->length); 417 qp->r_rcv_len = 0; 418 qp->r_sge.sg_list = NULL; 419 if (qp->r_len != 0) { 420 u32 rkey = be32_to_cpu(reth->rkey); 421 u64 vaddr = be64_to_cpu(reth->vaddr); 422 int ok; 423 424 /* Check rkey */ 425 ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, 426 vaddr, rkey, IB_ACCESS_REMOTE_WRITE); 427 if (unlikely(!ok)) 428 goto drop; 429 qp->r_sge.num_sge = 1; 430 } else { 431 qp->r_sge.num_sge = 0; 432 qp->r_sge.sge.mr = NULL; 433 qp->r_sge.sge.vaddr = NULL; 434 qp->r_sge.sge.length = 0; 435 qp->r_sge.sge.sge_length = 0; 436 } 437 if (opcode == OP(RDMA_WRITE_ONLY)) 438 goto rdma_last; 439 else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) { 440 wc.ex.imm_data = ohdr->u.rc.imm_data; 441 goto rdma_last_imm; 442 } 443 fallthrough; 444 case OP(RDMA_WRITE_MIDDLE): 445 /* Check for invalid length PMTU or posted rwqe len. */ 446 if (unlikely(tlen != (hdrsize + pmtu + 4))) 447 goto drop; 448 qp->r_rcv_len += pmtu; 449 if (unlikely(qp->r_rcv_len > qp->r_len)) 450 goto drop; 451 rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false); 452 break; 453 454 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): 455 wc.ex.imm_data = ohdr->u.imm_data; 456 rdma_last_imm: 457 hdrsize += 4; 458 wc.wc_flags = IB_WC_WITH_IMM; 459 460 /* Get the number of bytes the message was padded by. */ 461 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 462 /* Check for invalid length. */ 463 /* XXX LAST len should be >= 1 */ 464 if (unlikely(tlen < (hdrsize + pad + 4))) 465 goto drop; 466 /* Don't count the CRC. */ 467 tlen -= (hdrsize + pad + 4); 468 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) 469 goto drop; 470 if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) 471 rvt_put_ss(&qp->s_rdma_read_sge); 472 else { 473 ret = rvt_get_rwqe(qp, true); 474 if (ret < 0) 475 goto op_err; 476 if (!ret) 477 goto drop; 478 } 479 wc.byte_len = qp->r_len; 480 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 481 rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false); 482 rvt_put_ss(&qp->r_sge); 483 goto last_imm; 484 485 case OP(RDMA_WRITE_LAST): 486 rdma_last: 487 /* Get the number of bytes the message was padded by. */ 488 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 489 /* Check for invalid length. */ 490 /* XXX LAST len should be >= 1 */ 491 if (unlikely(tlen < (hdrsize + pad + 4))) 492 goto drop; 493 /* Don't count the CRC. */ 494 tlen -= (hdrsize + pad + 4); 495 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) 496 goto drop; 497 rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false); 498 rvt_put_ss(&qp->r_sge); 499 break; 500 501 default: 502 /* Drop packet for unknown opcodes. */ 503 goto drop; 504 } 505 qp->r_psn++; 506 qp->r_state = opcode; 507 return; 508 509 rewind: 510 set_bit(RVT_R_REWIND_SGE, &qp->r_aflags); 511 qp->r_sge.num_sge = 0; 512 drop: 513 ibp->rvp.n_pkt_drops++; 514 return; 515 516 op_err: 517 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); 518 return; 519 520 } 521