/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/log2.h>
#include <linux/etherdevice.h>
#include <net/ip.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>

#include <linux/mlx4/driver.h>
#include <linux/mlx4/qp.h>

#include "mlx4_ib.h"
#include <rdma/mlx4-abi.h>

static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
			     struct mlx4_ib_cq *recv_cq);
static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq,
			       struct mlx4_ib_cq *recv_cq);

enum {
	MLX4_IB_ACK_REQ_FREQ	= 8,
};

enum {
	MLX4_IB_DEFAULT_SCHED_QUEUE	= 0x83,
	MLX4_IB_DEFAULT_QP0_SCHED_QUEUE	= 0x3f,
	MLX4_IB_LINK_TYPE_IB		= 0,
	MLX4_IB_LINK_TYPE_ETH		= 1
};

enum {
	/*
	 * Largest possible UD header: send with GRH and immediate
	 * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
	 * tag.
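	 * (That is 18 bytes of Ethernet + VLAN header, 40 bytes of
	 * GRH, 12 bytes of BTH, 8 bytes of DETH and 4 bytes of
	 * immediate data, for a total of 82 bytes.)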
	 * (LRH would only use 8 bytes, so Ethernet is the
	 * biggest case)
	 */
	MLX4_IB_UD_HEADER_SIZE		= 82,
	MLX4_IB_LSO_HEADER_SPARE	= 128,
};

struct mlx4_ib_sqp {
	struct mlx4_ib_qp	qp;
	int			pkey_index;
	u32			qkey;
	u32			send_psn;
	struct ib_ud_header	ud_header;
	u8			header_buf[MLX4_IB_UD_HEADER_SIZE];
	struct ib_qp		*roce_v2_gsi;
};

enum {
	MLX4_IB_MIN_SQ_STRIDE	= 6,
	MLX4_IB_CACHE_LINE_SIZE	= 64,
};

enum {
	MLX4_RAW_QP_MTU		= 7,
	MLX4_RAW_QP_MSGMAX	= 31,
};

#ifndef ETH_ALEN
#define ETH_ALEN        6
#endif

static const __be32 mlx4_ib_opcode[] = {
	[IB_WR_SEND]				= cpu_to_be32(MLX4_OPCODE_SEND),
	[IB_WR_LSO]				= cpu_to_be32(MLX4_OPCODE_LSO),
	[IB_WR_SEND_WITH_IMM]			= cpu_to_be32(MLX4_OPCODE_SEND_IMM),
	[IB_WR_RDMA_WRITE]			= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
	[IB_WR_RDMA_WRITE_WITH_IMM]		= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
	[IB_WR_RDMA_READ]			= cpu_to_be32(MLX4_OPCODE_RDMA_READ),
	[IB_WR_ATOMIC_CMP_AND_SWP]		= cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
	[IB_WR_ATOMIC_FETCH_AND_ADD]		= cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
	[IB_WR_SEND_WITH_INV]			= cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
	[IB_WR_LOCAL_INV]			= cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
	[IB_WR_REG_MR]				= cpu_to_be32(MLX4_OPCODE_FMR),
	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
};

static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
	return container_of(mqp, struct mlx4_ib_sqp, qp);
}

static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
	if (!mlx4_is_master(dev->dev))
		return 0;

	return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
	       qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
	       8 * MLX4_MFUNC_MAX;
}

static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
	int proxy_sqp = 0;
	int real_sqp = 0;
	int i;
	/* PPF or Native -- real SQP */
	real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
		    qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
		    qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
	if (real_sqp)
		return 1;
	/* VF or PF -- proxy SQP */
	if (mlx4_is_mfunc(dev->dev)) {
		for (i = 0; i < dev->dev->caps.num_ports; i++) {
			if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
			    qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
				proxy_sqp = 1;
				break;
			}
		}
	}
	if (proxy_sqp)
		return 1;

	return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
}

/* used for INIT/CLOSE port logic */
static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
	int proxy_qp0 = 0;
	int real_qp0 = 0;
	int i;
	/* PPF or Native -- real QP0 */
	real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
		    qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
		    qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
	if (real_qp0)
		return 1;
	/* VF or PF -- proxy QP0 */
	if (mlx4_is_mfunc(dev->dev)) {
		for (i = 0; i < dev->dev->caps.num_ports; i++) {
			if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
				proxy_qp0 = 1;
				break;
			}
		}
	}
	return proxy_qp0;
}

static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
{
	return mlx4_buf_offset(&qp->buf, offset);
}

static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
{
	return get_wqe(qp, qp->rq.offset + (n
					    << qp->rq.wqe_shift));
}

static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
{
	return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
}

/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with
 * 0x7FFFFFFF | (invalid_ownership_value << 31).
 *
 * When the max work request size is less than or equal to the WQE
 * basic block size, as an optimization, we can stamp all WQEs with
 * 0xffffffff, and skip the very first chunk of each WQE.
 */
static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
{
	__be32 *wqe;
	int i;
	int s;
	int ind;
	void *buf;
	__be32 stamp;
	struct mlx4_wqe_ctrl_seg *ctrl;

	if (qp->sq_max_wqes_per_wr > 1) {
		s = roundup(size, 1U << qp->sq.wqe_shift);
		for (i = 0; i < s; i += 64) {
			ind = (i >> qp->sq.wqe_shift) + n;
			stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
						       cpu_to_be32(0xffffffff);
			buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
			wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
			*wqe = stamp;
		}
	} else {
		ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
		s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4;
		for (i = 64; i < s; i += 64) {
			wqe = buf + i;
			*wqe = cpu_to_be32(0xffffffff);
		}
	}
}

static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
{
	struct mlx4_wqe_ctrl_seg *ctrl;
	struct mlx4_wqe_inline_seg *inl;
	void *wqe;
	int s;

	ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
	s = sizeof(struct mlx4_wqe_ctrl_seg);

	if (qp->ibqp.qp_type == IB_QPT_UD) {
		struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
		struct mlx4_av *av = (struct mlx4_av *)dgram->av;
		memset(dgram, 0, sizeof *dgram);
		av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
		s += sizeof(struct mlx4_wqe_datagram_seg);
	}

	/* Pad the remainder of the WQE with an inline data segment. */
	if (size > s) {
		inl = wqe + s;
		inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
	}
	ctrl->srcrb_flags = 0;
	ctrl->qpn_vlan.fence_size = size / 16;
	/*
	 * Make sure descriptor is fully written before setting ownership bit
	 * (because HW can start executing as soon as we do).
	 */
	wmb();

	ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
		(n & qp->sq.wqe_cnt ?
		 cpu_to_be32(1 << 31) : 0);

	stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
}

/* Post NOP WQE to prevent wrap-around in the middle of WR */
static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
{
	unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
	if (unlikely(s < qp->sq_max_wqes_per_wr)) {
		post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
		ind += s;
	}
	return ind;
}

static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
	struct ib_event event;
	struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;

	if (type == MLX4_EVENT_TYPE_PATH_MIG)
		to_mibqp(qp)->port = to_mibqp(qp)->alt_port;

	if (ibqp->event_handler) {
		event.device     = ibqp->device;
		event.element.qp = ibqp;
		switch (type) {
		case MLX4_EVENT_TYPE_PATH_MIG:
			event.event = IB_EVENT_PATH_MIG;
			break;
		case MLX4_EVENT_TYPE_COMM_EST:
			event.event = IB_EVENT_COMM_EST;
			break;
		case MLX4_EVENT_TYPE_SQ_DRAINED:
			event.event = IB_EVENT_SQ_DRAINED;
			break;
		case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
			break;
		case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
			event.event = IB_EVENT_QP_FATAL;
			break;
		case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
			event.event = IB_EVENT_PATH_MIG_ERR;
			break;
		case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
			event.event = IB_EVENT_QP_REQ_ERR;
			break;
		case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
			event.event = IB_EVENT_QP_ACCESS_ERR;
			break;
		default:
			pr_warn("Unexpected event type %d "
				"on QP %06x\n", type, qp->qpn);
			return;
		}

		ibqp->event_handler(&event, ibqp->qp_context);
	}
}

static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
{
	/*
	 * UD WQEs must have a datagram segment.
	 * RC and UC WQEs might have a remote address segment.
	 * MLX WQEs need two extra inline data segments (for the UD
	 * header and space for the ICRC).
	 */
	switch (type) {
	case MLX4_IB_QPT_UD:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_datagram_seg) +
			((flags & MLX4_IB_QP_LSO) ?
			 MLX4_IB_LSO_HEADER_SPARE : 0);
	case MLX4_IB_QPT_PROXY_SMI_OWNER:
	case MLX4_IB_QPT_PROXY_SMI:
	case MLX4_IB_QPT_PROXY_GSI:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_datagram_seg) + 64;
	case MLX4_IB_QPT_TUN_SMI_OWNER:
	case MLX4_IB_QPT_TUN_GSI:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_datagram_seg);

	case MLX4_IB_QPT_UC:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_raddr_seg);
	case MLX4_IB_QPT_RC:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			sizeof (struct mlx4_wqe_masked_atomic_seg) +
			sizeof (struct mlx4_wqe_raddr_seg);
	case MLX4_IB_QPT_SMI:
	case MLX4_IB_QPT_GSI:
		return sizeof (struct mlx4_wqe_ctrl_seg) +
			ALIGN(MLX4_IB_UD_HEADER_SIZE +
			      DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
					   MLX4_INLINE_ALIGN) *
			      sizeof (struct mlx4_wqe_inline_seg),
			      sizeof (struct mlx4_wqe_data_seg)) +
			ALIGN(4 +
			      sizeof (struct mlx4_wqe_inline_seg),
			      sizeof (struct mlx4_wqe_data_seg));
	default:
		return sizeof (struct mlx4_wqe_ctrl_seg);
	}
}

static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
		       int is_user, int has_rq, struct mlx4_ib_qp *qp)
{
	/* Sanity check RQ size before proceeding */
	if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
	    cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
		return -EINVAL;

	if (!has_rq) {
		if (cap->max_recv_wr)
			return -EINVAL;

		qp->rq.wqe_cnt = qp->rq.max_gs = 0;
	} else {
		/* HW requires >= 1 RQ entry with >= 1 gather entry */
		if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
			return -EINVAL;

		qp->rq.wqe_cnt	 = roundup_pow_of_two(max(1U, cap->max_recv_wr));
		qp->rq.max_gs	 = roundup_pow_of_two(max(1U, cap->max_recv_sge));
		qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
	}

	/* leave userspace return values as they were, so as not to break ABI */
	if (is_user) {
		cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
		cap->max_recv_sge = qp->rq.max_gs;
	} else {
		cap->max_recv_wr  = qp->rq.max_post =
			min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
		cap->max_recv_sge = min(qp->rq.max_gs,
					min(dev->dev->caps.max_sq_sg,
					    dev->dev->caps.max_rq_sg));
	}

	return 0;
}

static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
			      enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp,
			      bool shrink_wqe)
{
	int s;

	/* Sanity check SQ size before proceeding */
	if (cap->max_send_wr  > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
	    cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
	    cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
		return -EINVAL;

	/*
	 * For MLX transport we need 2 extra S/G entries:
	 * one for the header and one for the checksum at the end
	 */
	if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
	     type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
		return -EINVAL;

	s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
		cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
		send_wqe_overhead(type, qp->flags);

	if (s > dev->dev->caps.max_sq_desc_sz)
		return -EINVAL;

	/*
	 * Hermon supports shrinking WQEs, such that a single work
	 * request can include multiple units of 1 << wqe_shift.  This
	 * way, work requests can differ in size, and do not have to
	 * be a power of 2 in size, saving memory and speeding up send
	 * WR posting.  Unfortunately, if we do this then the
	 * wqe_index field in CQEs can't be used to look up the WR ID
	 * anymore, so we do this only if selective signaling is off.
	 *
	 * Further, on 32-bit platforms, we can't use vmap() to make
	 * the QP buffer virtually contiguous.  Thus we have to use
	 * constant-sized WRs to make sure a WR is always fully within
	 * a single page-sized chunk.
	 *
	 * Finally, we use NOP work requests to pad the end of the
	 * work queue, to avoid wrap-around in the middle of WR.  We
	 * set NEC bit to avoid getting completions with error for
	 * these NOP WRs, but since NEC is only supported starting
	 * with firmware 2.2.232, we use constant-sized WRs for older
	 * firmware.
	 *
	 * And, since MLX QPs only support SEND, we use constant-sized
	 * WRs in this case.
	 *
	 * We look for the smallest value of wqe_shift such that the
	 * resulting number of wqes does not exceed device
	 * capabilities.
	 *
	 * We set WQE size to at least 64 bytes, this way stamping
	 * invalidates each WQE.
	 */
	if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
	    qp->sq_signal_bits && BITS_PER_LONG == 64 &&
	    type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
	    !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
		      MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
		qp->sq.wqe_shift = ilog2(64);
	else
		qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));

	for (;;) {
		qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);

		/*
		 * We need to leave 2 KB + 1 WR of headroom in the SQ to
		 * allow HW to prefetch.
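		 * (For example, with 64-byte basic blocks, i.e. wqe_shift == 6,
		 * this works out to 2048 >> 6 == 32 spare WQE blocks plus
		 * sq_max_wqes_per_wr, as computed just below.)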
		 */
		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
		qp->sq.wqe_cnt	   = roundup_pow_of_two(cap->max_send_wr *
							qp->sq_max_wqes_per_wr +
							qp->sq_spare_wqes);

		if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
			break;

		if (qp->sq_max_wqes_per_wr <= 1)
			return -EINVAL;

		++qp->sq.wqe_shift;
	}

	qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
			     (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) -
			 send_wqe_overhead(type, qp->flags)) /
		sizeof (struct mlx4_wqe_data_seg);

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
		qp->rq.offset = 0;
		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
	} else {
		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
		qp->sq.offset = 0;
	}

	cap->max_send_wr  = qp->sq.max_post =
		(qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
	cap->max_send_sge = min(qp->sq.max_gs,
				min(dev->dev->caps.max_sq_sg,
				    dev->dev->caps.max_rq_sg));
	/* We don't support inline sends for kernel QPs (yet) */
	cap->max_inline_data = 0;

	return 0;
}

static int set_user_sq_size(struct mlx4_ib_dev *dev,
			    struct mlx4_ib_qp *qp,
			    struct mlx4_ib_create_qp *ucmd)
{
	/* Sanity check SQ size before proceeding */
	if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
	    ucmd->log_sq_stride >
		ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
	    ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
		return -EINVAL;

	qp->sq.wqe_cnt   = 1 << ucmd->log_sq_bb_count;
	qp->sq.wqe_shift = ucmd->log_sq_stride;

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);

	return 0;
}

static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
{
	int i;

	qp->sqp_proxy_rcv =
		kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
			GFP_KERNEL);
	if (!qp->sqp_proxy_rcv)
		return -ENOMEM;
	for (i = 0; i < qp->rq.wqe_cnt; i++) {
		qp->sqp_proxy_rcv[i].addr =
			kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
				GFP_KERNEL);
		if (!qp->sqp_proxy_rcv[i].addr)
			goto err;
		qp->sqp_proxy_rcv[i].map =
			ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
					  sizeof (struct mlx4_ib_proxy_sqp_hdr),
					  DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) {
			kfree(qp->sqp_proxy_rcv[i].addr);
			goto err;
		}
	}
	return 0;

err:
	while (i > 0) {
		--i;
		ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
				    sizeof (struct mlx4_ib_proxy_sqp_hdr),
				    DMA_FROM_DEVICE);
		kfree(qp->sqp_proxy_rcv[i].addr);
	}
	kfree(qp->sqp_proxy_rcv);
	qp->sqp_proxy_rcv = NULL;
	return -ENOMEM;
}

static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
{
	int i;

	for (i = 0; i < qp->rq.wqe_cnt; i++) {
		ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
				    sizeof (struct mlx4_ib_proxy_sqp_hdr),
				    DMA_FROM_DEVICE);
		kfree(qp->sqp_proxy_rcv[i].addr);
	}
	kfree(qp->sqp_proxy_rcv);
}

static int qp_has_rq(struct ib_qp_init_attr *attr)
{
	if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
		return 0;

	return !attr->srq;
}

static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
{
	int i;
	for (i = 0; i < dev->caps.num_ports; i++) {
		if (qpn == dev->caps.qp0_proxy[i])
			return !!dev->caps.qp0_qkey[i];
	}
	return 0;
}

static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
				    struct mlx4_ib_qp *qp)
{
	mutex_lock(&dev->counters_table[qp->port - 1].mutex);
	mlx4_counter_free(dev->dev, qp->counter_index->index);
	list_del(&qp->counter_index->list);
	mutex_unlock(&dev->counters_table[qp->port - 1].mutex);

	kfree(qp->counter_index);
	qp->counter_index = NULL;
}

static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata, int sqpn,
			    struct mlx4_ib_qp **caller_qp)
{
	int qpn;
	int err;
	struct ib_qp_cap backup_cap;
	struct mlx4_ib_sqp *sqp = NULL;
	struct mlx4_ib_qp *qp;
	enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
	struct mlx4_ib_cq *mcq;
	unsigned long flags;

	/* When tunneling special qps, we use a plain UD qp */
	if (sqpn) {
		if (mlx4_is_mfunc(dev->dev) &&
		    (!mlx4_is_master(dev->dev) ||
		     !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
			if (init_attr->qp_type == IB_QPT_GSI)
				qp_type = MLX4_IB_QPT_PROXY_GSI;
			else {
				if (mlx4_is_master(dev->dev) ||
				    qp0_enabled_vf(dev->dev, sqpn))
					qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
				else
					qp_type = MLX4_IB_QPT_PROXY_SMI;
			}
		}
		qpn = sqpn;
		/* add extra sg entry for tunneling */
		init_attr->cap.max_recv_sge++;
	} else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
		struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
			container_of(init_attr,
				     struct mlx4_ib_qp_tunnel_init_attr, init_attr);
		if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
		     tnl_init->proxy_qp_type != IB_QPT_GSI) ||
		    !mlx4_is_master(dev->dev))
			return -EINVAL;
		if (tnl_init->proxy_qp_type == IB_QPT_GSI)
			qp_type = MLX4_IB_QPT_TUN_GSI;
		else if (tnl_init->slave == mlx4_master_func_num(dev->dev) ||
			 mlx4_vf_smi_enabled(dev->dev, tnl_init->slave,
					     tnl_init->port))
			qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
		else
			qp_type = MLX4_IB_QPT_TUN_SMI;
		/* we are definitely in the PPF here, since we are creating
		 * tunnel QPs. base_tunnel_sqpn is therefore valid.
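		 * Each slave owns a block of 8 tunnel QPNs; the QPN below is
		 * indexed within that block by proxy QP type and port
		 * (e.g. slave 2, GSI (IB_QPT_GSI == 1), port 1 maps to
		 * base_tunnel_sqpn + 8 * 2 + 1 * 2 + 1 - 1).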
		 */
		qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
			+ tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
		sqpn = qpn;
	}

	if (!*caller_qp) {
		if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
		    (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
				MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
			sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
			if (!sqp)
				return -ENOMEM;
			qp = &sqp->qp;
			qp->pri.vid = 0xFFFF;
			qp->alt.vid = 0xFFFF;
		} else {
			qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL);
			if (!qp)
				return -ENOMEM;
			qp->pri.vid = 0xFFFF;
			qp->alt.vid = 0xFFFF;
		}
	} else
		qp = *caller_qp;

	qp->mlx4_ib_qp_type = qp_type;

	mutex_init(&qp->mutex);
	spin_lock_init(&qp->sq.lock);
	spin_lock_init(&qp->rq.lock);
	INIT_LIST_HEAD(&qp->gid_list);
	INIT_LIST_HEAD(&qp->steering_rules);

	qp->state = IB_QPS_RESET;
	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
		qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);

	err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
	if (err)
		goto err;

	if (pd->uobject) {
		struct mlx4_ib_create_qp ucmd;

		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
			err = -EFAULT;
			goto err;
		}

		qp->sq_no_prefetch = ucmd.sq_no_prefetch;

		err = set_user_sq_size(dev, qp, &ucmd);
		if (err)
			goto err;

		qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
				       qp->buf_size, 0, 0);
		if (IS_ERR(qp->umem)) {
			err = PTR_ERR(qp->umem);
			goto err;
		}

		err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
				    qp->umem->page_shift, &qp->mtt);
		if (err)
			goto err_buf;

		err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
		if (err)
			goto err_mtt;

		if (qp_has_rq(init_attr)) {
			err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
						  ucmd.db_addr, &qp->db);
			if (err)
				goto err_mtt;
		}
	} else {
		qp->sq_no_prefetch = 0;

		if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
			qp->flags |= MLX4_IB_QP_LSO;

		if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
			if (dev->steering_support ==
			    MLX4_STEERING_MODE_DEVICE_MANAGED)
				qp->flags |= MLX4_IB_QP_NETIF;
			else
				goto err;
		}

		memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap));
		err = set_kernel_sq_size(dev, &init_attr->cap,
					 qp_type, qp, true);
		if (err)
			goto err;

		if (qp_has_rq(init_attr)) {
			err = mlx4_db_alloc(dev->dev, &qp->db, 0);
			if (err)
				goto err;

			*qp->db.db = 0;
		}

		if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
				   &qp->buf)) {
			memcpy(&init_attr->cap, &backup_cap,
			       sizeof(backup_cap));
			err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
						 qp, false);
			if (err)
				goto err_db;

			if (mlx4_buf_alloc(dev->dev, qp->buf_size,
					   PAGE_SIZE * 2, &qp->buf)) {
				err = -ENOMEM;
				goto err_db;
			}
		}

		err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
				    &qp->mtt);
		if (err)
			goto err_buf;

		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
		if (err)
			goto err_mtt;

		qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
					    GFP_KERNEL | __GFP_NOWARN);
		if (!qp->sq.wrid)
			qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
						GFP_KERNEL, PAGE_KERNEL);
		qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
					    GFP_KERNEL | __GFP_NOWARN);
		if (!qp->rq.wrid)
			qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
						GFP_KERNEL, PAGE_KERNEL);
		if (!qp->sq.wrid || !qp->rq.wrid) {
			err = -ENOMEM;
			goto err_wrid;
		}
	}

	if (sqpn) {
		if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
		    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
			if (alloc_proxy_bufs(pd->device, qp)) {
				err = -ENOMEM;
				goto err_wrid;
			}
		}
	} else {
		/* Raw packet QPNs may not have bits 6,7 set in their qp_num;
		 * otherwise, the WQE BlueFlame setup flow wrongly causes
		 * VLAN insertion. */
		if (init_attr->qp_type == IB_QPT_RAW_PACKET)
			err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn,
						    (init_attr->cap.max_send_wr ?
						     MLX4_RESERVE_ETH_BF_QP : 0) |
						    (init_attr->cap.max_recv_wr ?
						     MLX4_RESERVE_A0_QP : 0));
		else
			if (qp->flags & MLX4_IB_QP_NETIF)
				err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
			else
				err = mlx4_qp_reserve_range(dev->dev, 1, 1,
							    &qpn, 0);
		if (err)
			goto err_proxy;
	}

	if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
		qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;

	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
	if (err)
		goto err_qpn;

	if (init_attr->qp_type == IB_QPT_XRC_TGT)
		qp->mqp.qpn |= (1 << 23);

	/*
	 * Hardware wants QPN written in big-endian order (after
	 * shifting) for send doorbell.  Precompute this value to save
	 * a little bit when posting sends.
	 */
	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);

	qp->mqp.event = mlx4_ib_qp_event;
	if (!*caller_qp)
		*caller_qp = qp;

	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
	mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
			 to_mcq(init_attr->recv_cq));
	/* Maintain device to QPs access, needed for further handling
	 * via reset flow
	 */
	list_add_tail(&qp->qps_list, &dev->qp_list);
	/* Maintain CQ to QPs access, needed for further handling
	 * via reset flow
	 */
	mcq = to_mcq(init_attr->send_cq);
	list_add_tail(&qp->cq_send_list, &mcq->send_qp_list);
	mcq = to_mcq(init_attr->recv_cq);
	list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list);
	mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq),
			   to_mcq(init_attr->recv_cq));
	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
	return 0;

err_qpn:
	if (!sqpn) {
		if (qp->flags & MLX4_IB_QP_NETIF)
			mlx4_ib_steer_qp_free(dev, qpn, 1);
		else
			mlx4_qp_release_range(dev->dev, qpn, 1);
	}
err_proxy:
	if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
		free_proxy_bufs(pd->device, qp);
err_wrid:
	if (pd->uobject) {
		if (qp_has_rq(init_attr))
			mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
	} else {
		kvfree(qp->sq.wrid);
		kvfree(qp->rq.wrid);
	}

err_mtt:
	mlx4_mtt_cleanup(dev->dev, &qp->mtt);

err_buf:
	if (pd->uobject)
		ib_umem_release(qp->umem);
	else
		mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);

err_db:
	if (!pd->uobject && qp_has_rq(init_attr))
		mlx4_db_free(dev->dev, &qp->db);

err:
	if (sqp)
		kfree(sqp);
	else if (!*caller_qp)
		kfree(qp);
	return err;
}

static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
{
	switch (state) {
	case IB_QPS_RESET:	return MLX4_QP_STATE_RST;
	case IB_QPS_INIT:	return MLX4_QP_STATE_INIT;
	case IB_QPS_RTR:	return MLX4_QP_STATE_RTR;
	case IB_QPS_RTS:	return MLX4_QP_STATE_RTS;
	case IB_QPS_SQD:	return MLX4_QP_STATE_SQD;
	case IB_QPS_SQE:	return MLX4_QP_STATE_SQER;
	case IB_QPS_ERR:	return MLX4_QP_STATE_ERR;
	default:		return -1;
	}
}

static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
	__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
	if (send_cq == recv_cq) {
		spin_lock(&send_cq->lock);
		__acquire(&recv_cq->lock);
	} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
		spin_lock(&send_cq->lock);
		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&recv_cq->lock);
		spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
	}
}

static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
	__releases(&send_cq->lock) __releases(&recv_cq->lock)
{
	if (send_cq == recv_cq) {
		__release(&recv_cq->lock);
		spin_unlock(&send_cq->lock);
	} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
		spin_unlock(&recv_cq->lock);
		spin_unlock(&send_cq->lock);
	} else {
		spin_unlock(&send_cq->lock);
		spin_unlock(&recv_cq->lock);
	}
}

static void del_gid_entries(struct mlx4_ib_qp *qp)
{
	struct mlx4_ib_gid_entry *ge, *tmp;

	list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
		list_del(&ge->list);
		kfree(ge);
	}
}

static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
{
	if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
		return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
	else
		return to_mpd(qp->ibqp.pd);
}

static void get_cqs(struct mlx4_ib_qp *qp,
		    struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
{
	switch (qp->ibqp.qp_type) {
	case IB_QPT_XRC_TGT:
		*send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
		*recv_cq = *send_cq;
		break;
	case IB_QPT_XRC_INI:
		*send_cq = to_mcq(qp->ibqp.send_cq);
		*recv_cq = *send_cq;
		break;
	default:
		*send_cq = to_mcq(qp->ibqp.send_cq);
		*recv_cq = to_mcq(qp->ibqp.recv_cq);
		break;
	}
}

static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
			      int is_user)
{
	struct mlx4_ib_cq *send_cq, *recv_cq;
	unsigned long flags;

	if (qp->state != IB_QPS_RESET) {
		if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
				   MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
			pr_warn("modify QP %06x to RESET failed.\n",
				qp->mqp.qpn);
		if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) {
			mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
			qp->pri.smac = 0;
			qp->pri.smac_port = 0;
		}
		if (qp->alt.smac) {
			mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
			qp->alt.smac = 0;
		}
		if (qp->pri.vid < 0x1000) {
			mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
			qp->pri.vid = 0xFFFF;
			qp->pri.candidate_vid = 0xFFFF;
			qp->pri.update_vid = 0;
		}
		if (qp->alt.vid < 0x1000) {
			mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
			qp->alt.vid = 0xFFFF;
			qp->alt.candidate_vid = 0xFFFF;
			qp->alt.update_vid = 0;
		}
	}

	get_cqs(qp, &send_cq, &recv_cq);

	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
	mlx4_ib_lock_cqs(send_cq, recv_cq);

	/* del from lists under both locks above to protect reset flow paths */
	list_del(&qp->qps_list);
	list_del(&qp->cq_send_list);
	list_del(&qp->cq_recv_list);
	if (!is_user) {
		__mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
				   qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
		if (send_cq != recv_cq)
			__mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
	}

	mlx4_qp_remove(dev->dev, &qp->mqp);

	mlx4_ib_unlock_cqs(send_cq, recv_cq);
	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);

	mlx4_qp_free(dev->dev, &qp->mqp);

	if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) {
		if (qp->flags & MLX4_IB_QP_NETIF)
			mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1);
		else
			mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
	}

	mlx4_mtt_cleanup(dev->dev, &qp->mtt);

	if (is_user) {
		if (qp->rq.wqe_cnt)
			mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
					      &qp->db);
		ib_umem_release(qp->umem);
	} else {
		kvfree(qp->sq.wrid);
		kvfree(qp->rq.wrid);
		if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
		    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
			free_proxy_bufs(&dev->ib_dev, qp);
		mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
		if (qp->rq.wqe_cnt)
			mlx4_db_free(dev->dev, &qp->db);
	}

	del_gid_entries(qp);
}

static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
{
	/* Native or PPF */
	if (!mlx4_is_mfunc(dev->dev) ||
	    (mlx4_is_master(dev->dev) &&
	     attr->create_flags & MLX4_IB_SRIOV_SQP)) {
		return dev->dev->phys_caps.base_sqpn +
			(attr->qp_type == IB_QPT_SMI ? 0 : 2) +
			attr->port_num - 1;
	}
	/* PF or VF -- creating proxies */
	if (attr->qp_type == IB_QPT_SMI)
		return dev->dev->caps.qp0_proxy[attr->port_num - 1];
	else
		return dev->dev->caps.qp1_proxy[attr->port_num - 1];
}

static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
					struct ib_qp_init_attr *init_attr,
					struct ib_udata *udata)
{
	struct mlx4_ib_qp *qp = NULL;
	int err;
	int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
	u16 xrcdn = 0;

	/*
	 * We only support LSO, vendor flag1, and multicast loopback blocking,
	 * and only for kernel UD QPs.
	 */
	if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
					MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
					MLX4_IB_SRIOV_TUNNEL_QP |
					MLX4_IB_SRIOV_SQP |
					MLX4_IB_QP_NETIF |
					MLX4_IB_QP_CREATE_ROCE_V2_GSI))
		return ERR_PTR(-EINVAL);

	if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
		if (init_attr->qp_type != IB_QPT_UD)
			return ERR_PTR(-EINVAL);
	}

	if (init_attr->create_flags) {
		if (udata && init_attr->create_flags & ~(sup_u_create_flags))
			return ERR_PTR(-EINVAL);

		if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
						 MLX4_IB_QP_CREATE_ROCE_V2_GSI |
						 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
		     init_attr->qp_type != IB_QPT_UD) ||
		    (init_attr->create_flags & MLX4_IB_SRIOV_SQP &&
		     init_attr->qp_type > IB_QPT_GSI) ||
		    (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
		     init_attr->qp_type != IB_QPT_GSI))
			return ERR_PTR(-EINVAL);
	}

	switch (init_attr->qp_type) {
	case IB_QPT_XRC_TGT:
		pd = to_mxrcd(init_attr->xrcd)->pd;
		xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
		init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
		/* fall through */
	case IB_QPT_XRC_INI:
		if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
			return ERR_PTR(-ENOSYS);
		init_attr->recv_cq = init_attr->send_cq;
		/* fall through */
	case IB_QPT_RC:
	case IB_QPT_UC:
	case IB_QPT_RAW_PACKET:
		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
		if (!qp)
			return ERR_PTR(-ENOMEM);
		qp->pri.vid = 0xFFFF;
		qp->alt.vid = 0xFFFF;
		/* fall through */
	case IB_QPT_UD:
	{
		err = create_qp_common(to_mdev(pd->device), pd, init_attr,
				       udata, 0, &qp);
		if (err) {
			kfree(qp);
			return ERR_PTR(err);
		}

		qp->ibqp.qp_num = qp->mqp.qpn;
		qp->xrcdn = xrcdn;

		break;
	}
	case IB_QPT_SMI:
	case IB_QPT_GSI:
	{
		int sqpn;

		/* Userspace is not allowed to create special QPs: */
		if (udata)
			return ERR_PTR(-EINVAL);
		if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
			int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);

			if (res)
				return ERR_PTR(res);
		} else {
			sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
		}

		err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
				       sqpn, &qp);
		if (err)
			return ERR_PTR(err);

		qp->port	= init_attr->port_num;
		qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
			init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
		break;
	}
	default:
		/* Don't support raw QPs */
		return ERR_PTR(-EINVAL);
	}

	return &qp->ibqp;
}

struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
				struct ib_qp_init_attr *init_attr,
				struct ib_udata *udata)
{
	struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
	struct ib_qp *ibqp;
	struct mlx4_ib_dev *dev = to_mdev(device);

	ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);

	if (!IS_ERR(ibqp) &&
	    (init_attr->qp_type == IB_QPT_GSI) &&
	    !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
		struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
		int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);

		if (is_eth &&
		    dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
			init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI;
			sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);

			if (IS_ERR(sqp->roce_v2_gsi)) {
				pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
				sqp->roce_v2_gsi = NULL;
			} else {
				sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
				sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
			}

			init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
		}
	}
	return ibqp;
}

static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
{
	struct mlx4_ib_dev *dev = to_mdev(qp->device);
	struct mlx4_ib_qp *mqp = to_mqp(qp);
	struct mlx4_ib_pd *pd;

	if (is_qp0(dev, mqp))
		mlx4_CLOSE_PORT(dev->dev, mqp->port);

	if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI &&
	    dev->qp1_proxy[mqp->port - 1] == mqp) {
		mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]);
		dev->qp1_proxy[mqp->port - 1] = NULL;
		mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
	}

	if (mqp->counter_index)
		mlx4_ib_free_qp_counter(dev, mqp);

	pd = get_pd(mqp);
	destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);

	if (is_sqp(dev, mqp))
		kfree(to_msqp(mqp));
	else
		kfree(mqp);

	return 0;
}

int mlx4_ib_destroy_qp(struct ib_qp *qp)
{
	struct mlx4_ib_qp *mqp = to_mqp(qp);

	if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
		struct mlx4_ib_sqp *sqp = to_msqp(mqp);

		if (sqp->roce_v2_gsi)
			ib_destroy_qp(sqp->roce_v2_gsi);
	}

	return _mlx4_ib_destroy_qp(qp);
}

static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
{
	switch (type) {
	case MLX4_IB_QPT_RC:		return MLX4_QP_ST_RC;
	case MLX4_IB_QPT_UC:		return MLX4_QP_ST_UC;
	case MLX4_IB_QPT_UD:		return MLX4_QP_ST_UD;
	case MLX4_IB_QPT_XRC_INI:
	case MLX4_IB_QPT_XRC_TGT:	return MLX4_QP_ST_XRC;
	case MLX4_IB_QPT_SMI:
	case MLX4_IB_QPT_GSI:
	case MLX4_IB_QPT_RAW_PACKET:	return MLX4_QP_ST_MLX;

	case MLX4_IB_QPT_PROXY_SMI_OWNER:
	case MLX4_IB_QPT_TUN_SMI_OWNER:	return (mlx4_is_mfunc(dev->dev) ?
						MLX4_QP_ST_MLX : -1);
	case MLX4_IB_QPT_PROXY_SMI:
	case MLX4_IB_QPT_TUN_SMI:
	case MLX4_IB_QPT_PROXY_GSI:
	case MLX4_IB_QPT_TUN_GSI:	return (mlx4_is_mfunc(dev->dev) ?
						MLX4_QP_ST_UD : -1);
	default:			return -1;
	}
}

static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
				   int attr_mask)
{
	u8 dest_rd_atomic;
	u32 access_flags;
	u32 hw_access_flags = 0;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		dest_rd_atomic = attr->max_dest_rd_atomic;
	else
		dest_rd_atomic = qp->resp_depth;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		access_flags = attr->qp_access_flags;
	else
		access_flags = qp->atomic_rd_en;

	if (!dest_rd_atomic)
		access_flags &= IB_ACCESS_REMOTE_WRITE;

	if (access_flags & IB_ACCESS_REMOTE_READ)
		hw_access_flags |= MLX4_QP_BIT_RRE;
	if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
		hw_access_flags |= MLX4_QP_BIT_RAE;
	if (access_flags & IB_ACCESS_REMOTE_WRITE)
		hw_access_flags |= MLX4_QP_BIT_RWE;

	return cpu_to_be32(hw_access_flags);
}

static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
			    int attr_mask)
{
	if (attr_mask & IB_QP_PKEY_INDEX)
		sqp->pkey_index = attr->pkey_index;
	if (attr_mask & IB_QP_QKEY)
		sqp->qkey = attr->qkey;
	if (attr_mask & IB_QP_SQ_PSN)
		sqp->send_psn = attr->sq_psn;
}

static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
{
	path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}

static int _mlx4_set_path(struct mlx4_ib_dev *dev,
			  const struct rdma_ah_attr *ah,
			  u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
			  struct mlx4_roce_smac_vlan_info *smac_info, u8 port)
{
	int vidx;
	int smac_index;
	int err;

	path->grh_mylmc = rdma_ah_get_path_bits(ah) & 0x7f;
	path->rlid = cpu_to_be16(rdma_ah_get_dlid(ah));
	if (rdma_ah_get_static_rate(ah)) {
		path->static_rate = rdma_ah_get_static_rate(ah) +
				    MLX4_STAT_RATE_OFFSET;
		while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
		       !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
			--path->static_rate;
	} else
		path->static_rate = 0;

	if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
		const struct ib_global_route *grh = rdma_ah_read_grh(ah);
		int real_sgid_index =
			mlx4_ib_gid_index_to_real_index(dev, port,
							grh->sgid_index);

		if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) {
			pr_err("sgid_index (%u) too large. max is %d\n",
			       real_sgid_index, dev->dev->caps.gid_table_len[port] - 1);
			return -1;
		}

		path->grh_mylmc |= 1 << 7;
		path->mgid_index = real_sgid_index;
		path->hop_limit  = grh->hop_limit;
		path->tclass_flowlabel =
			cpu_to_be32((grh->traffic_class << 20) |
				    (grh->flow_label));
		memcpy(path->rgid, grh->dgid.raw, 16);
	}

	if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
		if (!(rdma_ah_get_ah_flags(ah) & IB_AH_GRH))
			return -1;

		path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
			((port - 1) << 6) | ((rdma_ah_get_sl(ah) & 7) << 3);

		path->feup |= MLX4_FEUP_FORCE_ETH_UP;
		if (vlan_tag < 0x1000) {
			if (smac_info->vid < 0x1000) {
				/* both valid vlan ids */
				if (smac_info->vid != vlan_tag) {
					/* different VIDs.  unreg old and reg new */
					err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
					if (err)
						return err;
					smac_info->candidate_vid = vlan_tag;
					smac_info->candidate_vlan_index = vidx;
					smac_info->candidate_vlan_port = port;
					smac_info->update_vid = 1;
					path->vlan_index = vidx;
				} else {
					path->vlan_index = smac_info->vlan_index;
				}
			} else {
				/* no current vlan tag in qp */
				err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
				if (err)
					return err;
				smac_info->candidate_vid = vlan_tag;
				smac_info->candidate_vlan_index = vidx;
				smac_info->candidate_vlan_port = port;
				smac_info->update_vid = 1;
				path->vlan_index = vidx;
			}
			path->feup |= MLX4_FVL_FORCE_ETH_VLAN;
			path->fl = 1 << 6;
		} else {
			/* have current vlan tag. unregister it at modify-qp success */
			if (smac_info->vid < 0x1000) {
				smac_info->candidate_vid = 0xFFFF;
				smac_info->update_vid = 1;
			}
		}

		/* get smac_index for RoCE use.
		 * If no smac was yet assigned, register one.
		 * If one was already assigned, but the new mac differs,
		 * unregister the old one and register the new one.
		 */
		if ((!smac_info->smac && !smac_info->smac_port) ||
		    smac_info->smac != smac) {
			/* register candidate now, unreg if needed, after success */
			smac_index = mlx4_register_mac(dev->dev, port, smac);
			if (smac_index >= 0) {
				smac_info->candidate_smac_index = smac_index;
				smac_info->candidate_smac = smac;
				smac_info->candidate_smac_port = port;
			} else {
				return -EINVAL;
			}
		} else {
			smac_index = smac_info->smac_index;
		}
		memcpy(path->dmac, ah->roce.dmac, 6);
		path->ackto = MLX4_IB_LINK_TYPE_ETH;
		/* put MAC table smac index for IBoE */
		path->grh_mylmc = (u8) (smac_index) | 0x80;
	} else {
		path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
			((port - 1) << 6) | ((rdma_ah_get_sl(ah) & 0xf) << 2);
	}

	return 0;
}

static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
			 enum ib_qp_attr_mask qp_attr_mask,
			 struct mlx4_ib_qp *mqp,
			 struct mlx4_qp_path *path, u8 port,
			 u16 vlan_id, u8 *smac)
{
	return _mlx4_set_path(dev, &qp->ah_attr,
			      mlx4_mac_to_u64(smac),
			      vlan_id,
			      path, &mqp->pri, port);
}

static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
			     const struct ib_qp_attr *qp,
			     enum ib_qp_attr_mask qp_attr_mask,
			     struct mlx4_ib_qp *mqp,
			     struct mlx4_qp_path *path, u8 port)
{
	return _mlx4_set_path(dev, &qp->alt_ah_attr,
			      0,
			      0xffff,
			      path, &mqp->alt, port);
}

static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
	struct mlx4_ib_gid_entry *ge, *tmp;

	list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
		if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) {
			ge->added = 1;
			ge->port = qp->port;
		}
	}
}

static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev,
				    struct mlx4_ib_qp *qp,
				    struct mlx4_qp_context *context)
{
	u64 u64_mac;
	int smac_index;

	u64_mac = atomic64_read(&dev->iboe.mac[qp->port - 1]);

	context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
	if (!qp->pri.smac && !qp->pri.smac_port) {
		smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
		if (smac_index >= 0) {
			qp->pri.candidate_smac_index = smac_index;
			qp->pri.candidate_smac = u64_mac;
			qp->pri.candidate_smac_port = qp->port;
			context->pri_path.grh_mylmc = 0x80 | (u8) smac_index;
		} else {
			return -ENOENT;
		}
	}
	return 0;
}

static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
	struct counter_index *new_counter_index;
	int err;
	u32 tmp_idx;

	if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) !=
	    IB_LINK_LAYER_ETHERNET ||
	    !(qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) ||
	    !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK))
		return 0;

	err = mlx4_counter_alloc(dev->dev, &tmp_idx);
	if (err)
		return err;

	new_counter_index = kmalloc(sizeof(*new_counter_index), GFP_KERNEL);
	if (!new_counter_index) {
		mlx4_counter_free(dev->dev, tmp_idx);
		return -ENOMEM;
	}

	new_counter_index->index = tmp_idx;
	new_counter_index->allocated = 1;
	qp->counter_index = new_counter_index;

	mutex_lock(&dev->counters_table[qp->port - 1].mutex);
	list_add_tail(&new_counter_index->list,
		      &dev->counters_table[qp->port - 1].counters_list);
	mutex_unlock(&dev->counters_table[qp->port - 1].mutex);

	return 0;
}

enum {
	MLX4_QPC_ROCE_MODE_1 = 0,
	MLX4_QPC_ROCE_MODE_2 = 2,
	MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff
};

static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
{
	switch (gid_type) {
	case IB_GID_TYPE_ROCE:
		return MLX4_QPC_ROCE_MODE_1;
	case IB_GID_TYPE_ROCE_UDP_ENCAP:
		return MLX4_QPC_ROCE_MODE_2;
	default:
		return MLX4_QPC_ROCE_MODE_UNDEFINED;
	}
}

static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
			       const struct ib_qp_attr *attr, int attr_mask,
			       enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx4_ib_qp *qp = to_mqp(ibqp);
	struct mlx4_ib_pd *pd;
	struct mlx4_ib_cq *send_cq, *recv_cq;
	struct mlx4_qp_context *context;
	enum mlx4_qp_optpar optpar = 0;
	int sqd_event;
	int steer_qp = 0;
	int err = -EINVAL;
	int counter_index;

	/* APM is not supported under RoCE */
	if (attr_mask & IB_QP_ALT_PATH &&
	    rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
	    IB_LINK_LAYER_ETHERNET)
		return -ENOTSUPP;

	context = kzalloc(sizeof *context, GFP_KERNEL);
	if (!context)
		return -ENOMEM;

	context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
				     (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));

	if (!(attr_mask & IB_QP_PATH_MIG_STATE))
		context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
	else {
		optpar |= MLX4_QP_OPTPAR_PM_STATE;
		switch (attr->path_mig_state) {
		case IB_MIG_MIGRATED:
			context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
			break;
		case IB_MIG_REARM:
			context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
			break;
		case IB_MIG_ARMED:
			context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
			break;
		}
	}

	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
		context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
	else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
		context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
	else if (ibqp->qp_type == IB_QPT_UD) {
		if (qp->flags & MLX4_IB_QP_LSO)
			context->mtu_msgmax =
(IB_MTU_4096 << 5) | 1661 ilog2(dev->dev->caps.max_gso_sz); 1662 else 1663 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; 1664 } else if (attr_mask & IB_QP_PATH_MTU) { 1665 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { 1666 pr_err("path MTU (%u) is invalid\n", 1667 attr->path_mtu); 1668 goto out; 1669 } 1670 context->mtu_msgmax = (attr->path_mtu << 5) | 1671 ilog2(dev->dev->caps.max_msg_sz); 1672 } 1673 1674 if (qp->rq.wqe_cnt) 1675 context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3; 1676 context->rq_size_stride |= qp->rq.wqe_shift - 4; 1677 1678 if (qp->sq.wqe_cnt) 1679 context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; 1680 context->sq_size_stride |= qp->sq.wqe_shift - 4; 1681 1682 if (new_state == IB_QPS_RESET && qp->counter_index) 1683 mlx4_ib_free_qp_counter(dev, qp); 1684 1685 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { 1686 context->sq_size_stride |= !!qp->sq_no_prefetch << 7; 1687 context->xrcd = cpu_to_be32((u32) qp->xrcdn); 1688 if (ibqp->qp_type == IB_QPT_RAW_PACKET) 1689 context->param3 |= cpu_to_be32(1 << 30); 1690 } 1691 1692 if (qp->ibqp.uobject) 1693 context->usr_page = cpu_to_be32( 1694 mlx4_to_hw_uar_index(dev->dev, 1695 to_mucontext(ibqp->uobject->context)->uar.index)); 1696 else 1697 context->usr_page = cpu_to_be32( 1698 mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index)); 1699 1700 if (attr_mask & IB_QP_DEST_QPN) 1701 context->remote_qpn = cpu_to_be32(attr->dest_qp_num); 1702 1703 if (attr_mask & IB_QP_PORT) { 1704 if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD && 1705 !(attr_mask & IB_QP_AV)) { 1706 mlx4_set_sched(&context->pri_path, attr->port_num); 1707 optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE; 1708 } 1709 } 1710 1711 if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { 1712 err = create_qp_lb_counter(dev, qp); 1713 if (err) 1714 goto out; 1715 1716 counter_index = 1717 dev->counters_table[qp->port - 1].default_counter; 1718 if (qp->counter_index) 1719 counter_index = qp->counter_index->index; 1720 1721 if (counter_index != -1) { 1722 context->pri_path.counter_index = counter_index; 1723 optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; 1724 if (qp->counter_index) { 1725 context->pri_path.fl |= 1726 MLX4_FL_ETH_SRC_CHECK_MC_LB; 1727 context->pri_path.vlan_control |= 1728 MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; 1729 } 1730 } else 1731 context->pri_path.counter_index = 1732 MLX4_SINK_COUNTER_INDEX(dev->dev); 1733 1734 if (qp->flags & MLX4_IB_QP_NETIF) { 1735 mlx4_ib_steer_qp_reg(dev, qp, 1); 1736 steer_qp = 1; 1737 } 1738 1739 if (ibqp->qp_type == IB_QPT_GSI) { 1740 enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ? 1741 IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE; 1742 u8 qpc_roce_mode = gid_type_to_qpc(gid_type); 1743 1744 context->rlkey_roce_mode |= (qpc_roce_mode << 6); 1745 } 1746 } 1747 1748 if (attr_mask & IB_QP_PKEY_INDEX) { 1749 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) 1750 context->pri_path.disable_pkey_check = 0x40; 1751 context->pri_path.pkey_index = attr->pkey_index; 1752 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; 1753 } 1754 1755 if (attr_mask & IB_QP_AV) { 1756 u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : 1757 attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; 1758 union ib_gid gid; 1759 struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; 1760 u16 vlan = 0xffff; 1761 u8 smac[ETH_ALEN]; 1762 int status = 0; 1763 int is_eth = 1764 rdma_cap_eth_ah(&dev->ib_dev, port_num) && 1765 rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH; 1766 1767 if (is_eth) { 1768 int index = 1769 rdma_ah_read_grh(&attr->ah_attr)->sgid_index; 1770 1771 status = ib_get_cached_gid(ibqp->device, port_num, 1772 index, &gid, &gid_attr); 1773 if (!status && !memcmp(&gid, &zgid, sizeof(gid))) 1774 status = -ENOENT; 1775 if (!status && gid_attr.ndev) { 1776 vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev); 1777 memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN); 1778 dev_put(gid_attr.ndev); 1779 } 1780 } 1781 if (status) 1782 goto out; 1783 1784 if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, 1785 port_num, vlan, smac)) 1786 goto out; 1787 1788 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | 1789 MLX4_QP_OPTPAR_SCHED_QUEUE); 1790 1791 if (is_eth && 1792 (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) { 1793 u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type); 1794 1795 if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) { 1796 err = -EINVAL; 1797 goto out; 1798 } 1799 context->rlkey_roce_mode |= (qpc_roce_mode << 6); 1800 } 1801 1802 } 1803 1804 if (attr_mask & IB_QP_TIMEOUT) { 1805 context->pri_path.ackto |= attr->timeout << 3; 1806 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; 1807 } 1808 1809 if (attr_mask & IB_QP_ALT_PATH) { 1810 if (attr->alt_port_num == 0 || 1811 attr->alt_port_num > dev->dev->caps.num_ports) 1812 goto out; 1813 1814 if (attr->alt_pkey_index >= 1815 dev->dev->caps.pkey_table_len[attr->alt_port_num]) 1816 goto out; 1817 1818 if (mlx4_set_alt_path(dev, attr, attr_mask, qp, 1819 &context->alt_path, 1820 attr->alt_port_num)) 1821 goto out; 1822 1823 context->alt_path.pkey_index = attr->alt_pkey_index; 1824 context->alt_path.ackto = attr->alt_timeout << 3; 1825 optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; 1826 } 1827 1828 pd = get_pd(qp); 1829 get_cqs(qp, &send_cq, &recv_cq); 1830 context->pd = cpu_to_be32(pd->pdn); 1831 context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); 1832 context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); 1833 context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); 1834 1835 /* Set "fast registration enabled" for all kernel QPs */ 1836 if (!qp->ibqp.uobject) 1837 context->params1 |= cpu_to_be32(1 << 11); 1838 1839 if (attr_mask & IB_QP_RNR_RETRY) { 1840 context->params1 |= cpu_to_be32(attr->rnr_retry << 13); 1841 optpar |= MLX4_QP_OPTPAR_RNR_RETRY; 1842 } 1843 1844 if (attr_mask & IB_QP_RETRY_CNT) { 1845 context->params1 |= cpu_to_be32(attr->retry_cnt << 16); 1846 optpar |= MLX4_QP_OPTPAR_RETRY_COUNT; 1847 } 1848 1849 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { 1850 if (attr->max_rd_atomic) 1851 context->params1 |= 1852 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); 1853 optpar |= MLX4_QP_OPTPAR_SRA_MAX; 1854 } 1855 1856 if (attr_mask & IB_QP_SQ_PSN) 1857 context->next_send_psn = cpu_to_be32(attr->sq_psn); 1858 1859 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { 1860 if (attr->max_dest_rd_atomic) 1861 context->params2 |= 1862 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); 1863 optpar |= MLX4_QP_OPTPAR_RRA_MAX; 1864 } 1865 1866 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { 1867 context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask); 1868 optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE; 1869 } 1870 1871 if (ibqp->srq) 1872 context->params2 |= 
cpu_to_be32(MLX4_QP_BIT_RIC); 1873 1874 if (attr_mask & IB_QP_MIN_RNR_TIMER) { 1875 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); 1876 optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT; 1877 } 1878 if (attr_mask & IB_QP_RQ_PSN) 1879 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); 1880 1881 /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */ 1882 if (attr_mask & IB_QP_QKEY) { 1883 if (qp->mlx4_ib_qp_type & 1884 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) 1885 context->qkey = cpu_to_be32(IB_QP_SET_QKEY); 1886 else { 1887 if (mlx4_is_mfunc(dev->dev) && 1888 !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) && 1889 (attr->qkey & MLX4_RESERVED_QKEY_MASK) == 1890 MLX4_RESERVED_QKEY_BASE) { 1891 pr_err("Cannot use reserved QKEY" 1892 " 0x%x (range 0xffff0000..0xffffffff" 1893 " is reserved)\n", attr->qkey); 1894 err = -EINVAL; 1895 goto out; 1896 } 1897 context->qkey = cpu_to_be32(attr->qkey); 1898 } 1899 optpar |= MLX4_QP_OPTPAR_Q_KEY; 1900 } 1901 1902 if (ibqp->srq) 1903 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); 1904 1905 if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 1906 context->db_rec_addr = cpu_to_be64(qp->db.dma); 1907 1908 if (cur_state == IB_QPS_INIT && 1909 new_state == IB_QPS_RTR && 1910 (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || 1911 ibqp->qp_type == IB_QPT_UD || 1912 ibqp->qp_type == IB_QPT_RAW_PACKET)) { 1913 context->pri_path.sched_queue = (qp->port - 1) << 6; 1914 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || 1915 qp->mlx4_ib_qp_type & 1916 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) { 1917 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; 1918 if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI) 1919 context->pri_path.fl = 0x80; 1920 } else { 1921 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) 1922 context->pri_path.fl = 0x80; 1923 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; 1924 } 1925 if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) == 1926 IB_LINK_LAYER_ETHERNET) { 1927 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI || 1928 qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) 1929 context->pri_path.feup = 1 << 7; /* don't fsm */ 1930 /* handle smac_index */ 1931 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD || 1932 qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI || 1933 qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { 1934 err = handle_eth_ud_smac_index(dev, qp, context); 1935 if (err) { 1936 err = -EINVAL; 1937 goto out; 1938 } 1939 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) 1940 dev->qp1_proxy[qp->port - 1] = qp; 1941 } 1942 } 1943 } 1944 1945 if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { 1946 context->pri_path.ackto = (context->pri_path.ackto & 0xf8) | 1947 MLX4_IB_LINK_TYPE_ETH; 1948 if (dev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { 1949 /* set QP to receive both tunneled & non-tunneled packets */ 1950 if (!(context->flags & cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET))) 1951 context->srqn = cpu_to_be32(7 << 28); 1952 } 1953 } 1954 1955 if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { 1956 int is_eth = rdma_port_get_link_layer( 1957 &dev->ib_dev, qp->port) == 1958 IB_LINK_LAYER_ETHERNET; 1959 if (is_eth) { 1960 context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH; 1961 optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH; 1962 } 1963 } 1964 1965 1966 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && 1967 attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) 1968 sqd_event = 1; 
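/* request the affiliated "SQ drained" asynchronous event only when
 * the caller asked for it via en_sqd_async_notify
 */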
1969 else 1970 sqd_event = 0; 1971 1972 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 1973 context->rlkey_roce_mode |= (1 << 4); 1974 1975 /* 1976 * Before passing a kernel QP to the HW, make sure that the 1977 * ownership bits of the send queue are set and the SQ 1978 * headroom is stamped so that the hardware doesn't start 1979 * processing stale work requests. 1980 */ 1981 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { 1982 struct mlx4_wqe_ctrl_seg *ctrl; 1983 int i; 1984 1985 for (i = 0; i < qp->sq.wqe_cnt; ++i) { 1986 ctrl = get_send_wqe(qp, i); 1987 ctrl->owner_opcode = cpu_to_be32(1 << 31); 1988 if (qp->sq_max_wqes_per_wr == 1) 1989 ctrl->qpn_vlan.fence_size = 1990 1 << (qp->sq.wqe_shift - 4); 1991 1992 stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); 1993 } 1994 } 1995 1996 err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state), 1997 to_mlx4_state(new_state), context, optpar, 1998 sqd_event, &qp->mqp); 1999 if (err) 2000 goto out; 2001 2002 qp->state = new_state; 2003 2004 if (attr_mask & IB_QP_ACCESS_FLAGS) 2005 qp->atomic_rd_en = attr->qp_access_flags; 2006 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) 2007 qp->resp_depth = attr->max_dest_rd_atomic; 2008 if (attr_mask & IB_QP_PORT) { 2009 qp->port = attr->port_num; 2010 update_mcg_macs(dev, qp); 2011 } 2012 if (attr_mask & IB_QP_ALT_PATH) 2013 qp->alt_port = attr->alt_port_num; 2014 2015 if (is_sqp(dev, qp)) 2016 store_sqp_attrs(to_msqp(qp), attr, attr_mask); 2017 2018 /* 2019 * If we moved QP0 to RTR, bring the IB link up; if we moved 2020 * QP0 to RESET or ERROR, bring the link back down. 2021 */ 2022 if (is_qp0(dev, qp)) { 2023 if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR) 2024 if (mlx4_INIT_PORT(dev->dev, qp->port)) 2025 pr_warn("INIT_PORT failed for port %d\n", 2026 qp->port); 2027 2028 if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && 2029 (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) 2030 mlx4_CLOSE_PORT(dev->dev, qp->port); 2031 } 2032 2033 /* 2034 * If we moved a kernel QP to RESET, clean up all old CQ 2035 * entries and reinitialize the QP. 2036 */ 2037 if (new_state == IB_QPS_RESET) { 2038 if (!ibqp->uobject) { 2039 mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, 2040 ibqp->srq ? 
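/* pass the SRQ so that discarded receive
 * completions also release their SRQ WQEs
 */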
to_msrq(ibqp->srq) : NULL); 2041 if (send_cq != recv_cq) 2042 mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); 2043 2044 qp->rq.head = 0; 2045 qp->rq.tail = 0; 2046 qp->sq.head = 0; 2047 qp->sq.tail = 0; 2048 qp->sq_next_wqe = 0; 2049 if (qp->rq.wqe_cnt) 2050 *qp->db.db = 0; 2051 2052 if (qp->flags & MLX4_IB_QP_NETIF) 2053 mlx4_ib_steer_qp_reg(dev, qp, 0); 2054 } 2055 if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) { 2056 mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); 2057 qp->pri.smac = 0; 2058 qp->pri.smac_port = 0; 2059 } 2060 if (qp->alt.smac) { 2061 mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); 2062 qp->alt.smac = 0; 2063 } 2064 if (qp->pri.vid < 0x1000) { 2065 mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); 2066 qp->pri.vid = 0xFFFF; 2067 qp->pri.candidate_vid = 0xFFFF; 2068 qp->pri.update_vid = 0; 2069 } 2070 2071 if (qp->alt.vid < 0x1000) { 2072 mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); 2073 qp->alt.vid = 0xFFFF; 2074 qp->alt.candidate_vid = 0xFFFF; 2075 qp->alt.update_vid = 0; 2076 } 2077 } 2078 out: 2079 if (err && qp->counter_index) 2080 mlx4_ib_free_qp_counter(dev, qp); 2081 if (err && steer_qp) 2082 mlx4_ib_steer_qp_reg(dev, qp, 0); 2083 kfree(context); 2084 if (qp->pri.candidate_smac || 2085 (!qp->pri.candidate_smac && qp->pri.candidate_smac_port)) { 2086 if (err) { 2087 mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac); 2088 } else { 2089 if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) 2090 mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); 2091 qp->pri.smac = qp->pri.candidate_smac; 2092 qp->pri.smac_index = qp->pri.candidate_smac_index; 2093 qp->pri.smac_port = qp->pri.candidate_smac_port; 2094 } 2095 qp->pri.candidate_smac = 0; 2096 qp->pri.candidate_smac_index = 0; 2097 qp->pri.candidate_smac_port = 0; 2098 } 2099 if (qp->alt.candidate_smac) { 2100 if (err) { 2101 mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac); 2102 } else { 2103 if (qp->alt.smac) 2104 mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); 2105 qp->alt.smac = qp->alt.candidate_smac; 2106 qp->alt.smac_index = qp->alt.candidate_smac_index; 2107 qp->alt.smac_port = qp->alt.candidate_smac_port; 2108 } 2109 qp->alt.candidate_smac = 0; 2110 qp->alt.candidate_smac_index = 0; 2111 qp->alt.candidate_smac_port = 0; 2112 } 2113 2114 if (qp->pri.update_vid) { 2115 if (err) { 2116 if (qp->pri.candidate_vid < 0x1000) 2117 mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port, 2118 qp->pri.candidate_vid); 2119 } else { 2120 if (qp->pri.vid < 0x1000) 2121 mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, 2122 qp->pri.vid); 2123 qp->pri.vid = qp->pri.candidate_vid; 2124 qp->pri.vlan_port = qp->pri.candidate_vlan_port; 2125 qp->pri.vlan_index = qp->pri.candidate_vlan_index; 2126 } 2127 qp->pri.candidate_vid = 0xFFFF; 2128 qp->pri.update_vid = 0; 2129 } 2130 2131 if (qp->alt.update_vid) { 2132 if (err) { 2133 if (qp->alt.candidate_vid < 0x1000) 2134 mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port, 2135 qp->alt.candidate_vid); 2136 } else { 2137 if (qp->alt.vid < 0x1000) 2138 mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, 2139 qp->alt.vid); 2140 qp->alt.vid = qp->alt.candidate_vid; 2141 qp->alt.vlan_port = qp->alt.candidate_vlan_port; 2142 qp->alt.vlan_index = qp->alt.candidate_vlan_index; 2143 } 2144 qp->alt.candidate_vid = 0xFFFF; 2145 qp->alt.update_vid = 0; 2146 } 2147 2148 return err; 2149 } 2150 2151 static int 
_mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 2152 int attr_mask, struct ib_udata *udata) 2153 { 2154 struct mlx4_ib_dev *dev = to_mdev(ibqp->device); 2155 struct mlx4_ib_qp *qp = to_mqp(ibqp); 2156 enum ib_qp_state cur_state, new_state; 2157 int err = -EINVAL; 2158 int ll; 2159 mutex_lock(&qp->mutex); 2160 2161 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; 2162 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 2163 2164 if (cur_state == new_state && cur_state == IB_QPS_RESET) { 2165 ll = IB_LINK_LAYER_UNSPECIFIED; 2166 } else { 2167 int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; 2168 ll = rdma_port_get_link_layer(&dev->ib_dev, port); 2169 } 2170 2171 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, 2172 attr_mask, ll)) { 2173 pr_debug("qpn 0x%x: invalid attribute mask specified " 2174 "for transition %d to %d. qp_type %d," 2175 " attr_mask 0x%x\n", 2176 ibqp->qp_num, cur_state, new_state, 2177 ibqp->qp_type, attr_mask); 2178 goto out; 2179 } 2180 2181 if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) { 2182 if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { 2183 if ((ibqp->qp_type == IB_QPT_RC) || 2184 (ibqp->qp_type == IB_QPT_UD) || 2185 (ibqp->qp_type == IB_QPT_UC) || 2186 (ibqp->qp_type == IB_QPT_RAW_PACKET) || 2187 (ibqp->qp_type == IB_QPT_XRC_INI)) { 2188 attr->port_num = mlx4_ib_bond_next_port(dev); 2189 } 2190 } else { 2191 /* no sense in changing port_num 2192 * when ports are bonded */ 2193 attr_mask &= ~IB_QP_PORT; 2194 } 2195 } 2196 2197 if ((attr_mask & IB_QP_PORT) && 2198 (attr->port_num == 0 || attr->port_num > dev->num_ports)) { 2199 pr_debug("qpn 0x%x: invalid port number (%d) specified " 2200 "for transition %d to %d. qp_type %d\n", 2201 ibqp->qp_num, attr->port_num, cur_state, 2202 new_state, ibqp->qp_type); 2203 goto out; 2204 } 2205 2206 if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) && 2207 (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) != 2208 IB_LINK_LAYER_ETHERNET)) 2209 goto out; 2210 2211 if (attr_mask & IB_QP_PKEY_INDEX) { 2212 int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; 2213 if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) { 2214 pr_debug("qpn 0x%x: invalid pkey index (%d) specified " 2215 "for transition %d to %d. qp_type %d\n", 2216 ibqp->qp_num, attr->pkey_index, cur_state, 2217 new_state, ibqp->qp_type); 2218 goto out; 2219 } 2220 } 2221 2222 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && 2223 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { 2224 pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. " 2225 "Transition %d to %d. qp_type %d\n", 2226 ibqp->qp_num, attr->max_rd_atomic, cur_state, 2227 new_state, ibqp->qp_type); 2228 goto out; 2229 } 2230 2231 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && 2232 attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { 2233 pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. " 2234 "Transition %d to %d. 
qp_type %d\n", 2235 ibqp->qp_num, attr->max_dest_rd_atomic, cur_state, 2236 new_state, ibqp->qp_type); 2237 goto out; 2238 } 2239 2240 if (cur_state == new_state && cur_state == IB_QPS_RESET) { 2241 err = 0; 2242 goto out; 2243 } 2244 2245 err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); 2246 2247 if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) 2248 attr->port_num = 1; 2249 2250 out: 2251 mutex_unlock(&qp->mutex); 2252 return err; 2253 } 2254 2255 int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 2256 int attr_mask, struct ib_udata *udata) 2257 { 2258 struct mlx4_ib_qp *mqp = to_mqp(ibqp); 2259 int ret; 2260 2261 ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata); 2262 2263 if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { 2264 struct mlx4_ib_sqp *sqp = to_msqp(mqp); 2265 int err = 0; 2266 2267 if (sqp->roce_v2_gsi) 2268 err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask); 2269 if (err) 2270 pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n", 2271 err); 2272 } 2273 return ret; 2274 } 2275 2276 static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) 2277 { 2278 int i; 2279 for (i = 0; i < dev->caps.num_ports; i++) { 2280 if (qpn == dev->caps.qp0_proxy[i] || 2281 qpn == dev->caps.qp0_tunnel[i]) { 2282 *qkey = dev->caps.qp0_qkey[i]; 2283 return 0; 2284 } 2285 } 2286 return -EINVAL; 2287 } 2288 2289 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, 2290 struct ib_ud_wr *wr, 2291 void *wqe, unsigned *mlx_seg_len) 2292 { 2293 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); 2294 struct ib_device *ib_dev = &mdev->ib_dev; 2295 struct mlx4_wqe_mlx_seg *mlx = wqe; 2296 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; 2297 struct mlx4_ib_ah *ah = to_mah(wr->ah); 2298 u16 pkey; 2299 u32 qkey; 2300 int send_size; 2301 int header_size; 2302 int spc; 2303 int i; 2304 2305 if (wr->wr.opcode != IB_WR_SEND) 2306 return -EINVAL; 2307 2308 send_size = 0; 2309 2310 for (i = 0; i < wr->wr.num_sge; ++i) 2311 send_size += wr->wr.sg_list[i].length; 2312 2313 /* for proxy-qp0 sends, need to add in size of tunnel header */ 2314 /* for tunnel-qp0 sends, tunnel header is already in s/g list */ 2315 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) 2316 send_size += sizeof (struct mlx4_ib_tunnel_header); 2317 2318 ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header); 2319 2320 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { 2321 sqp->ud_header.lrh.service_level = 2322 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; 2323 sqp->ud_header.lrh.destination_lid = 2324 cpu_to_be16(ah->av.ib.g_slid & 0x7f); 2325 sqp->ud_header.lrh.source_lid = 2326 cpu_to_be16(ah->av.ib.g_slid & 0x7f); 2327 } 2328 2329 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); 2330 2331 /* force loopback */ 2332 mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR); 2333 mlx->rlid = sqp->ud_header.lrh.destination_lid; 2334 2335 sqp->ud_header.lrh.virtual_lane = 0; 2336 sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); 2337 ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); 2338 sqp->ud_header.bth.pkey = cpu_to_be16(pkey); 2339 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) 2340 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); 2341 else 2342 sqp->ud_header.bth.destination_qpn = 2343 cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); 2344 2345 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); 2346 if 
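/* the PF derives the paravirtualized QP0 qkey itself; a VF looks it
 * up from the proxy/tunnel QP0 information it was handed
 */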
(mlx4_is_master(mdev->dev)) { 2347 if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) 2348 return -EINVAL; 2349 } else { 2350 if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) 2351 return -EINVAL; 2352 } 2353 sqp->ud_header.deth.qkey = cpu_to_be32(qkey); 2354 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); 2355 2356 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; 2357 sqp->ud_header.immediate_present = 0; 2358 2359 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); 2360 2361 /* 2362 * Inline data segments may not cross a 64 byte boundary. If 2363 * our UD header is bigger than the space available up to the 2364 * next 64 byte boundary in the WQE, use two inline data 2365 * segments to hold the UD header. 2366 */ 2367 spc = MLX4_INLINE_ALIGN - 2368 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); 2369 if (header_size <= spc) { 2370 inl->byte_count = cpu_to_be32(1 << 31 | header_size); 2371 memcpy(inl + 1, sqp->header_buf, header_size); 2372 i = 1; 2373 } else { 2374 inl->byte_count = cpu_to_be32(1 << 31 | spc); 2375 memcpy(inl + 1, sqp->header_buf, spc); 2376 2377 inl = (void *) (inl + 1) + spc; 2378 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); 2379 /* 2380 * Need a barrier here to make sure all the data is 2381 * visible before the byte_count field is set. 2382 * Otherwise the HCA prefetcher could grab the 64-byte 2383 * chunk with this inline segment and get a valid (!= 2384 * 0xffffffff) byte count but stale data, and end up 2385 * generating a packet with bad headers. 2386 * 2387 * The first inline segment's byte_count field doesn't 2388 * need a barrier, because it comes after a 2389 * control/MLX segment and therefore is at an offset 2390 * of 16 mod 64. 2391 */ 2392 wmb(); 2393 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); 2394 i = 2; 2395 } 2396 2397 *mlx_seg_len = 2398 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); 2399 return 0; 2400 } 2401 2402 static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num) 2403 { 2404 union sl2vl_tbl_to_u64 tmp_vltab; 2405 u8 vl; 2406 2407 if (sl > 15) 2408 return 0xf; 2409 tmp_vltab.sl64 = atomic64_read(&dev->sl2vl[port_num - 1]); 2410 vl = tmp_vltab.sl8[sl >> 1]; 2411 if (sl & 1) 2412 vl &= 0x0f; 2413 else 2414 vl >>= 4; 2415 return vl; 2416 } 2417 2418 static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num, 2419 int index, union ib_gid *gid, 2420 enum ib_gid_type *gid_type) 2421 { 2422 struct mlx4_ib_iboe *iboe = &ibdev->iboe; 2423 struct mlx4_port_gid_table *port_gid_table; 2424 unsigned long flags; 2425 2426 port_gid_table = &iboe->gids[port_num - 1]; 2427 spin_lock_irqsave(&iboe->lock, flags); 2428 memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid)); 2429 *gid_type = port_gid_table->gids[index].gid_type; 2430 spin_unlock_irqrestore(&iboe->lock, flags); 2431 if (!memcmp(gid, &zgid, sizeof(*gid))) 2432 return -ENOENT; 2433 2434 return 0; 2435 } 2436 2437 #define MLX4_ROCEV2_QP1_SPORT 0xC000 2438 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, 2439 void *wqe, unsigned *mlx_seg_len) 2440 { 2441 struct ib_device *ib_dev = sqp->qp.ibqp.device; 2442 struct mlx4_ib_dev *ibdev = to_mdev(ib_dev); 2443 struct mlx4_wqe_mlx_seg *mlx = wqe; 2444 struct mlx4_wqe_ctrl_seg *ctrl = wqe; 2445 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; 2446 struct mlx4_ib_ah *ah = to_mah(wr->ah); 2447 union ib_gid sgid; 2448 u16 pkey; 2449 int send_size; 2450 int header_size; 2451 int spc; 2452 int i; 2453 int 
err = 0; 2454 u16 vlan = 0xffff; 2455 bool is_eth; 2456 bool is_vlan = false; 2457 bool is_grh; 2458 bool is_udp = false; 2459 int ip_version = 0; 2460 2461 send_size = 0; 2462 for (i = 0; i < wr->wr.num_sge; ++i) 2463 send_size += wr->wr.sg_list[i].length; 2464 2465 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; 2466 is_grh = mlx4_ib_ah_grh_present(ah); 2467 if (is_eth) { 2468 enum ib_gid_type gid_type; 2469 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { 2470 /* When multi-function is enabled, the ib_core gid 2471 * indexes don't necessarily match the hw ones, so 2472 * we must use our own cache */ 2473 err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev, 2474 be32_to_cpu(ah->av.ib.port_pd) >> 24, 2475 ah->av.ib.gid_index, &sgid.raw[0]); 2476 if (err) 2477 return err; 2478 } else { 2479 err = fill_gid_by_hw_index(ibdev, sqp->qp.port, 2480 ah->av.ib.gid_index, 2481 &sgid, &gid_type); 2482 if (!err) { 2483 is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; 2484 if (is_udp) { 2485 if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) 2486 ip_version = 4; 2487 else 2488 ip_version = 6; 2489 is_grh = false; 2490 } 2491 } else { 2492 return err; 2493 } 2494 } 2495 if (ah->av.eth.vlan != cpu_to_be16(0xffff)) { 2496 vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; 2497 is_vlan = 1; 2498 } 2499 } 2500 err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 2501 ip_version, is_udp, 0, &sqp->ud_header); 2502 if (err) 2503 return err; 2504 2505 if (!is_eth) { 2506 sqp->ud_header.lrh.service_level = 2507 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; 2508 sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid; 2509 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); 2510 } 2511 2512 if (is_grh || (ip_version == 6)) { 2513 sqp->ud_header.grh.traffic_class = 2514 (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; 2515 sqp->ud_header.grh.flow_label = 2516 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); 2517 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; 2518 if (is_eth) { 2519 memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); 2520 } else { 2521 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { 2522 /* When multi-function is enabled, the ib_core gid 2523 * indexes don't necessarily match the hw ones, so 2524 * we must use our own cache 2525 */ 2526 sqp->ud_header.grh.source_gid.global.subnet_prefix = 2527 cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. 2528 demux[sqp->qp.port - 1]. 2529 subnet_prefix))); 2530 sqp->ud_header.grh.source_gid.global.interface_id = 2531 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. 
2532 guid_cache[ah->av.ib.gid_index]; 2533 } else { 2534 ib_get_cached_gid(ib_dev, 2535 be32_to_cpu(ah->av.ib.port_pd) >> 24, 2536 ah->av.ib.gid_index, 2537 &sqp->ud_header.grh.source_gid, NULL); 2538 } 2539 } 2540 memcpy(sqp->ud_header.grh.destination_gid.raw, 2541 ah->av.ib.dgid, 16); 2542 } 2543 2544 if (ip_version == 4) { 2545 sqp->ud_header.ip4.tos = 2546 (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; 2547 sqp->ud_header.ip4.id = 0; 2548 sqp->ud_header.ip4.frag_off = htons(IP_DF); 2549 sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit; 2550 2551 memcpy(&sqp->ud_header.ip4.saddr, 2552 sgid.raw + 12, 4); 2553 memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4); 2554 sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header); 2555 } 2556 2557 if (is_udp) { 2558 sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT); 2559 sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT); 2560 sqp->ud_header.udp.csum = 0; 2561 } 2562 2563 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); 2564 2565 if (!is_eth) { 2566 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | 2567 (sqp->ud_header.lrh.destination_lid == 2568 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | 2569 (sqp->ud_header.lrh.service_level << 8)); 2570 if (ah->av.ib.port_pd & cpu_to_be32(0x80000000)) 2571 mlx->flags |= cpu_to_be32(0x1); /* force loopback */ 2572 mlx->rlid = sqp->ud_header.lrh.destination_lid; 2573 } 2574 2575 switch (wr->wr.opcode) { 2576 case IB_WR_SEND: 2577 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; 2578 sqp->ud_header.immediate_present = 0; 2579 break; 2580 case IB_WR_SEND_WITH_IMM: 2581 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; 2582 sqp->ud_header.immediate_present = 1; 2583 sqp->ud_header.immediate_data = wr->wr.ex.imm_data; 2584 break; 2585 default: 2586 return -EINVAL; 2587 } 2588 2589 if (is_eth) { 2590 struct in6_addr in6; 2591 u16 ether_type; 2592 u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; 2593 2594 ether_type = (!is_udp) ? ETH_P_IBOE: 2595 (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6); 2596 2597 mlx->sched_prio = cpu_to_be16(pcp); 2598 2599 ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac); 2600 memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); 2601 memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); 2602 memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); 2603 memcpy(&in6, sgid.raw, sizeof(in6)); 2604 2605 2606 if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) 2607 mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); 2608 if (!is_vlan) { 2609 sqp->ud_header.eth.type = cpu_to_be16(ether_type); 2610 } else { 2611 sqp->ud_header.vlan.type = cpu_to_be16(ether_type); 2612 sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); 2613 } 2614 } else { 2615 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 
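/* QP0 (SMI) traffic always uses the management VL 15; GSI traffic is
 * mapped from SL to VL through the cached SL2VL table
 */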
15 : 2616 sl_to_vl(to_mdev(ib_dev), 2617 sqp->ud_header.lrh.service_level, 2618 sqp->qp.port); 2619 if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15) 2620 return -EINVAL; 2621 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) 2622 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; 2623 } 2624 sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); 2625 if (!sqp->qp.ibqp.qp_num) 2626 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); 2627 else 2628 ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey); 2629 sqp->ud_header.bth.pkey = cpu_to_be16(pkey); 2630 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); 2631 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); 2632 sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? 2633 sqp->qkey : wr->remote_qkey); 2634 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); 2635 2636 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); 2637 2638 if (0) { 2639 pr_err("built UD header of size %d:\n", header_size); 2640 for (i = 0; i < header_size / 4; ++i) { 2641 if (i % 8 == 0) 2642 pr_err(" [%02x] ", i * 4); 2643 pr_cont(" %08x", 2644 be32_to_cpu(((__be32 *) sqp->header_buf)[i])); 2645 if ((i + 1) % 8 == 0) 2646 pr_cont("\n"); 2647 } 2648 pr_err("\n"); 2649 } 2650 2651 /* 2652 * Inline data segments may not cross a 64 byte boundary. If 2653 * our UD header is bigger than the space available up to the 2654 * next 64 byte boundary in the WQE, use two inline data 2655 * segments to hold the UD header. 2656 */ 2657 spc = MLX4_INLINE_ALIGN - 2658 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); 2659 if (header_size <= spc) { 2660 inl->byte_count = cpu_to_be32(1 << 31 | header_size); 2661 memcpy(inl + 1, sqp->header_buf, header_size); 2662 i = 1; 2663 } else { 2664 inl->byte_count = cpu_to_be32(1 << 31 | spc); 2665 memcpy(inl + 1, sqp->header_buf, spc); 2666 2667 inl = (void *) (inl + 1) + spc; 2668 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); 2669 /* 2670 * Need a barrier here to make sure all the data is 2671 * visible before the byte_count field is set. 2672 * Otherwise the HCA prefetcher could grab the 64-byte 2673 * chunk with this inline segment and get a valid (!= 2674 * 0xffffffff) byte count but stale data, and end up 2675 * generating a packet with bad headers. 2676 * 2677 * The first inline segment's byte_count field doesn't 2678 * need a barrier, because it comes after a 2679 * control/MLX segment and therefore is at an offset 2680 * of 16 mod 64. 2681 */ 2682 wmb(); 2683 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); 2684 i = 2; 2685 } 2686 2687 *mlx_seg_len = 2688 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); 2689 return 0; 2690 } 2691 2692 static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) 2693 { 2694 unsigned cur; 2695 struct mlx4_ib_cq *cq; 2696 2697 cur = wq->head - wq->tail; 2698 if (likely(cur + nreq < wq->max_post)) 2699 return 0; 2700 2701 cq = to_mcq(ib_cq); 2702 spin_lock(&cq->lock); 2703 cur = wq->head - wq->tail; 2704 spin_unlock(&cq->lock); 2705 2706 return cur + nreq >= wq->max_post; 2707 } 2708 2709 static __be32 convert_access(int acc) 2710 { 2711 return (acc & IB_ACCESS_REMOTE_ATOMIC ? 2712 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) | 2713 (acc & IB_ACCESS_REMOTE_WRITE ? 2714 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) | 2715 (acc & IB_ACCESS_REMOTE_READ ? 
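/* remaining IB_ACCESS_* bits map to the matching MLX4_WQE_FMR_*
 * permissions; local read is always granted below
 */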
2716 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) | 2717 (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | 2718 cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); 2719 } 2720 2721 static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg, 2722 struct ib_reg_wr *wr) 2723 { 2724 struct mlx4_ib_mr *mr = to_mmr(wr->mr); 2725 2726 fseg->flags = convert_access(wr->access); 2727 fseg->mem_key = cpu_to_be32(wr->key); 2728 fseg->buf_list = cpu_to_be64(mr->page_map); 2729 fseg->start_addr = cpu_to_be64(mr->ibmr.iova); 2730 fseg->reg_len = cpu_to_be64(mr->ibmr.length); 2731 fseg->offset = 0; /* XXX -- is this just for ZBVA? */ 2732 fseg->page_size = cpu_to_be32(ilog2(mr->ibmr.page_size)); 2733 fseg->reserved[0] = 0; 2734 fseg->reserved[1] = 0; 2735 } 2736 2737 static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) 2738 { 2739 memset(iseg, 0, sizeof(*iseg)); 2740 iseg->mem_key = cpu_to_be32(rkey); 2741 } 2742 2743 static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, 2744 u64 remote_addr, u32 rkey) 2745 { 2746 rseg->raddr = cpu_to_be64(remote_addr); 2747 rseg->rkey = cpu_to_be32(rkey); 2748 rseg->reserved = 0; 2749 } 2750 2751 static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, 2752 struct ib_atomic_wr *wr) 2753 { 2754 if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 2755 aseg->swap_add = cpu_to_be64(wr->swap); 2756 aseg->compare = cpu_to_be64(wr->compare_add); 2757 } else if (wr->wr.opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { 2758 aseg->swap_add = cpu_to_be64(wr->compare_add); 2759 aseg->compare = cpu_to_be64(wr->compare_add_mask); 2760 } else { 2761 aseg->swap_add = cpu_to_be64(wr->compare_add); 2762 aseg->compare = 0; 2763 } 2764 2765 } 2766 2767 static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, 2768 struct ib_atomic_wr *wr) 2769 { 2770 aseg->swap_add = cpu_to_be64(wr->swap); 2771 aseg->swap_add_mask = cpu_to_be64(wr->swap_mask); 2772 aseg->compare = cpu_to_be64(wr->compare_add); 2773 aseg->compare_mask = cpu_to_be64(wr->compare_add_mask); 2774 } 2775 2776 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, 2777 struct ib_ud_wr *wr) 2778 { 2779 memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av)); 2780 dseg->dqpn = cpu_to_be32(wr->remote_qpn); 2781 dseg->qkey = cpu_to_be32(wr->remote_qkey); 2782 dseg->vlan = to_mah(wr->ah)->av.eth.vlan; 2783 memcpy(dseg->mac, to_mah(wr->ah)->av.eth.mac, 6); 2784 } 2785 2786 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, 2787 struct mlx4_wqe_datagram_seg *dseg, 2788 struct ib_ud_wr *wr, 2789 enum mlx4_ib_qp_type qpt) 2790 { 2791 union mlx4_ext_av *av = &to_mah(wr->ah)->av; 2792 struct mlx4_av sqp_av = {0}; 2793 int port = *((u8 *) &av->ib.port_pd) & 0x3; 2794 2795 /* force loopback */ 2796 sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000); 2797 sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */ 2798 sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel & 2799 cpu_to_be32(0xf0000000); 2800 2801 memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); 2802 if (qpt == MLX4_IB_QPT_PROXY_GSI) 2803 dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); 2804 else 2805 dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]); 2806 /* Use QKEY from the QP context, which is set by master */ 2807 dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); 2808 } 2809 2810 static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) 2811 { 2812 struct mlx4_wqe_inline_seg *inl = wqe; 2813 struct 
mlx4_ib_tunnel_header hdr; 2814 struct mlx4_ib_ah *ah = to_mah(wr->ah); 2815 int spc; 2816 int i; 2817 2818 memcpy(&hdr.av, &ah->av, sizeof hdr.av); 2819 hdr.remote_qpn = cpu_to_be32(wr->remote_qpn); 2820 hdr.pkey_index = cpu_to_be16(wr->pkey_index); 2821 hdr.qkey = cpu_to_be32(wr->remote_qkey); 2822 memcpy(hdr.mac, ah->av.eth.mac, 6); 2823 hdr.vlan = ah->av.eth.vlan; 2824 2825 spc = MLX4_INLINE_ALIGN - 2826 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); 2827 if (sizeof (hdr) <= spc) { 2828 memcpy(inl + 1, &hdr, sizeof (hdr)); 2829 wmb(); 2830 inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr)); 2831 i = 1; 2832 } else { 2833 memcpy(inl + 1, &hdr, spc); 2834 wmb(); 2835 inl->byte_count = cpu_to_be32(1 << 31 | spc); 2836 2837 inl = (void *) (inl + 1) + spc; 2838 memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc); 2839 wmb(); 2840 inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc)); 2841 i = 2; 2842 } 2843 2844 *mlx_seg_len = 2845 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16); 2846 } 2847 2848 static void set_mlx_icrc_seg(void *dseg) 2849 { 2850 u32 *t = dseg; 2851 struct mlx4_wqe_inline_seg *iseg = dseg; 2852 2853 t[1] = 0; 2854 2855 /* 2856 * Need a barrier here before writing the byte_count field to 2857 * make sure that all the data is visible before the 2858 * byte_count field is set. Otherwise, if the segment begins 2859 * a new cacheline, the HCA prefetcher could grab the 64-byte 2860 * chunk and get a valid (!= * 0xffffffff) byte count but 2861 * stale data, and end up sending the wrong data. 2862 */ 2863 wmb(); 2864 2865 iseg->byte_count = cpu_to_be32((1 << 31) | 4); 2866 } 2867 2868 static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) 2869 { 2870 dseg->lkey = cpu_to_be32(sg->lkey); 2871 dseg->addr = cpu_to_be64(sg->addr); 2872 2873 /* 2874 * Need a barrier here before writing the byte_count field to 2875 * make sure that all the data is visible before the 2876 * byte_count field is set. Otherwise, if the segment begins 2877 * a new cacheline, the HCA prefetcher could grab the 64-byte 2878 * chunk and get a valid (!= * 0xffffffff) byte count but 2879 * stale data, and end up sending the wrong data. 
2880 */ 2881 wmb(); 2882 2883 dseg->byte_count = cpu_to_be32(sg->length); 2884 } 2885 2886 static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) 2887 { 2888 dseg->byte_count = cpu_to_be32(sg->length); 2889 dseg->lkey = cpu_to_be32(sg->lkey); 2890 dseg->addr = cpu_to_be64(sg->addr); 2891 } 2892 2893 static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr, 2894 struct mlx4_ib_qp *qp, unsigned *lso_seg_len, 2895 __be32 *lso_hdr_sz, __be32 *blh) 2896 { 2897 unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16); 2898 2899 if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) 2900 *blh = cpu_to_be32(1 << 6); 2901 2902 if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && 2903 wr->wr.num_sge > qp->sq.max_gs - (halign >> 4))) 2904 return -EINVAL; 2905 2906 memcpy(wqe->header, wr->header, wr->hlen); 2907 2908 *lso_hdr_sz = cpu_to_be32(wr->mss << 16 | wr->hlen); 2909 *lso_seg_len = halign; 2910 return 0; 2911 } 2912 2913 static __be32 send_ieth(struct ib_send_wr *wr) 2914 { 2915 switch (wr->opcode) { 2916 case IB_WR_SEND_WITH_IMM: 2917 case IB_WR_RDMA_WRITE_WITH_IMM: 2918 return wr->ex.imm_data; 2919 2920 case IB_WR_SEND_WITH_INV: 2921 return cpu_to_be32(wr->ex.invalidate_rkey); 2922 2923 default: 2924 return 0; 2925 } 2926 } 2927 2928 static void add_zero_len_inline(void *wqe) 2929 { 2930 struct mlx4_wqe_inline_seg *inl = wqe; 2931 memset(wqe, 0, 16); 2932 inl->byte_count = cpu_to_be32(1 << 31); 2933 } 2934 2935 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 2936 struct ib_send_wr **bad_wr) 2937 { 2938 struct mlx4_ib_qp *qp = to_mqp(ibqp); 2939 void *wqe; 2940 struct mlx4_wqe_ctrl_seg *ctrl; 2941 struct mlx4_wqe_data_seg *dseg; 2942 unsigned long flags; 2943 int nreq; 2944 int err = 0; 2945 unsigned ind; 2946 int uninitialized_var(stamp); 2947 int uninitialized_var(size); 2948 unsigned uninitialized_var(seglen); 2949 __be32 dummy; 2950 __be32 *lso_wqe; 2951 __be32 uninitialized_var(lso_hdr_sz); 2952 __be32 blh; 2953 int i; 2954 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); 2955 2956 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { 2957 struct mlx4_ib_sqp *sqp = to_msqp(qp); 2958 2959 if (sqp->roce_v2_gsi) { 2960 struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah); 2961 enum ib_gid_type gid_type; 2962 union ib_gid gid; 2963 2964 if (!fill_gid_by_hw_index(mdev, sqp->qp.port, 2965 ah->av.ib.gid_index, 2966 &gid, &gid_type)) 2967 qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 2968 to_mqp(sqp->roce_v2_gsi) : qp; 2969 else 2970 pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n", 2971 ah->av.ib.gid_index); 2972 } 2973 } 2974 2975 spin_lock_irqsave(&qp->sq.lock, flags); 2976 if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { 2977 err = -EIO; 2978 *bad_wr = wr; 2979 nreq = 0; 2980 goto out; 2981 } 2982 2983 ind = qp->sq_next_wqe; 2984 2985 for (nreq = 0; wr; ++nreq, wr = wr->next) { 2986 lso_wqe = &dummy; 2987 blh = 0; 2988 2989 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { 2990 err = -ENOMEM; 2991 *bad_wr = wr; 2992 goto out; 2993 } 2994 2995 if (unlikely(wr->num_sge > qp->sq.max_gs)) { 2996 err = -EINVAL; 2997 *bad_wr = wr; 2998 goto out; 2999 } 3000 3001 ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); 3002 qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id; 3003 3004 ctrl->srcrb_flags = 3005 (wr->send_flags & IB_SEND_SIGNALED ? 3006 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) | 3007 (wr->send_flags & IB_SEND_SOLICITED ? 
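/* per-WR control bits: solicited event and checksum offload requests
 * are folded in together with the QP's completion signaling mode
 */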
3008 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) | 3009 ((wr->send_flags & IB_SEND_IP_CSUM) ? 3010 cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM | 3011 MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | 3012 qp->sq_signal_bits; 3013 3014 ctrl->imm = send_ieth(wr); 3015 3016 wqe += sizeof *ctrl; 3017 size = sizeof *ctrl / 16; 3018 3019 switch (qp->mlx4_ib_qp_type) { 3020 case MLX4_IB_QPT_RC: 3021 case MLX4_IB_QPT_UC: 3022 switch (wr->opcode) { 3023 case IB_WR_ATOMIC_CMP_AND_SWP: 3024 case IB_WR_ATOMIC_FETCH_AND_ADD: 3025 case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: 3026 set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, 3027 atomic_wr(wr)->rkey); 3028 wqe += sizeof (struct mlx4_wqe_raddr_seg); 3029 3030 set_atomic_seg(wqe, atomic_wr(wr)); 3031 wqe += sizeof (struct mlx4_wqe_atomic_seg); 3032 3033 size += (sizeof (struct mlx4_wqe_raddr_seg) + 3034 sizeof (struct mlx4_wqe_atomic_seg)) / 16; 3035 3036 break; 3037 3038 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: 3039 set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, 3040 atomic_wr(wr)->rkey); 3041 wqe += sizeof (struct mlx4_wqe_raddr_seg); 3042 3043 set_masked_atomic_seg(wqe, atomic_wr(wr)); 3044 wqe += sizeof (struct mlx4_wqe_masked_atomic_seg); 3045 3046 size += (sizeof (struct mlx4_wqe_raddr_seg) + 3047 sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16; 3048 3049 break; 3050 3051 case IB_WR_RDMA_READ: 3052 case IB_WR_RDMA_WRITE: 3053 case IB_WR_RDMA_WRITE_WITH_IMM: 3054 set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, 3055 rdma_wr(wr)->rkey); 3056 wqe += sizeof (struct mlx4_wqe_raddr_seg); 3057 size += sizeof (struct mlx4_wqe_raddr_seg) / 16; 3058 break; 3059 3060 case IB_WR_LOCAL_INV: 3061 ctrl->srcrb_flags |= 3062 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); 3063 set_local_inv_seg(wqe, wr->ex.invalidate_rkey); 3064 wqe += sizeof (struct mlx4_wqe_local_inval_seg); 3065 size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; 3066 break; 3067 3068 case IB_WR_REG_MR: 3069 ctrl->srcrb_flags |= 3070 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); 3071 set_reg_seg(wqe, reg_wr(wr)); 3072 wqe += sizeof(struct mlx4_wqe_fmr_seg); 3073 size += sizeof(struct mlx4_wqe_fmr_seg) / 16; 3074 break; 3075 3076 default: 3077 /* No extra segments required for sends */ 3078 break; 3079 } 3080 break; 3081 3082 case MLX4_IB_QPT_TUN_SMI_OWNER: 3083 err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), 3084 ctrl, &seglen); 3085 if (unlikely(err)) { 3086 *bad_wr = wr; 3087 goto out; 3088 } 3089 wqe += seglen; 3090 size += seglen / 16; 3091 break; 3092 case MLX4_IB_QPT_TUN_SMI: 3093 case MLX4_IB_QPT_TUN_GSI: 3094 /* this is a UD qp used in MAD responses to slaves. 
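 * The tunnel header is already part of the caller's s/g list, so only
 * the datagram segment (with the force-loopback bit set just below)
 * needs to be added here.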
*/ 3095 set_datagram_seg(wqe, ud_wr(wr)); 3096 /* set the forced-loopback bit in the data seg av */ 3097 *(__be32 *) wqe |= cpu_to_be32(0x80000000); 3098 wqe += sizeof (struct mlx4_wqe_datagram_seg); 3099 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 3100 break; 3101 case MLX4_IB_QPT_UD: 3102 set_datagram_seg(wqe, ud_wr(wr)); 3103 wqe += sizeof (struct mlx4_wqe_datagram_seg); 3104 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 3105 3106 if (wr->opcode == IB_WR_LSO) { 3107 err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen, 3108 &lso_hdr_sz, &blh); 3109 if (unlikely(err)) { 3110 *bad_wr = wr; 3111 goto out; 3112 } 3113 lso_wqe = (__be32 *) wqe; 3114 wqe += seglen; 3115 size += seglen / 16; 3116 } 3117 break; 3118 3119 case MLX4_IB_QPT_PROXY_SMI_OWNER: 3120 err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), 3121 ctrl, &seglen); 3122 if (unlikely(err)) { 3123 *bad_wr = wr; 3124 goto out; 3125 } 3126 wqe += seglen; 3127 size += seglen / 16; 3128 /* to start tunnel header on a cache-line boundary */ 3129 add_zero_len_inline(wqe); 3130 wqe += 16; 3131 size++; 3132 build_tunnel_header(ud_wr(wr), wqe, &seglen); 3133 wqe += seglen; 3134 size += seglen / 16; 3135 break; 3136 case MLX4_IB_QPT_PROXY_SMI: 3137 case MLX4_IB_QPT_PROXY_GSI: 3138 /* If we are tunneling special qps, this is a UD qp. 3139 * In this case we first add a UD segment targeting 3140 * the tunnel qp, and then add a header with address 3141 * information */ 3142 set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, 3143 ud_wr(wr), 3144 qp->mlx4_ib_qp_type); 3145 wqe += sizeof (struct mlx4_wqe_datagram_seg); 3146 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 3147 build_tunnel_header(ud_wr(wr), wqe, &seglen); 3148 wqe += seglen; 3149 size += seglen / 16; 3150 break; 3151 3152 case MLX4_IB_QPT_SMI: 3153 case MLX4_IB_QPT_GSI: 3154 err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl, 3155 &seglen); 3156 if (unlikely(err)) { 3157 *bad_wr = wr; 3158 goto out; 3159 } 3160 wqe += seglen; 3161 size += seglen / 16; 3162 break; 3163 3164 default: 3165 break; 3166 } 3167 3168 /* 3169 * Write data segments in reverse order, so as to 3170 * overwrite cacheline stamp last within each 3171 * cacheline. This avoids issues with WQE 3172 * prefetching. 3173 */ 3174 3175 dseg = wqe; 3176 dseg += wr->num_sge - 1; 3177 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); 3178 3179 /* Add one more inline data segment for ICRC for MLX sends */ 3180 if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || 3181 qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI || 3182 qp->mlx4_ib_qp_type & 3183 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) { 3184 set_mlx_icrc_seg(dseg + 1); 3185 size += sizeof (struct mlx4_wqe_data_seg) / 16; 3186 } 3187 3188 for (i = wr->num_sge - 1; i >= 0; --i, --dseg) 3189 set_data_seg(dseg, wr->sg_list + i); 3190 3191 /* 3192 * Possibly overwrite stamping in cacheline with LSO 3193 * segment only after making sure all data segments 3194 * are written. 3195 */ 3196 wmb(); 3197 *lso_wqe = lso_hdr_sz; 3198 3199 ctrl->qpn_vlan.fence_size = (wr->send_flags & IB_SEND_FENCE ? 3200 MLX4_WQE_CTRL_FENCE : 0) | size; 3201 3202 /* 3203 * Make sure descriptor is fully written before 3204 * setting ownership bit (because HW can start 3205 * executing as soon as we do). 3206 */ 3207 wmb(); 3208 3209 if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { 3210 *bad_wr = wr; 3211 err = -EINVAL; 3212 goto out; 3213 } 3214 3215 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | 3216 (ind & qp->sq.wqe_cnt ? 
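/* the ownership bit flips every time the producer index wraps
 * around the power-of-two sized send queue
 */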
cpu_to_be32(1 << 31) : 0) | blh; 3217 3218 stamp = ind + qp->sq_spare_wqes; 3219 ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); 3220 3221 /* 3222 * We can improve latency by not stamping the last 3223 * send queue WQE until after ringing the doorbell, so 3224 * only stamp here if there are still more WQEs to post. 3225 * 3226 * Same optimization applies to padding with NOP wqe 3227 * in case of WQE shrinking (used to prevent wrap-around 3228 * in the middle of WR). 3229 */ 3230 if (wr->next) { 3231 stamp_send_wqe(qp, stamp, size * 16); 3232 ind = pad_wraparound(qp, ind); 3233 } 3234 } 3235 3236 out: 3237 if (likely(nreq)) { 3238 qp->sq.head += nreq; 3239 3240 /* 3241 * Make sure that descriptors are written before 3242 * doorbell record. 3243 */ 3244 wmb(); 3245 3246 writel(qp->doorbell_qpn, 3247 to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL); 3248 3249 /* 3250 * Make sure doorbells don't leak out of SQ spinlock 3251 * and reach the HCA out of order. 3252 */ 3253 mmiowb(); 3254 3255 stamp_send_wqe(qp, stamp, size * 16); 3256 3257 ind = pad_wraparound(qp, ind); 3258 qp->sq_next_wqe = ind; 3259 } 3260 3261 spin_unlock_irqrestore(&qp->sq.lock, flags); 3262 3263 return err; 3264 } 3265 3266 int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 3267 struct ib_recv_wr **bad_wr) 3268 { 3269 struct mlx4_ib_qp *qp = to_mqp(ibqp); 3270 struct mlx4_wqe_data_seg *scat; 3271 unsigned long flags; 3272 int err = 0; 3273 int nreq; 3274 int ind; 3275 int max_gs; 3276 int i; 3277 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); 3278 3279 max_gs = qp->rq.max_gs; 3280 spin_lock_irqsave(&qp->rq.lock, flags); 3281 3282 if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { 3283 err = -EIO; 3284 *bad_wr = wr; 3285 nreq = 0; 3286 goto out; 3287 } 3288 3289 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); 3290 3291 for (nreq = 0; wr; ++nreq, wr = wr->next) { 3292 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { 3293 err = -ENOMEM; 3294 *bad_wr = wr; 3295 goto out; 3296 } 3297 3298 if (unlikely(wr->num_sge > qp->rq.max_gs)) { 3299 err = -EINVAL; 3300 *bad_wr = wr; 3301 goto out; 3302 } 3303 3304 scat = get_recv_wqe(qp, ind); 3305 3306 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | 3307 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { 3308 ib_dma_sync_single_for_device(ibqp->device, 3309 qp->sqp_proxy_rcv[ind].map, 3310 sizeof (struct mlx4_ib_proxy_sqp_hdr), 3311 DMA_FROM_DEVICE); 3312 scat->byte_count = 3313 cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr)); 3314 /* use dma lkey from upper layer entry */ 3315 scat->lkey = cpu_to_be32(wr->sg_list->lkey); 3316 scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map); 3317 scat++; 3318 max_gs--; 3319 } 3320 3321 for (i = 0; i < wr->num_sge; ++i) 3322 __set_data_seg(scat + i, wr->sg_list + i); 3323 3324 if (i < max_gs) { 3325 scat[i].byte_count = 0; 3326 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); 3327 scat[i].addr = 0; 3328 } 3329 3330 qp->rq.wrid[ind] = wr->wr_id; 3331 3332 ind = (ind + 1) & (qp->rq.wqe_cnt - 1); 3333 } 3334 3335 out: 3336 if (likely(nreq)) { 3337 qp->rq.head += nreq; 3338 3339 /* 3340 * Make sure that descriptors are written before 3341 * doorbell record. 
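 * Otherwise the HCA could observe the new receive head before
 * the WQEs it refers to are visible in memory.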
3342 */ 3343 wmb(); 3344 3345 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); 3346 } 3347 3348 spin_unlock_irqrestore(&qp->rq.lock, flags); 3349 3350 return err; 3351 } 3352 3353 static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) 3354 { 3355 switch (mlx4_state) { 3356 case MLX4_QP_STATE_RST: return IB_QPS_RESET; 3357 case MLX4_QP_STATE_INIT: return IB_QPS_INIT; 3358 case MLX4_QP_STATE_RTR: return IB_QPS_RTR; 3359 case MLX4_QP_STATE_RTS: return IB_QPS_RTS; 3360 case MLX4_QP_STATE_SQ_DRAINING: 3361 case MLX4_QP_STATE_SQD: return IB_QPS_SQD; 3362 case MLX4_QP_STATE_SQER: return IB_QPS_SQE; 3363 case MLX4_QP_STATE_ERR: return IB_QPS_ERR; 3364 default: return -1; 3365 } 3366 } 3367 3368 static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state) 3369 { 3370 switch (mlx4_mig_state) { 3371 case MLX4_QP_PM_ARMED: return IB_MIG_ARMED; 3372 case MLX4_QP_PM_REARM: return IB_MIG_REARM; 3373 case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED; 3374 default: return -1; 3375 } 3376 } 3377 3378 static int to_ib_qp_access_flags(int mlx4_flags) 3379 { 3380 int ib_flags = 0; 3381 3382 if (mlx4_flags & MLX4_QP_BIT_RRE) 3383 ib_flags |= IB_ACCESS_REMOTE_READ; 3384 if (mlx4_flags & MLX4_QP_BIT_RWE) 3385 ib_flags |= IB_ACCESS_REMOTE_WRITE; 3386 if (mlx4_flags & MLX4_QP_BIT_RAE) 3387 ib_flags |= IB_ACCESS_REMOTE_ATOMIC; 3388 3389 return ib_flags; 3390 } 3391 3392 static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev, 3393 struct rdma_ah_attr *ah_attr, 3394 struct mlx4_qp_path *path) 3395 { 3396 struct mlx4_dev *dev = ibdev->dev; 3397 u8 port_num = path->sched_queue & 0x40 ? 2 : 1; 3398 3399 memset(ah_attr, 0, sizeof(*ah_attr)); 3400 ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num); 3401 if (port_num == 0 || port_num > dev->caps.num_ports) 3402 return; 3403 3404 if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) 3405 rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) | 3406 ((path->sched_queue & 4) << 1)); 3407 else 3408 rdma_ah_set_sl(ah_attr, (path->sched_queue >> 2) & 0xf); 3409 rdma_ah_set_port_num(ah_attr, port_num); 3410 3411 rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid)); 3412 rdma_ah_set_path_bits(ah_attr, path->grh_mylmc & 0x7f); 3413 rdma_ah_set_static_rate(ah_attr, 3414 path->static_rate ? 
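/* the QPC stores IB static rates biased by 5 (the driver's
 * MLX4_STAT_RATE_OFFSET); zero means no static rate limit
 */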
path->static_rate - 5 : 0); 3415 if (path->grh_mylmc & (1 << 7)) { 3416 rdma_ah_set_grh(ah_attr, NULL, 3417 be32_to_cpu(path->tclass_flowlabel) & 0xfffff, 3418 path->mgid_index, 3419 path->hop_limit, 3420 (be32_to_cpu(path->tclass_flowlabel) 3421 >> 20) & 0xff); 3422 rdma_ah_set_dgid_raw(ah_attr, path->rgid); 3423 } 3424 } 3425 3426 int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, 3427 struct ib_qp_init_attr *qp_init_attr) 3428 { 3429 struct mlx4_ib_dev *dev = to_mdev(ibqp->device); 3430 struct mlx4_ib_qp *qp = to_mqp(ibqp); 3431 struct mlx4_qp_context context; 3432 int mlx4_state; 3433 int err = 0; 3434 3435 mutex_lock(&qp->mutex); 3436 3437 if (qp->state == IB_QPS_RESET) { 3438 qp_attr->qp_state = IB_QPS_RESET; 3439 goto done; 3440 } 3441 3442 err = mlx4_qp_query(dev->dev, &qp->mqp, &context); 3443 if (err) { 3444 err = -EINVAL; 3445 goto out; 3446 } 3447 3448 mlx4_state = be32_to_cpu(context.flags) >> 28; 3449 3450 qp->state = to_ib_qp_state(mlx4_state); 3451 qp_attr->qp_state = qp->state; 3452 qp_attr->path_mtu = context.mtu_msgmax >> 5; 3453 qp_attr->path_mig_state = 3454 to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3); 3455 qp_attr->qkey = be32_to_cpu(context.qkey); 3456 qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff; 3457 qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff; 3458 qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff; 3459 qp_attr->qp_access_flags = 3460 to_ib_qp_access_flags(be32_to_cpu(context.params2)); 3461 3462 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { 3463 to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); 3464 to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); 3465 qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; 3466 qp_attr->alt_port_num = 3467 rdma_ah_get_port_num(&qp_attr->alt_ah_attr); 3468 } 3469 3470 qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; 3471 if (qp_attr->qp_state == IB_QPS_INIT) 3472 qp_attr->port_num = qp->port; 3473 else 3474 qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; 3475 3476 /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ 3477 qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; 3478 3479 qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7); 3480 3481 qp_attr->max_dest_rd_atomic = 3482 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7); 3483 qp_attr->min_rnr_timer = 3484 (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f; 3485 qp_attr->timeout = context.pri_path.ackto >> 3; 3486 qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7; 3487 qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7; 3488 qp_attr->alt_timeout = context.alt_path.ackto >> 3; 3489 3490 done: 3491 qp_attr->cur_qp_state = qp_attr->qp_state; 3492 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; 3493 qp_attr->cap.max_recv_sge = qp->rq.max_gs; 3494 3495 if (!ibqp->uobject) { 3496 qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; 3497 qp_attr->cap.max_send_sge = qp->sq.max_gs; 3498 } else { 3499 qp_attr->cap.max_send_wr = 0; 3500 qp_attr->cap.max_send_sge = 0; 3501 } 3502 3503 /* 3504 * We don't support inline sends for kernel QPs (yet), and we 3505 * don't know what userspace's value should be. 
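 * Report 0 so callers do not rely on inline data for this QP.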
3506 */ 3507 qp_attr->cap.max_inline_data = 0; 3508 3509 qp_init_attr->cap = qp_attr->cap; 3510 3511 qp_init_attr->create_flags = 0; 3512 if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) 3513 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; 3514 3515 if (qp->flags & MLX4_IB_QP_LSO) 3516 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; 3517 3518 if (qp->flags & MLX4_IB_QP_NETIF) 3519 qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP; 3520 3521 qp_init_attr->sq_sig_type = 3522 qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ? 3523 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; 3524 3525 out: 3526 mutex_unlock(&qp->mutex); 3527 return err; 3528 } 3529 3530
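/*
 * Illustrative sketch (not part of the driver): kernel ULPs do not call
 * mlx4_ib_post_send() directly; they build work requests and post them
 * through the core verbs, which dispatch here via the device's verbs
 * table. Assuming a registered buffer at dma_addr/len, a remote
 * remote_addr/rkey and a connected RC QP "qp", a signaled RDMA WRITE
 * post could look roughly like this (error handling omitted):
 *
 *	struct ib_sge sge = {
 *		.addr   = dma_addr,
 *		.length = len,
 *		.lkey   = pd->local_dma_lkey,
 *	};
 *	struct ib_rdma_wr wr = {
 *		.wr = {
 *			.opcode     = IB_WR_RDMA_WRITE,
 *			.sg_list    = &sge,
 *			.num_sge    = 1,
 *			.send_flags = IB_SEND_SIGNALED,
 *		},
 *		.remote_addr = remote_addr,
 *		.rkey        = rkey,
 *	};
 *	struct ib_send_wr *bad_wr;
 *	int ret = ib_post_send(qp, &wr.wr, &bad_wr);
 *
 * The completion for this WR is then reaped from qp->send_cq with
 * ib_poll_cq().
 */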