xref: /openbmc/linux/drivers/infiniband/hw/mlx5/wr.c (revision 724ba675)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
4  */
5 
6 #include <linux/gfp.h>
7 #include <linux/mlx5/qp.h>
8 #include <linux/mlx5/driver.h>
9 #include "wr.h"
10 #include "umr.h"
11 
/*
 * Translation table indexed by IB verbs work-request opcode; each entry
 * holds the MLX5_OPCODE_* value used for that request.  IB_WR_LOCAL_INV,
 * IB_WR_REG_MR and MLX5_IB_WR_UMR all translate to MLX5_OPCODE_UMR.
 * Indices that have no designated initializer are implicitly zero.
 */
12 static const u32 mlx5_ib_opcode[] = {
13 	[IB_WR_SEND]				= MLX5_OPCODE_SEND,
14 	[IB_WR_LSO]				= MLX5_OPCODE_LSO,
15 	[IB_WR_SEND_WITH_IMM]			= MLX5_OPCODE_SEND_IMM,
16 	[IB_WR_RDMA_WRITE]			= MLX5_OPCODE_RDMA_WRITE,
17 	[IB_WR_RDMA_WRITE_WITH_IMM]		= MLX5_OPCODE_RDMA_WRITE_IMM,
18 	[IB_WR_RDMA_READ]			= MLX5_OPCODE_RDMA_READ,
19 	[IB_WR_ATOMIC_CMP_AND_SWP]		= MLX5_OPCODE_ATOMIC_CS,
20 	[IB_WR_ATOMIC_FETCH_AND_ADD]		= MLX5_OPCODE_ATOMIC_FA,
21 	[IB_WR_SEND_WITH_INV]			= MLX5_OPCODE_SEND_INVAL,
22 	[IB_WR_LOCAL_INV]			= MLX5_OPCODE_UMR,
23 	[IB_WR_REG_MR]				= MLX5_OPCODE_UMR,
24 	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= MLX5_OPCODE_ATOMIC_MASKED_CS,
25 	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= MLX5_OPCODE_ATOMIC_MASKED_FA,
26 	[MLX5_IB_WR_UMR]			= MLX5_OPCODE_UMR,
27 };
28 
29 int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
30 {
31 	struct mlx5_ib_cq *cq;
32 	unsigned int cur;
33 
34 	cur = wq->head - wq->tail;
35 	if (likely(cur + nreq < wq->max_post))
36 		return 0;
37 
38 	cq = to_mcq(ib_cq);
39 	spin_lock(&cq->lock);
40 	cur = wq->head - wq->tail;
41 	spin_unlock(&cq->lock);
42 
43 	return cur + nreq >= wq->max_post;
44 }
45 
46 static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
47 					  u64 remote_addr, u32 rkey)
48 {
49 	rseg->raddr    = cpu_to_be64(remote_addr);
50 	rseg->rkey     = cpu_to_be32(rkey);
51 	rseg->reserved = 0;
52 }
53 
54 static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
55 			void **seg, int *size, void **cur_edge)
56 {
57 	struct mlx5_wqe_eth_seg *eseg = *seg;
58 
59 	memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
60 
61 	if (wr->send_flags & IB_SEND_IP_CSUM)
62 		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
63 				 MLX5_ETH_WQE_L4_CSUM;
64 
65 	if (wr->opcode == IB_WR_LSO) {
66 		struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
67 		size_t left, copysz;
68 		void *pdata = ud_wr->header;
69 		size_t stride;
70 
71 		left = ud_wr->hlen;
72 		eseg->mss = cpu_to_be16(ud_wr->mss);
73 		eseg->inline_hdr.sz = cpu_to_be16(left);
74 
75 		/* mlx5r_memcpy_send_wqe should get a 16B align address. Hence,
76 		 * we first copy up to the current edge and then, if needed,
77 		 * continue to mlx5r_memcpy_send_wqe.
78 		 */
79 		copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
80 			       left);
81 		memcpy(eseg->inline_hdr.start, pdata, copysz);
82 		stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
83 			       sizeof(eseg->inline_hdr.start) + copysz, 16);
84 		*size += stride / 16;
85 		*seg += stride;
86 
87 		if (copysz < left) {
88 			handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
89 			left -= copysz;
90 			pdata += copysz;
91 			mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size,
92 					      pdata, left);
93 		}
94 
95 		return;
96 	}
97 
98 	*seg += sizeof(struct mlx5_wqe_eth_seg);
99 	*size += sizeof(struct mlx5_wqe_eth_seg) / 16;
100 }
101 
102 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
103 			     const struct ib_send_wr *wr)
104 {
105 	memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
106 	dseg->av.dqp_dct =
107 		cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
108 	dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
109 }
110 
111 static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
112 {
113 	dseg->byte_count = cpu_to_be32(sg->length);
114 	dseg->lkey       = cpu_to_be32(sg->lkey);
115 	dseg->addr       = cpu_to_be64(sg->addr);
116 }
117 
118 static __be64 frwr_mkey_mask(bool atomic)
119 {
120 	u64 result;
121 
122 	result = MLX5_MKEY_MASK_LEN		|
123 		MLX5_MKEY_MASK_PAGE_SIZE	|
124 		MLX5_MKEY_MASK_START_ADDR	|
125 		MLX5_MKEY_MASK_EN_RINVAL	|
126 		MLX5_MKEY_MASK_KEY		|
127 		MLX5_MKEY_MASK_LR		|
128 		MLX5_MKEY_MASK_LW		|
129 		MLX5_MKEY_MASK_RR		|
130 		MLX5_MKEY_MASK_RW		|
131 		MLX5_MKEY_MASK_SMALL_FENCE	|
132 		MLX5_MKEY_MASK_FREE;
133 
134 	if (atomic)
135 		result |= MLX5_MKEY_MASK_A;
136 
137 	return cpu_to_be64(result);
138 }
139 
140 static __be64 sig_mkey_mask(void)
141 {
142 	u64 result;
143 
144 	result = MLX5_MKEY_MASK_LEN		|
145 		MLX5_MKEY_MASK_PAGE_SIZE	|
146 		MLX5_MKEY_MASK_START_ADDR	|
147 		MLX5_MKEY_MASK_EN_SIGERR	|
148 		MLX5_MKEY_MASK_EN_RINVAL	|
149 		MLX5_MKEY_MASK_KEY		|
150 		MLX5_MKEY_MASK_LR		|
151 		MLX5_MKEY_MASK_LW		|
152 		MLX5_MKEY_MASK_RR		|
153 		MLX5_MKEY_MASK_RW		|
154 		MLX5_MKEY_MASK_SMALL_FENCE	|
155 		MLX5_MKEY_MASK_FREE		|
156 		MLX5_MKEY_MASK_BSF_EN;
157 
158 	return cpu_to_be64(result);
159 }
160 
161 static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
162 			    struct mlx5_ib_mr *mr, u8 flags, bool atomic)
163 {
164 	int size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size;
165 
166 	memset(umr, 0, sizeof(*umr));
167 
168 	umr->flags = flags;
169 	umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
170 	umr->mkey_mask = frwr_mkey_mask(atomic);
171 }
172 
173 static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
174 {
175 	memset(umr, 0, sizeof(*umr));
176 	umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
177 	umr->flags = MLX5_UMR_INLINE;
178 }
179 
180 static u8 get_umr_flags(int acc)
181 {
182 	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
183 	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
184 	       (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
185 	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
186 		MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
187 }
188 
189 static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
190 			     struct mlx5_ib_mr *mr,
191 			     u32 key, int access)
192 {
193 	int ndescs = ALIGN(mr->mmkey.ndescs + mr->meta_ndescs, 8) >> 1;
194 
195 	memset(seg, 0, sizeof(*seg));
196 
197 	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
198 		seg->log2_page_size = ilog2(mr->ibmr.page_size);
199 	else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
200 		/* KLMs take twice the size of MTTs */
201 		ndescs *= 2;
202 
203 	seg->flags = get_umr_flags(access) | mr->access_mode;
204 	seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
205 	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
206 	seg->start_addr = cpu_to_be64(mr->ibmr.iova);
207 	seg->len = cpu_to_be64(mr->ibmr.length);
208 	seg->xlt_oct_size = cpu_to_be32(ndescs);
209 }
210 
211 static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
212 {
213 	memset(seg, 0, sizeof(*seg));
214 	seg->status = MLX5_MKEY_STATUS_FREE;
215 }
216 
217 static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
218 			     struct mlx5_ib_mr *mr,
219 			     struct mlx5_ib_pd *pd)
220 {
221 	int bcount = mr->desc_size * (mr->mmkey.ndescs + mr->meta_ndescs);
222 
223 	dseg->addr = cpu_to_be64(mr->desc_map);
224 	dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
225 	dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
226 }
227 
228 static __be32 send_ieth(const struct ib_send_wr *wr)
229 {
230 	switch (wr->opcode) {
231 	case IB_WR_SEND_WITH_IMM:
232 	case IB_WR_RDMA_WRITE_WITH_IMM:
233 		return wr->ex.imm_data;
234 
235 	case IB_WR_SEND_WITH_INV:
236 		return cpu_to_be32(wr->ex.invalidate_rkey);
237 
238 	default:
239 		return 0;
240 	}
241 }
242 
243 static u8 calc_sig(void *wqe, int size)
244 {
245 	u8 *p = wqe;
246 	u8 res = 0;
247 	int i;
248 
249 	for (i = 0; i < size; i++)
250 		res ^= p[i];
251 
252 	return ~res;
253 }
254 
255 static u8 wq_sig(void *wqe)
256 {
257 	return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
258 }
259 
260 static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
261 			    void **wqe, int *wqe_sz, void **cur_edge)
262 {
263 	struct mlx5_wqe_inline_seg *seg;
264 	size_t offset;
265 	int inl = 0;
266 	int i;
267 
268 	seg = *wqe;
269 	*wqe += sizeof(*seg);
270 	offset = sizeof(*seg);
271 
272 	for (i = 0; i < wr->num_sge; i++) {
273 		size_t len  = wr->sg_list[i].length;
274 		void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
275 
276 		inl += len;
277 
278 		if (unlikely(inl > qp->max_inline_data))
279 			return -ENOMEM;
280 
281 		while (likely(len)) {
282 			size_t leftlen;
283 			size_t copysz;
284 
285 			handle_post_send_edge(&qp->sq, wqe,
286 					      *wqe_sz + (offset >> 4),
287 					      cur_edge);
288 
289 			leftlen = *cur_edge - *wqe;
290 			copysz = min_t(size_t, leftlen, len);
291 
292 			memcpy(*wqe, addr, copysz);
293 			len -= copysz;
294 			addr += copysz;
295 			*wqe += copysz;
296 			offset += copysz;
297 		}
298 	}
299 
300 	seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
301 
302 	*wqe_sz +=  ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
303 
304 	return 0;
305 }
306 
307 static u16 prot_field_size(enum ib_signature_type type)
308 {
309 	switch (type) {
310 	case IB_SIG_TYPE_T10_DIF:
311 		return MLX5_DIF_SIZE;
312 	default:
313 		return 0;
314 	}
315 }
316 
317 static u8 bs_selector(int block_size)
318 {
319 	switch (block_size) {
320 	case 512:	    return 0x1;
321 	case 520:	    return 0x2;
322 	case 4096:	    return 0x3;
323 	case 4160:	    return 0x4;
324 	case 1073741824:    return 0x5;
325 	default:	    return 0;
326 	}
327 }
328 
329 static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
330 			      struct mlx5_bsf_inl *inl)
331 {
332 	/* Valid inline section and allow BSF refresh */
333 	inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
334 				       MLX5_BSF_REFRESH_DIF);
335 	inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
336 	inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
337 	/* repeating block */
338 	inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
339 	inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
340 			MLX5_DIF_CRC : MLX5_DIF_IPCS;
341 
342 	if (domain->sig.dif.ref_remap)
343 		inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
344 
345 	if (domain->sig.dif.app_escape) {
346 		if (domain->sig.dif.ref_escape)
347 			inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
348 		else
349 			inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
350 	}
351 
352 	inl->dif_app_bitmask_check =
353 		cpu_to_be16(domain->sig.dif.apptag_check_mask);
354 }
355 
356 static int mlx5_set_bsf(struct ib_mr *sig_mr,
357 			struct ib_sig_attrs *sig_attrs,
358 			struct mlx5_bsf *bsf, u32 data_size)
359 {
360 	struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
361 	struct mlx5_bsf_basic *basic = &bsf->basic;
362 	struct ib_sig_domain *mem = &sig_attrs->mem;
363 	struct ib_sig_domain *wire = &sig_attrs->wire;
364 
365 	memset(bsf, 0, sizeof(*bsf));
366 
367 	/* Basic + Extended + Inline */
368 	basic->bsf_size_sbs = 1 << 7;
369 	/* Input domain check byte mask */
370 	basic->check_byte_mask = sig_attrs->check_mask;
371 	basic->raw_data_size = cpu_to_be32(data_size);
372 
373 	/* Memory domain */
374 	switch (sig_attrs->mem.sig_type) {
375 	case IB_SIG_TYPE_NONE:
376 		break;
377 	case IB_SIG_TYPE_T10_DIF:
378 		basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
379 		basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
380 		mlx5_fill_inl_bsf(mem, &bsf->m_inl);
381 		break;
382 	default:
383 		return -EINVAL;
384 	}
385 
386 	/* Wire domain */
387 	switch (sig_attrs->wire.sig_type) {
388 	case IB_SIG_TYPE_NONE:
389 		break;
390 	case IB_SIG_TYPE_T10_DIF:
391 		if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
392 		    mem->sig_type == wire->sig_type) {
393 			/* Same block structure */
394 			basic->bsf_size_sbs |= 1 << 4;
395 			if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
396 				basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
397 			if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
398 				basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
399 			if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
400 				basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
401 		} else
402 			basic->wire.bs_selector =
403 				bs_selector(wire->sig.dif.pi_interval);
404 
405 		basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
406 		mlx5_fill_inl_bsf(wire, &bsf->w_inl);
407 		break;
408 	default:
409 		return -EINVAL;
410 	}
411 
412 	return 0;
413 }
414 
415 
416 static int set_sig_data_segment(const struct ib_send_wr *send_wr,
417 				struct ib_mr *sig_mr,
418 				struct ib_sig_attrs *sig_attrs,
419 				struct mlx5_ib_qp *qp, void **seg, int *size,
420 				void **cur_edge)
421 {
422 	struct mlx5_bsf *bsf;
423 	u32 data_len;
424 	u32 data_key;
425 	u64 data_va;
426 	u32 prot_len = 0;
427 	u32 prot_key = 0;
428 	u64 prot_va = 0;
429 	bool prot = false;
430 	int ret;
431 	int wqe_size;
432 	struct mlx5_ib_mr *mr = to_mmr(sig_mr);
433 	struct mlx5_ib_mr *pi_mr = mr->pi_mr;
434 
435 	data_len = pi_mr->data_length;
436 	data_key = pi_mr->ibmr.lkey;
437 	data_va = pi_mr->data_iova;
438 	if (pi_mr->meta_ndescs) {
439 		prot_len = pi_mr->meta_length;
440 		prot_key = pi_mr->ibmr.lkey;
441 		prot_va = pi_mr->pi_iova;
442 		prot = true;
443 	}
444 
445 	if (!prot || (data_key == prot_key && data_va == prot_va &&
446 		      data_len == prot_len)) {
447 		/**
448 		 * Source domain doesn't contain signature information
449 		 * or data and protection are interleaved in memory.
450 		 * So need construct:
451 		 *                  ------------------
452 		 *                 |     data_klm     |
453 		 *                  ------------------
454 		 *                 |       BSF        |
455 		 *                  ------------------
456 		 **/
457 		struct mlx5_klm *data_klm = *seg;
458 
459 		data_klm->bcount = cpu_to_be32(data_len);
460 		data_klm->key = cpu_to_be32(data_key);
461 		data_klm->va = cpu_to_be64(data_va);
462 		wqe_size = ALIGN(sizeof(*data_klm), 64);
463 	} else {
464 		/**
465 		 * Source domain contains signature information
466 		 * So need construct a strided block format:
467 		 *               ---------------------------
468 		 *              |     stride_block_ctrl     |
469 		 *               ---------------------------
470 		 *              |          data_klm         |
471 		 *               ---------------------------
472 		 *              |          prot_klm         |
473 		 *               ---------------------------
474 		 *              |             BSF           |
475 		 *               ---------------------------
476 		 **/
477 		struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
478 		struct mlx5_stride_block_entry *data_sentry;
479 		struct mlx5_stride_block_entry *prot_sentry;
480 		u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
481 		int prot_size;
482 
483 		sblock_ctrl = *seg;
484 		data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
485 		prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
486 
487 		prot_size = prot_field_size(sig_attrs->mem.sig_type);
488 		if (!prot_size) {
489 			pr_err("Bad block size given: %u\n", block_size);
490 			return -EINVAL;
491 		}
492 		sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
493 							    prot_size);
494 		sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
495 		sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
496 		sblock_ctrl->num_entries = cpu_to_be16(2);
497 
498 		data_sentry->bcount = cpu_to_be16(block_size);
499 		data_sentry->key = cpu_to_be32(data_key);
500 		data_sentry->va = cpu_to_be64(data_va);
501 		data_sentry->stride = cpu_to_be16(block_size);
502 
503 		prot_sentry->bcount = cpu_to_be16(prot_size);
504 		prot_sentry->key = cpu_to_be32(prot_key);
505 		prot_sentry->va = cpu_to_be64(prot_va);
506 		prot_sentry->stride = cpu_to_be16(prot_size);
507 
508 		wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
509 				 sizeof(*prot_sentry), 64);
510 	}
511 
512 	*seg += wqe_size;
513 	*size += wqe_size / 16;
514 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
515 
516 	bsf = *seg;
517 	ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
518 	if (ret)
519 		return -EINVAL;
520 
521 	*seg += sizeof(*bsf);
522 	*size += sizeof(*bsf) / 16;
523 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
524 
525 	return 0;
526 }
527 
528 static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
529 				 struct ib_mr *sig_mr, int access_flags,
530 				 u32 size, u32 length, u32 pdn)
531 {
532 	u32 sig_key = sig_mr->rkey;
533 	u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
534 
535 	memset(seg, 0, sizeof(*seg));
536 
537 	seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
538 	seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
539 	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
540 				    MLX5_MKEY_BSF_EN | pdn);
541 	seg->len = cpu_to_be64(length);
542 	seg->xlt_oct_size = cpu_to_be32(mlx5r_umr_get_xlt_octo(size));
543 	seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
544 }
545 
546 static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
547 				u32 size)
548 {
549 	memset(umr, 0, sizeof(*umr));
550 
551 	umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
552 	umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
553 	umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
554 	umr->mkey_mask = sig_mkey_mask();
555 }
556 
557 static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
558 			 struct mlx5_ib_qp *qp, void **seg, int *size,
559 			 void **cur_edge)
560 {
561 	const struct ib_reg_wr *wr = reg_wr(send_wr);
562 	struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
563 	struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
564 	struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
565 	u32 pdn = to_mpd(qp->ibqp.pd)->pdn;
566 	u32 xlt_size;
567 	int region_len, ret;
568 
569 	if (unlikely(send_wr->num_sge != 0) ||
570 	    unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
571 	    unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
572 	    unlikely(!sig_mr->sig->sig_status_checked))
573 		return -EINVAL;
574 
575 	/* length of the protected region, data + protection */
576 	region_len = pi_mr->ibmr.length;
577 
578 	/**
579 	 * KLM octoword size - if protection was provided
580 	 * then we use strided block format (3 octowords),
581 	 * else we use single KLM (1 octoword)
582 	 **/
583 	if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
584 		xlt_size = 0x30;
585 	else
586 		xlt_size = sizeof(struct mlx5_klm);
587 
588 	set_sig_umr_segment(*seg, xlt_size);
589 	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
590 	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
591 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
592 
593 	set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
594 			     pdn);
595 	*seg += sizeof(struct mlx5_mkey_seg);
596 	*size += sizeof(struct mlx5_mkey_seg) / 16;
597 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
598 
599 	ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
600 				   cur_edge);
601 	if (ret)
602 		return ret;
603 
604 	sig_mr->sig->sig_status_checked = false;
605 	return 0;
606 }
607 
608 static int set_psv_wr(struct ib_sig_domain *domain,
609 		      u32 psv_idx, void **seg, int *size)
610 {
611 	struct mlx5_seg_set_psv *psv_seg = *seg;
612 
613 	memset(psv_seg, 0, sizeof(*psv_seg));
614 	psv_seg->psv_num = cpu_to_be32(psv_idx);
615 	switch (domain->sig_type) {
616 	case IB_SIG_TYPE_NONE:
617 		break;
618 	case IB_SIG_TYPE_T10_DIF:
619 		psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
620 						     domain->sig.dif.app_tag);
621 		psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
622 		break;
623 	default:
624 		pr_err("Bad signature type (%d) is given.\n",
625 		       domain->sig_type);
626 		return -EINVAL;
627 	}
628 
629 	*seg += sizeof(*psv_seg);
630 	*size += sizeof(*psv_seg) / 16;
631 
632 	return 0;
633 }
634 
635 static int set_reg_wr(struct mlx5_ib_qp *qp,
636 		      const struct ib_reg_wr *wr,
637 		      void **seg, int *size, void **cur_edge,
638 		      bool check_not_free)
639 {
640 	struct mlx5_ib_mr *mr = to_mmr(wr->mr);
641 	struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
642 	struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
643 	int mr_list_size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size;
644 	bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
645 	bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
646 	u8 flags = 0;
647 
648 	/* Matches access in mlx5_set_umr_free_mkey().
649 	 * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and
650 	 * kernel ULPs are not aware of it, so we don't set it here.
651 	 */
652 	if (!mlx5r_umr_can_reconfig(dev, 0, wr->access)) {
653 		mlx5_ib_warn(
654 			to_mdev(qp->ibqp.device),
655 			"Fast update for MR access flags is not possible\n");
656 		return -EINVAL;
657 	}
658 
659 	if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
660 		mlx5_ib_warn(to_mdev(qp->ibqp.device),
661 			     "Invalid IB_SEND_INLINE send flag\n");
662 		return -EINVAL;
663 	}
664 
665 	if (check_not_free)
666 		flags |= MLX5_UMR_CHECK_NOT_FREE;
667 	if (umr_inline)
668 		flags |= MLX5_UMR_INLINE;
669 
670 	set_reg_umr_seg(*seg, mr, flags, atomic);
671 	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
672 	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
673 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
674 
675 	set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
676 	*seg += sizeof(struct mlx5_mkey_seg);
677 	*size += sizeof(struct mlx5_mkey_seg) / 16;
678 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
679 
680 	if (umr_inline) {
681 		mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
682 				      mr_list_size);
683 		*size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
684 	} else {
685 		set_reg_data_seg(*seg, mr, pd);
686 		*seg += sizeof(struct mlx5_wqe_data_seg);
687 		*size += (sizeof(struct mlx5_wqe_data_seg) / 16);
688 	}
689 	return 0;
690 }
691 
692 static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
693 			void **cur_edge)
694 {
695 	set_linv_umr_seg(*seg);
696 	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
697 	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
698 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
699 	set_linv_mkey_seg(*seg);
700 	*seg += sizeof(struct mlx5_mkey_seg);
701 	*size += sizeof(struct mlx5_mkey_seg) / 16;
702 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
703 }
704 
705 static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
706 {
707 	__be32 *p = NULL;
708 	int i, j;
709 
710 	pr_debug("dump WQE index %u:\n", idx);
711 	for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
712 		if ((i & 0xf) == 0) {
713 			p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
714 			pr_debug("WQBB at %p:\n", (void *)p);
715 			j = 0;
716 			idx = (idx + 1) & (qp->sq.wqe_cnt - 1);
717 		}
718 		pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
719 			 be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
720 			 be32_to_cpu(p[j + 3]));
721 	}
722 }
723 
724 int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
725 		    struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
726 		    int *size, void **cur_edge, int nreq, __be32 general_id,
727 		    bool send_signaled, bool solicited)
728 {
729 	if (unlikely(mlx5r_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
730 		return -ENOMEM;
731 
732 	*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
733 	*seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
734 	*ctrl = *seg;
735 	*(uint32_t *)(*seg + 8) = 0;
736 	(*ctrl)->general_id = general_id;
737 	(*ctrl)->fm_ce_se = qp->sq_signal_bits |
738 			    (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
739 			    (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
740 
741 	*seg += sizeof(**ctrl);
742 	*size = sizeof(**ctrl) / 16;
743 	*cur_edge = qp->sq.cur_edge;
744 
745 	return 0;
746 }
747 
748 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
749 		     struct mlx5_wqe_ctrl_seg **ctrl,
750 		     const struct ib_send_wr *wr, unsigned int *idx, int *size,
751 		     void **cur_edge, int nreq)
752 {
753 	return mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
754 			       send_ieth(wr), wr->send_flags & IB_SEND_SIGNALED,
755 			       wr->send_flags & IB_SEND_SOLICITED);
756 }
757 
758 void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
759 		      void *seg, u8 size, void *cur_edge, unsigned int idx,
760 		      u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode)
761 {
762 	u8 opmod = 0;
763 
764 	ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
765 					     mlx5_opcode | ((u32)opmod << 24));
766 	ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
767 	ctrl->fm_ce_se |= fence;
768 	if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE))
769 		ctrl->signature = wq_sig(ctrl);
770 
771 	qp->sq.wrid[idx] = wr_id;
772 	qp->sq.w_list[idx].opcode = mlx5_opcode;
773 	qp->sq.wqe_head[idx] = qp->sq.head + nreq;
774 	qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
775 	qp->sq.w_list[idx].next = qp->sq.cur_post;
776 
777 	/* We save the edge which was possibly updated during the WQE
778 	 * construction, into SQ's cache.
779 	 */
780 	seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
781 	qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
782 			  get_sq_edge(&qp->sq, qp->sq.cur_post &
783 				      (qp->sq.wqe_cnt - 1)) :
784 			  cur_edge;
785 }
786 
787 static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size)
788 {
789 	set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey);
790 	*seg += sizeof(struct mlx5_wqe_raddr_seg);
791 	*size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
792 }
793 
794 static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
795 			     struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
796 			     int *size, void **cur_edge, unsigned int idx)
797 {
798 	qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
799 	(*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey);
800 	set_linv_wr(qp, seg, size, cur_edge);
801 }
802 
803 static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
804 			 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
805 			 void **cur_edge, unsigned int idx)
806 {
807 	qp->sq.wr_data[idx] = IB_WR_REG_MR;
808 	(*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key);
809 	return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true);
810 }
811 
812 static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
813 		      const struct ib_send_wr *wr,
814 		      struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
815 		      void **cur_edge, unsigned int *idx, int nreq,
816 		      struct ib_sig_domain *domain, u32 psv_index,
817 		      u8 next_fence)
818 {
819 	int err;
820 
821 	/*
822 	 * SET_PSV WQEs are not signaled and solicited on error.
823 	 */
824 	err = mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
825 			      send_ieth(wr), false, true);
826 	if (unlikely(err)) {
827 		mlx5_ib_warn(dev, "\n");
828 		err = -ENOMEM;
829 		goto out;
830 	}
831 	err = set_psv_wr(domain, psv_index, seg, size);
832 	if (unlikely(err)) {
833 		mlx5_ib_warn(dev, "\n");
834 		goto out;
835 	}
836 	mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
837 			 nreq, next_fence, MLX5_OPCODE_SET_PSV);
838 
839 out:
840 	return err;
841 }
842 
843 static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
844 				   struct mlx5_ib_qp *qp,
845 				   const struct ib_send_wr *wr,
846 				   struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
847 				   int *size, void **cur_edge,
848 				   unsigned int *idx, int nreq, u8 fence,
849 				   u8 next_fence)
850 {
851 	struct mlx5_ib_mr *mr;
852 	struct mlx5_ib_mr *pi_mr;
853 	struct mlx5_ib_mr pa_pi_mr;
854 	struct ib_sig_attrs *sig_attrs;
855 	struct ib_reg_wr reg_pi_wr;
856 	int err;
857 
858 	qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY;
859 
860 	mr = to_mmr(reg_wr(wr)->mr);
861 	pi_mr = mr->pi_mr;
862 
863 	if (pi_mr) {
864 		memset(&reg_pi_wr, 0,
865 		       sizeof(struct ib_reg_wr));
866 
867 		reg_pi_wr.mr = &pi_mr->ibmr;
868 		reg_pi_wr.access = reg_wr(wr)->access;
869 		reg_pi_wr.key = pi_mr->ibmr.rkey;
870 
871 		(*ctrl)->imm = cpu_to_be32(reg_pi_wr.key);
872 		/* UMR for data + prot registration */
873 		err = set_reg_wr(qp, &reg_pi_wr, seg, size, cur_edge, false);
874 		if (unlikely(err))
875 			goto out;
876 
877 		mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx,
878 				 wr->wr_id, nreq, fence, MLX5_OPCODE_UMR);
879 
880 		err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
881 		if (unlikely(err)) {
882 			mlx5_ib_warn(dev, "\n");
883 			err = -ENOMEM;
884 			goto out;
885 		}
886 	} else {
887 		memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr));
888 		/* No UMR, use local_dma_lkey */
889 		pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey;
890 		pa_pi_mr.mmkey.ndescs = mr->mmkey.ndescs;
891 		pa_pi_mr.data_length = mr->data_length;
892 		pa_pi_mr.data_iova = mr->data_iova;
893 		if (mr->meta_ndescs) {
894 			pa_pi_mr.meta_ndescs = mr->meta_ndescs;
895 			pa_pi_mr.meta_length = mr->meta_length;
896 			pa_pi_mr.pi_iova = mr->pi_iova;
897 		}
898 
899 		pa_pi_mr.ibmr.length = mr->ibmr.length;
900 		mr->pi_mr = &pa_pi_mr;
901 	}
902 	(*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey);
903 	/* UMR for sig MR */
904 	err = set_pi_umr_wr(wr, qp, seg, size, cur_edge);
905 	if (unlikely(err)) {
906 		mlx5_ib_warn(dev, "\n");
907 		goto out;
908 	}
909 	mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
910 			 nreq, fence, MLX5_OPCODE_UMR);
911 
912 	sig_attrs = mr->ibmr.sig_attrs;
913 	err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
914 			 &sig_attrs->mem, mr->sig->psv_memory.psv_idx,
915 			 next_fence);
916 	if (unlikely(err))
917 		goto out;
918 
919 	err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
920 			 &sig_attrs->wire, mr->sig->psv_wire.psv_idx,
921 			 next_fence);
922 	if (unlikely(err))
923 		goto out;
924 
925 	qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
926 
927 out:
928 	return err;
929 }
930 
931 static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
932 			 const struct ib_send_wr *wr,
933 			 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
934 			 void **cur_edge, unsigned int *idx, int nreq, u8 fence,
935 			 u8 next_fence, int *num_sge)
936 {
937 	int err = 0;
938 
939 	switch (wr->opcode) {
940 	case IB_WR_RDMA_READ:
941 	case IB_WR_RDMA_WRITE:
942 	case IB_WR_RDMA_WRITE_WITH_IMM:
943 		handle_rdma_op(wr, seg, size);
944 		break;
945 
946 	case IB_WR_ATOMIC_CMP_AND_SWP:
947 	case IB_WR_ATOMIC_FETCH_AND_ADD:
948 	case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
949 		mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
950 		err = -EOPNOTSUPP;
951 		goto out;
952 
953 	case IB_WR_LOCAL_INV:
954 		handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx);
955 		*num_sge = 0;
956 		break;
957 
958 	case IB_WR_REG_MR:
959 		err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx);
960 		if (unlikely(err))
961 			goto out;
962 		*num_sge = 0;
963 		break;
964 
965 	case IB_WR_REG_MR_INTEGRITY:
966 		err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size,
967 					      cur_edge, idx, nreq, fence,
968 					      next_fence);
969 		if (unlikely(err))
970 			goto out;
971 		*num_sge = 0;
972 		break;
973 
974 	default:
975 		break;
976 	}
977 
978 out:
979 	return err;
980 }
981 
982 static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size)
983 {
984 	switch (wr->opcode) {
985 	case IB_WR_RDMA_WRITE:
986 	case IB_WR_RDMA_WRITE_WITH_IMM:
987 		handle_rdma_op(wr, seg, size);
988 		break;
989 	default:
990 		break;
991 	}
992 }
993 
994 static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp,
995 			      const struct ib_send_wr *wr, void **seg,
996 			      int *size, void **cur_edge)
997 {
998 	set_datagram_seg(*seg, wr);
999 	*seg += sizeof(struct mlx5_wqe_datagram_seg);
1000 	*size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
1001 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1002 }
1003 
1004 static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
1005 			  void **seg, int *size, void **cur_edge)
1006 {
1007 	set_datagram_seg(*seg, wr);
1008 	*seg += sizeof(struct mlx5_wqe_datagram_seg);
1009 	*size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
1010 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1011 
1012 	/* handle qp that supports ud offload */
1013 	if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
1014 		struct mlx5_wqe_eth_pad *pad;
1015 
1016 		pad = *seg;
1017 		memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
1018 		*seg += sizeof(struct mlx5_wqe_eth_pad);
1019 		*size += sizeof(struct mlx5_wqe_eth_pad) / 16;
1020 		set_eth_seg(wr, qp, seg, size, cur_edge);
1021 		handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1022 	}
1023 }
1024 
1025 void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
1026 		   struct mlx5_wqe_ctrl_seg *ctrl)
1027 {
1028 	struct mlx5_bf *bf = &qp->bf;
1029 
1030 	qp->sq.head += nreq;
1031 
1032 	/* Make sure that descriptors are written before
1033 	 * updating doorbell record and ringing the doorbell
1034 	 */
1035 	wmb();
1036 
1037 	qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
1038 
1039 	/* Make sure doorbell record is visible to the HCA before
1040 	 * we hit doorbell.
1041 	 */
1042 	wmb();
1043 
1044 	mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
1045 	/* Make sure doorbells don't leak out of SQ spinlock
1046 	 * and reach the HCA out of order.
1047 	 */
1048 	bf->offset ^= bf->buf_size;
1049 }
1050 
1051 int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1052 		      const struct ib_send_wr **bad_wr, bool drain)
1053 {
1054 	struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
1055 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1056 	struct mlx5_core_dev *mdev = dev->mdev;
1057 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
1058 	struct mlx5_wqe_xrc_seg *xrc;
1059 	void *cur_edge;
1060 	int size;
1061 	unsigned long flags;
1062 	unsigned int idx;
1063 	int err = 0;
1064 	int num_sge;
1065 	void *seg;
1066 	int nreq;
1067 	int i;
1068 	u8 next_fence = 0;
1069 	u8 fence;
1070 
1071 	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
1072 		     !drain)) {
1073 		*bad_wr = wr;
1074 		return -EIO;
1075 	}
1076 
1077 	if (qp->type == IB_QPT_GSI)
1078 		return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
1079 
1080 	spin_lock_irqsave(&qp->sq.lock, flags);
1081 
1082 	for (nreq = 0; wr; nreq++, wr = wr->next) {
1083 		if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
1084 			mlx5_ib_warn(dev, "\n");
1085 			err = -EINVAL;
1086 			*bad_wr = wr;
1087 			goto out;
1088 		}
1089 
1090 		num_sge = wr->num_sge;
1091 		if (unlikely(num_sge > qp->sq.max_gs)) {
1092 			mlx5_ib_warn(dev, "\n");
1093 			err = -EINVAL;
1094 			*bad_wr = wr;
1095 			goto out;
1096 		}
1097 
1098 		err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
1099 				nreq);
1100 		if (err) {
1101 			mlx5_ib_warn(dev, "\n");
1102 			err = -ENOMEM;
1103 			*bad_wr = wr;
1104 			goto out;
1105 		}
1106 
1107 		if (wr->opcode == IB_WR_REG_MR ||
1108 		    wr->opcode == IB_WR_REG_MR_INTEGRITY) {
1109 			fence = dev->umr_fence;
1110 			next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
1111 		} else  {
1112 			if (wr->send_flags & IB_SEND_FENCE) {
1113 				if (qp->next_fence)
1114 					fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
1115 				else
1116 					fence = MLX5_FENCE_MODE_FENCE;
1117 			} else {
1118 				fence = qp->next_fence;
1119 			}
1120 		}
1121 
1122 		switch (qp->type) {
1123 		case IB_QPT_XRC_INI:
1124 			xrc = seg;
1125 			seg += sizeof(*xrc);
1126 			size += sizeof(*xrc) / 16;
1127 			fallthrough;
1128 		case IB_QPT_RC:
1129 			err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size,
1130 					    &cur_edge, &idx, nreq, fence,
1131 					    next_fence, &num_sge);
1132 			if (unlikely(err)) {
1133 				*bad_wr = wr;
1134 				goto out;
1135 			} else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) {
1136 				goto skip_psv;
1137 			}
1138 			break;
1139 
1140 		case IB_QPT_UC:
1141 			handle_qpt_uc(wr, &seg, &size);
1142 			break;
1143 		case IB_QPT_SMI:
1144 			if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) {
1145 				mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
1146 				err = -EPERM;
1147 				*bad_wr = wr;
1148 				goto out;
1149 			}
1150 			fallthrough;
1151 		case MLX5_IB_QPT_HW_GSI:
1152 			handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge);
1153 			break;
1154 		case IB_QPT_UD:
1155 			handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
1156 			break;
1157 
1158 		default:
1159 			break;
1160 		}
1161 
1162 		if (wr->send_flags & IB_SEND_INLINE && num_sge) {
1163 			err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
1164 			if (unlikely(err)) {
1165 				mlx5_ib_warn(dev, "\n");
1166 				*bad_wr = wr;
1167 				goto out;
1168 			}
1169 		} else {
1170 			for (i = 0; i < num_sge; i++) {
1171 				handle_post_send_edge(&qp->sq, &seg, size,
1172 						      &cur_edge);
1173 				if (unlikely(!wr->sg_list[i].length))
1174 					continue;
1175 
1176 				set_data_ptr_seg(
1177 					(struct mlx5_wqe_data_seg *)seg,
1178 					wr->sg_list + i);
1179 				size += sizeof(struct mlx5_wqe_data_seg) / 16;
1180 				seg += sizeof(struct mlx5_wqe_data_seg);
1181 			}
1182 		}
1183 
1184 		qp->next_fence = next_fence;
1185 		mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id,
1186 				 nreq, fence, mlx5_ib_opcode[wr->opcode]);
1187 skip_psv:
1188 		if (0)
1189 			dump_wqe(qp, idx, size);
1190 	}
1191 
1192 out:
1193 	if (likely(nreq))
1194 		mlx5r_ring_db(qp, nreq, ctrl);
1195 
1196 	spin_unlock_irqrestore(&qp->sq.lock, flags);
1197 
1198 	return err;
1199 }
1200 
1201 static void set_sig_seg(struct mlx5_rwqe_sig *sig, int max_gs)
1202 {
1203 	 sig->signature = calc_sig(sig, (max_gs + 1) << 2);
1204 }
1205 
1206 int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1207 		      const struct ib_recv_wr **bad_wr, bool drain)
1208 {
1209 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
1210 	struct mlx5_wqe_data_seg *scat;
1211 	struct mlx5_rwqe_sig *sig;
1212 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1213 	struct mlx5_core_dev *mdev = dev->mdev;
1214 	unsigned long flags;
1215 	int err = 0;
1216 	int nreq;
1217 	int ind;
1218 	int i;
1219 
1220 	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
1221 		     !drain)) {
1222 		*bad_wr = wr;
1223 		return -EIO;
1224 	}
1225 
1226 	if (qp->type == IB_QPT_GSI)
1227 		return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
1228 
1229 	spin_lock_irqsave(&qp->rq.lock, flags);
1230 
1231 	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
1232 
1233 	for (nreq = 0; wr; nreq++, wr = wr->next) {
1234 		if (mlx5r_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1235 			err = -ENOMEM;
1236 			*bad_wr = wr;
1237 			goto out;
1238 		}
1239 
1240 		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1241 			err = -EINVAL;
1242 			*bad_wr = wr;
1243 			goto out;
1244 		}
1245 
1246 		scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
1247 		if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
1248 			scat++;
1249 
1250 		for (i = 0; i < wr->num_sge; i++)
1251 			set_data_ptr_seg(scat + i, wr->sg_list + i);
1252 
1253 		if (i < qp->rq.max_gs) {
1254 			scat[i].byte_count = 0;
1255 			scat[i].lkey = dev->mkeys.terminate_scatter_list_mkey;
1256 			scat[i].addr       = 0;
1257 		}
1258 
1259 		if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
1260 			sig = (struct mlx5_rwqe_sig *)scat;
1261 			set_sig_seg(sig, qp->rq.max_gs);
1262 		}
1263 
1264 		qp->rq.wrid[ind] = wr->wr_id;
1265 
1266 		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
1267 	}
1268 
1269 out:
1270 	if (likely(nreq)) {
1271 		qp->rq.head += nreq;
1272 
1273 		/* Make sure that descriptors are written before
1274 		 * doorbell record.
1275 		 */
1276 		wmb();
1277 
1278 		*qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
1279 	}
1280 
1281 	spin_unlock_irqrestore(&qp->rq.lock, flags);
1282 
1283 	return err;
1284 }
1285