xref: /openbmc/linux/drivers/infiniband/hw/mlx5/wr.c (revision 887069f4)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
4  */
5 
6 #include <linux/gfp.h>
7 #include <linux/mlx5/qp.h>
8 #include <linux/mlx5/driver.h>
9 #include "wr.h"
10 
11 static const u32 mlx5_ib_opcode[] = {
12 	[IB_WR_SEND]				= MLX5_OPCODE_SEND,
13 	[IB_WR_LSO]				= MLX5_OPCODE_LSO,
14 	[IB_WR_SEND_WITH_IMM]			= MLX5_OPCODE_SEND_IMM,
15 	[IB_WR_RDMA_WRITE]			= MLX5_OPCODE_RDMA_WRITE,
16 	[IB_WR_RDMA_WRITE_WITH_IMM]		= MLX5_OPCODE_RDMA_WRITE_IMM,
17 	[IB_WR_RDMA_READ]			= MLX5_OPCODE_RDMA_READ,
18 	[IB_WR_ATOMIC_CMP_AND_SWP]		= MLX5_OPCODE_ATOMIC_CS,
19 	[IB_WR_ATOMIC_FETCH_AND_ADD]		= MLX5_OPCODE_ATOMIC_FA,
20 	[IB_WR_SEND_WITH_INV]			= MLX5_OPCODE_SEND_INVAL,
21 	[IB_WR_LOCAL_INV]			= MLX5_OPCODE_UMR,
22 	[IB_WR_REG_MR]				= MLX5_OPCODE_UMR,
23 	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= MLX5_OPCODE_ATOMIC_MASKED_CS,
24 	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= MLX5_OPCODE_ATOMIC_MASKED_FA,
25 	[MLX5_IB_WR_UMR]			= MLX5_OPCODE_UMR,
26 };
27 
28 /* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
29  * next nearby edge and get new address translation for current WQE position.
30  * @sq - SQ buffer.
31  * @seg: Current WQE position (16B aligned).
32  * @wqe_sz: Total current WQE size [16B].
33  * @cur_edge: Updated current edge.
34  */
35 static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
36 					 u32 wqe_sz, void **cur_edge)
37 {
38 	u32 idx;
39 
40 	if (likely(*seg != *cur_edge))
41 		return;
42 
43 	idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
44 	*cur_edge = get_sq_edge(sq, idx);
45 
46 	*seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
47 }
48 
49 /* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
50  * pointers. At the end @seg is aligned to 16B regardless the copied size.
51  * @sq - SQ buffer.
52  * @cur_edge: Updated current edge.
53  * @seg: Current WQE position (16B aligned).
54  * @wqe_sz: Total current WQE size [16B].
55  * @src: Pointer to copy from.
56  * @n: Number of bytes to copy.
57  */
58 static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
59 				   void **seg, u32 *wqe_sz, const void *src,
60 				   size_t n)
61 {
62 	while (likely(n)) {
63 		size_t leftlen = *cur_edge - *seg;
64 		size_t copysz = min_t(size_t, leftlen, n);
65 		size_t stride;
66 
67 		memcpy(*seg, src, copysz);
68 
69 		n -= copysz;
70 		src += copysz;
71 		stride = !n ? ALIGN(copysz, 16) : copysz;
72 		*seg += stride;
73 		*wqe_sz += stride >> 4;
74 		handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
75 	}
76 }
77 
78 static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq,
79 			    struct ib_cq *ib_cq)
80 {
81 	struct mlx5_ib_cq *cq;
82 	unsigned int cur;
83 
84 	cur = wq->head - wq->tail;
85 	if (likely(cur + nreq < wq->max_post))
86 		return 0;
87 
88 	cq = to_mcq(ib_cq);
89 	spin_lock(&cq->lock);
90 	cur = wq->head - wq->tail;
91 	spin_unlock(&cq->lock);
92 
93 	return cur + nreq >= wq->max_post;
94 }
95 
96 static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
97 					  u64 remote_addr, u32 rkey)
98 {
99 	rseg->raddr    = cpu_to_be64(remote_addr);
100 	rseg->rkey     = cpu_to_be32(rkey);
101 	rseg->reserved = 0;
102 }
103 
104 static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
105 			void **seg, int *size, void **cur_edge)
106 {
107 	struct mlx5_wqe_eth_seg *eseg = *seg;
108 
109 	memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
110 
111 	if (wr->send_flags & IB_SEND_IP_CSUM)
112 		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
113 				 MLX5_ETH_WQE_L4_CSUM;
114 
115 	if (wr->opcode == IB_WR_LSO) {
116 		struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
117 		size_t left, copysz;
118 		void *pdata = ud_wr->header;
119 		size_t stride;
120 
121 		left = ud_wr->hlen;
122 		eseg->mss = cpu_to_be16(ud_wr->mss);
123 		eseg->inline_hdr.sz = cpu_to_be16(left);
124 
125 		/* memcpy_send_wqe should get a 16B align address. Hence, we
126 		 * first copy up to the current edge and then, if needed,
127 		 * continue to memcpy_send_wqe.
128 		 */
129 		copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
130 			       left);
131 		memcpy(eseg->inline_hdr.start, pdata, copysz);
132 		stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
133 			       sizeof(eseg->inline_hdr.start) + copysz, 16);
134 		*size += stride / 16;
135 		*seg += stride;
136 
137 		if (copysz < left) {
138 			handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
139 			left -= copysz;
140 			pdata += copysz;
141 			memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
142 					left);
143 		}
144 
145 		return;
146 	}
147 
148 	*seg += sizeof(struct mlx5_wqe_eth_seg);
149 	*size += sizeof(struct mlx5_wqe_eth_seg) / 16;
150 }
151 
152 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
153 			     const struct ib_send_wr *wr)
154 {
155 	memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
156 	dseg->av.dqp_dct =
157 		cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
158 	dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
159 }
160 
161 static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
162 {
163 	dseg->byte_count = cpu_to_be32(sg->length);
164 	dseg->lkey       = cpu_to_be32(sg->lkey);
165 	dseg->addr       = cpu_to_be64(sg->addr);
166 }
167 
168 static u64 get_xlt_octo(u64 bytes)
169 {
170 	return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
171 	       MLX5_IB_UMR_OCTOWORD;
172 }
173 
174 static __be64 frwr_mkey_mask(bool atomic)
175 {
176 	u64 result;
177 
178 	result = MLX5_MKEY_MASK_LEN		|
179 		MLX5_MKEY_MASK_PAGE_SIZE	|
180 		MLX5_MKEY_MASK_START_ADDR	|
181 		MLX5_MKEY_MASK_EN_RINVAL	|
182 		MLX5_MKEY_MASK_KEY		|
183 		MLX5_MKEY_MASK_LR		|
184 		MLX5_MKEY_MASK_LW		|
185 		MLX5_MKEY_MASK_RR		|
186 		MLX5_MKEY_MASK_RW		|
187 		MLX5_MKEY_MASK_SMALL_FENCE	|
188 		MLX5_MKEY_MASK_FREE;
189 
190 	if (atomic)
191 		result |= MLX5_MKEY_MASK_A;
192 
193 	return cpu_to_be64(result);
194 }
195 
196 static __be64 sig_mkey_mask(void)
197 {
198 	u64 result;
199 
200 	result = MLX5_MKEY_MASK_LEN		|
201 		MLX5_MKEY_MASK_PAGE_SIZE	|
202 		MLX5_MKEY_MASK_START_ADDR	|
203 		MLX5_MKEY_MASK_EN_SIGERR	|
204 		MLX5_MKEY_MASK_EN_RINVAL	|
205 		MLX5_MKEY_MASK_KEY		|
206 		MLX5_MKEY_MASK_LR		|
207 		MLX5_MKEY_MASK_LW		|
208 		MLX5_MKEY_MASK_RR		|
209 		MLX5_MKEY_MASK_RW		|
210 		MLX5_MKEY_MASK_SMALL_FENCE	|
211 		MLX5_MKEY_MASK_FREE		|
212 		MLX5_MKEY_MASK_BSF_EN;
213 
214 	return cpu_to_be64(result);
215 }
216 
217 static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
218 			    struct mlx5_ib_mr *mr, u8 flags, bool atomic)
219 {
220 	int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
221 
222 	memset(umr, 0, sizeof(*umr));
223 
224 	umr->flags = flags;
225 	umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
226 	umr->mkey_mask = frwr_mkey_mask(atomic);
227 }
228 
229 static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
230 {
231 	memset(umr, 0, sizeof(*umr));
232 	umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
233 	umr->flags = MLX5_UMR_INLINE;
234 }
235 
236 static __be64 get_umr_enable_mr_mask(void)
237 {
238 	u64 result;
239 
240 	result = MLX5_MKEY_MASK_KEY |
241 		 MLX5_MKEY_MASK_FREE;
242 
243 	return cpu_to_be64(result);
244 }
245 
246 static __be64 get_umr_disable_mr_mask(void)
247 {
248 	u64 result;
249 
250 	result = MLX5_MKEY_MASK_FREE;
251 
252 	return cpu_to_be64(result);
253 }
254 
255 static __be64 get_umr_update_translation_mask(void)
256 {
257 	u64 result;
258 
259 	result = MLX5_MKEY_MASK_LEN |
260 		 MLX5_MKEY_MASK_PAGE_SIZE |
261 		 MLX5_MKEY_MASK_START_ADDR;
262 
263 	return cpu_to_be64(result);
264 }
265 
266 static __be64 get_umr_update_access_mask(int atomic,
267 					 int relaxed_ordering_write,
268 					 int relaxed_ordering_read)
269 {
270 	u64 result;
271 
272 	result = MLX5_MKEY_MASK_LR |
273 		 MLX5_MKEY_MASK_LW |
274 		 MLX5_MKEY_MASK_RR |
275 		 MLX5_MKEY_MASK_RW;
276 
277 	if (atomic)
278 		result |= MLX5_MKEY_MASK_A;
279 
280 	if (relaxed_ordering_write)
281 		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
282 
283 	if (relaxed_ordering_read)
284 		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
285 
286 	return cpu_to_be64(result);
287 }
288 
289 static __be64 get_umr_update_pd_mask(void)
290 {
291 	u64 result;
292 
293 	result = MLX5_MKEY_MASK_PD;
294 
295 	return cpu_to_be64(result);
296 }
297 
298 static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
299 {
300 	if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
301 	    MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
302 		return -EPERM;
303 
304 	if (mask & MLX5_MKEY_MASK_A &&
305 	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
306 		return -EPERM;
307 
308 	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
309 	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
310 		return -EPERM;
311 
312 	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
313 	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
314 		return -EPERM;
315 
316 	return 0;
317 }
318 
319 static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
320 			       struct mlx5_wqe_umr_ctrl_seg *umr,
321 			       const struct ib_send_wr *wr)
322 {
323 	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
324 
325 	memset(umr, 0, sizeof(*umr));
326 
327 	if (!umrwr->ignore_free_state) {
328 		if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
329 			 /* fail if free */
330 			umr->flags = MLX5_UMR_CHECK_FREE;
331 		else
332 			/* fail if not free */
333 			umr->flags = MLX5_UMR_CHECK_NOT_FREE;
334 	}
335 
336 	umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
337 	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
338 		u64 offset = get_xlt_octo(umrwr->offset);
339 
340 		umr->xlt_offset = cpu_to_be16(offset & 0xffff);
341 		umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
342 		umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
343 	}
344 	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
345 		umr->mkey_mask |= get_umr_update_translation_mask();
346 	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
347 		umr->mkey_mask |= get_umr_update_access_mask(
348 			!!(MLX5_CAP_GEN(dev->mdev, atomic)),
349 			!!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)),
350 			!!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)));
351 		umr->mkey_mask |= get_umr_update_pd_mask();
352 	}
353 	if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
354 		umr->mkey_mask |= get_umr_enable_mr_mask();
355 	if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
356 		umr->mkey_mask |= get_umr_disable_mr_mask();
357 
358 	if (!wr->num_sge)
359 		umr->flags |= MLX5_UMR_INLINE;
360 
361 	return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
362 }
363 
364 static u8 get_umr_flags(int acc)
365 {
366 	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
367 	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
368 	       (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
369 	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
370 		MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
371 }
372 
373 static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
374 			     struct mlx5_ib_mr *mr,
375 			     u32 key, int access)
376 {
377 	int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1;
378 
379 	memset(seg, 0, sizeof(*seg));
380 
381 	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
382 		seg->log2_page_size = ilog2(mr->ibmr.page_size);
383 	else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
384 		/* KLMs take twice the size of MTTs */
385 		ndescs *= 2;
386 
387 	seg->flags = get_umr_flags(access) | mr->access_mode;
388 	seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
389 	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
390 	seg->start_addr = cpu_to_be64(mr->ibmr.iova);
391 	seg->len = cpu_to_be64(mr->ibmr.length);
392 	seg->xlt_oct_size = cpu_to_be32(ndescs);
393 }
394 
395 static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
396 {
397 	memset(seg, 0, sizeof(*seg));
398 	seg->status = MLX5_MKEY_STATUS_FREE;
399 }
400 
401 static void set_reg_mkey_segment(struct mlx5_ib_dev *dev,
402 				 struct mlx5_mkey_seg *seg,
403 				 const struct ib_send_wr *wr)
404 {
405 	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
406 
407 	memset(seg, 0, sizeof(*seg));
408 	if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
409 		MLX5_SET(mkc, seg, free, 1);
410 
411 	MLX5_SET(mkc, seg, a,
412 		 !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC));
413 	MLX5_SET(mkc, seg, rw,
414 		 !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE));
415 	MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
416 	MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
417 	MLX5_SET(mkc, seg, lr, 1);
418 	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
419 		MLX5_SET(mkc, seg, relaxed_ordering_write,
420 			 !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
421 	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
422 		MLX5_SET(mkc, seg, relaxed_ordering_read,
423 			 !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
424 
425 	if (umrwr->pd)
426 		MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
427 	if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
428 	    !umrwr->length)
429 		MLX5_SET(mkc, seg, length64, 1);
430 
431 	MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr);
432 	MLX5_SET64(mkc, seg, len, umrwr->length);
433 	MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift);
434 	MLX5_SET(mkc, seg, qpn, 0xffffff);
435 	MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey));
436 }
437 
438 static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
439 			     struct mlx5_ib_mr *mr,
440 			     struct mlx5_ib_pd *pd)
441 {
442 	int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs);
443 
444 	dseg->addr = cpu_to_be64(mr->desc_map);
445 	dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
446 	dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
447 }
448 
449 static __be32 send_ieth(const struct ib_send_wr *wr)
450 {
451 	switch (wr->opcode) {
452 	case IB_WR_SEND_WITH_IMM:
453 	case IB_WR_RDMA_WRITE_WITH_IMM:
454 		return wr->ex.imm_data;
455 
456 	case IB_WR_SEND_WITH_INV:
457 		return cpu_to_be32(wr->ex.invalidate_rkey);
458 
459 	default:
460 		return 0;
461 	}
462 }
463 
464 static u8 calc_sig(void *wqe, int size)
465 {
466 	u8 *p = wqe;
467 	u8 res = 0;
468 	int i;
469 
470 	for (i = 0; i < size; i++)
471 		res ^= p[i];
472 
473 	return ~res;
474 }
475 
476 static u8 wq_sig(void *wqe)
477 {
478 	return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
479 }
480 
481 static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
482 			    void **wqe, int *wqe_sz, void **cur_edge)
483 {
484 	struct mlx5_wqe_inline_seg *seg;
485 	size_t offset;
486 	int inl = 0;
487 	int i;
488 
489 	seg = *wqe;
490 	*wqe += sizeof(*seg);
491 	offset = sizeof(*seg);
492 
493 	for (i = 0; i < wr->num_sge; i++) {
494 		size_t len  = wr->sg_list[i].length;
495 		void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
496 
497 		inl += len;
498 
499 		if (unlikely(inl > qp->max_inline_data))
500 			return -ENOMEM;
501 
502 		while (likely(len)) {
503 			size_t leftlen;
504 			size_t copysz;
505 
506 			handle_post_send_edge(&qp->sq, wqe,
507 					      *wqe_sz + (offset >> 4),
508 					      cur_edge);
509 
510 			leftlen = *cur_edge - *wqe;
511 			copysz = min_t(size_t, leftlen, len);
512 
513 			memcpy(*wqe, addr, copysz);
514 			len -= copysz;
515 			addr += copysz;
516 			*wqe += copysz;
517 			offset += copysz;
518 		}
519 	}
520 
521 	seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
522 
523 	*wqe_sz +=  ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
524 
525 	return 0;
526 }
527 
528 static u16 prot_field_size(enum ib_signature_type type)
529 {
530 	switch (type) {
531 	case IB_SIG_TYPE_T10_DIF:
532 		return MLX5_DIF_SIZE;
533 	default:
534 		return 0;
535 	}
536 }
537 
538 static u8 bs_selector(int block_size)
539 {
540 	switch (block_size) {
541 	case 512:	    return 0x1;
542 	case 520:	    return 0x2;
543 	case 4096:	    return 0x3;
544 	case 4160:	    return 0x4;
545 	case 1073741824:    return 0x5;
546 	default:	    return 0;
547 	}
548 }
549 
550 static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
551 			      struct mlx5_bsf_inl *inl)
552 {
553 	/* Valid inline section and allow BSF refresh */
554 	inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
555 				       MLX5_BSF_REFRESH_DIF);
556 	inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
557 	inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
558 	/* repeating block */
559 	inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
560 	inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
561 			MLX5_DIF_CRC : MLX5_DIF_IPCS;
562 
563 	if (domain->sig.dif.ref_remap)
564 		inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
565 
566 	if (domain->sig.dif.app_escape) {
567 		if (domain->sig.dif.ref_escape)
568 			inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
569 		else
570 			inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
571 	}
572 
573 	inl->dif_app_bitmask_check =
574 		cpu_to_be16(domain->sig.dif.apptag_check_mask);
575 }
576 
577 static int mlx5_set_bsf(struct ib_mr *sig_mr,
578 			struct ib_sig_attrs *sig_attrs,
579 			struct mlx5_bsf *bsf, u32 data_size)
580 {
581 	struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
582 	struct mlx5_bsf_basic *basic = &bsf->basic;
583 	struct ib_sig_domain *mem = &sig_attrs->mem;
584 	struct ib_sig_domain *wire = &sig_attrs->wire;
585 
586 	memset(bsf, 0, sizeof(*bsf));
587 
588 	/* Basic + Extended + Inline */
589 	basic->bsf_size_sbs = 1 << 7;
590 	/* Input domain check byte mask */
591 	basic->check_byte_mask = sig_attrs->check_mask;
592 	basic->raw_data_size = cpu_to_be32(data_size);
593 
594 	/* Memory domain */
595 	switch (sig_attrs->mem.sig_type) {
596 	case IB_SIG_TYPE_NONE:
597 		break;
598 	case IB_SIG_TYPE_T10_DIF:
599 		basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
600 		basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
601 		mlx5_fill_inl_bsf(mem, &bsf->m_inl);
602 		break;
603 	default:
604 		return -EINVAL;
605 	}
606 
607 	/* Wire domain */
608 	switch (sig_attrs->wire.sig_type) {
609 	case IB_SIG_TYPE_NONE:
610 		break;
611 	case IB_SIG_TYPE_T10_DIF:
612 		if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
613 		    mem->sig_type == wire->sig_type) {
614 			/* Same block structure */
615 			basic->bsf_size_sbs |= 1 << 4;
616 			if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
617 				basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
618 			if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
619 				basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
620 			if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
621 				basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
622 		} else
623 			basic->wire.bs_selector =
624 				bs_selector(wire->sig.dif.pi_interval);
625 
626 		basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
627 		mlx5_fill_inl_bsf(wire, &bsf->w_inl);
628 		break;
629 	default:
630 		return -EINVAL;
631 	}
632 
633 	return 0;
634 }
635 
636 
637 static int set_sig_data_segment(const struct ib_send_wr *send_wr,
638 				struct ib_mr *sig_mr,
639 				struct ib_sig_attrs *sig_attrs,
640 				struct mlx5_ib_qp *qp, void **seg, int *size,
641 				void **cur_edge)
642 {
643 	struct mlx5_bsf *bsf;
644 	u32 data_len;
645 	u32 data_key;
646 	u64 data_va;
647 	u32 prot_len = 0;
648 	u32 prot_key = 0;
649 	u64 prot_va = 0;
650 	bool prot = false;
651 	int ret;
652 	int wqe_size;
653 	struct mlx5_ib_mr *mr = to_mmr(sig_mr);
654 	struct mlx5_ib_mr *pi_mr = mr->pi_mr;
655 
656 	data_len = pi_mr->data_length;
657 	data_key = pi_mr->ibmr.lkey;
658 	data_va = pi_mr->data_iova;
659 	if (pi_mr->meta_ndescs) {
660 		prot_len = pi_mr->meta_length;
661 		prot_key = pi_mr->ibmr.lkey;
662 		prot_va = pi_mr->pi_iova;
663 		prot = true;
664 	}
665 
666 	if (!prot || (data_key == prot_key && data_va == prot_va &&
667 		      data_len == prot_len)) {
668 		/**
669 		 * Source domain doesn't contain signature information
670 		 * or data and protection are interleaved in memory.
671 		 * So need construct:
672 		 *                  ------------------
673 		 *                 |     data_klm     |
674 		 *                  ------------------
675 		 *                 |       BSF        |
676 		 *                  ------------------
677 		 **/
678 		struct mlx5_klm *data_klm = *seg;
679 
680 		data_klm->bcount = cpu_to_be32(data_len);
681 		data_klm->key = cpu_to_be32(data_key);
682 		data_klm->va = cpu_to_be64(data_va);
683 		wqe_size = ALIGN(sizeof(*data_klm), 64);
684 	} else {
685 		/**
686 		 * Source domain contains signature information
687 		 * So need construct a strided block format:
688 		 *               ---------------------------
689 		 *              |     stride_block_ctrl     |
690 		 *               ---------------------------
691 		 *              |          data_klm         |
692 		 *               ---------------------------
693 		 *              |          prot_klm         |
694 		 *               ---------------------------
695 		 *              |             BSF           |
696 		 *               ---------------------------
697 		 **/
698 		struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
699 		struct mlx5_stride_block_entry *data_sentry;
700 		struct mlx5_stride_block_entry *prot_sentry;
701 		u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
702 		int prot_size;
703 
704 		sblock_ctrl = *seg;
705 		data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
706 		prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
707 
708 		prot_size = prot_field_size(sig_attrs->mem.sig_type);
709 		if (!prot_size) {
710 			pr_err("Bad block size given: %u\n", block_size);
711 			return -EINVAL;
712 		}
713 		sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
714 							    prot_size);
715 		sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
716 		sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
717 		sblock_ctrl->num_entries = cpu_to_be16(2);
718 
719 		data_sentry->bcount = cpu_to_be16(block_size);
720 		data_sentry->key = cpu_to_be32(data_key);
721 		data_sentry->va = cpu_to_be64(data_va);
722 		data_sentry->stride = cpu_to_be16(block_size);
723 
724 		prot_sentry->bcount = cpu_to_be16(prot_size);
725 		prot_sentry->key = cpu_to_be32(prot_key);
726 		prot_sentry->va = cpu_to_be64(prot_va);
727 		prot_sentry->stride = cpu_to_be16(prot_size);
728 
729 		wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
730 				 sizeof(*prot_sentry), 64);
731 	}
732 
733 	*seg += wqe_size;
734 	*size += wqe_size / 16;
735 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
736 
737 	bsf = *seg;
738 	ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
739 	if (ret)
740 		return -EINVAL;
741 
742 	*seg += sizeof(*bsf);
743 	*size += sizeof(*bsf) / 16;
744 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
745 
746 	return 0;
747 }
748 
749 static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
750 				 struct ib_mr *sig_mr, int access_flags,
751 				 u32 size, u32 length, u32 pdn)
752 {
753 	u32 sig_key = sig_mr->rkey;
754 	u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
755 
756 	memset(seg, 0, sizeof(*seg));
757 
758 	seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
759 	seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
760 	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
761 				    MLX5_MKEY_BSF_EN | pdn);
762 	seg->len = cpu_to_be64(length);
763 	seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
764 	seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
765 }
766 
767 static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
768 				u32 size)
769 {
770 	memset(umr, 0, sizeof(*umr));
771 
772 	umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
773 	umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
774 	umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
775 	umr->mkey_mask = sig_mkey_mask();
776 }
777 
778 static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
779 			 struct mlx5_ib_qp *qp, void **seg, int *size,
780 			 void **cur_edge)
781 {
782 	const struct ib_reg_wr *wr = reg_wr(send_wr);
783 	struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
784 	struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
785 	struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
786 	u32 pdn = to_mpd(qp->ibqp.pd)->pdn;
787 	u32 xlt_size;
788 	int region_len, ret;
789 
790 	if (unlikely(send_wr->num_sge != 0) ||
791 	    unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
792 	    unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
793 	    unlikely(!sig_mr->sig->sig_status_checked))
794 		return -EINVAL;
795 
796 	/* length of the protected region, data + protection */
797 	region_len = pi_mr->ibmr.length;
798 
799 	/**
800 	 * KLM octoword size - if protection was provided
801 	 * then we use strided block format (3 octowords),
802 	 * else we use single KLM (1 octoword)
803 	 **/
804 	if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
805 		xlt_size = 0x30;
806 	else
807 		xlt_size = sizeof(struct mlx5_klm);
808 
809 	set_sig_umr_segment(*seg, xlt_size);
810 	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
811 	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
812 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
813 
814 	set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
815 			     pdn);
816 	*seg += sizeof(struct mlx5_mkey_seg);
817 	*size += sizeof(struct mlx5_mkey_seg) / 16;
818 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
819 
820 	ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
821 				   cur_edge);
822 	if (ret)
823 		return ret;
824 
825 	sig_mr->sig->sig_status_checked = false;
826 	return 0;
827 }
828 
829 static int set_psv_wr(struct ib_sig_domain *domain,
830 		      u32 psv_idx, void **seg, int *size)
831 {
832 	struct mlx5_seg_set_psv *psv_seg = *seg;
833 
834 	memset(psv_seg, 0, sizeof(*psv_seg));
835 	psv_seg->psv_num = cpu_to_be32(psv_idx);
836 	switch (domain->sig_type) {
837 	case IB_SIG_TYPE_NONE:
838 		break;
839 	case IB_SIG_TYPE_T10_DIF:
840 		psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
841 						     domain->sig.dif.app_tag);
842 		psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
843 		break;
844 	default:
845 		pr_err("Bad signature type (%d) is given.\n",
846 		       domain->sig_type);
847 		return -EINVAL;
848 	}
849 
850 	*seg += sizeof(*psv_seg);
851 	*size += sizeof(*psv_seg) / 16;
852 
853 	return 0;
854 }
855 
856 static int set_reg_wr(struct mlx5_ib_qp *qp,
857 		      const struct ib_reg_wr *wr,
858 		      void **seg, int *size, void **cur_edge,
859 		      bool check_not_free)
860 {
861 	struct mlx5_ib_mr *mr = to_mmr(wr->mr);
862 	struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
863 	struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
864 	int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
865 	bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
866 	bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
867 	u8 flags = 0;
868 
869 	/* Matches access in mlx5_set_umr_free_mkey().
870 	 * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and
871 	 * kernel ULPs are not aware of it, so we don't set it here.
872 	 */
873 	if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) {
874 		mlx5_ib_warn(
875 			to_mdev(qp->ibqp.device),
876 			"Fast update for MR access flags is not possible\n");
877 		return -EINVAL;
878 	}
879 
880 	if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
881 		mlx5_ib_warn(to_mdev(qp->ibqp.device),
882 			     "Invalid IB_SEND_INLINE send flag\n");
883 		return -EINVAL;
884 	}
885 
886 	if (check_not_free)
887 		flags |= MLX5_UMR_CHECK_NOT_FREE;
888 	if (umr_inline)
889 		flags |= MLX5_UMR_INLINE;
890 
891 	set_reg_umr_seg(*seg, mr, flags, atomic);
892 	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
893 	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
894 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
895 
896 	set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
897 	*seg += sizeof(struct mlx5_mkey_seg);
898 	*size += sizeof(struct mlx5_mkey_seg) / 16;
899 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
900 
901 	if (umr_inline) {
902 		memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
903 				mr_list_size);
904 		*size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
905 	} else {
906 		set_reg_data_seg(*seg, mr, pd);
907 		*seg += sizeof(struct mlx5_wqe_data_seg);
908 		*size += (sizeof(struct mlx5_wqe_data_seg) / 16);
909 	}
910 	return 0;
911 }
912 
913 static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
914 			void **cur_edge)
915 {
916 	set_linv_umr_seg(*seg);
917 	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
918 	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
919 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
920 	set_linv_mkey_seg(*seg);
921 	*seg += sizeof(struct mlx5_mkey_seg);
922 	*size += sizeof(struct mlx5_mkey_seg) / 16;
923 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
924 }
925 
926 static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
927 {
928 	__be32 *p = NULL;
929 	int i, j;
930 
931 	pr_debug("dump WQE index %u:\n", idx);
932 	for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
933 		if ((i & 0xf) == 0) {
934 			p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
935 			pr_debug("WQBB at %p:\n", (void *)p);
936 			j = 0;
937 			idx = (idx + 1) & (qp->sq.wqe_cnt - 1);
938 		}
939 		pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
940 			 be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
941 			 be32_to_cpu(p[j + 3]));
942 	}
943 }
944 
945 static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
946 		       struct mlx5_wqe_ctrl_seg **ctrl,
947 		       const struct ib_send_wr *wr, unsigned int *idx,
948 		       int *size, void **cur_edge, int nreq,
949 		       bool send_signaled, bool solicited)
950 {
951 	if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
952 		return -ENOMEM;
953 
954 	*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
955 	*seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
956 	*ctrl = *seg;
957 	*(uint32_t *)(*seg + 8) = 0;
958 	(*ctrl)->imm = send_ieth(wr);
959 	(*ctrl)->fm_ce_se = qp->sq_signal_bits |
960 		(send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
961 		(solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
962 
963 	*seg += sizeof(**ctrl);
964 	*size = sizeof(**ctrl) / 16;
965 	*cur_edge = qp->sq.cur_edge;
966 
967 	return 0;
968 }
969 
970 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
971 		     struct mlx5_wqe_ctrl_seg **ctrl,
972 		     const struct ib_send_wr *wr, unsigned int *idx, int *size,
973 		     void **cur_edge, int nreq)
974 {
975 	return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
976 			   wr->send_flags & IB_SEND_SIGNALED,
977 			   wr->send_flags & IB_SEND_SOLICITED);
978 }
979 
980 static void finish_wqe(struct mlx5_ib_qp *qp,
981 		       struct mlx5_wqe_ctrl_seg *ctrl,
982 		       void *seg, u8 size, void *cur_edge,
983 		       unsigned int idx, u64 wr_id, int nreq, u8 fence,
984 		       u32 mlx5_opcode)
985 {
986 	u8 opmod = 0;
987 
988 	ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
989 					     mlx5_opcode | ((u32)opmod << 24));
990 	ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
991 	ctrl->fm_ce_se |= fence;
992 	if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE))
993 		ctrl->signature = wq_sig(ctrl);
994 
995 	qp->sq.wrid[idx] = wr_id;
996 	qp->sq.w_list[idx].opcode = mlx5_opcode;
997 	qp->sq.wqe_head[idx] = qp->sq.head + nreq;
998 	qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
999 	qp->sq.w_list[idx].next = qp->sq.cur_post;
1000 
1001 	/* We save the edge which was possibly updated during the WQE
1002 	 * construction, into SQ's cache.
1003 	 */
1004 	seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
1005 	qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
1006 			  get_sq_edge(&qp->sq, qp->sq.cur_post &
1007 				      (qp->sq.wqe_cnt - 1)) :
1008 			  cur_edge;
1009 }
1010 
1011 static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size)
1012 {
1013 	set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey);
1014 	*seg += sizeof(struct mlx5_wqe_raddr_seg);
1015 	*size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
1016 }
1017 
1018 static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
1019 			     struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
1020 			     int *size, void **cur_edge, unsigned int idx)
1021 {
1022 	qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
1023 	(*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey);
1024 	set_linv_wr(qp, seg, size, cur_edge);
1025 }
1026 
1027 static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
1028 			 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
1029 			 void **cur_edge, unsigned int idx)
1030 {
1031 	qp->sq.wr_data[idx] = IB_WR_REG_MR;
1032 	(*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key);
1033 	return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true);
1034 }
1035 
1036 static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1037 		      const struct ib_send_wr *wr,
1038 		      struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
1039 		      void **cur_edge, unsigned int *idx, int nreq,
1040 		      struct ib_sig_domain *domain, u32 psv_index,
1041 		      u8 next_fence)
1042 {
1043 	int err;
1044 
1045 	/*
1046 	 * SET_PSV WQEs are not signaled and solicited on error.
1047 	 */
1048 	err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
1049 			  false, true);
1050 	if (unlikely(err)) {
1051 		mlx5_ib_warn(dev, "\n");
1052 		err = -ENOMEM;
1053 		goto out;
1054 	}
1055 	err = set_psv_wr(domain, psv_index, seg, size);
1056 	if (unlikely(err)) {
1057 		mlx5_ib_warn(dev, "\n");
1058 		goto out;
1059 	}
1060 	finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
1061 		   next_fence, MLX5_OPCODE_SET_PSV);
1062 
1063 out:
1064 	return err;
1065 }
1066 
1067 static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
1068 				   struct mlx5_ib_qp *qp,
1069 				   const struct ib_send_wr *wr,
1070 				   struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
1071 				   int *size, void **cur_edge,
1072 				   unsigned int *idx, int nreq, u8 fence,
1073 				   u8 next_fence)
1074 {
1075 	struct mlx5_ib_mr *mr;
1076 	struct mlx5_ib_mr *pi_mr;
1077 	struct mlx5_ib_mr pa_pi_mr;
1078 	struct ib_sig_attrs *sig_attrs;
1079 	struct ib_reg_wr reg_pi_wr;
1080 	int err;
1081 
1082 	qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY;
1083 
1084 	mr = to_mmr(reg_wr(wr)->mr);
1085 	pi_mr = mr->pi_mr;
1086 
1087 	if (pi_mr) {
1088 		memset(&reg_pi_wr, 0,
1089 		       sizeof(struct ib_reg_wr));
1090 
1091 		reg_pi_wr.mr = &pi_mr->ibmr;
1092 		reg_pi_wr.access = reg_wr(wr)->access;
1093 		reg_pi_wr.key = pi_mr->ibmr.rkey;
1094 
1095 		(*ctrl)->imm = cpu_to_be32(reg_pi_wr.key);
1096 		/* UMR for data + prot registration */
1097 		err = set_reg_wr(qp, &reg_pi_wr, seg, size, cur_edge, false);
1098 		if (unlikely(err))
1099 			goto out;
1100 
1101 		finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
1102 			   nreq, fence, MLX5_OPCODE_UMR);
1103 
1104 		err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
1105 		if (unlikely(err)) {
1106 			mlx5_ib_warn(dev, "\n");
1107 			err = -ENOMEM;
1108 			goto out;
1109 		}
1110 	} else {
1111 		memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr));
1112 		/* No UMR, use local_dma_lkey */
1113 		pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey;
1114 		pa_pi_mr.ndescs = mr->ndescs;
1115 		pa_pi_mr.data_length = mr->data_length;
1116 		pa_pi_mr.data_iova = mr->data_iova;
1117 		if (mr->meta_ndescs) {
1118 			pa_pi_mr.meta_ndescs = mr->meta_ndescs;
1119 			pa_pi_mr.meta_length = mr->meta_length;
1120 			pa_pi_mr.pi_iova = mr->pi_iova;
1121 		}
1122 
1123 		pa_pi_mr.ibmr.length = mr->ibmr.length;
1124 		mr->pi_mr = &pa_pi_mr;
1125 	}
1126 	(*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey);
1127 	/* UMR for sig MR */
1128 	err = set_pi_umr_wr(wr, qp, seg, size, cur_edge);
1129 	if (unlikely(err)) {
1130 		mlx5_ib_warn(dev, "\n");
1131 		goto out;
1132 	}
1133 	finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
1134 		   fence, MLX5_OPCODE_UMR);
1135 
1136 	sig_attrs = mr->ibmr.sig_attrs;
1137 	err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
1138 			 &sig_attrs->mem, mr->sig->psv_memory.psv_idx,
1139 			 next_fence);
1140 	if (unlikely(err))
1141 		goto out;
1142 
1143 	err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
1144 			 &sig_attrs->wire, mr->sig->psv_wire.psv_idx,
1145 			 next_fence);
1146 	if (unlikely(err))
1147 		goto out;
1148 
1149 	qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
1150 
1151 out:
1152 	return err;
1153 }
1154 
1155 static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1156 			 const struct ib_send_wr *wr,
1157 			 struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
1158 			 void **cur_edge, unsigned int *idx, int nreq, u8 fence,
1159 			 u8 next_fence, int *num_sge)
1160 {
1161 	int err = 0;
1162 
1163 	switch (wr->opcode) {
1164 	case IB_WR_RDMA_READ:
1165 	case IB_WR_RDMA_WRITE:
1166 	case IB_WR_RDMA_WRITE_WITH_IMM:
1167 		handle_rdma_op(wr, seg, size);
1168 		break;
1169 
1170 	case IB_WR_ATOMIC_CMP_AND_SWP:
1171 	case IB_WR_ATOMIC_FETCH_AND_ADD:
1172 	case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
1173 		mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
1174 		err = -EOPNOTSUPP;
1175 		goto out;
1176 
1177 	case IB_WR_LOCAL_INV:
1178 		handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx);
1179 		*num_sge = 0;
1180 		break;
1181 
1182 	case IB_WR_REG_MR:
1183 		err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx);
1184 		if (unlikely(err))
1185 			goto out;
1186 		*num_sge = 0;
1187 		break;
1188 
1189 	case IB_WR_REG_MR_INTEGRITY:
1190 		err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size,
1191 					      cur_edge, idx, nreq, fence,
1192 					      next_fence);
1193 		if (unlikely(err))
1194 			goto out;
1195 		*num_sge = 0;
1196 		break;
1197 
1198 	default:
1199 		break;
1200 	}
1201 
1202 out:
1203 	return err;
1204 }
1205 
1206 static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size)
1207 {
1208 	switch (wr->opcode) {
1209 	case IB_WR_RDMA_WRITE:
1210 	case IB_WR_RDMA_WRITE_WITH_IMM:
1211 		handle_rdma_op(wr, seg, size);
1212 		break;
1213 	default:
1214 		break;
1215 	}
1216 }
1217 
1218 static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp,
1219 			      const struct ib_send_wr *wr, void **seg,
1220 			      int *size, void **cur_edge)
1221 {
1222 	set_datagram_seg(*seg, wr);
1223 	*seg += sizeof(struct mlx5_wqe_datagram_seg);
1224 	*size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
1225 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1226 }
1227 
1228 static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
1229 			  void **seg, int *size, void **cur_edge)
1230 {
1231 	set_datagram_seg(*seg, wr);
1232 	*seg += sizeof(struct mlx5_wqe_datagram_seg);
1233 	*size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
1234 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1235 
1236 	/* handle qp that supports ud offload */
1237 	if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
1238 		struct mlx5_wqe_eth_pad *pad;
1239 
1240 		pad = *seg;
1241 		memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
1242 		*seg += sizeof(struct mlx5_wqe_eth_pad);
1243 		*size += sizeof(struct mlx5_wqe_eth_pad) / 16;
1244 		set_eth_seg(wr, qp, seg, size, cur_edge);
1245 		handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1246 	}
1247 }
1248 
1249 static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1250 			      const struct ib_send_wr *wr,
1251 			      struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
1252 			      int *size, void **cur_edge, unsigned int idx)
1253 {
1254 	int err = 0;
1255 
1256 	if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) {
1257 		err = -EINVAL;
1258 		mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode);
1259 		goto out;
1260 	}
1261 
1262 	qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
1263 	(*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
1264 	err = set_reg_umr_segment(dev, *seg, wr);
1265 	if (unlikely(err))
1266 		goto out;
1267 	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
1268 	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
1269 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1270 	set_reg_mkey_segment(dev, *seg, wr);
1271 	*seg += sizeof(struct mlx5_mkey_seg);
1272 	*size += sizeof(struct mlx5_mkey_seg) / 16;
1273 	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1274 out:
1275 	return err;
1276 }
1277 
1278 int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1279 		      const struct ib_send_wr **bad_wr, bool drain)
1280 {
1281 	struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
1282 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1283 	struct mlx5_core_dev *mdev = dev->mdev;
1284 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
1285 	struct mlx5_wqe_xrc_seg *xrc;
1286 	struct mlx5_bf *bf;
1287 	void *cur_edge;
1288 	int size;
1289 	unsigned long flags;
1290 	unsigned int idx;
1291 	int err = 0;
1292 	int num_sge;
1293 	void *seg;
1294 	int nreq;
1295 	int i;
1296 	u8 next_fence = 0;
1297 	u8 fence;
1298 
1299 	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
1300 		     !drain)) {
1301 		*bad_wr = wr;
1302 		return -EIO;
1303 	}
1304 
1305 	if (qp->type == IB_QPT_GSI)
1306 		return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
1307 
1308 	bf = &qp->bf;
1309 
1310 	spin_lock_irqsave(&qp->sq.lock, flags);
1311 
1312 	for (nreq = 0; wr; nreq++, wr = wr->next) {
1313 		if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
1314 			mlx5_ib_warn(dev, "\n");
1315 			err = -EINVAL;
1316 			*bad_wr = wr;
1317 			goto out;
1318 		}
1319 
1320 		num_sge = wr->num_sge;
1321 		if (unlikely(num_sge > qp->sq.max_gs)) {
1322 			mlx5_ib_warn(dev, "\n");
1323 			err = -EINVAL;
1324 			*bad_wr = wr;
1325 			goto out;
1326 		}
1327 
1328 		err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
1329 				nreq);
1330 		if (err) {
1331 			mlx5_ib_warn(dev, "\n");
1332 			err = -ENOMEM;
1333 			*bad_wr = wr;
1334 			goto out;
1335 		}
1336 
1337 		if (wr->opcode == IB_WR_REG_MR ||
1338 		    wr->opcode == IB_WR_REG_MR_INTEGRITY) {
1339 			fence = dev->umr_fence;
1340 			next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
1341 		} else  {
1342 			if (wr->send_flags & IB_SEND_FENCE) {
1343 				if (qp->next_fence)
1344 					fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
1345 				else
1346 					fence = MLX5_FENCE_MODE_FENCE;
1347 			} else {
1348 				fence = qp->next_fence;
1349 			}
1350 		}
1351 
1352 		switch (qp->type) {
1353 		case IB_QPT_XRC_INI:
1354 			xrc = seg;
1355 			seg += sizeof(*xrc);
1356 			size += sizeof(*xrc) / 16;
1357 			fallthrough;
1358 		case IB_QPT_RC:
1359 			err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size,
1360 					    &cur_edge, &idx, nreq, fence,
1361 					    next_fence, &num_sge);
1362 			if (unlikely(err)) {
1363 				*bad_wr = wr;
1364 				goto out;
1365 			} else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) {
1366 				goto skip_psv;
1367 			}
1368 			break;
1369 
1370 		case IB_QPT_UC:
1371 			handle_qpt_uc(wr, &seg, &size);
1372 			break;
1373 		case IB_QPT_SMI:
1374 			if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) {
1375 				mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
1376 				err = -EPERM;
1377 				*bad_wr = wr;
1378 				goto out;
1379 			}
1380 			fallthrough;
1381 		case MLX5_IB_QPT_HW_GSI:
1382 			handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge);
1383 			break;
1384 		case IB_QPT_UD:
1385 			handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
1386 			break;
1387 		case MLX5_IB_QPT_REG_UMR:
1388 			err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg,
1389 						       &size, &cur_edge, idx);
1390 			if (unlikely(err))
1391 				goto out;
1392 			break;
1393 
1394 		default:
1395 			break;
1396 		}
1397 
1398 		if (wr->send_flags & IB_SEND_INLINE && num_sge) {
1399 			err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
1400 			if (unlikely(err)) {
1401 				mlx5_ib_warn(dev, "\n");
1402 				*bad_wr = wr;
1403 				goto out;
1404 			}
1405 		} else {
1406 			for (i = 0; i < num_sge; i++) {
1407 				handle_post_send_edge(&qp->sq, &seg, size,
1408 						      &cur_edge);
1409 				if (unlikely(!wr->sg_list[i].length))
1410 					continue;
1411 
1412 				set_data_ptr_seg(
1413 					(struct mlx5_wqe_data_seg *)seg,
1414 					wr->sg_list + i);
1415 				size += sizeof(struct mlx5_wqe_data_seg) / 16;
1416 				seg += sizeof(struct mlx5_wqe_data_seg);
1417 			}
1418 		}
1419 
1420 		qp->next_fence = next_fence;
1421 		finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
1422 			   fence, mlx5_ib_opcode[wr->opcode]);
1423 skip_psv:
1424 		if (0)
1425 			dump_wqe(qp, idx, size);
1426 	}
1427 
1428 out:
1429 	if (likely(nreq)) {
1430 		qp->sq.head += nreq;
1431 
1432 		/* Make sure that descriptors are written before
1433 		 * updating doorbell record and ringing the doorbell
1434 		 */
1435 		wmb();
1436 
1437 		qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
1438 
1439 		/* Make sure doorbell record is visible to the HCA before
1440 		 * we hit doorbell.
1441 		 */
1442 		wmb();
1443 
1444 		mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
1445 		/* Make sure doorbells don't leak out of SQ spinlock
1446 		 * and reach the HCA out of order.
1447 		 */
1448 		bf->offset ^= bf->buf_size;
1449 	}
1450 
1451 	spin_unlock_irqrestore(&qp->sq.lock, flags);
1452 
1453 	return err;
1454 }
1455 
1456 static void set_sig_seg(struct mlx5_rwqe_sig *sig, int max_gs)
1457 {
1458 	 sig->signature = calc_sig(sig, (max_gs + 1) << 2);
1459 }
1460 
1461 int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1462 		      const struct ib_recv_wr **bad_wr, bool drain)
1463 {
1464 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
1465 	struct mlx5_wqe_data_seg *scat;
1466 	struct mlx5_rwqe_sig *sig;
1467 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1468 	struct mlx5_core_dev *mdev = dev->mdev;
1469 	unsigned long flags;
1470 	int err = 0;
1471 	int nreq;
1472 	int ind;
1473 	int i;
1474 
1475 	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
1476 		     !drain)) {
1477 		*bad_wr = wr;
1478 		return -EIO;
1479 	}
1480 
1481 	if (qp->type == IB_QPT_GSI)
1482 		return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
1483 
1484 	spin_lock_irqsave(&qp->rq.lock, flags);
1485 
1486 	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
1487 
1488 	for (nreq = 0; wr; nreq++, wr = wr->next) {
1489 		if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1490 			err = -ENOMEM;
1491 			*bad_wr = wr;
1492 			goto out;
1493 		}
1494 
1495 		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1496 			err = -EINVAL;
1497 			*bad_wr = wr;
1498 			goto out;
1499 		}
1500 
1501 		scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
1502 		if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
1503 			scat++;
1504 
1505 		for (i = 0; i < wr->num_sge; i++)
1506 			set_data_ptr_seg(scat + i, wr->sg_list + i);
1507 
1508 		if (i < qp->rq.max_gs) {
1509 			scat[i].byte_count = 0;
1510 			scat[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
1511 			scat[i].addr       = 0;
1512 		}
1513 
1514 		if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
1515 			sig = (struct mlx5_rwqe_sig *)scat;
1516 			set_sig_seg(sig, qp->rq.max_gs);
1517 		}
1518 
1519 		qp->rq.wrid[ind] = wr->wr_id;
1520 
1521 		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
1522 	}
1523 
1524 out:
1525 	if (likely(nreq)) {
1526 		qp->rq.head += nreq;
1527 
1528 		/* Make sure that descriptors are written before
1529 		 * doorbell record.
1530 		 */
1531 		wmb();
1532 
1533 		*qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
1534 	}
1535 
1536 	spin_unlock_irqrestore(&qp->rq.lock, flags);
1537 
1538 	return err;
1539 }
1540