xref: /openbmc/linux/drivers/infiniband/hw/mlx5/cq.c (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
1  /*
2   * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3   *
4   * This software is available to you under a choice of one of two
5   * licenses.  You may choose to be licensed under the terms of the GNU
6   * General Public License (GPL) Version 2, available from the file
7   * COPYING in the main directory of this source tree, or the
8   * OpenIB.org BSD license below:
9   *
10   *     Redistribution and use in source and binary forms, with or
11   *     without modification, are permitted provided that the following
12   *     conditions are met:
13   *
14   *      - Redistributions of source code must retain the above
15   *        copyright notice, this list of conditions and the following
16   *        disclaimer.
17   *
18   *      - Redistributions in binary form must reproduce the above
19   *        copyright notice, this list of conditions and the following
20   *        disclaimer in the documentation and/or other materials
21   *        provided with the distribution.
22   *
23   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27   * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28   * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30   * SOFTWARE.
31   */
32  
33  #include <linux/kref.h>
34  #include <rdma/ib_umem.h>
35  #include <rdma/ib_user_verbs.h>
36  #include <rdma/ib_cache.h>
37  #include "mlx5_ib.h"
38  #include "srq.h"
39  #include "qp.h"
40  
41  static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
42  {
43  	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
44  
45  	ibcq->comp_handler(ibcq, ibcq->cq_context);
46  }
47  
48  static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
49  {
50  	struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
51  	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
52  	struct ib_cq *ibcq = &cq->ibcq;
53  	struct ib_event event;
54  
55  	if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
56  		mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
57  			     type, mcq->cqn);
58  		return;
59  	}
60  
61  	if (ibcq->event_handler) {
62  		event.device     = &dev->ib_dev;
63  		event.event      = IB_EVENT_CQ_ERR;
64  		event.element.cq = ibcq;
65  		ibcq->event_handler(&event, ibcq->cq_context);
66  	}
67  }
68  
69  static void *get_cqe(struct mlx5_ib_cq *cq, int n)
70  {
71  	return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
72  }
73  
74  static u8 sw_ownership_bit(int n, int nent)
75  {
76  	return (n & nent) ? 1 : 0;
77  }
78  
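/*
 * Return the CQE at index n only if it is in software ownership: its opcode
 * must not be MLX5_CQE_INVALID and its ownership bit must match the parity
 * expected for the current pass over the CQ ring (derived from
 * n & (ibcq.cqe + 1)). Otherwise the entry still belongs to hardware and
 * NULL is returned.
 */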
79  static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
80  {
81  	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
82  	struct mlx5_cqe64 *cqe64;
83  
84  	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
85  
86  	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
87  	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
88  		return cqe;
89  	} else {
90  		return NULL;
91  	}
92  }
93  
94  static void *next_cqe_sw(struct mlx5_ib_cq *cq)
95  {
96  	return get_sw_cqe(cq, cq->mcq.cons_index);
97  }
98  
99  static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
100  {
101  	switch (wq->wr_data[idx]) {
102  	case MLX5_IB_WR_UMR:
103  		return 0;
104  
105  	case IB_WR_LOCAL_INV:
106  		return IB_WC_LOCAL_INV;
107  
108  	case IB_WR_REG_MR:
109  		return IB_WC_REG_MR;
110  
111  	default:
112  		pr_warn("unknown completion status\n");
113  		return 0;
114  	}
115  }
116  
117  static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
118  			    struct mlx5_ib_wq *wq, int idx)
119  {
120  	wc->wc_flags = 0;
121  	switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
122  	case MLX5_OPCODE_RDMA_WRITE_IMM:
123  		wc->wc_flags |= IB_WC_WITH_IMM;
124  		fallthrough;
125  	case MLX5_OPCODE_RDMA_WRITE:
126  		wc->opcode    = IB_WC_RDMA_WRITE;
127  		break;
128  	case MLX5_OPCODE_SEND_IMM:
129  		wc->wc_flags |= IB_WC_WITH_IMM;
130  		fallthrough;
131  	case MLX5_OPCODE_SEND:
132  	case MLX5_OPCODE_SEND_INVAL:
133  		wc->opcode    = IB_WC_SEND;
134  		break;
135  	case MLX5_OPCODE_RDMA_READ:
136  		wc->opcode    = IB_WC_RDMA_READ;
137  		wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
138  		break;
139  	case MLX5_OPCODE_ATOMIC_CS:
140  		wc->opcode    = IB_WC_COMP_SWAP;
141  		wc->byte_len  = 8;
142  		break;
143  	case MLX5_OPCODE_ATOMIC_FA:
144  		wc->opcode    = IB_WC_FETCH_ADD;
145  		wc->byte_len  = 8;
146  		break;
147  	case MLX5_OPCODE_ATOMIC_MASKED_CS:
148  		wc->opcode    = IB_WC_MASKED_COMP_SWAP;
149  		wc->byte_len  = 8;
150  		break;
151  	case MLX5_OPCODE_ATOMIC_MASKED_FA:
152  		wc->opcode    = IB_WC_MASKED_FETCH_ADD;
153  		wc->byte_len  = 8;
154  		break;
155  	case MLX5_OPCODE_UMR:
156  		wc->opcode = get_umr_comp(wq, idx);
157  		break;
158  	}
159  }
160  
161  enum {
162  	MLX5_GRH_IN_BUFFER = 1,
163  	MLX5_GRH_IN_CQE	   = 2,
164  };
165  
166  static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
167  			     struct mlx5_ib_qp *qp)
168  {
169  	enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
170  	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
171  	struct mlx5_ib_srq *srq = NULL;
172  	struct mlx5_ib_wq *wq;
173  	u16 wqe_ctr;
174  	u8  roce_packet_type;
175  	bool vlan_present;
176  	u8 g;
177  
178  	if (qp->ibqp.srq || qp->ibqp.xrcd) {
179  		struct mlx5_core_srq *msrq = NULL;
180  
181  		if (qp->ibqp.xrcd) {
182  			msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
183  			if (msrq)
184  				srq = to_mibsrq(msrq);
185  		} else {
186  			srq = to_msrq(qp->ibqp.srq);
187  		}
188  		if (srq) {
189  			wqe_ctr = be16_to_cpu(cqe->wqe_counter);
190  			wc->wr_id = srq->wrid[wqe_ctr];
191  			mlx5_ib_free_srq_wqe(srq, wqe_ctr);
192  			if (msrq)
193  				mlx5_core_res_put(&msrq->common);
194  		}
195  	} else {
196  		wq	  = &qp->rq;
197  		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
198  		++wq->tail;
199  	}
200  	wc->byte_len = be32_to_cpu(cqe->byte_cnt);
201  
202  	switch (get_cqe_opcode(cqe)) {
203  	case MLX5_CQE_RESP_WR_IMM:
204  		wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
205  		wc->wc_flags	= IB_WC_WITH_IMM;
206  		wc->ex.imm_data = cqe->immediate;
207  		break;
208  	case MLX5_CQE_RESP_SEND:
209  		wc->opcode   = IB_WC_RECV;
210  		wc->wc_flags = IB_WC_IP_CSUM_OK;
211  		if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) &&
212  			       (cqe->hds_ip_ext & CQE_L4_OK))))
213  			wc->wc_flags = 0;
214  		break;
215  	case MLX5_CQE_RESP_SEND_IMM:
216  		wc->opcode	= IB_WC_RECV;
217  		wc->wc_flags	= IB_WC_WITH_IMM;
218  		wc->ex.imm_data = cqe->immediate;
219  		break;
220  	case MLX5_CQE_RESP_SEND_INV:
221  		wc->opcode	= IB_WC_RECV;
222  		wc->wc_flags	= IB_WC_WITH_INVALIDATE;
223  		wc->ex.invalidate_rkey = be32_to_cpu(cqe->inval_rkey);
224  		break;
225  	}
226  	wc->src_qp	   = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
227  	wc->dlid_path_bits = cqe->ml_path;
228  	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
229  	wc->wc_flags |= g ? IB_WC_GRH : 0;
230  	if (is_qp1(qp->type)) {
231  		u16 pkey = be32_to_cpu(cqe->pkey) & 0xffff;
232  
233  		ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
234  				    &wc->pkey_index);
235  	} else {
236  		wc->pkey_index = 0;
237  	}
238  
239  	if (ll != IB_LINK_LAYER_ETHERNET) {
240  		wc->slid = be16_to_cpu(cqe->slid);
241  		wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
242  		return;
243  	}
244  
245  	wc->slid = 0;
246  	vlan_present = cqe->l4_l3_hdr_type & 0x1;
247  	roce_packet_type   = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
248  	if (vlan_present) {
249  		wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff;
250  		wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7;
251  		wc->wc_flags |= IB_WC_WITH_VLAN;
252  	} else {
253  		wc->sl = 0;
254  	}
255  
256  	switch (roce_packet_type) {
257  	case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
258  		wc->network_hdr_type = RDMA_NETWORK_ROCE_V1;
259  		break;
260  	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
261  		wc->network_hdr_type = RDMA_NETWORK_IPV6;
262  		break;
263  	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
264  		wc->network_hdr_type = RDMA_NETWORK_IPV4;
265  		break;
266  	}
267  	wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
268  }
269  
270  static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe,
271  		     struct ib_wc *wc, const char *level)
272  {
273  	mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status,
274  		    ib_wc_status_msg(wc->status));
275  	print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1,
276  		       cqe, sizeof(*cqe), false);
277  }
278  
279  static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
280  				  struct mlx5_err_cqe *cqe,
281  				  struct ib_wc *wc)
282  {
283  	const char *dump = KERN_WARNING;
284  
285  	switch (cqe->syndrome) {
286  	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
287  		wc->status = IB_WC_LOC_LEN_ERR;
288  		break;
289  	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
290  		wc->status = IB_WC_LOC_QP_OP_ERR;
291  		break;
292  	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
293  		dump = KERN_DEBUG;
294  		wc->status = IB_WC_LOC_PROT_ERR;
295  		break;
296  	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
297  		dump = NULL;
298  		wc->status = IB_WC_WR_FLUSH_ERR;
299  		break;
300  	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
301  		wc->status = IB_WC_MW_BIND_ERR;
302  		break;
303  	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
304  		wc->status = IB_WC_BAD_RESP_ERR;
305  		break;
306  	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
307  		wc->status = IB_WC_LOC_ACCESS_ERR;
308  		break;
309  	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
310  		wc->status = IB_WC_REM_INV_REQ_ERR;
311  		break;
312  	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
313  		dump = KERN_DEBUG;
314  		wc->status = IB_WC_REM_ACCESS_ERR;
315  		break;
316  	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
317  		dump = KERN_DEBUG;
318  		wc->status = IB_WC_REM_OP_ERR;
319  		break;
320  	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
321  		dump = NULL;
322  		wc->status = IB_WC_RETRY_EXC_ERR;
323  		break;
324  	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
325  		dump = NULL;
326  		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
327  		break;
328  	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
329  		wc->status = IB_WC_REM_ABORT_ERR;
330  		break;
331  	default:
332  		wc->status = IB_WC_GENERAL_ERR;
333  		break;
334  	}
335  
336  	wc->vendor_err = cqe->vendor_err_synd;
337  	if (dump)
338  		dump_cqe(dev, cqe, wc, dump);
339  }
340  
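/*
 * Walk the send queue's wr list from the last polled entry (tail) up to the
 * just-completed WQE (head) and advance sq.last_poll past it, so the next
 * poll resumes from the correct position.
 */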
341  static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
342  			   u16 tail, u16 head)
343  {
344  	u16 idx;
345  
346  	do {
347  		idx = tail & (qp->sq.wqe_cnt - 1);
348  		if (idx == head)
349  			break;
350  
351  		tail = qp->sq.w_list[idx].next;
352  	} while (1);
353  	tail = qp->sq.w_list[idx].next;
354  	qp->sq.last_poll = tail;
355  }
356  
357  static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
358  {
359  	mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
360  }
361  
362  static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
363  			     struct ib_sig_err *item)
364  {
365  	u16 syndrome = be16_to_cpu(cqe->syndrome);
366  
367  #define GUARD_ERR   (1 << 13)
368  #define APPTAG_ERR  (1 << 12)
369  #define REFTAG_ERR  (1 << 11)
370  
371  	if (syndrome & GUARD_ERR) {
372  		item->err_type = IB_SIG_BAD_GUARD;
373  		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
374  		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
375  	} else
376  	if (syndrome & REFTAG_ERR) {
377  		item->err_type = IB_SIG_BAD_REFTAG;
378  		item->expected = be32_to_cpu(cqe->expected_reftag);
379  		item->actual = be32_to_cpu(cqe->actual_reftag);
380  	} else
381  	if (syndrome & APPTAG_ERR) {
382  		item->err_type = IB_SIG_BAD_APPTAG;
383  		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
384  		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
385  	} else {
386  		pr_err("Got signature completion error with bad syndrome %04x\n",
387  		       syndrome);
388  	}
389  
390  	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
391  	item->key = be32_to_cpu(cqe->mkey);
392  }
393  
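/*
 * Generate IB_WC_WR_FLUSH_ERR completions in software for WQEs that are
 * still outstanding on the given send or receive queue; used when the
 * device is in internal error state and hardware will not complete them.
 */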
394  static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
395  		    int *npolled, bool is_send)
396  {
397  	struct mlx5_ib_wq *wq;
398  	unsigned int cur;
399  	int np;
400  	int i;
401  
402  	wq = (is_send) ? &qp->sq : &qp->rq;
403  	cur = wq->head - wq->tail;
404  	np = *npolled;
405  
406  	if (cur == 0)
407  		return;
408  
409  	for (i = 0;  i < cur && np < num_entries; i++) {
410  		unsigned int idx;
411  
412  		idx = (is_send) ? wq->last_poll : wq->tail;
413  		idx &= (wq->wqe_cnt - 1);
414  		wc->wr_id = wq->wrid[idx];
415  		wc->status = IB_WC_WR_FLUSH_ERR;
416  		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
417  		wq->tail++;
418  		if (is_send)
419  			wq->last_poll = wq->w_list[idx].next;
420  		np++;
421  		wc->qp = &qp->ibqp;
422  		wc++;
423  	}
424  	*npolled = np;
425  }
426  
427  static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
428  				 struct ib_wc *wc, int *npolled)
429  {
430  	struct mlx5_ib_qp *qp;
431  
432  	*npolled = 0;
433  	/* Find uncompleted WQEs belonging to that cq and return mimicked (software-generated) completions for them */
434  	list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
435  		sw_comp(qp, num_entries, wc + *npolled, npolled, true);
436  		if (*npolled >= num_entries)
437  			return;
438  	}
439  
440  	list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
441  		sw_comp(qp, num_entries, wc + *npolled, npolled, false);
442  		if (*npolled >= num_entries)
443  			return;
444  	}
445  }
446  
447  static int mlx5_poll_one(struct mlx5_ib_cq *cq,
448  			 struct mlx5_ib_qp **cur_qp,
449  			 struct ib_wc *wc)
450  {
451  	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
452  	struct mlx5_err_cqe *err_cqe;
453  	struct mlx5_cqe64 *cqe64;
454  	struct mlx5_core_qp *mqp;
455  	struct mlx5_ib_wq *wq;
456  	uint8_t opcode;
457  	uint32_t qpn;
458  	u16 wqe_ctr;
459  	void *cqe;
460  	int idx;
461  
462  repoll:
463  	cqe = next_cqe_sw(cq);
464  	if (!cqe)
465  		return -EAGAIN;
466  
467  	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
468  
469  	++cq->mcq.cons_index;
470  
471  	/* Make sure we read CQ entry contents after we've checked the
472  	 * ownership bit.
473  	 */
474  	rmb();
475  
476  	opcode = get_cqe_opcode(cqe64);
477  	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
478  		if (likely(cq->resize_buf)) {
479  			free_cq_buf(dev, &cq->buf);
480  			cq->buf = *cq->resize_buf;
481  			kfree(cq->resize_buf);
482  			cq->resize_buf = NULL;
483  			goto repoll;
484  		} else {
485  			mlx5_ib_warn(dev, "unexpected resize cqe\n");
486  		}
487  	}
488  
489  	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
490  	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
491  		/* We do not have to take the QP table lock here,
492  		 * because CQs will be locked while QPs are removed
493  		 * from the table.
494  		 */
495  		mqp = radix_tree_lookup(&dev->qp_table.tree, qpn);
496  		*cur_qp = to_mibqp(mqp);
497  	}
498  
499  	wc->qp  = &(*cur_qp)->ibqp;
500  	switch (opcode) {
501  	case MLX5_CQE_REQ:
502  		wq = &(*cur_qp)->sq;
503  		wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
504  		idx = wqe_ctr & (wq->wqe_cnt - 1);
505  		handle_good_req(wc, cqe64, wq, idx);
506  		handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
507  		wc->wr_id = wq->wrid[idx];
508  		wq->tail = wq->wqe_head[idx] + 1;
509  		wc->status = IB_WC_SUCCESS;
510  		break;
511  	case MLX5_CQE_RESP_WR_IMM:
512  	case MLX5_CQE_RESP_SEND:
513  	case MLX5_CQE_RESP_SEND_IMM:
514  	case MLX5_CQE_RESP_SEND_INV:
515  		handle_responder(wc, cqe64, *cur_qp);
516  		wc->status = IB_WC_SUCCESS;
517  		break;
518  	case MLX5_CQE_RESIZE_CQ:
519  		break;
520  	case MLX5_CQE_REQ_ERR:
521  	case MLX5_CQE_RESP_ERR:
522  		err_cqe = (struct mlx5_err_cqe *)cqe64;
523  		mlx5_handle_error_cqe(dev, err_cqe, wc);
524  		mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
525  			    opcode == MLX5_CQE_REQ_ERR ?
526  			    "Requestor" : "Responder", cq->mcq.cqn);
527  		mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
528  			    err_cqe->syndrome, err_cqe->vendor_err_synd);
529  		if (wc->status != IB_WC_WR_FLUSH_ERR &&
530  		    (*cur_qp)->type == MLX5_IB_QPT_REG_UMR)
531  			dev->umrc.state = MLX5_UMR_STATE_RECOVER;
532  
533  		if (opcode == MLX5_CQE_REQ_ERR) {
534  			wq = &(*cur_qp)->sq;
535  			wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
536  			idx = wqe_ctr & (wq->wqe_cnt - 1);
537  			wc->wr_id = wq->wrid[idx];
538  			wq->tail = wq->wqe_head[idx] + 1;
539  		} else {
540  			struct mlx5_ib_srq *srq;
541  
542  			if ((*cur_qp)->ibqp.srq) {
543  				srq = to_msrq((*cur_qp)->ibqp.srq);
544  				wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
545  				wc->wr_id = srq->wrid[wqe_ctr];
546  				mlx5_ib_free_srq_wqe(srq, wqe_ctr);
547  			} else {
548  				wq = &(*cur_qp)->rq;
549  				wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
550  				++wq->tail;
551  			}
552  		}
553  		break;
554  	case MLX5_CQE_SIG_ERR: {
555  		struct mlx5_sig_err_cqe *sig_err_cqe =
556  			(struct mlx5_sig_err_cqe *)cqe64;
557  		struct mlx5_core_sig_ctx *sig;
558  
559  		xa_lock(&dev->sig_mrs);
560  		sig = xa_load(&dev->sig_mrs,
561  				mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
562  		get_sig_err_item(sig_err_cqe, &sig->err_item);
563  		sig->sig_err_exists = true;
564  		sig->sigerr_count++;
565  
566  		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
567  			     cq->mcq.cqn, sig->err_item.key,
568  			     sig->err_item.err_type,
569  			     sig->err_item.sig_err_offset,
570  			     sig->err_item.expected,
571  			     sig->err_item.actual);
572  
573  		xa_unlock(&dev->sig_mrs);
574  		goto repoll;
575  	}
576  	}
577  
578  	return 0;
579  }
580  
581  static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
582  			struct ib_wc *wc, bool is_fatal_err)
583  {
584  	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
585  	struct mlx5_ib_wc *soft_wc, *next;
586  	int npolled = 0;
587  
588  	list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) {
589  		if (npolled >= num_entries)
590  			break;
591  
592  		mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
593  			    cq->mcq.cqn);
594  
595  		if (unlikely(is_fatal_err)) {
596  			soft_wc->wc.status = IB_WC_WR_FLUSH_ERR;
597  			soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
598  		}
599  		wc[npolled++] = soft_wc->wc;
600  		list_del(&soft_wc->list);
601  		kfree(soft_wc);
602  	}
603  
604  	return npolled;
605  }
606  
607  int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
608  {
609  	struct mlx5_ib_cq *cq = to_mcq(ibcq);
610  	struct mlx5_ib_qp *cur_qp = NULL;
611  	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
612  	struct mlx5_core_dev *mdev = dev->mdev;
613  	unsigned long flags;
614  	int soft_polled = 0;
615  	int npolled;
616  
617  	spin_lock_irqsave(&cq->lock, flags);
618  	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
619  		/* make sure no software-generated completions are waiting */
620  		if (unlikely(!list_empty(&cq->wc_list)))
621  			soft_polled = poll_soft_wc(cq, num_entries, wc, true);
622  
623  		mlx5_ib_poll_sw_comp(cq, num_entries - soft_polled,
624  				     wc + soft_polled, &npolled);
625  		goto out;
626  	}
627  
628  	if (unlikely(!list_empty(&cq->wc_list)))
629  		soft_polled = poll_soft_wc(cq, num_entries, wc, false);
630  
631  	for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
632  		if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
633  			break;
634  	}
635  
636  	if (npolled)
637  		mlx5_cq_set_ci(&cq->mcq);
638  out:
639  	spin_unlock_irqrestore(&cq->lock, flags);
640  
641  	return soft_polled + npolled;
642  }
643  
644  int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
645  {
646  	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
647  	struct mlx5_ib_cq *cq = to_mcq(ibcq);
648  	void __iomem *uar_page = mdev->priv.uar->map;
649  	unsigned long irq_flags;
650  	int ret = 0;
651  
652  	spin_lock_irqsave(&cq->lock, irq_flags);
653  	if (cq->notify_flags != IB_CQ_NEXT_COMP)
654  		cq->notify_flags = flags & IB_CQ_SOLICITED_MASK;
655  
656  	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list))
657  		ret = 1;
658  	spin_unlock_irqrestore(&cq->lock, irq_flags);
659  
660  	mlx5_cq_arm(&cq->mcq,
661  		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
662  		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
663  		    uar_page, to_mcq(ibcq)->mcq.cons_index);
664  
665  	return ret;
666  }
667  
668  static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
669  			     struct mlx5_ib_cq_buf *buf,
670  			     int nent,
671  			     int cqe_size)
672  {
673  	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
674  	u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
675  	u8 log_wq_sz     = ilog2(cqe_size);
676  	int err;
677  
678  	err = mlx5_frag_buf_alloc_node(dev->mdev,
679  				       nent * cqe_size,
680  				       frag_buf,
681  				       dev->mdev->priv.numa_node);
682  	if (err)
683  		return err;
684  
685  	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
686  
687  	buf->cqe_size = cqe_size;
688  	buf->nent = nent;
689  
690  	return 0;
691  }
692  
693  enum {
694  	MLX5_CQE_RES_FORMAT_HASH = 0,
695  	MLX5_CQE_RES_FORMAT_CSUM = 1,
696  	MLX5_CQE_RES_FORMAT_CSUM_STRIDX = 3,
697  };
698  
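/*
 * Translate the user-requested mini-CQE response format into the hardware
 * value, checking device capability for the checksum-with-stride-index
 * format.
 */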
699  static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format)
700  {
701  	switch (format) {
702  	case MLX5_IB_CQE_RES_FORMAT_HASH:
703  		return MLX5_CQE_RES_FORMAT_HASH;
704  	case MLX5_IB_CQE_RES_FORMAT_CSUM:
705  		return MLX5_CQE_RES_FORMAT_CSUM;
706  	case MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX:
707  		if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
708  			return MLX5_CQE_RES_FORMAT_CSUM_STRIDX;
709  		return -EOPNOTSUPP;
710  	default:
711  		return -EINVAL;
712  	}
713  }
714  
715  static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
716  			  struct mlx5_ib_cq *cq, int entries, u32 **cqb,
717  			  int *cqe_size, int *index, int *inlen)
718  {
719  	struct mlx5_ib_create_cq ucmd = {};
720  	unsigned long page_size;
721  	unsigned int page_offset_quantized;
722  	size_t ucmdlen;
723  	__be64 *pas;
724  	int ncont;
725  	void *cqc;
726  	int err;
727  	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
728  		udata, struct mlx5_ib_ucontext, ibucontext);
729  
730  	ucmdlen = min(udata->inlen, sizeof(ucmd));
731  	if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
732  		return -EINVAL;
733  
734  	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
735  		return -EFAULT;
736  
737  	if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD |
738  			    MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX |
739  			    MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS)))
740  		return -EINVAL;
741  
742  	if ((ucmd.cqe_size != 64 && ucmd.cqe_size != 128) ||
743  	    ucmd.reserved0 || ucmd.reserved1)
744  		return -EINVAL;
745  
746  	*cqe_size = ucmd.cqe_size;
747  
748  	cq->buf.umem =
749  		ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
750  			    entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE);
751  	if (IS_ERR(cq->buf.umem)) {
752  		err = PTR_ERR(cq->buf.umem);
753  		return err;
754  	}
755  
756  	page_size = mlx5_umem_find_best_cq_quantized_pgoff(
757  		cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT,
758  		page_offset, 64, &page_offset_quantized);
759  	if (!page_size) {
760  		err = -EINVAL;
761  		goto err_umem;
762  	}
763  
764  	err = mlx5_ib_db_map_user(context, ucmd.db_addr, &cq->db);
765  	if (err)
766  		goto err_umem;
767  
768  	ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size);
769  	mlx5_ib_dbg(
770  		dev,
771  		"addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n",
772  		ucmd.buf_addr, entries * ucmd.cqe_size,
773  		ib_umem_num_pages(cq->buf.umem), page_size, ncont);
774  
775  	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
776  		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
777  	*cqb = kvzalloc(*inlen, GFP_KERNEL);
778  	if (!*cqb) {
779  		err = -ENOMEM;
780  		goto err_db;
781  	}
782  
783  	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
784  	mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0);
785  
786  	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
787  	MLX5_SET(cqc, cqc, log_page_size,
788  		 order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
789  	MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
790  
791  	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
792  		*index = ucmd.uar_page_index;
793  	} else if (context->bfregi.lib_uar_dyn) {
794  		err = -EINVAL;
795  		goto err_cqb;
796  	} else {
797  		*index = context->bfregi.sys_pages[0];
798  	}
799  
800  	if (ucmd.cqe_comp_en == 1) {
801  		int mini_cqe_format;
802  
803  		if (!((*cqe_size == 128 &&
804  		       MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) ||
805  		      (*cqe_size == 64  &&
806  		       MLX5_CAP_GEN(dev->mdev, cqe_compression)))) {
807  			err = -EOPNOTSUPP;
808  			mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n",
809  				     *cqe_size);
810  			goto err_cqb;
811  		}
812  
813  		mini_cqe_format =
814  			mini_cqe_res_format_to_hw(dev,
815  						  ucmd.cqe_comp_res_format);
816  		if (mini_cqe_format < 0) {
817  			err = mini_cqe_format;
818  			mlx5_ib_dbg(dev, "CQE compression res format %d error: %d\n",
819  				    ucmd.cqe_comp_res_format, err);
820  			goto err_cqb;
821  		}
822  
823  		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
824  		MLX5_SET(cqc, cqc, mini_cqe_res_format, mini_cqe_format);
825  	}
826  
827  	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) {
828  		if (*cqe_size != 128 ||
829  		    !MLX5_CAP_GEN(dev->mdev, cqe_128_always)) {
830  			err = -EOPNOTSUPP;
831  			mlx5_ib_warn(dev,
832  				     "CQE padding is not supported for CQE size of %dB!\n",
833  				     *cqe_size);
834  			goto err_cqb;
835  		}
836  
837  		cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD;
838  	}
839  
840  	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS)
841  		cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS;
842  
843  	MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid);
844  	return 0;
845  
846  err_cqb:
847  	kvfree(*cqb);
848  
849  err_db:
850  	mlx5_ib_db_unmap_user(context, &cq->db);
851  
852  err_umem:
853  	ib_umem_release(cq->buf.umem);
854  	return err;
855  }
856  
857  static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata)
858  {
859  	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
860  		udata, struct mlx5_ib_ucontext, ibucontext);
861  
862  	mlx5_ib_db_unmap_user(context, &cq->db);
863  	ib_umem_release(cq->buf.umem);
864  }
865  
866  static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf)
867  {
868  	int i;
869  	void *cqe;
870  	struct mlx5_cqe64 *cqe64;
871  
872  	for (i = 0; i < buf->nent; i++) {
873  		cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
874  		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
875  		cqe64->op_own = MLX5_CQE_INVALID << 4;
876  	}
877  }
878  
879  static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
880  			    int entries, int cqe_size,
881  			    u32 **cqb, int *index, int *inlen)
882  {
883  	__be64 *pas;
884  	void *cqc;
885  	int err;
886  
887  	err = mlx5_db_alloc(dev->mdev, &cq->db);
888  	if (err)
889  		return err;
890  
891  	cq->mcq.set_ci_db  = cq->db.db;
892  	cq->mcq.arm_db     = cq->db.db + 1;
893  	cq->mcq.cqe_sz = cqe_size;
894  
895  	err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
896  	if (err)
897  		goto err_db;
898  
899  	init_cq_frag_buf(&cq->buf);
900  
901  	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
902  		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
903  		 cq->buf.frag_buf.npages;
904  	*cqb = kvzalloc(*inlen, GFP_KERNEL);
905  	if (!*cqb) {
906  		err = -ENOMEM;
907  		goto err_buf;
908  	}
909  
910  	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
911  	mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
912  
913  	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
914  	MLX5_SET(cqc, cqc, log_page_size,
915  		 cq->buf.frag_buf.page_shift -
916  		 MLX5_ADAPTER_PAGE_SHIFT);
917  
918  	*index = dev->mdev->priv.uar->index;
919  
920  	return 0;
921  
922  err_buf:
923  	free_cq_buf(dev, &cq->buf);
924  
925  err_db:
926  	mlx5_db_free(dev->mdev, &cq->db);
927  	return err;
928  }
929  
930  static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
931  {
932  	free_cq_buf(dev, &cq->buf);
933  	mlx5_db_free(dev->mdev, &cq->db);
934  }
935  
936  static void notify_soft_wc_handler(struct work_struct *work)
937  {
938  	struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq,
939  					     notify_work);
940  
941  	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
942  }
943  
944  int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
945  		      struct ib_udata *udata)
946  {
947  	struct ib_device *ibdev = ibcq->device;
948  	int entries = attr->cqe;
949  	int vector = attr->comp_vector;
950  	struct mlx5_ib_dev *dev = to_mdev(ibdev);
951  	struct mlx5_ib_cq *cq = to_mcq(ibcq);
952  	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
953  	int index;
954  	int inlen;
955  	u32 *cqb = NULL;
956  	void *cqc;
957  	int cqe_size;
958  	int eqn;
959  	int err;
960  
961  	if (entries < 0 ||
962  	    (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
963  		return -EINVAL;
964  
965  	if (check_cq_create_flags(attr->flags))
966  		return -EOPNOTSUPP;
967  
968  	entries = roundup_pow_of_two(entries + 1);
969  	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
970  		return -EINVAL;
971  
972  	cq->ibcq.cqe = entries - 1;
973  	mutex_init(&cq->resize_mutex);
974  	spin_lock_init(&cq->lock);
975  	cq->resize_buf = NULL;
976  	cq->resize_umem = NULL;
977  	cq->create_flags = attr->flags;
978  	INIT_LIST_HEAD(&cq->list_send_qp);
979  	INIT_LIST_HEAD(&cq->list_recv_qp);
980  
981  	if (udata) {
982  		err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
983  				     &index, &inlen);
984  		if (err)
985  			return err;
986  	} else {
987  		cqe_size = cache_line_size() == 128 ? 128 : 64;
988  		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
989  				       &index, &inlen);
990  		if (err)
991  			return err;
992  
993  		INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
994  	}
995  
996  	err = mlx5_comp_eqn_get(dev->mdev, vector, &eqn);
997  	if (err)
998  		goto err_cqb;
999  
1000  	cq->cqe_size = cqe_size;
1001  
1002  	cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
1003  	MLX5_SET(cqc, cqc, cqe_sz,
1004  		 cqe_sz_to_mlx_sz(cqe_size,
1005  				  cq->private_flags &
1006  				  MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
1007  	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
1008  	MLX5_SET(cqc, cqc, uar_page, index);
1009  	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
1010  	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
1011  	if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
1012  		MLX5_SET(cqc, cqc, oi, 1);
1013  
1014  	err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
1015  	if (err)
1016  		goto err_cqb;
1017  
1018  	mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
1019  	if (udata)
1020  		cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
1021  	else
1022  		cq->mcq.comp  = mlx5_ib_cq_comp;
1023  	cq->mcq.event = mlx5_ib_cq_event;
1024  
1025  	INIT_LIST_HEAD(&cq->wc_list);
1026  
1027  	if (udata)
1028  		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
1029  			err = -EFAULT;
1030  			goto err_cmd;
1031  		}
1032  
1033  
1034  	kvfree(cqb);
1035  	return 0;
1036  
1037  err_cmd:
1038  	mlx5_core_destroy_cq(dev->mdev, &cq->mcq);
1039  
1040  err_cqb:
1041  	kvfree(cqb);
1042  	if (udata)
1043  		destroy_cq_user(cq, udata);
1044  	else
1045  		destroy_cq_kernel(dev, cq);
1046  	return err;
1047  }
1048  
1049  int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
1050  {
1051  	struct mlx5_ib_dev *dev = to_mdev(cq->device);
1052  	struct mlx5_ib_cq *mcq = to_mcq(cq);
1053  	int ret;
1054  
1055  	ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
1056  	if (ret)
1057  		return ret;
1058  
1059  	if (udata)
1060  		destroy_cq_user(mcq, udata);
1061  	else
1062  		destroy_cq_kernel(dev, mcq);
1063  	return 0;
1064  }
1065  
1066  static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
1067  {
1068  	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
1069  }
1070  
1071  void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
1072  {
1073  	struct mlx5_cqe64 *cqe64, *dest64;
1074  	void *cqe, *dest;
1075  	u32 prod_index;
1076  	int nfreed = 0;
1077  	u8 owner_bit;
1078  
1079  	if (!cq)
1080  		return;
1081  
1082  	/* First we need to find the current producer index, so we
1083  	 * know where to start cleaning from.  It doesn't matter if HW
1084  	 * adds new entries after this loop -- the QP we're worried
1085  	 * about is already in RESET, so the new entries won't come
1086  	 * from our QP and therefore don't need to be checked.
1087  	 */
1088  	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
1089  		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
1090  			break;
1091  
1092  	/* Now sweep backwards through the CQ, removing CQ entries
1093  	 * that match our QP by copying older entries on top of them.
1094  	 */
1095  	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
1096  		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
1097  		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
1098  		if (is_equal_rsn(cqe64, rsn)) {
1099  			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
1100  				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
1101  			++nfreed;
1102  		} else if (nfreed) {
1103  			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
1104  			dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
1105  			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
1106  			memcpy(dest, cqe, cq->mcq.cqe_sz);
1107  			dest64->op_own = owner_bit |
1108  				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
1109  		}
1110  	}
1111  
1112  	if (nfreed) {
1113  		cq->mcq.cons_index += nfreed;
1114  		/* Make sure update of buffer contents is done before
1115  		 * updating consumer index.
1116  		 */
1117  		wmb();
1118  		mlx5_cq_set_ci(&cq->mcq);
1119  	}
1120  }
1121  
1122  void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
1123  {
1124  	if (!cq)
1125  		return;
1126  
1127  	spin_lock_irq(&cq->lock);
1128  	__mlx5_ib_cq_clean(cq, qpn, srq);
1129  	spin_unlock_irq(&cq->lock);
1130  }
1131  
1132  int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
1133  {
1134  	struct mlx5_ib_dev *dev = to_mdev(cq->device);
1135  	struct mlx5_ib_cq *mcq = to_mcq(cq);
1136  	int err;
1137  
1138  	if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
1139  		return -EOPNOTSUPP;
1140  
1141  	if (cq_period > MLX5_MAX_CQ_PERIOD)
1142  		return -EINVAL;
1143  
1144  	err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
1145  					     cq_period, cq_count);
1146  	if (err)
1147  		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
1148  
1149  	return err;
1150  }
1151  
1152  static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
1153  		       int entries, struct ib_udata *udata,
1154  		       int *cqe_size)
1155  {
1156  	struct mlx5_ib_resize_cq ucmd;
1157  	struct ib_umem *umem;
1158  	int err;
1159  
1160  	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
1161  	if (err)
1162  		return err;
1163  
1164  	if (ucmd.reserved0 || ucmd.reserved1)
1165  		return -EINVAL;
1166  
1167  	/* check multiplication overflow */
1168  	if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
1169  		return -EINVAL;
1170  
1171  	umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
1172  			   (size_t)ucmd.cqe_size * entries,
1173  			   IB_ACCESS_LOCAL_WRITE);
1174  	if (IS_ERR(umem)) {
1175  		err = PTR_ERR(umem);
1176  		return err;
1177  	}
1178  
1179  	cq->resize_umem = umem;
1180  	*cqe_size = ucmd.cqe_size;
1181  
1182  	return 0;
1183  }
1184  
1185  static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
1186  			 int entries, int cqe_size)
1187  {
1188  	int err;
1189  
1190  	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
1191  	if (!cq->resize_buf)
1192  		return -ENOMEM;
1193  
1194  	err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
1195  	if (err)
1196  		goto ex;
1197  
1198  	init_cq_frag_buf(cq->resize_buf);
1199  
1200  	return 0;
1201  
1202  ex:
1203  	kfree(cq->resize_buf);
1204  	return err;
1205  }
1206  
1207  static int copy_resize_cqes(struct mlx5_ib_cq *cq)
1208  {
1209  	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
1210  	struct mlx5_cqe64 *scqe64;
1211  	struct mlx5_cqe64 *dcqe64;
1212  	void *start_cqe;
1213  	void *scqe;
1214  	void *dcqe;
1215  	int ssize;
1216  	int dsize;
1217  	int i;
1218  	u8 sw_own;
1219  
1220  	ssize = cq->buf.cqe_size;
1221  	dsize = cq->resize_buf->cqe_size;
1222  	if (ssize != dsize) {
1223  		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
1224  		return -EINVAL;
1225  	}
1226  
1227  	i = cq->mcq.cons_index;
1228  	scqe = get_sw_cqe(cq, i);
1229  	scqe64 = ssize == 64 ? scqe : scqe + 64;
1230  	start_cqe = scqe;
1231  	if (!scqe) {
1232  		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
1233  		return -EINVAL;
1234  	}
1235  
1236  	while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) {
1237  		dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
1238  					     (i + 1) & cq->resize_buf->nent);
1239  		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
1240  		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
1241  		memcpy(dcqe, scqe, dsize);
1242  		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;
1243  
1244  		++i;
1245  		scqe = get_sw_cqe(cq, i);
1246  		scqe64 = ssize == 64 ? scqe : scqe + 64;
1247  		if (!scqe) {
1248  			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
1249  			return -EINVAL;
1250  		}
1251  
1252  		if (scqe == start_cqe) {
1253  			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
1254  				cq->mcq.cqn);
1255  			return -ENOMEM;
1256  		}
1257  	}
1258  	++cq->mcq.cons_index;
1259  	return 0;
1260  }
1261  
1262  int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
1263  {
1264  	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
1265  	struct mlx5_ib_cq *cq = to_mcq(ibcq);
1266  	void *cqc;
1267  	u32 *in;
1268  	int err;
1269  	int npas;
1270  	__be64 *pas;
1271  	unsigned int page_offset_quantized = 0;
1272  	unsigned int page_shift;
1273  	int inlen;
1274  	int cqe_size;
1275  	unsigned long flags;
1276  
1277  	if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
1278  		pr_info("Firmware does not support resize CQ\n");
1279  		return -ENOSYS;
1280  	}
1281  
1282  	if (entries < 1 ||
1283  	    entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) {
1284  		mlx5_ib_warn(dev, "wrong entries number %d, max %d\n",
1285  			     entries,
1286  			     1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz));
1287  		return -EINVAL;
1288  	}
1289  
1290  	entries = roundup_pow_of_two(entries + 1);
1291  	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
1292  		return -EINVAL;
1293  
1294  	if (entries == ibcq->cqe + 1)
1295  		return 0;
1296  
1297  	mutex_lock(&cq->resize_mutex);
1298  	if (udata) {
1299  		unsigned long page_size;
1300  
1301  		err = resize_user(dev, cq, entries, udata, &cqe_size);
1302  		if (err)
1303  			goto ex;
1304  
1305  		page_size = mlx5_umem_find_best_cq_quantized_pgoff(
1306  			cq->resize_umem, cqc, log_page_size,
1307  			MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
1308  			&page_offset_quantized);
1309  		if (!page_size) {
1310  			err = -EINVAL;
1311  			goto ex_resize;
1312  		}
1313  		npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size);
1314  		page_shift = order_base_2(page_size);
1315  	} else {
1316  		struct mlx5_frag_buf *frag_buf;
1317  
1318  		cqe_size = 64;
1319  		err = resize_kernel(dev, cq, entries, cqe_size);
1320  		if (err)
1321  			goto ex;
1322  		frag_buf = &cq->resize_buf->frag_buf;
1323  		npas = frag_buf->npages;
1324  		page_shift = frag_buf->page_shift;
1325  	}
1326  
1327  	inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
1328  		MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
1329  
1330  	in = kvzalloc(inlen, GFP_KERNEL);
1331  	if (!in) {
1332  		err = -ENOMEM;
1333  		goto ex_resize;
1334  	}
1335  
1336  	pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
1337  	if (udata)
1338  		mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas,
1339  				     0);
1340  	else
1341  		mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas);
1342  
1343  	MLX5_SET(modify_cq_in, in,
1344  		 modify_field_select_resize_field_select.resize_field_select.resize_field_select,
1345  		 MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
1346  		 MLX5_MODIFY_CQ_MASK_PG_OFFSET |
1347  		 MLX5_MODIFY_CQ_MASK_PG_SIZE);
1348  
1349  	cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);
1350  
1351  	MLX5_SET(cqc, cqc, log_page_size,
1352  		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);
1353  	MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
1354  	MLX5_SET(cqc, cqc, cqe_sz,
1355  		 cqe_sz_to_mlx_sz(cqe_size,
1356  				  cq->private_flags &
1357  				  MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
1358  	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
1359  
1360  	MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
1361  	MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn);
1362  
1363  	err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
1364  	if (err)
1365  		goto ex_alloc;
1366  
1367  	if (udata) {
1368  		cq->ibcq.cqe = entries - 1;
1369  		ib_umem_release(cq->buf.umem);
1370  		cq->buf.umem = cq->resize_umem;
1371  		cq->resize_umem = NULL;
1372  	} else {
1373  		struct mlx5_ib_cq_buf tbuf;
1374  		int resized = 0;
1375  
1376  		spin_lock_irqsave(&cq->lock, flags);
1377  		if (cq->resize_buf) {
1378  			err = copy_resize_cqes(cq);
1379  			if (!err) {
1380  				tbuf = cq->buf;
1381  				cq->buf = *cq->resize_buf;
1382  				kfree(cq->resize_buf);
1383  				cq->resize_buf = NULL;
1384  				resized = 1;
1385  			}
1386  		}
1387  		cq->ibcq.cqe = entries - 1;
1388  		spin_unlock_irqrestore(&cq->lock, flags);
1389  		if (resized)
1390  			free_cq_buf(dev, &tbuf);
1391  	}
1392  	mutex_unlock(&cq->resize_mutex);
1393  
1394  	kvfree(in);
1395  	return 0;
1396  
1397  ex_alloc:
1398  	kvfree(in);
1399  
1400  ex_resize:
1401  	ib_umem_release(cq->resize_umem);
1402  	if (!udata) {
1403  		free_cq_buf(dev, cq->resize_buf);
1404  		cq->resize_buf = NULL;
1405  	}
1406  ex:
1407  	mutex_unlock(&cq->resize_mutex);
1408  	return err;
1409  }
1410  
1411  int mlx5_ib_get_cqe_size(struct ib_cq *ibcq)
1412  {
1413  	struct mlx5_ib_cq *cq;
1414  
1415  	if (!ibcq)
1416  		return 128;
1417  
1418  	cq = to_mcq(ibcq);
1419  	return cq->cqe_size;
1420  }
1421  
1422  /* Called from atomic context */
1423  int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
1424  {
1425  	struct mlx5_ib_wc *soft_wc;
1426  	struct mlx5_ib_cq *cq = to_mcq(ibcq);
1427  	unsigned long flags;
1428  
1429  	soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC);
1430  	if (!soft_wc)
1431  		return -ENOMEM;
1432  
1433  	soft_wc->wc = *wc;
1434  	spin_lock_irqsave(&cq->lock, flags);
1435  	list_add_tail(&soft_wc->list, &cq->wc_list);
1436  	if (cq->notify_flags == IB_CQ_NEXT_COMP ||
1437  	    wc->status != IB_WC_SUCCESS) {
1438  		cq->notify_flags = 0;
1439  		schedule_work(&cq->notify_work);
1440  	}
1441  	spin_unlock_irqrestore(&cq->lock, flags);
1442  
1443  	return 0;
1444  }
1445