xref: /openbmc/linux/drivers/infiniband/hw/hns/hns_roce_qp.c (revision cdcc26d714c96e9de75c549f05d770b3ddaf2d21)
1 /*
2  * Copyright (c) 2016 Hisilicon Limited.
3  * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <linux/pci.h>
35 #include <rdma/ib_addr.h>
36 #include <rdma/ib_umem.h>
37 #include <rdma/uverbs_ioctl.h>
38 #include "hns_roce_common.h"
39 #include "hns_roce_device.h"
40 #include "hns_roce_hem.h"
41 
42 static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev,
43 					      u32 qpn)
44 {
45 	struct device *dev = hr_dev->dev;
46 	struct hns_roce_qp *qp;
47 	unsigned long flags;
48 
49 	xa_lock_irqsave(&hr_dev->qp_table_xa, flags);
50 	qp = __hns_roce_qp_lookup(hr_dev, qpn);
51 	if (qp)
52 		refcount_inc(&qp->refcount);
53 	xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags);
54 
55 	if (!qp)
56 		dev_warn(dev, "async event for bogus QP %08x\n", qpn);
57 
58 	return qp;
59 }
60 
61 static void flush_work_handle(struct work_struct *work)
62 {
63 	struct hns_roce_work *flush_work = container_of(work,
64 					struct hns_roce_work, work);
65 	struct hns_roce_qp *hr_qp = container_of(flush_work,
66 					struct hns_roce_qp, flush_work);
67 	struct device *dev = flush_work->hr_dev->dev;
68 	struct ib_qp_attr attr;
69 	int attr_mask;
70 	int ret;
71 
72 	attr_mask = IB_QP_STATE;
73 	attr.qp_state = IB_QPS_ERR;
74 
75 	if (test_and_clear_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) {
76 		ret = hns_roce_modify_qp(&hr_qp->ibqp, &attr, attr_mask, NULL);
77 		if (ret)
78 			dev_err(dev, "modify QP to error state failed(%d) during CQE flush\n",
79 				ret);
80 	}
81 
82 	/*
83 	 * make sure we signal QP destroy leg that flush QP was completed
84 	 * so that it can safely proceed ahead now and destroy QP
85 	 */
86 	if (refcount_dec_and_test(&hr_qp->refcount))
87 		complete(&hr_qp->free);
88 }
89 
90 void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
91 {
92 	struct hns_roce_work *flush_work = &hr_qp->flush_work;
93 
94 	flush_work->hr_dev = hr_dev;
95 	INIT_WORK(&flush_work->work, flush_work_handle);
96 	refcount_inc(&hr_qp->refcount);
97 	queue_work(hr_dev->irq_workq, &flush_work->work);
98 }
99 
100 void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
101 {
102 	/*
103 	 * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state
104 	 * gets into errored mode. Hence, as a workaround to this
105 	 * hardware limitation, driver needs to assist in flushing. But
106 	 * the flushing operation uses mailbox to convey the QP state to
107 	 * the hardware and which can sleep due to the mutex protection
108 	 * around the mailbox calls. Hence, use the deferred flush for
109 	 * now.
110 	 */
111 	if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag))
112 		init_flush_work(dev, qp);
113 }
114 
115 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
116 {
117 	struct hns_roce_qp *qp;
118 
119 	qp = hns_roce_qp_lookup(hr_dev, qpn);
120 	if (!qp)
121 		return;
122 
123 	qp->event(qp, (enum hns_roce_event)event_type);
124 
125 	if (refcount_dec_and_test(&qp->refcount))
126 		complete(&qp->free);
127 }
128 
129 void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn)
130 {
131 	struct hns_roce_qp *qp;
132 
133 	qp = hns_roce_qp_lookup(hr_dev, qpn);
134 	if (!qp)
135 		return;
136 
137 	qp->state = IB_QPS_ERR;
138 	flush_cqe(hr_dev, qp);
139 
140 	if (refcount_dec_and_test(&qp->refcount))
141 		complete(&qp->free);
142 }
143 
144 static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
145 				 enum hns_roce_event type)
146 {
147 	struct ib_qp *ibqp = &hr_qp->ibqp;
148 	struct ib_event event;
149 
150 	if (ibqp->event_handler) {
151 		event.device = ibqp->device;
152 		event.element.qp = ibqp;
153 		switch (type) {
154 		case HNS_ROCE_EVENT_TYPE_PATH_MIG:
155 			event.event = IB_EVENT_PATH_MIG;
156 			break;
157 		case HNS_ROCE_EVENT_TYPE_COMM_EST:
158 			event.event = IB_EVENT_COMM_EST;
159 			break;
160 		case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
161 			event.event = IB_EVENT_SQ_DRAINED;
162 			break;
163 		case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
164 			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
165 			break;
166 		case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
167 			event.event = IB_EVENT_QP_FATAL;
168 			break;
169 		case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
170 			event.event = IB_EVENT_PATH_MIG_ERR;
171 			break;
172 		case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
173 			event.event = IB_EVENT_QP_REQ_ERR;
174 			break;
175 		case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
176 		case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
177 		case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
178 			event.event = IB_EVENT_QP_ACCESS_ERR;
179 			break;
180 		default:
181 			dev_dbg(ibqp->device->dev.parent, "roce_ib: Unexpected event type %d on QP %06lx\n",
182 				type, hr_qp->qpn);
183 			return;
184 		}
185 		ibqp->event_handler(&event, ibqp->qp_context);
186 	}
187 }
188 
189 static u8 get_affinity_cq_bank(u8 qp_bank)
190 {
191 	return (qp_bank >> 1) & CQ_BANKID_MASK;
192 }
193 
194 static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr,
195 					struct hns_roce_bank *bank)
196 {
197 #define INVALID_LOAD_QPNUM 0xFFFFFFFF
198 	struct ib_cq *scq = init_attr->send_cq;
199 	u32 least_load = INVALID_LOAD_QPNUM;
200 	unsigned long cqn = 0;
201 	u8 bankid = 0;
202 	u32 bankcnt;
203 	u8 i;
204 
205 	if (scq)
206 		cqn = to_hr_cq(scq)->cqn;
207 
208 	for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
209 		if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK)))
210 			continue;
211 
212 		bankcnt = bank[i].inuse;
213 		if (bankcnt < least_load) {
214 			least_load = bankcnt;
215 			bankid = i;
216 		}
217 	}
218 
219 	return bankid;
220 }
221 
222 static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid,
223 				 unsigned long *qpn)
224 {
225 	int id;
226 
227 	id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL);
228 	if (id < 0) {
229 		id = ida_alloc_range(&bank->ida, bank->min, bank->max,
230 				     GFP_KERNEL);
231 		if (id < 0)
232 			return id;
233 	}
234 
235 	/* the QPN should keep increasing until the max value is reached. */
236 	bank->next = (id + 1) > bank->max ? bank->min : id + 1;
237 
238 	/* the lower 3 bits is bankid */
239 	*qpn = (id << 3) | bankid;
240 
241 	return 0;
242 }
243 static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
244 		     struct ib_qp_init_attr *init_attr)
245 {
246 	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
247 	unsigned long num = 0;
248 	u8 bankid;
249 	int ret;
250 
251 	if (hr_qp->ibqp.qp_type == IB_QPT_GSI) {
252 		num = 1;
253 	} else {
254 		mutex_lock(&qp_table->bank_mutex);
255 		bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank);
256 
257 		ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid,
258 					    &num);
259 		if (ret) {
260 			ibdev_err(&hr_dev->ib_dev,
261 				  "failed to alloc QPN, ret = %d\n", ret);
262 			mutex_unlock(&qp_table->bank_mutex);
263 			return ret;
264 		}
265 
266 		qp_table->bank[bankid].inuse++;
267 		mutex_unlock(&qp_table->bank_mutex);
268 	}
269 
270 	hr_qp->qpn = num;
271 
272 	return 0;
273 }
274 
275 static void add_qp_to_list(struct hns_roce_dev *hr_dev,
276 			   struct hns_roce_qp *hr_qp,
277 			   struct ib_cq *send_cq, struct ib_cq *recv_cq)
278 {
279 	struct hns_roce_cq *hr_send_cq, *hr_recv_cq;
280 	unsigned long flags;
281 
282 	hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL;
283 	hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL;
284 
285 	spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
286 	hns_roce_lock_cqs(hr_send_cq, hr_recv_cq);
287 
288 	list_add_tail(&hr_qp->node, &hr_dev->qp_list);
289 	if (hr_send_cq)
290 		list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list);
291 	if (hr_recv_cq)
292 		list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list);
293 
294 	hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq);
295 	spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
296 }
297 
298 static int hns_roce_qp_store(struct hns_roce_dev *hr_dev,
299 			     struct hns_roce_qp *hr_qp,
300 			     struct ib_qp_init_attr *init_attr)
301 {
302 	struct xarray *xa = &hr_dev->qp_table_xa;
303 	int ret;
304 
305 	if (!hr_qp->qpn)
306 		return -EINVAL;
307 
308 	ret = xa_err(xa_store_irq(xa, hr_qp->qpn, hr_qp, GFP_KERNEL));
309 	if (ret)
310 		dev_err(hr_dev->dev, "failed to xa store for QPC\n");
311 	else
312 		/* add QP to device's QP list for softwc */
313 		add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq,
314 			       init_attr->recv_cq);
315 
316 	return ret;
317 }
318 
319 static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
320 {
321 	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
322 	struct device *dev = hr_dev->dev;
323 	int ret;
324 
325 	if (!hr_qp->qpn)
326 		return -EINVAL;
327 
328 	/* Alloc memory for QPC */
329 	ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn);
330 	if (ret) {
331 		dev_err(dev, "failed to get QPC table\n");
332 		goto err_out;
333 	}
334 
335 	/* Alloc memory for IRRL */
336 	ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
337 	if (ret) {
338 		dev_err(dev, "failed to get IRRL table\n");
339 		goto err_put_qp;
340 	}
341 
342 	if (hr_dev->caps.trrl_entry_sz) {
343 		/* Alloc memory for TRRL */
344 		ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table,
345 					 hr_qp->qpn);
346 		if (ret) {
347 			dev_err(dev, "failed to get TRRL table\n");
348 			goto err_put_irrl;
349 		}
350 	}
351 
352 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
353 		/* Alloc memory for SCC CTX */
354 		ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
355 					 hr_qp->qpn);
356 		if (ret) {
357 			dev_err(dev, "failed to get SCC CTX table\n");
358 			goto err_put_trrl;
359 		}
360 	}
361 
362 	return 0;
363 
364 err_put_trrl:
365 	if (hr_dev->caps.trrl_entry_sz)
366 		hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
367 
368 err_put_irrl:
369 	hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
370 
371 err_put_qp:
372 	hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn);
373 
374 err_out:
375 	return ret;
376 }
377 
378 static void qp_user_mmap_entry_remove(struct hns_roce_qp *hr_qp)
379 {
380 	rdma_user_mmap_entry_remove(&hr_qp->dwqe_mmap_entry->rdma_entry);
381 }
382 
383 void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
384 {
385 	struct xarray *xa = &hr_dev->qp_table_xa;
386 	unsigned long flags;
387 
388 	list_del(&hr_qp->node);
389 
390 	if (hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
391 		list_del(&hr_qp->sq_node);
392 
393 	if (hr_qp->ibqp.qp_type != IB_QPT_XRC_INI &&
394 	    hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
395 		list_del(&hr_qp->rq_node);
396 
397 	xa_lock_irqsave(xa, flags);
398 	__xa_erase(xa, hr_qp->qpn);
399 	xa_unlock_irqrestore(xa, flags);
400 }
401 
402 static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
403 {
404 	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
405 
406 	if (hr_dev->caps.trrl_entry_sz)
407 		hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
408 	hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
409 }
410 
411 static inline u8 get_qp_bankid(unsigned long qpn)
412 {
413 	/* The lower 3 bits of QPN are used to hash to different banks */
414 	return (u8)(qpn & GENMASK(2, 0));
415 }
416 
417 static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
418 {
419 	u8 bankid;
420 
421 	if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
422 		return;
423 
424 	if (hr_qp->qpn < hr_dev->caps.reserved_qps)
425 		return;
426 
427 	bankid = get_qp_bankid(hr_qp->qpn);
428 
429 	ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3);
430 
431 	mutex_lock(&hr_dev->qp_table.bank_mutex);
432 	hr_dev->qp_table.bank[bankid].inuse--;
433 	mutex_unlock(&hr_dev->qp_table.bank_mutex);
434 }
435 
436 static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp,
437 		       bool user)
438 {
439 	u32 max_sge = dev->caps.max_rq_sg;
440 
441 	if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
442 		return max_sge;
443 
444 	/* Reserve SGEs only for HIP08 in kernel; The userspace driver will
445 	 * calculate number of max_sge with reserved SGEs when allocating wqe
446 	 * buf, so there is no need to do this again in kernel. But the number
447 	 * may exceed the capacity of SGEs recorded in the firmware, so the
448 	 * kernel driver should just adapt the value accordingly.
449 	 */
450 	if (user)
451 		max_sge = roundup_pow_of_two(max_sge + 1);
452 	else
453 		hr_qp->rq.rsv_sge = 1;
454 
455 	return max_sge;
456 }
457 
458 static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
459 		       struct hns_roce_qp *hr_qp, int has_rq, bool user)
460 {
461 	u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user);
462 	u32 cnt;
463 
464 	/* If srq exist, set zero for relative number of rq */
465 	if (!has_rq) {
466 		hr_qp->rq.wqe_cnt = 0;
467 		hr_qp->rq.max_gs = 0;
468 		cap->max_recv_wr = 0;
469 		cap->max_recv_sge = 0;
470 
471 		return 0;
472 	}
473 
474 	/* Check the validity of QP support capacity */
475 	if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
476 	    cap->max_recv_sge > max_sge) {
477 		ibdev_err(&hr_dev->ib_dev,
478 			  "RQ config error, depth = %u, sge = %u\n",
479 			  cap->max_recv_wr, cap->max_recv_sge);
480 		return -EINVAL;
481 	}
482 
483 	cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes));
484 	if (cnt > hr_dev->caps.max_wqes) {
485 		ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n",
486 			  cap->max_recv_wr);
487 		return -EINVAL;
488 	}
489 
490 	hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
491 					      hr_qp->rq.rsv_sge);
492 
493 	hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
494 				    hr_qp->rq.max_gs);
495 
496 	hr_qp->rq.wqe_cnt = cnt;
497 
498 	cap->max_recv_wr = cnt;
499 	cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
500 
501 	return 0;
502 }
503 
504 static u32 get_max_inline_data(struct hns_roce_dev *hr_dev,
505 			       struct ib_qp_cap *cap)
506 {
507 	if (cap->max_inline_data) {
508 		cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data);
509 		return min(cap->max_inline_data,
510 			   hr_dev->caps.max_sq_inline);
511 	}
512 
513 	return 0;
514 }
515 
516 static void update_inline_data(struct hns_roce_qp *hr_qp,
517 			       struct ib_qp_cap *cap)
518 {
519 	u32 sge_num = hr_qp->sq.ext_sge_cnt;
520 
521 	if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
522 		if (!(hr_qp->ibqp.qp_type == IB_QPT_GSI ||
523 		      hr_qp->ibqp.qp_type == IB_QPT_UD))
524 			sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num);
525 
526 		cap->max_inline_data = max(cap->max_inline_data,
527 					   sge_num * HNS_ROCE_SGE_SIZE);
528 	}
529 
530 	hr_qp->max_inline_data = cap->max_inline_data;
531 }
532 
533 static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi,
534 					 u32 max_send_sge)
535 {
536 	unsigned int std_sge_num;
537 	unsigned int min_sge;
538 
539 	std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
540 	min_sge = is_ud_or_gsi ? 1 : 0;
541 	return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
542 				min_sge;
543 }
544 
545 static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
546 						  u32 max_inline_data)
547 {
548 	unsigned int inline_sge;
549 
550 	if (!max_inline_data)
551 		return 0;
552 
553 	/*
554 	 * if max_inline_data less than
555 	 * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE,
556 	 * In addition to ud's mode, no need to extend sge.
557 	 */
558 	inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
559 	if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
560 		inline_sge = 0;
561 
562 	return inline_sge;
563 }
564 
565 static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
566 			      struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
567 {
568 	bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
569 				hr_qp->ibqp.qp_type == IB_QPT_UD);
570 	unsigned int std_sge_num;
571 	u32 inline_ext_sge = 0;
572 	u32 ext_wqe_sge_cnt;
573 	u32 total_sge_cnt;
574 
575 	cap->max_inline_data = get_max_inline_data(hr_dev, cap);
576 
577 	hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
578 	std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
579 	ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi,
580 							cap->max_send_sge);
581 
582 	if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
583 		inline_ext_sge = max(ext_wqe_sge_cnt,
584 				     get_sge_num_from_max_inl_data(is_ud_or_gsi,
585 							 cap->max_inline_data));
586 		hr_qp->sq.ext_sge_cnt = inline_ext_sge ?
587 					roundup_pow_of_two(inline_ext_sge) : 0;
588 
589 		hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num));
590 		hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
591 
592 		ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt;
593 	} else {
594 		hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
595 		hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
596 		hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs;
597 	}
598 
599 	/* If the number of extended sge is not zero, they MUST use the
600 	 * space of HNS_HW_PAGE_SIZE at least.
601 	 */
602 	if (ext_wqe_sge_cnt) {
603 		total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt);
604 		hr_qp->sge.sge_cnt = max(total_sge_cnt,
605 				(u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
606 	}
607 
608 	update_inline_data(hr_qp, cap);
609 }
610 
611 static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
612 					struct ib_qp_cap *cap,
613 					struct hns_roce_ib_create_qp *ucmd)
614 {
615 	u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
616 	u8 max_sq_stride = ilog2(roundup_sq_stride);
617 
618 	/* Sanity check SQ size before proceeding */
619 	if (ucmd->log_sq_stride > max_sq_stride ||
620 	    ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
621 		ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n");
622 		return -EINVAL;
623 	}
624 
625 	if (cap->max_send_sge > hr_dev->caps.max_sq_sg) {
626 		ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n",
627 			  cap->max_send_sge);
628 		return -EINVAL;
629 	}
630 
631 	return 0;
632 }
633 
634 static int set_user_sq_size(struct hns_roce_dev *hr_dev,
635 			    struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp,
636 			    struct hns_roce_ib_create_qp *ucmd)
637 {
638 	struct ib_device *ibdev = &hr_dev->ib_dev;
639 	u32 cnt = 0;
640 	int ret;
641 
642 	if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) ||
643 	    cnt > hr_dev->caps.max_wqes)
644 		return -EINVAL;
645 
646 	ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
647 	if (ret) {
648 		ibdev_err(ibdev, "failed to check user SQ size, ret = %d.\n",
649 			  ret);
650 		return ret;
651 	}
652 
653 	set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
654 
655 	hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
656 	hr_qp->sq.wqe_cnt = cnt;
657 	cap->max_send_sge = hr_qp->sq.max_gs;
658 
659 	return 0;
660 }
661 
662 static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
663 			    struct hns_roce_qp *hr_qp,
664 			    struct hns_roce_buf_attr *buf_attr)
665 {
666 	int buf_size;
667 	int idx = 0;
668 
669 	hr_qp->buff_size = 0;
670 
671 	/* SQ WQE */
672 	hr_qp->sq.offset = 0;
673 	buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt,
674 					  hr_qp->sq.wqe_shift);
675 	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
676 		buf_attr->region[idx].size = buf_size;
677 		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num;
678 		idx++;
679 		hr_qp->buff_size += buf_size;
680 	}
681 
682 	/* extend SGE WQE in SQ */
683 	hr_qp->sge.offset = hr_qp->buff_size;
684 	buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt,
685 					  hr_qp->sge.sge_shift);
686 	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
687 		buf_attr->region[idx].size = buf_size;
688 		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num;
689 		idx++;
690 		hr_qp->buff_size += buf_size;
691 	}
692 
693 	/* RQ WQE */
694 	hr_qp->rq.offset = hr_qp->buff_size;
695 	buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt,
696 					  hr_qp->rq.wqe_shift);
697 	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
698 		buf_attr->region[idx].size = buf_size;
699 		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num;
700 		idx++;
701 		hr_qp->buff_size += buf_size;
702 	}
703 
704 	if (hr_qp->buff_size < 1)
705 		return -EINVAL;
706 
707 	buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
708 	buf_attr->region_count = idx;
709 
710 	return 0;
711 }
712 
713 static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
714 			      struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp)
715 {
716 	struct ib_device *ibdev = &hr_dev->ib_dev;
717 	u32 cnt;
718 
719 	if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
720 	    cap->max_send_sge > hr_dev->caps.max_sq_sg) {
721 		ibdev_err(ibdev, "failed to check SQ WR or SGE num.\n");
722 		return -EINVAL;
723 	}
724 
725 	cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes));
726 	if (cnt > hr_dev->caps.max_wqes) {
727 		ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n",
728 			  cnt);
729 		return -EINVAL;
730 	}
731 
732 	hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
733 	hr_qp->sq.wqe_cnt = cnt;
734 
735 	set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
736 
737 	/* sync the parameters of kernel QP to user's configuration */
738 	cap->max_send_wr = cnt;
739 	cap->max_send_sge = hr_qp->sq.max_gs;
740 
741 	return 0;
742 }
743 
744 static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
745 {
746 	if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr)
747 		return 0;
748 
749 	return 1;
750 }
751 
752 static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
753 {
754 	if (attr->qp_type == IB_QPT_XRC_INI ||
755 	    attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
756 	    !attr->cap.max_recv_wr)
757 		return 0;
758 
759 	return 1;
760 }
761 
762 static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
763 			struct ib_qp_init_attr *init_attr,
764 			struct ib_udata *udata, unsigned long addr)
765 {
766 	struct ib_device *ibdev = &hr_dev->ib_dev;
767 	struct hns_roce_buf_attr buf_attr = {};
768 	int ret;
769 
770 	ret = set_wqe_buf_attr(hr_dev, hr_qp, &buf_attr);
771 	if (ret) {
772 		ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
773 		goto err_inline;
774 	}
775 	ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr,
776 				  PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
777 				  udata, addr);
778 	if (ret) {
779 		ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
780 		goto err_inline;
781 	}
782 
783 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
784 		hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE;
785 
786 	return 0;
787 
788 err_inline:
789 
790 	return ret;
791 }
792 
793 static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
794 {
795 	hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
796 }
797 
798 static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev,
799 				   struct ib_qp_init_attr *init_attr,
800 				   struct ib_udata *udata,
801 				   struct hns_roce_ib_create_qp_resp *resp,
802 				   struct hns_roce_ib_create_qp *ucmd)
803 {
804 	return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
805 		udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
806 		hns_roce_qp_has_sq(init_attr) &&
807 		udata->inlen >= offsetofend(typeof(*ucmd), sdb_addr));
808 }
809 
810 static inline bool user_qp_has_rdb(struct hns_roce_dev *hr_dev,
811 				   struct ib_qp_init_attr *init_attr,
812 				   struct ib_udata *udata,
813 				   struct hns_roce_ib_create_qp_resp *resp)
814 {
815 	return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
816 		udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
817 		hns_roce_qp_has_rq(init_attr));
818 }
819 
820 static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev,
821 				     struct ib_qp_init_attr *init_attr)
822 {
823 	return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
824 		hns_roce_qp_has_rq(init_attr));
825 }
826 
827 static int qp_mmap_entry(struct hns_roce_qp *hr_qp,
828 			 struct hns_roce_dev *hr_dev,
829 			 struct ib_udata *udata,
830 			 struct hns_roce_ib_create_qp_resp *resp)
831 {
832 	struct hns_roce_ucontext *uctx =
833 		rdma_udata_to_drv_context(udata,
834 			struct hns_roce_ucontext, ibucontext);
835 	struct rdma_user_mmap_entry *rdma_entry;
836 	u64 address;
837 
838 	address = hr_dev->dwqe_page + hr_qp->qpn * HNS_ROCE_DWQE_SIZE;
839 
840 	hr_qp->dwqe_mmap_entry =
841 		hns_roce_user_mmap_entry_insert(&uctx->ibucontext, address,
842 						HNS_ROCE_DWQE_SIZE,
843 						HNS_ROCE_MMAP_TYPE_DWQE);
844 
845 	if (!hr_qp->dwqe_mmap_entry) {
846 		ibdev_err(&hr_dev->ib_dev, "failed to get dwqe mmap entry.\n");
847 		return -ENOMEM;
848 	}
849 
850 	rdma_entry = &hr_qp->dwqe_mmap_entry->rdma_entry;
851 	resp->dwqe_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
852 
853 	return 0;
854 }
855 
856 static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
857 			    struct hns_roce_qp *hr_qp,
858 			    struct ib_qp_init_attr *init_attr,
859 			    struct ib_udata *udata,
860 			    struct hns_roce_ib_create_qp *ucmd,
861 			    struct hns_roce_ib_create_qp_resp *resp)
862 {
863 	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
864 		struct hns_roce_ucontext, ibucontext);
865 	struct ib_device *ibdev = &hr_dev->ib_dev;
866 	int ret;
867 
868 	if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) {
869 		ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb);
870 		if (ret) {
871 			ibdev_err(ibdev,
872 				  "failed to map user SQ doorbell, ret = %d.\n",
873 				  ret);
874 			goto err_out;
875 		}
876 		hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
877 	}
878 
879 	if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) {
880 		ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb);
881 		if (ret) {
882 			ibdev_err(ibdev,
883 				  "failed to map user RQ doorbell, ret = %d.\n",
884 				  ret);
885 			goto err_sdb;
886 		}
887 		hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
888 	}
889 
890 	return 0;
891 
892 err_sdb:
893 	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
894 		hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
895 err_out:
896 	return ret;
897 }
898 
899 static int alloc_kernel_qp_db(struct hns_roce_dev *hr_dev,
900 			      struct hns_roce_qp *hr_qp,
901 			      struct ib_qp_init_attr *init_attr)
902 {
903 	struct ib_device *ibdev = &hr_dev->ib_dev;
904 	int ret;
905 
906 	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
907 		hr_qp->sq.db_reg = hr_dev->mem_base +
908 				   HNS_ROCE_DWQE_SIZE * hr_qp->qpn;
909 	else
910 		hr_qp->sq.db_reg = hr_dev->reg_base + hr_dev->sdb_offset +
911 				   DB_REG_OFFSET * hr_dev->priv_uar.index;
912 
913 	hr_qp->rq.db_reg = hr_dev->reg_base + hr_dev->odb_offset +
914 			   DB_REG_OFFSET * hr_dev->priv_uar.index;
915 
916 	if (kernel_qp_has_rdb(hr_dev, init_attr)) {
917 		ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
918 		if (ret) {
919 			ibdev_err(ibdev,
920 				  "failed to alloc kernel RQ doorbell, ret = %d.\n",
921 				  ret);
922 			return ret;
923 		}
924 		*hr_qp->rdb.db_record = 0;
925 		hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
926 	}
927 
928 	return 0;
929 }
930 
931 static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
932 		       struct ib_qp_init_attr *init_attr,
933 		       struct ib_udata *udata,
934 		       struct hns_roce_ib_create_qp *ucmd,
935 		       struct hns_roce_ib_create_qp_resp *resp)
936 {
937 	int ret;
938 
939 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE)
940 		hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB;
941 
942 	if (udata) {
943 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
944 			ret = qp_mmap_entry(hr_qp, hr_dev, udata, resp);
945 			if (ret)
946 				return ret;
947 		}
948 
949 		ret = alloc_user_qp_db(hr_dev, hr_qp, init_attr, udata, ucmd,
950 				       resp);
951 		if (ret)
952 			goto err_remove_qp;
953 	} else {
954 		ret = alloc_kernel_qp_db(hr_dev, hr_qp, init_attr);
955 		if (ret)
956 			return ret;
957 	}
958 
959 	return 0;
960 
961 err_remove_qp:
962 	if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
963 		qp_user_mmap_entry_remove(hr_qp);
964 
965 	return ret;
966 }
967 
968 static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
969 		       struct ib_udata *udata)
970 {
971 	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
972 		udata, struct hns_roce_ucontext, ibucontext);
973 
974 	if (udata) {
975 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
976 			hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
977 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
978 			hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
979 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
980 			qp_user_mmap_entry_remove(hr_qp);
981 	} else {
982 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
983 			hns_roce_free_db(hr_dev, &hr_qp->rdb);
984 	}
985 }
986 
987 static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev,
988 			     struct hns_roce_qp *hr_qp)
989 {
990 	struct ib_device *ibdev = &hr_dev->ib_dev;
991 	u64 *sq_wrid = NULL;
992 	u64 *rq_wrid = NULL;
993 	int ret;
994 
995 	sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL);
996 	if (ZERO_OR_NULL_PTR(sq_wrid)) {
997 		ibdev_err(ibdev, "failed to alloc SQ wrid.\n");
998 		return -ENOMEM;
999 	}
1000 
1001 	if (hr_qp->rq.wqe_cnt) {
1002 		rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL);
1003 		if (ZERO_OR_NULL_PTR(rq_wrid)) {
1004 			ibdev_err(ibdev, "failed to alloc RQ wrid.\n");
1005 			ret = -ENOMEM;
1006 			goto err_sq;
1007 		}
1008 	}
1009 
1010 	hr_qp->sq.wrid = sq_wrid;
1011 	hr_qp->rq.wrid = rq_wrid;
1012 	return 0;
1013 err_sq:
1014 	kfree(sq_wrid);
1015 
1016 	return ret;
1017 }
1018 
1019 static void free_kernel_wrid(struct hns_roce_qp *hr_qp)
1020 {
1021 	kfree(hr_qp->rq.wrid);
1022 	kfree(hr_qp->sq.wrid);
1023 }
1024 
1025 static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
1026 			struct ib_qp_init_attr *init_attr,
1027 			struct ib_udata *udata,
1028 			struct hns_roce_ib_create_qp *ucmd)
1029 {
1030 	struct ib_device *ibdev = &hr_dev->ib_dev;
1031 	struct hns_roce_ucontext *uctx;
1032 	int ret;
1033 
1034 	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
1035 		hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
1036 	else
1037 		hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
1038 
1039 	ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
1040 			  hns_roce_qp_has_rq(init_attr), !!udata);
1041 	if (ret) {
1042 		ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
1043 			  ret);
1044 		return ret;
1045 	}
1046 
1047 	if (udata) {
1048 		ret = ib_copy_from_udata(ucmd, udata,
1049 					 min(udata->inlen, sizeof(*ucmd)));
1050 		if (ret) {
1051 			ibdev_err(ibdev,
1052 				  "failed to copy QP ucmd, ret = %d\n", ret);
1053 			return ret;
1054 		}
1055 
1056 		uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
1057 						 ibucontext);
1058 		hr_qp->config = uctx->config;
1059 		ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
1060 		if (ret)
1061 			ibdev_err(ibdev,
1062 				  "failed to set user SQ size, ret = %d.\n",
1063 				  ret);
1064 	} else {
1065 		if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
1066 			hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
1067 		ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
1068 		if (ret)
1069 			ibdev_err(ibdev,
1070 				  "failed to set kernel SQ size, ret = %d.\n",
1071 				  ret);
1072 	}
1073 
1074 	return ret;
1075 }
1076 
1077 static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
1078 				     struct ib_pd *ib_pd,
1079 				     struct ib_qp_init_attr *init_attr,
1080 				     struct ib_udata *udata,
1081 				     struct hns_roce_qp *hr_qp)
1082 {
1083 	struct hns_roce_ib_create_qp_resp resp = {};
1084 	struct ib_device *ibdev = &hr_dev->ib_dev;
1085 	struct hns_roce_ib_create_qp ucmd = {};
1086 	int ret;
1087 
1088 	mutex_init(&hr_qp->mutex);
1089 	spin_lock_init(&hr_qp->sq.lock);
1090 	spin_lock_init(&hr_qp->rq.lock);
1091 
1092 	hr_qp->state = IB_QPS_RESET;
1093 	hr_qp->flush_flag = 0;
1094 
1095 	if (init_attr->create_flags)
1096 		return -EOPNOTSUPP;
1097 
1098 	ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd);
1099 	if (ret) {
1100 		ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret);
1101 		return ret;
1102 	}
1103 
1104 	if (!udata) {
1105 		ret = alloc_kernel_wrid(hr_dev, hr_qp);
1106 		if (ret) {
1107 			ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n",
1108 				  ret);
1109 			return ret;
1110 		}
1111 	}
1112 
1113 	ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr);
1114 	if (ret) {
1115 		ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret);
1116 		goto err_buf;
1117 	}
1118 
1119 	ret = alloc_qpn(hr_dev, hr_qp, init_attr);
1120 	if (ret) {
1121 		ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret);
1122 		goto err_qpn;
1123 	}
1124 
1125 	ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp);
1126 	if (ret) {
1127 		ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n",
1128 			  ret);
1129 		goto err_db;
1130 	}
1131 
1132 	ret = alloc_qpc(hr_dev, hr_qp);
1133 	if (ret) {
1134 		ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n",
1135 			  ret);
1136 		goto err_qpc;
1137 	}
1138 
1139 	ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr);
1140 	if (ret) {
1141 		ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret);
1142 		goto err_store;
1143 	}
1144 
1145 	if (udata) {
1146 		resp.cap_flags = hr_qp->en_flags;
1147 		ret = ib_copy_to_udata(udata, &resp,
1148 				       min(udata->outlen, sizeof(resp)));
1149 		if (ret) {
1150 			ibdev_err(ibdev, "copy qp resp failed!\n");
1151 			goto err_store;
1152 		}
1153 	}
1154 
1155 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
1156 		ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp);
1157 		if (ret)
1158 			goto err_flow_ctrl;
1159 	}
1160 
1161 	hr_qp->ibqp.qp_num = hr_qp->qpn;
1162 	hr_qp->event = hns_roce_ib_qp_event;
1163 	refcount_set(&hr_qp->refcount, 1);
1164 	init_completion(&hr_qp->free);
1165 
1166 	return 0;
1167 
1168 err_flow_ctrl:
1169 	hns_roce_qp_remove(hr_dev, hr_qp);
1170 err_store:
1171 	free_qpc(hr_dev, hr_qp);
1172 err_qpc:
1173 	free_qp_db(hr_dev, hr_qp, udata);
1174 err_db:
1175 	free_qpn(hr_dev, hr_qp);
1176 err_qpn:
1177 	free_qp_buf(hr_dev, hr_qp);
1178 err_buf:
1179 	free_kernel_wrid(hr_qp);
1180 	return ret;
1181 }
1182 
1183 void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
1184 			 struct ib_udata *udata)
1185 {
1186 	if (refcount_dec_and_test(&hr_qp->refcount))
1187 		complete(&hr_qp->free);
1188 	wait_for_completion(&hr_qp->free);
1189 
1190 	free_qpc(hr_dev, hr_qp);
1191 	free_qpn(hr_dev, hr_qp);
1192 	free_qp_buf(hr_dev, hr_qp);
1193 	free_kernel_wrid(hr_qp);
1194 	free_qp_db(hr_dev, hr_qp, udata);
1195 }
1196 
1197 static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
1198 			 bool is_user)
1199 {
1200 	switch (type) {
1201 	case IB_QPT_XRC_INI:
1202 	case IB_QPT_XRC_TGT:
1203 		if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
1204 			goto out;
1205 		break;
1206 	case IB_QPT_UD:
1207 		if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 &&
1208 		    is_user)
1209 			goto out;
1210 		break;
1211 	case IB_QPT_RC:
1212 	case IB_QPT_GSI:
1213 		break;
1214 	default:
1215 		goto out;
1216 	}
1217 
1218 	return 0;
1219 
1220 out:
1221 	ibdev_err(&hr_dev->ib_dev, "not support QP type %d\n", type);
1222 
1223 	return -EOPNOTSUPP;
1224 }
1225 
1226 int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
1227 		       struct ib_udata *udata)
1228 {
1229 	struct ib_device *ibdev = qp->device;
1230 	struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
1231 	struct hns_roce_qp *hr_qp = to_hr_qp(qp);
1232 	struct ib_pd *pd = qp->pd;
1233 	int ret;
1234 
1235 	ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata);
1236 	if (ret)
1237 		return ret;
1238 
1239 	if (init_attr->qp_type == IB_QPT_XRC_TGT)
1240 		hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
1241 
1242 	if (init_attr->qp_type == IB_QPT_GSI) {
1243 		hr_qp->port = init_attr->port_num - 1;
1244 		hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
1245 	}
1246 
1247 	ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp);
1248 	if (ret)
1249 		ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n",
1250 			  init_attr->qp_type, ret);
1251 
1252 	return ret;
1253 }
1254 
1255 int to_hr_qp_type(int qp_type)
1256 {
1257 	switch (qp_type) {
1258 	case IB_QPT_RC:
1259 		return SERV_TYPE_RC;
1260 	case IB_QPT_UD:
1261 	case IB_QPT_GSI:
1262 		return SERV_TYPE_UD;
1263 	case IB_QPT_XRC_INI:
1264 	case IB_QPT_XRC_TGT:
1265 		return SERV_TYPE_XRC;
1266 	default:
1267 		return -1;
1268 	}
1269 }
1270 
1271 static int check_mtu_validate(struct hns_roce_dev *hr_dev,
1272 			      struct hns_roce_qp *hr_qp,
1273 			      struct ib_qp_attr *attr, int attr_mask)
1274 {
1275 	enum ib_mtu active_mtu;
1276 	int p;
1277 
1278 	p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
1279 	active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
1280 
1281 	if ((hr_dev->caps.max_mtu >= IB_MTU_2048 &&
1282 	    attr->path_mtu > hr_dev->caps.max_mtu) ||
1283 	    attr->path_mtu < IB_MTU_256 || attr->path_mtu > active_mtu) {
1284 		ibdev_err(&hr_dev->ib_dev,
1285 			"attr path_mtu(%d)invalid while modify qp",
1286 			attr->path_mtu);
1287 		return -EINVAL;
1288 	}
1289 
1290 	return 0;
1291 }
1292 
1293 static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1294 				  int attr_mask)
1295 {
1296 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
1297 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
1298 	int p;
1299 
1300 	if ((attr_mask & IB_QP_PORT) &&
1301 	    (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
1302 		ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n",
1303 			  attr->port_num);
1304 		return -EINVAL;
1305 	}
1306 
1307 	if (attr_mask & IB_QP_PKEY_INDEX) {
1308 		p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
1309 		if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
1310 			ibdev_err(&hr_dev->ib_dev,
1311 				  "invalid attr, pkey_index = %u.\n",
1312 				  attr->pkey_index);
1313 			return -EINVAL;
1314 		}
1315 	}
1316 
1317 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1318 	    attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
1319 		ibdev_err(&hr_dev->ib_dev,
1320 			  "invalid attr, max_rd_atomic = %u.\n",
1321 			  attr->max_rd_atomic);
1322 		return -EINVAL;
1323 	}
1324 
1325 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1326 	    attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
1327 		ibdev_err(&hr_dev->ib_dev,
1328 			  "invalid attr, max_dest_rd_atomic = %u.\n",
1329 			  attr->max_dest_rd_atomic);
1330 		return -EINVAL;
1331 	}
1332 
1333 	if (attr_mask & IB_QP_PATH_MTU)
1334 		return check_mtu_validate(hr_dev, hr_qp, attr, attr_mask);
1335 
1336 	return 0;
1337 }
1338 
1339 int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1340 		       int attr_mask, struct ib_udata *udata)
1341 {
1342 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
1343 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
1344 	enum ib_qp_state cur_state, new_state;
1345 	int ret = -EINVAL;
1346 
1347 	mutex_lock(&hr_qp->mutex);
1348 
1349 	if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state)
1350 		goto out;
1351 
1352 	cur_state = hr_qp->state;
1353 	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1354 
1355 	if (ibqp->uobject &&
1356 	    (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
1357 		if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) {
1358 			hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
1359 
1360 			if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
1361 				hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
1362 		} else {
1363 			ibdev_warn(&hr_dev->ib_dev,
1364 				  "flush cqe is not supported in userspace!\n");
1365 			goto out;
1366 		}
1367 	}
1368 
1369 	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
1370 				attr_mask)) {
1371 		ibdev_err(&hr_dev->ib_dev, "ib_modify_qp_is_ok failed\n");
1372 		goto out;
1373 	}
1374 
1375 	ret = hns_roce_check_qp_attr(ibqp, attr, attr_mask);
1376 	if (ret)
1377 		goto out;
1378 
1379 	if (cur_state == new_state && cur_state == IB_QPS_RESET)
1380 		goto out;
1381 
1382 	ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
1383 				    new_state, udata);
1384 
1385 out:
1386 	mutex_unlock(&hr_qp->mutex);
1387 
1388 	return ret;
1389 }
1390 
1391 void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
1392 		       __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
1393 {
1394 	if (unlikely(send_cq == NULL && recv_cq == NULL)) {
1395 		__acquire(&send_cq->lock);
1396 		__acquire(&recv_cq->lock);
1397 	} else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
1398 		spin_lock(&send_cq->lock);
1399 		__acquire(&recv_cq->lock);
1400 	} else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
1401 		spin_lock(&recv_cq->lock);
1402 		__acquire(&send_cq->lock);
1403 	} else if (send_cq == recv_cq) {
1404 		spin_lock(&send_cq->lock);
1405 		__acquire(&recv_cq->lock);
1406 	} else if (send_cq->cqn < recv_cq->cqn) {
1407 		spin_lock(&send_cq->lock);
1408 		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
1409 	} else {
1410 		spin_lock(&recv_cq->lock);
1411 		spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
1412 	}
1413 }
1414 
1415 void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
1416 			 struct hns_roce_cq *recv_cq) __releases(&send_cq->lock)
1417 			 __releases(&recv_cq->lock)
1418 {
1419 	if (unlikely(send_cq == NULL && recv_cq == NULL)) {
1420 		__release(&recv_cq->lock);
1421 		__release(&send_cq->lock);
1422 	} else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
1423 		__release(&recv_cq->lock);
1424 		spin_unlock(&send_cq->lock);
1425 	} else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
1426 		__release(&send_cq->lock);
1427 		spin_unlock(&recv_cq->lock);
1428 	} else if (send_cq == recv_cq) {
1429 		__release(&recv_cq->lock);
1430 		spin_unlock(&send_cq->lock);
1431 	} else if (send_cq->cqn < recv_cq->cqn) {
1432 		spin_unlock(&recv_cq->lock);
1433 		spin_unlock(&send_cq->lock);
1434 	} else {
1435 		spin_unlock(&send_cq->lock);
1436 		spin_unlock(&recv_cq->lock);
1437 	}
1438 }
1439 
1440 static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset)
1441 {
1442 	return hns_roce_buf_offset(hr_qp->mtr.kmem, offset);
1443 }
1444 
1445 void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
1446 {
1447 	return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
1448 }
1449 
1450 void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
1451 {
1452 	return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
1453 }
1454 
1455 void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n)
1456 {
1457 	return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift));
1458 }
1459 
1460 bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
1461 			  struct ib_cq *ib_cq)
1462 {
1463 	struct hns_roce_cq *hr_cq;
1464 	u32 cur;
1465 
1466 	cur = hr_wq->head - hr_wq->tail;
1467 	if (likely(cur + nreq < hr_wq->wqe_cnt))
1468 		return false;
1469 
1470 	hr_cq = to_hr_cq(ib_cq);
1471 	spin_lock(&hr_cq->lock);
1472 	cur = hr_wq->head - hr_wq->tail;
1473 	spin_unlock(&hr_cq->lock);
1474 
1475 	return cur + nreq >= hr_wq->wqe_cnt;
1476 }
1477 
1478 int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
1479 {
1480 	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
1481 	unsigned int reserved_from_bot;
1482 	unsigned int i;
1483 
1484 	qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps,
1485 					sizeof(u32), GFP_KERNEL);
1486 	if (!qp_table->idx_table.spare_idx)
1487 		return -ENOMEM;
1488 
1489 	mutex_init(&qp_table->scc_mutex);
1490 	mutex_init(&qp_table->bank_mutex);
1491 	xa_init(&hr_dev->qp_table_xa);
1492 
1493 	reserved_from_bot = hr_dev->caps.reserved_qps;
1494 
1495 	for (i = 0; i < reserved_from_bot; i++) {
1496 		hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++;
1497 		hr_dev->qp_table.bank[get_qp_bankid(i)].min++;
1498 	}
1499 
1500 	for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
1501 		ida_init(&hr_dev->qp_table.bank[i].ida);
1502 		hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps /
1503 					       HNS_ROCE_QP_BANK_NUM - 1;
1504 		hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min;
1505 	}
1506 
1507 	return 0;
1508 }
1509 
1510 void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
1511 {
1512 	int i;
1513 
1514 	for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
1515 		ida_destroy(&hr_dev->qp_table.bank[i].ida);
1516 	kfree(hr_dev->qp_table.idx_table.spare_idx);
1517 }
1518