/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "mlx5_ib.h"

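/*
 * One slot in the outstanding_wrs ring: a posted send together with the
 * completion that will eventually be reported for it. Completions are
 * buffered here so they can be delivered to the user in posting order,
 * no matter which hardware TX QP executed the request.
 */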
struct mlx5_ib_gsi_wr {
	struct ib_cqe cqe;
	struct ib_wc wc;
	int send_flags;
	bool completed:1;
};

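/*
 * Software GSI QP. QP1 traffic is multiplexed over several hardware QPs:
 * rx_qp receives everything, while sends go out through one TX QP per
 * P_Key index (IB) or per LAG port (RoCE), each created with
 * MLX5_IB_QP_CREATE_SQPN_QP1 so its packets carry QP1 as the source QP.
 */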
struct mlx5_ib_gsi_qp {
	struct ib_qp ibqp;
	struct ib_qp *rx_qp;
	u8 port_num;
	struct ib_qp_cap cap;
	enum ib_sig_type sq_sig_type;
	/* Serialize qp state modifications */
	struct mutex mutex;
	struct ib_cq *cq;
	struct mlx5_ib_gsi_wr *outstanding_wrs;
	u32 outstanding_pi, outstanding_ci;
	int num_qps;
	/* Protects access to the tx_qps. Post send operations synchronize
	 * with tx_qp creation in setup_qp(). Also protects the
	 * outstanding_wrs array and indices.
	 */
	spinlock_t lock;
	struct ib_qp **tx_qps;
};

static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
{
	return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
}

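/*
 * True when the device can override the DETH source QPN of transmitted
 * packets. Without this capability no separate TX QPs are created and the
 * hardware GSI QP handles both directions itself.
 */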
static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
{
	return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}

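/*
 * Sweep the outstanding_wrs ring from the consumer index, generating a
 * completion on the user's send CQ for every finished work request, and
 * stop at the first one still in flight so completions stay in posting
 * order.
 */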
/* Call with gsi->lock locked */
static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
{
	struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
	struct mlx5_ib_gsi_wr *wr;
	u32 index;

	for (index = gsi->outstanding_ci; index != gsi->outstanding_pi;
	     index++) {
		wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];

		if (!wr->completed)
			break;

		if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
		    wr->send_flags & IB_SEND_SIGNALED)
			WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));

		wr->completed = false;
	}

	gsi->outstanding_ci = index;
}

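/*
 * Completion handler for the private send CQ shared by the TX QPs. The
 * hardware completion is copied into its outstanding_wrs slot, with wr_id
 * and qp rewritten so the user sees the GSI QP rather than the internal
 * UD QP, and any newly finished requests are then flushed to the user CQ.
 */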
static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
	struct mlx5_ib_gsi_wr *wr =
		container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
	u64 wr_id;
	unsigned long flags;

	spin_lock_irqsave(&gsi->lock, flags);
	wr->completed = true;
	wr_id = wr->wc.wr_id;
	wr->wc = *wc;
	wr->wc.wr_id = wr_id;
	wr->wc.qp = &gsi->ibqp;

	generate_completions(gsi);
	spin_unlock_irqrestore(&gsi->lock, flags);
}

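/**
 * mlx5_ib_gsi_create_qp - create a software GSI QP for a port
 * @pd: protection domain to create the QP on
 * @init_attr: QP attributes the user requested for QP1
 *
 * Allocates the bookkeeping structures, the private send CQ and the
 * hardware RX QP. With the DETH SQPN capability, the hardware QP's send
 * queue is sized to zero and sends instead go through per-P_Key or
 * per-port TX QPs created later by setup_qps(). Only one GSI QP may exist
 * per port at a time.
 */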
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
				    struct ib_qp_init_attr *init_attr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_gsi_qp *gsi;
	struct ib_qp_init_attr hw_init_attr = *init_attr;
	const u8 port_num = init_attr->port_num;
	int num_qps = 0;
	int ret;

	if (mlx5_ib_deth_sqpn_cap(dev)) {
		if (MLX5_CAP_GEN(dev->mdev,
				 port_type) == MLX5_CAP_PORT_TYPE_IB)
			num_qps = pd->device->attrs.max_pkeys;
		else if (dev->lag_active)
			num_qps = MLX5_MAX_PORTS;
	}

	gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
	if (!gsi)
		return ERR_PTR(-ENOMEM);

	gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
	if (!gsi->tx_qps) {
		ret = -ENOMEM;
		goto err_free;
	}

	gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
				       sizeof(*gsi->outstanding_wrs),
				       GFP_KERNEL);
	if (!gsi->outstanding_wrs) {
		ret = -ENOMEM;
		goto err_free_tx;
	}

	mutex_init(&gsi->mutex);

	mutex_lock(&dev->devr.mutex);

	if (dev->devr.ports[port_num - 1].gsi) {
		mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
			     port_num);
		ret = -EBUSY;
		goto err_free_wrs;
	}
	gsi->num_qps = num_qps;
	spin_lock_init(&gsi->lock);

	gsi->cap = init_attr->cap;
	gsi->sq_sig_type = init_attr->sq_sig_type;
	gsi->ibqp.qp_num = 1;
	gsi->port_num = port_num;

	gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
			      IB_POLL_SOFTIRQ);
	if (IS_ERR(gsi->cq)) {
		mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
			     PTR_ERR(gsi->cq));
		ret = PTR_ERR(gsi->cq);
		goto err_free_wrs;
	}

	hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
	hw_init_attr.send_cq = gsi->cq;
	if (num_qps) {
		hw_init_attr.cap.max_send_wr = 0;
		hw_init_attr.cap.max_send_sge = 0;
		hw_init_attr.cap.max_inline_data = 0;
	}
	gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
	if (IS_ERR(gsi->rx_qp)) {
		mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
			     PTR_ERR(gsi->rx_qp));
		ret = PTR_ERR(gsi->rx_qp);
		goto err_destroy_cq;
	}

	dev->devr.ports[init_attr->port_num - 1].gsi = gsi;

	mutex_unlock(&dev->devr.mutex);

	return &gsi->ibqp;

err_destroy_cq:
	ib_free_cq(gsi->cq);
err_free_wrs:
	mutex_unlock(&dev->devr.mutex);
	kfree(gsi->outstanding_wrs);
err_free_tx:
	kfree(gsi->tx_qps);
err_free:
	kfree(gsi);
	return ERR_PTR(ret);
}

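/**
 * mlx5_ib_gsi_destroy_qp - tear down a software GSI QP
 * @qp: the embedded ib_qp of the GSI QP
 *
 * Destroys the RX QP first, clearing the per-port gsi pointer under
 * devr.mutex so a new GSI QP can be created, then releases the TX QPs,
 * the private send CQ and the bookkeeping arrays.
 */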
int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	const int port_num = gsi->port_num;
	int qp_index;
	int ret;

	mlx5_ib_dbg(dev, "destroying GSI QP\n");

	mutex_lock(&dev->devr.mutex);
	ret = ib_destroy_qp(gsi->rx_qp);
	if (ret) {
		mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
			     ret);
		mutex_unlock(&dev->devr.mutex);
		return ret;
	}
	dev->devr.ports[port_num - 1].gsi = NULL;
	mutex_unlock(&dev->devr.mutex);
	gsi->rx_qp = NULL;

	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
		if (!gsi->tx_qps[qp_index])
			continue;
		WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index]));
		gsi->tx_qps[qp_index] = NULL;
	}

	ib_free_cq(gsi->cq);

	kfree(gsi->outstanding_wrs);
	kfree(gsi->tx_qps);
	kfree(gsi);

	return 0;
}

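/*
 * Create one UD QP to transmit on behalf of QP1. It shares the receive CQ
 * and QP context of the RX QP, sends completions to the private CQ, and is
 * flagged with MLX5_IB_QP_CREATE_SQPN_QP1 so its packets are stamped with
 * QP1 as the source QP number.
 */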
static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
{
	struct ib_pd *pd = gsi->rx_qp->pd;
	struct ib_qp_init_attr init_attr = {
		.event_handler = gsi->rx_qp->event_handler,
		.qp_context = gsi->rx_qp->qp_context,
		.send_cq = gsi->cq,
		.recv_cq = gsi->rx_qp->recv_cq,
		.cap = {
			.max_send_wr = gsi->cap.max_send_wr,
			.max_send_sge = gsi->cap.max_send_sge,
			.max_inline_data = gsi->cap.max_inline_data,
		},
		.sq_sig_type = gsi->sq_sig_type,
		.qp_type = IB_QPT_UD,
		.create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
	};

	return ib_create_qp(pd, &init_attr);
}

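/*
 * Walk a freshly created TX QP through the usual UD state machine,
 * INIT -> RTR -> RTS, binding it to the GSI port, the given P_Key index
 * and the well-known QP1 Q_Key.
 */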
static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
			 u16 pkey_index)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct ib_qp_attr attr;
	int mask;
	int ret;

	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
	attr.qp_state = IB_QPS_INIT;
	attr.pkey_index = pkey_index;
	attr.qkey = IB_QP1_QKEY;
	attr.port_num = gsi->port_num;
	ret = ib_modify_qp(qp, &attr, mask);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	attr.qp_state = IB_QPS_RTS;
	attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	return 0;
}

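/*
 * Create and connect the TX QP for one slot, checking its P_Key first. On
 * IB each slot corresponds to a P_Key index; otherwise (RoCE) P_Key index
 * 0 is used for every slot. Failures only log and leave the slot empty;
 * posts to an empty slot are completed as silent drops.
 */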
static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
{
	struct ib_device *device = gsi->rx_qp->device;
	struct mlx5_ib_dev *dev = to_mdev(device);
	int pkey_index = qp_index;
	struct mlx5_ib_qp *mqp;
	struct ib_qp *qp;
	unsigned long flags;
	u16 pkey;
	int ret;

	if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
		pkey_index = 0;

	ret = ib_query_pkey(device, gsi->port_num, pkey_index, &pkey);
	if (ret) {
		mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
			     gsi->port_num, qp_index);
		return;
	}

	if (!pkey) {
		mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d.  Skipping.\n",
			    gsi->port_num, qp_index);
		return;
	}

	spin_lock_irqsave(&gsi->lock, flags);
	qp = gsi->tx_qps[qp_index];
	spin_unlock_irqrestore(&gsi->lock, flags);
	if (qp) {
		mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n",
			    gsi->port_num, qp_index);
		return;
	}

	qp = create_gsi_ud_qp(gsi);
	if (IS_ERR(qp)) {
		mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
			     PTR_ERR(qp));
		return;
	}

	mqp = to_mqp(qp);
	if (dev->lag_active)
		mqp->gsi_lag_port = qp_index + 1;
	ret = modify_to_rts(gsi, qp, pkey_index);
	if (ret)
		goto err_destroy_qp;

	spin_lock_irqsave(&gsi->lock, flags);
	WARN_ON_ONCE(gsi->tx_qps[qp_index]);
	gsi->tx_qps[qp_index] = qp;
	spin_unlock_irqrestore(&gsi->lock, flags);

	return;

err_destroy_qp:
	/* The QP never made it into tx_qps, so release it here. */
	WARN_ON_ONCE(ib_destroy_qp(qp));
}

static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
{
	u16 qp_index;

	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
		setup_qp(gsi, qp_index);
}

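/*
 * Modify the GSI QP by modifying the RX QP; once that QP reaches RTS, the
 * per-slot TX QPs are brought up to match via setup_qps().
 */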
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
			  int attr_mask)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	int ret;

	mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);

	mutex_lock(&gsi->mutex);
	ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
	if (ret) {
		mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
		goto unlock;
	}

	if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
		setup_qps(gsi);

unlock:
	mutex_unlock(&gsi->mutex);

	return ret;
}

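/*
 * Query the RX QP but report the capabilities the user originally asked
 * for, since the hardware QP's send queue is deliberately sized to zero
 * when separate TX QPs carry the traffic.
 */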
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
			 int qp_attr_mask,
			 struct ib_qp_init_attr *qp_init_attr)
{
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	int ret;

	mutex_lock(&gsi->mutex);
	ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
	qp_init_attr->cap = gsi->cap;
	mutex_unlock(&gsi->mutex);

	return ret;
}

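/*
 * Claim the next slot in the outstanding_wrs ring for a send that is about
 * to be posted, or, when @wc is supplied, record an already-completed one.
 * The ring holds at most cap.max_send_wr entries, the send queue depth the
 * user requested.
 */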
/* Call with gsi->lock locked */
static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
				      struct ib_ud_wr *wr, struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
	struct mlx5_ib_gsi_wr *gsi_wr;

	if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
		mlx5_ib_warn(dev, "no available GSI work request.\n");
		return -ENOMEM;
	}

	gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
				       gsi->cap.max_send_wr];
	gsi->outstanding_pi++;

	if (!wc) {
		memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
		gsi_wr->wc.pkey_index = wr->pkey_index;
		gsi_wr->wc.wr_id = wr->wr.wr_id;
	} else {
		gsi_wr->wc = *wc;
		gsi_wr->completed = true;
	}

	gsi_wr->cqe.done = &handle_single_completion;
	wr->wr.wr_cqe = &gsi_wr->cqe;

	return 0;
}

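/*
 * Complete a send that has no usable TX QP as if it succeeded: queue a
 * synthetic IB_WC_SUCCESS completion in the ring and flush it out, so the
 * user never notices that nothing was put on the wire.
 */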
/* Call with gsi->lock locked */
static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
				   struct ib_ud_wr *wr)
{
	struct ib_wc wc = {
		{ .wr_id = wr->wr.wr_id },
		.status = IB_WC_SUCCESS,
		.opcode = IB_WC_SEND,
		.qp = &gsi->ibqp,
	};
	int ret;

	ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
	if (ret)
		return ret;

	generate_completions(gsi);

	return 0;
}

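/*
 * Select the hardware QP for a send: the P_Key index picks the slot on IB,
 * and on LAG-enabled devices the AH's transmit port overrides it. NULL
 * means the slot has no TX QP and the send should be silently dropped.
 */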
/* Call with gsi->lock locked */
static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
{
	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
	struct mlx5_ib_ah *ah = to_mah(wr->ah);
	int qp_index = wr->pkey_index;

	if (!gsi->num_qps)
		return gsi->rx_qp;

	if (dev->lag_active && ah->xmit_port)
		qp_index = ah->xmit_port - 1;

	if (qp_index >= gsi->num_qps)
		return NULL;

	return gsi->tx_qps[qp_index];
}

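/*
 * Fan the posted list out one work request at a time, since consecutive
 * requests may resolve to different hardware QPs. Each request gets an
 * outstanding_wrs slot before it is posted so that user-visible
 * completions keep their original order; on failure the slot reservation
 * is rolled back and the offending request is reported in @bad_wr.
 */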
int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
			  const struct ib_send_wr **bad_wr)
{
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	struct ib_qp *tx_qp;
	unsigned long flags;
	int ret;

	for (; wr; wr = wr->next) {
		struct ib_ud_wr cur_wr = *ud_wr(wr);

		cur_wr.wr.next = NULL;

		spin_lock_irqsave(&gsi->lock, flags);
		tx_qp = get_tx_qp(gsi, &cur_wr);
		if (!tx_qp) {
			ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
			if (ret)
				goto err;
			spin_unlock_irqrestore(&gsi->lock, flags);
			continue;
		}

		ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
		if (ret)
			goto err;

		ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
		if (ret) {
			/* Undo the effect of adding the outstanding wr */
			gsi->outstanding_pi--;
			goto err;
		}
		spin_unlock_irqrestore(&gsi->lock, flags);
	}

	return 0;

err:
	spin_unlock_irqrestore(&gsi->lock, flags);
	*bad_wr = wr;
	return ret;
}

int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
			  const struct ib_recv_wr **bad_wr)
{
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);

	return ib_post_recv(gsi->rx_qp, wr, bad_wr);
}

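/*
 * P_Key table change notification: rescan the table and bring up any TX
 * QPs whose slot has become valid. Safe to call before the GSI QP exists.
 */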
void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
{
	if (!gsi)
		return;

	mutex_lock(&gsi->mutex);
	setup_qps(gsi);
	mutex_unlock(&gsi->mutex);
}