xref: /openbmc/linux/drivers/infiniband/hw/mlx5/gsi.c (revision 0edbfea5)
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include "mlx5_ib.h"
34 
35 struct mlx5_ib_gsi_wr {
36 	struct ib_cqe cqe;
37 	struct ib_wc wc;
38 	int send_flags;
39 	bool completed:1;
40 };
41 
42 struct mlx5_ib_gsi_qp {
43 	struct ib_qp ibqp;
44 	struct ib_qp *rx_qp;
45 	u8 port_num;
46 	struct ib_qp_cap cap;
47 	enum ib_sig_type sq_sig_type;
48 	/* Serialize qp state modifications */
49 	struct mutex mutex;
50 	struct ib_cq *cq;
51 	struct mlx5_ib_gsi_wr *outstanding_wrs;
52 	u32 outstanding_pi, outstanding_ci;
53 	int num_qps;
54 	/* Protects access to the tx_qps. Post send operations synchronize
55 	 * with tx_qp creation in setup_qp(). Also protects the
56 	 * outstanding_wrs array and indices.
57 	 */
58 	spinlock_t lock;
59 	struct ib_qp **tx_qps;
60 };
61 
62 static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
63 {
64 	return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
65 }
66 
67 static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
68 {
69 	return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
70 }
71 
72 static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index)
73 {
74 	return ++index % gsi->cap.max_send_wr;
75 }
76 
/*
 * Walk the outstanding WR ring from the consumer index up to, but not
 * including, the producer index.
 *
 * @gsi:   pointer to the GSI QP; evaluated more than once, so it must not
 *         have side effects.
 * @index: u32 lvalue used as the cursor; after the loop it holds the
 *         position at which iteration stopped.
 *
 * Both arguments are parenthesized so that expression arguments (for
 * example "&obj") expand correctly.
 */
#define for_each_outstanding_wr(gsi, index) \
	for ((index) = (gsi)->outstanding_ci; \
	     (index) != (gsi)->outstanding_pi; \
	     (index) = next_outstanding((gsi), (index)))
80 
81 /* Call with gsi->lock locked */
82 static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
83 {
84 	struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
85 	struct mlx5_ib_gsi_wr *wr;
86 	u32 index;
87 
88 	for_each_outstanding_wr(gsi, index) {
89 		wr = &gsi->outstanding_wrs[index];
90 
91 		if (!wr->completed)
92 			break;
93 
94 		if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
95 		    wr->send_flags & IB_SEND_SIGNALED)
96 			WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
97 
98 		wr->completed = false;
99 	}
100 
101 	gsi->outstanding_ci = index;
102 }
103 
104 static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
105 {
106 	struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
107 	struct mlx5_ib_gsi_wr *wr =
108 		container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
109 	u64 wr_id;
110 	unsigned long flags;
111 
112 	spin_lock_irqsave(&gsi->lock, flags);
113 	wr->completed = true;
114 	wr_id = wr->wc.wr_id;
115 	wr->wc = *wc;
116 	wr->wc.wr_id = wr_id;
117 	wr->wc.qp = &gsi->ibqp;
118 
119 	generate_completions(gsi);
120 	spin_unlock_irqrestore(&gsi->lock, flags);
121 }
122 
123 struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
124 				    struct ib_qp_init_attr *init_attr)
125 {
126 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
127 	struct mlx5_ib_gsi_qp *gsi;
128 	struct ib_qp_init_attr hw_init_attr = *init_attr;
129 	const u8 port_num = init_attr->port_num;
130 	const int num_pkeys = pd->device->attrs.max_pkeys;
131 	const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
132 	int ret;
133 
134 	mlx5_ib_dbg(dev, "creating GSI QP\n");
135 
136 	if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) {
137 		mlx5_ib_warn(dev,
138 			     "invalid port number %d during GSI QP creation\n",
139 			     port_num);
140 		return ERR_PTR(-EINVAL);
141 	}
142 
143 	gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
144 	if (!gsi)
145 		return ERR_PTR(-ENOMEM);
146 
147 	gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
148 	if (!gsi->tx_qps) {
149 		ret = -ENOMEM;
150 		goto err_free;
151 	}
152 
153 	gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
154 				       sizeof(*gsi->outstanding_wrs),
155 				       GFP_KERNEL);
156 	if (!gsi->outstanding_wrs) {
157 		ret = -ENOMEM;
158 		goto err_free_tx;
159 	}
160 
161 	mutex_init(&gsi->mutex);
162 
163 	mutex_lock(&dev->devr.mutex);
164 
165 	if (dev->devr.ports[port_num - 1].gsi) {
166 		mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
167 			     port_num);
168 		ret = -EBUSY;
169 		goto err_free_wrs;
170 	}
171 	gsi->num_qps = num_qps;
172 	spin_lock_init(&gsi->lock);
173 
174 	gsi->cap = init_attr->cap;
175 	gsi->sq_sig_type = init_attr->sq_sig_type;
176 	gsi->ibqp.qp_num = 1;
177 	gsi->port_num = port_num;
178 
179 	gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
180 			      IB_POLL_SOFTIRQ);
181 	if (IS_ERR(gsi->cq)) {
182 		mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
183 			     PTR_ERR(gsi->cq));
184 		ret = PTR_ERR(gsi->cq);
185 		goto err_free_wrs;
186 	}
187 
188 	hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
189 	hw_init_attr.send_cq = gsi->cq;
190 	if (num_qps) {
191 		hw_init_attr.cap.max_send_wr = 0;
192 		hw_init_attr.cap.max_send_sge = 0;
193 		hw_init_attr.cap.max_inline_data = 0;
194 	}
195 	gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
196 	if (IS_ERR(gsi->rx_qp)) {
197 		mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
198 			     PTR_ERR(gsi->rx_qp));
199 		ret = PTR_ERR(gsi->rx_qp);
200 		goto err_destroy_cq;
201 	}
202 
203 	dev->devr.ports[init_attr->port_num - 1].gsi = gsi;
204 
205 	mutex_unlock(&dev->devr.mutex);
206 
207 	return &gsi->ibqp;
208 
209 err_destroy_cq:
210 	ib_free_cq(gsi->cq);
211 err_free_wrs:
212 	mutex_unlock(&dev->devr.mutex);
213 	kfree(gsi->outstanding_wrs);
214 err_free_tx:
215 	kfree(gsi->tx_qps);
216 err_free:
217 	kfree(gsi);
218 	return ERR_PTR(ret);
219 }
220 
221 int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
222 {
223 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
224 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
225 	const int port_num = gsi->port_num;
226 	int qp_index;
227 	int ret;
228 
229 	mlx5_ib_dbg(dev, "destroying GSI QP\n");
230 
231 	mutex_lock(&dev->devr.mutex);
232 	ret = ib_destroy_qp(gsi->rx_qp);
233 	if (ret) {
234 		mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
235 			     ret);
236 		mutex_unlock(&dev->devr.mutex);
237 		return ret;
238 	}
239 	dev->devr.ports[port_num - 1].gsi = NULL;
240 	mutex_unlock(&dev->devr.mutex);
241 	gsi->rx_qp = NULL;
242 
243 	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
244 		if (!gsi->tx_qps[qp_index])
245 			continue;
246 		WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index]));
247 		gsi->tx_qps[qp_index] = NULL;
248 	}
249 
250 	ib_free_cq(gsi->cq);
251 
252 	kfree(gsi->outstanding_wrs);
253 	kfree(gsi->tx_qps);
254 	kfree(gsi);
255 
256 	return 0;
257 }
258 
259 static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
260 {
261 	struct ib_pd *pd = gsi->rx_qp->pd;
262 	struct ib_qp_init_attr init_attr = {
263 		.event_handler = gsi->rx_qp->event_handler,
264 		.qp_context = gsi->rx_qp->qp_context,
265 		.send_cq = gsi->cq,
266 		.recv_cq = gsi->rx_qp->recv_cq,
267 		.cap = {
268 			.max_send_wr = gsi->cap.max_send_wr,
269 			.max_send_sge = gsi->cap.max_send_sge,
270 			.max_inline_data = gsi->cap.max_inline_data,
271 		},
272 		.sq_sig_type = gsi->sq_sig_type,
273 		.qp_type = IB_QPT_UD,
274 		.create_flags = mlx5_ib_create_qp_sqpn_qp1(),
275 	};
276 
277 	return ib_create_qp(pd, &init_attr);
278 }
279 
280 static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
281 			 u16 qp_index)
282 {
283 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
284 	struct ib_qp_attr attr;
285 	int mask;
286 	int ret;
287 
288 	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
289 	attr.qp_state = IB_QPS_INIT;
290 	attr.pkey_index = qp_index;
291 	attr.qkey = IB_QP1_QKEY;
292 	attr.port_num = gsi->port_num;
293 	ret = ib_modify_qp(qp, &attr, mask);
294 	if (ret) {
295 		mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n",
296 			    qp->qp_num, ret);
297 		return ret;
298 	}
299 
300 	attr.qp_state = IB_QPS_RTR;
301 	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
302 	if (ret) {
303 		mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n",
304 			    qp->qp_num, ret);
305 		return ret;
306 	}
307 
308 	attr.qp_state = IB_QPS_RTS;
309 	attr.sq_psn = 0;
310 	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
311 	if (ret) {
312 		mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n",
313 			    qp->qp_num, ret);
314 		return ret;
315 	}
316 
317 	return 0;
318 }
319 
320 static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
321 {
322 	struct ib_device *device = gsi->rx_qp->device;
323 	struct mlx5_ib_dev *dev = to_mdev(device);
324 	struct ib_qp *qp;
325 	unsigned long flags;
326 	u16 pkey;
327 	int ret;
328 
329 	ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
330 	if (ret) {
331 		mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
332 			     gsi->port_num, qp_index);
333 		return;
334 	}
335 
336 	if (!pkey) {
337 		mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d.  Skipping.\n",
338 			    gsi->port_num, qp_index);
339 		return;
340 	}
341 
342 	spin_lock_irqsave(&gsi->lock, flags);
343 	qp = gsi->tx_qps[qp_index];
344 	spin_unlock_irqrestore(&gsi->lock, flags);
345 	if (qp) {
346 		mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n",
347 			    gsi->port_num, qp_index);
348 		return;
349 	}
350 
351 	qp = create_gsi_ud_qp(gsi);
352 	if (IS_ERR(qp)) {
353 		mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
354 			     PTR_ERR(qp));
355 		return;
356 	}
357 
358 	ret = modify_to_rts(gsi, qp, qp_index);
359 	if (ret)
360 		goto err_destroy_qp;
361 
362 	spin_lock_irqsave(&gsi->lock, flags);
363 	WARN_ON_ONCE(gsi->tx_qps[qp_index]);
364 	gsi->tx_qps[qp_index] = qp;
365 	spin_unlock_irqrestore(&gsi->lock, flags);
366 
367 	return;
368 
369 err_destroy_qp:
370 	WARN_ON_ONCE(qp);
371 }
372 
373 static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
374 {
375 	u16 qp_index;
376 
377 	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
378 		setup_qp(gsi, qp_index);
379 }
380 
381 int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
382 			  int attr_mask)
383 {
384 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
385 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
386 	int ret;
387 
388 	mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
389 
390 	mutex_lock(&gsi->mutex);
391 	ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
392 	if (ret) {
393 		mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
394 		goto unlock;
395 	}
396 
397 	if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
398 		setup_qps(gsi);
399 
400 unlock:
401 	mutex_unlock(&gsi->mutex);
402 
403 	return ret;
404 }
405 
406 int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
407 			 int qp_attr_mask,
408 			 struct ib_qp_init_attr *qp_init_attr)
409 {
410 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
411 	int ret;
412 
413 	mutex_lock(&gsi->mutex);
414 	ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
415 	qp_init_attr->cap = gsi->cap;
416 	mutex_unlock(&gsi->mutex);
417 
418 	return ret;
419 }
420 
421 /* Call with gsi->lock locked */
422 static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
423 				      struct ib_ud_wr *wr, struct ib_wc *wc)
424 {
425 	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
426 	struct mlx5_ib_gsi_wr *gsi_wr;
427 
428 	if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
429 		mlx5_ib_warn(dev, "no available GSI work request.\n");
430 		return -ENOMEM;
431 	}
432 
433 	gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi];
434 	gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi);
435 
436 	if (!wc) {
437 		memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
438 		gsi_wr->wc.pkey_index = wr->pkey_index;
439 		gsi_wr->wc.wr_id = wr->wr.wr_id;
440 	} else {
441 		gsi_wr->wc = *wc;
442 		gsi_wr->completed = true;
443 	}
444 
445 	gsi_wr->cqe.done = &handle_single_completion;
446 	wr->wr.wr_cqe = &gsi_wr->cqe;
447 
448 	return 0;
449 }
450 
451 /* Call with gsi->lock locked */
452 static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
453 				    struct ib_ud_wr *wr)
454 {
455 	struct ib_wc wc = {
456 		{ .wr_id = wr->wr.wr_id },
457 		.status = IB_WC_SUCCESS,
458 		.opcode = IB_WC_SEND,
459 		.qp = &gsi->ibqp,
460 	};
461 	int ret;
462 
463 	ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
464 	if (ret)
465 		return ret;
466 
467 	generate_completions(gsi);
468 
469 	return 0;
470 }
471 
472 /* Call with gsi->lock locked */
473 static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
474 {
475 	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
476 	int qp_index = wr->pkey_index;
477 
478 	if (!mlx5_ib_deth_sqpn_cap(dev))
479 		return gsi->rx_qp;
480 
481 	if (qp_index >= gsi->num_qps)
482 		return NULL;
483 
484 	return gsi->tx_qps[qp_index];
485 }
486 
487 int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
488 			  struct ib_send_wr **bad_wr)
489 {
490 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
491 	struct ib_qp *tx_qp;
492 	unsigned long flags;
493 	int ret;
494 
495 	for (; wr; wr = wr->next) {
496 		struct ib_ud_wr cur_wr = *ud_wr(wr);
497 
498 		cur_wr.wr.next = NULL;
499 
500 		spin_lock_irqsave(&gsi->lock, flags);
501 		tx_qp = get_tx_qp(gsi, &cur_wr);
502 		if (!tx_qp) {
503 			ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
504 			if (ret)
505 				goto err;
506 			spin_unlock_irqrestore(&gsi->lock, flags);
507 			continue;
508 		}
509 
510 		ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
511 		if (ret)
512 			goto err;
513 
514 		ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
515 		if (ret) {
516 			/* Undo the effect of adding the outstanding wr */
517 			gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
518 					      gsi->cap.max_send_wr;
519 			goto err;
520 		}
521 		spin_unlock_irqrestore(&gsi->lock, flags);
522 	}
523 
524 	return 0;
525 
526 err:
527 	spin_unlock_irqrestore(&gsi->lock, flags);
528 	*bad_wr = wr;
529 	return ret;
530 }
531 
532 int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
533 			  struct ib_recv_wr **bad_wr)
534 {
535 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
536 
537 	return ib_post_recv(gsi->rx_qp, wr, bad_wr);
538 }
539 
540 void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
541 {
542 	if (!gsi)
543 		return;
544 
545 	mutex_lock(&gsi->mutex);
546 	setup_qps(gsi);
547 	mutex_unlock(&gsi->mutex);
548 }
549