/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"

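/* Return the cached device attributes; no driver-specific user data is
 * accepted for this query.
 */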
static int rxe_query_device(struct ib_device *dev,
			    struct ib_device_attr *attr,
			    struct ib_udata *uhw)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	*attr = rxe->attr;
	return 0;
}

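/* Report port attributes, deriving speed and width from the underlying
 * Ethernet device and mapping the netdev state to a physical link state.
 */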
static int rxe_query_port(struct ib_device *dev,
			  u8 port_num, struct ib_port_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;
	int rc;

	port = &rxe->port;

	/* *attr is zeroed by the caller, so avoid zeroing it again here */
	*attr = port->attr;

	mutex_lock(&rxe->usdev_lock);
	rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
			      &attr->active_width);

	if (attr->state == IB_PORT_ACTIVE)
		attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP;
	else if (dev_get_flags(rxe->ndev) & IFF_UP)
		attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING;
	else
		attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED;

	mutex_unlock(&rxe->usdev_lock);

	return rc;
}

static struct net_device *rxe_get_netdev(struct ib_device *device,
					 u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(device);

	if (rxe->ndev) {
		dev_hold(rxe->ndev);
		return rxe->ndev;
	}

	return NULL;
}

static int rxe_query_pkey(struct ib_device *device,
			  u8 port_num, u16 index, u16 *pkey)
{
	struct rxe_dev *rxe = to_rdev(device);
	struct rxe_port *port;

	port = &rxe->port;

	if (unlikely(index >= port->attr.pkey_tbl_len)) {
		dev_warn(device->dev.parent, "invalid index = %d\n",
			 index);
		goto err1;
	}

	*pkey = port->pkey_tbl[index];
	return 0;

err1:
	return -EINVAL;
}

static int rxe_modify_device(struct ib_device *dev,
			     int mask, struct ib_device_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(rxe->ib_dev.node_desc,
		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
	}

	return 0;
}

static int rxe_modify_port(struct ib_device *dev,
			   u8 port_num, int mask, struct ib_port_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;

	port = &rxe->port;

	port->attr.port_cap_flags |= attr->set_port_cap_mask;
	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

	if (mask & IB_PORT_RESET_QKEY_CNTR)
		port->attr.qkey_viol_cntr = 0;

	return 0;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
					       u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(dev);

	return rxe_link_layer(rxe, port_num);
}

static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
					      struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_ucontext *uc;

	uc = rxe_alloc(&rxe->uc_pool);
	return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
	struct rxe_ucontext *uc = to_ruc(ibuc);

	rxe_drop_ref(uc);
	return 0;
}

static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
			      struct ib_port_immutable *immutable)
{
	int err;
	struct ib_port_attr attr;

	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(dev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}

static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
				  struct ib_ucontext *context,
				  struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_pd *pd;

	pd = rxe_alloc(&rxe->pd_pool);
	return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd)
{
	struct rxe_pd *pd = to_rpd(ibpd);

	rxe_drop_ref(pd);
	return 0;
}

static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
			struct rxe_av *av)
{
	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
	rxe_av_fill_ip_info(av, attr);
}

static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
				   struct rdma_ah_attr *attr,
				   u32 flags,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_ah *ah;

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return ERR_PTR(err);

	ah = rxe_alloc(&rxe->ah_pool);
	if (!ah)
		return ERR_PTR(-ENOMEM);

	rxe_add_ref(pd);
	ah->pd = pd;

	rxe_init_av(rxe, attr, &ah->av);
	return &ah->ibah;
}

static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return err;

	rxe_init_av(rxe, attr, &ah->av);
	return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	struct rxe_ah *ah = to_rah(ibah);

	memset(attr, 0, sizeof(*attr));
	attr->type = ibah->type;
	rxe_av_to_attr(&ah->av, attr);
	return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct rxe_ah *ah = to_rah(ibah);

	rxe_drop_ref(ah->pd);
	rxe_drop_ref(ah);
	return 0;
}

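/* Build one receive WQE at the producer end of a receive queue.
 * Called with the queue's producer lock held by the caller.
 */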
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
	int err;
	int i;
	u32 length;
	struct rxe_recv_wqe *recv_wqe;
	int num_sge = ibwr->num_sge;

	if (unlikely(queue_full(rq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	if (unlikely(num_sge > rq->max_sge)) {
		err = -EINVAL;
		goto err1;
	}

	length = 0;
	for (i = 0; i < num_sge; i++)
		length += ibwr->sg_list[i].length;

	recv_wqe = producer_addr(rq->queue);
	recv_wqe->wr_id = ibwr->wr_id;
	recv_wqe->num_sge = num_sge;

	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));

	recv_wqe->dma.length		= length;
	recv_wqe->dma.resid		= length;
	recv_wqe->dma.num_sge		= num_sge;
	recv_wqe->dma.cur_sge		= 0;
	recv_wqe->dma.sge_offset	= 0;

	/* make sure all changes to the work queue are written before we
	 * update the producer pointer
	 */
	smp_wmb();

	advance_producer(rq->queue);
	return 0;

err1:
	return err;
}

static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
				     struct ib_srq_init_attr *init,
				     struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_srq *srq;
	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
	struct rxe_create_srq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
	if (err)
		goto err1;

	srq = rxe_alloc(&rxe->srq_pool);
	if (!srq) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(srq);
	rxe_add_ref(pd);
	srq->pd = pd;

	err = rxe_srq_from_init(rxe, srq, init, context, uresp);
	if (err)
		goto err2;

	return &srq->ibsrq;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
			  enum ib_srq_attr_mask mask,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_dev *rxe = to_rdev(ibsrq->device);
	struct rxe_modify_srq_cmd ucmd = {};

	if (udata) {
		if (udata->inlen < sizeof(ucmd))
			return -EINVAL;

		err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
		if (err)
			return err;
	}

	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
	if (err)
		goto err1;

	err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->error)
		return -EINVAL;

	attr->max_wr = srq->rq.queue->buf->index_mask;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;
	return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->rq.queue)
		rxe_queue_cleanup(srq->rq.queue);

	rxe_drop_ref(srq->pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);

	return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
			     const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	unsigned long flags;
	struct rxe_srq *srq = to_rsrq(ibsrq);

	spin_lock_irqsave(&srq->rq.producer_lock, flags);

	while (wr) {
		err = post_one_recv(&srq->rq, wr);
		if (unlikely(err))
			break;
		wr = wr->next;
	}

	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

	if (err)
		*bad_wr = wr;

	return err;
}

static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
				   struct ib_qp_init_attr *init,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_qp *qp;
	struct rxe_create_qp_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	err = rxe_qp_chk_init(rxe, init);
	if (err)
		goto err1;

	qp = rxe_alloc(&rxe->qp_pool);
	if (!qp) {
		err = -ENOMEM;
		goto err1;
	}

	if (udata) {
		if (udata->inlen) {
			err = -EINVAL;
			goto err2;
		}
		qp->is_user = 1;
	}

	rxe_add_index(qp);

	err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata);
	if (err)
		goto err3;

	return &qp->ibqp;

err3:
	rxe_drop_index(qp);
err2:
	rxe_drop_ref(qp);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int mask, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
	if (err)
		goto err1;

	err = rxe_qp_from_attr(qp, attr, mask, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			int mask, struct ib_qp_init_attr *init)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_to_init(qp, init);
	rxe_qp_to_attr(qp, attr, mask);

	return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_destroy(qp);
	rxe_drop_index(qp);
	rxe_drop_ref(qp);
	return 0;
}

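/* Sanity-check a send WR against the send queue limits before it is
 * copied into a WQE: SGE count, atomic alignment and inline data size.
 */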
static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			    unsigned int mask, unsigned int length)
{
	int num_sge = ibwr->num_sge;
	struct rxe_sq *sq = &qp->sq;

	if (unlikely(num_sge > sq->max_sge))
		goto err1;

	if (unlikely(mask & WR_ATOMIC_MASK)) {
		if (length < 8)
			goto err1;

		if (atomic_wr(ibwr)->remote_addr & 0x7)
			goto err1;
	}

	if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
		     (length > sq->max_inline)))
		goto err1;

	return 0;

err1:
	return -EINVAL;
}

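/* Translate the opcode-specific fields of an ib_send_wr into the
 * driver's internal rxe_send_wr representation.
 */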
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
			 const struct ib_send_wr *ibwr)
{
	wr->wr_id = ibwr->wr_id;
	wr->num_sge = ibwr->num_sge;
	wr->opcode = ibwr->opcode;
	wr->send_flags = ibwr->send_flags;

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI) {
		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
		if (qp_type(qp) == IB_QPT_GSI)
			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
		if (wr->opcode == IB_WR_SEND_WITH_IMM)
			wr->ex.imm_data = ibwr->ex.imm_data;
	} else {
		switch (wr->opcode) {
		case IB_WR_RDMA_WRITE_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			/* fall through */
		case IB_WR_RDMA_READ:
		case IB_WR_RDMA_WRITE:
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey	= rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			wr->wr.atomic.remote_addr =
				atomic_wr(ibwr)->remote_addr;
			wr->wr.atomic.compare_add =
				atomic_wr(ibwr)->compare_add;
			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
			break;
		case IB_WR_LOCAL_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_REG_MR:
			wr->wr.reg.mr = reg_wr(ibwr)->mr;
			wr->wr.reg.key = reg_wr(ibwr)->key;
			wr->wr.reg.access = reg_wr(ibwr)->access;
			break;
		default:
			break;
		}
	}
}

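/* Fill in a send WQE from a posted WR: copy the AV for datagram QPs,
 * copy or inline the SGEs, and initialize the DMA state.
 */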
static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			 unsigned int mask, unsigned int length,
			 struct rxe_send_wqe *wqe)
{
	int num_sge = ibwr->num_sge;
	struct ib_sge *sge;
	int i;
	u8 *p;

	init_send_wr(qp, &wqe->wr, ibwr);

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI)
		memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

	if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
		p = wqe->dma.inline_data;

		sge = ibwr->sg_list;
		for (i = 0; i < num_sge; i++, sge++) {
			memcpy(p, (void *)(uintptr_t)sge->addr,
			       sge->length);

			p += sge->length;
		}
	} else if (mask & WR_REG_MASK) {
		wqe->mask = mask;
		wqe->state = wqe_state_posted;
		return 0;
	} else {
		memcpy(wqe->dma.sge, ibwr->sg_list,
		       num_sge * sizeof(struct ib_sge));
	}

	wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
		mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
	wqe->mask		= mask;
	wqe->dma.length		= length;
	wqe->dma.resid		= length;
	wqe->dma.num_sge	= num_sge;
	wqe->dma.cur_sge	= 0;
	wqe->dma.sge_offset	= 0;
	wqe->state		= wqe_state_posted;
	wqe->ssn		= atomic_add_return(1, &qp->ssn);

	return 0;
}

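/* Validate a single send WR and copy it into the next free slot of the
 * send queue under the send queue lock.
 */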
static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			 unsigned int mask, u32 length)
{
	int err;
	struct rxe_sq *sq = &qp->sq;
	struct rxe_send_wqe *send_wqe;
	unsigned long flags;

	err = validate_send_wr(qp, ibwr, mask, length);
	if (err)
		return err;

	spin_lock_irqsave(&qp->sq.sq_lock, flags);

	if (unlikely(queue_full(sq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	send_wqe = producer_addr(sq->queue);

	err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
	if (unlikely(err))
		goto err1;

	/*
	 * make sure all changes to the work queue are
	 * written before we update the producer pointer
	 */
	smp_wmb();

	advance_producer(sq->queue);
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

	return 0;

err1:
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
	return err;
}

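/* Post a chain of send WRs on behalf of a kernel ULP and kick the
 * requester task; on error *bad_wr points at the first WR that failed.
 */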
static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
				const struct ib_send_wr **bad_wr)
{
	int err = 0;
	unsigned int mask;
	unsigned int length = 0;
	int i;

	while (wr) {
		mask = wr_opcode_mask(wr->opcode, qp);
		if (unlikely(!mask)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
			     !(mask & WR_INLINE_MASK))) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		length = 0;
		for (i = 0; i < wr->num_sge; i++)
			length += wr->sg_list[i].length;

		err = post_one_send(qp, wr, mask, length);

		if (err) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	rxe_run_task(&qp->req.task, 1);
	if (unlikely(qp->req.state == QP_STATE_ERROR))
		rxe_run_task(&qp->comp.task, 1);

	return err;
}

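/* ib_device post_send entry point; for user QPs the WQEs are already in
 * the shared queue, so only the requester task needs to be run here.
 */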
static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
			 const struct ib_send_wr **bad_wr)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	if (unlikely(!qp->valid)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (unlikely(qp->req.state < QP_STATE_READY)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (qp->is_user) {
		/* Utilize process context to do protocol processing */
		rxe_run_task(&qp->req.task, 0);
		return 0;
	} else {
		return rxe_post_send_kernel(qp, wr, bad_wr);
	}
}

static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
			 const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_rq *rq = &qp->rq;
	unsigned long flags;

	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	if (unlikely(qp->srq)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	spin_lock_irqsave(&rq->producer_lock, flags);

	while (wr) {
		err = post_one_recv(rq, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&rq->producer_lock, flags);

	if (qp->resp.state == QP_STATE_ERROR)
		rxe_run_task(&qp->resp.task, 1);

err1:
	return err;
}

static struct ib_cq *rxe_create_cq(struct ib_device *dev,
				   const struct ib_cq_init_attr *attr,
				   struct ib_ucontext *context,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_cq *cq;
	struct rxe_create_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
	if (err)
		goto err1;

	cq = rxe_alloc(&rxe->cq_pool);
	if (!cq) {
		err = -ENOMEM;
		goto err1;
	}

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
			       context, uresp);
	if (err)
		goto err2;

	return &cq->ibcq;

err2:
	rxe_drop_ref(cq);
err1:
	return ERR_PTR(err);
}

static int rxe_destroy_cq(struct ib_cq *ibcq)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	rxe_cq_disable(cq);

	rxe_drop_ref(cq);
	return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	int err;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);
	struct rxe_resize_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return -EINVAL;
		uresp = udata->outbuf;
	}

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
	if (err)
		goto err1;

	err = rxe_cq_resize_queue(cq, cqe, uresp);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue);
		if (!cqe)
			break;

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		advance_consumer(cq->queue);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count = queue_count(cq->queue);

	return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	unsigned long irq_flags;
	int ret = 0;

	spin_lock_irqsave(&cq->cq_lock, irq_flags);
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = flags & IB_CQ_SOLICITED_MASK;

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
		ret = 1;

	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

	return ret;
}

static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_dma(pd, access, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
				     u64 start,
				     u64 length,
				     u64 iova,
				     int access, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err2;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_user(pd, start, length, iova,
				access, udata, mr);
	if (err)
		goto err3;

	return &mr->ibmr;

err3:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err2:
	return ERR_PTR(err);
}

static int rxe_dereg_mr(struct ib_mr *ibmr)
{
	struct rxe_mem *mr = to_rmr(ibmr);

	mr->state = RXE_MEM_STATE_ZOMBIE;
	rxe_drop_ref(mr->pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
	return 0;
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
				  enum ib_mr_type mr_type,
				  u32 max_num_sg)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_fast(pd, max_num_sg, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

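/* ib_sg_to_pages() callback: record one page address in the MR's
 * physical buffer map.
 */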
static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	struct rxe_map *map;
	struct rxe_phys_buf *buf;

	if (unlikely(mr->nbuf == mr->num_buf))
		return -ENOMEM;

	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

	buf->addr = addr;
	buf->size = ibmr->page_size;
	mr->nbuf++;

	return 0;
}

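/* Map a scatterlist into a fast-registration MR by walking it with
 * ib_sg_to_pages() and then recording the iova/length/page geometry.
 */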
static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	int n;

	mr->nbuf = 0;

	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

	mr->va = ibmr->iova;
	mr->iova = ibmr->iova;
	mr->length = ibmr->length;
	mr->page_shift = ilog2(ibmr->page_size);
	mr->page_mask = ibmr->page_size - 1;
	mr->offset = mr->iova & mr->page_mask;

	return n;
}

static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mc_grp *grp;

	/* takes a ref on grp if successful */
	err = rxe_mcast_get_grp(rxe, mgid, &grp);
	if (err)
		return err;

	err = rxe_mcast_add_grp_elem(rxe, qp, grp);

	rxe_drop_ref(grp);
	return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t parent_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	struct rxe_dev *rxe = container_of(device, struct rxe_dev,
					   ib_dev.dev);

	return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR_RO(parent);

static struct attribute *rxe_dev_attributes[] = {
	&dev_attr_parent.attr,
	NULL
};

static const struct attribute_group rxe_attr_group = {
	.attrs = rxe_dev_attributes,
};

static const struct ib_device_ops rxe_dev_ops = {
	.alloc_hw_stats = rxe_ib_alloc_hw_stats,
	.alloc_mr = rxe_alloc_mr,
	.alloc_pd = rxe_alloc_pd,
	.alloc_ucontext = rxe_alloc_ucontext,
	.attach_mcast = rxe_attach_mcast,
	.create_ah = rxe_create_ah,
	.create_cq = rxe_create_cq,
	.create_qp = rxe_create_qp,
	.create_srq = rxe_create_srq,
	.dealloc_pd = rxe_dealloc_pd,
	.dealloc_ucontext = rxe_dealloc_ucontext,
	.dereg_mr = rxe_dereg_mr,
	.destroy_ah = rxe_destroy_ah,
	.destroy_cq = rxe_destroy_cq,
	.destroy_qp = rxe_destroy_qp,
	.destroy_srq = rxe_destroy_srq,
	.detach_mcast = rxe_detach_mcast,
	.get_dma_mr = rxe_get_dma_mr,
	.get_hw_stats = rxe_ib_get_hw_stats,
	.get_link_layer = rxe_get_link_layer,
	.get_netdev = rxe_get_netdev,
	.get_port_immutable = rxe_port_immutable,
	.map_mr_sg = rxe_map_mr_sg,
	.mmap = rxe_mmap,
	.modify_ah = rxe_modify_ah,
	.modify_device = rxe_modify_device,
	.modify_port = rxe_modify_port,
	.modify_qp = rxe_modify_qp,
	.modify_srq = rxe_modify_srq,
	.peek_cq = rxe_peek_cq,
	.poll_cq = rxe_poll_cq,
	.post_recv = rxe_post_recv,
	.post_send = rxe_post_send,
	.post_srq_recv = rxe_post_srq_recv,
	.query_ah = rxe_query_ah,
	.query_device = rxe_query_device,
	.query_pkey = rxe_query_pkey,
	.query_port = rxe_query_port,
	.query_qp = rxe_query_qp,
	.query_srq = rxe_query_srq,
	.reg_user_mr = rxe_reg_user_mr,
	.req_notify_cq = rxe_req_notify_cq,
	.resize_cq = rxe_resize_cq,
};

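/* Set up the ib_device fields, verbs ops and CRC transform for a new
 * rxe device and register it with the RDMA core.
 */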
int rxe_register_device(struct rxe_dev *rxe)
{
	int err;
	struct ib_device *dev = &rxe->ib_dev;
	struct crypto_shash *tfm;

	strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

	dev->owner = THIS_MODULE;
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = num_possible_cpus();
	dev->dev.parent = rxe_dma_device(rxe);
	dev->local_dma_lkey = 0;
	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
			    rxe->ndev->dev_addr);
	dev->dev.dma_ops = &dma_virt_ops;
	dma_coerce_mask_and_coherent(&dev->dev,
				     dma_get_required_mask(&dev->dev));

	dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
	    | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
	    | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
	    ;

	ib_set_device_ops(dev, &rxe_dev_ops);

	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm)) {
		pr_err("failed to allocate crc algorithm err:%ld\n",
		       PTR_ERR(tfm));
		return PTR_ERR(tfm);
	}
	rxe->tfm = tfm;

	rdma_set_device_sysfs_group(dev, &rxe_attr_group);
	dev->driver_id = RDMA_DRIVER_RXE;
	err = ib_register_device(dev, "rxe%d", NULL);
	if (err) {
		pr_warn("%s failed with error %d\n", __func__, err);
		goto err1;
	}

	return 0;

err1:
	crypto_free_shash(rxe->tfm);

	return err;
}

void rxe_unregister_device(struct rxe_dev *rxe)
{
	struct ib_device *dev = &rxe->ib_dev;

	ib_unregister_device(dev);
}