xref: /openbmc/linux/drivers/infiniband/sw/rxe/rxe_verbs.c (revision 060f35a317ef09101b128f399dce7ed13d019461)
1  // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2  /*
3   * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
4   * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5   */
6  
7  #include <linux/dma-mapping.h>
8  #include <net/addrconf.h>
9  #include <rdma/uverbs_ioctl.h>
10  
11  #include "rxe.h"
12  #include "rxe_queue.h"
13  #include "rxe_hw_counters.h"
14  
15  static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr);
16  
17  /* dev */
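/*
 * Device and port query/modify verbs.  The soft-RoCE device exposes a
 * single port (port_num 1); device and port attributes are cached in
 * struct rxe_dev and copied out to the caller here.
 */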
18  static int rxe_query_device(struct ib_device *ibdev,
19  			    struct ib_device_attr *attr,
20  			    struct ib_udata *udata)
21  {
22  	struct rxe_dev *rxe = to_rdev(ibdev);
23  	int err;
24  
25  	if (udata->inlen || udata->outlen) {
26  		rxe_dbg_dev(rxe, "malformed udata\n");
27  		err = -EINVAL;
28  		goto err_out;
29  	}
30  
31  	memcpy(attr, &rxe->attr, sizeof(*attr));
32  
33  	return 0;
34  
35  err_out:
36  	rxe_err_dev(rxe, "returned err = %d\n", err);
37  	return err;
38  }
39  
40  static int rxe_query_port(struct ib_device *ibdev,
41  			  u32 port_num, struct ib_port_attr *attr)
42  {
43  	struct rxe_dev *rxe = to_rdev(ibdev);
44  	int err, ret;
45  
46  	if (port_num != 1) {
47  		err = -EINVAL;
48  		rxe_dbg_dev(rxe, "bad port_num = %d\n", port_num);
49  		goto err_out;
50  	}
51  
52  	memcpy(attr, &rxe->port.attr, sizeof(*attr));
53  
54  	mutex_lock(&rxe->usdev_lock);
55  	ret = ib_get_eth_speed(ibdev, port_num, &attr->active_speed,
56  			       &attr->active_width);
57  
58  	if (attr->state == IB_PORT_ACTIVE)
59  		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
60  	else if (dev_get_flags(rxe->ndev) & IFF_UP)
61  		attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
62  	else
63  		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
64  
65  	mutex_unlock(&rxe->usdev_lock);
66  
67  	return ret;
68  
69  err_out:
70  	rxe_err_dev(rxe, "returned err = %d\n", err);
71  	return err;
72  }
73  
74  static int rxe_query_pkey(struct ib_device *ibdev,
75  			  u32 port_num, u16 index, u16 *pkey)
76  {
77  	struct rxe_dev *rxe = to_rdev(ibdev);
78  	int err;
79  
80  	if (index != 0) {
81  		err = -EINVAL;
82  		rxe_dbg_dev(rxe, "bad pkey index = %d\n", index);
83  		goto err_out;
84  	}
85  
86  	*pkey = IB_DEFAULT_PKEY_FULL;
87  	return 0;
88  
89  err_out:
90  	rxe_err_dev(rxe, "returned err = %d\n", err);
91  	return err;
92  }
93  
94  static int rxe_modify_device(struct ib_device *ibdev,
95  			     int mask, struct ib_device_modify *attr)
96  {
97  	struct rxe_dev *rxe = to_rdev(ibdev);
98  	int err;
99  
100  	if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
101  		     IB_DEVICE_MODIFY_NODE_DESC)) {
102  		err = -EOPNOTSUPP;
103  		rxe_dbg_dev(rxe, "unsupported mask = 0x%x\n", mask);
104  		goto err_out;
105  	}
106  
107  	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
108  		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);
109  
110  	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
111  		memcpy(rxe->ib_dev.node_desc,
112  		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
113  	}
114  
115  	return 0;
116  
117  err_out:
118  	rxe_err_dev(rxe, "returned err = %d\n", err);
119  	return err;
120  }
121  
122  static int rxe_modify_port(struct ib_device *ibdev, u32 port_num,
123  			   int mask, struct ib_port_modify *attr)
124  {
125  	struct rxe_dev *rxe = to_rdev(ibdev);
126  	struct rxe_port *port;
127  	int err;
128  
129  	if (port_num != 1) {
130  		err = -EINVAL;
131  		rxe_dbg_dev(rxe, "bad port_num = %d\n", port_num);
132  		goto err_out;
133  	}
134  
135  	//TODO is shutdown useful
136  	if (mask & ~(IB_PORT_RESET_QKEY_CNTR)) {
137  		err = -EOPNOTSUPP;
138  		rxe_dbg_dev(rxe, "unsupported mask = 0x%x\n", mask);
139  		goto err_out;
140  	}
141  
142  	port = &rxe->port;
143  	port->attr.port_cap_flags |= attr->set_port_cap_mask;
144  	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;
145  
146  	if (mask & IB_PORT_RESET_QKEY_CNTR)
147  		port->attr.qkey_viol_cntr = 0;
148  
149  	return 0;
150  
151  err_out:
152  	rxe_err_dev(rxe, "returned err = %d\n", err);
153  	return err;
154  }
155  
156  static enum rdma_link_layer rxe_get_link_layer(struct ib_device *ibdev,
157  					       u32 port_num)
158  {
159  	struct rxe_dev *rxe = to_rdev(ibdev);
160  	int err;
161  
162  	if (port_num != 1) {
163  		err = -EINVAL;
164  		rxe_dbg_dev(rxe, "bad port_num = %d\n", port_num);
165  		goto err_out;
166  	}
167  
168  	return IB_LINK_LAYER_ETHERNET;
169  
170  err_out:
171  	rxe_err_dev(rxe, "returned err = %d\n", err);
172  	return err;
173  }
174  
175  static int rxe_port_immutable(struct ib_device *ibdev, u32 port_num,
176  			      struct ib_port_immutable *immutable)
177  {
178  	struct rxe_dev *rxe = to_rdev(ibdev);
179  	struct ib_port_attr attr = {};
180  	int err;
181  
182  	if (port_num != 1) {
183  		err = -EINVAL;
184  		rxe_dbg_dev(rxe, "bad port_num = %d\n", port_num);
185  		goto err_out;
186  	}
187  
188  	err = ib_query_port(ibdev, port_num, &attr);
189  	if (err)
190  		goto err_out;
191  
192  	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
193  	immutable->pkey_tbl_len = attr.pkey_tbl_len;
194  	immutable->gid_tbl_len = attr.gid_tbl_len;
195  	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
196  
197  	return 0;
198  
199  err_out:
200  	rxe_err_dev(rxe, "returned err = %d\n", err);
201  	return err;
202  }
203  
204  /* uc */
205  static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
206  {
207  	struct rxe_dev *rxe = to_rdev(ibuc->device);
208  	struct rxe_ucontext *uc = to_ruc(ibuc);
209  	int err;
210  
211  	err = rxe_add_to_pool(&rxe->uc_pool, uc);
212  	if (err)
213  		rxe_err_dev(rxe, "unable to create uc\n");
214  
215  	return err;
216  }
217  
218  static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
219  {
220  	struct rxe_ucontext *uc = to_ruc(ibuc);
221  	int err;
222  
223  	err = rxe_cleanup(uc);
224  	if (err)
225  		rxe_err_uc(uc, "cleanup failed, err = %d\n", err);
226  }
227  
228  /* pd */
229  static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
230  {
231  	struct rxe_dev *rxe = to_rdev(ibpd->device);
232  	struct rxe_pd *pd = to_rpd(ibpd);
233  	int err;
234  
235  	err = rxe_add_to_pool(&rxe->pd_pool, pd);
236  	if (err) {
237  		rxe_dbg_dev(rxe, "unable to alloc pd\n");
238  		goto err_out;
239  	}
240  
241  	return 0;
242  
243  err_out:
244  	rxe_err_dev(rxe, "returned err = %d\n", err);
245  	return err;
246  }
247  
248  static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
249  {
250  	struct rxe_pd *pd = to_rpd(ibpd);
251  	int err;
252  
253  	err = rxe_cleanup(pd);
254  	if (err)
255  		rxe_err_pd(pd, "cleanup failed, err = %d\n", err);
256  
257  	return 0;
258  }
259  
260  /* ah */
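/*
 * rxe_create_ah() supports two user ABIs: a new provider passes a
 * response buffer (udata->outlen >= sizeof(*uresp)) and receives the AH
 * index in ah_num, while an old provider gets ah_num forced to 0.
 * Kernel callers have no udata and are marked !is_user.
 */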
261  static int rxe_create_ah(struct ib_ah *ibah,
262  			 struct rdma_ah_init_attr *init_attr,
263  			 struct ib_udata *udata)
264  {
265  	struct rxe_dev *rxe = to_rdev(ibah->device);
266  	struct rxe_ah *ah = to_rah(ibah);
267  	struct rxe_create_ah_resp __user *uresp = NULL;
268  	int err, cleanup_err;
269  
270  	if (udata) {
271  		/* test if new user provider */
272  		if (udata->outlen >= sizeof(*uresp))
273  			uresp = udata->outbuf;
274  		ah->is_user = true;
275  	} else {
276  		ah->is_user = false;
277  	}
278  
279  	err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
280  			init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
281  	if (err) {
282  		rxe_dbg_dev(rxe, "unable to create ah\n");
283  		goto err_out;
284  	}
285  
286  	/* create index > 0 */
287  	ah->ah_num = ah->elem.index;
288  
289  	err = rxe_ah_chk_attr(ah, init_attr->ah_attr);
290  	if (err) {
291  		rxe_dbg_ah(ah, "bad attr\n");
292  		goto err_cleanup;
293  	}
294  
295  	if (uresp) {
296  		/* only if new user provider */
297  		err = copy_to_user(&uresp->ah_num, &ah->ah_num,
298  					 sizeof(uresp->ah_num));
299  		if (err) {
300  			err = -EFAULT;
301  			rxe_dbg_ah(ah, "unable to copy to user\n");
302  			goto err_cleanup;
303  		}
304  	} else if (ah->is_user) {
305  		/* only if old user provider */
306  		ah->ah_num = 0;
307  	}
308  
309  	rxe_init_av(init_attr->ah_attr, &ah->av);
310  	rxe_finalize(ah);
311  
312  	return 0;
313  
314  err_cleanup:
315  	cleanup_err = rxe_cleanup(ah);
316  	if (cleanup_err)
317  		rxe_err_ah(ah, "cleanup failed, err = %d\n", cleanup_err);
318  err_out:
319  	rxe_err_ah(ah, "returned err = %d\n", err);
320  	return err;
321  }
322  
323  static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
324  {
325  	struct rxe_ah *ah = to_rah(ibah);
326  	int err;
327  
328  	err = rxe_ah_chk_attr(ah, attr);
329  	if (err) {
330  		rxe_dbg_ah(ah, "bad attr\n");
331  		goto err_out;
332  	}
333  
334  	rxe_init_av(attr, &ah->av);
335  
336  	return 0;
337  
338  err_out:
339  	rxe_err_ah(ah, "returned err = %d\n", err);
340  	return err;
341  }
342  
343  static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
344  {
345  	struct rxe_ah *ah = to_rah(ibah);
346  
347  	memset(attr, 0, sizeof(*attr));
348  	attr->type = ibah->type;
349  	rxe_av_to_attr(&ah->av, attr);
350  
351  	return 0;
352  }
353  
354  static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
355  {
356  	struct rxe_ah *ah = to_rah(ibah);
357  	int err;
358  
359  	err = rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE);
360  	if (err)
361  		rxe_err_ah(ah, "cleanup failed, err = %d\n", err);
362  
363  	return 0;
364  }
365  
366  /* srq */
367  static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
368  			  struct ib_udata *udata)
369  {
370  	struct rxe_dev *rxe = to_rdev(ibsrq->device);
371  	struct rxe_pd *pd = to_rpd(ibsrq->pd);
372  	struct rxe_srq *srq = to_rsrq(ibsrq);
373  	struct rxe_create_srq_resp __user *uresp = NULL;
374  	int err, cleanup_err;
375  
376  	if (udata) {
377  		if (udata->outlen < sizeof(*uresp)) {
378  			err = -EINVAL;
379  			rxe_err_dev(rxe, "malformed udata\n");
380  			goto err_out;
381  		}
382  		uresp = udata->outbuf;
383  	}
384  
385  	if (init->srq_type != IB_SRQT_BASIC) {
386  		err = -EOPNOTSUPP;
387  		rxe_dbg_dev(rxe, "srq type = %d, not supported\n",
388  				init->srq_type);
389  		goto err_out;
390  	}
391  
392  	err = rxe_srq_chk_init(rxe, init);
393  	if (err) {
394  		rxe_dbg_dev(rxe, "invalid init attributes\n");
395  		goto err_out;
396  	}
397  
398  	err = rxe_add_to_pool(&rxe->srq_pool, srq);
399  	if (err) {
400  		rxe_dbg_dev(rxe, "unable to create srq, err = %d\n", err);
401  		goto err_out;
402  	}
403  
404  	rxe_get(pd);
405  	srq->pd = pd;
406  
407  	err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
408  	if (err) {
409  		rxe_dbg_srq(srq, "create srq failed, err = %d\n", err);
410  		goto err_cleanup;
411  	}
412  
413  	return 0;
414  
415  err_cleanup:
416  	cleanup_err = rxe_cleanup(srq);
417  	if (cleanup_err)
418  		rxe_err_srq(srq, "cleanup failed, err = %d\n", cleanup_err);
419  err_out:
420  	rxe_err_dev(rxe, "returned err = %d\n", err);
421  	return err;
422  }
423  
424  static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
425  			  enum ib_srq_attr_mask mask,
426  			  struct ib_udata *udata)
427  {
428  	struct rxe_srq *srq = to_rsrq(ibsrq);
429  	struct rxe_dev *rxe = to_rdev(ibsrq->device);
430  	struct rxe_modify_srq_cmd cmd = {};
431  	int err;
432  
433  	if (udata) {
434  		if (udata->inlen < sizeof(cmd)) {
435  			err = -EINVAL;
436  			rxe_dbg_srq(srq, "malformed udata\n");
437  			goto err_out;
438  		}
439  
440  		err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
441  		if (err) {
442  			err = -EFAULT;
443  			rxe_dbg_srq(srq, "unable to read udata\n");
444  			goto err_out;
445  		}
446  	}
447  
448  	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
449  	if (err) {
450  		rxe_dbg_srq(srq, "bad init attributes\n");
451  		goto err_out;
452  	}
453  
454  	err = rxe_srq_from_attr(rxe, srq, attr, mask, &cmd, udata);
455  	if (err) {
456  		rxe_dbg_srq(srq, "bad attr\n");
457  		goto err_out;
458  	}
459  
460  	return 0;
461  
462  err_out:
463  	rxe_err_srq(srq, "returned err = %d\n", err);
464  	return err;
465  }
466  
467  static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
468  {
469  	struct rxe_srq *srq = to_rsrq(ibsrq);
470  	int err;
471  
472  	if (srq->error) {
473  		err = -EINVAL;
474  		rxe_dbg_srq(srq, "srq in error state\n");
475  		goto err_out;
476  	}
477  
478  	attr->max_wr = srq->rq.queue->buf->index_mask;
479  	attr->max_sge = srq->rq.max_sge;
480  	attr->srq_limit = srq->limit;
481  	return 0;
482  
483  err_out:
484  	rxe_err_srq(srq, "returned err = %d\n", err);
485  	return err;
486  }
487  
488  static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
489  			     const struct ib_recv_wr **bad_wr)
490  {
491  	int err = 0;
492  	struct rxe_srq *srq = to_rsrq(ibsrq);
493  	unsigned long flags;
494  
495  	spin_lock_irqsave(&srq->rq.producer_lock, flags);
496  
497  	while (wr) {
498  		err = post_one_recv(&srq->rq, wr);
499  		if (unlikely(err))
500  			break;
501  		wr = wr->next;
502  	}
503  
504  	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);
505  
506  	if (err) {
507  		*bad_wr = wr;
508  		rxe_err_srq(srq, "returned err = %d\n", err);
509  	}
510  
511  	return err;
512  }
513  
514  static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
515  {
516  	struct rxe_srq *srq = to_rsrq(ibsrq);
517  	int err;
518  
519  	err = rxe_cleanup(srq);
520  	if (err)
521  		rxe_err_srq(srq, "cleanup failed, err = %d\n", err);
522  
523  	return 0;
524  }
525  
526  /* qp */
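/*
 * QP verbs.  rxe_create_qp() rejects any inbound udata and requires a
 * response buffer large enough for rxe_create_qp_resp when called from
 * user space, then hands off to rxe_qp_from_init() to build the work
 * queues.
 */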
527  static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
528  			 struct ib_udata *udata)
529  {
530  	struct rxe_dev *rxe = to_rdev(ibqp->device);
531  	struct rxe_pd *pd = to_rpd(ibqp->pd);
532  	struct rxe_qp *qp = to_rqp(ibqp);
533  	struct rxe_create_qp_resp __user *uresp = NULL;
534  	int err, cleanup_err;
535  
536  	if (udata) {
537  		if (udata->inlen) {
538  			err = -EINVAL;
539  			rxe_dbg_dev(rxe, "malformed udata, err = %d\n", err);
540  			goto err_out;
541  		}
542  
543  		if (udata->outlen < sizeof(*uresp)) {
544  			err = -EINVAL;
545  			rxe_dbg_dev(rxe, "malformed udata, err = %d\n", err);
546  			goto err_out;
547  		}
548  
549  		qp->is_user = true;
550  		uresp = udata->outbuf;
551  	} else {
552  		qp->is_user = false;
553  	}
554  
555  	if (init->create_flags) {
556  		err = -EOPNOTSUPP;
557  		rxe_dbg_dev(rxe, "unsupported create_flags, err = %d\n", err);
558  		goto err_out;
559  	}
560  
561  	err = rxe_qp_chk_init(rxe, init);
562  	if (err) {
563  		rxe_dbg_dev(rxe, "bad init attr, err = %d\n", err);
564  		goto err_out;
565  	}
566  
567  	err = rxe_add_to_pool(&rxe->qp_pool, qp);
568  	if (err) {
569  		rxe_dbg_dev(rxe, "unable to create qp, err = %d\n", err);
570  		goto err_out;
571  	}
572  
573  	err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata);
574  	if (err) {
575  		rxe_dbg_qp(qp, "create qp failed, err = %d\n", err);
576  		goto err_cleanup;
577  	}
578  
579  	rxe_finalize(qp);
580  	return 0;
581  
582  err_cleanup:
583  	cleanup_err = rxe_cleanup(qp);
584  	if (cleanup_err)
585  		rxe_err_qp(qp, "cleanup failed, err = %d\n", cleanup_err);
586  err_out:
587  	rxe_err_dev(rxe, "returned err = %d\n", err);
588  	return err;
589  }
590  
591  static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
592  			 int mask, struct ib_udata *udata)
593  {
594  	struct rxe_dev *rxe = to_rdev(ibqp->device);
595  	struct rxe_qp *qp = to_rqp(ibqp);
596  	int err;
597  
598  	if (mask & ~IB_QP_ATTR_STANDARD_BITS) {
599  		err = -EOPNOTSUPP;
600  		rxe_dbg_qp(qp, "unsupported mask = 0x%x, err = %d\n",
601  			   mask, err);
602  		goto err_out;
603  	}
604  
605  	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
606  	if (err) {
607  		rxe_dbg_qp(qp, "bad mask/attr, err = %d\n", err);
608  		goto err_out;
609  	}
610  
611  	err = rxe_qp_from_attr(qp, attr, mask, udata);
612  	if (err) {
613  		rxe_dbg_qp(qp, "modify qp failed, err = %d\n", err);
614  		goto err_out;
615  	}
616  
617  	if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH))
618  		qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
619  						  qp->ibqp.qp_num,
620  						  qp->attr.dest_qp_num);
621  
622  	return 0;
623  
624  err_out:
625  	rxe_err_qp(qp, "returned err = %d\n", err);
626  	return err;
627  }
628  
629  static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
630  			int mask, struct ib_qp_init_attr *init)
631  {
632  	struct rxe_qp *qp = to_rqp(ibqp);
633  
634  	rxe_qp_to_init(qp, init);
635  	rxe_qp_to_attr(qp, attr, mask);
636  
637  	return 0;
638  }
639  
640  static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
641  {
642  	struct rxe_qp *qp = to_rqp(ibqp);
643  	int err;
644  
645  	err = rxe_qp_chk_destroy(qp);
646  	if (err) {
647  		rxe_dbg_qp(qp, "unable to destroy qp, err = %d\n", err);
648  		goto err_out;
649  	}
650  
651  	err = rxe_cleanup(qp);
652  	if (err)
653  		rxe_err_qp(qp, "cleanup failed, err = %d\n", err);
654  
655  	return 0;
656  
657  err_out:
658  	rxe_err_qp(qp, "returned err = %d\n", err);
659  	return err;
660  }
661  
662  /* send wr */
663  
664  /* sanity check incoming send work request */
665  static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
666  			    unsigned int *maskp, unsigned int *lengthp)
667  {
668  	int num_sge = ibwr->num_sge;
669  	struct rxe_sq *sq = &qp->sq;
670  	unsigned int mask = 0;
671  	unsigned long length = 0;
672  	int err = -EINVAL;
673  	int i;
674  
675  	do {
676  		mask = wr_opcode_mask(ibwr->opcode, qp);
677  		if (!mask) {
678  			rxe_err_qp(qp, "bad wr opcode for qp type\n");
679  			break;
680  		}
681  
682  		if (num_sge > sq->max_sge) {
683  			rxe_err_qp(qp, "num_sge > max_sge\n");
684  			break;
685  		}
686  
687  		length = 0;
688  		for (i = 0; i < ibwr->num_sge; i++)
689  			length += ibwr->sg_list[i].length;
690  
691  		if (length > RXE_PORT_MAX_MSG_SZ) {
692  			rxe_err_qp(qp, "message length too long\n");
693  			break;
694  		}
695  
696  		if (mask & WR_ATOMIC_MASK) {
697  			if (length != 8) {
698  				rxe_err_qp(qp, "atomic length != 8\n");
699  				break;
700  			}
701  			if (atomic_wr(ibwr)->remote_addr & 0x7) {
702  				rxe_err_qp(qp, "misaligned atomic address\n");
703  				break;
704  			}
705  		}
706  		if (ibwr->send_flags & IB_SEND_INLINE) {
707  			if (!(mask & WR_INLINE_MASK)) {
708  				rxe_err_qp(qp, "opcode doesn't support inline data\n");
709  				break;
710  			}
711  			if (length > sq->max_inline) {
712  				rxe_err_qp(qp, "inline length too big\n");
713  				break;
714  			}
715  		}
716  
717  		err = 0;
718  	} while (0);
719  
720  	*maskp = mask;
721  	*lengthp = (int)length;
722  
723  	return err;
724  }
725  
726  static int init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
727  			 const struct ib_send_wr *ibwr)
728  {
729  	wr->wr_id = ibwr->wr_id;
730  	wr->opcode = ibwr->opcode;
731  	wr->send_flags = ibwr->send_flags;
732  
733  	if (qp_type(qp) == IB_QPT_UD ||
734  	    qp_type(qp) == IB_QPT_GSI) {
735  		struct ib_ah *ibah = ud_wr(ibwr)->ah;
736  
737  		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
738  		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
739  		wr->wr.ud.ah_num = to_rah(ibah)->ah_num;
740  		if (qp_type(qp) == IB_QPT_GSI)
741  			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
742  
743  		switch (wr->opcode) {
744  		case IB_WR_SEND_WITH_IMM:
745  			wr->ex.imm_data = ibwr->ex.imm_data;
746  			break;
747  		case IB_WR_SEND:
748  			break;
749  		default:
750  			rxe_err_qp(qp, "bad wr opcode %d for UD/GSI QP\n",
751  					wr->opcode);
752  			return -EINVAL;
753  		}
754  	} else {
755  		switch (wr->opcode) {
756  		case IB_WR_RDMA_WRITE_WITH_IMM:
757  			wr->ex.imm_data = ibwr->ex.imm_data;
758  			fallthrough;
759  		case IB_WR_RDMA_READ:
760  		case IB_WR_RDMA_WRITE:
761  			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
762  			wr->wr.rdma.rkey	= rdma_wr(ibwr)->rkey;
763  			break;
764  		case IB_WR_SEND_WITH_IMM:
765  			wr->ex.imm_data = ibwr->ex.imm_data;
766  			break;
767  		case IB_WR_SEND_WITH_INV:
768  			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
769  			break;
770  		case IB_WR_RDMA_READ_WITH_INV:
771  			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
772  			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
773  			wr->wr.rdma.rkey	= rdma_wr(ibwr)->rkey;
774  			break;
775  		case IB_WR_ATOMIC_CMP_AND_SWP:
776  		case IB_WR_ATOMIC_FETCH_AND_ADD:
777  			wr->wr.atomic.remote_addr =
778  				atomic_wr(ibwr)->remote_addr;
779  			wr->wr.atomic.compare_add =
780  				atomic_wr(ibwr)->compare_add;
781  			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
782  			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
783  			break;
784  		case IB_WR_LOCAL_INV:
785  			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
786  			break;
787  		case IB_WR_REG_MR:
788  			wr->wr.reg.mr = reg_wr(ibwr)->mr;
789  			wr->wr.reg.key = reg_wr(ibwr)->key;
790  			wr->wr.reg.access = reg_wr(ibwr)->access;
791  			break;
792  		case IB_WR_SEND:
793  		case IB_WR_BIND_MW:
794  		case IB_WR_FLUSH:
795  		case IB_WR_ATOMIC_WRITE:
796  			break;
797  		default:
798  			rxe_err_qp(qp, "unsupported wr opcode %d\n",
799  					wr->opcode);
800  			return -EINVAL;
801  		}
802  	}
803  
804  	return 0;
805  }
806  
807  static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
808  				    const struct ib_send_wr *ibwr)
809  {
810  	struct ib_sge *sge = ibwr->sg_list;
811  	u8 *p = wqe->dma.inline_data;
812  	int i;
813  
814  	for (i = 0; i < ibwr->num_sge; i++, sge++) {
815  		memcpy(p, ib_virt_dma_to_ptr(sge->addr), sge->length);
816  		p += sge->length;
817  	}
818  }
819  
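/*
 * Build a send WQE in place in the send queue.  Local operations
 * (e.g. IB_WR_REG_MR) carry no data and are marked posted right away;
 * otherwise inline data or the SG list is copied into the WQE and the
 * iova is taken from the RDMA or atomic remote address.
 */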
820  static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
821  			 unsigned int mask, unsigned int length,
822  			 struct rxe_send_wqe *wqe)
823  {
824  	int num_sge = ibwr->num_sge;
825  	int err;
826  
827  	err = init_send_wr(qp, &wqe->wr, ibwr);
828  	if (err)
829  		return err;
830  
831  	/* local operation */
832  	if (unlikely(mask & WR_LOCAL_OP_MASK)) {
833  		wqe->mask = mask;
834  		wqe->state = wqe_state_posted;
835  		return 0;
836  	}
837  
838  	if (unlikely(ibwr->send_flags & IB_SEND_INLINE))
839  		copy_inline_data_to_wqe(wqe, ibwr);
840  	else
841  		memcpy(wqe->dma.sge, ibwr->sg_list,
842  		       num_sge * sizeof(struct ib_sge));
843  
844  	wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
845  		mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
846  	wqe->mask		= mask;
847  	wqe->dma.length		= length;
848  	wqe->dma.resid		= length;
849  	wqe->dma.num_sge	= num_sge;
850  	wqe->dma.cur_sge	= 0;
851  	wqe->dma.sge_offset	= 0;
852  	wqe->state		= wqe_state_posted;
853  	wqe->ssn		= atomic_add_return(1, &qp->ssn);
854  
855  	return 0;
856  }
857  
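/*
 * Validate one send WR and, if the send queue has room, fill the
 * producer slot and advance the producer index.  Called with the
 * sq_lock held.
 */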
858  static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr)
859  {
860  	int err;
861  	struct rxe_sq *sq = &qp->sq;
862  	struct rxe_send_wqe *send_wqe;
863  	unsigned int mask;
864  	unsigned int length;
865  	int full;
866  
867  	err = validate_send_wr(qp, ibwr, &mask, &length);
868  	if (err)
869  		return err;
870  
871  	full = queue_full(sq->queue, QUEUE_TYPE_FROM_ULP);
872  	if (unlikely(full)) {
873  		rxe_err_qp(qp, "send queue full\n");
874  		return -ENOMEM;
875  	}
876  
877  	send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_FROM_ULP);
878  	err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
879  	if (!err)
880  		queue_advance_producer(sq->queue, QUEUE_TYPE_FROM_ULP);
881  
882  	return err;
883  }
884  
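/*
 * Post a chain of send WRs for a kernel QP.  Posting stops at the first
 * failure and *bad_wr is set; WQEs already queued are still handed to
 * the requester task, and the completer task is scheduled if the QP is
 * in the error state so that queued work can be flushed.
 */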
885  static int rxe_post_send_kernel(struct rxe_qp *qp,
886  				const struct ib_send_wr *ibwr,
887  				const struct ib_send_wr **bad_wr)
888  {
889  	int err = 0;
890  	unsigned long flags;
891  	int good = 0;
892  
893  	spin_lock_irqsave(&qp->sq.sq_lock, flags);
894  	while (ibwr) {
895  		err = post_one_send(qp, ibwr);
896  		if (err) {
897  			*bad_wr = ibwr;
898  			break;
899  		} else {
900  			good++;
901  		}
902  		ibwr = ibwr->next;
903  	}
904  	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
905  
906  	/* kickoff processing of any posted wqes */
907  	if (good)
908  		rxe_sched_task(&qp->req.task);
909  
910  	spin_lock_irqsave(&qp->state_lock, flags);
911  	if (qp_state(qp) == IB_QPS_ERR)
912  		rxe_sched_task(&qp->comp.task);
913  	spin_unlock_irqrestore(&qp->state_lock, flags);
914  
915  	return err;
916  }
917  
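/*
 * ib_post_send() entry point.  For user QPs the provider library has
 * already written the WQEs into the shared send queue, so only the
 * requester task needs to run; kernel QPs build their WQEs here via
 * rxe_post_send_kernel().
 */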
918  static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
919  			 const struct ib_send_wr **bad_wr)
920  {
921  	struct rxe_qp *qp = to_rqp(ibqp);
922  	int err;
923  	unsigned long flags;
924  
925  	spin_lock_irqsave(&qp->state_lock, flags);
926  	/* caller has already called destroy_qp */
927  	if (WARN_ON_ONCE(!qp->valid)) {
928  		spin_unlock_irqrestore(&qp->state_lock, flags);
929  		rxe_err_qp(qp, "qp has been destroyed\n");
930  		return -EINVAL;
931  	}
932  
933  	if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
934  		spin_unlock_irqrestore(&qp->state_lock, flags);
935  		*bad_wr = wr;
936  		rxe_err_qp(qp, "qp not ready to send\n");
937  		return -EINVAL;
938  	}
939  	spin_unlock_irqrestore(&qp->state_lock, flags);
940  
941  	if (qp->is_user) {
942  		/* Utilize process context to do protocol processing */
943  		rxe_run_task(&qp->req.task);
944  	} else {
945  		err = rxe_post_send_kernel(qp, wr, bad_wr);
946  		if (err)
947  			return err;
948  	}
949  
950  	return 0;
951  }
952  
953  /* recv wr */
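/*
 * post_one_recv() fills one receive WQE: it checks for queue overflow,
 * too many SGEs and an over-size total length before copying the SG
 * list into the producer slot.  Callers hold the receive queue
 * producer_lock.
 */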
954  static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
955  {
956  	int i;
957  	unsigned long length;
958  	struct rxe_recv_wqe *recv_wqe;
959  	int num_sge = ibwr->num_sge;
960  	int full;
961  	int err;
962  
963  	full = queue_full(rq->queue, QUEUE_TYPE_FROM_ULP);
964  	if (unlikely(full)) {
965  		err = -ENOMEM;
966  		rxe_dbg("queue full\n");
967  		goto err_out;
968  	}
969  
970  	if (unlikely(num_sge > rq->max_sge)) {
971  		err = -EINVAL;
972  		rxe_dbg("bad num_sge > max_sge\n");
973  		goto err_out;
974  	}
975  
976  	length = 0;
977  	for (i = 0; i < num_sge; i++)
978  		length += ibwr->sg_list[i].length;
979  
980  	if (length > RXE_PORT_MAX_MSG_SZ) {
981  		err = -EINVAL;
982  		rxe_dbg("message length too long\n");
983  		goto err_out;
984  	}
985  
986  	recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_FROM_ULP);
987  
988  	recv_wqe->wr_id = ibwr->wr_id;
989  	recv_wqe->dma.length = length;
990  	recv_wqe->dma.resid = length;
991  	recv_wqe->dma.num_sge = num_sge;
992  	recv_wqe->dma.cur_sge = 0;
993  	recv_wqe->dma.sge_offset = 0;
994  	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
995  	       num_sge * sizeof(struct ib_sge));
996  
997  	queue_advance_producer(rq->queue, QUEUE_TYPE_FROM_ULP);
998  
999  	return 0;
1000  
1001  err_out:
1002  	rxe_dbg("returned err = %d\n", err);
1003  	return err;
1004  }
1005  
1006  static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1007  			 const struct ib_recv_wr **bad_wr)
1008  {
1009  	int err = 0;
1010  	struct rxe_qp *qp = to_rqp(ibqp);
1011  	struct rxe_rq *rq = &qp->rq;
1012  	unsigned long flags;
1013  
1014  	spin_lock_irqsave(&qp->state_lock, flags);
1015  	/* caller has already called destroy_qp */
1016  	if (WARN_ON_ONCE(!qp->valid)) {
1017  		spin_unlock_irqrestore(&qp->state_lock, flags);
1018  		rxe_err_qp(qp, "qp has been destroyed\n");
1019  		return -EINVAL;
1020  	}
1021  
1022  	/* see C10-97.2.1 */
1023  	if (unlikely((qp_state(qp) < IB_QPS_INIT))) {
1024  		spin_unlock_irqrestore(&qp->state_lock, flags);
1025  		*bad_wr = wr;
1026  		rxe_dbg_qp(qp, "qp not ready to post recv\n");
1027  		return -EINVAL;
1028  	}
1029  	spin_unlock_irqrestore(&qp->state_lock, flags);
1030  
1031  	if (unlikely(qp->srq)) {
1032  		*bad_wr = wr;
1033  		rxe_dbg_qp(qp, "qp has srq, use post_srq_recv instead\n");
1034  		return -EINVAL;
1035  	}
1036  
1037  	spin_lock_irqsave(&rq->producer_lock, flags);
1038  
1039  	while (wr) {
1040  		err = post_one_recv(rq, wr);
1041  		if (unlikely(err)) {
1042  			*bad_wr = wr;
1043  			break;
1044  		}
1045  		wr = wr->next;
1046  	}
1047  
1048  	spin_unlock_irqrestore(&rq->producer_lock, flags);
1049  
1050  	spin_lock_irqsave(&qp->state_lock, flags);
1051  	if (qp_state(qp) == IB_QPS_ERR)
1052  		rxe_sched_task(&qp->resp.task);
1053  	spin_unlock_irqrestore(&qp->state_lock, flags);
1054  
1055  	return err;
1056  }
1057  
1058  /* cq */
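/*
 * CQ verbs.  rxe_create_cq() validates the requested attributes, adds
 * the CQ to the pool and builds the completion ring; user-space callers
 * must provide room for rxe_create_cq_resp in udata.
 */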
1059  static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
1060  			 struct ib_udata *udata)
1061  {
1062  	struct ib_device *dev = ibcq->device;
1063  	struct rxe_dev *rxe = to_rdev(dev);
1064  	struct rxe_cq *cq = to_rcq(ibcq);
1065  	struct rxe_create_cq_resp __user *uresp = NULL;
1066  	int err, cleanup_err;
1067  
1068  	if (udata) {
1069  		if (udata->outlen < sizeof(*uresp)) {
1070  			err = -EINVAL;
1071  			rxe_dbg_dev(rxe, "malformed udata, err = %d\n", err);
1072  			goto err_out;
1073  		}
1074  		uresp = udata->outbuf;
1075  	}
1076  
1077  	if (attr->flags) {
1078  		err = -EOPNOTSUPP;
1079  		rxe_dbg_dev(rxe, "bad attr->flags, err = %d\n", err);
1080  		goto err_out;
1081  	}
1082  
1083  	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
1084  	if (err) {
1085  		rxe_dbg_dev(rxe, "bad init attributes, err = %d\n", err);
1086  		goto err_out;
1087  	}
1088  
1089  	err = rxe_add_to_pool(&rxe->cq_pool, cq);
1090  	if (err) {
1091  		rxe_dbg_dev(rxe, "unable to create cq, err = %d\n", err);
1092  		goto err_out;
1093  	}
1094  
1095  	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
1096  			       uresp);
1097  	if (err) {
1098  		rxe_dbg_cq(cq, "create cq failed, err = %d\n", err);
1099  		goto err_cleanup;
1100  	}
1101  
1102  	return 0;
1103  
1104  err_cleanup:
1105  	cleanup_err = rxe_cleanup(cq);
1106  	if (cleanup_err)
1107  		rxe_err_cq(cq, "cleanup failed, err = %d\n", cleanup_err);
1108  err_out:
1109  	rxe_err_dev(rxe, "returned err = %d\n", err);
1110  	return err;
1111  }
1112  
1113  static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
1114  {
1115  	struct rxe_cq *cq = to_rcq(ibcq);
1116  	struct rxe_dev *rxe = to_rdev(ibcq->device);
1117  	struct rxe_resize_cq_resp __user *uresp = NULL;
1118  	int err;
1119  
1120  	if (udata) {
1121  		if (udata->outlen < sizeof(*uresp)) {
1122  			err = -EINVAL;
1123  			rxe_dbg_cq(cq, "malformed udata\n");
1124  			goto err_out;
1125  		}
1126  		uresp = udata->outbuf;
1127  	}
1128  
1129  	err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
1130  	if (err) {
1131  		rxe_dbg_cq(cq, "bad attr, err = %d\n", err);
1132  		goto err_out;
1133  	}
1134  
1135  	err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
1136  	if (err) {
1137  		rxe_dbg_cq(cq, "resize cq failed, err = %d\n", err);
1138  		goto err_out;
1139  	}
1140  
1141  	return 0;
1142  
1143  err_out:
1144  	rxe_err_cq(cq, "returned err = %d\n", err);
1145  	return err;
1146  }
1147  
1148  static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
1149  {
1150  	int i;
1151  	struct rxe_cq *cq = to_rcq(ibcq);
1152  	struct rxe_cqe *cqe;
1153  	unsigned long flags;
1154  
1155  	spin_lock_irqsave(&cq->cq_lock, flags);
1156  	for (i = 0; i < num_entries; i++) {
1157  		cqe = queue_head(cq->queue, QUEUE_TYPE_TO_ULP);
1158  		if (!cqe)
1159  			break;	/* queue empty */
1160  
1161  		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
1162  		queue_advance_consumer(cq->queue, QUEUE_TYPE_TO_ULP);
1163  	}
1164  	spin_unlock_irqrestore(&cq->cq_lock, flags);
1165  
1166  	return i;
1167  }
1168  
1169  static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
1170  {
1171  	struct rxe_cq *cq = to_rcq(ibcq);
1172  	int count;
1173  
1174  	count = queue_count(cq->queue, QUEUE_TYPE_TO_ULP);
1175  
1176  	return (count > wc_cnt) ? wc_cnt : count;
1177  }
1178  
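/*
 * Arm the CQ.  The solicited/next-completion bits are OR-ed into
 * cq->notify; when IB_CQ_REPORT_MISSED_EVENTS is requested and the
 * queue is not empty, 1 is returned so the caller knows to poll again.
 */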
1179  static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
1180  {
1181  	struct rxe_cq *cq = to_rcq(ibcq);
1182  	int ret = 0;
1183  	int empty;
1184  	unsigned long irq_flags;
1185  
1186  	spin_lock_irqsave(&cq->cq_lock, irq_flags);
1187  	cq->notify |= flags & IB_CQ_SOLICITED_MASK;
1188  	empty = queue_empty(cq->queue, QUEUE_TYPE_TO_ULP);
1189  
1190  	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
1191  		ret = 1;
1192  
1193  	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
1194  
1195  	return ret;
1196  }
1197  
1198  static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1199  {
1200  	struct rxe_cq *cq = to_rcq(ibcq);
1201  	int err;
1202  
1203  	/* See IBA C11-17: The CI shall return an error if this Verb is
1204  	 * invoked while a Work Queue is still associated with the CQ.
1205  	 */
1206  	if (atomic_read(&cq->num_wq)) {
1207  		err = -EINVAL;
1208  		rxe_dbg_cq(cq, "still in use\n");
1209  		goto err_out;
1210  	}
1211  
1212  	err = rxe_cleanup(cq);
1213  	if (err)
1214  		rxe_err_cq(cq, "cleanup failed, err = %d\n", err);
1215  
1216  	return 0;
1217  
1218  err_out:
1219  	rxe_err_cq(cq, "returned err = %d\n", err);
1220  	return err;
1221  }
1222  
1223  /* mr */
1224  static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
1225  {
1226  	struct rxe_dev *rxe = to_rdev(ibpd->device);
1227  	struct rxe_pd *pd = to_rpd(ibpd);
1228  	struct rxe_mr *mr;
1229  	int err;
1230  
1231  	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1232  	if (!mr)
1233  		return ERR_PTR(-ENOMEM);
1234  
1235  	err = rxe_add_to_pool(&rxe->mr_pool, mr);
1236  	if (err) {
1237  		rxe_dbg_dev(rxe, "unable to create mr\n");
1238  		goto err_free;
1239  	}
1240  
1241  	rxe_get(pd);
1242  	mr->ibmr.pd = ibpd;
1243  	mr->ibmr.device = ibpd->device;
1244  
1245  	rxe_mr_init_dma(access, mr);
1246  	rxe_finalize(mr);
1247  	return &mr->ibmr;
1248  
1249  err_free:
1250  	kfree(mr);
1251  	rxe_err_pd(pd, "returned err = %d\n", err);
1252  	return ERR_PTR(err);
1253  }
1254  
1255  static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
1256  				     u64 length, u64 iova, int access,
1257  				     struct ib_udata *udata)
1258  {
1259  	struct rxe_dev *rxe = to_rdev(ibpd->device);
1260  	struct rxe_pd *pd = to_rpd(ibpd);
1261  	struct rxe_mr *mr;
1262  	int err, cleanup_err;
1263  
1264  	if (access & ~RXE_ACCESS_SUPPORTED_MR) {
1265  		rxe_err_pd(pd, "access = %#x not supported (%#x)\n", access,
1266  				RXE_ACCESS_SUPPORTED_MR);
1267  		return ERR_PTR(-EOPNOTSUPP);
1268  	}
1269  
1270  	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1271  	if (!mr)
1272  		return ERR_PTR(-ENOMEM);
1273  
1274  	err = rxe_add_to_pool(&rxe->mr_pool, mr);
1275  	if (err) {
1276  		rxe_dbg_pd(pd, "unable to create mr\n");
1277  		goto err_free;
1278  	}
1279  
1280  	rxe_get(pd);
1281  	mr->ibmr.pd = ibpd;
1282  	mr->ibmr.device = ibpd->device;
1283  
1284  	err = rxe_mr_init_user(rxe, start, length, iova, access, mr);
1285  	if (err) {
1286  		rxe_dbg_mr(mr, "reg_user_mr failed, err = %d\n", err);
1287  		goto err_cleanup;
1288  	}
1289  
1290  	rxe_finalize(mr);
1291  	return &mr->ibmr;
1292  
1293  err_cleanup:
1294  	cleanup_err = rxe_cleanup(mr);
1295  	if (cleanup_err)
1296  		rxe_err_mr(mr, "cleanup failed, err = %d\n", cleanup_err);
1297  err_free:
1298  	kfree(mr);
1299  	rxe_err_pd(pd, "returned err = %d\n", err);
1300  	return ERR_PTR(err);
1301  }
1302  
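/*
 * Only IB_MR_REREG_PD and IB_MR_REREG_ACCESS are handled; re-registering
 * the translation itself is rejected.  Returning NULL tells the core
 * that the existing MR was updated in place rather than replaced.
 */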
1303  static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags,
1304  				       u64 start, u64 length, u64 iova,
1305  				       int access, struct ib_pd *ibpd,
1306  				       struct ib_udata *udata)
1307  {
1308  	struct rxe_mr *mr = to_rmr(ibmr);
1309  	struct rxe_pd *old_pd = to_rpd(ibmr->pd);
1310  	struct rxe_pd *pd = to_rpd(ibpd);
1311  
1312  	/* for now only support the two easy cases:
1313  	 * rereg_pd and rereg_access
1314  	 */
1315  	if (flags & ~RXE_MR_REREG_SUPPORTED) {
1316  		rxe_err_mr(mr, "flags = %#x not supported\n", flags);
1317  		return ERR_PTR(-EOPNOTSUPP);
1318  	}
1319  
1320  	if (flags & IB_MR_REREG_PD) {
1321  		rxe_put(old_pd);
1322  		rxe_get(pd);
1323  		mr->ibmr.pd = ibpd;
1324  	}
1325  
1326  	if (flags & IB_MR_REREG_ACCESS) {
1327  		if (access & ~RXE_ACCESS_SUPPORTED_MR) {
1328  			rxe_err_mr(mr, "access = %#x not supported\n", access);
1329  			return ERR_PTR(-EOPNOTSUPP);
1330  		}
1331  		mr->access = access;
1332  	}
1333  
1334  	return NULL;
1335  }
1336  
1337  static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
1338  				  u32 max_num_sg)
1339  {
1340  	struct rxe_dev *rxe = to_rdev(ibpd->device);
1341  	struct rxe_pd *pd = to_rpd(ibpd);
1342  	struct rxe_mr *mr;
1343  	int err, cleanup_err;
1344  
1345  	if (mr_type != IB_MR_TYPE_MEM_REG) {
1346  		err = -EINVAL;
1347  		rxe_dbg_pd(pd, "mr type %d not supported, err = %d\n",
1348  			   mr_type, err);
1349  		goto err_out;
1350  	}
1351  
1352  	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1353  	if (!mr)
1354  		return ERR_PTR(-ENOMEM);
1355  
1356  	err = rxe_add_to_pool(&rxe->mr_pool, mr);
1357  	if (err)
1358  		goto err_free;
1359  
1360  	rxe_get(pd);
1361  	mr->ibmr.pd = ibpd;
1362  	mr->ibmr.device = ibpd->device;
1363  
1364  	err = rxe_mr_init_fast(max_num_sg, mr);
1365  	if (err) {
1366  		rxe_dbg_mr(mr, "alloc_mr failed, err = %d\n", err);
1367  		goto err_cleanup;
1368  	}
1369  
1370  	rxe_finalize(mr);
1371  	return &mr->ibmr;
1372  
1373  err_cleanup:
1374  	cleanup_err = rxe_cleanup(mr);
1375  	if (cleanup_err)
1376  		rxe_err_mr(mr, "cleanup failed, err = %d\n", cleanup_err);
1377  err_free:
1378  	kfree(mr);
1379  err_out:
1380  	rxe_err_pd(pd, "returned err = %d\n", err);
1381  	return ERR_PTR(err);
1382  }
1383  
1384  static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1385  {
1386  	struct rxe_mr *mr = to_rmr(ibmr);
1387  	int err, cleanup_err;
1388  
1389  	/* See IBA 10.6.7.2.6 */
1390  	if (atomic_read(&mr->num_mw) > 0) {
1391  		err = -EINVAL;
1392  		rxe_dbg_mr(mr, "mr has mw's bound\n");
1393  		goto err_out;
1394  	}
1395  
1396  	cleanup_err = rxe_cleanup(mr);
1397  	if (cleanup_err)
1398  		rxe_err_mr(mr, "cleanup failed, err = %d\n", cleanup_err);
1399  
1400  	kfree_rcu_mightsleep(mr);
1401  	return 0;
1402  
1403  err_out:
1404  	rxe_err_mr(mr, "returned err = %d\n", err);
1405  	return err;
1406  }
1407  
1408  static ssize_t parent_show(struct device *device,
1409  			   struct device_attribute *attr, char *buf)
1410  {
1411  	struct rxe_dev *rxe =
1412  		rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);
1413  
1414  	return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1));
1415  }
1416  
1417  static DEVICE_ATTR_RO(parent);
1418  
1419  static struct attribute *rxe_dev_attributes[] = {
1420  	&dev_attr_parent.attr,
1421  	NULL
1422  };
1423  
1424  static const struct attribute_group rxe_attr_group = {
1425  	.attrs = rxe_dev_attributes,
1426  };
1427  
1428  static int rxe_enable_driver(struct ib_device *ib_dev)
1429  {
1430  	struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
1431  
1432  	rxe_set_port_state(rxe);
1433  	dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));
1434  	return 0;
1435  }
1436  
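/*
 * ib_device_ops table wiring the ib_core verbs entry points to the rxe
 * handlers above.  The INIT_RDMA_OBJ_SIZE() entries let the core
 * allocate the driver-private object structs (rxe_qp, rxe_cq, ...) on
 * behalf of the driver.
 */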
1437  static const struct ib_device_ops rxe_dev_ops = {
1438  	.owner = THIS_MODULE,
1439  	.driver_id = RDMA_DRIVER_RXE,
1440  	.uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,
1441  
1442  	.alloc_hw_port_stats = rxe_ib_alloc_hw_port_stats,
1443  	.alloc_mr = rxe_alloc_mr,
1444  	.alloc_mw = rxe_alloc_mw,
1445  	.alloc_pd = rxe_alloc_pd,
1446  	.alloc_ucontext = rxe_alloc_ucontext,
1447  	.attach_mcast = rxe_attach_mcast,
1448  	.create_ah = rxe_create_ah,
1449  	.create_cq = rxe_create_cq,
1450  	.create_qp = rxe_create_qp,
1451  	.create_srq = rxe_create_srq,
1452  	.create_user_ah = rxe_create_ah,
1453  	.dealloc_driver = rxe_dealloc,
1454  	.dealloc_mw = rxe_dealloc_mw,
1455  	.dealloc_pd = rxe_dealloc_pd,
1456  	.dealloc_ucontext = rxe_dealloc_ucontext,
1457  	.dereg_mr = rxe_dereg_mr,
1458  	.destroy_ah = rxe_destroy_ah,
1459  	.destroy_cq = rxe_destroy_cq,
1460  	.destroy_qp = rxe_destroy_qp,
1461  	.destroy_srq = rxe_destroy_srq,
1462  	.detach_mcast = rxe_detach_mcast,
1463  	.device_group = &rxe_attr_group,
1464  	.enable_driver = rxe_enable_driver,
1465  	.get_dma_mr = rxe_get_dma_mr,
1466  	.get_hw_stats = rxe_ib_get_hw_stats,
1467  	.get_link_layer = rxe_get_link_layer,
1468  	.get_port_immutable = rxe_port_immutable,
1469  	.map_mr_sg = rxe_map_mr_sg,
1470  	.mmap = rxe_mmap,
1471  	.modify_ah = rxe_modify_ah,
1472  	.modify_device = rxe_modify_device,
1473  	.modify_port = rxe_modify_port,
1474  	.modify_qp = rxe_modify_qp,
1475  	.modify_srq = rxe_modify_srq,
1476  	.peek_cq = rxe_peek_cq,
1477  	.poll_cq = rxe_poll_cq,
1478  	.post_recv = rxe_post_recv,
1479  	.post_send = rxe_post_send,
1480  	.post_srq_recv = rxe_post_srq_recv,
1481  	.query_ah = rxe_query_ah,
1482  	.query_device = rxe_query_device,
1483  	.query_pkey = rxe_query_pkey,
1484  	.query_port = rxe_query_port,
1485  	.query_qp = rxe_query_qp,
1486  	.query_srq = rxe_query_srq,
1487  	.reg_user_mr = rxe_reg_user_mr,
1488  	.req_notify_cq = rxe_req_notify_cq,
1489  	.rereg_user_mr = rxe_rereg_user_mr,
1490  	.resize_cq = rxe_resize_cq,
1491  
1492  	INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
1493  	INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
1494  	INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
1495  	INIT_RDMA_OBJ_SIZE(ib_qp, rxe_qp, ibqp),
1496  	INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
1497  	INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
1498  	INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw),
1499  };
1500  
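/*
 * Register the rxe device with the RDMA core: derive the node GUID from
 * the net_device MAC address, bind the netdev to port 1, set up ICRC
 * and finally call ib_register_device().  See the note below about rxe
 * possibly being freed by a concurrent unregister.
 */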
1501  int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
1502  {
1503  	int err;
1504  	struct ib_device *dev = &rxe->ib_dev;
1505  
1506  	strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
1507  
1508  	dev->node_type = RDMA_NODE_IB_CA;
1509  	dev->phys_port_cnt = 1;
1510  	dev->num_comp_vectors = num_possible_cpus();
1511  	dev->local_dma_lkey = 0;
1512  	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
1513  			    rxe->ndev->dev_addr);
1514  
1515  	dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
1516  				BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);
1517  
1518  	ib_set_device_ops(dev, &rxe_dev_ops);
1519  	err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
1520  	if (err)
1521  		return err;
1522  
1523  	err = rxe_icrc_init(rxe);
1524  	if (err)
1525  		return err;
1526  
1527  	err = ib_register_device(dev, ibdev_name, NULL);
1528  	if (err)
1529  		rxe_dbg_dev(rxe, "failed with error %d\n", err);
1530  
1531  	/*
1532  	 * Note that rxe may be invalid at this point if another thread
1533  	 * unregistered it.
1534  	 */
1535  	return err;
1536  }
1537