1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
4  */
5 
6 #include <linux/vmalloc.h>
7 
8 #include <rdma/ib_addr.h>
9 #include <rdma/ib_umem.h>
10 #include <rdma/ib_user_verbs.h>
11 #include <rdma/ib_verbs.h>
12 #include <rdma/uverbs_ioctl.h>
13 
14 #include "efa.h"
15 
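/*
 * An mmap key returned to userspace encodes the entry type in the top eight
 * bits and the entry's page index, shifted by PAGE_SHIFT, in the lower 56
 * bits:
 *
 *   key = (mmap_flag << EFA_MMAP_FLAG_SHIFT) | (mmap_page << PAGE_SHIFT)
 *
 * Userspace hands the key back as the mmap() offset, see efa_mmap().
 */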
16 #define EFA_MMAP_FLAG_SHIFT 56
17 #define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
18 #define EFA_MMAP_INVALID U64_MAX
19 
20 enum {
21 	EFA_MMAP_DMA_PAGE = 0,
22 	EFA_MMAP_IO_WC,
23 	EFA_MMAP_IO_NC,
24 };
25 
26 #define EFA_AENQ_ENABLED_GROUPS \
27 	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
28 	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
29 
30 struct efa_mmap_entry {
31 	void  *obj;
32 	u64 address;
33 	u64 length;
34 	u32 mmap_page;
35 	u8 mmap_flag;
36 };
37 
38 static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
39 {
40 	return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
41 	       ((u64)efa->mmap_page << PAGE_SHIFT);
42 }
43 
44 #define EFA_DEFINE_STATS(op) \
45 	op(EFA_TX_BYTES, "tx_bytes") \
46 	op(EFA_TX_PKTS, "tx_pkts") \
47 	op(EFA_RX_BYTES, "rx_bytes") \
48 	op(EFA_RX_PKTS, "rx_pkts") \
49 	op(EFA_RX_DROPS, "rx_drops") \
50 	op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
51 	op(EFA_COMPLETED_CMDS, "completed_cmds") \
52 	op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
53 	op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
54 	op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
55 	op(EFA_CREATE_QP_ERR, "create_qp_err") \
56 	op(EFA_REG_MR_ERR, "reg_mr_err") \
57 	op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
58 	op(EFA_CREATE_AH_ERR, "create_ah_err")
59 
60 #define EFA_STATS_ENUM(ename, name) ename,
61 #define EFA_STATS_STR(ename, name) [ename] = name,
62 
63 enum efa_hw_stats {
64 	EFA_DEFINE_STATS(EFA_STATS_ENUM)
65 };
66 
67 static const char *const efa_stats_names[] = {
68 	EFA_DEFINE_STATS(EFA_STATS_STR)
69 };
70 
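/*
 * Indirect PBL chunk layout: each chunk is an EFA_CHUNK_SIZE (4KB) buffer
 * holding up to EFA_PTRS_PER_CHUNK 8-byte page DMA addresses, followed by a
 * struct efa_com_ctrl_buff_info that points at the next chunk in the list
 * (see pbl_chunk_list_create()).
 */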
71 #define EFA_CHUNK_PAYLOAD_SHIFT       12
72 #define EFA_CHUNK_PAYLOAD_SIZE        BIT(EFA_CHUNK_PAYLOAD_SHIFT)
73 #define EFA_CHUNK_PAYLOAD_PTR_SIZE    8
74 
75 #define EFA_CHUNK_SHIFT               12
76 #define EFA_CHUNK_SIZE                BIT(EFA_CHUNK_SHIFT)
77 #define EFA_CHUNK_PTR_SIZE            sizeof(struct efa_com_ctrl_buff_info)
78 
79 #define EFA_PTRS_PER_CHUNK \
80 	((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)
81 
82 #define EFA_CHUNK_USED_SIZE \
83 	((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
84 
85 #define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE
86 
87 struct pbl_chunk {
88 	dma_addr_t dma_addr;
89 	u64 *buf;
90 	u32 length;
91 };
92 
93 struct pbl_chunk_list {
94 	struct pbl_chunk *chunks;
95 	unsigned int size;
96 };
97 
98 struct pbl_context {
99 	union {
100 		struct {
101 			dma_addr_t dma_addr;
102 		} continuous;
103 		struct {
104 			u32 pbl_buf_size_in_pages;
105 			struct scatterlist *sgl;
106 			int sg_dma_cnt;
107 			struct pbl_chunk_list chunk_list;
108 		} indirect;
109 	} phys;
110 	u64 *pbl_buf;
111 	u32 pbl_buf_size_in_bytes;
112 	u8 physically_continuous;
113 };
114 
115 static inline struct efa_dev *to_edev(struct ib_device *ibdev)
116 {
117 	return container_of(ibdev, struct efa_dev, ibdev);
118 }
119 
120 static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
121 {
122 	return container_of(ibucontext, struct efa_ucontext, ibucontext);
123 }
124 
125 static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
126 {
127 	return container_of(ibpd, struct efa_pd, ibpd);
128 }
129 
130 static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
131 {
132 	return container_of(ibmr, struct efa_mr, ibmr);
133 }
134 
135 static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
136 {
137 	return container_of(ibqp, struct efa_qp, ibqp);
138 }
139 
140 static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
141 {
142 	return container_of(ibcq, struct efa_cq, ibcq);
143 }
144 
145 static inline struct efa_ah *to_eah(struct ib_ah *ibah)
146 {
147 	return container_of(ibah, struct efa_ah, ibah);
148 }
149 
150 #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
151 				 FIELD_SIZEOF(typeof(x), fld) <= (sz))
152 
153 #define is_reserved_cleared(reserved) \
154 	!memchr_inv(reserved, 0, sizeof(reserved))
155 
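/*
 * Allocate a zeroed, physically contiguous buffer and DMA map it in the
 * requested direction. Used for the RQ and CQ buffers, which are later
 * exposed to userspace through EFA_MMAP_DMA_PAGE entries.
 */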
156 static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
157 			       size_t size, enum dma_data_direction dir)
158 {
159 	void *addr;
160 
161 	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
162 	if (!addr)
163 		return NULL;
164 
165 	*dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
166 	if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
167 		ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
168 		free_pages_exact(addr, size);
169 		return NULL;
170 	}
171 
172 	return addr;
173 }
174 
175 /*
176  * This is only called when the ucontext is destroyed and there can be no
177  * concurrent mmap query or allocation on the xarray, so we can be sure no
178  * other thread is using the entry pointer. We also know that all the BAR
179  * pages have either been zapped or munmapped at this point. Normal pages are
180  * refcounted and will be freed at the proper time.
181  */
182 static void mmap_entries_remove_free(struct efa_dev *dev,
183 				     struct efa_ucontext *ucontext)
184 {
185 	struct efa_mmap_entry *entry;
186 	unsigned long mmap_page;
187 
188 	xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
189 		xa_erase(&ucontext->mmap_xa, mmap_page);
190 
191 		ibdev_dbg(
192 			&dev->ibdev,
193 			"mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
194 			entry->obj, get_mmap_key(entry), entry->address,
195 			entry->length);
196 		if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
197 			/* DMA mapping is already gone, now free the pages */
198 			free_pages_exact(phys_to_virt(entry->address),
199 					 entry->length);
200 		kfree(entry);
201 	}
202 }
203 
204 static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
205 					     struct efa_ucontext *ucontext,
206 					     u64 key, u64 len)
207 {
208 	struct efa_mmap_entry *entry;
209 	u64 mmap_page;
210 
211 	mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
212 	if (mmap_page > U32_MAX)
213 		return NULL;
214 
215 	entry = xa_load(&ucontext->mmap_xa, mmap_page);
216 	if (!entry || get_mmap_key(entry) != key || entry->length != len)
217 		return NULL;
218 
219 	ibdev_dbg(&dev->ibdev,
220 		  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] found\n",
221 		  entry->obj, key, entry->address, entry->length);
222 
223 	return entry;
224 }
225 
226 /*
227  * Note this locking scheme cannot support removal of entries, except during
228  * ucontext destruction when the core code guarantees no concurrency.
229  */
230 static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
231 			     void *obj, u64 address, u64 length, u8 mmap_flag)
232 {
233 	struct efa_mmap_entry *entry;
234 	u32 next_mmap_page;
235 	int err;
236 
237 	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
238 	if (!entry)
239 		return EFA_MMAP_INVALID;
240 
241 	entry->obj = obj;
242 	entry->address = address;
243 	entry->length = length;
244 	entry->mmap_flag = mmap_flag;
245 
246 	xa_lock(&ucontext->mmap_xa);
247 	if (check_add_overflow(ucontext->mmap_xa_page,
248 			       (u32)(length >> PAGE_SHIFT),
249 			       &next_mmap_page))
250 		goto err_unlock;
251 
252 	entry->mmap_page = ucontext->mmap_xa_page;
253 	ucontext->mmap_xa_page = next_mmap_page;
254 	err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
255 			  GFP_KERNEL);
256 	if (err)
257 		goto err_unlock;
258 
259 	xa_unlock(&ucontext->mmap_xa);
260 
261 	ibdev_dbg(
262 		&dev->ibdev,
263 		"mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
264 		entry->obj, entry->address, entry->length, get_mmap_key(entry));
265 
266 	return get_mmap_key(entry);
267 
268 err_unlock:
269 	xa_unlock(&ucontext->mmap_xa);
270 	kfree(entry);
271 	return EFA_MMAP_INVALID;
272 
273 }
274 
275 int efa_query_device(struct ib_device *ibdev,
276 		     struct ib_device_attr *props,
277 		     struct ib_udata *udata)
278 {
279 	struct efa_com_get_device_attr_result *dev_attr;
280 	struct efa_ibv_ex_query_device_resp resp = {};
281 	struct efa_dev *dev = to_edev(ibdev);
282 	int err;
283 
284 	if (udata && udata->inlen &&
285 	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
286 		ibdev_dbg(ibdev,
287 			  "Incompatible ABI params, udata not cleared\n");
288 		return -EINVAL;
289 	}
290 
291 	dev_attr = &dev->dev_attr;
292 
293 	memset(props, 0, sizeof(*props));
294 	props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
295 	props->page_size_cap = dev_attr->page_size_cap;
296 	props->vendor_id = dev->pdev->vendor;
297 	props->vendor_part_id = dev->pdev->device;
298 	props->hw_ver = dev->pdev->subsystem_device;
299 	props->max_qp = dev_attr->max_qp;
300 	props->max_cq = dev_attr->max_cq;
301 	props->max_pd = dev_attr->max_pd;
302 	props->max_mr = dev_attr->max_mr;
303 	props->max_ah = dev_attr->max_ah;
304 	props->max_cqe = dev_attr->max_cq_depth;
305 	props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
306 				 dev_attr->max_rq_depth);
307 	props->max_send_sge = dev_attr->max_sq_sge;
308 	props->max_recv_sge = dev_attr->max_rq_sge;
309 
310 	if (udata && udata->outlen) {
311 		resp.max_sq_sge = dev_attr->max_sq_sge;
312 		resp.max_rq_sge = dev_attr->max_rq_sge;
313 		resp.max_sq_wr = dev_attr->max_sq_depth;
314 		resp.max_rq_wr = dev_attr->max_rq_depth;
315 
316 		err = ib_copy_to_udata(udata, &resp,
317 				       min(sizeof(resp), udata->outlen));
318 		if (err) {
319 			ibdev_dbg(ibdev,
320 				  "Failed to copy udata for query_device\n");
321 			return err;
322 		}
323 	}
324 
325 	return 0;
326 }
327 
328 int efa_query_port(struct ib_device *ibdev, u8 port,
329 		   struct ib_port_attr *props)
330 {
331 	struct efa_dev *dev = to_edev(ibdev);
332 
333 	props->lmc = 1;
334 
335 	props->state = IB_PORT_ACTIVE;
336 	props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
337 	props->gid_tbl_len = 1;
338 	props->pkey_tbl_len = 1;
339 	props->active_speed = IB_SPEED_EDR;
340 	props->active_width = IB_WIDTH_4X;
341 	props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
342 	props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
343 	props->max_msg_sz = dev->mtu;
344 	props->max_vl_num = 1;
345 
346 	return 0;
347 }
348 
349 int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
350 		 int qp_attr_mask,
351 		 struct ib_qp_init_attr *qp_init_attr)
352 {
353 	struct efa_dev *dev = to_edev(ibqp->device);
354 	struct efa_com_query_qp_params params = {};
355 	struct efa_com_query_qp_result result;
356 	struct efa_qp *qp = to_eqp(ibqp);
357 	int err;
358 
359 #define EFA_QUERY_QP_SUPP_MASK \
360 	(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
361 	 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)
362 
363 	if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
364 		ibdev_dbg(&dev->ibdev,
365 			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
366 			  qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
367 		return -EOPNOTSUPP;
368 	}
369 
370 	memset(qp_attr, 0, sizeof(*qp_attr));
371 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
372 
373 	params.qp_handle = qp->qp_handle;
374 	err = efa_com_query_qp(&dev->edev, &params, &result);
375 	if (err)
376 		return err;
377 
378 	qp_attr->qp_state = result.qp_state;
379 	qp_attr->qkey = result.qkey;
380 	qp_attr->sq_psn = result.sq_psn;
381 	qp_attr->sq_draining = result.sq_draining;
382 	qp_attr->port_num = 1;
383 
384 	qp_attr->cap.max_send_wr = qp->max_send_wr;
385 	qp_attr->cap.max_recv_wr = qp->max_recv_wr;
386 	qp_attr->cap.max_send_sge = qp->max_send_sge;
387 	qp_attr->cap.max_recv_sge = qp->max_recv_sge;
388 	qp_attr->cap.max_inline_data = qp->max_inline_data;
389 
390 	qp_init_attr->qp_type = ibqp->qp_type;
391 	qp_init_attr->recv_cq = ibqp->recv_cq;
392 	qp_init_attr->send_cq = ibqp->send_cq;
393 	qp_init_attr->qp_context = ibqp->qp_context;
394 	qp_init_attr->cap = qp_attr->cap;
395 
396 	return 0;
397 }
398 
399 int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
400 		  union ib_gid *gid)
401 {
402 	struct efa_dev *dev = to_edev(ibdev);
403 
404 	memcpy(gid->raw, dev->addr, sizeof(dev->addr));
405 
406 	return 0;
407 }
408 
409 int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
410 		   u16 *pkey)
411 {
412 	if (index > 0)
413 		return -EINVAL;
414 
415 	*pkey = 0xffff;
416 	return 0;
417 }
418 
419 static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
420 {
421 	struct efa_com_dealloc_pd_params params = {
422 		.pdn = pdn,
423 	};
424 
425 	return efa_com_dealloc_pd(&dev->edev, &params);
426 }
427 
428 int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
429 {
430 	struct efa_dev *dev = to_edev(ibpd->device);
431 	struct efa_ibv_alloc_pd_resp resp = {};
432 	struct efa_com_alloc_pd_result result;
433 	struct efa_pd *pd = to_epd(ibpd);
434 	int err;
435 
436 	if (udata->inlen &&
437 	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
438 		ibdev_dbg(&dev->ibdev,
439 			  "Incompatible ABI params, udata not cleared\n");
440 		err = -EINVAL;
441 		goto err_out;
442 	}
443 
444 	err = efa_com_alloc_pd(&dev->edev, &result);
445 	if (err)
446 		goto err_out;
447 
448 	pd->pdn = result.pdn;
449 	resp.pdn = result.pdn;
450 
451 	if (udata->outlen) {
452 		err = ib_copy_to_udata(udata, &resp,
453 				       min(sizeof(resp), udata->outlen));
454 		if (err) {
455 			ibdev_dbg(&dev->ibdev,
456 				  "Failed to copy udata for alloc_pd\n");
457 			goto err_dealloc_pd;
458 		}
459 	}
460 
461 	ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);
462 
463 	return 0;
464 
465 err_dealloc_pd:
466 	efa_pd_dealloc(dev, result.pdn);
467 err_out:
468 	atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
469 	return err;
470 }
471 
472 void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
473 {
474 	struct efa_dev *dev = to_edev(ibpd->device);
475 	struct efa_pd *pd = to_epd(ibpd);
476 
477 	ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
478 	efa_pd_dealloc(dev, pd->pdn);
479 }
480 
481 static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
482 {
483 	struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };
484 
485 	return efa_com_destroy_qp(&dev->edev, &params);
486 }
487 
488 int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
489 {
490 	struct efa_dev *dev = to_edev(ibqp->pd->device);
491 	struct efa_qp *qp = to_eqp(ibqp);
492 	int err;
493 
494 	ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
495 	err = efa_destroy_qp_handle(dev, qp->qp_handle);
496 	if (err)
497 		return err;
498 
499 	if (qp->rq_cpu_addr) {
500 		ibdev_dbg(&dev->ibdev,
501 			  "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
502 			  qp->rq_cpu_addr, qp->rq_size,
503 			  &qp->rq_dma_addr);
504 		dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
505 				 DMA_TO_DEVICE);
506 	}
507 
508 	kfree(qp);
509 	return 0;
510 }
511 
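/*
 * Register the SQ/RQ doorbells, the LLQ descriptor ring and, when an RQ
 * exists, the RQ buffer in the per-ucontext mmap table, and return the
 * resulting keys and in-page offsets to userspace in the create_qp response.
 */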
512 static int qp_mmap_entries_setup(struct efa_qp *qp,
513 				 struct efa_dev *dev,
514 				 struct efa_ucontext *ucontext,
515 				 struct efa_com_create_qp_params *params,
516 				 struct efa_ibv_create_qp_resp *resp)
517 {
518 	/*
519 	 * Once an entry is inserted it might be mmapped, hence cannot be
520 	 * cleaned up until dealloc_ucontext.
521 	 */
522 	resp->sq_db_mmap_key =
523 		mmap_entry_insert(dev, ucontext, qp,
524 				  dev->db_bar_addr + resp->sq_db_offset,
525 				  PAGE_SIZE, EFA_MMAP_IO_NC);
526 	if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
527 		return -ENOMEM;
528 
529 	resp->sq_db_offset &= ~PAGE_MASK;
530 
531 	resp->llq_desc_mmap_key =
532 		mmap_entry_insert(dev, ucontext, qp,
533 				  dev->mem_bar_addr + resp->llq_desc_offset,
534 				  PAGE_ALIGN(params->sq_ring_size_in_bytes +
535 					     (resp->llq_desc_offset & ~PAGE_MASK)),
536 				  EFA_MMAP_IO_WC);
537 	if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
538 		return -ENOMEM;
539 
540 	resp->llq_desc_offset &= ~PAGE_MASK;
541 
542 	if (qp->rq_size) {
543 		resp->rq_db_mmap_key =
544 			mmap_entry_insert(dev, ucontext, qp,
545 					  dev->db_bar_addr + resp->rq_db_offset,
546 					  PAGE_SIZE, EFA_MMAP_IO_NC);
547 		if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
548 			return -ENOMEM;
549 
550 		resp->rq_db_offset &= ~PAGE_MASK;
551 
552 		resp->rq_mmap_key =
553 			mmap_entry_insert(dev, ucontext, qp,
554 					  virt_to_phys(qp->rq_cpu_addr),
555 					  qp->rq_size, EFA_MMAP_DMA_PAGE);
556 		if (resp->rq_mmap_key == EFA_MMAP_INVALID)
557 			return -ENOMEM;
558 
559 		resp->rq_mmap_size = qp->rq_size;
560 	}
561 
562 	return 0;
563 }
564 
565 static int efa_qp_validate_cap(struct efa_dev *dev,
566 			       struct ib_qp_init_attr *init_attr)
567 {
568 	if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
569 		ibdev_dbg(&dev->ibdev,
570 			  "qp: requested send wr[%u] exceeds the max[%u]\n",
571 			  init_attr->cap.max_send_wr,
572 			  dev->dev_attr.max_sq_depth);
573 		return -EINVAL;
574 	}
575 	if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
576 		ibdev_dbg(&dev->ibdev,
577 			  "qp: requested receive wr[%u] exceeds the max[%u]\n",
578 			  init_attr->cap.max_recv_wr,
579 			  dev->dev_attr.max_rq_depth);
580 		return -EINVAL;
581 	}
582 	if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
583 		ibdev_dbg(&dev->ibdev,
584 			  "qp: requested sge send[%u] exceeds the max[%u]\n",
585 			  init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
586 		return -EINVAL;
587 	}
588 	if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
589 		ibdev_dbg(&dev->ibdev,
590 			  "qp: requested sge recv[%u] exceeds the max[%u]\n",
591 			  init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
592 		return -EINVAL;
593 	}
594 	if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
595 		ibdev_dbg(&dev->ibdev,
596 			  "qp: requested inline data[%u] exceeds the max[%u]\n",
597 			  init_attr->cap.max_inline_data,
598 			  dev->dev_attr.inline_buf_size);
599 		return -EINVAL;
600 	}
601 
602 	return 0;
603 }
604 
605 static int efa_qp_validate_attr(struct efa_dev *dev,
606 				struct ib_qp_init_attr *init_attr)
607 {
608 	if (init_attr->qp_type != IB_QPT_DRIVER &&
609 	    init_attr->qp_type != IB_QPT_UD) {
610 		ibdev_dbg(&dev->ibdev,
611 			  "Unsupported qp type %d\n", init_attr->qp_type);
612 		return -EOPNOTSUPP;
613 	}
614 
615 	if (init_attr->srq) {
616 		ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
617 		return -EOPNOTSUPP;
618 	}
619 
620 	if (init_attr->create_flags) {
621 		ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
622 		return -EOPNOTSUPP;
623 	}
624 
625 	return 0;
626 }
627 
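/*
 * Create a UD QP or, for IB_QPT_DRIVER, an SRD QP. The RQ ring is allocated
 * by the kernel and exposed to userspace through an EFA_MMAP_DMA_PAGE entry,
 * while SQ descriptors are written through the write-combined LLQ mapping of
 * the device memory BAR (see qp_mmap_entries_setup()).
 */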
628 struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
629 			    struct ib_qp_init_attr *init_attr,
630 			    struct ib_udata *udata)
631 {
632 	struct efa_com_create_qp_params create_qp_params = {};
633 	struct efa_com_create_qp_result create_qp_resp;
634 	struct efa_dev *dev = to_edev(ibpd->device);
635 	struct efa_ibv_create_qp_resp resp = {};
636 	struct efa_ibv_create_qp cmd = {};
637 	bool rq_entry_inserted = false;
638 	struct efa_ucontext *ucontext;
639 	struct efa_qp *qp;
640 	int err;
641 
642 	ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
643 					     ibucontext);
644 
645 	err = efa_qp_validate_cap(dev, init_attr);
646 	if (err)
647 		goto err_out;
648 
649 	err = efa_qp_validate_attr(dev, init_attr);
650 	if (err)
651 		goto err_out;
652 
653 	if (!field_avail(cmd, driver_qp_type, udata->inlen)) {
654 		ibdev_dbg(&dev->ibdev,
655 			  "Incompatible ABI params, no input udata\n");
656 		err = -EINVAL;
657 		goto err_out;
658 	}
659 
660 	if (udata->inlen > sizeof(cmd) &&
661 	    !ib_is_udata_cleared(udata, sizeof(cmd),
662 				 udata->inlen - sizeof(cmd))) {
663 		ibdev_dbg(&dev->ibdev,
664 			  "Incompatible ABI params, unknown fields in udata\n");
665 		err = -EINVAL;
666 		goto err_out;
667 	}
668 
669 	err = ib_copy_from_udata(&cmd, udata,
670 				 min(sizeof(cmd), udata->inlen));
671 	if (err) {
672 		ibdev_dbg(&dev->ibdev,
673 			  "Cannot copy udata for create_qp\n");
674 		goto err_out;
675 	}
676 
677 	if (cmd.comp_mask) {
678 		ibdev_dbg(&dev->ibdev,
679 			  "Incompatible ABI params, unknown fields in udata\n");
680 		err = -EINVAL;
681 		goto err_out;
682 	}
683 
684 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
685 	if (!qp) {
686 		err = -ENOMEM;
687 		goto err_out;
688 	}
689 
690 	create_qp_params.uarn = ucontext->uarn;
691 	create_qp_params.pd = to_epd(ibpd)->pdn;
692 
693 	if (init_attr->qp_type == IB_QPT_UD) {
694 		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
695 	} else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
696 		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
697 	} else {
698 		ibdev_dbg(&dev->ibdev,
699 			  "Unsupported qp type %d driver qp type %d\n",
700 			  init_attr->qp_type, cmd.driver_qp_type);
701 		err = -EOPNOTSUPP;
702 		goto err_free_qp;
703 	}
704 
705 	ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
706 		  init_attr->qp_type, cmd.driver_qp_type);
707 	create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
708 	create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
709 	create_qp_params.sq_depth = init_attr->cap.max_send_wr;
710 	create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;
711 
712 	create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
713 	create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
714 	qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
715 	if (qp->rq_size) {
716 		qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
717 						    qp->rq_size, DMA_TO_DEVICE);
718 		if (!qp->rq_cpu_addr) {
719 			err = -ENOMEM;
720 			goto err_free_qp;
721 		}
722 
723 		ibdev_dbg(&dev->ibdev,
724 			  "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
725 			  qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
726 		create_qp_params.rq_base_addr = qp->rq_dma_addr;
727 	}
728 
729 	err = efa_com_create_qp(&dev->edev, &create_qp_params,
730 				&create_qp_resp);
731 	if (err)
732 		goto err_free_mapped;
733 
734 	resp.sq_db_offset = create_qp_resp.sq_db_offset;
735 	resp.rq_db_offset = create_qp_resp.rq_db_offset;
736 	resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
737 	resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
738 	resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;
739 
740 	err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
741 				    &resp);
742 	if (err)
743 		goto err_destroy_qp;
744 
745 	rq_entry_inserted = true;
746 	qp->qp_handle = create_qp_resp.qp_handle;
747 	qp->ibqp.qp_num = create_qp_resp.qp_num;
748 	qp->ibqp.qp_type = init_attr->qp_type;
749 	qp->max_send_wr = init_attr->cap.max_send_wr;
750 	qp->max_recv_wr = init_attr->cap.max_recv_wr;
751 	qp->max_send_sge = init_attr->cap.max_send_sge;
752 	qp->max_recv_sge = init_attr->cap.max_recv_sge;
753 	qp->max_inline_data = init_attr->cap.max_inline_data;
754 
755 	if (udata->outlen) {
756 		err = ib_copy_to_udata(udata, &resp,
757 				       min(sizeof(resp), udata->outlen));
758 		if (err) {
759 			ibdev_dbg(&dev->ibdev,
760 				  "Failed to copy udata for qp[%u]\n",
761 				  create_qp_resp.qp_num);
762 			goto err_destroy_qp;
763 		}
764 	}
765 
766 	ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
767 
768 	return &qp->ibqp;
769 
770 err_destroy_qp:
771 	efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
772 err_free_mapped:
773 	if (qp->rq_size) {
774 		dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
775 				 DMA_TO_DEVICE);
776 		if (!rq_entry_inserted)
777 			free_pages_exact(qp->rq_cpu_addr, qp->rq_size);
778 	}
779 err_free_qp:
780 	kfree(qp);
781 err_out:
782 	atomic64_inc(&dev->stats.sw_stats.create_qp_err);
783 	return ERR_PTR(err);
784 }
785 
786 static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
787 				  struct ib_qp_attr *qp_attr, int qp_attr_mask,
788 				  enum ib_qp_state cur_state,
789 				  enum ib_qp_state new_state)
790 {
791 #define EFA_MODIFY_QP_SUPP_MASK \
792 	(IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
793 	 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)
794 
795 	if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
796 		ibdev_dbg(&dev->ibdev,
797 			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
798 			  qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
799 		return -EOPNOTSUPP;
800 	}
801 
802 	if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
803 				qp_attr_mask)) {
804 		ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
805 		return -EINVAL;
806 	}
807 
808 	if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
809 		ibdev_dbg(&dev->ibdev, "Can't change port num\n");
810 		return -EOPNOTSUPP;
811 	}
812 
813 	if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
814 		ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
815 		return -EOPNOTSUPP;
816 	}
817 
818 	return 0;
819 }
820 
821 int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
822 		  int qp_attr_mask, struct ib_udata *udata)
823 {
824 	struct efa_dev *dev = to_edev(ibqp->device);
825 	struct efa_com_modify_qp_params params = {};
826 	struct efa_qp *qp = to_eqp(ibqp);
827 	enum ib_qp_state cur_state;
828 	enum ib_qp_state new_state;
829 	int err;
830 
831 	if (udata->inlen &&
832 	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
833 		ibdev_dbg(&dev->ibdev,
834 			  "Incompatible ABI params, udata not cleared\n");
835 		return -EINVAL;
836 	}
837 
838 	cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
839 						     qp->state;
840 	new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;
841 
842 	err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
843 				     new_state);
844 	if (err)
845 		return err;
846 
847 	params.qp_handle = qp->qp_handle;
848 
849 	if (qp_attr_mask & IB_QP_STATE) {
850 		params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
851 				      BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
852 		params.cur_qp_state = qp_attr->cur_qp_state;
853 		params.qp_state = qp_attr->qp_state;
854 	}
855 
856 	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
857 		params.modify_mask |=
858 			BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
859 		params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
860 	}
861 
862 	if (qp_attr_mask & IB_QP_QKEY) {
863 		params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
864 		params.qkey = qp_attr->qkey;
865 	}
866 
867 	if (qp_attr_mask & IB_QP_SQ_PSN) {
868 		params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
869 		params.sq_psn = qp_attr->sq_psn;
870 	}
871 
872 	err = efa_com_modify_qp(&dev->edev, &params);
873 	if (err)
874 		return err;
875 
876 	qp->state = new_state;
877 
878 	return 0;
879 }
880 
881 static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
882 {
883 	struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };
884 
885 	return efa_com_destroy_cq(&dev->edev, &params);
886 }
887 
888 void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
889 {
890 	struct efa_dev *dev = to_edev(ibcq->device);
891 	struct efa_cq *cq = to_ecq(ibcq);
892 
893 	ibdev_dbg(&dev->ibdev,
894 		  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
895 		  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
896 
897 	efa_destroy_cq_idx(dev, cq->cq_idx);
898 	dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
899 			 DMA_FROM_DEVICE);
900 }
901 
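/* Expose the CQ buffer to userspace through an EFA_MMAP_DMA_PAGE entry. */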
902 static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
903 				 struct efa_ibv_create_cq_resp *resp)
904 {
905 	resp->q_mmap_size = cq->size;
906 	resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
907 					     virt_to_phys(cq->cpu_addr),
908 					     cq->size, EFA_MMAP_DMA_PAGE);
909 	if (resp->q_mmap_key == EFA_MMAP_INVALID)
910 		return -ENOMEM;
911 
912 	return 0;
913 }
914 
915 int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
916 		  struct ib_udata *udata)
917 {
918 	struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
919 		udata, struct efa_ucontext, ibucontext);
920 	struct efa_ibv_create_cq_resp resp = {};
921 	struct efa_com_create_cq_params params;
922 	struct efa_com_create_cq_result result;
923 	struct ib_device *ibdev = ibcq->device;
924 	struct efa_dev *dev = to_edev(ibdev);
925 	struct efa_ibv_create_cq cmd = {};
926 	struct efa_cq *cq = to_ecq(ibcq);
927 	bool cq_entry_inserted = false;
928 	int entries = attr->cqe;
929 	int err;
930 
931 	ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
932 
933 	if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
934 		ibdev_dbg(ibdev,
935 			  "cq: requested entries[%u] non-positive or greater than max[%u]\n",
936 			  entries, dev->dev_attr.max_cq_depth);
937 		err = -EINVAL;
938 		goto err_out;
939 	}
940 
941 	if (!field_avail(cmd, num_sub_cqs, udata->inlen)) {
942 		ibdev_dbg(ibdev,
943 			  "Incompatible ABI params, no input udata\n");
944 		err = -EINVAL;
945 		goto err_out;
946 	}
947 
948 	if (udata->inlen > sizeof(cmd) &&
949 	    !ib_is_udata_cleared(udata, sizeof(cmd),
950 				 udata->inlen - sizeof(cmd))) {
951 		ibdev_dbg(ibdev,
952 			  "Incompatible ABI params, unknown fields in udata\n");
953 		err = -EINVAL;
954 		goto err_out;
955 	}
956 
957 	err = ib_copy_from_udata(&cmd, udata,
958 				 min(sizeof(cmd), udata->inlen));
959 	if (err) {
960 		ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
961 		goto err_out;
962 	}
963 
964 	if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) {
965 		ibdev_dbg(ibdev,
966 			  "Incompatible ABI params, unknown fields in udata\n");
967 		err = -EINVAL;
968 		goto err_out;
969 	}
970 
971 	if (!cmd.cq_entry_size) {
972 		ibdev_dbg(ibdev,
973 			  "Invalid entry size [%u]\n", cmd.cq_entry_size);
974 		err = -EINVAL;
975 		goto err_out;
976 	}
977 
978 	if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
979 		ibdev_dbg(ibdev,
980 			  "Invalid number of sub cqs[%u] expected[%u]\n",
981 			  cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
982 		err = -EINVAL;
983 		goto err_out;
984 	}
985 
986 	cq->ucontext = ucontext;
987 	cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
988 	cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
989 					 DMA_FROM_DEVICE);
990 	if (!cq->cpu_addr) {
991 		err = -ENOMEM;
992 		goto err_out;
993 	}
994 
995 	params.uarn = cq->ucontext->uarn;
996 	params.cq_depth = entries;
997 	params.dma_addr = cq->dma_addr;
998 	params.entry_size_in_bytes = cmd.cq_entry_size;
999 	params.num_sub_cqs = cmd.num_sub_cqs;
1000 	err = efa_com_create_cq(&dev->edev, &params, &result);
1001 	if (err)
1002 		goto err_free_mapped;
1003 
1004 	resp.cq_idx = result.cq_idx;
1005 	cq->cq_idx = result.cq_idx;
1006 	cq->ibcq.cqe = result.actual_depth;
1007 	WARN_ON_ONCE(entries != result.actual_depth);
1008 
1009 	err = cq_mmap_entries_setup(dev, cq, &resp);
1010 	if (err) {
1011 		ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
1012 			  cq->cq_idx);
1013 		goto err_destroy_cq;
1014 	}
1015 
1016 	cq_entry_inserted = true;
1017 
1018 	if (udata->outlen) {
1019 		err = ib_copy_to_udata(udata, &resp,
1020 				       min(sizeof(resp), udata->outlen));
1021 		if (err) {
1022 			ibdev_dbg(ibdev,
1023 				  "Failed to copy udata for create_cq\n");
1024 			goto err_destroy_cq;
1025 		}
1026 	}
1027 
1028 	ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
1029 		  cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
1030 
1031 	return 0;
1032 
1033 err_destroy_cq:
1034 	efa_destroy_cq_idx(dev, cq->cq_idx);
1035 err_free_mapped:
1036 	dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
1037 			 DMA_FROM_DEVICE);
1038 	if (!cq_entry_inserted)
1039 		free_pages_exact(cq->cpu_addr, cq->size);
1040 err_out:
1041 	atomic64_inc(&dev->stats.sw_stats.create_cq_err);
1042 	return err;
1043 }
1044 
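/*
 * Flatten the umem's scatterlist into page_list: one DMA address per device
 * page of size BIT(hp_shift), hp_cnt entries in total.
 */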
1045 static int umem_to_page_list(struct efa_dev *dev,
1046 			     struct ib_umem *umem,
1047 			     u64 *page_list,
1048 			     u32 hp_cnt,
1049 			     u8 hp_shift)
1050 {
1051 	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
1052 	struct ib_block_iter biter;
1053 	unsigned int hp_idx = 0;
1054 
1055 	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
1056 		  hp_cnt, pages_in_hp);
1057 
1058 	rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
1059 			    BIT(hp_shift))
1060 		page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
1061 
1062 	return 0;
1063 }
1064 
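/*
 * Build a scatterlist describing the pages backing a vmalloc'ed buffer so it
 * can be DMA mapped with dma_map_sg(); used for indirect PBLs.
 */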
1065 static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
1066 {
1067 	struct scatterlist *sglist;
1068 	struct page *pg;
1069 	int i;
1070 
1071 	sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
1072 	if (!sglist)
1073 		return NULL;
1074 	sg_init_table(sglist, page_cnt);
1075 	for (i = 0; i < page_cnt; i++) {
1076 		pg = vmalloc_to_page(buf);
1077 		if (!pg)
1078 			goto err;
1079 		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
1080 		buf += PAGE_SIZE / sizeof(*buf);
1081 	}
1082 	return sglist;
1083 
1084 err:
1085 	kfree(sglist);
1086 	return NULL;
1087 }
1088 
1089 /*
1090  * create a chunk list holding the dma addresses of the physical pages in
1091  * the supplied scatter gather list
1092  */
1093 static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
1094 {
1095 	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1096 	int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
1097 	struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
1098 	unsigned int chunk_list_size, chunk_idx, payload_idx;
1099 	int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
1100 	struct efa_com_ctrl_buff_info *ctrl_buf;
1101 	u64 *cur_chunk_buf, *prev_chunk_buf;
1102 	struct ib_block_iter biter;
1103 	dma_addr_t dma_addr;
1104 	int i;
1105 
1106 	/* allocate a chunk list that consists of 4KB chunks */
1107 	chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);
1108 
1109 	chunk_list->size = chunk_list_size;
1110 	chunk_list->chunks = kcalloc(chunk_list_size,
1111 				     sizeof(*chunk_list->chunks),
1112 				     GFP_KERNEL);
1113 	if (!chunk_list->chunks)
1114 		return -ENOMEM;
1115 
1116 	ibdev_dbg(&dev->ibdev,
1117 		  "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
1118 		  page_cnt);
1119 
1120 	/* allocate chunk buffers: */
1121 	for (i = 0; i < chunk_list_size; i++) {
1122 		chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
1123 		if (!chunk_list->chunks[i].buf)
1124 			goto chunk_list_dealloc;
1125 
1126 		chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
1127 	}
1128 	chunk_list->chunks[chunk_list_size - 1].length =
1129 		((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
1130 			EFA_CHUNK_PTR_SIZE;
1131 
1132 	/* fill the chunks with the dma addresses of the sg list pages: */
1133 	chunk_idx = 0;
1134 	payload_idx = 0;
1135 	cur_chunk_buf = chunk_list->chunks[0].buf;
1136 	rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
1137 			    EFA_CHUNK_PAYLOAD_SIZE) {
1138 		cur_chunk_buf[payload_idx++] =
1139 			rdma_block_iter_dma_address(&biter);
1140 
1141 		if (payload_idx == EFA_PTRS_PER_CHUNK) {
1142 			chunk_idx++;
1143 			cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
1144 			payload_idx = 0;
1145 		}
1146 	}
1147 
1148 	/* dma map the chunks and fill each chunk's next-chunk pointer */
1149 	for (i = chunk_list_size - 1; i >= 0; i--) {
1150 		dma_addr = dma_map_single(&dev->pdev->dev,
1151 					  chunk_list->chunks[i].buf,
1152 					  chunk_list->chunks[i].length,
1153 					  DMA_TO_DEVICE);
1154 		if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1155 			ibdev_err(&dev->ibdev,
1156 				  "chunk[%u] dma_map_failed\n", i);
1157 			goto chunk_list_unmap;
1158 		}
1159 
1160 		chunk_list->chunks[i].dma_addr = dma_addr;
1161 		ibdev_dbg(&dev->ibdev,
1162 			  "chunk[%u] mapped at [%pad]\n", i, &dma_addr);
1163 
1164 		if (!i)
1165 			break;
1166 
1167 		prev_chunk_buf = chunk_list->chunks[i - 1].buf;
1168 
1169 		ctrl_buf = (struct efa_com_ctrl_buff_info *)
1170 				&prev_chunk_buf[EFA_PTRS_PER_CHUNK];
1171 		ctrl_buf->length = chunk_list->chunks[i].length;
1172 
1173 		efa_com_set_dma_addr(dma_addr,
1174 				     &ctrl_buf->address.mem_addr_high,
1175 				     &ctrl_buf->address.mem_addr_low);
1176 	}
1177 
1178 	return 0;
1179 
1180 chunk_list_unmap:
1181 	for (; i < chunk_list_size; i++) {
1182 		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1183 				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
1184 	}
1185 chunk_list_dealloc:
1186 	for (i = 0; i < chunk_list_size; i++)
1187 		kfree(chunk_list->chunks[i].buf);
1188 
1189 	kfree(chunk_list->chunks);
1190 	return -ENOMEM;
1191 }
1192 
1193 static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1194 {
1195 	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1196 	int i;
1197 
1198 	for (i = 0; i < chunk_list->size; i++) {
1199 		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1200 				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
1201 		kfree(chunk_list->chunks[i].buf);
1202 	}
1203 
1204 	kfree(chunk_list->chunks);
1205 }
1206 
1207 /* initialize pbl continuous mode: map pbl buffer to a dma address. */
1208 static int pbl_continuous_initialize(struct efa_dev *dev,
1209 				     struct pbl_context *pbl)
1210 {
1211 	dma_addr_t dma_addr;
1212 
1213 	dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
1214 				  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1215 	if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1216 		ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
1217 		return -ENOMEM;
1218 	}
1219 
1220 	pbl->phys.continuous.dma_addr = dma_addr;
1221 	ibdev_dbg(&dev->ibdev,
1222 		  "pbl continuous - dma_addr = %pad, size[%u]\n",
1223 		  &dma_addr, pbl->pbl_buf_size_in_bytes);
1224 
1225 	return 0;
1226 }
1227 
1228 /*
1229  * initialize pbl indirect mode:
1230  * create a chunk list out of the dma addresses of the physical pages of
1231  * the pbl buffer.
1232  */
1233 static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
1234 {
1235 	u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
1236 	struct scatterlist *sgl;
1237 	int sg_dma_cnt, err;
1238 
1239 	BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
1240 	sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
1241 	if (!sgl)
1242 		return -ENOMEM;
1243 
1244 	sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1245 	if (!sg_dma_cnt) {
1246 		err = -EINVAL;
1247 		goto err_map;
1248 	}
1249 
1250 	pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
1251 	pbl->phys.indirect.sgl = sgl;
1252 	pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
1253 	err = pbl_chunk_list_create(dev, pbl);
1254 	if (err) {
1255 		ibdev_dbg(&dev->ibdev,
1256 			  "chunk_list creation failed[%d]\n", err);
1257 		goto err_chunk;
1258 	}
1259 
1260 	ibdev_dbg(&dev->ibdev,
1261 		  "pbl indirect - size[%u], chunks[%u]\n",
1262 		  pbl->pbl_buf_size_in_bytes,
1263 		  pbl->phys.indirect.chunk_list.size);
1264 
1265 	return 0;
1266 
1267 err_chunk:
1268 	dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1269 err_map:
1270 	kfree(sgl);
1271 	return err;
1272 }
1273 
1274 static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
1275 {
1276 	pbl_chunk_list_destroy(dev, pbl);
1277 	dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
1278 		     pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
1279 	kfree(pbl->phys.indirect.sgl);
1280 }
1281 
1282 /* create a page buffer list from a mapped user memory region */
1283 static int pbl_create(struct efa_dev *dev,
1284 		      struct pbl_context *pbl,
1285 		      struct ib_umem *umem,
1286 		      int hp_cnt,
1287 		      u8 hp_shift)
1288 {
1289 	int err;
1290 
1291 	pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
1292 	pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
1293 	if (!pbl->pbl_buf)
1294 		return -ENOMEM;
1295 
1296 	if (is_vmalloc_addr(pbl->pbl_buf)) {
1297 		pbl->physically_continuous = 0;
1298 		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1299 					hp_shift);
1300 		if (err)
1301 			goto err_free;
1302 
1303 		err = pbl_indirect_initialize(dev, pbl);
1304 		if (err)
1305 			goto err_free;
1306 	} else {
1307 		pbl->physically_continuous = 1;
1308 		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1309 					hp_shift);
1310 		if (err)
1311 			goto err_free;
1312 
1313 		err = pbl_continuous_initialize(dev, pbl);
1314 		if (err)
1315 			goto err_free;
1316 	}
1317 
1318 	ibdev_dbg(&dev->ibdev,
1319 		  "user_pbl_created: user_pages[%u], continuous[%u]\n",
1320 		  hp_cnt, pbl->physically_continuous);
1321 
1322 	return 0;
1323 
1324 err_free:
1325 	kvfree(pbl->pbl_buf);
1326 	return err;
1327 }
1328 
1329 static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1330 {
1331 	if (pbl->physically_continuous)
1332 		dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
1333 				 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1334 	else
1335 		pbl_indirect_terminate(dev, pbl);
1336 
1337 	kvfree(pbl->pbl_buf);
1338 }
1339 
1340 static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
1341 				 struct efa_com_reg_mr_params *params)
1342 {
1343 	int err;
1344 
1345 	params->inline_pbl = 1;
1346 	err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
1347 				params->page_num, params->page_shift);
1348 	if (err)
1349 		return err;
1350 
1351 	ibdev_dbg(&dev->ibdev,
1352 		  "inline_pbl_array - pages[%u]\n", params->page_num);
1353 
1354 	return 0;
1355 }
1356 
1357 static int efa_create_pbl(struct efa_dev *dev,
1358 			  struct pbl_context *pbl,
1359 			  struct efa_mr *mr,
1360 			  struct efa_com_reg_mr_params *params)
1361 {
1362 	int err;
1363 
1364 	err = pbl_create(dev, pbl, mr->umem, params->page_num,
1365 			 params->page_shift);
1366 	if (err) {
1367 		ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
1368 		return err;
1369 	}
1370 
1371 	params->inline_pbl = 0;
1372 	params->indirect = !pbl->physically_continuous;
1373 	if (pbl->physically_continuous) {
1374 		params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;
1375 
1376 		efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
1377 				     &params->pbl.pbl.address.mem_addr_high,
1378 				     &params->pbl.pbl.address.mem_addr_low);
1379 	} else {
1380 		params->pbl.pbl.length =
1381 			pbl->phys.indirect.chunk_list.chunks[0].length;
1382 
1383 		efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
1384 				     &params->pbl.pbl.address.mem_addr_high,
1385 				     &params->pbl.pbl.address.mem_addr_low);
1386 	}
1387 
1388 	return 0;
1389 }
1390 
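/*
 * Register a user memory region: pin it, pick the best supported page size
 * for the mapping, and hand the page list to the device either inline in the
 * admin command, as a single physically continuous PBL, or as an indirect
 * chunk list, depending on the number of pages.
 */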
1391 struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
1392 			 u64 virt_addr, int access_flags,
1393 			 struct ib_udata *udata)
1394 {
1395 	struct efa_dev *dev = to_edev(ibpd->device);
1396 	struct efa_com_reg_mr_params params = {};
1397 	struct efa_com_reg_mr_result result = {};
1398 	struct pbl_context pbl;
1399 	unsigned int pg_sz;
1400 	struct efa_mr *mr;
1401 	int inline_size;
1402 	int err;
1403 
1404 	if (udata->inlen &&
1405 	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
1406 		ibdev_dbg(&dev->ibdev,
1407 			  "Incompatible ABI params, udata not cleared\n");
1408 		err = -EINVAL;
1409 		goto err_out;
1410 	}
1411 
1412 	if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) {
1413 		ibdev_dbg(&dev->ibdev,
1414 			  "Unsupported access flags[%#x], supported[%#x]\n",
1415 			  access_flags, EFA_SUPPORTED_ACCESS_FLAGS);
1416 		err = -EOPNOTSUPP;
1417 		goto err_out;
1418 	}
1419 
1420 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1421 	if (!mr) {
1422 		err = -ENOMEM;
1423 		goto err_out;
1424 	}
1425 
1426 	mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
1427 	if (IS_ERR(mr->umem)) {
1428 		err = PTR_ERR(mr->umem);
1429 		ibdev_dbg(&dev->ibdev,
1430 			  "Failed to pin and map user space memory[%d]\n", err);
1431 		goto err_free;
1432 	}
1433 
1434 	params.pd = to_epd(ibpd)->pdn;
1435 	params.iova = virt_addr;
1436 	params.mr_length_in_bytes = length;
1437 	params.permissions = access_flags & 0x1;
1438 
1439 	pg_sz = ib_umem_find_best_pgsz(mr->umem,
1440 				       dev->dev_attr.page_size_cap,
1441 				       virt_addr);
1442 	if (!pg_sz) {
1443 		err = -EOPNOTSUPP;
1444 		ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
1445 			  dev->dev_attr.page_size_cap);
1446 		goto err_unmap;
1447 	}
1448 
1449 	params.page_shift = __ffs(pg_sz);
1450 	params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)),
1451 				       pg_sz);
1452 
1453 	ibdev_dbg(&dev->ibdev,
1454 		  "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
1455 		  start, length, params.page_shift, params.page_num);
1456 
1457 	inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
1458 	if (params.page_num <= inline_size) {
1459 		err = efa_create_inline_pbl(dev, mr, &params);
1460 		if (err)
1461 			goto err_unmap;
1462 
1463 		err = efa_com_register_mr(&dev->edev, &params, &result);
1464 		if (err)
1465 			goto err_unmap;
1466 	} else {
1467 		err = efa_create_pbl(dev, &pbl, mr, &params);
1468 		if (err)
1469 			goto err_unmap;
1470 
1471 		err = efa_com_register_mr(&dev->edev, &params, &result);
1472 		pbl_destroy(dev, &pbl);
1473 
1474 		if (err)
1475 			goto err_unmap;
1476 	}
1477 
1478 	mr->ibmr.lkey = result.l_key;
1479 	mr->ibmr.rkey = result.r_key;
1480 	mr->ibmr.length = length;
1481 	ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
1482 
1483 	return &mr->ibmr;
1484 
1485 err_unmap:
1486 	ib_umem_release(mr->umem);
1487 err_free:
1488 	kfree(mr);
1489 err_out:
1490 	atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
1491 	return ERR_PTR(err);
1492 }
1493 
1494 int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1495 {
1496 	struct efa_dev *dev = to_edev(ibmr->device);
1497 	struct efa_com_dereg_mr_params params;
1498 	struct efa_mr *mr = to_emr(ibmr);
1499 	int err;
1500 
1501 	ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
1502 
1503 	params.l_key = mr->ibmr.lkey;
1504 	err = efa_com_dereg_mr(&dev->edev, &params);
1505 	if (err)
1506 		return err;
1507 
1508 	ib_umem_release(mr->umem);
1509 	kfree(mr);
1510 
1511 	return 0;
1512 }
1513 
1514 int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
1515 			   struct ib_port_immutable *immutable)
1516 {
1517 	struct ib_port_attr attr;
1518 	int err;
1519 
1520 	err = ib_query_port(ibdev, port_num, &attr);
1521 	if (err) {
1522 		ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
1523 		return err;
1524 	}
1525 
1526 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
1527 	immutable->gid_tbl_len = attr.gid_tbl_len;
1528 
1529 	return 0;
1530 }
1531 
1532 static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
1533 {
1534 	struct efa_com_dealloc_uar_params params = {
1535 		.uarn = uarn,
1536 	};
1537 
1538 	return efa_com_dealloc_uar(&dev->edev, &params);
1539 }
1540 
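/*
 * Allocate a UAR for the new user context, initialize its mmap table and
 * report the supported udata commands and device limits in the response.
 */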
1541 int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
1542 {
1543 	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1544 	struct efa_dev *dev = to_edev(ibucontext->device);
1545 	struct efa_ibv_alloc_ucontext_resp resp = {};
1546 	struct efa_com_alloc_uar_result result;
1547 	int err;
1548 
1549 	/*
1550 	 * It's fine if the driver does not know all request fields;
1551 	 * we will ack the input fields in our response.
1552 	 */
1553 
1554 	err = efa_com_alloc_uar(&dev->edev, &result);
1555 	if (err)
1556 		goto err_out;
1557 
1558 	ucontext->uarn = result.uarn;
1559 	xa_init(&ucontext->mmap_xa);
1560 
1561 	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
1562 	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
1563 	resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
1564 	resp.inline_buf_size = dev->dev_attr.inline_buf_size;
1565 	resp.max_llq_size = dev->dev_attr.max_llq_size;
1566 
1567 	if (udata && udata->outlen) {
1568 		err = ib_copy_to_udata(udata, &resp,
1569 				       min(sizeof(resp), udata->outlen));
1570 		if (err)
1571 			goto err_dealloc_uar;
1572 	}
1573 
1574 	return 0;
1575 
1576 err_dealloc_uar:
1577 	efa_dealloc_uar(dev, result.uarn);
1578 err_out:
1579 	atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
1580 	return err;
1581 }
1582 
1583 void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
1584 {
1585 	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1586 	struct efa_dev *dev = to_edev(ibucontext->device);
1587 
1588 	mmap_entries_remove_free(dev, ucontext);
1589 	efa_dealloc_uar(dev, ucontext->uarn);
1590 }
1591 
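/*
 * Look up the mmap entry matching the key/length pair and map it into the
 * vma: BAR pages are mapped with rdma_user_mmap_io() (non-cached or
 * write-combined), DMA pages are inserted one by one with vm_insert_page().
 */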
1592 static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
1593 		      struct vm_area_struct *vma, u64 key, u64 length)
1594 {
1595 	struct efa_mmap_entry *entry;
1596 	unsigned long va;
1597 	u64 pfn;
1598 	int err;
1599 
1600 	entry = mmap_entry_get(dev, ucontext, key, length);
1601 	if (!entry) {
1602 		ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
1603 			  key);
1604 		return -EINVAL;
1605 	}
1606 
1607 	ibdev_dbg(&dev->ibdev,
1608 		  "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
1609 		  entry->address, length, entry->mmap_flag);
1610 
1611 	pfn = entry->address >> PAGE_SHIFT;
1612 	switch (entry->mmap_flag) {
1613 	case EFA_MMAP_IO_NC:
1614 		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
1615 					pgprot_noncached(vma->vm_page_prot));
1616 		break;
1617 	case EFA_MMAP_IO_WC:
1618 		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
1619 					pgprot_writecombine(vma->vm_page_prot));
1620 		break;
1621 	case EFA_MMAP_DMA_PAGE:
1622 		for (va = vma->vm_start; va < vma->vm_end;
1623 		     va += PAGE_SIZE, pfn++) {
1624 			err = vm_insert_page(vma, va, pfn_to_page(pfn));
1625 			if (err)
1626 				break;
1627 		}
1628 		break;
1629 	default:
1630 		err = -EINVAL;
1631 	}
1632 
1633 	if (err) {
1634 		ibdev_dbg(
1635 			&dev->ibdev,
1636 			"Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
1637 			entry->address, length, entry->mmap_flag, err);
1638 		return err;
1639 	}
1640 
1641 	return 0;
1642 }
1643 
1644 int efa_mmap(struct ib_ucontext *ibucontext,
1645 	     struct vm_area_struct *vma)
1646 {
1647 	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1648 	struct efa_dev *dev = to_edev(ibucontext->device);
1649 	u64 length = vma->vm_end - vma->vm_start;
1650 	u64 key = vma->vm_pgoff << PAGE_SHIFT;
1651 
1652 	ibdev_dbg(&dev->ibdev,
1653 		  "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
1654 		  vma->vm_start, vma->vm_end, length, key);
1655 
1656 	if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
1657 		ibdev_dbg(&dev->ibdev,
1658 			  "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
1659 			  length, PAGE_SIZE, vma->vm_flags);
1660 		return -EINVAL;
1661 	}
1662 
1663 	if (vma->vm_flags & VM_EXEC) {
1664 		ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n");
1665 		return -EPERM;
1666 	}
1667 
1668 	return __efa_mmap(dev, ucontext, vma, key, length);
1669 }
1670 
1671 static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
1672 {
1673 	struct efa_com_destroy_ah_params params = {
1674 		.ah = ah->ah,
1675 		.pdn = to_epd(ah->ibah.pd)->pdn,
1676 	};
1677 
1678 	return efa_com_destroy_ah(&dev->edev, &params);
1679 }
1680 
1681 int efa_create_ah(struct ib_ah *ibah,
1682 		  struct rdma_ah_attr *ah_attr,
1683 		  u32 flags,
1684 		  struct ib_udata *udata)
1685 {
1686 	struct efa_dev *dev = to_edev(ibah->device);
1687 	struct efa_com_create_ah_params params = {};
1688 	struct efa_ibv_create_ah_resp resp = {};
1689 	struct efa_com_create_ah_result result;
1690 	struct efa_ah *ah = to_eah(ibah);
1691 	int err;
1692 
1693 	if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
1694 		ibdev_dbg(&dev->ibdev,
1695 			  "Create address handle is not supported in atomic context\n");
1696 		err = -EOPNOTSUPP;
1697 		goto err_out;
1698 	}
1699 
1700 	if (udata->inlen &&
1701 	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
1702 		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
1703 		err = -EINVAL;
1704 		goto err_out;
1705 	}
1706 
1707 	memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
1708 	       sizeof(params.dest_addr));
1709 	params.pdn = to_epd(ibah->pd)->pdn;
1710 	err = efa_com_create_ah(&dev->edev, &params, &result);
1711 	if (err)
1712 		goto err_out;
1713 
1714 	memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
1715 	ah->ah = result.ah;
1716 
1717 	resp.efa_address_handle = result.ah;
1718 
1719 	if (udata->outlen) {
1720 		err = ib_copy_to_udata(udata, &resp,
1721 				       min(sizeof(resp), udata->outlen));
1722 		if (err) {
1723 			ibdev_dbg(&dev->ibdev,
1724 				  "Failed to copy udata for create_ah response\n");
1725 			goto err_destroy_ah;
1726 		}
1727 	}
1728 	ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
1729 
1730 	return 0;
1731 
1732 err_destroy_ah:
1733 	efa_ah_destroy(dev, ah);
1734 err_out:
1735 	atomic64_inc(&dev->stats.sw_stats.create_ah_err);
1736 	return err;
1737 }
1738 
1739 void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
1740 {
1741 	struct efa_dev *dev = to_edev(ibah->pd->device);
1742 	struct efa_ah *ah = to_eah(ibah);
1743 
1744 	ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);
1745 
1746 	if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
1747 		ibdev_dbg(&dev->ibdev,
1748 			  "Destroy address handle is not supported in atomic context\n");
1749 		return;
1750 	}
1751 
1752 	efa_ah_destroy(dev, ah);
1753 }
1754 
1755 struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num)
1756 {
1757 	return rdma_alloc_hw_stats_struct(efa_stats_names,
1758 					  ARRAY_SIZE(efa_stats_names),
1759 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
1760 }
1761 
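/*
 * Fill the rdma_hw_stats array: the basic packet/byte counters are fetched
 * from the device with a GET_STATS admin command, the rest are software
 * counters kept by the driver (admin queue and error statistics).
 */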
1762 int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
1763 		     u8 port_num, int index)
1764 {
1765 	struct efa_com_get_stats_params params = {};
1766 	union efa_com_get_stats_result result;
1767 	struct efa_dev *dev = to_edev(ibdev);
1768 	struct efa_com_basic_stats *bs;
1769 	struct efa_com_stats_admin *as;
1770 	struct efa_stats *s;
1771 	int err;
1772 
1773 	params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
1774 	params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
1775 
1776 	err = efa_com_get_stats(&dev->edev, &params, &result);
1777 	if (err)
1778 		return err;
1779 
1780 	bs = &result.basic_stats;
1781 	stats->value[EFA_TX_BYTES] = bs->tx_bytes;
1782 	stats->value[EFA_TX_PKTS] = bs->tx_pkts;
1783 	stats->value[EFA_RX_BYTES] = bs->rx_bytes;
1784 	stats->value[EFA_RX_PKTS] = bs->rx_pkts;
1785 	stats->value[EFA_RX_DROPS] = bs->rx_drops;
1786 
1787 	as = &dev->edev.aq.stats;
1788 	stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
1789 	stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
1790 	stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
1791 
1792 	s = &dev->stats;
1793 	stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
1794 	stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err);
1795 	stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err);
1796 	stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err);
1797 	stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err);
1798 	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err);
1799 
1800 	return ARRAY_SIZE(efa_stats_names);
1801 }
1802 
1803 enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
1804 					 u8 port_num)
1805 {
1806 	return IB_LINK_LAYER_UNSPECIFIED;
1807 }
1808 