1 /*
2  * Copyright (c) 2016 Hisilicon Limited.
3  * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <linux/platform_device.h>
35 #include <linux/vmalloc.h>
36 #include <rdma/ib_umem.h>
37 #include "hns_roce_device.h"
38 #include "hns_roce_cmd.h"
39 #include "hns_roce_hem.h"
40 
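/*
 * An MR/MW key is the 32-bit MTPT index rotated left by 8 bits (the
 * index's top byte wraps around into the key's low byte);
 * key_to_hw_index() applies the inverse rotation to recover the index.
 */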
41 static u32 hw_index_to_key(unsigned long ind)
42 {
43 	return (u32)(ind >> 24) | (ind << 8);
44 }
45 
46 unsigned long key_to_hw_index(u32 key)
47 {
48 	return (key << 24) | (key >> 8);
49 }
50 
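/*
 * CREATE_MPT uploads the MPT context prepared in @mailbox to entry
 * @mpt_index. DESTROY_MPT below invalidates that entry; when a mailbox
 * is supplied it is passed as the command's output buffer, otherwise the
 * op modifier tells the firmware that no context needs to be returned.
 */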
51 static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev,
52 				  struct hns_roce_cmd_mailbox *mailbox,
53 				  unsigned long mpt_index)
54 {
55 	return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
56 				 HNS_ROCE_CMD_CREATE_MPT,
57 				 HNS_ROCE_CMD_TIMEOUT_MSECS);
58 }
59 
60 int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
61 			    struct hns_roce_cmd_mailbox *mailbox,
62 			    unsigned long mpt_index)
63 {
64 	return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
65 				 mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT,
66 				 HNS_ROCE_CMD_TIMEOUT_MSECS);
67 }
68 
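/*
 * Buddy allocation of MTT segments: find the smallest free block of
 * order >= @order, then split it downwards, marking each split-off buddy
 * as free, until a block of the requested order remains. The result in
 * @seg is expressed in units of order-0 segments.
 */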
69 static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
70 				unsigned long *seg)
71 {
72 	int o;
73 	u32 m;
74 
75 	spin_lock(&buddy->lock);
76 
77 	for (o = order; o <= buddy->max_order; ++o) {
78 		if (buddy->num_free[o]) {
79 			m = 1 << (buddy->max_order - o);
80 			*seg = find_first_bit(buddy->bits[o], m);
81 			if (*seg < m)
82 				goto found;
83 		}
84 	}
85 	spin_unlock(&buddy->lock);
86 	return -EINVAL;
87 
88  found:
89 	clear_bit(*seg, buddy->bits[o]);
90 	--buddy->num_free[o];
91 
92 	while (o > order) {
93 		--o;
94 		*seg <<= 1;
95 		set_bit(*seg ^ 1, buddy->bits[o]);
96 		++buddy->num_free[o];
97 	}
98 
99 	spin_unlock(&buddy->lock);
100 
101 	*seg <<= order;
102 	return 0;
103 }
104 
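/*
 * Free a block of 2^@order segments and merge it with its buddy
 * (seg ^ 1) repeatedly while that buddy is also free, pushing the merged
 * block up to the next order.
 */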
105 static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg,
106 				int order)
107 {
108 	seg >>= order;
109 
110 	spin_lock(&buddy->lock);
111 
112 	while (test_bit(seg ^ 1, buddy->bits[order])) {
113 		clear_bit(seg ^ 1, buddy->bits[order]);
114 		--buddy->num_free[order];
115 		seg >>= 1;
116 		++order;
117 	}
118 
119 	set_bit(seg, buddy->bits[order]);
120 	++buddy->num_free[order];
121 
122 	spin_unlock(&buddy->lock);
123 }
124 
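/*
 * One bitmap and one free counter per order; initially the whole space
 * is a single free block of max_order. Large bitmaps fall back to
 * vzalloc() when kcalloc() fails.
 */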
125 static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
126 {
127 	int i, s;
128 
129 	buddy->max_order = max_order;
130 	spin_lock_init(&buddy->lock);
131 	buddy->bits = kcalloc(buddy->max_order + 1,
132 			      sizeof(*buddy->bits),
133 			      GFP_KERNEL);
134 	buddy->num_free = kcalloc(buddy->max_order + 1,
135 				  sizeof(*buddy->num_free),
136 				  GFP_KERNEL);
137 	if (!buddy->bits || !buddy->num_free)
138 		goto err_out;
139 
140 	for (i = 0; i <= buddy->max_order; ++i) {
141 		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
142 		buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
143 					 __GFP_NOWARN);
144 		if (!buddy->bits[i]) {
145 			buddy->bits[i] = vzalloc(array_size(s, sizeof(long)));
146 			if (!buddy->bits[i])
147 				goto err_out_free;
148 		}
149 	}
150 
151 	set_bit(0, buddy->bits[buddy->max_order]);
152 	buddy->num_free[buddy->max_order] = 1;
153 
154 	return 0;
155 
156 err_out_free:
157 	for (i = 0; i <= buddy->max_order; ++i)
158 		kvfree(buddy->bits[i]);
159 
160 err_out:
161 	kfree(buddy->bits);
162 	kfree(buddy->num_free);
163 	return -ENOMEM;
164 }
165 
166 static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
167 {
168 	int i;
169 
170 	for (i = 0; i <= buddy->max_order; ++i)
171 		kvfree(buddy->bits[i]);
172 
173 	kfree(buddy->bits);
174 	kfree(buddy->num_free);
175 }
176 
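/*
 * Allocate 2^@order contiguous MTT segments of the given type from the
 * matching buddy system and pin the corresponding range of the HEM table
 * that backs those entries.
 */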
177 static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
178 				    unsigned long *seg, u32 mtt_type)
179 {
180 	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
181 	struct hns_roce_hem_table *table;
182 	struct hns_roce_buddy *buddy;
183 	int ret;
184 
185 	switch (mtt_type) {
186 	case MTT_TYPE_WQE:
187 		buddy = &mr_table->mtt_buddy;
188 		table = &mr_table->mtt_table;
189 		break;
190 	case MTT_TYPE_CQE:
191 		buddy = &mr_table->mtt_cqe_buddy;
192 		table = &mr_table->mtt_cqe_table;
193 		break;
194 	case MTT_TYPE_SRQWQE:
195 		buddy = &mr_table->mtt_srqwqe_buddy;
196 		table = &mr_table->mtt_srqwqe_table;
197 		break;
198 	case MTT_TYPE_IDX:
199 		buddy = &mr_table->mtt_idx_buddy;
200 		table = &mr_table->mtt_idx_table;
201 		break;
202 	default:
203 		dev_err(hr_dev->dev, "Unsupported MTT table type: %d\n",
204 			mtt_type);
205 		return -EINVAL;
206 	}
207 
208 	ret = hns_roce_buddy_alloc(buddy, order, seg);
209 	if (ret)
210 		return ret;
211 
212 	ret = hns_roce_table_get_range(hr_dev, table, *seg,
213 				       *seg + (1 << order) - 1);
214 	if (ret) {
215 		hns_roce_buddy_free(buddy, *seg, order);
216 		return ret;
217 	}
218 
219 	return 0;
220 }
221 
222 int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
223 		      struct hns_roce_mtt *mtt)
224 {
225 	int ret;
226 	int i;
227 
228 	/* Page num is zero, corresponding to DMA memory registration */
229 	if (!npages) {
230 		mtt->order = -1;
231 		mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT;
232 		return 0;
233 	}
234 
235 	/* Note: if page_shift is zero, this is fast memory registration */
236 	mtt->page_shift = page_shift;
237 
238 	/* Compute the number of MTT entries needed */
239 	for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages;
240 	     i <<= 1)
241 		++mtt->order;
242 
243 	/* Allocate the MTT entries */
244 	ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg,
245 				       mtt->mtt_type);
246 	if (ret)
247 		return -ENOMEM;
248 
249 	return 0;
250 }
251 
252 void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
253 {
254 	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
255 
256 	if (mtt->order < 0)
257 		return;
258 
259 	switch (mtt->mtt_type) {
260 	case MTT_TYPE_WQE:
261 		hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
262 				    mtt->order);
263 		hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
264 					mtt->first_seg,
265 					mtt->first_seg + (1 << mtt->order) - 1);
266 		break;
267 	case MTT_TYPE_CQE:
268 		hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
269 				    mtt->order);
270 		hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
271 					mtt->first_seg,
272 					mtt->first_seg + (1 << mtt->order) - 1);
273 		break;
274 	case MTT_TYPE_SRQWQE:
275 		hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
276 				    mtt->order);
277 		hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
278 					mtt->first_seg,
279 					mtt->first_seg + (1 << mtt->order) - 1);
280 		break;
281 	case MTT_TYPE_IDX:
282 		hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
283 				    mtt->order);
284 		hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
285 					mtt->first_seg,
286 					mtt->first_seg + (1 << mtt->order) - 1);
287 		break;
288 	default:
289 		dev_err(hr_dev->dev,
290 			"Unsupported mtt type %d, clean mtt failed\n",
291 			mtt->mtt_type);
292 		break;
293 	}
294 }
295 
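/*
 * Unwind a partially built multi-hop PBL after an allocation failure:
 * @err_loop_index identifies the BT level whose loop failed, and
 * @loop_i/@loop_j are the indexes reached in the outer/inner loops.
 * The L0 BT is always freed last.
 */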
296 static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
297 			       struct hns_roce_mr *mr, int err_loop_index,
298 			       int loop_i, int loop_j)
299 {
300 	struct device *dev = hr_dev->dev;
301 	u32 mhop_num;
302 	u32 pbl_bt_sz;
303 	u64 bt_idx;
304 	int i, j;
305 
306 	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
307 	mhop_num = hr_dev->caps.pbl_hop_num;
308 
309 	i = loop_i;
310 	if (mhop_num == 3 && err_loop_index == 2) {
311 		for (; i >= 0; i--) {
312 			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
313 					  mr->pbl_l1_dma_addr[i]);
314 
315 			for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
316 				if (i == loop_i && j >= loop_j)
317 					break;
318 
319 				bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
320 				dma_free_coherent(dev, pbl_bt_sz,
321 						  mr->pbl_bt_l2[bt_idx],
322 						  mr->pbl_l2_dma_addr[bt_idx]);
323 			}
324 		}
325 	} else if (mhop_num == 3 && err_loop_index == 1) {
326 		for (i -= 1; i >= 0; i--) {
327 			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
328 					  mr->pbl_l1_dma_addr[i]);
329 
330 			for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
331 				bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
332 				dma_free_coherent(dev, pbl_bt_sz,
333 						  mr->pbl_bt_l2[bt_idx],
334 						  mr->pbl_l2_dma_addr[bt_idx]);
335 			}
336 		}
337 	} else if (mhop_num == 2 && err_loop_index == 1) {
338 		for (i -= 1; i >= 0; i--)
339 			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
340 					  mr->pbl_l1_dma_addr[i]);
341 	} else {
342 		dev_warn(dev, "not supported: mhop_num=%d, err_loop_index=%d.\n",
343 			 mhop_num, err_loop_index);
344 		return;
345 	}
346 
347 	dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr);
348 	mr->pbl_bt_l0 = NULL;
349 	mr->pbl_l0_dma_addr = 0;
350 }

351 static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages,
352 			       struct hns_roce_mr *mr, u32 pbl_bt_sz)
353 {
354 	struct device *dev = hr_dev->dev;
355 
356 	if (npages > pbl_bt_sz / 8) {
357 		dev_err(dev, "npages %d exceeds the capacity of a 1-hop PBL!\n",
358 			npages);
359 		return -EINVAL;
360 	}
361 	mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
362 					 &(mr->pbl_dma_addr),
363 					 GFP_KERNEL);
364 	if (!mr->pbl_buf)
365 		return -ENOMEM;
366 
367 	mr->pbl_size = npages;
368 	mr->pbl_ba = mr->pbl_dma_addr;
369 	mr->pbl_hop_num = 1;
370 	mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
371 	mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
372 	return 0;
374 }
375 
377 static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages,
378 			       struct hns_roce_mr *mr, u32 pbl_bt_sz)
379 {
380 	struct device *dev = hr_dev->dev;
381 	int npages_allocated;
382 	u64 pbl_last_bt_num;
383 	u64 pbl_bt_cnt = 0;
384 	u64 size;
385 	int i;
386 
387 	pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
388 
389 	/* alloc L1 BT */
390 	for (i = 0; i < pbl_bt_sz / 8; i++) {
391 		if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
392 			size = pbl_bt_sz;
393 		} else {
394 			npages_allocated = i * (pbl_bt_sz / 8);
395 			size = (npages - npages_allocated) * 8;
396 		}
397 		mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
398 					    &(mr->pbl_l1_dma_addr[i]),
399 					    GFP_KERNEL);
400 		if (!mr->pbl_bt_l1[i]) {
401 			hns_roce_loop_free(hr_dev, mr, 1, i, 0);
402 			return -ENOMEM;
403 		}
404 
405 		*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
406 
407 		pbl_bt_cnt++;
408 		if (pbl_bt_cnt >= pbl_last_bt_num)
409 			break;
410 	}
411 
412 	mr->l0_chunk_last_num = i + 1;
413 
414 	return 0;
415 }
416 
417 static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages,
418 			       struct hns_roce_mr *mr, u32 pbl_bt_sz)
419 {
420 	struct device *dev = hr_dev->dev;
421 	int mr_alloc_done = 0;
422 	int npages_allocated;
423 	u64 pbl_last_bt_num;
424 	u64 pbl_bt_cnt = 0;
425 	u64 bt_idx;
426 	u64 size;
427 	int i;
428 	int j = 0;
429 
430 	pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
431 
432 	mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
433 				      sizeof(*mr->pbl_l2_dma_addr),
434 				      GFP_KERNEL);
435 	if (!mr->pbl_l2_dma_addr)
436 		return -ENOMEM;
437 
438 	mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
439 				sizeof(*mr->pbl_bt_l2),
440 				GFP_KERNEL);
441 	if (!mr->pbl_bt_l2)
442 		goto err_kcalloc_bt_l2;
443 
444 	/* alloc L1, L2 BT */
445 	for (i = 0; i < pbl_bt_sz / 8; i++) {
446 		mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
447 					    &(mr->pbl_l1_dma_addr[i]),
448 					    GFP_KERNEL);
449 		if (!mr->pbl_bt_l1[i]) {
450 			hns_roce_loop_free(hr_dev, mr, 1, i, 0);
451 			goto err_dma_alloc_l0;
452 		}
453 
454 		*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
455 
456 		for (j = 0; j < pbl_bt_sz / 8; j++) {
457 			bt_idx = i * pbl_bt_sz / 8 + j;
458 
459 			if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
460 				size = pbl_bt_sz;
461 			} else {
462 				npages_allocated = bt_idx *
463 						   (pbl_bt_sz / 8);
464 				size = (npages - npages_allocated) * 8;
465 			}
466 			mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
467 				      dev, size,
468 				      &(mr->pbl_l2_dma_addr[bt_idx]),
469 				      GFP_KERNEL);
470 			if (!mr->pbl_bt_l2[bt_idx]) {
471 				hns_roce_loop_free(hr_dev, mr, 2, i, j);
472 				goto err_dma_alloc_l0;
473 			}
474 
475 			*(mr->pbl_bt_l1[i] + j) =
476 					mr->pbl_l2_dma_addr[bt_idx];
477 
478 			pbl_bt_cnt++;
479 			if (pbl_bt_cnt >= pbl_last_bt_num) {
480 				mr_alloc_done = 1;
481 				break;
482 			}
483 		}
484 
485 		if (mr_alloc_done)
486 			break;
487 	}
488 
489 	mr->l0_chunk_last_num = i + 1;
490 	mr->l1_chunk_last_num = j + 1;
491 
493 	return 0;
494 
495 err_dma_alloc_l0:
496 	kfree(mr->pbl_bt_l2);
497 	mr->pbl_bt_l2 = NULL;
498 
499 err_kcalloc_bt_l2:
500 	kfree(mr->pbl_l2_dma_addr);
501 	mr->pbl_l2_dma_addr = NULL;
502 
503 	return -ENOMEM;
504 }
505 
507 /*
 * PBL multi-hop addressing: the buffer page addresses are kept in a tree
 * of base address tables (BTs). With one hop the PBL itself holds them;
 * with two hops an L0 BT points to L1 BTs that hold them; with three
 * hops the L1 BTs point to L2 BTs that hold them.
 */
508 static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
509 			       struct hns_roce_mr *mr)
510 {
511 	struct device *dev = hr_dev->dev;
512 	u32 pbl_bt_sz;
513 	u32 mhop_num;
514 
515 	mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
516 	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
517 
518 	if (mhop_num == HNS_ROCE_HOP_NUM_0)
519 		return 0;
520 
521 	if (mhop_num == 1)
522 		return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz);
523 
524 	mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
525 				      sizeof(*mr->pbl_l1_dma_addr),
526 				      GFP_KERNEL);
527 	if (!mr->pbl_l1_dma_addr)
528 		return -ENOMEM;
529 
530 	mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
531 				GFP_KERNEL);
532 	if (!mr->pbl_bt_l1)
533 		goto err_kcalloc_bt_l1;
534 
535 	/* alloc L0 BT */
536 	mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
537 					   &(mr->pbl_l0_dma_addr),
538 					   GFP_KERNEL);
539 	if (!mr->pbl_bt_l0)
540 		goto err_kcalloc_l2_dma;
541 
542 	if (mhop_num == 2) {
543 		if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
544 			goto err_kcalloc_l2_dma;
545 	}
546 
547 	if (mhop_num == 3) {
548 		if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
549 			goto err_kcalloc_l2_dma;
550 	}
551 
553 	mr->pbl_size = npages;
554 	mr->pbl_ba = mr->pbl_l0_dma_addr;
555 	mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
556 	mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
557 	mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
558 
559 	return 0;
560 
561 err_kcalloc_l2_dma:
562 	kfree(mr->pbl_bt_l1);
563 	mr->pbl_bt_l1 = NULL;
564 
565 err_kcalloc_bt_l1:
566 	kfree(mr->pbl_l1_dma_addr);
567 	mr->pbl_l1_dma_addr = NULL;
568 
569 	return -ENOMEM;
570 }
571 
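/*
 * Allocate an MTPT index for the MR, derive its key, and set up the PBL
 * when a bounded size is given; size == ~0ULL denotes a full DMA MR,
 * which needs no PBL at all.
 */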
572 static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
573 			     u64 size, u32 access, int npages,
574 			     struct hns_roce_mr *mr)
575 {
576 	struct device *dev = hr_dev->dev;
577 	unsigned long index = 0;
578 	int ret;
579 
580 	/* Allocate a key for mr from mr_table */
581 	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
582 	if (ret)
583 		return -ENOMEM;
584 
585 	mr->iova = iova;			/* MR va starting addr */
586 	mr->size = size;			/* MR addr range */
587 	mr->pd = pd;				/* MR num */
588 	mr->access = access;			/* MR access permit */
589 	mr->enabled = 0;			/* MR active status */
590 	mr->key = hw_index_to_key(index);	/* MR key */
591 
592 	if (size == ~0ull) {
593 		mr->pbl_buf = NULL;
594 		mr->pbl_dma_addr = 0;
595 		/* PBL multi-hop addressing parameters */
596 		mr->pbl_bt_l2 = NULL;
597 		mr->pbl_bt_l1 = NULL;
598 		mr->pbl_bt_l0 = NULL;
599 		mr->pbl_l2_dma_addr = NULL;
600 		mr->pbl_l1_dma_addr = NULL;
601 		mr->pbl_l0_dma_addr = 0;
602 	} else {
603 		if (!hr_dev->caps.pbl_hop_num) {
604 			mr->pbl_buf = dma_alloc_coherent(dev,
605 							 npages * BA_BYTE_LEN,
606 							 &(mr->pbl_dma_addr),
607 							 GFP_KERNEL);
608 			if (!mr->pbl_buf)
609 				return -ENOMEM;
610 		} else {
611 			ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
612 		}
613 	}
614 
615 	return ret;
616 }
617 
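/*
 * Free the PBL BTs of a multi-hop MR. The last chunk at each level may
 * have been allocated smaller than pbl_bt_sz (only the remaining pages
 * were needed), so its size is recomputed from npages before freeing.
 */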
618 static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
619 			       struct hns_roce_mr *mr)
620 {
621 	struct device *dev = hr_dev->dev;
622 	int npages_allocated;
623 	int npages;
624 	int i, j;
625 	u32 pbl_bt_sz;
626 	u32 mhop_num;
627 	u64 bt_idx;
628 
629 	npages = mr->pbl_size;
630 	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
631 	mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
632 
633 	if (mhop_num == HNS_ROCE_HOP_NUM_0)
634 		return;
635 
636 	if (mhop_num == 1) {
637 		dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN),
638 				  mr->pbl_buf, mr->pbl_dma_addr);
639 		return;
640 	}
641 
642 	dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0,
643 			  mr->pbl_l0_dma_addr);
644 
645 	if (mhop_num == 2) {
646 		for (i = 0; i < mr->l0_chunk_last_num; i++) {
647 			if (i == mr->l0_chunk_last_num - 1) {
648 				npages_allocated =
649 						i * (pbl_bt_sz / BA_BYTE_LEN);
650 
651 				dma_free_coherent(dev,
652 				      (npages - npages_allocated) * BA_BYTE_LEN,
653 				       mr->pbl_bt_l1[i],
654 				       mr->pbl_l1_dma_addr[i]);
655 
656 				break;
657 			}
658 
659 			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
660 					  mr->pbl_l1_dma_addr[i]);
661 		}
662 	} else if (mhop_num == 3) {
663 		for (i = 0; i < mr->l0_chunk_last_num; i++) {
664 			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
665 					  mr->pbl_l1_dma_addr[i]);
666 
667 			for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
668 				bt_idx = i * (pbl_bt_sz / BA_BYTE_LEN) + j;
669 
670 				if ((i == mr->l0_chunk_last_num - 1)
671 				    && j == mr->l1_chunk_last_num - 1) {
672 					npages_allocated = bt_idx *
673 						      (pbl_bt_sz / BA_BYTE_LEN);
674 
675 					dma_free_coherent(dev,
676 					      (npages - npages_allocated) *
677 					      BA_BYTE_LEN,
678 					      mr->pbl_bt_l2[bt_idx],
679 					      mr->pbl_l2_dma_addr[bt_idx]);
680 
681 					break;
682 				}
683 
684 				dma_free_coherent(dev, pbl_bt_sz,
685 						mr->pbl_bt_l2[bt_idx],
686 						mr->pbl_l2_dma_addr[bt_idx]);
687 			}
688 		}
689 	}
690 
691 	kfree(mr->pbl_bt_l1);
692 	kfree(mr->pbl_l1_dma_addr);
693 	mr->pbl_bt_l1 = NULL;
694 	mr->pbl_l1_dma_addr = NULL;
695 	if (mhop_num == 3) {
696 		kfree(mr->pbl_bt_l2);
697 		kfree(mr->pbl_l2_dma_addr);
698 		mr->pbl_bt_l2 = NULL;
699 		mr->pbl_l2_dma_addr = NULL;
700 	}
701 }
702 
703 static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
704 			     struct hns_roce_mr *mr)
705 {
706 	struct device *dev = hr_dev->dev;
707 	int npages = 0;
708 	int ret;
709 
710 	if (mr->enabled) {
711 		ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
712 					      key_to_hw_index(mr->key) &
713 					      (hr_dev->caps.num_mtpts - 1));
714 		if (ret)
715 			dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);
716 	}
717 
718 	if (mr->size != ~0ULL) {
719 		if (mr->type == MR_TYPE_MR)
720 			npages = ib_umem_page_count(mr->umem);
721 
722 		if (!hr_dev->caps.pbl_hop_num)
723 			dma_free_coherent(dev,
724 					  (unsigned int)(npages * BA_BYTE_LEN),
725 					  mr->pbl_buf, mr->pbl_dma_addr);
726 		else
727 			hns_roce_mhop_free(hr_dev, mr);
728 	}
729 
730 	if (mr->enabled)
731 		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
732 				   key_to_hw_index(mr->key));
733 
734 	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
735 			     key_to_hw_index(mr->key), BITMAP_NO_RR);
736 }
737 
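/*
 * Make an MR visible to hardware: reserve its MTPT entry in HEM, fill a
 * mailbox with the MPT context (regular or FRMR layout) and issue
 * CREATE_MPT for the entry's index.
 */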
738 static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
739 			      struct hns_roce_mr *mr)
740 {
741 	int ret;
742 	unsigned long mtpt_idx = key_to_hw_index(mr->key);
743 	struct device *dev = hr_dev->dev;
744 	struct hns_roce_cmd_mailbox *mailbox;
745 	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
746 
747 	/* Prepare HEM entry memory */
748 	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
749 	if (ret)
750 		return ret;
751 
752 	/* Allocate mailbox memory */
753 	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
754 	if (IS_ERR(mailbox)) {
755 		ret = PTR_ERR(mailbox);
756 		goto err_table;
757 	}
758 
759 	if (mr->type != MR_TYPE_FRMR)
760 		ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
761 	else
762 		ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
763 	if (ret) {
764 		dev_err(dev, "Write mtpt failed!\n");
765 		goto err_page;
766 	}
767 
768 	ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
769 				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
770 	if (ret) {
771 		dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
772 		goto err_page;
773 	}
774 
775 	mr->enabled = 1;
776 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
777 
778 	return 0;
779 
780 err_page:
781 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
782 
783 err_table:
784 	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
785 	return ret;
786 }
787 
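/*
 * Write @npages MTT entries starting at @start_index. The chunk must not
 * cross a BT page boundary and must start on a segment boundary;
 * hns_roce_write_mtt() below takes care of splitting requests into such
 * chunks.
 */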
788 static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
789 				    struct hns_roce_mtt *mtt, u32 start_index,
790 				    u32 npages, u64 *page_list)
791 {
792 	struct hns_roce_hem_table *table;
793 	dma_addr_t dma_handle;
794 	__le64 *mtts;
795 	u32 bt_page_size;
796 	u32 i;
797 
798 	switch (mtt->mtt_type) {
799 	case MTT_TYPE_WQE:
800 		table = &hr_dev->mr_table.mtt_table;
801 		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
802 		break;
803 	case MTT_TYPE_CQE:
804 		table = &hr_dev->mr_table.mtt_cqe_table;
805 		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
806 		break;
807 	case MTT_TYPE_SRQWQE:
808 		table = &hr_dev->mr_table.mtt_srqwqe_table;
809 		bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
810 		break;
811 	case MTT_TYPE_IDX:
812 		table = &hr_dev->mr_table.mtt_idx_table;
813 		bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
814 		break;
815 	default:
816 		return -EINVAL;
817 	}
818 
819 	/* All MTTs must fit in the same page */
820 	if (start_index / (bt_page_size / sizeof(u64)) !=
821 		(start_index + npages - 1) / (bt_page_size / sizeof(u64)))
822 		return -EINVAL;
823 
824 	if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
825 		return -EINVAL;
826 
827 	mtts = hns_roce_table_find(hr_dev, table,
828 				mtt->first_seg +
829 				start_index / HNS_ROCE_MTT_ENTRY_PER_SEG,
830 				&dma_handle);
831 	if (!mtts)
832 		return -ENOMEM;
833 
834 	/* Save page addr; the low 12 bits are zero */
835 	for (i = 0; i < npages; ++i) {
836 		if (!hr_dev->caps.mtt_hop_num)
837 			mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT);
838 		else
839 			mtts[i] = cpu_to_le64(page_list[i]);
840 	}
841 
842 	return 0;
843 }
844 
845 static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
846 			      struct hns_roce_mtt *mtt, u32 start_index,
847 			      u32 npages, u64 *page_list)
848 {
849 	int chunk;
850 	int ret;
851 	u32 bt_page_size;
852 
853 	if (mtt->order < 0)
854 		return -EINVAL;
855 
856 	switch (mtt->mtt_type) {
857 	case MTT_TYPE_WQE:
858 		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
859 		break;
860 	case MTT_TYPE_CQE:
861 		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
862 		break;
863 	case MTT_TYPE_SRQWQE:
864 		bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
865 		break;
866 	case MTT_TYPE_IDX:
867 		bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
868 		break;
869 	default:
870 		dev_err(hr_dev->dev,
871 			"Unsupported mtt type %d, write mtt failed\n",
872 			mtt->mtt_type);
873 		return -EINVAL;
874 	}
875 
876 	while (npages > 0) {
877 		chunk = min_t(int, bt_page_size / sizeof(u64), npages);
878 
879 		ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
880 					       page_list);
881 		if (ret)
882 			return ret;
883 
884 		npages -= chunk;
885 		start_index += chunk;
886 		page_list += chunk;
887 	}
888 
889 	return 0;
890 }
891 
892 int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
893 			   struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
894 {
895 	u64 *page_list;
896 	int ret;
897 	u32 i;
898 
899 	page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
900 	if (!page_list)
901 		return -ENOMEM;
902 
903 	for (i = 0; i < buf->npages; ++i) {
904 		if (buf->nbufs == 1)
905 			page_list[i] = buf->direct.map + (i << buf->page_shift);
906 		else
907 			page_list[i] = buf->page_list[i].map;
909 	}

910 	ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);
911 
912 	kfree(page_list);
913 
914 	return ret;
915 }
916 
917 int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
918 {
919 	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
920 	int ret;
921 
922 	ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap,
923 				   hr_dev->caps.num_mtpts,
924 				   hr_dev->caps.num_mtpts - 1,
925 				   hr_dev->caps.reserved_mrws, 0);
926 	if (ret)
927 		return ret;
928 
929 	ret = hns_roce_buddy_init(&mr_table->mtt_buddy,
930 				  ilog2(hr_dev->caps.num_mtt_segs));
931 	if (ret)
932 		goto err_buddy;
933 
934 	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
935 		ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy,
936 					  ilog2(hr_dev->caps.num_cqe_segs));
937 		if (ret)
938 			goto err_buddy_cqe;
939 	}
940 
941 	if (hr_dev->caps.num_srqwqe_segs) {
942 		ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
943 					  ilog2(hr_dev->caps.num_srqwqe_segs));
944 		if (ret)
945 			goto err_buddy_srqwqe;
946 	}
947 
948 	if (hr_dev->caps.num_idx_segs) {
949 		ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
950 					  ilog2(hr_dev->caps.num_idx_segs));
951 		if (ret)
952 			goto err_buddy_idx;
953 	}
954 
955 	return 0;
956 
957 err_buddy_idx:
958 	if (hr_dev->caps.num_srqwqe_segs)
959 		hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
960 
961 err_buddy_srqwqe:
962 	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
963 		hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
964 
965 err_buddy_cqe:
966 	hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
967 
968 err_buddy:
969 	hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
970 	return ret;
971 }
972 
973 void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
974 {
975 	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
976 
977 	if (hr_dev->caps.num_idx_segs)
978 		hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
979 	if (hr_dev->caps.num_srqwqe_segs)
980 		hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
981 	hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
982 	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
983 		hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
984 	hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
985 }
986 
987 struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
988 {
989 	struct hns_roce_mr *mr;
990 	int ret;
991 
992 	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
993 	if (!mr)
994 		return ERR_PTR(-ENOMEM);
995 
996 	mr->type = MR_TYPE_DMA;
997 
998 	/* Allocate memory region key */
999 	ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
1000 				~0ULL, acc, 0, mr);
1001 	if (ret)
1002 		goto err_free;
1003 
1004 	ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr);
1005 	if (ret)
1006 		goto err_mr;
1007 
1008 	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
1009 	mr->umem = NULL;
1010 
1011 	return &mr->ibmr;
1012 
1013 err_mr:
1014 	hns_roce_mr_free(to_hr_dev(pd->device), mr);
1015 
1016 err_free:
1017 	kfree(mr);
1018 	return ERR_PTR(ret);
1019 }
1020 
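/*
 * Walk the umem and record one address per MTT page (mtt->page_shift
 * granularity), flushing the scratch buffer to the MTT whenever a full
 * BT page worth of entries has been collected.
 */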
1021 int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
1022 			       struct hns_roce_mtt *mtt, struct ib_umem *umem)
1023 {
1024 	struct device *dev = hr_dev->dev;
1025 	struct sg_dma_page_iter sg_iter;
1026 	unsigned int order;
1027 	int npage = 0;
1028 	int ret = 0;
1029 	int i;
1030 	u64 page_addr;
1031 	u64 *pages;
1032 	u32 bt_page_size;
1033 	u32 n;
1034 
1035 	switch (mtt->mtt_type) {
1036 	case MTT_TYPE_WQE:
1037 		order = hr_dev->caps.mtt_ba_pg_sz;
1038 		break;
1039 	case MTT_TYPE_CQE:
1040 		order = hr_dev->caps.cqe_ba_pg_sz;
1041 		break;
1042 	case MTT_TYPE_SRQWQE:
1043 		order = hr_dev->caps.srqwqe_ba_pg_sz;
1044 		break;
1045 	case MTT_TYPE_IDX:
1046 		order = hr_dev->caps.idx_ba_pg_sz;
1047 		break;
1048 	default:
1049 		dev_err(dev, "Unsupported mtt type %d, write mtt failed\n",
1050 			mtt->mtt_type);
1051 		return -EINVAL;
1052 	}
1053 
1054 	bt_page_size = 1 << (order + PAGE_SHIFT);
1055 
1056 	pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
1057 	if (!pages)
1058 		return -ENOMEM;
1059 
1060 	i = n = 0;
1061 
1062 	for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
1063 		page_addr = sg_page_iter_dma_address(&sg_iter);
1064 		if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
1065 			if (page_addr & ((1 << mtt->page_shift) - 1)) {
1066 				dev_err(dev,
1067 					"page_addr is not aligned to page_shift %d!\n",
1068 					mtt->page_shift);
1069 				ret = -EINVAL;
1070 				goto out;
1071 			}
1072 			pages[i++] = page_addr;
1073 		}
1074 		npage++;
1075 		if (i == bt_page_size / sizeof(u64)) {
1076 			ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
1077 			if (ret)
1078 				goto out;
1079 			n += i;
1080 			i = 0;
1081 		}
1082 	}
1083 
1084 	if (i)
1085 		ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
1086 
1087 out:
1088 	free_pages((unsigned long) pages, order);
1089 	return ret;
1090 }
1091 
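/*
 * Copy the umem page addresses straight into the MR's PBL: into pbl_buf
 * for 0/1 hop configurations, or into the lowest-level BTs (L1 or L2)
 * for 2/3 hop configurations. hip06 stores addresses shifted right by
 * 12 bits.
 */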
1092 static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
1093 				     struct hns_roce_mr *mr,
1094 				     struct ib_umem *umem)
1095 {
1096 	struct sg_dma_page_iter sg_iter;
1097 	int i = 0, j = 0;
1098 	u64 page_addr;
1099 	u32 pbl_bt_sz;
1100 
1101 	if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
1102 		return 0;
1103 
1104 	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
1105 	for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
1106 		page_addr = sg_page_iter_dma_address(&sg_iter);
1107 		if (!hr_dev->caps.pbl_hop_num) {
1108 			/* for hip06, page addr is aligned to 4K */
1109 			mr->pbl_buf[i++] = page_addr >> 12;
1110 		} else if (hr_dev->caps.pbl_hop_num == 1) {
1111 			mr->pbl_buf[i++] = page_addr;
1112 		} else {
1113 			if (hr_dev->caps.pbl_hop_num == 2)
1114 				mr->pbl_bt_l1[i][j] = page_addr;
1115 			else if (hr_dev->caps.pbl_hop_num == 3)
1116 				mr->pbl_bt_l2[i][j] = page_addr;
1117 
1118 			j++;
1119 			if (j >= (pbl_bt_sz / BA_BYTE_LEN)) {
1120 				i++;
1121 				j = 0;
1122 			}
1123 		}
1124 	}
1125 
1126 	/* Ensure the PBL updates are visible before the MPT is written */
1127 	mb();
1128 
1129 	return 0;
1130 }
1131 
1132 struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1133 				   u64 virt_addr, int access_flags,
1134 				   struct ib_udata *udata)
1135 {
1136 	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
1137 	struct device *dev = hr_dev->dev;
1138 	struct hns_roce_mr *mr;
1139 	int bt_size;
1140 	int ret;
1141 	int n;
1142 	int i;
1143 
1144 	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
1145 	if (!mr)
1146 		return ERR_PTR(-ENOMEM);
1147 
1148 	mr->umem = ib_umem_get(pd->device, start, length, access_flags);
1149 	if (IS_ERR(mr->umem)) {
1150 		ret = PTR_ERR(mr->umem);
1151 		goto err_free;
1152 	}
1153 
1154 	n = ib_umem_page_count(mr->umem);
1155 
1156 	if (!hr_dev->caps.pbl_hop_num) {
1157 		if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
1158 			dev_err(dev,
1159 			     "MR len %lld error: MR is limited to 4G at most!\n",
1160 			     length);
1161 			ret = -EINVAL;
1162 			goto err_umem;
1163 		}
1164 	} else {
1165 		u64 pbl_size = 1;
1166 
1167 		bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) /
1168 			  BA_BYTE_LEN;
1169 		for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
1170 			pbl_size *= bt_size;
1171 		if (n > pbl_size) {
1172 			dev_err(dev,
1173 			    "MR len %lld error: MR page num is limited to %lld!\n",
1174 			    length, pbl_size);
1175 			ret = -EINVAL;
1176 			goto err_umem;
1177 		}
1178 	}
1179 
1180 	mr->type = MR_TYPE_MR;
1181 
1182 	ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
1183 				access_flags, n, mr);
1184 	if (ret)
1185 		goto err_umem;
1186 
1187 	ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
1188 	if (ret)
1189 		goto err_mr;
1190 
1191 	ret = hns_roce_mr_enable(hr_dev, mr);
1192 	if (ret)
1193 		goto err_mr;
1194 
1195 	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
1196 
1197 	return &mr->ibmr;
1198 
1199 err_mr:
1200 	hns_roce_mr_free(hr_dev, mr);
1201 
1202 err_umem:
1203 	ib_umem_release(mr->umem);
1204 
1205 err_free:
1206 	kfree(mr);
1207 	return ERR_PTR(ret);
1208 }
1209 
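/*
 * Replace the translation of an existing MR: drop the old PBL and umem,
 * pin the new range, rebuild the PBL for it and update the MPT context
 * in @mailbox via the hardware-specific rereg_write_mtpt() hook.
 */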
1210 static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
1211 			  u64 start, u64 length,
1212 			  u64 virt_addr, int mr_access_flags,
1213 			  struct hns_roce_cmd_mailbox *mailbox,
1214 			  u32 pdn, struct ib_udata *udata)
1215 {
1216 	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
1217 	struct hns_roce_mr *mr = to_hr_mr(ibmr);
1218 	struct device *dev = hr_dev->dev;
1219 	int npages;
1220 	int ret;
1221 
1222 	if (mr->size != ~0ULL) {
1223 		npages = ib_umem_page_count(mr->umem);
1224 
1225 		if (hr_dev->caps.pbl_hop_num)
1226 			hns_roce_mhop_free(hr_dev, mr);
1227 		else
1228 			dma_free_coherent(dev, npages * 8,
1229 					  mr->pbl_buf, mr->pbl_dma_addr);
1230 	}
1231 	ib_umem_release(mr->umem);
1232 
1233 	mr->umem = ib_umem_get(ibmr->device, start, length, mr_access_flags);
1234 	if (IS_ERR(mr->umem)) {
1235 		ret = PTR_ERR(mr->umem);
1236 		mr->umem = NULL;
1237 		return ret;
1238 	}
1239 	npages = ib_umem_page_count(mr->umem);
1240 
1241 	if (hr_dev->caps.pbl_hop_num) {
1242 		ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
1243 		if (ret)
1244 			goto release_umem;
1245 	} else {
1246 		mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
1247 						 &(mr->pbl_dma_addr),
1248 						 GFP_KERNEL);
1249 		if (!mr->pbl_buf) {
1250 			ret = -ENOMEM;
1251 			goto release_umem;
1252 		}
1253 	}
1254 
1255 	ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
1256 					   mr_access_flags, virt_addr,
1257 					   length, mailbox->buf);
1258 	if (ret)
1259 		goto release_umem;
1260 
1262 	ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
1263 	if (ret) {
1264 		if (mr->size != ~0ULL) {
1265 			npages = ib_umem_page_count(mr->umem);
1266 
1267 			if (hr_dev->caps.pbl_hop_num)
1268 				hns_roce_mhop_free(hr_dev, mr);
1269 			else
1270 				dma_free_coherent(dev, npages * 8,
1271 						  mr->pbl_buf,
1272 						  mr->pbl_dma_addr);
1273 		}
1274 
1275 		goto release_umem;
1276 	}
1277 
1278 	return 0;
1279 
1280 release_umem:
1281 	ib_umem_release(mr->umem);
1282 	return ret;
1284 }
1285 
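/*
 * Re-registration sequence: read the current MPT context with QUERY_MPT,
 * take the entry out of service with DESTROY_MPT, patch the context
 * (PD, access flags and/or translation) and re-enable it with
 * CREATE_MPT.
 */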
1287 int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
1288 			   u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
1289 			   struct ib_udata *udata)
1290 {
1291 	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
1292 	struct hns_roce_mr *mr = to_hr_mr(ibmr);
1293 	struct hns_roce_cmd_mailbox *mailbox;
1294 	struct device *dev = hr_dev->dev;
1295 	unsigned long mtpt_idx;
1296 	u32 pdn = 0;
1297 	int ret;
1298 
1299 	if (!mr->enabled)
1300 		return -EINVAL;
1301 
1302 	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
1303 	if (IS_ERR(mailbox))
1304 		return PTR_ERR(mailbox);
1305 
1306 	mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
1307 	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
1308 				HNS_ROCE_CMD_QUERY_MPT,
1309 				HNS_ROCE_CMD_TIMEOUT_MSECS);
1310 	if (ret)
1311 		goto free_cmd_mbox;
1312 
1313 	ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx);
1314 	if (ret)
1315 		dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);
1316 
1317 	mr->enabled = 0;
1318 
1319 	if (flags & IB_MR_REREG_PD)
1320 		pdn = to_hr_pd(pd)->pdn;
1321 
1322 	if (flags & IB_MR_REREG_TRANS) {
1323 		ret = rereg_mr_trans(ibmr, flags,
1324 				     start, length,
1325 				     virt_addr, mr_access_flags,
1326 				     mailbox, pdn, udata);
1327 		if (ret)
1328 			goto free_cmd_mbox;
1329 	} else {
1330 		ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
1331 						   mr_access_flags, virt_addr,
1332 						   length, mailbox->buf);
1333 		if (ret)
1334 			goto free_cmd_mbox;
1335 	}
1336 
1337 	ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
1338 	if (ret) {
1339 		dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
1340 		ib_umem_release(mr->umem);
1341 		goto free_cmd_mbox;
1342 	}
1343 
1344 	mr->enabled = 1;
1345 	if (flags & IB_MR_REREG_ACCESS)
1346 		mr->access = mr_access_flags;
1347 
1348 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1349 
1350 	return 0;
1351 
1352 free_cmd_mbox:
1353 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1354 
1355 	return ret;
1356 }
1357 
1358 int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1359 {
1360 	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
1361 	struct hns_roce_mr *mr = to_hr_mr(ibmr);
1362 	int ret = 0;
1363 
1364 	if (hr_dev->hw->dereg_mr) {
1365 		ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata);
1366 	} else {
1367 		hns_roce_mr_free(hr_dev, mr);
1368 
1369 		ib_umem_release(mr->umem);
1370 		kfree(mr);
1371 	}
1372 
1373 	return ret;
1374 }
1375 
1376 struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
1377 				u32 max_num_sg, struct ib_udata *udata)
1378 {
1379 	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
1380 	struct device *dev = hr_dev->dev;
1381 	struct hns_roce_mr *mr;
1382 	u64 length;
1383 	u32 page_size;
1384 	int ret;
1385 
1386 	page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
1387 	length = max_num_sg * page_size;
1388 
1389 	if (mr_type != IB_MR_TYPE_MEM_REG)
1390 		return ERR_PTR(-EINVAL);
1391 
1392 	if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
1393 		dev_err(dev, "max_num_sg is larger than %d\n",
1394 			HNS_ROCE_FRMR_MAX_PA);
1395 		return ERR_PTR(-EINVAL);
1396 	}
1397 
1398 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1399 	if (!mr)
1400 		return ERR_PTR(-ENOMEM);
1401 
1402 	mr->type = MR_TYPE_FRMR;
1403 
1404 	/* Allocate memory region key */
1405 	ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
1406 				0, max_num_sg, mr);
1407 	if (ret)
1408 		goto err_free;
1409 
1410 	ret = hns_roce_mr_enable(hr_dev, mr);
1411 	if (ret)
1412 		goto err_mr;
1413 
1414 	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
1415 	mr->umem = NULL;
1416 
1417 	return &mr->ibmr;
1418 
1419 err_mr:
1420 	hns_roce_mr_free(to_hr_dev(pd->device), mr);
1421 
1422 err_free:
1423 	kfree(mr);
1424 	return ERR_PTR(ret);
1425 }
1426 
1427 static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
1428 {
1429 	struct hns_roce_mr *mr = to_hr_mr(ibmr);
1430 
1431 	mr->pbl_buf[mr->npages++] = addr;
1432 
1433 	return 0;
1434 }
1435 
1436 int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1437 		       unsigned int *sg_offset)
1438 {
1439 	struct hns_roce_mr *mr = to_hr_mr(ibmr);
1440 
1441 	mr->npages = 0;
1442 
1443 	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
1444 }
1445 
1446 static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
1447 			     struct hns_roce_mw *mw)
1448 {
1449 	struct device *dev = hr_dev->dev;
1450 	int ret;
1451 
1452 	if (mw->enabled) {
1453 		ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
1454 					      key_to_hw_index(mw->rkey) &
1455 					      (hr_dev->caps.num_mtpts - 1));
1456 		if (ret)
1457 			dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);
1458 
1459 		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
1460 				   key_to_hw_index(mw->rkey));
1461 	}
1462 
1463 	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
1464 			     key_to_hw_index(mw->rkey), BITMAP_NO_RR);
1465 }
1466 
1467 static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
1468 			      struct hns_roce_mw *mw)
1469 {
1470 	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
1471 	struct hns_roce_cmd_mailbox *mailbox;
1472 	struct device *dev = hr_dev->dev;
1473 	unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
1474 	int ret;
1475 
1476 	/* prepare HEM entry memory */
1477 	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
1478 	if (ret)
1479 		return ret;
1480 
1481 	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
1482 	if (IS_ERR(mailbox)) {
1483 		ret = PTR_ERR(mailbox);
1484 		goto err_table;
1485 	}
1486 
1487 	ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
1488 	if (ret) {
1489 		dev_err(dev, "MW write mtpt failed!\n");
1490 		goto err_page;
1491 	}
1492 
1493 	ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
1494 				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
1495 	if (ret) {
1496 		dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
1497 		goto err_page;
1498 	}
1499 
1500 	mw->enabled = 1;
1501 
1502 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1503 
1504 	return 0;
1505 
1506 err_page:
1507 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1508 
1509 err_table:
1510 	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
1511 
1512 	return ret;
1513 }
1514 
1515 struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
1516 				struct ib_udata *udata)
1517 {
1518 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
1519 	struct hns_roce_mw *mw;
1520 	unsigned long index = 0;
1521 	int ret;
1522 
1523 	mw = kmalloc(sizeof(*mw), GFP_KERNEL);
1524 	if (!mw)
1525 		return ERR_PTR(-ENOMEM);
1526 
1527 	/* Allocate a key for mw from bitmap */
1528 	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
1529 	if (ret)
1530 		goto err_bitmap;
1531 
1532 	mw->rkey = hw_index_to_key(index);
1533 
1534 	mw->ibmw.rkey = mw->rkey;
1535 	mw->ibmw.type = type;
1536 	mw->pdn = to_hr_pd(ib_pd)->pdn;
1537 	mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
1538 	mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
1539 	mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
1540 
1541 	ret = hns_roce_mw_enable(hr_dev, mw);
1542 	if (ret)
1543 		goto err_mw;
1544 
1545 	return &mw->ibmw;
1546 
1547 err_mw:
1548 	hns_roce_mw_free(hr_dev, mw);
1549 
1550 err_bitmap:
1551 	kfree(mw);
1552 
1553 	return ERR_PTR(ret);
1554 }
1555 
1556 int hns_roce_dealloc_mw(struct ib_mw *ibmw)
1557 {
1558 	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
1559 	struct hns_roce_mw *mw = to_hr_mw(ibmw);
1560 
1561 	hns_roce_mw_free(hr_dev, mw);
1562 	kfree(mw);
1563 
1564 	return 0;
1565 }
1566 
1567 void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift,
1568 		       int buf_pg_shift)
1569 {
1570 	hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift);
1571 	mtr->buf_pg_shift = buf_pg_shift;
1572 }
1573 
1574 void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev,
1575 			  struct hns_roce_mtr *mtr)
1576 {
1577 	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
1578 }
1579 
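/*
 * Fill the MTT entries of one buffer region: look up the destination
 * entries through the HEM list and copy in the buffer page addresses
 * (shifted for HIP06/VER1 hardware).
 */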
1580 static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev,
1581 			      struct hns_roce_mtr *mtr, dma_addr_t *bufs,
1582 			      struct hns_roce_buf_region *r)
1583 {
1584 	int offset;
1585 	int count;
1586 	int npage;
1587 	u64 *mtts;
1588 	int end;
1589 	int i;
1590 
1591 	offset = r->offset;
1592 	end = offset + r->count;
1593 	npage = 0;
1594 	while (offset < end) {
1595 		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
1596 						  offset, &count, NULL);
1597 		if (!mtts)
1598 			return -ENOBUFS;
1599 
1600 		/* Save page addr; the low 12 bits are zero */
1601 		for (i = 0; i < count; i++) {
1602 			if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
1603 				mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT;
1604 			else
1605 				mtts[i] = bufs[npage];
1606 
1607 			npage++;
1608 		}
1609 		offset += count;
1610 	}
1611 
1612 	return 0;
1613 }
1614 
1615 int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
1616 			dma_addr_t **bufs, struct hns_roce_buf_region *regions,
1617 			int region_cnt)
1618 {
1619 	struct hns_roce_buf_region *r;
1620 	int ret;
1621 	int i;
1622 
1623 	ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions,
1624 					region_cnt);
1625 	if (ret)
1626 		return ret;
1627 
1628 	for (i = 0; i < region_cnt; i++) {
1629 		r = &regions[i];
1630 		ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r);
1631 		if (ret) {
1632 			dev_err(hr_dev->dev,
1633 				"write mtr[%d/%d] failed %d, offset=%d.\n",
1634 				i, region_cnt, ret, r->offset);
1635 			goto err_write;
1636 		}
1637 	}
1638 
1639 	return 0;
1640 
1641 err_write:
1642 	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
1643 
1644 	return ret;
1645 }
1646 
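/*
 * Copy up to @mtt_max MTT entries of @mtr, starting at @offset, into
 * @mtt_buf. Returns the number of entries copied and, if @base_addr is
 * set, also reports the root BA of the HEM list.
 */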
1647 int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
1648 		      int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
1649 {
1650 	u64 *mtts = mtt_buf;
1651 	int mtt_count;
1652 	int total = 0;
1653 	u64 *addr;
1654 	int npage;
1655 	int left;
1656 
1657 	if (mtts == NULL || mtt_max < 1)
1658 		goto done;
1659 
1660 	left = mtt_max;
1661 	while (left > 0) {
1662 		mtt_count = 0;
1663 		addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
1664 						  offset + total,
1665 						  &mtt_count, NULL);
1666 		if (!addr || !mtt_count)
1667 			goto done;
1668 
1669 		npage = min(mtt_count, left);
1670 		memcpy(&mtts[total], addr, BA_BYTE_LEN * npage);
1671 		left -= npage;
1672 		total += npage;
1673 	}
1674 
1675 done:
1676 	if (base_addr)
1677 		*base_addr = mtr->hem_list.root_ba;
1678 
1679 	return total;
1680 }
1681