1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
4  * Copyright (c) 2006, 2007 Cisco Systems, Inc.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 #include <linux/errno.h>
36 #include <linux/export.h>
37 #include <linux/slab.h>
38 #include <linux/kernel.h>
39 #include <linux/vmalloc.h>
40 
41 #include <linux/mlx4/cmd.h>
42 
43 #include "mlx4.h"
44 #include "icm.h"
45 
46 static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
47 {
48 	int o;
49 	int m;
50 	u32 seg;
51 
52 	spin_lock(&buddy->lock);
53 
54 	for (o = order; o <= buddy->max_order; ++o)
55 		if (buddy->num_free[o]) {
56 			m = 1 << (buddy->max_order - o);
57 			seg = find_first_bit(buddy->bits[o], m);
58 			if (seg < m)
59 				goto found;
60 		}
61 
62 	spin_unlock(&buddy->lock);
63 	return -1;
64 
65  found:
66 	clear_bit(seg, buddy->bits[o]);
67 	--buddy->num_free[o];
68 
69 	while (o > order) {
70 		--o;
71 		seg <<= 1;
72 		set_bit(seg ^ 1, buddy->bits[o]);
73 		++buddy->num_free[o];
74 	}
75 
76 	spin_unlock(&buddy->lock);
77 
78 	seg <<= order;
79 
80 	return seg;
81 }
82 
83 static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order)
84 {
85 	seg >>= order;
86 
87 	spin_lock(&buddy->lock);
88 
89 	while (test_bit(seg ^ 1, buddy->bits[order])) {
90 		clear_bit(seg ^ 1, buddy->bits[order]);
91 		--buddy->num_free[order];
92 		seg >>= 1;
93 		++order;
94 	}
95 
96 	set_bit(seg, buddy->bits[order]);
97 	++buddy->num_free[order];
98 
99 	spin_unlock(&buddy->lock);
100 }
101 
102 static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
103 {
104 	int i, s;
105 
106 	buddy->max_order = max_order;
107 	spin_lock_init(&buddy->lock);
108 
109 	buddy->bits = kcalloc(buddy->max_order + 1, sizeof (long *),
110 			      GFP_KERNEL);
111 	buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
112 				  GFP_KERNEL);
113 	if (!buddy->bits || !buddy->num_free)
114 		goto err_out;
115 
116 	for (i = 0; i <= buddy->max_order; ++i) {
117 		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
118 		buddy->bits[i] = kcalloc(s, sizeof (long), GFP_KERNEL | __GFP_NOWARN);
119 		if (!buddy->bits[i]) {
120 			buddy->bits[i] = vzalloc(s * sizeof(long));
121 			if (!buddy->bits[i])
122 				goto err_out_free;
123 		}
124 	}
125 
126 	set_bit(0, buddy->bits[buddy->max_order]);
127 	buddy->num_free[buddy->max_order] = 1;
128 
129 	return 0;
130 
131 err_out_free:
132 	for (i = 0; i <= buddy->max_order; ++i)
133 		if (buddy->bits[i] && is_vmalloc_addr(buddy->bits[i]))
134 			vfree(buddy->bits[i]);
135 		else
136 			kfree(buddy->bits[i]);
137 
138 err_out:
139 	kfree(buddy->bits);
140 	kfree(buddy->num_free);
141 
142 	return -ENOMEM;
143 }
144 
145 static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy)
146 {
147 	int i;
148 
149 	for (i = 0; i <= buddy->max_order; ++i)
150 		if (is_vmalloc_addr(buddy->bits[i]))
151 			vfree(buddy->bits[i]);
152 		else
153 			kfree(buddy->bits[i]);
154 
155 	kfree(buddy->bits);
156 	kfree(buddy->num_free);
157 }
158 
159 u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
160 {
161 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
162 	u32 seg;
163 	int seg_order;
164 	u32 offset;
165 
166 	seg_order = max_t(int, order - log_mtts_per_seg, 0);
167 
168 	seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, seg_order);
169 	if (seg == -1)
170 		return -1;
171 
172 	offset = seg * (1 << log_mtts_per_seg);
173 
174 	if (mlx4_table_get_range(dev, &mr_table->mtt_table, offset,
175 				 offset + (1 << order) - 1)) {
176 		mlx4_buddy_free(&mr_table->mtt_buddy, seg, seg_order);
177 		return -1;
178 	}
179 
180 	return offset;
181 }
182 
183 static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
184 {
185 	u64 in_param = 0;
186 	u64 out_param;
187 	int err;
188 
189 	if (mlx4_is_mfunc(dev)) {
190 		set_param_l(&in_param, order);
191 		err = mlx4_cmd_imm(dev, in_param, &out_param, RES_MTT,
192 						       RES_OP_RESERVE_AND_MAP,
193 						       MLX4_CMD_ALLOC_RES,
194 						       MLX4_CMD_TIME_CLASS_A,
195 						       MLX4_CMD_WRAPPED);
196 		if (err)
197 			return -1;
198 		return get_param_l(&out_param);
199 	}
200 	return __mlx4_alloc_mtt_range(dev, order);
201 }
202 
203 int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
204 		  struct mlx4_mtt *mtt)
205 {
206 	int i;
207 
208 	if (!npages) {
209 		mtt->order      = -1;
210 		mtt->page_shift = MLX4_ICM_PAGE_SHIFT;
211 		return 0;
212 	} else
213 		mtt->page_shift = page_shift;
214 
215 	for (mtt->order = 0, i = 1; i < npages; i <<= 1)
216 		++mtt->order;
217 
218 	mtt->offset = mlx4_alloc_mtt_range(dev, mtt->order);
219 	if (mtt->offset == -1)
220 		return -ENOMEM;
221 
222 	return 0;
223 }
224 EXPORT_SYMBOL_GPL(mlx4_mtt_init);
225 
226 void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
227 {
228 	u32 first_seg;
229 	int seg_order;
230 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
231 
232 	seg_order = max_t(int, order - log_mtts_per_seg, 0);
233 	first_seg = offset / (1 << log_mtts_per_seg);
234 
235 	mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, seg_order);
236 	mlx4_table_put_range(dev, &mr_table->mtt_table, offset,
237 			     offset + (1 << order) - 1);
238 }
239 
240 static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
241 {
242 	u64 in_param = 0;
243 	int err;
244 
245 	if (mlx4_is_mfunc(dev)) {
246 		set_param_l(&in_param, offset);
247 		set_param_h(&in_param, order);
248 		err = mlx4_cmd(dev, in_param, RES_MTT, RES_OP_RESERVE_AND_MAP,
249 						       MLX4_CMD_FREE_RES,
250 						       MLX4_CMD_TIME_CLASS_A,
251 						       MLX4_CMD_WRAPPED);
252 		if (err)
253 			mlx4_warn(dev, "Failed to free mtt range at:%d order:%d\n",
254 				  offset, order);
255 		return;
256 	}
257 	 __mlx4_free_mtt_range(dev, offset, order);
258 }
259 
260 void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
261 {
262 	if (mtt->order < 0)
263 		return;
264 
265 	mlx4_free_mtt_range(dev, mtt->offset, mtt->order);
266 }
267 EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup);
268 
269 u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
270 {
271 	return (u64) mtt->offset * dev->caps.mtt_entry_sz;
272 }
273 EXPORT_SYMBOL_GPL(mlx4_mtt_addr);
274 
/*
 * Convert a hardware MPT index into a memory key by rotating the 32-bit
 * value left by 8 bits (the top byte of the index becomes the low byte
 * of the key).
 */
static u32 hw_index_to_key(u32 ind)
{
	u32 low_byte = ind >> 24;
	u32 upper = ind << 8;

	return upper | low_byte;
}
279 
/*
 * Convert a memory key into a hardware MPT index by rotating the 32-bit
 * value right by 8 bits (the low byte of the key becomes the top byte of
 * the index).
 */
static u32 key_to_hw_index(u32 key)
{
	u32 top_byte = key << 24;
	u32 lower = key >> 8;

	return top_byte | lower;
}
284 
285 static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
286 			  int mpt_index)
287 {
288 	return mlx4_cmd(dev, mailbox->dma, mpt_index,
289 			0, MLX4_CMD_SW2HW_MPT, MLX4_CMD_TIME_CLASS_B,
290 			MLX4_CMD_WRAPPED);
291 }
292 
293 static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
294 			  int mpt_index)
295 {
296 	return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
297 			    !mailbox, MLX4_CMD_HW2SW_MPT,
298 			    MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
299 }
300 
301 /* Must protect against concurrent access */
302 int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
303 		       struct mlx4_mpt_entry ***mpt_entry)
304 {
305 	int err;
306 	int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
307 	struct mlx4_cmd_mailbox *mailbox = NULL;
308 
309 	if (mmr->enabled != MLX4_MPT_EN_HW)
310 		return -EINVAL;
311 
312 	err = mlx4_HW2SW_MPT(dev, NULL, key);
313 	if (err) {
314 		mlx4_warn(dev, "HW2SW_MPT failed (%d).", err);
315 		mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n");
316 		return err;
317 	}
318 
319 	mmr->enabled = MLX4_MPT_EN_SW;
320 
321 	if (!mlx4_is_mfunc(dev)) {
322 		**mpt_entry = mlx4_table_find(
323 				&mlx4_priv(dev)->mr_table.dmpt_table,
324 				key, NULL);
325 	} else {
326 		mailbox = mlx4_alloc_cmd_mailbox(dev);
327 		if (IS_ERR_OR_NULL(mailbox))
328 			return PTR_ERR(mailbox);
329 
330 		err = mlx4_cmd_box(dev, 0, mailbox->dma, key,
331 				   0, MLX4_CMD_QUERY_MPT,
332 				   MLX4_CMD_TIME_CLASS_B,
333 				   MLX4_CMD_WRAPPED);
334 		if (err)
335 			goto free_mailbox;
336 
337 		*mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf;
338 	}
339 
340 	if (!(*mpt_entry) || !(**mpt_entry)) {
341 		err = -ENOMEM;
342 		goto free_mailbox;
343 	}
344 
345 	return 0;
346 
347 free_mailbox:
348 	mlx4_free_cmd_mailbox(dev, mailbox);
349 	return err;
350 }
351 EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt);
352 
353 int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
354 			 struct mlx4_mpt_entry **mpt_entry)
355 {
356 	int err;
357 
358 	if (!mlx4_is_mfunc(dev)) {
359 		/* Make sure any changes to this entry are flushed */
360 		wmb();
361 
362 		*(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW;
363 
364 		/* Make sure the new status is written */
365 		wmb();
366 
367 		err = mlx4_SYNC_TPT(dev);
368 	} else {
369 		int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
370 
371 		struct mlx4_cmd_mailbox *mailbox =
372 			container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
373 				     buf);
374 
375 		err = mlx4_SW2HW_MPT(dev, mailbox, key);
376 	}
377 
378 	if (!err) {
379 		mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK;
380 		mmr->enabled = MLX4_MPT_EN_HW;
381 	}
382 	return err;
383 }
384 EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt);
385 
386 void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
387 			struct mlx4_mpt_entry **mpt_entry)
388 {
389 	if (mlx4_is_mfunc(dev)) {
390 		struct mlx4_cmd_mailbox *mailbox =
391 			container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
392 				     buf);
393 		mlx4_free_cmd_mailbox(dev, mailbox);
394 	}
395 }
396 EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt);
397 
398 int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
399 			 u32 pdn)
400 {
401 	u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags) & ~MLX4_MPT_PD_MASK;
402 	/* The wrapper function will put the slave's id here */
403 	if (mlx4_is_mfunc(dev))
404 		pd_flags &= ~MLX4_MPT_PD_VF_MASK;
405 
406 	mpt_entry->pd_flags = cpu_to_be32(pd_flags |
407 					  (pdn & MLX4_MPT_PD_MASK)
408 					  | MLX4_MPT_PD_FLAG_EN_INV);
409 	return 0;
410 }
411 EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd);
412 
413 int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
414 			     struct mlx4_mpt_entry *mpt_entry,
415 			     u32 access)
416 {
417 	u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) |
418 		    (access & MLX4_PERM_MASK);
419 
420 	mpt_entry->flags = cpu_to_be32(flags);
421 	return 0;
422 }
423 EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access);
424 
425 static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
426 			   u64 iova, u64 size, u32 access, int npages,
427 			   int page_shift, struct mlx4_mr *mr)
428 {
429 	mr->iova       = iova;
430 	mr->size       = size;
431 	mr->pd	       = pd;
432 	mr->access     = access;
433 	mr->enabled    = MLX4_MPT_DISABLED;
434 	mr->key	       = hw_index_to_key(mridx);
435 
436 	return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
437 }
438 
439 static int mlx4_WRITE_MTT(struct mlx4_dev *dev,
440 			  struct mlx4_cmd_mailbox *mailbox,
441 			  int num_entries)
442 {
443 	return mlx4_cmd(dev, mailbox->dma, num_entries, 0, MLX4_CMD_WRITE_MTT,
444 			MLX4_CMD_TIME_CLASS_A,  MLX4_CMD_WRAPPED);
445 }
446 
447 int __mlx4_mpt_reserve(struct mlx4_dev *dev)
448 {
449 	struct mlx4_priv *priv = mlx4_priv(dev);
450 
451 	return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
452 }
453 
454 static int mlx4_mpt_reserve(struct mlx4_dev *dev)
455 {
456 	u64 out_param;
457 
458 	if (mlx4_is_mfunc(dev)) {
459 		if (mlx4_cmd_imm(dev, 0, &out_param, RES_MPT, RES_OP_RESERVE,
460 				   MLX4_CMD_ALLOC_RES,
461 				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
462 			return -1;
463 		return get_param_l(&out_param);
464 	}
465 	return  __mlx4_mpt_reserve(dev);
466 }
467 
468 void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
469 {
470 	struct mlx4_priv *priv = mlx4_priv(dev);
471 
472 	mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index, MLX4_NO_RR);
473 }
474 
475 static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
476 {
477 	u64 in_param = 0;
478 
479 	if (mlx4_is_mfunc(dev)) {
480 		set_param_l(&in_param, index);
481 		if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_RESERVE,
482 			       MLX4_CMD_FREE_RES,
483 			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
484 			mlx4_warn(dev, "Failed to release mr index:%d\n",
485 				  index);
486 		return;
487 	}
488 	__mlx4_mpt_release(dev, index);
489 }
490 
491 int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
492 {
493 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
494 
495 	return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp);
496 }
497 
498 static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
499 {
500 	u64 param = 0;
501 
502 	if (mlx4_is_mfunc(dev)) {
503 		set_param_l(&param, index);
504 		return mlx4_cmd_imm(dev, param, &param, RES_MPT, RES_OP_MAP_ICM,
505 							MLX4_CMD_ALLOC_RES,
506 							MLX4_CMD_TIME_CLASS_A,
507 							MLX4_CMD_WRAPPED);
508 	}
509 	return __mlx4_mpt_alloc_icm(dev, index, gfp);
510 }
511 
512 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
513 {
514 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
515 
516 	mlx4_table_put(dev, &mr_table->dmpt_table, index);
517 }
518 
519 static void mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
520 {
521 	u64 in_param = 0;
522 
523 	if (mlx4_is_mfunc(dev)) {
524 		set_param_l(&in_param, index);
525 		if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_MAP_ICM,
526 			     MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
527 			     MLX4_CMD_WRAPPED))
528 			mlx4_warn(dev, "Failed to free icm of mr index:%d\n",
529 				  index);
530 		return;
531 	}
532 	return __mlx4_mpt_free_icm(dev, index);
533 }
534 
535 int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
536 		  int npages, int page_shift, struct mlx4_mr *mr)
537 {
538 	u32 index;
539 	int err;
540 
541 	index = mlx4_mpt_reserve(dev);
542 	if (index == -1)
543 		return -ENOMEM;
544 
545 	err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size,
546 				     access, npages, page_shift, mr);
547 	if (err)
548 		mlx4_mpt_release(dev, index);
549 
550 	return err;
551 }
552 EXPORT_SYMBOL_GPL(mlx4_mr_alloc);
553 
554 static int mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
555 {
556 	int err;
557 
558 	if (mr->enabled == MLX4_MPT_EN_HW) {
559 		err = mlx4_HW2SW_MPT(dev, NULL,
560 				     key_to_hw_index(mr->key) &
561 				     (dev->caps.num_mpts - 1));
562 		if (err) {
563 			mlx4_warn(dev, "HW2SW_MPT failed (%d), MR has MWs bound to it\n",
564 				  err);
565 			return err;
566 		}
567 
568 		mr->enabled = MLX4_MPT_EN_SW;
569 	}
570 	mlx4_mtt_cleanup(dev, &mr->mtt);
571 
572 	return 0;
573 }
574 
575 int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
576 {
577 	int ret;
578 
579 	ret = mlx4_mr_free_reserved(dev, mr);
580 	if (ret)
581 		return ret;
582 	if (mr->enabled)
583 		mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key));
584 	mlx4_mpt_release(dev, key_to_hw_index(mr->key));
585 
586 	return 0;
587 }
588 EXPORT_SYMBOL_GPL(mlx4_mr_free);
589 
590 void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr)
591 {
592 	mlx4_mtt_cleanup(dev, &mr->mtt);
593 }
594 EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup);
595 
596 int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
597 			    u64 iova, u64 size, int npages,
598 			    int page_shift, struct mlx4_mpt_entry *mpt_entry)
599 {
600 	int err;
601 
602 	mpt_entry->start       = cpu_to_be64(iova);
603 	mpt_entry->length      = cpu_to_be64(size);
604 	mpt_entry->entity_size = cpu_to_be32(page_shift);
605 
606 	err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
607 	if (err)
608 		return err;
609 
610 	mpt_entry->pd_flags &= cpu_to_be32(MLX4_MPT_PD_MASK |
611 					   MLX4_MPT_PD_FLAG_EN_INV);
612 	mpt_entry->flags    &= cpu_to_be32(MLX4_MPT_FLAG_FREE |
613 					   MLX4_MPT_FLAG_SW_OWNS);
614 	if (mr->mtt.order < 0) {
615 		mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
616 		mpt_entry->mtt_addr = 0;
617 	} else {
618 		mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
619 						  &mr->mtt));
620 		if (mr->mtt.page_shift == 0)
621 			mpt_entry->mtt_sz    = cpu_to_be32(1 << mr->mtt.order);
622 	}
623 	if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
624 		/* fast register MR in free state */
625 		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
626 		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
627 						   MLX4_MPT_PD_FLAG_RAE);
628 	} else {
629 		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
630 	}
631 	mr->enabled = MLX4_MPT_EN_SW;
632 
633 	return 0;
634 }
635 EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write);
636 
637 int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
638 {
639 	struct mlx4_cmd_mailbox *mailbox;
640 	struct mlx4_mpt_entry *mpt_entry;
641 	int err;
642 
643 	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL);
644 	if (err)
645 		return err;
646 
647 	mailbox = mlx4_alloc_cmd_mailbox(dev);
648 	if (IS_ERR(mailbox)) {
649 		err = PTR_ERR(mailbox);
650 		goto err_table;
651 	}
652 	mpt_entry = mailbox->buf;
653 	mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO	 |
654 				       MLX4_MPT_FLAG_REGION	 |
655 				       mr->access);
656 
657 	mpt_entry->key	       = cpu_to_be32(key_to_hw_index(mr->key));
658 	mpt_entry->pd_flags    = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV);
659 	mpt_entry->start       = cpu_to_be64(mr->iova);
660 	mpt_entry->length      = cpu_to_be64(mr->size);
661 	mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
662 
663 	if (mr->mtt.order < 0) {
664 		mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
665 		mpt_entry->mtt_addr = 0;
666 	} else {
667 		mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
668 						  &mr->mtt));
669 	}
670 
671 	if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
672 		/* fast register MR in free state */
673 		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
674 		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
675 						   MLX4_MPT_PD_FLAG_RAE);
676 		mpt_entry->mtt_sz    = cpu_to_be32(1 << mr->mtt.order);
677 	} else {
678 		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
679 	}
680 
681 	err = mlx4_SW2HW_MPT(dev, mailbox,
682 			     key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1));
683 	if (err) {
684 		mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
685 		goto err_cmd;
686 	}
687 	mr->enabled = MLX4_MPT_EN_HW;
688 
689 	mlx4_free_cmd_mailbox(dev, mailbox);
690 
691 	return 0;
692 
693 err_cmd:
694 	mlx4_free_cmd_mailbox(dev, mailbox);
695 
696 err_table:
697 	mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key));
698 	return err;
699 }
700 EXPORT_SYMBOL_GPL(mlx4_mr_enable);
701 
702 static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
703 				int start_index, int npages, u64 *page_list)
704 {
705 	struct mlx4_priv *priv = mlx4_priv(dev);
706 	__be64 *mtts;
707 	dma_addr_t dma_handle;
708 	int i;
709 
710 	mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->offset +
711 			       start_index, &dma_handle);
712 
713 	if (!mtts)
714 		return -ENOMEM;
715 
716 	dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
717 				npages * sizeof (u64), DMA_TO_DEVICE);
718 
719 	for (i = 0; i < npages; ++i)
720 		mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
721 
722 	dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
723 				   npages * sizeof (u64), DMA_TO_DEVICE);
724 
725 	return 0;
726 }
727 
728 int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
729 		     int start_index, int npages, u64 *page_list)
730 {
731 	int err = 0;
732 	int chunk;
733 	int mtts_per_page;
734 	int max_mtts_first_page;
735 
736 	/* compute how may mtts fit in the first page */
737 	mtts_per_page = PAGE_SIZE / sizeof(u64);
738 	max_mtts_first_page = mtts_per_page - (mtt->offset + start_index)
739 			      % mtts_per_page;
740 
741 	chunk = min_t(int, max_mtts_first_page, npages);
742 
743 	while (npages > 0) {
744 		err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
745 		if (err)
746 			return err;
747 		npages      -= chunk;
748 		start_index += chunk;
749 		page_list   += chunk;
750 
751 		chunk = min_t(int, mtts_per_page, npages);
752 	}
753 	return err;
754 }
755 
756 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
757 		   int start_index, int npages, u64 *page_list)
758 {
759 	struct mlx4_cmd_mailbox *mailbox = NULL;
760 	__be64 *inbox = NULL;
761 	int chunk;
762 	int err = 0;
763 	int i;
764 
765 	if (mtt->order < 0)
766 		return -EINVAL;
767 
768 	if (mlx4_is_mfunc(dev)) {
769 		mailbox = mlx4_alloc_cmd_mailbox(dev);
770 		if (IS_ERR(mailbox))
771 			return PTR_ERR(mailbox);
772 		inbox = mailbox->buf;
773 
774 		while (npages > 0) {
775 			chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) - 2,
776 				      npages);
777 			inbox[0] = cpu_to_be64(mtt->offset + start_index);
778 			inbox[1] = 0;
779 			for (i = 0; i < chunk; ++i)
780 				inbox[i + 2] = cpu_to_be64(page_list[i] |
781 					       MLX4_MTT_FLAG_PRESENT);
782 			err = mlx4_WRITE_MTT(dev, mailbox, chunk);
783 			if (err) {
784 				mlx4_free_cmd_mailbox(dev, mailbox);
785 				return err;
786 			}
787 
788 			npages      -= chunk;
789 			start_index += chunk;
790 			page_list   += chunk;
791 		}
792 		mlx4_free_cmd_mailbox(dev, mailbox);
793 		return err;
794 	}
795 
796 	return __mlx4_write_mtt(dev, mtt, start_index, npages, page_list);
797 }
798 EXPORT_SYMBOL_GPL(mlx4_write_mtt);
799 
800 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
801 		       struct mlx4_buf *buf, gfp_t gfp)
802 {
803 	u64 *page_list;
804 	int err;
805 	int i;
806 
807 	page_list = kmalloc(buf->npages * sizeof *page_list,
808 			    gfp);
809 	if (!page_list)
810 		return -ENOMEM;
811 
812 	for (i = 0; i < buf->npages; ++i)
813 		if (buf->nbufs == 1)
814 			page_list[i] = buf->direct.map + (i << buf->page_shift);
815 		else
816 			page_list[i] = buf->page_list[i].map;
817 
818 	err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list);
819 
820 	kfree(page_list);
821 	return err;
822 }
823 EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);
824 
825 int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type,
826 		  struct mlx4_mw *mw)
827 {
828 	u32 index;
829 
830 	if ((type == MLX4_MW_TYPE_1 &&
831 	     !(dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)) ||
832 	     (type == MLX4_MW_TYPE_2 &&
833 	     !(dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)))
834 		return -ENOTSUPP;
835 
836 	index = mlx4_mpt_reserve(dev);
837 	if (index == -1)
838 		return -ENOMEM;
839 
840 	mw->key	    = hw_index_to_key(index);
841 	mw->pd      = pd;
842 	mw->type    = type;
843 	mw->enabled = MLX4_MPT_DISABLED;
844 
845 	return 0;
846 }
847 EXPORT_SYMBOL_GPL(mlx4_mw_alloc);
848 
849 int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw)
850 {
851 	struct mlx4_cmd_mailbox *mailbox;
852 	struct mlx4_mpt_entry *mpt_entry;
853 	int err;
854 
855 	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL);
856 	if (err)
857 		return err;
858 
859 	mailbox = mlx4_alloc_cmd_mailbox(dev);
860 	if (IS_ERR(mailbox)) {
861 		err = PTR_ERR(mailbox);
862 		goto err_table;
863 	}
864 	mpt_entry = mailbox->buf;
865 
866 	/* Note that the MLX4_MPT_FLAG_REGION bit in mpt_entry->flags is turned
867 	 * off, thus creating a memory window and not a memory region.
868 	 */
869 	mpt_entry->key	       = cpu_to_be32(key_to_hw_index(mw->key));
870 	mpt_entry->pd_flags    = cpu_to_be32(mw->pd);
871 	if (mw->type == MLX4_MW_TYPE_2) {
872 		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
873 		mpt_entry->qpn       = cpu_to_be32(MLX4_MPT_QP_FLAG_BOUND_QP);
874 		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_EN_INV);
875 	}
876 
877 	err = mlx4_SW2HW_MPT(dev, mailbox,
878 			     key_to_hw_index(mw->key) &
879 			     (dev->caps.num_mpts - 1));
880 	if (err) {
881 		mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
882 		goto err_cmd;
883 	}
884 	mw->enabled = MLX4_MPT_EN_HW;
885 
886 	mlx4_free_cmd_mailbox(dev, mailbox);
887 
888 	return 0;
889 
890 err_cmd:
891 	mlx4_free_cmd_mailbox(dev, mailbox);
892 
893 err_table:
894 	mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key));
895 	return err;
896 }
897 EXPORT_SYMBOL_GPL(mlx4_mw_enable);
898 
899 void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw)
900 {
901 	int err;
902 
903 	if (mw->enabled == MLX4_MPT_EN_HW) {
904 		err = mlx4_HW2SW_MPT(dev, NULL,
905 				     key_to_hw_index(mw->key) &
906 				     (dev->caps.num_mpts - 1));
907 		if (err)
908 			mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err);
909 
910 		mw->enabled = MLX4_MPT_EN_SW;
911 	}
912 	if (mw->enabled)
913 		mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key));
914 	mlx4_mpt_release(dev, key_to_hw_index(mw->key));
915 }
916 EXPORT_SYMBOL_GPL(mlx4_mw_free);
917 
918 int mlx4_init_mr_table(struct mlx4_dev *dev)
919 {
920 	struct mlx4_priv *priv = mlx4_priv(dev);
921 	struct mlx4_mr_table *mr_table = &priv->mr_table;
922 	int err;
923 
924 	/* Nothing to do for slaves - all MR handling is forwarded
925 	* to the master */
926 	if (mlx4_is_slave(dev))
927 		return 0;
928 
929 	if (!is_power_of_2(dev->caps.num_mpts))
930 		return -EINVAL;
931 
932 	err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts,
933 			       ~0, dev->caps.reserved_mrws, 0);
934 	if (err)
935 		return err;
936 
937 	err = mlx4_buddy_init(&mr_table->mtt_buddy,
938 			      ilog2((u32)dev->caps.num_mtts /
939 			      (1 << log_mtts_per_seg)));
940 	if (err)
941 		goto err_buddy;
942 
943 	if (dev->caps.reserved_mtts) {
944 		priv->reserved_mtts =
945 			mlx4_alloc_mtt_range(dev,
946 					     fls(dev->caps.reserved_mtts - 1));
947 		if (priv->reserved_mtts < 0) {
948 			mlx4_warn(dev, "MTT table of order %u is too small\n",
949 				  mr_table->mtt_buddy.max_order);
950 			err = -ENOMEM;
951 			goto err_reserve_mtts;
952 		}
953 	}
954 
955 	return 0;
956 
957 err_reserve_mtts:
958 	mlx4_buddy_cleanup(&mr_table->mtt_buddy);
959 
960 err_buddy:
961 	mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
962 
963 	return err;
964 }
965 
966 void mlx4_cleanup_mr_table(struct mlx4_dev *dev)
967 {
968 	struct mlx4_priv *priv = mlx4_priv(dev);
969 	struct mlx4_mr_table *mr_table = &priv->mr_table;
970 
971 	if (mlx4_is_slave(dev))
972 		return;
973 	if (priv->reserved_mtts >= 0)
974 		mlx4_free_mtt_range(dev, priv->reserved_mtts,
975 				    fls(dev->caps.reserved_mtts - 1));
976 	mlx4_buddy_cleanup(&mr_table->mtt_buddy);
977 	mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
978 }
979 
980 static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list,
981 				  int npages, u64 iova)
982 {
983 	int i, page_mask;
984 
985 	if (npages > fmr->max_pages)
986 		return -EINVAL;
987 
988 	page_mask = (1 << fmr->page_shift) - 1;
989 
990 	/* We are getting page lists, so va must be page aligned. */
991 	if (iova & page_mask)
992 		return -EINVAL;
993 
994 	/* Trust the user not to pass misaligned data in page_list */
995 	if (0)
996 		for (i = 0; i < npages; ++i) {
997 			if (page_list[i] & ~page_mask)
998 				return -EINVAL;
999 		}
1000 
1001 	if (fmr->maps >= fmr->max_maps)
1002 		return -EINVAL;
1003 
1004 	return 0;
1005 }
1006 
/*
 * Map @npages physical pages from @page_list at virtual address @iova
 * through the fast memory region @fmr.
 *
 * The request is validated with mlx4_check_fmr() and its error returned
 * unchanged on failure.  Otherwise the map count is bumped and a new key
 * is produced by adding the number of MPTs to the index form of the
 * current key; the new key is stored in fmr->mr.key and returned through
 * both @lkey and @rkey.
 *
 * The MPT and MTT entries are then rewritten in place.  The first byte
 * of the MPT is switched to MLX4_MPT_STATUS_SW before anything else is
 * touched and back to MLX4_MPT_STATUS_HW only after the MTT entries and
 * the MPT's key, lkey, length and start have been written; write
 * barriers separate each of these stages, so the order of the statements
 * below must not be changed.
 *
 * Returns 0 on success.
 */
int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
		      int npages, u64 iova, u32 *lkey, u32 *rkey)
{
	u32 key;
	int i, err;

	err = mlx4_check_fmr(fmr, page_list, npages, iova);
	if (err)
		return err;

	++fmr->maps;

	/* step the key by num_mpts in index form, then convert back */
	key = key_to_hw_index(fmr->mr.key);
	key += dev->caps.num_mpts;
	*lkey = *rkey = fmr->mr.key = hw_index_to_key(key);

	/* the status lives in the first byte of the MPT entry */
	*(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;

	/* Make sure MPT status is visible before writing MTT entries */
	wmb();

	dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle,
				npages * sizeof(u64), DMA_TO_DEVICE);

	for (i = 0; i < npages; ++i)
		fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);

	dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle,
				   npages * sizeof(u64), DMA_TO_DEVICE);

	fmr->mpt->key    = cpu_to_be32(key);
	fmr->mpt->lkey   = cpu_to_be32(key);
	fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift));
	fmr->mpt->start  = cpu_to_be64(iova);

	/* Make sure MTT entries are visible before setting MPT status */
	wmb();

	*(u8 *) fmr->mpt = MLX4_MPT_STATUS_HW;

	/* Make sure MPT status is visible before consumer can use FMR */
	wmb();

	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr);
1053 
1054 int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
1055 		   int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
1056 {
1057 	struct mlx4_priv *priv = mlx4_priv(dev);
1058 	int err = -ENOMEM;
1059 
1060 	if (max_maps > dev->caps.max_fmr_maps)
1061 		return -EINVAL;
1062 
1063 	if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
1064 		return -EINVAL;
1065 
1066 	/* All MTTs must fit in the same page */
1067 	if (max_pages * sizeof *fmr->mtts > PAGE_SIZE)
1068 		return -EINVAL;
1069 
1070 	fmr->page_shift = page_shift;
1071 	fmr->max_pages  = max_pages;
1072 	fmr->max_maps   = max_maps;
1073 	fmr->maps = 0;
1074 
1075 	err = mlx4_mr_alloc(dev, pd, 0, 0, access, max_pages,
1076 			    page_shift, &fmr->mr);
1077 	if (err)
1078 		return err;
1079 
1080 	fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
1081 				    fmr->mr.mtt.offset,
1082 				    &fmr->dma_handle);
1083 
1084 	if (!fmr->mtts) {
1085 		err = -ENOMEM;
1086 		goto err_free;
1087 	}
1088 
1089 	return 0;
1090 
1091 err_free:
1092 	(void) mlx4_mr_free(dev, &fmr->mr);
1093 	return err;
1094 }
1095 EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);
1096 
1097 int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
1098 {
1099 	struct mlx4_priv *priv = mlx4_priv(dev);
1100 	int err;
1101 
1102 	err = mlx4_mr_enable(dev, &fmr->mr);
1103 	if (err)
1104 		return err;
1105 
1106 	fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table,
1107 				    key_to_hw_index(fmr->mr.key), NULL);
1108 	if (!fmr->mpt)
1109 		return -ENOMEM;
1110 
1111 	return 0;
1112 }
1113 EXPORT_SYMBOL_GPL(mlx4_fmr_enable);
1114 
1115 void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
1116 		    u32 *lkey, u32 *rkey)
1117 {
1118 	struct mlx4_cmd_mailbox *mailbox;
1119 	int err;
1120 
1121 	if (!fmr->maps)
1122 		return;
1123 
1124 	fmr->maps = 0;
1125 
1126 	mailbox = mlx4_alloc_cmd_mailbox(dev);
1127 	if (IS_ERR(mailbox)) {
1128 		err = PTR_ERR(mailbox);
1129 		pr_warn("mlx4_ib: mlx4_alloc_cmd_mailbox failed (%d)\n", err);
1130 		return;
1131 	}
1132 
1133 	err = mlx4_HW2SW_MPT(dev, NULL,
1134 			     key_to_hw_index(fmr->mr.key) &
1135 			     (dev->caps.num_mpts - 1));
1136 	mlx4_free_cmd_mailbox(dev, mailbox);
1137 	if (err) {
1138 		pr_warn("mlx4_ib: mlx4_HW2SW_MPT failed (%d)\n", err);
1139 		return;
1140 	}
1141 	fmr->mr.enabled = MLX4_MPT_EN_SW;
1142 }
1143 EXPORT_SYMBOL_GPL(mlx4_fmr_unmap);
1144 
1145 int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
1146 {
1147 	int ret;
1148 
1149 	if (fmr->maps)
1150 		return -EBUSY;
1151 
1152 	ret = mlx4_mr_free(dev, &fmr->mr);
1153 	if (ret)
1154 		return ret;
1155 	fmr->mr.enabled = MLX4_MPT_DISABLED;
1156 
1157 	return 0;
1158 }
1159 EXPORT_SYMBOL_GPL(mlx4_fmr_free);
1160 
1161 int mlx4_SYNC_TPT(struct mlx4_dev *dev)
1162 {
1163 	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000,
1164 			MLX4_CMD_NATIVE);
1165 }
1166 EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT);
1167