xref: /openbmc/linux/drivers/infiniband/hw/mthca/mthca_mr.c (revision df2634f43f5106947f3735a0b61a6527a4b278cd)
/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/slab.h>
#include <linux/errno.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"

struct mthca_mtt {
	struct mthca_buddy *buddy;
	int                 order;
	u32                 first_seg;
};

/*
 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
 */
struct mthca_mpt_entry {
	__be32 flags;
	__be32 page_size;
	__be32 key;
	__be32 pd;
	__be64 start;
	__be64 length;
	__be32 lkey;
	__be32 window_count;
	__be32 window_count_limit;
	__be64 mtt_seg;
	__be32 mtt_sz;		/* Arbel only */
	u32    reserved[2];
} __attribute__((packed));

#define MTHCA_MPT_FLAG_SW_OWNS       (0xfUL << 28)
#define MTHCA_MPT_FLAG_MIO           (1 << 17)
#define MTHCA_MPT_FLAG_BIND_ENABLE   (1 << 15)
#define MTHCA_MPT_FLAG_PHYSICAL      (1 <<  9)
#define MTHCA_MPT_FLAG_REGION        (1 <<  8)

#define MTHCA_MTT_FLAG_PRESENT       1

#define MTHCA_MPT_STATUS_SW 0xF0
#define MTHCA_MPT_STATUS_HW 0x00

#define SINAI_FMR_KEY_INC 0x1000000

/*
 * Buddy allocator for MTT segments (currently not very efficient
 * since it doesn't keep a free list and just searches linearly
 * through the bitmaps)
 */
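
/*
 * Worked example of the allocation path below: with max_order = 4 and only
 * the whole order-4 block free, a request for order = 2 takes the order-4
 * block (seg 0) and splits it on the way down, marking the order-3 buddy
 * (segments 8-15) and the order-2 buddy (segments 4-7) free.  The caller
 * gets back seg << order = 0, the index of the first order-0 MTT segment
 * covered by the allocation.
 */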

static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
{
	int o;
	int m;
	u32 seg;

	spin_lock(&buddy->lock);

	for (o = order; o <= buddy->max_order; ++o)
		if (buddy->num_free[o]) {
			m = 1 << (buddy->max_order - o);
			seg = find_first_bit(buddy->bits[o], m);
			if (seg < m)
				goto found;
		}

	spin_unlock(&buddy->lock);
	return -1;

 found:
	clear_bit(seg, buddy->bits[o]);
	--buddy->num_free[o];

	while (o > order) {
		--o;
		seg <<= 1;
		set_bit(seg ^ 1, buddy->bits[o]);
		++buddy->num_free[o];
	}

	spin_unlock(&buddy->lock);

	seg <<= order;

	return seg;
}

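/*
 * Freeing is the inverse walk: while the buddy of the freed block is also
 * free at the current order, the pair is merged and the scan moves up one
 * order, so released MTT ranges coalesce back into larger blocks.
 */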
static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
{
	seg >>= order;

	spin_lock(&buddy->lock);

	while (test_bit(seg ^ 1, buddy->bits[order])) {
		clear_bit(seg ^ 1, buddy->bits[order]);
		--buddy->num_free[order];
		seg >>= 1;
		++order;
	}

	set_bit(seg, buddy->bits[order]);
	++buddy->num_free[order];

	spin_unlock(&buddy->lock);
}

static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
{
	int i, s;

	buddy->max_order = max_order;
	spin_lock_init(&buddy->lock);

	buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
			      GFP_KERNEL);
	buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int),
				  GFP_KERNEL);
	if (!buddy->bits || !buddy->num_free)
		goto err_out;

	for (i = 0; i <= buddy->max_order; ++i) {
		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
		buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
		if (!buddy->bits[i])
			goto err_out_free;
		bitmap_zero(buddy->bits[i],
			    1 << (buddy->max_order - i));
	}

	set_bit(0, buddy->bits[buddy->max_order]);
	buddy->num_free[buddy->max_order] = 1;

	return 0;

err_out_free:
	for (i = 0; i <= buddy->max_order; ++i)
		kfree(buddy->bits[i]);

err_out:
	kfree(buddy->bits);
	kfree(buddy->num_free);

	return -ENOMEM;
}

static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
{
	int i;

	for (i = 0; i <= buddy->max_order; ++i)
		kfree(buddy->bits[i]);

	kfree(buddy->bits);
	kfree(buddy->num_free);
}

static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
				 struct mthca_buddy *buddy)
{
	u32 seg = mthca_buddy_alloc(buddy, order);

	if (seg == -1)
		return -1;

	if (mthca_is_memfree(dev))
		if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
					  seg + (1 << order) - 1)) {
			mthca_buddy_free(buddy, seg, order);
			seg = -1;
		}

	return seg;
}

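/*
 * One MTT segment holds mtt_seg_size / 8 entries (each entry is a 64-bit
 * page address).  The loop below rounds the requested number of entries up
 * to a power-of-two number of segments and records it as a buddy order,
 * with order 0 meaning a single segment.
 */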
static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
					   struct mthca_buddy *buddy)
{
	struct mthca_mtt *mtt;
	int i;

	if (size <= 0)
		return ERR_PTR(-EINVAL);

	mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
	if (!mtt)
		return ERR_PTR(-ENOMEM);

	mtt->buddy = buddy;
	mtt->order = 0;
	for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1)
		++mtt->order;

	mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
	if (mtt->first_seg == -1) {
		kfree(mtt);
		return ERR_PTR(-ENOMEM);
	}

	return mtt;
}

struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
{
	return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
}

void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
{
	if (!mtt)
		return;

	mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);

	mthca_table_put_range(dev, dev->mr_table.mtt_table,
			      mtt->first_seg,
			      mtt->first_seg + (1 << mtt->order) - 1);

	kfree(mtt);
}

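/*
 * Post MTT entries through the WRITE_MTT firmware command.  The mailbox
 * starts with the 64-bit target address (MTT base + this MTT's segment
 * offset + start_index * 8) and a reserved 64-bit word, followed by up to
 * MTHCA_MAILBOX_SIZE / 8 - 2 entries, each a page address tagged with
 * MTHCA_MTT_FLAG_PRESENT.  Longer lists are split across several commands.
 */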
static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
			     int start_index, u64 *buffer_list, int list_len)
{
	struct mthca_mailbox *mailbox;
	__be64 *mtt_entry;
	int err = 0;
	u8 status;
	int i;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);
	mtt_entry = mailbox->buf;

	while (list_len > 0) {
		mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
					   mtt->first_seg * dev->limits.mtt_seg_size +
					   start_index * 8);
		mtt_entry[1] = 0;
		for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
			mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
						       MTHCA_MTT_FLAG_PRESENT);

		/*
		 * If we have an odd number of entries to write, add
		 * one more dummy entry for firmware efficiency.
		 */
		if (i & 1)
			mtt_entry[i + 2] = 0;

		err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status);
		if (err) {
			mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
			goto out;
		}
		if (status) {
			mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
				   status);
			err = -EINVAL;
			goto out;
		}

		list_len    -= i;
		start_index += i;
		buffer_list += i;
	}

out:
	mthca_free_mailbox(dev, mailbox);
	return err;
}

int mthca_write_mtt_size(struct mthca_dev *dev)
{
	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
	    !(dev->mthca_flags & MTHCA_FLAG_FMR))
		/*
		 * Be friendly to WRITE_MTT command
		 * and leave two empty slots for the
		 * index and reserved fields of the
		 * mailbox.
		 */
		return PAGE_SIZE / sizeof (u64) - 2;

	/* For Arbel, all MTTs must fit in the same page. */
	return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
}

static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
				      struct mthca_mtt *mtt, int start_index,
				      u64 *buffer_list, int list_len)
{
	u64 __iomem *mtts;
	int i;

	mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size +
		start_index * sizeof (u64);
	for (i = 0; i < list_len; ++i)
		mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT),
				  mtts + i);
}

static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
				      struct mthca_mtt *mtt, int start_index,
				      u64 *buffer_list, int list_len)
{
	__be64 *mtts;
	dma_addr_t dma_handle;
	int i;
	int s = start_index * sizeof (u64);

	/* For Arbel, all MTTs must fit in the same page. */
	BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
	/* Require full segments */
	BUG_ON(s % dev->limits.mtt_seg_size);

	mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
				s / dev->limits.mtt_seg_size, &dma_handle);

	BUG_ON(!mtts);

	dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
				list_len * sizeof (u64), DMA_TO_DEVICE);

	for (i = 0; i < list_len; ++i)
		mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);

	dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
				   list_len * sizeof (u64), DMA_TO_DEVICE);
}

int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
		    int start_index, u64 *buffer_list, int list_len)
{
	int size = mthca_write_mtt_size(dev);
	int chunk;

	if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
	    !(dev->mthca_flags & MTHCA_FLAG_FMR))
		return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);

	while (list_len > 0) {
		chunk = min(size, list_len);
		if (mthca_is_memfree(dev))
			mthca_arbel_write_mtt_seg(dev, mtt, start_index,
						  buffer_list, chunk);
		else
			mthca_tavor_write_mtt_seg(dev, mtt, start_index,
						  buffer_list, chunk);

		list_len    -= chunk;
		start_index += chunk;
		buffer_list += chunk;
	}

	return 0;
}

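/*
 * Memory keys vs. MPT indices: on Tavor the key is used directly as the
 * MPT index.  On Arbel the bytes are rotated, so the top byte of the index
 * becomes the bottom byte of the key, e.g. index 0x00abcdef <-> key
 * 0xabcdef00.  The FMR remap paths below bump the index by a constant on
 * each remap: the visible key changes, but the MPT slot actually used,
 * index & (num_mpts - 1), stays the same, assuming num_mpts is a power of
 * two (the masking with num_mpts - 1 throughout this file suggests it is).
 */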
static inline u32 tavor_hw_index_to_key(u32 ind)
{
	return ind;
}

static inline u32 tavor_key_to_hw_index(u32 key)
{
	return key;
}

static inline u32 arbel_hw_index_to_key(u32 ind)
{
	return (ind >> 24) | (ind << 8);
}

static inline u32 arbel_key_to_hw_index(u32 key)
{
	return (key << 24) | (key >> 8);
}

static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
{
	if (mthca_is_memfree(dev))
		return arbel_hw_index_to_key(ind);
	else
		return tavor_hw_index_to_key(ind);
}

static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
{
	if (mthca_is_memfree(dev))
		return arbel_key_to_hw_index(key);
	else
		return tavor_key_to_hw_index(key);
}

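/*
 * Sinai "memory key throughput optimization" (MTHCA_FLAG_SINAI_OPT):
 * adjust_key() copies bit 3 of the freshly allocated MPT index into bit 23
 * of the key while keeping the low 23 bits, e.g. index 0x00000008 becomes
 * key 0x00800008.  The particular bit placement is presumably what the
 * optimized firmware expects; the index allocator itself is unaffected.
 */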
static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
{
	if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
		return ((key << 20) & 0x800000) | (key & 0x7fffff);
	else
		return key;
}

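/*
 * Create a memory region: allocate an MPT index (and its ICM entry on
 * mem-free HCAs), build the MPT entry in a command mailbox (software
 * owned, MIO, a region, plus MTHCA_MPT_FLAG_PHYSICAL when there is no
 * MTT), then hand it to the firmware with SW2HW_MPT.  page_size is stored
 * as buffer_size_shift - 12, hence the WARN_ON on oversized shifts.
 */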
int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
		   u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
{
	struct mthca_mailbox *mailbox;
	struct mthca_mpt_entry *mpt_entry;
	u32 key;
	int i;
	int err;
	u8 status;

	WARN_ON(buffer_size_shift >= 32);

	key = mthca_alloc(&dev->mr_table.mpt_alloc);
	if (key == -1)
		return -ENOMEM;
	key = adjust_key(dev, key);
	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

	if (mthca_is_memfree(dev)) {
		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
		if (err)
			goto err_out_mpt_free;
	}

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox)) {
		err = PTR_ERR(mailbox);
		goto err_out_table;
	}
	mpt_entry = mailbox->buf;

	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
				       MTHCA_MPT_FLAG_MIO         |
				       MTHCA_MPT_FLAG_REGION      |
				       access);
	if (!mr->mtt)
		mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);

	mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
	mpt_entry->key       = cpu_to_be32(key);
	mpt_entry->pd        = cpu_to_be32(pd);
	mpt_entry->start     = cpu_to_be64(iova);
	mpt_entry->length    = cpu_to_be64(total_size);

	memset(&mpt_entry->lkey, 0,
	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));

	if (mr->mtt)
		mpt_entry->mtt_seg =
			cpu_to_be64(dev->mr_table.mtt_base +
				    mr->mtt->first_seg * dev->limits.mtt_seg_size);

	if (0) {
		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
			if (i % 4 == 0)
				printk("[%02x] ", i * 4);
			printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
			if ((i + 1) % 4 == 0)
				printk("\n");
		}
	}

	err = mthca_SW2HW_MPT(dev, mailbox,
			      key & (dev->limits.num_mpts - 1),
			      &status);
	if (err) {
		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
		goto err_out_mailbox;
	} else if (status) {
		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
			   status);
		err = -EINVAL;
		goto err_out_mailbox;
	}

	mthca_free_mailbox(dev, mailbox);
	return err;

err_out_mailbox:
	mthca_free_mailbox(dev, mailbox);

err_out_table:
	mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
	mthca_free(&dev->mr_table.mpt_alloc, key);
	return err;
}

int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
			   u32 access, struct mthca_mr *mr)
{
	mr->mtt = NULL;
	return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
}

int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
			u64 *buffer_list, int buffer_size_shift,
			int list_len, u64 iova, u64 total_size,
			u32 access, struct mthca_mr *mr)
{
	int err;

	mr->mtt = mthca_alloc_mtt(dev, list_len);
	if (IS_ERR(mr->mtt))
		return PTR_ERR(mr->mtt);

	err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
	if (err) {
		mthca_free_mtt(dev, mr->mtt);
		return err;
	}

	err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
			     total_size, access, mr);
	if (err)
		mthca_free_mtt(dev, mr->mtt);

	return err;
}

/* Free mr or fmr */
static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
{
	mthca_table_put(dev, dev->mr_table.mpt_table,
			key_to_hw_index(dev, lkey));

	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
}

void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
{
	int err;
	u8 status;

	err = mthca_HW2SW_MPT(dev, NULL,
			      key_to_hw_index(dev, mr->ibmr.lkey) &
			      (dev->limits.num_mpts - 1),
			      &status);
	if (err)
		mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
	else if (status)
		mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
			   status);

	mthca_free_region(dev, mr->ibmr.lkey);
	mthca_free_mtt(dev, mr->mtt);
}

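/*
 * FMR setup differs from a plain MR in that the driver keeps direct
 * pointers to the MPT and MTT entries (ioremapped device memory on Tavor,
 * ICM pages located via mthca_table_find() on mem-free HCAs) so the map
 * and unmap paths below can rewrite them without firmware commands.
 */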
int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
		    u32 access, struct mthca_fmr *mr)
{
	struct mthca_mpt_entry *mpt_entry;
	struct mthca_mailbox *mailbox;
	u64 mtt_seg;
	u32 key, idx;
	u8 status;
	int list_len = mr->attr.max_pages;
	int err = -ENOMEM;
	int i;

	if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32)
		return -EINVAL;

	/* For Arbel, all MTTs must fit in the same page. */
	if (mthca_is_memfree(dev) &&
	    mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
		return -EINVAL;

	mr->maps = 0;

	key = mthca_alloc(&dev->mr_table.mpt_alloc);
	if (key == -1)
		return -ENOMEM;
	key = adjust_key(dev, key);

	idx = key & (dev->limits.num_mpts - 1);
	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

	if (mthca_is_memfree(dev)) {
		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
		if (err)
			goto err_out_mpt_free;

		mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL);
		BUG_ON(!mr->mem.arbel.mpt);
	} else
		mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
			sizeof *(mr->mem.tavor.mpt) * idx;

	mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
	if (IS_ERR(mr->mtt)) {
		err = PTR_ERR(mr->mtt);
		goto err_out_table;
	}

	mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size;

	if (mthca_is_memfree(dev)) {
		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
						      mr->mtt->first_seg,
						      &mr->mem.arbel.dma_handle);
		BUG_ON(!mr->mem.arbel.mtts);
	} else
		mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox)) {
		err = PTR_ERR(mailbox);
		goto err_out_free_mtt;
	}

	mpt_entry = mailbox->buf;

	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
				       MTHCA_MPT_FLAG_MIO         |
				       MTHCA_MPT_FLAG_REGION      |
				       access);

	mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12);
	mpt_entry->key       = cpu_to_be32(key);
	mpt_entry->pd        = cpu_to_be32(pd);
	memset(&mpt_entry->start, 0,
	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
	mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);

	if (0) {
		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
			if (i % 4 == 0)
				printk("[%02x] ", i * 4);
			printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
			if ((i + 1) % 4 == 0)
				printk("\n");
		}
	}

	err = mthca_SW2HW_MPT(dev, mailbox,
			      key & (dev->limits.num_mpts - 1),
			      &status);
	if (err) {
		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
		goto err_out_mailbox_free;
	}
	if (status) {
		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
			   status);
		err = -EINVAL;
		goto err_out_mailbox_free;
	}

	mthca_free_mailbox(dev, mailbox);
	return 0;

err_out_mailbox_free:
	mthca_free_mailbox(dev, mailbox);

err_out_free_mtt:
	mthca_free_mtt(dev, mr->mtt);

err_out_table:
	mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
	mthca_free(&dev->mr_table.mpt_alloc, key);
	return err;
}

int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	if (fmr->maps)
		return -EBUSY;

	mthca_free_region(dev, fmr->ibmr.lkey);
	mthca_free_mtt(dev, fmr->mtt);

	return 0;
}

static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
				  int list_len, u64 iova)
{
	int i, page_mask;

	if (list_len > fmr->attr.max_pages)
		return -EINVAL;

	page_mask = (1 << fmr->attr.page_shift) - 1;

	/* We are getting page lists, so va must be page aligned. */
	if (iova & page_mask)
		return -EINVAL;

	/* Trust the user not to pass misaligned data in page_list */
	if (0)
		for (i = 0; i < list_len; ++i) {
			if (page_list[i] & ~page_mask)
				return -EINVAL;
		}

	if (fmr->maps >= fmr->attr.max_maps)
		return -EINVAL;

	return 0;
}

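/*
 * Tavor FMR remap: derive the next key, flip the MPT status byte to
 * software ownership, rewrite the MTT entries and the key/start/length
 * fields over MMIO, then flip the status byte back to hardware ownership
 * so the HCA picks up the new translation.
 */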
int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			     int list_len, u64 iova)
{
	struct mthca_fmr *fmr = to_mfmr(ibfmr);
	struct mthca_dev *dev = to_mdev(ibfmr->device);
	struct mthca_mpt_entry mpt_entry;
	u32 key;
	int i, err;

	err = mthca_check_fmr(fmr, page_list, list_len, iova);
	if (err)
		return err;

	++fmr->maps;

	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
	key += dev->limits.num_mpts;
	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);

	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);

	for (i = 0; i < list_len; ++i) {
		__be64 mtt_entry = cpu_to_be64(page_list[i] |
					       MTHCA_MTT_FLAG_PRESENT);
		mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
	}

	mpt_entry.lkey   = cpu_to_be32(key);
	mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
	mpt_entry.start  = cpu_to_be64(iova);

	__raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
	memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
		    offsetof(struct mthca_mpt_entry, window_count) -
		    offsetof(struct mthca_mpt_entry, start));

	writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);

	return 0;
}

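/*
 * Mem-free (Arbel) FMR remap: the MPT and MTTs live in ICM in host memory,
 * so they are updated with plain stores, ordered by wmb() and bracketed by
 * dma_sync_single_for_cpu()/_for_device() around the MTT writes, using the
 * same SW -> update -> HW status sequence as the Tavor path.
 */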
int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			     int list_len, u64 iova)
{
	struct mthca_fmr *fmr = to_mfmr(ibfmr);
	struct mthca_dev *dev = to_mdev(ibfmr->device);
	u32 key;
	int i, err;

	err = mthca_check_fmr(fmr, page_list, list_len, iova);
	if (err)
		return err;

	++fmr->maps;

	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
	if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
		key += SINAI_FMR_KEY_INC;
	else
		key += dev->limits.num_mpts;
	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);

	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;

	wmb();

	dma_sync_single_for_cpu(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
				list_len * sizeof(u64), DMA_TO_DEVICE);

	for (i = 0; i < list_len; ++i)
		fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
						     MTHCA_MTT_FLAG_PRESENT);

	dma_sync_single_for_device(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
				   list_len * sizeof(u64), DMA_TO_DEVICE);

	fmr->mem.arbel.mpt->key    = cpu_to_be32(key);
	fmr->mem.arbel.mpt->lkey   = cpu_to_be32(key);
	fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
	fmr->mem.arbel.mpt->start  = cpu_to_be64(iova);

	wmb();

	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;

	wmb();

	return 0;
}

void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	if (!fmr->maps)
		return;

	fmr->maps = 0;

	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
}

void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	if (!fmr->maps)
		return;

	fmr->maps = 0;

	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
}

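/*
 * One-time MR table setup: set up the MPT index allocator and the MTT
 * buddy allocator, ioremap the Tavor FMR MPT/MTT windows in BAR 4 when
 * FMRs are supported on a non-mem-free device, optionally give FMRs a
 * separate MTT buddy (reserving the matching range in the main buddy so
 * regular MRs stay clear of it), and finally carve the firmware-reserved
 * MTT segments out of the buddy that covers the start of the table (the
 * FMR buddy when one exists).
 */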
int mthca_init_mr_table(struct mthca_dev *dev)
{
	phys_addr_t addr;
	int mpts, mtts, err, i;

	err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
			       dev->limits.num_mpts,
			       ~0, dev->limits.reserved_mrws);
	if (err)
		return err;

	if (!mthca_is_memfree(dev) &&
	    (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
		dev->limits.fmr_reserved_mtts = 0;
	else
		dev->mthca_flags |= MTHCA_FLAG_FMR;

	if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
		mthca_dbg(dev, "Memory key throughput optimization activated.\n");

	err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
			       fls(dev->limits.num_mtt_segs - 1));

	if (err)
		goto err_mtt_buddy;

	dev->mr_table.tavor_fmr.mpt_base = NULL;
	dev->mr_table.tavor_fmr.mtt_base = NULL;

	if (dev->limits.fmr_reserved_mtts) {
		i = fls(dev->limits.fmr_reserved_mtts - 1);

		if (i >= 31) {
			mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
			err = -EINVAL;
			goto err_fmr_mpt;
		}
		mpts = mtts = 1 << i;
	} else {
		mtts = dev->limits.num_mtt_segs;
		mpts = dev->limits.num_mpts;
	}

	if (!mthca_is_memfree(dev) &&
	    (dev->mthca_flags & MTHCA_FLAG_FMR)) {

		addr = pci_resource_start(dev->pdev, 4) +
			((pci_resource_len(dev->pdev, 4) - 1) &
			 dev->mr_table.mpt_base);

		dev->mr_table.tavor_fmr.mpt_base =
			ioremap(addr, mpts * sizeof(struct mthca_mpt_entry));

		if (!dev->mr_table.tavor_fmr.mpt_base) {
			mthca_warn(dev, "MPT ioremap for FMR failed.\n");
			err = -ENOMEM;
			goto err_fmr_mpt;
		}

		addr = pci_resource_start(dev->pdev, 4) +
			((pci_resource_len(dev->pdev, 4) - 1) &
			 dev->mr_table.mtt_base);

		dev->mr_table.tavor_fmr.mtt_base =
			ioremap(addr, mtts * dev->limits.mtt_seg_size);
		if (!dev->mr_table.tavor_fmr.mtt_base) {
			mthca_warn(dev, "MTT ioremap for FMR failed.\n");
			err = -ENOMEM;
			goto err_fmr_mtt;
		}
	}

	if (dev->limits.fmr_reserved_mtts) {
		err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1));
		if (err)
			goto err_fmr_mtt_buddy;

		/* Prevent regular MRs from using FMR keys */
		err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1));
		if (err)
			goto err_reserve_fmr;

		dev->mr_table.fmr_mtt_buddy =
			&dev->mr_table.tavor_fmr.mtt_buddy;
	} else
		dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;

	/* FMR table is always the first, take reserved MTTs out of there */
	if (dev->limits.reserved_mtts) {
		i = fls(dev->limits.reserved_mtts - 1);

		if (mthca_alloc_mtt_range(dev, i,
					  dev->mr_table.fmr_mtt_buddy) == -1) {
			mthca_warn(dev, "MTT table of order %d is too small.\n",
				  dev->mr_table.fmr_mtt_buddy->max_order);
			err = -ENOMEM;
			goto err_reserve_mtts;
		}
	}

	return 0;

err_reserve_mtts:
err_reserve_fmr:
	if (dev->limits.fmr_reserved_mtts)
		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);

err_fmr_mtt_buddy:
	if (dev->mr_table.tavor_fmr.mtt_base)
		iounmap(dev->mr_table.tavor_fmr.mtt_base);

err_fmr_mtt:
	if (dev->mr_table.tavor_fmr.mpt_base)
		iounmap(dev->mr_table.tavor_fmr.mpt_base);

err_fmr_mpt:
	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);

err_mtt_buddy:
	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);

	return err;
}

void mthca_cleanup_mr_table(struct mthca_dev *dev)
{
	/* XXX check if any MRs are still allocated? */
	if (dev->limits.fmr_reserved_mtts)
		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);

	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);

	if (dev->mr_table.tavor_fmr.mtt_base)
		iounmap(dev->mr_table.tavor_fmr.mtt_base);
	if (dev->mr_table.tavor_fmr.mpt_base)
		iounmap(dev->mr_table.tavor_fmr.mpt_base);

	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
}