xref: /openbmc/linux/drivers/iommu/iova.c (revision 4984dd069f2995f239f075199ee8c0d9f020bcd9)
1 /*
2  * Copyright © 2006-2009, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
18  */
19 
20 #include <linux/iova.h>
21 #include <linux/module.h>
22 #include <linux/slab.h>
23 #include <linux/smp.h>
24 #include <linux/bitops.h>
25 #include <linux/cpu.h>
26 
27 /* The anchor node sits above the top of the usable address space */
28 #define IOVA_ANCHOR	~0UL
29 
30 static bool iova_rcache_insert(struct iova_domain *iovad,
31 			       unsigned long pfn,
32 			       unsigned long size);
33 static unsigned long iova_rcache_get(struct iova_domain *iovad,
34 				     unsigned long size,
35 				     unsigned long limit_pfn);
36 static void init_iova_rcaches(struct iova_domain *iovad);
37 static void free_iova_rcaches(struct iova_domain *iovad);
38 static void fq_destroy_all_entries(struct iova_domain *iovad);
39 static void fq_flush_timeout(struct timer_list *t);
40 
41 void
42 init_iova_domain(struct iova_domain *iovad, unsigned long granule,
43 	unsigned long start_pfn)
44 {
45 	/*
46 	 * IOVA granularity will normally be equal to the smallest
47 	 * supported IOMMU page size; both *must* be capable of
48 	 * representing individual CPU pages exactly.
49 	 */
50 	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
51 
52 	spin_lock_init(&iovad->iova_rbtree_lock);
53 	iovad->rbroot = RB_ROOT;
54 	iovad->cached_node = &iovad->anchor.node;
55 	iovad->cached32_node = &iovad->anchor.node;
56 	iovad->granule = granule;
57 	iovad->start_pfn = start_pfn;
58 	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
59 	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
60 	iovad->flush_cb = NULL;
61 	iovad->fq = NULL;
62 	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
63 	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
64 	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
65 	init_iova_rcaches(iovad);
66 }
67 EXPORT_SYMBOL_GPL(init_iova_domain);
68 
69 static void free_iova_flush_queue(struct iova_domain *iovad)
70 {
71 	if (!iovad->fq)
72 		return;
73 
74 	if (timer_pending(&iovad->fq_timer))
75 		del_timer(&iovad->fq_timer);
76 
77 	fq_destroy_all_entries(iovad);
78 
79 	free_percpu(iovad->fq);
80 
81 	iovad->fq         = NULL;
82 	iovad->flush_cb   = NULL;
83 	iovad->entry_dtor = NULL;
84 }
85 
86 int init_iova_flush_queue(struct iova_domain *iovad,
87 			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
88 {
89 	int cpu;
90 
91 	atomic64_set(&iovad->fq_flush_start_cnt,  0);
92 	atomic64_set(&iovad->fq_flush_finish_cnt, 0);
93 
94 	iovad->fq = alloc_percpu(struct iova_fq);
95 	if (!iovad->fq)
96 		return -ENOMEM;
97 
98 	iovad->flush_cb   = flush_cb;
99 	iovad->entry_dtor = entry_dtor;
100 
101 	for_each_possible_cpu(cpu) {
102 		struct iova_fq *fq;
103 
104 		fq = per_cpu_ptr(iovad->fq, cpu);
105 		fq->head = 0;
106 		fq->tail = 0;
107 
108 		spin_lock_init(&fq->lock);
109 	}
110 
111 	timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
112 	atomic_set(&iovad->fq_timer_on, 0);
113 
114 	return 0;
115 }
116 EXPORT_SYMBOL_GPL(init_iova_flush_queue);
117 
118 static struct rb_node *
119 __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
120 {
121 	if (limit_pfn <= iovad->dma_32bit_pfn)
122 		return iovad->cached32_node;
123 
124 	return iovad->cached_node;
125 }
126 
127 static void
128 __cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
129 {
130 	if (new->pfn_hi < iovad->dma_32bit_pfn)
131 		iovad->cached32_node = &new->node;
132 	else
133 		iovad->cached_node = &new->node;
134 }
135 
136 static void
137 __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
138 {
139 	struct iova *cached_iova;
140 
141 	cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
142 	if (free->pfn_hi < iovad->dma_32bit_pfn &&
143 	    free->pfn_lo >= cached_iova->pfn_lo) {
144 		iovad->cached32_node = rb_next(&free->node);
145 		iovad->max32_alloc_size = iovad->dma_32bit_pfn;
146 	}
147 
148 	cached_iova = rb_entry(iovad->cached_node, struct iova, node);
149 	if (free->pfn_lo >= cached_iova->pfn_lo)
150 		iovad->cached_node = rb_next(&free->node);
151 }
152 
153 /* Insert the iova into domain rbtree by holding writer lock */
154 static void
155 iova_insert_rbtree(struct rb_root *root, struct iova *iova,
156 		   struct rb_node *start)
157 {
158 	struct rb_node **new, *parent = NULL;
159 
160 	new = (start) ? &start : &(root->rb_node);
161 	/* Figure out where to put new node */
162 	while (*new) {
163 		struct iova *this = rb_entry(*new, struct iova, node);
164 
165 		parent = *new;
166 
167 		if (iova->pfn_lo < this->pfn_lo)
168 			new = &((*new)->rb_left);
169 		else if (iova->pfn_lo > this->pfn_lo)
170 			new = &((*new)->rb_right);
171 		else {
172 			WARN_ON(1); /* this should not happen */
173 			return;
174 		}
175 	}
176 	/* Add new node and rebalance tree. */
177 	rb_link_node(&iova->node, parent, new);
178 	rb_insert_color(&iova->node, root);
179 }
180 
181 static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
182 		unsigned long size, unsigned long limit_pfn,
183 			struct iova *new, bool size_aligned)
184 {
185 	struct rb_node *curr, *prev;
186 	struct iova *curr_iova;
187 	unsigned long flags;
188 	unsigned long new_pfn;
189 	unsigned long align_mask = ~0UL;
190 
191 	if (size_aligned)
192 		align_mask <<= fls_long(size - 1);
193 
194 	/* Walk the tree backwards */
195 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
196 	if (limit_pfn <= iovad->dma_32bit_pfn &&
197 			size >= iovad->max32_alloc_size)
198 		goto iova32_full;
199 
200 	curr = __get_cached_rbnode(iovad, limit_pfn);
201 	curr_iova = rb_entry(curr, struct iova, node);
202 	do {
203 		limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
204 		new_pfn = (limit_pfn - size) & align_mask;
205 		prev = curr;
206 		curr = rb_prev(curr);
207 		curr_iova = rb_entry(curr, struct iova, node);
208 	} while (curr && new_pfn <= curr_iova->pfn_hi);
209 
210 	if (limit_pfn < size || new_pfn < iovad->start_pfn) {
211 		iovad->max32_alloc_size = size;
212 		goto iova32_full;
213 	}
214 
215 	/* pfn_lo will point to size aligned address if size_aligned is set */
216 	new->pfn_lo = new_pfn;
217 	new->pfn_hi = new->pfn_lo + size - 1;
218 
219 	/* If we have 'prev', it's a valid place to start the insertion. */
220 	iova_insert_rbtree(&iovad->rbroot, new, prev);
221 	__cached_rbnode_insert_update(iovad, new);
222 
223 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
224 	return 0;
225 
226 iova32_full:
227 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
228 	return -ENOMEM;
229 }
230 
231 static struct kmem_cache *iova_cache;
232 static unsigned int iova_cache_users;
233 static DEFINE_MUTEX(iova_cache_mutex);
234 
235 struct iova *alloc_iova_mem(void)
236 {
237 	return kmem_cache_alloc(iova_cache, GFP_ATOMIC);
238 }
239 EXPORT_SYMBOL(alloc_iova_mem);
240 
241 void free_iova_mem(struct iova *iova)
242 {
243 	if (iova->pfn_lo != IOVA_ANCHOR)
244 		kmem_cache_free(iova_cache, iova);
245 }
246 EXPORT_SYMBOL(free_iova_mem);
247 
248 int iova_cache_get(void)
249 {
250 	mutex_lock(&iova_cache_mutex);
251 	if (!iova_cache_users) {
252 		iova_cache = kmem_cache_create(
253 			"iommu_iova", sizeof(struct iova), 0,
254 			SLAB_HWCACHE_ALIGN, NULL);
255 		if (!iova_cache) {
256 			mutex_unlock(&iova_cache_mutex);
257 			printk(KERN_ERR "Couldn't create iova cache\n");
258 			return -ENOMEM;
259 		}
260 	}
261 
262 	iova_cache_users++;
263 	mutex_unlock(&iova_cache_mutex);
264 
265 	return 0;
266 }
267 EXPORT_SYMBOL_GPL(iova_cache_get);
268 
269 void iova_cache_put(void)
270 {
271 	mutex_lock(&iova_cache_mutex);
272 	if (WARN_ON(!iova_cache_users)) {
273 		mutex_unlock(&iova_cache_mutex);
274 		return;
275 	}
276 	iova_cache_users--;
277 	if (!iova_cache_users)
278 		kmem_cache_destroy(iova_cache);
279 	mutex_unlock(&iova_cache_mutex);
280 }
281 EXPORT_SYMBOL_GPL(iova_cache_put);
282 
283 /**
284  * alloc_iova - allocates an iova
285  * @iovad: - iova domain in question
286  * @size: - size of page frames to allocate
287  * @limit_pfn: - max limit address
288  * @size_aligned: - set if size_aligned address range is required
289  * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
290  * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
291  * flag is set then the allocated address iova->pfn_lo will be naturally
292  * aligned on roundup_power_of_two(size).
293  */
294 struct iova *
295 alloc_iova(struct iova_domain *iovad, unsigned long size,
296 	unsigned long limit_pfn,
297 	bool size_aligned)
298 {
299 	struct iova *new_iova;
300 	int ret;
301 
302 	new_iova = alloc_iova_mem();
303 	if (!new_iova)
304 		return NULL;
305 
306 	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
307 			new_iova, size_aligned);
308 
309 	if (ret) {
310 		free_iova_mem(new_iova);
311 		return NULL;
312 	}
313 
314 	return new_iova;
315 }
316 EXPORT_SYMBOL_GPL(alloc_iova);
317 
318 static struct iova *
319 private_find_iova(struct iova_domain *iovad, unsigned long pfn)
320 {
321 	struct rb_node *node = iovad->rbroot.rb_node;
322 
323 	assert_spin_locked(&iovad->iova_rbtree_lock);
324 
325 	while (node) {
326 		struct iova *iova = rb_entry(node, struct iova, node);
327 
328 		if (pfn < iova->pfn_lo)
329 			node = node->rb_left;
330 		else if (pfn > iova->pfn_hi)
331 			node = node->rb_right;
332 		else
333 			return iova;	/* pfn falls within iova's range */
334 	}
335 
336 	return NULL;
337 }
338 
339 static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
340 {
341 	assert_spin_locked(&iovad->iova_rbtree_lock);
342 	__cached_rbnode_delete_update(iovad, iova);
343 	rb_erase(&iova->node, &iovad->rbroot);
344 	free_iova_mem(iova);
345 }
346 
347 /**
348  * find_iova - finds an iova for a given pfn
349  * @iovad: - iova domain in question.
350  * @pfn: - page frame number
351  * This function finds and returns an iova belonging to the
352  * given doamin which matches the given pfn.
353  */
354 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
355 {
356 	unsigned long flags;
357 	struct iova *iova;
358 
359 	/* Take the lock so that no other thread is manipulating the rbtree */
360 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
361 	iova = private_find_iova(iovad, pfn);
362 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
363 	return iova;
364 }
365 EXPORT_SYMBOL_GPL(find_iova);
366 
367 /**
368  * __free_iova - frees the given iova
369  * @iovad: iova domain in question.
370  * @iova: iova in question.
371  * Frees the given iova belonging to the giving domain
372  */
373 void
374 __free_iova(struct iova_domain *iovad, struct iova *iova)
375 {
376 	unsigned long flags;
377 
378 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
379 	private_free_iova(iovad, iova);
380 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
381 }
382 EXPORT_SYMBOL_GPL(__free_iova);
383 
384 /**
385  * free_iova - finds and frees the iova for a given pfn
386  * @iovad: - iova domain in question.
387  * @pfn: - pfn that is allocated previously
388  * This functions finds an iova for a given pfn and then
389  * frees the iova from that domain.
390  */
391 void
392 free_iova(struct iova_domain *iovad, unsigned long pfn)
393 {
394 	struct iova *iova = find_iova(iovad, pfn);
395 
396 	if (iova)
397 		__free_iova(iovad, iova);
398 
399 }
400 EXPORT_SYMBOL_GPL(free_iova);
401 
402 /**
403  * alloc_iova_fast - allocates an iova from rcache
404  * @iovad: - iova domain in question
405  * @size: - size of page frames to allocate
406  * @limit_pfn: - max limit address
407  * @flush_rcache: - set to flush rcache on regular allocation failure
408  * This function tries to satisfy an iova allocation from the rcache,
409  * and falls back to regular allocation on failure. If regular allocation
410  * fails too and the flush_rcache flag is set then the rcache will be flushed.
411 */
412 unsigned long
413 alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
414 		unsigned long limit_pfn, bool flush_rcache)
415 {
416 	unsigned long iova_pfn;
417 	struct iova *new_iova;
418 
419 	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
420 	if (iova_pfn)
421 		return iova_pfn;
422 
423 retry:
424 	new_iova = alloc_iova(iovad, size, limit_pfn, true);
425 	if (!new_iova) {
426 		unsigned int cpu;
427 
428 		if (!flush_rcache)
429 			return 0;
430 
431 		/* Try replenishing IOVAs by flushing rcache. */
432 		flush_rcache = false;
433 		for_each_online_cpu(cpu)
434 			free_cpu_cached_iovas(cpu, iovad);
435 		goto retry;
436 	}
437 
438 	return new_iova->pfn_lo;
439 }
440 EXPORT_SYMBOL_GPL(alloc_iova_fast);
441 
442 /**
443  * free_iova_fast - free iova pfn range into rcache
444  * @iovad: - iova domain in question.
445  * @pfn: - pfn that is allocated previously
446  * @size: - # of pages in range
447  * This functions frees an iova range by trying to put it into the rcache,
448  * falling back to regular iova deallocation via free_iova() if this fails.
449  */
450 void
451 free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
452 {
453 	if (iova_rcache_insert(iovad, pfn, size))
454 		return;
455 
456 	free_iova(iovad, pfn);
457 }
458 EXPORT_SYMBOL_GPL(free_iova_fast);
459 
460 #define fq_ring_for_each(i, fq) \
461 	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
462 
463 static inline bool fq_full(struct iova_fq *fq)
464 {
465 	assert_spin_locked(&fq->lock);
466 	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
467 }
468 
469 static inline unsigned fq_ring_add(struct iova_fq *fq)
470 {
471 	unsigned idx = fq->tail;
472 
473 	assert_spin_locked(&fq->lock);
474 
475 	fq->tail = (idx + 1) % IOVA_FQ_SIZE;
476 
477 	return idx;
478 }
479 
480 static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
481 {
482 	u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
483 	unsigned idx;
484 
485 	assert_spin_locked(&fq->lock);
486 
487 	fq_ring_for_each(idx, fq) {
488 
489 		if (fq->entries[idx].counter >= counter)
490 			break;
491 
492 		if (iovad->entry_dtor)
493 			iovad->entry_dtor(fq->entries[idx].data);
494 
495 		free_iova_fast(iovad,
496 			       fq->entries[idx].iova_pfn,
497 			       fq->entries[idx].pages);
498 
499 		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
500 	}
501 }
502 
503 static void iova_domain_flush(struct iova_domain *iovad)
504 {
505 	atomic64_inc(&iovad->fq_flush_start_cnt);
506 	iovad->flush_cb(iovad);
507 	atomic64_inc(&iovad->fq_flush_finish_cnt);
508 }
509 
510 static void fq_destroy_all_entries(struct iova_domain *iovad)
511 {
512 	int cpu;
513 
514 	/*
515 	 * This code runs when the iova_domain is being detroyed, so don't
516 	 * bother to free iovas, just call the entry_dtor on all remaining
517 	 * entries.
518 	 */
519 	if (!iovad->entry_dtor)
520 		return;
521 
522 	for_each_possible_cpu(cpu) {
523 		struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
524 		int idx;
525 
526 		fq_ring_for_each(idx, fq)
527 			iovad->entry_dtor(fq->entries[idx].data);
528 	}
529 }
530 
531 static void fq_flush_timeout(struct timer_list *t)
532 {
533 	struct iova_domain *iovad = from_timer(iovad, t, fq_timer);
534 	int cpu;
535 
536 	atomic_set(&iovad->fq_timer_on, 0);
537 	iova_domain_flush(iovad);
538 
539 	for_each_possible_cpu(cpu) {
540 		unsigned long flags;
541 		struct iova_fq *fq;
542 
543 		fq = per_cpu_ptr(iovad->fq, cpu);
544 		spin_lock_irqsave(&fq->lock, flags);
545 		fq_ring_free(iovad, fq);
546 		spin_unlock_irqrestore(&fq->lock, flags);
547 	}
548 }
549 
550 void queue_iova(struct iova_domain *iovad,
551 		unsigned long pfn, unsigned long pages,
552 		unsigned long data)
553 {
554 	struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
555 	unsigned long flags;
556 	unsigned idx;
557 
558 	spin_lock_irqsave(&fq->lock, flags);
559 
560 	/*
561 	 * First remove all entries from the flush queue that have already been
562 	 * flushed out on another CPU. This makes the fq_full() check below less
563 	 * likely to be true.
564 	 */
565 	fq_ring_free(iovad, fq);
566 
567 	if (fq_full(fq)) {
568 		iova_domain_flush(iovad);
569 		fq_ring_free(iovad, fq);
570 	}
571 
572 	idx = fq_ring_add(fq);
573 
574 	fq->entries[idx].iova_pfn = pfn;
575 	fq->entries[idx].pages    = pages;
576 	fq->entries[idx].data     = data;
577 	fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
578 
579 	spin_unlock_irqrestore(&fq->lock, flags);
580 
581 	if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
582 		mod_timer(&iovad->fq_timer,
583 			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
584 }
585 EXPORT_SYMBOL_GPL(queue_iova);
586 
587 /**
588  * put_iova_domain - destroys the iova doamin
589  * @iovad: - iova domain in question.
590  * All the iova's in that domain are destroyed.
591  */
592 void put_iova_domain(struct iova_domain *iovad)
593 {
594 	struct iova *iova, *tmp;
595 
596 	free_iova_flush_queue(iovad);
597 	free_iova_rcaches(iovad);
598 	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
599 		free_iova_mem(iova);
600 }
601 EXPORT_SYMBOL_GPL(put_iova_domain);
602 
603 static int
604 __is_range_overlap(struct rb_node *node,
605 	unsigned long pfn_lo, unsigned long pfn_hi)
606 {
607 	struct iova *iova = rb_entry(node, struct iova, node);
608 
609 	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
610 		return 1;
611 	return 0;
612 }
613 
614 static inline struct iova *
615 alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
616 {
617 	struct iova *iova;
618 
619 	iova = alloc_iova_mem();
620 	if (iova) {
621 		iova->pfn_lo = pfn_lo;
622 		iova->pfn_hi = pfn_hi;
623 	}
624 
625 	return iova;
626 }
627 
628 static struct iova *
629 __insert_new_range(struct iova_domain *iovad,
630 	unsigned long pfn_lo, unsigned long pfn_hi)
631 {
632 	struct iova *iova;
633 
634 	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
635 	if (iova)
636 		iova_insert_rbtree(&iovad->rbroot, iova, NULL);
637 
638 	return iova;
639 }
640 
641 static void
642 __adjust_overlap_range(struct iova *iova,
643 	unsigned long *pfn_lo, unsigned long *pfn_hi)
644 {
645 	if (*pfn_lo < iova->pfn_lo)
646 		iova->pfn_lo = *pfn_lo;
647 	if (*pfn_hi > iova->pfn_hi)
648 		*pfn_lo = iova->pfn_hi + 1;
649 }
650 
651 /**
652  * reserve_iova - reserves an iova in the given range
653  * @iovad: - iova domain pointer
654  * @pfn_lo: - lower page frame address
655  * @pfn_hi:- higher pfn adderss
656  * This function allocates reserves the address range from pfn_lo to pfn_hi so
657  * that this address is not dished out as part of alloc_iova.
658  */
659 struct iova *
660 reserve_iova(struct iova_domain *iovad,
661 	unsigned long pfn_lo, unsigned long pfn_hi)
662 {
663 	struct rb_node *node;
664 	unsigned long flags;
665 	struct iova *iova;
666 	unsigned int overlap = 0;
667 
668 	/* Don't allow nonsensical pfns */
669 	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
670 		return NULL;
671 
672 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
673 	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
674 		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
675 			iova = rb_entry(node, struct iova, node);
676 			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
677 			if ((pfn_lo >= iova->pfn_lo) &&
678 				(pfn_hi <= iova->pfn_hi))
679 				goto finish;
680 			overlap = 1;
681 
682 		} else if (overlap)
683 				break;
684 	}
685 
686 	/* We are here either because this is the first reserver node
687 	 * or need to insert remaining non overlap addr range
688 	 */
689 	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
690 finish:
691 
692 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
693 	return iova;
694 }
695 EXPORT_SYMBOL_GPL(reserve_iova);
696 
697 /**
698  * copy_reserved_iova - copies the reserved between domains
699  * @from: - source doamin from where to copy
700  * @to: - destination domin where to copy
701  * This function copies reserved iova's from one doamin to
702  * other.
703  */
704 void
705 copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
706 {
707 	unsigned long flags;
708 	struct rb_node *node;
709 
710 	spin_lock_irqsave(&from->iova_rbtree_lock, flags);
711 	for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
712 		struct iova *iova = rb_entry(node, struct iova, node);
713 		struct iova *new_iova;
714 
715 		if (iova->pfn_lo == IOVA_ANCHOR)
716 			continue;
717 
718 		new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
719 		if (!new_iova)
720 			printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
721 				iova->pfn_lo, iova->pfn_lo);
722 	}
723 	spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
724 }
725 EXPORT_SYMBOL_GPL(copy_reserved_iova);
726 
727 struct iova *
728 split_and_remove_iova(struct iova_domain *iovad, struct iova *iova,
729 		      unsigned long pfn_lo, unsigned long pfn_hi)
730 {
731 	unsigned long flags;
732 	struct iova *prev = NULL, *next = NULL;
733 
734 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
735 	if (iova->pfn_lo < pfn_lo) {
736 		prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1);
737 		if (prev == NULL)
738 			goto error;
739 	}
740 	if (iova->pfn_hi > pfn_hi) {
741 		next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi);
742 		if (next == NULL)
743 			goto error;
744 	}
745 
746 	__cached_rbnode_delete_update(iovad, iova);
747 	rb_erase(&iova->node, &iovad->rbroot);
748 
749 	if (prev) {
750 		iova_insert_rbtree(&iovad->rbroot, prev, NULL);
751 		iova->pfn_lo = pfn_lo;
752 	}
753 	if (next) {
754 		iova_insert_rbtree(&iovad->rbroot, next, NULL);
755 		iova->pfn_hi = pfn_hi;
756 	}
757 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
758 
759 	return iova;
760 
761 error:
762 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
763 	if (prev)
764 		free_iova_mem(prev);
765 	return NULL;
766 }
767 
768 /*
769  * Magazine caches for IOVA ranges.  For an introduction to magazines,
770  * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
771  * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
772  * For simplicity, we use a static magazine size and don't implement the
773  * dynamic size tuning described in the paper.
774  */
775 
776 #define IOVA_MAG_SIZE 128
777 
778 struct iova_magazine {
779 	unsigned long size;
780 	unsigned long pfns[IOVA_MAG_SIZE];
781 };
782 
783 struct iova_cpu_rcache {
784 	spinlock_t lock;
785 	struct iova_magazine *loaded;
786 	struct iova_magazine *prev;
787 };
788 
789 static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
790 {
791 	return kzalloc(sizeof(struct iova_magazine), flags);
792 }
793 
794 static void iova_magazine_free(struct iova_magazine *mag)
795 {
796 	kfree(mag);
797 }
798 
799 static void
800 iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
801 {
802 	unsigned long flags;
803 	int i;
804 
805 	if (!mag)
806 		return;
807 
808 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
809 
810 	for (i = 0 ; i < mag->size; ++i) {
811 		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
812 
813 		BUG_ON(!iova);
814 		private_free_iova(iovad, iova);
815 	}
816 
817 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
818 
819 	mag->size = 0;
820 }
821 
822 static bool iova_magazine_full(struct iova_magazine *mag)
823 {
824 	return (mag && mag->size == IOVA_MAG_SIZE);
825 }
826 
827 static bool iova_magazine_empty(struct iova_magazine *mag)
828 {
829 	return (!mag || mag->size == 0);
830 }
831 
832 static unsigned long iova_magazine_pop(struct iova_magazine *mag,
833 				       unsigned long limit_pfn)
834 {
835 	int i;
836 	unsigned long pfn;
837 
838 	BUG_ON(iova_magazine_empty(mag));
839 
840 	/* Only fall back to the rbtree if we have no suitable pfns at all */
841 	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
842 		if (i == 0)
843 			return 0;
844 
845 	/* Swap it to pop it */
846 	pfn = mag->pfns[i];
847 	mag->pfns[i] = mag->pfns[--mag->size];
848 
849 	return pfn;
850 }
851 
852 static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
853 {
854 	BUG_ON(iova_magazine_full(mag));
855 
856 	mag->pfns[mag->size++] = pfn;
857 }
858 
859 static void init_iova_rcaches(struct iova_domain *iovad)
860 {
861 	struct iova_cpu_rcache *cpu_rcache;
862 	struct iova_rcache *rcache;
863 	unsigned int cpu;
864 	int i;
865 
866 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
867 		rcache = &iovad->rcaches[i];
868 		spin_lock_init(&rcache->lock);
869 		rcache->depot_size = 0;
870 		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
871 		if (WARN_ON(!rcache->cpu_rcaches))
872 			continue;
873 		for_each_possible_cpu(cpu) {
874 			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
875 			spin_lock_init(&cpu_rcache->lock);
876 			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
877 			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
878 		}
879 	}
880 }
881 
882 /*
883  * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
884  * return true on success.  Can fail if rcache is full and we can't free
885  * space, and free_iova() (our only caller) will then return the IOVA
886  * range to the rbtree instead.
887  */
888 static bool __iova_rcache_insert(struct iova_domain *iovad,
889 				 struct iova_rcache *rcache,
890 				 unsigned long iova_pfn)
891 {
892 	struct iova_magazine *mag_to_free = NULL;
893 	struct iova_cpu_rcache *cpu_rcache;
894 	bool can_insert = false;
895 	unsigned long flags;
896 
897 	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
898 	spin_lock_irqsave(&cpu_rcache->lock, flags);
899 
900 	if (!iova_magazine_full(cpu_rcache->loaded)) {
901 		can_insert = true;
902 	} else if (!iova_magazine_full(cpu_rcache->prev)) {
903 		swap(cpu_rcache->prev, cpu_rcache->loaded);
904 		can_insert = true;
905 	} else {
906 		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);
907 
908 		if (new_mag) {
909 			spin_lock(&rcache->lock);
910 			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
911 				rcache->depot[rcache->depot_size++] =
912 						cpu_rcache->loaded;
913 			} else {
914 				mag_to_free = cpu_rcache->loaded;
915 			}
916 			spin_unlock(&rcache->lock);
917 
918 			cpu_rcache->loaded = new_mag;
919 			can_insert = true;
920 		}
921 	}
922 
923 	if (can_insert)
924 		iova_magazine_push(cpu_rcache->loaded, iova_pfn);
925 
926 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
927 
928 	if (mag_to_free) {
929 		iova_magazine_free_pfns(mag_to_free, iovad);
930 		iova_magazine_free(mag_to_free);
931 	}
932 
933 	return can_insert;
934 }
935 
936 static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
937 			       unsigned long size)
938 {
939 	unsigned int log_size = order_base_2(size);
940 
941 	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
942 		return false;
943 
944 	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
945 }
946 
947 /*
948  * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
949  * satisfy the request, return a matching non-NULL range and remove
950  * it from the 'rcache'.
951  */
952 static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
953 				       unsigned long limit_pfn)
954 {
955 	struct iova_cpu_rcache *cpu_rcache;
956 	unsigned long iova_pfn = 0;
957 	bool has_pfn = false;
958 	unsigned long flags;
959 
960 	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
961 	spin_lock_irqsave(&cpu_rcache->lock, flags);
962 
963 	if (!iova_magazine_empty(cpu_rcache->loaded)) {
964 		has_pfn = true;
965 	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
966 		swap(cpu_rcache->prev, cpu_rcache->loaded);
967 		has_pfn = true;
968 	} else {
969 		spin_lock(&rcache->lock);
970 		if (rcache->depot_size > 0) {
971 			iova_magazine_free(cpu_rcache->loaded);
972 			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
973 			has_pfn = true;
974 		}
975 		spin_unlock(&rcache->lock);
976 	}
977 
978 	if (has_pfn)
979 		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
980 
981 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
982 
983 	return iova_pfn;
984 }
985 
986 /*
987  * Try to satisfy IOVA allocation range from rcache.  Fail if requested
988  * size is too big or the DMA limit we are given isn't satisfied by the
989  * top element in the magazine.
990  */
991 static unsigned long iova_rcache_get(struct iova_domain *iovad,
992 				     unsigned long size,
993 				     unsigned long limit_pfn)
994 {
995 	unsigned int log_size = order_base_2(size);
996 
997 	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
998 		return 0;
999 
1000 	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
1001 }
1002 
1003 /*
1004  * free rcache data structures.
1005  */
1006 static void free_iova_rcaches(struct iova_domain *iovad)
1007 {
1008 	struct iova_rcache *rcache;
1009 	struct iova_cpu_rcache *cpu_rcache;
1010 	unsigned int cpu;
1011 	int i, j;
1012 
1013 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1014 		rcache = &iovad->rcaches[i];
1015 		for_each_possible_cpu(cpu) {
1016 			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1017 			iova_magazine_free(cpu_rcache->loaded);
1018 			iova_magazine_free(cpu_rcache->prev);
1019 		}
1020 		free_percpu(rcache->cpu_rcaches);
1021 		for (j = 0; j < rcache->depot_size; ++j)
1022 			iova_magazine_free(rcache->depot[j]);
1023 	}
1024 }
1025 
1026 /*
1027  * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
1028  */
1029 void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
1030 {
1031 	struct iova_cpu_rcache *cpu_rcache;
1032 	struct iova_rcache *rcache;
1033 	unsigned long flags;
1034 	int i;
1035 
1036 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1037 		rcache = &iovad->rcaches[i];
1038 		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1039 		spin_lock_irqsave(&cpu_rcache->lock, flags);
1040 		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
1041 		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
1042 		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
1043 	}
1044 }
1045 
1046 MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
1047 MODULE_LICENSE("GPL");
1048