xref: /openbmc/linux/drivers/iommu/iova.c (revision bef7a78d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2006-2009, Intel Corporation.
4  *
5  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
6  */
7 
8 #include <linux/iova.h>
9 #include <linux/module.h>
10 #include <linux/slab.h>
11 #include <linux/smp.h>
12 #include <linux/bitops.h>
13 #include <linux/cpu.h>
14 
15 /* The anchor node sits above the top of the usable address space */
16 #define IOVA_ANCHOR	~0UL
17 
18 static bool iova_rcache_insert(struct iova_domain *iovad,
19 			       unsigned long pfn,
20 			       unsigned long size);
21 static unsigned long iova_rcache_get(struct iova_domain *iovad,
22 				     unsigned long size,
23 				     unsigned long limit_pfn);
24 static void init_iova_rcaches(struct iova_domain *iovad);
25 static void free_iova_rcaches(struct iova_domain *iovad);
26 static void fq_destroy_all_entries(struct iova_domain *iovad);
27 static void fq_flush_timeout(struct timer_list *t);
28 static void free_global_cached_iovas(struct iova_domain *iovad);
29 
30 void
31 init_iova_domain(struct iova_domain *iovad, unsigned long granule,
32 	unsigned long start_pfn)
33 {
34 	/*
35 	 * IOVA granularity will normally be equal to the smallest
36 	 * supported IOMMU page size; both *must* be capable of
37 	 * representing individual CPU pages exactly.
38 	 */
39 	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
40 
41 	spin_lock_init(&iovad->iova_rbtree_lock);
42 	iovad->rbroot = RB_ROOT;
43 	iovad->cached_node = &iovad->anchor.node;
44 	iovad->cached32_node = &iovad->anchor.node;
45 	iovad->granule = granule;
46 	iovad->start_pfn = start_pfn;
47 	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
48 	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
49 	iovad->flush_cb = NULL;
50 	iovad->fq = NULL;
51 	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
52 	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
53 	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
54 	init_iova_rcaches(iovad);
55 }
56 EXPORT_SYMBOL_GPL(init_iova_domain);
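
/*
 * Illustrative sketch (not part of this file): a typical user grabs a
 * reference on the iova kmem_cache before allocating, then initialises its
 * domain with a granule equal to the smallest IOMMU page size.  The names
 * my_domain and MY_BASE_PFN are hypothetical; a base pfn of 1 is common so
 * that a zero bus address is never handed out.
 *
 *	static struct iova_domain my_domain;
 *
 *	static int my_iova_setup(void)
 *	{
 *		int ret = iova_cache_get();
 *
 *		if (ret)
 *			return ret;
 *
 *		init_iova_domain(&my_domain, PAGE_SIZE, MY_BASE_PFN);
 *		return 0;
 *	}
 */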
57 
58 bool has_iova_flush_queue(struct iova_domain *iovad)
59 {
60 	return !!iovad->fq;
61 }
62 
63 static void free_iova_flush_queue(struct iova_domain *iovad)
64 {
65 	if (!has_iova_flush_queue(iovad))
66 		return;
67 
68 	if (timer_pending(&iovad->fq_timer))
69 		del_timer(&iovad->fq_timer);
70 
71 	fq_destroy_all_entries(iovad);
72 
73 	free_percpu(iovad->fq);
74 
75 	iovad->fq         = NULL;
76 	iovad->flush_cb   = NULL;
77 	iovad->entry_dtor = NULL;
78 }
79 
80 int init_iova_flush_queue(struct iova_domain *iovad,
81 			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
82 {
83 	struct iova_fq __percpu *queue;
84 	int cpu;
85 
86 	atomic64_set(&iovad->fq_flush_start_cnt,  0);
87 	atomic64_set(&iovad->fq_flush_finish_cnt, 0);
88 
89 	queue = alloc_percpu(struct iova_fq);
90 	if (!queue)
91 		return -ENOMEM;
92 
93 	iovad->flush_cb   = flush_cb;
94 	iovad->entry_dtor = entry_dtor;
95 
96 	for_each_possible_cpu(cpu) {
97 		struct iova_fq *fq;
98 
99 		fq = per_cpu_ptr(queue, cpu);
100 		fq->head = 0;
101 		fq->tail = 0;
102 
103 		spin_lock_init(&fq->lock);
104 	}
105 
106 	smp_wmb();
107 
108 	iovad->fq = queue;
109 
110 	timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
111 	atomic_set(&iovad->fq_timer_on, 0);
112 
113 	return 0;
114 }
115 EXPORT_SYMBOL_GPL(init_iova_flush_queue);
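
/*
 * Illustrative sketch (not part of this file): a driver opting in to
 * deferred flushing registers a callback that invalidates its IOTLB for the
 * whole domain owning @iovad, plus an optional destructor for the opaque
 * data passed to queue_iova().  The names below are hypothetical.
 *
 *	static void my_flush_iotlb(struct iova_domain *iovad)
 *	{
 *		// flush the hardware IOTLB covering this domain
 *	}
 *
 *	static void my_entry_dtor(unsigned long data)
 *	{
 *		// release driver-private state stashed via queue_iova()
 *	}
 *
 *	if (init_iova_flush_queue(&my_domain, my_flush_iotlb, my_entry_dtor))
 *		pr_warn("no flush queue, unmaps will be synchronous\n");
 */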
116 
117 static struct rb_node *
118 __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
119 {
120 	if (limit_pfn <= iovad->dma_32bit_pfn)
121 		return iovad->cached32_node;
122 
123 	return iovad->cached_node;
124 }
125 
126 static void
127 __cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
128 {
129 	if (new->pfn_hi < iovad->dma_32bit_pfn)
130 		iovad->cached32_node = &new->node;
131 	else
132 		iovad->cached_node = &new->node;
133 }
134 
135 static void
136 __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
137 {
138 	struct iova *cached_iova;
139 
140 	cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
141 	if (free == cached_iova ||
142 	    (free->pfn_hi < iovad->dma_32bit_pfn &&
143 	     free->pfn_lo >= cached_iova->pfn_lo)) {
144 		iovad->cached32_node = rb_next(&free->node);
145 		iovad->max32_alloc_size = iovad->dma_32bit_pfn;
146 	}
147 
148 	cached_iova = rb_entry(iovad->cached_node, struct iova, node);
149 	if (free->pfn_lo >= cached_iova->pfn_lo)
150 		iovad->cached_node = rb_next(&free->node);
151 }
152 
153 /* Insert the iova into the domain rbtree; the caller must hold the rbtree lock */
154 static void
155 iova_insert_rbtree(struct rb_root *root, struct iova *iova,
156 		   struct rb_node *start)
157 {
158 	struct rb_node **new, *parent = NULL;
159 
160 	new = (start) ? &start : &(root->rb_node);
161 	/* Figure out where to put new node */
162 	while (*new) {
163 		struct iova *this = rb_entry(*new, struct iova, node);
164 
165 		parent = *new;
166 
167 		if (iova->pfn_lo < this->pfn_lo)
168 			new = &((*new)->rb_left);
169 		else if (iova->pfn_lo > this->pfn_lo)
170 			new = &((*new)->rb_right);
171 		else {
172 			WARN_ON(1); /* this should not happen */
173 			return;
174 		}
175 	}
176 	/* Add new node and rebalance tree. */
177 	rb_link_node(&iova->node, parent, new);
178 	rb_insert_color(&iova->node, root);
179 }
180 
181 static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
182 		unsigned long size, unsigned long limit_pfn,
183 			struct iova *new, bool size_aligned)
184 {
185 	struct rb_node *curr, *prev;
186 	struct iova *curr_iova;
187 	unsigned long flags;
188 	unsigned long new_pfn, retry_pfn;
189 	unsigned long align_mask = ~0UL;
190 	unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
191 
192 	if (size_aligned)
193 		align_mask <<= fls_long(size - 1);
194 
195 	/* Walk the tree backwards */
196 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
197 	if (limit_pfn <= iovad->dma_32bit_pfn &&
198 			size >= iovad->max32_alloc_size)
199 		goto iova32_full;
200 
201 	curr = __get_cached_rbnode(iovad, limit_pfn);
202 	curr_iova = rb_entry(curr, struct iova, node);
203 	retry_pfn = curr_iova->pfn_hi + 1;
204 
205 retry:
206 	do {
207 		high_pfn = min(high_pfn, curr_iova->pfn_lo);
208 		new_pfn = (high_pfn - size) & align_mask;
209 		prev = curr;
210 		curr = rb_prev(curr);
211 		curr_iova = rb_entry(curr, struct iova, node);
212 	} while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
213 
214 	if (high_pfn < size || new_pfn < low_pfn) {
215 		if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
216 			high_pfn = limit_pfn;
217 			low_pfn = retry_pfn;
218 			curr = &iovad->anchor.node;
219 			curr_iova = rb_entry(curr, struct iova, node);
220 			goto retry;
221 		}
222 		iovad->max32_alloc_size = size;
223 		goto iova32_full;
224 	}
225 
226 	/* pfn_lo will point to size aligned address if size_aligned is set */
227 	new->pfn_lo = new_pfn;
228 	new->pfn_hi = new->pfn_lo + size - 1;
229 
230 	/* If we have 'prev', it's a valid place to start the insertion. */
231 	iova_insert_rbtree(&iovad->rbroot, new, prev);
232 	__cached_rbnode_insert_update(iovad, new);
233 
234 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
235 	return 0;
236 
237 iova32_full:
238 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
239 	return -ENOMEM;
240 }
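
/*
 * Worked example (illustrative): with size_aligned set and size = 6 pfns,
 * fls_long(5) = 3, so align_mask = ~0UL << 3 and every candidate is aligned
 * to 8 pfns, i.e. roundup_power_of_two(size).  Starting from
 * high_pfn = 0x1000, the first candidate is (0x1000 - 6) & align_mask =
 * 0xff8, and the walk keeps stepping to lower nodes until the candidate no
 * longer overlaps an allocated range or falls below low_pfn.
 */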
241 
242 static struct kmem_cache *iova_cache;
243 static unsigned int iova_cache_users;
244 static DEFINE_MUTEX(iova_cache_mutex);
245 
246 static struct iova *alloc_iova_mem(void)
247 {
248 	return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
249 }
250 
251 static void free_iova_mem(struct iova *iova)
252 {
253 	if (iova->pfn_lo != IOVA_ANCHOR)
254 		kmem_cache_free(iova_cache, iova);
255 }
256 
257 int iova_cache_get(void)
258 {
259 	mutex_lock(&iova_cache_mutex);
260 	if (!iova_cache_users) {
261 		iova_cache = kmem_cache_create(
262 			"iommu_iova", sizeof(struct iova), 0,
263 			SLAB_HWCACHE_ALIGN, NULL);
264 		if (!iova_cache) {
265 			mutex_unlock(&iova_cache_mutex);
266 			pr_err("Couldn't create iova cache\n");
267 			return -ENOMEM;
268 		}
269 	}
270 
271 	iova_cache_users++;
272 	mutex_unlock(&iova_cache_mutex);
273 
274 	return 0;
275 }
276 EXPORT_SYMBOL_GPL(iova_cache_get);
277 
278 void iova_cache_put(void)
279 {
280 	mutex_lock(&iova_cache_mutex);
281 	if (WARN_ON(!iova_cache_users)) {
282 		mutex_unlock(&iova_cache_mutex);
283 		return;
284 	}
285 	iova_cache_users--;
286 	if (!iova_cache_users)
287 		kmem_cache_destroy(iova_cache);
288 	mutex_unlock(&iova_cache_mutex);
289 }
290 EXPORT_SYMBOL_GPL(iova_cache_put);
291 
292 /**
293  * alloc_iova - allocates an iova
294  * @iovad: - iova domain in question
295  * @size: - size, in page frames, of the allocation
296  * @limit_pfn: - max limit address
297  * @size_aligned: - set if size_aligned address range is required
298  * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
299  * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
300  * flag is set then the allocated address iova->pfn_lo will be naturally
301  * aligned on roundup_power_of_two(size).
302  */
303 struct iova *
304 alloc_iova(struct iova_domain *iovad, unsigned long size,
305 	unsigned long limit_pfn,
306 	bool size_aligned)
307 {
308 	struct iova *new_iova;
309 	int ret;
310 
311 	new_iova = alloc_iova_mem();
312 	if (!new_iova)
313 		return NULL;
314 
315 	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
316 			new_iova, size_aligned);
317 
318 	if (ret) {
319 		free_iova_mem(new_iova);
320 		return NULL;
321 	}
322 
323 	return new_iova;
324 }
325 EXPORT_SYMBOL_GPL(alloc_iova);
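
/*
 * Illustrative sketch (not part of this file): allocating a size-aligned
 * range below 4 GiB and turning it into a bus address with the helpers from
 * <linux/iova.h>.  my_domain and nr_pages are hypothetical.
 *
 *	struct iova *iova;
 *	dma_addr_t dma;
 *
 *	iova = alloc_iova(&my_domain, nr_pages,
 *			  DMA_BIT_MASK(32) >> iova_shift(&my_domain), true);
 *	if (!iova)
 *		return -ENOMEM;
 *	dma = iova_dma_addr(&my_domain, iova);
 */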
326 
327 static struct iova *
328 private_find_iova(struct iova_domain *iovad, unsigned long pfn)
329 {
330 	struct rb_node *node = iovad->rbroot.rb_node;
331 
332 	assert_spin_locked(&iovad->iova_rbtree_lock);
333 
334 	while (node) {
335 		struct iova *iova = rb_entry(node, struct iova, node);
336 
337 		if (pfn < iova->pfn_lo)
338 			node = node->rb_left;
339 		else if (pfn > iova->pfn_hi)
340 			node = node->rb_right;
341 		else
342 			return iova;	/* pfn falls within iova's range */
343 	}
344 
345 	return NULL;
346 }
347 
348 static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
349 {
350 	assert_spin_locked(&iovad->iova_rbtree_lock);
351 	__cached_rbnode_delete_update(iovad, iova);
352 	rb_erase(&iova->node, &iovad->rbroot);
353 	free_iova_mem(iova);
354 }
355 
356 /**
357  * find_iova - finds an iova for a given pfn
358  * @iovad: - iova domain in question.
359  * @pfn: - page frame number
360  * This function finds and returns an iova belonging to the
361  * given domain which matches the given pfn.
362  */
363 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
364 {
365 	unsigned long flags;
366 	struct iova *iova;
367 
368 	/* Take the lock so that no other thread is manipulating the rbtree */
369 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
370 	iova = private_find_iova(iovad, pfn);
371 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
372 	return iova;
373 }
374 EXPORT_SYMBOL_GPL(find_iova);
375 
376 /**
377  * __free_iova - frees the given iova
378  * @iovad: iova domain in question.
379  * @iova: iova in question.
380  * Frees the given iova belonging to the given domain
381  */
382 void
383 __free_iova(struct iova_domain *iovad, struct iova *iova)
384 {
385 	unsigned long flags;
386 
387 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
388 	private_free_iova(iovad, iova);
389 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
390 }
391 EXPORT_SYMBOL_GPL(__free_iova);
392 
393 /**
394  * free_iova - finds and frees the iova for a given pfn
395  * @iovad: - iova domain in question.
396  * @pfn: - pfn that is allocated previously
397  * This function finds an iova for a given pfn and then
398  * frees the iova from that domain.
399  */
400 void
401 free_iova(struct iova_domain *iovad, unsigned long pfn)
402 {
403 	unsigned long flags;
404 	struct iova *iova;
405 
406 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
407 	iova = private_find_iova(iovad, pfn);
408 	if (iova)
409 		private_free_iova(iovad, iova);
410 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
411 
412 }
413 EXPORT_SYMBOL_GPL(free_iova);
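
/*
 * Illustrative sketch (not part of this file): a typical unmap path uses
 * find_iova() to look at the range (e.g. its size) before releasing it with
 * __free_iova(); free_iova() is the one-call form when only the pfn is
 * known.  my_domain and dma are hypothetical.
 *
 *	struct iova *iova = find_iova(&my_domain, iova_pfn(&my_domain, dma));
 *
 *	if (WARN_ON(!iova))
 *		return;
 *	nrpages = iova_size(iova);
 *	// ... unmap and flush the IOTLB for [pfn_lo, pfn_hi] ...
 *	__free_iova(&my_domain, iova);
 */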
414 
415 /**
416  * alloc_iova_fast - allocates an iova from rcache
417  * @iovad: - iova domain in question
418  * @size: - size, in page frames, of the allocation
419  * @limit_pfn: - max limit address
420  * @flush_rcache: - set to flush rcache on regular allocation failure
421  * This function tries to satisfy an iova allocation from the rcache,
422  * and falls back to regular allocation on failure. If regular allocation
423  * fails too and the flush_rcache flag is set then the rcache will be flushed.
424  */
425 unsigned long
426 alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
427 		unsigned long limit_pfn, bool flush_rcache)
428 {
429 	unsigned long iova_pfn;
430 	struct iova *new_iova;
431 
432 	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
433 	if (iova_pfn)
434 		return iova_pfn;
435 
436 retry:
437 	new_iova = alloc_iova(iovad, size, limit_pfn, true);
438 	if (!new_iova) {
439 		unsigned int cpu;
440 
441 		if (!flush_rcache)
442 			return 0;
443 
444 		/* Try replenishing IOVAs by flushing rcache. */
445 		flush_rcache = false;
446 		for_each_online_cpu(cpu)
447 			free_cpu_cached_iovas(cpu, iovad);
448 		free_global_cached_iovas(iovad);
449 		goto retry;
450 	}
451 
452 	return new_iova->pfn_lo;
453 }
454 EXPORT_SYMBOL_GPL(alloc_iova_fast);
455 
456 /**
457  * free_iova_fast - free iova pfn range into rcache
458  * @iovad: - iova domain in question.
459  * @pfn: - pfn that is allocated previously
460  * @size: - # of pages in range
461  * This function frees an iova range by trying to put it into the rcache,
462  * falling back to regular iova deallocation via free_iova() if this fails.
463  */
464 void
465 free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
466 {
467 	if (iova_rcache_insert(iovad, pfn, size))
468 		return;
469 
470 	free_iova(iovad, pfn);
471 }
472 EXPORT_SYMBOL_GPL(free_iova_fast);
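
/*
 * Illustrative sketch (not part of this file): the fast-path pair as used
 * by dma-iommu style callers.  Allocation and free must use the same page
 * count so the range lands back in the same rcache bucket.  my_domain,
 * size and dma_limit are hypothetical.
 *
 *	unsigned long shift = iova_shift(&my_domain);
 *	unsigned long nrpages = iova_align(&my_domain, size) >> shift;
 *	unsigned long pfn;
 *
 *	pfn = alloc_iova_fast(&my_domain, nrpages, dma_limit >> shift, true);
 *	if (!pfn)
 *		return DMA_MAPPING_ERROR;
 *	// ... map and use the range ...
 *	free_iova_fast(&my_domain, pfn, nrpages);
 */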
473 
474 #define fq_ring_for_each(i, fq) \
475 	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
476 
477 static inline bool fq_full(struct iova_fq *fq)
478 {
479 	assert_spin_locked(&fq->lock);
480 	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
481 }
482 
483 static inline unsigned fq_ring_add(struct iova_fq *fq)
484 {
485 	unsigned idx = fq->tail;
486 
487 	assert_spin_locked(&fq->lock);
488 
489 	fq->tail = (idx + 1) % IOVA_FQ_SIZE;
490 
491 	return idx;
492 }
493 
494 static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
495 {
496 	u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
497 	unsigned idx;
498 
499 	assert_spin_locked(&fq->lock);
500 
501 	fq_ring_for_each(idx, fq) {
502 
503 		if (fq->entries[idx].counter >= counter)
504 			break;
505 
506 		if (iovad->entry_dtor)
507 			iovad->entry_dtor(fq->entries[idx].data);
508 
509 		free_iova_fast(iovad,
510 			       fq->entries[idx].iova_pfn,
511 			       fq->entries[idx].pages);
512 
513 		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
514 	}
515 }
516 
517 static void iova_domain_flush(struct iova_domain *iovad)
518 {
519 	atomic64_inc(&iovad->fq_flush_start_cnt);
520 	iovad->flush_cb(iovad);
521 	atomic64_inc(&iovad->fq_flush_finish_cnt);
522 }
523 
524 static void fq_destroy_all_entries(struct iova_domain *iovad)
525 {
526 	int cpu;
527 
528 	/*
529 	 * This code runs when the iova_domain is being destroyed, so don't
530 	 * bother to free iovas, just call the entry_dtor on all remaining
531 	 * entries.
532 	 */
533 	if (!iovad->entry_dtor)
534 		return;
535 
536 	for_each_possible_cpu(cpu) {
537 		struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
538 		int idx;
539 
540 		fq_ring_for_each(idx, fq)
541 			iovad->entry_dtor(fq->entries[idx].data);
542 	}
543 }
544 
545 static void fq_flush_timeout(struct timer_list *t)
546 {
547 	struct iova_domain *iovad = from_timer(iovad, t, fq_timer);
548 	int cpu;
549 
550 	atomic_set(&iovad->fq_timer_on, 0);
551 	iova_domain_flush(iovad);
552 
553 	for_each_possible_cpu(cpu) {
554 		unsigned long flags;
555 		struct iova_fq *fq;
556 
557 		fq = per_cpu_ptr(iovad->fq, cpu);
558 		spin_lock_irqsave(&fq->lock, flags);
559 		fq_ring_free(iovad, fq);
560 		spin_unlock_irqrestore(&fq->lock, flags);
561 	}
562 }
563 
564 void queue_iova(struct iova_domain *iovad,
565 		unsigned long pfn, unsigned long pages,
566 		unsigned long data)
567 {
568 	struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
569 	unsigned long flags;
570 	unsigned idx;
571 
572 	spin_lock_irqsave(&fq->lock, flags);
573 
574 	/*
575 	 * First remove all entries from the flush queue that have already been
576 	 * flushed out on another CPU. This makes the fq_full() check below less
577 	 * likely to be true.
578 	 */
579 	fq_ring_free(iovad, fq);
580 
581 	if (fq_full(fq)) {
582 		iova_domain_flush(iovad);
583 		fq_ring_free(iovad, fq);
584 	}
585 
586 	idx = fq_ring_add(fq);
587 
588 	fq->entries[idx].iova_pfn = pfn;
589 	fq->entries[idx].pages    = pages;
590 	fq->entries[idx].data     = data;
591 	fq->entries[idx].counter  = atomic64_read(&iovad->fq_flush_start_cnt);
592 
593 	spin_unlock_irqrestore(&fq->lock, flags);
594 
595 	/* Avoid false sharing as much as possible. */
596 	if (!atomic_read(&iovad->fq_timer_on) &&
597 	    !atomic_xchg(&iovad->fq_timer_on, 1))
598 		mod_timer(&iovad->fq_timer,
599 			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
600 }
601 EXPORT_SYMBOL_GPL(queue_iova);
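
/*
 * Illustrative sketch (not part of this file): a deferred unmap path clears
 * the page tables immediately but leaves both the IOTLB flush and the IOVA
 * free to the flush queue; @data is an opaque cookie later handed to the
 * entry_dtor.  If the queue was idle, queue_iova() also arms fq_timer so
 * the entry is reclaimed within IOVA_FQ_TIMEOUT.
 *
 *	// page tables for [pfn, pfn + nrpages) already cleared
 *	queue_iova(&my_domain, pfn, nrpages, (unsigned long)my_cookie);
 *	// no free_iova_fast() here - fq_ring_free() does it after the flush
 */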
602 
603 /**
604  * put_iova_domain - destroys the iova domain
605  * @iovad: - iova domain in question.
606  * All the iovas in that domain are destroyed.
607  */
608 void put_iova_domain(struct iova_domain *iovad)
609 {
610 	struct iova *iova, *tmp;
611 
612 	free_iova_flush_queue(iovad);
613 	free_iova_rcaches(iovad);
614 	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
615 		free_iova_mem(iova);
616 }
617 EXPORT_SYMBOL_GPL(put_iova_domain);
618 
619 static int
620 __is_range_overlap(struct rb_node *node,
621 	unsigned long pfn_lo, unsigned long pfn_hi)
622 {
623 	struct iova *iova = rb_entry(node, struct iova, node);
624 
625 	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
626 		return 1;
627 	return 0;
628 }
629 
630 static inline struct iova *
631 alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
632 {
633 	struct iova *iova;
634 
635 	iova = alloc_iova_mem();
636 	if (iova) {
637 		iova->pfn_lo = pfn_lo;
638 		iova->pfn_hi = pfn_hi;
639 	}
640 
641 	return iova;
642 }
643 
644 static struct iova *
645 __insert_new_range(struct iova_domain *iovad,
646 	unsigned long pfn_lo, unsigned long pfn_hi)
647 {
648 	struct iova *iova;
649 
650 	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
651 	if (iova)
652 		iova_insert_rbtree(&iovad->rbroot, iova, NULL);
653 
654 	return iova;
655 }
656 
657 static void
658 __adjust_overlap_range(struct iova *iova,
659 	unsigned long *pfn_lo, unsigned long *pfn_hi)
660 {
661 	if (*pfn_lo < iova->pfn_lo)
662 		iova->pfn_lo = *pfn_lo;
663 	if (*pfn_hi > iova->pfn_hi)
664 		*pfn_lo = iova->pfn_hi + 1;
665 }
666 
667 /**
668  * reserve_iova - reserves an iova in the given range
669  * @iovad: - iova domain pointer
670  * @pfn_lo: - lower page frame address
671  * @pfn_hi: - higher pfn address
672  * This function reserves the address range from pfn_lo to pfn_hi so
673  * that this address is not dished out as part of alloc_iova.
674  */
675 struct iova *
676 reserve_iova(struct iova_domain *iovad,
677 	unsigned long pfn_lo, unsigned long pfn_hi)
678 {
679 	struct rb_node *node;
680 	unsigned long flags;
681 	struct iova *iova;
682 	unsigned int overlap = 0;
683 
684 	/* Don't allow nonsensical pfns */
685 	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
686 		return NULL;
687 
688 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
689 	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
690 		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
691 			iova = rb_entry(node, struct iova, node);
692 			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
693 			if ((pfn_lo >= iova->pfn_lo) &&
694 				(pfn_hi <= iova->pfn_hi))
695 				goto finish;
696 			overlap = 1;
697 
698 		} else if (overlap)
699 				break;
700 	}
701 
702 	/* We get here either because this is the first reserved node,
703 	 * or because we still need to insert the remaining non-overlapping range
704 	 */
705 	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
706 finish:
707 
708 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
709 	return iova;
710 }
711 EXPORT_SYMBOL_GPL(reserve_iova);
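
/*
 * Illustrative sketch (not part of this file): carving a hole out of the
 * allocatable space, e.g. for an MSI doorbell window, before any
 * allocations are made.  The address range is hypothetical.
 *
 *	if (!reserve_iova(&my_domain,
 *			  0xfee00000UL >> iova_shift(&my_domain),
 *			  0xfeefffffUL >> iova_shift(&my_domain)))
 *		pr_err("failed to reserve the MSI window\n");
 */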
712 
713 /**
714  * copy_reserved_iova - copies the reserved iovas between domains
715  * @from: - source domain from where to copy
716  * @to: - destination domain where to copy
717  * This function copies reserved iovas from one domain to
718  * the other.
719  */
720 void
721 copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
722 {
723 	unsigned long flags;
724 	struct rb_node *node;
725 
726 	spin_lock_irqsave(&from->iova_rbtree_lock, flags);
727 	for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
728 		struct iova *iova = rb_entry(node, struct iova, node);
729 		struct iova *new_iova;
730 
731 		if (iova->pfn_lo == IOVA_ANCHOR)
732 			continue;
733 
734 		new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
735 		if (!new_iova)
736 			pr_err("Reserve iova range %lx-%lx failed\n",
737 			       iova->pfn_lo, iova->pfn_hi);
738 	}
739 	spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
740 }
741 EXPORT_SYMBOL_GPL(copy_reserved_iova);
742 
743 /*
744  * Magazine caches for IOVA ranges.  For an introduction to magazines,
745  * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
746  * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
747  * For simplicity, we use a static magazine size and don't implement the
748  * dynamic size tuning described in the paper.
749  */
750 
751 #define IOVA_MAG_SIZE 128
752 
753 struct iova_magazine {
754 	unsigned long size;
755 	unsigned long pfns[IOVA_MAG_SIZE];
756 };
757 
758 struct iova_cpu_rcache {
759 	spinlock_t lock;
760 	struct iova_magazine *loaded;
761 	struct iova_magazine *prev;
762 };
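
/*
 * Data flow (informal summary): free_iova_fast() pushes a pfn into the
 * per-CPU 'loaded' magazine; once both 'loaded' and 'prev' are full, the
 * loaded magazine is handed to the per-rcache depot (up to MAX_GLOBAL_MAGS),
 * and only when the depot is also full are the pfns returned to the rbtree.
 * Lookup runs the same path in reverse: 'loaded', then 'prev', then the
 * depot, then the rbtree walk in alloc_iova().
 */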
763 
764 static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
765 {
766 	return kzalloc(sizeof(struct iova_magazine), flags);
767 }
768 
769 static void iova_magazine_free(struct iova_magazine *mag)
770 {
771 	kfree(mag);
772 }
773 
774 static void
775 iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
776 {
777 	unsigned long flags;
778 	int i;
779 
780 	if (!mag)
781 		return;
782 
783 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
784 
785 	for (i = 0 ; i < mag->size; ++i) {
786 		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
787 
788 		if (WARN_ON(!iova))
789 			continue;
790 
791 		private_free_iova(iovad, iova);
792 	}
793 
794 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
795 
796 	mag->size = 0;
797 }
798 
799 static bool iova_magazine_full(struct iova_magazine *mag)
800 {
801 	return (mag && mag->size == IOVA_MAG_SIZE);
802 }
803 
804 static bool iova_magazine_empty(struct iova_magazine *mag)
805 {
806 	return (!mag || mag->size == 0);
807 }
808 
809 static unsigned long iova_magazine_pop(struct iova_magazine *mag,
810 				       unsigned long limit_pfn)
811 {
812 	int i;
813 	unsigned long pfn;
814 
815 	BUG_ON(iova_magazine_empty(mag));
816 
817 	/* Only fall back to the rbtree if we have no suitable pfns at all */
818 	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
819 		if (i == 0)
820 			return 0;
821 
822 	/* Swap it to pop it */
823 	pfn = mag->pfns[i];
824 	mag->pfns[i] = mag->pfns[--mag->size];
825 
826 	return pfn;
827 }
828 
829 static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
830 {
831 	BUG_ON(iova_magazine_full(mag));
832 
833 	mag->pfns[mag->size++] = pfn;
834 }
835 
836 static void init_iova_rcaches(struct iova_domain *iovad)
837 {
838 	struct iova_cpu_rcache *cpu_rcache;
839 	struct iova_rcache *rcache;
840 	unsigned int cpu;
841 	int i;
842 
843 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
844 		rcache = &iovad->rcaches[i];
845 		spin_lock_init(&rcache->lock);
846 		rcache->depot_size = 0;
847 		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
848 		if (WARN_ON(!rcache->cpu_rcaches))
849 			continue;
850 		for_each_possible_cpu(cpu) {
851 			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
852 			spin_lock_init(&cpu_rcache->lock);
853 			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
854 			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
855 		}
856 	}
857 }
858 
859 /*
860  * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
861  * return true on success.  Can fail if rcache is full and we can't free
862  * space, in which case our caller free_iova_fast() will return the IOVA
863  * range to the rbtree instead.
864  */
865 static bool __iova_rcache_insert(struct iova_domain *iovad,
866 				 struct iova_rcache *rcache,
867 				 unsigned long iova_pfn)
868 {
869 	struct iova_magazine *mag_to_free = NULL;
870 	struct iova_cpu_rcache *cpu_rcache;
871 	bool can_insert = false;
872 	unsigned long flags;
873 
874 	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
875 	spin_lock_irqsave(&cpu_rcache->lock, flags);
876 
877 	if (!iova_magazine_full(cpu_rcache->loaded)) {
878 		can_insert = true;
879 	} else if (!iova_magazine_full(cpu_rcache->prev)) {
880 		swap(cpu_rcache->prev, cpu_rcache->loaded);
881 		can_insert = true;
882 	} else {
883 		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);
884 
885 		if (new_mag) {
886 			spin_lock(&rcache->lock);
887 			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
888 				rcache->depot[rcache->depot_size++] =
889 						cpu_rcache->loaded;
890 			} else {
891 				mag_to_free = cpu_rcache->loaded;
892 			}
893 			spin_unlock(&rcache->lock);
894 
895 			cpu_rcache->loaded = new_mag;
896 			can_insert = true;
897 		}
898 	}
899 
900 	if (can_insert)
901 		iova_magazine_push(cpu_rcache->loaded, iova_pfn);
902 
903 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
904 
905 	if (mag_to_free) {
906 		iova_magazine_free_pfns(mag_to_free, iovad);
907 		iova_magazine_free(mag_to_free);
908 	}
909 
910 	return can_insert;
911 }
912 
913 static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
914 			       unsigned long size)
915 {
916 	unsigned int log_size = order_base_2(size);
917 
918 	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
919 		return false;
920 
921 	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
922 }
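
/*
 * Worked example (illustrative): with IOVA_RANGE_CACHE_MAX_SIZE == 6 the
 * rcaches cover orders 0..5, i.e. 1, 2, 4, 8, 16 and 32 pfns.  A 3-pfn
 * range maps to order_base_2(3) == 2 and shares the 4-pfn bucket, while a
 * 64-pfn range (order 6) bypasses the rcache and is returned straight to
 * the rbtree via free_iova().
 */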
923 
924 /*
925  * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
926  * satisfy the request, return a matching non-NULL range and remove
927  * it from the 'rcache'.
928  */
929 static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
930 				       unsigned long limit_pfn)
931 {
932 	struct iova_cpu_rcache *cpu_rcache;
933 	unsigned long iova_pfn = 0;
934 	bool has_pfn = false;
935 	unsigned long flags;
936 
937 	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
938 	spin_lock_irqsave(&cpu_rcache->lock, flags);
939 
940 	if (!iova_magazine_empty(cpu_rcache->loaded)) {
941 		has_pfn = true;
942 	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
943 		swap(cpu_rcache->prev, cpu_rcache->loaded);
944 		has_pfn = true;
945 	} else {
946 		spin_lock(&rcache->lock);
947 		if (rcache->depot_size > 0) {
948 			iova_magazine_free(cpu_rcache->loaded);
949 			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
950 			has_pfn = true;
951 		}
952 		spin_unlock(&rcache->lock);
953 	}
954 
955 	if (has_pfn)
956 		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
957 
958 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
959 
960 	return iova_pfn;
961 }
962 
963 /*
964  * Try to satisfy IOVA allocation range from rcache.  Fail if requested
965  * size is too big or the DMA limit we are given isn't satisfied by the
966  * top element in the magazine.
967  */
968 static unsigned long iova_rcache_get(struct iova_domain *iovad,
969 				     unsigned long size,
970 				     unsigned long limit_pfn)
971 {
972 	unsigned int log_size = order_base_2(size);
973 
974 	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
975 		return 0;
976 
977 	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
978 }
979 
980 /*
981  * free rcache data structures.
982  */
983 static void free_iova_rcaches(struct iova_domain *iovad)
984 {
985 	struct iova_rcache *rcache;
986 	struct iova_cpu_rcache *cpu_rcache;
987 	unsigned int cpu;
988 	int i, j;
989 
990 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
991 		rcache = &iovad->rcaches[i];
992 		for_each_possible_cpu(cpu) {
993 			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
994 			iova_magazine_free(cpu_rcache->loaded);
995 			iova_magazine_free(cpu_rcache->prev);
996 		}
997 		free_percpu(rcache->cpu_rcaches);
998 		for (j = 0; j < rcache->depot_size; ++j)
999 			iova_magazine_free(rcache->depot[j]);
1000 	}
1001 }
1002 
1003 /*
1004  * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
1005  */
1006 void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
1007 {
1008 	struct iova_cpu_rcache *cpu_rcache;
1009 	struct iova_rcache *rcache;
1010 	unsigned long flags;
1011 	int i;
1012 
1013 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1014 		rcache = &iovad->rcaches[i];
1015 		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
1016 		spin_lock_irqsave(&cpu_rcache->lock, flags);
1017 		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
1018 		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
1019 		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
1020 	}
1021 }
1022 
1023 /*
1024  * free all the IOVA ranges of global cache
1025  */
1026 static void free_global_cached_iovas(struct iova_domain *iovad)
1027 {
1028 	struct iova_rcache *rcache;
1029 	unsigned long flags;
1030 	int i, j;
1031 
1032 	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
1033 		rcache = &iovad->rcaches[i];
1034 		spin_lock_irqsave(&rcache->lock, flags);
1035 		for (j = 0; j < rcache->depot_size; ++j) {
1036 			iova_magazine_free_pfns(rcache->depot[j], iovad);
1037 			iova_magazine_free(rcache->depot[j]);
1038 		}
1039 		rcache->depot_size = 0;
1040 		spin_unlock_irqrestore(&rcache->lock, flags);
1041 	}
1042 }
1043 MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
1044 MODULE_LICENSE("GPL");
1045