xref: /openbmc/linux/drivers/gpu/drm/ttm/ttm_pool.c (revision 89b15863)
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2020 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors: Christian König
24  */
25 
26 /* Pooling of allocated pages is necessary because changing the caching
27  * attributes on x86 of the linear mapping requires a costly cross CPU TLB
28  * invalidate for those addresses.
29  *
30  * Additional to that allocations from the DMA coherent API are pooled as well
31  * cause they are rather slow compared to alloc_pages+map.
32  */
33 
34 #include <linux/module.h>
35 #include <linux/dma-mapping.h>
36 
37 #ifdef CONFIG_X86
38 #include <asm/set_memory.h>
39 #endif
40 
41 #include <drm/ttm/ttm_pool.h>
42 #include <drm/ttm/ttm_bo_driver.h>
43 #include <drm/ttm/ttm_tt.h>
44 
45 /**
46  * struct ttm_pool_dma - Helper object for coherent DMA mappings
47  *
48  * @addr: original DMA address returned for the mapping
49  * @vaddr: original vaddr return for the mapping and order in the lower bits
50  */
51 struct ttm_pool_dma {
52 	dma_addr_t addr;
53 	unsigned long vaddr;
54 };
55 
56 static unsigned long page_pool_size;
57 
58 MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
59 module_param(page_pool_size, ulong, 0644);
60 
61 static atomic_long_t allocated_pages;
62 
63 static struct ttm_pool_type global_write_combined[MAX_ORDER];
64 static struct ttm_pool_type global_uncached[MAX_ORDER];
65 
66 static spinlock_t shrinker_lock;
67 static struct list_head shrinker_list;
68 static struct shrinker mm_shrinker;
69 
70 /* Allocate pages of size 1 << order with the given gfp_flags */
71 static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
72 					unsigned int order)
73 {
74 	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
75 	struct ttm_pool_dma *dma;
76 	struct page *p;
77 	void *vaddr;
78 
79 	if (order) {
80 		gfp_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY |
81 			__GFP_KSWAPD_RECLAIM;
82 		gfp_flags &= ~__GFP_MOVABLE;
83 		gfp_flags &= ~__GFP_COMP;
84 	}
85 
86 	if (!pool->use_dma_alloc) {
87 		p = alloc_pages(gfp_flags, order);
88 		if (p)
89 			p->private = order;
90 		return p;
91 	}
92 
93 	dma = kmalloc(sizeof(*dma), GFP_KERNEL);
94 	if (!dma)
95 		return NULL;
96 
97 	if (order)
98 		attr |= DMA_ATTR_NO_WARN;
99 
100 	vaddr = dma_alloc_attrs(pool->dev, (1ULL << order) * PAGE_SIZE,
101 				&dma->addr, gfp_flags, attr);
102 	if (!vaddr)
103 		goto error_free;
104 
105 	/* TODO: This is an illegal abuse of the DMA API, but we need to rework
106 	 * TTM page fault handling and extend the DMA API to clean this up.
107 	 */
108 	if (is_vmalloc_addr(vaddr))
109 		p = vmalloc_to_page(vaddr);
110 	else
111 		p = virt_to_page(vaddr);
112 
113 	dma->vaddr = (unsigned long)vaddr | order;
114 	p->private = (unsigned long)dma;
115 	return p;
116 
117 error_free:
118 	kfree(dma);
119 	return NULL;
120 }
121 
122 /* Reset the caching and pages of size 1 << order */
123 static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
124 			       unsigned int order, struct page *p)
125 {
126 	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
127 	struct ttm_pool_dma *dma;
128 	void *vaddr;
129 
130 #ifdef CONFIG_X86
131 	/* We don't care that set_pages_wb is inefficient here. This is only
132 	 * used when we have to shrink and CPU overhead is irrelevant then.
133 	 */
134 	if (caching != ttm_cached && !PageHighMem(p))
135 		set_pages_wb(p, 1 << order);
136 #endif
137 
138 	if (!pool || !pool->use_dma_alloc) {
139 		__free_pages(p, order);
140 		return;
141 	}
142 
143 	if (order)
144 		attr |= DMA_ATTR_NO_WARN;
145 
146 	dma = (void *)p->private;
147 	vaddr = (void *)(dma->vaddr & PAGE_MASK);
148 	dma_free_attrs(pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dma->addr,
149 		       attr);
150 	kfree(dma);
151 }
152 
153 /* Apply a new caching to an array of pages */
154 static int ttm_pool_apply_caching(struct page **first, struct page **last,
155 				  enum ttm_caching caching)
156 {
157 #ifdef CONFIG_X86
158 	unsigned int num_pages = last - first;
159 
160 	if (!num_pages)
161 		return 0;
162 
163 	switch (caching) {
164 	case ttm_cached:
165 		break;
166 	case ttm_write_combined:
167 		return set_pages_array_wc(first, num_pages);
168 	case ttm_uncached:
169 		return set_pages_array_uc(first, num_pages);
170 	}
171 #endif
172 	return 0;
173 }
174 
175 /* Map pages of 1 << order size and fill the DMA address array  */
176 static int ttm_pool_map(struct ttm_pool *pool, unsigned int order,
177 			struct page *p, dma_addr_t **dma_addr)
178 {
179 	dma_addr_t addr;
180 	unsigned int i;
181 
182 	if (pool->use_dma_alloc) {
183 		struct ttm_pool_dma *dma = (void *)p->private;
184 
185 		addr = dma->addr;
186 	} else {
187 		size_t size = (1ULL << order) * PAGE_SIZE;
188 
189 		addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL);
190 		if (dma_mapping_error(pool->dev, **dma_addr))
191 			return -EFAULT;
192 	}
193 
194 	for (i = 1 << order; i ; --i) {
195 		*(*dma_addr)++ = addr;
196 		addr += PAGE_SIZE;
197 	}
198 
199 	return 0;
200 }
201 
202 /* Unmap pages of 1 << order size */
203 static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
204 			   unsigned int num_pages)
205 {
206 	/* Unmapped while freeing the page */
207 	if (pool->use_dma_alloc)
208 		return;
209 
210 	dma_unmap_page(pool->dev, dma_addr, (long)num_pages << PAGE_SHIFT,
211 		       DMA_BIDIRECTIONAL);
212 }
213 
214 /* Give pages into a specific pool_type */
215 static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
216 {
217 	spin_lock(&pt->lock);
218 	list_add(&p->lru, &pt->pages);
219 	spin_unlock(&pt->lock);
220 	atomic_long_add(1 << pt->order, &allocated_pages);
221 }
222 
223 /* Take pages from a specific pool_type, return NULL when nothing available */
224 static struct page *ttm_pool_type_take(struct ttm_pool_type *pt)
225 {
226 	struct page *p;
227 
228 	spin_lock(&pt->lock);
229 	p = list_first_entry_or_null(&pt->pages, typeof(*p), lru);
230 	if (p) {
231 		atomic_long_sub(1 << pt->order, &allocated_pages);
232 		list_del(&p->lru);
233 	}
234 	spin_unlock(&pt->lock);
235 
236 	return p;
237 }
238 
239 /* Count the number of pages available in a pool_type */
240 static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
241 {
242 	unsigned int count = 0;
243 	struct page *p;
244 
245 	spin_lock(&pt->lock);
246 	/* Only used for debugfs, the overhead doesn't matter */
247 	list_for_each_entry(p, &pt->pages, lru)
248 		++count;
249 	spin_unlock(&pt->lock);
250 
251 	return count;
252 }
253 
254 /* Initialize and add a pool type to the global shrinker list */
255 static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
256 			       enum ttm_caching caching, unsigned int order)
257 {
258 	pt->pool = pool;
259 	pt->caching = caching;
260 	pt->order = order;
261 	spin_lock_init(&pt->lock);
262 	INIT_LIST_HEAD(&pt->pages);
263 
264 	spin_lock(&shrinker_lock);
265 	list_add_tail(&pt->shrinker_list, &shrinker_list);
266 	spin_unlock(&shrinker_lock);
267 }
268 
269 /* Remove a pool_type from the global shrinker list and free all pages */
270 static void ttm_pool_type_fini(struct ttm_pool_type *pt)
271 {
272 	struct page *p, *tmp;
273 
274 	spin_lock(&shrinker_lock);
275 	list_del(&pt->shrinker_list);
276 	spin_unlock(&shrinker_lock);
277 
278 	list_for_each_entry_safe(p, tmp, &pt->pages, lru)
279 		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
280 }
281 
282 /* Return the pool_type to use for the given caching and order */
283 static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
284 						  enum ttm_caching caching,
285 						  unsigned int order)
286 {
287 	if (pool->use_dma_alloc)
288 		return &pool->caching[caching].orders[order];
289 
290 #ifdef CONFIG_X86
291 	switch (caching) {
292 	case ttm_write_combined:
293 		return &global_write_combined[order];
294 	case ttm_uncached:
295 		return &global_uncached[order];
296 	default:
297 		break;
298 	}
299 #endif
300 
301 	return NULL;
302 }
303 
304 /* Free pages using the global shrinker list */
305 static unsigned int ttm_pool_shrink(void)
306 {
307 	struct ttm_pool_type *pt;
308 	unsigned int num_freed;
309 	struct page *p;
310 
311 	spin_lock(&shrinker_lock);
312 	pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);
313 
314 	p = ttm_pool_type_take(pt);
315 	if (p) {
316 		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
317 		num_freed = 1 << pt->order;
318 	} else {
319 		num_freed = 0;
320 	}
321 
322 	list_move_tail(&pt->shrinker_list, &shrinker_list);
323 	spin_unlock(&shrinker_lock);
324 
325 	return num_freed;
326 }
327 
328 /* Return the allocation order based for a page */
329 static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
330 {
331 	if (pool->use_dma_alloc) {
332 		struct ttm_pool_dma *dma = (void *)p->private;
333 
334 		return dma->vaddr & ~PAGE_MASK;
335 	}
336 
337 	return p->private;
338 }
339 
340 /**
341  * ttm_pool_alloc - Fill a ttm_tt object
342  *
343  * @pool: ttm_pool to use
344  * @tt: ttm_tt object to fill
345  * @ctx: operation context
346  *
347  * Fill the ttm_tt object with pages and also make sure to DMA map them when
348  * necessary.
349  *
350  * Returns: 0 on successe, negative error code otherwise.
351  */
352 int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
353 		   struct ttm_operation_ctx *ctx)
354 {
355 	unsigned long num_pages = tt->num_pages;
356 	dma_addr_t *dma_addr = tt->dma_address;
357 	struct page **caching = tt->pages;
358 	struct page **pages = tt->pages;
359 	gfp_t gfp_flags = GFP_USER;
360 	unsigned int i, order;
361 	struct page *p;
362 	int r;
363 
364 	WARN_ON(!num_pages || ttm_tt_is_populated(tt));
365 	WARN_ON(dma_addr && !pool->dev);
366 
367 	if (tt->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
368 		gfp_flags |= __GFP_ZERO;
369 
370 	if (ctx->gfp_retry_mayfail)
371 		gfp_flags |= __GFP_RETRY_MAYFAIL;
372 
373 	if (pool->use_dma32)
374 		gfp_flags |= GFP_DMA32;
375 	else
376 		gfp_flags |= GFP_HIGHUSER;
377 
378 	for (order = min(MAX_ORDER - 1UL, __fls(num_pages)); num_pages;
379 	     order = min_t(unsigned int, order, __fls(num_pages))) {
380 		bool apply_caching = false;
381 		struct ttm_pool_type *pt;
382 
383 		pt = ttm_pool_select_type(pool, tt->caching, order);
384 		p = pt ? ttm_pool_type_take(pt) : NULL;
385 		if (p) {
386 			apply_caching = true;
387 		} else {
388 			p = ttm_pool_alloc_page(pool, gfp_flags, order);
389 			if (p && PageHighMem(p))
390 				apply_caching = true;
391 		}
392 
393 		if (!p) {
394 			if (order) {
395 				--order;
396 				continue;
397 			}
398 			r = -ENOMEM;
399 			goto error_free_all;
400 		}
401 
402 		if (apply_caching) {
403 			r = ttm_pool_apply_caching(caching, pages,
404 						   tt->caching);
405 			if (r)
406 				goto error_free_page;
407 			caching = pages + (1 << order);
408 		}
409 
410 		r = ttm_mem_global_alloc_page(&ttm_mem_glob, p,
411 					      (1 << order) * PAGE_SIZE,
412 					      ctx);
413 		if (r)
414 			goto error_free_page;
415 
416 		if (dma_addr) {
417 			r = ttm_pool_map(pool, order, p, &dma_addr);
418 			if (r)
419 				goto error_global_free;
420 		}
421 
422 		num_pages -= 1 << order;
423 		for (i = 1 << order; i; --i)
424 			*(pages++) = p++;
425 	}
426 
427 	r = ttm_pool_apply_caching(caching, pages, tt->caching);
428 	if (r)
429 		goto error_free_all;
430 
431 	return 0;
432 
433 error_global_free:
434 	ttm_mem_global_free_page(&ttm_mem_glob, p, (1 << order) * PAGE_SIZE);
435 
436 error_free_page:
437 	ttm_pool_free_page(pool, tt->caching, order, p);
438 
439 error_free_all:
440 	num_pages = tt->num_pages - num_pages;
441 	for (i = 0; i < num_pages; ) {
442 		order = ttm_pool_page_order(pool, tt->pages[i]);
443 		ttm_pool_free_page(pool, tt->caching, order, tt->pages[i]);
444 		i += 1 << order;
445 	}
446 
447 	return r;
448 }
449 EXPORT_SYMBOL(ttm_pool_alloc);
450 
451 /**
452  * ttm_pool_free - Free the backing pages from a ttm_tt object
453  *
454  * @pool: Pool to give pages back to.
455  * @tt: ttm_tt object to unpopulate
456  *
457  * Give the packing pages back to a pool or free them
458  */
459 void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
460 {
461 	unsigned int i;
462 
463 	for (i = 0; i < tt->num_pages; ) {
464 		struct page *p = tt->pages[i];
465 		unsigned int order, num_pages;
466 		struct ttm_pool_type *pt;
467 
468 		order = ttm_pool_page_order(pool, p);
469 		num_pages = 1ULL << order;
470 		ttm_mem_global_free_page(&ttm_mem_glob, p,
471 					 num_pages * PAGE_SIZE);
472 		if (tt->dma_address)
473 			ttm_pool_unmap(pool, tt->dma_address[i], num_pages);
474 
475 		pt = ttm_pool_select_type(pool, tt->caching, order);
476 		if (pt)
477 			ttm_pool_type_give(pt, tt->pages[i]);
478 		else
479 			ttm_pool_free_page(pool, tt->caching, order,
480 					   tt->pages[i]);
481 
482 		i += num_pages;
483 	}
484 
485 	while (atomic_long_read(&allocated_pages) > page_pool_size)
486 		ttm_pool_shrink();
487 }
488 EXPORT_SYMBOL(ttm_pool_free);
489 
490 /**
491  * ttm_pool_init - Initialize a pool
492  *
493  * @pool: the pool to initialize
494  * @dev: device for DMA allocations and mappings
495  * @use_dma_alloc: true if coherent DMA alloc should be used
496  * @use_dma32: true if GFP_DMA32 should be used
497  *
498  * Initialize the pool and its pool types.
499  */
500 void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
501 		   bool use_dma_alloc, bool use_dma32)
502 {
503 	unsigned int i, j;
504 
505 	WARN_ON(!dev && use_dma_alloc);
506 
507 	pool->dev = dev;
508 	pool->use_dma_alloc = use_dma_alloc;
509 	pool->use_dma32 = use_dma32;
510 
511 	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
512 		for (j = 0; j < MAX_ORDER; ++j)
513 			ttm_pool_type_init(&pool->caching[i].orders[j],
514 					   pool, i, j);
515 }
516 EXPORT_SYMBOL(ttm_pool_init);
517 
518 /**
519  * ttm_pool_fini - Cleanup a pool
520  *
521  * @pool: the pool to clean up
522  *
523  * Free all pages in the pool and unregister the types from the global
524  * shrinker.
525  */
526 void ttm_pool_fini(struct ttm_pool *pool)
527 {
528 	unsigned int i, j;
529 
530 	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
531 		for (j = 0; j < MAX_ORDER; ++j)
532 			ttm_pool_type_fini(&pool->caching[i].orders[j]);
533 }
534 EXPORT_SYMBOL(ttm_pool_fini);
535 
536 #ifdef CONFIG_DEBUG_FS
537 
538 /* Dump information about the different pool types */
539 static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
540 				    struct seq_file *m)
541 {
542 	unsigned int i;
543 
544 	for (i = 0; i < MAX_ORDER; ++i)
545 		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
546 	seq_puts(m, "\n");
547 }
548 
549 /**
550  * ttm_pool_debugfs - Debugfs dump function for a pool
551  *
552  * @pool: the pool to dump the information for
553  * @m: seq_file to dump to
554  *
555  * Make a debugfs dump with the per pool and global information.
556  */
557 int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
558 {
559 	unsigned int i;
560 
561 	spin_lock(&shrinker_lock);
562 
563 	seq_puts(m, "\t ");
564 	for (i = 0; i < MAX_ORDER; ++i)
565 		seq_printf(m, " ---%2u---", i);
566 	seq_puts(m, "\n");
567 
568 	seq_puts(m, "wc\t:");
569 	ttm_pool_debugfs_orders(global_write_combined, m);
570 	seq_puts(m, "uc\t:");
571 	ttm_pool_debugfs_orders(global_uncached, m);
572 
573 	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
574 		seq_puts(m, "DMA ");
575 		switch (i) {
576 		case ttm_cached:
577 			seq_puts(m, "\t:");
578 			break;
579 		case ttm_write_combined:
580 			seq_puts(m, "wc\t:");
581 			break;
582 		case ttm_uncached:
583 			seq_puts(m, "uc\t:");
584 			break;
585 		}
586 		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
587 	}
588 
589 	seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
590 		   atomic_long_read(&allocated_pages), page_pool_size);
591 
592 	spin_unlock(&shrinker_lock);
593 
594 	return 0;
595 }
596 EXPORT_SYMBOL(ttm_pool_debugfs);
597 
598 #endif
599 
600 /* As long as pages are available make sure to release at least one */
601 static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
602 					    struct shrink_control *sc)
603 {
604 	unsigned long num_freed = 0;
605 
606 	do
607 		num_freed += ttm_pool_shrink();
608 	while (!num_freed && atomic_long_read(&allocated_pages));
609 
610 	return num_freed;
611 }
612 
613 /* Return the number of pages available or SHRINK_EMPTY if we have none */
614 static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
615 					     struct shrink_control *sc)
616 {
617 	unsigned long num_pages = atomic_long_read(&allocated_pages);
618 
619 	return num_pages ? num_pages : SHRINK_EMPTY;
620 }
621 
622 /**
623  * ttm_pool_mgr_init - Initialize globals
624  *
625  * @num_pages: default number of pages
626  *
627  * Initialize the global locks and lists for the MM shrinker.
628  */
629 int ttm_pool_mgr_init(unsigned long num_pages)
630 {
631 	unsigned int i;
632 
633 	if (!page_pool_size)
634 		page_pool_size = num_pages;
635 
636 	spin_lock_init(&shrinker_lock);
637 	INIT_LIST_HEAD(&shrinker_list);
638 
639 	for (i = 0; i < MAX_ORDER; ++i) {
640 		ttm_pool_type_init(&global_write_combined[i], NULL,
641 				   ttm_write_combined, i);
642 		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
643 	}
644 
645 	mm_shrinker.count_objects = ttm_pool_shrinker_count;
646 	mm_shrinker.scan_objects = ttm_pool_shrinker_scan;
647 	mm_shrinker.seeks = 1;
648 	return register_shrinker(&mm_shrinker);
649 }
650 
651 /**
652  * ttm_pool_mgr_fini - Finalize globals
653  *
654  * Cleanup the global pools and unregister the MM shrinker.
655  */
656 void ttm_pool_mgr_fini(void)
657 {
658 	unsigned int i;
659 
660 	for (i = 0; i < MAX_ORDER; ++i) {
661 		ttm_pool_type_fini(&global_write_combined[i]);
662 		ttm_pool_type_fini(&global_uncached[i]);
663 	}
664 
665 	unregister_shrinker(&mm_shrinker);
666 	WARN_ON(!list_empty(&shrinker_list));
667 }
668