// SPDX-License-Identifier: GPL-2.0-only
/*
 * zbud.c
 *
 * Copyright (C) 2013, Seth Jennings, IBM
 *
 * Concepts based on zcache internal zbud allocator by Dan Magenheimer.
 *
 * zbud is a special purpose allocator for storing compressed pages.  Contrary
 * to what its name may suggest, zbud is not a buddy allocator, but rather an
 * allocator that "buddies" two compressed pages together in a single memory
 * page.
 *
 * While this design limits storage density, it has simple and deterministic
 * reclaim properties that make it preferable to a higher density approach when
 * reclaim will be used.
 *
 * zbud works by storing compressed pages, or "zpages", together in pairs in a
 * single memory page called a "zbud page".  The first buddy is "left
 * justified" at the beginning of the zbud page, and the last buddy is "right
 * justified" at the end of the zbud page.  The benefit is that if either
 * buddy is freed, the freed buddy space, coalesced with whatever slack space
 * existed between the buddies, results in the largest possible free region
 * within the zbud page.
 *
 * zbud also provides an attractive lower bound on density.  The ratio of
 * zpages to zbud pages cannot be less than 1.  This ensures that zbud can
 * never "do harm" by using more pages to store zpages than the uncompressed
 * zpages would have used on their own.
 *
 * zbud pages are divided into "chunks".  The size of the chunks is fixed at
 * compile time and determined by NCHUNKS_ORDER below.  Dividing zbud pages
 * into chunks allows organizing unbuddied zbud pages into a manageable number
 * of unbuddied lists according to the number of free chunks available in the
 * zbud page.
 *
 * The zbud API differs from that of conventional allocators in that the
 * allocation function, zbud_alloc(), returns an opaque handle to the user,
 * not a dereferenceable pointer.  The user must map the handle using
 * zbud_map() in order to get a usable pointer by which to access the
 * allocation data and unmap the handle with zbud_unmap() when operations
 * on the allocation data are complete.
 */
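
/*
 * A minimal usage sketch of the handle-based API described above.  This is
 * illustrative only: my_ops, cbuf and clen are hypothetical, and error
 * handling is reduced to the essentials.
 *
 *	struct zbud_pool *pool = zbud_create_pool(GFP_KERNEL, &my_ops);
 *	unsigned long handle;
 *	void *dst;
 *
 *	if (!pool)
 *		return -ENOMEM;
 *	if (zbud_alloc(pool, clen, GFP_KERNEL, &handle))
 *		goto fail;
 *	dst = zbud_map(pool, handle);	// handle -> usable pointer
 *	memcpy(dst, cbuf, clen);	// cbuf/clen: compressed data and size
 *	zbud_unmap(pool, handle);	// dst must not be used past this point
 *	...
 *	zbud_free(pool, handle);
 *	zbud_destroy_pool(pool);
 */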

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zbud.h>
#include <linux/zpool.h>

/*****************
 * Structures
 *****************/
/*
 * NCHUNKS_ORDER determines the internal allocation granularity, effectively
 * adjusting internal fragmentation.  It also determines the number of
 * freelists maintained in each pool.  An NCHUNKS_ORDER of 6 means that the
 * allocation granularity will be in chunks of size PAGE_SIZE/64.  Since one
 * chunk of each allocated page is occupied by the zbud header, NCHUNKS works
 * out to 63, the maximum number of free chunks in a zbud page; there are
 * likewise 63 freelists per pool.
 */
#define NCHUNKS_ORDER	6

#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
#define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
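
/*
 * Worked example, assuming a 4K page (PAGE_SHIFT == 12):
 *	CHUNK_SHIFT = 12 - 6 = 6, so CHUNK_SIZE = 64 bytes
 *	ZHDR_SIZE_ALIGNED = 64 (the header consumes the first chunk)
 *	NCHUNKS = (4096 - 64) >> 6 = 63 usable chunks per zbud page
 */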
75  
76  /**
77   * struct zbud_pool - stores metadata for each zbud pool
78   * @lock:	protects all pool fields and first|last_chunk fields of any
79   *		zbud page in the pool
80   * @unbuddied:	array of lists tracking zbud pages that only contain one buddy;
81   *		the lists each zbud page is added to depends on the size of
82   *		its free region.
83   * @buddied:	list tracking the zbud pages that contain two buddies;
84   *		these zbud pages are full
85   * @lru:	list tracking the zbud pages in LRU order by most recently
86   *		added buddy.
87   * @pages_nr:	number of zbud pages in the pool.
88   * @ops:	pointer to a structure of user defined operations specified at
89   *		pool creation time.
90   *
91   * This structure is allocated at pool creation time and maintains metadata
92   * pertaining to a particular zbud pool.
93   */
94  struct zbud_pool {
95  	spinlock_t lock;
96  	struct list_head unbuddied[NCHUNKS];
97  	struct list_head buddied;
98  	struct list_head lru;
99  	u64 pages_nr;
100  	const struct zbud_ops *ops;
101  #ifdef CONFIG_ZPOOL
102  	struct zpool *zpool;
103  	const struct zpool_ops *zpool_ops;
104  #endif
105  };

/*
 * struct zbud_header - zbud page metadata occupying the first chunk of each
 *			zbud page.
 * @buddy:	links the zbud page into the unbuddied/buddied lists in the pool
 * @lru:	links the zbud page into the lru list in the pool
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @last_chunks:	the size of the last buddy in chunks, 0 if free
 * @under_reclaim:	zbud page is being reclaimed; defers freeing to the
 *			reclaim path (see zbud_free() and zbud_reclaim_page())
 */
struct zbud_header {
	struct list_head buddy;
	struct list_head lru;
	unsigned int first_chunks;
	unsigned int last_chunks;
	bool under_reclaim;
};

/*****************
 * zpool
 *****************/

#ifdef CONFIG_ZPOOL

static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle)
{
	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
		return pool->zpool_ops->evict(pool->zpool, handle);
	else
		return -ENOENT;
}

static const struct zbud_ops zbud_zpool_ops = {
	.evict =	zbud_zpool_evict
};

static void *zbud_zpool_create(const char *name, gfp_t gfp,
			       const struct zpool_ops *zpool_ops,
			       struct zpool *zpool)
{
	struct zbud_pool *pool;

	pool = zbud_create_pool(gfp, zpool_ops ? &zbud_zpool_ops : NULL);
	if (pool) {
		pool->zpool = zpool;
		pool->zpool_ops = zpool_ops;
	}
	return pool;
}

static void zbud_zpool_destroy(void *pool)
{
	zbud_destroy_pool(pool);
}

static int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return zbud_alloc(pool, size, gfp, handle);
}

static void zbud_zpool_free(void *pool, unsigned long handle)
{
	zbud_free(pool, handle);
}

static int zbud_zpool_shrink(void *pool, unsigned int pages,
			unsigned int *reclaimed)
{
	unsigned int total = 0;
	int ret = -EINVAL;

	while (total < pages) {
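		/* for each page freed, try up to 8 pages from the LRU */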
		ret = zbud_reclaim_page(pool, 8);
		if (ret < 0)
			break;
		total++;
	}

	if (reclaimed)
		*reclaimed = total;

	return ret;
}

static void *zbud_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return zbud_map(pool, handle);
}

static void zbud_zpool_unmap(void *pool, unsigned long handle)
{
	zbud_unmap(pool, handle);
}

static u64 zbud_zpool_total_size(void *pool)
{
	return zbud_get_pool_size(pool) * PAGE_SIZE;
}

static struct zpool_driver zbud_zpool_driver = {
	.type =		"zbud",
	.owner =	THIS_MODULE,
	.create =	zbud_zpool_create,
	.destroy =	zbud_zpool_destroy,
	.malloc =	zbud_zpool_malloc,
	.free =		zbud_zpool_free,
	.shrink =	zbud_zpool_shrink,
	.map =		zbud_zpool_map,
	.unmap =	zbud_zpool_unmap,
	.total_size =	zbud_zpool_total_size,
};

MODULE_ALIAS("zpool-zbud");
#endif /* CONFIG_ZPOOL */
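
/*
 * Through this driver, zbud is normally consumed via the generic zpool API
 * rather than directly.  A hedged sketch, assuming the zpool interface of
 * this kernel (my_zpool_ops is hypothetical; error handling elided):
 *
 *	struct zpool *zp = zpool_create_pool("zbud", "myname", GFP_KERNEL,
 *					     &my_zpool_ops);
 *	unsigned long handle;
 *	void *p;
 *
 *	zpool_malloc(zp, len, GFP_KERNEL, &handle);
 *	p = zpool_map_handle(zp, handle, ZPOOL_MM_WO);	// write-only mapping
 *	memcpy(p, src, len);
 *	zpool_unmap_handle(zp, handle);
 *	...
 *	zpool_free(zp, handle);
 *	zpool_destroy_pool(zp);
 */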

/*****************
 * Helpers
 *****************/
/* Just to make the code easier to read */
enum buddy {
	FIRST,
	LAST
};

/* Converts an allocation size in bytes to size in zbud chunks */
static int size_to_chunks(size_t size)
{
	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}
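
/*
 * For example, with 64-byte chunks (4K page, NCHUNKS_ORDER 6), a 100-byte
 * allocation rounds up to (100 + 63) >> 6 = 2 chunks, while a 64-byte one
 * needs exactly (64 + 63) >> 6 = 1 chunk.
 */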

#define for_each_unbuddied_list(_iter, _begin) \
	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)

/* Initializes the zbud header of a newly allocated zbud page */
static struct zbud_header *init_zbud_page(struct page *page)
{
	struct zbud_header *zhdr = page_address(page);

	zhdr->first_chunks = 0;
	zhdr->last_chunks = 0;
	INIT_LIST_HEAD(&zhdr->buddy);
	INIT_LIST_HEAD(&zhdr->lru);
	zhdr->under_reclaim = false;
	return zhdr;
}

/* Resets the struct page fields and frees the page */
static void free_zbud_page(struct zbud_header *zhdr)
{
	__free_page(virt_to_page(zhdr));
}

/*
 * Encodes the handle of a particular buddy within a zbud page
 * Pool lock should be held as this function accesses first|last_chunks
 */
static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud)
{
	unsigned long handle;

	/*
	 * For now, the encoded handle is actually just the pointer to the data
	 * but this might not always be the case.  A little information hiding.
	 * Add CHUNK_SIZE to the handle if it is the first allocation to jump
	 * over the zbud header in the first chunk.
	 */
	handle = (unsigned long)zhdr;
	if (bud == FIRST)
		/* skip over zbud header */
		handle += ZHDR_SIZE_ALIGNED;
	else /* bud == LAST */
		handle += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
	return handle;
}

/* Returns the zbud page where a given handle is stored */
static struct zbud_header *handle_to_zbud_header(unsigned long handle)
{
	return (struct zbud_header *)(handle & PAGE_MASK);
}
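
/*
 * Illustration of the handle round trip, assuming a 4K page and a zbud page
 * (and hence its header) at the hypothetical address 0xffff888012340000:
 * a LAST buddy of 4 chunks encodes to 0xffff888012340000 + 4096 - 4*64 =
 * 0xffff888012340f00, and masking with PAGE_MASK recovers the header address.
 */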

/* Returns the number of free chunks in a zbud page */
static int num_free_chunks(struct zbud_header *zhdr)
{
	/*
	 * Rather than branch for different situations, just use the fact that
	 * free buddies have a length of zero to simplify everything.
	 */
	return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
}
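
/*
 * E.g. with NCHUNKS == 63, a page holding a 10-chunk first buddy and a
 * 20-chunk last buddy has 63 - 10 - 20 = 33 free chunks; if both buddies
 * are free (both lengths zero), all 63 chunks are free.
 */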

/*****************
 * API Functions
 *****************/
/**
 * zbud_create_pool() - create a new zbud pool
 * @gfp:	gfp flags when allocating the zbud pool structure
 * @ops:	user-defined operations for the zbud pool
 *
 * Return: pointer to the new zbud pool or NULL if the metadata allocation
 * failed.
 */
struct zbud_pool *zbud_create_pool(gfp_t gfp, const struct zbud_ops *ops)
{
	struct zbud_pool *pool;
	int i;

	pool = kzalloc(sizeof(struct zbud_pool), gfp);
	if (!pool)
		return NULL;
	spin_lock_init(&pool->lock);
	for_each_unbuddied_list(i, 0)
		INIT_LIST_HEAD(&pool->unbuddied[i]);
	INIT_LIST_HEAD(&pool->buddied);
	INIT_LIST_HEAD(&pool->lru);
	pool->pages_nr = 0;
	pool->ops = ops;
	return pool;
}

/**
 * zbud_destroy_pool() - destroys an existing zbud pool
 * @pool:	the zbud pool to be destroyed
 *
 * The pool should be emptied before this function is called.
 */
void zbud_destroy_pool(struct zbud_pool *pool)
{
	kfree(pool);
}

/**
 * zbud_alloc() - allocates a region of a given size
 * @pool:	zbud pool from which to allocate
 * @size:	size in bytes of the desired allocation
 * @gfp:	gfp flags used if the pool needs to grow
 * @handle:	handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough to
 * satisfy the allocation request.  A search of the unbuddied lists is
 * performed first.  If no suitable free region is found, then a new page is
 * allocated and added to the pool to satisfy the request.
 *
 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
 * as zbud pool pages.
 *
 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
 * gfp arguments are invalid, -ENOSPC if the allocation is too large to ever
 * fit in a zbud page, or -ENOMEM if the pool was unable to allocate a new
 * page.
 */
int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks, i, freechunks;
	struct zbud_header *zhdr = NULL;
	enum buddy bud;
	struct page *page;

	if (!size || (gfp & __GFP_HIGHMEM))
		return -EINVAL;
	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		return -ENOSPC;
	chunks = size_to_chunks(size);
	spin_lock(&pool->lock);

	/* First, try to find an unbuddied zbud page. */
	for_each_unbuddied_list(i, chunks) {
		if (!list_empty(&pool->unbuddied[i])) {
			zhdr = list_first_entry(&pool->unbuddied[i],
					struct zbud_header, buddy);
			list_del(&zhdr->buddy);
			if (zhdr->first_chunks == 0)
				bud = FIRST;
			else
				bud = LAST;
			goto found;
		}
	}

	/* Couldn't find unbuddied zbud page, create new one */
	spin_unlock(&pool->lock);
	page = alloc_page(gfp);
	if (!page)
		return -ENOMEM;
	spin_lock(&pool->lock);
	pool->pages_nr++;
	zhdr = init_zbud_page(page);
	bud = FIRST;

found:
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else
		zhdr->last_chunks = chunks;

	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) {
		/* Add to unbuddied list */
		freechunks = num_free_chunks(zhdr);
		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
	} else {
		/* Add to buddied list */
		list_add(&zhdr->buddy, &pool->buddied);
	}

	/* Add/move zbud page to beginning of LRU */
	if (!list_empty(&zhdr->lru))
		list_del(&zhdr->lru);
	list_add(&zhdr->lru, &pool->lru);

	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);

	return 0;
}

/**
 * zbud_free() - frees the allocation associated with the given handle
 * @pool:	pool in which the allocation resided
 * @handle:	handle associated with the allocation returned by zbud_alloc()
 *
 * In the case that the zbud page in which the allocation resides is under
 * reclaim, as indicated by the under_reclaim flag in the zbud header, this
 * function only sets the first|last_chunks to 0.  The page is actually
 * freed once both buddies are evicted (see zbud_reclaim_page() below).
 */
void zbud_free(struct zbud_pool *pool, unsigned long handle)
{
	struct zbud_header *zhdr;
	int freechunks;

	spin_lock(&pool->lock);
	zhdr = handle_to_zbud_header(handle);

	/* If first buddy, handle - ZHDR_SIZE_ALIGNED will be page aligned */
	if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK)
		zhdr->last_chunks = 0;
	else
		zhdr->first_chunks = 0;

	if (zhdr->under_reclaim) {
		/* zbud page is under reclaim, reclaim will free */
		spin_unlock(&pool->lock);
		return;
	}

	/* Remove from existing buddy list */
	list_del(&zhdr->buddy);

	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
		/* zbud page is empty, free */
		list_del(&zhdr->lru);
		free_zbud_page(zhdr);
		pool->pages_nr--;
	} else {
		/* Add to unbuddied list */
		freechunks = num_free_chunks(zhdr);
		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
	}

	spin_unlock(&pool->lock);
}

/**
 * zbud_reclaim_page() - evicts allocations from a pool page and frees it
 * @pool:	pool from which a page will attempt to be evicted
 * @retries:	number of pages on the LRU list for which eviction will
 *		be attempted before failing
 *
 * zbud reclaim is different from normal system reclaim in that it is done
 * from the bottom, up.  This is because only the bottom layer, zbud, has
 * information on how the allocations are organized within each zbud page.
 * This has the potential to create interesting locking situations between
 * zbud and the user, however.
 *
 * To avoid these, this is how zbud_reclaim_page() should be called:
 *
 * The user detects a page should be reclaimed and calls zbud_reclaim_page().
 * zbud_reclaim_page() will remove a zbud page from the pool LRU list and call
 * the user-defined eviction handler with the pool and handle as arguments.
 *
 * If the handle can not be evicted, the eviction handler should return
 * non-zero.  zbud_reclaim_page() will add the zbud page back to the
 * appropriate list and try the next zbud page on the LRU up to
 * a user defined number of retries.
 *
 * If the handle is successfully evicted, the eviction handler should
 * return 0 _and_ should have called zbud_free() on the handle.  zbud_free()
 * contains logic to delay freeing the page if the page is under reclaim,
 * as indicated by the under_reclaim flag in the zbud header.  (A sketch of
 * a conforming eviction handler follows this function.)
 *
 * If all buddies in the zbud page are successfully evicted, then the
 * zbud page can be freed.
 *
 * Return: 0 if page is successfully freed, otherwise -EINVAL if there are
 * no pages to evict or an eviction handler is not registered, or -EAGAIN if
 * the retry limit was hit.
 */
int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
{
	int i, ret, freechunks;
	struct zbud_header *zhdr;
	unsigned long first_handle = 0, last_handle = 0;

	spin_lock(&pool->lock);
	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
			retries == 0) {
		spin_unlock(&pool->lock);
		return -EINVAL;
	}
	for (i = 0; i < retries; i++) {
		zhdr = list_last_entry(&pool->lru, struct zbud_header, lru);
		list_del(&zhdr->lru);
		list_del(&zhdr->buddy);
		/* Protect zbud page against free */
		zhdr->under_reclaim = true;
		/*
		 * We need to encode the handles before unlocking, since we can
		 * race with free that will set (first|last)_chunks to 0
		 */
		first_handle = 0;
		last_handle = 0;
		if (zhdr->first_chunks)
			first_handle = encode_handle(zhdr, FIRST);
		if (zhdr->last_chunks)
			last_handle = encode_handle(zhdr, LAST);
		spin_unlock(&pool->lock);

		/* Issue the eviction callback(s) */
		if (first_handle) {
			ret = pool->ops->evict(pool, first_handle);
			if (ret)
				goto next;
		}
		if (last_handle) {
			ret = pool->ops->evict(pool, last_handle);
			if (ret)
				goto next;
		}
next:
		spin_lock(&pool->lock);
		zhdr->under_reclaim = false;
		if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
			/*
			 * Both buddies are now free, free the zbud page and
			 * return success.
			 */
			free_zbud_page(zhdr);
			pool->pages_nr--;
			spin_unlock(&pool->lock);
			return 0;
		} else if (zhdr->first_chunks == 0 ||
				zhdr->last_chunks == 0) {
			/* add to unbuddied list */
			freechunks = num_free_chunks(zhdr);
			list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
		} else {
			/* add to buddied list */
			list_add(&zhdr->buddy, &pool->buddied);
		}

		/* add to beginning of LRU */
		list_add(&zhdr->lru, &pool->lru);
	}
	spin_unlock(&pool->lock);
	return -EAGAIN;
}
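
/*
 * A minimal sketch of a conforming eviction handler, as described above.
 * This is illustrative only: my_evict and my_writeback are hypothetical,
 * and a real handler (e.g. zswap's) must also cope with its own metadata
 * and races against concurrent loads.
 *
 *	static int my_evict(struct zbud_pool *pool, unsigned long handle)
 *	{
 *		void *data = zbud_map(pool, handle);
 *		int err = my_writeback(data);	// hypothetical writeback step
 *
 *		zbud_unmap(pool, handle);
 *		if (err)
 *			return err;	// non-zero: zbud tries another page
 *		zbud_free(pool, handle);	// must precede returning 0
 *		return 0;
 *	}
 */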

/**
 * zbud_map() - maps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be mapped
 *
 * While trivial for zbud, the mapping functions for other allocators
 * implementing this allocation API could have more complex information encoded
 * in the handle and could create temporary mappings to make the data
 * accessible to the user.
 *
 * Return: a pointer to the mapped allocation
 */
void *zbud_map(struct zbud_pool *pool, unsigned long handle)
{
	return (void *)(handle);
}

/**
 * zbud_unmap() - unmaps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be unmapped
 */
void zbud_unmap(struct zbud_pool *pool, unsigned long handle)
{
}

/**
 * zbud_get_pool_size() - gets the zbud pool size in pages
 * @pool:	pool whose size is being queried
 *
 * Return: size in pages of the given pool.  The pool lock need not be
 * taken to access pages_nr.
 */
u64 zbud_get_pool_size(struct zbud_pool *pool)
{
	return pool->pages_nr;
}

static int __init init_zbud(void)
{
	/* Make sure the zbud header will fit in one chunk */
	BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
	pr_info("loaded\n");

#ifdef CONFIG_ZPOOL
	zpool_register_driver(&zbud_zpool_driver);
#endif

	return 0;
}

static void __exit exit_zbud(void)
{
#ifdef CONFIG_ZPOOL
	zpool_unregister_driver(&zbud_zpool_driver);
#endif

	pr_info("unloaded\n");
}

module_init(init_zbud);
module_exit(exit_zbud);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
MODULE_DESCRIPTION("Buddy Allocator for Compressed Pages");