// SPDX-License-Identifier: GPL-2.0-only
/*
 * z3fold.c
 *
 * Author: Vitaly Wool <vitaly.wool@konsulko.com>
 * Copyright (C) 2016, Sony Mobile Communications Inc.
 *
 * This implementation is based on zbud written by Seth Jennings.
 *
 * z3fold is a special purpose allocator for storing compressed pages. It
 * can store up to three compressed pages per page which improves the
 * compression ratio of zbud while retaining its main concepts (e.g. always
 * storing an integral number of objects per page) and simplicity.
 * It still has simple and deterministic reclaim properties that make it
 * preferable to a higher density approach (with no requirement on integral
 * number of objects per page) when reclaim is used.
 *
 * As in zbud, pages are divided into "chunks". The size of the chunks is
 * fixed at compile time and is determined by NCHUNKS_ORDER below.
 *
 * z3fold doesn't export any API and is meant to be used via zpool API.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/migrate.h>
#include <linux/node.h>
#include <linux/compaction.h>
#include <linux/percpu.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/fs.h>
#include <linux/preempt.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zpool.h>
#include <linux/magic.h>

/*
 * NCHUNKS_ORDER determines the internal allocation granularity, effectively
 * adjusting internal fragmentation. It also determines the number of
 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
 * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
 * at the beginning of an allocated page are occupied by the z3fold header, so
 * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
 * which is the maximum number of free chunks in a z3fold page; there will
 * also be 63 (or 62, respectively) freelists per pool.
 */
#define NCHUNKS_ORDER	6

#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
#define ZHDR_CHUNKS	(ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
#define TOTAL_CHUNKS	(PAGE_SIZE >> CHUNK_SHIFT)
#define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)

#define BUDDY_MASK	(0x3)
#define BUDDY_SHIFT	2
#define SLOTS_ALIGN	(0x40)
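/*
 * Worked example (illustrative only, assuming a 4 KiB PAGE_SIZE and the
 * common case of the header fitting into a single chunk):
 * CHUNK_SHIFT = 12 - 6 = 6, so CHUNK_SIZE = 64 bytes and
 * TOTAL_CHUNKS = 4096 / 64 = 64. With ZHDR_SIZE_ALIGNED = 64 (one chunk),
 * ZHDR_CHUNKS = 1 and NCHUNKS = 63, matching the 63/62 figures quoted in
 * the comment above.
 */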
/*****************
 * Structures
*****************/
struct z3fold_pool;
struct z3fold_ops {
	int (*evict)(struct z3fold_pool *pool, unsigned long handle);
};

enum buddy {
	HEADLESS = 0,
	FIRST,
	MIDDLE,
	LAST,
	BUDDIES_MAX = LAST
};

struct z3fold_buddy_slots {
	/*
	 * we are using BUDDY_MASK in handle_to_buddy etc. so there should
	 * be enough slots to hold all possible variants
	 */
	unsigned long slot[BUDDY_MASK + 1];
	unsigned long pool; /* back link + flags */
};
#define HANDLE_FLAG_MASK	(0x03)

/*
 * struct z3fold_header - z3fold page metadata occupying first chunks of each
 *			z3fold page, except for HEADLESS pages
 * @buddy:		links the z3fold page into the relevant list in the
 *			pool
 * @page_lock:		per-page lock
 * @refcount:		reference count for the z3fold page
 * @work:		work_struct for page layout optimization
 * @slots:		pointer to the structure holding buddy slots
 * @pool:		pointer to the containing pool
 * @cpu:		CPU which this page "belongs" to
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
 * @last_chunks:	the size of the last buddy in chunks, 0 if free
 * @start_middle:	index of the first chunk occupied by the middle buddy
 * @first_num:		the starting number (for the first handle)
 * @mapped_count:	the number of objects currently mapped
 */
struct z3fold_header {
	struct list_head buddy;
	spinlock_t page_lock;
	struct kref refcount;
	struct work_struct work;
	struct z3fold_buddy_slots *slots;
	struct z3fold_pool *pool;
	short cpu;
	unsigned short first_chunks;
	unsigned short middle_chunks;
	unsigned short last_chunks;
	unsigned short start_middle;
	unsigned short first_num:2;
	unsigned short mapped_count:2;
};
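/*
 * Layout of a non-HEADLESS z3fold page, in chunks (sketch):
 *
 *   [ header | FIRST buddy | free | MIDDLE buddy | free | LAST buddy ]
 *     ZHDR_CHUNKS            ^ start_middle              ^ ends at PAGE_SIZE
 *
 * The FIRST buddy starts right after the header, the LAST buddy ends at the
 * page boundary, and the MIDDLE buddy starts at start_middle chunks from the
 * beginning of the page (see z3fold_map() and z3fold_compact_page() below).
 */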
/**
 * struct z3fold_pool - stores metadata for each z3fold pool
 * @name:	pool name
 * @lock:	protects pool unbuddied/lru lists
 * @stale_lock:	protects pool stale page list
 * @unbuddied:	per-cpu array of lists tracking z3fold pages that contain 2-
 *		buddies; the list each z3fold page is added to depends on
 *		the size of its free region.
 * @lru:	list tracking the z3fold pages in LRU order by most recently
 *		added buddy.
 * @stale:	list of pages marked for freeing
 * @pages_nr:	number of z3fold pages in the pool.
 * @c_handle:	cache for z3fold_buddy_slots allocation
 * @ops:	pointer to a structure of user defined operations specified at
 *		pool creation time.
 * @zpool:	zpool driver back-reference
 * @zpool_ops:	zpool operations structure (provides the evict callback)
 * @compact_wq:	workqueue for page layout background optimization
 * @release_wq:	workqueue for safe page release
 * @work:	work_struct for safe page release
 * @inode:	inode for z3fold pseudo filesystem
 *
 * This structure is allocated at pool creation time and maintains metadata
 * pertaining to a particular z3fold pool.
 */
struct z3fold_pool {
	const char *name;
	spinlock_t lock;
	spinlock_t stale_lock;
	struct list_head *unbuddied;
	struct list_head lru;
	struct list_head stale;
	atomic64_t pages_nr;
	struct kmem_cache *c_handle;
	const struct z3fold_ops *ops;
	struct zpool *zpool;
	const struct zpool_ops *zpool_ops;
	struct workqueue_struct *compact_wq;
	struct workqueue_struct *release_wq;
	struct work_struct work;
	struct inode *inode;
};

/*
 * Internal z3fold page flags
 */
enum z3fold_page_flags {
	PAGE_HEADLESS = 0,
	MIDDLE_CHUNK_MAPPED,
	NEEDS_COMPACTING,
	PAGE_STALE,
	PAGE_CLAIMED, /* by either reclaim or free */
};

/*****************
 * Helpers
*****************/

/* Converts an allocation size in bytes to size in z3fold chunks */
static int size_to_chunks(size_t size)
{
	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}

#define for_each_unbuddied_list(_iter, _begin) \
	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)

static void compact_page_work(struct work_struct *w);

static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
							gfp_t gfp)
{
	struct z3fold_buddy_slots *slots;

	slots = kmem_cache_alloc(pool->c_handle,
				 (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE)));

	if (slots) {
		memset(slots->slot, 0, sizeof(slots->slot));
		slots->pool = (unsigned long)pool;
	}

	return slots;
}

static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s)
{
	return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK);
}
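/*
 * A (non-headless) handle is the address of one slot[] entry inside a
 * z3fold_buddy_slots structure. The slots are allocated from a kmem_cache
 * with SLOTS_ALIGN (64-byte) alignment, so masking the low bits of a handle
 * recovers the containing structure.
 */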
static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle)
{
	return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1));
}

static inline void free_handle(unsigned long handle)
{
	struct z3fold_buddy_slots *slots;
	int i;
	bool is_free;

	if (handle & (1 << PAGE_HEADLESS))
		return;

	WARN_ON(*(unsigned long *)handle == 0);
	*(unsigned long *)handle = 0;
	slots = handle_to_slots(handle);
	is_free = true;
	for (i = 0; i <= BUDDY_MASK; i++) {
		if (slots->slot[i]) {
			is_free = false;
			break;
		}
	}

	if (is_free) {
		struct z3fold_pool *pool = slots_to_pool(slots);

		kmem_cache_free(pool->c_handle, slots);
	}
}

static int z3fold_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type z3fold_fs = {
	.name		= "z3fold",
	.init_fs_context = z3fold_init_fs_context,
	.kill_sb	= kill_anon_super,
};

static struct vfsmount *z3fold_mnt;
static int z3fold_mount(void)
{
	int ret = 0;

	z3fold_mnt = kern_mount(&z3fold_fs);
	if (IS_ERR(z3fold_mnt))
		ret = PTR_ERR(z3fold_mnt);

	return ret;
}

static void z3fold_unmount(void)
{
	kern_unmount(z3fold_mnt);
}

static const struct address_space_operations z3fold_aops;
static int z3fold_register_migration(struct z3fold_pool *pool)
{
	pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb);
	if (IS_ERR(pool->inode)) {
		pool->inode = NULL;
		return 1;
	}

	pool->inode->i_mapping->private_data = pool;
	pool->inode->i_mapping->a_ops = &z3fold_aops;
	return 0;
}

static void z3fold_unregister_migration(struct z3fold_pool *pool)
{
	if (pool->inode)
		iput(pool->inode);
}

/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page,
					struct z3fold_pool *pool, gfp_t gfp)
{
	struct z3fold_header *zhdr = page_address(page);
	struct z3fold_buddy_slots *slots = alloc_slots(pool, gfp);

	if (!slots)
		return NULL;

	INIT_LIST_HEAD(&page->lru);
	clear_bit(PAGE_HEADLESS, &page->private);
	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	clear_bit(PAGE_STALE, &page->private);
	clear_bit(PAGE_CLAIMED, &page->private);

	spin_lock_init(&zhdr->page_lock);
	kref_init(&zhdr->refcount);
	zhdr->first_chunks = 0;
	zhdr->middle_chunks = 0;
	zhdr->last_chunks = 0;
	zhdr->first_num = 0;
	zhdr->start_middle = 0;
	zhdr->cpu = -1;
	zhdr->slots = slots;
	zhdr->pool = pool;
	INIT_LIST_HEAD(&zhdr->buddy);
	INIT_WORK(&zhdr->work, compact_page_work);
	return zhdr;
}

/* Resets the struct page fields and frees the page */
static void free_z3fold_page(struct page *page, bool headless)
{
	if (!headless) {
		lock_page(page);
		__ClearPageMovable(page);
		unlock_page(page);
	}
	ClearPagePrivate(page);
	__free_page(page);
}

/* Lock a z3fold page */
static inline void z3fold_page_lock(struct z3fold_header *zhdr)
{
	spin_lock(&zhdr->page_lock);
}

/* Try to lock a z3fold page */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
	return spin_trylock(&zhdr->page_lock);
}

/* Unlock a z3fold page */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
	spin_unlock(&zhdr->page_lock);
}

/* Helper function to build the index */
static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
{
	return (bud + zhdr->first_num) & BUDDY_MASK;
}
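/*
 * The slot index is rotated by first_num so that existing handles stay valid
 * when z3fold_compact_page() moves the middle buddy to the front: it bumps
 * first_num along with the move, and since FIRST == MIDDLE - 1, the index
 * (FIRST + new first_num) selects the same slot that (MIDDLE + old first_num)
 * selected before the move.
 */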
/*
 * Encodes the handle of a particular buddy within a z3fold page
 * Pool lock should be held as this function accesses first_num
 */
static unsigned long __encode_handle(struct z3fold_header *zhdr,
				struct z3fold_buddy_slots *slots,
				enum buddy bud)
{
	unsigned long h = (unsigned long)zhdr;
	int idx = 0;

	/*
	 * For a headless page, its handle is its pointer with the extra
	 * PAGE_HEADLESS bit set
	 */
	if (bud == HEADLESS)
		return h | (1 << PAGE_HEADLESS);

	/* otherwise, return pointer to encoded handle */
	idx = __idx(zhdr, bud);
	h += idx;
	if (bud == LAST)
		h |= (zhdr->last_chunks << BUDDY_SHIFT);

	slots->slot[idx] = h;
	return (unsigned long)&slots->slot[idx];
}

static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
{
	return __encode_handle(zhdr, zhdr->slots, bud);
}

/* Returns the z3fold page where a given handle is stored */
static inline struct z3fold_header *handle_to_z3fold_header(unsigned long h)
{
	unsigned long addr = h;

	if (!(addr & (1 << PAGE_HEADLESS)))
		addr = *(unsigned long *)h;

	return (struct z3fold_header *)(addr & PAGE_MASK);
}

/* only for LAST bud, returns zero otherwise */
static unsigned short handle_to_chunks(unsigned long handle)
{
	unsigned long addr = *(unsigned long *)handle;

	return (addr & ~PAGE_MASK) >> BUDDY_SHIFT;
}

/*
 * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
 * but that doesn't matter, because the masking will result in the
 * correct buddy number.
 */
static enum buddy handle_to_buddy(unsigned long handle)
{
	struct z3fold_header *zhdr;
	unsigned long addr;

	WARN_ON(handle & (1 << PAGE_HEADLESS));
	addr = *(unsigned long *)handle;
	zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
	return (addr - zhdr->first_num) & BUDDY_MASK;
}

static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
{
	return zhdr->pool;
}

static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
{
	struct page *page = virt_to_page(zhdr);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	WARN_ON(!list_empty(&zhdr->buddy));
	set_bit(PAGE_STALE, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	spin_lock(&pool->lock);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&pool->lock);
	if (locked)
		z3fold_page_unlock(zhdr);
	spin_lock(&pool->stale_lock);
	list_add(&zhdr->buddy, &pool->stale);
	queue_work(pool->release_wq, &pool->work);
	spin_unlock(&pool->stale_lock);
}

static void __attribute__((__unused__))
			release_z3fold_page(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	__release_z3fold_page(zhdr, false);
}

static void release_z3fold_page_locked(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}

static void release_z3fold_page_locked_list(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}
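/*
 * Deferred freeing of stale pages: __release_z3fold_page() parks pages on
 * pool->stale and queues this work, because cancel_work_sync() on the page's
 * compaction work may sleep and therefore cannot be done from the contexts
 * in which a page is released.
 */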
static void free_pages_work(struct work_struct *w)
{
	struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work);

	spin_lock(&pool->stale_lock);
	while (!list_empty(&pool->stale)) {
		struct z3fold_header *zhdr = list_first_entry(&pool->stale,
						struct z3fold_header, buddy);
		struct page *page = virt_to_page(zhdr);

		list_del(&zhdr->buddy);
		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
			continue;
		spin_unlock(&pool->stale_lock);
		cancel_work_sync(&zhdr->work);
		free_z3fold_page(page, false);
		cond_resched();
		spin_lock(&pool->stale_lock);
	}
	spin_unlock(&pool->stale_lock);
}

/*
 * Returns the number of free chunks in a z3fold page.
 * NB: can't be used with HEADLESS pages.
 */
static int num_free_chunks(struct z3fold_header *zhdr)
{
	int nfree;
	/*
	 * If there is a middle object, pick up the bigger free space
	 * either before or after it. Otherwise just subtract the number
	 * of chunks occupied by the first and the last objects.
	 */
	if (zhdr->middle_chunks != 0) {
		int nfree_before = zhdr->first_chunks ?
			0 : zhdr->start_middle - ZHDR_CHUNKS;
		int nfree_after = zhdr->last_chunks ?
			0 : TOTAL_CHUNKS -
				(zhdr->start_middle + zhdr->middle_chunks);
		nfree = max(nfree_before, nfree_after);
	} else
		nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
	return nfree;
}

/* Add to the appropriate unbuddied list */
static inline void add_to_unbuddied(struct z3fold_pool *pool,
				struct z3fold_header *zhdr)
{
	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
			zhdr->middle_chunks == 0) {
		struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied);

		int freechunks = num_free_chunks(zhdr);
		spin_lock(&pool->lock);
		list_add(&zhdr->buddy, &unbuddied[freechunks]);
		spin_unlock(&pool->lock);
		zhdr->cpu = smp_processor_id();
		put_cpu_ptr(pool->unbuddied);
	}
}

static inline void *mchunk_memmove(struct z3fold_header *zhdr,
				unsigned short dst_chunk)
{
	void *beg = zhdr;
	return memmove(beg + (dst_chunk << CHUNK_SHIFT),
		       beg + (zhdr->start_middle << CHUNK_SHIFT),
		       zhdr->middle_chunks << CHUNK_SHIFT);
}

#define BIG_CHUNK_GAP	3
/* Has to be called with lock held */
static int z3fold_compact_page(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
		return 0; /* can't move middle chunk, it's used */

	if (unlikely(PageIsolated(page)))
		return 0;

	if (zhdr->middle_chunks == 0)
		return 0; /* nothing to compact */

	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
		/* move to the beginning */
		mchunk_memmove(zhdr, ZHDR_CHUNKS);
		zhdr->first_chunks = zhdr->middle_chunks;
		zhdr->middle_chunks = 0;
		zhdr->start_middle = 0;
		zhdr->first_num++;
		return 1;
	}

	/*
	 * moving data is expensive, so let's only do that if
	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
	 */
	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
			BIG_CHUNK_GAP) {
		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
		return 1;
	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
					+ zhdr->middle_chunks) >=
			BIG_CHUNK_GAP) {
		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
			zhdr->middle_chunks;
		mchunk_memmove(zhdr, new_start);
		zhdr->start_middle = new_start;
		return 1;
	}

	return 0;
}
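/*
 * Runs with the z3fold page lock held: taken by the caller when
 * locked == true (e.g. from z3fold_free()), or taken here when invoked from
 * the per-CPU compaction workqueue. The lock is dropped before returning.
 */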
static void do_compact_page(struct z3fold_header *zhdr, bool locked)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	struct page *page;

	page = virt_to_page(zhdr);
	if (locked)
		WARN_ON(z3fold_page_trylock(zhdr));
	else
		z3fold_page_lock(zhdr);
	if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}

	if (unlikely(PageIsolated(page) ||
		     test_bit(PAGE_CLAIMED, &page->private) ||
		     test_bit(PAGE_STALE, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}

	z3fold_compact_page(zhdr);
	add_to_unbuddied(pool, zhdr);
	z3fold_page_unlock(zhdr);
}

static void compact_page_work(struct work_struct *w)
{
	struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
						work);

	do_compact_page(zhdr, false);
}
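/*
 * Allocation strategy: each CPU keeps its own array of NCHUNKS "unbuddied"
 * lists, indexed by the number of free chunks in a page. __z3fold_alloc()
 * first scans the local CPU's lists starting from the requested size and,
 * failing that, looks for an exact-size match on the other CPUs' lists.
 */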
/* returns _locked_ z3fold page header or NULL */
static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
						size_t size, bool can_sleep)
{
	struct z3fold_header *zhdr = NULL;
	struct page *page;
	struct list_head *unbuddied;
	int chunks = size_to_chunks(size), i;

lookup:
	/* First, try to find an unbuddied z3fold page. */
	unbuddied = get_cpu_ptr(pool->unbuddied);
	for_each_unbuddied_list(i, chunks) {
		struct list_head *l = &unbuddied[i];

		zhdr = list_first_entry_or_null(READ_ONCE(l),
					struct z3fold_header, buddy);

		if (!zhdr)
			continue;

		/* Re-check under lock. */
		spin_lock(&pool->lock);
		l = &unbuddied[i];
		if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
						struct z3fold_header, buddy)) ||
		    !z3fold_page_trylock(zhdr)) {
			spin_unlock(&pool->lock);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}
		list_del_init(&zhdr->buddy);
		zhdr->cpu = -1;
		spin_unlock(&pool->lock);

		page = virt_to_page(zhdr);
		if (test_bit(NEEDS_COMPACTING, &page->private)) {
			z3fold_page_unlock(zhdr);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}

		/*
		 * this page could not be removed from its unbuddied
		 * list while pool lock was held, and then we've taken
		 * page lock so kref_put could not be called before
		 * we got here, so it's safe to just call kref_get()
		 */
		kref_get(&zhdr->refcount);
		break;
	}
	put_cpu_ptr(pool->unbuddied);

	if (!zhdr) {
		int cpu;

		/* look for _exact_ match on other cpus' lists */
		for_each_online_cpu(cpu) {
			struct list_head *l;

			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
			spin_lock(&pool->lock);
			l = &unbuddied[chunks];

			zhdr = list_first_entry_or_null(READ_ONCE(l),
						struct z3fold_header, buddy);

			if (!zhdr || !z3fold_page_trylock(zhdr)) {
				spin_unlock(&pool->lock);
				zhdr = NULL;
				continue;
			}
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			spin_unlock(&pool->lock);

			page = virt_to_page(zhdr);
			if (test_bit(NEEDS_COMPACTING, &page->private)) {
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				if (can_sleep)
					cond_resched();
				continue;
			}
			kref_get(&zhdr->refcount);
			break;
		}
	}

	return zhdr;
}

/*
 * API Functions
 */

/**
 * z3fold_create_pool() - create a new z3fold pool
 * @name:	pool name
 * @gfp:	gfp flags when allocating the z3fold pool structure
 * @ops:	user-defined operations for the z3fold pool
 *
 * Return: pointer to the new z3fold pool or NULL if the metadata allocation
 * failed.
 */
static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
		const struct z3fold_ops *ops)
{
	struct z3fold_pool *pool = NULL;
	int i, cpu;

	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
	if (!pool)
		goto out;
	pool->c_handle = kmem_cache_create("z3fold_handle",
				sizeof(struct z3fold_buddy_slots),
				SLOTS_ALIGN, 0, NULL);
	if (!pool->c_handle)
		goto out_c;
	spin_lock_init(&pool->lock);
	spin_lock_init(&pool->stale_lock);
	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
	if (!pool->unbuddied)
		goto out_pool;
	for_each_possible_cpu(cpu) {
		struct list_head *unbuddied =
				per_cpu_ptr(pool->unbuddied, cpu);
		for_each_unbuddied_list(i, 0)
			INIT_LIST_HEAD(&unbuddied[i]);
	}
	INIT_LIST_HEAD(&pool->lru);
	INIT_LIST_HEAD(&pool->stale);
	atomic64_set(&pool->pages_nr, 0);
	pool->name = name;
	pool->compact_wq = create_singlethread_workqueue(pool->name);
	if (!pool->compact_wq)
		goto out_unbuddied;
	pool->release_wq = create_singlethread_workqueue(pool->name);
	if (!pool->release_wq)
		goto out_wq;
	if (z3fold_register_migration(pool))
		goto out_rwq;
	INIT_WORK(&pool->work, free_pages_work);
	pool->ops = ops;
	return pool;

out_rwq:
	destroy_workqueue(pool->release_wq);
out_wq:
	destroy_workqueue(pool->compact_wq);
out_unbuddied:
	free_percpu(pool->unbuddied);
out_pool:
	kmem_cache_destroy(pool->c_handle);
out_c:
	kfree(pool);
out:
	return NULL;
}

/**
 * z3fold_destroy_pool() - destroys an existing z3fold pool
 * @pool:	the z3fold pool to be destroyed
 *
 * The pool should be emptied before this function is called.
 */
static void z3fold_destroy_pool(struct z3fold_pool *pool)
{
	kmem_cache_destroy(pool->c_handle);

	/*
	 * We need to destroy pool->compact_wq before pool->release_wq,
	 * as any pending work on pool->compact_wq will call
	 * queue_work(pool->release_wq, &pool->work).
	 *
	 * There are still outstanding pages until both workqueues are drained,
	 * so we cannot unregister migration until then.
	 */

	destroy_workqueue(pool->compact_wq);
	destroy_workqueue(pool->release_wq);
	z3fold_unregister_migration(pool);
	kfree(pool);
}
/**
 * z3fold_alloc() - allocates a region of a given size
 * @pool:	z3fold pool from which to allocate
 * @size:	size in bytes of the desired allocation
 * @gfp:	gfp flags used if the pool needs to grow
 * @handle:	handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough to
 * satisfy the allocation request. A search of the unbuddied lists is
 * performed first. If no suitable free region is found, then a new page is
 * allocated and added to the pool to satisfy the request.
 *
 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
 * as z3fold pool pages.
 *
 * Return: 0 on success with *handle set, -EINVAL if the size or gfp
 * arguments are invalid, -ENOSPC if the request cannot fit in a single page,
 * or -ENOMEM if the pool was unable to allocate a new page.
 */
static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks = size_to_chunks(size);
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	enum buddy bud;
	bool can_sleep = gfpflags_allow_blocking(gfp);

	if (!size)
		return -EINVAL;

	if (size > PAGE_SIZE)
		return -ENOSPC;

	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		bud = HEADLESS;
	else {
retry:
		zhdr = __z3fold_alloc(pool, size, can_sleep);
		if (zhdr) {
			if (zhdr->first_chunks == 0) {
				if (zhdr->middle_chunks != 0 &&
				    chunks >= zhdr->start_middle)
					bud = LAST;
				else
					bud = FIRST;
			} else if (zhdr->last_chunks == 0)
				bud = LAST;
			else if (zhdr->middle_chunks == 0)
				bud = MIDDLE;
			else {
				if (kref_put(&zhdr->refcount,
					     release_z3fold_page_locked))
					atomic64_dec(&pool->pages_nr);
				else
					z3fold_page_unlock(zhdr);
				pr_err("No free chunks in unbuddied\n");
				WARN_ON(1);
				goto retry;
			}
			page = virt_to_page(zhdr);
			goto found;
		}
		bud = FIRST;
	}

	page = NULL;
	if (can_sleep) {
		spin_lock(&pool->stale_lock);
		zhdr = list_first_entry_or_null(&pool->stale,
						struct z3fold_header, buddy);
		/*
		 * Before allocating a page, let's see if we can take one from
		 * the stale pages list. cancel_work_sync() can sleep so we
		 * limit this case to the contexts where we can sleep
		 */
		if (zhdr) {
			list_del(&zhdr->buddy);
			spin_unlock(&pool->stale_lock);
			cancel_work_sync(&zhdr->work);
			page = virt_to_page(zhdr);
		} else {
			spin_unlock(&pool->stale_lock);
		}
	}
	if (!page)
		page = alloc_page(gfp);

	if (!page)
		return -ENOMEM;

	zhdr = init_z3fold_page(page, pool, gfp);
	if (!zhdr) {
		__free_page(page);
		return -ENOMEM;
	}
	atomic64_inc(&pool->pages_nr);

	if (bud == HEADLESS) {
		set_bit(PAGE_HEADLESS, &page->private);
		goto headless;
	}
	if (can_sleep) {
		lock_page(page);
		__SetPageMovable(page, pool->inode->i_mapping);
		unlock_page(page);
	} else {
		if (trylock_page(page)) {
			__SetPageMovable(page, pool->inode->i_mapping);
			unlock_page(page);
		}
	}
	z3fold_page_lock(zhdr);

found:
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else if (bud == LAST)
		zhdr->last_chunks = chunks;
	else {
		zhdr->middle_chunks = chunks;
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
	}
	add_to_unbuddied(pool, zhdr);

headless:
	spin_lock(&pool->lock);
	/* Add/move z3fold page to beginning of LRU */
	if (!list_empty(&page->lru))
		list_del(&page->lru);

	list_add(&page->lru, &pool->lru);

	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);
	if (bud != HEADLESS)
		z3fold_page_unlock(zhdr);

	return 0;
}
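/*
 * Size-to-buddy example (illustrative, with 4 KiB pages and a one-chunk
 * header as described at the top of this file): a request larger than
 * 4096 - 64 - 64 = 3968 bytes cannot share the page with anything else and
 * is stored HEADLESS; a 2000-byte request needs size_to_chunks(2000) = 32
 * chunks and is placed in the FIRST, MIDDLE or LAST slot of a page that has
 * at least 32 free chunks.
 */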
/**
 * z3fold_free() - frees the allocation associated with the given handle
 * @pool:	pool in which the allocation resided
 * @handle:	handle associated with the allocation returned by z3fold_alloc()
 *
 * In the case that the z3fold page in which the allocation resides is under
 * reclaim, as indicated by the PAGE_CLAIMED flag being set, this function
 * only sets the relevant first|middle|last_chunks field to 0. The page is
 * actually freed once both buddies are evicted (see z3fold_reclaim_page()
 * below).
 */
static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy bud;

	zhdr = handle_to_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private)) {
		/* if a headless page is under reclaim, just leave.
		 * NB: we use test_and_set_bit for a reason: if the bit
		 * has not been set before, we release this page
		 * immediately so we don't care about its value any more.
		 */
		if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
			spin_lock(&pool->lock);
			list_del(&page->lru);
			spin_unlock(&pool->lock);
			free_z3fold_page(page, true);
			atomic64_dec(&pool->pages_nr);
		}
		return;
	}

	/* Non-headless case */
	z3fold_page_lock(zhdr);
	bud = handle_to_buddy(handle);

	switch (bud) {
	case FIRST:
		zhdr->first_chunks = 0;
		break;
	case MIDDLE:
		zhdr->middle_chunks = 0;
		break;
	case LAST:
		zhdr->last_chunks = 0;
		break;
	default:
		pr_err("%s: unknown bud %d\n", __func__, bud);
		WARN_ON(1);
		z3fold_page_unlock(zhdr);
		return;
	}

	free_handle(handle);
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	if (test_bit(PAGE_CLAIMED, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}
	if (unlikely(PageIsolated(page)) ||
	    test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
		z3fold_page_unlock(zhdr);
		return;
	}
	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
		spin_lock(&pool->lock);
		list_del_init(&zhdr->buddy);
		spin_unlock(&pool->lock);
		zhdr->cpu = -1;
		kref_get(&zhdr->refcount);
		do_compact_page(zhdr, true);
		return;
	}
	kref_get(&zhdr->refcount);
	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
	z3fold_page_unlock(zhdr);
}

/**
 * z3fold_reclaim_page() - evicts allocations from a pool page and frees it
 * @pool:	pool from which a page will attempt to be evicted
 * @retries:	number of pages on the LRU list for which eviction will
 *		be attempted before failing
 *
 * z3fold reclaim is different from normal system reclaim in that it is done
 * from the bottom, up. This is because only the bottom layer, z3fold, has
 * information on how the allocations are organized within each z3fold page.
 * This has the potential to create interesting locking situations between
 * z3fold and the user, however.
 *
 * To avoid these, this is how z3fold_reclaim_page() should be called:
 *
 * The user detects a page should be reclaimed and calls z3fold_reclaim_page().
 * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and
 * call the user-defined eviction handler with the pool and handle as
 * arguments.
 *
 * If the handle can not be evicted, the eviction handler should return
 * non-zero. z3fold_reclaim_page() will add the z3fold page back to the
 * appropriate list and try the next z3fold page on the LRU up to
 * a user defined number of retries.
 *
 * If the handle is successfully evicted, the eviction handler should
 * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free()
 * contains logic to delay freeing the page if the page is under reclaim,
 * as indicated by the PAGE_CLAIMED flag being set on the underlying page.
 *
 * If all buddies in the z3fold page are successfully evicted, then the
 * z3fold page can be freed.
 *
 * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
 * no pages to evict or an eviction handler is not registered, -EAGAIN if
 * the retry limit was hit.
 */
static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
{
	int i, ret = 0;
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	struct list_head *pos;
	struct z3fold_buddy_slots slots;
	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;

	spin_lock(&pool->lock);
	if (!pool->ops || !pool->ops->evict || retries == 0) {
		spin_unlock(&pool->lock);
		return -EINVAL;
	}
	for (i = 0; i < retries; i++) {
		if (list_empty(&pool->lru)) {
			spin_unlock(&pool->lock);
			return -EINVAL;
		}
		list_for_each_prev(pos, &pool->lru) {
			page = list_entry(pos, struct page, lru);

			/* this bit could have been set by free, in which case
			 * we pass over to the next page in the pool.
			 */
			if (test_and_set_bit(PAGE_CLAIMED, &page->private)) {
				page = NULL;
				continue;
			}

			if (unlikely(PageIsolated(page))) {
				clear_bit(PAGE_CLAIMED, &page->private);
				page = NULL;
				continue;
			}
			zhdr = page_address(page);
			if (test_bit(PAGE_HEADLESS, &page->private))
				break;

			if (!z3fold_page_trylock(zhdr)) {
				clear_bit(PAGE_CLAIMED, &page->private);
				zhdr = NULL;
				continue; /* can't evict at this point */
			}
			kref_get(&zhdr->refcount);
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			break;
		}

		if (!zhdr)
			break;

		list_del_init(&page->lru);
		spin_unlock(&pool->lock);

		if (!test_bit(PAGE_HEADLESS, &page->private)) {
			/*
			 * We need to encode the handles before unlocking, and
			 * use our local slots structure because z3fold_free
			 * can zero out zhdr->slots and we can't do much
			 * about that
			 */
			first_handle = 0;
			last_handle = 0;
			middle_handle = 0;
			if (zhdr->first_chunks)
				first_handle = __encode_handle(zhdr, &slots,
								FIRST);
			if (zhdr->middle_chunks)
				middle_handle = __encode_handle(zhdr, &slots,
								MIDDLE);
			if (zhdr->last_chunks)
				last_handle = __encode_handle(zhdr, &slots,
								LAST);
			/*
			 * it's safe to unlock here because we hold a
			 * reference to this page
			 */
			z3fold_page_unlock(zhdr);
		} else {
			first_handle = __encode_handle(zhdr, &slots, HEADLESS);
			last_handle = middle_handle = 0;
		}

		/* Issue the eviction callback(s) */
		if (middle_handle) {
			ret = pool->ops->evict(pool, middle_handle);
			if (ret)
				goto next;
		}
		if (first_handle) {
			ret = pool->ops->evict(pool, first_handle);
			if (ret)
				goto next;
		}
		if (last_handle) {
			ret = pool->ops->evict(pool, last_handle);
			if (ret)
				goto next;
		}
next:
		if (test_bit(PAGE_HEADLESS, &page->private)) {
			if (ret == 0) {
				free_z3fold_page(page, true);
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			clear_bit(PAGE_CLAIMED, &page->private);
		} else {
			z3fold_page_lock(zhdr);
			if (kref_put(&zhdr->refcount,
					release_z3fold_page_locked)) {
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			/*
			 * if we are here, the page is still not completely
			 * free. Take the global pool lock then to be able
			 * to add it back to the lru list
			 */
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			z3fold_page_unlock(zhdr);
			clear_bit(PAGE_CLAIMED, &page->private);
		}

		/* We started off locked, so we need to take the pool lock back */
		spin_lock(&pool->lock);
	}
	spin_unlock(&pool->lock);
	return -EAGAIN;
}
/**
 * z3fold_map() - maps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be mapped
 *
 * Extracts the buddy number from handle and constructs the pointer to the
 * correct starting chunk within the page.
 *
 * Returns: a pointer to the mapped allocation
 */
static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	void *addr;
	enum buddy buddy;

	zhdr = handle_to_z3fold_header(handle);
	addr = zhdr;
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		goto out;

	z3fold_page_lock(zhdr);
	buddy = handle_to_buddy(handle);
	switch (buddy) {
	case FIRST:
		addr += ZHDR_SIZE_ALIGNED;
		break;
	case MIDDLE:
		addr += zhdr->start_middle << CHUNK_SHIFT;
		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
		break;
	case LAST:
		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
		break;
	default:
		pr_err("unknown buddy id %d\n", buddy);
		WARN_ON(1);
		addr = NULL;
		break;
	}

	if (addr)
		zhdr->mapped_count++;
	z3fold_page_unlock(zhdr);
out:
	return addr;
}

/**
 * z3fold_unmap() - unmaps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be unmapped
 */
static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy buddy;

	zhdr = handle_to_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return;

	z3fold_page_lock(zhdr);
	buddy = handle_to_buddy(handle);
	if (buddy == MIDDLE)
		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	zhdr->mapped_count--;
	z3fold_page_unlock(zhdr);
}

/**
 * z3fold_get_pool_size() - gets the z3fold pool size in pages
 * @pool:	pool whose size is being queried
 *
 * Returns: size in pages of the given pool.
 */
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
	return atomic64_read(&pool->pages_nr);
}
static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(PageIsolated(page), page);

	if (test_bit(PAGE_HEADLESS, &page->private) ||
	    test_bit(PAGE_CLAIMED, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	pool = zhdr_to_pool(zhdr);

	if (zhdr->mapped_count == 0) {
		kref_get(&zhdr->refcount);
		if (!list_empty(&zhdr->buddy))
			list_del_init(&zhdr->buddy);
		spin_lock(&pool->lock);
		if (!list_empty(&page->lru))
			list_del(&page->lru);
		spin_unlock(&pool->lock);
		z3fold_page_unlock(zhdr);
		return true;
	}
out:
	z3fold_page_unlock(zhdr);
	return false;
}
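/*
 * Migration callback: the whole page (header and buddies) is copied to
 * newpage, the handles are re-encoded so that they point into the new page,
 * and compaction is scheduled on the new page before the old one is dropped.
 */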
static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
			       struct page *page, enum migrate_mode mode)
{
	struct z3fold_header *zhdr, *new_zhdr;
	struct z3fold_pool *pool;
	struct address_space *new_mapping;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	if (!z3fold_page_trylock(zhdr))
		return -EAGAIN;
	if (zhdr->mapped_count != 0) {
		z3fold_page_unlock(zhdr);
		return -EBUSY;
	}
	if (work_pending(&zhdr->work)) {
		z3fold_page_unlock(zhdr);
		return -EAGAIN;
	}
	new_zhdr = page_address(newpage);
	memcpy(new_zhdr, zhdr, PAGE_SIZE);
	newpage->private = page->private;
	page->private = 0;
	z3fold_page_unlock(zhdr);
	spin_lock_init(&new_zhdr->page_lock);
	INIT_WORK(&new_zhdr->work, compact_page_work);
	/*
	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
	 * so we only have to reinitialize it.
	 */
	INIT_LIST_HEAD(&new_zhdr->buddy);
	new_mapping = page_mapping(page);
	__ClearPageMovable(page);
	ClearPagePrivate(page);

	get_page(newpage);
	z3fold_page_lock(new_zhdr);
	if (new_zhdr->first_chunks)
		encode_handle(new_zhdr, FIRST);
	if (new_zhdr->last_chunks)
		encode_handle(new_zhdr, LAST);
	if (new_zhdr->middle_chunks)
		encode_handle(new_zhdr, MIDDLE);
	set_bit(NEEDS_COMPACTING, &newpage->private);
	new_zhdr->cpu = smp_processor_id();
	spin_lock(&pool->lock);
	list_add(&newpage->lru, &pool->lru);
	spin_unlock(&pool->lock);
	__SetPageMovable(newpage, new_mapping);
	z3fold_page_unlock(new_zhdr);

	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);

	page_mapcount_reset(page);
	put_page(page);
	return 0;
}

static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	INIT_LIST_HEAD(&page->lru);
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	spin_lock(&pool->lock);
	list_add(&page->lru, &pool->lru);
	spin_unlock(&pool->lock);
	z3fold_page_unlock(zhdr);
}

static const struct address_space_operations z3fold_aops = {
	.isolate_page = z3fold_page_isolate,
	.migratepage = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};

/*****************
 * zpool
 ****************/
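/*
 * Glue between the allocator above and the generic zpool API. A user such as
 * zswap does not call the z3fold_* functions directly; it creates a pool of
 * type "z3fold" through the zpool layer (e.g. zpool_create_pool(), optionally
 * passing an evict callback in its struct zpool_ops), and the calls are then
 * dispatched through z3fold_zpool_driver below.
 */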
static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
{
	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
		return pool->zpool_ops->evict(pool->zpool, handle);
	else
		return -ENOENT;
}

static const struct z3fold_ops z3fold_zpool_ops = {
	.evict =	z3fold_zpool_evict
};

static void *z3fold_zpool_create(const char *name, gfp_t gfp,
			       const struct zpool_ops *zpool_ops,
			       struct zpool *zpool)
{
	struct z3fold_pool *pool;

	pool = z3fold_create_pool(name, gfp,
				zpool_ops ? &z3fold_zpool_ops : NULL);
	if (pool) {
		pool->zpool = zpool;
		pool->zpool_ops = zpool_ops;
	}
	return pool;
}

static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
}

static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return z3fold_alloc(pool, size, gfp, handle);
}
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	z3fold_free(pool, handle);
}

static int z3fold_zpool_shrink(void *pool, unsigned int pages,
			unsigned int *reclaimed)
{
	unsigned int total = 0;
	int ret = -EINVAL;

	while (total < pages) {
		ret = z3fold_reclaim_page(pool, 8);
		if (ret < 0)
			break;
		total++;
	}

	if (reclaimed)
		*reclaimed = total;

	return ret;
}

static void *z3fold_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return z3fold_map(pool, handle);
}
static void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	z3fold_unmap(pool, handle);
}

static u64 z3fold_zpool_total_size(void *pool)
{
	return z3fold_get_pool_size(pool) * PAGE_SIZE;
}

static struct zpool_driver z3fold_zpool_driver = {
	.type =		"z3fold",
	.owner =	THIS_MODULE,
	.create =	z3fold_zpool_create,
	.destroy =	z3fold_zpool_destroy,
	.malloc =	z3fold_zpool_malloc,
	.free =		z3fold_zpool_free,
	.shrink =	z3fold_zpool_shrink,
	.map =		z3fold_zpool_map,
	.unmap =	z3fold_zpool_unmap,
	.total_size =	z3fold_zpool_total_size,
};

MODULE_ALIAS("zpool-z3fold");

static int __init init_z3fold(void)
{
	int ret;

	/* Make sure the z3fold header is not larger than the page size */
	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
	ret = z3fold_mount();
	if (ret)
		return ret;

	zpool_register_driver(&z3fold_zpool_driver);

	return 0;
}

static void __exit exit_z3fold(void)
{
	z3fold_unmount();
	zpool_unregister_driver(&z3fold_zpool_driver);
}

module_init(init_z3fold);
module_exit(exit_z3fold);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");