// SPDX-License-Identifier: GPL-2.0-only
/*
 * z3fold.c
 *
 * Author: Vitaly Wool <vitaly.wool@konsulko.com>
 * Copyright (C) 2016, Sony Mobile Communications Inc.
 *
 * This implementation is based on zbud written by Seth Jennings.
 *
 * z3fold is a special purpose allocator for storing compressed pages. It
 * can store up to three compressed pages per page which improves the
 * compression ratio of zbud while retaining its main concepts (e.g. always
 * storing an integral number of objects per page) and simplicity.
 * It still has simple and deterministic reclaim properties that make it
 * preferable to a higher density approach (with no requirement on integral
 * number of objects per page) when reclaim is used.
 *
 * As in zbud, pages are divided into "chunks". The size of the chunks is
 * fixed at compile time and is determined by NCHUNKS_ORDER below.
 *
 * z3fold doesn't export any API and is meant to be used via zpool API.
 */
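
/*
 * Illustration only (not part of z3fold itself): callers are expected to go
 * through the zpool layer rather than call into z3fold directly. A rough
 * sketch, assuming the zpool API from <linux/zpool.h>, might look like:
 *
 *	struct zpool *zp = zpool_create_pool("z3fold", "mypool", GFP_KERNEL, &ops);
 *	unsigned long handle;
 *
 *	if (!zpool_malloc(zp, len, GFP_KERNEL, &handle)) {
 *		void *dst = zpool_map_handle(zp, handle, ZPOOL_MM_WO);
 *
 *		memcpy(dst, src, len);
 *		zpool_unmap_handle(zp, handle);
 *		zpool_free(zp, handle);
 *	}
 *	zpool_destroy_pool(zp);
 *
 * "mypool", "ops", "src" and "len" above are placeholders, not names used
 * anywhere in this file.
 */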

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/migrate.h>
#include <linux/node.h>
#include <linux/compaction.h>
#include <linux/percpu.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/fs.h>
#include <linux/preempt.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zpool.h>
#include <linux/magic.h>
#include <linux/kmemleak.h>

/*
 * NCHUNKS_ORDER determines the internal allocation granularity, effectively
 * adjusting internal fragmentation. It also determines the number of
 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
 * allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
 * at the beginning of an allocated page are occupied by the z3fold header, so
 * NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
 * which is the maximum number of free chunks in a z3fold page; there will
 * accordingly be 63 (or 62) freelists per pool.
 */
#define NCHUNKS_ORDER	6

#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
#define ZHDR_CHUNKS	(ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
#define TOTAL_CHUNKS	(PAGE_SIZE >> CHUNK_SHIFT)
#define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)

#define BUDDY_MASK	(0x3)
#define BUDDY_SHIFT	2
#define SLOTS_ALIGN	(0x40)
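
/*
 * Illustrative numbers only: with 4K pages (PAGE_SHIFT == 12) and
 * NCHUNKS_ORDER == 6, CHUNK_SHIFT is 6, so CHUNK_SIZE is 64 bytes and
 * TOTAL_CHUNKS is 64. The z3fold header occupies the first ZHDR_CHUNKS
 * chunks of a page (the exact value depends on sizeof(struct z3fold_header)
 * and thus on the config), and the remaining NCHUNKS chunks are what the
 * three buddies are carved out of.
 */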

/*****************
 * Structures
*****************/
struct z3fold_pool;
struct z3fold_ops {
	int (*evict)(struct z3fold_pool *pool, unsigned long handle);
};

enum buddy {
	HEADLESS = 0,
	FIRST,
	MIDDLE,
	LAST,
	BUDDIES_MAX = LAST
};

struct z3fold_buddy_slots {
	/*
	 * we are using BUDDY_MASK in handle_to_buddy etc. so there should
	 * be enough slots to hold all possible variants
	 */
	unsigned long slot[BUDDY_MASK + 1];
	unsigned long pool; /* back link + flags */
	rwlock_t lock;
};
#define HANDLE_FLAG_MASK	(0x03)

/*
 * struct z3fold_header - z3fold page metadata occupying first chunks of each
 *			z3fold page, except for HEADLESS pages
 * @buddy:		links the z3fold page into the relevant list in the
 *			pool
 * @page_lock:		per-page lock
 * @refcount:		reference count for the z3fold page
 * @work:		work_struct for page layout optimization
 * @slots:		pointer to the structure holding buddy slots
 * @pool:		pointer to the containing pool
 * @cpu:		CPU which this page "belongs" to
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
 * @last_chunks:	the size of the last buddy in chunks, 0 if free
 * @start_middle:	index of the first chunk of the middle buddy
 * @first_num:		the starting number (for the first handle)
 * @mapped_count:	the number of objects currently mapped
 * @foreign_handles:	number of handles to this page's buddies that live in
 *			another page's slots structure
 */
struct z3fold_header {
	struct list_head buddy;
	spinlock_t page_lock;
	struct kref refcount;
	struct work_struct work;
	struct z3fold_buddy_slots *slots;
	struct z3fold_pool *pool;
	short cpu;
	unsigned short first_chunks;
	unsigned short middle_chunks;
	unsigned short last_chunks;
	unsigned short start_middle;
	unsigned short first_num:2;
	unsigned short mapped_count:2;
	unsigned short foreign_handles:2;
};
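
/*
 * Illustrative layout of a non-HEADLESS z3fold page (not to scale); this is
 * a summary of how the chunk fields above are used rather than a definition:
 *
 *	+-------------+-------+- - - -+--------+- - - -+------+
 *	| z3fold hdr  | FIRST |  gap  | MIDDLE |  gap  | LAST |
 *	+-------------+-------+- - - -+--------+- - - -+------+
 *
 * The first buddy starts right after the header, the last buddy ends at the
 * end of the page, and the middle buddy starts at chunk start_middle; free
 * gaps, if any, sit on either side of the middle buddy. A HEADLESS page
 * carries a single object and no header at all.
 */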

/**
 * struct z3fold_pool - stores metadata for each z3fold pool
 * @name:	pool name
 * @lock:	protects pool unbuddied/lru lists
 * @stale_lock:	protects pool stale page list
 * @unbuddied:	per-cpu array of lists tracking z3fold pages that contain 2-
 *		buddies; the list each z3fold page is added to depends on
 *		the size of its free region.
 * @lru:	list tracking the z3fold pages in LRU order by most recently
 *		added buddy.
 * @stale:	list of pages marked for freeing
 * @pages_nr:	number of z3fold pages in the pool.
 * @c_handle:	cache for z3fold_buddy_slots allocation
 * @ops:	pointer to a structure of user defined operations specified at
 *		pool creation time.
 * @zpool:	zpool driver
 * @zpool_ops:	zpool operations structure with an evict callback
 * @compact_wq:	workqueue for page layout background optimization
 * @release_wq:	workqueue for safe page release
 * @work:	work_struct for safe page release
 * @inode:	inode for z3fold pseudo filesystem
 *
 * This structure is allocated at pool creation time and maintains metadata
 * pertaining to a particular z3fold pool.
 */
struct z3fold_pool {
	const char *name;
	spinlock_t lock;
	spinlock_t stale_lock;
	struct list_head *unbuddied;
	struct list_head lru;
	struct list_head stale;
	atomic64_t pages_nr;
	struct kmem_cache *c_handle;
	const struct z3fold_ops *ops;
	struct zpool *zpool;
	const struct zpool_ops *zpool_ops;
	struct workqueue_struct *compact_wq;
	struct workqueue_struct *release_wq;
	struct work_struct work;
	struct inode *inode;
};

/*
 * Internal z3fold page flags
 */
enum z3fold_page_flags {
	PAGE_HEADLESS = 0,
	MIDDLE_CHUNK_MAPPED,
	NEEDS_COMPACTING,
	PAGE_STALE,
	PAGE_CLAIMED, /* by either reclaim or free */
};

/*
 * handle flags, go under HANDLE_FLAG_MASK
 */
enum z3fold_handle_flags {
	HANDLES_ORPHANED = 0,
};

/*
 * Forward declarations
 */
static struct z3fold_header *__z3fold_alloc(struct z3fold_pool *, size_t, bool);
static void compact_page_work(struct work_struct *w);

/*****************
 * Helpers
*****************/

/* Converts an allocation size in bytes to size in z3fold chunks */
static int size_to_chunks(size_t size)
{
	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}

#define for_each_unbuddied_list(_iter, _begin) \
	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)

static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool,
							gfp_t gfp)
{
	struct z3fold_buddy_slots *slots;

	slots = kmem_cache_alloc(pool->c_handle,
				 (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE)));

	if (slots) {
		/* It will be freed separately in free_handle(). */
		kmemleak_not_leak(slots);
		memset(slots->slot, 0, sizeof(slots->slot));
		slots->pool = (unsigned long)pool;
		rwlock_init(&slots->lock);
	}

	return slots;
}

static inline struct z3fold_pool *slots_to_pool(struct z3fold_buddy_slots *s)
{
	return (struct z3fold_pool *)(s->pool & ~HANDLE_FLAG_MASK);
}

static inline struct z3fold_buddy_slots *handle_to_slots(unsigned long handle)
{
	return (struct z3fold_buddy_slots *)(handle & ~(SLOTS_ALIGN - 1));
}

/* Lock a z3fold page */
static inline void z3fold_page_lock(struct z3fold_header *zhdr)
{
	spin_lock(&zhdr->page_lock);
}

/* Try to lock a z3fold page */
static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
{
	return spin_trylock(&zhdr->page_lock);
}

/* Unlock a z3fold page */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
	spin_unlock(&zhdr->page_lock);
}
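
/*
 * Dereference a non-headless handle to its z3fold header. The slots rwlock
 * is held across the dereference so the slot cannot change underneath us;
 * if @lock is true, spin (trylock + cpu_relax()) until the page lock has
 * been taken as well. Headless handles encode the page address directly.
 */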
static inline struct z3fold_header *__get_z3fold_header(unsigned long handle,
							bool lock)
{
	struct z3fold_buddy_slots *slots;
	struct z3fold_header *zhdr;
	int locked = 0;

	if (!(handle & (1 << PAGE_HEADLESS))) {
		slots = handle_to_slots(handle);
		do {
			unsigned long addr;

			read_lock(&slots->lock);
			addr = *(unsigned long *)handle;
			zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
			if (lock)
				locked = z3fold_page_trylock(zhdr);
			read_unlock(&slots->lock);
			if (locked)
				break;
			cpu_relax();
		} while (lock);
	} else {
		zhdr = (struct z3fold_header *)(handle & PAGE_MASK);
	}

	return zhdr;
}

/* Returns the z3fold page where a given handle is stored */
static inline struct z3fold_header *handle_to_z3fold_header(unsigned long h)
{
	return __get_z3fold_header(h, false);
}

/* return locked z3fold page if it's not headless */
static inline struct z3fold_header *get_z3fold_header(unsigned long h)
{
	return __get_z3fold_header(h, true);
}

static inline void put_z3fold_header(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (!test_bit(PAGE_HEADLESS, &page->private))
		z3fold_page_unlock(zhdr);
}
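
/*
 * Zero out the slot behind a handle. For a "foreign" handle (one whose slots
 * structure belongs to a different page than the one the object now lives
 * in), also drop the foreign_handles count and, if the donor page has already
 * been released (HANDLES_ORPHANED) and this was its last live slot, free the
 * orphaned slots structure.
 */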
static inline void free_handle(unsigned long handle)
{
	struct z3fold_buddy_slots *slots;
	struct z3fold_header *zhdr;
	int i;
	bool is_free;

	if (handle & (1 << PAGE_HEADLESS))
		return;

	if (WARN_ON(*(unsigned long *)handle == 0))
		return;

	zhdr = handle_to_z3fold_header(handle);
	slots = handle_to_slots(handle);
	write_lock(&slots->lock);
	*(unsigned long *)handle = 0;
	if (zhdr->slots == slots) {
		write_unlock(&slots->lock);
		return; /* simple case, nothing else to do */
	}

	/* we are freeing a foreign handle if we are here */
	zhdr->foreign_handles--;
	is_free = true;
	if (!test_bit(HANDLES_ORPHANED, &slots->pool)) {
		write_unlock(&slots->lock);
		return;
	}
	for (i = 0; i <= BUDDY_MASK; i++) {
		if (slots->slot[i]) {
			is_free = false;
			break;
		}
	}
	write_unlock(&slots->lock);

	if (is_free) {
		struct z3fold_pool *pool = slots_to_pool(slots);

		kmem_cache_free(pool->c_handle, slots);
	}
}

static int z3fold_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type z3fold_fs = {
	.name		= "z3fold",
	.init_fs_context = z3fold_init_fs_context,
	.kill_sb	= kill_anon_super,
};

static struct vfsmount *z3fold_mnt;
static int z3fold_mount(void)
{
	int ret = 0;

	z3fold_mnt = kern_mount(&z3fold_fs);
	if (IS_ERR(z3fold_mnt))
		ret = PTR_ERR(z3fold_mnt);

	return ret;
}

static void z3fold_unmount(void)
{
	kern_unmount(z3fold_mnt);
}

static const struct address_space_operations z3fold_aops;
static int z3fold_register_migration(struct z3fold_pool *pool)
{
	pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb);
	if (IS_ERR(pool->inode)) {
		pool->inode = NULL;
		return 1;
	}

	pool->inode->i_mapping->private_data = pool;
	pool->inode->i_mapping->a_ops = &z3fold_aops;
	return 0;
}

static void z3fold_unregister_migration(struct z3fold_pool *pool)
{
	if (pool->inode)
		iput(pool->inode);
}

/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
					struct z3fold_pool *pool, gfp_t gfp)
{
	struct z3fold_header *zhdr = page_address(page);
	struct z3fold_buddy_slots *slots;

	INIT_LIST_HEAD(&page->lru);
	clear_bit(PAGE_HEADLESS, &page->private);
	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	clear_bit(PAGE_STALE, &page->private);
	clear_bit(PAGE_CLAIMED, &page->private);
	if (headless)
		return zhdr;

	slots = alloc_slots(pool, gfp);
	if (!slots)
		return NULL;

	spin_lock_init(&zhdr->page_lock);
	kref_init(&zhdr->refcount);
	zhdr->first_chunks = 0;
	zhdr->middle_chunks = 0;
	zhdr->last_chunks = 0;
	zhdr->first_num = 0;
	zhdr->start_middle = 0;
	zhdr->cpu = -1;
	zhdr->foreign_handles = 0;
	zhdr->mapped_count = 0;
	zhdr->slots = slots;
	zhdr->pool = pool;
	INIT_LIST_HEAD(&zhdr->buddy);
	INIT_WORK(&zhdr->work, compact_page_work);
	return zhdr;
}

/* Resets the struct page fields and frees the page */
static void free_z3fold_page(struct page *page, bool headless)
{
	if (!headless) {
		lock_page(page);
		__ClearPageMovable(page);
		unlock_page(page);
	}
	ClearPagePrivate(page);
	__free_page(page);
}

/* Helper function to build the index */
static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
{
	return (bud + zhdr->first_num) & BUDDY_MASK;
}

/*
 * Encodes the handle of a particular buddy within a z3fold page
 * Pool lock should be held as this function accesses first_num
 */
static unsigned long __encode_handle(struct z3fold_header *zhdr,
				struct z3fold_buddy_slots *slots,
				enum buddy bud)
{
	unsigned long h = (unsigned long)zhdr;
	int idx = 0;

	/*
	 * For a headless page, its handle is its pointer with the extra
	 * PAGE_HEADLESS bit set
	 */
	if (bud == HEADLESS)
		return h | (1 << PAGE_HEADLESS);

	/* otherwise, return pointer to encoded handle */
	idx = __idx(zhdr, bud);
	h += idx;
	if (bud == LAST)
		h |= (zhdr->last_chunks << BUDDY_SHIFT);

	write_lock(&slots->lock);
	slots->slot[idx] = h;
	write_unlock(&slots->lock);
	return (unsigned long)&slots->slot[idx];
}

static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
{
	return __encode_handle(zhdr, zhdr->slots, bud);
}
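
/*
 * Summary of the non-headless handle layout built above, for orientation
 * only: a handle is the address of a slot inside a z3fold_buddy_slots
 * structure (which is SLOTS_ALIGN-aligned, so handle_to_slots() can mask the
 * low bits off). The slot itself holds the page address of the z3fold header
 * plus the buddy index in the low BUDDY_MASK bits; for a LAST buddy the
 * object size in chunks is additionally stored starting at bit BUDDY_SHIFT.
 * handle_to_buddy() and handle_to_chunks() below simply unpack these fields.
 */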

/* only for LAST bud, returns zero otherwise */
static unsigned short handle_to_chunks(unsigned long handle)
{
	struct z3fold_buddy_slots *slots = handle_to_slots(handle);
	unsigned long addr;

	read_lock(&slots->lock);
	addr = *(unsigned long *)handle;
	read_unlock(&slots->lock);
	return (addr & ~PAGE_MASK) >> BUDDY_SHIFT;
}

/*
 * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
 * but that doesn't matter, because the masking will result in the
 * correct buddy number.
 */
static enum buddy handle_to_buddy(unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct z3fold_buddy_slots *slots = handle_to_slots(handle);
	unsigned long addr;

	read_lock(&slots->lock);
	WARN_ON(handle & (1 << PAGE_HEADLESS));
	addr = *(unsigned long *)handle;
	read_unlock(&slots->lock);
	zhdr = (struct z3fold_header *)(addr & PAGE_MASK);
	return (addr - zhdr->first_num) & BUDDY_MASK;
}

static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr)
{
	return zhdr->pool;
}

static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
{
	struct page *page = virt_to_page(zhdr);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	bool is_free = true;
	int i;

	WARN_ON(!list_empty(&zhdr->buddy));
	set_bit(PAGE_STALE, &page->private);
	clear_bit(NEEDS_COMPACTING, &page->private);
	spin_lock(&pool->lock);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&pool->lock);

	/* If there are no foreign handles, free the handles array */
	read_lock(&zhdr->slots->lock);
	for (i = 0; i <= BUDDY_MASK; i++) {
		if (zhdr->slots->slot[i]) {
			is_free = false;
			break;
		}
	}
	if (!is_free)
		set_bit(HANDLES_ORPHANED, &zhdr->slots->pool);
	read_unlock(&zhdr->slots->lock);

	if (is_free)
		kmem_cache_free(pool->c_handle, zhdr->slots);

	if (locked)
		z3fold_page_unlock(zhdr);

	spin_lock(&pool->stale_lock);
	list_add(&zhdr->buddy, &pool->stale);
	queue_work(pool->release_wq, &pool->work);
	spin_unlock(&pool->stale_lock);
}

static void __attribute__((__unused__))
			release_z3fold_page(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	__release_z3fold_page(zhdr, false);
}

static void release_z3fold_page_locked(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
						refcount);
	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}

static void release_z3fold_page_locked_list(struct kref *ref)
{
	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
					       refcount);
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);

	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	WARN_ON(z3fold_page_trylock(zhdr));
	__release_z3fold_page(zhdr, true);
}

static void free_pages_work(struct work_struct *w)
{
	struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work);

	spin_lock(&pool->stale_lock);
	while (!list_empty(&pool->stale)) {
		struct z3fold_header *zhdr = list_first_entry(&pool->stale,
						struct z3fold_header, buddy);
		struct page *page = virt_to_page(zhdr);

		list_del(&zhdr->buddy);
		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
			continue;
		spin_unlock(&pool->stale_lock);
		cancel_work_sync(&zhdr->work);
		free_z3fold_page(page, false);
		cond_resched();
		spin_lock(&pool->stale_lock);
	}
	spin_unlock(&pool->stale_lock);
}

/*
 * Returns the number of free chunks in a z3fold page.
 * NB: can't be used with HEADLESS pages.
 */
static int num_free_chunks(struct z3fold_header *zhdr)
{
	int nfree;
	/*
	 * If there is a middle object, pick up the bigger free space
	 * either before or after it. Otherwise just subtract the number
	 * of chunks occupied by the first and the last objects.
	 */
	if (zhdr->middle_chunks != 0) {
		int nfree_before = zhdr->first_chunks ?
			0 : zhdr->start_middle - ZHDR_CHUNKS;
		int nfree_after = zhdr->last_chunks ?
			0 : TOTAL_CHUNKS -
				(zhdr->start_middle + zhdr->middle_chunks);
		nfree = max(nfree_before, nfree_after);
	} else
		nfree = NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
	return nfree;
}
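
/*
 * Example (illustrative numbers): a page with a 10-chunk first buddy, a free
 * last buddy and no middle buddy has NCHUNKS - 10 free chunks, so
 * add_to_unbuddied() below files it on unbuddied[NCHUNKS - 10] of the local
 * CPU's array, where __z3fold_alloc() can later find it by free-space size.
 */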

/* Add to the appropriate unbuddied list */
static inline void add_to_unbuddied(struct z3fold_pool *pool,
				struct z3fold_header *zhdr)
{
	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
			zhdr->middle_chunks == 0) {
		struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied);

		int freechunks = num_free_chunks(zhdr);
		spin_lock(&pool->lock);
		list_add(&zhdr->buddy, &unbuddied[freechunks]);
		spin_unlock(&pool->lock);
		zhdr->cpu = smp_processor_id();
		put_cpu_ptr(pool->unbuddied);
	}
}

static inline void *mchunk_memmove(struct z3fold_header *zhdr,
				unsigned short dst_chunk)
{
	void *beg = zhdr;
	return memmove(beg + (dst_chunk << CHUNK_SHIFT),
		       beg + (zhdr->start_middle << CHUNK_SHIFT),
		       zhdr->middle_chunks << CHUNK_SHIFT);
}

static inline bool buddy_single(struct z3fold_header *zhdr)
{
	return !((zhdr->first_chunks && zhdr->middle_chunks) ||
			(zhdr->first_chunks && zhdr->last_chunks) ||
			(zhdr->middle_chunks && zhdr->last_chunks));
}
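
/*
 * Try to move the only object of a page that holds just one buddy into
 * another, partially filled page, so that this page can be released. The
 * old handle keeps pointing at the data through the original slots structure
 * (it becomes a "foreign" handle of the destination page). Returns the
 * destination header on success, NULL otherwise. Called with the page lock
 * held.
 */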
static struct z3fold_header *compact_single_buddy(struct z3fold_header *zhdr)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	void *p = zhdr;
	unsigned long old_handle = 0;
	size_t sz = 0;
	struct z3fold_header *new_zhdr = NULL;
	int first_idx = __idx(zhdr, FIRST);
	int middle_idx = __idx(zhdr, MIDDLE);
	int last_idx = __idx(zhdr, LAST);
	unsigned short *moved_chunks = NULL;

	/*
	 * No need to protect slots here -- all the slots are "local" and
	 * the page lock is already taken
	 */
	if (zhdr->first_chunks && zhdr->slots->slot[first_idx]) {
		p += ZHDR_SIZE_ALIGNED;
		sz = zhdr->first_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[first_idx];
		moved_chunks = &zhdr->first_chunks;
	} else if (zhdr->middle_chunks && zhdr->slots->slot[middle_idx]) {
		p += zhdr->start_middle << CHUNK_SHIFT;
		sz = zhdr->middle_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[middle_idx];
		moved_chunks = &zhdr->middle_chunks;
	} else if (zhdr->last_chunks && zhdr->slots->slot[last_idx]) {
		p += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
		sz = zhdr->last_chunks << CHUNK_SHIFT;
		old_handle = (unsigned long)&zhdr->slots->slot[last_idx];
		moved_chunks = &zhdr->last_chunks;
	}

	if (sz > 0) {
		enum buddy new_bud = HEADLESS;
		short chunks = size_to_chunks(sz);
		void *q;

		new_zhdr = __z3fold_alloc(pool, sz, false);
		if (!new_zhdr)
			return NULL;

		if (WARN_ON(new_zhdr == zhdr))
			goto out_fail;

		if (new_zhdr->first_chunks == 0) {
			if (new_zhdr->middle_chunks != 0 &&
					chunks >= new_zhdr->start_middle) {
				new_bud = LAST;
			} else {
				new_bud = FIRST;
			}
		} else if (new_zhdr->last_chunks == 0) {
			new_bud = LAST;
		} else if (new_zhdr->middle_chunks == 0) {
			new_bud = MIDDLE;
		}
		q = new_zhdr;
		switch (new_bud) {
		case FIRST:
			new_zhdr->first_chunks = chunks;
			q += ZHDR_SIZE_ALIGNED;
			break;
		case MIDDLE:
			new_zhdr->middle_chunks = chunks;
			new_zhdr->start_middle =
				new_zhdr->first_chunks + ZHDR_CHUNKS;
			q += new_zhdr->start_middle << CHUNK_SHIFT;
			break;
		case LAST:
			new_zhdr->last_chunks = chunks;
			q += PAGE_SIZE - (new_zhdr->last_chunks << CHUNK_SHIFT);
			break;
		default:
			goto out_fail;
		}
		new_zhdr->foreign_handles++;
		memcpy(q, p, sz);
		write_lock(&zhdr->slots->lock);
		*(unsigned long *)old_handle = (unsigned long)new_zhdr +
			__idx(new_zhdr, new_bud);
		if (new_bud == LAST)
			*(unsigned long *)old_handle |=
				(new_zhdr->last_chunks << BUDDY_SHIFT);
		write_unlock(&zhdr->slots->lock);
		add_to_unbuddied(pool, new_zhdr);
		z3fold_page_unlock(new_zhdr);

		*moved_chunks = 0;
	}

	return new_zhdr;

out_fail:
	if (new_zhdr) {
		if (kref_put(&new_zhdr->refcount, release_z3fold_page_locked))
			atomic64_dec(&pool->pages_nr);
		else {
			add_to_unbuddied(pool, new_zhdr);
			z3fold_page_unlock(new_zhdr);
		}
	}
	return NULL;

}

#define BIG_CHUNK_GAP	3
/* Has to be called with lock held */
static int z3fold_compact_page(struct z3fold_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	if (test_bit(MIDDLE_CHUNK_MAPPED, &page->private))
		return 0; /* can't move middle chunk, it's used */

	if (unlikely(PageIsolated(page)))
		return 0;

	if (zhdr->middle_chunks == 0)
		return 0; /* nothing to compact */

	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
		/* move to the beginning */
		mchunk_memmove(zhdr, ZHDR_CHUNKS);
		zhdr->first_chunks = zhdr->middle_chunks;
		zhdr->middle_chunks = 0;
		zhdr->start_middle = 0;
		zhdr->first_num++;
		return 1;
	}

	/*
	 * moving data is expensive, so let's only do that if
	 * there's substantial gain (at least BIG_CHUNK_GAP chunks)
	 */
	if (zhdr->first_chunks != 0 && zhdr->last_chunks == 0 &&
	    zhdr->start_middle - (zhdr->first_chunks + ZHDR_CHUNKS) >=
			BIG_CHUNK_GAP) {
		mchunk_memmove(zhdr, zhdr->first_chunks + ZHDR_CHUNKS);
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
		return 1;
	} else if (zhdr->last_chunks != 0 && zhdr->first_chunks == 0 &&
		   TOTAL_CHUNKS - (zhdr->last_chunks + zhdr->start_middle
					+ zhdr->middle_chunks) >=
			BIG_CHUNK_GAP) {
		unsigned short new_start = TOTAL_CHUNKS - zhdr->last_chunks -
			zhdr->middle_chunks;
		mchunk_memmove(zhdr, new_start);
		zhdr->start_middle = new_start;
		return 1;
	}

	return 0;
}
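
/*
 * Background/page-lock-held compaction entry point: re-checks that the page
 * is still a valid candidate (not claimed, stale or isolated), tries to
 * migrate a lone buddy away via compact_single_buddy(), otherwise compacts
 * in place with z3fold_compact_page() and re-files the page on the proper
 * unbuddied list.
 */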
static void do_compact_page(struct z3fold_header *zhdr, bool locked)
{
	struct z3fold_pool *pool = zhdr_to_pool(zhdr);
	struct page *page;

	page = virt_to_page(zhdr);
	if (locked)
		WARN_ON(z3fold_page_trylock(zhdr));
	else
		z3fold_page_lock(zhdr);
	if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}
	spin_lock(&pool->lock);
	list_del_init(&zhdr->buddy);
	spin_unlock(&pool->lock);

	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}

	if (unlikely(PageIsolated(page) ||
		     test_bit(PAGE_CLAIMED, &page->private) ||
		     test_bit(PAGE_STALE, &page->private))) {
		z3fold_page_unlock(zhdr);
		return;
	}

	if (!zhdr->foreign_handles && buddy_single(zhdr) &&
	    zhdr->mapped_count == 0 && compact_single_buddy(zhdr)) {
		if (kref_put(&zhdr->refcount, release_z3fold_page_locked))
			atomic64_dec(&pool->pages_nr);
		else
			z3fold_page_unlock(zhdr);
		return;
	}

	z3fold_compact_page(zhdr);
	add_to_unbuddied(pool, zhdr);
	z3fold_page_unlock(zhdr);
}

static void compact_page_work(struct work_struct *w)
{
	struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
						work);

	do_compact_page(zhdr, false);
}

/* returns _locked_ z3fold page header or NULL */
static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
						size_t size, bool can_sleep)
{
	struct z3fold_header *zhdr = NULL;
	struct page *page;
	struct list_head *unbuddied;
	int chunks = size_to_chunks(size), i;

lookup:
	/* First, try to find an unbuddied z3fold page. */
	unbuddied = get_cpu_ptr(pool->unbuddied);
	for_each_unbuddied_list(i, chunks) {
		struct list_head *l = &unbuddied[i];

		zhdr = list_first_entry_or_null(READ_ONCE(l),
					struct z3fold_header, buddy);

		if (!zhdr)
			continue;

		/* Re-check under lock. */
		spin_lock(&pool->lock);
		l = &unbuddied[i];
		if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
						struct z3fold_header, buddy)) ||
		    !z3fold_page_trylock(zhdr)) {
			spin_unlock(&pool->lock);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}
		list_del_init(&zhdr->buddy);
		zhdr->cpu = -1;
		spin_unlock(&pool->lock);

		page = virt_to_page(zhdr);
		if (test_bit(NEEDS_COMPACTING, &page->private) ||
		    test_bit(PAGE_CLAIMED, &page->private)) {
			z3fold_page_unlock(zhdr);
			zhdr = NULL;
			put_cpu_ptr(pool->unbuddied);
			if (can_sleep)
				cond_resched();
			goto lookup;
		}

		/*
		 * this page could not be removed from its unbuddied
		 * list while pool lock was held, and then we've taken
		 * page lock so kref_put could not be called before
		 * we got here, so it's safe to just call kref_get()
		 */
		kref_get(&zhdr->refcount);
		break;
	}
	put_cpu_ptr(pool->unbuddied);

	if (!zhdr) {
		int cpu;

		/* look for _exact_ match on other cpus' lists */
		for_each_online_cpu(cpu) {
			struct list_head *l;

			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
			spin_lock(&pool->lock);
			l = &unbuddied[chunks];

			zhdr = list_first_entry_or_null(READ_ONCE(l),
						struct z3fold_header, buddy);

			if (!zhdr || !z3fold_page_trylock(zhdr)) {
				spin_unlock(&pool->lock);
				zhdr = NULL;
				continue;
			}
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			spin_unlock(&pool->lock);

			page = virt_to_page(zhdr);
			if (test_bit(NEEDS_COMPACTING, &page->private) ||
			    test_bit(PAGE_CLAIMED, &page->private)) {
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				if (can_sleep)
					cond_resched();
				continue;
			}
			kref_get(&zhdr->refcount);
			break;
		}
	}

	return zhdr;
}

/*
 * API Functions
 */

/**
 * z3fold_create_pool() - create a new z3fold pool
 * @name:	pool name
 * @gfp:	gfp flags when allocating the z3fold pool structure
 * @ops:	user-defined operations for the z3fold pool
 *
 * Return: pointer to the new z3fold pool or NULL if the metadata allocation
 * failed.
 */
static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
		const struct z3fold_ops *ops)
{
	struct z3fold_pool *pool = NULL;
	int i, cpu;

	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
	if (!pool)
		goto out;
	pool->c_handle = kmem_cache_create("z3fold_handle",
				sizeof(struct z3fold_buddy_slots),
				SLOTS_ALIGN, 0, NULL);
	if (!pool->c_handle)
		goto out_c;
	spin_lock_init(&pool->lock);
	spin_lock_init(&pool->stale_lock);
	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
	if (!pool->unbuddied)
		goto out_pool;
	for_each_possible_cpu(cpu) {
		struct list_head *unbuddied =
				per_cpu_ptr(pool->unbuddied, cpu);
		for_each_unbuddied_list(i, 0)
			INIT_LIST_HEAD(&unbuddied[i]);
	}
	INIT_LIST_HEAD(&pool->lru);
	INIT_LIST_HEAD(&pool->stale);
	atomic64_set(&pool->pages_nr, 0);
	pool->name = name;
	pool->compact_wq = create_singlethread_workqueue(pool->name);
	if (!pool->compact_wq)
		goto out_unbuddied;
	pool->release_wq = create_singlethread_workqueue(pool->name);
	if (!pool->release_wq)
		goto out_wq;
	if (z3fold_register_migration(pool))
		goto out_rwq;
	INIT_WORK(&pool->work, free_pages_work);
	pool->ops = ops;
	return pool;

out_rwq:
	destroy_workqueue(pool->release_wq);
out_wq:
	destroy_workqueue(pool->compact_wq);
out_unbuddied:
	free_percpu(pool->unbuddied);
out_pool:
	kmem_cache_destroy(pool->c_handle);
out_c:
	kfree(pool);
out:
	return NULL;
}

/**
 * z3fold_destroy_pool() - destroys an existing z3fold pool
 * @pool:	the z3fold pool to be destroyed
 *
 * The pool should be emptied before this function is called.
 */
static void z3fold_destroy_pool(struct z3fold_pool *pool)
{
	kmem_cache_destroy(pool->c_handle);

	/*
	 * We need to destroy pool->compact_wq before pool->release_wq,
	 * as any pending work on pool->compact_wq will call
	 * queue_work(pool->release_wq, &pool->work).
	 *
	 * There are still outstanding pages until both workqueues are drained,
	 * so we cannot unregister migration until then.
	 */

	destroy_workqueue(pool->compact_wq);
	destroy_workqueue(pool->release_wq);
	z3fold_unregister_migration(pool);
	kfree(pool);
}

/**
 * z3fold_alloc() - allocates a region of a given size
 * @pool:	z3fold pool from which to allocate
 * @size:	size in bytes of the desired allocation
 * @gfp:	gfp flags used if the pool needs to grow
 * @handle:	handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough
 * to satisfy the allocation request. A search of the unbuddied lists is
 * performed first. If no suitable free region is found, then a new page is
 * allocated and added to the pool to satisfy the request.
 *
 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
 * as z3fold pool pages.
 *
 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
 * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
 * a new page.
 */
static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks = size_to_chunks(size);
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	enum buddy bud;
	bool can_sleep = gfpflags_allow_blocking(gfp);

	if (!size)
		return -EINVAL;

	if (size > PAGE_SIZE)
		return -ENOSPC;

	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		bud = HEADLESS;
	else {
retry:
		zhdr = __z3fold_alloc(pool, size, can_sleep);
		if (zhdr) {
			if (zhdr->first_chunks == 0) {
				if (zhdr->middle_chunks != 0 &&
				    chunks >= zhdr->start_middle)
					bud = LAST;
				else
					bud = FIRST;
			} else if (zhdr->last_chunks == 0)
				bud = LAST;
			else if (zhdr->middle_chunks == 0)
				bud = MIDDLE;
			else {
				if (kref_put(&zhdr->refcount,
					     release_z3fold_page_locked))
					atomic64_dec(&pool->pages_nr);
				else
					z3fold_page_unlock(zhdr);
				pr_err("No free chunks in unbuddied\n");
				WARN_ON(1);
				goto retry;
			}
			page = virt_to_page(zhdr);
			goto found;
		}
		bud = FIRST;
	}

	page = NULL;
	if (can_sleep) {
		spin_lock(&pool->stale_lock);
		zhdr = list_first_entry_or_null(&pool->stale,
						struct z3fold_header, buddy);
		/*
		 * Before allocating a page, let's see if we can take one from
		 * the stale pages list. cancel_work_sync() can sleep so we
		 * limit this case to the contexts where we can sleep
		 */
		if (zhdr) {
			list_del(&zhdr->buddy);
			spin_unlock(&pool->stale_lock);
			cancel_work_sync(&zhdr->work);
			page = virt_to_page(zhdr);
		} else {
			spin_unlock(&pool->stale_lock);
		}
	}
	if (!page)
		page = alloc_page(gfp);

	if (!page)
		return -ENOMEM;

	zhdr = init_z3fold_page(page, bud == HEADLESS, pool, gfp);
	if (!zhdr) {
		__free_page(page);
		return -ENOMEM;
	}
	atomic64_inc(&pool->pages_nr);

	if (bud == HEADLESS) {
		set_bit(PAGE_HEADLESS, &page->private);
		goto headless;
	}
	if (can_sleep) {
		lock_page(page);
		__SetPageMovable(page, pool->inode->i_mapping);
		unlock_page(page);
	} else {
		if (trylock_page(page)) {
			__SetPageMovable(page, pool->inode->i_mapping);
			unlock_page(page);
		}
	}
	z3fold_page_lock(zhdr);

found:
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else if (bud == LAST)
		zhdr->last_chunks = chunks;
	else {
		zhdr->middle_chunks = chunks;
		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
	}
	add_to_unbuddied(pool, zhdr);

headless:
	spin_lock(&pool->lock);
	/* Add/move z3fold page to beginning of LRU */
	if (!list_empty(&page->lru))
		list_del(&page->lru);

	list_add(&page->lru, &pool->lru);

	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);
	if (bud != HEADLESS)
		z3fold_page_unlock(zhdr);

	return 0;
}

/**
 * z3fold_free() - frees the allocation associated with the given handle
 * @pool:	pool in which the allocation resided
 * @handle:	handle associated with the allocation returned by z3fold_alloc()
 *
 * In the case that the z3fold page in which the allocation resides is under
 * reclaim, as indicated by the PG_reclaim flag being set, this function
 * only sets the first|last_chunks to 0. The page is actually freed
 * once both buddies are evicted (see z3fold_reclaim_page() below).
 */
static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy bud;
	bool page_claimed;

	zhdr = get_z3fold_header(handle);
	page = virt_to_page(zhdr);
	page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private);

	if (test_bit(PAGE_HEADLESS, &page->private)) {
		/* if a headless page is under reclaim, just leave.
		 * NB: we use test_and_set_bit for a reason: if the bit
		 * has not been set before, we release this page
		 * immediately so we don't care about its value any more.
		 */
		if (!page_claimed) {
			spin_lock(&pool->lock);
			list_del(&page->lru);
			spin_unlock(&pool->lock);
			put_z3fold_header(zhdr);
			free_z3fold_page(page, true);
			atomic64_dec(&pool->pages_nr);
		}
		return;
	}

	/* Non-headless case */
	bud = handle_to_buddy(handle);

	switch (bud) {
	case FIRST:
		zhdr->first_chunks = 0;
		break;
	case MIDDLE:
		zhdr->middle_chunks = 0;
		break;
	case LAST:
		zhdr->last_chunks = 0;
		break;
	default:
		pr_err("%s: unknown bud %d\n", __func__, bud);
		WARN_ON(1);
		put_z3fold_header(zhdr);
		clear_bit(PAGE_CLAIMED, &page->private);
		return;
	}

	if (!page_claimed)
		free_handle(handle);
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	if (page_claimed) {
		/* the page has not been claimed by us */
		z3fold_page_unlock(zhdr);
		return;
	}
	if (unlikely(PageIsolated(page)) ||
	    test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
		put_z3fold_header(zhdr);
		clear_bit(PAGE_CLAIMED, &page->private);
		return;
	}
	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
		spin_lock(&pool->lock);
		list_del_init(&zhdr->buddy);
		spin_unlock(&pool->lock);
		zhdr->cpu = -1;
		kref_get(&zhdr->refcount);
		clear_bit(PAGE_CLAIMED, &page->private);
		do_compact_page(zhdr, true);
		return;
	}
	kref_get(&zhdr->refcount);
	clear_bit(PAGE_CLAIMED, &page->private);
	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
	put_z3fold_header(zhdr);
}

/**
 * z3fold_reclaim_page() - evicts allocations from a pool page and frees it
 * @pool:	pool from which a page will attempt to be evicted
 * @retries:	number of pages on the LRU list for which eviction will
 *		be attempted before failing
 *
 * z3fold reclaim is different from normal system reclaim in that it is done
 * from the bottom, up. This is because only the bottom layer, z3fold, has
 * information on how the allocations are organized within each z3fold page.
 * This has the potential to create interesting locking situations between
 * z3fold and the user, however.
 *
 * To avoid these, this is how z3fold_reclaim_page() should be called:
 *
 * The user detects a page should be reclaimed and calls z3fold_reclaim_page().
 * z3fold_reclaim_page() will remove a z3fold page from the pool LRU list and
 * call the user-defined eviction handler with the pool and handle as
 * arguments.
 *
 * If the handle can not be evicted, the eviction handler should return
 * non-zero. z3fold_reclaim_page() will add the z3fold page back to the
 * appropriate list and try the next z3fold page on the LRU up to
 * a user defined number of retries.
 *
 * If the handle is successfully evicted, the eviction handler should
 * return 0 _and_ should have called z3fold_free() on the handle. z3fold_free()
 * contains logic to delay freeing the page if the page is under reclaim,
 * as indicated by the setting of the PG_reclaim flag on the underlying page.
 *
 * If all buddies in the z3fold page are successfully evicted, then the
 * z3fold page can be freed.
 *
 * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
 * no pages to evict or an eviction handler is not registered, -EAGAIN if
 * the retry limit was hit.
 */
static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
{
	int i, ret = -1;
	struct z3fold_header *zhdr = NULL;
	struct page *page = NULL;
	struct list_head *pos;
	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;

	spin_lock(&pool->lock);
	if (!pool->ops || !pool->ops->evict || retries == 0) {
		spin_unlock(&pool->lock);
		return -EINVAL;
	}
	for (i = 0; i < retries; i++) {
		if (list_empty(&pool->lru)) {
			spin_unlock(&pool->lock);
			return -EINVAL;
		}
		list_for_each_prev(pos, &pool->lru) {
			page = list_entry(pos, struct page, lru);

			/* this bit could have been set by free, in which case
			 * we pass over to the next page in the pool.
			 */
			if (test_and_set_bit(PAGE_CLAIMED, &page->private)) {
				page = NULL;
				continue;
			}

			if (unlikely(PageIsolated(page))) {
				clear_bit(PAGE_CLAIMED, &page->private);
				page = NULL;
				continue;
			}
			zhdr = page_address(page);
			if (test_bit(PAGE_HEADLESS, &page->private))
				break;

			if (!z3fold_page_trylock(zhdr)) {
				clear_bit(PAGE_CLAIMED, &page->private);
				zhdr = NULL;
				continue; /* can't evict at this point */
			}
			if (zhdr->foreign_handles) {
				clear_bit(PAGE_CLAIMED, &page->private);
				z3fold_page_unlock(zhdr);
				zhdr = NULL;
				continue; /* can't evict such page */
			}
			kref_get(&zhdr->refcount);
			list_del_init(&zhdr->buddy);
			zhdr->cpu = -1;
			break;
		}

		if (!zhdr)
			break;

		list_del_init(&page->lru);
		spin_unlock(&pool->lock);

		if (!test_bit(PAGE_HEADLESS, &page->private)) {
			/*
			 * We need to encode the handles before unlocking, and
			 * use our local slots structure because z3fold_free
			 * can zero out zhdr->slots and we can't do much
			 * about that
			 */
			first_handle = 0;
			last_handle = 0;
			middle_handle = 0;
			if (zhdr->first_chunks)
				first_handle = encode_handle(zhdr, FIRST);
			if (zhdr->middle_chunks)
				middle_handle = encode_handle(zhdr, MIDDLE);
			if (zhdr->last_chunks)
				last_handle = encode_handle(zhdr, LAST);
			/*
			 * it's safe to unlock here because we hold a
			 * reference to this page
			 */
			z3fold_page_unlock(zhdr);
		} else {
			first_handle = encode_handle(zhdr, HEADLESS);
			last_handle = middle_handle = 0;
		}
		/* Issue the eviction callback(s) */
		if (middle_handle) {
			ret = pool->ops->evict(pool, middle_handle);
			if (ret)
				goto next;
			free_handle(middle_handle);
		}
		if (first_handle) {
			ret = pool->ops->evict(pool, first_handle);
			if (ret)
				goto next;
			free_handle(first_handle);
		}
		if (last_handle) {
			ret = pool->ops->evict(pool, last_handle);
			if (ret)
				goto next;
			free_handle(last_handle);
		}
next:
		if (test_bit(PAGE_HEADLESS, &page->private)) {
			if (ret == 0) {
				free_z3fold_page(page, true);
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			clear_bit(PAGE_CLAIMED, &page->private);
		} else {
			z3fold_page_lock(zhdr);
			if (kref_put(&zhdr->refcount,
					release_z3fold_page_locked)) {
				atomic64_dec(&pool->pages_nr);
				return 0;
			}
			/*
			 * if we are here, the page is still not completely
			 * free. Take the global pool lock then to be able
			 * to add it back to the lru list
			 */
			spin_lock(&pool->lock);
			list_add(&page->lru, &pool->lru);
			spin_unlock(&pool->lock);
			z3fold_page_unlock(zhdr);
			clear_bit(PAGE_CLAIMED, &page->private);
		}

		/* We started off locked, so we need to lock the pool back */
		spin_lock(&pool->lock);
	}
	spin_unlock(&pool->lock);
	return -EAGAIN;
}

/**
 * z3fold_map() - maps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be mapped
 *
 * Extracts the buddy number from handle and constructs the pointer to the
 * correct starting chunk within the page.
 *
 * Returns: a pointer to the mapped allocation
 */
static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	void *addr;
	enum buddy buddy;

	zhdr = get_z3fold_header(handle);
	addr = zhdr;
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		goto out;

	buddy = handle_to_buddy(handle);
	switch (buddy) {
	case FIRST:
		addr += ZHDR_SIZE_ALIGNED;
		break;
	case MIDDLE:
		addr += zhdr->start_middle << CHUNK_SHIFT;
		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
		break;
	case LAST:
		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
		break;
	default:
		pr_err("unknown buddy id %d\n", buddy);
		WARN_ON(1);
		addr = NULL;
		break;
	}

	if (addr)
		zhdr->mapped_count++;
out:
	put_z3fold_header(zhdr);
	return addr;
}

/**
 * z3fold_unmap() - unmaps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be unmapped
 */
static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
{
	struct z3fold_header *zhdr;
	struct page *page;
	enum buddy buddy;

	zhdr = get_z3fold_header(handle);
	page = virt_to_page(zhdr);

	if (test_bit(PAGE_HEADLESS, &page->private))
		return;

	buddy = handle_to_buddy(handle);
	if (buddy == MIDDLE)
		clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
	zhdr->mapped_count--;
	put_z3fold_header(zhdr);
}

/**
 * z3fold_get_pool_size() - gets the z3fold pool size in pages
 * @pool:	pool whose size is being queried
 *
 * Returns: size in pages of the given pool.
 */
static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
{
	return atomic64_read(&pool->pages_nr);
}

static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(PageIsolated(page), page);

	if (test_bit(PAGE_HEADLESS, &page->private) ||
	    test_bit(PAGE_CLAIMED, &page->private))
		return false;

	zhdr = page_address(page);
	z3fold_page_lock(zhdr);
	if (test_bit(NEEDS_COMPACTING, &page->private) ||
	    test_bit(PAGE_STALE, &page->private))
		goto out;

	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0)
		goto out;

	pool = zhdr_to_pool(zhdr);
	spin_lock(&pool->lock);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&pool->lock);

	kref_get(&zhdr->refcount);
	z3fold_page_unlock(zhdr);
	return true;

out:
	z3fold_page_unlock(zhdr);
	return false;
}

static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage,
			       struct page *page, enum migrate_mode mode)
{
	struct z3fold_header *zhdr, *new_zhdr;
	struct z3fold_pool *pool;
	struct address_space *new_mapping;

	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	if (!z3fold_page_trylock(zhdr))
		return -EAGAIN;
	if (zhdr->mapped_count != 0 || zhdr->foreign_handles != 0) {
		z3fold_page_unlock(zhdr);
		return -EBUSY;
	}
	if (work_pending(&zhdr->work)) {
		z3fold_page_unlock(zhdr);
		return -EAGAIN;
	}
	new_zhdr = page_address(newpage);
	memcpy(new_zhdr, zhdr, PAGE_SIZE);
	newpage->private = page->private;
	page->private = 0;
	z3fold_page_unlock(zhdr);
	spin_lock_init(&new_zhdr->page_lock);
	INIT_WORK(&new_zhdr->work, compact_page_work);
	/*
	 * z3fold_page_isolate() ensures that new_zhdr->buddy is empty,
	 * so we only have to reinitialize it.
	 */
	INIT_LIST_HEAD(&new_zhdr->buddy);
	new_mapping = page_mapping(page);
	__ClearPageMovable(page);
	ClearPagePrivate(page);

	get_page(newpage);
	z3fold_page_lock(new_zhdr);
	if (new_zhdr->first_chunks)
		encode_handle(new_zhdr, FIRST);
	if (new_zhdr->last_chunks)
		encode_handle(new_zhdr, LAST);
	if (new_zhdr->middle_chunks)
		encode_handle(new_zhdr, MIDDLE);
	set_bit(NEEDS_COMPACTING, &newpage->private);
	new_zhdr->cpu = smp_processor_id();
	spin_lock(&pool->lock);
	list_add(&newpage->lru, &pool->lru);
	spin_unlock(&pool->lock);
	__SetPageMovable(newpage, new_mapping);
	z3fold_page_unlock(new_zhdr);

	queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);

	page_mapcount_reset(page);
	put_page(page);
	return 0;
}

static void z3fold_page_putback(struct page *page)
{
	struct z3fold_header *zhdr;
	struct z3fold_pool *pool;

	zhdr = page_address(page);
	pool = zhdr_to_pool(zhdr);

	z3fold_page_lock(zhdr);
	if (!list_empty(&zhdr->buddy))
		list_del_init(&zhdr->buddy);
	INIT_LIST_HEAD(&page->lru);
	if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
		atomic64_dec(&pool->pages_nr);
		return;
	}
	spin_lock(&pool->lock);
	list_add(&page->lru, &pool->lru);
	spin_unlock(&pool->lock);
	z3fold_page_unlock(zhdr);
}

static const struct address_space_operations z3fold_aops = {
	.isolate_page = z3fold_page_isolate,
	.migratepage = z3fold_page_migrate,
	.putback_page = z3fold_page_putback,
};

/*****************
 * zpool
 ****************/

static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
{
	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
		return pool->zpool_ops->evict(pool->zpool, handle);
	else
		return -ENOENT;
}

static const struct z3fold_ops z3fold_zpool_ops = {
	.evict =	z3fold_zpool_evict
};

static void *z3fold_zpool_create(const char *name, gfp_t gfp,
			       const struct zpool_ops *zpool_ops,
			       struct zpool *zpool)
{
	struct z3fold_pool *pool;

	pool = z3fold_create_pool(name, gfp,
				zpool_ops ? &z3fold_zpool_ops : NULL);
	if (pool) {
		pool->zpool = zpool;
		pool->zpool_ops = zpool_ops;
	}
	return pool;
}

static void z3fold_zpool_destroy(void *pool)
{
	z3fold_destroy_pool(pool);
}

static int z3fold_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return z3fold_alloc(pool, size, gfp, handle);
}
static void z3fold_zpool_free(void *pool, unsigned long handle)
{
	z3fold_free(pool, handle);
}

static int z3fold_zpool_shrink(void *pool, unsigned int pages,
			unsigned int *reclaimed)
{
	unsigned int total = 0;
	int ret = -EINVAL;

	while (total < pages) {
		ret = z3fold_reclaim_page(pool, 8);
		if (ret < 0)
			break;
		total++;
	}

	if (reclaimed)
		*reclaimed = total;

	return ret;
}

static void *z3fold_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	return z3fold_map(pool, handle);
}
static void z3fold_zpool_unmap(void *pool, unsigned long handle)
{
	z3fold_unmap(pool, handle);
}

static u64 z3fold_zpool_total_size(void *pool)
{
	return z3fold_get_pool_size(pool) * PAGE_SIZE;
}

static struct zpool_driver z3fold_zpool_driver = {
	.type =		"z3fold",
	.owner =	THIS_MODULE,
	.create =	z3fold_zpool_create,
	.destroy =	z3fold_zpool_destroy,
	.malloc =	z3fold_zpool_malloc,
	.free =		z3fold_zpool_free,
	.shrink =	z3fold_zpool_shrink,
	.map =		z3fold_zpool_map,
	.unmap =	z3fold_zpool_unmap,
	.total_size =	z3fold_zpool_total_size,
};

MODULE_ALIAS("zpool-z3fold");

static int __init init_z3fold(void)
{
	int ret;

	/* Make sure the z3fold header is not larger than the page size */
	BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
	ret = z3fold_mount();
	if (ret)
		return ret;

	zpool_register_driver(&z3fold_zpool_driver);

	return 0;
}

static void __exit exit_z3fold(void)
{
	z3fold_unmount();
	zpool_unregister_driver(&z3fold_zpool_driver);
}

module_init(init_z3fold);
module_exit(exit_z3fold);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vitaly Wool <vitalywool@gmail.com>");
MODULE_DESCRIPTION("3-Fold Allocator for Compressed Pages");