/*
 *  linux/mm/swap.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 */

/*
 * This file contains the default values for the operation of the
 * Linux VM subsystem. Fine-tuning documentation can be found in
 * Documentation/sysctl/vm.txt.
 * Started 18.12.91
 * Swap aging added 23.2.95, Stephen Tweedie.
 * Buffermem limits added 12.3.98, Rik van Riel.
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/mm_inline.h>
#include <linux/percpu_counter.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/backing-dev.h>
#include <linux/memcontrol.h>
#include <linux/gfp.h>
#include <linux/uio.h>

#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/pagemap.h>

/* How many pages do we try to swap or page in/out together? */
int page_cluster;

static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);

/*
 * This path almost never happens for VM activity - pages are normally
 * freed via pagevecs.  But it gets used by networking.
 */
static void __page_cache_release(struct page *page)
{
	if (PageLRU(page)) {
		struct zone *zone = page_zone(page);
		struct lruvec *lruvec;
		unsigned long flags;

		spin_lock_irqsave(&zone->lru_lock, flags);
		lruvec = mem_cgroup_page_lruvec(page, zone);
		VM_BUG_ON(!PageLRU(page));
		__ClearPageLRU(page);
		del_page_from_lru_list(page, lruvec, page_off_lru(page));
		spin_unlock_irqrestore(&zone->lru_lock, flags);
	}
}

static void __put_single_page(struct page *page)
{
	__page_cache_release(page);
	free_hot_cold_page(page, 0);
}

static void __put_compound_page(struct page *page)
{
	compound_page_dtor *dtor;

	__page_cache_release(page);
	dtor = get_compound_page_dtor(page);
	(*dtor)(page);
}

static void put_compound_page(struct page *page)
{
	if (unlikely(PageTail(page))) {
		/* __split_huge_page_refcount can run under us */
		struct page *page_head = compound_trans_head(page);

		if (likely(page != page_head &&
			   get_page_unless_zero(page_head))) {
			unsigned long flags;

			/*
			 * THP can not break up slab pages so avoid taking
			 * compound_lock().  Slab performs non-atomic bit ops
			 * on page->flags for better performance.  In particular
			 * slab_unlock() in slub used to be a hot path.  It is
			 * still hot on arches that do not support
			 * this_cpu_cmpxchg_double().
			 */
			if (PageSlab(page_head)) {
				if (PageTail(page)) {
					if (put_page_testzero(page_head))
						VM_BUG_ON(1);

					atomic_dec(&page->_mapcount);
					goto skip_lock_tail;
				} else
					goto skip_lock;
			}
			/*
			 * page_head wasn't a dangling pointer but it
			 * may not be a head page anymore by the time
			 * we obtain the lock. That is ok as long as it
			 * can't be freed from under us.
			 */
			flags = compound_lock_irqsave(page_head);
			if (unlikely(!PageTail(page))) {
				/* __split_huge_page_refcount run before us */
				compound_unlock_irqrestore(page_head, flags);
skip_lock:
				if (put_page_testzero(page_head))
					__put_single_page(page_head);
out_put_single:
				if (put_page_testzero(page))
					__put_single_page(page);
				return;
			}
			VM_BUG_ON(page_head != page->first_page);
			/*
			 * We can release the refcount taken by
			 * get_page_unless_zero() now that
			 * __split_huge_page_refcount() is blocked on
			 * the compound_lock.
			 */
			if (put_page_testzero(page_head))
				VM_BUG_ON(1);
			/* __split_huge_page_refcount will wait now */
			VM_BUG_ON(page_mapcount(page) <= 0);
			atomic_dec(&page->_mapcount);
			VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
			VM_BUG_ON(atomic_read(&page->_count) != 0);
			compound_unlock_irqrestore(page_head, flags);

skip_lock_tail:
			if (put_page_testzero(page_head)) {
				if (PageHead(page_head))
					__put_compound_page(page_head);
				else
					__put_single_page(page_head);
			}
		} else {
			/* page_head is a dangling pointer */
			VM_BUG_ON(PageTail(page));
			goto out_put_single;
		}
	} else if (put_page_testzero(page)) {
		if (PageHead(page))
			__put_compound_page(page);
		else
			__put_single_page(page);
	}
}

void put_page(struct page *page)
{
	if (unlikely(PageCompound(page)))
		put_compound_page(page);
	else if (put_page_testzero(page))
		__put_single_page(page);
}
EXPORT_SYMBOL(put_page);

/*
 * This function is exported but must not be called by anything other
 * than get_page(). It implements the slow path of get_page().
 */
bool __get_page_tail(struct page *page)
{
	/*
	 * This takes care of get_page() if run on a tail page
	 * returned by one of the get_user_pages/follow_page variants.
	 * get_user_pages/follow_page itself doesn't need the compound
	 * lock because it runs __get_page_tail_foll() under the
	 * proper PT lock that already serializes against
	 * split_huge_page().
	 */
	unsigned long flags;
	bool got = false;
	struct page *page_head = compound_trans_head(page);

	if (likely(page != page_head && get_page_unless_zero(page_head))) {

		/* See the comment in put_compound_page(). */
		if (PageSlab(page_head)) {
			if (likely(PageTail(page))) {
				__get_page_tail_foll(page, false);
				return true;
			} else {
				put_page(page_head);
				return false;
			}
		}

		/*
		 * page_head wasn't a dangling pointer but it
		 * may not be a head page anymore by the time
		 * we obtain the lock. That is ok as long as it
		 * can't be freed from under us.
		 */
		flags = compound_lock_irqsave(page_head);
		/* here __split_huge_page_refcount won't run anymore */
		if (likely(PageTail(page))) {
			__get_page_tail_foll(page, false);
			got = true;
		}
		compound_unlock_irqrestore(page_head, flags);
		if (unlikely(!got))
			put_page(page_head);
	}
	return got;
}
EXPORT_SYMBOL(__get_page_tail);

/**
 * put_pages_list() - release a list of pages
 * @pages: list of pages threaded on page->lru
 *
 * Release a list of pages which are strung together on page->lru.  Currently
 * used by read_cache_pages() and related error recovery code.
 */
void put_pages_list(struct list_head *pages)
{
	while (!list_empty(pages)) {
		struct page *victim;

		victim = list_entry(pages->prev, struct page, lru);
		list_del(&victim->lru);
		page_cache_release(victim);
	}
}
EXPORT_SYMBOL(put_pages_list);
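
/*
 * Illustrative usage sketch (not part of the original file): how a caller
 * might hand a batch of pages back via put_pages_list().  The batch size
 * and GFP flags below are arbitrary assumptions made for the example.
 */
#if 0
static void example_put_pages_list(void)
{
	LIST_HEAD(pages);	/* list threaded through page->lru */
	int i;

	/* Collect a few freshly allocated pages on the list. */
	for (i = 0; i < 4; i++) {
		struct page *page = alloc_page(GFP_KERNEL);

		if (!page)
			break;
		list_add(&page->lru, &pages);
	}

	/* Drops one reference per page; here that frees them. */
	put_pages_list(&pages);
}
#endif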

/*
 * get_kernel_pages() - pin kernel pages in memory
 * @kiov:	An array of struct kvec structures
 * @nr_segs:	number of segments to pin
 * @write:	pinning for read/write, currently ignored
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_segs long.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_segs is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with.
 */
int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
		struct page **pages)
{
	int seg;

	for (seg = 0; seg < nr_segs; seg++) {
		if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE))
			return seg;

		pages[seg] = kmap_to_page(kiov[seg].iov_base);
		page_cache_get(pages[seg]);
	}

	return seg;
}
EXPORT_SYMBOL_GPL(get_kernel_pages);

/*
 * get_kernel_page() - pin a kernel page in memory
 * @start:	starting kernel address
 * @write:	pinning for read/write, currently ignored
 * @pages:	array that receives pointer to the page pinned.
 *		Must have space for at least one page.
 *
 * Returns 1 if page is pinned. If the page was not pinned, returns
 * -errno. The page returned must be released with a put_page() call
 * when it is finished with.
 */
int get_kernel_page(unsigned long start, int write, struct page **pages)
{
	const struct kvec kiov = {
		.iov_base = (void *)start,
		.iov_len = PAGE_SIZE
	};

	return get_kernel_pages(&kiov, 1, write, pages);
}
EXPORT_SYMBOL_GPL(get_kernel_page);
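
/*
 * Illustrative usage sketch (not part of the original file): pinning one
 * page of a kernel buffer with get_kernel_page() before handing it to
 * another subsystem.  The buffer "kbuf" and its alignment are assumptions
 * made purely for the example.
 */
#if 0
static int example_get_kernel_page(void *kbuf)
{
	struct page *page;
	int ret;

	/* kbuf must be page aligned; one page is pinned at a time. */
	ret = get_kernel_page((unsigned long)kbuf, 0, &page);
	if (ret < 1)
		return ret < 0 ? ret : -EFAULT;

	/* ... use "page", e.g. hand it to the block or network layer ... */

	/* Drop the reference taken by get_kernel_page(). */
	put_page(page);
	return 0;
}
#endif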

static void pagevec_lru_move_fn(struct pagevec *pvec,
	void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
	void *arg)
{
	int i;
	struct zone *zone = NULL;
	struct lruvec *lruvec;
	unsigned long flags = 0;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];
		struct zone *pagezone = page_zone(page);

		if (pagezone != zone) {
			if (zone)
				spin_unlock_irqrestore(&zone->lru_lock, flags);
			zone = pagezone;
			spin_lock_irqsave(&zone->lru_lock, flags);
		}

		lruvec = mem_cgroup_page_lruvec(page, zone);
		(*move_fn)(page, lruvec, arg);
	}
	if (zone)
		spin_unlock_irqrestore(&zone->lru_lock, flags);
	release_pages(pvec->pages, pvec->nr, pvec->cold);
	pagevec_reinit(pvec);
}

static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
				 void *arg)
{
	int *pgmoved = arg;

	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
		enum lru_list lru = page_lru_base_type(page);
		list_move_tail(&page->lru, &lruvec->lists[lru]);
		(*pgmoved)++;
	}
}

/*
 * pagevec_move_tail() must be called with IRQ disabled.
 * Otherwise this may cause nasty races.
 */
static void pagevec_move_tail(struct pagevec *pvec)
{
	int pgmoved = 0;

	pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
	__count_vm_events(PGROTATED, pgmoved);
}

/*
 * Writeback is about to end against a page which has been marked for immediate
 * reclaim.  If it still appears to be reclaimable, move it to the tail of the
 * inactive list.
 */
void rotate_reclaimable_page(struct page *page)
{
	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
	    !PageUnevictable(page) && PageLRU(page)) {
		struct pagevec *pvec;
		unsigned long flags;

		page_cache_get(page);
		local_irq_save(flags);
		pvec = &__get_cpu_var(lru_rotate_pvecs);
		if (!pagevec_add(pvec, page))
			pagevec_move_tail(pvec);
		local_irq_restore(flags);
	}
}

static void update_page_reclaim_stat(struct lruvec *lruvec,
				     int file, int rotated)
{
	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;

	reclaim_stat->recent_scanned[file]++;
	if (rotated)
		reclaim_stat->recent_rotated[file]++;
}

static void __activate_page(struct page *page, struct lruvec *lruvec,
			    void *arg)
{
	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
		int file = page_is_file_cache(page);
		int lru = page_lru_base_type(page);

		del_page_from_lru_list(page, lruvec, lru);
		SetPageActive(page);
		lru += LRU_ACTIVE;
		add_page_to_lru_list(page, lruvec, lru);
		trace_mm_lru_activate(page, page_to_pfn(page));

		__count_vm_event(PGACTIVATE);
		update_page_reclaim_stat(lruvec, file, 1);
	}
}

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);

static void activate_page_drain(int cpu)
{
	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);

	if (pagevec_count(pvec))
		pagevec_lru_move_fn(pvec, __activate_page, NULL);
}

void activate_page(struct page *page)
{
	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);

		page_cache_get(page);
		if (!pagevec_add(pvec, page))
			pagevec_lru_move_fn(pvec, __activate_page, NULL);
		put_cpu_var(activate_page_pvecs);
	}
}

#else
static inline void activate_page_drain(int cpu)
{
}

void activate_page(struct page *page)
{
	struct zone *zone = page_zone(page);

	spin_lock_irq(&zone->lru_lock);
	__activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
	spin_unlock_irq(&zone->lru_lock);
}
#endif

static void __lru_cache_activate_page(struct page *page)
{
	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
	int i;

	/*
	 * Search backwards on the optimistic assumption that the page being
	 * activated has just been added to this pagevec. Note that only
	 * the local pagevec is examined as a !PageLRU page could be in the
	 * process of being released, reclaimed, migrated or on a remote
	 * pagevec that is currently being drained. Furthermore, marking
	 * a remote pagevec's page PageActive potentially hits a race where
	 * a page is marked PageActive just after it is added to the inactive
	 * list causing accounting errors and BUG_ON checks to trigger.
	 */
	for (i = pagevec_count(pvec) - 1; i >= 0; i--) {
		struct page *pagevec_page = pvec->pages[i];

		if (pagevec_page == page) {
			SetPageActive(page);
			break;
		}
	}

	put_cpu_var(lru_add_pvec);
}

/*
 * Mark a page as having seen activity.
 *
 * inactive,unreferenced	->	inactive,referenced
 * inactive,referenced		->	active,unreferenced
 * active,unreferenced		->	active,referenced
 */
void mark_page_accessed(struct page *page)
{
	if (!PageActive(page) && !PageUnevictable(page) &&
			PageReferenced(page)) {

		/*
		 * If the page is on the LRU, queue it for activation via
		 * activate_page_pvecs. Otherwise, assume the page is on a
		 * pagevec, mark it active and it'll be moved to the active
		 * LRU on the next drain.
		 */
		if (PageLRU(page))
			activate_page(page);
		else
			__lru_cache_activate_page(page);
		ClearPageReferenced(page);
	} else if (!PageReferenced(page)) {
		SetPageReferenced(page);
	}
}
EXPORT_SYMBOL(mark_page_accessed);

/*
 * Queue the page for addition to the LRU via pagevec. The decision on whether
 * to add the page to the [in]active [file|anon] list is deferred until the
 * pagevec is drained. This gives a chance for the caller of __lru_cache_add()
 * to have the page added to the active list using mark_page_accessed().
 */
void __lru_cache_add(struct page *page)
{
	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);

	page_cache_get(page);
	if (!pagevec_space(pvec))
		__pagevec_lru_add(pvec);
	pagevec_add(pvec, page);
	put_cpu_var(lru_add_pvec);
}
EXPORT_SYMBOL(__lru_cache_add);

/**
 * lru_cache_add - add a page to the LRU list
 * @page: the page to be added to the LRU.
 */
void lru_cache_add(struct page *page)
{
	if (PageActive(page)) {
		VM_BUG_ON(PageUnevictable(page));
	} else if (PageUnevictable(page)) {
		VM_BUG_ON(PageActive(page));
	}

	VM_BUG_ON(PageLRU(page));
	__lru_cache_add(page);
}
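
/*
 * Illustrative usage sketch (not part of the original file): the typical
 * page-cache pattern of adding a new page with lru_cache_add() and then
 * reporting activity with mark_page_accessed(), so that a second access
 * can promote the page following the state transitions documented above.
 * The mapping, index and GFP flags are assumptions made for the example.
 */
#if 0
static int example_lru_cache_add(struct address_space *mapping, pgoff_t index)
{
	struct page *page = page_cache_alloc_cold(mapping);
	int ret;

	if (!page)
		return -ENOMEM;

	ret = add_to_page_cache(page, mapping, index, GFP_KERNEL);
	if (ret) {
		page_cache_release(page);
		return ret;
	}

	/* Page is queued for the inactive list via the lru_add pagevec. */
	lru_cache_add(page);

	/* First access: sets PG_referenced. */
	mark_page_accessed(page);
	/*
	 * Second access: activates the page (or marks it PG_active while
	 * it still sits on the per-cpu lru_add pagevec).
	 */
	mark_page_accessed(page);

	unlock_page(page);
	page_cache_release(page);
	return 0;
}
#endif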

/**
 * add_page_to_unevictable_list - add a page to the unevictable list
 * @page:  the page to be added to the unevictable list
 *
 * Add page directly to its zone's unevictable list.  To avoid races with
 * tasks that might be making the page evictable, through eg. munlock,
 * munmap or exit, while it's not on the lru, we want to add the page
 * while it's locked or otherwise "invisible" to other tasks.  This is
 * difficult to do when using the pagevec cache, so bypass that.
 */
void add_page_to_unevictable_list(struct page *page)
{
	struct zone *zone = page_zone(page);
	struct lruvec *lruvec;

	spin_lock_irq(&zone->lru_lock);
	lruvec = mem_cgroup_page_lruvec(page, zone);
	SetPageUnevictable(page);
	SetPageLRU(page);
	add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
	spin_unlock_irq(&zone->lru_lock);
}

/*
 * If the page can not be invalidated, it is moved to the
 * inactive list to speed up its reclaim.  It is moved to the
 * head of the list, rather than the tail, to give the flusher
 * threads some time to write it out, as this is much more
 * effective than the single-page writeout from reclaim.
 *
 * If the page isn't page_mapped and is dirty/writeback, it
 * can be reclaimed ASAP using PG_reclaim.
 *
 * 1. active, mapped page		-> none
 * 2. active, dirty/writeback page	-> inactive, head, PG_reclaim
 * 3. inactive, mapped page		-> none
 * 4. inactive, dirty/writeback page	-> inactive, head, PG_reclaim
 * 5. inactive, clean			-> inactive, tail
 * 6. Others				-> none
 *
 * In case 4, the page is moved to the head of the inactive list because
 * the VM expects it to be written out by flusher threads, which is much
 * more effective than the single-page writeout from reclaim.
 */
static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
			      void *arg)
{
	int lru, file;
	bool active;

	if (!PageLRU(page))
		return;

	if (PageUnevictable(page))
		return;

	/* Some processes are using the page */
	if (page_mapped(page))
		return;

	active = PageActive(page);
	file = page_is_file_cache(page);
	lru = page_lru_base_type(page);

	del_page_from_lru_list(page, lruvec, lru + active);
	ClearPageActive(page);
	ClearPageReferenced(page);
	add_page_to_lru_list(page, lruvec, lru);

	if (PageWriteback(page) || PageDirty(page)) {
		/*
		 * PG_reclaim could race with end_page_writeback, which can
		 * confuse readahead.  But the race window is _really_ small
		 * and it's a non-critical problem.
		 */
		SetPageReclaim(page);
	} else {
		/*
		 * The page's writeback ended while it was on the pagevec,
		 * so move the page to the tail of the inactive list.
		 */
		list_move_tail(&page->lru, &lruvec->lists[lru]);
		__count_vm_event(PGROTATED);
	}

	if (active)
		__count_vm_event(PGDEACTIVATE);
	update_page_reclaim_stat(lruvec, file, 0);
}

/*
 * Drain pages out of the cpu's pagevecs.
 * Either "cpu" is the current CPU, and preemption has already been
 * disabled; or "cpu" is being hot-unplugged, and is already dead.
 */
void lru_add_drain_cpu(int cpu)
{
	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);

	if (pagevec_count(pvec))
		__pagevec_lru_add(pvec);

	pvec = &per_cpu(lru_rotate_pvecs, cpu);
	if (pagevec_count(pvec)) {
		unsigned long flags;

		/* No harm done if a racing interrupt already did this */
		local_irq_save(flags);
		pagevec_move_tail(pvec);
		local_irq_restore(flags);
	}

	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
	if (pagevec_count(pvec))
		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);

	activate_page_drain(cpu);
}

/**
 * deactivate_page - forcefully deactivate a page
 * @page: page to deactivate
 *
 * This function hints the VM that @page is a good reclaim candidate,
 * for example if its invalidation fails due to the page being dirty
 * or under writeback.
 */
void deactivate_page(struct page *page)
{
	/*
	 * In a workload with many unevictable pages, such as with mprotect,
	 * deactivating unevictable pages to accelerate reclaim is pointless.
	 */
	if (PageUnevictable(page))
		return;

	if (likely(get_page_unless_zero(page))) {
		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);

		if (!pagevec_add(pvec, page))
			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
		put_cpu_var(lru_deactivate_pvecs);
	}
}

void lru_add_drain(void)
{
	lru_add_drain_cpu(get_cpu());
	put_cpu();
}

static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
	lru_add_drain();
}

/*
 * Returns 0 for success
 */
int lru_add_drain_all(void)
{
	return schedule_on_each_cpu(lru_add_drain_per_cpu);
}

/*
 * Batched page_cache_release().  Decrement the reference count on all the
 * passed pages.  If it fell to zero then remove the page from the LRU and
 * free it.
 *
 * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
 * for the remainder of the operation.
 *
 * The locking in this function is against shrink_inactive_list(): we recheck
 * the page count inside the lock to see whether shrink_inactive_list()
 * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
 * will free it.
 */
void release_pages(struct page **pages, int nr, int cold)
{
	int i;
	LIST_HEAD(pages_to_free);
	struct zone *zone = NULL;
	struct lruvec *lruvec;
	unsigned long uninitialized_var(flags);

	for (i = 0; i < nr; i++) {
		struct page *page = pages[i];

		if (unlikely(PageCompound(page))) {
			if (zone) {
				spin_unlock_irqrestore(&zone->lru_lock, flags);
				zone = NULL;
			}
			put_compound_page(page);
			continue;
		}

		if (!put_page_testzero(page))
			continue;

		if (PageLRU(page)) {
			struct zone *pagezone = page_zone(page);

			if (pagezone != zone) {
				if (zone)
					spin_unlock_irqrestore(&zone->lru_lock,
								flags);
				zone = pagezone;
				spin_lock_irqsave(&zone->lru_lock, flags);
			}

			lruvec = mem_cgroup_page_lruvec(page, zone);
			VM_BUG_ON(!PageLRU(page));
			__ClearPageLRU(page);
			del_page_from_lru_list(page, lruvec, page_off_lru(page));
		}

		/* Clear Active bit in case of parallel mark_page_accessed */
		ClearPageActive(page);

		list_add(&page->lru, &pages_to_free);
	}
	if (zone)
		spin_unlock_irqrestore(&zone->lru_lock, flags);

	free_hot_cold_page_list(&pages_to_free, cold);
}
EXPORT_SYMBOL(release_pages);

/*
 * The pages which we're about to release may be in the deferred lru-addition
 * queues.  That would prevent them from really being freed right now.  That's
 * OK from a correctness point of view but is inefficient - those pages may be
 * cache-warm and we want to give them back to the page allocator ASAP.
 *
 * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
 * and __pagevec_lru_add_active() call release_pages() directly to avoid
 * mutual recursion.
 */
void __pagevec_release(struct pagevec *pvec)
{
	lru_add_drain();
	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
	pagevec_reinit(pvec);
}
EXPORT_SYMBOL(__pagevec_release);
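
/*
 * Illustrative usage sketch (not part of the original file): batching page
 * releases through a pagevec instead of calling page_cache_release() once
 * per page, so the LRU lock is taken at most once per PAGEVEC_SIZE pages.
 * The source of the pages is left abstract; "pages" and "nr" are assumptions
 * made for the example.
 */
#if 0
static void example_batched_release(struct page **pages, int nr)
{
	struct pagevec pvec;
	int i;

	pagevec_init(&pvec, 0);		/* 0: treat the pages as cache-hot */
	for (i = 0; i < nr; i++) {
		if (!pagevec_add(&pvec, pages[i]))
			/* Pagevec is full: drain LRU-add queues and release. */
			pagevec_release(&pvec);
	}
	pagevec_release(&pvec);		/* release any remainder */
}
#endif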

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* used by __split_huge_page_refcount() */
void lru_add_page_tail(struct page *page, struct page *page_tail,
		       struct lruvec *lruvec, struct list_head *list)
{
	int uninitialized_var(active);
	enum lru_list lru;
	const int file = 0;

	VM_BUG_ON(!PageHead(page));
	VM_BUG_ON(PageCompound(page_tail));
	VM_BUG_ON(PageLRU(page_tail));
	VM_BUG_ON(NR_CPUS != 1 &&
		  !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));

	if (!list)
		SetPageLRU(page_tail);

	if (page_evictable(page_tail)) {
		if (PageActive(page)) {
			SetPageActive(page_tail);
			active = 1;
			lru = LRU_ACTIVE_ANON;
		} else {
			active = 0;
			lru = LRU_INACTIVE_ANON;
		}
	} else {
		SetPageUnevictable(page_tail);
		lru = LRU_UNEVICTABLE;
	}

	if (likely(PageLRU(page)))
		list_add_tail(&page_tail->lru, &page->lru);
	else if (list) {
		/* page reclaim is reclaiming a huge page */
		get_page(page_tail);
		list_add_tail(&page_tail->lru, list);
	} else {
		struct list_head *list_head;
		/*
		 * Head page has not yet been counted, as an hpage,
		 * so we must account for each subpage individually.
		 *
		 * Use the standard add function to put page_tail on the list,
		 * but then correct its position so they all end up in order.
		 */
		add_page_to_lru_list(page_tail, lruvec, lru);
		list_head = page_tail->lru.prev;
		list_move_tail(&page_tail->lru, list_head);
	}

	if (!PageUnevictable(page))
		update_page_reclaim_stat(lruvec, file, active);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
				 void *arg)
{
	int file = page_is_file_cache(page);
	int active = PageActive(page);
	enum lru_list lru = page_lru(page);

	VM_BUG_ON(PageUnevictable(page));
	VM_BUG_ON(PageLRU(page));

	SetPageLRU(page);
	add_page_to_lru_list(page, lruvec, lru);
	update_page_reclaim_stat(lruvec, file, active);
	trace_mm_lru_insertion(page, page_to_pfn(page), lru,
			       trace_pagemap_flags(page));
}

/*
 * Add the passed pages to the LRU, then drop the caller's refcount
 * on them.  Reinitialises the caller's pagevec.
 */
void __pagevec_lru_add(struct pagevec *pvec)
{
	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, NULL);
}
EXPORT_SYMBOL(__pagevec_lru_add);

/**
 * pagevec_lookup - gang pagecache lookup
 * @pvec:	Where the resulting pages are placed
 * @mapping:	The address_space to search
 * @start:	The starting page index
 * @nr_pages:	The maximum number of pages
 *
 * pagevec_lookup() will search for and return a group of up to @nr_pages pages
 * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
 * reference against the pages in @pvec.
 *
 * The search returns a group of mapping-contiguous pages with ascending
 * indexes.  There may be holes in the indices due to not-present pages.
 *
 * pagevec_lookup() returns the number of pages which were found.
 */
unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
		pgoff_t start, unsigned nr_pages)
{
	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
	return pagevec_count(pvec);
}
EXPORT_SYMBOL(pagevec_lookup);

unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
		pgoff_t *index, int tag, unsigned nr_pages)
{
	pvec->nr = find_get_pages_tag(mapping, index, tag,
					nr_pages, pvec->pages);
	return pagevec_count(pvec);
}
EXPORT_SYMBOL(pagevec_lookup_tag);
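
/*
 * Illustrative usage sketch (not part of the original file): the common
 * pagevec_lookup() loop for walking every page currently in a mapping,
 * releasing each batch with pagevec_release().  The per-page work is left
 * as a placeholder comment; "mapping" is an assumption made for the example.
 */
#if 0
static void example_walk_mapping(struct address_space *mapping)
{
	struct pagevec pvec;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);
	while (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
		int i;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* ... inspect or process "page" here ... */

			/* Continue the scan after the last page found. */
			index = page->index + 1;
		}
		/* Drops the references taken by pagevec_lookup(). */
		pagevec_release(&pvec);
		cond_resched();
	}
}
#endif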

/*
 * Perform any setup for the swap system
 */
void __init swap_setup(void)
{
	unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
#ifdef CONFIG_SWAP
	int i;

	bdi_init(swapper_spaces[0].backing_dev_info);
	for (i = 0; i < MAX_SWAPFILES; i++) {
		spin_lock_init(&swapper_spaces[i].tree_lock);
		INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
	}
#endif

	/* Use a smaller cluster for small-memory machines */
	if (megs < 16)
		page_cluster = 2;
	else
		page_cluster = 3;
	/*
	 * Right now other parts of the system mean that we
	 * _really_ don't want to cluster much more
	 */
}