1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * linux/kernel/power/snapshot.c 4 * 5 * This file provides system snapshot/restore functionality for swsusp. 6 * 7 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz> 8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 9 */ 10 11 #define pr_fmt(fmt) "PM: hibernation: " fmt 12 13 #include <linux/version.h> 14 #include <linux/module.h> 15 #include <linux/mm.h> 16 #include <linux/suspend.h> 17 #include <linux/delay.h> 18 #include <linux/bitops.h> 19 #include <linux/spinlock.h> 20 #include <linux/kernel.h> 21 #include <linux/pm.h> 22 #include <linux/device.h> 23 #include <linux/init.h> 24 #include <linux/memblock.h> 25 #include <linux/nmi.h> 26 #include <linux/syscalls.h> 27 #include <linux/console.h> 28 #include <linux/highmem.h> 29 #include <linux/list.h> 30 #include <linux/slab.h> 31 #include <linux/compiler.h> 32 #include <linux/ktime.h> 33 #include <linux/set_memory.h> 34 35 #include <linux/uaccess.h> 36 #include <asm/mmu_context.h> 37 #include <asm/pgtable.h> 38 #include <asm/tlbflush.h> 39 #include <asm/io.h> 40 41 #include "power.h" 42 43 #if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY) 44 static bool hibernate_restore_protection; 45 static bool hibernate_restore_protection_active; 46 47 void enable_restore_image_protection(void) 48 { 49 hibernate_restore_protection = true; 50 } 51 52 static inline void hibernate_restore_protection_begin(void) 53 { 54 hibernate_restore_protection_active = hibernate_restore_protection; 55 } 56 57 static inline void hibernate_restore_protection_end(void) 58 { 59 hibernate_restore_protection_active = false; 60 } 61 62 static inline void hibernate_restore_protect_page(void *page_address) 63 { 64 if (hibernate_restore_protection_active) 65 set_memory_ro((unsigned long)page_address, 1); 66 } 67 68 static inline void hibernate_restore_unprotect_page(void *page_address) 69 { 70 if (hibernate_restore_protection_active) 71 set_memory_rw((unsigned 
long)page_address, 1); 72 } 73 #else 74 static inline void hibernate_restore_protection_begin(void) {} 75 static inline void hibernate_restore_protection_end(void) {} 76 static inline void hibernate_restore_protect_page(void *page_address) {} 77 static inline void hibernate_restore_unprotect_page(void *page_address) {} 78 #endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */ 79 80 static int swsusp_page_is_free(struct page *); 81 static void swsusp_set_page_forbidden(struct page *); 82 static void swsusp_unset_page_forbidden(struct page *); 83 84 /* 85 * Number of bytes to reserve for memory allocations made by device drivers 86 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't 87 * cause image creation to fail (tunable via /sys/power/reserved_size). 88 */ 89 unsigned long reserved_size; 90 91 void __init hibernate_reserved_size_init(void) 92 { 93 reserved_size = SPARE_PAGES * PAGE_SIZE; 94 } 95 96 /* 97 * Preferred image size in bytes (tunable via /sys/power/image_size). 98 * When it is set to N, swsusp will do its best to ensure the image 99 * size will not exceed N bytes, but if that is impossible, it will 100 * try to create the smallest image possible. 101 */ 102 unsigned long image_size; 103 104 void __init hibernate_image_size_init(void) 105 { 106 image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE; 107 } 108 109 /* 110 * List of PBEs needed for restoring the pages that were allocated before 111 * the suspend and included in the suspend image, but have also been 112 * allocated by the "resume" kernel, so their contents cannot be written 113 * directly to their "original" page frames. 114 */ 115 struct pbe *restore_pblist; 116 117 /* struct linked_page is used to build chains of pages */ 118 119 #define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) 120 121 struct linked_page { 122 struct linked_page *next; 123 char data[LINKED_PAGE_DATA_SIZE]; 124 } __packed; 125 126 /* 127 * List of "safe" pages (ie. 
pages that were not used by the image kernel 128 * before hibernation) that may be used as temporary storage for image kernel 129 * memory contents. 130 */ 131 static struct linked_page *safe_pages_list; 132 133 /* Pointer to an auxiliary buffer (1 page) */ 134 static void *buffer; 135 136 #define PG_ANY 0 137 #define PG_SAFE 1 138 #define PG_UNSAFE_CLEAR 1 139 #define PG_UNSAFE_KEEP 0 140 141 static unsigned int allocated_unsafe_pages; 142 143 /** 144 * get_image_page - Allocate a page for a hibernation image. 145 * @gfp_mask: GFP mask for the allocation. 146 * @safe_needed: Get pages that were not used before hibernation (restore only) 147 * 148 * During image restoration, for storing the PBE list and the image data, we can 149 * only use memory pages that do not conflict with the pages used before 150 * hibernation. The "unsafe" pages have PageNosaveFree set and we count them 151 * using allocated_unsafe_pages. 152 * 153 * Each allocated image page is marked as PageNosave and PageNosaveFree so that 154 * swsusp_free() can release it. 
155 */ 156 static void *get_image_page(gfp_t gfp_mask, int safe_needed) 157 { 158 void *res; 159 160 res = (void *)get_zeroed_page(gfp_mask); 161 if (safe_needed) 162 while (res && swsusp_page_is_free(virt_to_page(res))) { 163 /* The page is unsafe, mark it for swsusp_free() */ 164 swsusp_set_page_forbidden(virt_to_page(res)); 165 allocated_unsafe_pages++; 166 res = (void *)get_zeroed_page(gfp_mask); 167 } 168 if (res) { 169 swsusp_set_page_forbidden(virt_to_page(res)); 170 swsusp_set_page_free(virt_to_page(res)); 171 } 172 return res; 173 } 174 175 static void *__get_safe_page(gfp_t gfp_mask) 176 { 177 if (safe_pages_list) { 178 void *ret = safe_pages_list; 179 180 safe_pages_list = safe_pages_list->next; 181 memset(ret, 0, PAGE_SIZE); 182 return ret; 183 } 184 return get_image_page(gfp_mask, PG_SAFE); 185 } 186 187 unsigned long get_safe_page(gfp_t gfp_mask) 188 { 189 return (unsigned long)__get_safe_page(gfp_mask); 190 } 191 192 static struct page *alloc_image_page(gfp_t gfp_mask) 193 { 194 struct page *page; 195 196 page = alloc_page(gfp_mask); 197 if (page) { 198 swsusp_set_page_forbidden(page); 199 swsusp_set_page_free(page); 200 } 201 return page; 202 } 203 204 static void recycle_safe_page(void *page_address) 205 { 206 struct linked_page *lp = page_address; 207 208 lp->next = safe_pages_list; 209 safe_pages_list = lp; 210 } 211 212 /** 213 * free_image_page - Free a page allocated for hibernation image. 214 * @addr: Address of the page to free. 215 * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page. 216 * 217 * The page to free should have been allocated by get_image_page() (page flags 218 * set by it are affected). 
219 */ 220 static inline void free_image_page(void *addr, int clear_nosave_free) 221 { 222 struct page *page; 223 224 BUG_ON(!virt_addr_valid(addr)); 225 226 page = virt_to_page(addr); 227 228 swsusp_unset_page_forbidden(page); 229 if (clear_nosave_free) 230 swsusp_unset_page_free(page); 231 232 __free_page(page); 233 } 234 235 static inline void free_list_of_pages(struct linked_page *list, 236 int clear_page_nosave) 237 { 238 while (list) { 239 struct linked_page *lp = list->next; 240 241 free_image_page(list, clear_page_nosave); 242 list = lp; 243 } 244 } 245 246 /* 247 * struct chain_allocator is used for allocating small objects out of 248 * a linked list of pages called 'the chain'. 249 * 250 * The chain grows each time when there is no room for a new object in 251 * the current page. The allocated objects cannot be freed individually. 252 * It is only possible to free them all at once, by freeing the entire 253 * chain. 254 * 255 * NOTE: The chain allocator may be inefficient if the allocated objects 256 * are not much smaller than PAGE_SIZE. 257 */ 258 struct chain_allocator { 259 struct linked_page *chain; /* the chain */ 260 unsigned int used_space; /* total size of objects allocated out 261 of the current page */ 262 gfp_t gfp_mask; /* mask for allocating pages */ 263 int safe_needed; /* if set, only "safe" pages are allocated */ 264 }; 265 266 static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask, 267 int safe_needed) 268 { 269 ca->chain = NULL; 270 ca->used_space = LINKED_PAGE_DATA_SIZE; 271 ca->gfp_mask = gfp_mask; 272 ca->safe_needed = safe_needed; 273 } 274 275 static void *chain_alloc(struct chain_allocator *ca, unsigned int size) 276 { 277 void *ret; 278 279 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { 280 struct linked_page *lp; 281 282 lp = ca->safe_needed ? 
__get_safe_page(ca->gfp_mask) : 283 get_image_page(ca->gfp_mask, PG_ANY); 284 if (!lp) 285 return NULL; 286 287 lp->next = ca->chain; 288 ca->chain = lp; 289 ca->used_space = 0; 290 } 291 ret = ca->chain->data + ca->used_space; 292 ca->used_space += size; 293 return ret; 294 } 295 296 /** 297 * Data types related to memory bitmaps. 298 * 299 * Memory bitmap is a structure consiting of many linked lists of 300 * objects. The main list's elements are of type struct zone_bitmap 301 * and each of them corresonds to one zone. For each zone bitmap 302 * object there is a list of objects of type struct bm_block that 303 * represent each blocks of bitmap in which information is stored. 304 * 305 * struct memory_bitmap contains a pointer to the main list of zone 306 * bitmap objects, a struct bm_position used for browsing the bitmap, 307 * and a pointer to the list of pages used for allocating all of the 308 * zone bitmap objects and bitmap block objects. 309 * 310 * NOTE: It has to be possible to lay out the bitmap in memory 311 * using only allocations of order 0. Additionally, the bitmap is 312 * designed to work with arbitrary number of zones (this is over the 313 * top for now, but let's avoid making unnecessary assumptions ;-). 314 * 315 * struct zone_bitmap contains a pointer to a list of bitmap block 316 * objects and a pointer to the bitmap block object that has been 317 * most recently used for setting bits. Additionally, it contains the 318 * PFNs that correspond to the start and end of the represented zone. 319 * 320 * struct bm_block contains a pointer to the memory page in which 321 * information is stored (in the form of a block of bitmap) 322 * It also contains the pfns that correspond to the start and end of 323 * the represented memory area. 324 * 325 * The memory bitmap is organized as a radix tree to guarantee fast random 326 * access to the bits. There is one radix tree for each zone (as returned 327 * from create_mem_extents). 
328 * 329 * One radix tree is represented by one struct mem_zone_bm_rtree. There are 330 * two linked lists for the nodes of the tree, one for the inner nodes and 331 * one for the leave nodes. The linked leave nodes are used for fast linear 332 * access of the memory bitmap. 333 * 334 * The struct rtree_node represents one node of the radix tree. 335 */ 336 337 #define BM_END_OF_MAP (~0UL) 338 339 #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) 340 #define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) 341 #define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) 342 343 /* 344 * struct rtree_node is a wrapper struct to link the nodes 345 * of the rtree together for easy linear iteration over 346 * bits and easy freeing 347 */ 348 struct rtree_node { 349 struct list_head list; 350 unsigned long *data; 351 }; 352 353 /* 354 * struct mem_zone_bm_rtree represents a bitmap used for one 355 * populated memory zone. 356 */ 357 struct mem_zone_bm_rtree { 358 struct list_head list; /* Link Zones together */ 359 struct list_head nodes; /* Radix Tree inner nodes */ 360 struct list_head leaves; /* Radix Tree leaves */ 361 unsigned long start_pfn; /* Zone start page frame */ 362 unsigned long end_pfn; /* Zone end page frame + 1 */ 363 struct rtree_node *rtree; /* Radix Tree Root */ 364 int levels; /* Number of Radix Tree Levels */ 365 unsigned int blocks; /* Number of Bitmap Blocks */ 366 }; 367 368 /* strcut bm_position is used for browsing memory bitmaps */ 369 370 struct bm_position { 371 struct mem_zone_bm_rtree *zone; 372 struct rtree_node *node; 373 unsigned long node_pfn; 374 int node_bit; 375 }; 376 377 struct memory_bitmap { 378 struct list_head zones; 379 struct linked_page *p_list; /* list of pages used to store zone 380 bitmap objects and bitmap block 381 objects */ 382 struct bm_position cur; /* most recently used bit position */ 383 }; 384 385 /* Functions that operate on memory bitmaps */ 386 387 #define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long)) 388 #if 
BITS_PER_LONG == 32 389 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2) 390 #else 391 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3) 392 #endif 393 #define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) 394 395 /** 396 * alloc_rtree_node - Allocate a new node and add it to the radix tree. 397 * 398 * This function is used to allocate inner nodes as well as the 399 * leave nodes of the radix tree. It also adds the node to the 400 * corresponding linked list passed in by the *list parameter. 401 */ 402 static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, 403 struct chain_allocator *ca, 404 struct list_head *list) 405 { 406 struct rtree_node *node; 407 408 node = chain_alloc(ca, sizeof(struct rtree_node)); 409 if (!node) 410 return NULL; 411 412 node->data = get_image_page(gfp_mask, safe_needed); 413 if (!node->data) 414 return NULL; 415 416 list_add_tail(&node->list, list); 417 418 return node; 419 } 420 421 /** 422 * add_rtree_block - Add a new leave node to the radix tree. 423 * 424 * The leave nodes need to be allocated in order to keep the leaves 425 * linked list in order. This is guaranteed by the zone->blocks 426 * counter. 427 */ 428 static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, 429 int safe_needed, struct chain_allocator *ca) 430 { 431 struct rtree_node *node, *block, **dst; 432 unsigned int levels_needed, block_nr; 433 int i; 434 435 block_nr = zone->blocks; 436 levels_needed = 0; 437 438 /* How many levels do we need for this block nr? 
*/ 439 while (block_nr) { 440 levels_needed += 1; 441 block_nr >>= BM_RTREE_LEVEL_SHIFT; 442 } 443 444 /* Make sure the rtree has enough levels */ 445 for (i = zone->levels; i < levels_needed; i++) { 446 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 447 &zone->nodes); 448 if (!node) 449 return -ENOMEM; 450 451 node->data[0] = (unsigned long)zone->rtree; 452 zone->rtree = node; 453 zone->levels += 1; 454 } 455 456 /* Allocate new block */ 457 block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves); 458 if (!block) 459 return -ENOMEM; 460 461 /* Now walk the rtree to insert the block */ 462 node = zone->rtree; 463 dst = &zone->rtree; 464 block_nr = zone->blocks; 465 for (i = zone->levels; i > 0; i--) { 466 int index; 467 468 if (!node) { 469 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 470 &zone->nodes); 471 if (!node) 472 return -ENOMEM; 473 *dst = node; 474 } 475 476 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 477 index &= BM_RTREE_LEVEL_MASK; 478 dst = (struct rtree_node **)&((*dst)->data[index]); 479 node = *dst; 480 } 481 482 zone->blocks += 1; 483 *dst = block; 484 485 return 0; 486 } 487 488 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 489 int clear_nosave_free); 490 491 /** 492 * create_zone_bm_rtree - Create a radix tree for one zone. 493 * 494 * Allocated the mem_zone_bm_rtree structure and initializes it. 495 * This function also allocated and builds the radix tree for the 496 * zone. 
497 */ 498 static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask, 499 int safe_needed, 500 struct chain_allocator *ca, 501 unsigned long start, 502 unsigned long end) 503 { 504 struct mem_zone_bm_rtree *zone; 505 unsigned int i, nr_blocks; 506 unsigned long pages; 507 508 pages = end - start; 509 zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree)); 510 if (!zone) 511 return NULL; 512 513 INIT_LIST_HEAD(&zone->nodes); 514 INIT_LIST_HEAD(&zone->leaves); 515 zone->start_pfn = start; 516 zone->end_pfn = end; 517 nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); 518 519 for (i = 0; i < nr_blocks; i++) { 520 if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) { 521 free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR); 522 return NULL; 523 } 524 } 525 526 return zone; 527 } 528 529 /** 530 * free_zone_bm_rtree - Free the memory of the radix tree. 531 * 532 * Free all node pages of the radix tree. The mem_zone_bm_rtree 533 * structure itself is not freed here nor are the rtree_node 534 * structs. 535 */ 536 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 537 int clear_nosave_free) 538 { 539 struct rtree_node *node; 540 541 list_for_each_entry(node, &zone->nodes, list) 542 free_image_page(node->data, clear_nosave_free); 543 544 list_for_each_entry(node, &zone->leaves, list) 545 free_image_page(node->data, clear_nosave_free); 546 } 547 548 static void memory_bm_position_reset(struct memory_bitmap *bm) 549 { 550 bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, 551 list); 552 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 553 struct rtree_node, list); 554 bm->cur.node_pfn = 0; 555 bm->cur.node_bit = 0; 556 } 557 558 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); 559 560 struct mem_extent { 561 struct list_head hook; 562 unsigned long start; 563 unsigned long end; 564 }; 565 566 /** 567 * free_mem_extents - Free a list of memory extents. 568 * @list: List of extents to free. 
569 */ 570 static void free_mem_extents(struct list_head *list) 571 { 572 struct mem_extent *ext, *aux; 573 574 list_for_each_entry_safe(ext, aux, list, hook) { 575 list_del(&ext->hook); 576 kfree(ext); 577 } 578 } 579 580 /** 581 * create_mem_extents - Create a list of memory extents. 582 * @list: List to put the extents into. 583 * @gfp_mask: Mask to use for memory allocations. 584 * 585 * The extents represent contiguous ranges of PFNs. 586 */ 587 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) 588 { 589 struct zone *zone; 590 591 INIT_LIST_HEAD(list); 592 593 for_each_populated_zone(zone) { 594 unsigned long zone_start, zone_end; 595 struct mem_extent *ext, *cur, *aux; 596 597 zone_start = zone->zone_start_pfn; 598 zone_end = zone_end_pfn(zone); 599 600 list_for_each_entry(ext, list, hook) 601 if (zone_start <= ext->end) 602 break; 603 604 if (&ext->hook == list || zone_end < ext->start) { 605 /* New extent is necessary */ 606 struct mem_extent *new_ext; 607 608 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); 609 if (!new_ext) { 610 free_mem_extents(list); 611 return -ENOMEM; 612 } 613 new_ext->start = zone_start; 614 new_ext->end = zone_end; 615 list_add_tail(&new_ext->hook, &ext->hook); 616 continue; 617 } 618 619 /* Merge this zone's range of PFNs with the existing one */ 620 if (zone_start < ext->start) 621 ext->start = zone_start; 622 if (zone_end > ext->end) 623 ext->end = zone_end; 624 625 /* More merging may be possible */ 626 cur = ext; 627 list_for_each_entry_safe_continue(cur, aux, list, hook) { 628 if (zone_end < cur->start) 629 break; 630 if (zone_end < cur->end) 631 ext->end = cur->end; 632 list_del(&cur->hook); 633 kfree(cur); 634 } 635 } 636 637 return 0; 638 } 639 640 /** 641 * memory_bm_create - Allocate memory for a memory bitmap. 
642 */ 643 static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, 644 int safe_needed) 645 { 646 struct chain_allocator ca; 647 struct list_head mem_extents; 648 struct mem_extent *ext; 649 int error; 650 651 chain_init(&ca, gfp_mask, safe_needed); 652 INIT_LIST_HEAD(&bm->zones); 653 654 error = create_mem_extents(&mem_extents, gfp_mask); 655 if (error) 656 return error; 657 658 list_for_each_entry(ext, &mem_extents, hook) { 659 struct mem_zone_bm_rtree *zone; 660 661 zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, 662 ext->start, ext->end); 663 if (!zone) { 664 error = -ENOMEM; 665 goto Error; 666 } 667 list_add_tail(&zone->list, &bm->zones); 668 } 669 670 bm->p_list = ca.chain; 671 memory_bm_position_reset(bm); 672 Exit: 673 free_mem_extents(&mem_extents); 674 return error; 675 676 Error: 677 bm->p_list = ca.chain; 678 memory_bm_free(bm, PG_UNSAFE_CLEAR); 679 goto Exit; 680 } 681 682 /** 683 * memory_bm_free - Free memory occupied by the memory bitmap. 684 * @bm: Memory bitmap. 685 */ 686 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) 687 { 688 struct mem_zone_bm_rtree *zone; 689 690 list_for_each_entry(zone, &bm->zones, list) 691 free_zone_bm_rtree(zone, clear_nosave_free); 692 693 free_list_of_pages(bm->p_list, clear_nosave_free); 694 695 INIT_LIST_HEAD(&bm->zones); 696 } 697 698 /** 699 * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap. 700 * 701 * Find the bit in memory bitmap @bm that corresponds to the given PFN. 702 * The cur.zone, cur.block and cur.node_pfn members of @bm are updated. 703 * 704 * Walk the radix tree to find the page containing the bit that represents @pfn 705 * and return the position of the bit in @addr and @bit_nr. 
706 */ 707 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, 708 void **addr, unsigned int *bit_nr) 709 { 710 struct mem_zone_bm_rtree *curr, *zone; 711 struct rtree_node *node; 712 int i, block_nr; 713 714 zone = bm->cur.zone; 715 716 if (pfn >= zone->start_pfn && pfn < zone->end_pfn) 717 goto zone_found; 718 719 zone = NULL; 720 721 /* Find the right zone */ 722 list_for_each_entry(curr, &bm->zones, list) { 723 if (pfn >= curr->start_pfn && pfn < curr->end_pfn) { 724 zone = curr; 725 break; 726 } 727 } 728 729 if (!zone) 730 return -EFAULT; 731 732 zone_found: 733 /* 734 * We have found the zone. Now walk the radix tree to find the leaf node 735 * for our PFN. 736 */ 737 738 /* 739 * If the zone we wish to scan is the the current zone and the 740 * pfn falls into the current node then we do not need to walk 741 * the tree. 742 */ 743 node = bm->cur.node; 744 if (zone == bm->cur.zone && 745 ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) 746 goto node_found; 747 748 node = zone->rtree; 749 block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT; 750 751 for (i = zone->levels; i > 0; i--) { 752 int index; 753 754 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 755 index &= BM_RTREE_LEVEL_MASK; 756 BUG_ON(node->data[index] == 0); 757 node = (struct rtree_node *)node->data[index]; 758 } 759 760 node_found: 761 /* Update last position */ 762 bm->cur.zone = zone; 763 bm->cur.node = node; 764 bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK; 765 766 /* Set return values */ 767 *addr = node->data; 768 *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK; 769 770 return 0; 771 } 772 773 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) 774 { 775 void *addr; 776 unsigned int bit; 777 int error; 778 779 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 780 BUG_ON(error); 781 set_bit(bit, addr); 782 } 783 784 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) 785 { 786 void 
*addr; 787 unsigned int bit; 788 int error; 789 790 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 791 if (!error) 792 set_bit(bit, addr); 793 794 return error; 795 } 796 797 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) 798 { 799 void *addr; 800 unsigned int bit; 801 int error; 802 803 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 804 BUG_ON(error); 805 clear_bit(bit, addr); 806 } 807 808 static void memory_bm_clear_current(struct memory_bitmap *bm) 809 { 810 int bit; 811 812 bit = max(bm->cur.node_bit - 1, 0); 813 clear_bit(bit, bm->cur.node->data); 814 } 815 816 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) 817 { 818 void *addr; 819 unsigned int bit; 820 int error; 821 822 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 823 BUG_ON(error); 824 return test_bit(bit, addr); 825 } 826 827 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) 828 { 829 void *addr; 830 unsigned int bit; 831 832 return !memory_bm_find_bit(bm, pfn, &addr, &bit); 833 } 834 835 /* 836 * rtree_next_node - Jump to the next leaf node. 837 * 838 * Set the position to the beginning of the next node in the 839 * memory bitmap. This is either the next node in the current 840 * zone's radix tree or the first node in the radix tree of the 841 * next zone. 842 * 843 * Return true if there is a next node, false otherwise. 
844 */ 845 static bool rtree_next_node(struct memory_bitmap *bm) 846 { 847 if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) { 848 bm->cur.node = list_entry(bm->cur.node->list.next, 849 struct rtree_node, list); 850 bm->cur.node_pfn += BM_BITS_PER_BLOCK; 851 bm->cur.node_bit = 0; 852 touch_softlockup_watchdog(); 853 return true; 854 } 855 856 /* No more nodes, goto next zone */ 857 if (!list_is_last(&bm->cur.zone->list, &bm->zones)) { 858 bm->cur.zone = list_entry(bm->cur.zone->list.next, 859 struct mem_zone_bm_rtree, list); 860 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 861 struct rtree_node, list); 862 bm->cur.node_pfn = 0; 863 bm->cur.node_bit = 0; 864 return true; 865 } 866 867 /* No more zones */ 868 return false; 869 } 870 871 /** 872 * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap. 873 * @bm: Memory bitmap. 874 * 875 * Starting from the last returned position this function searches for the next 876 * set bit in @bm and returns the PFN represented by it. If no more bits are 877 * set, BM_END_OF_MAP is returned. 878 * 879 * It is required to run memory_bm_position_reset() before the first call to 880 * this function for the given memory bitmap. 881 */ 882 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) 883 { 884 unsigned long bits, pfn, pages; 885 int bit; 886 887 do { 888 pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn; 889 bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK); 890 bit = find_next_bit(bm->cur.node->data, bits, 891 bm->cur.node_bit); 892 if (bit < bits) { 893 pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit; 894 bm->cur.node_bit = bit + 1; 895 return pfn; 896 } 897 } while (rtree_next_node(bm)); 898 899 return BM_END_OF_MAP; 900 } 901 902 /* 903 * This structure represents a range of page frames the contents of which 904 * should not be saved during hibernation. 
905 */ 906 struct nosave_region { 907 struct list_head list; 908 unsigned long start_pfn; 909 unsigned long end_pfn; 910 }; 911 912 static LIST_HEAD(nosave_regions); 913 914 static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone) 915 { 916 struct rtree_node *node; 917 918 list_for_each_entry(node, &zone->nodes, list) 919 recycle_safe_page(node->data); 920 921 list_for_each_entry(node, &zone->leaves, list) 922 recycle_safe_page(node->data); 923 } 924 925 static void memory_bm_recycle(struct memory_bitmap *bm) 926 { 927 struct mem_zone_bm_rtree *zone; 928 struct linked_page *p_list; 929 930 list_for_each_entry(zone, &bm->zones, list) 931 recycle_zone_bm_rtree(zone); 932 933 p_list = bm->p_list; 934 while (p_list) { 935 struct linked_page *lp = p_list; 936 937 p_list = lp->next; 938 recycle_safe_page(lp); 939 } 940 } 941 942 /** 943 * register_nosave_region - Register a region of unsaveable memory. 944 * 945 * Register a range of page frames the contents of which should not be saved 946 * during hibernation (to be used in the early initialization code). 
947 */ 948 void __init __register_nosave_region(unsigned long start_pfn, 949 unsigned long end_pfn, int use_kmalloc) 950 { 951 struct nosave_region *region; 952 953 if (start_pfn >= end_pfn) 954 return; 955 956 if (!list_empty(&nosave_regions)) { 957 /* Try to extend the previous region (they should be sorted) */ 958 region = list_entry(nosave_regions.prev, 959 struct nosave_region, list); 960 if (region->end_pfn == start_pfn) { 961 region->end_pfn = end_pfn; 962 goto Report; 963 } 964 } 965 if (use_kmalloc) { 966 /* During init, this shouldn't fail */ 967 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); 968 BUG_ON(!region); 969 } else { 970 /* This allocation cannot fail */ 971 region = memblock_alloc(sizeof(struct nosave_region), 972 SMP_CACHE_BYTES); 973 if (!region) 974 panic("%s: Failed to allocate %zu bytes\n", __func__, 975 sizeof(struct nosave_region)); 976 } 977 region->start_pfn = start_pfn; 978 region->end_pfn = end_pfn; 979 list_add_tail(®ion->list, &nosave_regions); 980 Report: 981 pr_info("Registered nosave memory: [mem %#010llx-%#010llx]\n", 982 (unsigned long long) start_pfn << PAGE_SHIFT, 983 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); 984 } 985 986 /* 987 * Set bits in this map correspond to the page frames the contents of which 988 * should not be saved during the suspend. 989 */ 990 static struct memory_bitmap *forbidden_pages_map; 991 992 /* Set bits in this map correspond to free page frames. */ 993 static struct memory_bitmap *free_pages_map; 994 995 /* 996 * Each page frame allocated for creating the image is marked by setting the 997 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously 998 */ 999 1000 void swsusp_set_page_free(struct page *page) 1001 { 1002 if (free_pages_map) 1003 memory_bm_set_bit(free_pages_map, page_to_pfn(page)); 1004 } 1005 1006 static int swsusp_page_is_free(struct page *page) 1007 { 1008 return free_pages_map ? 
1009 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; 1010 } 1011 1012 void swsusp_unset_page_free(struct page *page) 1013 { 1014 if (free_pages_map) 1015 memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); 1016 } 1017 1018 static void swsusp_set_page_forbidden(struct page *page) 1019 { 1020 if (forbidden_pages_map) 1021 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); 1022 } 1023 1024 int swsusp_page_is_forbidden(struct page *page) 1025 { 1026 return forbidden_pages_map ? 1027 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; 1028 } 1029 1030 static void swsusp_unset_page_forbidden(struct page *page) 1031 { 1032 if (forbidden_pages_map) 1033 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); 1034 } 1035 1036 /** 1037 * mark_nosave_pages - Mark pages that should not be saved. 1038 * @bm: Memory bitmap. 1039 * 1040 * Set the bits in @bm that correspond to the page frames the contents of which 1041 * should not be saved. 1042 */ 1043 static void mark_nosave_pages(struct memory_bitmap *bm) 1044 { 1045 struct nosave_region *region; 1046 1047 if (list_empty(&nosave_regions)) 1048 return; 1049 1050 list_for_each_entry(region, &nosave_regions, list) { 1051 unsigned long pfn; 1052 1053 pr_debug("Marking nosave pages: [mem %#010llx-%#010llx]\n", 1054 (unsigned long long) region->start_pfn << PAGE_SHIFT, 1055 ((unsigned long long) region->end_pfn << PAGE_SHIFT) 1056 - 1); 1057 1058 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) 1059 if (pfn_valid(pfn)) { 1060 /* 1061 * It is safe to ignore the result of 1062 * mem_bm_set_bit_check() here, since we won't 1063 * touch the PFNs for which the error is 1064 * returned anyway. 1065 */ 1066 mem_bm_set_bit_check(bm, pfn); 1067 } 1068 } 1069 } 1070 1071 /** 1072 * create_basic_memory_bitmaps - Create bitmaps to hold basic page information. 1073 * 1074 * Create bitmaps needed for marking page frames that should not be saved and 1075 * free page frames. 
The forbidden_pages_map and free_pages_map pointers are 1076 * only modified if everything goes well, because we don't want the bits to be 1077 * touched before both bitmaps are set up. 1078 */ 1079 int create_basic_memory_bitmaps(void) 1080 { 1081 struct memory_bitmap *bm1, *bm2; 1082 int error = 0; 1083 1084 if (forbidden_pages_map && free_pages_map) 1085 return 0; 1086 else 1087 BUG_ON(forbidden_pages_map || free_pages_map); 1088 1089 bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 1090 if (!bm1) 1091 return -ENOMEM; 1092 1093 error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); 1094 if (error) 1095 goto Free_first_object; 1096 1097 bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 1098 if (!bm2) 1099 goto Free_first_bitmap; 1100 1101 error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); 1102 if (error) 1103 goto Free_second_object; 1104 1105 forbidden_pages_map = bm1; 1106 free_pages_map = bm2; 1107 mark_nosave_pages(forbidden_pages_map); 1108 1109 pr_debug("Basic memory bitmaps created\n"); 1110 1111 return 0; 1112 1113 Free_second_object: 1114 kfree(bm2); 1115 Free_first_bitmap: 1116 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1117 Free_first_object: 1118 kfree(bm1); 1119 return -ENOMEM; 1120 } 1121 1122 /** 1123 * free_basic_memory_bitmaps - Free memory bitmaps holding basic information. 1124 * 1125 * Free memory bitmaps allocated by create_basic_memory_bitmaps(). The 1126 * auxiliary pointers are necessary so that the bitmaps themselves are not 1127 * referred to while they are being freed. 
1128 */ 1129 void free_basic_memory_bitmaps(void) 1130 { 1131 struct memory_bitmap *bm1, *bm2; 1132 1133 if (WARN_ON(!(forbidden_pages_map && free_pages_map))) 1134 return; 1135 1136 bm1 = forbidden_pages_map; 1137 bm2 = free_pages_map; 1138 forbidden_pages_map = NULL; 1139 free_pages_map = NULL; 1140 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1141 kfree(bm1); 1142 memory_bm_free(bm2, PG_UNSAFE_CLEAR); 1143 kfree(bm2); 1144 1145 pr_debug("Basic memory bitmaps freed\n"); 1146 } 1147 1148 void clear_free_pages(void) 1149 { 1150 struct memory_bitmap *bm = free_pages_map; 1151 unsigned long pfn; 1152 1153 if (WARN_ON(!(free_pages_map))) 1154 return; 1155 1156 if (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) || want_init_on_free()) { 1157 memory_bm_position_reset(bm); 1158 pfn = memory_bm_next_pfn(bm); 1159 while (pfn != BM_END_OF_MAP) { 1160 if (pfn_valid(pfn)) 1161 clear_highpage(pfn_to_page(pfn)); 1162 1163 pfn = memory_bm_next_pfn(bm); 1164 } 1165 memory_bm_position_reset(bm); 1166 pr_info("free pages cleared after restore\n"); 1167 } 1168 } 1169 1170 /** 1171 * snapshot_additional_pages - Estimate the number of extra pages needed. 1172 * @zone: Memory zone to carry out the computation for. 1173 * 1174 * Estimate the number of additional pages needed for setting up a hibernation 1175 * image data structures for @zone (usually, the returned value is greater than 1176 * the exact number). 1177 */ 1178 unsigned int snapshot_additional_pages(struct zone *zone) 1179 { 1180 unsigned int rtree, nodes; 1181 1182 rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); 1183 rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), 1184 LINKED_PAGE_DATA_SIZE); 1185 while (nodes > 1) { 1186 nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL); 1187 rtree += nodes; 1188 } 1189 1190 return 2 * rtree; 1191 } 1192 1193 #ifdef CONFIG_HIGHMEM 1194 /** 1195 * count_free_highmem_pages - Compute the total number of free highmem pages. 1196 * 1197 * The returned number is system-wide. 
1198 */ 1199 static unsigned int count_free_highmem_pages(void) 1200 { 1201 struct zone *zone; 1202 unsigned int cnt = 0; 1203 1204 for_each_populated_zone(zone) 1205 if (is_highmem(zone)) 1206 cnt += zone_page_state(zone, NR_FREE_PAGES); 1207 1208 return cnt; 1209 } 1210 1211 /** 1212 * saveable_highmem_page - Check if a highmem page is saveable. 1213 * 1214 * Determine whether a highmem page should be included in a hibernation image. 1215 * 1216 * We should save the page if it isn't Nosave or NosaveFree, or Reserved, 1217 * and it isn't part of a free chunk of pages. 1218 */ 1219 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) 1220 { 1221 struct page *page; 1222 1223 if (!pfn_valid(pfn)) 1224 return NULL; 1225 1226 page = pfn_to_online_page(pfn); 1227 if (!page || page_zone(page) != zone) 1228 return NULL; 1229 1230 BUG_ON(!PageHighMem(page)); 1231 1232 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 1233 return NULL; 1234 1235 if (PageReserved(page) || PageOffline(page)) 1236 return NULL; 1237 1238 if (page_is_guard(page)) 1239 return NULL; 1240 1241 return page; 1242 } 1243 1244 /** 1245 * count_highmem_pages - Compute the total number of saveable highmem pages. 1246 */ 1247 static unsigned int count_highmem_pages(void) 1248 { 1249 struct zone *zone; 1250 unsigned int n = 0; 1251 1252 for_each_populated_zone(zone) { 1253 unsigned long pfn, max_zone_pfn; 1254 1255 if (!is_highmem(zone)) 1256 continue; 1257 1258 mark_free_pages(zone); 1259 max_zone_pfn = zone_end_pfn(zone); 1260 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1261 if (saveable_highmem_page(zone, pfn)) 1262 n++; 1263 } 1264 return n; 1265 } 1266 #else 1267 static inline void *saveable_highmem_page(struct zone *z, unsigned long p) 1268 { 1269 return NULL; 1270 } 1271 #endif /* CONFIG_HIGHMEM */ 1272 1273 /** 1274 * saveable_page - Check if the given page is saveable. 
1275 * 1276 * Determine whether a non-highmem page should be included in a hibernation 1277 * image. 1278 * 1279 * We should save the page if it isn't Nosave, and is not in the range 1280 * of pages statically defined as 'unsaveable', and it isn't part of 1281 * a free chunk of pages. 1282 */ 1283 static struct page *saveable_page(struct zone *zone, unsigned long pfn) 1284 { 1285 struct page *page; 1286 1287 if (!pfn_valid(pfn)) 1288 return NULL; 1289 1290 page = pfn_to_online_page(pfn); 1291 if (!page || page_zone(page) != zone) 1292 return NULL; 1293 1294 BUG_ON(PageHighMem(page)); 1295 1296 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 1297 return NULL; 1298 1299 if (PageOffline(page)) 1300 return NULL; 1301 1302 if (PageReserved(page) 1303 && (!kernel_page_present(page) || pfn_is_nosave(pfn))) 1304 return NULL; 1305 1306 if (page_is_guard(page)) 1307 return NULL; 1308 1309 return page; 1310 } 1311 1312 /** 1313 * count_data_pages - Compute the total number of saveable non-highmem pages. 1314 */ 1315 static unsigned int count_data_pages(void) 1316 { 1317 struct zone *zone; 1318 unsigned long pfn, max_zone_pfn; 1319 unsigned int n = 0; 1320 1321 for_each_populated_zone(zone) { 1322 if (is_highmem(zone)) 1323 continue; 1324 1325 mark_free_pages(zone); 1326 max_zone_pfn = zone_end_pfn(zone); 1327 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1328 if (saveable_page(zone, pfn)) 1329 n++; 1330 } 1331 return n; 1332 } 1333 1334 /* 1335 * This is needed, because copy_page and memcpy are not usable for copying 1336 * task structs. 1337 */ 1338 static inline void do_copy_page(long *dst, long *src) 1339 { 1340 int n; 1341 1342 for (n = PAGE_SIZE / sizeof(long); n; n--) 1343 *dst++ = *src++; 1344 } 1345 1346 /** 1347 * safe_copy_page - Copy a page in a safe way. 1348 * 1349 * Check if the page we are going to copy is marked as present in the kernel 1350 * page tables. 
* This always is the case if CONFIG_DEBUG_PAGEALLOC or
 * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set.  In that case kernel_page_present()
 * always returns 'true'.
 *
 * If the source page is not present, it is mapped for the duration of the
 * copy and unmapped again afterwards.
 */
static void safe_copy_page(void *dst, struct page *s_page)
{
	if (kernel_page_present(s_page)) {
		do_copy_page(dst, page_address(s_page));
	} else {
		/* Temporarily map the page, copy it, then unmap it again. */
		kernel_map_pages(s_page, 1, 1);
		do_copy_page(dst, page_address(s_page));
		kernel_map_pages(s_page, 1, 0);
	}
}

#ifdef CONFIG_HIGHMEM
/* Dispatch to the highmem or non-highmem saveability check for @zone. */
static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn)
{
	return is_highmem(zone) ?
		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
}

/*
 * Copy the contents of page frame @src_pfn into page frame @dst_pfn, handling
 * every combination of highmem and non-highmem source and destination.
 */
static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
	struct page *s_page, *d_page;
	void *src, *dst;

	s_page = pfn_to_page(src_pfn);
	d_page = pfn_to_page(dst_pfn);
	if (PageHighMem(s_page)) {
		/* Atomic mappings are released in reverse order of creation. */
		src = kmap_atomic(s_page);
		dst = kmap_atomic(d_page);
		do_copy_page(dst, src);
		kunmap_atomic(dst);
		kunmap_atomic(src);
	} else {
		if (PageHighMem(d_page)) {
			/*
			 * The page pointed to by src may contain some kernel
			 * data modified by kmap_atomic()
			 */
			/* Hence: snapshot into 'buffer' first, map afterwards. */
			safe_copy_page(buffer, s_page);
			dst = kmap_atomic(d_page);
			copy_page(dst, buffer);
			kunmap_atomic(dst);
		} else {
			safe_copy_page(page_address(d_page), s_page);
		}
	}
}
#else
#define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)

/* Without highmem both pages are directly addressable. */
static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
				pfn_to_page(src_pfn));
}
#endif /* CONFIG_HIGHMEM */

/*
 * Mark every saveable page frame in @orig_bm, then copy each marked page into
 * the page frame given by the next set bit of @copy_bm.  The two bitmaps are
 * walked in lockstep; only the end of @orig_bm terminates the loop.
 */
static void copy_data_pages(struct memory_bitmap *copy_bm,
			    struct memory_bitmap *orig_bm)
{
	struct zone *zone;
	unsigned long pfn;

	/* Pass 1: record which page frames have to be saved. */
	for_each_populated_zone(zone) {
		unsigned long max_zone_pfn;

		mark_free_pages(zone);
		max_zone_pfn = zone_end_pfn(zone);
		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
			if (page_is_saveable(zone, pfn))
				memory_bm_set_bit(orig_bm, pfn);
	}
	/* Pass 2: pair each original PFN with the next copy PFN and copy. */
	memory_bm_position_reset(orig_bm);
	memory_bm_position_reset(copy_bm);
	for(;;) {
		pfn = memory_bm_next_pfn(orig_bm);
		if (unlikely(pfn == BM_END_OF_MAP))
			break;
		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
	}
}

/* Total number of image pages */
static unsigned int nr_copy_pages;
/* Number of pages needed for saving the original pfns of the image pages */
static unsigned int nr_meta_pages;
/*
 * Numbers of normal and highmem page frames allocated for hibernation image
 * before suspending devices.
 */
static unsigned int alloc_normal, alloc_highmem;
/*
 * Memory bitmap used for marking saveable pages (during hibernation) or
 * hibernation image pages (during restore)
 */
static struct memory_bitmap orig_bm;
/*
 * Memory bitmap used during hibernation for marking allocated page frames that
 * will contain copies of saveable pages.  During restore it is initially used
 * for marking hibernation image pages, but then the set bits from it are
 * duplicated in @orig_bm and it is released.  On highmem systems it is next
 * used for marking "safe" highmem pages, but it has to be reinitialized for
 * this purpose.
 */
static struct memory_bitmap copy_bm;

/**
 * swsusp_free - Free pages allocated for hibernation image.
 *
 * Image pages are allocated before snapshot creation, so they need to be
 * released after resume.
*/
void swsusp_free(void)
{
	unsigned long fb_pfn, fr_pfn;

	/* Without both basic bitmaps there are no pages to find; just reset. */
	if (!forbidden_pages_map || !free_pages_map)
		goto out;

	memory_bm_position_reset(forbidden_pages_map);
	memory_bm_position_reset(free_pages_map);

loop:
	fr_pfn = memory_bm_next_pfn(free_pages_map);
	fb_pfn = memory_bm_next_pfn(forbidden_pages_map);

	/*
	 * Find the next bit set in both bitmaps. This is guaranteed to
	 * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP.
	 */
	do {
		if (fb_pfn < fr_pfn)
			fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
		if (fr_pfn < fb_pfn)
			fr_pfn = memory_bm_next_pfn(free_pages_map);
	} while (fb_pfn != fr_pfn);

	/*
	 * A page frame marked in both bitmaps is released here.  Note that
	 * the walk also stops at the first common PFN that is not valid.
	 */
	if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) {
		struct page *page = pfn_to_page(fr_pfn);

		memory_bm_clear_current(forbidden_pages_map);
		memory_bm_clear_current(free_pages_map);
		/* Undo any restore-time write protection before freeing. */
		hibernate_restore_unprotect_page(page_address(page));
		__free_page(page);
		goto loop;
	}

out:
	/* Reset all per-image bookkeeping to its initial state. */
	nr_copy_pages = 0;
	nr_meta_pages = 0;
	restore_pblist = NULL;
	buffer = NULL;
	alloc_normal = 0;
	alloc_highmem = 0;
	hibernate_restore_protection_end();
}

/* Helper functions used for the shrinking of memory. */

#define GFP_IMAGE	(GFP_KERNEL | __GFP_NOWARN)

/**
 * preallocate_image_pages - Allocate a number of pages for hibernation image.
 * @nr_pages: Number of page frames to allocate.
 * @mask: GFP flags to use for the allocation.
1518 * 1519 * Return value: Number of page frames actually allocated 1520 */ 1521 static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) 1522 { 1523 unsigned long nr_alloc = 0; 1524 1525 while (nr_pages > 0) { 1526 struct page *page; 1527 1528 page = alloc_image_page(mask); 1529 if (!page) 1530 break; 1531 memory_bm_set_bit(©_bm, page_to_pfn(page)); 1532 if (PageHighMem(page)) 1533 alloc_highmem++; 1534 else 1535 alloc_normal++; 1536 nr_pages--; 1537 nr_alloc++; 1538 } 1539 1540 return nr_alloc; 1541 } 1542 1543 static unsigned long preallocate_image_memory(unsigned long nr_pages, 1544 unsigned long avail_normal) 1545 { 1546 unsigned long alloc; 1547 1548 if (avail_normal <= alloc_normal) 1549 return 0; 1550 1551 alloc = avail_normal - alloc_normal; 1552 if (nr_pages < alloc) 1553 alloc = nr_pages; 1554 1555 return preallocate_image_pages(alloc, GFP_IMAGE); 1556 } 1557 1558 #ifdef CONFIG_HIGHMEM 1559 static unsigned long preallocate_image_highmem(unsigned long nr_pages) 1560 { 1561 return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); 1562 } 1563 1564 /** 1565 * __fraction - Compute (an approximation of) x * (multiplier / base). 
1566 */ 1567 static unsigned long __fraction(u64 x, u64 multiplier, u64 base) 1568 { 1569 return div64_u64(x * multiplier, base); 1570 } 1571 1572 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1573 unsigned long highmem, 1574 unsigned long total) 1575 { 1576 unsigned long alloc = __fraction(nr_pages, highmem, total); 1577 1578 return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); 1579 } 1580 #else /* CONFIG_HIGHMEM */ 1581 static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) 1582 { 1583 return 0; 1584 } 1585 1586 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1587 unsigned long highmem, 1588 unsigned long total) 1589 { 1590 return 0; 1591 } 1592 #endif /* CONFIG_HIGHMEM */ 1593 1594 /** 1595 * free_unnecessary_pages - Release preallocated pages not needed for the image. 1596 */ 1597 static unsigned long free_unnecessary_pages(void) 1598 { 1599 unsigned long save, to_free_normal, to_free_highmem, free; 1600 1601 save = count_data_pages(); 1602 if (alloc_normal >= save) { 1603 to_free_normal = alloc_normal - save; 1604 save = 0; 1605 } else { 1606 to_free_normal = 0; 1607 save -= alloc_normal; 1608 } 1609 save += count_highmem_pages(); 1610 if (alloc_highmem >= save) { 1611 to_free_highmem = alloc_highmem - save; 1612 } else { 1613 to_free_highmem = 0; 1614 save -= alloc_highmem; 1615 if (to_free_normal > save) 1616 to_free_normal -= save; 1617 else 1618 to_free_normal = 0; 1619 } 1620 free = to_free_normal + to_free_highmem; 1621 1622 memory_bm_position_reset(©_bm); 1623 1624 while (to_free_normal > 0 || to_free_highmem > 0) { 1625 unsigned long pfn = memory_bm_next_pfn(©_bm); 1626 struct page *page = pfn_to_page(pfn); 1627 1628 if (PageHighMem(page)) { 1629 if (!to_free_highmem) 1630 continue; 1631 to_free_highmem--; 1632 alloc_highmem--; 1633 } else { 1634 if (!to_free_normal) 1635 continue; 1636 to_free_normal--; 1637 alloc_normal--; 1638 } 1639 
memory_bm_clear_bit(©_bm, pfn); 1640 swsusp_unset_page_forbidden(page); 1641 swsusp_unset_page_free(page); 1642 __free_page(page); 1643 } 1644 1645 return free; 1646 } 1647 1648 /** 1649 * minimum_image_size - Estimate the minimum acceptable size of an image. 1650 * @saveable: Number of saveable pages in the system. 1651 * 1652 * We want to avoid attempting to free too much memory too hard, so estimate the 1653 * minimum acceptable size of a hibernation image to use as the lower limit for 1654 * preallocating memory. 1655 * 1656 * We assume that the minimum image size should be proportional to 1657 * 1658 * [number of saveable pages] - [number of pages that can be freed in theory] 1659 * 1660 * where the second term is the sum of (1) reclaimable slab pages, (2) active 1661 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages. 1662 */ 1663 static unsigned long minimum_image_size(unsigned long saveable) 1664 { 1665 unsigned long size; 1666 1667 size = global_node_page_state(NR_SLAB_RECLAIMABLE) 1668 + global_node_page_state(NR_ACTIVE_ANON) 1669 + global_node_page_state(NR_INACTIVE_ANON) 1670 + global_node_page_state(NR_ACTIVE_FILE) 1671 + global_node_page_state(NR_INACTIVE_FILE); 1672 1673 return saveable <= size ? 0 : saveable - size; 1674 } 1675 1676 /** 1677 * hibernate_preallocate_memory - Preallocate memory for hibernation image. 1678 * 1679 * To create a hibernation image it is necessary to make a copy of every page 1680 * frame in use. We also need a number of page frames to be free during 1681 * hibernation for allocations made while saving the image and for device 1682 * drivers, in case they need to allocate memory from their hibernation 1683 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough 1684 * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through 1685 * /sys/power/reserved_size, respectively). 
To make this happen, we compute the 1686 * total number of available page frames and allocate at least 1687 * 1688 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 1689 * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) 1690 * 1691 * of them, which corresponds to the maximum size of a hibernation image. 1692 * 1693 * If image_size is set below the number following from the above formula, 1694 * the preallocation of memory is continued until the total number of saveable 1695 * pages in the system is below the requested image size or the minimum 1696 * acceptable image size returned by minimum_image_size(), whichever is greater. 1697 */ 1698 int hibernate_preallocate_memory(void) 1699 { 1700 struct zone *zone; 1701 unsigned long saveable, size, max_size, count, highmem, pages = 0; 1702 unsigned long alloc, save_highmem, pages_highmem, avail_normal; 1703 ktime_t start, stop; 1704 int error; 1705 1706 pr_info("Preallocating image memory\n"); 1707 start = ktime_get(); 1708 1709 error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); 1710 if (error) { 1711 pr_err("Cannot allocate original bitmap\n"); 1712 goto err_out; 1713 } 1714 1715 error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); 1716 if (error) { 1717 pr_err("Cannot allocate copy bitmap\n"); 1718 goto err_out; 1719 } 1720 1721 alloc_normal = 0; 1722 alloc_highmem = 0; 1723 1724 /* Count the number of saveable data pages. */ 1725 save_highmem = count_highmem_pages(); 1726 saveable = count_data_pages(); 1727 1728 /* 1729 * Compute the total number of page frames we can use (count) and the 1730 * number of pages needed for image metadata (size). 
1731 */ 1732 count = saveable; 1733 saveable += save_highmem; 1734 highmem = save_highmem; 1735 size = 0; 1736 for_each_populated_zone(zone) { 1737 size += snapshot_additional_pages(zone); 1738 if (is_highmem(zone)) 1739 highmem += zone_page_state(zone, NR_FREE_PAGES); 1740 else 1741 count += zone_page_state(zone, NR_FREE_PAGES); 1742 } 1743 avail_normal = count; 1744 count += highmem; 1745 count -= totalreserve_pages; 1746 1747 /* Compute the maximum number of saveable pages to leave in memory. */ 1748 max_size = (count - (size + PAGES_FOR_IO)) / 2 1749 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); 1750 /* Compute the desired number of image pages specified by image_size. */ 1751 size = DIV_ROUND_UP(image_size, PAGE_SIZE); 1752 if (size > max_size) 1753 size = max_size; 1754 /* 1755 * If the desired number of image pages is at least as large as the 1756 * current number of saveable pages in memory, allocate page frames for 1757 * the image and we're done. 1758 */ 1759 if (size >= saveable) { 1760 pages = preallocate_image_highmem(save_highmem); 1761 pages += preallocate_image_memory(saveable - pages, avail_normal); 1762 goto out; 1763 } 1764 1765 /* Estimate the minimum size of the image. */ 1766 pages = minimum_image_size(saveable); 1767 /* 1768 * To avoid excessive pressure on the normal zone, leave room in it to 1769 * accommodate an image of the minimum size (unless it's already too 1770 * small, in which case don't preallocate pages from it at all). 1771 */ 1772 if (avail_normal > pages) 1773 avail_normal -= pages; 1774 else 1775 avail_normal = 0; 1776 if (size < pages) 1777 size = min_t(unsigned long, pages, max_size); 1778 1779 /* 1780 * Let the memory management subsystem know that we're going to need a 1781 * large number of page frames to allocate and make it free some memory. 1782 * NOTE: If this is not done, performance will be hurt badly in some 1783 * test cases. 
1784 */ 1785 shrink_all_memory(saveable - size); 1786 1787 /* 1788 * The number of saveable pages in memory was too high, so apply some 1789 * pressure to decrease it. First, make room for the largest possible 1790 * image and fail if that doesn't work. Next, try to decrease the size 1791 * of the image as much as indicated by 'size' using allocations from 1792 * highmem and non-highmem zones separately. 1793 */ 1794 pages_highmem = preallocate_image_highmem(highmem / 2); 1795 alloc = count - max_size; 1796 if (alloc > pages_highmem) 1797 alloc -= pages_highmem; 1798 else 1799 alloc = 0; 1800 pages = preallocate_image_memory(alloc, avail_normal); 1801 if (pages < alloc) { 1802 /* We have exhausted non-highmem pages, try highmem. */ 1803 alloc -= pages; 1804 pages += pages_highmem; 1805 pages_highmem = preallocate_image_highmem(alloc); 1806 if (pages_highmem < alloc) { 1807 pr_err("Image allocation is %lu pages short\n", 1808 alloc - pages_highmem); 1809 goto err_out; 1810 } 1811 pages += pages_highmem; 1812 /* 1813 * size is the desired number of saveable pages to leave in 1814 * memory, so try to preallocate (all memory - size) pages. 1815 */ 1816 alloc = (count - pages) - size; 1817 pages += preallocate_image_highmem(alloc); 1818 } else { 1819 /* 1820 * There are approximately max_size saveable pages at this point 1821 * and we want to reduce this number down to size. 1822 */ 1823 alloc = max_size - size; 1824 size = preallocate_highmem_fraction(alloc, highmem, count); 1825 pages_highmem += size; 1826 alloc -= size; 1827 size = preallocate_image_memory(alloc, avail_normal); 1828 pages_highmem += preallocate_image_highmem(alloc - size); 1829 pages += pages_highmem + size; 1830 } 1831 1832 /* 1833 * We only need as many page frames for the image as there are saveable 1834 * pages in memory, but we have allocated more. Release the excessive 1835 * ones now. 
1836 */ 1837 pages -= free_unnecessary_pages(); 1838 1839 out: 1840 stop = ktime_get(); 1841 pr_info("Allocated %lu pages for snapshot\n", pages); 1842 swsusp_show_speed(start, stop, pages, "Allocated"); 1843 1844 return 0; 1845 1846 err_out: 1847 swsusp_free(); 1848 return -ENOMEM; 1849 } 1850 1851 #ifdef CONFIG_HIGHMEM 1852 /** 1853 * count_pages_for_highmem - Count non-highmem pages needed for copying highmem. 1854 * 1855 * Compute the number of non-highmem pages that will be necessary for creating 1856 * copies of highmem pages. 1857 */ 1858 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) 1859 { 1860 unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; 1861 1862 if (free_highmem >= nr_highmem) 1863 nr_highmem = 0; 1864 else 1865 nr_highmem -= free_highmem; 1866 1867 return nr_highmem; 1868 } 1869 #else 1870 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; } 1871 #endif /* CONFIG_HIGHMEM */ 1872 1873 /** 1874 * enough_free_mem - Check if there is enough free memory for the image. 1875 */ 1876 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) 1877 { 1878 struct zone *zone; 1879 unsigned int free = alloc_normal; 1880 1881 for_each_populated_zone(zone) 1882 if (!is_highmem(zone)) 1883 free += zone_page_state(zone, NR_FREE_PAGES); 1884 1885 nr_pages += count_pages_for_highmem(nr_highmem); 1886 pr_debug("Normal pages needed: %u + %u, available pages: %u\n", 1887 nr_pages, PAGES_FOR_IO, free); 1888 1889 return free > nr_pages + PAGES_FOR_IO; 1890 } 1891 1892 #ifdef CONFIG_HIGHMEM 1893 /** 1894 * get_highmem_buffer - Allocate a buffer for highmem pages. 1895 * 1896 * If there are some highmem pages in the hibernation image, we may need a 1897 * buffer to copy them and/or load their data. 1898 */ 1899 static inline int get_highmem_buffer(int safe_needed) 1900 { 1901 buffer = get_image_page(GFP_ATOMIC, safe_needed); 1902 return buffer ? 
0 : -ENOMEM; 1903 } 1904 1905 /** 1906 * alloc_highmem_image_pages - Allocate some highmem pages for the image. 1907 * 1908 * Try to allocate as many pages as needed, but if the number of free highmem 1909 * pages is less than that, allocate them all. 1910 */ 1911 static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, 1912 unsigned int nr_highmem) 1913 { 1914 unsigned int to_alloc = count_free_highmem_pages(); 1915 1916 if (to_alloc > nr_highmem) 1917 to_alloc = nr_highmem; 1918 1919 nr_highmem -= to_alloc; 1920 while (to_alloc-- > 0) { 1921 struct page *page; 1922 1923 page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM); 1924 memory_bm_set_bit(bm, page_to_pfn(page)); 1925 } 1926 return nr_highmem; 1927 } 1928 #else 1929 static inline int get_highmem_buffer(int safe_needed) { return 0; } 1930 1931 static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, 1932 unsigned int n) { return 0; } 1933 #endif /* CONFIG_HIGHMEM */ 1934 1935 /** 1936 * swsusp_alloc - Allocate memory for hibernation image. 1937 * 1938 * We first try to allocate as many highmem pages as there are 1939 * saveable highmem pages in the system. If that fails, we allocate 1940 * non-highmem pages for the copies of the remaining highmem ones. 1941 * 1942 * In this approach it is likely that the copies of highmem pages will 1943 * also be located in the high memory, because of the way in which 1944 * copy_data_pages() works. 
1945 */ 1946 static int swsusp_alloc(struct memory_bitmap *copy_bm, 1947 unsigned int nr_pages, unsigned int nr_highmem) 1948 { 1949 if (nr_highmem > 0) { 1950 if (get_highmem_buffer(PG_ANY)) 1951 goto err_out; 1952 if (nr_highmem > alloc_highmem) { 1953 nr_highmem -= alloc_highmem; 1954 nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); 1955 } 1956 } 1957 if (nr_pages > alloc_normal) { 1958 nr_pages -= alloc_normal; 1959 while (nr_pages-- > 0) { 1960 struct page *page; 1961 1962 page = alloc_image_page(GFP_ATOMIC); 1963 if (!page) 1964 goto err_out; 1965 memory_bm_set_bit(copy_bm, page_to_pfn(page)); 1966 } 1967 } 1968 1969 return 0; 1970 1971 err_out: 1972 swsusp_free(); 1973 return -ENOMEM; 1974 } 1975 1976 asmlinkage __visible int swsusp_save(void) 1977 { 1978 unsigned int nr_pages, nr_highmem; 1979 1980 pr_info("Creating image:\n"); 1981 1982 drain_local_pages(NULL); 1983 nr_pages = count_data_pages(); 1984 nr_highmem = count_highmem_pages(); 1985 pr_info("Need to copy %u pages\n", nr_pages + nr_highmem); 1986 1987 if (!enough_free_mem(nr_pages, nr_highmem)) { 1988 pr_err("Not enough free memory\n"); 1989 return -ENOMEM; 1990 } 1991 1992 if (swsusp_alloc(©_bm, nr_pages, nr_highmem)) { 1993 pr_err("Memory allocation failed\n"); 1994 return -ENOMEM; 1995 } 1996 1997 /* 1998 * During allocating of suspend pagedir, new cold pages may appear. 1999 * Kill them. 2000 */ 2001 drain_local_pages(NULL); 2002 copy_data_pages(©_bm, &orig_bm); 2003 2004 /* 2005 * End of critical section. From now on, we can write to memory, 2006 * but we should not touch disk. This specially means we must _not_ 2007 * touch swap space! Except we must write out our image of course. 
2008 */ 2009 2010 nr_pages += nr_highmem; 2011 nr_copy_pages = nr_pages; 2012 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); 2013 2014 pr_info("Image created (%d pages copied)\n", nr_pages); 2015 2016 return 0; 2017 } 2018 2019 #ifndef CONFIG_ARCH_HIBERNATION_HEADER 2020 static int init_header_complete(struct swsusp_info *info) 2021 { 2022 memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); 2023 info->version_code = LINUX_VERSION_CODE; 2024 return 0; 2025 } 2026 2027 static char *check_image_kernel(struct swsusp_info *info) 2028 { 2029 if (info->version_code != LINUX_VERSION_CODE) 2030 return "kernel version"; 2031 if (strcmp(info->uts.sysname,init_utsname()->sysname)) 2032 return "system type"; 2033 if (strcmp(info->uts.release,init_utsname()->release)) 2034 return "kernel release"; 2035 if (strcmp(info->uts.version,init_utsname()->version)) 2036 return "version"; 2037 if (strcmp(info->uts.machine,init_utsname()->machine)) 2038 return "machine"; 2039 return NULL; 2040 } 2041 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ 2042 2043 unsigned long snapshot_get_image_size(void) 2044 { 2045 return nr_copy_pages + nr_meta_pages + 1; 2046 } 2047 2048 static int init_header(struct swsusp_info *info) 2049 { 2050 memset(info, 0, sizeof(struct swsusp_info)); 2051 info->num_physpages = get_num_physpages(); 2052 info->image_pages = nr_copy_pages; 2053 info->pages = snapshot_get_image_size(); 2054 info->size = info->pages; 2055 info->size <<= PAGE_SHIFT; 2056 return init_header_complete(info); 2057 } 2058 2059 /** 2060 * pack_pfns - Prepare PFNs for saving. 2061 * @bm: Memory bitmap. 2062 * @buf: Memory buffer to store the PFNs in. 2063 * 2064 * PFNs corresponding to set bits in @bm are stored in the area of memory 2065 * pointed to by @buf (1 page at a time). 
2066 */ 2067 static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) 2068 { 2069 int j; 2070 2071 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 2072 buf[j] = memory_bm_next_pfn(bm); 2073 if (unlikely(buf[j] == BM_END_OF_MAP)) 2074 break; 2075 } 2076 } 2077 2078 /** 2079 * snapshot_read_next - Get the address to read the next image page from. 2080 * @handle: Snapshot handle to be used for the reading. 2081 * 2082 * On the first call, @handle should point to a zeroed snapshot_handle 2083 * structure. The structure gets populated then and a pointer to it should be 2084 * passed to this function every next time. 2085 * 2086 * On success, the function returns a positive number. Then, the caller 2087 * is allowed to read up to the returned number of bytes from the memory 2088 * location computed by the data_of() macro. 2089 * 2090 * The function returns 0 to indicate the end of the data stream condition, 2091 * and negative numbers are returned on errors. If that happens, the structure 2092 * pointed to by @handle is not updated and should not be used any more. 
2093 */ 2094 int snapshot_read_next(struct snapshot_handle *handle) 2095 { 2096 if (handle->cur > nr_meta_pages + nr_copy_pages) 2097 return 0; 2098 2099 if (!buffer) { 2100 /* This makes the buffer be freed by swsusp_free() */ 2101 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2102 if (!buffer) 2103 return -ENOMEM; 2104 } 2105 if (!handle->cur) { 2106 int error; 2107 2108 error = init_header((struct swsusp_info *)buffer); 2109 if (error) 2110 return error; 2111 handle->buffer = buffer; 2112 memory_bm_position_reset(&orig_bm); 2113 memory_bm_position_reset(©_bm); 2114 } else if (handle->cur <= nr_meta_pages) { 2115 clear_page(buffer); 2116 pack_pfns(buffer, &orig_bm); 2117 } else { 2118 struct page *page; 2119 2120 page = pfn_to_page(memory_bm_next_pfn(©_bm)); 2121 if (PageHighMem(page)) { 2122 /* 2123 * Highmem pages are copied to the buffer, 2124 * because we can't return with a kmapped 2125 * highmem page (we may not be called again). 2126 */ 2127 void *kaddr; 2128 2129 kaddr = kmap_atomic(page); 2130 copy_page(buffer, kaddr); 2131 kunmap_atomic(kaddr); 2132 handle->buffer = buffer; 2133 } else { 2134 handle->buffer = page_address(page); 2135 } 2136 } 2137 handle->cur++; 2138 return PAGE_SIZE; 2139 } 2140 2141 static void duplicate_memory_bitmap(struct memory_bitmap *dst, 2142 struct memory_bitmap *src) 2143 { 2144 unsigned long pfn; 2145 2146 memory_bm_position_reset(src); 2147 pfn = memory_bm_next_pfn(src); 2148 while (pfn != BM_END_OF_MAP) { 2149 memory_bm_set_bit(dst, pfn); 2150 pfn = memory_bm_next_pfn(src); 2151 } 2152 } 2153 2154 /** 2155 * mark_unsafe_pages - Mark pages that were used before hibernation. 2156 * 2157 * Mark the pages that cannot be used for storing the image during restoration, 2158 * because they conflict with the pages that had been used before hibernation. 
2159 */ 2160 static void mark_unsafe_pages(struct memory_bitmap *bm) 2161 { 2162 unsigned long pfn; 2163 2164 /* Clear the "free"/"unsafe" bit for all PFNs */ 2165 memory_bm_position_reset(free_pages_map); 2166 pfn = memory_bm_next_pfn(free_pages_map); 2167 while (pfn != BM_END_OF_MAP) { 2168 memory_bm_clear_current(free_pages_map); 2169 pfn = memory_bm_next_pfn(free_pages_map); 2170 } 2171 2172 /* Mark pages that correspond to the "original" PFNs as "unsafe" */ 2173 duplicate_memory_bitmap(free_pages_map, bm); 2174 2175 allocated_unsafe_pages = 0; 2176 } 2177 2178 static int check_header(struct swsusp_info *info) 2179 { 2180 char *reason; 2181 2182 reason = check_image_kernel(info); 2183 if (!reason && info->num_physpages != get_num_physpages()) 2184 reason = "memory size"; 2185 if (reason) { 2186 pr_err("Image mismatch: %s\n", reason); 2187 return -EPERM; 2188 } 2189 return 0; 2190 } 2191 2192 /** 2193 * load header - Check the image header and copy the data from it. 2194 */ 2195 static int load_header(struct swsusp_info *info) 2196 { 2197 int error; 2198 2199 restore_pblist = NULL; 2200 error = check_header(info); 2201 if (!error) { 2202 nr_copy_pages = info->image_pages; 2203 nr_meta_pages = info->pages - info->image_pages - 1; 2204 } 2205 return error; 2206 } 2207 2208 /** 2209 * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap. 2210 * @bm: Memory bitmap. 2211 * @buf: Area of memory containing the PFNs. 2212 * 2213 * For each element of the array pointed to by @buf (1 page at a time), set the 2214 * corresponding bit in @bm. 
2215 */ 2216 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) 2217 { 2218 int j; 2219 2220 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 2221 if (unlikely(buf[j] == BM_END_OF_MAP)) 2222 break; 2223 2224 if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) 2225 memory_bm_set_bit(bm, buf[j]); 2226 else 2227 return -EFAULT; 2228 } 2229 2230 return 0; 2231 } 2232 2233 #ifdef CONFIG_HIGHMEM 2234 /* 2235 * struct highmem_pbe is used for creating the list of highmem pages that 2236 * should be restored atomically during the resume from disk, because the page 2237 * frames they have occupied before the suspend are in use. 2238 */ 2239 struct highmem_pbe { 2240 struct page *copy_page; /* data is here now */ 2241 struct page *orig_page; /* data was here before the suspend */ 2242 struct highmem_pbe *next; 2243 }; 2244 2245 /* 2246 * List of highmem PBEs needed for restoring the highmem pages that were 2247 * allocated before the suspend and included in the suspend image, but have 2248 * also been allocated by the "resume" kernel, so their contents cannot be 2249 * written directly to their "original" page frames. 2250 */ 2251 static struct highmem_pbe *highmem_pblist; 2252 2253 /** 2254 * count_highmem_image_pages - Compute the number of highmem pages in the image. 2255 * @bm: Memory bitmap. 2256 * 2257 * The bits in @bm that correspond to image pages are assumed to be set. 
2258 */ 2259 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) 2260 { 2261 unsigned long pfn; 2262 unsigned int cnt = 0; 2263 2264 memory_bm_position_reset(bm); 2265 pfn = memory_bm_next_pfn(bm); 2266 while (pfn != BM_END_OF_MAP) { 2267 if (PageHighMem(pfn_to_page(pfn))) 2268 cnt++; 2269 2270 pfn = memory_bm_next_pfn(bm); 2271 } 2272 return cnt; 2273 } 2274 2275 static unsigned int safe_highmem_pages; 2276 2277 static struct memory_bitmap *safe_highmem_bm; 2278 2279 /** 2280 * prepare_highmem_image - Allocate memory for loading highmem data from image. 2281 * @bm: Pointer to an uninitialized memory bitmap structure. 2282 * @nr_highmem_p: Pointer to the number of highmem image pages. 2283 * 2284 * Try to allocate as many highmem pages as there are highmem image pages 2285 * (@nr_highmem_p points to the variable containing the number of highmem image 2286 * pages). The pages that are "safe" (ie. will not be overwritten when the 2287 * hibernation image is restored entirely) have the corresponding bits set in 2288 * @bm (it must be unitialized). 2289 * 2290 * NOTE: This function should not be called if there are no highmem image pages. 
2291 */ 2292 static int prepare_highmem_image(struct memory_bitmap *bm, 2293 unsigned int *nr_highmem_p) 2294 { 2295 unsigned int to_alloc; 2296 2297 if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) 2298 return -ENOMEM; 2299 2300 if (get_highmem_buffer(PG_SAFE)) 2301 return -ENOMEM; 2302 2303 to_alloc = count_free_highmem_pages(); 2304 if (to_alloc > *nr_highmem_p) 2305 to_alloc = *nr_highmem_p; 2306 else 2307 *nr_highmem_p = to_alloc; 2308 2309 safe_highmem_pages = 0; 2310 while (to_alloc-- > 0) { 2311 struct page *page; 2312 2313 page = alloc_page(__GFP_HIGHMEM); 2314 if (!swsusp_page_is_free(page)) { 2315 /* The page is "safe", set its bit the bitmap */ 2316 memory_bm_set_bit(bm, page_to_pfn(page)); 2317 safe_highmem_pages++; 2318 } 2319 /* Mark the page as allocated */ 2320 swsusp_set_page_forbidden(page); 2321 swsusp_set_page_free(page); 2322 } 2323 memory_bm_position_reset(bm); 2324 safe_highmem_bm = bm; 2325 return 0; 2326 } 2327 2328 static struct page *last_highmem_page; 2329 2330 /** 2331 * get_highmem_page_buffer - Prepare a buffer to store a highmem image page. 2332 * 2333 * For a given highmem image page get a buffer that suspend_write_next() should 2334 * return to its caller to write to. 2335 * 2336 * If the page is to be saved to its "original" page frame or a copy of 2337 * the page is to be made in the highmem, @buffer is returned. Otherwise, 2338 * the copy of the page is to be made in normal memory, so the address of 2339 * the copy is returned. 2340 * 2341 * If @buffer is returned, the caller of suspend_write_next() will write 2342 * the page's contents to @buffer, so they will have to be copied to the 2343 * right location on the next call to suspend_write_next() and it is done 2344 * with the help of copy_last_highmem_page(). For this purpose, if 2345 * @buffer is returned, @last_highmem_page is set to the page to which 2346 * the data will have to be copied from @buffer. 
2347 */ 2348 static void *get_highmem_page_buffer(struct page *page, 2349 struct chain_allocator *ca) 2350 { 2351 struct highmem_pbe *pbe; 2352 void *kaddr; 2353 2354 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { 2355 /* 2356 * We have allocated the "original" page frame and we can 2357 * use it directly to store the loaded page. 2358 */ 2359 last_highmem_page = page; 2360 return buffer; 2361 } 2362 /* 2363 * The "original" page frame has not been allocated and we have to 2364 * use a "safe" page frame to store the loaded page. 2365 */ 2366 pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); 2367 if (!pbe) { 2368 swsusp_free(); 2369 return ERR_PTR(-ENOMEM); 2370 } 2371 pbe->orig_page = page; 2372 if (safe_highmem_pages > 0) { 2373 struct page *tmp; 2374 2375 /* Copy of the page will be stored in high memory */ 2376 kaddr = buffer; 2377 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); 2378 safe_highmem_pages--; 2379 last_highmem_page = tmp; 2380 pbe->copy_page = tmp; 2381 } else { 2382 /* Copy of the page will be stored in normal memory */ 2383 kaddr = safe_pages_list; 2384 safe_pages_list = safe_pages_list->next; 2385 pbe->copy_page = virt_to_page(kaddr); 2386 } 2387 pbe->next = highmem_pblist; 2388 highmem_pblist = pbe; 2389 return kaddr; 2390 } 2391 2392 /** 2393 * copy_last_highmem_page - Copy most the most recent highmem image page. 2394 * 2395 * Copy the contents of a highmem image from @buffer, where the caller of 2396 * snapshot_write_next() has stored them, to the right location represented by 2397 * @last_highmem_page . 
2398 */ 2399 static void copy_last_highmem_page(void) 2400 { 2401 if (last_highmem_page) { 2402 void *dst; 2403 2404 dst = kmap_atomic(last_highmem_page); 2405 copy_page(dst, buffer); 2406 kunmap_atomic(dst); 2407 last_highmem_page = NULL; 2408 } 2409 } 2410 2411 static inline int last_highmem_page_copied(void) 2412 { 2413 return !last_highmem_page; 2414 } 2415 2416 static inline void free_highmem_data(void) 2417 { 2418 if (safe_highmem_bm) 2419 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); 2420 2421 if (buffer) 2422 free_image_page(buffer, PG_UNSAFE_CLEAR); 2423 } 2424 #else 2425 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } 2426 2427 static inline int prepare_highmem_image(struct memory_bitmap *bm, 2428 unsigned int *nr_highmem_p) { return 0; } 2429 2430 static inline void *get_highmem_page_buffer(struct page *page, 2431 struct chain_allocator *ca) 2432 { 2433 return ERR_PTR(-EINVAL); 2434 } 2435 2436 static inline void copy_last_highmem_page(void) {} 2437 static inline int last_highmem_page_copied(void) { return 1; } 2438 static inline void free_highmem_data(void) {} 2439 #endif /* CONFIG_HIGHMEM */ 2440 2441 #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) 2442 2443 /** 2444 * prepare_image - Make room for loading hibernation image. 2445 * @new_bm: Unitialized memory bitmap structure. 2446 * @bm: Memory bitmap with unsafe pages marked. 2447 * 2448 * Use @bm to mark the pages that will be overwritten in the process of 2449 * restoring the system memory state from the suspend image ("unsafe" pages) 2450 * and allocate memory for the image. 2451 * 2452 * The idea is to allocate a new memory bitmap first and then allocate 2453 * as many pages as needed for image data, but without specifying what those 2454 * pages will be used for just yet. Instead, we mark them all as allocated and 2455 * create a lists of "safe" pages to be used later. 
On systems with high 2456 * memory a list of "safe" highmem pages is created too. 2457 */ 2458 static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) 2459 { 2460 unsigned int nr_pages, nr_highmem; 2461 struct linked_page *lp; 2462 int error; 2463 2464 /* If there is no highmem, the buffer will not be necessary */ 2465 free_image_page(buffer, PG_UNSAFE_CLEAR); 2466 buffer = NULL; 2467 2468 nr_highmem = count_highmem_image_pages(bm); 2469 mark_unsafe_pages(bm); 2470 2471 error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); 2472 if (error) 2473 goto Free; 2474 2475 duplicate_memory_bitmap(new_bm, bm); 2476 memory_bm_free(bm, PG_UNSAFE_KEEP); 2477 if (nr_highmem > 0) { 2478 error = prepare_highmem_image(bm, &nr_highmem); 2479 if (error) 2480 goto Free; 2481 } 2482 /* 2483 * Reserve some safe pages for potential later use. 2484 * 2485 * NOTE: This way we make sure there will be enough safe pages for the 2486 * chain_alloc() in get_buffer(). It is a bit wasteful, but 2487 * nr_copy_pages cannot be greater than 50% of the memory anyway. 2488 * 2489 * nr_copy_pages cannot be less than allocated_unsafe_pages too. 
2490 */ 2491 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2492 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); 2493 while (nr_pages > 0) { 2494 lp = get_image_page(GFP_ATOMIC, PG_SAFE); 2495 if (!lp) { 2496 error = -ENOMEM; 2497 goto Free; 2498 } 2499 lp->next = safe_pages_list; 2500 safe_pages_list = lp; 2501 nr_pages--; 2502 } 2503 /* Preallocate memory for the image */ 2504 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2505 while (nr_pages > 0) { 2506 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); 2507 if (!lp) { 2508 error = -ENOMEM; 2509 goto Free; 2510 } 2511 if (!swsusp_page_is_free(virt_to_page(lp))) { 2512 /* The page is "safe", add it to the list */ 2513 lp->next = safe_pages_list; 2514 safe_pages_list = lp; 2515 } 2516 /* Mark the page as allocated */ 2517 swsusp_set_page_forbidden(virt_to_page(lp)); 2518 swsusp_set_page_free(virt_to_page(lp)); 2519 nr_pages--; 2520 } 2521 return 0; 2522 2523 Free: 2524 swsusp_free(); 2525 return error; 2526 } 2527 2528 /** 2529 * get_buffer - Get the address to store the next image data page. 2530 * 2531 * Get the address that snapshot_write_next() should return to its caller to 2532 * write to. 2533 */ 2534 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) 2535 { 2536 struct pbe *pbe; 2537 struct page *page; 2538 unsigned long pfn = memory_bm_next_pfn(bm); 2539 2540 if (pfn == BM_END_OF_MAP) 2541 return ERR_PTR(-EFAULT); 2542 2543 page = pfn_to_page(pfn); 2544 if (PageHighMem(page)) 2545 return get_highmem_page_buffer(page, ca); 2546 2547 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) 2548 /* 2549 * We have allocated the "original" page frame and we can 2550 * use it directly to store the loaded page. 2551 */ 2552 return page_address(page); 2553 2554 /* 2555 * The "original" page frame has not been allocated and we have to 2556 * use a "safe" page frame to store the loaded page. 
2557 */ 2558 pbe = chain_alloc(ca, sizeof(struct pbe)); 2559 if (!pbe) { 2560 swsusp_free(); 2561 return ERR_PTR(-ENOMEM); 2562 } 2563 pbe->orig_address = page_address(page); 2564 pbe->address = safe_pages_list; 2565 safe_pages_list = safe_pages_list->next; 2566 pbe->next = restore_pblist; 2567 restore_pblist = pbe; 2568 return pbe->address; 2569 } 2570 2571 /** 2572 * snapshot_write_next - Get the address to store the next image page. 2573 * @handle: Snapshot handle structure to guide the writing. 2574 * 2575 * On the first call, @handle should point to a zeroed snapshot_handle 2576 * structure. The structure gets populated then and a pointer to it should be 2577 * passed to this function every next time. 2578 * 2579 * On success, the function returns a positive number. Then, the caller 2580 * is allowed to write up to the returned number of bytes to the memory 2581 * location computed by the data_of() macro. 2582 * 2583 * The function returns 0 to indicate the "end of file" condition. Negative 2584 * numbers are returned on errors, in which cases the structure pointed to by 2585 * @handle is not updated and should not be used any more. 
2586 */ 2587 int snapshot_write_next(struct snapshot_handle *handle) 2588 { 2589 static struct chain_allocator ca; 2590 int error = 0; 2591 2592 /* Check if we have already loaded the entire image */ 2593 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) 2594 return 0; 2595 2596 handle->sync_read = 1; 2597 2598 if (!handle->cur) { 2599 if (!buffer) 2600 /* This makes the buffer be freed by swsusp_free() */ 2601 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2602 2603 if (!buffer) 2604 return -ENOMEM; 2605 2606 handle->buffer = buffer; 2607 } else if (handle->cur == 1) { 2608 error = load_header(buffer); 2609 if (error) 2610 return error; 2611 2612 safe_pages_list = NULL; 2613 2614 error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); 2615 if (error) 2616 return error; 2617 2618 hibernate_restore_protection_begin(); 2619 } else if (handle->cur <= nr_meta_pages + 1) { 2620 error = unpack_orig_pfns(buffer, ©_bm); 2621 if (error) 2622 return error; 2623 2624 if (handle->cur == nr_meta_pages + 1) { 2625 error = prepare_image(&orig_bm, ©_bm); 2626 if (error) 2627 return error; 2628 2629 chain_init(&ca, GFP_ATOMIC, PG_SAFE); 2630 memory_bm_position_reset(&orig_bm); 2631 restore_pblist = NULL; 2632 handle->buffer = get_buffer(&orig_bm, &ca); 2633 handle->sync_read = 0; 2634 if (IS_ERR(handle->buffer)) 2635 return PTR_ERR(handle->buffer); 2636 } 2637 } else { 2638 copy_last_highmem_page(); 2639 hibernate_restore_protect_page(handle->buffer); 2640 handle->buffer = get_buffer(&orig_bm, &ca); 2641 if (IS_ERR(handle->buffer)) 2642 return PTR_ERR(handle->buffer); 2643 if (handle->buffer != buffer) 2644 handle->sync_read = 0; 2645 } 2646 handle->cur++; 2647 return PAGE_SIZE; 2648 } 2649 2650 /** 2651 * snapshot_write_finalize - Complete the loading of a hibernation image. 2652 * 2653 * Must be called after the last call to snapshot_write_next() in case the last 2654 * page in the image happens to be a highmem page and its contents should be 2655 * stored in highmem. 
Additionally, it recycles bitmap memory that's not 2656 * necessary any more. 2657 */ 2658 void snapshot_write_finalize(struct snapshot_handle *handle) 2659 { 2660 copy_last_highmem_page(); 2661 hibernate_restore_protect_page(handle->buffer); 2662 /* Do that only if we have loaded the image entirely */ 2663 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { 2664 memory_bm_recycle(&orig_bm); 2665 free_highmem_data(); 2666 } 2667 } 2668 2669 int snapshot_image_loaded(struct snapshot_handle *handle) 2670 { 2671 return !(!nr_copy_pages || !last_highmem_page_copied() || 2672 handle->cur <= nr_meta_pages + nr_copy_pages); 2673 } 2674 2675 #ifdef CONFIG_HIGHMEM 2676 /* Assumes that @buf is ready and points to a "safe" page */ 2677 static inline void swap_two_pages_data(struct page *p1, struct page *p2, 2678 void *buf) 2679 { 2680 void *kaddr1, *kaddr2; 2681 2682 kaddr1 = kmap_atomic(p1); 2683 kaddr2 = kmap_atomic(p2); 2684 copy_page(buf, kaddr1); 2685 copy_page(kaddr1, kaddr2); 2686 copy_page(kaddr2, buf); 2687 kunmap_atomic(kaddr2); 2688 kunmap_atomic(kaddr1); 2689 } 2690 2691 /** 2692 * restore_highmem - Put highmem image pages into their original locations. 2693 * 2694 * For each highmem page that was in use before hibernation and is included in 2695 * the image, and also has been allocated by the "restore" kernel, swap its 2696 * current contents with the previous (ie. "before hibernation") ones. 2697 * 2698 * If the restore eventually fails, we can call this function once again and 2699 * restore the highmem state as seen by the restore kernel. 
2700 */ 2701 int restore_highmem(void) 2702 { 2703 struct highmem_pbe *pbe = highmem_pblist; 2704 void *buf; 2705 2706 if (!pbe) 2707 return 0; 2708 2709 buf = get_image_page(GFP_ATOMIC, PG_SAFE); 2710 if (!buf) 2711 return -ENOMEM; 2712 2713 while (pbe) { 2714 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); 2715 pbe = pbe->next; 2716 } 2717 free_image_page(buf, PG_UNSAFE_CLEAR); 2718 return 0; 2719 } 2720 #endif /* CONFIG_HIGHMEM */ 2721