// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/kernel/power/snapshot.c
 *
 * This file provides system snapshot/restore functionality for swsusp.
 *
 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
 */

#define pr_fmt(fmt) "PM: hibernation: " fmt

#include <linux/version.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/nmi.h>
#include <linux/syscalls.h>
#include <linux/console.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/compiler.h>
#include <linux/ktime.h>
#include <linux/set_memory.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

#include "power.h"

#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY)
/* Protection of restored image pages was requested (see enable function). */
static bool hibernate_restore_protection;
/* Protection is in effect for the restore currently in progress. */
static bool hibernate_restore_protection_active;

void enable_restore_image_protection(void)
{
	hibernate_restore_protection = true;
}

static inline void hibernate_restore_protection_begin(void)
{
	hibernate_restore_protection_active = hibernate_restore_protection;
}

static inline void hibernate_restore_protection_end(void)
{
	hibernate_restore_protection_active = false;
}

/* Make one restored page read-only while protection is active. */
static inline void hibernate_restore_protect_page(void *page_address)
{
	if (hibernate_restore_protection_active)
		set_memory_ro((unsigned long)page_address, 1);
}

/* Make one restored page writable again. */
static inline void hibernate_restore_unprotect_page(void *page_address)
{
	if (hibernate_restore_protection_active)
		set_memory_rw((unsigned long)page_address, 1);
}
#else
static inline void hibernate_restore_protection_begin(void) {}
static inline void hibernate_restore_protection_end(void) {}
static inline void hibernate_restore_protect_page(void *page_address) {}
static inline void hibernate_restore_unprotect_page(void *page_address) {}
#endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */

static int swsusp_page_is_free(struct page *);
static void swsusp_set_page_forbidden(struct page *);
static void swsusp_unset_page_forbidden(struct page *);

/*
 * Number of bytes to reserve for memory allocations made by device drivers
 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
 * cause image creation to fail (tunable via /sys/power/reserved_size).
 */
unsigned long reserved_size;

void __init hibernate_reserved_size_init(void)
{
	reserved_size = SPARE_PAGES * PAGE_SIZE;
}

/*
 * Preferred image size in bytes (tunable via /sys/power/image_size).
 * When it is set to N, swsusp will do its best to ensure the image
 * size will not exceed N bytes, but if that is impossible, it will
 * try to create the smallest image possible.
 */
unsigned long image_size;

void __init hibernate_image_size_init(void)
{
	/* Default the preferred image size to 2/5 of total RAM. */
	image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE;
}

/*
 * List of PBEs needed for restoring the pages that were allocated before
 * the suspend and included in the suspend image, but have also been
 * allocated by the "resume" kernel, so their contents cannot be written
 * directly to their "original" page frames.
 */
struct pbe *restore_pblist;

/* struct linked_page is used to build chains of pages */

#define LINKED_PAGE_DATA_SIZE	(PAGE_SIZE - sizeof(void *))

struct linked_page {
	struct linked_page *next;
	char data[LINKED_PAGE_DATA_SIZE];
} __packed;

/*
 * List of "safe" pages (ie.
pages that were not used by the image kernel
 * before hibernation) that may be used as temporary storage for image kernel
 * memory contents.
 */
static struct linked_page *safe_pages_list;

/* Pointer to an auxiliary buffer (1 page) */
static void *buffer;

#define PG_ANY		0
#define PG_SAFE		1
#define PG_UNSAFE_CLEAR	1
#define PG_UNSAFE_KEEP	0

static unsigned int allocated_unsafe_pages;

/**
 * get_image_page - Allocate a page for a hibernation image.
 * @gfp_mask: GFP mask for the allocation.
 * @safe_needed: Get pages that were not used before hibernation (restore only)
 *
 * During image restoration, for storing the PBE list and the image data, we can
 * only use memory pages that do not conflict with the pages used before
 * hibernation. The "unsafe" pages have PageNosaveFree set and we count them
 * using allocated_unsafe_pages.
 *
 * Each allocated image page is marked as PageNosave and PageNosaveFree so that
 * swsusp_free() can release it.
 */
static void *get_image_page(gfp_t gfp_mask, int safe_needed)
{
	void *res;

	res = (void *)get_zeroed_page(gfp_mask);
	if (safe_needed)
		while (res && swsusp_page_is_free(virt_to_page(res))) {
			/* The page is unsafe, mark it for swsusp_free() */
			swsusp_set_page_forbidden(virt_to_page(res));
			allocated_unsafe_pages++;
			res = (void *)get_zeroed_page(gfp_mask);
		}
	if (res) {
		swsusp_set_page_forbidden(virt_to_page(res));
		swsusp_set_page_free(virt_to_page(res));
	}
	return res;
}

/* Pop a zeroed page off safe_pages_list, or allocate a fresh "safe" one. */
static void *__get_safe_page(gfp_t gfp_mask)
{
	if (safe_pages_list) {
		void *ret = safe_pages_list;

		safe_pages_list = safe_pages_list->next;
		memset(ret, 0, PAGE_SIZE);
		return ret;
	}
	return get_image_page(gfp_mask, PG_SAFE);
}

unsigned long get_safe_page(gfp_t gfp_mask)
{
	return (unsigned long)__get_safe_page(gfp_mask);
}

/* Allocate a page and mark it with the image-page flags (see above). */
static struct page *alloc_image_page(gfp_t gfp_mask)
{
	struct page *page;

	page = alloc_page(gfp_mask);
	if (page) {
		swsusp_set_page_forbidden(page);
		swsusp_set_page_free(page);
	}
	return page;
}

/* Return a no-longer-needed safe page to the head of safe_pages_list. */
static void recycle_safe_page(void *page_address)
{
	struct linked_page *lp = page_address;

	lp->next = safe_pages_list;
	safe_pages_list = lp;
}

/**
 * free_image_page - Free a page allocated for hibernation image.
 * @addr: Address of the page to free.
 * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page.
 *
 * The page to free should have been allocated by get_image_page() (page flags
 * set by it are affected).
 */
static inline void free_image_page(void *addr, int clear_nosave_free)
{
	struct page *page;

	BUG_ON(!virt_addr_valid(addr));

	page = virt_to_page(addr);

	swsusp_unset_page_forbidden(page);
	if (clear_nosave_free)
		swsusp_unset_page_free(page);

	__free_page(page);
}

static inline void free_list_of_pages(struct linked_page *list,
				      int clear_page_nosave)
{
	while (list) {
		struct linked_page *lp = list->next;

		free_image_page(list, clear_page_nosave);
		list = lp;
	}
}

/*
 * struct chain_allocator is used for allocating small objects out of
 * a linked list of pages called 'the chain'.
 *
 * The chain grows each time when there is no room for a new object in
 * the current page. The allocated objects cannot be freed individually.
 * It is only possible to free them all at once, by freeing the entire
 * chain.
 *
 * NOTE: The chain allocator may be inefficient if the allocated objects
 * are not much smaller than PAGE_SIZE.
 */
struct chain_allocator {
	struct linked_page *chain;	/* the chain */
	unsigned int used_space;	/* total size of objects allocated out
					   of the current page */
	gfp_t gfp_mask;		/* mask for allocating pages */
	int safe_needed;	/* if set, only "safe" pages are allocated */
};

static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask,
		       int safe_needed)
{
	ca->chain = NULL;
	/* Start "full" so the first chain_alloc() grabs a fresh page. */
	ca->used_space = LINKED_PAGE_DATA_SIZE;
	ca->gfp_mask = gfp_mask;
	ca->safe_needed = safe_needed;
}

static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
{
	void *ret;

	if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
		/* Not enough room in the current page, grow the chain. */
		struct linked_page *lp;

		lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) :
					get_image_page(ca->gfp_mask, PG_ANY);
		if (!lp)
			return NULL;

		lp->next = ca->chain;
		ca->chain = lp;
		ca->used_space = 0;
	}
	ret = ca->chain->data + ca->used_space;
	ca->used_space += size;
	return ret;
}

/**
 * Data types related to memory bitmaps.
 *
 * Memory bitmap is a structure consisting of many linked lists of
 * objects. The main list's elements are of type struct zone_bitmap
 * and each of them corresponds to one zone. For each zone bitmap
 * object there is a list of objects of type struct bm_block that
 * represent each block of bitmap in which information is stored.
 *
 * struct memory_bitmap contains a pointer to the main list of zone
 * bitmap objects, a struct bm_position used for browsing the bitmap,
 * and a pointer to the list of pages used for allocating all of the
 * zone bitmap objects and bitmap block objects.
 *
 * NOTE: It has to be possible to lay out the bitmap in memory
 * using only allocations of order 0. Additionally, the bitmap is
 * designed to work with arbitrary number of zones (this is over the
 * top for now, but let's avoid making unnecessary assumptions ;-).
 *
 * struct zone_bitmap contains a pointer to a list of bitmap block
 * objects and a pointer to the bitmap block object that has been
 * most recently used for setting bits. Additionally, it contains the
 * PFNs that correspond to the start and end of the represented zone.
 *
 * struct bm_block contains a pointer to the memory page in which
 * information is stored (in the form of a block of bitmap)
 * It also contains the pfns that correspond to the start and end of
 * the represented memory area.
 *
 * The memory bitmap is organized as a radix tree to guarantee fast random
 * access to the bits. There is one radix tree for each zone (as returned
 * from create_mem_extents).
 *
 * One radix tree is represented by one struct mem_zone_bm_rtree. There are
 * two linked lists for the nodes of the tree, one for the inner nodes and
 * one for the leave nodes. The linked leave nodes are used for fast linear
 * access of the memory bitmap.
 *
 * The struct rtree_node represents one node of the radix tree.
 */

#define BM_END_OF_MAP	(~0UL)

#define BM_BITS_PER_BLOCK	(PAGE_SIZE * BITS_PER_BYTE)
#define BM_BLOCK_SHIFT		(PAGE_SHIFT + 3)
#define BM_BLOCK_MASK		((1UL << BM_BLOCK_SHIFT) - 1)

/*
 * struct rtree_node is a wrapper struct to link the nodes
 * of the rtree together for easy linear iteration over
 * bits and easy freeing
 */
struct rtree_node {
	struct list_head list;
	unsigned long *data;
};

/*
 * struct mem_zone_bm_rtree represents a bitmap used for one
 * populated memory zone.
 */
struct mem_zone_bm_rtree {
	struct list_head list;		/* Link Zones together */
	struct list_head nodes;		/* Radix Tree inner nodes */
	struct list_head leaves;	/* Radix Tree leaves */
	unsigned long start_pfn;	/* Zone start page frame */
	unsigned long end_pfn;		/* Zone end page frame + 1 */
	struct rtree_node *rtree;	/* Radix Tree Root */
	int levels;			/* Number of Radix Tree Levels */
	unsigned int blocks;		/* Number of Bitmap Blocks */
};

/* struct bm_position is used for browsing memory bitmaps */

struct bm_position {
	struct mem_zone_bm_rtree *zone;
	struct rtree_node *node;
	unsigned long node_pfn;
	int node_bit;
};

struct memory_bitmap {
	struct list_head zones;
	struct linked_page *p_list;	/* list of pages used to store zone
					   bitmap objects and bitmap block
					   objects */
	struct bm_position cur;	/* most recently used bit position */
};

/* Functions that operate on memory bitmaps */

#define BM_ENTRIES_PER_LEVEL	(PAGE_SIZE / sizeof(unsigned long))
#if BITS_PER_LONG == 32
#define BM_RTREE_LEVEL_SHIFT	(PAGE_SHIFT - 2)
#else
#define BM_RTREE_LEVEL_SHIFT	(PAGE_SHIFT - 3)
#endif
#define BM_RTREE_LEVEL_MASK	((1UL << BM_RTREE_LEVEL_SHIFT) - 1)

/**
 * alloc_rtree_node - Allocate a new node and add it to the radix tree.
 *
 * This function is used to allocate inner nodes as well as the
 * leave nodes of the radix tree. It also adds the node to the
 * corresponding linked list passed in by the *list parameter.
 */
static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed,
					   struct chain_allocator *ca,
					   struct list_head *list)
{
	struct rtree_node *node;

	node = chain_alloc(ca, sizeof(struct rtree_node));
	if (!node)
		return NULL;

	/* The bitmap page itself; the chain-allocated node cannot be
	 * freed individually, so it is simply left behind on failure. */
	node->data = get_image_page(gfp_mask, safe_needed);
	if (!node->data)
		return NULL;

	list_add_tail(&node->list, list);

	return node;
}

/**
 * add_rtree_block - Add a new leave node to the radix tree.
 *
 * The leave nodes need to be allocated in order to keep the leaves
 * linked list in order. This is guaranteed by the zone->blocks
 * counter.
 */
static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask,
			   int safe_needed, struct chain_allocator *ca)
{
	struct rtree_node *node, *block, **dst;
	unsigned int levels_needed, block_nr;
	int i;

	block_nr = zone->blocks;
	levels_needed = 0;

	/* How many levels do we need for this block nr? */
	while (block_nr) {
		levels_needed += 1;
		block_nr >>= BM_RTREE_LEVEL_SHIFT;
	}

	/* Make sure the rtree has enough levels */
	for (i = zone->levels; i < levels_needed; i++) {
		node = alloc_rtree_node(gfp_mask, safe_needed, ca,
					&zone->nodes);
		if (!node)
			return -ENOMEM;

		/* The old root becomes slot 0 of the new, taller root. */
		node->data[0] = (unsigned long)zone->rtree;
		zone->rtree = node;
		zone->levels += 1;
	}

	/* Allocate new block */
	block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves);
	if (!block)
		return -ENOMEM;

	/* Now walk the rtree to insert the block */
	node = zone->rtree;
	dst = &zone->rtree;
	block_nr = zone->blocks;
	for (i = zone->levels; i > 0; i--) {
		int index;

		if (!node) {
			/* Missing inner node on the path, create it. */
			node = alloc_rtree_node(gfp_mask, safe_needed, ca,
						&zone->nodes);
			if (!node)
				return -ENOMEM;
			*dst = node;
		}

		index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
		index &= BM_RTREE_LEVEL_MASK;
		dst = (struct rtree_node **)&((*dst)->data[index]);
		node = *dst;
	}

	zone->blocks += 1;
	*dst = block;

	return 0;
}

static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
			       int clear_nosave_free);

/**
 * create_zone_bm_rtree - Create a radix tree for one zone.
 *
 * Allocates the mem_zone_bm_rtree structure and initializes it.
 * This function also allocates and builds the radix tree for the
 * zone.
 */
static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask,
						      int safe_needed,
						      struct chain_allocator *ca,
						      unsigned long start,
						      unsigned long end)
{
	struct mem_zone_bm_rtree *zone;
	unsigned int i, nr_blocks;
	unsigned long pages;

	pages = end - start;
	zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree));
	if (!zone)
		return NULL;

	INIT_LIST_HEAD(&zone->nodes);
	INIT_LIST_HEAD(&zone->leaves);
	zone->start_pfn = start;
	zone->end_pfn = end;
	nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);

	for (i = 0; i < nr_blocks; i++) {
		if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) {
			free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR);
			return NULL;
		}
	}

	return zone;
}

/**
 * free_zone_bm_rtree - Free the memory of the radix tree.
 *
 * Free all node pages of the radix tree. The mem_zone_bm_rtree
 * structure itself is not freed here nor are the rtree_node
 * structs.
 */
static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
			       int clear_nosave_free)
{
	struct rtree_node *node;

	list_for_each_entry(node, &zone->nodes, list)
		free_image_page(node->data, clear_nosave_free);

	list_for_each_entry(node, &zone->leaves, list)
		free_image_page(node->data, clear_nosave_free);
}

/* Rewind the bitmap's current position to its very first bit. */
static void memory_bm_position_reset(struct memory_bitmap *bm)
{
	bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
				  list);
	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
				  struct rtree_node, list);
	bm->cur.node_pfn = 0;
	bm->cur.node_bit = 0;
}

static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);

struct mem_extent {
	struct list_head hook;
	unsigned long start;
	unsigned long end;
};

/**
 * free_mem_extents - Free a list of memory extents.
 * @list: List of extents to free.
 */
static void free_mem_extents(struct list_head *list)
{
	struct mem_extent *ext, *aux;

	list_for_each_entry_safe(ext, aux, list, hook) {
		list_del(&ext->hook);
		kfree(ext);
	}
}

/**
 * create_mem_extents - Create a list of memory extents.
 * @list: List to put the extents into.
 * @gfp_mask: Mask to use for memory allocations.
 *
 * The extents represent contiguous ranges of PFNs.
 */
static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
{
	struct zone *zone;

	INIT_LIST_HEAD(list);

	for_each_populated_zone(zone) {
		unsigned long zone_start, zone_end;
		struct mem_extent *ext, *cur, *aux;

		zone_start = zone->zone_start_pfn;
		zone_end = zone_end_pfn(zone);

		/* Find the first extent that ends at or after this zone. */
		list_for_each_entry(ext, list, hook)
			if (zone_start <= ext->end)
				break;

		if (&ext->hook == list || zone_end < ext->start) {
			/* New extent is necessary */
			struct mem_extent *new_ext;

			new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
			if (!new_ext) {
				free_mem_extents(list);
				return -ENOMEM;
			}
			new_ext->start = zone_start;
			new_ext->end = zone_end;
			list_add_tail(&new_ext->hook, &ext->hook);
			continue;
		}

		/* Merge this zone's range of PFNs with the existing one */
		if (zone_start < ext->start)
			ext->start = zone_start;
		if (zone_end > ext->end)
			ext->end = zone_end;

		/* More merging may be possible */
		cur = ext;
		list_for_each_entry_safe_continue(cur, aux, list, hook) {
			if (zone_end < cur->start)
				break;
			if (zone_end < cur->end)
				ext->end = cur->end;
			list_del(&cur->hook);
			kfree(cur);
		}
	}

	return 0;
}

/**
 * memory_bm_create - Allocate memory for a memory bitmap.
 */
static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask,
			    int safe_needed)
{
	struct chain_allocator ca;
	struct list_head mem_extents;
	struct mem_extent *ext;
	int error;

	chain_init(&ca, gfp_mask, safe_needed);
	INIT_LIST_HEAD(&bm->zones);

	error = create_mem_extents(&mem_extents, gfp_mask);
	if (error)
		return error;

	/* One radix tree per contiguous extent of PFNs. */
	list_for_each_entry(ext, &mem_extents, hook) {
		struct mem_zone_bm_rtree *zone;

		zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca,
					    ext->start, ext->end);
		if (!zone) {
			error = -ENOMEM;
			goto Error;
		}
		list_add_tail(&zone->list, &bm->zones);
	}

	bm->p_list = ca.chain;
	memory_bm_position_reset(bm);
 Exit:
	free_mem_extents(&mem_extents);
	return error;

 Error:
	bm->p_list = ca.chain;
	memory_bm_free(bm, PG_UNSAFE_CLEAR);
	goto Exit;
}

/**
 * memory_bm_free - Free memory occupied by the memory bitmap.
 * @bm: Memory bitmap.
 */
static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
{
	struct mem_zone_bm_rtree *zone;

	list_for_each_entry(zone, &bm->zones, list)
		free_zone_bm_rtree(zone, clear_nosave_free);

	free_list_of_pages(bm->p_list, clear_nosave_free);

	INIT_LIST_HEAD(&bm->zones);
}

/**
 * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap.
 *
 * Find the bit in memory bitmap @bm that corresponds to the given PFN.
 * The cur.zone, cur.block and cur.node_pfn members of @bm are updated.
 *
 * Walk the radix tree to find the page containing the bit that represents @pfn
 * and return the position of the bit in @addr and @bit_nr.
 */
static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
			      void **addr, unsigned int *bit_nr)
{
	struct mem_zone_bm_rtree *curr, *zone;
	struct rtree_node *node;
	int i, block_nr;

	zone = bm->cur.zone;

	/* Fast path: the PFN is in the most recently used zone. */
	if (pfn >= zone->start_pfn && pfn < zone->end_pfn)
		goto zone_found;

	zone = NULL;

	/* Find the right zone */
	list_for_each_entry(curr, &bm->zones, list) {
		if (pfn >= curr->start_pfn && pfn < curr->end_pfn) {
			zone = curr;
			break;
		}
	}

	if (!zone)
		return -EFAULT;

zone_found:
	/*
	 * We have found the zone. Now walk the radix tree to find the leaf node
	 * for our PFN.
	 */

	/*
	 * If the zone we wish to scan is the current zone and the
	 * pfn falls into the current node then we do not need to walk
	 * the tree.
	 */
	node = bm->cur.node;
	if (zone == bm->cur.zone &&
	    ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
		goto node_found;

	node = zone->rtree;
	block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT;

	for (i = zone->levels; i > 0; i--) {
		int index;

		index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
		index &= BM_RTREE_LEVEL_MASK;
		BUG_ON(node->data[index] == 0);
		node = (struct rtree_node *)node->data[index];
	}

node_found:
	/* Update last position */
	bm->cur.zone = zone;
	bm->cur.node = node;
	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;

	/* Set return values */
	*addr = node->data;
	*bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK;

	return 0;
}

static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
{
	void *addr;
	unsigned int bit;
	int error;

	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
	BUG_ON(error);
	set_bit(bit, addr);
}

/* Like memory_bm_set_bit(), but tolerate PFNs outside the bitmap. */
static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
{
	void *addr;
	unsigned int bit;
	int error;

	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
	if (!error)
		set_bit(bit, addr);

	return error;
}

static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
{
	void *addr;
	unsigned int bit;
	int error;

	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
	BUG_ON(error);
	clear_bit(bit, addr);
}

/* Clear the bit most recently returned by memory_bm_next_pfn(). */
static void memory_bm_clear_current(struct memory_bitmap *bm)
{
	int bit;

	/* node_bit was advanced past the returned bit, so step back one. */
	bit = max(bm->cur.node_bit - 1, 0);
	clear_bit(bit, bm->cur.node->data);
}

static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
{
	void *addr;
	unsigned int bit;
	int error;

	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
	BUG_ON(error);
	return test_bit(bit, addr);
}

static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
{
	void *addr;
	unsigned int bit;

	return !memory_bm_find_bit(bm, pfn, &addr, &bit);
}

/*
 * rtree_next_node - Jump to the next leaf node.
 *
 * Set the position to the beginning of the next node in the
 * memory bitmap. This is either the next node in the current
 * zone's radix tree or the first node in the radix tree of the
 * next zone.
 *
 * Return true if there is a next node, false otherwise.
 */
static bool rtree_next_node(struct memory_bitmap *bm)
{
	if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
		bm->cur.node = list_entry(bm->cur.node->list.next,
					  struct rtree_node, list);
		bm->cur.node_pfn += BM_BITS_PER_BLOCK;
		bm->cur.node_bit = 0;
		touch_softlockup_watchdog();
		return true;
	}

	/* No more nodes, goto next zone */
	if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
		bm->cur.zone = list_entry(bm->cur.zone->list.next,
					  struct mem_zone_bm_rtree, list);
		bm->cur.node = list_entry(bm->cur.zone->leaves.next,
					  struct rtree_node, list);
		bm->cur.node_pfn = 0;
		bm->cur.node_bit = 0;
		return true;
	}

	/* No more zones */
	return false;
}

/**
 * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap.
 * @bm: Memory bitmap.
 *
 * Starting from the last returned position this function searches for the next
 * set bit in @bm and returns the PFN represented by it. If no more bits are
 * set, BM_END_OF_MAP is returned.
 *
 * It is required to run memory_bm_position_reset() before the first call to
 * this function for the given memory bitmap.
 */
static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
{
	unsigned long bits, pfn, pages;
	int bit;

	do {
		pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn;
		/* The last leaf of a zone may cover fewer than a full block. */
		bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK);
		bit = find_next_bit(bm->cur.node->data, bits,
				    bm->cur.node_bit);
		if (bit < bits) {
			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
			bm->cur.node_bit = bit + 1;
			return pfn;
		}
	} while (rtree_next_node(bm));

	return BM_END_OF_MAP;
}

/*
 * This structure represents a range of page frames the contents of which
 * should not be saved during hibernation.
905 */ 906 struct nosave_region { 907 struct list_head list; 908 unsigned long start_pfn; 909 unsigned long end_pfn; 910 }; 911 912 static LIST_HEAD(nosave_regions); 913 914 static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone) 915 { 916 struct rtree_node *node; 917 918 list_for_each_entry(node, &zone->nodes, list) 919 recycle_safe_page(node->data); 920 921 list_for_each_entry(node, &zone->leaves, list) 922 recycle_safe_page(node->data); 923 } 924 925 static void memory_bm_recycle(struct memory_bitmap *bm) 926 { 927 struct mem_zone_bm_rtree *zone; 928 struct linked_page *p_list; 929 930 list_for_each_entry(zone, &bm->zones, list) 931 recycle_zone_bm_rtree(zone); 932 933 p_list = bm->p_list; 934 while (p_list) { 935 struct linked_page *lp = p_list; 936 937 p_list = lp->next; 938 recycle_safe_page(lp); 939 } 940 } 941 942 /** 943 * register_nosave_region - Register a region of unsaveable memory. 944 * 945 * Register a range of page frames the contents of which should not be saved 946 * during hibernation (to be used in the early initialization code). 
947 */ 948 void __init __register_nosave_region(unsigned long start_pfn, 949 unsigned long end_pfn, int use_kmalloc) 950 { 951 struct nosave_region *region; 952 953 if (start_pfn >= end_pfn) 954 return; 955 956 if (!list_empty(&nosave_regions)) { 957 /* Try to extend the previous region (they should be sorted) */ 958 region = list_entry(nosave_regions.prev, 959 struct nosave_region, list); 960 if (region->end_pfn == start_pfn) { 961 region->end_pfn = end_pfn; 962 goto Report; 963 } 964 } 965 if (use_kmalloc) { 966 /* During init, this shouldn't fail */ 967 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); 968 BUG_ON(!region); 969 } else { 970 /* This allocation cannot fail */ 971 region = memblock_alloc(sizeof(struct nosave_region), 972 SMP_CACHE_BYTES); 973 if (!region) 974 panic("%s: Failed to allocate %zu bytes\n", __func__, 975 sizeof(struct nosave_region)); 976 } 977 region->start_pfn = start_pfn; 978 region->end_pfn = end_pfn; 979 list_add_tail(®ion->list, &nosave_regions); 980 Report: 981 pr_info("Registered nosave memory: [mem %#010llx-%#010llx]\n", 982 (unsigned long long) start_pfn << PAGE_SHIFT, 983 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); 984 } 985 986 /* 987 * Set bits in this map correspond to the page frames the contents of which 988 * should not be saved during the suspend. 989 */ 990 static struct memory_bitmap *forbidden_pages_map; 991 992 /* Set bits in this map correspond to free page frames. */ 993 static struct memory_bitmap *free_pages_map; 994 995 /* 996 * Each page frame allocated for creating the image is marked by setting the 997 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously 998 */ 999 1000 void swsusp_set_page_free(struct page *page) 1001 { 1002 if (free_pages_map) 1003 memory_bm_set_bit(free_pages_map, page_to_pfn(page)); 1004 } 1005 1006 static int swsusp_page_is_free(struct page *page) 1007 { 1008 return free_pages_map ? 
1009 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; 1010 } 1011 1012 void swsusp_unset_page_free(struct page *page) 1013 { 1014 if (free_pages_map) 1015 memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); 1016 } 1017 1018 static void swsusp_set_page_forbidden(struct page *page) 1019 { 1020 if (forbidden_pages_map) 1021 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); 1022 } 1023 1024 int swsusp_page_is_forbidden(struct page *page) 1025 { 1026 return forbidden_pages_map ? 1027 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; 1028 } 1029 1030 static void swsusp_unset_page_forbidden(struct page *page) 1031 { 1032 if (forbidden_pages_map) 1033 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); 1034 } 1035 1036 /** 1037 * mark_nosave_pages - Mark pages that should not be saved. 1038 * @bm: Memory bitmap. 1039 * 1040 * Set the bits in @bm that correspond to the page frames the contents of which 1041 * should not be saved. 1042 */ 1043 static void mark_nosave_pages(struct memory_bitmap *bm) 1044 { 1045 struct nosave_region *region; 1046 1047 if (list_empty(&nosave_regions)) 1048 return; 1049 1050 list_for_each_entry(region, &nosave_regions, list) { 1051 unsigned long pfn; 1052 1053 pr_debug("Marking nosave pages: [mem %#010llx-%#010llx]\n", 1054 (unsigned long long) region->start_pfn << PAGE_SHIFT, 1055 ((unsigned long long) region->end_pfn << PAGE_SHIFT) 1056 - 1); 1057 1058 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) 1059 if (pfn_valid(pfn)) { 1060 /* 1061 * It is safe to ignore the result of 1062 * mem_bm_set_bit_check() here, since we won't 1063 * touch the PFNs for which the error is 1064 * returned anyway. 1065 */ 1066 mem_bm_set_bit_check(bm, pfn); 1067 } 1068 } 1069 } 1070 1071 /** 1072 * create_basic_memory_bitmaps - Create bitmaps to hold basic page information. 1073 * 1074 * Create bitmaps needed for marking page frames that should not be saved and 1075 * free page frames. 
The forbidden_pages_map and free_pages_map pointers are
 * only modified if everything goes well, because we don't want the bits to be
 * touched before both bitmaps are set up.
 */
int create_basic_memory_bitmaps(void)
{
	struct memory_bitmap *bm1, *bm2;
	int error = 0;

	if (forbidden_pages_map && free_pages_map)
		return 0;
	else
		BUG_ON(forbidden_pages_map || free_pages_map);

	/* bm1 -> forbidden pages, bm2 -> free pages (assigned below). */
	bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
	if (!bm1)
		return -ENOMEM;

	error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
	if (error)
		goto Free_first_object;

	bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
	if (!bm2)
		goto Free_first_bitmap;

	error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
	if (error)
		goto Free_second_object;

	forbidden_pages_map = bm1;
	free_pages_map = bm2;
	mark_nosave_pages(forbidden_pages_map);

	pr_debug("Basic memory bitmaps created\n");

	return 0;

 Free_second_object:
	kfree(bm2);
 Free_first_bitmap:
	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
 Free_first_object:
	kfree(bm1);
	return -ENOMEM;
}

/**
 * free_basic_memory_bitmaps - Free memory bitmaps holding basic information.
 *
 * Free memory bitmaps allocated by create_basic_memory_bitmaps(). The
 * auxiliary pointers are necessary so that the bitmaps themselves are not
 * referred to while they are being freed.
 */
void free_basic_memory_bitmaps(void)
{
	struct memory_bitmap *bm1, *bm2;

	if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
		return;

	/* Clear the global pointers before freeing the bitmaps. */
	bm1 = forbidden_pages_map;
	bm2 = free_pages_map;
	forbidden_pages_map = NULL;
	free_pages_map = NULL;
	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
	kfree(bm1);
	memory_bm_free(bm2, PG_UNSAFE_CLEAR);
	kfree(bm2);

	pr_debug("Basic memory bitmaps freed\n");
}

void clear_free_pages(void)
{
	struct memory_bitmap *bm = free_pages_map;
	unsigned long pfn;

	if (WARN_ON(!(free_pages_map)))
		return;

	if (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) || want_init_on_free()) {
		/* Walk every set bit (every free page) and zero that page. */
		memory_bm_position_reset(bm);
		pfn = memory_bm_next_pfn(bm);
		while (pfn != BM_END_OF_MAP) {
			if (pfn_valid(pfn))
				clear_highpage(pfn_to_page(pfn));

			pfn = memory_bm_next_pfn(bm);
		}
		memory_bm_position_reset(bm);
		pr_info("free pages cleared after restore\n");
	}
}

/**
 * snapshot_additional_pages - Estimate the number of extra pages needed.
 * @zone: Memory zone to carry out the computation for.
 *
 * Estimate the number of additional pages needed for setting up a hibernation
 * image data structures for @zone (usually, the returned value is greater than
 * the exact number).
 */
unsigned int snapshot_additional_pages(struct zone *zone)
{
	unsigned int rtree, nodes;

	/* Leaf pages plus the chain pages holding the rtree_node structs. */
	rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
	rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node),
			      LINKED_PAGE_DATA_SIZE);
	/* Inner nodes of each level of the radix tree. */
	while (nodes > 1) {
		nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL);
		rtree += nodes;
	}

	/* Doubled, as two bitmaps of this kind are used at the same time. */
	return 2 * rtree;
}

#ifdef CONFIG_HIGHMEM
/**
 * count_free_highmem_pages - Compute the total number of free highmem pages.
 *
 * The returned number is system-wide.
 */
static unsigned int count_free_highmem_pages(void)
{
	struct zone *zone;
	unsigned int cnt = 0;

	for_each_populated_zone(zone)
		if (is_highmem(zone))
			cnt += zone_page_state(zone, NR_FREE_PAGES);

	return cnt;
}

/**
 * saveable_highmem_page - Check if a highmem page is saveable.
 *
 * Determine whether a highmem page should be included in a hibernation image.
 *
 * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
 * and it isn't part of a free chunk of pages.
 */
static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
{
	struct page *page;

	if (!pfn_valid(pfn))
		return NULL;

	page = pfn_to_online_page(pfn);
	if (!page || page_zone(page) != zone)
		return NULL;

	BUG_ON(!PageHighMem(page));

	if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
		return NULL;

	if (PageReserved(page) || PageOffline(page))
		return NULL;

	if (page_is_guard(page))
		return NULL;

	return page;
}

/**
 * count_highmem_pages - Compute the total number of saveable highmem pages.
 */
static unsigned int count_highmem_pages(void)
{
	struct zone *zone;
	unsigned int n = 0;

	for_each_populated_zone(zone) {
		unsigned long pfn, max_zone_pfn;

		if (!is_highmem(zone))
			continue;

		/* Update the free-page markers used by saveable_highmem_page(). */
		mark_free_pages(zone);
		max_zone_pfn = zone_end_pfn(zone);
		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
			if (saveable_highmem_page(zone, pfn))
				n++;
	}
	return n;
}
#else
static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
{
	return NULL;
}
#endif /* CONFIG_HIGHMEM */

/**
 * saveable_page - Check if the given page is saveable.
1275 * 1276 * Determine whether a non-highmem page should be included in a hibernation 1277 * image. 1278 * 1279 * We should save the page if it isn't Nosave, and is not in the range 1280 * of pages statically defined as 'unsaveable', and it isn't part of 1281 * a free chunk of pages. 1282 */ 1283 static struct page *saveable_page(struct zone *zone, unsigned long pfn) 1284 { 1285 struct page *page; 1286 1287 if (!pfn_valid(pfn)) 1288 return NULL; 1289 1290 page = pfn_to_online_page(pfn); 1291 if (!page || page_zone(page) != zone) 1292 return NULL; 1293 1294 BUG_ON(PageHighMem(page)); 1295 1296 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 1297 return NULL; 1298 1299 if (PageOffline(page)) 1300 return NULL; 1301 1302 if (PageReserved(page) 1303 && (!kernel_page_present(page) || pfn_is_nosave(pfn))) 1304 return NULL; 1305 1306 if (page_is_guard(page)) 1307 return NULL; 1308 1309 return page; 1310 } 1311 1312 /** 1313 * count_data_pages - Compute the total number of saveable non-highmem pages. 1314 */ 1315 static unsigned int count_data_pages(void) 1316 { 1317 struct zone *zone; 1318 unsigned long pfn, max_zone_pfn; 1319 unsigned int n = 0; 1320 1321 for_each_populated_zone(zone) { 1322 if (is_highmem(zone)) 1323 continue; 1324 1325 mark_free_pages(zone); 1326 max_zone_pfn = zone_end_pfn(zone); 1327 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1328 if (saveable_page(zone, pfn)) 1329 n++; 1330 } 1331 return n; 1332 } 1333 1334 /* 1335 * This is needed, because copy_page and memcpy are not usable for copying 1336 * task structs. 1337 */ 1338 static inline void do_copy_page(long *dst, long *src) 1339 { 1340 int n; 1341 1342 for (n = PAGE_SIZE / sizeof(long); n; n--) 1343 *dst++ = *src++; 1344 } 1345 1346 /** 1347 * safe_copy_page - Copy a page in a safe way. 1348 * 1349 * Check if the page we are going to copy is marked as present in the kernel 1350 * page tables. 
This always is the case if CONFIG_DEBUG_PAGEALLOC or 1351 * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present() 1352 * always returns 'true'. 1353 */ 1354 static void safe_copy_page(void *dst, struct page *s_page) 1355 { 1356 if (kernel_page_present(s_page)) { 1357 do_copy_page(dst, page_address(s_page)); 1358 } else { 1359 kernel_map_pages(s_page, 1, 1); 1360 do_copy_page(dst, page_address(s_page)); 1361 kernel_map_pages(s_page, 1, 0); 1362 } 1363 } 1364 1365 #ifdef CONFIG_HIGHMEM 1366 static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn) 1367 { 1368 return is_highmem(zone) ? 1369 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); 1370 } 1371 1372 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1373 { 1374 struct page *s_page, *d_page; 1375 void *src, *dst; 1376 1377 s_page = pfn_to_page(src_pfn); 1378 d_page = pfn_to_page(dst_pfn); 1379 if (PageHighMem(s_page)) { 1380 src = kmap_atomic(s_page); 1381 dst = kmap_atomic(d_page); 1382 do_copy_page(dst, src); 1383 kunmap_atomic(dst); 1384 kunmap_atomic(src); 1385 } else { 1386 if (PageHighMem(d_page)) { 1387 /* 1388 * The page pointed to by src may contain some kernel 1389 * data modified by kmap_atomic() 1390 */ 1391 safe_copy_page(buffer, s_page); 1392 dst = kmap_atomic(d_page); 1393 copy_page(dst, buffer); 1394 kunmap_atomic(dst); 1395 } else { 1396 safe_copy_page(page_address(d_page), s_page); 1397 } 1398 } 1399 } 1400 #else 1401 #define page_is_saveable(zone, pfn) saveable_page(zone, pfn) 1402 1403 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1404 { 1405 safe_copy_page(page_address(pfn_to_page(dst_pfn)), 1406 pfn_to_page(src_pfn)); 1407 } 1408 #endif /* CONFIG_HIGHMEM */ 1409 1410 static void copy_data_pages(struct memory_bitmap *copy_bm, 1411 struct memory_bitmap *orig_bm) 1412 { 1413 struct zone *zone; 1414 unsigned long pfn; 1415 1416 for_each_populated_zone(zone) { 1417 unsigned long 
max_zone_pfn; 1418 1419 mark_free_pages(zone); 1420 max_zone_pfn = zone_end_pfn(zone); 1421 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1422 if (page_is_saveable(zone, pfn)) 1423 memory_bm_set_bit(orig_bm, pfn); 1424 } 1425 memory_bm_position_reset(orig_bm); 1426 memory_bm_position_reset(copy_bm); 1427 for(;;) { 1428 pfn = memory_bm_next_pfn(orig_bm); 1429 if (unlikely(pfn == BM_END_OF_MAP)) 1430 break; 1431 copy_data_page(memory_bm_next_pfn(copy_bm), pfn); 1432 } 1433 } 1434 1435 /* Total number of image pages */ 1436 static unsigned int nr_copy_pages; 1437 /* Number of pages needed for saving the original pfns of the image pages */ 1438 static unsigned int nr_meta_pages; 1439 /* 1440 * Numbers of normal and highmem page frames allocated for hibernation image 1441 * before suspending devices. 1442 */ 1443 static unsigned int alloc_normal, alloc_highmem; 1444 /* 1445 * Memory bitmap used for marking saveable pages (during hibernation) or 1446 * hibernation image pages (during restore) 1447 */ 1448 static struct memory_bitmap orig_bm; 1449 /* 1450 * Memory bitmap used during hibernation for marking allocated page frames that 1451 * will contain copies of saveable pages. During restore it is initially used 1452 * for marking hibernation image pages, but then the set bits from it are 1453 * duplicated in @orig_bm and it is released. On highmem systems it is next 1454 * used for marking "safe" highmem pages, but it has to be reinitialized for 1455 * this purpose. 1456 */ 1457 static struct memory_bitmap copy_bm; 1458 1459 /** 1460 * swsusp_free - Free pages allocated for hibernation image. 1461 * 1462 * Image pages are alocated before snapshot creation, so they need to be 1463 * released after resume. 
1464 */ 1465 void swsusp_free(void) 1466 { 1467 unsigned long fb_pfn, fr_pfn; 1468 1469 if (!forbidden_pages_map || !free_pages_map) 1470 goto out; 1471 1472 memory_bm_position_reset(forbidden_pages_map); 1473 memory_bm_position_reset(free_pages_map); 1474 1475 loop: 1476 fr_pfn = memory_bm_next_pfn(free_pages_map); 1477 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1478 1479 /* 1480 * Find the next bit set in both bitmaps. This is guaranteed to 1481 * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP. 1482 */ 1483 do { 1484 if (fb_pfn < fr_pfn) 1485 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1486 if (fr_pfn < fb_pfn) 1487 fr_pfn = memory_bm_next_pfn(free_pages_map); 1488 } while (fb_pfn != fr_pfn); 1489 1490 if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) { 1491 struct page *page = pfn_to_page(fr_pfn); 1492 1493 memory_bm_clear_current(forbidden_pages_map); 1494 memory_bm_clear_current(free_pages_map); 1495 hibernate_restore_unprotect_page(page_address(page)); 1496 __free_page(page); 1497 goto loop; 1498 } 1499 1500 out: 1501 nr_copy_pages = 0; 1502 nr_meta_pages = 0; 1503 restore_pblist = NULL; 1504 buffer = NULL; 1505 alloc_normal = 0; 1506 alloc_highmem = 0; 1507 hibernate_restore_protection_end(); 1508 } 1509 1510 /* Helper functions used for the shrinking of memory. */ 1511 1512 #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) 1513 1514 /** 1515 * preallocate_image_pages - Allocate a number of pages for hibernation image. 1516 * @nr_pages: Number of page frames to allocate. 1517 * @mask: GFP flags to use for the allocation. 
1518 * 1519 * Return value: Number of page frames actually allocated 1520 */ 1521 static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) 1522 { 1523 unsigned long nr_alloc = 0; 1524 1525 while (nr_pages > 0) { 1526 struct page *page; 1527 1528 page = alloc_image_page(mask); 1529 if (!page) 1530 break; 1531 memory_bm_set_bit(©_bm, page_to_pfn(page)); 1532 if (PageHighMem(page)) 1533 alloc_highmem++; 1534 else 1535 alloc_normal++; 1536 nr_pages--; 1537 nr_alloc++; 1538 } 1539 1540 return nr_alloc; 1541 } 1542 1543 static unsigned long preallocate_image_memory(unsigned long nr_pages, 1544 unsigned long avail_normal) 1545 { 1546 unsigned long alloc; 1547 1548 if (avail_normal <= alloc_normal) 1549 return 0; 1550 1551 alloc = avail_normal - alloc_normal; 1552 if (nr_pages < alloc) 1553 alloc = nr_pages; 1554 1555 return preallocate_image_pages(alloc, GFP_IMAGE); 1556 } 1557 1558 #ifdef CONFIG_HIGHMEM 1559 static unsigned long preallocate_image_highmem(unsigned long nr_pages) 1560 { 1561 return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); 1562 } 1563 1564 /** 1565 * __fraction - Compute (an approximation of) x * (multiplier / base). 
1566 */ 1567 static unsigned long __fraction(u64 x, u64 multiplier, u64 base) 1568 { 1569 return div64_u64(x * multiplier, base); 1570 } 1571 1572 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1573 unsigned long highmem, 1574 unsigned long total) 1575 { 1576 unsigned long alloc = __fraction(nr_pages, highmem, total); 1577 1578 return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); 1579 } 1580 #else /* CONFIG_HIGHMEM */ 1581 static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) 1582 { 1583 return 0; 1584 } 1585 1586 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1587 unsigned long highmem, 1588 unsigned long total) 1589 { 1590 return 0; 1591 } 1592 #endif /* CONFIG_HIGHMEM */ 1593 1594 /** 1595 * free_unnecessary_pages - Release preallocated pages not needed for the image. 1596 */ 1597 static unsigned long free_unnecessary_pages(void) 1598 { 1599 unsigned long save, to_free_normal, to_free_highmem, free; 1600 1601 save = count_data_pages(); 1602 if (alloc_normal >= save) { 1603 to_free_normal = alloc_normal - save; 1604 save = 0; 1605 } else { 1606 to_free_normal = 0; 1607 save -= alloc_normal; 1608 } 1609 save += count_highmem_pages(); 1610 if (alloc_highmem >= save) { 1611 to_free_highmem = alloc_highmem - save; 1612 } else { 1613 to_free_highmem = 0; 1614 save -= alloc_highmem; 1615 if (to_free_normal > save) 1616 to_free_normal -= save; 1617 else 1618 to_free_normal = 0; 1619 } 1620 free = to_free_normal + to_free_highmem; 1621 1622 memory_bm_position_reset(©_bm); 1623 1624 while (to_free_normal > 0 || to_free_highmem > 0) { 1625 unsigned long pfn = memory_bm_next_pfn(©_bm); 1626 struct page *page = pfn_to_page(pfn); 1627 1628 if (PageHighMem(page)) { 1629 if (!to_free_highmem) 1630 continue; 1631 to_free_highmem--; 1632 alloc_highmem--; 1633 } else { 1634 if (!to_free_normal) 1635 continue; 1636 to_free_normal--; 1637 alloc_normal--; 1638 } 1639 
memory_bm_clear_bit(©_bm, pfn); 1640 swsusp_unset_page_forbidden(page); 1641 swsusp_unset_page_free(page); 1642 __free_page(page); 1643 } 1644 1645 return free; 1646 } 1647 1648 /** 1649 * minimum_image_size - Estimate the minimum acceptable size of an image. 1650 * @saveable: Number of saveable pages in the system. 1651 * 1652 * We want to avoid attempting to free too much memory too hard, so estimate the 1653 * minimum acceptable size of a hibernation image to use as the lower limit for 1654 * preallocating memory. 1655 * 1656 * We assume that the minimum image size should be proportional to 1657 * 1658 * [number of saveable pages] - [number of pages that can be freed in theory] 1659 * 1660 * where the second term is the sum of (1) reclaimable slab pages, (2) active 1661 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages. 1662 */ 1663 static unsigned long minimum_image_size(unsigned long saveable) 1664 { 1665 unsigned long size; 1666 1667 size = global_node_page_state(NR_SLAB_RECLAIMABLE) 1668 + global_node_page_state(NR_ACTIVE_ANON) 1669 + global_node_page_state(NR_INACTIVE_ANON) 1670 + global_node_page_state(NR_ACTIVE_FILE) 1671 + global_node_page_state(NR_INACTIVE_FILE); 1672 1673 return saveable <= size ? 0 : saveable - size; 1674 } 1675 1676 /** 1677 * hibernate_preallocate_memory - Preallocate memory for hibernation image. 1678 * 1679 * To create a hibernation image it is necessary to make a copy of every page 1680 * frame in use. We also need a number of page frames to be free during 1681 * hibernation for allocations made while saving the image and for device 1682 * drivers, in case they need to allocate memory from their hibernation 1683 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough 1684 * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through 1685 * /sys/power/reserved_size, respectively). 
To make this happen, we compute the 1686 * total number of available page frames and allocate at least 1687 * 1688 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 1689 * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) 1690 * 1691 * of them, which corresponds to the maximum size of a hibernation image. 1692 * 1693 * If image_size is set below the number following from the above formula, 1694 * the preallocation of memory is continued until the total number of saveable 1695 * pages in the system is below the requested image size or the minimum 1696 * acceptable image size returned by minimum_image_size(), whichever is greater. 1697 */ 1698 int hibernate_preallocate_memory(void) 1699 { 1700 struct zone *zone; 1701 unsigned long saveable, size, max_size, count, highmem, pages = 0; 1702 unsigned long alloc, save_highmem, pages_highmem, avail_normal; 1703 ktime_t start, stop; 1704 int error; 1705 1706 pr_info("Preallocating image memory\n"); 1707 start = ktime_get(); 1708 1709 error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); 1710 if (error) { 1711 pr_err("Cannot allocate original bitmap\n"); 1712 goto err_out; 1713 } 1714 1715 error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); 1716 if (error) { 1717 pr_err("Cannot allocate copy bitmap\n"); 1718 goto err_out; 1719 } 1720 1721 alloc_normal = 0; 1722 alloc_highmem = 0; 1723 1724 /* Count the number of saveable data pages. */ 1725 save_highmem = count_highmem_pages(); 1726 saveable = count_data_pages(); 1727 1728 /* 1729 * Compute the total number of page frames we can use (count) and the 1730 * number of pages needed for image metadata (size). 
1731 */ 1732 count = saveable; 1733 saveable += save_highmem; 1734 highmem = save_highmem; 1735 size = 0; 1736 for_each_populated_zone(zone) { 1737 size += snapshot_additional_pages(zone); 1738 if (is_highmem(zone)) 1739 highmem += zone_page_state(zone, NR_FREE_PAGES); 1740 else 1741 count += zone_page_state(zone, NR_FREE_PAGES); 1742 } 1743 avail_normal = count; 1744 count += highmem; 1745 count -= totalreserve_pages; 1746 1747 /* Add number of pages required for page keys (s390 only). */ 1748 size += page_key_additional_pages(saveable); 1749 1750 /* Compute the maximum number of saveable pages to leave in memory. */ 1751 max_size = (count - (size + PAGES_FOR_IO)) / 2 1752 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); 1753 /* Compute the desired number of image pages specified by image_size. */ 1754 size = DIV_ROUND_UP(image_size, PAGE_SIZE); 1755 if (size > max_size) 1756 size = max_size; 1757 /* 1758 * If the desired number of image pages is at least as large as the 1759 * current number of saveable pages in memory, allocate page frames for 1760 * the image and we're done. 1761 */ 1762 if (size >= saveable) { 1763 pages = preallocate_image_highmem(save_highmem); 1764 pages += preallocate_image_memory(saveable - pages, avail_normal); 1765 goto out; 1766 } 1767 1768 /* Estimate the minimum size of the image. */ 1769 pages = minimum_image_size(saveable); 1770 /* 1771 * To avoid excessive pressure on the normal zone, leave room in it to 1772 * accommodate an image of the minimum size (unless it's already too 1773 * small, in which case don't preallocate pages from it at all). 1774 */ 1775 if (avail_normal > pages) 1776 avail_normal -= pages; 1777 else 1778 avail_normal = 0; 1779 if (size < pages) 1780 size = min_t(unsigned long, pages, max_size); 1781 1782 /* 1783 * Let the memory management subsystem know that we're going to need a 1784 * large number of page frames to allocate and make it free some memory. 
1785 * NOTE: If this is not done, performance will be hurt badly in some 1786 * test cases. 1787 */ 1788 shrink_all_memory(saveable - size); 1789 1790 /* 1791 * The number of saveable pages in memory was too high, so apply some 1792 * pressure to decrease it. First, make room for the largest possible 1793 * image and fail if that doesn't work. Next, try to decrease the size 1794 * of the image as much as indicated by 'size' using allocations from 1795 * highmem and non-highmem zones separately. 1796 */ 1797 pages_highmem = preallocate_image_highmem(highmem / 2); 1798 alloc = count - max_size; 1799 if (alloc > pages_highmem) 1800 alloc -= pages_highmem; 1801 else 1802 alloc = 0; 1803 pages = preallocate_image_memory(alloc, avail_normal); 1804 if (pages < alloc) { 1805 /* We have exhausted non-highmem pages, try highmem. */ 1806 alloc -= pages; 1807 pages += pages_highmem; 1808 pages_highmem = preallocate_image_highmem(alloc); 1809 if (pages_highmem < alloc) { 1810 pr_err("Image allocation is %lu pages short\n", 1811 alloc - pages_highmem); 1812 goto err_out; 1813 } 1814 pages += pages_highmem; 1815 /* 1816 * size is the desired number of saveable pages to leave in 1817 * memory, so try to preallocate (all memory - size) pages. 1818 */ 1819 alloc = (count - pages) - size; 1820 pages += preallocate_image_highmem(alloc); 1821 } else { 1822 /* 1823 * There are approximately max_size saveable pages at this point 1824 * and we want to reduce this number down to size. 1825 */ 1826 alloc = max_size - size; 1827 size = preallocate_highmem_fraction(alloc, highmem, count); 1828 pages_highmem += size; 1829 alloc -= size; 1830 size = preallocate_image_memory(alloc, avail_normal); 1831 pages_highmem += preallocate_image_highmem(alloc - size); 1832 pages += pages_highmem + size; 1833 } 1834 1835 /* 1836 * We only need as many page frames for the image as there are saveable 1837 * pages in memory, but we have allocated more. Release the excessive 1838 * ones now. 
1839 */ 1840 pages -= free_unnecessary_pages(); 1841 1842 out: 1843 stop = ktime_get(); 1844 pr_info("Allocated %lu pages for snapshot\n", pages); 1845 swsusp_show_speed(start, stop, pages, "Allocated"); 1846 1847 return 0; 1848 1849 err_out: 1850 swsusp_free(); 1851 return -ENOMEM; 1852 } 1853 1854 #ifdef CONFIG_HIGHMEM 1855 /** 1856 * count_pages_for_highmem - Count non-highmem pages needed for copying highmem. 1857 * 1858 * Compute the number of non-highmem pages that will be necessary for creating 1859 * copies of highmem pages. 1860 */ 1861 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) 1862 { 1863 unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; 1864 1865 if (free_highmem >= nr_highmem) 1866 nr_highmem = 0; 1867 else 1868 nr_highmem -= free_highmem; 1869 1870 return nr_highmem; 1871 } 1872 #else 1873 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; } 1874 #endif /* CONFIG_HIGHMEM */ 1875 1876 /** 1877 * enough_free_mem - Check if there is enough free memory for the image. 1878 */ 1879 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) 1880 { 1881 struct zone *zone; 1882 unsigned int free = alloc_normal; 1883 1884 for_each_populated_zone(zone) 1885 if (!is_highmem(zone)) 1886 free += zone_page_state(zone, NR_FREE_PAGES); 1887 1888 nr_pages += count_pages_for_highmem(nr_highmem); 1889 pr_debug("Normal pages needed: %u + %u, available pages: %u\n", 1890 nr_pages, PAGES_FOR_IO, free); 1891 1892 return free > nr_pages + PAGES_FOR_IO; 1893 } 1894 1895 #ifdef CONFIG_HIGHMEM 1896 /** 1897 * get_highmem_buffer - Allocate a buffer for highmem pages. 1898 * 1899 * If there are some highmem pages in the hibernation image, we may need a 1900 * buffer to copy them and/or load their data. 1901 */ 1902 static inline int get_highmem_buffer(int safe_needed) 1903 { 1904 buffer = get_image_page(GFP_ATOMIC, safe_needed); 1905 return buffer ? 
0 : -ENOMEM; 1906 } 1907 1908 /** 1909 * alloc_highmem_image_pages - Allocate some highmem pages for the image. 1910 * 1911 * Try to allocate as many pages as needed, but if the number of free highmem 1912 * pages is less than that, allocate them all. 1913 */ 1914 static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, 1915 unsigned int nr_highmem) 1916 { 1917 unsigned int to_alloc = count_free_highmem_pages(); 1918 1919 if (to_alloc > nr_highmem) 1920 to_alloc = nr_highmem; 1921 1922 nr_highmem -= to_alloc; 1923 while (to_alloc-- > 0) { 1924 struct page *page; 1925 1926 page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM); 1927 memory_bm_set_bit(bm, page_to_pfn(page)); 1928 } 1929 return nr_highmem; 1930 } 1931 #else 1932 static inline int get_highmem_buffer(int safe_needed) { return 0; } 1933 1934 static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, 1935 unsigned int n) { return 0; } 1936 #endif /* CONFIG_HIGHMEM */ 1937 1938 /** 1939 * swsusp_alloc - Allocate memory for hibernation image. 1940 * 1941 * We first try to allocate as many highmem pages as there are 1942 * saveable highmem pages in the system. If that fails, we allocate 1943 * non-highmem pages for the copies of the remaining highmem ones. 1944 * 1945 * In this approach it is likely that the copies of highmem pages will 1946 * also be located in the high memory, because of the way in which 1947 * copy_data_pages() works. 
1948 */ 1949 static int swsusp_alloc(struct memory_bitmap *copy_bm, 1950 unsigned int nr_pages, unsigned int nr_highmem) 1951 { 1952 if (nr_highmem > 0) { 1953 if (get_highmem_buffer(PG_ANY)) 1954 goto err_out; 1955 if (nr_highmem > alloc_highmem) { 1956 nr_highmem -= alloc_highmem; 1957 nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); 1958 } 1959 } 1960 if (nr_pages > alloc_normal) { 1961 nr_pages -= alloc_normal; 1962 while (nr_pages-- > 0) { 1963 struct page *page; 1964 1965 page = alloc_image_page(GFP_ATOMIC); 1966 if (!page) 1967 goto err_out; 1968 memory_bm_set_bit(copy_bm, page_to_pfn(page)); 1969 } 1970 } 1971 1972 return 0; 1973 1974 err_out: 1975 swsusp_free(); 1976 return -ENOMEM; 1977 } 1978 1979 asmlinkage __visible int swsusp_save(void) 1980 { 1981 unsigned int nr_pages, nr_highmem; 1982 1983 pr_info("Creating image:\n"); 1984 1985 drain_local_pages(NULL); 1986 nr_pages = count_data_pages(); 1987 nr_highmem = count_highmem_pages(); 1988 pr_info("Need to copy %u pages\n", nr_pages + nr_highmem); 1989 1990 if (!enough_free_mem(nr_pages, nr_highmem)) { 1991 pr_err("Not enough free memory\n"); 1992 return -ENOMEM; 1993 } 1994 1995 if (swsusp_alloc(©_bm, nr_pages, nr_highmem)) { 1996 pr_err("Memory allocation failed\n"); 1997 return -ENOMEM; 1998 } 1999 2000 /* 2001 * During allocating of suspend pagedir, new cold pages may appear. 2002 * Kill them. 2003 */ 2004 drain_local_pages(NULL); 2005 copy_data_pages(©_bm, &orig_bm); 2006 2007 /* 2008 * End of critical section. From now on, we can write to memory, 2009 * but we should not touch disk. This specially means we must _not_ 2010 * touch swap space! Except we must write out our image of course. 
2011 */ 2012 2013 nr_pages += nr_highmem; 2014 nr_copy_pages = nr_pages; 2015 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); 2016 2017 pr_info("Image created (%d pages copied)\n", nr_pages); 2018 2019 return 0; 2020 } 2021 2022 #ifndef CONFIG_ARCH_HIBERNATION_HEADER 2023 static int init_header_complete(struct swsusp_info *info) 2024 { 2025 memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); 2026 info->version_code = LINUX_VERSION_CODE; 2027 return 0; 2028 } 2029 2030 static char *check_image_kernel(struct swsusp_info *info) 2031 { 2032 if (info->version_code != LINUX_VERSION_CODE) 2033 return "kernel version"; 2034 if (strcmp(info->uts.sysname,init_utsname()->sysname)) 2035 return "system type"; 2036 if (strcmp(info->uts.release,init_utsname()->release)) 2037 return "kernel release"; 2038 if (strcmp(info->uts.version,init_utsname()->version)) 2039 return "version"; 2040 if (strcmp(info->uts.machine,init_utsname()->machine)) 2041 return "machine"; 2042 return NULL; 2043 } 2044 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ 2045 2046 unsigned long snapshot_get_image_size(void) 2047 { 2048 return nr_copy_pages + nr_meta_pages + 1; 2049 } 2050 2051 static int init_header(struct swsusp_info *info) 2052 { 2053 memset(info, 0, sizeof(struct swsusp_info)); 2054 info->num_physpages = get_num_physpages(); 2055 info->image_pages = nr_copy_pages; 2056 info->pages = snapshot_get_image_size(); 2057 info->size = info->pages; 2058 info->size <<= PAGE_SHIFT; 2059 return init_header_complete(info); 2060 } 2061 2062 /** 2063 * pack_pfns - Prepare PFNs for saving. 2064 * @bm: Memory bitmap. 2065 * @buf: Memory buffer to store the PFNs in. 2066 * 2067 * PFNs corresponding to set bits in @bm are stored in the area of memory 2068 * pointed to by @buf (1 page at a time). 
2069 */ 2070 static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) 2071 { 2072 int j; 2073 2074 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 2075 buf[j] = memory_bm_next_pfn(bm); 2076 if (unlikely(buf[j] == BM_END_OF_MAP)) 2077 break; 2078 /* Save page key for data page (s390 only). */ 2079 page_key_read(buf + j); 2080 } 2081 } 2082 2083 /** 2084 * snapshot_read_next - Get the address to read the next image page from. 2085 * @handle: Snapshot handle to be used for the reading. 2086 * 2087 * On the first call, @handle should point to a zeroed snapshot_handle 2088 * structure. The structure gets populated then and a pointer to it should be 2089 * passed to this function every next time. 2090 * 2091 * On success, the function returns a positive number. Then, the caller 2092 * is allowed to read up to the returned number of bytes from the memory 2093 * location computed by the data_of() macro. 2094 * 2095 * The function returns 0 to indicate the end of the data stream condition, 2096 * and negative numbers are returned on errors. If that happens, the structure 2097 * pointed to by @handle is not updated and should not be used any more. 
2098 */ 2099 int snapshot_read_next(struct snapshot_handle *handle) 2100 { 2101 if (handle->cur > nr_meta_pages + nr_copy_pages) 2102 return 0; 2103 2104 if (!buffer) { 2105 /* This makes the buffer be freed by swsusp_free() */ 2106 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2107 if (!buffer) 2108 return -ENOMEM; 2109 } 2110 if (!handle->cur) { 2111 int error; 2112 2113 error = init_header((struct swsusp_info *)buffer); 2114 if (error) 2115 return error; 2116 handle->buffer = buffer; 2117 memory_bm_position_reset(&orig_bm); 2118 memory_bm_position_reset(©_bm); 2119 } else if (handle->cur <= nr_meta_pages) { 2120 clear_page(buffer); 2121 pack_pfns(buffer, &orig_bm); 2122 } else { 2123 struct page *page; 2124 2125 page = pfn_to_page(memory_bm_next_pfn(©_bm)); 2126 if (PageHighMem(page)) { 2127 /* 2128 * Highmem pages are copied to the buffer, 2129 * because we can't return with a kmapped 2130 * highmem page (we may not be called again). 2131 */ 2132 void *kaddr; 2133 2134 kaddr = kmap_atomic(page); 2135 copy_page(buffer, kaddr); 2136 kunmap_atomic(kaddr); 2137 handle->buffer = buffer; 2138 } else { 2139 handle->buffer = page_address(page); 2140 } 2141 } 2142 handle->cur++; 2143 return PAGE_SIZE; 2144 } 2145 2146 static void duplicate_memory_bitmap(struct memory_bitmap *dst, 2147 struct memory_bitmap *src) 2148 { 2149 unsigned long pfn; 2150 2151 memory_bm_position_reset(src); 2152 pfn = memory_bm_next_pfn(src); 2153 while (pfn != BM_END_OF_MAP) { 2154 memory_bm_set_bit(dst, pfn); 2155 pfn = memory_bm_next_pfn(src); 2156 } 2157 } 2158 2159 /** 2160 * mark_unsafe_pages - Mark pages that were used before hibernation. 2161 * 2162 * Mark the pages that cannot be used for storing the image during restoration, 2163 * because they conflict with the pages that had been used before hibernation. 
2164 */ 2165 static void mark_unsafe_pages(struct memory_bitmap *bm) 2166 { 2167 unsigned long pfn; 2168 2169 /* Clear the "free"/"unsafe" bit for all PFNs */ 2170 memory_bm_position_reset(free_pages_map); 2171 pfn = memory_bm_next_pfn(free_pages_map); 2172 while (pfn != BM_END_OF_MAP) { 2173 memory_bm_clear_current(free_pages_map); 2174 pfn = memory_bm_next_pfn(free_pages_map); 2175 } 2176 2177 /* Mark pages that correspond to the "original" PFNs as "unsafe" */ 2178 duplicate_memory_bitmap(free_pages_map, bm); 2179 2180 allocated_unsafe_pages = 0; 2181 } 2182 2183 static int check_header(struct swsusp_info *info) 2184 { 2185 char *reason; 2186 2187 reason = check_image_kernel(info); 2188 if (!reason && info->num_physpages != get_num_physpages()) 2189 reason = "memory size"; 2190 if (reason) { 2191 pr_err("Image mismatch: %s\n", reason); 2192 return -EPERM; 2193 } 2194 return 0; 2195 } 2196 2197 /** 2198 * load header - Check the image header and copy the data from it. 2199 */ 2200 static int load_header(struct swsusp_info *info) 2201 { 2202 int error; 2203 2204 restore_pblist = NULL; 2205 error = check_header(info); 2206 if (!error) { 2207 nr_copy_pages = info->image_pages; 2208 nr_meta_pages = info->pages - info->image_pages - 1; 2209 } 2210 return error; 2211 } 2212 2213 /** 2214 * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap. 2215 * @bm: Memory bitmap. 2216 * @buf: Area of memory containing the PFNs. 2217 * 2218 * For each element of the array pointed to by @buf (1 page at a time), set the 2219 * corresponding bit in @bm. 2220 */ 2221 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) 2222 { 2223 int j; 2224 2225 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 2226 if (unlikely(buf[j] == BM_END_OF_MAP)) 2227 break; 2228 2229 /* Extract and buffer page key for data page (s390 only). 
 */
		page_key_memorize(buf + j);

		/* Reject PFNs that are not valid RAM on this kernel. */
		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j]))
			memory_bm_set_bit(bm, buf[j]);
		else
			return -EFAULT;
	}

	return 0;
}

#ifdef CONFIG_HIGHMEM
/*
 * struct highmem_pbe is used for creating the list of highmem pages that
 * should be restored atomically during the resume from disk, because the page
 * frames they have occupied before the suspend are in use.
 */
struct highmem_pbe {
	struct page *copy_page;	/* data is here now */
	struct page *orig_page;	/* data was here before the suspend */
	struct highmem_pbe *next;
};

/*
 * List of highmem PBEs needed for restoring the highmem pages that were
 * allocated before the suspend and included in the suspend image, but have
 * also been allocated by the "resume" kernel, so their contents cannot be
 * written directly to their "original" page frames.
 */
static struct highmem_pbe *highmem_pblist;

/**
 * count_highmem_image_pages - Compute the number of highmem pages in the image.
 * @bm: Memory bitmap.
 *
 * The bits in @bm that correspond to image pages are assumed to be set.
 */
static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
{
	unsigned long pfn;
	unsigned int cnt = 0;

	memory_bm_position_reset(bm);
	pfn = memory_bm_next_pfn(bm);
	while (pfn != BM_END_OF_MAP) {
		if (PageHighMem(pfn_to_page(pfn)))
			cnt++;

		pfn = memory_bm_next_pfn(bm);
	}
	return cnt;
}

/* Number of "safe" highmem pages allocated by prepare_highmem_image(). */
static unsigned int safe_highmem_pages;

/* Bitmap marking the allocated "safe" highmem pages. */
static struct memory_bitmap *safe_highmem_bm;

/**
 * prepare_highmem_image - Allocate memory for loading highmem data from image.
 * @bm: Pointer to an uninitialized memory bitmap structure.
 * @nr_highmem_p: Pointer to the number of highmem image pages.
 *
 * Try to allocate as many highmem pages as there are highmem image pages
 * (@nr_highmem_p points to the variable containing the number of highmem image
 * pages). The pages that are "safe" (ie. will not be overwritten when the
 * hibernation image is restored entirely) have the corresponding bits set in
 * @bm (it must be uninitialized).
 *
 * NOTE: This function should not be called if there are no highmem image pages.
 */
static int prepare_highmem_image(struct memory_bitmap *bm,
				 unsigned int *nr_highmem_p)
{
	unsigned int to_alloc;

	if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
		return -ENOMEM;

	if (get_highmem_buffer(PG_SAFE))
		return -ENOMEM;

	/* Never try to allocate more pages than are currently free. */
	to_alloc = count_free_highmem_pages();
	if (to_alloc > *nr_highmem_p)
		to_alloc = *nr_highmem_p;
	else
		*nr_highmem_p = to_alloc;

	safe_highmem_pages = 0;
	while (to_alloc-- > 0) {
		struct page *page;

		/*
		 * NOTE(review): alloc_page() can return NULL; the code appears
		 * to rely on to_alloc being bounded by the free highmem page
		 * count above — confirm that this cannot race with other
		 * allocations under memory pressure.
		 */
		page = alloc_page(__GFP_HIGHMEM);
		if (!swsusp_page_is_free(page)) {
			/* The page is "safe", set its bit in the bitmap */
			memory_bm_set_bit(bm, page_to_pfn(page));
			safe_highmem_pages++;
		}
		/* Mark the page as allocated */
		swsusp_set_page_forbidden(page);
		swsusp_set_page_free(page);
	}
	memory_bm_position_reset(bm);
	safe_highmem_bm = bm;
	return 0;
}

/* Page handed out via @buffer most recently; still awaits its data copy. */
static struct page *last_highmem_page;

/**
 * get_highmem_page_buffer - Prepare a buffer to store a highmem image page.
 *
 * For a given highmem image page get a buffer that suspend_write_next() should
 * return to its caller to write to.
 *
 * If the page is to be saved to its "original" page frame or a copy of
 * the page is to be made in the highmem, @buffer is returned. Otherwise,
 * the copy of the page is to be made in normal memory, so the address of
 * the copy is returned.
 *
 * If @buffer is returned, the caller of suspend_write_next() will write
 * the page's contents to @buffer, so they will have to be copied to the
 * right location on the next call to suspend_write_next() and it is done
 * with the help of copy_last_highmem_page(). For this purpose, if
 * @buffer is returned, @last_highmem_page is set to the page to which
 * the data will have to be copied from @buffer.
 */
static void *get_highmem_page_buffer(struct page *page,
				     struct chain_allocator *ca)
{
	struct highmem_pbe *pbe;
	void *kaddr;

	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
		/*
		 * We have allocated the "original" page frame and we can
		 * use it directly to store the loaded page.
		 */
		last_highmem_page = page;
		return buffer;
	}
	/*
	 * The "original" page frame has not been allocated and we have to
	 * use a "safe" page frame to store the loaded page.
	 */
	pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
	if (!pbe) {
		swsusp_free();
		return ERR_PTR(-ENOMEM);
	}
	pbe->orig_page = page;
	if (safe_highmem_pages > 0) {
		struct page *tmp;

		/* Copy of the page will be stored in high memory */
		kaddr = buffer;
		tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
		safe_highmem_pages--;
		last_highmem_page = tmp;
		pbe->copy_page = tmp;
	} else {
		/* Copy of the page will be stored in normal memory */
		kaddr = safe_pages_list;
		safe_pages_list = safe_pages_list->next;
		pbe->copy_page = virt_to_page(kaddr);
	}
	pbe->next = highmem_pblist;
	highmem_pblist = pbe;
	return kaddr;
}

/**
 * copy_last_highmem_page - Copy the most recent highmem image page.
 *
 * Copy the contents of a highmem image from @buffer, where the caller of
 * snapshot_write_next() has stored them, to the right location represented by
 * @last_highmem_page.
 */
static void copy_last_highmem_page(void)
{
	if (last_highmem_page) {
		void *dst;

		dst = kmap_atomic(last_highmem_page);
		copy_page(dst, buffer);
		kunmap_atomic(dst);
		last_highmem_page = NULL;
	}
}

/* Nonzero if there is no highmem page still awaiting a copy from @buffer. */
static inline int last_highmem_page_copied(void)
{
	return !last_highmem_page;
}

/* Release the "safe" highmem bitmap and the highmem transfer buffer. */
static inline void free_highmem_data(void)
{
	if (safe_highmem_bm)
		memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);

	if (buffer)
		free_image_page(buffer, PG_UNSAFE_CLEAR);
}
#else
/* !CONFIG_HIGHMEM stubs: there are no highmem pages to account for. */
static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }

static inline int prepare_highmem_image(struct memory_bitmap *bm,
					unsigned int *nr_highmem_p) { return 0; }

static inline void *get_highmem_page_buffer(struct page *page,
					    struct chain_allocator *ca)
{
	return ERR_PTR(-EINVAL);
}

static inline void copy_last_highmem_page(void) {}
static inline int last_highmem_page_copied(void) { return 1; }
static inline void free_highmem_data(void) {}
#endif /* CONFIG_HIGHMEM */

/* Number of struct pbe entries that fit into one linked page. */
#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))

/**
 * prepare_image - Make room for loading hibernation image.
 * @new_bm: Uninitialized memory bitmap structure.
 * @bm: Memory bitmap with unsafe pages marked.
 *
 * Use @bm to mark the pages that will be overwritten in the process of
 * restoring the system memory state from the suspend image ("unsafe" pages)
 * and allocate memory for the image.
 *
 * The idea is to allocate a new memory bitmap first and then allocate
 * as many pages as needed for image data, but without specifying what those
 * pages will be used for just yet. Instead, we mark them all as allocated and
 * create lists of "safe" pages to be used later. On systems with high
 * memory a list of "safe" highmem pages is created too.
 */
static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
{
	unsigned int nr_pages, nr_highmem;
	struct linked_page *lp;
	int error;

	/* If there is no highmem, the buffer will not be necessary */
	free_image_page(buffer, PG_UNSAFE_CLEAR);
	buffer = NULL;

	nr_highmem = count_highmem_image_pages(bm);
	mark_unsafe_pages(bm);

	error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
	if (error)
		goto Free;

	/*
	 * The contents of @bm are copied into @new_bm, after which @bm is
	 * freed and its structure reused below as the "safe" highmem bitmap.
	 */
	duplicate_memory_bitmap(new_bm, bm);
	memory_bm_free(bm, PG_UNSAFE_KEEP);
	if (nr_highmem > 0) {
		error = prepare_highmem_image(bm, &nr_highmem);
		if (error)
			goto Free;
	}
	/*
	 * Reserve some safe pages for potential later use.
	 *
	 * NOTE: This way we make sure there will be enough safe pages for the
	 * chain_alloc() in get_buffer(). It is a bit wasteful, but
	 * nr_copy_pages cannot be greater than 50% of the memory anyway.
	 *
	 * nr_copy_pages cannot be less than allocated_unsafe_pages too.
 */
	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
	while (nr_pages > 0) {
		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
		if (!lp) {
			error = -ENOMEM;
			goto Free;
		}
		lp->next = safe_pages_list;
		safe_pages_list = lp;
		nr_pages--;
	}
	/* Preallocate memory for the image */
	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
	while (nr_pages > 0) {
		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
		if (!lp) {
			error = -ENOMEM;
			goto Free;
		}
		if (!swsusp_page_is_free(virt_to_page(lp))) {
			/* The page is "safe", add it to the list */
			lp->next = safe_pages_list;
			safe_pages_list = lp;
		}
		/* Mark the page as allocated */
		swsusp_set_page_forbidden(virt_to_page(lp));
		swsusp_set_page_free(virt_to_page(lp));
		nr_pages--;
	}
	return 0;

 Free:
	/* Release everything allocated so far; callers see a clean failure. */
	swsusp_free();
	return error;
}

/**
 * get_buffer - Get the address to store the next image data page.
 *
 * Get the address that snapshot_write_next() should return to its caller to
 * write to.
 */
static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
{
	struct pbe *pbe;
	struct page *page;
	unsigned long pfn = memory_bm_next_pfn(bm);

	if (pfn == BM_END_OF_MAP)
		return ERR_PTR(-EFAULT);

	page = pfn_to_page(pfn);
	if (PageHighMem(page))
		return get_highmem_page_buffer(page, ca);

	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
		/*
		 * We have allocated the "original" page frame and we can
		 * use it directly to store the loaded page.
		 */
		return page_address(page);

	/*
	 * The "original" page frame has not been allocated and we have to
	 * use a "safe" page frame to store the loaded page.
 */
	pbe = chain_alloc(ca, sizeof(struct pbe));
	if (!pbe) {
		swsusp_free();
		return ERR_PTR(-ENOMEM);
	}
	/* Record the deferred copy so it can be replayed at resume time. */
	pbe->orig_address = page_address(page);
	pbe->address = safe_pages_list;
	safe_pages_list = safe_pages_list->next;
	pbe->next = restore_pblist;
	restore_pblist = pbe;
	return pbe->address;
}

/**
 * snapshot_write_next - Get the address to store the next image page.
 * @handle: Snapshot handle structure to guide the writing.
 *
 * On the first call, @handle should point to a zeroed snapshot_handle
 * structure. The structure gets populated then and a pointer to it should be
 * passed to this function every next time.
 *
 * On success, the function returns a positive number. Then, the caller
 * is allowed to write up to the returned number of bytes to the memory
 * location computed by the data_of() macro.
 *
 * The function returns 0 to indicate the "end of file" condition. Negative
 * numbers are returned on errors, in which cases the structure pointed to by
 * @handle is not updated and should not be used any more.
 */
int snapshot_write_next(struct snapshot_handle *handle)
{
	static struct chain_allocator ca;
	int error = 0;

	/* Check if we have already loaded the entire image */
	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
		return 0;

	handle->sync_read = 1;

	if (!handle->cur) {
		if (!buffer)
			/* This makes the buffer be freed by swsusp_free() */
			buffer = get_image_page(GFP_ATOMIC, PG_ANY);

		if (!buffer)
			return -ENOMEM;

		/* Page 0 of the stream is the image header. */
		handle->buffer = buffer;
	} else if (handle->cur == 1) {
		/* The header has just been written into @buffer; validate it. */
		error = load_header(buffer);
		if (error)
			return error;

		safe_pages_list = NULL;

		error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
		if (error)
			return error;

		/* Allocate buffer for page keys.
 */
		error = page_key_alloc(nr_copy_pages);
		if (error)
			return error;

		hibernate_restore_protection_begin();
	} else if (handle->cur <= nr_meta_pages + 1) {
		/* Pages 2..nr_meta_pages+1 carry the packed PFN lists. */
		error = unpack_orig_pfns(buffer, &copy_bm);
		if (error)
			return error;

		if (handle->cur == nr_meta_pages + 1) {
			/* All PFNs are in; set up the restore buffers now. */
			error = prepare_image(&orig_bm, &copy_bm);
			if (error)
				return error;

			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
			memory_bm_position_reset(&orig_bm);
			restore_pblist = NULL;
			handle->buffer = get_buffer(&orig_bm, &ca);
			handle->sync_read = 0;
			if (IS_ERR(handle->buffer))
				return PTR_ERR(handle->buffer);
		}
	} else {
		/* Data pages: finish the previous page, hand out the next. */
		copy_last_highmem_page();
		/* Restore page key for data page (s390 only). */
		page_key_write(handle->buffer);
		hibernate_restore_protect_page(handle->buffer);
		handle->buffer = get_buffer(&orig_bm, &ca);
		if (IS_ERR(handle->buffer))
			return PTR_ERR(handle->buffer);
		if (handle->buffer != buffer)
			handle->sync_read = 0;
	}
	handle->cur++;
	return PAGE_SIZE;
}

/**
 * snapshot_write_finalize - Complete the loading of a hibernation image.
 *
 * Must be called after the last call to snapshot_write_next() in case the last
 * page in the image happens to be a highmem page and its contents should be
 * stored in highmem. Additionally, it recycles bitmap memory that's not
 * necessary any more.
 */
void snapshot_write_finalize(struct snapshot_handle *handle)
{
	copy_last_highmem_page();
	/* Restore page key for data page (s390 only.
 */
	page_key_write(handle->buffer);
	page_key_free();
	hibernate_restore_protect_page(handle->buffer);
	/* Do that only if we have loaded the image entirely */
	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
		memory_bm_recycle(&orig_bm);
		free_highmem_data();
	}
}

/**
 * snapshot_image_loaded - Check whether the whole image has been loaded.
 * @handle: Snapshot handle used for loading the image.
 *
 * Return: nonzero if there were data pages, the last highmem page has been
 * copied out of @buffer, and snapshot_write_next() has consumed all metadata
 * and data pages; zero otherwise.
 */
int snapshot_image_loaded(struct snapshot_handle *handle)
{
	return !(!nr_copy_pages || !last_highmem_page_copied() ||
		 handle->cur <= nr_meta_pages + nr_copy_pages);
}

#ifdef CONFIG_HIGHMEM
/* Assumes that @buf is ready and points to a "safe" page */
static inline void swap_two_pages_data(struct page *p1, struct page *p2,
				       void *buf)
{
	void *kaddr1, *kaddr2;

	/* Three-way exchange through @buf: p1 <-> p2. */
	kaddr1 = kmap_atomic(p1);
	kaddr2 = kmap_atomic(p2);
	copy_page(buf, kaddr1);
	copy_page(kaddr1, kaddr2);
	copy_page(kaddr2, buf);
	kunmap_atomic(kaddr2);
	kunmap_atomic(kaddr1);
}

/**
 * restore_highmem - Put highmem image pages into their original locations.
 *
 * For each highmem page that was in use before hibernation and is included in
 * the image, and also has been allocated by the "restore" kernel, swap its
 * current contents with the previous (ie. "before hibernation") ones.
 *
 * If the restore eventually fails, we can call this function once again and
 * restore the highmem state as seen by the restore kernel.
 */
int restore_highmem(void)
{
	struct highmem_pbe *pbe = highmem_pblist;
	void *buf;

	if (!pbe)
		return 0;

	/* A "safe" bounce page is needed for the three-way page swap. */
	buf = get_image_page(GFP_ATOMIC, PG_SAFE);
	if (!buf)
		return -ENOMEM;

	while (pbe) {
		swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
		pbe = pbe->next;
	}
	free_image_page(buf, PG_UNSAFE_CLEAR);
	return 0;
}
#endif /* CONFIG_HIGHMEM */