1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * linux/kernel/power/snapshot.c 4 * 5 * This file provides system snapshot/restore functionality for swsusp. 6 * 7 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz> 8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 9 */ 10 11 #define pr_fmt(fmt) "PM: hibernation: " fmt 12 13 #include <linux/version.h> 14 #include <linux/module.h> 15 #include <linux/mm.h> 16 #include <linux/suspend.h> 17 #include <linux/delay.h> 18 #include <linux/bitops.h> 19 #include <linux/spinlock.h> 20 #include <linux/kernel.h> 21 #include <linux/pm.h> 22 #include <linux/device.h> 23 #include <linux/init.h> 24 #include <linux/memblock.h> 25 #include <linux/nmi.h> 26 #include <linux/syscalls.h> 27 #include <linux/console.h> 28 #include <linux/highmem.h> 29 #include <linux/list.h> 30 #include <linux/slab.h> 31 #include <linux/compiler.h> 32 #include <linux/ktime.h> 33 #include <linux/set_memory.h> 34 35 #include <linux/uaccess.h> 36 #include <asm/mmu_context.h> 37 #include <asm/tlbflush.h> 38 #include <asm/io.h> 39 40 #include "power.h" 41 42 #if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY) 43 static bool hibernate_restore_protection; 44 static bool hibernate_restore_protection_active; 45 46 void enable_restore_image_protection(void) 47 { 48 hibernate_restore_protection = true; 49 } 50 51 static inline void hibernate_restore_protection_begin(void) 52 { 53 hibernate_restore_protection_active = hibernate_restore_protection; 54 } 55 56 static inline void hibernate_restore_protection_end(void) 57 { 58 hibernate_restore_protection_active = false; 59 } 60 61 static inline void hibernate_restore_protect_page(void *page_address) 62 { 63 if (hibernate_restore_protection_active) 64 set_memory_ro((unsigned long)page_address, 1); 65 } 66 67 static inline void hibernate_restore_unprotect_page(void *page_address) 68 { 69 if (hibernate_restore_protection_active) 70 set_memory_rw((unsigned long)page_address, 1); 71 } 72 #else 73 static inline void hibernate_restore_protection_begin(void) {} 74 static inline void hibernate_restore_protection_end(void) {} 75 static inline void hibernate_restore_protect_page(void *page_address) {} 76 static inline void hibernate_restore_unprotect_page(void *page_address) {} 77 #endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */ 78 79 static int swsusp_page_is_free(struct page *); 80 static void swsusp_set_page_forbidden(struct page *); 81 static void swsusp_unset_page_forbidden(struct page *); 82 83 /* 84 * Number of bytes to reserve for memory allocations made by device drivers 85 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't 86 * cause image creation to fail (tunable via /sys/power/reserved_size). 87 */ 88 unsigned long reserved_size; 89 90 void __init hibernate_reserved_size_init(void) 91 { 92 reserved_size = SPARE_PAGES * PAGE_SIZE; 93 } 94 95 /* 96 * Preferred image size in bytes (tunable via /sys/power/image_size). 97 * When it is set to N, swsusp will do its best to ensure the image 98 * size will not exceed N bytes, but if that is impossible, it will 99 * try to create the smallest image possible. 100 */ 101 unsigned long image_size; 102 103 void __init hibernate_image_size_init(void) 104 { 105 image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE; 106 } 107 108 /* 109 * List of PBEs needed for restoring the pages that were allocated before 110 * the suspend and included in the suspend image, but have also been 111 * allocated by the "resume" kernel, so their contents cannot be written 112 * directly to their "original" page frames. 113 */ 114 struct pbe *restore_pblist; 115 116 /* struct linked_page is used to build chains of pages */ 117 118 #define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) 119 120 struct linked_page { 121 struct linked_page *next; 122 char data[LINKED_PAGE_DATA_SIZE]; 123 } __packed; 124 125 /* 126 * List of "safe" pages (ie. pages that were not used by the image kernel 127 * before hibernation) that may be used as temporary storage for image kernel 128 * memory contents. 129 */ 130 static struct linked_page *safe_pages_list; 131 132 /* Pointer to an auxiliary buffer (1 page) */ 133 static void *buffer; 134 135 #define PG_ANY 0 136 #define PG_SAFE 1 137 #define PG_UNSAFE_CLEAR 1 138 #define PG_UNSAFE_KEEP 0 139 140 static unsigned int allocated_unsafe_pages; 141 142 /** 143 * get_image_page - Allocate a page for a hibernation image. 144 * @gfp_mask: GFP mask for the allocation. 145 * @safe_needed: Get pages that were not used before hibernation (restore only) 146 * 147 * During image restoration, for storing the PBE list and the image data, we can 148 * only use memory pages that do not conflict with the pages used before 149 * hibernation. The "unsafe" pages have PageNosaveFree set and we count them 150 * using allocated_unsafe_pages. 151 * 152 * Each allocated image page is marked as PageNosave and PageNosaveFree so that 153 * swsusp_free() can release it. 154 */ 155 static void *get_image_page(gfp_t gfp_mask, int safe_needed) 156 { 157 void *res; 158 159 res = (void *)get_zeroed_page(gfp_mask); 160 if (safe_needed) 161 while (res && swsusp_page_is_free(virt_to_page(res))) { 162 /* The page is unsafe, mark it for swsusp_free() */ 163 swsusp_set_page_forbidden(virt_to_page(res)); 164 allocated_unsafe_pages++; 165 res = (void *)get_zeroed_page(gfp_mask); 166 } 167 if (res) { 168 swsusp_set_page_forbidden(virt_to_page(res)); 169 swsusp_set_page_free(virt_to_page(res)); 170 } 171 return res; 172 } 173 174 static void *__get_safe_page(gfp_t gfp_mask) 175 { 176 if (safe_pages_list) { 177 void *ret = safe_pages_list; 178 179 safe_pages_list = safe_pages_list->next; 180 memset(ret, 0, PAGE_SIZE); 181 return ret; 182 } 183 return get_image_page(gfp_mask, PG_SAFE); 184 } 185 186 unsigned long get_safe_page(gfp_t gfp_mask) 187 { 188 return (unsigned long)__get_safe_page(gfp_mask); 189 } 190 191 static struct page *alloc_image_page(gfp_t gfp_mask) 192 { 193 struct page *page; 194 195 page = alloc_page(gfp_mask); 196 if (page) { 197 swsusp_set_page_forbidden(page); 198 swsusp_set_page_free(page); 199 } 200 return page; 201 } 202 203 static void recycle_safe_page(void *page_address) 204 { 205 struct linked_page *lp = page_address; 206 207 lp->next = safe_pages_list; 208 safe_pages_list = lp; 209 } 210 211 /** 212 * free_image_page - Free a page allocated for hibernation image. 213 * @addr: Address of the page to free. 214 * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page. 215 * 216 * The page to free should have been allocated by get_image_page() (page flags 217 * set by it are affected). 218 */ 219 static inline void free_image_page(void *addr, int clear_nosave_free) 220 { 221 struct page *page; 222 223 BUG_ON(!virt_addr_valid(addr)); 224 225 page = virt_to_page(addr); 226 227 swsusp_unset_page_forbidden(page); 228 if (clear_nosave_free) 229 swsusp_unset_page_free(page); 230 231 __free_page(page); 232 } 233 234 static inline void free_list_of_pages(struct linked_page *list, 235 int clear_page_nosave) 236 { 237 while (list) { 238 struct linked_page *lp = list->next; 239 240 free_image_page(list, clear_page_nosave); 241 list = lp; 242 } 243 } 244 245 /* 246 * struct chain_allocator is used for allocating small objects out of 247 * a linked list of pages called 'the chain'. 248 * 249 * The chain grows each time when there is no room for a new object in 250 * the current page. The allocated objects cannot be freed individually. 251 * It is only possible to free them all at once, by freeing the entire 252 * chain. 253 * 254 * NOTE: The chain allocator may be inefficient if the allocated objects 255 * are not much smaller than PAGE_SIZE. 256 */ 257 struct chain_allocator { 258 struct linked_page *chain; /* the chain */ 259 unsigned int used_space; /* total size of objects allocated out 260 of the current page */ 261 gfp_t gfp_mask; /* mask for allocating pages */ 262 int safe_needed; /* if set, only "safe" pages are allocated */ 263 }; 264 265 static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask, 266 int safe_needed) 267 { 268 ca->chain = NULL; 269 ca->used_space = LINKED_PAGE_DATA_SIZE; 270 ca->gfp_mask = gfp_mask; 271 ca->safe_needed = safe_needed; 272 } 273 274 static void *chain_alloc(struct chain_allocator *ca, unsigned int size) 275 { 276 void *ret; 277 278 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { 279 struct linked_page *lp; 280 281 lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) : 282 get_image_page(ca->gfp_mask, PG_ANY); 283 if (!lp) 284 return NULL; 285 286 lp->next = ca->chain; 287 ca->chain = lp; 288 ca->used_space = 0; 289 } 290 ret = ca->chain->data + ca->used_space; 291 ca->used_space += size; 292 return ret; 293 } 294 295 /** 296 * Data types related to memory bitmaps. 297 * 298 * Memory bitmap is a structure consiting of many linked lists of 299 * objects. The main list's elements are of type struct zone_bitmap 300 * and each of them corresonds to one zone. For each zone bitmap 301 * object there is a list of objects of type struct bm_block that 302 * represent each blocks of bitmap in which information is stored. 303 * 304 * struct memory_bitmap contains a pointer to the main list of zone 305 * bitmap objects, a struct bm_position used for browsing the bitmap, 306 * and a pointer to the list of pages used for allocating all of the 307 * zone bitmap objects and bitmap block objects. 308 * 309 * NOTE: It has to be possible to lay out the bitmap in memory 310 * using only allocations of order 0. Additionally, the bitmap is 311 * designed to work with arbitrary number of zones (this is over the 312 * top for now, but let's avoid making unnecessary assumptions ;-). 313 * 314 * struct zone_bitmap contains a pointer to a list of bitmap block 315 * objects and a pointer to the bitmap block object that has been 316 * most recently used for setting bits. Additionally, it contains the 317 * PFNs that correspond to the start and end of the represented zone. 318 * 319 * struct bm_block contains a pointer to the memory page in which 320 * information is stored (in the form of a block of bitmap) 321 * It also contains the pfns that correspond to the start and end of 322 * the represented memory area. 323 * 324 * The memory bitmap is organized as a radix tree to guarantee fast random 325 * access to the bits. There is one radix tree for each zone (as returned 326 * from create_mem_extents). 327 * 328 * One radix tree is represented by one struct mem_zone_bm_rtree. There are 329 * two linked lists for the nodes of the tree, one for the inner nodes and 330 * one for the leave nodes. The linked leave nodes are used for fast linear 331 * access of the memory bitmap. 332 * 333 * The struct rtree_node represents one node of the radix tree. 334 */ 335 336 #define BM_END_OF_MAP (~0UL) 337 338 #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) 339 #define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) 340 #define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) 341 342 /* 343 * struct rtree_node is a wrapper struct to link the nodes 344 * of the rtree together for easy linear iteration over 345 * bits and easy freeing 346 */ 347 struct rtree_node { 348 struct list_head list; 349 unsigned long *data; 350 }; 351 352 /* 353 * struct mem_zone_bm_rtree represents a bitmap used for one 354 * populated memory zone. 355 */ 356 struct mem_zone_bm_rtree { 357 struct list_head list; /* Link Zones together */ 358 struct list_head nodes; /* Radix Tree inner nodes */ 359 struct list_head leaves; /* Radix Tree leaves */ 360 unsigned long start_pfn; /* Zone start page frame */ 361 unsigned long end_pfn; /* Zone end page frame + 1 */ 362 struct rtree_node *rtree; /* Radix Tree Root */ 363 int levels; /* Number of Radix Tree Levels */ 364 unsigned int blocks; /* Number of Bitmap Blocks */ 365 }; 366 367 /* strcut bm_position is used for browsing memory bitmaps */ 368 369 struct bm_position { 370 struct mem_zone_bm_rtree *zone; 371 struct rtree_node *node; 372 unsigned long node_pfn; 373 int node_bit; 374 }; 375 376 struct memory_bitmap { 377 struct list_head zones; 378 struct linked_page *p_list; /* list of pages used to store zone 379 bitmap objects and bitmap block 380 objects */ 381 struct bm_position cur; /* most recently used bit position */ 382 }; 383 384 /* Functions that operate on memory bitmaps */ 385 386 #define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long)) 387 #if BITS_PER_LONG == 32 388 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2) 389 #else 390 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3) 391 #endif 392 #define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) 393 394 /** 395 * alloc_rtree_node - Allocate a new node and add it to the radix tree. 396 * 397 * This function is used to allocate inner nodes as well as the 398 * leave nodes of the radix tree. It also adds the node to the 399 * corresponding linked list passed in by the *list parameter. 400 */ 401 static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, 402 struct chain_allocator *ca, 403 struct list_head *list) 404 { 405 struct rtree_node *node; 406 407 node = chain_alloc(ca, sizeof(struct rtree_node)); 408 if (!node) 409 return NULL; 410 411 node->data = get_image_page(gfp_mask, safe_needed); 412 if (!node->data) 413 return NULL; 414 415 list_add_tail(&node->list, list); 416 417 return node; 418 } 419 420 /** 421 * add_rtree_block - Add a new leave node to the radix tree. 422 * 423 * The leave nodes need to be allocated in order to keep the leaves 424 * linked list in order. This is guaranteed by the zone->blocks 425 * counter. 426 */ 427 static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, 428 int safe_needed, struct chain_allocator *ca) 429 { 430 struct rtree_node *node, *block, **dst; 431 unsigned int levels_needed, block_nr; 432 int i; 433 434 block_nr = zone->blocks; 435 levels_needed = 0; 436 437 /* How many levels do we need for this block nr? */ 438 while (block_nr) { 439 levels_needed += 1; 440 block_nr >>= BM_RTREE_LEVEL_SHIFT; 441 } 442 443 /* Make sure the rtree has enough levels */ 444 for (i = zone->levels; i < levels_needed; i++) { 445 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 446 &zone->nodes); 447 if (!node) 448 return -ENOMEM; 449 450 node->data[0] = (unsigned long)zone->rtree; 451 zone->rtree = node; 452 zone->levels += 1; 453 } 454 455 /* Allocate new block */ 456 block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves); 457 if (!block) 458 return -ENOMEM; 459 460 /* Now walk the rtree to insert the block */ 461 node = zone->rtree; 462 dst = &zone->rtree; 463 block_nr = zone->blocks; 464 for (i = zone->levels; i > 0; i--) { 465 int index; 466 467 if (!node) { 468 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 469 &zone->nodes); 470 if (!node) 471 return -ENOMEM; 472 *dst = node; 473 } 474 475 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 476 index &= BM_RTREE_LEVEL_MASK; 477 dst = (struct rtree_node **)&((*dst)->data[index]); 478 node = *dst; 479 } 480 481 zone->blocks += 1; 482 *dst = block; 483 484 return 0; 485 } 486 487 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 488 int clear_nosave_free); 489 490 /** 491 * create_zone_bm_rtree - Create a radix tree for one zone. 492 * 493 * Allocated the mem_zone_bm_rtree structure and initializes it. 494 * This function also allocated and builds the radix tree for the 495 * zone. 496 */ 497 static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask, 498 int safe_needed, 499 struct chain_allocator *ca, 500 unsigned long start, 501 unsigned long end) 502 { 503 struct mem_zone_bm_rtree *zone; 504 unsigned int i, nr_blocks; 505 unsigned long pages; 506 507 pages = end - start; 508 zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree)); 509 if (!zone) 510 return NULL; 511 512 INIT_LIST_HEAD(&zone->nodes); 513 INIT_LIST_HEAD(&zone->leaves); 514 zone->start_pfn = start; 515 zone->end_pfn = end; 516 nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); 517 518 for (i = 0; i < nr_blocks; i++) { 519 if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) { 520 free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR); 521 return NULL; 522 } 523 } 524 525 return zone; 526 } 527 528 /** 529 * free_zone_bm_rtree - Free the memory of the radix tree. 530 * 531 * Free all node pages of the radix tree. The mem_zone_bm_rtree 532 * structure itself is not freed here nor are the rtree_node 533 * structs. 534 */ 535 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 536 int clear_nosave_free) 537 { 538 struct rtree_node *node; 539 540 list_for_each_entry(node, &zone->nodes, list) 541 free_image_page(node->data, clear_nosave_free); 542 543 list_for_each_entry(node, &zone->leaves, list) 544 free_image_page(node->data, clear_nosave_free); 545 } 546 547 static void memory_bm_position_reset(struct memory_bitmap *bm) 548 { 549 bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, 550 list); 551 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 552 struct rtree_node, list); 553 bm->cur.node_pfn = 0; 554 bm->cur.node_bit = 0; 555 } 556 557 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); 558 559 struct mem_extent { 560 struct list_head hook; 561 unsigned long start; 562 unsigned long end; 563 }; 564 565 /** 566 * free_mem_extents - Free a list of memory extents. 567 * @list: List of extents to free. 568 */ 569 static void free_mem_extents(struct list_head *list) 570 { 571 struct mem_extent *ext, *aux; 572 573 list_for_each_entry_safe(ext, aux, list, hook) { 574 list_del(&ext->hook); 575 kfree(ext); 576 } 577 } 578 579 /** 580 * create_mem_extents - Create a list of memory extents. 581 * @list: List to put the extents into. 582 * @gfp_mask: Mask to use for memory allocations. 583 * 584 * The extents represent contiguous ranges of PFNs. 585 */ 586 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) 587 { 588 struct zone *zone; 589 590 INIT_LIST_HEAD(list); 591 592 for_each_populated_zone(zone) { 593 unsigned long zone_start, zone_end; 594 struct mem_extent *ext, *cur, *aux; 595 596 zone_start = zone->zone_start_pfn; 597 zone_end = zone_end_pfn(zone); 598 599 list_for_each_entry(ext, list, hook) 600 if (zone_start <= ext->end) 601 break; 602 603 if (&ext->hook == list || zone_end < ext->start) { 604 /* New extent is necessary */ 605 struct mem_extent *new_ext; 606 607 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); 608 if (!new_ext) { 609 free_mem_extents(list); 610 return -ENOMEM; 611 } 612 new_ext->start = zone_start; 613 new_ext->end = zone_end; 614 list_add_tail(&new_ext->hook, &ext->hook); 615 continue; 616 } 617 618 /* Merge this zone's range of PFNs with the existing one */ 619 if (zone_start < ext->start) 620 ext->start = zone_start; 621 if (zone_end > ext->end) 622 ext->end = zone_end; 623 624 /* More merging may be possible */ 625 cur = ext; 626 list_for_each_entry_safe_continue(cur, aux, list, hook) { 627 if (zone_end < cur->start) 628 break; 629 if (zone_end < cur->end) 630 ext->end = cur->end; 631 list_del(&cur->hook); 632 kfree(cur); 633 } 634 } 635 636 return 0; 637 } 638 639 /** 640 * memory_bm_create - Allocate memory for a memory bitmap. 641 */ 642 static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, 643 int safe_needed) 644 { 645 struct chain_allocator ca; 646 struct list_head mem_extents; 647 struct mem_extent *ext; 648 int error; 649 650 chain_init(&ca, gfp_mask, safe_needed); 651 INIT_LIST_HEAD(&bm->zones); 652 653 error = create_mem_extents(&mem_extents, gfp_mask); 654 if (error) 655 return error; 656 657 list_for_each_entry(ext, &mem_extents, hook) { 658 struct mem_zone_bm_rtree *zone; 659 660 zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, 661 ext->start, ext->end); 662 if (!zone) { 663 error = -ENOMEM; 664 goto Error; 665 } 666 list_add_tail(&zone->list, &bm->zones); 667 } 668 669 bm->p_list = ca.chain; 670 memory_bm_position_reset(bm); 671 Exit: 672 free_mem_extents(&mem_extents); 673 return error; 674 675 Error: 676 bm->p_list = ca.chain; 677 memory_bm_free(bm, PG_UNSAFE_CLEAR); 678 goto Exit; 679 } 680 681 /** 682 * memory_bm_free - Free memory occupied by the memory bitmap. 683 * @bm: Memory bitmap. 684 */ 685 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) 686 { 687 struct mem_zone_bm_rtree *zone; 688 689 list_for_each_entry(zone, &bm->zones, list) 690 free_zone_bm_rtree(zone, clear_nosave_free); 691 692 free_list_of_pages(bm->p_list, clear_nosave_free); 693 694 INIT_LIST_HEAD(&bm->zones); 695 } 696 697 /** 698 * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap. 699 * 700 * Find the bit in memory bitmap @bm that corresponds to the given PFN. 701 * The cur.zone, cur.block and cur.node_pfn members of @bm are updated. 702 * 703 * Walk the radix tree to find the page containing the bit that represents @pfn 704 * and return the position of the bit in @addr and @bit_nr. 705 */ 706 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, 707 void **addr, unsigned int *bit_nr) 708 { 709 struct mem_zone_bm_rtree *curr, *zone; 710 struct rtree_node *node; 711 int i, block_nr; 712 713 zone = bm->cur.zone; 714 715 if (pfn >= zone->start_pfn && pfn < zone->end_pfn) 716 goto zone_found; 717 718 zone = NULL; 719 720 /* Find the right zone */ 721 list_for_each_entry(curr, &bm->zones, list) { 722 if (pfn >= curr->start_pfn && pfn < curr->end_pfn) { 723 zone = curr; 724 break; 725 } 726 } 727 728 if (!zone) 729 return -EFAULT; 730 731 zone_found: 732 /* 733 * We have found the zone. Now walk the radix tree to find the leaf node 734 * for our PFN. 735 */ 736 737 /* 738 * If the zone we wish to scan is the the current zone and the 739 * pfn falls into the current node then we do not need to walk 740 * the tree. 741 */ 742 node = bm->cur.node; 743 if (zone == bm->cur.zone && 744 ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) 745 goto node_found; 746 747 node = zone->rtree; 748 block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT; 749 750 for (i = zone->levels; i > 0; i--) { 751 int index; 752 753 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 754 index &= BM_RTREE_LEVEL_MASK; 755 BUG_ON(node->data[index] == 0); 756 node = (struct rtree_node *)node->data[index]; 757 } 758 759 node_found: 760 /* Update last position */ 761 bm->cur.zone = zone; 762 bm->cur.node = node; 763 bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK; 764 765 /* Set return values */ 766 *addr = node->data; 767 *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK; 768 769 return 0; 770 } 771 772 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) 773 { 774 void *addr; 775 unsigned int bit; 776 int error; 777 778 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 779 BUG_ON(error); 780 set_bit(bit, addr); 781 } 782 783 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) 784 { 785 void *addr; 786 unsigned int bit; 787 int error; 788 789 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 790 if (!error) 791 set_bit(bit, addr); 792 793 return error; 794 } 795 796 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) 797 { 798 void *addr; 799 unsigned int bit; 800 int error; 801 802 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 803 BUG_ON(error); 804 clear_bit(bit, addr); 805 } 806 807 static void memory_bm_clear_current(struct memory_bitmap *bm) 808 { 809 int bit; 810 811 bit = max(bm->cur.node_bit - 1, 0); 812 clear_bit(bit, bm->cur.node->data); 813 } 814 815 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) 816 { 817 void *addr; 818 unsigned int bit; 819 int error; 820 821 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 822 BUG_ON(error); 823 return test_bit(bit, addr); 824 } 825 826 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) 827 { 828 void *addr; 829 unsigned int bit; 830 831 return !memory_bm_find_bit(bm, pfn, &addr, &bit); 832 } 833 834 /* 835 * rtree_next_node - Jump to the next leaf node. 836 * 837 * Set the position to the beginning of the next node in the 838 * memory bitmap. This is either the next node in the current 839 * zone's radix tree or the first node in the radix tree of the 840 * next zone. 841 * 842 * Return true if there is a next node, false otherwise. 843 */ 844 static bool rtree_next_node(struct memory_bitmap *bm) 845 { 846 if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) { 847 bm->cur.node = list_entry(bm->cur.node->list.next, 848 struct rtree_node, list); 849 bm->cur.node_pfn += BM_BITS_PER_BLOCK; 850 bm->cur.node_bit = 0; 851 touch_softlockup_watchdog(); 852 return true; 853 } 854 855 /* No more nodes, goto next zone */ 856 if (!list_is_last(&bm->cur.zone->list, &bm->zones)) { 857 bm->cur.zone = list_entry(bm->cur.zone->list.next, 858 struct mem_zone_bm_rtree, list); 859 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 860 struct rtree_node, list); 861 bm->cur.node_pfn = 0; 862 bm->cur.node_bit = 0; 863 return true; 864 } 865 866 /* No more zones */ 867 return false; 868 } 869 870 /** 871 * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap. 872 * @bm: Memory bitmap. 873 * 874 * Starting from the last returned position this function searches for the next 875 * set bit in @bm and returns the PFN represented by it. If no more bits are 876 * set, BM_END_OF_MAP is returned. 877 * 878 * It is required to run memory_bm_position_reset() before the first call to 879 * this function for the given memory bitmap. 880 */ 881 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) 882 { 883 unsigned long bits, pfn, pages; 884 int bit; 885 886 do { 887 pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn; 888 bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK); 889 bit = find_next_bit(bm->cur.node->data, bits, 890 bm->cur.node_bit); 891 if (bit < bits) { 892 pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit; 893 bm->cur.node_bit = bit + 1; 894 return pfn; 895 } 896 } while (rtree_next_node(bm)); 897 898 return BM_END_OF_MAP; 899 } 900 901 /* 902 * This structure represents a range of page frames the contents of which 903 * should not be saved during hibernation. 904 */ 905 struct nosave_region { 906 struct list_head list; 907 unsigned long start_pfn; 908 unsigned long end_pfn; 909 }; 910 911 static LIST_HEAD(nosave_regions); 912 913 static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone) 914 { 915 struct rtree_node *node; 916 917 list_for_each_entry(node, &zone->nodes, list) 918 recycle_safe_page(node->data); 919 920 list_for_each_entry(node, &zone->leaves, list) 921 recycle_safe_page(node->data); 922 } 923 924 static void memory_bm_recycle(struct memory_bitmap *bm) 925 { 926 struct mem_zone_bm_rtree *zone; 927 struct linked_page *p_list; 928 929 list_for_each_entry(zone, &bm->zones, list) 930 recycle_zone_bm_rtree(zone); 931 932 p_list = bm->p_list; 933 while (p_list) { 934 struct linked_page *lp = p_list; 935 936 p_list = lp->next; 937 recycle_safe_page(lp); 938 } 939 } 940 941 /** 942 * register_nosave_region - Register a region of unsaveable memory. 943 * 944 * Register a range of page frames the contents of which should not be saved 945 * during hibernation (to be used in the early initialization code). 946 */ 947 void __init __register_nosave_region(unsigned long start_pfn, 948 unsigned long end_pfn, int use_kmalloc) 949 { 950 struct nosave_region *region; 951 952 if (start_pfn >= end_pfn) 953 return; 954 955 if (!list_empty(&nosave_regions)) { 956 /* Try to extend the previous region (they should be sorted) */ 957 region = list_entry(nosave_regions.prev, 958 struct nosave_region, list); 959 if (region->end_pfn == start_pfn) { 960 region->end_pfn = end_pfn; 961 goto Report; 962 } 963 } 964 if (use_kmalloc) { 965 /* During init, this shouldn't fail */ 966 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); 967 BUG_ON(!region); 968 } else { 969 /* This allocation cannot fail */ 970 region = memblock_alloc(sizeof(struct nosave_region), 971 SMP_CACHE_BYTES); 972 if (!region) 973 panic("%s: Failed to allocate %zu bytes\n", __func__, 974 sizeof(struct nosave_region)); 975 } 976 region->start_pfn = start_pfn; 977 region->end_pfn = end_pfn; 978 list_add_tail(®ion->list, &nosave_regions); 979 Report: 980 pr_info("Registered nosave memory: [mem %#010llx-%#010llx]\n", 981 (unsigned long long) start_pfn << PAGE_SHIFT, 982 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); 983 } 984 985 /* 986 * Set bits in this map correspond to the page frames the contents of which 987 * should not be saved during the suspend. 988 */ 989 static struct memory_bitmap *forbidden_pages_map; 990 991 /* Set bits in this map correspond to free page frames. */ 992 static struct memory_bitmap *free_pages_map; 993 994 /* 995 * Each page frame allocated for creating the image is marked by setting the 996 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously 997 */ 998 999 void swsusp_set_page_free(struct page *page) 1000 { 1001 if (free_pages_map) 1002 memory_bm_set_bit(free_pages_map, page_to_pfn(page)); 1003 } 1004 1005 static int swsusp_page_is_free(struct page *page) 1006 { 1007 return free_pages_map ? 1008 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; 1009 } 1010 1011 void swsusp_unset_page_free(struct page *page) 1012 { 1013 if (free_pages_map) 1014 memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); 1015 } 1016 1017 static void swsusp_set_page_forbidden(struct page *page) 1018 { 1019 if (forbidden_pages_map) 1020 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); 1021 } 1022 1023 int swsusp_page_is_forbidden(struct page *page) 1024 { 1025 return forbidden_pages_map ? 1026 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; 1027 } 1028 1029 static void swsusp_unset_page_forbidden(struct page *page) 1030 { 1031 if (forbidden_pages_map) 1032 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); 1033 } 1034 1035 /** 1036 * mark_nosave_pages - Mark pages that should not be saved. 1037 * @bm: Memory bitmap. 1038 * 1039 * Set the bits in @bm that correspond to the page frames the contents of which 1040 * should not be saved. 1041 */ 1042 static void mark_nosave_pages(struct memory_bitmap *bm) 1043 { 1044 struct nosave_region *region; 1045 1046 if (list_empty(&nosave_regions)) 1047 return; 1048 1049 list_for_each_entry(region, &nosave_regions, list) { 1050 unsigned long pfn; 1051 1052 pr_debug("Marking nosave pages: [mem %#010llx-%#010llx]\n", 1053 (unsigned long long) region->start_pfn << PAGE_SHIFT, 1054 ((unsigned long long) region->end_pfn << PAGE_SHIFT) 1055 - 1); 1056 1057 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) 1058 if (pfn_valid(pfn)) { 1059 /* 1060 * It is safe to ignore the result of 1061 * mem_bm_set_bit_check() here, since we won't 1062 * touch the PFNs for which the error is 1063 * returned anyway. 1064 */ 1065 mem_bm_set_bit_check(bm, pfn); 1066 } 1067 } 1068 } 1069 1070 /** 1071 * create_basic_memory_bitmaps - Create bitmaps to hold basic page information. 1072 * 1073 * Create bitmaps needed for marking page frames that should not be saved and 1074 * free page frames. The forbidden_pages_map and free_pages_map pointers are 1075 * only modified if everything goes well, because we don't want the bits to be 1076 * touched before both bitmaps are set up. 1077 */ 1078 int create_basic_memory_bitmaps(void) 1079 { 1080 struct memory_bitmap *bm1, *bm2; 1081 int error = 0; 1082 1083 if (forbidden_pages_map && free_pages_map) 1084 return 0; 1085 else 1086 BUG_ON(forbidden_pages_map || free_pages_map); 1087 1088 bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 1089 if (!bm1) 1090 return -ENOMEM; 1091 1092 error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); 1093 if (error) 1094 goto Free_first_object; 1095 1096 bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 1097 if (!bm2) 1098 goto Free_first_bitmap; 1099 1100 error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); 1101 if (error) 1102 goto Free_second_object; 1103 1104 forbidden_pages_map = bm1; 1105 free_pages_map = bm2; 1106 mark_nosave_pages(forbidden_pages_map); 1107 1108 pr_debug("Basic memory bitmaps created\n"); 1109 1110 return 0; 1111 1112 Free_second_object: 1113 kfree(bm2); 1114 Free_first_bitmap: 1115 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1116 Free_first_object: 1117 kfree(bm1); 1118 return -ENOMEM; 1119 } 1120 1121 /** 1122 * free_basic_memory_bitmaps - Free memory bitmaps holding basic information. 1123 * 1124 * Free memory bitmaps allocated by create_basic_memory_bitmaps(). The 1125 * auxiliary pointers are necessary so that the bitmaps themselves are not 1126 * referred to while they are being freed. 1127 */ 1128 void free_basic_memory_bitmaps(void) 1129 { 1130 struct memory_bitmap *bm1, *bm2; 1131 1132 if (WARN_ON(!(forbidden_pages_map && free_pages_map))) 1133 return; 1134 1135 bm1 = forbidden_pages_map; 1136 bm2 = free_pages_map; 1137 forbidden_pages_map = NULL; 1138 free_pages_map = NULL; 1139 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1140 kfree(bm1); 1141 memory_bm_free(bm2, PG_UNSAFE_CLEAR); 1142 kfree(bm2); 1143 1144 pr_debug("Basic memory bitmaps freed\n"); 1145 } 1146 1147 void clear_free_pages(void) 1148 { 1149 struct memory_bitmap *bm = free_pages_map; 1150 unsigned long pfn; 1151 1152 if (WARN_ON(!(free_pages_map))) 1153 return; 1154 1155 if (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) || want_init_on_free()) { 1156 memory_bm_position_reset(bm); 1157 pfn = memory_bm_next_pfn(bm); 1158 while (pfn != BM_END_OF_MAP) { 1159 if (pfn_valid(pfn)) 1160 clear_highpage(pfn_to_page(pfn)); 1161 1162 pfn = memory_bm_next_pfn(bm); 1163 } 1164 memory_bm_position_reset(bm); 1165 pr_info("free pages cleared after restore\n"); 1166 } 1167 } 1168 1169 /** 1170 * snapshot_additional_pages - Estimate the number of extra pages needed. 1171 * @zone: Memory zone to carry out the computation for. 1172 * 1173 * Estimate the number of additional pages needed for setting up a hibernation 1174 * image data structures for @zone (usually, the returned value is greater than 1175 * the exact number). 1176 */ 1177 unsigned int snapshot_additional_pages(struct zone *zone) 1178 { 1179 unsigned int rtree, nodes; 1180 1181 rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); 1182 rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), 1183 LINKED_PAGE_DATA_SIZE); 1184 while (nodes > 1) { 1185 nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL); 1186 rtree += nodes; 1187 } 1188 1189 return 2 * rtree; 1190 } 1191 1192 #ifdef CONFIG_HIGHMEM 1193 /** 1194 * count_free_highmem_pages - Compute the total number of free highmem pages. 1195 * 1196 * The returned number is system-wide. 1197 */ 1198 static unsigned int count_free_highmem_pages(void) 1199 { 1200 struct zone *zone; 1201 unsigned int cnt = 0; 1202 1203 for_each_populated_zone(zone) 1204 if (is_highmem(zone)) 1205 cnt += zone_page_state(zone, NR_FREE_PAGES); 1206 1207 return cnt; 1208 } 1209 1210 /** 1211 * saveable_highmem_page - Check if a highmem page is saveable. 1212 * 1213 * Determine whether a highmem page should be included in a hibernation image. 1214 * 1215 * We should save the page if it isn't Nosave or NosaveFree, or Reserved, 1216 * and it isn't part of a free chunk of pages. 1217 */ 1218 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) 1219 { 1220 struct page *page; 1221 1222 if (!pfn_valid(pfn)) 1223 return NULL; 1224 1225 page = pfn_to_online_page(pfn); 1226 if (!page || page_zone(page) != zone) 1227 return NULL; 1228 1229 BUG_ON(!PageHighMem(page)); 1230 1231 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 1232 return NULL; 1233 1234 if (PageReserved(page) || PageOffline(page)) 1235 return NULL; 1236 1237 if (page_is_guard(page)) 1238 return NULL; 1239 1240 return page; 1241 } 1242 1243 /** 1244 * count_highmem_pages - Compute the total number of saveable highmem pages. 1245 */ 1246 static unsigned int count_highmem_pages(void) 1247 { 1248 struct zone *zone; 1249 unsigned int n = 0; 1250 1251 for_each_populated_zone(zone) { 1252 unsigned long pfn, max_zone_pfn; 1253 1254 if (!is_highmem(zone)) 1255 continue; 1256 1257 mark_free_pages(zone); 1258 max_zone_pfn = zone_end_pfn(zone); 1259 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1260 if (saveable_highmem_page(zone, pfn)) 1261 n++; 1262 } 1263 return n; 1264 } 1265 #else 1266 static inline void *saveable_highmem_page(struct zone *z, unsigned long p) 1267 { 1268 return NULL; 1269 } 1270 #endif /* CONFIG_HIGHMEM */ 1271 1272 /** 1273 * saveable_page - Check if the given page is saveable. 1274 * 1275 * Determine whether a non-highmem page should be included in a hibernation 1276 * image. 1277 * 1278 * We should save the page if it isn't Nosave, and is not in the range 1279 * of pages statically defined as 'unsaveable', and it isn't part of 1280 * a free chunk of pages. 1281 */ 1282 static struct page *saveable_page(struct zone *zone, unsigned long pfn) 1283 { 1284 struct page *page; 1285 1286 if (!pfn_valid(pfn)) 1287 return NULL; 1288 1289 page = pfn_to_online_page(pfn); 1290 if (!page || page_zone(page) != zone) 1291 return NULL; 1292 1293 BUG_ON(PageHighMem(page)); 1294 1295 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 1296 return NULL; 1297 1298 if (PageOffline(page)) 1299 return NULL; 1300 1301 if (PageReserved(page) 1302 && (!kernel_page_present(page) || pfn_is_nosave(pfn))) 1303 return NULL; 1304 1305 if (page_is_guard(page)) 1306 return NULL; 1307 1308 return page; 1309 } 1310 1311 /** 1312 * count_data_pages - Compute the total number of saveable non-highmem pages. 1313 */ 1314 static unsigned int count_data_pages(void) 1315 { 1316 struct zone *zone; 1317 unsigned long pfn, max_zone_pfn; 1318 unsigned int n = 0; 1319 1320 for_each_populated_zone(zone) { 1321 if (is_highmem(zone)) 1322 continue; 1323 1324 mark_free_pages(zone); 1325 max_zone_pfn = zone_end_pfn(zone); 1326 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1327 if (saveable_page(zone, pfn)) 1328 n++; 1329 } 1330 return n; 1331 } 1332 1333 /* 1334 * This is needed, because copy_page and memcpy are not usable for copying 1335 * task structs. 1336 */ 1337 static inline void do_copy_page(long *dst, long *src) 1338 { 1339 int n; 1340 1341 for (n = PAGE_SIZE / sizeof(long); n; n--) 1342 *dst++ = *src++; 1343 } 1344 1345 /** 1346 * safe_copy_page - Copy a page in a safe way. 1347 * 1348 * Check if the page we are going to copy is marked as present in the kernel 1349 * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or 1350 * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present() 1351 * always returns 'true'. 1352 */ 1353 static void safe_copy_page(void *dst, struct page *s_page) 1354 { 1355 if (kernel_page_present(s_page)) { 1356 do_copy_page(dst, page_address(s_page)); 1357 } else { 1358 kernel_map_pages(s_page, 1, 1); 1359 do_copy_page(dst, page_address(s_page)); 1360 kernel_map_pages(s_page, 1, 0); 1361 } 1362 } 1363 1364 #ifdef CONFIG_HIGHMEM 1365 static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn) 1366 { 1367 return is_highmem(zone) ? 1368 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); 1369 } 1370 1371 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1372 { 1373 struct page *s_page, *d_page; 1374 void *src, *dst; 1375 1376 s_page = pfn_to_page(src_pfn); 1377 d_page = pfn_to_page(dst_pfn); 1378 if (PageHighMem(s_page)) { 1379 src = kmap_atomic(s_page); 1380 dst = kmap_atomic(d_page); 1381 do_copy_page(dst, src); 1382 kunmap_atomic(dst); 1383 kunmap_atomic(src); 1384 } else { 1385 if (PageHighMem(d_page)) { 1386 /* 1387 * The page pointed to by src may contain some kernel 1388 * data modified by kmap_atomic() 1389 */ 1390 safe_copy_page(buffer, s_page); 1391 dst = kmap_atomic(d_page); 1392 copy_page(dst, buffer); 1393 kunmap_atomic(dst); 1394 } else { 1395 safe_copy_page(page_address(d_page), s_page); 1396 } 1397 } 1398 } 1399 #else 1400 #define page_is_saveable(zone, pfn) saveable_page(zone, pfn) 1401 1402 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1403 { 1404 safe_copy_page(page_address(pfn_to_page(dst_pfn)), 1405 pfn_to_page(src_pfn)); 1406 } 1407 #endif /* CONFIG_HIGHMEM */ 1408 1409 static void copy_data_pages(struct memory_bitmap *copy_bm, 1410 struct memory_bitmap *orig_bm) 1411 { 1412 struct zone *zone; 1413 unsigned long pfn; 1414 1415 for_each_populated_zone(zone) { 1416 unsigned long max_zone_pfn; 1417 1418 mark_free_pages(zone); 1419 max_zone_pfn = zone_end_pfn(zone); 1420 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1421 if (page_is_saveable(zone, pfn)) 1422 memory_bm_set_bit(orig_bm, pfn); 1423 } 1424 memory_bm_position_reset(orig_bm); 1425 memory_bm_position_reset(copy_bm); 1426 for(;;) { 1427 pfn = memory_bm_next_pfn(orig_bm); 1428 if (unlikely(pfn == BM_END_OF_MAP)) 1429 break; 1430 copy_data_page(memory_bm_next_pfn(copy_bm), pfn); 1431 } 1432 } 1433 1434 /* Total number of image pages */ 1435 static unsigned int nr_copy_pages; 1436 /* Number of pages needed for saving the original pfns of the image pages */ 1437 static unsigned int nr_meta_pages; 1438 /* 1439 * Numbers of normal and highmem page frames allocated for hibernation image 1440 * before suspending devices. 1441 */ 1442 static unsigned int alloc_normal, alloc_highmem; 1443 /* 1444 * Memory bitmap used for marking saveable pages (during hibernation) or 1445 * hibernation image pages (during restore) 1446 */ 1447 static struct memory_bitmap orig_bm; 1448 /* 1449 * Memory bitmap used during hibernation for marking allocated page frames that 1450 * will contain copies of saveable pages. During restore it is initially used 1451 * for marking hibernation image pages, but then the set bits from it are 1452 * duplicated in @orig_bm and it is released. On highmem systems it is next 1453 * used for marking "safe" highmem pages, but it has to be reinitialized for 1454 * this purpose. 1455 */ 1456 static struct memory_bitmap copy_bm; 1457 1458 /** 1459 * swsusp_free - Free pages allocated for hibernation image. 1460 * 1461 * Image pages are alocated before snapshot creation, so they need to be 1462 * released after resume. 1463 */ 1464 void swsusp_free(void) 1465 { 1466 unsigned long fb_pfn, fr_pfn; 1467 1468 if (!forbidden_pages_map || !free_pages_map) 1469 goto out; 1470 1471 memory_bm_position_reset(forbidden_pages_map); 1472 memory_bm_position_reset(free_pages_map); 1473 1474 loop: 1475 fr_pfn = memory_bm_next_pfn(free_pages_map); 1476 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1477 1478 /* 1479 * Find the next bit set in both bitmaps. This is guaranteed to 1480 * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP. 1481 */ 1482 do { 1483 if (fb_pfn < fr_pfn) 1484 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1485 if (fr_pfn < fb_pfn) 1486 fr_pfn = memory_bm_next_pfn(free_pages_map); 1487 } while (fb_pfn != fr_pfn); 1488 1489 if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) { 1490 struct page *page = pfn_to_page(fr_pfn); 1491 1492 memory_bm_clear_current(forbidden_pages_map); 1493 memory_bm_clear_current(free_pages_map); 1494 hibernate_restore_unprotect_page(page_address(page)); 1495 __free_page(page); 1496 goto loop; 1497 } 1498 1499 out: 1500 nr_copy_pages = 0; 1501 nr_meta_pages = 0; 1502 restore_pblist = NULL; 1503 buffer = NULL; 1504 alloc_normal = 0; 1505 alloc_highmem = 0; 1506 hibernate_restore_protection_end(); 1507 } 1508 1509 /* Helper functions used for the shrinking of memory. */ 1510 1511 #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) 1512 1513 /** 1514 * preallocate_image_pages - Allocate a number of pages for hibernation image. 1515 * @nr_pages: Number of page frames to allocate. 1516 * @mask: GFP flags to use for the allocation. 1517 * 1518 * Return value: Number of page frames actually allocated 1519 */ 1520 static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) 1521 { 1522 unsigned long nr_alloc = 0; 1523 1524 while (nr_pages > 0) { 1525 struct page *page; 1526 1527 page = alloc_image_page(mask); 1528 if (!page) 1529 break; 1530 memory_bm_set_bit(©_bm, page_to_pfn(page)); 1531 if (PageHighMem(page)) 1532 alloc_highmem++; 1533 else 1534 alloc_normal++; 1535 nr_pages--; 1536 nr_alloc++; 1537 } 1538 1539 return nr_alloc; 1540 } 1541 1542 static unsigned long preallocate_image_memory(unsigned long nr_pages, 1543 unsigned long avail_normal) 1544 { 1545 unsigned long alloc; 1546 1547 if (avail_normal <= alloc_normal) 1548 return 0; 1549 1550 alloc = avail_normal - alloc_normal; 1551 if (nr_pages < alloc) 1552 alloc = nr_pages; 1553 1554 return preallocate_image_pages(alloc, GFP_IMAGE); 1555 } 1556 1557 #ifdef CONFIG_HIGHMEM 1558 static unsigned long preallocate_image_highmem(unsigned long nr_pages) 1559 { 1560 return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); 1561 } 1562 1563 /** 1564 * __fraction - Compute (an approximation of) x * (multiplier / base). 1565 */ 1566 static unsigned long __fraction(u64 x, u64 multiplier, u64 base) 1567 { 1568 return div64_u64(x * multiplier, base); 1569 } 1570 1571 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1572 unsigned long highmem, 1573 unsigned long total) 1574 { 1575 unsigned long alloc = __fraction(nr_pages, highmem, total); 1576 1577 return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); 1578 } 1579 #else /* CONFIG_HIGHMEM */ 1580 static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) 1581 { 1582 return 0; 1583 } 1584 1585 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1586 unsigned long highmem, 1587 unsigned long total) 1588 { 1589 return 0; 1590 } 1591 #endif /* CONFIG_HIGHMEM */ 1592 1593 /** 1594 * free_unnecessary_pages - Release preallocated pages not needed for the image. 1595 */ 1596 static unsigned long free_unnecessary_pages(void) 1597 { 1598 unsigned long save, to_free_normal, to_free_highmem, free; 1599 1600 save = count_data_pages(); 1601 if (alloc_normal >= save) { 1602 to_free_normal = alloc_normal - save; 1603 save = 0; 1604 } else { 1605 to_free_normal = 0; 1606 save -= alloc_normal; 1607 } 1608 save += count_highmem_pages(); 1609 if (alloc_highmem >= save) { 1610 to_free_highmem = alloc_highmem - save; 1611 } else { 1612 to_free_highmem = 0; 1613 save -= alloc_highmem; 1614 if (to_free_normal > save) 1615 to_free_normal -= save; 1616 else 1617 to_free_normal = 0; 1618 } 1619 free = to_free_normal + to_free_highmem; 1620 1621 memory_bm_position_reset(©_bm); 1622 1623 while (to_free_normal > 0 || to_free_highmem > 0) { 1624 unsigned long pfn = memory_bm_next_pfn(©_bm); 1625 struct page *page = pfn_to_page(pfn); 1626 1627 if (PageHighMem(page)) { 1628 if (!to_free_highmem) 1629 continue; 1630 to_free_highmem--; 1631 alloc_highmem--; 1632 } else { 1633 if (!to_free_normal) 1634 continue; 1635 to_free_normal--; 1636 alloc_normal--; 1637 } 1638 memory_bm_clear_bit(©_bm, pfn); 1639 swsusp_unset_page_forbidden(page); 1640 swsusp_unset_page_free(page); 1641 __free_page(page); 1642 } 1643 1644 return free; 1645 } 1646 1647 /** 1648 * minimum_image_size - Estimate the minimum acceptable size of an image. 1649 * @saveable: Number of saveable pages in the system. 1650 * 1651 * We want to avoid attempting to free too much memory too hard, so estimate the 1652 * minimum acceptable size of a hibernation image to use as the lower limit for 1653 * preallocating memory. 1654 * 1655 * We assume that the minimum image size should be proportional to 1656 * 1657 * [number of saveable pages] - [number of pages that can be freed in theory] 1658 * 1659 * where the second term is the sum of (1) reclaimable slab pages, (2) active 1660 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages. 1661 */ 1662 static unsigned long minimum_image_size(unsigned long saveable) 1663 { 1664 unsigned long size; 1665 1666 size = global_node_page_state(NR_SLAB_RECLAIMABLE) 1667 + global_node_page_state(NR_ACTIVE_ANON) 1668 + global_node_page_state(NR_INACTIVE_ANON) 1669 + global_node_page_state(NR_ACTIVE_FILE) 1670 + global_node_page_state(NR_INACTIVE_FILE); 1671 1672 return saveable <= size ? 0 : saveable - size; 1673 } 1674 1675 /** 1676 * hibernate_preallocate_memory - Preallocate memory for hibernation image. 1677 * 1678 * To create a hibernation image it is necessary to make a copy of every page 1679 * frame in use. We also need a number of page frames to be free during 1680 * hibernation for allocations made while saving the image and for device 1681 * drivers, in case they need to allocate memory from their hibernation 1682 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough 1683 * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through 1684 * /sys/power/reserved_size, respectively). To make this happen, we compute the 1685 * total number of available page frames and allocate at least 1686 * 1687 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 1688 * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) 1689 * 1690 * of them, which corresponds to the maximum size of a hibernation image. 1691 * 1692 * If image_size is set below the number following from the above formula, 1693 * the preallocation of memory is continued until the total number of saveable 1694 * pages in the system is below the requested image size or the minimum 1695 * acceptable image size returned by minimum_image_size(), whichever is greater. 1696 */ 1697 int hibernate_preallocate_memory(void) 1698 { 1699 struct zone *zone; 1700 unsigned long saveable, size, max_size, count, highmem, pages = 0; 1701 unsigned long alloc, save_highmem, pages_highmem, avail_normal; 1702 ktime_t start, stop; 1703 int error; 1704 1705 pr_info("Preallocating image memory\n"); 1706 start = ktime_get(); 1707 1708 error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); 1709 if (error) { 1710 pr_err("Cannot allocate original bitmap\n"); 1711 goto err_out; 1712 } 1713 1714 error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); 1715 if (error) { 1716 pr_err("Cannot allocate copy bitmap\n"); 1717 goto err_out; 1718 } 1719 1720 alloc_normal = 0; 1721 alloc_highmem = 0; 1722 1723 /* Count the number of saveable data pages. */ 1724 save_highmem = count_highmem_pages(); 1725 saveable = count_data_pages(); 1726 1727 /* 1728 * Compute the total number of page frames we can use (count) and the 1729 * number of pages needed for image metadata (size). 1730 */ 1731 count = saveable; 1732 saveable += save_highmem; 1733 highmem = save_highmem; 1734 size = 0; 1735 for_each_populated_zone(zone) { 1736 size += snapshot_additional_pages(zone); 1737 if (is_highmem(zone)) 1738 highmem += zone_page_state(zone, NR_FREE_PAGES); 1739 else 1740 count += zone_page_state(zone, NR_FREE_PAGES); 1741 } 1742 avail_normal = count; 1743 count += highmem; 1744 count -= totalreserve_pages; 1745 1746 /* Compute the maximum number of saveable pages to leave in memory. */ 1747 max_size = (count - (size + PAGES_FOR_IO)) / 2 1748 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); 1749 /* Compute the desired number of image pages specified by image_size. */ 1750 size = DIV_ROUND_UP(image_size, PAGE_SIZE); 1751 if (size > max_size) 1752 size = max_size; 1753 /* 1754 * If the desired number of image pages is at least as large as the 1755 * current number of saveable pages in memory, allocate page frames for 1756 * the image and we're done. 1757 */ 1758 if (size >= saveable) { 1759 pages = preallocate_image_highmem(save_highmem); 1760 pages += preallocate_image_memory(saveable - pages, avail_normal); 1761 goto out; 1762 } 1763 1764 /* Estimate the minimum size of the image. */ 1765 pages = minimum_image_size(saveable); 1766 /* 1767 * To avoid excessive pressure on the normal zone, leave room in it to 1768 * accommodate an image of the minimum size (unless it's already too 1769 * small, in which case don't preallocate pages from it at all). 1770 */ 1771 if (avail_normal > pages) 1772 avail_normal -= pages; 1773 else 1774 avail_normal = 0; 1775 if (size < pages) 1776 size = min_t(unsigned long, pages, max_size); 1777 1778 /* 1779 * Let the memory management subsystem know that we're going to need a 1780 * large number of page frames to allocate and make it free some memory. 1781 * NOTE: If this is not done, performance will be hurt badly in some 1782 * test cases. 1783 */ 1784 shrink_all_memory(saveable - size); 1785 1786 /* 1787 * The number of saveable pages in memory was too high, so apply some 1788 * pressure to decrease it. First, make room for the largest possible 1789 * image and fail if that doesn't work. Next, try to decrease the size 1790 * of the image as much as indicated by 'size' using allocations from 1791 * highmem and non-highmem zones separately. 1792 */ 1793 pages_highmem = preallocate_image_highmem(highmem / 2); 1794 alloc = count - max_size; 1795 if (alloc > pages_highmem) 1796 alloc -= pages_highmem; 1797 else 1798 alloc = 0; 1799 pages = preallocate_image_memory(alloc, avail_normal); 1800 if (pages < alloc) { 1801 /* We have exhausted non-highmem pages, try highmem. */ 1802 alloc -= pages; 1803 pages += pages_highmem; 1804 pages_highmem = preallocate_image_highmem(alloc); 1805 if (pages_highmem < alloc) { 1806 pr_err("Image allocation is %lu pages short\n", 1807 alloc - pages_highmem); 1808 goto err_out; 1809 } 1810 pages += pages_highmem; 1811 /* 1812 * size is the desired number of saveable pages to leave in 1813 * memory, so try to preallocate (all memory - size) pages. 1814 */ 1815 alloc = (count - pages) - size; 1816 pages += preallocate_image_highmem(alloc); 1817 } else { 1818 /* 1819 * There are approximately max_size saveable pages at this point 1820 * and we want to reduce this number down to size. 1821 */ 1822 alloc = max_size - size; 1823 size = preallocate_highmem_fraction(alloc, highmem, count); 1824 pages_highmem += size; 1825 alloc -= size; 1826 size = preallocate_image_memory(alloc, avail_normal); 1827 pages_highmem += preallocate_image_highmem(alloc - size); 1828 pages += pages_highmem + size; 1829 } 1830 1831 /* 1832 * We only need as many page frames for the image as there are saveable 1833 * pages in memory, but we have allocated more. Release the excessive 1834 * ones now. 1835 */ 1836 pages -= free_unnecessary_pages(); 1837 1838 out: 1839 stop = ktime_get(); 1840 pr_info("Allocated %lu pages for snapshot\n", pages); 1841 swsusp_show_speed(start, stop, pages, "Allocated"); 1842 1843 return 0; 1844 1845 err_out: 1846 swsusp_free(); 1847 return -ENOMEM; 1848 } 1849 1850 #ifdef CONFIG_HIGHMEM 1851 /** 1852 * count_pages_for_highmem - Count non-highmem pages needed for copying highmem. 1853 * 1854 * Compute the number of non-highmem pages that will be necessary for creating 1855 * copies of highmem pages. 1856 */ 1857 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) 1858 { 1859 unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; 1860 1861 if (free_highmem >= nr_highmem) 1862 nr_highmem = 0; 1863 else 1864 nr_highmem -= free_highmem; 1865 1866 return nr_highmem; 1867 } 1868 #else 1869 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; } 1870 #endif /* CONFIG_HIGHMEM */ 1871 1872 /** 1873 * enough_free_mem - Check if there is enough free memory for the image. 1874 */ 1875 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) 1876 { 1877 struct zone *zone; 1878 unsigned int free = alloc_normal; 1879 1880 for_each_populated_zone(zone) 1881 if (!is_highmem(zone)) 1882 free += zone_page_state(zone, NR_FREE_PAGES); 1883 1884 nr_pages += count_pages_for_highmem(nr_highmem); 1885 pr_debug("Normal pages needed: %u + %u, available pages: %u\n", 1886 nr_pages, PAGES_FOR_IO, free); 1887 1888 return free > nr_pages + PAGES_FOR_IO; 1889 } 1890 1891 #ifdef CONFIG_HIGHMEM 1892 /** 1893 * get_highmem_buffer - Allocate a buffer for highmem pages. 1894 * 1895 * If there are some highmem pages in the hibernation image, we may need a 1896 * buffer to copy them and/or load their data. 1897 */ 1898 static inline int get_highmem_buffer(int safe_needed) 1899 { 1900 buffer = get_image_page(GFP_ATOMIC, safe_needed); 1901 return buffer ? 0 : -ENOMEM; 1902 } 1903 1904 /** 1905 * alloc_highmem_image_pages - Allocate some highmem pages for the image. 1906 * 1907 * Try to allocate as many pages as needed, but if the number of free highmem 1908 * pages is less than that, allocate them all. 1909 */ 1910 static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, 1911 unsigned int nr_highmem) 1912 { 1913 unsigned int to_alloc = count_free_highmem_pages(); 1914 1915 if (to_alloc > nr_highmem) 1916 to_alloc = nr_highmem; 1917 1918 nr_highmem -= to_alloc; 1919 while (to_alloc-- > 0) { 1920 struct page *page; 1921 1922 page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM); 1923 memory_bm_set_bit(bm, page_to_pfn(page)); 1924 } 1925 return nr_highmem; 1926 } 1927 #else 1928 static inline int get_highmem_buffer(int safe_needed) { return 0; } 1929 1930 static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, 1931 unsigned int n) { return 0; } 1932 #endif /* CONFIG_HIGHMEM */ 1933 1934 /** 1935 * swsusp_alloc - Allocate memory for hibernation image. 1936 * 1937 * We first try to allocate as many highmem pages as there are 1938 * saveable highmem pages in the system. If that fails, we allocate 1939 * non-highmem pages for the copies of the remaining highmem ones. 1940 * 1941 * In this approach it is likely that the copies of highmem pages will 1942 * also be located in the high memory, because of the way in which 1943 * copy_data_pages() works. 1944 */ 1945 static int swsusp_alloc(struct memory_bitmap *copy_bm, 1946 unsigned int nr_pages, unsigned int nr_highmem) 1947 { 1948 if (nr_highmem > 0) { 1949 if (get_highmem_buffer(PG_ANY)) 1950 goto err_out; 1951 if (nr_highmem > alloc_highmem) { 1952 nr_highmem -= alloc_highmem; 1953 nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); 1954 } 1955 } 1956 if (nr_pages > alloc_normal) { 1957 nr_pages -= alloc_normal; 1958 while (nr_pages-- > 0) { 1959 struct page *page; 1960 1961 page = alloc_image_page(GFP_ATOMIC); 1962 if (!page) 1963 goto err_out; 1964 memory_bm_set_bit(copy_bm, page_to_pfn(page)); 1965 } 1966 } 1967 1968 return 0; 1969 1970 err_out: 1971 swsusp_free(); 1972 return -ENOMEM; 1973 } 1974 1975 asmlinkage __visible int swsusp_save(void) 1976 { 1977 unsigned int nr_pages, nr_highmem; 1978 1979 pr_info("Creating image:\n"); 1980 1981 drain_local_pages(NULL); 1982 nr_pages = count_data_pages(); 1983 nr_highmem = count_highmem_pages(); 1984 pr_info("Need to copy %u pages\n", nr_pages + nr_highmem); 1985 1986 if (!enough_free_mem(nr_pages, nr_highmem)) { 1987 pr_err("Not enough free memory\n"); 1988 return -ENOMEM; 1989 } 1990 1991 if (swsusp_alloc(©_bm, nr_pages, nr_highmem)) { 1992 pr_err("Memory allocation failed\n"); 1993 return -ENOMEM; 1994 } 1995 1996 /* 1997 * During allocating of suspend pagedir, new cold pages may appear. 1998 * Kill them. 1999 */ 2000 drain_local_pages(NULL); 2001 copy_data_pages(©_bm, &orig_bm); 2002 2003 /* 2004 * End of critical section. From now on, we can write to memory, 2005 * but we should not touch disk. This specially means we must _not_ 2006 * touch swap space! Except we must write out our image of course. 2007 */ 2008 2009 nr_pages += nr_highmem; 2010 nr_copy_pages = nr_pages; 2011 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); 2012 2013 pr_info("Image created (%d pages copied)\n", nr_pages); 2014 2015 return 0; 2016 } 2017 2018 #ifndef CONFIG_ARCH_HIBERNATION_HEADER 2019 static int init_header_complete(struct swsusp_info *info) 2020 { 2021 memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); 2022 info->version_code = LINUX_VERSION_CODE; 2023 return 0; 2024 } 2025 2026 static char *check_image_kernel(struct swsusp_info *info) 2027 { 2028 if (info->version_code != LINUX_VERSION_CODE) 2029 return "kernel version"; 2030 if (strcmp(info->uts.sysname,init_utsname()->sysname)) 2031 return "system type"; 2032 if (strcmp(info->uts.release,init_utsname()->release)) 2033 return "kernel release"; 2034 if (strcmp(info->uts.version,init_utsname()->version)) 2035 return "version"; 2036 if (strcmp(info->uts.machine,init_utsname()->machine)) 2037 return "machine"; 2038 return NULL; 2039 } 2040 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ 2041 2042 unsigned long snapshot_get_image_size(void) 2043 { 2044 return nr_copy_pages + nr_meta_pages + 1; 2045 } 2046 2047 static int init_header(struct swsusp_info *info) 2048 { 2049 memset(info, 0, sizeof(struct swsusp_info)); 2050 info->num_physpages = get_num_physpages(); 2051 info->image_pages = nr_copy_pages; 2052 info->pages = snapshot_get_image_size(); 2053 info->size = info->pages; 2054 info->size <<= PAGE_SHIFT; 2055 return init_header_complete(info); 2056 } 2057 2058 /** 2059 * pack_pfns - Prepare PFNs for saving. 2060 * @bm: Memory bitmap. 2061 * @buf: Memory buffer to store the PFNs in. 2062 * 2063 * PFNs corresponding to set bits in @bm are stored in the area of memory 2064 * pointed to by @buf (1 page at a time). 2065 */ 2066 static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) 2067 { 2068 int j; 2069 2070 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 2071 buf[j] = memory_bm_next_pfn(bm); 2072 if (unlikely(buf[j] == BM_END_OF_MAP)) 2073 break; 2074 } 2075 } 2076 2077 /** 2078 * snapshot_read_next - Get the address to read the next image page from. 2079 * @handle: Snapshot handle to be used for the reading. 2080 * 2081 * On the first call, @handle should point to a zeroed snapshot_handle 2082 * structure. The structure gets populated then and a pointer to it should be 2083 * passed to this function every next time. 2084 * 2085 * On success, the function returns a positive number. Then, the caller 2086 * is allowed to read up to the returned number of bytes from the memory 2087 * location computed by the data_of() macro. 2088 * 2089 * The function returns 0 to indicate the end of the data stream condition, 2090 * and negative numbers are returned on errors. If that happens, the structure 2091 * pointed to by @handle is not updated and should not be used any more. 2092 */ 2093 int snapshot_read_next(struct snapshot_handle *handle) 2094 { 2095 if (handle->cur > nr_meta_pages + nr_copy_pages) 2096 return 0; 2097 2098 if (!buffer) { 2099 /* This makes the buffer be freed by swsusp_free() */ 2100 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2101 if (!buffer) 2102 return -ENOMEM; 2103 } 2104 if (!handle->cur) { 2105 int error; 2106 2107 error = init_header((struct swsusp_info *)buffer); 2108 if (error) 2109 return error; 2110 handle->buffer = buffer; 2111 memory_bm_position_reset(&orig_bm); 2112 memory_bm_position_reset(©_bm); 2113 } else if (handle->cur <= nr_meta_pages) { 2114 clear_page(buffer); 2115 pack_pfns(buffer, &orig_bm); 2116 } else { 2117 struct page *page; 2118 2119 page = pfn_to_page(memory_bm_next_pfn(©_bm)); 2120 if (PageHighMem(page)) { 2121 /* 2122 * Highmem pages are copied to the buffer, 2123 * because we can't return with a kmapped 2124 * highmem page (we may not be called again). 2125 */ 2126 void *kaddr; 2127 2128 kaddr = kmap_atomic(page); 2129 copy_page(buffer, kaddr); 2130 kunmap_atomic(kaddr); 2131 handle->buffer = buffer; 2132 } else { 2133 handle->buffer = page_address(page); 2134 } 2135 } 2136 handle->cur++; 2137 return PAGE_SIZE; 2138 } 2139 2140 static void duplicate_memory_bitmap(struct memory_bitmap *dst, 2141 struct memory_bitmap *src) 2142 { 2143 unsigned long pfn; 2144 2145 memory_bm_position_reset(src); 2146 pfn = memory_bm_next_pfn(src); 2147 while (pfn != BM_END_OF_MAP) { 2148 memory_bm_set_bit(dst, pfn); 2149 pfn = memory_bm_next_pfn(src); 2150 } 2151 } 2152 2153 /** 2154 * mark_unsafe_pages - Mark pages that were used before hibernation. 2155 * 2156 * Mark the pages that cannot be used for storing the image during restoration, 2157 * because they conflict with the pages that had been used before hibernation. 2158 */ 2159 static void mark_unsafe_pages(struct memory_bitmap *bm) 2160 { 2161 unsigned long pfn; 2162 2163 /* Clear the "free"/"unsafe" bit for all PFNs */ 2164 memory_bm_position_reset(free_pages_map); 2165 pfn = memory_bm_next_pfn(free_pages_map); 2166 while (pfn != BM_END_OF_MAP) { 2167 memory_bm_clear_current(free_pages_map); 2168 pfn = memory_bm_next_pfn(free_pages_map); 2169 } 2170 2171 /* Mark pages that correspond to the "original" PFNs as "unsafe" */ 2172 duplicate_memory_bitmap(free_pages_map, bm); 2173 2174 allocated_unsafe_pages = 0; 2175 } 2176 2177 static int check_header(struct swsusp_info *info) 2178 { 2179 char *reason; 2180 2181 reason = check_image_kernel(info); 2182 if (!reason && info->num_physpages != get_num_physpages()) 2183 reason = "memory size"; 2184 if (reason) { 2185 pr_err("Image mismatch: %s\n", reason); 2186 return -EPERM; 2187 } 2188 return 0; 2189 } 2190 2191 /** 2192 * load header - Check the image header and copy the data from it. 2193 */ 2194 static int load_header(struct swsusp_info *info) 2195 { 2196 int error; 2197 2198 restore_pblist = NULL; 2199 error = check_header(info); 2200 if (!error) { 2201 nr_copy_pages = info->image_pages; 2202 nr_meta_pages = info->pages - info->image_pages - 1; 2203 } 2204 return error; 2205 } 2206 2207 /** 2208 * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap. 2209 * @bm: Memory bitmap. 2210 * @buf: Area of memory containing the PFNs. 2211 * 2212 * For each element of the array pointed to by @buf (1 page at a time), set the 2213 * corresponding bit in @bm. 2214 */ 2215 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) 2216 { 2217 int j; 2218 2219 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 2220 if (unlikely(buf[j] == BM_END_OF_MAP)) 2221 break; 2222 2223 if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) 2224 memory_bm_set_bit(bm, buf[j]); 2225 else 2226 return -EFAULT; 2227 } 2228 2229 return 0; 2230 } 2231 2232 #ifdef CONFIG_HIGHMEM 2233 /* 2234 * struct highmem_pbe is used for creating the list of highmem pages that 2235 * should be restored atomically during the resume from disk, because the page 2236 * frames they have occupied before the suspend are in use. 2237 */ 2238 struct highmem_pbe { 2239 struct page *copy_page; /* data is here now */ 2240 struct page *orig_page; /* data was here before the suspend */ 2241 struct highmem_pbe *next; 2242 }; 2243 2244 /* 2245 * List of highmem PBEs needed for restoring the highmem pages that were 2246 * allocated before the suspend and included in the suspend image, but have 2247 * also been allocated by the "resume" kernel, so their contents cannot be 2248 * written directly to their "original" page frames. 2249 */ 2250 static struct highmem_pbe *highmem_pblist; 2251 2252 /** 2253 * count_highmem_image_pages - Compute the number of highmem pages in the image. 2254 * @bm: Memory bitmap. 2255 * 2256 * The bits in @bm that correspond to image pages are assumed to be set. 2257 */ 2258 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) 2259 { 2260 unsigned long pfn; 2261 unsigned int cnt = 0; 2262 2263 memory_bm_position_reset(bm); 2264 pfn = memory_bm_next_pfn(bm); 2265 while (pfn != BM_END_OF_MAP) { 2266 if (PageHighMem(pfn_to_page(pfn))) 2267 cnt++; 2268 2269 pfn = memory_bm_next_pfn(bm); 2270 } 2271 return cnt; 2272 } 2273 2274 static unsigned int safe_highmem_pages; 2275 2276 static struct memory_bitmap *safe_highmem_bm; 2277 2278 /** 2279 * prepare_highmem_image - Allocate memory for loading highmem data from image. 2280 * @bm: Pointer to an uninitialized memory bitmap structure. 2281 * @nr_highmem_p: Pointer to the number of highmem image pages. 2282 * 2283 * Try to allocate as many highmem pages as there are highmem image pages 2284 * (@nr_highmem_p points to the variable containing the number of highmem image 2285 * pages). The pages that are "safe" (ie. will not be overwritten when the 2286 * hibernation image is restored entirely) have the corresponding bits set in 2287 * @bm (it must be unitialized). 2288 * 2289 * NOTE: This function should not be called if there are no highmem image pages. 2290 */ 2291 static int prepare_highmem_image(struct memory_bitmap *bm, 2292 unsigned int *nr_highmem_p) 2293 { 2294 unsigned int to_alloc; 2295 2296 if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) 2297 return -ENOMEM; 2298 2299 if (get_highmem_buffer(PG_SAFE)) 2300 return -ENOMEM; 2301 2302 to_alloc = count_free_highmem_pages(); 2303 if (to_alloc > *nr_highmem_p) 2304 to_alloc = *nr_highmem_p; 2305 else 2306 *nr_highmem_p = to_alloc; 2307 2308 safe_highmem_pages = 0; 2309 while (to_alloc-- > 0) { 2310 struct page *page; 2311 2312 page = alloc_page(__GFP_HIGHMEM); 2313 if (!swsusp_page_is_free(page)) { 2314 /* The page is "safe", set its bit the bitmap */ 2315 memory_bm_set_bit(bm, page_to_pfn(page)); 2316 safe_highmem_pages++; 2317 } 2318 /* Mark the page as allocated */ 2319 swsusp_set_page_forbidden(page); 2320 swsusp_set_page_free(page); 2321 } 2322 memory_bm_position_reset(bm); 2323 safe_highmem_bm = bm; 2324 return 0; 2325 } 2326 2327 static struct page *last_highmem_page; 2328 2329 /** 2330 * get_highmem_page_buffer - Prepare a buffer to store a highmem image page. 2331 * 2332 * For a given highmem image page get a buffer that suspend_write_next() should 2333 * return to its caller to write to. 2334 * 2335 * If the page is to be saved to its "original" page frame or a copy of 2336 * the page is to be made in the highmem, @buffer is returned. Otherwise, 2337 * the copy of the page is to be made in normal memory, so the address of 2338 * the copy is returned. 2339 * 2340 * If @buffer is returned, the caller of suspend_write_next() will write 2341 * the page's contents to @buffer, so they will have to be copied to the 2342 * right location on the next call to suspend_write_next() and it is done 2343 * with the help of copy_last_highmem_page(). For this purpose, if 2344 * @buffer is returned, @last_highmem_page is set to the page to which 2345 * the data will have to be copied from @buffer. 2346 */ 2347 static void *get_highmem_page_buffer(struct page *page, 2348 struct chain_allocator *ca) 2349 { 2350 struct highmem_pbe *pbe; 2351 void *kaddr; 2352 2353 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { 2354 /* 2355 * We have allocated the "original" page frame and we can 2356 * use it directly to store the loaded page. 2357 */ 2358 last_highmem_page = page; 2359 return buffer; 2360 } 2361 /* 2362 * The "original" page frame has not been allocated and we have to 2363 * use a "safe" page frame to store the loaded page. 2364 */ 2365 pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); 2366 if (!pbe) { 2367 swsusp_free(); 2368 return ERR_PTR(-ENOMEM); 2369 } 2370 pbe->orig_page = page; 2371 if (safe_highmem_pages > 0) { 2372 struct page *tmp; 2373 2374 /* Copy of the page will be stored in high memory */ 2375 kaddr = buffer; 2376 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); 2377 safe_highmem_pages--; 2378 last_highmem_page = tmp; 2379 pbe->copy_page = tmp; 2380 } else { 2381 /* Copy of the page will be stored in normal memory */ 2382 kaddr = safe_pages_list; 2383 safe_pages_list = safe_pages_list->next; 2384 pbe->copy_page = virt_to_page(kaddr); 2385 } 2386 pbe->next = highmem_pblist; 2387 highmem_pblist = pbe; 2388 return kaddr; 2389 } 2390 2391 /** 2392 * copy_last_highmem_page - Copy most the most recent highmem image page. 2393 * 2394 * Copy the contents of a highmem image from @buffer, where the caller of 2395 * snapshot_write_next() has stored them, to the right location represented by 2396 * @last_highmem_page . 2397 */ 2398 static void copy_last_highmem_page(void) 2399 { 2400 if (last_highmem_page) { 2401 void *dst; 2402 2403 dst = kmap_atomic(last_highmem_page); 2404 copy_page(dst, buffer); 2405 kunmap_atomic(dst); 2406 last_highmem_page = NULL; 2407 } 2408 } 2409 2410 static inline int last_highmem_page_copied(void) 2411 { 2412 return !last_highmem_page; 2413 } 2414 2415 static inline void free_highmem_data(void) 2416 { 2417 if (safe_highmem_bm) 2418 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); 2419 2420 if (buffer) 2421 free_image_page(buffer, PG_UNSAFE_CLEAR); 2422 } 2423 #else 2424 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } 2425 2426 static inline int prepare_highmem_image(struct memory_bitmap *bm, 2427 unsigned int *nr_highmem_p) { return 0; } 2428 2429 static inline void *get_highmem_page_buffer(struct page *page, 2430 struct chain_allocator *ca) 2431 { 2432 return ERR_PTR(-EINVAL); 2433 } 2434 2435 static inline void copy_last_highmem_page(void) {} 2436 static inline int last_highmem_page_copied(void) { return 1; } 2437 static inline void free_highmem_data(void) {} 2438 #endif /* CONFIG_HIGHMEM */ 2439 2440 #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) 2441 2442 /** 2443 * prepare_image - Make room for loading hibernation image. 2444 * @new_bm: Unitialized memory bitmap structure. 2445 * @bm: Memory bitmap with unsafe pages marked. 2446 * 2447 * Use @bm to mark the pages that will be overwritten in the process of 2448 * restoring the system memory state from the suspend image ("unsafe" pages) 2449 * and allocate memory for the image. 2450 * 2451 * The idea is to allocate a new memory bitmap first and then allocate 2452 * as many pages as needed for image data, but without specifying what those 2453 * pages will be used for just yet. Instead, we mark them all as allocated and 2454 * create a lists of "safe" pages to be used later. On systems with high 2455 * memory a list of "safe" highmem pages is created too. 2456 */ 2457 static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) 2458 { 2459 unsigned int nr_pages, nr_highmem; 2460 struct linked_page *lp; 2461 int error; 2462 2463 /* If there is no highmem, the buffer will not be necessary */ 2464 free_image_page(buffer, PG_UNSAFE_CLEAR); 2465 buffer = NULL; 2466 2467 nr_highmem = count_highmem_image_pages(bm); 2468 mark_unsafe_pages(bm); 2469 2470 error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); 2471 if (error) 2472 goto Free; 2473 2474 duplicate_memory_bitmap(new_bm, bm); 2475 memory_bm_free(bm, PG_UNSAFE_KEEP); 2476 if (nr_highmem > 0) { 2477 error = prepare_highmem_image(bm, &nr_highmem); 2478 if (error) 2479 goto Free; 2480 } 2481 /* 2482 * Reserve some safe pages for potential later use. 2483 * 2484 * NOTE: This way we make sure there will be enough safe pages for the 2485 * chain_alloc() in get_buffer(). It is a bit wasteful, but 2486 * nr_copy_pages cannot be greater than 50% of the memory anyway. 2487 * 2488 * nr_copy_pages cannot be less than allocated_unsafe_pages too. 2489 */ 2490 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2491 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); 2492 while (nr_pages > 0) { 2493 lp = get_image_page(GFP_ATOMIC, PG_SAFE); 2494 if (!lp) { 2495 error = -ENOMEM; 2496 goto Free; 2497 } 2498 lp->next = safe_pages_list; 2499 safe_pages_list = lp; 2500 nr_pages--; 2501 } 2502 /* Preallocate memory for the image */ 2503 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2504 while (nr_pages > 0) { 2505 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); 2506 if (!lp) { 2507 error = -ENOMEM; 2508 goto Free; 2509 } 2510 if (!swsusp_page_is_free(virt_to_page(lp))) { 2511 /* The page is "safe", add it to the list */ 2512 lp->next = safe_pages_list; 2513 safe_pages_list = lp; 2514 } 2515 /* Mark the page as allocated */ 2516 swsusp_set_page_forbidden(virt_to_page(lp)); 2517 swsusp_set_page_free(virt_to_page(lp)); 2518 nr_pages--; 2519 } 2520 return 0; 2521 2522 Free: 2523 swsusp_free(); 2524 return error; 2525 } 2526 2527 /** 2528 * get_buffer - Get the address to store the next image data page. 2529 * 2530 * Get the address that snapshot_write_next() should return to its caller to 2531 * write to. 2532 */ 2533 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) 2534 { 2535 struct pbe *pbe; 2536 struct page *page; 2537 unsigned long pfn = memory_bm_next_pfn(bm); 2538 2539 if (pfn == BM_END_OF_MAP) 2540 return ERR_PTR(-EFAULT); 2541 2542 page = pfn_to_page(pfn); 2543 if (PageHighMem(page)) 2544 return get_highmem_page_buffer(page, ca); 2545 2546 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) 2547 /* 2548 * We have allocated the "original" page frame and we can 2549 * use it directly to store the loaded page. 2550 */ 2551 return page_address(page); 2552 2553 /* 2554 * The "original" page frame has not been allocated and we have to 2555 * use a "safe" page frame to store the loaded page. 2556 */ 2557 pbe = chain_alloc(ca, sizeof(struct pbe)); 2558 if (!pbe) { 2559 swsusp_free(); 2560 return ERR_PTR(-ENOMEM); 2561 } 2562 pbe->orig_address = page_address(page); 2563 pbe->address = safe_pages_list; 2564 safe_pages_list = safe_pages_list->next; 2565 pbe->next = restore_pblist; 2566 restore_pblist = pbe; 2567 return pbe->address; 2568 } 2569 2570 /** 2571 * snapshot_write_next - Get the address to store the next image page. 2572 * @handle: Snapshot handle structure to guide the writing. 2573 * 2574 * On the first call, @handle should point to a zeroed snapshot_handle 2575 * structure. The structure gets populated then and a pointer to it should be 2576 * passed to this function every next time. 2577 * 2578 * On success, the function returns a positive number. Then, the caller 2579 * is allowed to write up to the returned number of bytes to the memory 2580 * location computed by the data_of() macro. 2581 * 2582 * The function returns 0 to indicate the "end of file" condition. Negative 2583 * numbers are returned on errors, in which cases the structure pointed to by 2584 * @handle is not updated and should not be used any more. 2585 */ 2586 int snapshot_write_next(struct snapshot_handle *handle) 2587 { 2588 static struct chain_allocator ca; 2589 int error = 0; 2590 2591 /* Check if we have already loaded the entire image */ 2592 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) 2593 return 0; 2594 2595 handle->sync_read = 1; 2596 2597 if (!handle->cur) { 2598 if (!buffer) 2599 /* This makes the buffer be freed by swsusp_free() */ 2600 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2601 2602 if (!buffer) 2603 return -ENOMEM; 2604 2605 handle->buffer = buffer; 2606 } else if (handle->cur == 1) { 2607 error = load_header(buffer); 2608 if (error) 2609 return error; 2610 2611 safe_pages_list = NULL; 2612 2613 error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); 2614 if (error) 2615 return error; 2616 2617 hibernate_restore_protection_begin(); 2618 } else if (handle->cur <= nr_meta_pages + 1) { 2619 error = unpack_orig_pfns(buffer, ©_bm); 2620 if (error) 2621 return error; 2622 2623 if (handle->cur == nr_meta_pages + 1) { 2624 error = prepare_image(&orig_bm, ©_bm); 2625 if (error) 2626 return error; 2627 2628 chain_init(&ca, GFP_ATOMIC, PG_SAFE); 2629 memory_bm_position_reset(&orig_bm); 2630 restore_pblist = NULL; 2631 handle->buffer = get_buffer(&orig_bm, &ca); 2632 handle->sync_read = 0; 2633 if (IS_ERR(handle->buffer)) 2634 return PTR_ERR(handle->buffer); 2635 } 2636 } else { 2637 copy_last_highmem_page(); 2638 hibernate_restore_protect_page(handle->buffer); 2639 handle->buffer = get_buffer(&orig_bm, &ca); 2640 if (IS_ERR(handle->buffer)) 2641 return PTR_ERR(handle->buffer); 2642 if (handle->buffer != buffer) 2643 handle->sync_read = 0; 2644 } 2645 handle->cur++; 2646 return PAGE_SIZE; 2647 } 2648 2649 /** 2650 * snapshot_write_finalize - Complete the loading of a hibernation image. 2651 * 2652 * Must be called after the last call to snapshot_write_next() in case the last 2653 * page in the image happens to be a highmem page and its contents should be 2654 * stored in highmem. Additionally, it recycles bitmap memory that's not 2655 * necessary any more. 2656 */ 2657 void snapshot_write_finalize(struct snapshot_handle *handle) 2658 { 2659 copy_last_highmem_page(); 2660 hibernate_restore_protect_page(handle->buffer); 2661 /* Do that only if we have loaded the image entirely */ 2662 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { 2663 memory_bm_recycle(&orig_bm); 2664 free_highmem_data(); 2665 } 2666 } 2667 2668 int snapshot_image_loaded(struct snapshot_handle *handle) 2669 { 2670 return !(!nr_copy_pages || !last_highmem_page_copied() || 2671 handle->cur <= nr_meta_pages + nr_copy_pages); 2672 } 2673 2674 #ifdef CONFIG_HIGHMEM 2675 /* Assumes that @buf is ready and points to a "safe" page */ 2676 static inline void swap_two_pages_data(struct page *p1, struct page *p2, 2677 void *buf) 2678 { 2679 void *kaddr1, *kaddr2; 2680 2681 kaddr1 = kmap_atomic(p1); 2682 kaddr2 = kmap_atomic(p2); 2683 copy_page(buf, kaddr1); 2684 copy_page(kaddr1, kaddr2); 2685 copy_page(kaddr2, buf); 2686 kunmap_atomic(kaddr2); 2687 kunmap_atomic(kaddr1); 2688 } 2689 2690 /** 2691 * restore_highmem - Put highmem image pages into their original locations. 2692 * 2693 * For each highmem page that was in use before hibernation and is included in 2694 * the image, and also has been allocated by the "restore" kernel, swap its 2695 * current contents with the previous (ie. "before hibernation") ones. 2696 * 2697 * If the restore eventually fails, we can call this function once again and 2698 * restore the highmem state as seen by the restore kernel. 2699 */ 2700 int restore_highmem(void) 2701 { 2702 struct highmem_pbe *pbe = highmem_pblist; 2703 void *buf; 2704 2705 if (!pbe) 2706 return 0; 2707 2708 buf = get_image_page(GFP_ATOMIC, PG_SAFE); 2709 if (!buf) 2710 return -ENOMEM; 2711 2712 while (pbe) { 2713 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); 2714 pbe = pbe->next; 2715 } 2716 free_image_page(buf, PG_UNSAFE_CLEAR); 2717 return 0; 2718 } 2719 #endif /* CONFIG_HIGHMEM */ 2720