1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * linux/kernel/power/snapshot.c 4 * 5 * This file provides system snapshot/restore functionality for swsusp. 6 * 7 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz> 8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 9 */ 10 11 #define pr_fmt(fmt) "PM: " fmt 12 13 #include <linux/version.h> 14 #include <linux/module.h> 15 #include <linux/mm.h> 16 #include <linux/suspend.h> 17 #include <linux/delay.h> 18 #include <linux/bitops.h> 19 #include <linux/spinlock.h> 20 #include <linux/kernel.h> 21 #include <linux/pm.h> 22 #include <linux/device.h> 23 #include <linux/init.h> 24 #include <linux/memblock.h> 25 #include <linux/nmi.h> 26 #include <linux/syscalls.h> 27 #include <linux/console.h> 28 #include <linux/highmem.h> 29 #include <linux/list.h> 30 #include <linux/slab.h> 31 #include <linux/compiler.h> 32 #include <linux/ktime.h> 33 #include <linux/set_memory.h> 34 35 #include <linux/uaccess.h> 36 #include <asm/mmu_context.h> 37 #include <asm/pgtable.h> 38 #include <asm/tlbflush.h> 39 #include <asm/io.h> 40 41 #include "power.h" 42 43 #if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY) 44 static bool hibernate_restore_protection; 45 static bool hibernate_restore_protection_active; 46 47 void enable_restore_image_protection(void) 48 { 49 hibernate_restore_protection = true; 50 } 51 52 static inline void hibernate_restore_protection_begin(void) 53 { 54 hibernate_restore_protection_active = hibernate_restore_protection; 55 } 56 57 static inline void hibernate_restore_protection_end(void) 58 { 59 hibernate_restore_protection_active = false; 60 } 61 62 static inline void hibernate_restore_protect_page(void *page_address) 63 { 64 if (hibernate_restore_protection_active) 65 set_memory_ro((unsigned long)page_address, 1); 66 } 67 68 static inline void hibernate_restore_unprotect_page(void *page_address) 69 { 70 if (hibernate_restore_protection_active) 71 set_memory_rw((unsigned long)page_address, 1); 72 } 73 #else 74 static inline void hibernate_restore_protection_begin(void) {} 75 static inline void hibernate_restore_protection_end(void) {} 76 static inline void hibernate_restore_protect_page(void *page_address) {} 77 static inline void hibernate_restore_unprotect_page(void *page_address) {} 78 #endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */ 79 80 static int swsusp_page_is_free(struct page *); 81 static void swsusp_set_page_forbidden(struct page *); 82 static void swsusp_unset_page_forbidden(struct page *); 83 84 /* 85 * Number of bytes to reserve for memory allocations made by device drivers 86 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't 87 * cause image creation to fail (tunable via /sys/power/reserved_size). 88 */ 89 unsigned long reserved_size; 90 91 void __init hibernate_reserved_size_init(void) 92 { 93 reserved_size = SPARE_PAGES * PAGE_SIZE; 94 } 95 96 /* 97 * Preferred image size in bytes (tunable via /sys/power/image_size). 98 * When it is set to N, swsusp will do its best to ensure the image 99 * size will not exceed N bytes, but if that is impossible, it will 100 * try to create the smallest image possible. 101 */ 102 unsigned long image_size; 103 104 void __init hibernate_image_size_init(void) 105 { 106 image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE; 107 } 108 109 /* 110 * List of PBEs needed for restoring the pages that were allocated before 111 * the suspend and included in the suspend image, but have also been 112 * allocated by the "resume" kernel, so their contents cannot be written 113 * directly to their "original" page frames. 114 */ 115 struct pbe *restore_pblist; 116 117 /* struct linked_page is used to build chains of pages */ 118 119 #define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) 120 121 struct linked_page { 122 struct linked_page *next; 123 char data[LINKED_PAGE_DATA_SIZE]; 124 } __packed; 125 126 /* 127 * List of "safe" pages (ie. pages that were not used by the image kernel 128 * before hibernation) that may be used as temporary storage for image kernel 129 * memory contents. 130 */ 131 static struct linked_page *safe_pages_list; 132 133 /* Pointer to an auxiliary buffer (1 page) */ 134 static void *buffer; 135 136 #define PG_ANY 0 137 #define PG_SAFE 1 138 #define PG_UNSAFE_CLEAR 1 139 #define PG_UNSAFE_KEEP 0 140 141 static unsigned int allocated_unsafe_pages; 142 143 /** 144 * get_image_page - Allocate a page for a hibernation image. 145 * @gfp_mask: GFP mask for the allocation. 146 * @safe_needed: Get pages that were not used before hibernation (restore only) 147 * 148 * During image restoration, for storing the PBE list and the image data, we can 149 * only use memory pages that do not conflict with the pages used before 150 * hibernation. The "unsafe" pages have PageNosaveFree set and we count them 151 * using allocated_unsafe_pages. 152 * 153 * Each allocated image page is marked as PageNosave and PageNosaveFree so that 154 * swsusp_free() can release it. 155 */ 156 static void *get_image_page(gfp_t gfp_mask, int safe_needed) 157 { 158 void *res; 159 160 res = (void *)get_zeroed_page(gfp_mask); 161 if (safe_needed) 162 while (res && swsusp_page_is_free(virt_to_page(res))) { 163 /* The page is unsafe, mark it for swsusp_free() */ 164 swsusp_set_page_forbidden(virt_to_page(res)); 165 allocated_unsafe_pages++; 166 res = (void *)get_zeroed_page(gfp_mask); 167 } 168 if (res) { 169 swsusp_set_page_forbidden(virt_to_page(res)); 170 swsusp_set_page_free(virt_to_page(res)); 171 } 172 return res; 173 } 174 175 static void *__get_safe_page(gfp_t gfp_mask) 176 { 177 if (safe_pages_list) { 178 void *ret = safe_pages_list; 179 180 safe_pages_list = safe_pages_list->next; 181 memset(ret, 0, PAGE_SIZE); 182 return ret; 183 } 184 return get_image_page(gfp_mask, PG_SAFE); 185 } 186 187 unsigned long get_safe_page(gfp_t gfp_mask) 188 { 189 return (unsigned long)__get_safe_page(gfp_mask); 190 } 191 192 static struct page *alloc_image_page(gfp_t gfp_mask) 193 { 194 struct page *page; 195 196 page = alloc_page(gfp_mask); 197 if (page) { 198 swsusp_set_page_forbidden(page); 199 swsusp_set_page_free(page); 200 } 201 return page; 202 } 203 204 static void recycle_safe_page(void *page_address) 205 { 206 struct linked_page *lp = page_address; 207 208 lp->next = safe_pages_list; 209 safe_pages_list = lp; 210 } 211 212 /** 213 * free_image_page - Free a page allocated for hibernation image. 214 * @addr: Address of the page to free. 215 * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page. 216 * 217 * The page to free should have been allocated by get_image_page() (page flags 218 * set by it are affected). 219 */ 220 static inline void free_image_page(void *addr, int clear_nosave_free) 221 { 222 struct page *page; 223 224 BUG_ON(!virt_addr_valid(addr)); 225 226 page = virt_to_page(addr); 227 228 swsusp_unset_page_forbidden(page); 229 if (clear_nosave_free) 230 swsusp_unset_page_free(page); 231 232 __free_page(page); 233 } 234 235 static inline void free_list_of_pages(struct linked_page *list, 236 int clear_page_nosave) 237 { 238 while (list) { 239 struct linked_page *lp = list->next; 240 241 free_image_page(list, clear_page_nosave); 242 list = lp; 243 } 244 } 245 246 /* 247 * struct chain_allocator is used for allocating small objects out of 248 * a linked list of pages called 'the chain'. 249 * 250 * The chain grows each time when there is no room for a new object in 251 * the current page. The allocated objects cannot be freed individually. 252 * It is only possible to free them all at once, by freeing the entire 253 * chain. 254 * 255 * NOTE: The chain allocator may be inefficient if the allocated objects 256 * are not much smaller than PAGE_SIZE. 257 */ 258 struct chain_allocator { 259 struct linked_page *chain; /* the chain */ 260 unsigned int used_space; /* total size of objects allocated out 261 of the current page */ 262 gfp_t gfp_mask; /* mask for allocating pages */ 263 int safe_needed; /* if set, only "safe" pages are allocated */ 264 }; 265 266 static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask, 267 int safe_needed) 268 { 269 ca->chain = NULL; 270 ca->used_space = LINKED_PAGE_DATA_SIZE; 271 ca->gfp_mask = gfp_mask; 272 ca->safe_needed = safe_needed; 273 } 274 275 static void *chain_alloc(struct chain_allocator *ca, unsigned int size) 276 { 277 void *ret; 278 279 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { 280 struct linked_page *lp; 281 282 lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) : 283 get_image_page(ca->gfp_mask, PG_ANY); 284 if (!lp) 285 return NULL; 286 287 lp->next = ca->chain; 288 ca->chain = lp; 289 ca->used_space = 0; 290 } 291 ret = ca->chain->data + ca->used_space; 292 ca->used_space += size; 293 return ret; 294 } 295 296 /** 297 * Data types related to memory bitmaps. 298 * 299 * Memory bitmap is a structure consiting of many linked lists of 300 * objects. The main list's elements are of type struct zone_bitmap 301 * and each of them corresonds to one zone. For each zone bitmap 302 * object there is a list of objects of type struct bm_block that 303 * represent each blocks of bitmap in which information is stored. 304 * 305 * struct memory_bitmap contains a pointer to the main list of zone 306 * bitmap objects, a struct bm_position used for browsing the bitmap, 307 * and a pointer to the list of pages used for allocating all of the 308 * zone bitmap objects and bitmap block objects. 309 * 310 * NOTE: It has to be possible to lay out the bitmap in memory 311 * using only allocations of order 0. Additionally, the bitmap is 312 * designed to work with arbitrary number of zones (this is over the 313 * top for now, but let's avoid making unnecessary assumptions ;-). 314 * 315 * struct zone_bitmap contains a pointer to a list of bitmap block 316 * objects and a pointer to the bitmap block object that has been 317 * most recently used for setting bits. Additionally, it contains the 318 * PFNs that correspond to the start and end of the represented zone. 319 * 320 * struct bm_block contains a pointer to the memory page in which 321 * information is stored (in the form of a block of bitmap) 322 * It also contains the pfns that correspond to the start and end of 323 * the represented memory area. 324 * 325 * The memory bitmap is organized as a radix tree to guarantee fast random 326 * access to the bits. There is one radix tree for each zone (as returned 327 * from create_mem_extents). 328 * 329 * One radix tree is represented by one struct mem_zone_bm_rtree. There are 330 * two linked lists for the nodes of the tree, one for the inner nodes and 331 * one for the leave nodes. The linked leave nodes are used for fast linear 332 * access of the memory bitmap. 333 * 334 * The struct rtree_node represents one node of the radix tree. 335 */ 336 337 #define BM_END_OF_MAP (~0UL) 338 339 #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) 340 #define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) 341 #define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) 342 343 /* 344 * struct rtree_node is a wrapper struct to link the nodes 345 * of the rtree together for easy linear iteration over 346 * bits and easy freeing 347 */ 348 struct rtree_node { 349 struct list_head list; 350 unsigned long *data; 351 }; 352 353 /* 354 * struct mem_zone_bm_rtree represents a bitmap used for one 355 * populated memory zone. 356 */ 357 struct mem_zone_bm_rtree { 358 struct list_head list; /* Link Zones together */ 359 struct list_head nodes; /* Radix Tree inner nodes */ 360 struct list_head leaves; /* Radix Tree leaves */ 361 unsigned long start_pfn; /* Zone start page frame */ 362 unsigned long end_pfn; /* Zone end page frame + 1 */ 363 struct rtree_node *rtree; /* Radix Tree Root */ 364 int levels; /* Number of Radix Tree Levels */ 365 unsigned int blocks; /* Number of Bitmap Blocks */ 366 }; 367 368 /* strcut bm_position is used for browsing memory bitmaps */ 369 370 struct bm_position { 371 struct mem_zone_bm_rtree *zone; 372 struct rtree_node *node; 373 unsigned long node_pfn; 374 int node_bit; 375 }; 376 377 struct memory_bitmap { 378 struct list_head zones; 379 struct linked_page *p_list; /* list of pages used to store zone 380 bitmap objects and bitmap block 381 objects */ 382 struct bm_position cur; /* most recently used bit position */ 383 }; 384 385 /* Functions that operate on memory bitmaps */ 386 387 #define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long)) 388 #if BITS_PER_LONG == 32 389 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2) 390 #else 391 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3) 392 #endif 393 #define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) 394 395 /** 396 * alloc_rtree_node - Allocate a new node and add it to the radix tree. 397 * 398 * This function is used to allocate inner nodes as well as the 399 * leave nodes of the radix tree. It also adds the node to the 400 * corresponding linked list passed in by the *list parameter. 401 */ 402 static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, 403 struct chain_allocator *ca, 404 struct list_head *list) 405 { 406 struct rtree_node *node; 407 408 node = chain_alloc(ca, sizeof(struct rtree_node)); 409 if (!node) 410 return NULL; 411 412 node->data = get_image_page(gfp_mask, safe_needed); 413 if (!node->data) 414 return NULL; 415 416 list_add_tail(&node->list, list); 417 418 return node; 419 } 420 421 /** 422 * add_rtree_block - Add a new leave node to the radix tree. 423 * 424 * The leave nodes need to be allocated in order to keep the leaves 425 * linked list in order. This is guaranteed by the zone->blocks 426 * counter. 427 */ 428 static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, 429 int safe_needed, struct chain_allocator *ca) 430 { 431 struct rtree_node *node, *block, **dst; 432 unsigned int levels_needed, block_nr; 433 int i; 434 435 block_nr = zone->blocks; 436 levels_needed = 0; 437 438 /* How many levels do we need for this block nr? */ 439 while (block_nr) { 440 levels_needed += 1; 441 block_nr >>= BM_RTREE_LEVEL_SHIFT; 442 } 443 444 /* Make sure the rtree has enough levels */ 445 for (i = zone->levels; i < levels_needed; i++) { 446 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 447 &zone->nodes); 448 if (!node) 449 return -ENOMEM; 450 451 node->data[0] = (unsigned long)zone->rtree; 452 zone->rtree = node; 453 zone->levels += 1; 454 } 455 456 /* Allocate new block */ 457 block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves); 458 if (!block) 459 return -ENOMEM; 460 461 /* Now walk the rtree to insert the block */ 462 node = zone->rtree; 463 dst = &zone->rtree; 464 block_nr = zone->blocks; 465 for (i = zone->levels; i > 0; i--) { 466 int index; 467 468 if (!node) { 469 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 470 &zone->nodes); 471 if (!node) 472 return -ENOMEM; 473 *dst = node; 474 } 475 476 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 477 index &= BM_RTREE_LEVEL_MASK; 478 dst = (struct rtree_node **)&((*dst)->data[index]); 479 node = *dst; 480 } 481 482 zone->blocks += 1; 483 *dst = block; 484 485 return 0; 486 } 487 488 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 489 int clear_nosave_free); 490 491 /** 492 * create_zone_bm_rtree - Create a radix tree for one zone. 493 * 494 * Allocated the mem_zone_bm_rtree structure and initializes it. 495 * This function also allocated and builds the radix tree for the 496 * zone. 497 */ 498 static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask, 499 int safe_needed, 500 struct chain_allocator *ca, 501 unsigned long start, 502 unsigned long end) 503 { 504 struct mem_zone_bm_rtree *zone; 505 unsigned int i, nr_blocks; 506 unsigned long pages; 507 508 pages = end - start; 509 zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree)); 510 if (!zone) 511 return NULL; 512 513 INIT_LIST_HEAD(&zone->nodes); 514 INIT_LIST_HEAD(&zone->leaves); 515 zone->start_pfn = start; 516 zone->end_pfn = end; 517 nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); 518 519 for (i = 0; i < nr_blocks; i++) { 520 if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) { 521 free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR); 522 return NULL; 523 } 524 } 525 526 return zone; 527 } 528 529 /** 530 * free_zone_bm_rtree - Free the memory of the radix tree. 531 * 532 * Free all node pages of the radix tree. The mem_zone_bm_rtree 533 * structure itself is not freed here nor are the rtree_node 534 * structs. 535 */ 536 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 537 int clear_nosave_free) 538 { 539 struct rtree_node *node; 540 541 list_for_each_entry(node, &zone->nodes, list) 542 free_image_page(node->data, clear_nosave_free); 543 544 list_for_each_entry(node, &zone->leaves, list) 545 free_image_page(node->data, clear_nosave_free); 546 } 547 548 static void memory_bm_position_reset(struct memory_bitmap *bm) 549 { 550 bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, 551 list); 552 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 553 struct rtree_node, list); 554 bm->cur.node_pfn = 0; 555 bm->cur.node_bit = 0; 556 } 557 558 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); 559 560 struct mem_extent { 561 struct list_head hook; 562 unsigned long start; 563 unsigned long end; 564 }; 565 566 /** 567 * free_mem_extents - Free a list of memory extents. 568 * @list: List of extents to free. 569 */ 570 static void free_mem_extents(struct list_head *list) 571 { 572 struct mem_extent *ext, *aux; 573 574 list_for_each_entry_safe(ext, aux, list, hook) { 575 list_del(&ext->hook); 576 kfree(ext); 577 } 578 } 579 580 /** 581 * create_mem_extents - Create a list of memory extents. 582 * @list: List to put the extents into. 583 * @gfp_mask: Mask to use for memory allocations. 584 * 585 * The extents represent contiguous ranges of PFNs. 586 */ 587 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) 588 { 589 struct zone *zone; 590 591 INIT_LIST_HEAD(list); 592 593 for_each_populated_zone(zone) { 594 unsigned long zone_start, zone_end; 595 struct mem_extent *ext, *cur, *aux; 596 597 zone_start = zone->zone_start_pfn; 598 zone_end = zone_end_pfn(zone); 599 600 list_for_each_entry(ext, list, hook) 601 if (zone_start <= ext->end) 602 break; 603 604 if (&ext->hook == list || zone_end < ext->start) { 605 /* New extent is necessary */ 606 struct mem_extent *new_ext; 607 608 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); 609 if (!new_ext) { 610 free_mem_extents(list); 611 return -ENOMEM; 612 } 613 new_ext->start = zone_start; 614 new_ext->end = zone_end; 615 list_add_tail(&new_ext->hook, &ext->hook); 616 continue; 617 } 618 619 /* Merge this zone's range of PFNs with the existing one */ 620 if (zone_start < ext->start) 621 ext->start = zone_start; 622 if (zone_end > ext->end) 623 ext->end = zone_end; 624 625 /* More merging may be possible */ 626 cur = ext; 627 list_for_each_entry_safe_continue(cur, aux, list, hook) { 628 if (zone_end < cur->start) 629 break; 630 if (zone_end < cur->end) 631 ext->end = cur->end; 632 list_del(&cur->hook); 633 kfree(cur); 634 } 635 } 636 637 return 0; 638 } 639 640 /** 641 * memory_bm_create - Allocate memory for a memory bitmap. 642 */ 643 static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, 644 int safe_needed) 645 { 646 struct chain_allocator ca; 647 struct list_head mem_extents; 648 struct mem_extent *ext; 649 int error; 650 651 chain_init(&ca, gfp_mask, safe_needed); 652 INIT_LIST_HEAD(&bm->zones); 653 654 error = create_mem_extents(&mem_extents, gfp_mask); 655 if (error) 656 return error; 657 658 list_for_each_entry(ext, &mem_extents, hook) { 659 struct mem_zone_bm_rtree *zone; 660 661 zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, 662 ext->start, ext->end); 663 if (!zone) { 664 error = -ENOMEM; 665 goto Error; 666 } 667 list_add_tail(&zone->list, &bm->zones); 668 } 669 670 bm->p_list = ca.chain; 671 memory_bm_position_reset(bm); 672 Exit: 673 free_mem_extents(&mem_extents); 674 return error; 675 676 Error: 677 bm->p_list = ca.chain; 678 memory_bm_free(bm, PG_UNSAFE_CLEAR); 679 goto Exit; 680 } 681 682 /** 683 * memory_bm_free - Free memory occupied by the memory bitmap. 684 * @bm: Memory bitmap. 685 */ 686 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) 687 { 688 struct mem_zone_bm_rtree *zone; 689 690 list_for_each_entry(zone, &bm->zones, list) 691 free_zone_bm_rtree(zone, clear_nosave_free); 692 693 free_list_of_pages(bm->p_list, clear_nosave_free); 694 695 INIT_LIST_HEAD(&bm->zones); 696 } 697 698 /** 699 * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap. 700 * 701 * Find the bit in memory bitmap @bm that corresponds to the given PFN. 702 * The cur.zone, cur.block and cur.node_pfn members of @bm are updated. 703 * 704 * Walk the radix tree to find the page containing the bit that represents @pfn 705 * and return the position of the bit in @addr and @bit_nr. 706 */ 707 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, 708 void **addr, unsigned int *bit_nr) 709 { 710 struct mem_zone_bm_rtree *curr, *zone; 711 struct rtree_node *node; 712 int i, block_nr; 713 714 zone = bm->cur.zone; 715 716 if (pfn >= zone->start_pfn && pfn < zone->end_pfn) 717 goto zone_found; 718 719 zone = NULL; 720 721 /* Find the right zone */ 722 list_for_each_entry(curr, &bm->zones, list) { 723 if (pfn >= curr->start_pfn && pfn < curr->end_pfn) { 724 zone = curr; 725 break; 726 } 727 } 728 729 if (!zone) 730 return -EFAULT; 731 732 zone_found: 733 /* 734 * We have found the zone. Now walk the radix tree to find the leaf node 735 * for our PFN. 736 */ 737 738 /* 739 * If the zone we wish to scan is the the current zone and the 740 * pfn falls into the current node then we do not need to walk 741 * the tree. 742 */ 743 node = bm->cur.node; 744 if (zone == bm->cur.zone && 745 ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) 746 goto node_found; 747 748 node = zone->rtree; 749 block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT; 750 751 for (i = zone->levels; i > 0; i--) { 752 int index; 753 754 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 755 index &= BM_RTREE_LEVEL_MASK; 756 BUG_ON(node->data[index] == 0); 757 node = (struct rtree_node *)node->data[index]; 758 } 759 760 node_found: 761 /* Update last position */ 762 bm->cur.zone = zone; 763 bm->cur.node = node; 764 bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK; 765 766 /* Set return values */ 767 *addr = node->data; 768 *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK; 769 770 return 0; 771 } 772 773 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) 774 { 775 void *addr; 776 unsigned int bit; 777 int error; 778 779 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 780 BUG_ON(error); 781 set_bit(bit, addr); 782 } 783 784 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) 785 { 786 void *addr; 787 unsigned int bit; 788 int error; 789 790 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 791 if (!error) 792 set_bit(bit, addr); 793 794 return error; 795 } 796 797 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) 798 { 799 void *addr; 800 unsigned int bit; 801 int error; 802 803 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 804 BUG_ON(error); 805 clear_bit(bit, addr); 806 } 807 808 static void memory_bm_clear_current(struct memory_bitmap *bm) 809 { 810 int bit; 811 812 bit = max(bm->cur.node_bit - 1, 0); 813 clear_bit(bit, bm->cur.node->data); 814 } 815 816 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) 817 { 818 void *addr; 819 unsigned int bit; 820 int error; 821 822 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 823 BUG_ON(error); 824 return test_bit(bit, addr); 825 } 826 827 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) 828 { 829 void *addr; 830 unsigned int bit; 831 832 return !memory_bm_find_bit(bm, pfn, &addr, &bit); 833 } 834 835 /* 836 * rtree_next_node - Jump to the next leaf node. 837 * 838 * Set the position to the beginning of the next node in the 839 * memory bitmap. This is either the next node in the current 840 * zone's radix tree or the first node in the radix tree of the 841 * next zone. 842 * 843 * Return true if there is a next node, false otherwise. 844 */ 845 static bool rtree_next_node(struct memory_bitmap *bm) 846 { 847 if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) { 848 bm->cur.node = list_entry(bm->cur.node->list.next, 849 struct rtree_node, list); 850 bm->cur.node_pfn += BM_BITS_PER_BLOCK; 851 bm->cur.node_bit = 0; 852 touch_softlockup_watchdog(); 853 return true; 854 } 855 856 /* No more nodes, goto next zone */ 857 if (!list_is_last(&bm->cur.zone->list, &bm->zones)) { 858 bm->cur.zone = list_entry(bm->cur.zone->list.next, 859 struct mem_zone_bm_rtree, list); 860 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 861 struct rtree_node, list); 862 bm->cur.node_pfn = 0; 863 bm->cur.node_bit = 0; 864 return true; 865 } 866 867 /* No more zones */ 868 return false; 869 } 870 871 /** 872 * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap. 873 * @bm: Memory bitmap. 874 * 875 * Starting from the last returned position this function searches for the next 876 * set bit in @bm and returns the PFN represented by it. If no more bits are 877 * set, BM_END_OF_MAP is returned. 878 * 879 * It is required to run memory_bm_position_reset() before the first call to 880 * this function for the given memory bitmap. 881 */ 882 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) 883 { 884 unsigned long bits, pfn, pages; 885 int bit; 886 887 do { 888 pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn; 889 bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK); 890 bit = find_next_bit(bm->cur.node->data, bits, 891 bm->cur.node_bit); 892 if (bit < bits) { 893 pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit; 894 bm->cur.node_bit = bit + 1; 895 return pfn; 896 } 897 } while (rtree_next_node(bm)); 898 899 return BM_END_OF_MAP; 900 } 901 902 /* 903 * This structure represents a range of page frames the contents of which 904 * should not be saved during hibernation. 905 */ 906 struct nosave_region { 907 struct list_head list; 908 unsigned long start_pfn; 909 unsigned long end_pfn; 910 }; 911 912 static LIST_HEAD(nosave_regions); 913 914 static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone) 915 { 916 struct rtree_node *node; 917 918 list_for_each_entry(node, &zone->nodes, list) 919 recycle_safe_page(node->data); 920 921 list_for_each_entry(node, &zone->leaves, list) 922 recycle_safe_page(node->data); 923 } 924 925 static void memory_bm_recycle(struct memory_bitmap *bm) 926 { 927 struct mem_zone_bm_rtree *zone; 928 struct linked_page *p_list; 929 930 list_for_each_entry(zone, &bm->zones, list) 931 recycle_zone_bm_rtree(zone); 932 933 p_list = bm->p_list; 934 while (p_list) { 935 struct linked_page *lp = p_list; 936 937 p_list = lp->next; 938 recycle_safe_page(lp); 939 } 940 } 941 942 /** 943 * register_nosave_region - Register a region of unsaveable memory. 944 * 945 * Register a range of page frames the contents of which should not be saved 946 * during hibernation (to be used in the early initialization code). 947 */ 948 void __init __register_nosave_region(unsigned long start_pfn, 949 unsigned long end_pfn, int use_kmalloc) 950 { 951 struct nosave_region *region; 952 953 if (start_pfn >= end_pfn) 954 return; 955 956 if (!list_empty(&nosave_regions)) { 957 /* Try to extend the previous region (they should be sorted) */ 958 region = list_entry(nosave_regions.prev, 959 struct nosave_region, list); 960 if (region->end_pfn == start_pfn) { 961 region->end_pfn = end_pfn; 962 goto Report; 963 } 964 } 965 if (use_kmalloc) { 966 /* During init, this shouldn't fail */ 967 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); 968 BUG_ON(!region); 969 } else { 970 /* This allocation cannot fail */ 971 region = memblock_alloc(sizeof(struct nosave_region), 972 SMP_CACHE_BYTES); 973 if (!region) 974 panic("%s: Failed to allocate %zu bytes\n", __func__, 975 sizeof(struct nosave_region)); 976 } 977 region->start_pfn = start_pfn; 978 region->end_pfn = end_pfn; 979 list_add_tail(®ion->list, &nosave_regions); 980 Report: 981 pr_info("Registered nosave memory: [mem %#010llx-%#010llx]\n", 982 (unsigned long long) start_pfn << PAGE_SHIFT, 983 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); 984 } 985 986 /* 987 * Set bits in this map correspond to the page frames the contents of which 988 * should not be saved during the suspend. 989 */ 990 static struct memory_bitmap *forbidden_pages_map; 991 992 /* Set bits in this map correspond to free page frames. */ 993 static struct memory_bitmap *free_pages_map; 994 995 /* 996 * Each page frame allocated for creating the image is marked by setting the 997 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously 998 */ 999 1000 void swsusp_set_page_free(struct page *page) 1001 { 1002 if (free_pages_map) 1003 memory_bm_set_bit(free_pages_map, page_to_pfn(page)); 1004 } 1005 1006 static int swsusp_page_is_free(struct page *page) 1007 { 1008 return free_pages_map ? 1009 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; 1010 } 1011 1012 void swsusp_unset_page_free(struct page *page) 1013 { 1014 if (free_pages_map) 1015 memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); 1016 } 1017 1018 static void swsusp_set_page_forbidden(struct page *page) 1019 { 1020 if (forbidden_pages_map) 1021 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); 1022 } 1023 1024 int swsusp_page_is_forbidden(struct page *page) 1025 { 1026 return forbidden_pages_map ? 1027 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; 1028 } 1029 1030 static void swsusp_unset_page_forbidden(struct page *page) 1031 { 1032 if (forbidden_pages_map) 1033 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); 1034 } 1035 1036 /** 1037 * mark_nosave_pages - Mark pages that should not be saved. 1038 * @bm: Memory bitmap. 1039 * 1040 * Set the bits in @bm that correspond to the page frames the contents of which 1041 * should not be saved. 1042 */ 1043 static void mark_nosave_pages(struct memory_bitmap *bm) 1044 { 1045 struct nosave_region *region; 1046 1047 if (list_empty(&nosave_regions)) 1048 return; 1049 1050 list_for_each_entry(region, &nosave_regions, list) { 1051 unsigned long pfn; 1052 1053 pr_debug("Marking nosave pages: [mem %#010llx-%#010llx]\n", 1054 (unsigned long long) region->start_pfn << PAGE_SHIFT, 1055 ((unsigned long long) region->end_pfn << PAGE_SHIFT) 1056 - 1); 1057 1058 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) 1059 if (pfn_valid(pfn)) { 1060 /* 1061 * It is safe to ignore the result of 1062 * mem_bm_set_bit_check() here, since we won't 1063 * touch the PFNs for which the error is 1064 * returned anyway. 1065 */ 1066 mem_bm_set_bit_check(bm, pfn); 1067 } 1068 } 1069 } 1070 1071 /** 1072 * create_basic_memory_bitmaps - Create bitmaps to hold basic page information. 1073 * 1074 * Create bitmaps needed for marking page frames that should not be saved and 1075 * free page frames. The forbidden_pages_map and free_pages_map pointers are 1076 * only modified if everything goes well, because we don't want the bits to be 1077 * touched before both bitmaps are set up. 1078 */ 1079 int create_basic_memory_bitmaps(void) 1080 { 1081 struct memory_bitmap *bm1, *bm2; 1082 int error = 0; 1083 1084 if (forbidden_pages_map && free_pages_map) 1085 return 0; 1086 else 1087 BUG_ON(forbidden_pages_map || free_pages_map); 1088 1089 bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 1090 if (!bm1) 1091 return -ENOMEM; 1092 1093 error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); 1094 if (error) 1095 goto Free_first_object; 1096 1097 bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 1098 if (!bm2) 1099 goto Free_first_bitmap; 1100 1101 error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); 1102 if (error) 1103 goto Free_second_object; 1104 1105 forbidden_pages_map = bm1; 1106 free_pages_map = bm2; 1107 mark_nosave_pages(forbidden_pages_map); 1108 1109 pr_debug("Basic memory bitmaps created\n"); 1110 1111 return 0; 1112 1113 Free_second_object: 1114 kfree(bm2); 1115 Free_first_bitmap: 1116 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1117 Free_first_object: 1118 kfree(bm1); 1119 return -ENOMEM; 1120 } 1121 1122 /** 1123 * free_basic_memory_bitmaps - Free memory bitmaps holding basic information. 1124 * 1125 * Free memory bitmaps allocated by create_basic_memory_bitmaps(). The 1126 * auxiliary pointers are necessary so that the bitmaps themselves are not 1127 * referred to while they are being freed. 1128 */ 1129 void free_basic_memory_bitmaps(void) 1130 { 1131 struct memory_bitmap *bm1, *bm2; 1132 1133 if (WARN_ON(!(forbidden_pages_map && free_pages_map))) 1134 return; 1135 1136 bm1 = forbidden_pages_map; 1137 bm2 = free_pages_map; 1138 forbidden_pages_map = NULL; 1139 free_pages_map = NULL; 1140 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1141 kfree(bm1); 1142 memory_bm_free(bm2, PG_UNSAFE_CLEAR); 1143 kfree(bm2); 1144 1145 pr_debug("Basic memory bitmaps freed\n"); 1146 } 1147 1148 void clear_free_pages(void) 1149 { 1150 #ifdef CONFIG_PAGE_POISONING_ZERO 1151 struct memory_bitmap *bm = free_pages_map; 1152 unsigned long pfn; 1153 1154 if (WARN_ON(!(free_pages_map))) 1155 return; 1156 1157 memory_bm_position_reset(bm); 1158 pfn = memory_bm_next_pfn(bm); 1159 while (pfn != BM_END_OF_MAP) { 1160 if (pfn_valid(pfn)) 1161 clear_highpage(pfn_to_page(pfn)); 1162 1163 pfn = memory_bm_next_pfn(bm); 1164 } 1165 memory_bm_position_reset(bm); 1166 pr_info("free pages cleared after restore\n"); 1167 #endif /* PAGE_POISONING_ZERO */ 1168 } 1169 1170 /** 1171 * snapshot_additional_pages - Estimate the number of extra pages needed. 1172 * @zone: Memory zone to carry out the computation for. 1173 * 1174 * Estimate the number of additional pages needed for setting up a hibernation 1175 * image data structures for @zone (usually, the returned value is greater than 1176 * the exact number). 1177 */ 1178 unsigned int snapshot_additional_pages(struct zone *zone) 1179 { 1180 unsigned int rtree, nodes; 1181 1182 rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); 1183 rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), 1184 LINKED_PAGE_DATA_SIZE); 1185 while (nodes > 1) { 1186 nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL); 1187 rtree += nodes; 1188 } 1189 1190 return 2 * rtree; 1191 } 1192 1193 #ifdef CONFIG_HIGHMEM 1194 /** 1195 * count_free_highmem_pages - Compute the total number of free highmem pages. 1196 * 1197 * The returned number is system-wide. 1198 */ 1199 static unsigned int count_free_highmem_pages(void) 1200 { 1201 struct zone *zone; 1202 unsigned int cnt = 0; 1203 1204 for_each_populated_zone(zone) 1205 if (is_highmem(zone)) 1206 cnt += zone_page_state(zone, NR_FREE_PAGES); 1207 1208 return cnt; 1209 } 1210 1211 /** 1212 * saveable_highmem_page - Check if a highmem page is saveable. 1213 * 1214 * Determine whether a highmem page should be included in a hibernation image. 1215 * 1216 * We should save the page if it isn't Nosave or NosaveFree, or Reserved, 1217 * and it isn't part of a free chunk of pages. 1218 */ 1219 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) 1220 { 1221 struct page *page; 1222 1223 if (!pfn_valid(pfn)) 1224 return NULL; 1225 1226 page = pfn_to_online_page(pfn); 1227 if (!page || page_zone(page) != zone) 1228 return NULL; 1229 1230 BUG_ON(!PageHighMem(page)); 1231 1232 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 1233 return NULL; 1234 1235 if (PageReserved(page) || PageOffline(page)) 1236 return NULL; 1237 1238 if (page_is_guard(page)) 1239 return NULL; 1240 1241 return page; 1242 } 1243 1244 /** 1245 * count_highmem_pages - Compute the total number of saveable highmem pages. 1246 */ 1247 static unsigned int count_highmem_pages(void) 1248 { 1249 struct zone *zone; 1250 unsigned int n = 0; 1251 1252 for_each_populated_zone(zone) { 1253 unsigned long pfn, max_zone_pfn; 1254 1255 if (!is_highmem(zone)) 1256 continue; 1257 1258 mark_free_pages(zone); 1259 max_zone_pfn = zone_end_pfn(zone); 1260 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1261 if (saveable_highmem_page(zone, pfn)) 1262 n++; 1263 } 1264 return n; 1265 } 1266 #else 1267 static inline void *saveable_highmem_page(struct zone *z, unsigned long p) 1268 { 1269 return NULL; 1270 } 1271 #endif /* CONFIG_HIGHMEM */ 1272 1273 /** 1274 * saveable_page - Check if the given page is saveable. 1275 * 1276 * Determine whether a non-highmem page should be included in a hibernation 1277 * image. 1278 * 1279 * We should save the page if it isn't Nosave, and is not in the range 1280 * of pages statically defined as 'unsaveable', and it isn't part of 1281 * a free chunk of pages. 1282 */ 1283 static struct page *saveable_page(struct zone *zone, unsigned long pfn) 1284 { 1285 struct page *page; 1286 1287 if (!pfn_valid(pfn)) 1288 return NULL; 1289 1290 page = pfn_to_online_page(pfn); 1291 if (!page || page_zone(page) != zone) 1292 return NULL; 1293 1294 BUG_ON(PageHighMem(page)); 1295 1296 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 1297 return NULL; 1298 1299 if (PageOffline(page)) 1300 return NULL; 1301 1302 if (PageReserved(page) 1303 && (!kernel_page_present(page) || pfn_is_nosave(pfn))) 1304 return NULL; 1305 1306 if (page_is_guard(page)) 1307 return NULL; 1308 1309 return page; 1310 } 1311 1312 /** 1313 * count_data_pages - Compute the total number of saveable non-highmem pages. 1314 */ 1315 static unsigned int count_data_pages(void) 1316 { 1317 struct zone *zone; 1318 unsigned long pfn, max_zone_pfn; 1319 unsigned int n = 0; 1320 1321 for_each_populated_zone(zone) { 1322 if (is_highmem(zone)) 1323 continue; 1324 1325 mark_free_pages(zone); 1326 max_zone_pfn = zone_end_pfn(zone); 1327 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1328 if (saveable_page(zone, pfn)) 1329 n++; 1330 } 1331 return n; 1332 } 1333 1334 /* 1335 * This is needed, because copy_page and memcpy are not usable for copying 1336 * task structs. 1337 */ 1338 static inline void do_copy_page(long *dst, long *src) 1339 { 1340 int n; 1341 1342 for (n = PAGE_SIZE / sizeof(long); n; n--) 1343 *dst++ = *src++; 1344 } 1345 1346 /** 1347 * safe_copy_page - Copy a page in a safe way. 1348 * 1349 * Check if the page we are going to copy is marked as present in the kernel 1350 * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or 1351 * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present() 1352 * always returns 'true'. 1353 */ 1354 static void safe_copy_page(void *dst, struct page *s_page) 1355 { 1356 if (kernel_page_present(s_page)) { 1357 do_copy_page(dst, page_address(s_page)); 1358 } else { 1359 kernel_map_pages(s_page, 1, 1); 1360 do_copy_page(dst, page_address(s_page)); 1361 kernel_map_pages(s_page, 1, 0); 1362 } 1363 } 1364 1365 #ifdef CONFIG_HIGHMEM 1366 static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn) 1367 { 1368 return is_highmem(zone) ? 1369 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); 1370 } 1371 1372 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1373 { 1374 struct page *s_page, *d_page; 1375 void *src, *dst; 1376 1377 s_page = pfn_to_page(src_pfn); 1378 d_page = pfn_to_page(dst_pfn); 1379 if (PageHighMem(s_page)) { 1380 src = kmap_atomic(s_page); 1381 dst = kmap_atomic(d_page); 1382 do_copy_page(dst, src); 1383 kunmap_atomic(dst); 1384 kunmap_atomic(src); 1385 } else { 1386 if (PageHighMem(d_page)) { 1387 /* 1388 * The page pointed to by src may contain some kernel 1389 * data modified by kmap_atomic() 1390 */ 1391 safe_copy_page(buffer, s_page); 1392 dst = kmap_atomic(d_page); 1393 copy_page(dst, buffer); 1394 kunmap_atomic(dst); 1395 } else { 1396 safe_copy_page(page_address(d_page), s_page); 1397 } 1398 } 1399 } 1400 #else 1401 #define page_is_saveable(zone, pfn) saveable_page(zone, pfn) 1402 1403 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1404 { 1405 safe_copy_page(page_address(pfn_to_page(dst_pfn)), 1406 pfn_to_page(src_pfn)); 1407 } 1408 #endif /* CONFIG_HIGHMEM */ 1409 1410 static void copy_data_pages(struct memory_bitmap *copy_bm, 1411 struct memory_bitmap *orig_bm) 1412 { 1413 struct zone *zone; 1414 unsigned long pfn; 1415 1416 for_each_populated_zone(zone) { 1417 unsigned long max_zone_pfn; 1418 1419 mark_free_pages(zone); 1420 max_zone_pfn = zone_end_pfn(zone); 1421 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1422 if (page_is_saveable(zone, pfn)) 1423 memory_bm_set_bit(orig_bm, pfn); 1424 } 1425 memory_bm_position_reset(orig_bm); 1426 memory_bm_position_reset(copy_bm); 1427 for(;;) { 1428 pfn = memory_bm_next_pfn(orig_bm); 1429 if (unlikely(pfn == BM_END_OF_MAP)) 1430 break; 1431 copy_data_page(memory_bm_next_pfn(copy_bm), pfn); 1432 } 1433 } 1434 1435 /* Total number of image pages */ 1436 static unsigned int nr_copy_pages; 1437 /* Number of pages needed for saving the original pfns of the image pages */ 1438 static unsigned int nr_meta_pages; 1439 /* 1440 * Numbers of normal and highmem page frames allocated for hibernation image 1441 * before suspending devices. 1442 */ 1443 static unsigned int alloc_normal, alloc_highmem; 1444 /* 1445 * Memory bitmap used for marking saveable pages (during hibernation) or 1446 * hibernation image pages (during restore) 1447 */ 1448 static struct memory_bitmap orig_bm; 1449 /* 1450 * Memory bitmap used during hibernation for marking allocated page frames that 1451 * will contain copies of saveable pages. During restore it is initially used 1452 * for marking hibernation image pages, but then the set bits from it are 1453 * duplicated in @orig_bm and it is released. On highmem systems it is next 1454 * used for marking "safe" highmem pages, but it has to be reinitialized for 1455 * this purpose. 1456 */ 1457 static struct memory_bitmap copy_bm; 1458 1459 /** 1460 * swsusp_free - Free pages allocated for hibernation image. 1461 * 1462 * Image pages are alocated before snapshot creation, so they need to be 1463 * released after resume. 1464 */ 1465 void swsusp_free(void) 1466 { 1467 unsigned long fb_pfn, fr_pfn; 1468 1469 if (!forbidden_pages_map || !free_pages_map) 1470 goto out; 1471 1472 memory_bm_position_reset(forbidden_pages_map); 1473 memory_bm_position_reset(free_pages_map); 1474 1475 loop: 1476 fr_pfn = memory_bm_next_pfn(free_pages_map); 1477 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1478 1479 /* 1480 * Find the next bit set in both bitmaps. This is guaranteed to 1481 * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP. 1482 */ 1483 do { 1484 if (fb_pfn < fr_pfn) 1485 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1486 if (fr_pfn < fb_pfn) 1487 fr_pfn = memory_bm_next_pfn(free_pages_map); 1488 } while (fb_pfn != fr_pfn); 1489 1490 if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) { 1491 struct page *page = pfn_to_page(fr_pfn); 1492 1493 memory_bm_clear_current(forbidden_pages_map); 1494 memory_bm_clear_current(free_pages_map); 1495 hibernate_restore_unprotect_page(page_address(page)); 1496 __free_page(page); 1497 goto loop; 1498 } 1499 1500 out: 1501 nr_copy_pages = 0; 1502 nr_meta_pages = 0; 1503 restore_pblist = NULL; 1504 buffer = NULL; 1505 alloc_normal = 0; 1506 alloc_highmem = 0; 1507 hibernate_restore_protection_end(); 1508 } 1509 1510 /* Helper functions used for the shrinking of memory. */ 1511 1512 #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) 1513 1514 /** 1515 * preallocate_image_pages - Allocate a number of pages for hibernation image. 1516 * @nr_pages: Number of page frames to allocate. 1517 * @mask: GFP flags to use for the allocation. 1518 * 1519 * Return value: Number of page frames actually allocated 1520 */ 1521 static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) 1522 { 1523 unsigned long nr_alloc = 0; 1524 1525 while (nr_pages > 0) { 1526 struct page *page; 1527 1528 page = alloc_image_page(mask); 1529 if (!page) 1530 break; 1531 memory_bm_set_bit(©_bm, page_to_pfn(page)); 1532 if (PageHighMem(page)) 1533 alloc_highmem++; 1534 else 1535 alloc_normal++; 1536 nr_pages--; 1537 nr_alloc++; 1538 } 1539 1540 return nr_alloc; 1541 } 1542 1543 static unsigned long preallocate_image_memory(unsigned long nr_pages, 1544 unsigned long avail_normal) 1545 { 1546 unsigned long alloc; 1547 1548 if (avail_normal <= alloc_normal) 1549 return 0; 1550 1551 alloc = avail_normal - alloc_normal; 1552 if (nr_pages < alloc) 1553 alloc = nr_pages; 1554 1555 return preallocate_image_pages(alloc, GFP_IMAGE); 1556 } 1557 1558 #ifdef CONFIG_HIGHMEM 1559 static unsigned long preallocate_image_highmem(unsigned long nr_pages) 1560 { 1561 return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); 1562 } 1563 1564 /** 1565 * __fraction - Compute (an approximation of) x * (multiplier / base). 1566 */ 1567 static unsigned long __fraction(u64 x, u64 multiplier, u64 base) 1568 { 1569 x *= multiplier; 1570 do_div(x, base); 1571 return (unsigned long)x; 1572 } 1573 1574 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1575 unsigned long highmem, 1576 unsigned long total) 1577 { 1578 unsigned long alloc = __fraction(nr_pages, highmem, total); 1579 1580 return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); 1581 } 1582 #else /* CONFIG_HIGHMEM */ 1583 static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) 1584 { 1585 return 0; 1586 } 1587 1588 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1589 unsigned long highmem, 1590 unsigned long total) 1591 { 1592 return 0; 1593 } 1594 #endif /* CONFIG_HIGHMEM */ 1595 1596 /** 1597 * free_unnecessary_pages - Release preallocated pages not needed for the image. 1598 */ 1599 static unsigned long free_unnecessary_pages(void) 1600 { 1601 unsigned long save, to_free_normal, to_free_highmem, free; 1602 1603 save = count_data_pages(); 1604 if (alloc_normal >= save) { 1605 to_free_normal = alloc_normal - save; 1606 save = 0; 1607 } else { 1608 to_free_normal = 0; 1609 save -= alloc_normal; 1610 } 1611 save += count_highmem_pages(); 1612 if (alloc_highmem >= save) { 1613 to_free_highmem = alloc_highmem - save; 1614 } else { 1615 to_free_highmem = 0; 1616 save -= alloc_highmem; 1617 if (to_free_normal > save) 1618 to_free_normal -= save; 1619 else 1620 to_free_normal = 0; 1621 } 1622 free = to_free_normal + to_free_highmem; 1623 1624 memory_bm_position_reset(©_bm); 1625 1626 while (to_free_normal > 0 || to_free_highmem > 0) { 1627 unsigned long pfn = memory_bm_next_pfn(©_bm); 1628 struct page *page = pfn_to_page(pfn); 1629 1630 if (PageHighMem(page)) { 1631 if (!to_free_highmem) 1632 continue; 1633 to_free_highmem--; 1634 alloc_highmem--; 1635 } else { 1636 if (!to_free_normal) 1637 continue; 1638 to_free_normal--; 1639 alloc_normal--; 1640 } 1641 memory_bm_clear_bit(©_bm, pfn); 1642 swsusp_unset_page_forbidden(page); 1643 swsusp_unset_page_free(page); 1644 __free_page(page); 1645 } 1646 1647 return free; 1648 } 1649 1650 /** 1651 * minimum_image_size - Estimate the minimum acceptable size of an image. 1652 * @saveable: Number of saveable pages in the system. 1653 * 1654 * We want to avoid attempting to free too much memory too hard, so estimate the 1655 * minimum acceptable size of a hibernation image to use as the lower limit for 1656 * preallocating memory. 1657 * 1658 * We assume that the minimum image size should be proportional to 1659 * 1660 * [number of saveable pages] - [number of pages that can be freed in theory] 1661 * 1662 * where the second term is the sum of (1) reclaimable slab pages, (2) active 1663 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages. 1664 */ 1665 static unsigned long minimum_image_size(unsigned long saveable) 1666 { 1667 unsigned long size; 1668 1669 size = global_node_page_state(NR_SLAB_RECLAIMABLE) 1670 + global_node_page_state(NR_ACTIVE_ANON) 1671 + global_node_page_state(NR_INACTIVE_ANON) 1672 + global_node_page_state(NR_ACTIVE_FILE) 1673 + global_node_page_state(NR_INACTIVE_FILE); 1674 1675 return saveable <= size ? 0 : saveable - size; 1676 } 1677 1678 /** 1679 * hibernate_preallocate_memory - Preallocate memory for hibernation image. 1680 * 1681 * To create a hibernation image it is necessary to make a copy of every page 1682 * frame in use. We also need a number of page frames to be free during 1683 * hibernation for allocations made while saving the image and for device 1684 * drivers, in case they need to allocate memory from their hibernation 1685 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough 1686 * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through 1687 * /sys/power/reserved_size, respectively). To make this happen, we compute the 1688 * total number of available page frames and allocate at least 1689 * 1690 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 1691 * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) 1692 * 1693 * of them, which corresponds to the maximum size of a hibernation image. 1694 * 1695 * If image_size is set below the number following from the above formula, 1696 * the preallocation of memory is continued until the total number of saveable 1697 * pages in the system is below the requested image size or the minimum 1698 * acceptable image size returned by minimum_image_size(), whichever is greater. 1699 */ 1700 int hibernate_preallocate_memory(void) 1701 { 1702 struct zone *zone; 1703 unsigned long saveable, size, max_size, count, highmem, pages = 0; 1704 unsigned long alloc, save_highmem, pages_highmem, avail_normal; 1705 ktime_t start, stop; 1706 int error; 1707 1708 pr_info("Preallocating image memory... "); 1709 start = ktime_get(); 1710 1711 error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); 1712 if (error) 1713 goto err_out; 1714 1715 error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); 1716 if (error) 1717 goto err_out; 1718 1719 alloc_normal = 0; 1720 alloc_highmem = 0; 1721 1722 /* Count the number of saveable data pages. */ 1723 save_highmem = count_highmem_pages(); 1724 saveable = count_data_pages(); 1725 1726 /* 1727 * Compute the total number of page frames we can use (count) and the 1728 * number of pages needed for image metadata (size). 1729 */ 1730 count = saveable; 1731 saveable += save_highmem; 1732 highmem = save_highmem; 1733 size = 0; 1734 for_each_populated_zone(zone) { 1735 size += snapshot_additional_pages(zone); 1736 if (is_highmem(zone)) 1737 highmem += zone_page_state(zone, NR_FREE_PAGES); 1738 else 1739 count += zone_page_state(zone, NR_FREE_PAGES); 1740 } 1741 avail_normal = count; 1742 count += highmem; 1743 count -= totalreserve_pages; 1744 1745 /* Add number of pages required for page keys (s390 only). */ 1746 size += page_key_additional_pages(saveable); 1747 1748 /* Compute the maximum number of saveable pages to leave in memory. */ 1749 max_size = (count - (size + PAGES_FOR_IO)) / 2 1750 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); 1751 /* Compute the desired number of image pages specified by image_size. */ 1752 size = DIV_ROUND_UP(image_size, PAGE_SIZE); 1753 if (size > max_size) 1754 size = max_size; 1755 /* 1756 * If the desired number of image pages is at least as large as the 1757 * current number of saveable pages in memory, allocate page frames for 1758 * the image and we're done. 1759 */ 1760 if (size >= saveable) { 1761 pages = preallocate_image_highmem(save_highmem); 1762 pages += preallocate_image_memory(saveable - pages, avail_normal); 1763 goto out; 1764 } 1765 1766 /* Estimate the minimum size of the image. */ 1767 pages = minimum_image_size(saveable); 1768 /* 1769 * To avoid excessive pressure on the normal zone, leave room in it to 1770 * accommodate an image of the minimum size (unless it's already too 1771 * small, in which case don't preallocate pages from it at all). 1772 */ 1773 if (avail_normal > pages) 1774 avail_normal -= pages; 1775 else 1776 avail_normal = 0; 1777 if (size < pages) 1778 size = min_t(unsigned long, pages, max_size); 1779 1780 /* 1781 * Let the memory management subsystem know that we're going to need a 1782 * large number of page frames to allocate and make it free some memory. 1783 * NOTE: If this is not done, performance will be hurt badly in some 1784 * test cases. 1785 */ 1786 shrink_all_memory(saveable - size); 1787 1788 /* 1789 * The number of saveable pages in memory was too high, so apply some 1790 * pressure to decrease it. First, make room for the largest possible 1791 * image and fail if that doesn't work. Next, try to decrease the size 1792 * of the image as much as indicated by 'size' using allocations from 1793 * highmem and non-highmem zones separately. 1794 */ 1795 pages_highmem = preallocate_image_highmem(highmem / 2); 1796 alloc = count - max_size; 1797 if (alloc > pages_highmem) 1798 alloc -= pages_highmem; 1799 else 1800 alloc = 0; 1801 pages = preallocate_image_memory(alloc, avail_normal); 1802 if (pages < alloc) { 1803 /* We have exhausted non-highmem pages, try highmem. */ 1804 alloc -= pages; 1805 pages += pages_highmem; 1806 pages_highmem = preallocate_image_highmem(alloc); 1807 if (pages_highmem < alloc) 1808 goto err_out; 1809 pages += pages_highmem; 1810 /* 1811 * size is the desired number of saveable pages to leave in 1812 * memory, so try to preallocate (all memory - size) pages. 1813 */ 1814 alloc = (count - pages) - size; 1815 pages += preallocate_image_highmem(alloc); 1816 } else { 1817 /* 1818 * There are approximately max_size saveable pages at this point 1819 * and we want to reduce this number down to size. 1820 */ 1821 alloc = max_size - size; 1822 size = preallocate_highmem_fraction(alloc, highmem, count); 1823 pages_highmem += size; 1824 alloc -= size; 1825 size = preallocate_image_memory(alloc, avail_normal); 1826 pages_highmem += preallocate_image_highmem(alloc - size); 1827 pages += pages_highmem + size; 1828 } 1829 1830 /* 1831 * We only need as many page frames for the image as there are saveable 1832 * pages in memory, but we have allocated more. Release the excessive 1833 * ones now. 1834 */ 1835 pages -= free_unnecessary_pages(); 1836 1837 out: 1838 stop = ktime_get(); 1839 pr_cont("done (allocated %lu pages)\n", pages); 1840 swsusp_show_speed(start, stop, pages, "Allocated"); 1841 1842 return 0; 1843 1844 err_out: 1845 pr_cont("\n"); 1846 swsusp_free(); 1847 return -ENOMEM; 1848 } 1849 1850 #ifdef CONFIG_HIGHMEM 1851 /** 1852 * count_pages_for_highmem - Count non-highmem pages needed for copying highmem. 1853 * 1854 * Compute the number of non-highmem pages that will be necessary for creating 1855 * copies of highmem pages. 1856 */ 1857 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) 1858 { 1859 unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; 1860 1861 if (free_highmem >= nr_highmem) 1862 nr_highmem = 0; 1863 else 1864 nr_highmem -= free_highmem; 1865 1866 return nr_highmem; 1867 } 1868 #else 1869 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; } 1870 #endif /* CONFIG_HIGHMEM */ 1871 1872 /** 1873 * enough_free_mem - Check if there is enough free memory for the image. 1874 */ 1875 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) 1876 { 1877 struct zone *zone; 1878 unsigned int free = alloc_normal; 1879 1880 for_each_populated_zone(zone) 1881 if (!is_highmem(zone)) 1882 free += zone_page_state(zone, NR_FREE_PAGES); 1883 1884 nr_pages += count_pages_for_highmem(nr_highmem); 1885 pr_debug("Normal pages needed: %u + %u, available pages: %u\n", 1886 nr_pages, PAGES_FOR_IO, free); 1887 1888 return free > nr_pages + PAGES_FOR_IO; 1889 } 1890 1891 #ifdef CONFIG_HIGHMEM 1892 /** 1893 * get_highmem_buffer - Allocate a buffer for highmem pages. 1894 * 1895 * If there are some highmem pages in the hibernation image, we may need a 1896 * buffer to copy them and/or load their data. 1897 */ 1898 static inline int get_highmem_buffer(int safe_needed) 1899 { 1900 buffer = get_image_page(GFP_ATOMIC, safe_needed); 1901 return buffer ? 0 : -ENOMEM; 1902 } 1903 1904 /** 1905 * alloc_highmem_image_pages - Allocate some highmem pages for the image. 1906 * 1907 * Try to allocate as many pages as needed, but if the number of free highmem 1908 * pages is less than that, allocate them all. 1909 */ 1910 static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, 1911 unsigned int nr_highmem) 1912 { 1913 unsigned int to_alloc = count_free_highmem_pages(); 1914 1915 if (to_alloc > nr_highmem) 1916 to_alloc = nr_highmem; 1917 1918 nr_highmem -= to_alloc; 1919 while (to_alloc-- > 0) { 1920 struct page *page; 1921 1922 page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM); 1923 memory_bm_set_bit(bm, page_to_pfn(page)); 1924 } 1925 return nr_highmem; 1926 } 1927 #else 1928 static inline int get_highmem_buffer(int safe_needed) { return 0; } 1929 1930 static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, 1931 unsigned int n) { return 0; } 1932 #endif /* CONFIG_HIGHMEM */ 1933 1934 /** 1935 * swsusp_alloc - Allocate memory for hibernation image. 1936 * 1937 * We first try to allocate as many highmem pages as there are 1938 * saveable highmem pages in the system. If that fails, we allocate 1939 * non-highmem pages for the copies of the remaining highmem ones. 1940 * 1941 * In this approach it is likely that the copies of highmem pages will 1942 * also be located in the high memory, because of the way in which 1943 * copy_data_pages() works. 1944 */ 1945 static int swsusp_alloc(struct memory_bitmap *copy_bm, 1946 unsigned int nr_pages, unsigned int nr_highmem) 1947 { 1948 if (nr_highmem > 0) { 1949 if (get_highmem_buffer(PG_ANY)) 1950 goto err_out; 1951 if (nr_highmem > alloc_highmem) { 1952 nr_highmem -= alloc_highmem; 1953 nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); 1954 } 1955 } 1956 if (nr_pages > alloc_normal) { 1957 nr_pages -= alloc_normal; 1958 while (nr_pages-- > 0) { 1959 struct page *page; 1960 1961 page = alloc_image_page(GFP_ATOMIC); 1962 if (!page) 1963 goto err_out; 1964 memory_bm_set_bit(copy_bm, page_to_pfn(page)); 1965 } 1966 } 1967 1968 return 0; 1969 1970 err_out: 1971 swsusp_free(); 1972 return -ENOMEM; 1973 } 1974 1975 asmlinkage __visible int swsusp_save(void) 1976 { 1977 unsigned int nr_pages, nr_highmem; 1978 1979 pr_info("Creating hibernation image:\n"); 1980 1981 drain_local_pages(NULL); 1982 nr_pages = count_data_pages(); 1983 nr_highmem = count_highmem_pages(); 1984 pr_info("Need to copy %u pages\n", nr_pages + nr_highmem); 1985 1986 if (!enough_free_mem(nr_pages, nr_highmem)) { 1987 pr_err("Not enough free memory\n"); 1988 return -ENOMEM; 1989 } 1990 1991 if (swsusp_alloc(©_bm, nr_pages, nr_highmem)) { 1992 pr_err("Memory allocation failed\n"); 1993 return -ENOMEM; 1994 } 1995 1996 /* 1997 * During allocating of suspend pagedir, new cold pages may appear. 1998 * Kill them. 1999 */ 2000 drain_local_pages(NULL); 2001 copy_data_pages(©_bm, &orig_bm); 2002 2003 /* 2004 * End of critical section. From now on, we can write to memory, 2005 * but we should not touch disk. This specially means we must _not_ 2006 * touch swap space! Except we must write out our image of course. 2007 */ 2008 2009 nr_pages += nr_highmem; 2010 nr_copy_pages = nr_pages; 2011 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); 2012 2013 pr_info("Hibernation image created (%d pages copied)\n", nr_pages); 2014 2015 return 0; 2016 } 2017 2018 #ifndef CONFIG_ARCH_HIBERNATION_HEADER 2019 static int init_header_complete(struct swsusp_info *info) 2020 { 2021 memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); 2022 info->version_code = LINUX_VERSION_CODE; 2023 return 0; 2024 } 2025 2026 static char *check_image_kernel(struct swsusp_info *info) 2027 { 2028 if (info->version_code != LINUX_VERSION_CODE) 2029 return "kernel version"; 2030 if (strcmp(info->uts.sysname,init_utsname()->sysname)) 2031 return "system type"; 2032 if (strcmp(info->uts.release,init_utsname()->release)) 2033 return "kernel release"; 2034 if (strcmp(info->uts.version,init_utsname()->version)) 2035 return "version"; 2036 if (strcmp(info->uts.machine,init_utsname()->machine)) 2037 return "machine"; 2038 return NULL; 2039 } 2040 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ 2041 2042 unsigned long snapshot_get_image_size(void) 2043 { 2044 return nr_copy_pages + nr_meta_pages + 1; 2045 } 2046 2047 static int init_header(struct swsusp_info *info) 2048 { 2049 memset(info, 0, sizeof(struct swsusp_info)); 2050 info->num_physpages = get_num_physpages(); 2051 info->image_pages = nr_copy_pages; 2052 info->pages = snapshot_get_image_size(); 2053 info->size = info->pages; 2054 info->size <<= PAGE_SHIFT; 2055 return init_header_complete(info); 2056 } 2057 2058 /** 2059 * pack_pfns - Prepare PFNs for saving. 2060 * @bm: Memory bitmap. 2061 * @buf: Memory buffer to store the PFNs in. 2062 * 2063 * PFNs corresponding to set bits in @bm are stored in the area of memory 2064 * pointed to by @buf (1 page at a time). 2065 */ 2066 static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) 2067 { 2068 int j; 2069 2070 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 2071 buf[j] = memory_bm_next_pfn(bm); 2072 if (unlikely(buf[j] == BM_END_OF_MAP)) 2073 break; 2074 /* Save page key for data page (s390 only). */ 2075 page_key_read(buf + j); 2076 } 2077 } 2078 2079 /** 2080 * snapshot_read_next - Get the address to read the next image page from. 2081 * @handle: Snapshot handle to be used for the reading. 2082 * 2083 * On the first call, @handle should point to a zeroed snapshot_handle 2084 * structure. The structure gets populated then and a pointer to it should be 2085 * passed to this function every next time. 2086 * 2087 * On success, the function returns a positive number. Then, the caller 2088 * is allowed to read up to the returned number of bytes from the memory 2089 * location computed by the data_of() macro. 2090 * 2091 * The function returns 0 to indicate the end of the data stream condition, 2092 * and negative numbers are returned on errors. If that happens, the structure 2093 * pointed to by @handle is not updated and should not be used any more. 2094 */ 2095 int snapshot_read_next(struct snapshot_handle *handle) 2096 { 2097 if (handle->cur > nr_meta_pages + nr_copy_pages) 2098 return 0; 2099 2100 if (!buffer) { 2101 /* This makes the buffer be freed by swsusp_free() */ 2102 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2103 if (!buffer) 2104 return -ENOMEM; 2105 } 2106 if (!handle->cur) { 2107 int error; 2108 2109 error = init_header((struct swsusp_info *)buffer); 2110 if (error) 2111 return error; 2112 handle->buffer = buffer; 2113 memory_bm_position_reset(&orig_bm); 2114 memory_bm_position_reset(©_bm); 2115 } else if (handle->cur <= nr_meta_pages) { 2116 clear_page(buffer); 2117 pack_pfns(buffer, &orig_bm); 2118 } else { 2119 struct page *page; 2120 2121 page = pfn_to_page(memory_bm_next_pfn(©_bm)); 2122 if (PageHighMem(page)) { 2123 /* 2124 * Highmem pages are copied to the buffer, 2125 * because we can't return with a kmapped 2126 * highmem page (we may not be called again). 2127 */ 2128 void *kaddr; 2129 2130 kaddr = kmap_atomic(page); 2131 copy_page(buffer, kaddr); 2132 kunmap_atomic(kaddr); 2133 handle->buffer = buffer; 2134 } else { 2135 handle->buffer = page_address(page); 2136 } 2137 } 2138 handle->cur++; 2139 return PAGE_SIZE; 2140 } 2141 2142 static void duplicate_memory_bitmap(struct memory_bitmap *dst, 2143 struct memory_bitmap *src) 2144 { 2145 unsigned long pfn; 2146 2147 memory_bm_position_reset(src); 2148 pfn = memory_bm_next_pfn(src); 2149 while (pfn != BM_END_OF_MAP) { 2150 memory_bm_set_bit(dst, pfn); 2151 pfn = memory_bm_next_pfn(src); 2152 } 2153 } 2154 2155 /** 2156 * mark_unsafe_pages - Mark pages that were used before hibernation. 2157 * 2158 * Mark the pages that cannot be used for storing the image during restoration, 2159 * because they conflict with the pages that had been used before hibernation. 2160 */ 2161 static void mark_unsafe_pages(struct memory_bitmap *bm) 2162 { 2163 unsigned long pfn; 2164 2165 /* Clear the "free"/"unsafe" bit for all PFNs */ 2166 memory_bm_position_reset(free_pages_map); 2167 pfn = memory_bm_next_pfn(free_pages_map); 2168 while (pfn != BM_END_OF_MAP) { 2169 memory_bm_clear_current(free_pages_map); 2170 pfn = memory_bm_next_pfn(free_pages_map); 2171 } 2172 2173 /* Mark pages that correspond to the "original" PFNs as "unsafe" */ 2174 duplicate_memory_bitmap(free_pages_map, bm); 2175 2176 allocated_unsafe_pages = 0; 2177 } 2178 2179 static int check_header(struct swsusp_info *info) 2180 { 2181 char *reason; 2182 2183 reason = check_image_kernel(info); 2184 if (!reason && info->num_physpages != get_num_physpages()) 2185 reason = "memory size"; 2186 if (reason) { 2187 pr_err("Image mismatch: %s\n", reason); 2188 return -EPERM; 2189 } 2190 return 0; 2191 } 2192 2193 /** 2194 * load header - Check the image header and copy the data from it. 2195 */ 2196 static int load_header(struct swsusp_info *info) 2197 { 2198 int error; 2199 2200 restore_pblist = NULL; 2201 error = check_header(info); 2202 if (!error) { 2203 nr_copy_pages = info->image_pages; 2204 nr_meta_pages = info->pages - info->image_pages - 1; 2205 } 2206 return error; 2207 } 2208 2209 /** 2210 * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap. 2211 * @bm: Memory bitmap. 2212 * @buf: Area of memory containing the PFNs. 2213 * 2214 * For each element of the array pointed to by @buf (1 page at a time), set the 2215 * corresponding bit in @bm. 2216 */ 2217 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) 2218 { 2219 int j; 2220 2221 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 2222 if (unlikely(buf[j] == BM_END_OF_MAP)) 2223 break; 2224 2225 /* Extract and buffer page key for data page (s390 only). */ 2226 page_key_memorize(buf + j); 2227 2228 if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) 2229 memory_bm_set_bit(bm, buf[j]); 2230 else 2231 return -EFAULT; 2232 } 2233 2234 return 0; 2235 } 2236 2237 #ifdef CONFIG_HIGHMEM 2238 /* 2239 * struct highmem_pbe is used for creating the list of highmem pages that 2240 * should be restored atomically during the resume from disk, because the page 2241 * frames they have occupied before the suspend are in use. 2242 */ 2243 struct highmem_pbe { 2244 struct page *copy_page; /* data is here now */ 2245 struct page *orig_page; /* data was here before the suspend */ 2246 struct highmem_pbe *next; 2247 }; 2248 2249 /* 2250 * List of highmem PBEs needed for restoring the highmem pages that were 2251 * allocated before the suspend and included in the suspend image, but have 2252 * also been allocated by the "resume" kernel, so their contents cannot be 2253 * written directly to their "original" page frames. 2254 */ 2255 static struct highmem_pbe *highmem_pblist; 2256 2257 /** 2258 * count_highmem_image_pages - Compute the number of highmem pages in the image. 2259 * @bm: Memory bitmap. 2260 * 2261 * The bits in @bm that correspond to image pages are assumed to be set. 2262 */ 2263 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) 2264 { 2265 unsigned long pfn; 2266 unsigned int cnt = 0; 2267 2268 memory_bm_position_reset(bm); 2269 pfn = memory_bm_next_pfn(bm); 2270 while (pfn != BM_END_OF_MAP) { 2271 if (PageHighMem(pfn_to_page(pfn))) 2272 cnt++; 2273 2274 pfn = memory_bm_next_pfn(bm); 2275 } 2276 return cnt; 2277 } 2278 2279 static unsigned int safe_highmem_pages; 2280 2281 static struct memory_bitmap *safe_highmem_bm; 2282 2283 /** 2284 * prepare_highmem_image - Allocate memory for loading highmem data from image. 2285 * @bm: Pointer to an uninitialized memory bitmap structure. 2286 * @nr_highmem_p: Pointer to the number of highmem image pages. 2287 * 2288 * Try to allocate as many highmem pages as there are highmem image pages 2289 * (@nr_highmem_p points to the variable containing the number of highmem image 2290 * pages). The pages that are "safe" (ie. will not be overwritten when the 2291 * hibernation image is restored entirely) have the corresponding bits set in 2292 * @bm (it must be unitialized). 2293 * 2294 * NOTE: This function should not be called if there are no highmem image pages. 2295 */ 2296 static int prepare_highmem_image(struct memory_bitmap *bm, 2297 unsigned int *nr_highmem_p) 2298 { 2299 unsigned int to_alloc; 2300 2301 if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) 2302 return -ENOMEM; 2303 2304 if (get_highmem_buffer(PG_SAFE)) 2305 return -ENOMEM; 2306 2307 to_alloc = count_free_highmem_pages(); 2308 if (to_alloc > *nr_highmem_p) 2309 to_alloc = *nr_highmem_p; 2310 else 2311 *nr_highmem_p = to_alloc; 2312 2313 safe_highmem_pages = 0; 2314 while (to_alloc-- > 0) { 2315 struct page *page; 2316 2317 page = alloc_page(__GFP_HIGHMEM); 2318 if (!swsusp_page_is_free(page)) { 2319 /* The page is "safe", set its bit the bitmap */ 2320 memory_bm_set_bit(bm, page_to_pfn(page)); 2321 safe_highmem_pages++; 2322 } 2323 /* Mark the page as allocated */ 2324 swsusp_set_page_forbidden(page); 2325 swsusp_set_page_free(page); 2326 } 2327 memory_bm_position_reset(bm); 2328 safe_highmem_bm = bm; 2329 return 0; 2330 } 2331 2332 static struct page *last_highmem_page; 2333 2334 /** 2335 * get_highmem_page_buffer - Prepare a buffer to store a highmem image page. 2336 * 2337 * For a given highmem image page get a buffer that suspend_write_next() should 2338 * return to its caller to write to. 2339 * 2340 * If the page is to be saved to its "original" page frame or a copy of 2341 * the page is to be made in the highmem, @buffer is returned. Otherwise, 2342 * the copy of the page is to be made in normal memory, so the address of 2343 * the copy is returned. 2344 * 2345 * If @buffer is returned, the caller of suspend_write_next() will write 2346 * the page's contents to @buffer, so they will have to be copied to the 2347 * right location on the next call to suspend_write_next() and it is done 2348 * with the help of copy_last_highmem_page(). For this purpose, if 2349 * @buffer is returned, @last_highmem_page is set to the page to which 2350 * the data will have to be copied from @buffer. 2351 */ 2352 static void *get_highmem_page_buffer(struct page *page, 2353 struct chain_allocator *ca) 2354 { 2355 struct highmem_pbe *pbe; 2356 void *kaddr; 2357 2358 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { 2359 /* 2360 * We have allocated the "original" page frame and we can 2361 * use it directly to store the loaded page. 2362 */ 2363 last_highmem_page = page; 2364 return buffer; 2365 } 2366 /* 2367 * The "original" page frame has not been allocated and we have to 2368 * use a "safe" page frame to store the loaded page. 2369 */ 2370 pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); 2371 if (!pbe) { 2372 swsusp_free(); 2373 return ERR_PTR(-ENOMEM); 2374 } 2375 pbe->orig_page = page; 2376 if (safe_highmem_pages > 0) { 2377 struct page *tmp; 2378 2379 /* Copy of the page will be stored in high memory */ 2380 kaddr = buffer; 2381 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); 2382 safe_highmem_pages--; 2383 last_highmem_page = tmp; 2384 pbe->copy_page = tmp; 2385 } else { 2386 /* Copy of the page will be stored in normal memory */ 2387 kaddr = safe_pages_list; 2388 safe_pages_list = safe_pages_list->next; 2389 pbe->copy_page = virt_to_page(kaddr); 2390 } 2391 pbe->next = highmem_pblist; 2392 highmem_pblist = pbe; 2393 return kaddr; 2394 } 2395 2396 /** 2397 * copy_last_highmem_page - Copy most the most recent highmem image page. 2398 * 2399 * Copy the contents of a highmem image from @buffer, where the caller of 2400 * snapshot_write_next() has stored them, to the right location represented by 2401 * @last_highmem_page . 2402 */ 2403 static void copy_last_highmem_page(void) 2404 { 2405 if (last_highmem_page) { 2406 void *dst; 2407 2408 dst = kmap_atomic(last_highmem_page); 2409 copy_page(dst, buffer); 2410 kunmap_atomic(dst); 2411 last_highmem_page = NULL; 2412 } 2413 } 2414 2415 static inline int last_highmem_page_copied(void) 2416 { 2417 return !last_highmem_page; 2418 } 2419 2420 static inline void free_highmem_data(void) 2421 { 2422 if (safe_highmem_bm) 2423 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); 2424 2425 if (buffer) 2426 free_image_page(buffer, PG_UNSAFE_CLEAR); 2427 } 2428 #else 2429 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } 2430 2431 static inline int prepare_highmem_image(struct memory_bitmap *bm, 2432 unsigned int *nr_highmem_p) { return 0; } 2433 2434 static inline void *get_highmem_page_buffer(struct page *page, 2435 struct chain_allocator *ca) 2436 { 2437 return ERR_PTR(-EINVAL); 2438 } 2439 2440 static inline void copy_last_highmem_page(void) {} 2441 static inline int last_highmem_page_copied(void) { return 1; } 2442 static inline void free_highmem_data(void) {} 2443 #endif /* CONFIG_HIGHMEM */ 2444 2445 #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) 2446 2447 /** 2448 * prepare_image - Make room for loading hibernation image. 2449 * @new_bm: Unitialized memory bitmap structure. 2450 * @bm: Memory bitmap with unsafe pages marked. 2451 * 2452 * Use @bm to mark the pages that will be overwritten in the process of 2453 * restoring the system memory state from the suspend image ("unsafe" pages) 2454 * and allocate memory for the image. 2455 * 2456 * The idea is to allocate a new memory bitmap first and then allocate 2457 * as many pages as needed for image data, but without specifying what those 2458 * pages will be used for just yet. Instead, we mark them all as allocated and 2459 * create a lists of "safe" pages to be used later. On systems with high 2460 * memory a list of "safe" highmem pages is created too. 2461 */ 2462 static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) 2463 { 2464 unsigned int nr_pages, nr_highmem; 2465 struct linked_page *lp; 2466 int error; 2467 2468 /* If there is no highmem, the buffer will not be necessary */ 2469 free_image_page(buffer, PG_UNSAFE_CLEAR); 2470 buffer = NULL; 2471 2472 nr_highmem = count_highmem_image_pages(bm); 2473 mark_unsafe_pages(bm); 2474 2475 error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); 2476 if (error) 2477 goto Free; 2478 2479 duplicate_memory_bitmap(new_bm, bm); 2480 memory_bm_free(bm, PG_UNSAFE_KEEP); 2481 if (nr_highmem > 0) { 2482 error = prepare_highmem_image(bm, &nr_highmem); 2483 if (error) 2484 goto Free; 2485 } 2486 /* 2487 * Reserve some safe pages for potential later use. 2488 * 2489 * NOTE: This way we make sure there will be enough safe pages for the 2490 * chain_alloc() in get_buffer(). It is a bit wasteful, but 2491 * nr_copy_pages cannot be greater than 50% of the memory anyway. 2492 * 2493 * nr_copy_pages cannot be less than allocated_unsafe_pages too. 2494 */ 2495 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2496 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); 2497 while (nr_pages > 0) { 2498 lp = get_image_page(GFP_ATOMIC, PG_SAFE); 2499 if (!lp) { 2500 error = -ENOMEM; 2501 goto Free; 2502 } 2503 lp->next = safe_pages_list; 2504 safe_pages_list = lp; 2505 nr_pages--; 2506 } 2507 /* Preallocate memory for the image */ 2508 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2509 while (nr_pages > 0) { 2510 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); 2511 if (!lp) { 2512 error = -ENOMEM; 2513 goto Free; 2514 } 2515 if (!swsusp_page_is_free(virt_to_page(lp))) { 2516 /* The page is "safe", add it to the list */ 2517 lp->next = safe_pages_list; 2518 safe_pages_list = lp; 2519 } 2520 /* Mark the page as allocated */ 2521 swsusp_set_page_forbidden(virt_to_page(lp)); 2522 swsusp_set_page_free(virt_to_page(lp)); 2523 nr_pages--; 2524 } 2525 return 0; 2526 2527 Free: 2528 swsusp_free(); 2529 return error; 2530 } 2531 2532 /** 2533 * get_buffer - Get the address to store the next image data page. 2534 * 2535 * Get the address that snapshot_write_next() should return to its caller to 2536 * write to. 2537 */ 2538 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) 2539 { 2540 struct pbe *pbe; 2541 struct page *page; 2542 unsigned long pfn = memory_bm_next_pfn(bm); 2543 2544 if (pfn == BM_END_OF_MAP) 2545 return ERR_PTR(-EFAULT); 2546 2547 page = pfn_to_page(pfn); 2548 if (PageHighMem(page)) 2549 return get_highmem_page_buffer(page, ca); 2550 2551 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) 2552 /* 2553 * We have allocated the "original" page frame and we can 2554 * use it directly to store the loaded page. 2555 */ 2556 return page_address(page); 2557 2558 /* 2559 * The "original" page frame has not been allocated and we have to 2560 * use a "safe" page frame to store the loaded page. 2561 */ 2562 pbe = chain_alloc(ca, sizeof(struct pbe)); 2563 if (!pbe) { 2564 swsusp_free(); 2565 return ERR_PTR(-ENOMEM); 2566 } 2567 pbe->orig_address = page_address(page); 2568 pbe->address = safe_pages_list; 2569 safe_pages_list = safe_pages_list->next; 2570 pbe->next = restore_pblist; 2571 restore_pblist = pbe; 2572 return pbe->address; 2573 } 2574 2575 /** 2576 * snapshot_write_next - Get the address to store the next image page. 2577 * @handle: Snapshot handle structure to guide the writing. 2578 * 2579 * On the first call, @handle should point to a zeroed snapshot_handle 2580 * structure. The structure gets populated then and a pointer to it should be 2581 * passed to this function every next time. 2582 * 2583 * On success, the function returns a positive number. Then, the caller 2584 * is allowed to write up to the returned number of bytes to the memory 2585 * location computed by the data_of() macro. 2586 * 2587 * The function returns 0 to indicate the "end of file" condition. Negative 2588 * numbers are returned on errors, in which cases the structure pointed to by 2589 * @handle is not updated and should not be used any more. 2590 */ 2591 int snapshot_write_next(struct snapshot_handle *handle) 2592 { 2593 static struct chain_allocator ca; 2594 int error = 0; 2595 2596 /* Check if we have already loaded the entire image */ 2597 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) 2598 return 0; 2599 2600 handle->sync_read = 1; 2601 2602 if (!handle->cur) { 2603 if (!buffer) 2604 /* This makes the buffer be freed by swsusp_free() */ 2605 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2606 2607 if (!buffer) 2608 return -ENOMEM; 2609 2610 handle->buffer = buffer; 2611 } else if (handle->cur == 1) { 2612 error = load_header(buffer); 2613 if (error) 2614 return error; 2615 2616 safe_pages_list = NULL; 2617 2618 error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); 2619 if (error) 2620 return error; 2621 2622 /* Allocate buffer for page keys. */ 2623 error = page_key_alloc(nr_copy_pages); 2624 if (error) 2625 return error; 2626 2627 hibernate_restore_protection_begin(); 2628 } else if (handle->cur <= nr_meta_pages + 1) { 2629 error = unpack_orig_pfns(buffer, ©_bm); 2630 if (error) 2631 return error; 2632 2633 if (handle->cur == nr_meta_pages + 1) { 2634 error = prepare_image(&orig_bm, ©_bm); 2635 if (error) 2636 return error; 2637 2638 chain_init(&ca, GFP_ATOMIC, PG_SAFE); 2639 memory_bm_position_reset(&orig_bm); 2640 restore_pblist = NULL; 2641 handle->buffer = get_buffer(&orig_bm, &ca); 2642 handle->sync_read = 0; 2643 if (IS_ERR(handle->buffer)) 2644 return PTR_ERR(handle->buffer); 2645 } 2646 } else { 2647 copy_last_highmem_page(); 2648 /* Restore page key for data page (s390 only). */ 2649 page_key_write(handle->buffer); 2650 hibernate_restore_protect_page(handle->buffer); 2651 handle->buffer = get_buffer(&orig_bm, &ca); 2652 if (IS_ERR(handle->buffer)) 2653 return PTR_ERR(handle->buffer); 2654 if (handle->buffer != buffer) 2655 handle->sync_read = 0; 2656 } 2657 handle->cur++; 2658 return PAGE_SIZE; 2659 } 2660 2661 /** 2662 * snapshot_write_finalize - Complete the loading of a hibernation image. 2663 * 2664 * Must be called after the last call to snapshot_write_next() in case the last 2665 * page in the image happens to be a highmem page and its contents should be 2666 * stored in highmem. Additionally, it recycles bitmap memory that's not 2667 * necessary any more. 2668 */ 2669 void snapshot_write_finalize(struct snapshot_handle *handle) 2670 { 2671 copy_last_highmem_page(); 2672 /* Restore page key for data page (s390 only). */ 2673 page_key_write(handle->buffer); 2674 page_key_free(); 2675 hibernate_restore_protect_page(handle->buffer); 2676 /* Do that only if we have loaded the image entirely */ 2677 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { 2678 memory_bm_recycle(&orig_bm); 2679 free_highmem_data(); 2680 } 2681 } 2682 2683 int snapshot_image_loaded(struct snapshot_handle *handle) 2684 { 2685 return !(!nr_copy_pages || !last_highmem_page_copied() || 2686 handle->cur <= nr_meta_pages + nr_copy_pages); 2687 } 2688 2689 #ifdef CONFIG_HIGHMEM 2690 /* Assumes that @buf is ready and points to a "safe" page */ 2691 static inline void swap_two_pages_data(struct page *p1, struct page *p2, 2692 void *buf) 2693 { 2694 void *kaddr1, *kaddr2; 2695 2696 kaddr1 = kmap_atomic(p1); 2697 kaddr2 = kmap_atomic(p2); 2698 copy_page(buf, kaddr1); 2699 copy_page(kaddr1, kaddr2); 2700 copy_page(kaddr2, buf); 2701 kunmap_atomic(kaddr2); 2702 kunmap_atomic(kaddr1); 2703 } 2704 2705 /** 2706 * restore_highmem - Put highmem image pages into their original locations. 2707 * 2708 * For each highmem page that was in use before hibernation and is included in 2709 * the image, and also has been allocated by the "restore" kernel, swap its 2710 * current contents with the previous (ie. "before hibernation") ones. 2711 * 2712 * If the restore eventually fails, we can call this function once again and 2713 * restore the highmem state as seen by the restore kernel. 2714 */ 2715 int restore_highmem(void) 2716 { 2717 struct highmem_pbe *pbe = highmem_pblist; 2718 void *buf; 2719 2720 if (!pbe) 2721 return 0; 2722 2723 buf = get_image_page(GFP_ATOMIC, PG_SAFE); 2724 if (!buf) 2725 return -ENOMEM; 2726 2727 while (pbe) { 2728 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); 2729 pbe = pbe->next; 2730 } 2731 free_image_page(buf, PG_UNSAFE_CLEAR); 2732 return 0; 2733 } 2734 #endif /* CONFIG_HIGHMEM */ 2735