1 /* 2 * linux/kernel/power/snapshot.c 3 * 4 * This file provides system snapshot/restore functionality for swsusp. 5 * 6 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz> 7 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 8 * 9 * This file is released under the GPLv2. 10 * 11 */ 12 13 #include <linux/version.h> 14 #include <linux/module.h> 15 #include <linux/mm.h> 16 #include <linux/suspend.h> 17 #include <linux/delay.h> 18 #include <linux/bitops.h> 19 #include <linux/spinlock.h> 20 #include <linux/kernel.h> 21 #include <linux/pm.h> 22 #include <linux/device.h> 23 #include <linux/init.h> 24 #include <linux/bootmem.h> 25 #include <linux/syscalls.h> 26 #include <linux/console.h> 27 #include <linux/highmem.h> 28 #include <linux/list.h> 29 #include <linux/slab.h> 30 #include <linux/compiler.h> 31 #include <linux/ktime.h> 32 33 #include <asm/uaccess.h> 34 #include <asm/mmu_context.h> 35 #include <asm/pgtable.h> 36 #include <asm/tlbflush.h> 37 #include <asm/io.h> 38 39 #include "power.h" 40 41 static int swsusp_page_is_free(struct page *); 42 static void swsusp_set_page_forbidden(struct page *); 43 static void swsusp_unset_page_forbidden(struct page *); 44 45 /* 46 * Number of bytes to reserve for memory allocations made by device drivers 47 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't 48 * cause image creation to fail (tunable via /sys/power/reserved_size). 49 */ 50 unsigned long reserved_size; 51 52 void __init hibernate_reserved_size_init(void) 53 { 54 reserved_size = SPARE_PAGES * PAGE_SIZE; 55 } 56 57 /* 58 * Preferred image size in bytes (tunable via /sys/power/image_size). 59 * When it is set to N, swsusp will do its best to ensure the image 60 * size will not exceed N bytes, but if that is impossible, it will 61 * try to create the smallest image possible. 62 */ 63 unsigned long image_size; 64 65 void __init hibernate_image_size_init(void) 66 { 67 image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; 68 } 69 70 /* List of PBEs needed for restoring the pages that were allocated before 71 * the suspend and included in the suspend image, but have also been 72 * allocated by the "resume" kernel, so their contents cannot be written 73 * directly to their "original" page frames. 74 */ 75 struct pbe *restore_pblist; 76 77 /* Pointer to an auxiliary buffer (1 page) */ 78 static void *buffer; 79 80 /** 81 * @safe_needed - on resume, for storing the PBE list and the image, 82 * we can only use memory pages that do not conflict with the pages 83 * used before suspend. The unsafe pages have PageNosaveFree set 84 * and we count them using unsafe_pages. 85 * 86 * Each allocated image page is marked as PageNosave and PageNosaveFree 87 * so that swsusp_free() can release it. 88 */ 89 90 #define PG_ANY 0 91 #define PG_SAFE 1 92 #define PG_UNSAFE_CLEAR 1 93 #define PG_UNSAFE_KEEP 0 94 95 static unsigned int allocated_unsafe_pages; 96 97 static void *get_image_page(gfp_t gfp_mask, int safe_needed) 98 { 99 void *res; 100 101 res = (void *)get_zeroed_page(gfp_mask); 102 if (safe_needed) 103 while (res && swsusp_page_is_free(virt_to_page(res))) { 104 /* The page is unsafe, mark it for swsusp_free() */ 105 swsusp_set_page_forbidden(virt_to_page(res)); 106 allocated_unsafe_pages++; 107 res = (void *)get_zeroed_page(gfp_mask); 108 } 109 if (res) { 110 swsusp_set_page_forbidden(virt_to_page(res)); 111 swsusp_set_page_free(virt_to_page(res)); 112 } 113 return res; 114 } 115 116 unsigned long get_safe_page(gfp_t gfp_mask) 117 { 118 return (unsigned long)get_image_page(gfp_mask, PG_SAFE); 119 } 120 121 static struct page *alloc_image_page(gfp_t gfp_mask) 122 { 123 struct page *page; 124 125 page = alloc_page(gfp_mask); 126 if (page) { 127 swsusp_set_page_forbidden(page); 128 swsusp_set_page_free(page); 129 } 130 return page; 131 } 132 133 /** 134 * free_image_page - free page represented by @addr, allocated with 135 * get_image_page (page flags set by it must be cleared) 136 */ 137 138 static inline void free_image_page(void *addr, int clear_nosave_free) 139 { 140 struct page *page; 141 142 BUG_ON(!virt_addr_valid(addr)); 143 144 page = virt_to_page(addr); 145 146 swsusp_unset_page_forbidden(page); 147 if (clear_nosave_free) 148 swsusp_unset_page_free(page); 149 150 __free_page(page); 151 } 152 153 /* struct linked_page is used to build chains of pages */ 154 155 #define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) 156 157 struct linked_page { 158 struct linked_page *next; 159 char data[LINKED_PAGE_DATA_SIZE]; 160 } __packed; 161 162 static inline void 163 free_list_of_pages(struct linked_page *list, int clear_page_nosave) 164 { 165 while (list) { 166 struct linked_page *lp = list->next; 167 168 free_image_page(list, clear_page_nosave); 169 list = lp; 170 } 171 } 172 173 /** 174 * struct chain_allocator is used for allocating small objects out of 175 * a linked list of pages called 'the chain'. 176 * 177 * The chain grows each time when there is no room for a new object in 178 * the current page. The allocated objects cannot be freed individually. 179 * It is only possible to free them all at once, by freeing the entire 180 * chain. 181 * 182 * NOTE: The chain allocator may be inefficient if the allocated objects 183 * are not much smaller than PAGE_SIZE. 184 */ 185 186 struct chain_allocator { 187 struct linked_page *chain; /* the chain */ 188 unsigned int used_space; /* total size of objects allocated out 189 * of the current page 190 */ 191 gfp_t gfp_mask; /* mask for allocating pages */ 192 int safe_needed; /* if set, only "safe" pages are allocated */ 193 }; 194 195 static void 196 chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed) 197 { 198 ca->chain = NULL; 199 ca->used_space = LINKED_PAGE_DATA_SIZE; 200 ca->gfp_mask = gfp_mask; 201 ca->safe_needed = safe_needed; 202 } 203 204 static void *chain_alloc(struct chain_allocator *ca, unsigned int size) 205 { 206 void *ret; 207 208 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { 209 struct linked_page *lp; 210 211 lp = get_image_page(ca->gfp_mask, ca->safe_needed); 212 if (!lp) 213 return NULL; 214 215 lp->next = ca->chain; 216 ca->chain = lp; 217 ca->used_space = 0; 218 } 219 ret = ca->chain->data + ca->used_space; 220 ca->used_space += size; 221 return ret; 222 } 223 224 /** 225 * Data types related to memory bitmaps. 226 * 227 * Memory bitmap is a structure consiting of many linked lists of 228 * objects. The main list's elements are of type struct zone_bitmap 229 * and each of them corresonds to one zone. For each zone bitmap 230 * object there is a list of objects of type struct bm_block that 231 * represent each blocks of bitmap in which information is stored. 232 * 233 * struct memory_bitmap contains a pointer to the main list of zone 234 * bitmap objects, a struct bm_position used for browsing the bitmap, 235 * and a pointer to the list of pages used for allocating all of the 236 * zone bitmap objects and bitmap block objects. 237 * 238 * NOTE: It has to be possible to lay out the bitmap in memory 239 * using only allocations of order 0. Additionally, the bitmap is 240 * designed to work with arbitrary number of zones (this is over the 241 * top for now, but let's avoid making unnecessary assumptions ;-). 242 * 243 * struct zone_bitmap contains a pointer to a list of bitmap block 244 * objects and a pointer to the bitmap block object that has been 245 * most recently used for setting bits. Additionally, it contains the 246 * pfns that correspond to the start and end of the represented zone. 247 * 248 * struct bm_block contains a pointer to the memory page in which 249 * information is stored (in the form of a block of bitmap) 250 * It also contains the pfns that correspond to the start and end of 251 * the represented memory area. 252 * 253 * The memory bitmap is organized as a radix tree to guarantee fast random 254 * access to the bits. There is one radix tree for each zone (as returned 255 * from create_mem_extents). 256 * 257 * One radix tree is represented by one struct mem_zone_bm_rtree. There are 258 * two linked lists for the nodes of the tree, one for the inner nodes and 259 * one for the leave nodes. The linked leave nodes are used for fast linear 260 * access of the memory bitmap. 261 * 262 * The struct rtree_node represents one node of the radix tree. 263 */ 264 265 #define BM_END_OF_MAP (~0UL) 266 267 #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) 268 #define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) 269 #define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) 270 271 /* 272 * struct rtree_node is a wrapper struct to link the nodes 273 * of the rtree together for easy linear iteration over 274 * bits and easy freeing 275 */ 276 struct rtree_node { 277 struct list_head list; 278 unsigned long *data; 279 }; 280 281 /* 282 * struct mem_zone_bm_rtree represents a bitmap used for one 283 * populated memory zone. 284 */ 285 struct mem_zone_bm_rtree { 286 struct list_head list; /* Link Zones together */ 287 struct list_head nodes; /* Radix Tree inner nodes */ 288 struct list_head leaves; /* Radix Tree leaves */ 289 unsigned long start_pfn; /* Zone start page frame */ 290 unsigned long end_pfn; /* Zone end page frame + 1 */ 291 struct rtree_node *rtree; /* Radix Tree Root */ 292 int levels; /* Number of Radix Tree Levels */ 293 unsigned int blocks; /* Number of Bitmap Blocks */ 294 }; 295 296 /* strcut bm_position is used for browsing memory bitmaps */ 297 298 struct bm_position { 299 struct mem_zone_bm_rtree *zone; 300 struct rtree_node *node; 301 unsigned long node_pfn; 302 int node_bit; 303 }; 304 305 struct memory_bitmap { 306 struct list_head zones; 307 struct linked_page *p_list; /* list of pages used to store zone 308 * bitmap objects and bitmap block 309 * objects 310 */ 311 struct bm_position cur; /* most recently used bit position */ 312 }; 313 314 /* Functions that operate on memory bitmaps */ 315 316 #define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long)) 317 #if BITS_PER_LONG == 32 318 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2) 319 #else 320 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3) 321 #endif 322 #define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) 323 324 /* 325 * alloc_rtree_node - Allocate a new node and add it to the radix tree. 326 * 327 * This function is used to allocate inner nodes as well as the 328 * leave nodes of the radix tree. It also adds the node to the 329 * corresponding linked list passed in by the *list parameter. 330 */ 331 static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, 332 struct chain_allocator *ca, 333 struct list_head *list) 334 { 335 struct rtree_node *node; 336 337 node = chain_alloc(ca, sizeof(struct rtree_node)); 338 if (!node) 339 return NULL; 340 341 node->data = get_image_page(gfp_mask, safe_needed); 342 if (!node->data) 343 return NULL; 344 345 list_add_tail(&node->list, list); 346 347 return node; 348 } 349 350 /* 351 * add_rtree_block - Add a new leave node to the radix tree 352 * 353 * The leave nodes need to be allocated in order to keep the leaves 354 * linked list in order. This is guaranteed by the zone->blocks 355 * counter. 356 */ 357 static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, 358 int safe_needed, struct chain_allocator *ca) 359 { 360 struct rtree_node *node, *block, **dst; 361 unsigned int levels_needed, block_nr; 362 int i; 363 364 block_nr = zone->blocks; 365 levels_needed = 0; 366 367 /* How many levels do we need for this block nr? */ 368 while (block_nr) { 369 levels_needed += 1; 370 block_nr >>= BM_RTREE_LEVEL_SHIFT; 371 } 372 373 /* Make sure the rtree has enough levels */ 374 for (i = zone->levels; i < levels_needed; i++) { 375 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 376 &zone->nodes); 377 if (!node) 378 return -ENOMEM; 379 380 node->data[0] = (unsigned long)zone->rtree; 381 zone->rtree = node; 382 zone->levels += 1; 383 } 384 385 /* Allocate new block */ 386 block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves); 387 if (!block) 388 return -ENOMEM; 389 390 /* Now walk the rtree to insert the block */ 391 node = zone->rtree; 392 dst = &zone->rtree; 393 block_nr = zone->blocks; 394 for (i = zone->levels; i > 0; i--) { 395 int index; 396 397 if (!node) { 398 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 399 &zone->nodes); 400 if (!node) 401 return -ENOMEM; 402 *dst = node; 403 } 404 405 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 406 index &= BM_RTREE_LEVEL_MASK; 407 dst = (struct rtree_node **)&((*dst)->data[index]); 408 node = *dst; 409 } 410 411 zone->blocks += 1; 412 *dst = block; 413 414 return 0; 415 } 416 417 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 418 int clear_nosave_free); 419 420 /* 421 * create_zone_bm_rtree - create a radix tree for one zone 422 * 423 * Allocated the mem_zone_bm_rtree structure and initializes it. 424 * This function also allocated and builds the radix tree for the 425 * zone. 426 */ 427 static struct mem_zone_bm_rtree * 428 create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, 429 struct chain_allocator *ca, 430 unsigned long start, unsigned long end) 431 { 432 struct mem_zone_bm_rtree *zone; 433 unsigned int i, nr_blocks; 434 unsigned long pages; 435 436 pages = end - start; 437 zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree)); 438 if (!zone) 439 return NULL; 440 441 INIT_LIST_HEAD(&zone->nodes); 442 INIT_LIST_HEAD(&zone->leaves); 443 zone->start_pfn = start; 444 zone->end_pfn = end; 445 nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); 446 447 for (i = 0; i < nr_blocks; i++) { 448 if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) { 449 free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR); 450 return NULL; 451 } 452 } 453 454 return zone; 455 } 456 457 /* 458 * free_zone_bm_rtree - Free the memory of the radix tree 459 * 460 * Free all node pages of the radix tree. The mem_zone_bm_rtree 461 * structure itself is not freed here nor are the rtree_node 462 * structs. 463 */ 464 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 465 int clear_nosave_free) 466 { 467 struct rtree_node *node; 468 469 list_for_each_entry(node, &zone->nodes, list) 470 free_image_page(node->data, clear_nosave_free); 471 472 list_for_each_entry(node, &zone->leaves, list) 473 free_image_page(node->data, clear_nosave_free); 474 } 475 476 static void memory_bm_position_reset(struct memory_bitmap *bm) 477 { 478 bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, 479 list); 480 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 481 struct rtree_node, list); 482 bm->cur.node_pfn = 0; 483 bm->cur.node_bit = 0; 484 } 485 486 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); 487 488 struct mem_extent { 489 struct list_head hook; 490 unsigned long start; 491 unsigned long end; 492 }; 493 494 /** 495 * free_mem_extents - free a list of memory extents 496 * @list - list of extents to empty 497 */ 498 static void free_mem_extents(struct list_head *list) 499 { 500 struct mem_extent *ext, *aux; 501 502 list_for_each_entry_safe(ext, aux, list, hook) { 503 list_del(&ext->hook); 504 kfree(ext); 505 } 506 } 507 508 /** 509 * create_mem_extents - create a list of memory extents representing 510 * contiguous ranges of PFNs 511 * @list - list to put the extents into 512 * @gfp_mask - mask to use for memory allocations 513 */ 514 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) 515 { 516 struct zone *zone; 517 518 INIT_LIST_HEAD(list); 519 520 for_each_populated_zone(zone) { 521 unsigned long zone_start, zone_end; 522 struct mem_extent *ext, *cur, *aux; 523 524 zone_start = zone->zone_start_pfn; 525 zone_end = zone_end_pfn(zone); 526 527 list_for_each_entry(ext, list, hook) 528 if (zone_start <= ext->end) 529 break; 530 531 if (&ext->hook == list || zone_end < ext->start) { 532 /* New extent is necessary */ 533 struct mem_extent *new_ext; 534 535 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); 536 if (!new_ext) { 537 free_mem_extents(list); 538 return -ENOMEM; 539 } 540 new_ext->start = zone_start; 541 new_ext->end = zone_end; 542 list_add_tail(&new_ext->hook, &ext->hook); 543 continue; 544 } 545 546 /* Merge this zone's range of PFNs with the existing one */ 547 if (zone_start < ext->start) 548 ext->start = zone_start; 549 if (zone_end > ext->end) 550 ext->end = zone_end; 551 552 /* More merging may be possible */ 553 cur = ext; 554 list_for_each_entry_safe_continue(cur, aux, list, hook) { 555 if (zone_end < cur->start) 556 break; 557 if (zone_end < cur->end) 558 ext->end = cur->end; 559 list_del(&cur->hook); 560 kfree(cur); 561 } 562 } 563 564 return 0; 565 } 566 567 /** 568 * memory_bm_create - allocate memory for a memory bitmap 569 */ 570 static int 571 memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) 572 { 573 struct chain_allocator ca; 574 struct list_head mem_extents; 575 struct mem_extent *ext; 576 int error; 577 578 chain_init(&ca, gfp_mask, safe_needed); 579 INIT_LIST_HEAD(&bm->zones); 580 581 error = create_mem_extents(&mem_extents, gfp_mask); 582 if (error) 583 return error; 584 585 list_for_each_entry(ext, &mem_extents, hook) { 586 struct mem_zone_bm_rtree *zone; 587 588 zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, 589 ext->start, ext->end); 590 if (!zone) { 591 error = -ENOMEM; 592 goto Error; 593 } 594 list_add_tail(&zone->list, &bm->zones); 595 } 596 597 bm->p_list = ca.chain; 598 memory_bm_position_reset(bm); 599 Exit: 600 free_mem_extents(&mem_extents); 601 return error; 602 603 Error: 604 bm->p_list = ca.chain; 605 memory_bm_free(bm, PG_UNSAFE_CLEAR); 606 goto Exit; 607 } 608 609 /** 610 * memory_bm_free - free memory occupied by the memory bitmap @bm 611 */ 612 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) 613 { 614 struct mem_zone_bm_rtree *zone; 615 616 list_for_each_entry(zone, &bm->zones, list) 617 free_zone_bm_rtree(zone, clear_nosave_free); 618 619 free_list_of_pages(bm->p_list, clear_nosave_free); 620 621 INIT_LIST_HEAD(&bm->zones); 622 } 623 624 /** 625 * memory_bm_find_bit - Find the bit for pfn in the memory 626 * bitmap 627 * 628 * Find the bit in the bitmap @bm that corresponds to given pfn. 629 * The cur.zone, cur.block and cur.node_pfn member of @bm are 630 * updated. 631 * It walks the radix tree to find the page which contains the bit for 632 * pfn and returns the bit position in **addr and *bit_nr. 633 */ 634 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, 635 void **addr, unsigned int *bit_nr) 636 { 637 struct mem_zone_bm_rtree *curr, *zone; 638 struct rtree_node *node; 639 int i, block_nr; 640 641 zone = bm->cur.zone; 642 643 if (pfn >= zone->start_pfn && pfn < zone->end_pfn) 644 goto zone_found; 645 646 zone = NULL; 647 648 /* Find the right zone */ 649 list_for_each_entry(curr, &bm->zones, list) { 650 if (pfn >= curr->start_pfn && pfn < curr->end_pfn) { 651 zone = curr; 652 break; 653 } 654 } 655 656 if (!zone) 657 return -EFAULT; 658 659 zone_found: 660 /* 661 * We have a zone. Now walk the radix tree to find the leave 662 * node for our pfn. 663 */ 664 665 node = bm->cur.node; 666 if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) 667 goto node_found; 668 669 node = zone->rtree; 670 block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT; 671 672 for (i = zone->levels; i > 0; i--) { 673 int index; 674 675 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 676 index &= BM_RTREE_LEVEL_MASK; 677 BUG_ON(node->data[index] == 0); 678 node = (struct rtree_node *)node->data[index]; 679 } 680 681 node_found: 682 /* Update last position */ 683 bm->cur.zone = zone; 684 bm->cur.node = node; 685 bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK; 686 687 /* Set return values */ 688 *addr = node->data; 689 *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK; 690 691 return 0; 692 } 693 694 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) 695 { 696 void *addr; 697 unsigned int bit; 698 int error; 699 700 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 701 BUG_ON(error); 702 set_bit(bit, addr); 703 } 704 705 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) 706 { 707 void *addr; 708 unsigned int bit; 709 int error; 710 711 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 712 if (!error) 713 set_bit(bit, addr); 714 715 return error; 716 } 717 718 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) 719 { 720 void *addr; 721 unsigned int bit; 722 int error; 723 724 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 725 BUG_ON(error); 726 clear_bit(bit, addr); 727 } 728 729 static void memory_bm_clear_current(struct memory_bitmap *bm) 730 { 731 int bit; 732 733 bit = max(bm->cur.node_bit - 1, 0); 734 clear_bit(bit, bm->cur.node->data); 735 } 736 737 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) 738 { 739 void *addr; 740 unsigned int bit; 741 int error; 742 743 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 744 BUG_ON(error); 745 return test_bit(bit, addr); 746 } 747 748 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) 749 { 750 void *addr; 751 unsigned int bit; 752 753 return !memory_bm_find_bit(bm, pfn, &addr, &bit); 754 } 755 756 /* 757 * rtree_next_node - Jumps to the next leave node 758 * 759 * Sets the position to the beginning of the next node in the 760 * memory bitmap. This is either the next node in the current 761 * zone's radix tree or the first node in the radix tree of the 762 * next zone. 763 * 764 * Returns true if there is a next node, false otherwise. 765 */ 766 static bool rtree_next_node(struct memory_bitmap *bm) 767 { 768 bm->cur.node = list_entry(bm->cur.node->list.next, 769 struct rtree_node, list); 770 if (&bm->cur.node->list != &bm->cur.zone->leaves) { 771 bm->cur.node_pfn += BM_BITS_PER_BLOCK; 772 bm->cur.node_bit = 0; 773 touch_softlockup_watchdog(); 774 return true; 775 } 776 777 /* No more nodes, goto next zone */ 778 bm->cur.zone = list_entry(bm->cur.zone->list.next, 779 struct mem_zone_bm_rtree, list); 780 if (&bm->cur.zone->list != &bm->zones) { 781 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 782 struct rtree_node, list); 783 bm->cur.node_pfn = 0; 784 bm->cur.node_bit = 0; 785 return true; 786 } 787 788 /* No more zones */ 789 return false; 790 } 791 792 /** 793 * memory_bm_rtree_next_pfn - Find the next set bit in the bitmap @bm 794 * 795 * Starting from the last returned position this function searches 796 * for the next set bit in the memory bitmap and returns its 797 * number. If no more bit is set BM_END_OF_MAP is returned. 798 * 799 * It is required to run memory_bm_position_reset() before the 800 * first call to this function. 801 */ 802 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) 803 { 804 unsigned long bits, pfn, pages; 805 int bit; 806 807 do { 808 pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn; 809 bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK); 810 bit = find_next_bit(bm->cur.node->data, bits, 811 bm->cur.node_bit); 812 if (bit < bits) { 813 pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit; 814 bm->cur.node_bit = bit + 1; 815 return pfn; 816 } 817 } while (rtree_next_node(bm)); 818 819 return BM_END_OF_MAP; 820 } 821 822 /** 823 * This structure represents a range of page frames the contents of which 824 * should not be saved during the suspend. 825 */ 826 827 struct nosave_region { 828 struct list_head list; 829 unsigned long start_pfn; 830 unsigned long end_pfn; 831 }; 832 833 static LIST_HEAD(nosave_regions); 834 835 /** 836 * register_nosave_region - register a range of page frames the contents 837 * of which should not be saved during the suspend (to be used in the early 838 * initialization code) 839 */ 840 841 void __init 842 __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, 843 int use_kmalloc) 844 { 845 struct nosave_region *region; 846 847 if (start_pfn >= end_pfn) 848 return; 849 850 if (!list_empty(&nosave_regions)) { 851 /* Try to extend the previous region (they should be sorted) */ 852 region = list_entry(nosave_regions.prev, 853 struct nosave_region, list); 854 if (region->end_pfn == start_pfn) { 855 region->end_pfn = end_pfn; 856 goto Report; 857 } 858 } 859 if (use_kmalloc) { 860 /* during init, this shouldn't fail */ 861 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); 862 BUG_ON(!region); 863 } else 864 /* This allocation cannot fail */ 865 region = memblock_virt_alloc(sizeof(struct nosave_region), 0); 866 region->start_pfn = start_pfn; 867 region->end_pfn = end_pfn; 868 list_add_tail(®ion->list, &nosave_regions); 869 Report: 870 printk(KERN_INFO "PM: Registered nosave memory: [mem %#010llx-%#010llx]\n", 871 (unsigned long long) start_pfn << PAGE_SHIFT, 872 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); 873 } 874 875 /* 876 * Set bits in this map correspond to the page frames the contents of which 877 * should not be saved during the suspend. 878 */ 879 static struct memory_bitmap *forbidden_pages_map; 880 881 /* Set bits in this map correspond to free page frames. */ 882 static struct memory_bitmap *free_pages_map; 883 884 /* 885 * Each page frame allocated for creating the image is marked by setting the 886 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously 887 */ 888 889 void swsusp_set_page_free(struct page *page) 890 { 891 if (free_pages_map) 892 memory_bm_set_bit(free_pages_map, page_to_pfn(page)); 893 } 894 895 static int swsusp_page_is_free(struct page *page) 896 { 897 return free_pages_map ? 898 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; 899 } 900 901 void swsusp_unset_page_free(struct page *page) 902 { 903 if (free_pages_map) 904 memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); 905 } 906 907 static void swsusp_set_page_forbidden(struct page *page) 908 { 909 if (forbidden_pages_map) 910 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); 911 } 912 913 int swsusp_page_is_forbidden(struct page *page) 914 { 915 return forbidden_pages_map ? 916 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; 917 } 918 919 static void swsusp_unset_page_forbidden(struct page *page) 920 { 921 if (forbidden_pages_map) 922 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); 923 } 924 925 /** 926 * mark_nosave_pages - set bits corresponding to the page frames the 927 * contents of which should not be saved in a given bitmap. 928 */ 929 930 static void mark_nosave_pages(struct memory_bitmap *bm) 931 { 932 struct nosave_region *region; 933 934 if (list_empty(&nosave_regions)) 935 return; 936 937 list_for_each_entry(region, &nosave_regions, list) { 938 unsigned long pfn; 939 940 pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n", 941 (unsigned long long) region->start_pfn << PAGE_SHIFT, 942 ((unsigned long long) region->end_pfn << PAGE_SHIFT) 943 - 1); 944 945 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) 946 if (pfn_valid(pfn)) { 947 /* 948 * It is safe to ignore the result of 949 * mem_bm_set_bit_check() here, since we won't 950 * touch the PFNs for which the error is 951 * returned anyway. 952 */ 953 mem_bm_set_bit_check(bm, pfn); 954 } 955 } 956 } 957 958 static bool is_nosave_page(unsigned long pfn) 959 { 960 struct nosave_region *region; 961 962 list_for_each_entry(region, &nosave_regions, list) { 963 if (pfn >= region->start_pfn && pfn < region->end_pfn) { 964 pr_err("PM: %#010llx in e820 nosave region: " 965 "[mem %#010llx-%#010llx]\n", 966 (unsigned long long) pfn << PAGE_SHIFT, 967 (unsigned long long) region->start_pfn << PAGE_SHIFT, 968 ((unsigned long long) region->end_pfn << PAGE_SHIFT) 969 - 1); 970 return true; 971 } 972 } 973 974 return false; 975 } 976 977 /** 978 * create_basic_memory_bitmaps - create bitmaps needed for marking page 979 * frames that should not be saved and free page frames. The pointers 980 * forbidden_pages_map and free_pages_map are only modified if everything 981 * goes well, because we don't want the bits to be used before both bitmaps 982 * are set up. 983 */ 984 985 int create_basic_memory_bitmaps(void) 986 { 987 struct memory_bitmap *bm1, *bm2; 988 int error = 0; 989 990 if (forbidden_pages_map && free_pages_map) 991 return 0; 992 else 993 BUG_ON(forbidden_pages_map || free_pages_map); 994 995 bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 996 if (!bm1) 997 return -ENOMEM; 998 999 error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); 1000 if (error) 1001 goto Free_first_object; 1002 1003 bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 1004 if (!bm2) 1005 goto Free_first_bitmap; 1006 1007 error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); 1008 if (error) 1009 goto Free_second_object; 1010 1011 forbidden_pages_map = bm1; 1012 free_pages_map = bm2; 1013 mark_nosave_pages(forbidden_pages_map); 1014 1015 pr_debug("PM: Basic memory bitmaps created\n"); 1016 1017 return 0; 1018 1019 Free_second_object: 1020 kfree(bm2); 1021 Free_first_bitmap: 1022 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1023 Free_first_object: 1024 kfree(bm1); 1025 return -ENOMEM; 1026 } 1027 1028 /** 1029 * free_basic_memory_bitmaps - free memory bitmaps allocated by 1030 * create_basic_memory_bitmaps(). The auxiliary pointers are necessary 1031 * so that the bitmaps themselves are not referred to while they are being 1032 * freed. 1033 */ 1034 1035 void free_basic_memory_bitmaps(void) 1036 { 1037 struct memory_bitmap *bm1, *bm2; 1038 1039 if (WARN_ON(!(forbidden_pages_map && free_pages_map))) 1040 return; 1041 1042 bm1 = forbidden_pages_map; 1043 bm2 = free_pages_map; 1044 forbidden_pages_map = NULL; 1045 free_pages_map = NULL; 1046 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1047 kfree(bm1); 1048 memory_bm_free(bm2, PG_UNSAFE_CLEAR); 1049 kfree(bm2); 1050 1051 pr_debug("PM: Basic memory bitmaps freed\n"); 1052 } 1053 1054 /** 1055 * snapshot_additional_pages - estimate the number of additional pages 1056 * be needed for setting up the suspend image data structures for given 1057 * zone (usually the returned value is greater than the exact number) 1058 */ 1059 1060 unsigned int snapshot_additional_pages(struct zone *zone) 1061 { 1062 unsigned int rtree, nodes; 1063 1064 rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); 1065 rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), 1066 LINKED_PAGE_DATA_SIZE); 1067 while (nodes > 1) { 1068 nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL); 1069 rtree += nodes; 1070 } 1071 1072 return 2 * rtree; 1073 } 1074 1075 #ifdef CONFIG_HIGHMEM 1076 /** 1077 * count_free_highmem_pages - compute the total number of free highmem 1078 * pages, system-wide. 1079 */ 1080 1081 static unsigned int count_free_highmem_pages(void) 1082 { 1083 struct zone *zone; 1084 unsigned int cnt = 0; 1085 1086 for_each_populated_zone(zone) 1087 if (is_highmem(zone)) 1088 cnt += zone_page_state(zone, NR_FREE_PAGES); 1089 1090 return cnt; 1091 } 1092 1093 /** 1094 * saveable_highmem_page - Determine whether a highmem page should be 1095 * included in the suspend image. 1096 * 1097 * We should save the page if it isn't Nosave or NosaveFree, or Reserved, 1098 * and it isn't a part of a free chunk of pages. 1099 */ 1100 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) 1101 { 1102 struct page *page; 1103 1104 if (!pfn_valid(pfn)) 1105 return NULL; 1106 1107 page = pfn_to_page(pfn); 1108 if (page_zone(page) != zone) 1109 return NULL; 1110 1111 BUG_ON(!PageHighMem(page)); 1112 1113 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) || 1114 PageReserved(page)) 1115 return NULL; 1116 1117 if (page_is_guard(page)) 1118 return NULL; 1119 1120 return page; 1121 } 1122 1123 /** 1124 * count_highmem_pages - compute the total number of saveable highmem 1125 * pages. 1126 */ 1127 1128 static unsigned int count_highmem_pages(void) 1129 { 1130 struct zone *zone; 1131 unsigned int n = 0; 1132 1133 for_each_populated_zone(zone) { 1134 unsigned long pfn, max_zone_pfn; 1135 1136 if (!is_highmem(zone)) 1137 continue; 1138 1139 mark_free_pages(zone); 1140 max_zone_pfn = zone_end_pfn(zone); 1141 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1142 if (saveable_highmem_page(zone, pfn)) 1143 n++; 1144 } 1145 return n; 1146 } 1147 #else 1148 static inline void *saveable_highmem_page(struct zone *z, unsigned long p) 1149 { 1150 return NULL; 1151 } 1152 #endif /* CONFIG_HIGHMEM */ 1153 1154 /** 1155 * saveable_page - Determine whether a non-highmem page should be included 1156 * in the suspend image. 1157 * 1158 * We should save the page if it isn't Nosave, and is not in the range 1159 * of pages statically defined as 'unsaveable', and it isn't a part of 1160 * a free chunk of pages. 1161 */ 1162 static struct page *saveable_page(struct zone *zone, unsigned long pfn) 1163 { 1164 struct page *page; 1165 1166 if (!pfn_valid(pfn)) 1167 return NULL; 1168 1169 page = pfn_to_page(pfn); 1170 if (page_zone(page) != zone) 1171 return NULL; 1172 1173 BUG_ON(PageHighMem(page)); 1174 1175 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 1176 return NULL; 1177 1178 if (PageReserved(page) 1179 && (!kernel_page_present(page) || pfn_is_nosave(pfn))) 1180 return NULL; 1181 1182 if (page_is_guard(page)) 1183 return NULL; 1184 1185 return page; 1186 } 1187 1188 /** 1189 * count_data_pages - compute the total number of saveable non-highmem 1190 * pages. 1191 */ 1192 1193 static unsigned int count_data_pages(void) 1194 { 1195 struct zone *zone; 1196 unsigned long pfn, max_zone_pfn; 1197 unsigned int n = 0; 1198 1199 for_each_populated_zone(zone) { 1200 if (is_highmem(zone)) 1201 continue; 1202 1203 mark_free_pages(zone); 1204 max_zone_pfn = zone_end_pfn(zone); 1205 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1206 if (saveable_page(zone, pfn)) 1207 n++; 1208 } 1209 return n; 1210 } 1211 1212 /* This is needed, because copy_page and memcpy are not usable for copying 1213 * task structs. 1214 */ 1215 static inline void do_copy_page(long *dst, long *src) 1216 { 1217 int n; 1218 1219 for (n = PAGE_SIZE / sizeof(long); n; n--) 1220 *dst++ = *src++; 1221 } 1222 1223 1224 /** 1225 * safe_copy_page - check if the page we are going to copy is marked as 1226 * present in the kernel page tables (this always is the case if 1227 * CONFIG_DEBUG_PAGEALLOC is not set and in that case 1228 * kernel_page_present() always returns 'true'). 1229 */ 1230 static void safe_copy_page(void *dst, struct page *s_page) 1231 { 1232 if (kernel_page_present(s_page)) { 1233 do_copy_page(dst, page_address(s_page)); 1234 } else { 1235 kernel_map_pages(s_page, 1, 1); 1236 do_copy_page(dst, page_address(s_page)); 1237 kernel_map_pages(s_page, 1, 0); 1238 } 1239 } 1240 1241 1242 #ifdef CONFIG_HIGHMEM 1243 static inline struct page * 1244 page_is_saveable(struct zone *zone, unsigned long pfn) 1245 { 1246 return is_highmem(zone) ? 1247 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); 1248 } 1249 1250 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1251 { 1252 struct page *s_page, *d_page; 1253 void *src, *dst; 1254 1255 s_page = pfn_to_page(src_pfn); 1256 d_page = pfn_to_page(dst_pfn); 1257 if (PageHighMem(s_page)) { 1258 src = kmap_atomic(s_page); 1259 dst = kmap_atomic(d_page); 1260 do_copy_page(dst, src); 1261 kunmap_atomic(dst); 1262 kunmap_atomic(src); 1263 } else { 1264 if (PageHighMem(d_page)) { 1265 /* Page pointed to by src may contain some kernel 1266 * data modified by kmap_atomic() 1267 */ 1268 safe_copy_page(buffer, s_page); 1269 dst = kmap_atomic(d_page); 1270 copy_page(dst, buffer); 1271 kunmap_atomic(dst); 1272 } else { 1273 safe_copy_page(page_address(d_page), s_page); 1274 } 1275 } 1276 } 1277 #else 1278 #define page_is_saveable(zone, pfn) saveable_page(zone, pfn) 1279 1280 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1281 { 1282 safe_copy_page(page_address(pfn_to_page(dst_pfn)), 1283 pfn_to_page(src_pfn)); 1284 } 1285 #endif /* CONFIG_HIGHMEM */ 1286 1287 static void 1288 copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) 1289 { 1290 struct zone *zone; 1291 unsigned long pfn; 1292 1293 for_each_populated_zone(zone) { 1294 unsigned long max_zone_pfn; 1295 1296 mark_free_pages(zone); 1297 max_zone_pfn = zone_end_pfn(zone); 1298 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1299 if (page_is_saveable(zone, pfn)) 1300 memory_bm_set_bit(orig_bm, pfn); 1301 } 1302 memory_bm_position_reset(orig_bm); 1303 memory_bm_position_reset(copy_bm); 1304 for(;;) { 1305 pfn = memory_bm_next_pfn(orig_bm); 1306 if (unlikely(pfn == BM_END_OF_MAP)) 1307 break; 1308 copy_data_page(memory_bm_next_pfn(copy_bm), pfn); 1309 } 1310 } 1311 1312 /* Total number of image pages */ 1313 static unsigned int nr_copy_pages; 1314 /* Number of pages needed for saving the original pfns of the image pages */ 1315 static unsigned int nr_meta_pages; 1316 /* 1317 * Numbers of normal and highmem page frames allocated for hibernation image 1318 * before suspending devices. 1319 */ 1320 unsigned int alloc_normal, alloc_highmem; 1321 /* 1322 * Memory bitmap used for marking saveable pages (during hibernation) or 1323 * hibernation image pages (during restore) 1324 */ 1325 static struct memory_bitmap orig_bm; 1326 /* 1327 * Memory bitmap used during hibernation for marking allocated page frames that 1328 * will contain copies of saveable pages. During restore it is initially used 1329 * for marking hibernation image pages, but then the set bits from it are 1330 * duplicated in @orig_bm and it is released. On highmem systems it is next 1331 * used for marking "safe" highmem pages, but it has to be reinitialized for 1332 * this purpose. 1333 */ 1334 static struct memory_bitmap copy_bm; 1335 1336 /** 1337 * swsusp_free - free pages allocated for the suspend. 1338 * 1339 * Suspend pages are alocated before the atomic copy is made, so we 1340 * need to release them after the resume. 1341 */ 1342 1343 void swsusp_free(void) 1344 { 1345 unsigned long fb_pfn, fr_pfn; 1346 1347 if (!forbidden_pages_map || !free_pages_map) 1348 goto out; 1349 1350 memory_bm_position_reset(forbidden_pages_map); 1351 memory_bm_position_reset(free_pages_map); 1352 1353 loop: 1354 fr_pfn = memory_bm_next_pfn(free_pages_map); 1355 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1356 1357 /* 1358 * Find the next bit set in both bitmaps. This is guaranteed to 1359 * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP. 1360 */ 1361 do { 1362 if (fb_pfn < fr_pfn) 1363 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1364 if (fr_pfn < fb_pfn) 1365 fr_pfn = memory_bm_next_pfn(free_pages_map); 1366 } while (fb_pfn != fr_pfn); 1367 1368 if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) { 1369 struct page *page = pfn_to_page(fr_pfn); 1370 1371 memory_bm_clear_current(forbidden_pages_map); 1372 memory_bm_clear_current(free_pages_map); 1373 __free_page(page); 1374 goto loop; 1375 } 1376 1377 out: 1378 nr_copy_pages = 0; 1379 nr_meta_pages = 0; 1380 restore_pblist = NULL; 1381 buffer = NULL; 1382 alloc_normal = 0; 1383 alloc_highmem = 0; 1384 } 1385 1386 /* Helper functions used for the shrinking of memory. */ 1387 1388 #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) 1389 1390 /** 1391 * preallocate_image_pages - Allocate a number of pages for hibernation image 1392 * @nr_pages: Number of page frames to allocate. 1393 * @mask: GFP flags to use for the allocation. 1394 * 1395 * Return value: Number of page frames actually allocated 1396 */ 1397 static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) 1398 { 1399 unsigned long nr_alloc = 0; 1400 1401 while (nr_pages > 0) { 1402 struct page *page; 1403 1404 page = alloc_image_page(mask); 1405 if (!page) 1406 break; 1407 memory_bm_set_bit(©_bm, page_to_pfn(page)); 1408 if (PageHighMem(page)) 1409 alloc_highmem++; 1410 else 1411 alloc_normal++; 1412 nr_pages--; 1413 nr_alloc++; 1414 } 1415 1416 return nr_alloc; 1417 } 1418 1419 static unsigned long preallocate_image_memory(unsigned long nr_pages, 1420 unsigned long avail_normal) 1421 { 1422 unsigned long alloc; 1423 1424 if (avail_normal <= alloc_normal) 1425 return 0; 1426 1427 alloc = avail_normal - alloc_normal; 1428 if (nr_pages < alloc) 1429 alloc = nr_pages; 1430 1431 return preallocate_image_pages(alloc, GFP_IMAGE); 1432 } 1433 1434 #ifdef CONFIG_HIGHMEM 1435 static unsigned long preallocate_image_highmem(unsigned long nr_pages) 1436 { 1437 return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); 1438 } 1439 1440 /** 1441 * __fraction - Compute (an approximation of) x * (multiplier / base) 1442 */ 1443 static unsigned long __fraction(u64 x, u64 multiplier, u64 base) 1444 { 1445 x *= multiplier; 1446 do_div(x, base); 1447 return (unsigned long)x; 1448 } 1449 1450 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1451 unsigned long highmem, 1452 unsigned long total) 1453 { 1454 unsigned long alloc = __fraction(nr_pages, highmem, total); 1455 1456 return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); 1457 } 1458 #else /* CONFIG_HIGHMEM */ 1459 static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) 1460 { 1461 return 0; 1462 } 1463 1464 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1465 unsigned long highmem, 1466 unsigned long total) 1467 { 1468 return 0; 1469 } 1470 #endif /* CONFIG_HIGHMEM */ 1471 1472 /** 1473 * free_unnecessary_pages - Release preallocated pages not needed for the image 1474 */ 1475 static void free_unnecessary_pages(void) 1476 { 1477 unsigned long save, to_free_normal, to_free_highmem; 1478 1479 save = count_data_pages(); 1480 if (alloc_normal >= save) { 1481 to_free_normal = alloc_normal - save; 1482 save = 0; 1483 } else { 1484 to_free_normal = 0; 1485 save -= alloc_normal; 1486 } 1487 save += count_highmem_pages(); 1488 if (alloc_highmem >= save) { 1489 to_free_highmem = alloc_highmem - save; 1490 } else { 1491 to_free_highmem = 0; 1492 save -= alloc_highmem; 1493 if (to_free_normal > save) 1494 to_free_normal -= save; 1495 else 1496 to_free_normal = 0; 1497 } 1498 1499 memory_bm_position_reset(©_bm); 1500 1501 while (to_free_normal > 0 || to_free_highmem > 0) { 1502 unsigned long pfn = memory_bm_next_pfn(©_bm); 1503 struct page *page = pfn_to_page(pfn); 1504 1505 if (PageHighMem(page)) { 1506 if (!to_free_highmem) 1507 continue; 1508 to_free_highmem--; 1509 alloc_highmem--; 1510 } else { 1511 if (!to_free_normal) 1512 continue; 1513 to_free_normal--; 1514 alloc_normal--; 1515 } 1516 memory_bm_clear_bit(©_bm, pfn); 1517 swsusp_unset_page_forbidden(page); 1518 swsusp_unset_page_free(page); 1519 __free_page(page); 1520 } 1521 } 1522 1523 /** 1524 * minimum_image_size - Estimate the minimum acceptable size of an image 1525 * @saveable: Number of saveable pages in the system. 1526 * 1527 * We want to avoid attempting to free too much memory too hard, so estimate the 1528 * minimum acceptable size of a hibernation image to use as the lower limit for 1529 * preallocating memory. 1530 * 1531 * We assume that the minimum image size should be proportional to 1532 * 1533 * [number of saveable pages] - [number of pages that can be freed in theory] 1534 * 1535 * where the second term is the sum of (1) reclaimable slab pages, (2) active 1536 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages, 1537 * minus mapped file pages. 1538 */ 1539 static unsigned long minimum_image_size(unsigned long saveable) 1540 { 1541 unsigned long size; 1542 1543 size = global_page_state(NR_SLAB_RECLAIMABLE) 1544 + global_page_state(NR_ACTIVE_ANON) 1545 + global_page_state(NR_INACTIVE_ANON) 1546 + global_page_state(NR_ACTIVE_FILE) 1547 + global_page_state(NR_INACTIVE_FILE) 1548 - global_page_state(NR_FILE_MAPPED); 1549 1550 return saveable <= size ? 0 : saveable - size; 1551 } 1552 1553 /** 1554 * hibernate_preallocate_memory - Preallocate memory for hibernation image 1555 * 1556 * To create a hibernation image it is necessary to make a copy of every page 1557 * frame in use. We also need a number of page frames to be free during 1558 * hibernation for allocations made while saving the image and for device 1559 * drivers, in case they need to allocate memory from their hibernation 1560 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough 1561 * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through 1562 * /sys/power/reserved_size, respectively). To make this happen, we compute the 1563 * total number of available page frames and allocate at least 1564 * 1565 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 1566 * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) 1567 * 1568 * of them, which corresponds to the maximum size of a hibernation image. 1569 * 1570 * If image_size is set below the number following from the above formula, 1571 * the preallocation of memory is continued until the total number of saveable 1572 * pages in the system is below the requested image size or the minimum 1573 * acceptable image size returned by minimum_image_size(), whichever is greater. 1574 */ 1575 int hibernate_preallocate_memory(void) 1576 { 1577 struct zone *zone; 1578 unsigned long saveable, size, max_size, count, highmem, pages = 0; 1579 unsigned long alloc, save_highmem, pages_highmem, avail_normal; 1580 ktime_t start, stop; 1581 int error; 1582 1583 printk(KERN_INFO "PM: Preallocating image memory... "); 1584 start = ktime_get(); 1585 1586 error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); 1587 if (error) 1588 goto err_out; 1589 1590 error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); 1591 if (error) 1592 goto err_out; 1593 1594 alloc_normal = 0; 1595 alloc_highmem = 0; 1596 1597 /* Count the number of saveable data pages. */ 1598 save_highmem = count_highmem_pages(); 1599 saveable = count_data_pages(); 1600 1601 /* 1602 * Compute the total number of page frames we can use (count) and the 1603 * number of pages needed for image metadata (size). 1604 */ 1605 count = saveable; 1606 saveable += save_highmem; 1607 highmem = save_highmem; 1608 size = 0; 1609 for_each_populated_zone(zone) { 1610 size += snapshot_additional_pages(zone); 1611 if (is_highmem(zone)) 1612 highmem += zone_page_state(zone, NR_FREE_PAGES); 1613 else 1614 count += zone_page_state(zone, NR_FREE_PAGES); 1615 } 1616 avail_normal = count; 1617 count += highmem; 1618 count -= totalreserve_pages; 1619 1620 /* Add number of pages required for page keys (s390 only). */ 1621 size += page_key_additional_pages(saveable); 1622 1623 /* Compute the maximum number of saveable pages to leave in memory. */ 1624 max_size = (count - (size + PAGES_FOR_IO)) / 2 1625 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); 1626 /* Compute the desired number of image pages specified by image_size. */ 1627 size = DIV_ROUND_UP(image_size, PAGE_SIZE); 1628 if (size > max_size) 1629 size = max_size; 1630 /* 1631 * If the desired number of image pages is at least as large as the 1632 * current number of saveable pages in memory, allocate page frames for 1633 * the image and we're done. 1634 */ 1635 if (size >= saveable) { 1636 pages = preallocate_image_highmem(save_highmem); 1637 pages += preallocate_image_memory(saveable - pages, avail_normal); 1638 goto out; 1639 } 1640 1641 /* Estimate the minimum size of the image. */ 1642 pages = minimum_image_size(saveable); 1643 /* 1644 * To avoid excessive pressure on the normal zone, leave room in it to 1645 * accommodate an image of the minimum size (unless it's already too 1646 * small, in which case don't preallocate pages from it at all). 1647 */ 1648 if (avail_normal > pages) 1649 avail_normal -= pages; 1650 else 1651 avail_normal = 0; 1652 if (size < pages) 1653 size = min_t(unsigned long, pages, max_size); 1654 1655 /* 1656 * Let the memory management subsystem know that we're going to need a 1657 * large number of page frames to allocate and make it free some memory. 1658 * NOTE: If this is not done, performance will be hurt badly in some 1659 * test cases. 1660 */ 1661 shrink_all_memory(saveable - size); 1662 1663 /* 1664 * The number of saveable pages in memory was too high, so apply some 1665 * pressure to decrease it. First, make room for the largest possible 1666 * image and fail if that doesn't work. Next, try to decrease the size 1667 * of the image as much as indicated by 'size' using allocations from 1668 * highmem and non-highmem zones separately. 1669 */ 1670 pages_highmem = preallocate_image_highmem(highmem / 2); 1671 alloc = count - max_size; 1672 if (alloc > pages_highmem) 1673 alloc -= pages_highmem; 1674 else 1675 alloc = 0; 1676 pages = preallocate_image_memory(alloc, avail_normal); 1677 if (pages < alloc) { 1678 /* We have exhausted non-highmem pages, try highmem. */ 1679 alloc -= pages; 1680 pages += pages_highmem; 1681 pages_highmem = preallocate_image_highmem(alloc); 1682 if (pages_highmem < alloc) 1683 goto err_out; 1684 pages += pages_highmem; 1685 /* 1686 * size is the desired number of saveable pages to leave in 1687 * memory, so try to preallocate (all memory - size) pages. 1688 */ 1689 alloc = (count - pages) - size; 1690 pages += preallocate_image_highmem(alloc); 1691 } else { 1692 /* 1693 * There are approximately max_size saveable pages at this point 1694 * and we want to reduce this number down to size. 1695 */ 1696 alloc = max_size - size; 1697 size = preallocate_highmem_fraction(alloc, highmem, count); 1698 pages_highmem += size; 1699 alloc -= size; 1700 size = preallocate_image_memory(alloc, avail_normal); 1701 pages_highmem += preallocate_image_highmem(alloc - size); 1702 pages += pages_highmem + size; 1703 } 1704 1705 /* 1706 * We only need as many page frames for the image as there are saveable 1707 * pages in memory, but we have allocated more. Release the excessive 1708 * ones now. 1709 */ 1710 free_unnecessary_pages(); 1711 1712 out: 1713 stop = ktime_get(); 1714 printk(KERN_CONT "done (allocated %lu pages)\n", pages); 1715 swsusp_show_speed(start, stop, pages, "Allocated"); 1716 1717 return 0; 1718 1719 err_out: 1720 printk(KERN_CONT "\n"); 1721 swsusp_free(); 1722 return -ENOMEM; 1723 } 1724 1725 #ifdef CONFIG_HIGHMEM 1726 /** 1727 * count_pages_for_highmem - compute the number of non-highmem pages 1728 * that will be necessary for creating copies of highmem pages. 1729 */ 1730 1731 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) 1732 { 1733 unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; 1734 1735 if (free_highmem >= nr_highmem) 1736 nr_highmem = 0; 1737 else 1738 nr_highmem -= free_highmem; 1739 1740 return nr_highmem; 1741 } 1742 #else 1743 static unsigned int 1744 count_pages_for_highmem(unsigned int nr_highmem) { return 0; } 1745 #endif /* CONFIG_HIGHMEM */ 1746 1747 /** 1748 * enough_free_mem - Make sure we have enough free memory for the 1749 * snapshot image. 1750 */ 1751 1752 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) 1753 { 1754 struct zone *zone; 1755 unsigned int free = alloc_normal; 1756 1757 for_each_populated_zone(zone) 1758 if (!is_highmem(zone)) 1759 free += zone_page_state(zone, NR_FREE_PAGES); 1760 1761 nr_pages += count_pages_for_highmem(nr_highmem); 1762 pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n", 1763 nr_pages, PAGES_FOR_IO, free); 1764 1765 return free > nr_pages + PAGES_FOR_IO; 1766 } 1767 1768 #ifdef CONFIG_HIGHMEM 1769 /** 1770 * get_highmem_buffer - if there are some highmem pages in the suspend 1771 * image, we may need the buffer to copy them and/or load their data. 1772 */ 1773 1774 static inline int get_highmem_buffer(int safe_needed) 1775 { 1776 buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); 1777 return buffer ? 0 : -ENOMEM; 1778 } 1779 1780 /** 1781 * alloc_highmem_image_pages - allocate some highmem pages for the image. 1782 * Try to allocate as many pages as needed, but if the number of free 1783 * highmem pages is lesser than that, allocate them all. 1784 */ 1785 1786 static inline unsigned int 1787 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) 1788 { 1789 unsigned int to_alloc = count_free_highmem_pages(); 1790 1791 if (to_alloc > nr_highmem) 1792 to_alloc = nr_highmem; 1793 1794 nr_highmem -= to_alloc; 1795 while (to_alloc-- > 0) { 1796 struct page *page; 1797 1798 page = alloc_image_page(__GFP_HIGHMEM); 1799 memory_bm_set_bit(bm, page_to_pfn(page)); 1800 } 1801 return nr_highmem; 1802 } 1803 #else 1804 static inline int get_highmem_buffer(int safe_needed) { return 0; } 1805 1806 static inline unsigned int 1807 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } 1808 #endif /* CONFIG_HIGHMEM */ 1809 1810 /** 1811 * swsusp_alloc - allocate memory for the suspend image 1812 * 1813 * We first try to allocate as many highmem pages as there are 1814 * saveable highmem pages in the system. If that fails, we allocate 1815 * non-highmem pages for the copies of the remaining highmem ones. 1816 * 1817 * In this approach it is likely that the copies of highmem pages will 1818 * also be located in the high memory, because of the way in which 1819 * copy_data_pages() works. 1820 */ 1821 1822 static int 1823 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, 1824 unsigned int nr_pages, unsigned int nr_highmem) 1825 { 1826 if (nr_highmem > 0) { 1827 if (get_highmem_buffer(PG_ANY)) 1828 goto err_out; 1829 if (nr_highmem > alloc_highmem) { 1830 nr_highmem -= alloc_highmem; 1831 nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); 1832 } 1833 } 1834 if (nr_pages > alloc_normal) { 1835 nr_pages -= alloc_normal; 1836 while (nr_pages-- > 0) { 1837 struct page *page; 1838 1839 page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); 1840 if (!page) 1841 goto err_out; 1842 memory_bm_set_bit(copy_bm, page_to_pfn(page)); 1843 } 1844 } 1845 1846 return 0; 1847 1848 err_out: 1849 swsusp_free(); 1850 return -ENOMEM; 1851 } 1852 1853 asmlinkage __visible int swsusp_save(void) 1854 { 1855 unsigned int nr_pages, nr_highmem; 1856 1857 printk(KERN_INFO "PM: Creating hibernation image:\n"); 1858 1859 drain_local_pages(NULL); 1860 nr_pages = count_data_pages(); 1861 nr_highmem = count_highmem_pages(); 1862 printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem); 1863 1864 if (!enough_free_mem(nr_pages, nr_highmem)) { 1865 printk(KERN_ERR "PM: Not enough free memory\n"); 1866 return -ENOMEM; 1867 } 1868 1869 if (swsusp_alloc(&orig_bm, ©_bm, nr_pages, nr_highmem)) { 1870 printk(KERN_ERR "PM: Memory allocation failed\n"); 1871 return -ENOMEM; 1872 } 1873 1874 /* During allocating of suspend pagedir, new cold pages may appear. 1875 * Kill them. 1876 */ 1877 drain_local_pages(NULL); 1878 copy_data_pages(©_bm, &orig_bm); 1879 1880 /* 1881 * End of critical section. From now on, we can write to memory, 1882 * but we should not touch disk. This specially means we must _not_ 1883 * touch swap space! Except we must write out our image of course. 1884 */ 1885 1886 nr_pages += nr_highmem; 1887 nr_copy_pages = nr_pages; 1888 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); 1889 1890 printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n", 1891 nr_pages); 1892 1893 return 0; 1894 } 1895 1896 #ifndef CONFIG_ARCH_HIBERNATION_HEADER 1897 static int init_header_complete(struct swsusp_info *info) 1898 { 1899 memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); 1900 info->version_code = LINUX_VERSION_CODE; 1901 return 0; 1902 } 1903 1904 static char *check_image_kernel(struct swsusp_info *info) 1905 { 1906 if (info->version_code != LINUX_VERSION_CODE) 1907 return "kernel version"; 1908 if (strcmp(info->uts.sysname,init_utsname()->sysname)) 1909 return "system type"; 1910 if (strcmp(info->uts.release,init_utsname()->release)) 1911 return "kernel release"; 1912 if (strcmp(info->uts.version,init_utsname()->version)) 1913 return "version"; 1914 if (strcmp(info->uts.machine,init_utsname()->machine)) 1915 return "machine"; 1916 return NULL; 1917 } 1918 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ 1919 1920 unsigned long snapshot_get_image_size(void) 1921 { 1922 return nr_copy_pages + nr_meta_pages + 1; 1923 } 1924 1925 static int init_header(struct swsusp_info *info) 1926 { 1927 memset(info, 0, sizeof(struct swsusp_info)); 1928 info->num_physpages = get_num_physpages(); 1929 info->image_pages = nr_copy_pages; 1930 info->pages = snapshot_get_image_size(); 1931 info->size = info->pages; 1932 info->size <<= PAGE_SHIFT; 1933 return init_header_complete(info); 1934 } 1935 1936 /** 1937 * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm 1938 * are stored in the array @buf[] (1 page at a time) 1939 */ 1940 1941 static inline void 1942 pack_pfns(unsigned long *buf, struct memory_bitmap *bm) 1943 { 1944 int j; 1945 1946 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 1947 buf[j] = memory_bm_next_pfn(bm); 1948 if (unlikely(buf[j] == BM_END_OF_MAP)) 1949 break; 1950 /* Save page key for data page (s390 only). */ 1951 page_key_read(buf + j); 1952 } 1953 } 1954 1955 /** 1956 * snapshot_read_next - used for reading the system memory snapshot. 1957 * 1958 * On the first call to it @handle should point to a zeroed 1959 * snapshot_handle structure. The structure gets updated and a pointer 1960 * to it should be passed to this function every next time. 1961 * 1962 * On success the function returns a positive number. Then, the caller 1963 * is allowed to read up to the returned number of bytes from the memory 1964 * location computed by the data_of() macro. 1965 * 1966 * The function returns 0 to indicate the end of data stream condition, 1967 * and a negative number is returned on error. In such cases the 1968 * structure pointed to by @handle is not updated and should not be used 1969 * any more. 1970 */ 1971 1972 int snapshot_read_next(struct snapshot_handle *handle) 1973 { 1974 if (handle->cur > nr_meta_pages + nr_copy_pages) 1975 return 0; 1976 1977 if (!buffer) { 1978 /* This makes the buffer be freed by swsusp_free() */ 1979 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 1980 if (!buffer) 1981 return -ENOMEM; 1982 } 1983 if (!handle->cur) { 1984 int error; 1985 1986 error = init_header((struct swsusp_info *)buffer); 1987 if (error) 1988 return error; 1989 handle->buffer = buffer; 1990 memory_bm_position_reset(&orig_bm); 1991 memory_bm_position_reset(©_bm); 1992 } else if (handle->cur <= nr_meta_pages) { 1993 clear_page(buffer); 1994 pack_pfns(buffer, &orig_bm); 1995 } else { 1996 struct page *page; 1997 1998 page = pfn_to_page(memory_bm_next_pfn(©_bm)); 1999 if (PageHighMem(page)) { 2000 /* Highmem pages are copied to the buffer, 2001 * because we can't return with a kmapped 2002 * highmem page (we may not be called again). 2003 */ 2004 void *kaddr; 2005 2006 kaddr = kmap_atomic(page); 2007 copy_page(buffer, kaddr); 2008 kunmap_atomic(kaddr); 2009 handle->buffer = buffer; 2010 } else { 2011 handle->buffer = page_address(page); 2012 } 2013 } 2014 handle->cur++; 2015 return PAGE_SIZE; 2016 } 2017 2018 /** 2019 * mark_unsafe_pages - mark the pages that cannot be used for storing 2020 * the image during resume, because they conflict with the pages that 2021 * had been used before suspend 2022 */ 2023 2024 static int mark_unsafe_pages(struct memory_bitmap *bm) 2025 { 2026 struct zone *zone; 2027 unsigned long pfn, max_zone_pfn; 2028 2029 /* Clear page flags */ 2030 for_each_populated_zone(zone) { 2031 max_zone_pfn = zone_end_pfn(zone); 2032 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 2033 if (pfn_valid(pfn)) 2034 swsusp_unset_page_free(pfn_to_page(pfn)); 2035 } 2036 2037 /* Mark pages that correspond to the "original" pfns as "unsafe" */ 2038 memory_bm_position_reset(bm); 2039 do { 2040 pfn = memory_bm_next_pfn(bm); 2041 if (likely(pfn != BM_END_OF_MAP)) { 2042 if (likely(pfn_valid(pfn)) && !is_nosave_page(pfn)) 2043 swsusp_set_page_free(pfn_to_page(pfn)); 2044 else 2045 return -EFAULT; 2046 } 2047 } while (pfn != BM_END_OF_MAP); 2048 2049 allocated_unsafe_pages = 0; 2050 2051 return 0; 2052 } 2053 2054 static void 2055 duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) 2056 { 2057 unsigned long pfn; 2058 2059 memory_bm_position_reset(src); 2060 pfn = memory_bm_next_pfn(src); 2061 while (pfn != BM_END_OF_MAP) { 2062 memory_bm_set_bit(dst, pfn); 2063 pfn = memory_bm_next_pfn(src); 2064 } 2065 } 2066 2067 static int check_header(struct swsusp_info *info) 2068 { 2069 char *reason; 2070 2071 reason = check_image_kernel(info); 2072 if (!reason && info->num_physpages != get_num_physpages()) 2073 reason = "memory size"; 2074 if (reason) { 2075 printk(KERN_ERR "PM: Image mismatch: %s\n", reason); 2076 return -EPERM; 2077 } 2078 return 0; 2079 } 2080 2081 /** 2082 * load header - check the image header and copy data from it 2083 */ 2084 2085 static int 2086 load_header(struct swsusp_info *info) 2087 { 2088 int error; 2089 2090 restore_pblist = NULL; 2091 error = check_header(info); 2092 if (!error) { 2093 nr_copy_pages = info->image_pages; 2094 nr_meta_pages = info->pages - info->image_pages - 1; 2095 } 2096 return error; 2097 } 2098 2099 /** 2100 * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set 2101 * the corresponding bit in the memory bitmap @bm 2102 */ 2103 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) 2104 { 2105 int j; 2106 2107 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 2108 if (unlikely(buf[j] == BM_END_OF_MAP)) 2109 break; 2110 2111 /* Extract and buffer page key for data page (s390 only). */ 2112 page_key_memorize(buf + j); 2113 2114 if (memory_bm_pfn_present(bm, buf[j])) 2115 memory_bm_set_bit(bm, buf[j]); 2116 else 2117 return -EFAULT; 2118 } 2119 2120 return 0; 2121 } 2122 2123 /* List of "safe" pages that may be used to store data loaded from the suspend 2124 * image 2125 */ 2126 static struct linked_page *safe_pages_list; 2127 2128 #ifdef CONFIG_HIGHMEM 2129 /* struct highmem_pbe is used for creating the list of highmem pages that 2130 * should be restored atomically during the resume from disk, because the page 2131 * frames they have occupied before the suspend are in use. 2132 */ 2133 struct highmem_pbe { 2134 struct page *copy_page; /* data is here now */ 2135 struct page *orig_page; /* data was here before the suspend */ 2136 struct highmem_pbe *next; 2137 }; 2138 2139 /* List of highmem PBEs needed for restoring the highmem pages that were 2140 * allocated before the suspend and included in the suspend image, but have 2141 * also been allocated by the "resume" kernel, so their contents cannot be 2142 * written directly to their "original" page frames. 2143 */ 2144 static struct highmem_pbe *highmem_pblist; 2145 2146 /** 2147 * count_highmem_image_pages - compute the number of highmem pages in the 2148 * suspend image. The bits in the memory bitmap @bm that correspond to the 2149 * image pages are assumed to be set. 2150 */ 2151 2152 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) 2153 { 2154 unsigned long pfn; 2155 unsigned int cnt = 0; 2156 2157 memory_bm_position_reset(bm); 2158 pfn = memory_bm_next_pfn(bm); 2159 while (pfn != BM_END_OF_MAP) { 2160 if (PageHighMem(pfn_to_page(pfn))) 2161 cnt++; 2162 2163 pfn = memory_bm_next_pfn(bm); 2164 } 2165 return cnt; 2166 } 2167 2168 /** 2169 * prepare_highmem_image - try to allocate as many highmem pages as 2170 * there are highmem image pages (@nr_highmem_p points to the variable 2171 * containing the number of highmem image pages). The pages that are 2172 * "safe" (ie. will not be overwritten when the suspend image is 2173 * restored) have the corresponding bits set in @bm (it must be 2174 * unitialized). 2175 * 2176 * NOTE: This function should not be called if there are no highmem 2177 * image pages. 2178 */ 2179 2180 static unsigned int safe_highmem_pages; 2181 2182 static struct memory_bitmap *safe_highmem_bm; 2183 2184 static int 2185 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) 2186 { 2187 unsigned int to_alloc; 2188 2189 if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) 2190 return -ENOMEM; 2191 2192 if (get_highmem_buffer(PG_SAFE)) 2193 return -ENOMEM; 2194 2195 to_alloc = count_free_highmem_pages(); 2196 if (to_alloc > *nr_highmem_p) 2197 to_alloc = *nr_highmem_p; 2198 else 2199 *nr_highmem_p = to_alloc; 2200 2201 safe_highmem_pages = 0; 2202 while (to_alloc-- > 0) { 2203 struct page *page; 2204 2205 page = alloc_page(__GFP_HIGHMEM); 2206 if (!swsusp_page_is_free(page)) { 2207 /* The page is "safe", set its bit the bitmap */ 2208 memory_bm_set_bit(bm, page_to_pfn(page)); 2209 safe_highmem_pages++; 2210 } 2211 /* Mark the page as allocated */ 2212 swsusp_set_page_forbidden(page); 2213 swsusp_set_page_free(page); 2214 } 2215 memory_bm_position_reset(bm); 2216 safe_highmem_bm = bm; 2217 return 0; 2218 } 2219 2220 /** 2221 * get_highmem_page_buffer - for given highmem image page find the buffer 2222 * that suspend_write_next() should set for its caller to write to. 2223 * 2224 * If the page is to be saved to its "original" page frame or a copy of 2225 * the page is to be made in the highmem, @buffer is returned. Otherwise, 2226 * the copy of the page is to be made in normal memory, so the address of 2227 * the copy is returned. 2228 * 2229 * If @buffer is returned, the caller of suspend_write_next() will write 2230 * the page's contents to @buffer, so they will have to be copied to the 2231 * right location on the next call to suspend_write_next() and it is done 2232 * with the help of copy_last_highmem_page(). For this purpose, if 2233 * @buffer is returned, @last_highmem page is set to the page to which 2234 * the data will have to be copied from @buffer. 2235 */ 2236 2237 static struct page *last_highmem_page; 2238 2239 static void * 2240 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) 2241 { 2242 struct highmem_pbe *pbe; 2243 void *kaddr; 2244 2245 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { 2246 /* We have allocated the "original" page frame and we can 2247 * use it directly to store the loaded page. 2248 */ 2249 last_highmem_page = page; 2250 return buffer; 2251 } 2252 /* The "original" page frame has not been allocated and we have to 2253 * use a "safe" page frame to store the loaded page. 2254 */ 2255 pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); 2256 if (!pbe) { 2257 swsusp_free(); 2258 return ERR_PTR(-ENOMEM); 2259 } 2260 pbe->orig_page = page; 2261 if (safe_highmem_pages > 0) { 2262 struct page *tmp; 2263 2264 /* Copy of the page will be stored in high memory */ 2265 kaddr = buffer; 2266 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); 2267 safe_highmem_pages--; 2268 last_highmem_page = tmp; 2269 pbe->copy_page = tmp; 2270 } else { 2271 /* Copy of the page will be stored in normal memory */ 2272 kaddr = safe_pages_list; 2273 safe_pages_list = safe_pages_list->next; 2274 pbe->copy_page = virt_to_page(kaddr); 2275 } 2276 pbe->next = highmem_pblist; 2277 highmem_pblist = pbe; 2278 return kaddr; 2279 } 2280 2281 /** 2282 * copy_last_highmem_page - copy the contents of a highmem image from 2283 * @buffer, where the caller of snapshot_write_next() has place them, 2284 * to the right location represented by @last_highmem_page . 2285 */ 2286 2287 static void copy_last_highmem_page(void) 2288 { 2289 if (last_highmem_page) { 2290 void *dst; 2291 2292 dst = kmap_atomic(last_highmem_page); 2293 copy_page(dst, buffer); 2294 kunmap_atomic(dst); 2295 last_highmem_page = NULL; 2296 } 2297 } 2298 2299 static inline int last_highmem_page_copied(void) 2300 { 2301 return !last_highmem_page; 2302 } 2303 2304 static inline void free_highmem_data(void) 2305 { 2306 if (safe_highmem_bm) 2307 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); 2308 2309 if (buffer) 2310 free_image_page(buffer, PG_UNSAFE_CLEAR); 2311 } 2312 #else 2313 static inline int get_safe_write_buffer(void) { return 0; } 2314 2315 static unsigned int 2316 count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } 2317 2318 static inline int 2319 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) 2320 { 2321 return 0; 2322 } 2323 2324 static inline void * 2325 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) 2326 { 2327 return ERR_PTR(-EINVAL); 2328 } 2329 2330 static inline void copy_last_highmem_page(void) {} 2331 static inline int last_highmem_page_copied(void) { return 1; } 2332 static inline void free_highmem_data(void) {} 2333 #endif /* CONFIG_HIGHMEM */ 2334 2335 /** 2336 * prepare_image - use the memory bitmap @bm to mark the pages that will 2337 * be overwritten in the process of restoring the system memory state 2338 * from the suspend image ("unsafe" pages) and allocate memory for the 2339 * image. 2340 * 2341 * The idea is to allocate a new memory bitmap first and then allocate 2342 * as many pages as needed for the image data, but not to assign these 2343 * pages to specific tasks initially. Instead, we just mark them as 2344 * allocated and create a lists of "safe" pages that will be used 2345 * later. On systems with high memory a list of "safe" highmem pages is 2346 * also created. 2347 */ 2348 2349 #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) 2350 2351 static int 2352 prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) 2353 { 2354 unsigned int nr_pages, nr_highmem; 2355 struct linked_page *sp_list, *lp; 2356 int error; 2357 2358 /* If there is no highmem, the buffer will not be necessary */ 2359 free_image_page(buffer, PG_UNSAFE_CLEAR); 2360 buffer = NULL; 2361 2362 nr_highmem = count_highmem_image_pages(bm); 2363 error = mark_unsafe_pages(bm); 2364 if (error) 2365 goto Free; 2366 2367 error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); 2368 if (error) 2369 goto Free; 2370 2371 duplicate_memory_bitmap(new_bm, bm); 2372 memory_bm_free(bm, PG_UNSAFE_KEEP); 2373 if (nr_highmem > 0) { 2374 error = prepare_highmem_image(bm, &nr_highmem); 2375 if (error) 2376 goto Free; 2377 } 2378 /* Reserve some safe pages for potential later use. 2379 * 2380 * NOTE: This way we make sure there will be enough safe pages for the 2381 * chain_alloc() in get_buffer(). It is a bit wasteful, but 2382 * nr_copy_pages cannot be greater than 50% of the memory anyway. 2383 */ 2384 sp_list = NULL; 2385 /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ 2386 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2387 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); 2388 while (nr_pages > 0) { 2389 lp = get_image_page(GFP_ATOMIC, PG_SAFE); 2390 if (!lp) { 2391 error = -ENOMEM; 2392 goto Free; 2393 } 2394 lp->next = sp_list; 2395 sp_list = lp; 2396 nr_pages--; 2397 } 2398 /* Preallocate memory for the image */ 2399 safe_pages_list = NULL; 2400 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2401 while (nr_pages > 0) { 2402 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); 2403 if (!lp) { 2404 error = -ENOMEM; 2405 goto Free; 2406 } 2407 if (!swsusp_page_is_free(virt_to_page(lp))) { 2408 /* The page is "safe", add it to the list */ 2409 lp->next = safe_pages_list; 2410 safe_pages_list = lp; 2411 } 2412 /* Mark the page as allocated */ 2413 swsusp_set_page_forbidden(virt_to_page(lp)); 2414 swsusp_set_page_free(virt_to_page(lp)); 2415 nr_pages--; 2416 } 2417 /* Free the reserved safe pages so that chain_alloc() can use them */ 2418 while (sp_list) { 2419 lp = sp_list->next; 2420 free_image_page(sp_list, PG_UNSAFE_CLEAR); 2421 sp_list = lp; 2422 } 2423 return 0; 2424 2425 Free: 2426 swsusp_free(); 2427 return error; 2428 } 2429 2430 /** 2431 * get_buffer - compute the address that snapshot_write_next() should 2432 * set for its caller to write to. 2433 */ 2434 2435 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) 2436 { 2437 struct pbe *pbe; 2438 struct page *page; 2439 unsigned long pfn = memory_bm_next_pfn(bm); 2440 2441 if (pfn == BM_END_OF_MAP) 2442 return ERR_PTR(-EFAULT); 2443 2444 page = pfn_to_page(pfn); 2445 if (PageHighMem(page)) 2446 return get_highmem_page_buffer(page, ca); 2447 2448 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) 2449 /* We have allocated the "original" page frame and we can 2450 * use it directly to store the loaded page. 2451 */ 2452 return page_address(page); 2453 2454 /* The "original" page frame has not been allocated and we have to 2455 * use a "safe" page frame to store the loaded page. 2456 */ 2457 pbe = chain_alloc(ca, sizeof(struct pbe)); 2458 if (!pbe) { 2459 swsusp_free(); 2460 return ERR_PTR(-ENOMEM); 2461 } 2462 pbe->orig_address = page_address(page); 2463 pbe->address = safe_pages_list; 2464 safe_pages_list = safe_pages_list->next; 2465 pbe->next = restore_pblist; 2466 restore_pblist = pbe; 2467 return pbe->address; 2468 } 2469 2470 /** 2471 * snapshot_write_next - used for writing the system memory snapshot. 2472 * 2473 * On the first call to it @handle should point to a zeroed 2474 * snapshot_handle structure. The structure gets updated and a pointer 2475 * to it should be passed to this function every next time. 2476 * 2477 * On success the function returns a positive number. Then, the caller 2478 * is allowed to write up to the returned number of bytes to the memory 2479 * location computed by the data_of() macro. 2480 * 2481 * The function returns 0 to indicate the "end of file" condition, 2482 * and a negative number is returned on error. In such cases the 2483 * structure pointed to by @handle is not updated and should not be used 2484 * any more. 2485 */ 2486 2487 int snapshot_write_next(struct snapshot_handle *handle) 2488 { 2489 static struct chain_allocator ca; 2490 int error = 0; 2491 2492 /* Check if we have already loaded the entire image */ 2493 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) 2494 return 0; 2495 2496 handle->sync_read = 1; 2497 2498 if (!handle->cur) { 2499 if (!buffer) 2500 /* This makes the buffer be freed by swsusp_free() */ 2501 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2502 2503 if (!buffer) 2504 return -ENOMEM; 2505 2506 handle->buffer = buffer; 2507 } else if (handle->cur == 1) { 2508 error = load_header(buffer); 2509 if (error) 2510 return error; 2511 2512 error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); 2513 if (error) 2514 return error; 2515 2516 /* Allocate buffer for page keys. */ 2517 error = page_key_alloc(nr_copy_pages); 2518 if (error) 2519 return error; 2520 2521 } else if (handle->cur <= nr_meta_pages + 1) { 2522 error = unpack_orig_pfns(buffer, ©_bm); 2523 if (error) 2524 return error; 2525 2526 if (handle->cur == nr_meta_pages + 1) { 2527 error = prepare_image(&orig_bm, ©_bm); 2528 if (error) 2529 return error; 2530 2531 chain_init(&ca, GFP_ATOMIC, PG_SAFE); 2532 memory_bm_position_reset(&orig_bm); 2533 restore_pblist = NULL; 2534 handle->buffer = get_buffer(&orig_bm, &ca); 2535 handle->sync_read = 0; 2536 if (IS_ERR(handle->buffer)) 2537 return PTR_ERR(handle->buffer); 2538 } 2539 } else { 2540 copy_last_highmem_page(); 2541 /* Restore page key for data page (s390 only). */ 2542 page_key_write(handle->buffer); 2543 handle->buffer = get_buffer(&orig_bm, &ca); 2544 if (IS_ERR(handle->buffer)) 2545 return PTR_ERR(handle->buffer); 2546 if (handle->buffer != buffer) 2547 handle->sync_read = 0; 2548 } 2549 handle->cur++; 2550 return PAGE_SIZE; 2551 } 2552 2553 /** 2554 * snapshot_write_finalize - must be called after the last call to 2555 * snapshot_write_next() in case the last page in the image happens 2556 * to be a highmem page and its contents should be stored in the 2557 * highmem. Additionally, it releases the memory that will not be 2558 * used any more. 2559 */ 2560 2561 void snapshot_write_finalize(struct snapshot_handle *handle) 2562 { 2563 copy_last_highmem_page(); 2564 /* Restore page key for data page (s390 only). */ 2565 page_key_write(handle->buffer); 2566 page_key_free(); 2567 /* Free only if we have loaded the image entirely */ 2568 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { 2569 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); 2570 free_highmem_data(); 2571 } 2572 } 2573 2574 int snapshot_image_loaded(struct snapshot_handle *handle) 2575 { 2576 return !(!nr_copy_pages || !last_highmem_page_copied() || 2577 handle->cur <= nr_meta_pages + nr_copy_pages); 2578 } 2579 2580 #ifdef CONFIG_HIGHMEM 2581 /* Assumes that @buf is ready and points to a "safe" page */ 2582 static inline void 2583 swap_two_pages_data(struct page *p1, struct page *p2, void *buf) 2584 { 2585 void *kaddr1, *kaddr2; 2586 2587 kaddr1 = kmap_atomic(p1); 2588 kaddr2 = kmap_atomic(p2); 2589 copy_page(buf, kaddr1); 2590 copy_page(kaddr1, kaddr2); 2591 copy_page(kaddr2, buf); 2592 kunmap_atomic(kaddr2); 2593 kunmap_atomic(kaddr1); 2594 } 2595 2596 /** 2597 * restore_highmem - for each highmem page that was allocated before 2598 * the suspend and included in the suspend image, and also has been 2599 * allocated by the "resume" kernel swap its current (ie. "before 2600 * resume") contents with the previous (ie. "before suspend") one. 2601 * 2602 * If the resume eventually fails, we can call this function once 2603 * again and restore the "before resume" highmem state. 2604 */ 2605 2606 int restore_highmem(void) 2607 { 2608 struct highmem_pbe *pbe = highmem_pblist; 2609 void *buf; 2610 2611 if (!pbe) 2612 return 0; 2613 2614 buf = get_image_page(GFP_ATOMIC, PG_SAFE); 2615 if (!buf) 2616 return -ENOMEM; 2617 2618 while (pbe) { 2619 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); 2620 pbe = pbe->next; 2621 } 2622 free_image_page(buf, PG_UNSAFE_CLEAR); 2623 return 0; 2624 } 2625 #endif /* CONFIG_HIGHMEM */ 2626