1 /* 2 * linux/kernel/power/snapshot.c 3 * 4 * This file provides system snapshot/restore functionality for swsusp. 5 * 6 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz> 7 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 8 * 9 * This file is released under the GPLv2. 10 * 11 */ 12 13 #include <linux/version.h> 14 #include <linux/module.h> 15 #include <linux/mm.h> 16 #include <linux/suspend.h> 17 #include <linux/delay.h> 18 #include <linux/bitops.h> 19 #include <linux/spinlock.h> 20 #include <linux/kernel.h> 21 #include <linux/pm.h> 22 #include <linux/device.h> 23 #include <linux/init.h> 24 #include <linux/bootmem.h> 25 #include <linux/syscalls.h> 26 #include <linux/console.h> 27 #include <linux/highmem.h> 28 #include <linux/list.h> 29 #include <linux/slab.h> 30 #include <linux/compiler.h> 31 #include <linux/ktime.h> 32 33 #include <asm/uaccess.h> 34 #include <asm/mmu_context.h> 35 #include <asm/pgtable.h> 36 #include <asm/tlbflush.h> 37 #include <asm/io.h> 38 39 #include "power.h" 40 41 static int swsusp_page_is_free(struct page *); 42 static void swsusp_set_page_forbidden(struct page *); 43 static void swsusp_unset_page_forbidden(struct page *); 44 45 /* 46 * Number of bytes to reserve for memory allocations made by device drivers 47 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't 48 * cause image creation to fail (tunable via /sys/power/reserved_size). 49 */ 50 unsigned long reserved_size; 51 52 void __init hibernate_reserved_size_init(void) 53 { 54 reserved_size = SPARE_PAGES * PAGE_SIZE; 55 } 56 57 /* 58 * Preferred image size in bytes (tunable via /sys/power/image_size). 59 * When it is set to N, swsusp will do its best to ensure the image 60 * size will not exceed N bytes, but if that is impossible, it will 61 * try to create the smallest image possible. 62 */ 63 unsigned long image_size; 64 65 void __init hibernate_image_size_init(void) 66 { 67 image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; 68 } 69 70 /* List of PBEs needed for restoring the pages that were allocated before 71 * the suspend and included in the suspend image, but have also been 72 * allocated by the "resume" kernel, so their contents cannot be written 73 * directly to their "original" page frames. 74 */ 75 struct pbe *restore_pblist; 76 77 /* Pointer to an auxiliary buffer (1 page) */ 78 static void *buffer; 79 80 /** 81 * @safe_needed - on resume, for storing the PBE list and the image, 82 * we can only use memory pages that do not conflict with the pages 83 * used before suspend. The unsafe pages have PageNosaveFree set 84 * and we count them using unsafe_pages. 85 * 86 * Each allocated image page is marked as PageNosave and PageNosaveFree 87 * so that swsusp_free() can release it. 88 */ 89 90 #define PG_ANY 0 91 #define PG_SAFE 1 92 #define PG_UNSAFE_CLEAR 1 93 #define PG_UNSAFE_KEEP 0 94 95 static unsigned int allocated_unsafe_pages; 96 97 static void *get_image_page(gfp_t gfp_mask, int safe_needed) 98 { 99 void *res; 100 101 res = (void *)get_zeroed_page(gfp_mask); 102 if (safe_needed) 103 while (res && swsusp_page_is_free(virt_to_page(res))) { 104 /* The page is unsafe, mark it for swsusp_free() */ 105 swsusp_set_page_forbidden(virt_to_page(res)); 106 allocated_unsafe_pages++; 107 res = (void *)get_zeroed_page(gfp_mask); 108 } 109 if (res) { 110 swsusp_set_page_forbidden(virt_to_page(res)); 111 swsusp_set_page_free(virt_to_page(res)); 112 } 113 return res; 114 } 115 116 unsigned long get_safe_page(gfp_t gfp_mask) 117 { 118 return (unsigned long)get_image_page(gfp_mask, PG_SAFE); 119 } 120 121 static struct page *alloc_image_page(gfp_t gfp_mask) 122 { 123 struct page *page; 124 125 page = alloc_page(gfp_mask); 126 if (page) { 127 swsusp_set_page_forbidden(page); 128 swsusp_set_page_free(page); 129 } 130 return page; 131 } 132 133 /** 134 * free_image_page - free page represented by @addr, allocated with 135 * get_image_page (page flags set by it must be cleared) 136 */ 137 138 static inline void free_image_page(void *addr, int clear_nosave_free) 139 { 140 struct page *page; 141 142 BUG_ON(!virt_addr_valid(addr)); 143 144 page = virt_to_page(addr); 145 146 swsusp_unset_page_forbidden(page); 147 if (clear_nosave_free) 148 swsusp_unset_page_free(page); 149 150 __free_page(page); 151 } 152 153 /* struct linked_page is used to build chains of pages */ 154 155 #define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) 156 157 struct linked_page { 158 struct linked_page *next; 159 char data[LINKED_PAGE_DATA_SIZE]; 160 } __packed; 161 162 static inline void 163 free_list_of_pages(struct linked_page *list, int clear_page_nosave) 164 { 165 while (list) { 166 struct linked_page *lp = list->next; 167 168 free_image_page(list, clear_page_nosave); 169 list = lp; 170 } 171 } 172 173 /** 174 * struct chain_allocator is used for allocating small objects out of 175 * a linked list of pages called 'the chain'. 176 * 177 * The chain grows each time when there is no room for a new object in 178 * the current page. The allocated objects cannot be freed individually. 179 * It is only possible to free them all at once, by freeing the entire 180 * chain. 181 * 182 * NOTE: The chain allocator may be inefficient if the allocated objects 183 * are not much smaller than PAGE_SIZE. 184 */ 185 186 struct chain_allocator { 187 struct linked_page *chain; /* the chain */ 188 unsigned int used_space; /* total size of objects allocated out 189 * of the current page 190 */ 191 gfp_t gfp_mask; /* mask for allocating pages */ 192 int safe_needed; /* if set, only "safe" pages are allocated */ 193 }; 194 195 static void 196 chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed) 197 { 198 ca->chain = NULL; 199 ca->used_space = LINKED_PAGE_DATA_SIZE; 200 ca->gfp_mask = gfp_mask; 201 ca->safe_needed = safe_needed; 202 } 203 204 static void *chain_alloc(struct chain_allocator *ca, unsigned int size) 205 { 206 void *ret; 207 208 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { 209 struct linked_page *lp; 210 211 lp = get_image_page(ca->gfp_mask, ca->safe_needed); 212 if (!lp) 213 return NULL; 214 215 lp->next = ca->chain; 216 ca->chain = lp; 217 ca->used_space = 0; 218 } 219 ret = ca->chain->data + ca->used_space; 220 ca->used_space += size; 221 return ret; 222 } 223 224 /** 225 * Data types related to memory bitmaps. 226 * 227 * Memory bitmap is a structure consiting of many linked lists of 228 * objects. The main list's elements are of type struct zone_bitmap 229 * and each of them corresonds to one zone. For each zone bitmap 230 * object there is a list of objects of type struct bm_block that 231 * represent each blocks of bitmap in which information is stored. 232 * 233 * struct memory_bitmap contains a pointer to the main list of zone 234 * bitmap objects, a struct bm_position used for browsing the bitmap, 235 * and a pointer to the list of pages used for allocating all of the 236 * zone bitmap objects and bitmap block objects. 237 * 238 * NOTE: It has to be possible to lay out the bitmap in memory 239 * using only allocations of order 0. Additionally, the bitmap is 240 * designed to work with arbitrary number of zones (this is over the 241 * top for now, but let's avoid making unnecessary assumptions ;-). 242 * 243 * struct zone_bitmap contains a pointer to a list of bitmap block 244 * objects and a pointer to the bitmap block object that has been 245 * most recently used for setting bits. Additionally, it contains the 246 * pfns that correspond to the start and end of the represented zone. 247 * 248 * struct bm_block contains a pointer to the memory page in which 249 * information is stored (in the form of a block of bitmap) 250 * It also contains the pfns that correspond to the start and end of 251 * the represented memory area. 252 * 253 * The memory bitmap is organized as a radix tree to guarantee fast random 254 * access to the bits. There is one radix tree for each zone (as returned 255 * from create_mem_extents). 256 * 257 * One radix tree is represented by one struct mem_zone_bm_rtree. There are 258 * two linked lists for the nodes of the tree, one for the inner nodes and 259 * one for the leave nodes. The linked leave nodes are used for fast linear 260 * access of the memory bitmap. 261 * 262 * The struct rtree_node represents one node of the radix tree. 263 */ 264 265 #define BM_END_OF_MAP (~0UL) 266 267 #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) 268 #define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) 269 #define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) 270 271 /* 272 * struct rtree_node is a wrapper struct to link the nodes 273 * of the rtree together for easy linear iteration over 274 * bits and easy freeing 275 */ 276 struct rtree_node { 277 struct list_head list; 278 unsigned long *data; 279 }; 280 281 /* 282 * struct mem_zone_bm_rtree represents a bitmap used for one 283 * populated memory zone. 284 */ 285 struct mem_zone_bm_rtree { 286 struct list_head list; /* Link Zones together */ 287 struct list_head nodes; /* Radix Tree inner nodes */ 288 struct list_head leaves; /* Radix Tree leaves */ 289 unsigned long start_pfn; /* Zone start page frame */ 290 unsigned long end_pfn; /* Zone end page frame + 1 */ 291 struct rtree_node *rtree; /* Radix Tree Root */ 292 int levels; /* Number of Radix Tree Levels */ 293 unsigned int blocks; /* Number of Bitmap Blocks */ 294 }; 295 296 /* strcut bm_position is used for browsing memory bitmaps */ 297 298 struct bm_position { 299 struct mem_zone_bm_rtree *zone; 300 struct rtree_node *node; 301 unsigned long node_pfn; 302 int node_bit; 303 }; 304 305 struct memory_bitmap { 306 struct list_head zones; 307 struct linked_page *p_list; /* list of pages used to store zone 308 * bitmap objects and bitmap block 309 * objects 310 */ 311 struct bm_position cur; /* most recently used bit position */ 312 }; 313 314 /* Functions that operate on memory bitmaps */ 315 316 #define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long)) 317 #if BITS_PER_LONG == 32 318 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2) 319 #else 320 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3) 321 #endif 322 #define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) 323 324 /* 325 * alloc_rtree_node - Allocate a new node and add it to the radix tree. 326 * 327 * This function is used to allocate inner nodes as well as the 328 * leave nodes of the radix tree. It also adds the node to the 329 * corresponding linked list passed in by the *list parameter. 330 */ 331 static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, 332 struct chain_allocator *ca, 333 struct list_head *list) 334 { 335 struct rtree_node *node; 336 337 node = chain_alloc(ca, sizeof(struct rtree_node)); 338 if (!node) 339 return NULL; 340 341 node->data = get_image_page(gfp_mask, safe_needed); 342 if (!node->data) 343 return NULL; 344 345 list_add_tail(&node->list, list); 346 347 return node; 348 } 349 350 /* 351 * add_rtree_block - Add a new leave node to the radix tree 352 * 353 * The leave nodes need to be allocated in order to keep the leaves 354 * linked list in order. This is guaranteed by the zone->blocks 355 * counter. 356 */ 357 static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, 358 int safe_needed, struct chain_allocator *ca) 359 { 360 struct rtree_node *node, *block, **dst; 361 unsigned int levels_needed, block_nr; 362 int i; 363 364 block_nr = zone->blocks; 365 levels_needed = 0; 366 367 /* How many levels do we need for this block nr? */ 368 while (block_nr) { 369 levels_needed += 1; 370 block_nr >>= BM_RTREE_LEVEL_SHIFT; 371 } 372 373 /* Make sure the rtree has enough levels */ 374 for (i = zone->levels; i < levels_needed; i++) { 375 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 376 &zone->nodes); 377 if (!node) 378 return -ENOMEM; 379 380 node->data[0] = (unsigned long)zone->rtree; 381 zone->rtree = node; 382 zone->levels += 1; 383 } 384 385 /* Allocate new block */ 386 block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves); 387 if (!block) 388 return -ENOMEM; 389 390 /* Now walk the rtree to insert the block */ 391 node = zone->rtree; 392 dst = &zone->rtree; 393 block_nr = zone->blocks; 394 for (i = zone->levels; i > 0; i--) { 395 int index; 396 397 if (!node) { 398 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 399 &zone->nodes); 400 if (!node) 401 return -ENOMEM; 402 *dst = node; 403 } 404 405 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 406 index &= BM_RTREE_LEVEL_MASK; 407 dst = (struct rtree_node **)&((*dst)->data[index]); 408 node = *dst; 409 } 410 411 zone->blocks += 1; 412 *dst = block; 413 414 return 0; 415 } 416 417 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 418 int clear_nosave_free); 419 420 /* 421 * create_zone_bm_rtree - create a radix tree for one zone 422 * 423 * Allocated the mem_zone_bm_rtree structure and initializes it. 424 * This function also allocated and builds the radix tree for the 425 * zone. 426 */ 427 static struct mem_zone_bm_rtree * 428 create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, 429 struct chain_allocator *ca, 430 unsigned long start, unsigned long end) 431 { 432 struct mem_zone_bm_rtree *zone; 433 unsigned int i, nr_blocks; 434 unsigned long pages; 435 436 pages = end - start; 437 zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree)); 438 if (!zone) 439 return NULL; 440 441 INIT_LIST_HEAD(&zone->nodes); 442 INIT_LIST_HEAD(&zone->leaves); 443 zone->start_pfn = start; 444 zone->end_pfn = end; 445 nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); 446 447 for (i = 0; i < nr_blocks; i++) { 448 if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) { 449 free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR); 450 return NULL; 451 } 452 } 453 454 return zone; 455 } 456 457 /* 458 * free_zone_bm_rtree - Free the memory of the radix tree 459 * 460 * Free all node pages of the radix tree. The mem_zone_bm_rtree 461 * structure itself is not freed here nor are the rtree_node 462 * structs. 463 */ 464 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 465 int clear_nosave_free) 466 { 467 struct rtree_node *node; 468 469 list_for_each_entry(node, &zone->nodes, list) 470 free_image_page(node->data, clear_nosave_free); 471 472 list_for_each_entry(node, &zone->leaves, list) 473 free_image_page(node->data, clear_nosave_free); 474 } 475 476 static void memory_bm_position_reset(struct memory_bitmap *bm) 477 { 478 bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, 479 list); 480 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 481 struct rtree_node, list); 482 bm->cur.node_pfn = 0; 483 bm->cur.node_bit = 0; 484 } 485 486 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); 487 488 struct mem_extent { 489 struct list_head hook; 490 unsigned long start; 491 unsigned long end; 492 }; 493 494 /** 495 * free_mem_extents - free a list of memory extents 496 * @list - list of extents to empty 497 */ 498 static void free_mem_extents(struct list_head *list) 499 { 500 struct mem_extent *ext, *aux; 501 502 list_for_each_entry_safe(ext, aux, list, hook) { 503 list_del(&ext->hook); 504 kfree(ext); 505 } 506 } 507 508 /** 509 * create_mem_extents - create a list of memory extents representing 510 * contiguous ranges of PFNs 511 * @list - list to put the extents into 512 * @gfp_mask - mask to use for memory allocations 513 */ 514 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) 515 { 516 struct zone *zone; 517 518 INIT_LIST_HEAD(list); 519 520 for_each_populated_zone(zone) { 521 unsigned long zone_start, zone_end; 522 struct mem_extent *ext, *cur, *aux; 523 524 zone_start = zone->zone_start_pfn; 525 zone_end = zone_end_pfn(zone); 526 527 list_for_each_entry(ext, list, hook) 528 if (zone_start <= ext->end) 529 break; 530 531 if (&ext->hook == list || zone_end < ext->start) { 532 /* New extent is necessary */ 533 struct mem_extent *new_ext; 534 535 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); 536 if (!new_ext) { 537 free_mem_extents(list); 538 return -ENOMEM; 539 } 540 new_ext->start = zone_start; 541 new_ext->end = zone_end; 542 list_add_tail(&new_ext->hook, &ext->hook); 543 continue; 544 } 545 546 /* Merge this zone's range of PFNs with the existing one */ 547 if (zone_start < ext->start) 548 ext->start = zone_start; 549 if (zone_end > ext->end) 550 ext->end = zone_end; 551 552 /* More merging may be possible */ 553 cur = ext; 554 list_for_each_entry_safe_continue(cur, aux, list, hook) { 555 if (zone_end < cur->start) 556 break; 557 if (zone_end < cur->end) 558 ext->end = cur->end; 559 list_del(&cur->hook); 560 kfree(cur); 561 } 562 } 563 564 return 0; 565 } 566 567 /** 568 * memory_bm_create - allocate memory for a memory bitmap 569 */ 570 static int 571 memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) 572 { 573 struct chain_allocator ca; 574 struct list_head mem_extents; 575 struct mem_extent *ext; 576 int error; 577 578 chain_init(&ca, gfp_mask, safe_needed); 579 INIT_LIST_HEAD(&bm->zones); 580 581 error = create_mem_extents(&mem_extents, gfp_mask); 582 if (error) 583 return error; 584 585 list_for_each_entry(ext, &mem_extents, hook) { 586 struct mem_zone_bm_rtree *zone; 587 588 zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, 589 ext->start, ext->end); 590 if (!zone) { 591 error = -ENOMEM; 592 goto Error; 593 } 594 list_add_tail(&zone->list, &bm->zones); 595 } 596 597 bm->p_list = ca.chain; 598 memory_bm_position_reset(bm); 599 Exit: 600 free_mem_extents(&mem_extents); 601 return error; 602 603 Error: 604 bm->p_list = ca.chain; 605 memory_bm_free(bm, PG_UNSAFE_CLEAR); 606 goto Exit; 607 } 608 609 /** 610 * memory_bm_free - free memory occupied by the memory bitmap @bm 611 */ 612 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) 613 { 614 struct mem_zone_bm_rtree *zone; 615 616 list_for_each_entry(zone, &bm->zones, list) 617 free_zone_bm_rtree(zone, clear_nosave_free); 618 619 free_list_of_pages(bm->p_list, clear_nosave_free); 620 621 INIT_LIST_HEAD(&bm->zones); 622 } 623 624 /** 625 * memory_bm_find_bit - Find the bit for pfn in the memory 626 * bitmap 627 * 628 * Find the bit in the bitmap @bm that corresponds to given pfn. 629 * The cur.zone, cur.block and cur.node_pfn member of @bm are 630 * updated. 631 * It walks the radix tree to find the page which contains the bit for 632 * pfn and returns the bit position in **addr and *bit_nr. 633 */ 634 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, 635 void **addr, unsigned int *bit_nr) 636 { 637 struct mem_zone_bm_rtree *curr, *zone; 638 struct rtree_node *node; 639 int i, block_nr; 640 641 zone = bm->cur.zone; 642 643 if (pfn >= zone->start_pfn && pfn < zone->end_pfn) 644 goto zone_found; 645 646 zone = NULL; 647 648 /* Find the right zone */ 649 list_for_each_entry(curr, &bm->zones, list) { 650 if (pfn >= curr->start_pfn && pfn < curr->end_pfn) { 651 zone = curr; 652 break; 653 } 654 } 655 656 if (!zone) 657 return -EFAULT; 658 659 zone_found: 660 /* 661 * We have a zone. Now walk the radix tree to find the leave 662 * node for our pfn. 663 */ 664 665 node = bm->cur.node; 666 if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) 667 goto node_found; 668 669 node = zone->rtree; 670 block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT; 671 672 for (i = zone->levels; i > 0; i--) { 673 int index; 674 675 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 676 index &= BM_RTREE_LEVEL_MASK; 677 BUG_ON(node->data[index] == 0); 678 node = (struct rtree_node *)node->data[index]; 679 } 680 681 node_found: 682 /* Update last position */ 683 bm->cur.zone = zone; 684 bm->cur.node = node; 685 bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK; 686 687 /* Set return values */ 688 *addr = node->data; 689 *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK; 690 691 return 0; 692 } 693 694 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) 695 { 696 void *addr; 697 unsigned int bit; 698 int error; 699 700 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 701 BUG_ON(error); 702 set_bit(bit, addr); 703 } 704 705 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) 706 { 707 void *addr; 708 unsigned int bit; 709 int error; 710 711 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 712 if (!error) 713 set_bit(bit, addr); 714 715 return error; 716 } 717 718 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) 719 { 720 void *addr; 721 unsigned int bit; 722 int error; 723 724 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 725 BUG_ON(error); 726 clear_bit(bit, addr); 727 } 728 729 static void memory_bm_clear_current(struct memory_bitmap *bm) 730 { 731 int bit; 732 733 bit = max(bm->cur.node_bit - 1, 0); 734 clear_bit(bit, bm->cur.node->data); 735 } 736 737 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) 738 { 739 void *addr; 740 unsigned int bit; 741 int error; 742 743 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 744 BUG_ON(error); 745 return test_bit(bit, addr); 746 } 747 748 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) 749 { 750 void *addr; 751 unsigned int bit; 752 753 return !memory_bm_find_bit(bm, pfn, &addr, &bit); 754 } 755 756 /* 757 * rtree_next_node - Jumps to the next leave node 758 * 759 * Sets the position to the beginning of the next node in the 760 * memory bitmap. This is either the next node in the current 761 * zone's radix tree or the first node in the radix tree of the 762 * next zone. 763 * 764 * Returns true if there is a next node, false otherwise. 765 */ 766 static bool rtree_next_node(struct memory_bitmap *bm) 767 { 768 bm->cur.node = list_entry(bm->cur.node->list.next, 769 struct rtree_node, list); 770 if (&bm->cur.node->list != &bm->cur.zone->leaves) { 771 bm->cur.node_pfn += BM_BITS_PER_BLOCK; 772 bm->cur.node_bit = 0; 773 touch_softlockup_watchdog(); 774 return true; 775 } 776 777 /* No more nodes, goto next zone */ 778 bm->cur.zone = list_entry(bm->cur.zone->list.next, 779 struct mem_zone_bm_rtree, list); 780 if (&bm->cur.zone->list != &bm->zones) { 781 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 782 struct rtree_node, list); 783 bm->cur.node_pfn = 0; 784 bm->cur.node_bit = 0; 785 return true; 786 } 787 788 /* No more zones */ 789 return false; 790 } 791 792 /** 793 * memory_bm_rtree_next_pfn - Find the next set bit in the bitmap @bm 794 * 795 * Starting from the last returned position this function searches 796 * for the next set bit in the memory bitmap and returns its 797 * number. If no more bit is set BM_END_OF_MAP is returned. 798 * 799 * It is required to run memory_bm_position_reset() before the 800 * first call to this function. 801 */ 802 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) 803 { 804 unsigned long bits, pfn, pages; 805 int bit; 806 807 do { 808 pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn; 809 bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK); 810 bit = find_next_bit(bm->cur.node->data, bits, 811 bm->cur.node_bit); 812 if (bit < bits) { 813 pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit; 814 bm->cur.node_bit = bit + 1; 815 return pfn; 816 } 817 } while (rtree_next_node(bm)); 818 819 return BM_END_OF_MAP; 820 } 821 822 /** 823 * This structure represents a range of page frames the contents of which 824 * should not be saved during the suspend. 825 */ 826 827 struct nosave_region { 828 struct list_head list; 829 unsigned long start_pfn; 830 unsigned long end_pfn; 831 }; 832 833 static LIST_HEAD(nosave_regions); 834 835 /** 836 * register_nosave_region - register a range of page frames the contents 837 * of which should not be saved during the suspend (to be used in the early 838 * initialization code) 839 */ 840 841 void __init 842 __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, 843 int use_kmalloc) 844 { 845 struct nosave_region *region; 846 847 if (start_pfn >= end_pfn) 848 return; 849 850 if (!list_empty(&nosave_regions)) { 851 /* Try to extend the previous region (they should be sorted) */ 852 region = list_entry(nosave_regions.prev, 853 struct nosave_region, list); 854 if (region->end_pfn == start_pfn) { 855 region->end_pfn = end_pfn; 856 goto Report; 857 } 858 } 859 if (use_kmalloc) { 860 /* during init, this shouldn't fail */ 861 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); 862 BUG_ON(!region); 863 } else 864 /* This allocation cannot fail */ 865 region = memblock_virt_alloc(sizeof(struct nosave_region), 0); 866 region->start_pfn = start_pfn; 867 region->end_pfn = end_pfn; 868 list_add_tail(®ion->list, &nosave_regions); 869 Report: 870 printk(KERN_INFO "PM: Registered nosave memory: [mem %#010llx-%#010llx]\n", 871 (unsigned long long) start_pfn << PAGE_SHIFT, 872 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); 873 } 874 875 /* 876 * Set bits in this map correspond to the page frames the contents of which 877 * should not be saved during the suspend. 878 */ 879 static struct memory_bitmap *forbidden_pages_map; 880 881 /* Set bits in this map correspond to free page frames. */ 882 static struct memory_bitmap *free_pages_map; 883 884 /* 885 * Each page frame allocated for creating the image is marked by setting the 886 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously 887 */ 888 889 void swsusp_set_page_free(struct page *page) 890 { 891 if (free_pages_map) 892 memory_bm_set_bit(free_pages_map, page_to_pfn(page)); 893 } 894 895 static int swsusp_page_is_free(struct page *page) 896 { 897 return free_pages_map ? 898 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; 899 } 900 901 void swsusp_unset_page_free(struct page *page) 902 { 903 if (free_pages_map) 904 memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); 905 } 906 907 static void swsusp_set_page_forbidden(struct page *page) 908 { 909 if (forbidden_pages_map) 910 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); 911 } 912 913 int swsusp_page_is_forbidden(struct page *page) 914 { 915 return forbidden_pages_map ? 916 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; 917 } 918 919 static void swsusp_unset_page_forbidden(struct page *page) 920 { 921 if (forbidden_pages_map) 922 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); 923 } 924 925 /** 926 * mark_nosave_pages - set bits corresponding to the page frames the 927 * contents of which should not be saved in a given bitmap. 928 */ 929 930 static void mark_nosave_pages(struct memory_bitmap *bm) 931 { 932 struct nosave_region *region; 933 934 if (list_empty(&nosave_regions)) 935 return; 936 937 list_for_each_entry(region, &nosave_regions, list) { 938 unsigned long pfn; 939 940 pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n", 941 (unsigned long long) region->start_pfn << PAGE_SHIFT, 942 ((unsigned long long) region->end_pfn << PAGE_SHIFT) 943 - 1); 944 945 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) 946 if (pfn_valid(pfn)) { 947 /* 948 * It is safe to ignore the result of 949 * mem_bm_set_bit_check() here, since we won't 950 * touch the PFNs for which the error is 951 * returned anyway. 952 */ 953 mem_bm_set_bit_check(bm, pfn); 954 } 955 } 956 } 957 958 static bool is_nosave_page(unsigned long pfn) 959 { 960 struct nosave_region *region; 961 962 list_for_each_entry(region, &nosave_regions, list) { 963 if (pfn >= region->start_pfn && pfn < region->end_pfn) { 964 pr_err("PM: %#010llx in e820 nosave region: " 965 "[mem %#010llx-%#010llx]\n", 966 (unsigned long long) pfn << PAGE_SHIFT, 967 (unsigned long long) region->start_pfn << PAGE_SHIFT, 968 ((unsigned long long) region->end_pfn << PAGE_SHIFT) 969 - 1); 970 return true; 971 } 972 } 973 974 return false; 975 } 976 977 /** 978 * create_basic_memory_bitmaps - create bitmaps needed for marking page 979 * frames that should not be saved and free page frames. The pointers 980 * forbidden_pages_map and free_pages_map are only modified if everything 981 * goes well, because we don't want the bits to be used before both bitmaps 982 * are set up. 983 */ 984 985 int create_basic_memory_bitmaps(void) 986 { 987 struct memory_bitmap *bm1, *bm2; 988 int error = 0; 989 990 if (forbidden_pages_map && free_pages_map) 991 return 0; 992 else 993 BUG_ON(forbidden_pages_map || free_pages_map); 994 995 bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 996 if (!bm1) 997 return -ENOMEM; 998 999 error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); 1000 if (error) 1001 goto Free_first_object; 1002 1003 bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 1004 if (!bm2) 1005 goto Free_first_bitmap; 1006 1007 error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); 1008 if (error) 1009 goto Free_second_object; 1010 1011 forbidden_pages_map = bm1; 1012 free_pages_map = bm2; 1013 mark_nosave_pages(forbidden_pages_map); 1014 1015 pr_debug("PM: Basic memory bitmaps created\n"); 1016 1017 return 0; 1018 1019 Free_second_object: 1020 kfree(bm2); 1021 Free_first_bitmap: 1022 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1023 Free_first_object: 1024 kfree(bm1); 1025 return -ENOMEM; 1026 } 1027 1028 /** 1029 * free_basic_memory_bitmaps - free memory bitmaps allocated by 1030 * create_basic_memory_bitmaps(). The auxiliary pointers are necessary 1031 * so that the bitmaps themselves are not referred to while they are being 1032 * freed. 1033 */ 1034 1035 void free_basic_memory_bitmaps(void) 1036 { 1037 struct memory_bitmap *bm1, *bm2; 1038 1039 if (WARN_ON(!(forbidden_pages_map && free_pages_map))) 1040 return; 1041 1042 bm1 = forbidden_pages_map; 1043 bm2 = free_pages_map; 1044 forbidden_pages_map = NULL; 1045 free_pages_map = NULL; 1046 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1047 kfree(bm1); 1048 memory_bm_free(bm2, PG_UNSAFE_CLEAR); 1049 kfree(bm2); 1050 1051 pr_debug("PM: Basic memory bitmaps freed\n"); 1052 } 1053 1054 /** 1055 * snapshot_additional_pages - estimate the number of additional pages 1056 * be needed for setting up the suspend image data structures for given 1057 * zone (usually the returned value is greater than the exact number) 1058 */ 1059 1060 unsigned int snapshot_additional_pages(struct zone *zone) 1061 { 1062 unsigned int rtree, nodes; 1063 1064 rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); 1065 rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), 1066 LINKED_PAGE_DATA_SIZE); 1067 while (nodes > 1) { 1068 nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL); 1069 rtree += nodes; 1070 } 1071 1072 return 2 * rtree; 1073 } 1074 1075 #ifdef CONFIG_HIGHMEM 1076 /** 1077 * count_free_highmem_pages - compute the total number of free highmem 1078 * pages, system-wide. 1079 */ 1080 1081 static unsigned int count_free_highmem_pages(void) 1082 { 1083 struct zone *zone; 1084 unsigned int cnt = 0; 1085 1086 for_each_populated_zone(zone) 1087 if (is_highmem(zone)) 1088 cnt += zone_page_state(zone, NR_FREE_PAGES); 1089 1090 return cnt; 1091 } 1092 1093 /** 1094 * saveable_highmem_page - Determine whether a highmem page should be 1095 * included in the suspend image. 1096 * 1097 * We should save the page if it isn't Nosave or NosaveFree, or Reserved, 1098 * and it isn't a part of a free chunk of pages. 1099 */ 1100 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) 1101 { 1102 struct page *page; 1103 1104 if (!pfn_valid(pfn)) 1105 return NULL; 1106 1107 page = pfn_to_page(pfn); 1108 if (page_zone(page) != zone) 1109 return NULL; 1110 1111 BUG_ON(!PageHighMem(page)); 1112 1113 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) || 1114 PageReserved(page)) 1115 return NULL; 1116 1117 if (page_is_guard(page)) 1118 return NULL; 1119 1120 return page; 1121 } 1122 1123 /** 1124 * count_highmem_pages - compute the total number of saveable highmem 1125 * pages. 1126 */ 1127 1128 static unsigned int count_highmem_pages(void) 1129 { 1130 struct zone *zone; 1131 unsigned int n = 0; 1132 1133 for_each_populated_zone(zone) { 1134 unsigned long pfn, max_zone_pfn; 1135 1136 if (!is_highmem(zone)) 1137 continue; 1138 1139 mark_free_pages(zone); 1140 max_zone_pfn = zone_end_pfn(zone); 1141 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1142 if (saveable_highmem_page(zone, pfn)) 1143 n++; 1144 } 1145 return n; 1146 } 1147 #else 1148 static inline void *saveable_highmem_page(struct zone *z, unsigned long p) 1149 { 1150 return NULL; 1151 } 1152 #endif /* CONFIG_HIGHMEM */ 1153 1154 /** 1155 * saveable_page - Determine whether a non-highmem page should be included 1156 * in the suspend image. 1157 * 1158 * We should save the page if it isn't Nosave, and is not in the range 1159 * of pages statically defined as 'unsaveable', and it isn't a part of 1160 * a free chunk of pages. 1161 */ 1162 static struct page *saveable_page(struct zone *zone, unsigned long pfn) 1163 { 1164 struct page *page; 1165 1166 if (!pfn_valid(pfn)) 1167 return NULL; 1168 1169 page = pfn_to_page(pfn); 1170 if (page_zone(page) != zone) 1171 return NULL; 1172 1173 BUG_ON(PageHighMem(page)); 1174 1175 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 1176 return NULL; 1177 1178 if (PageReserved(page) 1179 && (!kernel_page_present(page) || pfn_is_nosave(pfn))) 1180 return NULL; 1181 1182 if (page_is_guard(page)) 1183 return NULL; 1184 1185 return page; 1186 } 1187 1188 /** 1189 * count_data_pages - compute the total number of saveable non-highmem 1190 * pages. 1191 */ 1192 1193 static unsigned int count_data_pages(void) 1194 { 1195 struct zone *zone; 1196 unsigned long pfn, max_zone_pfn; 1197 unsigned int n = 0; 1198 1199 for_each_populated_zone(zone) { 1200 if (is_highmem(zone)) 1201 continue; 1202 1203 mark_free_pages(zone); 1204 max_zone_pfn = zone_end_pfn(zone); 1205 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1206 if (saveable_page(zone, pfn)) 1207 n++; 1208 } 1209 return n; 1210 } 1211 1212 /* This is needed, because copy_page and memcpy are not usable for copying 1213 * task structs. 1214 */ 1215 static inline void do_copy_page(long *dst, long *src) 1216 { 1217 int n; 1218 1219 for (n = PAGE_SIZE / sizeof(long); n; n--) 1220 *dst++ = *src++; 1221 } 1222 1223 1224 /** 1225 * safe_copy_page - check if the page we are going to copy is marked as 1226 * present in the kernel page tables (this always is the case if 1227 * CONFIG_DEBUG_PAGEALLOC is not set and in that case 1228 * kernel_page_present() always returns 'true'). 1229 */ 1230 static void safe_copy_page(void *dst, struct page *s_page) 1231 { 1232 if (kernel_page_present(s_page)) { 1233 do_copy_page(dst, page_address(s_page)); 1234 } else { 1235 kernel_map_pages(s_page, 1, 1); 1236 do_copy_page(dst, page_address(s_page)); 1237 kernel_map_pages(s_page, 1, 0); 1238 } 1239 } 1240 1241 1242 #ifdef CONFIG_HIGHMEM 1243 static inline struct page * 1244 page_is_saveable(struct zone *zone, unsigned long pfn) 1245 { 1246 return is_highmem(zone) ? 1247 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); 1248 } 1249 1250 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1251 { 1252 struct page *s_page, *d_page; 1253 void *src, *dst; 1254 1255 s_page = pfn_to_page(src_pfn); 1256 d_page = pfn_to_page(dst_pfn); 1257 if (PageHighMem(s_page)) { 1258 src = kmap_atomic(s_page); 1259 dst = kmap_atomic(d_page); 1260 do_copy_page(dst, src); 1261 kunmap_atomic(dst); 1262 kunmap_atomic(src); 1263 } else { 1264 if (PageHighMem(d_page)) { 1265 /* Page pointed to by src may contain some kernel 1266 * data modified by kmap_atomic() 1267 */ 1268 safe_copy_page(buffer, s_page); 1269 dst = kmap_atomic(d_page); 1270 copy_page(dst, buffer); 1271 kunmap_atomic(dst); 1272 } else { 1273 safe_copy_page(page_address(d_page), s_page); 1274 } 1275 } 1276 } 1277 #else 1278 #define page_is_saveable(zone, pfn) saveable_page(zone, pfn) 1279 1280 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1281 { 1282 safe_copy_page(page_address(pfn_to_page(dst_pfn)), 1283 pfn_to_page(src_pfn)); 1284 } 1285 #endif /* CONFIG_HIGHMEM */ 1286 1287 static void 1288 copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) 1289 { 1290 struct zone *zone; 1291 unsigned long pfn; 1292 1293 for_each_populated_zone(zone) { 1294 unsigned long max_zone_pfn; 1295 1296 mark_free_pages(zone); 1297 max_zone_pfn = zone_end_pfn(zone); 1298 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1299 if (page_is_saveable(zone, pfn)) 1300 memory_bm_set_bit(orig_bm, pfn); 1301 } 1302 memory_bm_position_reset(orig_bm); 1303 memory_bm_position_reset(copy_bm); 1304 for(;;) { 1305 pfn = memory_bm_next_pfn(orig_bm); 1306 if (unlikely(pfn == BM_END_OF_MAP)) 1307 break; 1308 copy_data_page(memory_bm_next_pfn(copy_bm), pfn); 1309 } 1310 } 1311 1312 /* Total number of image pages */ 1313 static unsigned int nr_copy_pages; 1314 /* Number of pages needed for saving the original pfns of the image pages */ 1315 static unsigned int nr_meta_pages; 1316 /* 1317 * Numbers of normal and highmem page frames allocated for hibernation image 1318 * before suspending devices. 1319 */ 1320 unsigned int alloc_normal, alloc_highmem; 1321 /* 1322 * Memory bitmap used for marking saveable pages (during hibernation) or 1323 * hibernation image pages (during restore) 1324 */ 1325 static struct memory_bitmap orig_bm; 1326 /* 1327 * Memory bitmap used during hibernation for marking allocated page frames that 1328 * will contain copies of saveable pages. During restore it is initially used 1329 * for marking hibernation image pages, but then the set bits from it are 1330 * duplicated in @orig_bm and it is released. On highmem systems it is next 1331 * used for marking "safe" highmem pages, but it has to be reinitialized for 1332 * this purpose. 1333 */ 1334 static struct memory_bitmap copy_bm; 1335 1336 /** 1337 * swsusp_free - free pages allocated for the suspend. 1338 * 1339 * Suspend pages are alocated before the atomic copy is made, so we 1340 * need to release them after the resume. 1341 */ 1342 1343 void swsusp_free(void) 1344 { 1345 unsigned long fb_pfn, fr_pfn; 1346 1347 if (!forbidden_pages_map || !free_pages_map) 1348 goto out; 1349 1350 memory_bm_position_reset(forbidden_pages_map); 1351 memory_bm_position_reset(free_pages_map); 1352 1353 loop: 1354 fr_pfn = memory_bm_next_pfn(free_pages_map); 1355 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1356 1357 /* 1358 * Find the next bit set in both bitmaps. This is guaranteed to 1359 * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP. 1360 */ 1361 do { 1362 if (fb_pfn < fr_pfn) 1363 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1364 if (fr_pfn < fb_pfn) 1365 fr_pfn = memory_bm_next_pfn(free_pages_map); 1366 } while (fb_pfn != fr_pfn); 1367 1368 if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) { 1369 struct page *page = pfn_to_page(fr_pfn); 1370 1371 memory_bm_clear_current(forbidden_pages_map); 1372 memory_bm_clear_current(free_pages_map); 1373 __free_page(page); 1374 goto loop; 1375 } 1376 1377 out: 1378 nr_copy_pages = 0; 1379 nr_meta_pages = 0; 1380 restore_pblist = NULL; 1381 buffer = NULL; 1382 alloc_normal = 0; 1383 alloc_highmem = 0; 1384 } 1385 1386 /* Helper functions used for the shrinking of memory. */ 1387 1388 #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) 1389 1390 /** 1391 * preallocate_image_pages - Allocate a number of pages for hibernation image 1392 * @nr_pages: Number of page frames to allocate. 1393 * @mask: GFP flags to use for the allocation. 1394 * 1395 * Return value: Number of page frames actually allocated 1396 */ 1397 static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) 1398 { 1399 unsigned long nr_alloc = 0; 1400 1401 while (nr_pages > 0) { 1402 struct page *page; 1403 1404 page = alloc_image_page(mask); 1405 if (!page) 1406 break; 1407 memory_bm_set_bit(©_bm, page_to_pfn(page)); 1408 if (PageHighMem(page)) 1409 alloc_highmem++; 1410 else 1411 alloc_normal++; 1412 nr_pages--; 1413 nr_alloc++; 1414 } 1415 1416 return nr_alloc; 1417 } 1418 1419 static unsigned long preallocate_image_memory(unsigned long nr_pages, 1420 unsigned long avail_normal) 1421 { 1422 unsigned long alloc; 1423 1424 if (avail_normal <= alloc_normal) 1425 return 0; 1426 1427 alloc = avail_normal - alloc_normal; 1428 if (nr_pages < alloc) 1429 alloc = nr_pages; 1430 1431 return preallocate_image_pages(alloc, GFP_IMAGE); 1432 } 1433 1434 #ifdef CONFIG_HIGHMEM 1435 static unsigned long preallocate_image_highmem(unsigned long nr_pages) 1436 { 1437 return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); 1438 } 1439 1440 /** 1441 * __fraction - Compute (an approximation of) x * (multiplier / base) 1442 */ 1443 static unsigned long __fraction(u64 x, u64 multiplier, u64 base) 1444 { 1445 x *= multiplier; 1446 do_div(x, base); 1447 return (unsigned long)x; 1448 } 1449 1450 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1451 unsigned long highmem, 1452 unsigned long total) 1453 { 1454 unsigned long alloc = __fraction(nr_pages, highmem, total); 1455 1456 return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); 1457 } 1458 #else /* CONFIG_HIGHMEM */ 1459 static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) 1460 { 1461 return 0; 1462 } 1463 1464 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1465 unsigned long highmem, 1466 unsigned long total) 1467 { 1468 return 0; 1469 } 1470 #endif /* CONFIG_HIGHMEM */ 1471 1472 /** 1473 * free_unnecessary_pages - Release preallocated pages not needed for the image 1474 */ 1475 static unsigned long free_unnecessary_pages(void) 1476 { 1477 unsigned long save, to_free_normal, to_free_highmem, free; 1478 1479 save = count_data_pages(); 1480 if (alloc_normal >= save) { 1481 to_free_normal = alloc_normal - save; 1482 save = 0; 1483 } else { 1484 to_free_normal = 0; 1485 save -= alloc_normal; 1486 } 1487 save += count_highmem_pages(); 1488 if (alloc_highmem >= save) { 1489 to_free_highmem = alloc_highmem - save; 1490 } else { 1491 to_free_highmem = 0; 1492 save -= alloc_highmem; 1493 if (to_free_normal > save) 1494 to_free_normal -= save; 1495 else 1496 to_free_normal = 0; 1497 } 1498 free = to_free_normal + to_free_highmem; 1499 1500 memory_bm_position_reset(©_bm); 1501 1502 while (to_free_normal > 0 || to_free_highmem > 0) { 1503 unsigned long pfn = memory_bm_next_pfn(©_bm); 1504 struct page *page = pfn_to_page(pfn); 1505 1506 if (PageHighMem(page)) { 1507 if (!to_free_highmem) 1508 continue; 1509 to_free_highmem--; 1510 alloc_highmem--; 1511 } else { 1512 if (!to_free_normal) 1513 continue; 1514 to_free_normal--; 1515 alloc_normal--; 1516 } 1517 memory_bm_clear_bit(©_bm, pfn); 1518 swsusp_unset_page_forbidden(page); 1519 swsusp_unset_page_free(page); 1520 __free_page(page); 1521 } 1522 1523 return free; 1524 } 1525 1526 /** 1527 * minimum_image_size - Estimate the minimum acceptable size of an image 1528 * @saveable: Number of saveable pages in the system. 1529 * 1530 * We want to avoid attempting to free too much memory too hard, so estimate the 1531 * minimum acceptable size of a hibernation image to use as the lower limit for 1532 * preallocating memory. 1533 * 1534 * We assume that the minimum image size should be proportional to 1535 * 1536 * [number of saveable pages] - [number of pages that can be freed in theory] 1537 * 1538 * where the second term is the sum of (1) reclaimable slab pages, (2) active 1539 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages, 1540 * minus mapped file pages. 1541 */ 1542 static unsigned long minimum_image_size(unsigned long saveable) 1543 { 1544 unsigned long size; 1545 1546 size = global_page_state(NR_SLAB_RECLAIMABLE) 1547 + global_page_state(NR_ACTIVE_ANON) 1548 + global_page_state(NR_INACTIVE_ANON) 1549 + global_page_state(NR_ACTIVE_FILE) 1550 + global_page_state(NR_INACTIVE_FILE) 1551 - global_page_state(NR_FILE_MAPPED); 1552 1553 return saveable <= size ? 0 : saveable - size; 1554 } 1555 1556 /** 1557 * hibernate_preallocate_memory - Preallocate memory for hibernation image 1558 * 1559 * To create a hibernation image it is necessary to make a copy of every page 1560 * frame in use. We also need a number of page frames to be free during 1561 * hibernation for allocations made while saving the image and for device 1562 * drivers, in case they need to allocate memory from their hibernation 1563 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough 1564 * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through 1565 * /sys/power/reserved_size, respectively). To make this happen, we compute the 1566 * total number of available page frames and allocate at least 1567 * 1568 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 1569 * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) 1570 * 1571 * of them, which corresponds to the maximum size of a hibernation image. 1572 * 1573 * If image_size is set below the number following from the above formula, 1574 * the preallocation of memory is continued until the total number of saveable 1575 * pages in the system is below the requested image size or the minimum 1576 * acceptable image size returned by minimum_image_size(), whichever is greater. 1577 */ 1578 int hibernate_preallocate_memory(void) 1579 { 1580 struct zone *zone; 1581 unsigned long saveable, size, max_size, count, highmem, pages = 0; 1582 unsigned long alloc, save_highmem, pages_highmem, avail_normal; 1583 ktime_t start, stop; 1584 int error; 1585 1586 printk(KERN_INFO "PM: Preallocating image memory... "); 1587 start = ktime_get(); 1588 1589 error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); 1590 if (error) 1591 goto err_out; 1592 1593 error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); 1594 if (error) 1595 goto err_out; 1596 1597 alloc_normal = 0; 1598 alloc_highmem = 0; 1599 1600 /* Count the number of saveable data pages. */ 1601 save_highmem = count_highmem_pages(); 1602 saveable = count_data_pages(); 1603 1604 /* 1605 * Compute the total number of page frames we can use (count) and the 1606 * number of pages needed for image metadata (size). 1607 */ 1608 count = saveable; 1609 saveable += save_highmem; 1610 highmem = save_highmem; 1611 size = 0; 1612 for_each_populated_zone(zone) { 1613 size += snapshot_additional_pages(zone); 1614 if (is_highmem(zone)) 1615 highmem += zone_page_state(zone, NR_FREE_PAGES); 1616 else 1617 count += zone_page_state(zone, NR_FREE_PAGES); 1618 } 1619 avail_normal = count; 1620 count += highmem; 1621 count -= totalreserve_pages; 1622 1623 /* Add number of pages required for page keys (s390 only). */ 1624 size += page_key_additional_pages(saveable); 1625 1626 /* Compute the maximum number of saveable pages to leave in memory. */ 1627 max_size = (count - (size + PAGES_FOR_IO)) / 2 1628 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); 1629 /* Compute the desired number of image pages specified by image_size. */ 1630 size = DIV_ROUND_UP(image_size, PAGE_SIZE); 1631 if (size > max_size) 1632 size = max_size; 1633 /* 1634 * If the desired number of image pages is at least as large as the 1635 * current number of saveable pages in memory, allocate page frames for 1636 * the image and we're done. 1637 */ 1638 if (size >= saveable) { 1639 pages = preallocate_image_highmem(save_highmem); 1640 pages += preallocate_image_memory(saveable - pages, avail_normal); 1641 goto out; 1642 } 1643 1644 /* Estimate the minimum size of the image. */ 1645 pages = minimum_image_size(saveable); 1646 /* 1647 * To avoid excessive pressure on the normal zone, leave room in it to 1648 * accommodate an image of the minimum size (unless it's already too 1649 * small, in which case don't preallocate pages from it at all). 1650 */ 1651 if (avail_normal > pages) 1652 avail_normal -= pages; 1653 else 1654 avail_normal = 0; 1655 if (size < pages) 1656 size = min_t(unsigned long, pages, max_size); 1657 1658 /* 1659 * Let the memory management subsystem know that we're going to need a 1660 * large number of page frames to allocate and make it free some memory. 1661 * NOTE: If this is not done, performance will be hurt badly in some 1662 * test cases. 1663 */ 1664 shrink_all_memory(saveable - size); 1665 1666 /* 1667 * The number of saveable pages in memory was too high, so apply some 1668 * pressure to decrease it. First, make room for the largest possible 1669 * image and fail if that doesn't work. Next, try to decrease the size 1670 * of the image as much as indicated by 'size' using allocations from 1671 * highmem and non-highmem zones separately. 1672 */ 1673 pages_highmem = preallocate_image_highmem(highmem / 2); 1674 alloc = count - max_size; 1675 if (alloc > pages_highmem) 1676 alloc -= pages_highmem; 1677 else 1678 alloc = 0; 1679 pages = preallocate_image_memory(alloc, avail_normal); 1680 if (pages < alloc) { 1681 /* We have exhausted non-highmem pages, try highmem. */ 1682 alloc -= pages; 1683 pages += pages_highmem; 1684 pages_highmem = preallocate_image_highmem(alloc); 1685 if (pages_highmem < alloc) 1686 goto err_out; 1687 pages += pages_highmem; 1688 /* 1689 * size is the desired number of saveable pages to leave in 1690 * memory, so try to preallocate (all memory - size) pages. 1691 */ 1692 alloc = (count - pages) - size; 1693 pages += preallocate_image_highmem(alloc); 1694 } else { 1695 /* 1696 * There are approximately max_size saveable pages at this point 1697 * and we want to reduce this number down to size. 1698 */ 1699 alloc = max_size - size; 1700 size = preallocate_highmem_fraction(alloc, highmem, count); 1701 pages_highmem += size; 1702 alloc -= size; 1703 size = preallocate_image_memory(alloc, avail_normal); 1704 pages_highmem += preallocate_image_highmem(alloc - size); 1705 pages += pages_highmem + size; 1706 } 1707 1708 /* 1709 * We only need as many page frames for the image as there are saveable 1710 * pages in memory, but we have allocated more. Release the excessive 1711 * ones now. 1712 */ 1713 pages -= free_unnecessary_pages(); 1714 1715 out: 1716 stop = ktime_get(); 1717 printk(KERN_CONT "done (allocated %lu pages)\n", pages); 1718 swsusp_show_speed(start, stop, pages, "Allocated"); 1719 1720 return 0; 1721 1722 err_out: 1723 printk(KERN_CONT "\n"); 1724 swsusp_free(); 1725 return -ENOMEM; 1726 } 1727 1728 #ifdef CONFIG_HIGHMEM 1729 /** 1730 * count_pages_for_highmem - compute the number of non-highmem pages 1731 * that will be necessary for creating copies of highmem pages. 1732 */ 1733 1734 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) 1735 { 1736 unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; 1737 1738 if (free_highmem >= nr_highmem) 1739 nr_highmem = 0; 1740 else 1741 nr_highmem -= free_highmem; 1742 1743 return nr_highmem; 1744 } 1745 #else 1746 static unsigned int 1747 count_pages_for_highmem(unsigned int nr_highmem) { return 0; } 1748 #endif /* CONFIG_HIGHMEM */ 1749 1750 /** 1751 * enough_free_mem - Make sure we have enough free memory for the 1752 * snapshot image. 1753 */ 1754 1755 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) 1756 { 1757 struct zone *zone; 1758 unsigned int free = alloc_normal; 1759 1760 for_each_populated_zone(zone) 1761 if (!is_highmem(zone)) 1762 free += zone_page_state(zone, NR_FREE_PAGES); 1763 1764 nr_pages += count_pages_for_highmem(nr_highmem); 1765 pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n", 1766 nr_pages, PAGES_FOR_IO, free); 1767 1768 return free > nr_pages + PAGES_FOR_IO; 1769 } 1770 1771 #ifdef CONFIG_HIGHMEM 1772 /** 1773 * get_highmem_buffer - if there are some highmem pages in the suspend 1774 * image, we may need the buffer to copy them and/or load their data. 1775 */ 1776 1777 static inline int get_highmem_buffer(int safe_needed) 1778 { 1779 buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); 1780 return buffer ? 0 : -ENOMEM; 1781 } 1782 1783 /** 1784 * alloc_highmem_image_pages - allocate some highmem pages for the image. 1785 * Try to allocate as many pages as needed, but if the number of free 1786 * highmem pages is lesser than that, allocate them all. 1787 */ 1788 1789 static inline unsigned int 1790 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) 1791 { 1792 unsigned int to_alloc = count_free_highmem_pages(); 1793 1794 if (to_alloc > nr_highmem) 1795 to_alloc = nr_highmem; 1796 1797 nr_highmem -= to_alloc; 1798 while (to_alloc-- > 0) { 1799 struct page *page; 1800 1801 page = alloc_image_page(__GFP_HIGHMEM); 1802 memory_bm_set_bit(bm, page_to_pfn(page)); 1803 } 1804 return nr_highmem; 1805 } 1806 #else 1807 static inline int get_highmem_buffer(int safe_needed) { return 0; } 1808 1809 static inline unsigned int 1810 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } 1811 #endif /* CONFIG_HIGHMEM */ 1812 1813 /** 1814 * swsusp_alloc - allocate memory for the suspend image 1815 * 1816 * We first try to allocate as many highmem pages as there are 1817 * saveable highmem pages in the system. If that fails, we allocate 1818 * non-highmem pages for the copies of the remaining highmem ones. 1819 * 1820 * In this approach it is likely that the copies of highmem pages will 1821 * also be located in the high memory, because of the way in which 1822 * copy_data_pages() works. 1823 */ 1824 1825 static int 1826 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, 1827 unsigned int nr_pages, unsigned int nr_highmem) 1828 { 1829 if (nr_highmem > 0) { 1830 if (get_highmem_buffer(PG_ANY)) 1831 goto err_out; 1832 if (nr_highmem > alloc_highmem) { 1833 nr_highmem -= alloc_highmem; 1834 nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); 1835 } 1836 } 1837 if (nr_pages > alloc_normal) { 1838 nr_pages -= alloc_normal; 1839 while (nr_pages-- > 0) { 1840 struct page *page; 1841 1842 page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); 1843 if (!page) 1844 goto err_out; 1845 memory_bm_set_bit(copy_bm, page_to_pfn(page)); 1846 } 1847 } 1848 1849 return 0; 1850 1851 err_out: 1852 swsusp_free(); 1853 return -ENOMEM; 1854 } 1855 1856 asmlinkage __visible int swsusp_save(void) 1857 { 1858 unsigned int nr_pages, nr_highmem; 1859 1860 printk(KERN_INFO "PM: Creating hibernation image:\n"); 1861 1862 drain_local_pages(NULL); 1863 nr_pages = count_data_pages(); 1864 nr_highmem = count_highmem_pages(); 1865 printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem); 1866 1867 if (!enough_free_mem(nr_pages, nr_highmem)) { 1868 printk(KERN_ERR "PM: Not enough free memory\n"); 1869 return -ENOMEM; 1870 } 1871 1872 if (swsusp_alloc(&orig_bm, ©_bm, nr_pages, nr_highmem)) { 1873 printk(KERN_ERR "PM: Memory allocation failed\n"); 1874 return -ENOMEM; 1875 } 1876 1877 /* During allocating of suspend pagedir, new cold pages may appear. 1878 * Kill them. 1879 */ 1880 drain_local_pages(NULL); 1881 copy_data_pages(©_bm, &orig_bm); 1882 1883 /* 1884 * End of critical section. From now on, we can write to memory, 1885 * but we should not touch disk. This specially means we must _not_ 1886 * touch swap space! Except we must write out our image of course. 1887 */ 1888 1889 nr_pages += nr_highmem; 1890 nr_copy_pages = nr_pages; 1891 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); 1892 1893 printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n", 1894 nr_pages); 1895 1896 return 0; 1897 } 1898 1899 #ifndef CONFIG_ARCH_HIBERNATION_HEADER 1900 static int init_header_complete(struct swsusp_info *info) 1901 { 1902 memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); 1903 info->version_code = LINUX_VERSION_CODE; 1904 return 0; 1905 } 1906 1907 static char *check_image_kernel(struct swsusp_info *info) 1908 { 1909 if (info->version_code != LINUX_VERSION_CODE) 1910 return "kernel version"; 1911 if (strcmp(info->uts.sysname,init_utsname()->sysname)) 1912 return "system type"; 1913 if (strcmp(info->uts.release,init_utsname()->release)) 1914 return "kernel release"; 1915 if (strcmp(info->uts.version,init_utsname()->version)) 1916 return "version"; 1917 if (strcmp(info->uts.machine,init_utsname()->machine)) 1918 return "machine"; 1919 return NULL; 1920 } 1921 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ 1922 1923 unsigned long snapshot_get_image_size(void) 1924 { 1925 return nr_copy_pages + nr_meta_pages + 1; 1926 } 1927 1928 static int init_header(struct swsusp_info *info) 1929 { 1930 memset(info, 0, sizeof(struct swsusp_info)); 1931 info->num_physpages = get_num_physpages(); 1932 info->image_pages = nr_copy_pages; 1933 info->pages = snapshot_get_image_size(); 1934 info->size = info->pages; 1935 info->size <<= PAGE_SHIFT; 1936 return init_header_complete(info); 1937 } 1938 1939 /** 1940 * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm 1941 * are stored in the array @buf[] (1 page at a time) 1942 */ 1943 1944 static inline void 1945 pack_pfns(unsigned long *buf, struct memory_bitmap *bm) 1946 { 1947 int j; 1948 1949 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 1950 buf[j] = memory_bm_next_pfn(bm); 1951 if (unlikely(buf[j] == BM_END_OF_MAP)) 1952 break; 1953 /* Save page key for data page (s390 only). */ 1954 page_key_read(buf + j); 1955 } 1956 } 1957 1958 /** 1959 * snapshot_read_next - used for reading the system memory snapshot. 1960 * 1961 * On the first call to it @handle should point to a zeroed 1962 * snapshot_handle structure. The structure gets updated and a pointer 1963 * to it should be passed to this function every next time. 1964 * 1965 * On success the function returns a positive number. Then, the caller 1966 * is allowed to read up to the returned number of bytes from the memory 1967 * location computed by the data_of() macro. 1968 * 1969 * The function returns 0 to indicate the end of data stream condition, 1970 * and a negative number is returned on error. In such cases the 1971 * structure pointed to by @handle is not updated and should not be used 1972 * any more. 1973 */ 1974 1975 int snapshot_read_next(struct snapshot_handle *handle) 1976 { 1977 if (handle->cur > nr_meta_pages + nr_copy_pages) 1978 return 0; 1979 1980 if (!buffer) { 1981 /* This makes the buffer be freed by swsusp_free() */ 1982 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 1983 if (!buffer) 1984 return -ENOMEM; 1985 } 1986 if (!handle->cur) { 1987 int error; 1988 1989 error = init_header((struct swsusp_info *)buffer); 1990 if (error) 1991 return error; 1992 handle->buffer = buffer; 1993 memory_bm_position_reset(&orig_bm); 1994 memory_bm_position_reset(©_bm); 1995 } else if (handle->cur <= nr_meta_pages) { 1996 clear_page(buffer); 1997 pack_pfns(buffer, &orig_bm); 1998 } else { 1999 struct page *page; 2000 2001 page = pfn_to_page(memory_bm_next_pfn(©_bm)); 2002 if (PageHighMem(page)) { 2003 /* Highmem pages are copied to the buffer, 2004 * because we can't return with a kmapped 2005 * highmem page (we may not be called again). 2006 */ 2007 void *kaddr; 2008 2009 kaddr = kmap_atomic(page); 2010 copy_page(buffer, kaddr); 2011 kunmap_atomic(kaddr); 2012 handle->buffer = buffer; 2013 } else { 2014 handle->buffer = page_address(page); 2015 } 2016 } 2017 handle->cur++; 2018 return PAGE_SIZE; 2019 } 2020 2021 /** 2022 * mark_unsafe_pages - mark the pages that cannot be used for storing 2023 * the image during resume, because they conflict with the pages that 2024 * had been used before suspend 2025 */ 2026 2027 static int mark_unsafe_pages(struct memory_bitmap *bm) 2028 { 2029 struct zone *zone; 2030 unsigned long pfn, max_zone_pfn; 2031 2032 /* Clear page flags */ 2033 for_each_populated_zone(zone) { 2034 max_zone_pfn = zone_end_pfn(zone); 2035 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 2036 if (pfn_valid(pfn)) 2037 swsusp_unset_page_free(pfn_to_page(pfn)); 2038 } 2039 2040 /* Mark pages that correspond to the "original" pfns as "unsafe" */ 2041 memory_bm_position_reset(bm); 2042 do { 2043 pfn = memory_bm_next_pfn(bm); 2044 if (likely(pfn != BM_END_OF_MAP)) { 2045 if (likely(pfn_valid(pfn)) && !is_nosave_page(pfn)) 2046 swsusp_set_page_free(pfn_to_page(pfn)); 2047 else 2048 return -EFAULT; 2049 } 2050 } while (pfn != BM_END_OF_MAP); 2051 2052 allocated_unsafe_pages = 0; 2053 2054 return 0; 2055 } 2056 2057 static void 2058 duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) 2059 { 2060 unsigned long pfn; 2061 2062 memory_bm_position_reset(src); 2063 pfn = memory_bm_next_pfn(src); 2064 while (pfn != BM_END_OF_MAP) { 2065 memory_bm_set_bit(dst, pfn); 2066 pfn = memory_bm_next_pfn(src); 2067 } 2068 } 2069 2070 static int check_header(struct swsusp_info *info) 2071 { 2072 char *reason; 2073 2074 reason = check_image_kernel(info); 2075 if (!reason && info->num_physpages != get_num_physpages()) 2076 reason = "memory size"; 2077 if (reason) { 2078 printk(KERN_ERR "PM: Image mismatch: %s\n", reason); 2079 return -EPERM; 2080 } 2081 return 0; 2082 } 2083 2084 /** 2085 * load header - check the image header and copy data from it 2086 */ 2087 2088 static int 2089 load_header(struct swsusp_info *info) 2090 { 2091 int error; 2092 2093 restore_pblist = NULL; 2094 error = check_header(info); 2095 if (!error) { 2096 nr_copy_pages = info->image_pages; 2097 nr_meta_pages = info->pages - info->image_pages - 1; 2098 } 2099 return error; 2100 } 2101 2102 /** 2103 * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set 2104 * the corresponding bit in the memory bitmap @bm 2105 */ 2106 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) 2107 { 2108 int j; 2109 2110 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 2111 if (unlikely(buf[j] == BM_END_OF_MAP)) 2112 break; 2113 2114 /* Extract and buffer page key for data page (s390 only). */ 2115 page_key_memorize(buf + j); 2116 2117 if (memory_bm_pfn_present(bm, buf[j])) 2118 memory_bm_set_bit(bm, buf[j]); 2119 else 2120 return -EFAULT; 2121 } 2122 2123 return 0; 2124 } 2125 2126 /* List of "safe" pages that may be used to store data loaded from the suspend 2127 * image 2128 */ 2129 static struct linked_page *safe_pages_list; 2130 2131 #ifdef CONFIG_HIGHMEM 2132 /* struct highmem_pbe is used for creating the list of highmem pages that 2133 * should be restored atomically during the resume from disk, because the page 2134 * frames they have occupied before the suspend are in use. 2135 */ 2136 struct highmem_pbe { 2137 struct page *copy_page; /* data is here now */ 2138 struct page *orig_page; /* data was here before the suspend */ 2139 struct highmem_pbe *next; 2140 }; 2141 2142 /* List of highmem PBEs needed for restoring the highmem pages that were 2143 * allocated before the suspend and included in the suspend image, but have 2144 * also been allocated by the "resume" kernel, so their contents cannot be 2145 * written directly to their "original" page frames. 2146 */ 2147 static struct highmem_pbe *highmem_pblist; 2148 2149 /** 2150 * count_highmem_image_pages - compute the number of highmem pages in the 2151 * suspend image. The bits in the memory bitmap @bm that correspond to the 2152 * image pages are assumed to be set. 2153 */ 2154 2155 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) 2156 { 2157 unsigned long pfn; 2158 unsigned int cnt = 0; 2159 2160 memory_bm_position_reset(bm); 2161 pfn = memory_bm_next_pfn(bm); 2162 while (pfn != BM_END_OF_MAP) { 2163 if (PageHighMem(pfn_to_page(pfn))) 2164 cnt++; 2165 2166 pfn = memory_bm_next_pfn(bm); 2167 } 2168 return cnt; 2169 } 2170 2171 /** 2172 * prepare_highmem_image - try to allocate as many highmem pages as 2173 * there are highmem image pages (@nr_highmem_p points to the variable 2174 * containing the number of highmem image pages). The pages that are 2175 * "safe" (ie. will not be overwritten when the suspend image is 2176 * restored) have the corresponding bits set in @bm (it must be 2177 * unitialized). 2178 * 2179 * NOTE: This function should not be called if there are no highmem 2180 * image pages. 2181 */ 2182 2183 static unsigned int safe_highmem_pages; 2184 2185 static struct memory_bitmap *safe_highmem_bm; 2186 2187 static int 2188 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) 2189 { 2190 unsigned int to_alloc; 2191 2192 if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) 2193 return -ENOMEM; 2194 2195 if (get_highmem_buffer(PG_SAFE)) 2196 return -ENOMEM; 2197 2198 to_alloc = count_free_highmem_pages(); 2199 if (to_alloc > *nr_highmem_p) 2200 to_alloc = *nr_highmem_p; 2201 else 2202 *nr_highmem_p = to_alloc; 2203 2204 safe_highmem_pages = 0; 2205 while (to_alloc-- > 0) { 2206 struct page *page; 2207 2208 page = alloc_page(__GFP_HIGHMEM); 2209 if (!swsusp_page_is_free(page)) { 2210 /* The page is "safe", set its bit the bitmap */ 2211 memory_bm_set_bit(bm, page_to_pfn(page)); 2212 safe_highmem_pages++; 2213 } 2214 /* Mark the page as allocated */ 2215 swsusp_set_page_forbidden(page); 2216 swsusp_set_page_free(page); 2217 } 2218 memory_bm_position_reset(bm); 2219 safe_highmem_bm = bm; 2220 return 0; 2221 } 2222 2223 /** 2224 * get_highmem_page_buffer - for given highmem image page find the buffer 2225 * that suspend_write_next() should set for its caller to write to. 2226 * 2227 * If the page is to be saved to its "original" page frame or a copy of 2228 * the page is to be made in the highmem, @buffer is returned. Otherwise, 2229 * the copy of the page is to be made in normal memory, so the address of 2230 * the copy is returned. 2231 * 2232 * If @buffer is returned, the caller of suspend_write_next() will write 2233 * the page's contents to @buffer, so they will have to be copied to the 2234 * right location on the next call to suspend_write_next() and it is done 2235 * with the help of copy_last_highmem_page(). For this purpose, if 2236 * @buffer is returned, @last_highmem page is set to the page to which 2237 * the data will have to be copied from @buffer. 2238 */ 2239 2240 static struct page *last_highmem_page; 2241 2242 static void * 2243 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) 2244 { 2245 struct highmem_pbe *pbe; 2246 void *kaddr; 2247 2248 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { 2249 /* We have allocated the "original" page frame and we can 2250 * use it directly to store the loaded page. 2251 */ 2252 last_highmem_page = page; 2253 return buffer; 2254 } 2255 /* The "original" page frame has not been allocated and we have to 2256 * use a "safe" page frame to store the loaded page. 2257 */ 2258 pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); 2259 if (!pbe) { 2260 swsusp_free(); 2261 return ERR_PTR(-ENOMEM); 2262 } 2263 pbe->orig_page = page; 2264 if (safe_highmem_pages > 0) { 2265 struct page *tmp; 2266 2267 /* Copy of the page will be stored in high memory */ 2268 kaddr = buffer; 2269 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); 2270 safe_highmem_pages--; 2271 last_highmem_page = tmp; 2272 pbe->copy_page = tmp; 2273 } else { 2274 /* Copy of the page will be stored in normal memory */ 2275 kaddr = safe_pages_list; 2276 safe_pages_list = safe_pages_list->next; 2277 pbe->copy_page = virt_to_page(kaddr); 2278 } 2279 pbe->next = highmem_pblist; 2280 highmem_pblist = pbe; 2281 return kaddr; 2282 } 2283 2284 /** 2285 * copy_last_highmem_page - copy the contents of a highmem image from 2286 * @buffer, where the caller of snapshot_write_next() has place them, 2287 * to the right location represented by @last_highmem_page . 2288 */ 2289 2290 static void copy_last_highmem_page(void) 2291 { 2292 if (last_highmem_page) { 2293 void *dst; 2294 2295 dst = kmap_atomic(last_highmem_page); 2296 copy_page(dst, buffer); 2297 kunmap_atomic(dst); 2298 last_highmem_page = NULL; 2299 } 2300 } 2301 2302 static inline int last_highmem_page_copied(void) 2303 { 2304 return !last_highmem_page; 2305 } 2306 2307 static inline void free_highmem_data(void) 2308 { 2309 if (safe_highmem_bm) 2310 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); 2311 2312 if (buffer) 2313 free_image_page(buffer, PG_UNSAFE_CLEAR); 2314 } 2315 #else 2316 static unsigned int 2317 count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } 2318 2319 static inline int 2320 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) 2321 { 2322 return 0; 2323 } 2324 2325 static inline void * 2326 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) 2327 { 2328 return ERR_PTR(-EINVAL); 2329 } 2330 2331 static inline void copy_last_highmem_page(void) {} 2332 static inline int last_highmem_page_copied(void) { return 1; } 2333 static inline void free_highmem_data(void) {} 2334 #endif /* CONFIG_HIGHMEM */ 2335 2336 /** 2337 * prepare_image - use the memory bitmap @bm to mark the pages that will 2338 * be overwritten in the process of restoring the system memory state 2339 * from the suspend image ("unsafe" pages) and allocate memory for the 2340 * image. 2341 * 2342 * The idea is to allocate a new memory bitmap first and then allocate 2343 * as many pages as needed for the image data, but not to assign these 2344 * pages to specific tasks initially. Instead, we just mark them as 2345 * allocated and create a lists of "safe" pages that will be used 2346 * later. On systems with high memory a list of "safe" highmem pages is 2347 * also created. 2348 */ 2349 2350 #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) 2351 2352 static int 2353 prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) 2354 { 2355 unsigned int nr_pages, nr_highmem; 2356 struct linked_page *sp_list, *lp; 2357 int error; 2358 2359 /* If there is no highmem, the buffer will not be necessary */ 2360 free_image_page(buffer, PG_UNSAFE_CLEAR); 2361 buffer = NULL; 2362 2363 nr_highmem = count_highmem_image_pages(bm); 2364 error = mark_unsafe_pages(bm); 2365 if (error) 2366 goto Free; 2367 2368 error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); 2369 if (error) 2370 goto Free; 2371 2372 duplicate_memory_bitmap(new_bm, bm); 2373 memory_bm_free(bm, PG_UNSAFE_KEEP); 2374 if (nr_highmem > 0) { 2375 error = prepare_highmem_image(bm, &nr_highmem); 2376 if (error) 2377 goto Free; 2378 } 2379 /* Reserve some safe pages for potential later use. 2380 * 2381 * NOTE: This way we make sure there will be enough safe pages for the 2382 * chain_alloc() in get_buffer(). It is a bit wasteful, but 2383 * nr_copy_pages cannot be greater than 50% of the memory anyway. 2384 */ 2385 sp_list = NULL; 2386 /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ 2387 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2388 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); 2389 while (nr_pages > 0) { 2390 lp = get_image_page(GFP_ATOMIC, PG_SAFE); 2391 if (!lp) { 2392 error = -ENOMEM; 2393 goto Free; 2394 } 2395 lp->next = sp_list; 2396 sp_list = lp; 2397 nr_pages--; 2398 } 2399 /* Preallocate memory for the image */ 2400 safe_pages_list = NULL; 2401 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2402 while (nr_pages > 0) { 2403 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); 2404 if (!lp) { 2405 error = -ENOMEM; 2406 goto Free; 2407 } 2408 if (!swsusp_page_is_free(virt_to_page(lp))) { 2409 /* The page is "safe", add it to the list */ 2410 lp->next = safe_pages_list; 2411 safe_pages_list = lp; 2412 } 2413 /* Mark the page as allocated */ 2414 swsusp_set_page_forbidden(virt_to_page(lp)); 2415 swsusp_set_page_free(virt_to_page(lp)); 2416 nr_pages--; 2417 } 2418 /* Free the reserved safe pages so that chain_alloc() can use them */ 2419 while (sp_list) { 2420 lp = sp_list->next; 2421 free_image_page(sp_list, PG_UNSAFE_CLEAR); 2422 sp_list = lp; 2423 } 2424 return 0; 2425 2426 Free: 2427 swsusp_free(); 2428 return error; 2429 } 2430 2431 /** 2432 * get_buffer - compute the address that snapshot_write_next() should 2433 * set for its caller to write to. 2434 */ 2435 2436 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) 2437 { 2438 struct pbe *pbe; 2439 struct page *page; 2440 unsigned long pfn = memory_bm_next_pfn(bm); 2441 2442 if (pfn == BM_END_OF_MAP) 2443 return ERR_PTR(-EFAULT); 2444 2445 page = pfn_to_page(pfn); 2446 if (PageHighMem(page)) 2447 return get_highmem_page_buffer(page, ca); 2448 2449 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) 2450 /* We have allocated the "original" page frame and we can 2451 * use it directly to store the loaded page. 2452 */ 2453 return page_address(page); 2454 2455 /* The "original" page frame has not been allocated and we have to 2456 * use a "safe" page frame to store the loaded page. 2457 */ 2458 pbe = chain_alloc(ca, sizeof(struct pbe)); 2459 if (!pbe) { 2460 swsusp_free(); 2461 return ERR_PTR(-ENOMEM); 2462 } 2463 pbe->orig_address = page_address(page); 2464 pbe->address = safe_pages_list; 2465 safe_pages_list = safe_pages_list->next; 2466 pbe->next = restore_pblist; 2467 restore_pblist = pbe; 2468 return pbe->address; 2469 } 2470 2471 /** 2472 * snapshot_write_next - used for writing the system memory snapshot. 2473 * 2474 * On the first call to it @handle should point to a zeroed 2475 * snapshot_handle structure. The structure gets updated and a pointer 2476 * to it should be passed to this function every next time. 2477 * 2478 * On success the function returns a positive number. Then, the caller 2479 * is allowed to write up to the returned number of bytes to the memory 2480 * location computed by the data_of() macro. 2481 * 2482 * The function returns 0 to indicate the "end of file" condition, 2483 * and a negative number is returned on error. In such cases the 2484 * structure pointed to by @handle is not updated and should not be used 2485 * any more. 2486 */ 2487 2488 int snapshot_write_next(struct snapshot_handle *handle) 2489 { 2490 static struct chain_allocator ca; 2491 int error = 0; 2492 2493 /* Check if we have already loaded the entire image */ 2494 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) 2495 return 0; 2496 2497 handle->sync_read = 1; 2498 2499 if (!handle->cur) { 2500 if (!buffer) 2501 /* This makes the buffer be freed by swsusp_free() */ 2502 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2503 2504 if (!buffer) 2505 return -ENOMEM; 2506 2507 handle->buffer = buffer; 2508 } else if (handle->cur == 1) { 2509 error = load_header(buffer); 2510 if (error) 2511 return error; 2512 2513 error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); 2514 if (error) 2515 return error; 2516 2517 /* Allocate buffer for page keys. */ 2518 error = page_key_alloc(nr_copy_pages); 2519 if (error) 2520 return error; 2521 2522 } else if (handle->cur <= nr_meta_pages + 1) { 2523 error = unpack_orig_pfns(buffer, ©_bm); 2524 if (error) 2525 return error; 2526 2527 if (handle->cur == nr_meta_pages + 1) { 2528 error = prepare_image(&orig_bm, ©_bm); 2529 if (error) 2530 return error; 2531 2532 chain_init(&ca, GFP_ATOMIC, PG_SAFE); 2533 memory_bm_position_reset(&orig_bm); 2534 restore_pblist = NULL; 2535 handle->buffer = get_buffer(&orig_bm, &ca); 2536 handle->sync_read = 0; 2537 if (IS_ERR(handle->buffer)) 2538 return PTR_ERR(handle->buffer); 2539 } 2540 } else { 2541 copy_last_highmem_page(); 2542 /* Restore page key for data page (s390 only). */ 2543 page_key_write(handle->buffer); 2544 handle->buffer = get_buffer(&orig_bm, &ca); 2545 if (IS_ERR(handle->buffer)) 2546 return PTR_ERR(handle->buffer); 2547 if (handle->buffer != buffer) 2548 handle->sync_read = 0; 2549 } 2550 handle->cur++; 2551 return PAGE_SIZE; 2552 } 2553 2554 /** 2555 * snapshot_write_finalize - must be called after the last call to 2556 * snapshot_write_next() in case the last page in the image happens 2557 * to be a highmem page and its contents should be stored in the 2558 * highmem. Additionally, it releases the memory that will not be 2559 * used any more. 2560 */ 2561 2562 void snapshot_write_finalize(struct snapshot_handle *handle) 2563 { 2564 copy_last_highmem_page(); 2565 /* Restore page key for data page (s390 only). */ 2566 page_key_write(handle->buffer); 2567 page_key_free(); 2568 /* Free only if we have loaded the image entirely */ 2569 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { 2570 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); 2571 free_highmem_data(); 2572 } 2573 } 2574 2575 int snapshot_image_loaded(struct snapshot_handle *handle) 2576 { 2577 return !(!nr_copy_pages || !last_highmem_page_copied() || 2578 handle->cur <= nr_meta_pages + nr_copy_pages); 2579 } 2580 2581 #ifdef CONFIG_HIGHMEM 2582 /* Assumes that @buf is ready and points to a "safe" page */ 2583 static inline void 2584 swap_two_pages_data(struct page *p1, struct page *p2, void *buf) 2585 { 2586 void *kaddr1, *kaddr2; 2587 2588 kaddr1 = kmap_atomic(p1); 2589 kaddr2 = kmap_atomic(p2); 2590 copy_page(buf, kaddr1); 2591 copy_page(kaddr1, kaddr2); 2592 copy_page(kaddr2, buf); 2593 kunmap_atomic(kaddr2); 2594 kunmap_atomic(kaddr1); 2595 } 2596 2597 /** 2598 * restore_highmem - for each highmem page that was allocated before 2599 * the suspend and included in the suspend image, and also has been 2600 * allocated by the "resume" kernel swap its current (ie. "before 2601 * resume") contents with the previous (ie. "before suspend") one. 2602 * 2603 * If the resume eventually fails, we can call this function once 2604 * again and restore the "before resume" highmem state. 2605 */ 2606 2607 int restore_highmem(void) 2608 { 2609 struct highmem_pbe *pbe = highmem_pblist; 2610 void *buf; 2611 2612 if (!pbe) 2613 return 0; 2614 2615 buf = get_image_page(GFP_ATOMIC, PG_SAFE); 2616 if (!buf) 2617 return -ENOMEM; 2618 2619 while (pbe) { 2620 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); 2621 pbe = pbe->next; 2622 } 2623 free_image_page(buf, PG_UNSAFE_CLEAR); 2624 return 0; 2625 } 2626 #endif /* CONFIG_HIGHMEM */ 2627