1 /* 2 * linux/kernel/power/snapshot.c 3 * 4 * This file provides system snapshot/restore functionality for swsusp. 5 * 6 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz> 7 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 8 * 9 * This file is released under the GPLv2. 10 * 11 */ 12 13 #include <linux/version.h> 14 #include <linux/module.h> 15 #include <linux/mm.h> 16 #include <linux/suspend.h> 17 #include <linux/delay.h> 18 #include <linux/bitops.h> 19 #include <linux/spinlock.h> 20 #include <linux/kernel.h> 21 #include <linux/pm.h> 22 #include <linux/device.h> 23 #include <linux/init.h> 24 #include <linux/bootmem.h> 25 #include <linux/syscalls.h> 26 #include <linux/console.h> 27 #include <linux/highmem.h> 28 #include <linux/list.h> 29 #include <linux/slab.h> 30 #include <linux/compiler.h> 31 #include <linux/ktime.h> 32 33 #include <asm/uaccess.h> 34 #include <asm/mmu_context.h> 35 #include <asm/pgtable.h> 36 #include <asm/tlbflush.h> 37 #include <asm/io.h> 38 39 #include "power.h" 40 41 static int swsusp_page_is_free(struct page *); 42 static void swsusp_set_page_forbidden(struct page *); 43 static void swsusp_unset_page_forbidden(struct page *); 44 45 /* 46 * Number of bytes to reserve for memory allocations made by device drivers 47 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't 48 * cause image creation to fail (tunable via /sys/power/reserved_size). 49 */ 50 unsigned long reserved_size; 51 52 void __init hibernate_reserved_size_init(void) 53 { 54 reserved_size = SPARE_PAGES * PAGE_SIZE; 55 } 56 57 /* 58 * Preferred image size in bytes (tunable via /sys/power/image_size). 59 * When it is set to N, swsusp will do its best to ensure the image 60 * size will not exceed N bytes, but if that is impossible, it will 61 * try to create the smallest image possible. 62 */ 63 unsigned long image_size; 64 65 void __init hibernate_image_size_init(void) 66 { 67 image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; 68 } 69 70 /* List of PBEs needed for restoring the pages that were allocated before 71 * the suspend and included in the suspend image, but have also been 72 * allocated by the "resume" kernel, so their contents cannot be written 73 * directly to their "original" page frames. 74 */ 75 struct pbe *restore_pblist; 76 77 /* Pointer to an auxiliary buffer (1 page) */ 78 static void *buffer; 79 80 /** 81 * @safe_needed - on resume, for storing the PBE list and the image, 82 * we can only use memory pages that do not conflict with the pages 83 * used before suspend. The unsafe pages have PageNosaveFree set 84 * and we count them using unsafe_pages. 85 * 86 * Each allocated image page is marked as PageNosave and PageNosaveFree 87 * so that swsusp_free() can release it. 88 */ 89 90 #define PG_ANY 0 91 #define PG_SAFE 1 92 #define PG_UNSAFE_CLEAR 1 93 #define PG_UNSAFE_KEEP 0 94 95 static unsigned int allocated_unsafe_pages; 96 97 static void *get_image_page(gfp_t gfp_mask, int safe_needed) 98 { 99 void *res; 100 101 res = (void *)get_zeroed_page(gfp_mask); 102 if (safe_needed) 103 while (res && swsusp_page_is_free(virt_to_page(res))) { 104 /* The page is unsafe, mark it for swsusp_free() */ 105 swsusp_set_page_forbidden(virt_to_page(res)); 106 allocated_unsafe_pages++; 107 res = (void *)get_zeroed_page(gfp_mask); 108 } 109 if (res) { 110 swsusp_set_page_forbidden(virt_to_page(res)); 111 swsusp_set_page_free(virt_to_page(res)); 112 } 113 return res; 114 } 115 116 unsigned long get_safe_page(gfp_t gfp_mask) 117 { 118 return (unsigned long)get_image_page(gfp_mask, PG_SAFE); 119 } 120 121 static struct page *alloc_image_page(gfp_t gfp_mask) 122 { 123 struct page *page; 124 125 page = alloc_page(gfp_mask); 126 if (page) { 127 swsusp_set_page_forbidden(page); 128 swsusp_set_page_free(page); 129 } 130 return page; 131 } 132 133 /** 134 * free_image_page - free page represented by @addr, allocated with 135 * get_image_page (page flags set by it must be cleared) 136 */ 137 138 static inline void free_image_page(void *addr, int clear_nosave_free) 139 { 140 struct page *page; 141 142 BUG_ON(!virt_addr_valid(addr)); 143 144 page = virt_to_page(addr); 145 146 swsusp_unset_page_forbidden(page); 147 if (clear_nosave_free) 148 swsusp_unset_page_free(page); 149 150 __free_page(page); 151 } 152 153 /* struct linked_page is used to build chains of pages */ 154 155 #define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) 156 157 struct linked_page { 158 struct linked_page *next; 159 char data[LINKED_PAGE_DATA_SIZE]; 160 } __packed; 161 162 static inline void 163 free_list_of_pages(struct linked_page *list, int clear_page_nosave) 164 { 165 while (list) { 166 struct linked_page *lp = list->next; 167 168 free_image_page(list, clear_page_nosave); 169 list = lp; 170 } 171 } 172 173 /** 174 * struct chain_allocator is used for allocating small objects out of 175 * a linked list of pages called 'the chain'. 176 * 177 * The chain grows each time when there is no room for a new object in 178 * the current page. The allocated objects cannot be freed individually. 179 * It is only possible to free them all at once, by freeing the entire 180 * chain. 181 * 182 * NOTE: The chain allocator may be inefficient if the allocated objects 183 * are not much smaller than PAGE_SIZE. 184 */ 185 186 struct chain_allocator { 187 struct linked_page *chain; /* the chain */ 188 unsigned int used_space; /* total size of objects allocated out 189 * of the current page 190 */ 191 gfp_t gfp_mask; /* mask for allocating pages */ 192 int safe_needed; /* if set, only "safe" pages are allocated */ 193 }; 194 195 static void 196 chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed) 197 { 198 ca->chain = NULL; 199 ca->used_space = LINKED_PAGE_DATA_SIZE; 200 ca->gfp_mask = gfp_mask; 201 ca->safe_needed = safe_needed; 202 } 203 204 static void *chain_alloc(struct chain_allocator *ca, unsigned int size) 205 { 206 void *ret; 207 208 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { 209 struct linked_page *lp; 210 211 lp = get_image_page(ca->gfp_mask, ca->safe_needed); 212 if (!lp) 213 return NULL; 214 215 lp->next = ca->chain; 216 ca->chain = lp; 217 ca->used_space = 0; 218 } 219 ret = ca->chain->data + ca->used_space; 220 ca->used_space += size; 221 return ret; 222 } 223 224 /** 225 * Data types related to memory bitmaps. 226 * 227 * Memory bitmap is a structure consiting of many linked lists of 228 * objects. The main list's elements are of type struct zone_bitmap 229 * and each of them corresonds to one zone. For each zone bitmap 230 * object there is a list of objects of type struct bm_block that 231 * represent each blocks of bitmap in which information is stored. 232 * 233 * struct memory_bitmap contains a pointer to the main list of zone 234 * bitmap objects, a struct bm_position used for browsing the bitmap, 235 * and a pointer to the list of pages used for allocating all of the 236 * zone bitmap objects and bitmap block objects. 237 * 238 * NOTE: It has to be possible to lay out the bitmap in memory 239 * using only allocations of order 0. Additionally, the bitmap is 240 * designed to work with arbitrary number of zones (this is over the 241 * top for now, but let's avoid making unnecessary assumptions ;-). 242 * 243 * struct zone_bitmap contains a pointer to a list of bitmap block 244 * objects and a pointer to the bitmap block object that has been 245 * most recently used for setting bits. Additionally, it contains the 246 * pfns that correspond to the start and end of the represented zone. 247 * 248 * struct bm_block contains a pointer to the memory page in which 249 * information is stored (in the form of a block of bitmap) 250 * It also contains the pfns that correspond to the start and end of 251 * the represented memory area. 252 * 253 * The memory bitmap is organized as a radix tree to guarantee fast random 254 * access to the bits. There is one radix tree for each zone (as returned 255 * from create_mem_extents). 256 * 257 * One radix tree is represented by one struct mem_zone_bm_rtree. There are 258 * two linked lists for the nodes of the tree, one for the inner nodes and 259 * one for the leave nodes. The linked leave nodes are used for fast linear 260 * access of the memory bitmap. 261 * 262 * The struct rtree_node represents one node of the radix tree. 263 */ 264 265 #define BM_END_OF_MAP (~0UL) 266 267 #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) 268 #define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) 269 #define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) 270 271 /* 272 * struct rtree_node is a wrapper struct to link the nodes 273 * of the rtree together for easy linear iteration over 274 * bits and easy freeing 275 */ 276 struct rtree_node { 277 struct list_head list; 278 unsigned long *data; 279 }; 280 281 /* 282 * struct mem_zone_bm_rtree represents a bitmap used for one 283 * populated memory zone. 284 */ 285 struct mem_zone_bm_rtree { 286 struct list_head list; /* Link Zones together */ 287 struct list_head nodes; /* Radix Tree inner nodes */ 288 struct list_head leaves; /* Radix Tree leaves */ 289 unsigned long start_pfn; /* Zone start page frame */ 290 unsigned long end_pfn; /* Zone end page frame + 1 */ 291 struct rtree_node *rtree; /* Radix Tree Root */ 292 int levels; /* Number of Radix Tree Levels */ 293 unsigned int blocks; /* Number of Bitmap Blocks */ 294 }; 295 296 /* strcut bm_position is used for browsing memory bitmaps */ 297 298 struct bm_position { 299 struct mem_zone_bm_rtree *zone; 300 struct rtree_node *node; 301 unsigned long node_pfn; 302 int node_bit; 303 }; 304 305 struct memory_bitmap { 306 struct list_head zones; 307 struct linked_page *p_list; /* list of pages used to store zone 308 * bitmap objects and bitmap block 309 * objects 310 */ 311 struct bm_position cur; /* most recently used bit position */ 312 }; 313 314 /* Functions that operate on memory bitmaps */ 315 316 #define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long)) 317 #if BITS_PER_LONG == 32 318 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2) 319 #else 320 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3) 321 #endif 322 #define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) 323 324 /* 325 * alloc_rtree_node - Allocate a new node and add it to the radix tree. 326 * 327 * This function is used to allocate inner nodes as well as the 328 * leave nodes of the radix tree. It also adds the node to the 329 * corresponding linked list passed in by the *list parameter. 330 */ 331 static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, 332 struct chain_allocator *ca, 333 struct list_head *list) 334 { 335 struct rtree_node *node; 336 337 node = chain_alloc(ca, sizeof(struct rtree_node)); 338 if (!node) 339 return NULL; 340 341 node->data = get_image_page(gfp_mask, safe_needed); 342 if (!node->data) 343 return NULL; 344 345 list_add_tail(&node->list, list); 346 347 return node; 348 } 349 350 /* 351 * add_rtree_block - Add a new leave node to the radix tree 352 * 353 * The leave nodes need to be allocated in order to keep the leaves 354 * linked list in order. This is guaranteed by the zone->blocks 355 * counter. 356 */ 357 static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, 358 int safe_needed, struct chain_allocator *ca) 359 { 360 struct rtree_node *node, *block, **dst; 361 unsigned int levels_needed, block_nr; 362 int i; 363 364 block_nr = zone->blocks; 365 levels_needed = 0; 366 367 /* How many levels do we need for this block nr? */ 368 while (block_nr) { 369 levels_needed += 1; 370 block_nr >>= BM_RTREE_LEVEL_SHIFT; 371 } 372 373 /* Make sure the rtree has enough levels */ 374 for (i = zone->levels; i < levels_needed; i++) { 375 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 376 &zone->nodes); 377 if (!node) 378 return -ENOMEM; 379 380 node->data[0] = (unsigned long)zone->rtree; 381 zone->rtree = node; 382 zone->levels += 1; 383 } 384 385 /* Allocate new block */ 386 block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves); 387 if (!block) 388 return -ENOMEM; 389 390 /* Now walk the rtree to insert the block */ 391 node = zone->rtree; 392 dst = &zone->rtree; 393 block_nr = zone->blocks; 394 for (i = zone->levels; i > 0; i--) { 395 int index; 396 397 if (!node) { 398 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 399 &zone->nodes); 400 if (!node) 401 return -ENOMEM; 402 *dst = node; 403 } 404 405 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 406 index &= BM_RTREE_LEVEL_MASK; 407 dst = (struct rtree_node **)&((*dst)->data[index]); 408 node = *dst; 409 } 410 411 zone->blocks += 1; 412 *dst = block; 413 414 return 0; 415 } 416 417 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 418 int clear_nosave_free); 419 420 /* 421 * create_zone_bm_rtree - create a radix tree for one zone 422 * 423 * Allocated the mem_zone_bm_rtree structure and initializes it. 424 * This function also allocated and builds the radix tree for the 425 * zone. 426 */ 427 static struct mem_zone_bm_rtree * 428 create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, 429 struct chain_allocator *ca, 430 unsigned long start, unsigned long end) 431 { 432 struct mem_zone_bm_rtree *zone; 433 unsigned int i, nr_blocks; 434 unsigned long pages; 435 436 pages = end - start; 437 zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree)); 438 if (!zone) 439 return NULL; 440 441 INIT_LIST_HEAD(&zone->nodes); 442 INIT_LIST_HEAD(&zone->leaves); 443 zone->start_pfn = start; 444 zone->end_pfn = end; 445 nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); 446 447 for (i = 0; i < nr_blocks; i++) { 448 if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) { 449 free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR); 450 return NULL; 451 } 452 } 453 454 return zone; 455 } 456 457 /* 458 * free_zone_bm_rtree - Free the memory of the radix tree 459 * 460 * Free all node pages of the radix tree. The mem_zone_bm_rtree 461 * structure itself is not freed here nor are the rtree_node 462 * structs. 463 */ 464 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 465 int clear_nosave_free) 466 { 467 struct rtree_node *node; 468 469 list_for_each_entry(node, &zone->nodes, list) 470 free_image_page(node->data, clear_nosave_free); 471 472 list_for_each_entry(node, &zone->leaves, list) 473 free_image_page(node->data, clear_nosave_free); 474 } 475 476 static void memory_bm_position_reset(struct memory_bitmap *bm) 477 { 478 bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, 479 list); 480 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 481 struct rtree_node, list); 482 bm->cur.node_pfn = 0; 483 bm->cur.node_bit = 0; 484 } 485 486 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); 487 488 struct mem_extent { 489 struct list_head hook; 490 unsigned long start; 491 unsigned long end; 492 }; 493 494 /** 495 * free_mem_extents - free a list of memory extents 496 * @list - list of extents to empty 497 */ 498 static void free_mem_extents(struct list_head *list) 499 { 500 struct mem_extent *ext, *aux; 501 502 list_for_each_entry_safe(ext, aux, list, hook) { 503 list_del(&ext->hook); 504 kfree(ext); 505 } 506 } 507 508 /** 509 * create_mem_extents - create a list of memory extents representing 510 * contiguous ranges of PFNs 511 * @list - list to put the extents into 512 * @gfp_mask - mask to use for memory allocations 513 */ 514 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) 515 { 516 struct zone *zone; 517 518 INIT_LIST_HEAD(list); 519 520 for_each_populated_zone(zone) { 521 unsigned long zone_start, zone_end; 522 struct mem_extent *ext, *cur, *aux; 523 524 zone_start = zone->zone_start_pfn; 525 zone_end = zone_end_pfn(zone); 526 527 list_for_each_entry(ext, list, hook) 528 if (zone_start <= ext->end) 529 break; 530 531 if (&ext->hook == list || zone_end < ext->start) { 532 /* New extent is necessary */ 533 struct mem_extent *new_ext; 534 535 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); 536 if (!new_ext) { 537 free_mem_extents(list); 538 return -ENOMEM; 539 } 540 new_ext->start = zone_start; 541 new_ext->end = zone_end; 542 list_add_tail(&new_ext->hook, &ext->hook); 543 continue; 544 } 545 546 /* Merge this zone's range of PFNs with the existing one */ 547 if (zone_start < ext->start) 548 ext->start = zone_start; 549 if (zone_end > ext->end) 550 ext->end = zone_end; 551 552 /* More merging may be possible */ 553 cur = ext; 554 list_for_each_entry_safe_continue(cur, aux, list, hook) { 555 if (zone_end < cur->start) 556 break; 557 if (zone_end < cur->end) 558 ext->end = cur->end; 559 list_del(&cur->hook); 560 kfree(cur); 561 } 562 } 563 564 return 0; 565 } 566 567 /** 568 * memory_bm_create - allocate memory for a memory bitmap 569 */ 570 static int 571 memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) 572 { 573 struct chain_allocator ca; 574 struct list_head mem_extents; 575 struct mem_extent *ext; 576 int error; 577 578 chain_init(&ca, gfp_mask, safe_needed); 579 INIT_LIST_HEAD(&bm->zones); 580 581 error = create_mem_extents(&mem_extents, gfp_mask); 582 if (error) 583 return error; 584 585 list_for_each_entry(ext, &mem_extents, hook) { 586 struct mem_zone_bm_rtree *zone; 587 588 zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, 589 ext->start, ext->end); 590 if (!zone) { 591 error = -ENOMEM; 592 goto Error; 593 } 594 list_add_tail(&zone->list, &bm->zones); 595 } 596 597 bm->p_list = ca.chain; 598 memory_bm_position_reset(bm); 599 Exit: 600 free_mem_extents(&mem_extents); 601 return error; 602 603 Error: 604 bm->p_list = ca.chain; 605 memory_bm_free(bm, PG_UNSAFE_CLEAR); 606 goto Exit; 607 } 608 609 /** 610 * memory_bm_free - free memory occupied by the memory bitmap @bm 611 */ 612 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) 613 { 614 struct mem_zone_bm_rtree *zone; 615 616 list_for_each_entry(zone, &bm->zones, list) 617 free_zone_bm_rtree(zone, clear_nosave_free); 618 619 free_list_of_pages(bm->p_list, clear_nosave_free); 620 621 INIT_LIST_HEAD(&bm->zones); 622 } 623 624 /** 625 * memory_bm_find_bit - Find the bit for pfn in the memory 626 * bitmap 627 * 628 * Find the bit in the bitmap @bm that corresponds to given pfn. 629 * The cur.zone, cur.block and cur.node_pfn member of @bm are 630 * updated. 631 * It walks the radix tree to find the page which contains the bit for 632 * pfn and returns the bit position in **addr and *bit_nr. 633 */ 634 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, 635 void **addr, unsigned int *bit_nr) 636 { 637 struct mem_zone_bm_rtree *curr, *zone; 638 struct rtree_node *node; 639 int i, block_nr; 640 641 zone = bm->cur.zone; 642 643 if (pfn >= zone->start_pfn && pfn < zone->end_pfn) 644 goto zone_found; 645 646 zone = NULL; 647 648 /* Find the right zone */ 649 list_for_each_entry(curr, &bm->zones, list) { 650 if (pfn >= curr->start_pfn && pfn < curr->end_pfn) { 651 zone = curr; 652 break; 653 } 654 } 655 656 if (!zone) 657 return -EFAULT; 658 659 zone_found: 660 /* 661 * We have a zone. Now walk the radix tree to find the leave 662 * node for our pfn. 663 */ 664 665 node = bm->cur.node; 666 if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) 667 goto node_found; 668 669 node = zone->rtree; 670 block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT; 671 672 for (i = zone->levels; i > 0; i--) { 673 int index; 674 675 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 676 index &= BM_RTREE_LEVEL_MASK; 677 BUG_ON(node->data[index] == 0); 678 node = (struct rtree_node *)node->data[index]; 679 } 680 681 node_found: 682 /* Update last position */ 683 bm->cur.zone = zone; 684 bm->cur.node = node; 685 bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK; 686 687 /* Set return values */ 688 *addr = node->data; 689 *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK; 690 691 return 0; 692 } 693 694 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) 695 { 696 void *addr; 697 unsigned int bit; 698 int error; 699 700 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 701 BUG_ON(error); 702 set_bit(bit, addr); 703 } 704 705 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) 706 { 707 void *addr; 708 unsigned int bit; 709 int error; 710 711 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 712 if (!error) 713 set_bit(bit, addr); 714 715 return error; 716 } 717 718 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) 719 { 720 void *addr; 721 unsigned int bit; 722 int error; 723 724 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 725 BUG_ON(error); 726 clear_bit(bit, addr); 727 } 728 729 static void memory_bm_clear_current(struct memory_bitmap *bm) 730 { 731 int bit; 732 733 bit = max(bm->cur.node_bit - 1, 0); 734 clear_bit(bit, bm->cur.node->data); 735 } 736 737 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) 738 { 739 void *addr; 740 unsigned int bit; 741 int error; 742 743 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 744 BUG_ON(error); 745 return test_bit(bit, addr); 746 } 747 748 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) 749 { 750 void *addr; 751 unsigned int bit; 752 753 return !memory_bm_find_bit(bm, pfn, &addr, &bit); 754 } 755 756 /* 757 * rtree_next_node - Jumps to the next leave node 758 * 759 * Sets the position to the beginning of the next node in the 760 * memory bitmap. This is either the next node in the current 761 * zone's radix tree or the first node in the radix tree of the 762 * next zone. 763 * 764 * Returns true if there is a next node, false otherwise. 765 */ 766 static bool rtree_next_node(struct memory_bitmap *bm) 767 { 768 bm->cur.node = list_entry(bm->cur.node->list.next, 769 struct rtree_node, list); 770 if (&bm->cur.node->list != &bm->cur.zone->leaves) { 771 bm->cur.node_pfn += BM_BITS_PER_BLOCK; 772 bm->cur.node_bit = 0; 773 touch_softlockup_watchdog(); 774 return true; 775 } 776 777 /* No more nodes, goto next zone */ 778 bm->cur.zone = list_entry(bm->cur.zone->list.next, 779 struct mem_zone_bm_rtree, list); 780 if (&bm->cur.zone->list != &bm->zones) { 781 bm->cur.node = list_entry(bm->cur.zone->leaves.next, 782 struct rtree_node, list); 783 bm->cur.node_pfn = 0; 784 bm->cur.node_bit = 0; 785 return true; 786 } 787 788 /* No more zones */ 789 return false; 790 } 791 792 /** 793 * memory_bm_rtree_next_pfn - Find the next set bit in the bitmap @bm 794 * 795 * Starting from the last returned position this function searches 796 * for the next set bit in the memory bitmap and returns its 797 * number. If no more bit is set BM_END_OF_MAP is returned. 798 * 799 * It is required to run memory_bm_position_reset() before the 800 * first call to this function. 801 */ 802 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) 803 { 804 unsigned long bits, pfn, pages; 805 int bit; 806 807 do { 808 pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn; 809 bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK); 810 bit = find_next_bit(bm->cur.node->data, bits, 811 bm->cur.node_bit); 812 if (bit < bits) { 813 pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit; 814 bm->cur.node_bit = bit + 1; 815 return pfn; 816 } 817 } while (rtree_next_node(bm)); 818 819 return BM_END_OF_MAP; 820 } 821 822 /** 823 * This structure represents a range of page frames the contents of which 824 * should not be saved during the suspend. 825 */ 826 827 struct nosave_region { 828 struct list_head list; 829 unsigned long start_pfn; 830 unsigned long end_pfn; 831 }; 832 833 static LIST_HEAD(nosave_regions); 834 835 /** 836 * register_nosave_region - register a range of page frames the contents 837 * of which should not be saved during the suspend (to be used in the early 838 * initialization code) 839 */ 840 841 void __init 842 __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, 843 int use_kmalloc) 844 { 845 struct nosave_region *region; 846 847 if (start_pfn >= end_pfn) 848 return; 849 850 if (!list_empty(&nosave_regions)) { 851 /* Try to extend the previous region (they should be sorted) */ 852 region = list_entry(nosave_regions.prev, 853 struct nosave_region, list); 854 if (region->end_pfn == start_pfn) { 855 region->end_pfn = end_pfn; 856 goto Report; 857 } 858 } 859 if (use_kmalloc) { 860 /* during init, this shouldn't fail */ 861 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); 862 BUG_ON(!region); 863 } else 864 /* This allocation cannot fail */ 865 region = memblock_virt_alloc(sizeof(struct nosave_region), 0); 866 region->start_pfn = start_pfn; 867 region->end_pfn = end_pfn; 868 list_add_tail(®ion->list, &nosave_regions); 869 Report: 870 printk(KERN_INFO "PM: Registered nosave memory: [mem %#010llx-%#010llx]\n", 871 (unsigned long long) start_pfn << PAGE_SHIFT, 872 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); 873 } 874 875 /* 876 * Set bits in this map correspond to the page frames the contents of which 877 * should not be saved during the suspend. 878 */ 879 static struct memory_bitmap *forbidden_pages_map; 880 881 /* Set bits in this map correspond to free page frames. */ 882 static struct memory_bitmap *free_pages_map; 883 884 /* 885 * Each page frame allocated for creating the image is marked by setting the 886 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously 887 */ 888 889 void swsusp_set_page_free(struct page *page) 890 { 891 if (free_pages_map) 892 memory_bm_set_bit(free_pages_map, page_to_pfn(page)); 893 } 894 895 static int swsusp_page_is_free(struct page *page) 896 { 897 return free_pages_map ? 898 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; 899 } 900 901 void swsusp_unset_page_free(struct page *page) 902 { 903 if (free_pages_map) 904 memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); 905 } 906 907 static void swsusp_set_page_forbidden(struct page *page) 908 { 909 if (forbidden_pages_map) 910 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); 911 } 912 913 int swsusp_page_is_forbidden(struct page *page) 914 { 915 return forbidden_pages_map ? 916 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; 917 } 918 919 static void swsusp_unset_page_forbidden(struct page *page) 920 { 921 if (forbidden_pages_map) 922 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); 923 } 924 925 /** 926 * mark_nosave_pages - set bits corresponding to the page frames the 927 * contents of which should not be saved in a given bitmap. 928 */ 929 930 static void mark_nosave_pages(struct memory_bitmap *bm) 931 { 932 struct nosave_region *region; 933 934 if (list_empty(&nosave_regions)) 935 return; 936 937 list_for_each_entry(region, &nosave_regions, list) { 938 unsigned long pfn; 939 940 pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n", 941 (unsigned long long) region->start_pfn << PAGE_SHIFT, 942 ((unsigned long long) region->end_pfn << PAGE_SHIFT) 943 - 1); 944 945 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) 946 if (pfn_valid(pfn)) { 947 /* 948 * It is safe to ignore the result of 949 * mem_bm_set_bit_check() here, since we won't 950 * touch the PFNs for which the error is 951 * returned anyway. 952 */ 953 mem_bm_set_bit_check(bm, pfn); 954 } 955 } 956 } 957 958 /** 959 * create_basic_memory_bitmaps - create bitmaps needed for marking page 960 * frames that should not be saved and free page frames. The pointers 961 * forbidden_pages_map and free_pages_map are only modified if everything 962 * goes well, because we don't want the bits to be used before both bitmaps 963 * are set up. 964 */ 965 966 int create_basic_memory_bitmaps(void) 967 { 968 struct memory_bitmap *bm1, *bm2; 969 int error = 0; 970 971 if (forbidden_pages_map && free_pages_map) 972 return 0; 973 else 974 BUG_ON(forbidden_pages_map || free_pages_map); 975 976 bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 977 if (!bm1) 978 return -ENOMEM; 979 980 error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); 981 if (error) 982 goto Free_first_object; 983 984 bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 985 if (!bm2) 986 goto Free_first_bitmap; 987 988 error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); 989 if (error) 990 goto Free_second_object; 991 992 forbidden_pages_map = bm1; 993 free_pages_map = bm2; 994 mark_nosave_pages(forbidden_pages_map); 995 996 pr_debug("PM: Basic memory bitmaps created\n"); 997 998 return 0; 999 1000 Free_second_object: 1001 kfree(bm2); 1002 Free_first_bitmap: 1003 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1004 Free_first_object: 1005 kfree(bm1); 1006 return -ENOMEM; 1007 } 1008 1009 /** 1010 * free_basic_memory_bitmaps - free memory bitmaps allocated by 1011 * create_basic_memory_bitmaps(). The auxiliary pointers are necessary 1012 * so that the bitmaps themselves are not referred to while they are being 1013 * freed. 1014 */ 1015 1016 void free_basic_memory_bitmaps(void) 1017 { 1018 struct memory_bitmap *bm1, *bm2; 1019 1020 if (WARN_ON(!(forbidden_pages_map && free_pages_map))) 1021 return; 1022 1023 bm1 = forbidden_pages_map; 1024 bm2 = free_pages_map; 1025 forbidden_pages_map = NULL; 1026 free_pages_map = NULL; 1027 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1028 kfree(bm1); 1029 memory_bm_free(bm2, PG_UNSAFE_CLEAR); 1030 kfree(bm2); 1031 1032 pr_debug("PM: Basic memory bitmaps freed\n"); 1033 } 1034 1035 /** 1036 * snapshot_additional_pages - estimate the number of additional pages 1037 * be needed for setting up the suspend image data structures for given 1038 * zone (usually the returned value is greater than the exact number) 1039 */ 1040 1041 unsigned int snapshot_additional_pages(struct zone *zone) 1042 { 1043 unsigned int rtree, nodes; 1044 1045 rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); 1046 rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), 1047 LINKED_PAGE_DATA_SIZE); 1048 while (nodes > 1) { 1049 nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL); 1050 rtree += nodes; 1051 } 1052 1053 return 2 * rtree; 1054 } 1055 1056 #ifdef CONFIG_HIGHMEM 1057 /** 1058 * count_free_highmem_pages - compute the total number of free highmem 1059 * pages, system-wide. 1060 */ 1061 1062 static unsigned int count_free_highmem_pages(void) 1063 { 1064 struct zone *zone; 1065 unsigned int cnt = 0; 1066 1067 for_each_populated_zone(zone) 1068 if (is_highmem(zone)) 1069 cnt += zone_page_state(zone, NR_FREE_PAGES); 1070 1071 return cnt; 1072 } 1073 1074 /** 1075 * saveable_highmem_page - Determine whether a highmem page should be 1076 * included in the suspend image. 1077 * 1078 * We should save the page if it isn't Nosave or NosaveFree, or Reserved, 1079 * and it isn't a part of a free chunk of pages. 1080 */ 1081 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) 1082 { 1083 struct page *page; 1084 1085 if (!pfn_valid(pfn)) 1086 return NULL; 1087 1088 page = pfn_to_page(pfn); 1089 if (page_zone(page) != zone) 1090 return NULL; 1091 1092 BUG_ON(!PageHighMem(page)); 1093 1094 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) || 1095 PageReserved(page)) 1096 return NULL; 1097 1098 if (page_is_guard(page)) 1099 return NULL; 1100 1101 return page; 1102 } 1103 1104 /** 1105 * count_highmem_pages - compute the total number of saveable highmem 1106 * pages. 1107 */ 1108 1109 static unsigned int count_highmem_pages(void) 1110 { 1111 struct zone *zone; 1112 unsigned int n = 0; 1113 1114 for_each_populated_zone(zone) { 1115 unsigned long pfn, max_zone_pfn; 1116 1117 if (!is_highmem(zone)) 1118 continue; 1119 1120 mark_free_pages(zone); 1121 max_zone_pfn = zone_end_pfn(zone); 1122 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1123 if (saveable_highmem_page(zone, pfn)) 1124 n++; 1125 } 1126 return n; 1127 } 1128 #else 1129 static inline void *saveable_highmem_page(struct zone *z, unsigned long p) 1130 { 1131 return NULL; 1132 } 1133 #endif /* CONFIG_HIGHMEM */ 1134 1135 /** 1136 * saveable_page - Determine whether a non-highmem page should be included 1137 * in the suspend image. 1138 * 1139 * We should save the page if it isn't Nosave, and is not in the range 1140 * of pages statically defined as 'unsaveable', and it isn't a part of 1141 * a free chunk of pages. 1142 */ 1143 static struct page *saveable_page(struct zone *zone, unsigned long pfn) 1144 { 1145 struct page *page; 1146 1147 if (!pfn_valid(pfn)) 1148 return NULL; 1149 1150 page = pfn_to_page(pfn); 1151 if (page_zone(page) != zone) 1152 return NULL; 1153 1154 BUG_ON(PageHighMem(page)); 1155 1156 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 1157 return NULL; 1158 1159 if (PageReserved(page) 1160 && (!kernel_page_present(page) || pfn_is_nosave(pfn))) 1161 return NULL; 1162 1163 if (page_is_guard(page)) 1164 return NULL; 1165 1166 return page; 1167 } 1168 1169 /** 1170 * count_data_pages - compute the total number of saveable non-highmem 1171 * pages. 1172 */ 1173 1174 static unsigned int count_data_pages(void) 1175 { 1176 struct zone *zone; 1177 unsigned long pfn, max_zone_pfn; 1178 unsigned int n = 0; 1179 1180 for_each_populated_zone(zone) { 1181 if (is_highmem(zone)) 1182 continue; 1183 1184 mark_free_pages(zone); 1185 max_zone_pfn = zone_end_pfn(zone); 1186 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1187 if (saveable_page(zone, pfn)) 1188 n++; 1189 } 1190 return n; 1191 } 1192 1193 /* This is needed, because copy_page and memcpy are not usable for copying 1194 * task structs. 1195 */ 1196 static inline void do_copy_page(long *dst, long *src) 1197 { 1198 int n; 1199 1200 for (n = PAGE_SIZE / sizeof(long); n; n--) 1201 *dst++ = *src++; 1202 } 1203 1204 1205 /** 1206 * safe_copy_page - check if the page we are going to copy is marked as 1207 * present in the kernel page tables (this always is the case if 1208 * CONFIG_DEBUG_PAGEALLOC is not set and in that case 1209 * kernel_page_present() always returns 'true'). 1210 */ 1211 static void safe_copy_page(void *dst, struct page *s_page) 1212 { 1213 if (kernel_page_present(s_page)) { 1214 do_copy_page(dst, page_address(s_page)); 1215 } else { 1216 kernel_map_pages(s_page, 1, 1); 1217 do_copy_page(dst, page_address(s_page)); 1218 kernel_map_pages(s_page, 1, 0); 1219 } 1220 } 1221 1222 1223 #ifdef CONFIG_HIGHMEM 1224 static inline struct page * 1225 page_is_saveable(struct zone *zone, unsigned long pfn) 1226 { 1227 return is_highmem(zone) ? 1228 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); 1229 } 1230 1231 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1232 { 1233 struct page *s_page, *d_page; 1234 void *src, *dst; 1235 1236 s_page = pfn_to_page(src_pfn); 1237 d_page = pfn_to_page(dst_pfn); 1238 if (PageHighMem(s_page)) { 1239 src = kmap_atomic(s_page); 1240 dst = kmap_atomic(d_page); 1241 do_copy_page(dst, src); 1242 kunmap_atomic(dst); 1243 kunmap_atomic(src); 1244 } else { 1245 if (PageHighMem(d_page)) { 1246 /* Page pointed to by src may contain some kernel 1247 * data modified by kmap_atomic() 1248 */ 1249 safe_copy_page(buffer, s_page); 1250 dst = kmap_atomic(d_page); 1251 copy_page(dst, buffer); 1252 kunmap_atomic(dst); 1253 } else { 1254 safe_copy_page(page_address(d_page), s_page); 1255 } 1256 } 1257 } 1258 #else 1259 #define page_is_saveable(zone, pfn) saveable_page(zone, pfn) 1260 1261 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1262 { 1263 safe_copy_page(page_address(pfn_to_page(dst_pfn)), 1264 pfn_to_page(src_pfn)); 1265 } 1266 #endif /* CONFIG_HIGHMEM */ 1267 1268 static void 1269 copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) 1270 { 1271 struct zone *zone; 1272 unsigned long pfn; 1273 1274 for_each_populated_zone(zone) { 1275 unsigned long max_zone_pfn; 1276 1277 mark_free_pages(zone); 1278 max_zone_pfn = zone_end_pfn(zone); 1279 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1280 if (page_is_saveable(zone, pfn)) 1281 memory_bm_set_bit(orig_bm, pfn); 1282 } 1283 memory_bm_position_reset(orig_bm); 1284 memory_bm_position_reset(copy_bm); 1285 for(;;) { 1286 pfn = memory_bm_next_pfn(orig_bm); 1287 if (unlikely(pfn == BM_END_OF_MAP)) 1288 break; 1289 copy_data_page(memory_bm_next_pfn(copy_bm), pfn); 1290 } 1291 } 1292 1293 /* Total number of image pages */ 1294 static unsigned int nr_copy_pages; 1295 /* Number of pages needed for saving the original pfns of the image pages */ 1296 static unsigned int nr_meta_pages; 1297 /* 1298 * Numbers of normal and highmem page frames allocated for hibernation image 1299 * before suspending devices. 1300 */ 1301 unsigned int alloc_normal, alloc_highmem; 1302 /* 1303 * Memory bitmap used for marking saveable pages (during hibernation) or 1304 * hibernation image pages (during restore) 1305 */ 1306 static struct memory_bitmap orig_bm; 1307 /* 1308 * Memory bitmap used during hibernation for marking allocated page frames that 1309 * will contain copies of saveable pages. During restore it is initially used 1310 * for marking hibernation image pages, but then the set bits from it are 1311 * duplicated in @orig_bm and it is released. On highmem systems it is next 1312 * used for marking "safe" highmem pages, but it has to be reinitialized for 1313 * this purpose. 1314 */ 1315 static struct memory_bitmap copy_bm; 1316 1317 /** 1318 * swsusp_free - free pages allocated for the suspend. 1319 * 1320 * Suspend pages are alocated before the atomic copy is made, so we 1321 * need to release them after the resume. 1322 */ 1323 1324 void swsusp_free(void) 1325 { 1326 unsigned long fb_pfn, fr_pfn; 1327 1328 if (!forbidden_pages_map || !free_pages_map) 1329 goto out; 1330 1331 memory_bm_position_reset(forbidden_pages_map); 1332 memory_bm_position_reset(free_pages_map); 1333 1334 loop: 1335 fr_pfn = memory_bm_next_pfn(free_pages_map); 1336 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1337 1338 /* 1339 * Find the next bit set in both bitmaps. This is guaranteed to 1340 * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP. 1341 */ 1342 do { 1343 if (fb_pfn < fr_pfn) 1344 fb_pfn = memory_bm_next_pfn(forbidden_pages_map); 1345 if (fr_pfn < fb_pfn) 1346 fr_pfn = memory_bm_next_pfn(free_pages_map); 1347 } while (fb_pfn != fr_pfn); 1348 1349 if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) { 1350 struct page *page = pfn_to_page(fr_pfn); 1351 1352 memory_bm_clear_current(forbidden_pages_map); 1353 memory_bm_clear_current(free_pages_map); 1354 __free_page(page); 1355 goto loop; 1356 } 1357 1358 out: 1359 nr_copy_pages = 0; 1360 nr_meta_pages = 0; 1361 restore_pblist = NULL; 1362 buffer = NULL; 1363 alloc_normal = 0; 1364 alloc_highmem = 0; 1365 } 1366 1367 /* Helper functions used for the shrinking of memory. */ 1368 1369 #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) 1370 1371 /** 1372 * preallocate_image_pages - Allocate a number of pages for hibernation image 1373 * @nr_pages: Number of page frames to allocate. 1374 * @mask: GFP flags to use for the allocation. 1375 * 1376 * Return value: Number of page frames actually allocated 1377 */ 1378 static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) 1379 { 1380 unsigned long nr_alloc = 0; 1381 1382 while (nr_pages > 0) { 1383 struct page *page; 1384 1385 page = alloc_image_page(mask); 1386 if (!page) 1387 break; 1388 memory_bm_set_bit(©_bm, page_to_pfn(page)); 1389 if (PageHighMem(page)) 1390 alloc_highmem++; 1391 else 1392 alloc_normal++; 1393 nr_pages--; 1394 nr_alloc++; 1395 } 1396 1397 return nr_alloc; 1398 } 1399 1400 static unsigned long preallocate_image_memory(unsigned long nr_pages, 1401 unsigned long avail_normal) 1402 { 1403 unsigned long alloc; 1404 1405 if (avail_normal <= alloc_normal) 1406 return 0; 1407 1408 alloc = avail_normal - alloc_normal; 1409 if (nr_pages < alloc) 1410 alloc = nr_pages; 1411 1412 return preallocate_image_pages(alloc, GFP_IMAGE); 1413 } 1414 1415 #ifdef CONFIG_HIGHMEM 1416 static unsigned long preallocate_image_highmem(unsigned long nr_pages) 1417 { 1418 return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); 1419 } 1420 1421 /** 1422 * __fraction - Compute (an approximation of) x * (multiplier / base) 1423 */ 1424 static unsigned long __fraction(u64 x, u64 multiplier, u64 base) 1425 { 1426 x *= multiplier; 1427 do_div(x, base); 1428 return (unsigned long)x; 1429 } 1430 1431 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1432 unsigned long highmem, 1433 unsigned long total) 1434 { 1435 unsigned long alloc = __fraction(nr_pages, highmem, total); 1436 1437 return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); 1438 } 1439 #else /* CONFIG_HIGHMEM */ 1440 static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) 1441 { 1442 return 0; 1443 } 1444 1445 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1446 unsigned long highmem, 1447 unsigned long total) 1448 { 1449 return 0; 1450 } 1451 #endif /* CONFIG_HIGHMEM */ 1452 1453 /** 1454 * free_unnecessary_pages - Release preallocated pages not needed for the image 1455 */ 1456 static unsigned long free_unnecessary_pages(void) 1457 { 1458 unsigned long save, to_free_normal, to_free_highmem, free; 1459 1460 save = count_data_pages(); 1461 if (alloc_normal >= save) { 1462 to_free_normal = alloc_normal - save; 1463 save = 0; 1464 } else { 1465 to_free_normal = 0; 1466 save -= alloc_normal; 1467 } 1468 save += count_highmem_pages(); 1469 if (alloc_highmem >= save) { 1470 to_free_highmem = alloc_highmem - save; 1471 } else { 1472 to_free_highmem = 0; 1473 save -= alloc_highmem; 1474 if (to_free_normal > save) 1475 to_free_normal -= save; 1476 else 1477 to_free_normal = 0; 1478 } 1479 free = to_free_normal + to_free_highmem; 1480 1481 memory_bm_position_reset(©_bm); 1482 1483 while (to_free_normal > 0 || to_free_highmem > 0) { 1484 unsigned long pfn = memory_bm_next_pfn(©_bm); 1485 struct page *page = pfn_to_page(pfn); 1486 1487 if (PageHighMem(page)) { 1488 if (!to_free_highmem) 1489 continue; 1490 to_free_highmem--; 1491 alloc_highmem--; 1492 } else { 1493 if (!to_free_normal) 1494 continue; 1495 to_free_normal--; 1496 alloc_normal--; 1497 } 1498 memory_bm_clear_bit(©_bm, pfn); 1499 swsusp_unset_page_forbidden(page); 1500 swsusp_unset_page_free(page); 1501 __free_page(page); 1502 } 1503 1504 return free; 1505 } 1506 1507 /** 1508 * minimum_image_size - Estimate the minimum acceptable size of an image 1509 * @saveable: Number of saveable pages in the system. 1510 * 1511 * We want to avoid attempting to free too much memory too hard, so estimate the 1512 * minimum acceptable size of a hibernation image to use as the lower limit for 1513 * preallocating memory. 1514 * 1515 * We assume that the minimum image size should be proportional to 1516 * 1517 * [number of saveable pages] - [number of pages that can be freed in theory] 1518 * 1519 * where the second term is the sum of (1) reclaimable slab pages, (2) active 1520 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages, 1521 * minus mapped file pages. 1522 */ 1523 static unsigned long minimum_image_size(unsigned long saveable) 1524 { 1525 unsigned long size; 1526 1527 size = global_page_state(NR_SLAB_RECLAIMABLE) 1528 + global_page_state(NR_ACTIVE_ANON) 1529 + global_page_state(NR_INACTIVE_ANON) 1530 + global_page_state(NR_ACTIVE_FILE) 1531 + global_page_state(NR_INACTIVE_FILE) 1532 - global_page_state(NR_FILE_MAPPED); 1533 1534 return saveable <= size ? 0 : saveable - size; 1535 } 1536 1537 /** 1538 * hibernate_preallocate_memory - Preallocate memory for hibernation image 1539 * 1540 * To create a hibernation image it is necessary to make a copy of every page 1541 * frame in use. We also need a number of page frames to be free during 1542 * hibernation for allocations made while saving the image and for device 1543 * drivers, in case they need to allocate memory from their hibernation 1544 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough 1545 * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through 1546 * /sys/power/reserved_size, respectively). To make this happen, we compute the 1547 * total number of available page frames and allocate at least 1548 * 1549 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 1550 * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) 1551 * 1552 * of them, which corresponds to the maximum size of a hibernation image. 1553 * 1554 * If image_size is set below the number following from the above formula, 1555 * the preallocation of memory is continued until the total number of saveable 1556 * pages in the system is below the requested image size or the minimum 1557 * acceptable image size returned by minimum_image_size(), whichever is greater. 1558 */ 1559 int hibernate_preallocate_memory(void) 1560 { 1561 struct zone *zone; 1562 unsigned long saveable, size, max_size, count, highmem, pages = 0; 1563 unsigned long alloc, save_highmem, pages_highmem, avail_normal; 1564 ktime_t start, stop; 1565 int error; 1566 1567 printk(KERN_INFO "PM: Preallocating image memory... "); 1568 start = ktime_get(); 1569 1570 error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); 1571 if (error) 1572 goto err_out; 1573 1574 error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); 1575 if (error) 1576 goto err_out; 1577 1578 alloc_normal = 0; 1579 alloc_highmem = 0; 1580 1581 /* Count the number of saveable data pages. */ 1582 save_highmem = count_highmem_pages(); 1583 saveable = count_data_pages(); 1584 1585 /* 1586 * Compute the total number of page frames we can use (count) and the 1587 * number of pages needed for image metadata (size). 1588 */ 1589 count = saveable; 1590 saveable += save_highmem; 1591 highmem = save_highmem; 1592 size = 0; 1593 for_each_populated_zone(zone) { 1594 size += snapshot_additional_pages(zone); 1595 if (is_highmem(zone)) 1596 highmem += zone_page_state(zone, NR_FREE_PAGES); 1597 else 1598 count += zone_page_state(zone, NR_FREE_PAGES); 1599 } 1600 avail_normal = count; 1601 count += highmem; 1602 count -= totalreserve_pages; 1603 1604 /* Add number of pages required for page keys (s390 only). */ 1605 size += page_key_additional_pages(saveable); 1606 1607 /* Compute the maximum number of saveable pages to leave in memory. */ 1608 max_size = (count - (size + PAGES_FOR_IO)) / 2 1609 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); 1610 /* Compute the desired number of image pages specified by image_size. */ 1611 size = DIV_ROUND_UP(image_size, PAGE_SIZE); 1612 if (size > max_size) 1613 size = max_size; 1614 /* 1615 * If the desired number of image pages is at least as large as the 1616 * current number of saveable pages in memory, allocate page frames for 1617 * the image and we're done. 1618 */ 1619 if (size >= saveable) { 1620 pages = preallocate_image_highmem(save_highmem); 1621 pages += preallocate_image_memory(saveable - pages, avail_normal); 1622 goto out; 1623 } 1624 1625 /* Estimate the minimum size of the image. */ 1626 pages = minimum_image_size(saveable); 1627 /* 1628 * To avoid excessive pressure on the normal zone, leave room in it to 1629 * accommodate an image of the minimum size (unless it's already too 1630 * small, in which case don't preallocate pages from it at all). 1631 */ 1632 if (avail_normal > pages) 1633 avail_normal -= pages; 1634 else 1635 avail_normal = 0; 1636 if (size < pages) 1637 size = min_t(unsigned long, pages, max_size); 1638 1639 /* 1640 * Let the memory management subsystem know that we're going to need a 1641 * large number of page frames to allocate and make it free some memory. 1642 * NOTE: If this is not done, performance will be hurt badly in some 1643 * test cases. 1644 */ 1645 shrink_all_memory(saveable - size); 1646 1647 /* 1648 * The number of saveable pages in memory was too high, so apply some 1649 * pressure to decrease it. First, make room for the largest possible 1650 * image and fail if that doesn't work. Next, try to decrease the size 1651 * of the image as much as indicated by 'size' using allocations from 1652 * highmem and non-highmem zones separately. 1653 */ 1654 pages_highmem = preallocate_image_highmem(highmem / 2); 1655 alloc = count - max_size; 1656 if (alloc > pages_highmem) 1657 alloc -= pages_highmem; 1658 else 1659 alloc = 0; 1660 pages = preallocate_image_memory(alloc, avail_normal); 1661 if (pages < alloc) { 1662 /* We have exhausted non-highmem pages, try highmem. */ 1663 alloc -= pages; 1664 pages += pages_highmem; 1665 pages_highmem = preallocate_image_highmem(alloc); 1666 if (pages_highmem < alloc) 1667 goto err_out; 1668 pages += pages_highmem; 1669 /* 1670 * size is the desired number of saveable pages to leave in 1671 * memory, so try to preallocate (all memory - size) pages. 1672 */ 1673 alloc = (count - pages) - size; 1674 pages += preallocate_image_highmem(alloc); 1675 } else { 1676 /* 1677 * There are approximately max_size saveable pages at this point 1678 * and we want to reduce this number down to size. 1679 */ 1680 alloc = max_size - size; 1681 size = preallocate_highmem_fraction(alloc, highmem, count); 1682 pages_highmem += size; 1683 alloc -= size; 1684 size = preallocate_image_memory(alloc, avail_normal); 1685 pages_highmem += preallocate_image_highmem(alloc - size); 1686 pages += pages_highmem + size; 1687 } 1688 1689 /* 1690 * We only need as many page frames for the image as there are saveable 1691 * pages in memory, but we have allocated more. Release the excessive 1692 * ones now. 1693 */ 1694 pages -= free_unnecessary_pages(); 1695 1696 out: 1697 stop = ktime_get(); 1698 printk(KERN_CONT "done (allocated %lu pages)\n", pages); 1699 swsusp_show_speed(start, stop, pages, "Allocated"); 1700 1701 return 0; 1702 1703 err_out: 1704 printk(KERN_CONT "\n"); 1705 swsusp_free(); 1706 return -ENOMEM; 1707 } 1708 1709 #ifdef CONFIG_HIGHMEM 1710 /** 1711 * count_pages_for_highmem - compute the number of non-highmem pages 1712 * that will be necessary for creating copies of highmem pages. 1713 */ 1714 1715 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) 1716 { 1717 unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; 1718 1719 if (free_highmem >= nr_highmem) 1720 nr_highmem = 0; 1721 else 1722 nr_highmem -= free_highmem; 1723 1724 return nr_highmem; 1725 } 1726 #else 1727 static unsigned int 1728 count_pages_for_highmem(unsigned int nr_highmem) { return 0; } 1729 #endif /* CONFIG_HIGHMEM */ 1730 1731 /** 1732 * enough_free_mem - Make sure we have enough free memory for the 1733 * snapshot image. 1734 */ 1735 1736 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) 1737 { 1738 struct zone *zone; 1739 unsigned int free = alloc_normal; 1740 1741 for_each_populated_zone(zone) 1742 if (!is_highmem(zone)) 1743 free += zone_page_state(zone, NR_FREE_PAGES); 1744 1745 nr_pages += count_pages_for_highmem(nr_highmem); 1746 pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n", 1747 nr_pages, PAGES_FOR_IO, free); 1748 1749 return free > nr_pages + PAGES_FOR_IO; 1750 } 1751 1752 #ifdef CONFIG_HIGHMEM 1753 /** 1754 * get_highmem_buffer - if there are some highmem pages in the suspend 1755 * image, we may need the buffer to copy them and/or load their data. 1756 */ 1757 1758 static inline int get_highmem_buffer(int safe_needed) 1759 { 1760 buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); 1761 return buffer ? 0 : -ENOMEM; 1762 } 1763 1764 /** 1765 * alloc_highmem_image_pages - allocate some highmem pages for the image. 1766 * Try to allocate as many pages as needed, but if the number of free 1767 * highmem pages is lesser than that, allocate them all. 1768 */ 1769 1770 static inline unsigned int 1771 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) 1772 { 1773 unsigned int to_alloc = count_free_highmem_pages(); 1774 1775 if (to_alloc > nr_highmem) 1776 to_alloc = nr_highmem; 1777 1778 nr_highmem -= to_alloc; 1779 while (to_alloc-- > 0) { 1780 struct page *page; 1781 1782 page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM); 1783 memory_bm_set_bit(bm, page_to_pfn(page)); 1784 } 1785 return nr_highmem; 1786 } 1787 #else 1788 static inline int get_highmem_buffer(int safe_needed) { return 0; } 1789 1790 static inline unsigned int 1791 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } 1792 #endif /* CONFIG_HIGHMEM */ 1793 1794 /** 1795 * swsusp_alloc - allocate memory for the suspend image 1796 * 1797 * We first try to allocate as many highmem pages as there are 1798 * saveable highmem pages in the system. If that fails, we allocate 1799 * non-highmem pages for the copies of the remaining highmem ones. 1800 * 1801 * In this approach it is likely that the copies of highmem pages will 1802 * also be located in the high memory, because of the way in which 1803 * copy_data_pages() works. 1804 */ 1805 1806 static int 1807 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, 1808 unsigned int nr_pages, unsigned int nr_highmem) 1809 { 1810 if (nr_highmem > 0) { 1811 if (get_highmem_buffer(PG_ANY)) 1812 goto err_out; 1813 if (nr_highmem > alloc_highmem) { 1814 nr_highmem -= alloc_highmem; 1815 nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); 1816 } 1817 } 1818 if (nr_pages > alloc_normal) { 1819 nr_pages -= alloc_normal; 1820 while (nr_pages-- > 0) { 1821 struct page *page; 1822 1823 page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); 1824 if (!page) 1825 goto err_out; 1826 memory_bm_set_bit(copy_bm, page_to_pfn(page)); 1827 } 1828 } 1829 1830 return 0; 1831 1832 err_out: 1833 swsusp_free(); 1834 return -ENOMEM; 1835 } 1836 1837 asmlinkage __visible int swsusp_save(void) 1838 { 1839 unsigned int nr_pages, nr_highmem; 1840 1841 printk(KERN_INFO "PM: Creating hibernation image:\n"); 1842 1843 drain_local_pages(NULL); 1844 nr_pages = count_data_pages(); 1845 nr_highmem = count_highmem_pages(); 1846 printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem); 1847 1848 if (!enough_free_mem(nr_pages, nr_highmem)) { 1849 printk(KERN_ERR "PM: Not enough free memory\n"); 1850 return -ENOMEM; 1851 } 1852 1853 if (swsusp_alloc(&orig_bm, ©_bm, nr_pages, nr_highmem)) { 1854 printk(KERN_ERR "PM: Memory allocation failed\n"); 1855 return -ENOMEM; 1856 } 1857 1858 /* During allocating of suspend pagedir, new cold pages may appear. 1859 * Kill them. 1860 */ 1861 drain_local_pages(NULL); 1862 copy_data_pages(©_bm, &orig_bm); 1863 1864 /* 1865 * End of critical section. From now on, we can write to memory, 1866 * but we should not touch disk. This specially means we must _not_ 1867 * touch swap space! Except we must write out our image of course. 1868 */ 1869 1870 nr_pages += nr_highmem; 1871 nr_copy_pages = nr_pages; 1872 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); 1873 1874 printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n", 1875 nr_pages); 1876 1877 return 0; 1878 } 1879 1880 #ifndef CONFIG_ARCH_HIBERNATION_HEADER 1881 static int init_header_complete(struct swsusp_info *info) 1882 { 1883 memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); 1884 info->version_code = LINUX_VERSION_CODE; 1885 return 0; 1886 } 1887 1888 static char *check_image_kernel(struct swsusp_info *info) 1889 { 1890 if (info->version_code != LINUX_VERSION_CODE) 1891 return "kernel version"; 1892 if (strcmp(info->uts.sysname,init_utsname()->sysname)) 1893 return "system type"; 1894 if (strcmp(info->uts.release,init_utsname()->release)) 1895 return "kernel release"; 1896 if (strcmp(info->uts.version,init_utsname()->version)) 1897 return "version"; 1898 if (strcmp(info->uts.machine,init_utsname()->machine)) 1899 return "machine"; 1900 return NULL; 1901 } 1902 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ 1903 1904 unsigned long snapshot_get_image_size(void) 1905 { 1906 return nr_copy_pages + nr_meta_pages + 1; 1907 } 1908 1909 static int init_header(struct swsusp_info *info) 1910 { 1911 memset(info, 0, sizeof(struct swsusp_info)); 1912 info->num_physpages = get_num_physpages(); 1913 info->image_pages = nr_copy_pages; 1914 info->pages = snapshot_get_image_size(); 1915 info->size = info->pages; 1916 info->size <<= PAGE_SHIFT; 1917 return init_header_complete(info); 1918 } 1919 1920 /** 1921 * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm 1922 * are stored in the array @buf[] (1 page at a time) 1923 */ 1924 1925 static inline void 1926 pack_pfns(unsigned long *buf, struct memory_bitmap *bm) 1927 { 1928 int j; 1929 1930 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 1931 buf[j] = memory_bm_next_pfn(bm); 1932 if (unlikely(buf[j] == BM_END_OF_MAP)) 1933 break; 1934 /* Save page key for data page (s390 only). */ 1935 page_key_read(buf + j); 1936 } 1937 } 1938 1939 /** 1940 * snapshot_read_next - used for reading the system memory snapshot. 1941 * 1942 * On the first call to it @handle should point to a zeroed 1943 * snapshot_handle structure. The structure gets updated and a pointer 1944 * to it should be passed to this function every next time. 1945 * 1946 * On success the function returns a positive number. Then, the caller 1947 * is allowed to read up to the returned number of bytes from the memory 1948 * location computed by the data_of() macro. 1949 * 1950 * The function returns 0 to indicate the end of data stream condition, 1951 * and a negative number is returned on error. In such cases the 1952 * structure pointed to by @handle is not updated and should not be used 1953 * any more. 1954 */ 1955 1956 int snapshot_read_next(struct snapshot_handle *handle) 1957 { 1958 if (handle->cur > nr_meta_pages + nr_copy_pages) 1959 return 0; 1960 1961 if (!buffer) { 1962 /* This makes the buffer be freed by swsusp_free() */ 1963 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 1964 if (!buffer) 1965 return -ENOMEM; 1966 } 1967 if (!handle->cur) { 1968 int error; 1969 1970 error = init_header((struct swsusp_info *)buffer); 1971 if (error) 1972 return error; 1973 handle->buffer = buffer; 1974 memory_bm_position_reset(&orig_bm); 1975 memory_bm_position_reset(©_bm); 1976 } else if (handle->cur <= nr_meta_pages) { 1977 clear_page(buffer); 1978 pack_pfns(buffer, &orig_bm); 1979 } else { 1980 struct page *page; 1981 1982 page = pfn_to_page(memory_bm_next_pfn(©_bm)); 1983 if (PageHighMem(page)) { 1984 /* Highmem pages are copied to the buffer, 1985 * because we can't return with a kmapped 1986 * highmem page (we may not be called again). 1987 */ 1988 void *kaddr; 1989 1990 kaddr = kmap_atomic(page); 1991 copy_page(buffer, kaddr); 1992 kunmap_atomic(kaddr); 1993 handle->buffer = buffer; 1994 } else { 1995 handle->buffer = page_address(page); 1996 } 1997 } 1998 handle->cur++; 1999 return PAGE_SIZE; 2000 } 2001 2002 /** 2003 * mark_unsafe_pages - mark the pages that cannot be used for storing 2004 * the image during resume, because they conflict with the pages that 2005 * had been used before suspend 2006 */ 2007 2008 static int mark_unsafe_pages(struct memory_bitmap *bm) 2009 { 2010 struct zone *zone; 2011 unsigned long pfn, max_zone_pfn; 2012 2013 /* Clear page flags */ 2014 for_each_populated_zone(zone) { 2015 max_zone_pfn = zone_end_pfn(zone); 2016 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 2017 if (pfn_valid(pfn)) 2018 swsusp_unset_page_free(pfn_to_page(pfn)); 2019 } 2020 2021 /* Mark pages that correspond to the "original" pfns as "unsafe" */ 2022 memory_bm_position_reset(bm); 2023 do { 2024 pfn = memory_bm_next_pfn(bm); 2025 if (likely(pfn != BM_END_OF_MAP)) { 2026 if (likely(pfn_valid(pfn))) 2027 swsusp_set_page_free(pfn_to_page(pfn)); 2028 else 2029 return -EFAULT; 2030 } 2031 } while (pfn != BM_END_OF_MAP); 2032 2033 allocated_unsafe_pages = 0; 2034 2035 return 0; 2036 } 2037 2038 static void 2039 duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) 2040 { 2041 unsigned long pfn; 2042 2043 memory_bm_position_reset(src); 2044 pfn = memory_bm_next_pfn(src); 2045 while (pfn != BM_END_OF_MAP) { 2046 memory_bm_set_bit(dst, pfn); 2047 pfn = memory_bm_next_pfn(src); 2048 } 2049 } 2050 2051 static int check_header(struct swsusp_info *info) 2052 { 2053 char *reason; 2054 2055 reason = check_image_kernel(info); 2056 if (!reason && info->num_physpages != get_num_physpages()) 2057 reason = "memory size"; 2058 if (reason) { 2059 printk(KERN_ERR "PM: Image mismatch: %s\n", reason); 2060 return -EPERM; 2061 } 2062 return 0; 2063 } 2064 2065 /** 2066 * load header - check the image header and copy data from it 2067 */ 2068 2069 static int 2070 load_header(struct swsusp_info *info) 2071 { 2072 int error; 2073 2074 restore_pblist = NULL; 2075 error = check_header(info); 2076 if (!error) { 2077 nr_copy_pages = info->image_pages; 2078 nr_meta_pages = info->pages - info->image_pages - 1; 2079 } 2080 return error; 2081 } 2082 2083 /** 2084 * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set 2085 * the corresponding bit in the memory bitmap @bm 2086 */ 2087 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) 2088 { 2089 int j; 2090 2091 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 2092 if (unlikely(buf[j] == BM_END_OF_MAP)) 2093 break; 2094 2095 /* Extract and buffer page key for data page (s390 only). */ 2096 page_key_memorize(buf + j); 2097 2098 if (memory_bm_pfn_present(bm, buf[j])) 2099 memory_bm_set_bit(bm, buf[j]); 2100 else 2101 return -EFAULT; 2102 } 2103 2104 return 0; 2105 } 2106 2107 /* List of "safe" pages that may be used to store data loaded from the suspend 2108 * image 2109 */ 2110 static struct linked_page *safe_pages_list; 2111 2112 #ifdef CONFIG_HIGHMEM 2113 /* struct highmem_pbe is used for creating the list of highmem pages that 2114 * should be restored atomically during the resume from disk, because the page 2115 * frames they have occupied before the suspend are in use. 2116 */ 2117 struct highmem_pbe { 2118 struct page *copy_page; /* data is here now */ 2119 struct page *orig_page; /* data was here before the suspend */ 2120 struct highmem_pbe *next; 2121 }; 2122 2123 /* List of highmem PBEs needed for restoring the highmem pages that were 2124 * allocated before the suspend and included in the suspend image, but have 2125 * also been allocated by the "resume" kernel, so their contents cannot be 2126 * written directly to their "original" page frames. 2127 */ 2128 static struct highmem_pbe *highmem_pblist; 2129 2130 /** 2131 * count_highmem_image_pages - compute the number of highmem pages in the 2132 * suspend image. The bits in the memory bitmap @bm that correspond to the 2133 * image pages are assumed to be set. 2134 */ 2135 2136 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) 2137 { 2138 unsigned long pfn; 2139 unsigned int cnt = 0; 2140 2141 memory_bm_position_reset(bm); 2142 pfn = memory_bm_next_pfn(bm); 2143 while (pfn != BM_END_OF_MAP) { 2144 if (PageHighMem(pfn_to_page(pfn))) 2145 cnt++; 2146 2147 pfn = memory_bm_next_pfn(bm); 2148 } 2149 return cnt; 2150 } 2151 2152 /** 2153 * prepare_highmem_image - try to allocate as many highmem pages as 2154 * there are highmem image pages (@nr_highmem_p points to the variable 2155 * containing the number of highmem image pages). The pages that are 2156 * "safe" (ie. will not be overwritten when the suspend image is 2157 * restored) have the corresponding bits set in @bm (it must be 2158 * unitialized). 2159 * 2160 * NOTE: This function should not be called if there are no highmem 2161 * image pages. 2162 */ 2163 2164 static unsigned int safe_highmem_pages; 2165 2166 static struct memory_bitmap *safe_highmem_bm; 2167 2168 static int 2169 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) 2170 { 2171 unsigned int to_alloc; 2172 2173 if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) 2174 return -ENOMEM; 2175 2176 if (get_highmem_buffer(PG_SAFE)) 2177 return -ENOMEM; 2178 2179 to_alloc = count_free_highmem_pages(); 2180 if (to_alloc > *nr_highmem_p) 2181 to_alloc = *nr_highmem_p; 2182 else 2183 *nr_highmem_p = to_alloc; 2184 2185 safe_highmem_pages = 0; 2186 while (to_alloc-- > 0) { 2187 struct page *page; 2188 2189 page = alloc_page(__GFP_HIGHMEM); 2190 if (!swsusp_page_is_free(page)) { 2191 /* The page is "safe", set its bit the bitmap */ 2192 memory_bm_set_bit(bm, page_to_pfn(page)); 2193 safe_highmem_pages++; 2194 } 2195 /* Mark the page as allocated */ 2196 swsusp_set_page_forbidden(page); 2197 swsusp_set_page_free(page); 2198 } 2199 memory_bm_position_reset(bm); 2200 safe_highmem_bm = bm; 2201 return 0; 2202 } 2203 2204 /** 2205 * get_highmem_page_buffer - for given highmem image page find the buffer 2206 * that suspend_write_next() should set for its caller to write to. 2207 * 2208 * If the page is to be saved to its "original" page frame or a copy of 2209 * the page is to be made in the highmem, @buffer is returned. Otherwise, 2210 * the copy of the page is to be made in normal memory, so the address of 2211 * the copy is returned. 2212 * 2213 * If @buffer is returned, the caller of suspend_write_next() will write 2214 * the page's contents to @buffer, so they will have to be copied to the 2215 * right location on the next call to suspend_write_next() and it is done 2216 * with the help of copy_last_highmem_page(). For this purpose, if 2217 * @buffer is returned, @last_highmem page is set to the page to which 2218 * the data will have to be copied from @buffer. 2219 */ 2220 2221 static struct page *last_highmem_page; 2222 2223 static void * 2224 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) 2225 { 2226 struct highmem_pbe *pbe; 2227 void *kaddr; 2228 2229 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { 2230 /* We have allocated the "original" page frame and we can 2231 * use it directly to store the loaded page. 2232 */ 2233 last_highmem_page = page; 2234 return buffer; 2235 } 2236 /* The "original" page frame has not been allocated and we have to 2237 * use a "safe" page frame to store the loaded page. 2238 */ 2239 pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); 2240 if (!pbe) { 2241 swsusp_free(); 2242 return ERR_PTR(-ENOMEM); 2243 } 2244 pbe->orig_page = page; 2245 if (safe_highmem_pages > 0) { 2246 struct page *tmp; 2247 2248 /* Copy of the page will be stored in high memory */ 2249 kaddr = buffer; 2250 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); 2251 safe_highmem_pages--; 2252 last_highmem_page = tmp; 2253 pbe->copy_page = tmp; 2254 } else { 2255 /* Copy of the page will be stored in normal memory */ 2256 kaddr = safe_pages_list; 2257 safe_pages_list = safe_pages_list->next; 2258 pbe->copy_page = virt_to_page(kaddr); 2259 } 2260 pbe->next = highmem_pblist; 2261 highmem_pblist = pbe; 2262 return kaddr; 2263 } 2264 2265 /** 2266 * copy_last_highmem_page - copy the contents of a highmem image from 2267 * @buffer, where the caller of snapshot_write_next() has place them, 2268 * to the right location represented by @last_highmem_page . 2269 */ 2270 2271 static void copy_last_highmem_page(void) 2272 { 2273 if (last_highmem_page) { 2274 void *dst; 2275 2276 dst = kmap_atomic(last_highmem_page); 2277 copy_page(dst, buffer); 2278 kunmap_atomic(dst); 2279 last_highmem_page = NULL; 2280 } 2281 } 2282 2283 static inline int last_highmem_page_copied(void) 2284 { 2285 return !last_highmem_page; 2286 } 2287 2288 static inline void free_highmem_data(void) 2289 { 2290 if (safe_highmem_bm) 2291 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); 2292 2293 if (buffer) 2294 free_image_page(buffer, PG_UNSAFE_CLEAR); 2295 } 2296 #else 2297 static unsigned int 2298 count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } 2299 2300 static inline int 2301 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) 2302 { 2303 return 0; 2304 } 2305 2306 static inline void * 2307 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) 2308 { 2309 return ERR_PTR(-EINVAL); 2310 } 2311 2312 static inline void copy_last_highmem_page(void) {} 2313 static inline int last_highmem_page_copied(void) { return 1; } 2314 static inline void free_highmem_data(void) {} 2315 #endif /* CONFIG_HIGHMEM */ 2316 2317 /** 2318 * prepare_image - use the memory bitmap @bm to mark the pages that will 2319 * be overwritten in the process of restoring the system memory state 2320 * from the suspend image ("unsafe" pages) and allocate memory for the 2321 * image. 2322 * 2323 * The idea is to allocate a new memory bitmap first and then allocate 2324 * as many pages as needed for the image data, but not to assign these 2325 * pages to specific tasks initially. Instead, we just mark them as 2326 * allocated and create a lists of "safe" pages that will be used 2327 * later. On systems with high memory a list of "safe" highmem pages is 2328 * also created. 2329 */ 2330 2331 #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) 2332 2333 static int 2334 prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) 2335 { 2336 unsigned int nr_pages, nr_highmem; 2337 struct linked_page *sp_list, *lp; 2338 int error; 2339 2340 /* If there is no highmem, the buffer will not be necessary */ 2341 free_image_page(buffer, PG_UNSAFE_CLEAR); 2342 buffer = NULL; 2343 2344 nr_highmem = count_highmem_image_pages(bm); 2345 error = mark_unsafe_pages(bm); 2346 if (error) 2347 goto Free; 2348 2349 error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); 2350 if (error) 2351 goto Free; 2352 2353 duplicate_memory_bitmap(new_bm, bm); 2354 memory_bm_free(bm, PG_UNSAFE_KEEP); 2355 if (nr_highmem > 0) { 2356 error = prepare_highmem_image(bm, &nr_highmem); 2357 if (error) 2358 goto Free; 2359 } 2360 /* Reserve some safe pages for potential later use. 2361 * 2362 * NOTE: This way we make sure there will be enough safe pages for the 2363 * chain_alloc() in get_buffer(). It is a bit wasteful, but 2364 * nr_copy_pages cannot be greater than 50% of the memory anyway. 2365 */ 2366 sp_list = NULL; 2367 /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ 2368 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2369 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); 2370 while (nr_pages > 0) { 2371 lp = get_image_page(GFP_ATOMIC, PG_SAFE); 2372 if (!lp) { 2373 error = -ENOMEM; 2374 goto Free; 2375 } 2376 lp->next = sp_list; 2377 sp_list = lp; 2378 nr_pages--; 2379 } 2380 /* Preallocate memory for the image */ 2381 safe_pages_list = NULL; 2382 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 2383 while (nr_pages > 0) { 2384 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); 2385 if (!lp) { 2386 error = -ENOMEM; 2387 goto Free; 2388 } 2389 if (!swsusp_page_is_free(virt_to_page(lp))) { 2390 /* The page is "safe", add it to the list */ 2391 lp->next = safe_pages_list; 2392 safe_pages_list = lp; 2393 } 2394 /* Mark the page as allocated */ 2395 swsusp_set_page_forbidden(virt_to_page(lp)); 2396 swsusp_set_page_free(virt_to_page(lp)); 2397 nr_pages--; 2398 } 2399 /* Free the reserved safe pages so that chain_alloc() can use them */ 2400 while (sp_list) { 2401 lp = sp_list->next; 2402 free_image_page(sp_list, PG_UNSAFE_CLEAR); 2403 sp_list = lp; 2404 } 2405 return 0; 2406 2407 Free: 2408 swsusp_free(); 2409 return error; 2410 } 2411 2412 /** 2413 * get_buffer - compute the address that snapshot_write_next() should 2414 * set for its caller to write to. 2415 */ 2416 2417 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) 2418 { 2419 struct pbe *pbe; 2420 struct page *page; 2421 unsigned long pfn = memory_bm_next_pfn(bm); 2422 2423 if (pfn == BM_END_OF_MAP) 2424 return ERR_PTR(-EFAULT); 2425 2426 page = pfn_to_page(pfn); 2427 if (PageHighMem(page)) 2428 return get_highmem_page_buffer(page, ca); 2429 2430 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) 2431 /* We have allocated the "original" page frame and we can 2432 * use it directly to store the loaded page. 2433 */ 2434 return page_address(page); 2435 2436 /* The "original" page frame has not been allocated and we have to 2437 * use a "safe" page frame to store the loaded page. 2438 */ 2439 pbe = chain_alloc(ca, sizeof(struct pbe)); 2440 if (!pbe) { 2441 swsusp_free(); 2442 return ERR_PTR(-ENOMEM); 2443 } 2444 pbe->orig_address = page_address(page); 2445 pbe->address = safe_pages_list; 2446 safe_pages_list = safe_pages_list->next; 2447 pbe->next = restore_pblist; 2448 restore_pblist = pbe; 2449 return pbe->address; 2450 } 2451 2452 /** 2453 * snapshot_write_next - used for writing the system memory snapshot. 2454 * 2455 * On the first call to it @handle should point to a zeroed 2456 * snapshot_handle structure. The structure gets updated and a pointer 2457 * to it should be passed to this function every next time. 2458 * 2459 * On success the function returns a positive number. Then, the caller 2460 * is allowed to write up to the returned number of bytes to the memory 2461 * location computed by the data_of() macro. 2462 * 2463 * The function returns 0 to indicate the "end of file" condition, 2464 * and a negative number is returned on error. In such cases the 2465 * structure pointed to by @handle is not updated and should not be used 2466 * any more. 2467 */ 2468 2469 int snapshot_write_next(struct snapshot_handle *handle) 2470 { 2471 static struct chain_allocator ca; 2472 int error = 0; 2473 2474 /* Check if we have already loaded the entire image */ 2475 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) 2476 return 0; 2477 2478 handle->sync_read = 1; 2479 2480 if (!handle->cur) { 2481 if (!buffer) 2482 /* This makes the buffer be freed by swsusp_free() */ 2483 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2484 2485 if (!buffer) 2486 return -ENOMEM; 2487 2488 handle->buffer = buffer; 2489 } else if (handle->cur == 1) { 2490 error = load_header(buffer); 2491 if (error) 2492 return error; 2493 2494 error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); 2495 if (error) 2496 return error; 2497 2498 /* Allocate buffer for page keys. */ 2499 error = page_key_alloc(nr_copy_pages); 2500 if (error) 2501 return error; 2502 2503 } else if (handle->cur <= nr_meta_pages + 1) { 2504 error = unpack_orig_pfns(buffer, ©_bm); 2505 if (error) 2506 return error; 2507 2508 if (handle->cur == nr_meta_pages + 1) { 2509 error = prepare_image(&orig_bm, ©_bm); 2510 if (error) 2511 return error; 2512 2513 chain_init(&ca, GFP_ATOMIC, PG_SAFE); 2514 memory_bm_position_reset(&orig_bm); 2515 restore_pblist = NULL; 2516 handle->buffer = get_buffer(&orig_bm, &ca); 2517 handle->sync_read = 0; 2518 if (IS_ERR(handle->buffer)) 2519 return PTR_ERR(handle->buffer); 2520 } 2521 } else { 2522 copy_last_highmem_page(); 2523 /* Restore page key for data page (s390 only). */ 2524 page_key_write(handle->buffer); 2525 handle->buffer = get_buffer(&orig_bm, &ca); 2526 if (IS_ERR(handle->buffer)) 2527 return PTR_ERR(handle->buffer); 2528 if (handle->buffer != buffer) 2529 handle->sync_read = 0; 2530 } 2531 handle->cur++; 2532 return PAGE_SIZE; 2533 } 2534 2535 /** 2536 * snapshot_write_finalize - must be called after the last call to 2537 * snapshot_write_next() in case the last page in the image happens 2538 * to be a highmem page and its contents should be stored in the 2539 * highmem. Additionally, it releases the memory that will not be 2540 * used any more. 2541 */ 2542 2543 void snapshot_write_finalize(struct snapshot_handle *handle) 2544 { 2545 copy_last_highmem_page(); 2546 /* Restore page key for data page (s390 only). */ 2547 page_key_write(handle->buffer); 2548 page_key_free(); 2549 /* Free only if we have loaded the image entirely */ 2550 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { 2551 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); 2552 free_highmem_data(); 2553 } 2554 } 2555 2556 int snapshot_image_loaded(struct snapshot_handle *handle) 2557 { 2558 return !(!nr_copy_pages || !last_highmem_page_copied() || 2559 handle->cur <= nr_meta_pages + nr_copy_pages); 2560 } 2561 2562 #ifdef CONFIG_HIGHMEM 2563 /* Assumes that @buf is ready and points to a "safe" page */ 2564 static inline void 2565 swap_two_pages_data(struct page *p1, struct page *p2, void *buf) 2566 { 2567 void *kaddr1, *kaddr2; 2568 2569 kaddr1 = kmap_atomic(p1); 2570 kaddr2 = kmap_atomic(p2); 2571 copy_page(buf, kaddr1); 2572 copy_page(kaddr1, kaddr2); 2573 copy_page(kaddr2, buf); 2574 kunmap_atomic(kaddr2); 2575 kunmap_atomic(kaddr1); 2576 } 2577 2578 /** 2579 * restore_highmem - for each highmem page that was allocated before 2580 * the suspend and included in the suspend image, and also has been 2581 * allocated by the "resume" kernel swap its current (ie. "before 2582 * resume") contents with the previous (ie. "before suspend") one. 2583 * 2584 * If the resume eventually fails, we can call this function once 2585 * again and restore the "before resume" highmem state. 2586 */ 2587 2588 int restore_highmem(void) 2589 { 2590 struct highmem_pbe *pbe = highmem_pblist; 2591 void *buf; 2592 2593 if (!pbe) 2594 return 0; 2595 2596 buf = get_image_page(GFP_ATOMIC, PG_SAFE); 2597 if (!buf) 2598 return -ENOMEM; 2599 2600 while (pbe) { 2601 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); 2602 pbe = pbe->next; 2603 } 2604 free_image_page(buf, PG_UNSAFE_CLEAR); 2605 return 0; 2606 } 2607 #endif /* CONFIG_HIGHMEM */ 2608