/*
 * linux/kernel/power/snapshot.c
 *
 * This file provides system snapshot/restore functionality for swsusp.
 *
 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
 *
 * This file is released under the GPLv2.
 *
 */

#include <linux/version.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/console.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/compiler.h>

#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

#include "power.h"

static int swsusp_page_is_free(struct page *);
static void swsusp_set_page_forbidden(struct page *);
static void swsusp_unset_page_forbidden(struct page *);

/*
 * Number of bytes to reserve for memory allocations made by device drivers
 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
 * cause image creation to fail (tunable via /sys/power/reserved_size).
 */
unsigned long reserved_size;

void __init hibernate_reserved_size_init(void)
{
        reserved_size = SPARE_PAGES * PAGE_SIZE;
}

/*
 * Preferred image size in bytes (tunable via /sys/power/image_size).
 * When it is set to N, swsusp will do its best to ensure the image
 * size will not exceed N bytes, but if that is impossible, it will
 * try to create the smallest image possible.
 */
unsigned long image_size;

void __init hibernate_image_size_init(void)
{
        image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
}

/* List of PBEs needed for restoring the pages that were allocated before
 * the suspend and included in the suspend image, but have also been
 * allocated by the "resume" kernel, so their contents cannot be written
 * directly to their "original" page frames.
 */
struct pbe *restore_pblist;

/* Pointer to an auxiliary buffer (1 page) */
static void *buffer;

/**
 * @safe_needed - on resume, for storing the PBE list and the image,
 * we can only use memory pages that do not conflict with the pages
 * used before suspend. The unsafe pages have PageNosaveFree set
 * and we count them using allocated_unsafe_pages.
 *
 * Each allocated image page is marked as PageNosave and PageNosaveFree
 * so that swsusp_free() can release it.
 */

#define PG_ANY          0
#define PG_SAFE         1
#define PG_UNSAFE_CLEAR 1
#define PG_UNSAFE_KEEP  0

static unsigned int allocated_unsafe_pages;

static void *get_image_page(gfp_t gfp_mask, int safe_needed)
{
        void *res;

        res = (void *)get_zeroed_page(gfp_mask);
        if (safe_needed)
                while (res && swsusp_page_is_free(virt_to_page(res))) {
                        /* The page is unsafe, mark it for swsusp_free() */
                        swsusp_set_page_forbidden(virt_to_page(res));
                        allocated_unsafe_pages++;
                        res = (void *)get_zeroed_page(gfp_mask);
                }
        if (res) {
                swsusp_set_page_forbidden(virt_to_page(res));
                swsusp_set_page_free(virt_to_page(res));
        }
        return res;
}

unsigned long get_safe_page(gfp_t gfp_mask)
{
        return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
}

static struct page *alloc_image_page(gfp_t gfp_mask)
{
        struct page *page;

        page = alloc_page(gfp_mask);
        if (page) {
                swsusp_set_page_forbidden(page);
                swsusp_set_page_free(page);
        }
        return page;
}

/**
 * free_image_page - free page represented by @addr, allocated with
 * get_image_page (page flags set by it must be cleared)
 */

static inline void free_image_page(void *addr, int clear_nosave_free)
{
        struct page *page;

        BUG_ON(!virt_addr_valid(addr));

        page = virt_to_page(addr);

        swsusp_unset_page_forbidden(page);
        if (clear_nosave_free)
                swsusp_unset_page_free(page);

        __free_page(page);
}

/* struct linked_page is used to build chains of pages */

#define LINKED_PAGE_DATA_SIZE   (PAGE_SIZE - sizeof(void *))

struct linked_page {
        struct linked_page *next;
        char data[LINKED_PAGE_DATA_SIZE];
} __packed;

static inline void
free_list_of_pages(struct linked_page *list, int clear_page_nosave)
{
        while (list) {
                struct linked_page *lp = list->next;

                free_image_page(list, clear_page_nosave);
                list = lp;
        }
}

/**
 * struct chain_allocator is used for allocating small objects out of
 * a linked list of pages called 'the chain'.
 *
 * The chain grows whenever there is no room for a new object in the
 * current page. The allocated objects cannot be freed individually.
 * It is only possible to free them all at once, by freeing the entire
 * chain.
 *
 * NOTE: The chain allocator may be inefficient if the allocated objects
 * are not much smaller than PAGE_SIZE.
 */
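
/*
 * Illustrative only, not part of the build: a minimal sketch of how the
 * chain allocator declared below is meant to be used -- initialize once,
 * carve out small objects, then release everything in one shot (there is
 * no per-object free). The function name is hypothetical.
 */
#if 0
static int chain_allocator_example(void)
{
        struct chain_allocator ca;
        void *obj;

        chain_init(&ca, GFP_KERNEL, PG_ANY);
        obj = chain_alloc(&ca, 64);     /* carve out a 64-byte object */
        if (!obj)
                return -ENOMEM;
        /* ... use obj ... */
        free_list_of_pages(ca.chain, PG_UNSAFE_KEEP);   /* free whole chain */
        return 0;
}
#endif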
183 */ 184 185 struct chain_allocator { 186 struct linked_page *chain; /* the chain */ 187 unsigned int used_space; /* total size of objects allocated out 188 * of the current page 189 */ 190 gfp_t gfp_mask; /* mask for allocating pages */ 191 int safe_needed; /* if set, only "safe" pages are allocated */ 192 }; 193 194 static void 195 chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed) 196 { 197 ca->chain = NULL; 198 ca->used_space = LINKED_PAGE_DATA_SIZE; 199 ca->gfp_mask = gfp_mask; 200 ca->safe_needed = safe_needed; 201 } 202 203 static void *chain_alloc(struct chain_allocator *ca, unsigned int size) 204 { 205 void *ret; 206 207 if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { 208 struct linked_page *lp; 209 210 lp = get_image_page(ca->gfp_mask, ca->safe_needed); 211 if (!lp) 212 return NULL; 213 214 lp->next = ca->chain; 215 ca->chain = lp; 216 ca->used_space = 0; 217 } 218 ret = ca->chain->data + ca->used_space; 219 ca->used_space += size; 220 return ret; 221 } 222 223 /** 224 * Data types related to memory bitmaps. 225 * 226 * Memory bitmap is a structure consiting of many linked lists of 227 * objects. The main list's elements are of type struct zone_bitmap 228 * and each of them corresonds to one zone. For each zone bitmap 229 * object there is a list of objects of type struct bm_block that 230 * represent each blocks of bitmap in which information is stored. 231 * 232 * struct memory_bitmap contains a pointer to the main list of zone 233 * bitmap objects, a struct bm_position used for browsing the bitmap, 234 * and a pointer to the list of pages used for allocating all of the 235 * zone bitmap objects and bitmap block objects. 236 * 237 * NOTE: It has to be possible to lay out the bitmap in memory 238 * using only allocations of order 0. Additionally, the bitmap is 239 * designed to work with arbitrary number of zones (this is over the 240 * top for now, but let's avoid making unnecessary assumptions ;-). 241 * 242 * struct zone_bitmap contains a pointer to a list of bitmap block 243 * objects and a pointer to the bitmap block object that has been 244 * most recently used for setting bits. Additionally, it contains the 245 * pfns that correspond to the start and end of the represented zone. 246 * 247 * struct bm_block contains a pointer to the memory page in which 248 * information is stored (in the form of a block of bitmap) 249 * It also contains the pfns that correspond to the start and end of 250 * the represented memory area. 251 * 252 * The memory bitmap is organized as a radix tree to guarantee fast random 253 * access to the bits. There is one radix tree for each zone (as returned 254 * from create_mem_extents). 255 * 256 * One radix tree is represented by one struct mem_zone_bm_rtree. There are 257 * two linked lists for the nodes of the tree, one for the inner nodes and 258 * one for the leave nodes. The linked leave nodes are used for fast linear 259 * access of the memory bitmap. 260 * 261 * The struct rtree_node represents one node of the radix tree. 
262 */ 263 264 #define BM_END_OF_MAP (~0UL) 265 266 #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) 267 #define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) 268 #define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) 269 270 struct bm_block { 271 struct list_head hook; /* hook into a list of bitmap blocks */ 272 unsigned long start_pfn; /* pfn represented by the first bit */ 273 unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ 274 unsigned long *data; /* bitmap representing pages */ 275 }; 276 277 static inline unsigned long bm_block_bits(struct bm_block *bb) 278 { 279 return bb->end_pfn - bb->start_pfn; 280 } 281 282 /* 283 * struct rtree_node is a wrapper struct to link the nodes 284 * of the rtree together for easy linear iteration over 285 * bits and easy freeing 286 */ 287 struct rtree_node { 288 struct list_head list; 289 unsigned long *data; 290 }; 291 292 /* 293 * struct mem_zone_bm_rtree represents a bitmap used for one 294 * populated memory zone. 295 */ 296 struct mem_zone_bm_rtree { 297 struct list_head list; /* Link Zones together */ 298 struct list_head nodes; /* Radix Tree inner nodes */ 299 struct list_head leaves; /* Radix Tree leaves */ 300 unsigned long start_pfn; /* Zone start page frame */ 301 unsigned long end_pfn; /* Zone end page frame + 1 */ 302 struct rtree_node *rtree; /* Radix Tree Root */ 303 int levels; /* Number of Radix Tree Levels */ 304 unsigned int blocks; /* Number of Bitmap Blocks */ 305 }; 306 307 /* strcut bm_position is used for browsing memory bitmaps */ 308 309 struct bm_position { 310 struct bm_block *block; 311 int bit; 312 }; 313 314 struct memory_bitmap { 315 struct list_head zones; 316 struct list_head blocks; /* list of bitmap blocks */ 317 struct linked_page *p_list; /* list of pages used to store zone 318 * bitmap objects and bitmap block 319 * objects 320 */ 321 struct bm_position cur; /* most recently used bit position */ 322 }; 323 324 /* Functions that operate on memory bitmaps */ 325 326 #define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long)) 327 #if BITS_PER_LONG == 32 328 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2) 329 #else 330 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3) 331 #endif 332 #define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) 333 334 /* 335 * alloc_rtree_node - Allocate a new node and add it to the radix tree. 336 * 337 * This function is used to allocate inner nodes as well as the 338 * leave nodes of the radix tree. It also adds the node to the 339 * corresponding linked list passed in by the *list parameter. 340 */ 341 static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, 342 struct chain_allocator *ca, 343 struct list_head *list) 344 { 345 struct rtree_node *node; 346 347 node = chain_alloc(ca, sizeof(struct rtree_node)); 348 if (!node) 349 return NULL; 350 351 node->data = get_image_page(gfp_mask, safe_needed); 352 if (!node->data) 353 return NULL; 354 355 list_add_tail(&node->list, list); 356 357 return node; 358 } 359 360 /* 361 * add_rtree_block - Add a new leave node to the radix tree 362 * 363 * The leave nodes need to be allocated in order to keep the leaves 364 * linked list in order. This is guaranteed by the zone->blocks 365 * counter. 
366 */ 367 static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, 368 int safe_needed, struct chain_allocator *ca) 369 { 370 struct rtree_node *node, *block, **dst; 371 unsigned int levels_needed, block_nr; 372 int i; 373 374 block_nr = zone->blocks; 375 levels_needed = 0; 376 377 /* How many levels do we need for this block nr? */ 378 while (block_nr) { 379 levels_needed += 1; 380 block_nr >>= BM_RTREE_LEVEL_SHIFT; 381 } 382 383 /* Make sure the rtree has enough levels */ 384 for (i = zone->levels; i < levels_needed; i++) { 385 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 386 &zone->nodes); 387 if (!node) 388 return -ENOMEM; 389 390 node->data[0] = (unsigned long)zone->rtree; 391 zone->rtree = node; 392 zone->levels += 1; 393 } 394 395 /* Allocate new block */ 396 block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves); 397 if (!block) 398 return -ENOMEM; 399 400 /* Now walk the rtree to insert the block */ 401 node = zone->rtree; 402 dst = &zone->rtree; 403 block_nr = zone->blocks; 404 for (i = zone->levels; i > 0; i--) { 405 int index; 406 407 if (!node) { 408 node = alloc_rtree_node(gfp_mask, safe_needed, ca, 409 &zone->nodes); 410 if (!node) 411 return -ENOMEM; 412 *dst = node; 413 } 414 415 index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); 416 index &= BM_RTREE_LEVEL_MASK; 417 dst = (struct rtree_node **)&((*dst)->data[index]); 418 node = *dst; 419 } 420 421 zone->blocks += 1; 422 *dst = block; 423 424 return 0; 425 } 426 427 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 428 int clear_nosave_free); 429 430 /* 431 * create_zone_bm_rtree - create a radix tree for one zone 432 * 433 * Allocated the mem_zone_bm_rtree structure and initializes it. 434 * This function also allocated and builds the radix tree for the 435 * zone. 436 */ 437 static struct mem_zone_bm_rtree * 438 create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, 439 struct chain_allocator *ca, 440 unsigned long start, unsigned long end) 441 { 442 struct mem_zone_bm_rtree *zone; 443 unsigned int i, nr_blocks; 444 unsigned long pages; 445 446 pages = end - start; 447 zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree)); 448 if (!zone) 449 return NULL; 450 451 INIT_LIST_HEAD(&zone->nodes); 452 INIT_LIST_HEAD(&zone->leaves); 453 zone->start_pfn = start; 454 zone->end_pfn = end; 455 nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); 456 457 for (i = 0; i < nr_blocks; i++) { 458 if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) { 459 free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR); 460 return NULL; 461 } 462 } 463 464 return zone; 465 } 466 467 /* 468 * free_zone_bm_rtree - Free the memory of the radix tree 469 * 470 * Free all node pages of the radix tree. The mem_zone_bm_rtree 471 * structure itself is not freed here nor are the rtree_node 472 * structs. 
473 */ 474 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, 475 int clear_nosave_free) 476 { 477 struct rtree_node *node; 478 479 list_for_each_entry(node, &zone->nodes, list) 480 free_image_page(node->data, clear_nosave_free); 481 482 list_for_each_entry(node, &zone->leaves, list) 483 free_image_page(node->data, clear_nosave_free); 484 } 485 486 static void memory_bm_position_reset(struct memory_bitmap *bm) 487 { 488 bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); 489 bm->cur.bit = 0; 490 } 491 492 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); 493 494 /** 495 * create_bm_block_list - create a list of block bitmap objects 496 * @pages - number of pages to track 497 * @list - list to put the allocated blocks into 498 * @ca - chain allocator to be used for allocating memory 499 */ 500 static int create_bm_block_list(unsigned long pages, 501 struct list_head *list, 502 struct chain_allocator *ca) 503 { 504 unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); 505 506 while (nr_blocks-- > 0) { 507 struct bm_block *bb; 508 509 bb = chain_alloc(ca, sizeof(struct bm_block)); 510 if (!bb) 511 return -ENOMEM; 512 list_add(&bb->hook, list); 513 } 514 515 return 0; 516 } 517 518 struct mem_extent { 519 struct list_head hook; 520 unsigned long start; 521 unsigned long end; 522 }; 523 524 /** 525 * free_mem_extents - free a list of memory extents 526 * @list - list of extents to empty 527 */ 528 static void free_mem_extents(struct list_head *list) 529 { 530 struct mem_extent *ext, *aux; 531 532 list_for_each_entry_safe(ext, aux, list, hook) { 533 list_del(&ext->hook); 534 kfree(ext); 535 } 536 } 537 538 /** 539 * create_mem_extents - create a list of memory extents representing 540 * contiguous ranges of PFNs 541 * @list - list to put the extents into 542 * @gfp_mask - mask to use for memory allocations 543 */ 544 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) 545 { 546 struct zone *zone; 547 548 INIT_LIST_HEAD(list); 549 550 for_each_populated_zone(zone) { 551 unsigned long zone_start, zone_end; 552 struct mem_extent *ext, *cur, *aux; 553 554 zone_start = zone->zone_start_pfn; 555 zone_end = zone_end_pfn(zone); 556 557 list_for_each_entry(ext, list, hook) 558 if (zone_start <= ext->end) 559 break; 560 561 if (&ext->hook == list || zone_end < ext->start) { 562 /* New extent is necessary */ 563 struct mem_extent *new_ext; 564 565 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); 566 if (!new_ext) { 567 free_mem_extents(list); 568 return -ENOMEM; 569 } 570 new_ext->start = zone_start; 571 new_ext->end = zone_end; 572 list_add_tail(&new_ext->hook, &ext->hook); 573 continue; 574 } 575 576 /* Merge this zone's range of PFNs with the existing one */ 577 if (zone_start < ext->start) 578 ext->start = zone_start; 579 if (zone_end > ext->end) 580 ext->end = zone_end; 581 582 /* More merging may be possible */ 583 cur = ext; 584 list_for_each_entry_safe_continue(cur, aux, list, hook) { 585 if (zone_end < cur->start) 586 break; 587 if (zone_end < cur->end) 588 ext->end = cur->end; 589 list_del(&cur->hook); 590 kfree(cur); 591 } 592 } 593 594 return 0; 595 } 596 597 /** 598 * memory_bm_create - allocate memory for a memory bitmap 599 */ 600 static int 601 memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) 602 { 603 struct chain_allocator ca; 604 struct list_head mem_extents; 605 struct mem_extent *ext; 606 int error; 607 608 chain_init(&ca, gfp_mask, safe_needed); 609 

/**
 * memory_bm_create - allocate memory for a memory bitmap
 */
static int
memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
{
        struct chain_allocator ca;
        struct list_head mem_extents;
        struct mem_extent *ext;
        int error;

        chain_init(&ca, gfp_mask, safe_needed);
        INIT_LIST_HEAD(&bm->blocks);
        INIT_LIST_HEAD(&bm->zones);

        error = create_mem_extents(&mem_extents, gfp_mask);
        if (error)
                return error;

        list_for_each_entry(ext, &mem_extents, hook) {
                struct mem_zone_bm_rtree *zone;
                struct bm_block *bb;
                unsigned long pfn = ext->start;
                unsigned long pages = ext->end - ext->start;

                bb = list_entry(bm->blocks.prev, struct bm_block, hook);

                error = create_bm_block_list(pages, bm->blocks.prev, &ca);
                if (error)
                        goto Error;

                list_for_each_entry_continue(bb, &bm->blocks, hook) {
                        bb->data = get_image_page(gfp_mask, safe_needed);
                        if (!bb->data) {
                                error = -ENOMEM;
                                goto Error;
                        }

                        bb->start_pfn = pfn;
                        if (pages >= BM_BITS_PER_BLOCK) {
                                pfn += BM_BITS_PER_BLOCK;
                                pages -= BM_BITS_PER_BLOCK;
                        } else {
                                /* This is executed only once in the loop */
                                pfn += pages;
                        }
                        bb->end_pfn = pfn;
                }

                zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca,
                                            ext->start, ext->end);
                if (!zone)
                        goto Error;
                list_add_tail(&zone->list, &bm->zones);
        }

        bm->p_list = ca.chain;
        memory_bm_position_reset(bm);
 Exit:
        free_mem_extents(&mem_extents);
        return error;

 Error:
        bm->p_list = ca.chain;
        memory_bm_free(bm, PG_UNSAFE_CLEAR);
        goto Exit;
}

/**
 * memory_bm_free - free memory occupied by the memory bitmap @bm
 */
static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
{
        struct mem_zone_bm_rtree *zone;
        struct bm_block *bb;

        list_for_each_entry(bb, &bm->blocks, hook)
                if (bb->data)
                        free_image_page(bb->data, clear_nosave_free);

        list_for_each_entry(zone, &bm->zones, list)
                free_zone_bm_rtree(zone, clear_nosave_free);

        free_list_of_pages(bm->p_list, clear_nosave_free);

        INIT_LIST_HEAD(&bm->zones);
        INIT_LIST_HEAD(&bm->blocks);
}
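
/*
 * Illustrative only, not part of the build: the basic lifecycle of a
 * memory bitmap using the accessors defined below. The function name is
 * hypothetical.
 */
#if 0
static int memory_bitmap_example(void)
{
        static struct memory_bitmap bm;

        if (memory_bm_create(&bm, GFP_KERNEL, PG_ANY))
                return -ENOMEM;
        memory_bm_set_bit(&bm, 1234);           /* mark PFN 1234 */
        WARN_ON(!memory_bm_test_bit(&bm, 1234));
        memory_bm_free(&bm, PG_UNSAFE_CLEAR);   /* release pages and metadata */
        return 0;
}
#endif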
699 */ 700 bb = bm->cur.block; 701 if (pfn < bb->start_pfn) 702 list_for_each_entry_continue_reverse(bb, &bm->blocks, hook) 703 if (pfn >= bb->start_pfn) 704 break; 705 706 if (pfn >= bb->end_pfn) 707 list_for_each_entry_continue(bb, &bm->blocks, hook) 708 if (pfn >= bb->start_pfn && pfn < bb->end_pfn) 709 break; 710 711 if (&bb->hook == &bm->blocks) 712 return -EFAULT; 713 714 /* The block has been found */ 715 bm->cur.block = bb; 716 pfn -= bb->start_pfn; 717 bm->cur.bit = pfn + 1; 718 *bit_nr = pfn; 719 *addr = bb->data; 720 return 0; 721 } 722 723 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) 724 { 725 void *addr; 726 unsigned int bit; 727 int error; 728 729 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 730 BUG_ON(error); 731 set_bit(bit, addr); 732 } 733 734 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) 735 { 736 void *addr; 737 unsigned int bit; 738 int error; 739 740 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 741 if (!error) 742 set_bit(bit, addr); 743 return error; 744 } 745 746 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) 747 { 748 void *addr; 749 unsigned int bit; 750 int error; 751 752 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 753 BUG_ON(error); 754 clear_bit(bit, addr); 755 } 756 757 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) 758 { 759 void *addr; 760 unsigned int bit; 761 int error; 762 763 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 764 BUG_ON(error); 765 return test_bit(bit, addr); 766 } 767 768 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) 769 { 770 void *addr; 771 unsigned int bit; 772 773 return !memory_bm_find_bit(bm, pfn, &addr, &bit); 774 } 775 776 /** 777 * memory_bm_next_pfn - find the pfn that corresponds to the next set bit 778 * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is 779 * returned. 780 * 781 * It is required to run memory_bm_position_reset() before the first call to 782 * this function. 783 */ 784 785 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) 786 { 787 struct bm_block *bb; 788 int bit; 789 790 bb = bm->cur.block; 791 do { 792 bit = bm->cur.bit; 793 bit = find_next_bit(bb->data, bm_block_bits(bb), bit); 794 if (bit < bm_block_bits(bb)) 795 goto Return_pfn; 796 797 bb = list_entry(bb->hook.next, struct bm_block, hook); 798 bm->cur.block = bb; 799 bm->cur.bit = 0; 800 } while (&bb->hook != &bm->blocks); 801 802 memory_bm_position_reset(bm); 803 return BM_END_OF_MAP; 804 805 Return_pfn: 806 bm->cur.bit = bit + 1; 807 return bb->start_pfn + bit; 808 } 809 810 /** 811 * This structure represents a range of page frames the contents of which 812 * should not be saved during the suspend. 
813 */ 814 815 struct nosave_region { 816 struct list_head list; 817 unsigned long start_pfn; 818 unsigned long end_pfn; 819 }; 820 821 static LIST_HEAD(nosave_regions); 822 823 /** 824 * register_nosave_region - register a range of page frames the contents 825 * of which should not be saved during the suspend (to be used in the early 826 * initialization code) 827 */ 828 829 void __init 830 __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, 831 int use_kmalloc) 832 { 833 struct nosave_region *region; 834 835 if (start_pfn >= end_pfn) 836 return; 837 838 if (!list_empty(&nosave_regions)) { 839 /* Try to extend the previous region (they should be sorted) */ 840 region = list_entry(nosave_regions.prev, 841 struct nosave_region, list); 842 if (region->end_pfn == start_pfn) { 843 region->end_pfn = end_pfn; 844 goto Report; 845 } 846 } 847 if (use_kmalloc) { 848 /* during init, this shouldn't fail */ 849 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); 850 BUG_ON(!region); 851 } else 852 /* This allocation cannot fail */ 853 region = memblock_virt_alloc(sizeof(struct nosave_region), 0); 854 region->start_pfn = start_pfn; 855 region->end_pfn = end_pfn; 856 list_add_tail(®ion->list, &nosave_regions); 857 Report: 858 printk(KERN_INFO "PM: Registered nosave memory: [mem %#010llx-%#010llx]\n", 859 (unsigned long long) start_pfn << PAGE_SHIFT, 860 ((unsigned long long) end_pfn << PAGE_SHIFT) - 1); 861 } 862 863 /* 864 * Set bits in this map correspond to the page frames the contents of which 865 * should not be saved during the suspend. 866 */ 867 static struct memory_bitmap *forbidden_pages_map; 868 869 /* Set bits in this map correspond to free page frames. */ 870 static struct memory_bitmap *free_pages_map; 871 872 /* 873 * Each page frame allocated for creating the image is marked by setting the 874 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously 875 */ 876 877 void swsusp_set_page_free(struct page *page) 878 { 879 if (free_pages_map) 880 memory_bm_set_bit(free_pages_map, page_to_pfn(page)); 881 } 882 883 static int swsusp_page_is_free(struct page *page) 884 { 885 return free_pages_map ? 886 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0; 887 } 888 889 void swsusp_unset_page_free(struct page *page) 890 { 891 if (free_pages_map) 892 memory_bm_clear_bit(free_pages_map, page_to_pfn(page)); 893 } 894 895 static void swsusp_set_page_forbidden(struct page *page) 896 { 897 if (forbidden_pages_map) 898 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page)); 899 } 900 901 int swsusp_page_is_forbidden(struct page *page) 902 { 903 return forbidden_pages_map ? 904 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0; 905 } 906 907 static void swsusp_unset_page_forbidden(struct page *page) 908 { 909 if (forbidden_pages_map) 910 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page)); 911 } 912 913 /** 914 * mark_nosave_pages - set bits corresponding to the page frames the 915 * contents of which should not be saved in a given bitmap. 
916 */ 917 918 static void mark_nosave_pages(struct memory_bitmap *bm) 919 { 920 struct nosave_region *region; 921 922 if (list_empty(&nosave_regions)) 923 return; 924 925 list_for_each_entry(region, &nosave_regions, list) { 926 unsigned long pfn; 927 928 pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n", 929 (unsigned long long) region->start_pfn << PAGE_SHIFT, 930 ((unsigned long long) region->end_pfn << PAGE_SHIFT) 931 - 1); 932 933 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) 934 if (pfn_valid(pfn)) { 935 /* 936 * It is safe to ignore the result of 937 * mem_bm_set_bit_check() here, since we won't 938 * touch the PFNs for which the error is 939 * returned anyway. 940 */ 941 mem_bm_set_bit_check(bm, pfn); 942 } 943 } 944 } 945 946 /** 947 * create_basic_memory_bitmaps - create bitmaps needed for marking page 948 * frames that should not be saved and free page frames. The pointers 949 * forbidden_pages_map and free_pages_map are only modified if everything 950 * goes well, because we don't want the bits to be used before both bitmaps 951 * are set up. 952 */ 953 954 int create_basic_memory_bitmaps(void) 955 { 956 struct memory_bitmap *bm1, *bm2; 957 int error = 0; 958 959 if (forbidden_pages_map && free_pages_map) 960 return 0; 961 else 962 BUG_ON(forbidden_pages_map || free_pages_map); 963 964 bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 965 if (!bm1) 966 return -ENOMEM; 967 968 error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); 969 if (error) 970 goto Free_first_object; 971 972 bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 973 if (!bm2) 974 goto Free_first_bitmap; 975 976 error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); 977 if (error) 978 goto Free_second_object; 979 980 forbidden_pages_map = bm1; 981 free_pages_map = bm2; 982 mark_nosave_pages(forbidden_pages_map); 983 984 pr_debug("PM: Basic memory bitmaps created\n"); 985 986 return 0; 987 988 Free_second_object: 989 kfree(bm2); 990 Free_first_bitmap: 991 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 992 Free_first_object: 993 kfree(bm1); 994 return -ENOMEM; 995 } 996 997 /** 998 * free_basic_memory_bitmaps - free memory bitmaps allocated by 999 * create_basic_memory_bitmaps(). The auxiliary pointers are necessary 1000 * so that the bitmaps themselves are not referred to while they are being 1001 * freed. 
1002 */ 1003 1004 void free_basic_memory_bitmaps(void) 1005 { 1006 struct memory_bitmap *bm1, *bm2; 1007 1008 if (WARN_ON(!(forbidden_pages_map && free_pages_map))) 1009 return; 1010 1011 bm1 = forbidden_pages_map; 1012 bm2 = free_pages_map; 1013 forbidden_pages_map = NULL; 1014 free_pages_map = NULL; 1015 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 1016 kfree(bm1); 1017 memory_bm_free(bm2, PG_UNSAFE_CLEAR); 1018 kfree(bm2); 1019 1020 pr_debug("PM: Basic memory bitmaps freed\n"); 1021 } 1022 1023 /** 1024 * snapshot_additional_pages - estimate the number of additional pages 1025 * be needed for setting up the suspend image data structures for given 1026 * zone (usually the returned value is greater than the exact number) 1027 */ 1028 1029 unsigned int snapshot_additional_pages(struct zone *zone) 1030 { 1031 unsigned int rtree, nodes; 1032 unsigned int res; 1033 1034 res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); 1035 res += DIV_ROUND_UP(res * sizeof(struct bm_block), 1036 LINKED_PAGE_DATA_SIZE); 1037 rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); 1038 rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), 1039 LINKED_PAGE_DATA_SIZE); 1040 while (nodes > 1) { 1041 nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL); 1042 rtree += nodes; 1043 } 1044 1045 return 2 * (res + rtree); 1046 } 1047 1048 #ifdef CONFIG_HIGHMEM 1049 /** 1050 * count_free_highmem_pages - compute the total number of free highmem 1051 * pages, system-wide. 1052 */ 1053 1054 static unsigned int count_free_highmem_pages(void) 1055 { 1056 struct zone *zone; 1057 unsigned int cnt = 0; 1058 1059 for_each_populated_zone(zone) 1060 if (is_highmem(zone)) 1061 cnt += zone_page_state(zone, NR_FREE_PAGES); 1062 1063 return cnt; 1064 } 1065 1066 /** 1067 * saveable_highmem_page - Determine whether a highmem page should be 1068 * included in the suspend image. 1069 * 1070 * We should save the page if it isn't Nosave or NosaveFree, or Reserved, 1071 * and it isn't a part of a free chunk of pages. 1072 */ 1073 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) 1074 { 1075 struct page *page; 1076 1077 if (!pfn_valid(pfn)) 1078 return NULL; 1079 1080 page = pfn_to_page(pfn); 1081 if (page_zone(page) != zone) 1082 return NULL; 1083 1084 BUG_ON(!PageHighMem(page)); 1085 1086 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) || 1087 PageReserved(page)) 1088 return NULL; 1089 1090 if (page_is_guard(page)) 1091 return NULL; 1092 1093 return page; 1094 } 1095 1096 /** 1097 * count_highmem_pages - compute the total number of saveable highmem 1098 * pages. 1099 */ 1100 1101 static unsigned int count_highmem_pages(void) 1102 { 1103 struct zone *zone; 1104 unsigned int n = 0; 1105 1106 for_each_populated_zone(zone) { 1107 unsigned long pfn, max_zone_pfn; 1108 1109 if (!is_highmem(zone)) 1110 continue; 1111 1112 mark_free_pages(zone); 1113 max_zone_pfn = zone_end_pfn(zone); 1114 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1115 if (saveable_highmem_page(zone, pfn)) 1116 n++; 1117 } 1118 return n; 1119 } 1120 #else 1121 static inline void *saveable_highmem_page(struct zone *z, unsigned long p) 1122 { 1123 return NULL; 1124 } 1125 #endif /* CONFIG_HIGHMEM */ 1126 1127 /** 1128 * saveable_page - Determine whether a non-highmem page should be included 1129 * in the suspend image. 1130 * 1131 * We should save the page if it isn't Nosave, and is not in the range 1132 * of pages statically defined as 'unsaveable', and it isn't a part of 1133 * a free chunk of pages. 
1134 */ 1135 static struct page *saveable_page(struct zone *zone, unsigned long pfn) 1136 { 1137 struct page *page; 1138 1139 if (!pfn_valid(pfn)) 1140 return NULL; 1141 1142 page = pfn_to_page(pfn); 1143 if (page_zone(page) != zone) 1144 return NULL; 1145 1146 BUG_ON(PageHighMem(page)); 1147 1148 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 1149 return NULL; 1150 1151 if (PageReserved(page) 1152 && (!kernel_page_present(page) || pfn_is_nosave(pfn))) 1153 return NULL; 1154 1155 if (page_is_guard(page)) 1156 return NULL; 1157 1158 return page; 1159 } 1160 1161 /** 1162 * count_data_pages - compute the total number of saveable non-highmem 1163 * pages. 1164 */ 1165 1166 static unsigned int count_data_pages(void) 1167 { 1168 struct zone *zone; 1169 unsigned long pfn, max_zone_pfn; 1170 unsigned int n = 0; 1171 1172 for_each_populated_zone(zone) { 1173 if (is_highmem(zone)) 1174 continue; 1175 1176 mark_free_pages(zone); 1177 max_zone_pfn = zone_end_pfn(zone); 1178 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1179 if (saveable_page(zone, pfn)) 1180 n++; 1181 } 1182 return n; 1183 } 1184 1185 /* This is needed, because copy_page and memcpy are not usable for copying 1186 * task structs. 1187 */ 1188 static inline void do_copy_page(long *dst, long *src) 1189 { 1190 int n; 1191 1192 for (n = PAGE_SIZE / sizeof(long); n; n--) 1193 *dst++ = *src++; 1194 } 1195 1196 1197 /** 1198 * safe_copy_page - check if the page we are going to copy is marked as 1199 * present in the kernel page tables (this always is the case if 1200 * CONFIG_DEBUG_PAGEALLOC is not set and in that case 1201 * kernel_page_present() always returns 'true'). 1202 */ 1203 static void safe_copy_page(void *dst, struct page *s_page) 1204 { 1205 if (kernel_page_present(s_page)) { 1206 do_copy_page(dst, page_address(s_page)); 1207 } else { 1208 kernel_map_pages(s_page, 1, 1); 1209 do_copy_page(dst, page_address(s_page)); 1210 kernel_map_pages(s_page, 1, 0); 1211 } 1212 } 1213 1214 1215 #ifdef CONFIG_HIGHMEM 1216 static inline struct page * 1217 page_is_saveable(struct zone *zone, unsigned long pfn) 1218 { 1219 return is_highmem(zone) ? 

#ifdef CONFIG_HIGHMEM
static inline struct page *
page_is_saveable(struct zone *zone, unsigned long pfn)
{
        return is_highmem(zone) ?
                saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
}

static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
        struct page *s_page, *d_page;
        void *src, *dst;

        s_page = pfn_to_page(src_pfn);
        d_page = pfn_to_page(dst_pfn);
        if (PageHighMem(s_page)) {
                src = kmap_atomic(s_page);
                dst = kmap_atomic(d_page);
                do_copy_page(dst, src);
                kunmap_atomic(dst);
                kunmap_atomic(src);
        } else {
                if (PageHighMem(d_page)) {
                        /* Page pointed to by src may contain some kernel
                         * data modified by kmap_atomic()
                         */
                        safe_copy_page(buffer, s_page);
                        dst = kmap_atomic(d_page);
                        copy_page(dst, buffer);
                        kunmap_atomic(dst);
                } else {
                        safe_copy_page(page_address(d_page), s_page);
                }
        }
}
#else
#define page_is_saveable(zone, pfn)     saveable_page(zone, pfn)

static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
        safe_copy_page(page_address(pfn_to_page(dst_pfn)),
                       pfn_to_page(src_pfn));
}
#endif /* CONFIG_HIGHMEM */

static void
copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
{
        struct zone *zone;
        unsigned long pfn;

        for_each_populated_zone(zone) {
                unsigned long max_zone_pfn;

                mark_free_pages(zone);
                max_zone_pfn = zone_end_pfn(zone);
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (page_is_saveable(zone, pfn))
                                memory_bm_set_bit(orig_bm, pfn);
        }
        memory_bm_position_reset(orig_bm);
        memory_bm_position_reset(copy_bm);
        for (;;) {
                pfn = memory_bm_next_pfn(orig_bm);
                if (unlikely(pfn == BM_END_OF_MAP))
                        break;
                copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
        }
}
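
/*
 * Invariant worth noting (illustrative): the marking loop above sets one
 * bit in @orig_bm per saveable page, while @copy_bm was populated with at
 * least that many preallocated frames, so pairing successive
 * memory_bm_next_pfn() results from the two bitmaps yields matched
 * (destination, source) page frames for copy_data_page().
 */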
1314 */ 1315 1316 void swsusp_free(void) 1317 { 1318 struct zone *zone; 1319 unsigned long pfn, max_zone_pfn; 1320 1321 for_each_populated_zone(zone) { 1322 max_zone_pfn = zone_end_pfn(zone); 1323 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1324 if (pfn_valid(pfn)) { 1325 struct page *page = pfn_to_page(pfn); 1326 1327 if (swsusp_page_is_forbidden(page) && 1328 swsusp_page_is_free(page)) { 1329 swsusp_unset_page_forbidden(page); 1330 swsusp_unset_page_free(page); 1331 __free_page(page); 1332 } 1333 } 1334 } 1335 nr_copy_pages = 0; 1336 nr_meta_pages = 0; 1337 restore_pblist = NULL; 1338 buffer = NULL; 1339 alloc_normal = 0; 1340 alloc_highmem = 0; 1341 } 1342 1343 /* Helper functions used for the shrinking of memory. */ 1344 1345 #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) 1346 1347 /** 1348 * preallocate_image_pages - Allocate a number of pages for hibernation image 1349 * @nr_pages: Number of page frames to allocate. 1350 * @mask: GFP flags to use for the allocation. 1351 * 1352 * Return value: Number of page frames actually allocated 1353 */ 1354 static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) 1355 { 1356 unsigned long nr_alloc = 0; 1357 1358 while (nr_pages > 0) { 1359 struct page *page; 1360 1361 page = alloc_image_page(mask); 1362 if (!page) 1363 break; 1364 memory_bm_set_bit(©_bm, page_to_pfn(page)); 1365 if (PageHighMem(page)) 1366 alloc_highmem++; 1367 else 1368 alloc_normal++; 1369 nr_pages--; 1370 nr_alloc++; 1371 } 1372 1373 return nr_alloc; 1374 } 1375 1376 static unsigned long preallocate_image_memory(unsigned long nr_pages, 1377 unsigned long avail_normal) 1378 { 1379 unsigned long alloc; 1380 1381 if (avail_normal <= alloc_normal) 1382 return 0; 1383 1384 alloc = avail_normal - alloc_normal; 1385 if (nr_pages < alloc) 1386 alloc = nr_pages; 1387 1388 return preallocate_image_pages(alloc, GFP_IMAGE); 1389 } 1390 1391 #ifdef CONFIG_HIGHMEM 1392 static unsigned long preallocate_image_highmem(unsigned long nr_pages) 1393 { 1394 return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); 1395 } 1396 1397 /** 1398 * __fraction - Compute (an approximation of) x * (multiplier / base) 1399 */ 1400 static unsigned long __fraction(u64 x, u64 multiplier, u64 base) 1401 { 1402 x *= multiplier; 1403 do_div(x, base); 1404 return (unsigned long)x; 1405 } 1406 1407 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1408 unsigned long highmem, 1409 unsigned long total) 1410 { 1411 unsigned long alloc = __fraction(nr_pages, highmem, total); 1412 1413 return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); 1414 } 1415 #else /* CONFIG_HIGHMEM */ 1416 static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) 1417 { 1418 return 0; 1419 } 1420 1421 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, 1422 unsigned long highmem, 1423 unsigned long total) 1424 { 1425 return 0; 1426 } 1427 #endif /* CONFIG_HIGHMEM */ 1428 1429 /** 1430 * free_unnecessary_pages - Release preallocated pages not needed for the image 1431 */ 1432 static void free_unnecessary_pages(void) 1433 { 1434 unsigned long save, to_free_normal, to_free_highmem; 1435 1436 save = count_data_pages(); 1437 if (alloc_normal >= save) { 1438 to_free_normal = alloc_normal - save; 1439 save = 0; 1440 } else { 1441 to_free_normal = 0; 1442 save -= alloc_normal; 1443 } 1444 save += count_highmem_pages(); 1445 if (alloc_highmem >= save) { 1446 to_free_highmem = alloc_highmem - save; 1447 } else { 1448 

/**
 * free_unnecessary_pages - Release preallocated pages not needed for the image
 */
static void free_unnecessary_pages(void)
{
        unsigned long save, to_free_normal, to_free_highmem;

        save = count_data_pages();
        if (alloc_normal >= save) {
                to_free_normal = alloc_normal - save;
                save = 0;
        } else {
                to_free_normal = 0;
                save -= alloc_normal;
        }
        save += count_highmem_pages();
        if (alloc_highmem >= save) {
                to_free_highmem = alloc_highmem - save;
        } else {
                to_free_highmem = 0;
                save -= alloc_highmem;
                if (to_free_normal > save)
                        to_free_normal -= save;
                else
                        to_free_normal = 0;
        }

        memory_bm_position_reset(&copy_bm);

        while (to_free_normal > 0 || to_free_highmem > 0) {
                unsigned long pfn = memory_bm_next_pfn(&copy_bm);
                struct page *page = pfn_to_page(pfn);

                if (PageHighMem(page)) {
                        if (!to_free_highmem)
                                continue;
                        to_free_highmem--;
                        alloc_highmem--;
                } else {
                        if (!to_free_normal)
                                continue;
                        to_free_normal--;
                        alloc_normal--;
                }
                memory_bm_clear_bit(&copy_bm, pfn);
                swsusp_unset_page_forbidden(page);
                swsusp_unset_page_free(page);
                __free_page(page);
        }
}

/**
 * minimum_image_size - Estimate the minimum acceptable size of an image
 * @saveable: Number of saveable pages in the system.
 *
 * We want to avoid attempting to free too much memory too hard, so estimate the
 * minimum acceptable size of a hibernation image to use as the lower limit for
 * preallocating memory.
 *
 * We assume that the minimum image size should be proportional to
 *
 * [number of saveable pages] - [number of pages that can be freed in theory]
 *
 * where the second term is the sum of (1) reclaimable slab pages, (2) active
 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages,
 * minus mapped file pages.
 */
static unsigned long minimum_image_size(unsigned long saveable)
{
        unsigned long size;

        size = global_page_state(NR_SLAB_RECLAIMABLE)
                + global_page_state(NR_ACTIVE_ANON)
                + global_page_state(NR_INACTIVE_ANON)
                + global_page_state(NR_ACTIVE_FILE)
                + global_page_state(NR_INACTIVE_FILE)
                - global_page_state(NR_FILE_MAPPED);

        return saveable <= size ? 0 : saveable - size;
}

/**
 * hibernate_preallocate_memory - Preallocate memory for hibernation image
 *
 * To create a hibernation image it is necessary to make a copy of every page
 * frame in use. We also need a number of page frames to be free during
 * hibernation for allocations made while saving the image and for device
 * drivers, in case they need to allocate memory from their hibernation
 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
 * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
 * /sys/power/reserved_size), respectively). To make this happen, we compute the
 * total number of available page frames and allocate at least
 *
 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
 *      + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
 *
 * of them, which corresponds to the maximum size of a hibernation image.
 *
 * If image_size is set below the number following from the above formula,
 * the preallocation of memory is continued until the total number of saveable
 * pages in the system is below the requested image size or the minimum
 * acceptable image size returned by minimum_image_size(), whichever is greater.
 */
int hibernate_preallocate_memory(void)
{
        struct zone *zone;
        unsigned long saveable, size, max_size, count, highmem, pages = 0;
        unsigned long alloc, save_highmem, pages_highmem, avail_normal;
        struct timeval start, stop;
        int error;

        printk(KERN_INFO "PM: Preallocating image memory... ");
        do_gettimeofday(&start);

        error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
        if (error)
                goto err_out;

        error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
        if (error)
                goto err_out;

        alloc_normal = 0;
        alloc_highmem = 0;

        /* Count the number of saveable data pages. */
        save_highmem = count_highmem_pages();
        saveable = count_data_pages();

        /*
         * Compute the total number of page frames we can use (count) and the
         * number of pages needed for image metadata (size).
         */
        count = saveable;
        saveable += save_highmem;
        highmem = save_highmem;
        size = 0;
        for_each_populated_zone(zone) {
                size += snapshot_additional_pages(zone);
                if (is_highmem(zone))
                        highmem += zone_page_state(zone, NR_FREE_PAGES);
                else
                        count += zone_page_state(zone, NR_FREE_PAGES);
        }
        avail_normal = count;
        count += highmem;
        count -= totalreserve_pages;

        /* Add number of pages required for page keys (s390 only). */
        size += page_key_additional_pages(saveable);

        /* Compute the maximum number of saveable pages to leave in memory. */
        max_size = (count - (size + PAGES_FOR_IO)) / 2
                        - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
        /* Compute the desired number of image pages specified by image_size. */
        size = DIV_ROUND_UP(image_size, PAGE_SIZE);
        if (size > max_size)
                size = max_size;
        /*
         * If the desired number of image pages is at least as large as the
         * current number of saveable pages in memory, allocate page frames for
         * the image and we're done.
         */
        if (size >= saveable) {
                pages = preallocate_image_highmem(save_highmem);
                pages += preallocate_image_memory(saveable - pages, avail_normal);
                goto out;
        }

        /* Estimate the minimum size of the image. */
        pages = minimum_image_size(saveable);
        /*
         * To avoid excessive pressure on the normal zone, leave room in it to
         * accommodate an image of the minimum size (unless it's already too
         * small, in which case don't preallocate pages from it at all).
         */
        if (avail_normal > pages)
                avail_normal -= pages;
        else
                avail_normal = 0;
        if (size < pages)
                size = min_t(unsigned long, pages, max_size);

        /*
         * Let the memory management subsystem know that we're going to need a
         * large number of page frames to allocate and make it free some memory.
         * NOTE: If this is not done, performance will be hurt badly in some
         * test cases.
         */
        shrink_all_memory(saveable - size);

        /*
         * The number of saveable pages in memory was too high, so apply some
         * pressure to decrease it. First, make room for the largest possible
         * image and fail if that doesn't work. Next, try to decrease the size
         * of the image as much as indicated by 'size' using allocations from
         * highmem and non-highmem zones separately.
         */
        pages_highmem = preallocate_image_highmem(highmem / 2);
        alloc = count - max_size;
        if (alloc > pages_highmem)
                alloc -= pages_highmem;
        else
                alloc = 0;
        pages = preallocate_image_memory(alloc, avail_normal);
        if (pages < alloc) {
                /* We have exhausted non-highmem pages, try highmem. */
                alloc -= pages;
                pages += pages_highmem;
                pages_highmem = preallocate_image_highmem(alloc);
                if (pages_highmem < alloc)
                        goto err_out;
                pages += pages_highmem;
                /*
                 * size is the desired number of saveable pages to leave in
                 * memory, so try to preallocate (all memory - size) pages.
                 */
                alloc = (count - pages) - size;
                pages += preallocate_image_highmem(alloc);
        } else {
                /*
                 * There are approximately max_size saveable pages at this point
                 * and we want to reduce this number down to size.
                 */
                alloc = max_size - size;
                size = preallocate_highmem_fraction(alloc, highmem, count);
                pages_highmem += size;
                alloc -= size;
                size = preallocate_image_memory(alloc, avail_normal);
                pages_highmem += preallocate_image_highmem(alloc - size);
                pages += pages_highmem + size;
        }

        /*
         * We only need as many page frames for the image as there are saveable
         * pages in memory, but we have allocated more. Release the excessive
         * ones now.
         */
        free_unnecessary_pages();

 out:
        do_gettimeofday(&stop);
        printk(KERN_CONT "done (allocated %lu pages)\n", pages);
        swsusp_show_speed(&start, &stop, pages, "Allocated");

        return 0;

 err_out:
        printk(KERN_CONT "\n");
        swsusp_free();
        return -ENOMEM;
}
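
/*
 * Worked example for hibernate_preallocate_memory() above (illustrative
 * numbers): with count = 1000000 usable page frames, 2000 metadata
 * pages, PAGES_FOR_IO = 1024 and reserved_size amounting to 256 pages,
 * max_size = (1000000 - 3024) / 2 - 2 * 256 = 497976, so at least
 * count - max_size = 502024 page frames get preallocated -- matching
 * (1000000 + 1024 + 2000) / 2 + 2 * 256 from the comment's formula.
 */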
1741 */ 1742 1743 static inline unsigned int 1744 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) 1745 { 1746 unsigned int to_alloc = count_free_highmem_pages(); 1747 1748 if (to_alloc > nr_highmem) 1749 to_alloc = nr_highmem; 1750 1751 nr_highmem -= to_alloc; 1752 while (to_alloc-- > 0) { 1753 struct page *page; 1754 1755 page = alloc_image_page(__GFP_HIGHMEM); 1756 memory_bm_set_bit(bm, page_to_pfn(page)); 1757 } 1758 return nr_highmem; 1759 } 1760 #else 1761 static inline int get_highmem_buffer(int safe_needed) { return 0; } 1762 1763 static inline unsigned int 1764 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } 1765 #endif /* CONFIG_HIGHMEM */ 1766 1767 /** 1768 * swsusp_alloc - allocate memory for the suspend image 1769 * 1770 * We first try to allocate as many highmem pages as there are 1771 * saveable highmem pages in the system. If that fails, we allocate 1772 * non-highmem pages for the copies of the remaining highmem ones. 1773 * 1774 * In this approach it is likely that the copies of highmem pages will 1775 * also be located in the high memory, because of the way in which 1776 * copy_data_pages() works. 1777 */ 1778 1779 static int 1780 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, 1781 unsigned int nr_pages, unsigned int nr_highmem) 1782 { 1783 if (nr_highmem > 0) { 1784 if (get_highmem_buffer(PG_ANY)) 1785 goto err_out; 1786 if (nr_highmem > alloc_highmem) { 1787 nr_highmem -= alloc_highmem; 1788 nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); 1789 } 1790 } 1791 if (nr_pages > alloc_normal) { 1792 nr_pages -= alloc_normal; 1793 while (nr_pages-- > 0) { 1794 struct page *page; 1795 1796 page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); 1797 if (!page) 1798 goto err_out; 1799 memory_bm_set_bit(copy_bm, page_to_pfn(page)); 1800 } 1801 } 1802 1803 return 0; 1804 1805 err_out: 1806 swsusp_free(); 1807 return -ENOMEM; 1808 } 1809 1810 asmlinkage __visible int swsusp_save(void) 1811 { 1812 unsigned int nr_pages, nr_highmem; 1813 1814 printk(KERN_INFO "PM: Creating hibernation image:\n"); 1815 1816 drain_local_pages(NULL); 1817 nr_pages = count_data_pages(); 1818 nr_highmem = count_highmem_pages(); 1819 printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem); 1820 1821 if (!enough_free_mem(nr_pages, nr_highmem)) { 1822 printk(KERN_ERR "PM: Not enough free memory\n"); 1823 return -ENOMEM; 1824 } 1825 1826 if (swsusp_alloc(&orig_bm, ©_bm, nr_pages, nr_highmem)) { 1827 printk(KERN_ERR "PM: Memory allocation failed\n"); 1828 return -ENOMEM; 1829 } 1830 1831 /* During allocating of suspend pagedir, new cold pages may appear. 1832 * Kill them. 1833 */ 1834 drain_local_pages(NULL); 1835 copy_data_pages(©_bm, &orig_bm); 1836 1837 /* 1838 * End of critical section. From now on, we can write to memory, 1839 * but we should not touch disk. This specially means we must _not_ 1840 * touch swap space! Except we must write out our image of course. 
1841 */ 1842 1843 nr_pages += nr_highmem; 1844 nr_copy_pages = nr_pages; 1845 nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); 1846 1847 printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n", 1848 nr_pages); 1849 1850 return 0; 1851 } 1852 1853 #ifndef CONFIG_ARCH_HIBERNATION_HEADER 1854 static int init_header_complete(struct swsusp_info *info) 1855 { 1856 memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); 1857 info->version_code = LINUX_VERSION_CODE; 1858 return 0; 1859 } 1860 1861 static char *check_image_kernel(struct swsusp_info *info) 1862 { 1863 if (info->version_code != LINUX_VERSION_CODE) 1864 return "kernel version"; 1865 if (strcmp(info->uts.sysname,init_utsname()->sysname)) 1866 return "system type"; 1867 if (strcmp(info->uts.release,init_utsname()->release)) 1868 return "kernel release"; 1869 if (strcmp(info->uts.version,init_utsname()->version)) 1870 return "version"; 1871 if (strcmp(info->uts.machine,init_utsname()->machine)) 1872 return "machine"; 1873 return NULL; 1874 } 1875 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ 1876 1877 unsigned long snapshot_get_image_size(void) 1878 { 1879 return nr_copy_pages + nr_meta_pages + 1; 1880 } 1881 1882 static int init_header(struct swsusp_info *info) 1883 { 1884 memset(info, 0, sizeof(struct swsusp_info)); 1885 info->num_physpages = get_num_physpages(); 1886 info->image_pages = nr_copy_pages; 1887 info->pages = snapshot_get_image_size(); 1888 info->size = info->pages; 1889 info->size <<= PAGE_SHIFT; 1890 return init_header_complete(info); 1891 } 1892 1893 /** 1894 * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm 1895 * are stored in the array @buf[] (1 page at a time) 1896 */ 1897 1898 static inline void 1899 pack_pfns(unsigned long *buf, struct memory_bitmap *bm) 1900 { 1901 int j; 1902 1903 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 1904 buf[j] = memory_bm_next_pfn(bm); 1905 if (unlikely(buf[j] == BM_END_OF_MAP)) 1906 break; 1907 /* Save page key for data page (s390 only). */ 1908 page_key_read(buf + j); 1909 } 1910 } 1911 1912 /** 1913 * snapshot_read_next - used for reading the system memory snapshot. 1914 * 1915 * On the first call to it @handle should point to a zeroed 1916 * snapshot_handle structure. The structure gets updated and a pointer 1917 * to it should be passed to this function every next time. 1918 * 1919 * On success the function returns a positive number. Then, the caller 1920 * is allowed to read up to the returned number of bytes from the memory 1921 * location computed by the data_of() macro. 1922 * 1923 * The function returns 0 to indicate the end of data stream condition, 1924 * and a negative number is returned on error. In such cases the 1925 * structure pointed to by @handle is not updated and should not be used 1926 * any more. 
1927 */ 1928 1929 int snapshot_read_next(struct snapshot_handle *handle) 1930 { 1931 if (handle->cur > nr_meta_pages + nr_copy_pages) 1932 return 0; 1933 1934 if (!buffer) { 1935 /* This makes the buffer be freed by swsusp_free() */ 1936 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 1937 if (!buffer) 1938 return -ENOMEM; 1939 } 1940 if (!handle->cur) { 1941 int error; 1942 1943 error = init_header((struct swsusp_info *)buffer); 1944 if (error) 1945 return error; 1946 handle->buffer = buffer; 1947 memory_bm_position_reset(&orig_bm); 1948 memory_bm_position_reset(©_bm); 1949 } else if (handle->cur <= nr_meta_pages) { 1950 clear_page(buffer); 1951 pack_pfns(buffer, &orig_bm); 1952 } else { 1953 struct page *page; 1954 1955 page = pfn_to_page(memory_bm_next_pfn(©_bm)); 1956 if (PageHighMem(page)) { 1957 /* Highmem pages are copied to the buffer, 1958 * because we can't return with a kmapped 1959 * highmem page (we may not be called again). 1960 */ 1961 void *kaddr; 1962 1963 kaddr = kmap_atomic(page); 1964 copy_page(buffer, kaddr); 1965 kunmap_atomic(kaddr); 1966 handle->buffer = buffer; 1967 } else { 1968 handle->buffer = page_address(page); 1969 } 1970 } 1971 handle->cur++; 1972 return PAGE_SIZE; 1973 } 1974 1975 /** 1976 * mark_unsafe_pages - mark the pages that cannot be used for storing 1977 * the image during resume, because they conflict with the pages that 1978 * had been used before suspend 1979 */ 1980 1981 static int mark_unsafe_pages(struct memory_bitmap *bm) 1982 { 1983 struct zone *zone; 1984 unsigned long pfn, max_zone_pfn; 1985 1986 /* Clear page flags */ 1987 for_each_populated_zone(zone) { 1988 max_zone_pfn = zone_end_pfn(zone); 1989 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1990 if (pfn_valid(pfn)) 1991 swsusp_unset_page_free(pfn_to_page(pfn)); 1992 } 1993 1994 /* Mark pages that correspond to the "original" pfns as "unsafe" */ 1995 memory_bm_position_reset(bm); 1996 do { 1997 pfn = memory_bm_next_pfn(bm); 1998 if (likely(pfn != BM_END_OF_MAP)) { 1999 if (likely(pfn_valid(pfn))) 2000 swsusp_set_page_free(pfn_to_page(pfn)); 2001 else 2002 return -EFAULT; 2003 } 2004 } while (pfn != BM_END_OF_MAP); 2005 2006 allocated_unsafe_pages = 0; 2007 2008 return 0; 2009 } 2010 2011 static void 2012 duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) 2013 { 2014 unsigned long pfn; 2015 2016 memory_bm_position_reset(src); 2017 pfn = memory_bm_next_pfn(src); 2018 while (pfn != BM_END_OF_MAP) { 2019 memory_bm_set_bit(dst, pfn); 2020 pfn = memory_bm_next_pfn(src); 2021 } 2022 } 2023 2024 static int check_header(struct swsusp_info *info) 2025 { 2026 char *reason; 2027 2028 reason = check_image_kernel(info); 2029 if (!reason && info->num_physpages != get_num_physpages()) 2030 reason = "memory size"; 2031 if (reason) { 2032 printk(KERN_ERR "PM: Image mismatch: %s\n", reason); 2033 return -EPERM; 2034 } 2035 return 0; 2036 } 2037 2038 /** 2039 * load header - check the image header and copy data from it 2040 */ 2041 2042 static int 2043 load_header(struct swsusp_info *info) 2044 { 2045 int error; 2046 2047 restore_pblist = NULL; 2048 error = check_header(info); 2049 if (!error) { 2050 nr_copy_pages = info->image_pages; 2051 nr_meta_pages = info->pages - info->image_pages - 1; 2052 } 2053 return error; 2054 } 2055 2056 /** 2057 * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set 2058 * the corresponding bit in the memory bitmap @bm 2059 */ 2060 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) 2061 { 2062 int j; 
/**
 * mark_unsafe_pages - mark the pages that cannot be used for storing
 * the image during resume, because they conflict with the pages that
 * had been used before suspend
 */

static int mark_unsafe_pages(struct memory_bitmap *bm)
{
	struct zone *zone;
	unsigned long pfn, max_zone_pfn;

	/* Clear page flags */
	for_each_populated_zone(zone) {
		max_zone_pfn = zone_end_pfn(zone);
		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
			if (pfn_valid(pfn))
				swsusp_unset_page_free(pfn_to_page(pfn));
	}

	/* Mark pages that correspond to the "original" pfns as "unsafe" */
	memory_bm_position_reset(bm);
	do {
		pfn = memory_bm_next_pfn(bm);
		if (likely(pfn != BM_END_OF_MAP)) {
			if (likely(pfn_valid(pfn)))
				swsusp_set_page_free(pfn_to_page(pfn));
			else
				return -EFAULT;
		}
	} while (pfn != BM_END_OF_MAP);

	allocated_unsafe_pages = 0;

	return 0;
}

static void
duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
{
	unsigned long pfn;

	memory_bm_position_reset(src);
	pfn = memory_bm_next_pfn(src);
	while (pfn != BM_END_OF_MAP) {
		memory_bm_set_bit(dst, pfn);
		pfn = memory_bm_next_pfn(src);
	}
}

static int check_header(struct swsusp_info *info)
{
	char *reason;

	reason = check_image_kernel(info);
	if (!reason && info->num_physpages != get_num_physpages())
		reason = "memory size";
	if (reason) {
		printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
		return -EPERM;
	}
	return 0;
}

/**
 * load_header - check the image header and copy the data from it
 */

static int
load_header(struct swsusp_info *info)
{
	int error;

	restore_pblist = NULL;
	error = check_header(info);
	if (!error) {
		nr_copy_pages = info->image_pages;
		nr_meta_pages = info->pages - info->image_pages - 1;
	}
	return error;
}

/**
 * unpack_orig_pfns - for each element of @buf[] (1 page at a time), set
 * the corresponding bit in the memory bitmap @bm
 */
static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
{
	int j;

	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
		if (unlikely(buf[j] == BM_END_OF_MAP))
			break;

		/* Extract and buffer page key for data page (s390 only). */
		page_key_memorize(buf + j);

		if (memory_bm_pfn_present(bm, buf[j]))
			memory_bm_set_bit(bm, buf[j]);
		else
			return -EFAULT;
	}

	return 0;
}
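/*
 * Illustrative sketch, not part of this file: a simplified mirror of
 * pack_pfns() (defined earlier in this file, with the s390 page-key
 * handling omitted).  It produces exactly the layout unpack_orig_pfns()
 * consumes: a page-sized array of PFNs, cut short by BM_END_OF_MAP once
 * the bitmap is exhausted.
 */
#if 0
static inline void example_pack_pfns(unsigned long *buf,
				     struct memory_bitmap *bm)
{
	int j;

	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
		buf[j] = memory_bm_next_pfn(bm);
		if (unlikely(buf[j] == BM_END_OF_MAP))
			break;
	}
}
#endif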
/* List of "safe" pages that may be used to store data loaded from the
 * suspend image
 */
static struct linked_page *safe_pages_list;

#ifdef CONFIG_HIGHMEM
/* struct highmem_pbe is used for creating the list of highmem pages that
 * should be restored atomically during the resume from disk, because the
 * page frames they have occupied before the suspend are in use.
 */
struct highmem_pbe {
	struct page *copy_page;	/* data is here now */
	struct page *orig_page;	/* data was here before the suspend */
	struct highmem_pbe *next;
};

/* List of highmem PBEs needed for restoring the highmem pages that were
 * allocated before the suspend and included in the suspend image, but have
 * also been allocated by the "resume" kernel, so their contents cannot be
 * written directly to their "original" page frames.
 */
static struct highmem_pbe *highmem_pblist;

/**
 * count_highmem_image_pages - compute the number of highmem pages in the
 * suspend image.  The bits in the memory bitmap @bm that correspond to the
 * image pages are assumed to be set.
 */

static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
{
	unsigned long pfn;
	unsigned int cnt = 0;

	memory_bm_position_reset(bm);
	pfn = memory_bm_next_pfn(bm);
	while (pfn != BM_END_OF_MAP) {
		if (PageHighMem(pfn_to_page(pfn)))
			cnt++;

		pfn = memory_bm_next_pfn(bm);
	}
	return cnt;
}

/**
 * prepare_highmem_image - try to allocate as many highmem pages as
 * there are highmem image pages (@nr_highmem_p points to the variable
 * containing the number of highmem image pages).  The pages that are
 * "safe" (ie. will not be overwritten when the suspend image is
 * restored) have the corresponding bits set in @bm (which must be
 * uninitialized when the function is called).
 *
 * NOTE: This function should not be called if there are no highmem
 * image pages.
 */

static unsigned int safe_highmem_pages;

static struct memory_bitmap *safe_highmem_bm;

static int
prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
{
	unsigned int to_alloc;

	if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
		return -ENOMEM;

	if (get_highmem_buffer(PG_SAFE))
		return -ENOMEM;

	to_alloc = count_free_highmem_pages();
	if (to_alloc > *nr_highmem_p)
		to_alloc = *nr_highmem_p;
	else
		*nr_highmem_p = to_alloc;

	safe_highmem_pages = 0;
	while (to_alloc-- > 0) {
		struct page *page;

		page = alloc_page(__GFP_HIGHMEM);
		if (!swsusp_page_is_free(page)) {
			/* The page is "safe", set its bit in the bitmap */
			memory_bm_set_bit(bm, page_to_pfn(page));
			safe_highmem_pages++;
		}
		/* Mark the page as allocated */
		swsusp_set_page_forbidden(page);
		swsusp_set_page_free(page);
	}
	memory_bm_position_reset(bm);
	safe_highmem_bm = bm;
	return 0;
}
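/*
 * Illustrative sketch, not part of this file: the allocate-and-classify
 * pattern used by prepare_highmem_image() above (and by prepare_image()
 * below).  A freshly allocated page may hold image data only if
 * mark_unsafe_pages() has not claimed its frame for the image proper,
 * and every allocation is marked so that swsusp_free() can find it.
 */
#if 0
static bool example_claim_page(struct page *page)
{
	bool safe = !swsusp_page_is_free(page);	/* frame not needed by the image */

	swsusp_set_page_forbidden(page);	/* make swsusp_free() release it */
	swsusp_set_page_free(page);
	return safe;
}
#endif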
/**
 * get_highmem_page_buffer - for given highmem image page find the buffer
 * that snapshot_write_next() should set for its caller to write to.
 *
 * If the page is to be saved to its "original" page frame or a copy of
 * the page is to be made in highmem, @buffer is returned.  Otherwise,
 * the copy of the page is to be made in normal memory, so the address
 * of the copy is returned.
 *
 * If @buffer is returned, the caller of snapshot_write_next() will write
 * the page's contents to @buffer, so they will have to be copied to the
 * right location on the next call to snapshot_write_next() and it is done
 * with the help of copy_last_highmem_page().  For this purpose, if
 * @buffer is returned, @last_highmem_page is set to the page to which
 * the data will have to be copied from @buffer.
 */

static struct page *last_highmem_page;

static void *
get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
{
	struct highmem_pbe *pbe;
	void *kaddr;

	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
		/* We have allocated the "original" page frame and we can
		 * use it directly to store the loaded page.
		 */
		last_highmem_page = page;
		return buffer;
	}
	/* The "original" page frame has not been allocated and we have to
	 * use a "safe" page frame to store the loaded page.
	 */
	pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
	if (!pbe) {
		swsusp_free();
		return ERR_PTR(-ENOMEM);
	}
	pbe->orig_page = page;
	if (safe_highmem_pages > 0) {
		struct page *tmp;

		/* Copy of the page will be stored in high memory */
		kaddr = buffer;
		tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
		safe_highmem_pages--;
		last_highmem_page = tmp;
		pbe->copy_page = tmp;
	} else {
		/* Copy of the page will be stored in normal memory */
		kaddr = safe_pages_list;
		safe_pages_list = safe_pages_list->next;
		pbe->copy_page = virt_to_page(kaddr);
	}
	pbe->next = highmem_pblist;
	highmem_pblist = pbe;
	return kaddr;
}

/**
 * copy_last_highmem_page - copy the contents of a highmem image page
 * from @buffer, where the caller of snapshot_write_next() has placed
 * them, to the right location represented by @last_highmem_page.
 */

static void copy_last_highmem_page(void)
{
	if (last_highmem_page) {
		void *dst;

		dst = kmap_atomic(last_highmem_page);
		copy_page(dst, buffer);
		kunmap_atomic(dst);
		last_highmem_page = NULL;
	}
}

static inline int last_highmem_page_copied(void)
{
	return !last_highmem_page;
}

static inline void free_highmem_data(void)
{
	if (safe_highmem_bm)
		memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);

	if (buffer)
		free_image_page(buffer, PG_UNSAFE_CLEAR);
}
#else
static inline int get_safe_write_buffer(void) { return 0; }

static inline unsigned int
count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }

static inline int
prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
{
	return 0;
}

static inline void *
get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
{
	return ERR_PTR(-EINVAL);
}

static inline void copy_last_highmem_page(void) {}
static inline int last_highmem_page_copied(void) { return 1; }
static inline void free_highmem_data(void) {}
#endif /* CONFIG_HIGHMEM */
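/*
 * Illustrative sketch, not part of this file: the deferred-copy contract
 * between get_highmem_page_buffer() and copy_last_highmem_page() above.
 * When the global buffer is handed out, the data written to it reaches
 * the highmem destination only on the *next* call into this layer:
 */
#if 0
	handle->buffer = get_highmem_page_buffer(page, ca);
	/* ... the caller fills handle->buffer with PAGE_SIZE bytes ... */
	copy_last_highmem_page();	/* kmaps last_highmem_page, copies, unmaps */
#endif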
/**
 * prepare_image - use the memory bitmap @bm to mark the pages that will
 * be overwritten in the process of restoring the system memory state
 * from the suspend image ("unsafe" pages) and allocate memory for the
 * image.
 *
 * The idea is to allocate a new memory bitmap first and then allocate
 * as many pages as needed for the image data, but not to assign these
 * pages to specific tasks initially.  Instead, we just mark them as
 * allocated and create a list of "safe" pages that will be used
 * later.  On systems with high memory a list of "safe" highmem pages is
 * also created.
 */

#define PBES_PER_LINKED_PAGE	(LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))

static int
prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
{
	unsigned int nr_pages, nr_highmem;
	struct linked_page *sp_list, *lp;
	int error;

	/* If there is no highmem, the buffer will not be necessary */
	free_image_page(buffer, PG_UNSAFE_CLEAR);
	buffer = NULL;

	nr_highmem = count_highmem_image_pages(bm);
	error = mark_unsafe_pages(bm);
	if (error)
		goto Free;

	error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
	if (error)
		goto Free;

	duplicate_memory_bitmap(new_bm, bm);
	memory_bm_free(bm, PG_UNSAFE_KEEP);
	if (nr_highmem > 0) {
		error = prepare_highmem_image(bm, &nr_highmem);
		if (error)
			goto Free;
	}
	/* Reserve some safe pages for potential later use.
	 *
	 * NOTE: This way we make sure there will be enough safe pages for the
	 * chain_alloc() in get_buffer().  It is a bit wasteful, but
	 * nr_copy_pages cannot be greater than 50% of the memory anyway.
	 */
	sp_list = NULL;
	/* nr_copy_pages cannot be less than allocated_unsafe_pages */
	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
	while (nr_pages > 0) {
		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
		if (!lp) {
			error = -ENOMEM;
			goto Free;
		}
		lp->next = sp_list;
		sp_list = lp;
		nr_pages--;
	}
	/* Preallocate memory for the image */
	safe_pages_list = NULL;
	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
	while (nr_pages > 0) {
		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
		if (!lp) {
			error = -ENOMEM;
			goto Free;
		}
		if (!swsusp_page_is_free(virt_to_page(lp))) {
			/* The page is "safe", add it to the list */
			lp->next = safe_pages_list;
			safe_pages_list = lp;
		}
		/* Mark the page as allocated */
		swsusp_set_page_forbidden(virt_to_page(lp));
		swsusp_set_page_free(virt_to_page(lp));
		nr_pages--;
	}
	/* Free the reserved safe pages so that chain_alloc() can use them */
	while (sp_list) {
		lp = sp_list->next;
		free_image_page(sp_list, PG_UNSAFE_CLEAR);
		sp_list = lp;
	}
	return 0;

 Free:
	swsusp_free();
	return error;
}
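/*
 * Worked example of the reservation math above (not part of this file),
 * assuming a 64-bit machine with 4 KB pages: struct pbe is three pointers,
 * i.e. 24 bytes, so PBES_PER_LINKED_PAGE = (4096 - 8) / 24 = 170.  An
 * image that needs, say, 10000 non-highmem PBEs therefore reserves
 * DIV_ROUND_UP(10000, 170) = 59 safe pages for chain_alloc() up front.
 */
#if 0
	nr_pages = DIV_ROUND_UP(10000, PBES_PER_LINKED_PAGE);	/* 59 */
#endif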
/**
 * get_buffer - compute the address that snapshot_write_next() should
 * set for its caller to write to.
 */

static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
{
	struct pbe *pbe;
	struct page *page;
	unsigned long pfn = memory_bm_next_pfn(bm);

	if (pfn == BM_END_OF_MAP)
		return ERR_PTR(-EFAULT);

	page = pfn_to_page(pfn);
	if (PageHighMem(page))
		return get_highmem_page_buffer(page, ca);

	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
		/* We have allocated the "original" page frame and we can
		 * use it directly to store the loaded page.
		 */
		return page_address(page);

	/* The "original" page frame has not been allocated and we have to
	 * use a "safe" page frame to store the loaded page.
	 */
	pbe = chain_alloc(ca, sizeof(struct pbe));
	if (!pbe) {
		swsusp_free();
		return ERR_PTR(-ENOMEM);
	}
	pbe->orig_address = page_address(page);
	pbe->address = safe_pages_list;
	safe_pages_list = safe_pages_list->next;
	pbe->next = restore_pblist;
	restore_pblist = pbe;
	return pbe->address;
}

/**
 * snapshot_write_next - used for writing the system memory snapshot.
 *
 * On the first call to it @handle should point to a zeroed
 * snapshot_handle structure.  The structure gets updated and a pointer
 * to it should be passed to this function on every subsequent call.
 *
 * On success the function returns a positive number.  Then, the caller
 * is allowed to write up to the returned number of bytes to the memory
 * location computed by the data_of() macro.
 *
 * The function returns 0 to indicate the "end of file" condition, and
 * a negative number is returned on error.  In such cases the structure
 * pointed to by @handle is not updated and should not be used any more.
 */

int snapshot_write_next(struct snapshot_handle *handle)
{
	static struct chain_allocator ca;
	int error = 0;

	/* Check if we have already loaded the entire image */
	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
		return 0;

	handle->sync_read = 1;

	if (!handle->cur) {
		if (!buffer)
			/* This makes the buffer be freed by swsusp_free() */
			buffer = get_image_page(GFP_ATOMIC, PG_ANY);

		if (!buffer)
			return -ENOMEM;

		handle->buffer = buffer;
	} else if (handle->cur == 1) {
		error = load_header(buffer);
		if (error)
			return error;

		error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
		if (error)
			return error;

		/* Allocate buffer for page keys. */
		error = page_key_alloc(nr_copy_pages);
		if (error)
			return error;

	} else if (handle->cur <= nr_meta_pages + 1) {
		error = unpack_orig_pfns(buffer, &copy_bm);
		if (error)
			return error;

		if (handle->cur == nr_meta_pages + 1) {
			error = prepare_image(&orig_bm, &copy_bm);
			if (error)
				return error;

			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
			memory_bm_position_reset(&orig_bm);
			restore_pblist = NULL;
			handle->buffer = get_buffer(&orig_bm, &ca);
			handle->sync_read = 0;
			if (IS_ERR(handle->buffer))
				return PTR_ERR(handle->buffer);
		}
	} else {
		copy_last_highmem_page();
		/* Restore page key for data page (s390 only). */
		page_key_write(handle->buffer);
		handle->buffer = get_buffer(&orig_bm, &ca);
		if (IS_ERR(handle->buffer))
			return PTR_ERR(handle->buffer);
		if (handle->buffer != buffer)
			handle->sync_read = 0;
	}
	handle->cur++;
	return PAGE_SIZE;
}
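/*
 * Illustrative sketch, not part of this file: image-loading code (such as
 * the swap reader in kernel/power/swap.c) is expected to drive
 * snapshot_write_next() in a loop, then finalize and verify the handle
 * using snapshot_write_finalize() and snapshot_image_loaded(), both
 * defined below.  The example_read_page() source is hypothetical.
 */
#if 0
static int example_load_image(struct snapshot_handle *handle)
{
	int ret;

	while ((ret = snapshot_write_next(handle)) > 0) {
		ret = example_read_page(data_of(*handle));	/* hypothetical source */
		if (ret)
			break;
	}
	snapshot_write_finalize(handle);
	if (!ret && !snapshot_image_loaded(handle))
		ret = -ENODATA;
	return ret;
}
#endif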
2558 * 2559 * If the resume eventually fails, we can call this function once 2560 * again and restore the "before resume" highmem state. 2561 */ 2562 2563 int restore_highmem(void) 2564 { 2565 struct highmem_pbe *pbe = highmem_pblist; 2566 void *buf; 2567 2568 if (!pbe) 2569 return 0; 2570 2571 buf = get_image_page(GFP_ATOMIC, PG_SAFE); 2572 if (!buf) 2573 return -ENOMEM; 2574 2575 while (pbe) { 2576 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); 2577 pbe = pbe->next; 2578 } 2579 free_image_page(buf, PG_UNSAFE_CLEAR); 2580 return 0; 2581 } 2582 #endif /* CONFIG_HIGHMEM */ 2583