xref: /openbmc/linux/kernel/power/snapshot.c (revision b04b4f78)
1 /*
2  * linux/kernel/power/snapshot.c
3  *
4  * This file provides system snapshot/restore functionality for swsusp.
5  *
6  * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
7  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
8  *
9  * This file is released under the GPLv2.
10  *
11  */
12 
13 #include <linux/version.h>
14 #include <linux/module.h>
15 #include <linux/mm.h>
16 #include <linux/suspend.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/spinlock.h>
20 #include <linux/kernel.h>
21 #include <linux/pm.h>
22 #include <linux/device.h>
23 #include <linux/init.h>
24 #include <linux/bootmem.h>
25 #include <linux/syscalls.h>
26 #include <linux/console.h>
27 #include <linux/highmem.h>
28 #include <linux/list.h>
29 
30 #include <asm/uaccess.h>
31 #include <asm/mmu_context.h>
32 #include <asm/pgtable.h>
33 #include <asm/tlbflush.h>
34 #include <asm/io.h>
35 
36 #include "power.h"
37 
38 static int swsusp_page_is_free(struct page *);
39 static void swsusp_set_page_forbidden(struct page *);
40 static void swsusp_unset_page_forbidden(struct page *);
41 
42 /* List of PBEs needed for restoring the pages that were allocated before
43  * the suspend and included in the suspend image, but have also been
44  * allocated by the "resume" kernel, so their contents cannot be written
45  * directly to their "original" page frames.
46  */
47 struct pbe *restore_pblist;
48 
49 /* Pointer to an auxiliary buffer (1 page) */
50 static void *buffer;
51 
52 /**
53  *	@safe_needed - on resume, for storing the PBE list and the image,
54  *	we can only use memory pages that do not conflict with the pages
55  *	used before suspend.  The unsafe pages have PageNosaveFree set
56  *	and we count them using allocated_unsafe_pages.
57  *
58  *	Each allocated image page is marked as PageNosave and PageNosaveFree
59  *	so that swsusp_free() can release it.
60  */
61 
62 #define PG_ANY		0
63 #define PG_SAFE		1
64 #define PG_UNSAFE_CLEAR	1
65 #define PG_UNSAFE_KEEP	0
66 
67 static unsigned int allocated_unsafe_pages;
68 
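/**
 *	get_image_page - allocate a single zeroed page for the image data
 *	@gfp_mask - GFP flags for the allocation
 *	@safe_needed - if set, only return a "safe" page, ie. one that does
 *	not conflict with a page frame that was in use before the suspend
 *
 *	Unsafe pages obtained along the way are not returned to the page
 *	allocator immediately; they are marked as forbidden and counted in
 *	allocated_unsafe_pages so that swsusp_free() can release them later.
 */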
69 static void *get_image_page(gfp_t gfp_mask, int safe_needed)
70 {
71 	void *res;
72 
73 	res = (void *)get_zeroed_page(gfp_mask);
74 	if (safe_needed)
75 		while (res && swsusp_page_is_free(virt_to_page(res))) {
76 			/* The page is unsafe, mark it for swsusp_free() */
77 			swsusp_set_page_forbidden(virt_to_page(res));
78 			allocated_unsafe_pages++;
79 			res = (void *)get_zeroed_page(gfp_mask);
80 		}
81 	if (res) {
82 		swsusp_set_page_forbidden(virt_to_page(res));
83 		swsusp_set_page_free(virt_to_page(res));
84 	}
85 	return res;
86 }
87 
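/**
 *	get_safe_page - allocate a page that is guaranteed not to collide
 *	with the image data being restored.  Intended for code that runs
 *	while the image is being loaded (e.g. architecture-specific resume
 *	code setting up temporary page tables).
 */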
88 unsigned long get_safe_page(gfp_t gfp_mask)
89 {
90 	return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
91 }
92 
93 static struct page *alloc_image_page(gfp_t gfp_mask)
94 {
95 	struct page *page;
96 
97 	page = alloc_page(gfp_mask);
98 	if (page) {
99 		swsusp_set_page_forbidden(page);
100 		swsusp_set_page_free(page);
101 	}
102 	return page;
103 }
104 
105 /**
106  *	free_image_page - free page represented by @addr, allocated with
107  *	get_image_page (page flags set by it must be cleared)
108  */
109 
110 static inline void free_image_page(void *addr, int clear_nosave_free)
111 {
112 	struct page *page;
113 
114 	BUG_ON(!virt_addr_valid(addr));
115 
116 	page = virt_to_page(addr);
117 
118 	swsusp_unset_page_forbidden(page);
119 	if (clear_nosave_free)
120 		swsusp_unset_page_free(page);
121 
122 	__free_page(page);
123 }
124 
125 /* struct linked_page is used to build chains of pages */
126 
127 #define LINKED_PAGE_DATA_SIZE	(PAGE_SIZE - sizeof(void *))
128 
129 struct linked_page {
130 	struct linked_page *next;
131 	char data[LINKED_PAGE_DATA_SIZE];
132 } __attribute__((packed));
133 
134 static inline void
135 free_list_of_pages(struct linked_page *list, int clear_page_nosave)
136 {
137 	while (list) {
138 		struct linked_page *lp = list->next;
139 
140 		free_image_page(list, clear_page_nosave);
141 		list = lp;
142 	}
143 }
144 
145 /**
146   *	struct chain_allocator is used for allocating small objects out of
147   *	a linked list of pages called 'the chain'.
148   *
149   *	The chain grows each time there is no room for a new object in
150   *	the current page.  The allocated objects cannot be freed individually.
151   *	It is only possible to free them all at once, by freeing the entire
152   *	chain.
153   *
154   *	NOTE: The chain allocator may be inefficient if the allocated objects
155   *	are not much smaller than PAGE_SIZE.
156   */
157 
158 struct chain_allocator {
159 	struct linked_page *chain;	/* the chain */
160 	unsigned int used_space;	/* total size of objects allocated out
161 					 * of the current page
162 					 */
163 	gfp_t gfp_mask;		/* mask for allocating pages */
164 	int safe_needed;	/* if set, only "safe" pages are allocated */
165 };
166 
167 static void
168 chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
169 {
170 	ca->chain = NULL;
171 	ca->used_space = LINKED_PAGE_DATA_SIZE;
172 	ca->gfp_mask = gfp_mask;
173 	ca->safe_needed = safe_needed;
174 }
175 
176 static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
177 {
178 	void *ret;
179 
180 	if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
181 		struct linked_page *lp;
182 
183 		lp = get_image_page(ca->gfp_mask, ca->safe_needed);
184 		if (!lp)
185 			return NULL;
186 
187 		lp->next = ca->chain;
188 		ca->chain = lp;
189 		ca->used_space = 0;
190 	}
191 	ret = ca->chain->data + ca->used_space;
192 	ca->used_space += size;
193 	return ret;
194 }
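
/*
 * Illustrative use of the chain allocator (a sketch only; memory_bm_create()
 * below does essentially this when building bitmap blocks):
 *
 *	struct chain_allocator ca;
 *	struct bm_block *bb;
 *
 *	chain_init(&ca, GFP_KERNEL, PG_ANY);
 *	bb = chain_alloc(&ca, sizeof(struct bm_block));
 *	...
 *	free_list_of_pages(ca.chain, PG_UNSAFE_KEEP);
 */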
195 
196 /**
197  *	Data types related to memory bitmaps.
198  *
199  *	A memory bitmap is a structure consisting of a linked list of
200  *	objects of type struct bm_block.  Each bm_block corresponds to a
201  *	contiguous range of page frames and points to one page of bitmap
202  *	data in which every bit represents one page frame.
203  *
204  *	struct memory_bitmap contains the list of bm_block objects, a
205  *	struct bm_position used for browsing the bitmap, and a pointer to
206  *	the chain of pages used for allocating the bm_block objects
207  *	themselves.
208  *
209  *	NOTE: It has to be possible to lay out the bitmap in memory
210  *	using only allocations of order 0.  Additionally, the bitmap is
211  *	designed to work with an arbitrary number of zones (this is over
212  *	the top for now, but let's avoid making unnecessary assumptions ;-).
213  *
214  *	struct bm_block contains a pointer to the memory page in which
215  *	information is stored (in the form of a block of bitmap).
216  *	It also contains the pfns that correspond to the start and end of
217  *	the represented memory area.
224  */
225 
226 #define BM_END_OF_MAP	(~0UL)
227 
228 #define BM_BITS_PER_BLOCK	(PAGE_SIZE << 3)
229 
230 struct bm_block {
231 	struct list_head hook;	/* hook into a list of bitmap blocks */
232 	unsigned long start_pfn;	/* pfn represented by the first bit */
233 	unsigned long end_pfn;	/* pfn represented by the last bit plus 1 */
234 	unsigned long *data;	/* bitmap representing pages */
235 };
236 
237 static inline unsigned long bm_block_bits(struct bm_block *bb)
238 {
239 	return bb->end_pfn - bb->start_pfn;
240 }
241 
242 /* struct bm_position is used for browsing memory bitmaps */
243 
244 struct bm_position {
245 	struct bm_block *block;
246 	int bit;
247 };
248 
249 struct memory_bitmap {
250 	struct list_head blocks;	/* list of bitmap blocks */
251 	struct linked_page *p_list;	/* list of pages used to store zone
252 					 * bitmap objects and bitmap block
253 					 * objects
254 					 */
255 	struct bm_position cur;	/* most recently used bit position */
256 };
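
/*
 * Rough layout of the structures above (two blocks shown as an example):
 *
 *	memory_bitmap
 *	    .blocks --> bm_block <--> bm_block       (one per pfn range)
 *	                   |             |
 *	                 data page     data page     (1 bit per page frame)
 *	    .cur        most recently used (block, bit) position
 *	    .p_list     chain of pages the bm_block objects live in
 */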
257 
258 /* Functions that operate on memory bitmaps */
259 
260 static void memory_bm_position_reset(struct memory_bitmap *bm)
261 {
262 	bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
263 	bm->cur.bit = 0;
264 }
265 
266 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
267 
268 /**
269  *	create_bm_block_list - create a list of bitmap block objects
270  *	@pages - number of page frames to be covered by the blocks
271  *	@list - list to put the allocated blocks into
272  *	@ca - chain allocator to be used for allocating memory
273  */
274 static int create_bm_block_list(unsigned long pages,
275 				struct list_head *list,
276 				struct chain_allocator *ca)
277 {
278 	unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
279 
280 	while (nr_blocks-- > 0) {
281 		struct bm_block *bb;
282 
283 		bb = chain_alloc(ca, sizeof(struct bm_block));
284 		if (!bb)
285 			return -ENOMEM;
286 		list_add(&bb->hook, list);
287 	}
288 
289 	return 0;
290 }
291 
292 struct mem_extent {
293 	struct list_head hook;
294 	unsigned long start;
295 	unsigned long end;
296 };
297 
298 /**
299  *	free_mem_extents - free a list of memory extents
300  *	@list - list of extents to empty
301  */
302 static void free_mem_extents(struct list_head *list)
303 {
304 	struct mem_extent *ext, *aux;
305 
306 	list_for_each_entry_safe(ext, aux, list, hook) {
307 		list_del(&ext->hook);
308 		kfree(ext);
309 	}
310 }
311 
312 /**
313  *	create_mem_extents - create a list of memory extents representing
314  *	                     contiguous ranges of PFNs
315  *	@list - list to put the extents into
316  *	@gfp_mask - mask to use for memory allocations
317  */
318 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
319 {
320 	struct zone *zone;
321 
322 	INIT_LIST_HEAD(list);
323 
324 	for_each_populated_zone(zone) {
325 		unsigned long zone_start, zone_end;
326 		struct mem_extent *ext, *cur, *aux;
327 
328 		zone_start = zone->zone_start_pfn;
329 		zone_end = zone->zone_start_pfn + zone->spanned_pages;
330 
331 		list_for_each_entry(ext, list, hook)
332 			if (zone_start <= ext->end)
333 				break;
334 
335 		if (&ext->hook == list || zone_end < ext->start) {
336 			/* New extent is necessary */
337 			struct mem_extent *new_ext;
338 
339 			new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
340 			if (!new_ext) {
341 				free_mem_extents(list);
342 				return -ENOMEM;
343 			}
344 			new_ext->start = zone_start;
345 			new_ext->end = zone_end;
346 			list_add_tail(&new_ext->hook, &ext->hook);
347 			continue;
348 		}
349 
350 		/* Merge this zone's range of PFNs with the existing one */
351 		if (zone_start < ext->start)
352 			ext->start = zone_start;
353 		if (zone_end > ext->end)
354 			ext->end = zone_end;
355 
356 		/* More merging may be possible */
357 		cur = ext;
358 		list_for_each_entry_safe_continue(cur, aux, list, hook) {
359 			if (zone_end < cur->start)
360 				break;
361 			if (zone_end < cur->end)
362 				ext->end = cur->end;
363 			list_del(&cur->hook);
364 			kfree(cur);
365 		}
366 	}
367 
368 	return 0;
369 }
370 
371 /**
372   *	memory_bm_create - allocate memory for a memory bitmap
373   */
374 static int
375 memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
376 {
377 	struct chain_allocator ca;
378 	struct list_head mem_extents;
379 	struct mem_extent *ext;
380 	int error;
381 
382 	chain_init(&ca, gfp_mask, safe_needed);
383 	INIT_LIST_HEAD(&bm->blocks);
384 
385 	error = create_mem_extents(&mem_extents, gfp_mask);
386 	if (error)
387 		return error;
388 
389 	list_for_each_entry(ext, &mem_extents, hook) {
390 		struct bm_block *bb;
391 		unsigned long pfn = ext->start;
392 		unsigned long pages = ext->end - ext->start;
393 
394 		bb = list_entry(bm->blocks.prev, struct bm_block, hook);
395 
396 		error = create_bm_block_list(pages, bm->blocks.prev, &ca);
397 		if (error)
398 			goto Error;
399 
400 		list_for_each_entry_continue(bb, &bm->blocks, hook) {
401 			bb->data = get_image_page(gfp_mask, safe_needed);
402 			if (!bb->data) {
403 				error = -ENOMEM;
404 				goto Error;
405 			}
406 
407 			bb->start_pfn = pfn;
408 			if (pages >= BM_BITS_PER_BLOCK) {
409 				pfn += BM_BITS_PER_BLOCK;
410 				pages -= BM_BITS_PER_BLOCK;
411 			} else {
412 				/* This is executed only once in the loop */
413 				pfn += pages;
414 			}
415 			bb->end_pfn = pfn;
416 		}
417 	}
418 
419 	bm->p_list = ca.chain;
420 	memory_bm_position_reset(bm);
421  Exit:
422 	free_mem_extents(&mem_extents);
423 	return error;
424 
425  Error:
426 	bm->p_list = ca.chain;
427 	memory_bm_free(bm, PG_UNSAFE_CLEAR);
428 	goto Exit;
429 }
430 
431 /**
432   *	memory_bm_free - free memory occupied by the memory bitmap @bm
433   */
434 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
435 {
436 	struct bm_block *bb;
437 
438 	list_for_each_entry(bb, &bm->blocks, hook)
439 		if (bb->data)
440 			free_image_page(bb->data, clear_nosave_free);
441 
442 	free_list_of_pages(bm->p_list, clear_nosave_free);
443 
444 	INIT_LIST_HEAD(&bm->blocks);
445 }
446 
447 /**
448  *	memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
449  *	to given pfn.  The cur.block member of @bm is updated so that the
450  *	next search can start from the block that has just been found.
451  */
452 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
453 				void **addr, unsigned int *bit_nr)
454 {
455 	struct bm_block *bb;
456 
457 	/*
458 	 * Check if the pfn corresponds to the current bitmap block and find
459 	 * the block where it fits if this is not the case.
460 	 */
461 	bb = bm->cur.block;
462 	if (pfn < bb->start_pfn)
463 		list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
464 			if (pfn >= bb->start_pfn)
465 				break;
466 
467 	if (pfn >= bb->end_pfn)
468 		list_for_each_entry_continue(bb, &bm->blocks, hook)
469 			if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
470 				break;
471 
472 	if (&bb->hook == &bm->blocks)
473 		return -EFAULT;
474 
475 	/* The block has been found */
476 	bm->cur.block = bb;
477 	pfn -= bb->start_pfn;
478 	bm->cur.bit = pfn + 1;
479 	*bit_nr = pfn;
480 	*addr = bb->data;
481 	return 0;
482 }
483 
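/*
 * The helpers below rely on memory_bm_find_bit() to translate a pfn into
 * a (bitmap page, bit number) pair.  The plain set/clear/test variants
 * BUG() on pfns that are not covered by the bitmap, mem_bm_set_bit_check()
 * returns an error instead, and memory_bm_pfn_present() only reports
 * whether a pfn is covered at all.
 */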
484 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
485 {
486 	void *addr;
487 	unsigned int bit;
488 	int error;
489 
490 	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
491 	BUG_ON(error);
492 	set_bit(bit, addr);
493 }
494 
495 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
496 {
497 	void *addr;
498 	unsigned int bit;
499 	int error;
500 
501 	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
502 	if (!error)
503 		set_bit(bit, addr);
504 	return error;
505 }
506 
507 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
508 {
509 	void *addr;
510 	unsigned int bit;
511 	int error;
512 
513 	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
514 	BUG_ON(error);
515 	clear_bit(bit, addr);
516 }
517 
518 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
519 {
520 	void *addr;
521 	unsigned int bit;
522 	int error;
523 
524 	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
525 	BUG_ON(error);
526 	return test_bit(bit, addr);
527 }
528 
529 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
530 {
531 	void *addr;
532 	unsigned int bit;
533 
534 	return !memory_bm_find_bit(bm, pfn, &addr, &bit);
535 }
536 
537 /**
538  *	memory_bm_next_pfn - find the pfn that corresponds to the next set bit
539  *	in the bitmap @bm.  If the pfn cannot be found, BM_END_OF_MAP is
540  *	returned.
541  *
542  *	It is required to run memory_bm_position_reset() before the first call to
543  *	this function.
544  */
545 
546 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
547 {
548 	struct bm_block *bb;
549 	int bit;
550 
551 	bb = bm->cur.block;
552 	do {
553 		bit = bm->cur.bit;
554 		bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
555 		if (bit < bm_block_bits(bb))
556 			goto Return_pfn;
557 
558 		bb = list_entry(bb->hook.next, struct bm_block, hook);
559 		bm->cur.block = bb;
560 		bm->cur.bit = 0;
561 	} while (&bb->hook != &bm->blocks);
562 
563 	memory_bm_position_reset(bm);
564 	return BM_END_OF_MAP;
565 
566  Return_pfn:
567 	bm->cur.bit = bit + 1;
568 	return bb->start_pfn + bit;
569 }
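
/*
 * Typical bitmap iteration (a sketch; copy_data_pages() and
 * duplicate_memory_bitmap() below follow this pattern):
 *
 *	unsigned long pfn;
 *
 *	memory_bm_position_reset(bm);
 *	for (;;) {
 *		pfn = memory_bm_next_pfn(bm);
 *		if (pfn == BM_END_OF_MAP)
 *			break;
 *		(operate on pfn)
 *	}
 */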
570 
571 /**
572  *	This structure represents a range of page frames the contents of which
573  *	should not be saved during the suspend.
574  */
575 
576 struct nosave_region {
577 	struct list_head list;
578 	unsigned long start_pfn;
579 	unsigned long end_pfn;
580 };
581 
582 static LIST_HEAD(nosave_regions);
583 
584 /**
585  *	__register_nosave_region - register a range of page frames the contents
586  *	of which should not be saved during the suspend (to be used in the early
587  *	initialization code)
588  */
589 
590 void __init
591 __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
592 			 int use_kmalloc)
593 {
594 	struct nosave_region *region;
595 
596 	if (start_pfn >= end_pfn)
597 		return;
598 
599 	if (!list_empty(&nosave_regions)) {
600 		/* Try to extend the previous region (they should be sorted) */
601 		region = list_entry(nosave_regions.prev,
602 					struct nosave_region, list);
603 		if (region->end_pfn == start_pfn) {
604 			region->end_pfn = end_pfn;
605 			goto Report;
606 		}
607 	}
608 	if (use_kmalloc) {
609 		/* during init, this shouldn't fail */
610 		region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
611 		BUG_ON(!region);
612 	} else
613 		/* This allocation cannot fail */
614 		region = alloc_bootmem_low(sizeof(struct nosave_region));
615 	region->start_pfn = start_pfn;
616 	region->end_pfn = end_pfn;
617 	list_add_tail(&region->list, &nosave_regions);
618  Report:
619 	printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n",
620 		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
621 }
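
/*
 * Platform and architecture setup code normally reaches this function via
 * the register_nosave_region() / register_nosave_region_late() wrappers in
 * <linux/suspend.h>.  Illustrative call (the physical range is made up):
 *
 *	register_nosave_region(PFN_DOWN(start_phys), PFN_UP(end_phys));
 */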
622 
623 /*
624  * Set bits in this map correspond to the page frames the contents of which
625  * should not be saved during the suspend.
626  */
627 static struct memory_bitmap *forbidden_pages_map;
628 
629 /* Set bits in this map correspond to free page frames. */
630 static struct memory_bitmap *free_pages_map;
631 
632 /*
633  * Each page frame allocated for creating the image is marked by setting the
634  * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
635  */
636 
637 void swsusp_set_page_free(struct page *page)
638 {
639 	if (free_pages_map)
640 		memory_bm_set_bit(free_pages_map, page_to_pfn(page));
641 }
642 
643 static int swsusp_page_is_free(struct page *page)
644 {
645 	return free_pages_map ?
646 		memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
647 }
648 
649 void swsusp_unset_page_free(struct page *page)
650 {
651 	if (free_pages_map)
652 		memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
653 }
654 
655 static void swsusp_set_page_forbidden(struct page *page)
656 {
657 	if (forbidden_pages_map)
658 		memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
659 }
660 
661 int swsusp_page_is_forbidden(struct page *page)
662 {
663 	return forbidden_pages_map ?
664 		memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
665 }
666 
667 static void swsusp_unset_page_forbidden(struct page *page)
668 {
669 	if (forbidden_pages_map)
670 		memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
671 }
672 
673 /**
674  *	mark_nosave_pages - set bits corresponding to the page frames the
675  *	contents of which should not be saved in a given bitmap.
676  */
677 
678 static void mark_nosave_pages(struct memory_bitmap *bm)
679 {
680 	struct nosave_region *region;
681 
682 	if (list_empty(&nosave_regions))
683 		return;
684 
685 	list_for_each_entry(region, &nosave_regions, list) {
686 		unsigned long pfn;
687 
688 		pr_debug("PM: Marking nosave pages: %016lx - %016lx\n",
689 				region->start_pfn << PAGE_SHIFT,
690 				region->end_pfn << PAGE_SHIFT);
691 
692 		for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
693 			if (pfn_valid(pfn)) {
694 				/*
695 				 * It is safe to ignore the result of
696 				 * mem_bm_set_bit_check() here, since we won't
697 				 * touch the PFNs for which the error is
698 				 * returned anyway.
699 				 */
700 				mem_bm_set_bit_check(bm, pfn);
701 			}
702 	}
703 }
704 
705 /**
706  *	create_basic_memory_bitmaps - create bitmaps needed for marking page
707  *	frames that should not be saved and free page frames.  The pointers
708  *	forbidden_pages_map and free_pages_map are only modified if everything
709  *	goes well, because we don't want the bits to be used before both bitmaps
710  *	are set up.
711  */
712 
713 int create_basic_memory_bitmaps(void)
714 {
715 	struct memory_bitmap *bm1, *bm2;
716 	int error = 0;
717 
718 	BUG_ON(forbidden_pages_map || free_pages_map);
719 
720 	bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
721 	if (!bm1)
722 		return -ENOMEM;
723 
724 	error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
725 	if (error)
726 		goto Free_first_object;
727 
728 	bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
729 	if (!bm2)
730 		goto Free_first_bitmap;
731 
732 	error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
733 	if (error)
734 		goto Free_second_object;
735 
736 	forbidden_pages_map = bm1;
737 	free_pages_map = bm2;
738 	mark_nosave_pages(forbidden_pages_map);
739 
740 	pr_debug("PM: Basic memory bitmaps created\n");
741 
742 	return 0;
743 
744  Free_second_object:
745 	kfree(bm2);
746  Free_first_bitmap:
747  	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
748  Free_first_object:
749 	kfree(bm1);
750 	return -ENOMEM;
751 }
752 
753 /**
754  *	free_basic_memory_bitmaps - free memory bitmaps allocated by
755  *	create_basic_memory_bitmaps().  The auxiliary pointers are necessary
756  *	so that the bitmaps themselves are not referred to while they are being
757  *	freed.
758  */
759 
760 void free_basic_memory_bitmaps(void)
761 {
762 	struct memory_bitmap *bm1, *bm2;
763 
764 	BUG_ON(!(forbidden_pages_map && free_pages_map));
765 
766 	bm1 = forbidden_pages_map;
767 	bm2 = free_pages_map;
768 	forbidden_pages_map = NULL;
769 	free_pages_map = NULL;
770 	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
771 	kfree(bm1);
772 	memory_bm_free(bm2, PG_UNSAFE_CLEAR);
773 	kfree(bm2);
774 
775 	pr_debug("PM: Basic memory bitmaps freed\n");
776 }
777 
778 /**
779  *	snapshot_additional_pages - estimate the number of additional pages
780  *	that will be needed for setting up the suspend image data structures
781  *	for the given zone (usually the returned value is greater than the exact number)
782  */
783 
784 unsigned int snapshot_additional_pages(struct zone *zone)
785 {
786 	unsigned int res;
787 
788 	res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
789 	res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
790 	return 2 * res;
791 }
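
/*
 * Rough example, assuming 4 KiB pages and a 64-bit struct bm_block: a zone
 * spanning 1 GiB (262144 page frames) needs 8 pages of bitmap data plus
 * roughly one page of bm_block objects; doubled for the two bitmaps used
 * during suspend, the function returns 18 pages for such a zone.
 */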
792 
793 #ifdef CONFIG_HIGHMEM
794 /**
795  *	count_free_highmem_pages - compute the total number of free highmem
796  *	pages, system-wide.
797  */
798 
799 static unsigned int count_free_highmem_pages(void)
800 {
801 	struct zone *zone;
802 	unsigned int cnt = 0;
803 
804 	for_each_populated_zone(zone)
805 		if (is_highmem(zone))
806 			cnt += zone_page_state(zone, NR_FREE_PAGES);
807 
808 	return cnt;
809 }
810 
811 /**
812  *	saveable_highmem_page - Determine whether a highmem page should be
813  *	included in the suspend image.
814  *
815  *	We should save the page if it isn't Nosave or NosaveFree, or Reserved,
816  *	and it isn't a part of a free chunk of pages.
817  */
818 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
819 {
820 	struct page *page;
821 
822 	if (!pfn_valid(pfn))
823 		return NULL;
824 
825 	page = pfn_to_page(pfn);
826 	if (page_zone(page) != zone)
827 		return NULL;
828 
829 	BUG_ON(!PageHighMem(page));
830 
831 	if (swsusp_page_is_forbidden(page) ||  swsusp_page_is_free(page) ||
832 	    PageReserved(page))
833 		return NULL;
834 
835 	return page;
836 }
837 
838 /**
839  *	count_highmem_pages - compute the total number of saveable highmem
840  *	pages.
841  */
842 
843 unsigned int count_highmem_pages(void)
844 {
845 	struct zone *zone;
846 	unsigned int n = 0;
847 
848 	for_each_zone(zone) {
849 		unsigned long pfn, max_zone_pfn;
850 
851 		if (!is_highmem(zone))
852 			continue;
853 
854 		mark_free_pages(zone);
855 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
856 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
857 			if (saveable_highmem_page(zone, pfn))
858 				n++;
859 	}
860 	return n;
861 }
862 #else
863 static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
864 {
865 	return NULL;
866 }
867 #endif /* CONFIG_HIGHMEM */
868 
869 /**
870  *	saveable_page - Determine whether a non-highmem page should be included
871  *	in the suspend image.
872  *
873  *	We should save the page if it isn't Nosave, and is not in the range
874  *	of pages statically defined as 'unsaveable', and it isn't a part of
875  *	a free chunk of pages.
876  */
877 static struct page *saveable_page(struct zone *zone, unsigned long pfn)
878 {
879 	struct page *page;
880 
881 	if (!pfn_valid(pfn))
882 		return NULL;
883 
884 	page = pfn_to_page(pfn);
885 	if (page_zone(page) != zone)
886 		return NULL;
887 
888 	BUG_ON(PageHighMem(page));
889 
890 	if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
891 		return NULL;
892 
893 	if (PageReserved(page)
894 	    && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
895 		return NULL;
896 
897 	return page;
898 }
899 
900 /**
901  *	count_data_pages - compute the total number of saveable non-highmem
902  *	pages.
903  */
904 
905 unsigned int count_data_pages(void)
906 {
907 	struct zone *zone;
908 	unsigned long pfn, max_zone_pfn;
909 	unsigned int n = 0;
910 
911 	for_each_zone(zone) {
912 		if (is_highmem(zone))
913 			continue;
914 
915 		mark_free_pages(zone);
916 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
917 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
918 			if (saveable_page(zone, pfn))
919 				n++;
920 	}
921 	return n;
922 }
923 
924 /* This is needed because copy_page and memcpy are not usable for copying
925  * task structs.
926  */
927 static inline void do_copy_page(long *dst, long *src)
928 {
929 	int n;
930 
931 	for (n = PAGE_SIZE / sizeof(long); n; n--)
932 		*dst++ = *src++;
933 }
934 
935 
936 /**
937  *	safe_copy_page - check if the page we are going to copy is marked as
938  *		present in the kernel page tables (this always is the case if
939  *		CONFIG_DEBUG_PAGEALLOC is not set and in that case
940  *		kernel_page_present() always returns 'true').
941  */
942 static void safe_copy_page(void *dst, struct page *s_page)
943 {
944 	if (kernel_page_present(s_page)) {
945 		do_copy_page(dst, page_address(s_page));
946 	} else {
947 		kernel_map_pages(s_page, 1, 1);
948 		do_copy_page(dst, page_address(s_page));
949 		kernel_map_pages(s_page, 1, 0);
950 	}
951 }
952 
953 
954 #ifdef CONFIG_HIGHMEM
955 static inline struct page *
956 page_is_saveable(struct zone *zone, unsigned long pfn)
957 {
958 	return is_highmem(zone) ?
959 		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
960 }
961 
962 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
963 {
964 	struct page *s_page, *d_page;
965 	void *src, *dst;
966 
967 	s_page = pfn_to_page(src_pfn);
968 	d_page = pfn_to_page(dst_pfn);
969 	if (PageHighMem(s_page)) {
970 		src = kmap_atomic(s_page, KM_USER0);
971 		dst = kmap_atomic(d_page, KM_USER1);
972 		do_copy_page(dst, src);
973 		kunmap_atomic(src, KM_USER0);
974 		kunmap_atomic(dst, KM_USER1);
975 	} else {
976 		if (PageHighMem(d_page)) {
977 			/* Page pointed to by src may contain some kernel
978 			 * data modified by kmap_atomic()
979 			 */
980 			safe_copy_page(buffer, s_page);
981 			dst = kmap_atomic(d_page, KM_USER0);
982 			memcpy(dst, buffer, PAGE_SIZE);
983 			kunmap_atomic(dst, KM_USER0);
984 		} else {
985 			safe_copy_page(page_address(d_page), s_page);
986 		}
987 	}
988 }
989 #else
990 #define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)
991 
992 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
993 {
994 	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
995 				pfn_to_page(src_pfn));
996 }
997 #endif /* CONFIG_HIGHMEM */
998 
999 static void
1000 copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
1001 {
1002 	struct zone *zone;
1003 	unsigned long pfn;
1004 
1005 	for_each_zone(zone) {
1006 		unsigned long max_zone_pfn;
1007 
1008 		mark_free_pages(zone);
1009 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1010 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1011 			if (page_is_saveable(zone, pfn))
1012 				memory_bm_set_bit(orig_bm, pfn);
1013 	}
1014 	memory_bm_position_reset(orig_bm);
1015 	memory_bm_position_reset(copy_bm);
1016 	for(;;) {
1017 		pfn = memory_bm_next_pfn(orig_bm);
1018 		if (unlikely(pfn == BM_END_OF_MAP))
1019 			break;
1020 		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1021 	}
1022 }
1023 
1024 /* Total number of image pages */
1025 static unsigned int nr_copy_pages;
1026 /* Number of pages needed for saving the original pfns of the image pages */
1027 static unsigned int nr_meta_pages;
1028 
1029 /**
1030  *	swsusp_free - free pages allocated for the suspend.
1031  *
1032  *	Suspend pages are allocated before the atomic copy is made, so we
1033  *	need to release them after the resume.
1034  */
1035 
1036 void swsusp_free(void)
1037 {
1038 	struct zone *zone;
1039 	unsigned long pfn, max_zone_pfn;
1040 
1041 	for_each_zone(zone) {
1042 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1043 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1044 			if (pfn_valid(pfn)) {
1045 				struct page *page = pfn_to_page(pfn);
1046 
1047 				if (swsusp_page_is_forbidden(page) &&
1048 				    swsusp_page_is_free(page)) {
1049 					swsusp_unset_page_forbidden(page);
1050 					swsusp_unset_page_free(page);
1051 					__free_page(page);
1052 				}
1053 			}
1054 	}
1055 	nr_copy_pages = 0;
1056 	nr_meta_pages = 0;
1057 	restore_pblist = NULL;
1058 	buffer = NULL;
1059 }
1060 
1061 #ifdef CONFIG_HIGHMEM
1062 /**
1063   *	count_pages_for_highmem - compute the number of non-highmem pages
1064   *	that will be necessary for creating copies of highmem pages.
1065   */
1066 
1067 static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1068 {
1069 	unsigned int free_highmem = count_free_highmem_pages();
1070 
1071 	if (free_highmem >= nr_highmem)
1072 		nr_highmem = 0;
1073 	else
1074 		nr_highmem -= free_highmem;
1075 
1076 	return nr_highmem;
1077 }
1078 #else
1079 static unsigned int
1080 count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
1081 #endif /* CONFIG_HIGHMEM */
1082 
1083 /**
1084  *	enough_free_mem - Make sure we have enough free memory for the
1085  *	snapshot image.
1086  */
1087 
1088 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1089 {
1090 	struct zone *zone;
1091 	unsigned int free = 0, meta = 0;
1092 
1093 	for_each_zone(zone) {
1094 		meta += snapshot_additional_pages(zone);
1095 		if (!is_highmem(zone))
1096 			free += zone_page_state(zone, NR_FREE_PAGES);
1097 	}
1098 
1099 	nr_pages += count_pages_for_highmem(nr_highmem);
1100 	pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n",
1101 		nr_pages, PAGES_FOR_IO, meta, free);
1102 
1103 	return free > nr_pages + PAGES_FOR_IO + meta;
1104 }
1105 
1106 #ifdef CONFIG_HIGHMEM
1107 /**
1108  *	get_highmem_buffer - if there are some highmem pages in the suspend
1109  *	image, we may need the buffer to copy them and/or load their data.
1110  */
1111 
1112 static inline int get_highmem_buffer(int safe_needed)
1113 {
1114 	buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
1115 	return buffer ? 0 : -ENOMEM;
1116 }
1117 
1118 /**
1119  *	alloc_highmem_image_pages - allocate some highmem pages for the image.
1120  *	Try to allocate as many pages as needed, but if the number of free
1121  *	highmem pages is smaller than that, allocate them all.
1122  */
1123 
1124 static inline unsigned int
1125 alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1126 {
1127 	unsigned int to_alloc = count_free_highmem_pages();
1128 
1129 	if (to_alloc > nr_highmem)
1130 		to_alloc = nr_highmem;
1131 
1132 	nr_highmem -= to_alloc;
1133 	while (to_alloc-- > 0) {
1134 		struct page *page;
1135 
1136 		page = alloc_image_page(__GFP_HIGHMEM);
1137 		memory_bm_set_bit(bm, page_to_pfn(page));
1138 	}
1139 	return nr_highmem;
1140 }
1141 #else
1142 static inline int get_highmem_buffer(int safe_needed) { return 0; }
1143 
1144 static inline unsigned int
1145 alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
1146 #endif /* CONFIG_HIGHMEM */
1147 
1148 /**
1149  *	swsusp_alloc - allocate memory for the suspend image
1150  *
1151  *	We first try to allocate as many highmem pages as there are
1152  *	saveable highmem pages in the system.  If that fails, we allocate
1153  *	non-highmem pages for the copies of the remaining highmem ones.
1154  *
1155  *	In this approach it is likely that the copies of highmem pages will
1156  *	also be located in the high memory, because of the way in which
1157  *	copy_data_pages() works.
1158  */
1159 
1160 static int
1161 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1162 		unsigned int nr_pages, unsigned int nr_highmem)
1163 {
1164 	int error;
1165 
1166 	error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1167 	if (error)
1168 		goto Free;
1169 
1170 	error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1171 	if (error)
1172 		goto Free;
1173 
1174 	if (nr_highmem > 0) {
1175 		error = get_highmem_buffer(PG_ANY);
1176 		if (error)
1177 			goto Free;
1178 
1179 		nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem);
1180 	}
1181 	while (nr_pages-- > 0) {
1182 		struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1183 
1184 		if (!page)
1185 			goto Free;
1186 
1187 		memory_bm_set_bit(copy_bm, page_to_pfn(page));
1188 	}
1189 	return 0;
1190 
1191  Free:
1192 	swsusp_free();
1193 	return -ENOMEM;
1194 }
1195 
1196 /* Memory bitmap used for marking saveable pages (during suspend) or the
1197  * suspend image pages (during resume)
1198  */
1199 static struct memory_bitmap orig_bm;
1200 /* Memory bitmap used on suspend for marking allocated pages that will contain
1201  * the copies of saveable pages.  During resume it is initially used for
1202  * marking the suspend image pages, but then its set bits are duplicated in
1203  * @orig_bm and it is released.  Next, on systems with high memory, it may be
1204  * used for marking "safe" highmem pages, but it has to be reinitialized for
1205  * this purpose.
1206  */
1207 static struct memory_bitmap copy_bm;
1208 
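/**
 *	swsusp_save - create the hibernation image in memory
 *
 *	Called by the architecture-specific hibernation code (typically from
 *	swsusp_arch_suspend()) with interrupts disabled and devices already
 *	suspended, so that memory does not change while the atomic copy of
 *	the saveable pages is being made.
 */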
1209 asmlinkage int swsusp_save(void)
1210 {
1211 	unsigned int nr_pages, nr_highmem;
1212 
1213 	printk(KERN_INFO "PM: Creating hibernation image:\n");
1214 
1215 	drain_local_pages(NULL);
1216 	nr_pages = count_data_pages();
1217 	nr_highmem = count_highmem_pages();
1218 	printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
1219 
1220 	if (!enough_free_mem(nr_pages, nr_highmem)) {
1221 		printk(KERN_ERR "PM: Not enough free memory\n");
1222 		return -ENOMEM;
1223 	}
1224 
1225 	if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1226 		printk(KERN_ERR "PM: Memory allocation failed\n");
1227 		return -ENOMEM;
1228 	}
1229 
1230 	/* During the allocation of the suspend pagedir, new cold pages may appear.
1231 	 * Kill them.
1232 	 */
1233 	drain_local_pages(NULL);
1234 	copy_data_pages(&copy_bm, &orig_bm);
1235 
1236 	/*
1237 	 * End of critical section. From now on, we can write to memory,
1238 	 * but we should not touch disk. This specially means we must _not_
1239 	 * touch swap space! Except we must write out our image of course.
1240 	 */
1241 
1242 	nr_pages += nr_highmem;
1243 	nr_copy_pages = nr_pages;
1244 	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1245 
1246 	printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
1247 		nr_pages);
1248 
1249 	return 0;
1250 }
1251 
1252 #ifndef CONFIG_ARCH_HIBERNATION_HEADER
1253 static int init_header_complete(struct swsusp_info *info)
1254 {
1255 	memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
1256 	info->version_code = LINUX_VERSION_CODE;
1257 	return 0;
1258 }
1259 
1260 static char *check_image_kernel(struct swsusp_info *info)
1261 {
1262 	if (info->version_code != LINUX_VERSION_CODE)
1263 		return "kernel version";
1264 	if (strcmp(info->uts.sysname,init_utsname()->sysname))
1265 		return "system type";
1266 	if (strcmp(info->uts.release,init_utsname()->release))
1267 		return "kernel release";
1268 	if (strcmp(info->uts.version,init_utsname()->version))
1269 		return "version";
1270 	if (strcmp(info->uts.machine,init_utsname()->machine))
1271 		return "machine";
1272 	return NULL;
1273 }
1274 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
1275 
1276 unsigned long snapshot_get_image_size(void)
1277 {
1278 	return nr_copy_pages + nr_meta_pages + 1;
1279 }
1280 
1281 static int init_header(struct swsusp_info *info)
1282 {
1283 	memset(info, 0, sizeof(struct swsusp_info));
1284 	info->num_physpages = num_physpages;
1285 	info->image_pages = nr_copy_pages;
1286 	info->pages = snapshot_get_image_size();
1287 	info->size = info->pages;
1288 	info->size <<= PAGE_SHIFT;
1289 	return init_header_complete(info);
1290 }
1291 
1292 /**
1293  *	pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
1294  *	are stored in the array @buf[] (1 page at a time)
1295  */
1296 
1297 static inline void
1298 pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1299 {
1300 	int j;
1301 
1302 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1303 		buf[j] = memory_bm_next_pfn(bm);
1304 		if (unlikely(buf[j] == BM_END_OF_MAP))
1305 			break;
1306 	}
1307 }
1308 
1309 /**
1310  *	snapshot_read_next - used for reading the system memory snapshot.
1311  *
1312  *	On the first call to it @handle should point to a zeroed
1313  *	snapshot_handle structure.  The structure gets updated and a pointer
1314  *	to it should be passed to this function every next time.
1315  *
1316  *	The @count parameter should contain the number of bytes the caller
1317  *	wants to read from the snapshot.  It must not be zero.
1318  *
1319  *	On success the function returns a positive number.  Then, the caller
1320  *	is allowed to read up to the returned number of bytes from the memory
1321  *	location computed by the data_of() macro.  The number returned
1322  *	may be smaller than @count, but this only happens if the read would
1323  *	cross a page boundary otherwise.
1324  *
1325  *	The function returns 0 to indicate the end of data stream condition,
1326  *	and a negative number is returned on error.  In such cases the
1327  *	structure pointed to by @handle is not updated and should not be used
1328  *	any more.
1329  */
1330 
1331 int snapshot_read_next(struct snapshot_handle *handle, size_t count)
1332 {
1333 	if (handle->cur > nr_meta_pages + nr_copy_pages)
1334 		return 0;
1335 
1336 	if (!buffer) {
1337 		/* This makes the buffer be freed by swsusp_free() */
1338 		buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1339 		if (!buffer)
1340 			return -ENOMEM;
1341 	}
1342 	if (!handle->offset) {
1343 		int error;
1344 
1345 		error = init_header((struct swsusp_info *)buffer);
1346 		if (error)
1347 			return error;
1348 		handle->buffer = buffer;
1349 		memory_bm_position_reset(&orig_bm);
1350 		memory_bm_position_reset(&copy_bm);
1351 	}
1352 	if (handle->prev < handle->cur) {
1353 		if (handle->cur <= nr_meta_pages) {
1354 			memset(buffer, 0, PAGE_SIZE);
1355 			pack_pfns(buffer, &orig_bm);
1356 		} else {
1357 			struct page *page;
1358 
1359 			page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1360 			if (PageHighMem(page)) {
1361 				/* Highmem pages are copied to the buffer,
1362 				 * because we can't return with a kmapped
1363 				 * highmem page (we may not be called again).
1364 				 */
1365 				void *kaddr;
1366 
1367 				kaddr = kmap_atomic(page, KM_USER0);
1368 				memcpy(buffer, kaddr, PAGE_SIZE);
1369 				kunmap_atomic(kaddr, KM_USER0);
1370 				handle->buffer = buffer;
1371 			} else {
1372 				handle->buffer = page_address(page);
1373 			}
1374 		}
1375 		handle->prev = handle->cur;
1376 	}
1377 	handle->buf_offset = handle->cur_offset;
1378 	if (handle->cur_offset + count >= PAGE_SIZE) {
1379 		count = PAGE_SIZE - handle->cur_offset;
1380 		handle->cur_offset = 0;
1381 		handle->cur++;
1382 	} else {
1383 		handle->cur_offset += count;
1384 	}
1385 	handle->offset += count;
1386 	return count;
1387 }
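
/*
 * Sketch of a reader loop (the swap and user space interfaces built on top
 * of this file follow roughly this pattern):
 *
 *	struct snapshot_handle handle;
 *	int ret;
 *
 *	memset(&handle, 0, sizeof(handle));
 *	while ((ret = snapshot_read_next(&handle, PAGE_SIZE)) > 0) {
 *		(write ret bytes starting at data_of(handle) to storage)
 *	}
 *	(ret == 0 means the whole image has been read, ret < 0 is an error)
 */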
1388 
1389 /**
1390  *	mark_unsafe_pages - mark the pages that cannot be used for storing
1391  *	the image during resume, because they conflict with the pages that
1392  *	had been used before suspend
1393  */
1394 
1395 static int mark_unsafe_pages(struct memory_bitmap *bm)
1396 {
1397 	struct zone *zone;
1398 	unsigned long pfn, max_zone_pfn;
1399 
1400 	/* Clear page flags */
1401 	for_each_zone(zone) {
1402 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1403 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1404 			if (pfn_valid(pfn))
1405 				swsusp_unset_page_free(pfn_to_page(pfn));
1406 	}
1407 
1408 	/* Mark pages that correspond to the "original" pfns as "unsafe" */
1409 	memory_bm_position_reset(bm);
1410 	do {
1411 		pfn = memory_bm_next_pfn(bm);
1412 		if (likely(pfn != BM_END_OF_MAP)) {
1413 			if (likely(pfn_valid(pfn)))
1414 				swsusp_set_page_free(pfn_to_page(pfn));
1415 			else
1416 				return -EFAULT;
1417 		}
1418 	} while (pfn != BM_END_OF_MAP);
1419 
1420 	allocated_unsafe_pages = 0;
1421 
1422 	return 0;
1423 }
1424 
1425 static void
1426 duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
1427 {
1428 	unsigned long pfn;
1429 
1430 	memory_bm_position_reset(src);
1431 	pfn = memory_bm_next_pfn(src);
1432 	while (pfn != BM_END_OF_MAP) {
1433 		memory_bm_set_bit(dst, pfn);
1434 		pfn = memory_bm_next_pfn(src);
1435 	}
1436 }
1437 
1438 static int check_header(struct swsusp_info *info)
1439 {
1440 	char *reason;
1441 
1442 	reason = check_image_kernel(info);
1443 	if (!reason && info->num_physpages != num_physpages)
1444 		reason = "memory size";
1445 	if (reason) {
1446 		printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
1447 		return -EPERM;
1448 	}
1449 	return 0;
1450 }
1451 
1452 /**
1453  *	load_header - check the image header and copy the data from it
1454  */
1455 
1456 static int
1457 load_header(struct swsusp_info *info)
1458 {
1459 	int error;
1460 
1461 	restore_pblist = NULL;
1462 	error = check_header(info);
1463 	if (!error) {
1464 		nr_copy_pages = info->image_pages;
1465 		nr_meta_pages = info->pages - info->image_pages - 1;
1466 	}
1467 	return error;
1468 }
1469 
1470 /**
1471  *	unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
1472  *	the corresponding bit in the memory bitmap @bm
1473  */
1474 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1475 {
1476 	int j;
1477 
1478 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1479 		if (unlikely(buf[j] == BM_END_OF_MAP))
1480 			break;
1481 
1482 		if (memory_bm_pfn_present(bm, buf[j]))
1483 			memory_bm_set_bit(bm, buf[j]);
1484 		else
1485 			return -EFAULT;
1486 	}
1487 
1488 	return 0;
1489 }
1490 
1491 /* List of "safe" pages that may be used to store data loaded from the suspend
1492  * image
1493  */
1494 static struct linked_page *safe_pages_list;
1495 
1496 #ifdef CONFIG_HIGHMEM
1497 /* struct highmem_pbe is used for creating the list of highmem pages that
1498  * should be restored atomically during the resume from disk, because the page
1499  * frames they have occupied before the suspend are in use.
1500  */
1501 struct highmem_pbe {
1502 	struct page *copy_page;	/* data is here now */
1503 	struct page *orig_page;	/* data was here before the suspend */
1504 	struct highmem_pbe *next;
1505 };
1506 
1507 /* List of highmem PBEs needed for restoring the highmem pages that were
1508  * allocated before the suspend and included in the suspend image, but have
1509  * also been allocated by the "resume" kernel, so their contents cannot be
1510  * written directly to their "original" page frames.
1511  */
1512 static struct highmem_pbe *highmem_pblist;
1513 
1514 /**
1515  *	count_highmem_image_pages - compute the number of highmem pages in the
1516  *	suspend image.  The bits in the memory bitmap @bm that correspond to the
1517  *	image pages are assumed to be set.
1518  */
1519 
1520 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
1521 {
1522 	unsigned long pfn;
1523 	unsigned int cnt = 0;
1524 
1525 	memory_bm_position_reset(bm);
1526 	pfn = memory_bm_next_pfn(bm);
1527 	while (pfn != BM_END_OF_MAP) {
1528 		if (PageHighMem(pfn_to_page(pfn)))
1529 			cnt++;
1530 
1531 		pfn = memory_bm_next_pfn(bm);
1532 	}
1533 	return cnt;
1534 }
1535 
1536 /**
1537  *	prepare_highmem_image - try to allocate as many highmem pages as
1538  *	there are highmem image pages (@nr_highmem_p points to the variable
1539  *	containing the number of highmem image pages).  The pages that are
1540  *	"safe" (ie. will not be overwritten when the suspend image is
1541  *	restored) have the corresponding bits set in @bm (it must be
1542  *	uninitialized).
1543  *
1544  *	NOTE: This function should not be called if there are no highmem
1545  *	image pages.
1546  */
1547 
1548 static unsigned int safe_highmem_pages;
1549 
1550 static struct memory_bitmap *safe_highmem_bm;
1551 
1552 static int
1553 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1554 {
1555 	unsigned int to_alloc;
1556 
1557 	if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
1558 		return -ENOMEM;
1559 
1560 	if (get_highmem_buffer(PG_SAFE))
1561 		return -ENOMEM;
1562 
1563 	to_alloc = count_free_highmem_pages();
1564 	if (to_alloc > *nr_highmem_p)
1565 		to_alloc = *nr_highmem_p;
1566 	else
1567 		*nr_highmem_p = to_alloc;
1568 
1569 	safe_highmem_pages = 0;
1570 	while (to_alloc-- > 0) {
1571 		struct page *page;
1572 
1573 		page = alloc_page(__GFP_HIGHMEM);
1574 		if (!swsusp_page_is_free(page)) {
1575 			/* The page is "safe", set its bit in the bitmap */
1576 			memory_bm_set_bit(bm, page_to_pfn(page));
1577 			safe_highmem_pages++;
1578 		}
1579 		/* Mark the page as allocated */
1580 		swsusp_set_page_forbidden(page);
1581 		swsusp_set_page_free(page);
1582 	}
1583 	memory_bm_position_reset(bm);
1584 	safe_highmem_bm = bm;
1585 	return 0;
1586 }
1587 
1588 /**
1589  *	get_highmem_page_buffer - for given highmem image page find the buffer
1590  *	that suspend_write_next() should set for its caller to write to.
1591  *
1592  *	If the page is to be saved to its "original" page frame or a copy of
1593  *	the page is to be made in the highmem, @buffer is returned.  Otherwise,
1594  *	the copy of the page is to be made in normal memory, so the address of
1595  *	the copy is returned.
1596  *
1597  *	If @buffer is returned, the caller of suspend_write_next() will write
1598  *	the page's contents to @buffer, so they will have to be copied to the
1599  *	right location on the next call to suspend_write_next() and it is done
1600  *	with the help of copy_last_highmem_page().  For this purpose, if
1601  *	@buffer is returned, @last_highmem_page is set to the page to which
1602  *	the data will have to be copied from @buffer.
1603  */
1604 
1605 static struct page *last_highmem_page;
1606 
1607 static void *
1608 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1609 {
1610 	struct highmem_pbe *pbe;
1611 	void *kaddr;
1612 
1613 	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
1614 		/* We have allocated the "original" page frame and we can
1615 		 * use it directly to store the loaded page.
1616 		 */
1617 		last_highmem_page = page;
1618 		return buffer;
1619 	}
1620 	/* The "original" page frame has not been allocated and we have to
1621 	 * use a "safe" page frame to store the loaded page.
1622 	 */
1623 	pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1624 	if (!pbe) {
1625 		swsusp_free();
1626 		return ERR_PTR(-ENOMEM);
1627 	}
1628 	pbe->orig_page = page;
1629 	if (safe_highmem_pages > 0) {
1630 		struct page *tmp;
1631 
1632 		/* Copy of the page will be stored in high memory */
1633 		kaddr = buffer;
1634 		tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
1635 		safe_highmem_pages--;
1636 		last_highmem_page = tmp;
1637 		pbe->copy_page = tmp;
1638 	} else {
1639 		/* Copy of the page will be stored in normal memory */
1640 		kaddr = safe_pages_list;
1641 		safe_pages_list = safe_pages_list->next;
1642 		pbe->copy_page = virt_to_page(kaddr);
1643 	}
1644 	pbe->next = highmem_pblist;
1645 	highmem_pblist = pbe;
1646 	return kaddr;
1647 }
1648 
1649 /**
1650  *	copy_last_highmem_page - copy the contents of a highmem image page from
1651  *	@buffer, where the caller of snapshot_write_next() has placed them,
1652  *	to the right location represented by @last_highmem_page.
1653  */
1654 
1655 static void copy_last_highmem_page(void)
1656 {
1657 	if (last_highmem_page) {
1658 		void *dst;
1659 
1660 		dst = kmap_atomic(last_highmem_page, KM_USER0);
1661 		memcpy(dst, buffer, PAGE_SIZE);
1662 		kunmap_atomic(dst, KM_USER0);
1663 		last_highmem_page = NULL;
1664 	}
1665 }
1666 
1667 static inline int last_highmem_page_copied(void)
1668 {
1669 	return !last_highmem_page;
1670 }
1671 
1672 static inline void free_highmem_data(void)
1673 {
1674 	if (safe_highmem_bm)
1675 		memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
1676 
1677 	if (buffer)
1678 		free_image_page(buffer, PG_UNSAFE_CLEAR);
1679 }
1680 #else
1681 static inline int get_safe_write_buffer(void) { return 0; }
1682 
1683 static unsigned int
1684 count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
1685 
1686 static inline int
1687 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1688 {
1689 	return 0;
1690 }
1691 
1692 static inline void *
1693 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1694 {
1695 	return ERR_PTR(-EINVAL);
1696 }
1697 
1698 static inline void copy_last_highmem_page(void) {}
1699 static inline int last_highmem_page_copied(void) { return 1; }
1700 static inline void free_highmem_data(void) {}
1701 #endif /* CONFIG_HIGHMEM */
1702 
1703 /**
1704  *	prepare_image - use the memory bitmap @bm to mark the pages that will
1705  *	be overwritten in the process of restoring the system memory state
1706  *	from the suspend image ("unsafe" pages) and allocate memory for the
1707  *	image.
1708  *
1709  *	The idea is to allocate a new memory bitmap first and then allocate
1710  *	as many pages as needed for the image data, but not to assign these
1711  *	pages to specific tasks initially.  Instead, we just mark them as
1712  *	allocated and create a list of "safe" pages that will be used
1713  *	later.  On systems with high memory a list of "safe" highmem pages is
1714  *	also created.
1715  */
1716 
1717 #define PBES_PER_LINKED_PAGE	(LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
1718 
1719 static int
1720 prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
1721 {
1722 	unsigned int nr_pages, nr_highmem;
1723 	struct linked_page *sp_list, *lp;
1724 	int error;
1725 
1726 	/* If there is no highmem, the buffer will not be necessary */
1727 	free_image_page(buffer, PG_UNSAFE_CLEAR);
1728 	buffer = NULL;
1729 
1730 	nr_highmem = count_highmem_image_pages(bm);
1731 	error = mark_unsafe_pages(bm);
1732 	if (error)
1733 		goto Free;
1734 
1735 	error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
1736 	if (error)
1737 		goto Free;
1738 
1739 	duplicate_memory_bitmap(new_bm, bm);
1740 	memory_bm_free(bm, PG_UNSAFE_KEEP);
1741 	if (nr_highmem > 0) {
1742 		error = prepare_highmem_image(bm, &nr_highmem);
1743 		if (error)
1744 			goto Free;
1745 	}
1746 	/* Reserve some safe pages for potential later use.
1747 	 *
1748 	 * NOTE: This way we make sure there will be enough safe pages for the
1749 	 * chain_alloc() in get_buffer().  It is a bit wasteful, but
1750 	 * nr_copy_pages cannot be greater than 50% of the memory anyway.
1751 	 */
1752 	sp_list = NULL;
1753 	/* nr_copy_pages cannot be less than allocated_unsafe_pages */
1754 	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1755 	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
1756 	while (nr_pages > 0) {
1757 		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
1758 		if (!lp) {
1759 			error = -ENOMEM;
1760 			goto Free;
1761 		}
1762 		lp->next = sp_list;
1763 		sp_list = lp;
1764 		nr_pages--;
1765 	}
1766 	/* Preallocate memory for the image */
1767 	safe_pages_list = NULL;
1768 	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1769 	while (nr_pages > 0) {
1770 		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
1771 		if (!lp) {
1772 			error = -ENOMEM;
1773 			goto Free;
1774 		}
1775 		if (!swsusp_page_is_free(virt_to_page(lp))) {
1776 			/* The page is "safe", add it to the list */
1777 			lp->next = safe_pages_list;
1778 			safe_pages_list = lp;
1779 		}
1780 		/* Mark the page as allocated */
1781 		swsusp_set_page_forbidden(virt_to_page(lp));
1782 		swsusp_set_page_free(virt_to_page(lp));
1783 		nr_pages--;
1784 	}
1785 	/* Free the reserved safe pages so that chain_alloc() can use them */
1786 	while (sp_list) {
1787 		lp = sp_list->next;
1788 		free_image_page(sp_list, PG_UNSAFE_CLEAR);
1789 		sp_list = lp;
1790 	}
1791 	return 0;
1792 
1793  Free:
1794 	swsusp_free();
1795 	return error;
1796 }
1797 
1798 /**
1799  *	get_buffer - compute the address that snapshot_write_next() should
1800  *	set for its caller to write to.
1801  */
1802 
1803 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
1804 {
1805 	struct pbe *pbe;
1806 	struct page *page;
1807 	unsigned long pfn = memory_bm_next_pfn(bm);
1808 
1809 	if (pfn == BM_END_OF_MAP)
1810 		return ERR_PTR(-EFAULT);
1811 
1812 	page = pfn_to_page(pfn);
1813 	if (PageHighMem(page))
1814 		return get_highmem_page_buffer(page, ca);
1815 
1816 	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
1817 		/* We have allocated the "original" page frame and we can
1818 		 * use it directly to store the loaded page.
1819 		 */
1820 		return page_address(page);
1821 
1822 	/* The "original" page frame has not been allocated and we have to
1823 	 * use a "safe" page frame to store the loaded page.
1824 	 */
1825 	pbe = chain_alloc(ca, sizeof(struct pbe));
1826 	if (!pbe) {
1827 		swsusp_free();
1828 		return ERR_PTR(-ENOMEM);
1829 	}
1830 	pbe->orig_address = page_address(page);
1831 	pbe->address = safe_pages_list;
1832 	safe_pages_list = safe_pages_list->next;
1833 	pbe->next = restore_pblist;
1834 	restore_pblist = pbe;
1835 	return pbe->address;
1836 }
1837 
1838 /**
1839  *	snapshot_write_next - used for writing the system memory snapshot.
1840  *
1841  *	On the first call to it @handle should point to a zeroed
1842  *	snapshot_handle structure.  The structure gets updated and a pointer
1843  *	to it should be passed to this function every next time.
1844  *
1845  *	The @count parameter should contain the number of bytes the caller
1846  *	wants to write to the image.  It must not be zero.
1847  *
1848  *	On success the function returns a positive number.  Then, the caller
1849  *	is allowed to write up to the returned number of bytes to the memory
1850  *	location computed by the data_of() macro.  The number returned
1851  *	may be smaller than @count, but this only happens if the write would
1852  *	cross a page boundary otherwise.
1853  *
1854  *	The function returns 0 to indicate the "end of file" condition,
1855  *	and a negative number is returned on error.  In such cases the
1856  *	structure pointed to by @handle is not updated and should not be used
1857  *	any more.
1858  */
1859 
1860 int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1861 {
1862 	static struct chain_allocator ca;
1863 	int error = 0;
1864 
1865 	/* Check if we have already loaded the entire image */
1866 	if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
1867 		return 0;
1868 
1869 	if (handle->offset == 0) {
1870 		if (!buffer)
1871 			/* This makes the buffer be freed by swsusp_free() */
1872 			buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1873 
1874 		if (!buffer)
1875 			return -ENOMEM;
1876 
1877 		handle->buffer = buffer;
1878 	}
1879 	handle->sync_read = 1;
1880 	if (handle->prev < handle->cur) {
1881 		if (handle->prev == 0) {
1882 			error = load_header(buffer);
1883 			if (error)
1884 				return error;
1885 
1886 			error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
1887 			if (error)
1888 				return error;
1889 
1890 		} else if (handle->prev <= nr_meta_pages) {
1891 			error = unpack_orig_pfns(buffer, &copy_bm);
1892 			if (error)
1893 				return error;
1894 
1895 			if (handle->prev == nr_meta_pages) {
1896 				error = prepare_image(&orig_bm, &copy_bm);
1897 				if (error)
1898 					return error;
1899 
1900 				chain_init(&ca, GFP_ATOMIC, PG_SAFE);
1901 				memory_bm_position_reset(&orig_bm);
1902 				restore_pblist = NULL;
1903 				handle->buffer = get_buffer(&orig_bm, &ca);
1904 				handle->sync_read = 0;
1905 				if (IS_ERR(handle->buffer))
1906 					return PTR_ERR(handle->buffer);
1907 			}
1908 		} else {
1909 			copy_last_highmem_page();
1910 			handle->buffer = get_buffer(&orig_bm, &ca);
1911 			if (IS_ERR(handle->buffer))
1912 				return PTR_ERR(handle->buffer);
1913 			if (handle->buffer != buffer)
1914 				handle->sync_read = 0;
1915 		}
1916 		handle->prev = handle->cur;
1917 	}
1918 	handle->buf_offset = handle->cur_offset;
1919 	if (handle->cur_offset + count >= PAGE_SIZE) {
1920 		count = PAGE_SIZE - handle->cur_offset;
1921 		handle->cur_offset = 0;
1922 		handle->cur++;
1923 	} else {
1924 		handle->cur_offset += count;
1925 	}
1926 	handle->offset += count;
1927 	return count;
1928 }
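
/*
 * Sketch of a restore loop, mirroring the reader sketch above:
 *
 *	struct snapshot_handle handle;
 *	int ret;
 *
 *	memset(&handle, 0, sizeof(handle));
 *	for (;;) {
 *		ret = snapshot_write_next(&handle, PAGE_SIZE);
 *		if (ret <= 0)
 *			break;
 *		(read ret bytes from storage into data_of(handle))
 *	}
 *	snapshot_write_finalize(&handle);
 *	if (ret >= 0 && !snapshot_image_loaded(&handle))
 *		(the image is incomplete and must not be used)
 */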
1929 
1930 /**
1931  *	snapshot_write_finalize - must be called after the last call to
1932  *	snapshot_write_next() in case the last page in the image happens
1933  *	to be a highmem page and its contents should be stored in the
1934  *	highmem.  Additionally, it releases the memory that will not be
1935  *	used any more.
1936  */
1937 
1938 void snapshot_write_finalize(struct snapshot_handle *handle)
1939 {
1940 	copy_last_highmem_page();
1941 	/* Free only if we have loaded the image entirely */
1942 	if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
1943 		memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
1944 		free_highmem_data();
1945 	}
1946 }
1947 
1948 int snapshot_image_loaded(struct snapshot_handle *handle)
1949 {
1950 	return !(!nr_copy_pages || !last_highmem_page_copied() ||
1951 			handle->cur <= nr_meta_pages + nr_copy_pages);
1952 }
1953 
1954 #ifdef CONFIG_HIGHMEM
1955 /* Assumes that @buf is ready and points to a "safe" page */
1956 static inline void
1957 swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
1958 {
1959 	void *kaddr1, *kaddr2;
1960 
1961 	kaddr1 = kmap_atomic(p1, KM_USER0);
1962 	kaddr2 = kmap_atomic(p2, KM_USER1);
1963 	memcpy(buf, kaddr1, PAGE_SIZE);
1964 	memcpy(kaddr1, kaddr2, PAGE_SIZE);
1965 	memcpy(kaddr2, buf, PAGE_SIZE);
1966 	kunmap_atomic(kaddr1, KM_USER0);
1967 	kunmap_atomic(kaddr2, KM_USER1);
1968 }
1969 
1970 /**
1971  *	restore_highmem - for each highmem page that was allocated before
1972  *	the suspend and included in the suspend image, and also has been
1973  *	allocated by the "resume" kernel, swap its current (ie. "before
1974  *	resume") contents with the previous (ie. "before suspend") one.
1975  *
1976  *	If the resume eventually fails, we can call this function once
1977  *	again and restore the "before resume" highmem state.
1978  */
1979 
1980 int restore_highmem(void)
1981 {
1982 	struct highmem_pbe *pbe = highmem_pblist;
1983 	void *buf;
1984 
1985 	if (!pbe)
1986 		return 0;
1987 
1988 	buf = get_image_page(GFP_ATOMIC, PG_SAFE);
1989 	if (!buf)
1990 		return -ENOMEM;
1991 
1992 	while (pbe) {
1993 		swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
1994 		pbe = pbe->next;
1995 	}
1996 	free_image_page(buf, PG_UNSAFE_CLEAR);
1997 	return 0;
1998 }
1999 #endif /* CONFIG_HIGHMEM */
2000