xref: /openbmc/linux/kernel/power/swap.c (revision a09d2831)
1 /*
2  * linux/kernel/power/swap.c
3  *
4  * This file provides functions for reading the suspend image from
5  * and writing it to a swap partition.
6  *
7  * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
8  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
9  *
10  * This file is released under the GPLv2.
11  *
12  */
13 
14 #include <linux/module.h>
15 #include <linux/file.h>
16 #include <linux/delay.h>
17 #include <linux/bitops.h>
18 #include <linux/genhd.h>
19 #include <linux/device.h>
20 #include <linux/buffer_head.h>
21 #include <linux/bio.h>
22 #include <linux/blkdev.h>
23 #include <linux/swap.h>
24 #include <linux/swapops.h>
25 #include <linux/pm.h>
26 
27 #include "power.h"
28 
/* Signature that mark_swapfiles() stores in place of the swap signature. */
#define SWSUSP_SIG	"S1SUSPEND"

/*
 * Layout of the page kept at swsusp_resume_block.  The reserved area is
 * sized so that, together with the remaining fields, the packed structure
 * occupies PAGE_SIZE bytes (the 20 accounts for the two 10-byte signature
 * arrays).
 */
struct swsusp_header {
	char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)];
	sector_t image;		/* start value handed to get_swap_reader() */
	unsigned int flags;	/* Flags to pass to the "boot" kernel */
	char	orig_sig[10];	/* swap signature saved by mark_swapfiles() */
	char	sig[10];	/* swap signature or SWSUSP_SIG */
} __attribute__((packed));

/* Buffer for the header page, allocated by swsusp_header_init(). */
static struct swsusp_header *swsusp_header;
40 
/**
 *	The following functions are used for tracking the allocated
 *	swap pages, so that they can be freed in case of an error.
 */
45 
/*
 * A run of consecutive allocated swap offsets, start..end inclusive,
 * stored as a node of the swsusp_extents red-black tree.
 */
struct swsusp_extent {
	struct rb_node node;
	unsigned long start;	/* first swap offset in the run */
	unsigned long end;	/* last swap offset in the run */
};

/* Root of the tree filled in by swsusp_extents_insert(). */
static struct rb_root swsusp_extents = RB_ROOT;
53 
54 static int swsusp_extents_insert(unsigned long swap_offset)
55 {
56 	struct rb_node **new = &(swsusp_extents.rb_node);
57 	struct rb_node *parent = NULL;
58 	struct swsusp_extent *ext;
59 
60 	/* Figure out where to put the new node */
61 	while (*new) {
62 		ext = container_of(*new, struct swsusp_extent, node);
63 		parent = *new;
64 		if (swap_offset < ext->start) {
65 			/* Try to merge */
66 			if (swap_offset == ext->start - 1) {
67 				ext->start--;
68 				return 0;
69 			}
70 			new = &((*new)->rb_left);
71 		} else if (swap_offset > ext->end) {
72 			/* Try to merge */
73 			if (swap_offset == ext->end + 1) {
74 				ext->end++;
75 				return 0;
76 			}
77 			new = &((*new)->rb_right);
78 		} else {
79 			/* It already is in the tree */
80 			return -EINVAL;
81 		}
82 	}
83 	/* Add the new node and rebalance the tree. */
84 	ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
85 	if (!ext)
86 		return -ENOMEM;
87 
88 	ext->start = swap_offset;
89 	ext->end = swap_offset;
90 	rb_link_node(&ext->node, parent, new);
91 	rb_insert_color(&ext->node, &swsusp_extents);
92 	return 0;
93 }
94 
95 /**
96  *	alloc_swapdev_block - allocate a swap page and register that it has
97  *	been allocated, so that it can be freed in case of an error.
98  */
99 
100 sector_t alloc_swapdev_block(int swap)
101 {
102 	unsigned long offset;
103 
104 	offset = swp_offset(get_swap_page_of_type(swap));
105 	if (offset) {
106 		if (swsusp_extents_insert(offset))
107 			swap_free(swp_entry(swap, offset));
108 		else
109 			return swapdev_block(swap, offset);
110 	}
111 	return 0;
112 }
113 
114 /**
115  *	free_all_swap_pages - free swap pages allocated for saving image data.
116  *	It also frees the extents used to register which swap entres had been
117  *	allocated.
118  */
119 
120 void free_all_swap_pages(int swap)
121 {
122 	struct rb_node *node;
123 
124 	while ((node = swsusp_extents.rb_node)) {
125 		struct swsusp_extent *ext;
126 		unsigned long offset;
127 
128 		ext = container_of(node, struct swsusp_extent, node);
129 		rb_erase(node, &swsusp_extents);
130 		for (offset = ext->start; offset <= ext->end; offset++)
131 			swap_free(swp_entry(swap, offset));
132 
133 		kfree(ext);
134 	}
135 }
136 
137 int swsusp_swap_in_use(void)
138 {
139 	return (swsusp_extents.rb_node != NULL);
140 }
141 
142 /*
143  * General things
144  */
145 
/* Swap type stored by swsusp_swap_check(); 0xffff until it has been set. */
static unsigned short root_swap = 0xffff;
/* Block device that submit() directs all I/O to. */
static struct block_device *resume_bdev;
148 
149 /**
150  *	submit - submit BIO request.
151  *	@rw:	READ or WRITE.
152  *	@off	physical offset of page.
153  *	@page:	page we're reading or writing.
154  *	@bio_chain: list of pending biod (for async reading)
155  *
156  *	Straight from the textbook - allocate and initialize the bio.
157  *	If we're reading, make sure the page is marked as dirty.
158  *	Then submit it and, if @bio_chain == NULL, wait.
159  */
160 static int submit(int rw, pgoff_t page_off, struct page *page,
161 			struct bio **bio_chain)
162 {
163 	const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
164 	struct bio *bio;
165 
166 	bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
167 	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
168 	bio->bi_bdev = resume_bdev;
169 	bio->bi_end_io = end_swap_bio_read;
170 
171 	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
172 		printk(KERN_ERR "PM: Adding page to bio failed at %ld\n",
173 			page_off);
174 		bio_put(bio);
175 		return -EFAULT;
176 	}
177 
178 	lock_page(page);
179 	bio_get(bio);
180 
181 	if (bio_chain == NULL) {
182 		submit_bio(bio_rw, bio);
183 		wait_on_page_locked(page);
184 		if (rw == READ)
185 			bio_set_pages_dirty(bio);
186 		bio_put(bio);
187 	} else {
188 		if (rw == READ)
189 			get_page(page);	/* These pages are freed later */
190 		bio->bi_private = *bio_chain;
191 		*bio_chain = bio;
192 		submit_bio(bio_rw, bio);
193 	}
194 	return 0;
195 }
196 
197 static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
198 {
199 	return submit(READ, page_off, virt_to_page(addr), bio_chain);
200 }
201 
202 static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
203 {
204 	return submit(WRITE, page_off, virt_to_page(addr), bio_chain);
205 }
206 
207 static int wait_on_bio_chain(struct bio **bio_chain)
208 {
209 	struct bio *bio;
210 	struct bio *next_bio;
211 	int ret = 0;
212 
213 	if (bio_chain == NULL)
214 		return 0;
215 
216 	bio = *bio_chain;
217 	if (bio == NULL)
218 		return 0;
219 	while (bio) {
220 		struct page *page;
221 
222 		next_bio = bio->bi_private;
223 		page = bio->bi_io_vec[0].bv_page;
224 		wait_on_page_locked(page);
225 		if (!PageUptodate(page) || PageError(page))
226 			ret = -EIO;
227 		put_page(page);
228 		bio_put(bio);
229 		bio = next_bio;
230 	}
231 	*bio_chain = NULL;
232 	return ret;
233 }
234 
235 /*
236  * Saving part
237  */
238 
239 static int mark_swapfiles(sector_t start, unsigned int flags)
240 {
241 	int error;
242 
243 	bio_read_page(swsusp_resume_block, swsusp_header, NULL);
244 	if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
245 	    !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
246 		memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
247 		memcpy(swsusp_header->sig,SWSUSP_SIG, 10);
248 		swsusp_header->image = start;
249 		swsusp_header->flags = flags;
250 		error = bio_write_page(swsusp_resume_block,
251 					swsusp_header, NULL);
252 	} else {
253 		printk(KERN_ERR "PM: Swap header not found!\n");
254 		error = -ENODEV;
255 	}
256 	return error;
257 }
258 
259 /**
260  *	swsusp_swap_check - check if the resume device is a swap device
261  *	and get its index (if so)
262  */
263 
264 static int swsusp_swap_check(void) /* This is called before saving image */
265 {
266 	int res;
267 
268 	res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
269 			&resume_bdev);
270 	if (res < 0)
271 		return res;
272 
273 	root_swap = res;
274 	res = blkdev_get(resume_bdev, FMODE_WRITE);
275 	if (res)
276 		return res;
277 
278 	res = set_blocksize(resume_bdev, PAGE_SIZE);
279 	if (res < 0)
280 		blkdev_put(resume_bdev, FMODE_WRITE);
281 
282 	return res;
283 }
284 
285 /**
286  *	write_page - Write one page to given swap location.
287  *	@buf:		Address we're writing.
288  *	@offset:	Offset of the swap page we're writing to.
289  *	@bio_chain:	Link the next write BIO here
290  */
291 
292 static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
293 {
294 	void *src;
295 
296 	if (!offset)
297 		return -ENOSPC;
298 
299 	if (bio_chain) {
300 		src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
301 		if (src) {
302 			memcpy(src, buf, PAGE_SIZE);
303 		} else {
304 			WARN_ON_ONCE(1);
305 			bio_chain = NULL;	/* Go synchronous */
306 			src = buf;
307 		}
308 	} else {
309 		src = buf;
310 	}
311 	return bio_write_page(offset, src, bio_chain);
312 }
313 
314 /*
315  *	The swap map is a data structure used for keeping track of each page
316  *	written to a swap partition.  It consists of many swap_map_page
317  *	structures that contain each an array of MAP_PAGE_SIZE swap entries.
318  *	These structures are stored on the swap and linked together with the
319  *	help of the .next_swap member.
320  *
321  *	The swap map is created during suspend.  The swap map pages are
322  *	allocated and populated one at a time, so we only need one memory
323  *	page to set up the entire structure.
324  *
325  *	During resume we also only need to use one swap_map_page structure
326  *	at a time.
327  */
328 
/* Number of sector_t entries that fit in a page, less one for next_swap. */
#define MAP_PAGE_ENTRIES	(PAGE_SIZE / sizeof(sector_t) - 1)

/* One page of the swap map: an array of entries followed by a link. */
struct swap_map_page {
	sector_t entries[MAP_PAGE_ENTRIES];	/* where each data page went */
	sector_t next_swap;	/* where the next swap_map_page is stored */
};
335 
/**
 *	The swap_map_handle structure is used for handling swap in
 *	a file-alike way
 */

struct swap_map_handle {
	struct swap_map_page *cur;	/* map page being filled or read */
	sector_t cur_swap;	/* where the writer will store @cur */
	unsigned int k;		/* index of the next entry in @cur->entries */
};
346 
347 static void release_swap_writer(struct swap_map_handle *handle)
348 {
349 	if (handle->cur)
350 		free_page((unsigned long)handle->cur);
351 	handle->cur = NULL;
352 }
353 
354 static int get_swap_writer(struct swap_map_handle *handle)
355 {
356 	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
357 	if (!handle->cur)
358 		return -ENOMEM;
359 	handle->cur_swap = alloc_swapdev_block(root_swap);
360 	if (!handle->cur_swap) {
361 		release_swap_writer(handle);
362 		return -ENOSPC;
363 	}
364 	handle->k = 0;
365 	return 0;
366 }
367 
368 static int swap_write_page(struct swap_map_handle *handle, void *buf,
369 				struct bio **bio_chain)
370 {
371 	int error = 0;
372 	sector_t offset;
373 
374 	if (!handle->cur)
375 		return -EINVAL;
376 	offset = alloc_swapdev_block(root_swap);
377 	error = write_page(buf, offset, bio_chain);
378 	if (error)
379 		return error;
380 	handle->cur->entries[handle->k++] = offset;
381 	if (handle->k >= MAP_PAGE_ENTRIES) {
382 		error = wait_on_bio_chain(bio_chain);
383 		if (error)
384 			goto out;
385 		offset = alloc_swapdev_block(root_swap);
386 		if (!offset)
387 			return -ENOSPC;
388 		handle->cur->next_swap = offset;
389 		error = write_page(handle->cur, handle->cur_swap, NULL);
390 		if (error)
391 			goto out;
392 		memset(handle->cur, 0, PAGE_SIZE);
393 		handle->cur_swap = offset;
394 		handle->k = 0;
395 	}
396  out:
397 	return error;
398 }
399 
400 static int flush_swap_writer(struct swap_map_handle *handle)
401 {
402 	if (handle->cur && handle->cur_swap)
403 		return write_page(handle->cur, handle->cur_swap, NULL);
404 	else
405 		return -EINVAL;
406 }
407 
408 /**
409  *	save_image - save the suspend image data
410  */
411 
412 static int save_image(struct swap_map_handle *handle,
413                       struct snapshot_handle *snapshot,
414                       unsigned int nr_to_write)
415 {
416 	unsigned int m;
417 	int ret;
418 	int nr_pages;
419 	int err2;
420 	struct bio *bio;
421 	struct timeval start;
422 	struct timeval stop;
423 
424 	printk(KERN_INFO "PM: Saving image data pages (%u pages) ...     ",
425 		nr_to_write);
426 	m = nr_to_write / 100;
427 	if (!m)
428 		m = 1;
429 	nr_pages = 0;
430 	bio = NULL;
431 	do_gettimeofday(&start);
432 	while (1) {
433 		ret = snapshot_read_next(snapshot, PAGE_SIZE);
434 		if (ret <= 0)
435 			break;
436 		ret = swap_write_page(handle, data_of(*snapshot), &bio);
437 		if (ret)
438 			break;
439 		if (!(nr_pages % m))
440 			printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
441 		nr_pages++;
442 	}
443 	err2 = wait_on_bio_chain(&bio);
444 	do_gettimeofday(&stop);
445 	if (!ret)
446 		ret = err2;
447 	if (!ret)
448 		printk(KERN_CONT "\b\b\b\bdone\n");
449 	else
450 		printk(KERN_CONT "\n");
451 	swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
452 	return ret;
453 }
454 
455 /**
456  *	enough_swap - Make sure we have enough swap to save the image.
457  *
458  *	Returns TRUE or FALSE after checking the total amount of swap
459  *	space avaiable from the resume partition.
460  */
461 
462 static int enough_swap(unsigned int nr_pages)
463 {
464 	unsigned int free_swap = count_swap_pages(root_swap, 1);
465 
466 	pr_debug("PM: Free swap pages: %u\n", free_swap);
467 	return free_swap > nr_pages + PAGES_FOR_IO;
468 }
469 
470 /**
471  *	swsusp_write - Write entire image and metadata.
472  *	@flags: flags to pass to the "boot" kernel in the image header
473  *
474  *	It is important _NOT_ to umount filesystems at this point. We want
475  *	them synced (in case something goes wrong) but we DO not want to mark
476  *	filesystem clean: it is not. (And it does not matter, if we resume
477  *	correctly, we'll mark system clean, anyway.)
478  */
479 
480 int swsusp_write(unsigned int flags)
481 {
482 	struct swap_map_handle handle;
483 	struct snapshot_handle snapshot;
484 	struct swsusp_info *header;
485 	int error;
486 
487 	error = swsusp_swap_check();
488 	if (error) {
489 		printk(KERN_ERR "PM: Cannot find swap device, try "
490 				"swapon -a.\n");
491 		return error;
492 	}
493 	memset(&snapshot, 0, sizeof(struct snapshot_handle));
494 	error = snapshot_read_next(&snapshot, PAGE_SIZE);
495 	if (error < PAGE_SIZE) {
496 		if (error >= 0)
497 			error = -EFAULT;
498 
499 		goto out;
500 	}
501 	header = (struct swsusp_info *)data_of(snapshot);
502 	if (!enough_swap(header->pages)) {
503 		printk(KERN_ERR "PM: Not enough free swap\n");
504 		error = -ENOSPC;
505 		goto out;
506 	}
507 	error = get_swap_writer(&handle);
508 	if (!error) {
509 		sector_t start = handle.cur_swap;
510 
511 		error = swap_write_page(&handle, header, NULL);
512 		if (!error)
513 			error = save_image(&handle, &snapshot,
514 					header->pages - 1);
515 
516 		if (!error) {
517 			flush_swap_writer(&handle);
518 			printk(KERN_INFO "PM: S");
519 			error = mark_swapfiles(start, flags);
520 			printk("|\n");
521 		}
522 	}
523 	if (error)
524 		free_all_swap_pages(root_swap);
525 
526 	release_swap_writer(&handle);
527  out:
528 	swsusp_close(FMODE_WRITE);
529 	return error;
530 }
531 
532 /**
533  *	The following functions allow us to read data using a swap map
534  *	in a file-alike way
535  */
536 
537 static void release_swap_reader(struct swap_map_handle *handle)
538 {
539 	if (handle->cur)
540 		free_page((unsigned long)handle->cur);
541 	handle->cur = NULL;
542 }
543 
544 static int get_swap_reader(struct swap_map_handle *handle, sector_t start)
545 {
546 	int error;
547 
548 	if (!start)
549 		return -EINVAL;
550 
551 	handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
552 	if (!handle->cur)
553 		return -ENOMEM;
554 
555 	error = bio_read_page(start, handle->cur, NULL);
556 	if (error) {
557 		release_swap_reader(handle);
558 		return error;
559 	}
560 	handle->k = 0;
561 	return 0;
562 }
563 
564 static int swap_read_page(struct swap_map_handle *handle, void *buf,
565 				struct bio **bio_chain)
566 {
567 	sector_t offset;
568 	int error;
569 
570 	if (!handle->cur)
571 		return -EINVAL;
572 	offset = handle->cur->entries[handle->k];
573 	if (!offset)
574 		return -EFAULT;
575 	error = bio_read_page(offset, buf, bio_chain);
576 	if (error)
577 		return error;
578 	if (++handle->k >= MAP_PAGE_ENTRIES) {
579 		error = wait_on_bio_chain(bio_chain);
580 		handle->k = 0;
581 		offset = handle->cur->next_swap;
582 		if (!offset)
583 			release_swap_reader(handle);
584 		else if (!error)
585 			error = bio_read_page(offset, handle->cur, NULL);
586 	}
587 	return error;
588 }
589 
590 /**
591  *	load_image - load the image using the swap map handle
592  *	@handle and the snapshot handle @snapshot
593  *	(assume there are @nr_pages pages to load)
594  */
595 
596 static int load_image(struct swap_map_handle *handle,
597                       struct snapshot_handle *snapshot,
598                       unsigned int nr_to_read)
599 {
600 	unsigned int m;
601 	int error = 0;
602 	struct timeval start;
603 	struct timeval stop;
604 	struct bio *bio;
605 	int err2;
606 	unsigned nr_pages;
607 
608 	printk(KERN_INFO "PM: Loading image data pages (%u pages) ...     ",
609 		nr_to_read);
610 	m = nr_to_read / 100;
611 	if (!m)
612 		m = 1;
613 	nr_pages = 0;
614 	bio = NULL;
615 	do_gettimeofday(&start);
616 	for ( ; ; ) {
617 		error = snapshot_write_next(snapshot, PAGE_SIZE);
618 		if (error <= 0)
619 			break;
620 		error = swap_read_page(handle, data_of(*snapshot), &bio);
621 		if (error)
622 			break;
623 		if (snapshot->sync_read)
624 			error = wait_on_bio_chain(&bio);
625 		if (error)
626 			break;
627 		if (!(nr_pages % m))
628 			printk("\b\b\b\b%3d%%", nr_pages / m);
629 		nr_pages++;
630 	}
631 	err2 = wait_on_bio_chain(&bio);
632 	do_gettimeofday(&stop);
633 	if (!error)
634 		error = err2;
635 	if (!error) {
636 		printk("\b\b\b\bdone\n");
637 		snapshot_write_finalize(snapshot);
638 		if (!snapshot_image_loaded(snapshot))
639 			error = -ENODATA;
640 	} else
641 		printk("\n");
642 	swsusp_show_speed(&start, &stop, nr_to_read, "Read");
643 	return error;
644 }
645 
646 /**
647  *	swsusp_read - read the hibernation image.
648  *	@flags_p: flags passed by the "frozen" kernel in the image header should
649  *		  be written into this memeory location
650  */
651 
652 int swsusp_read(unsigned int *flags_p)
653 {
654 	int error;
655 	struct swap_map_handle handle;
656 	struct snapshot_handle snapshot;
657 	struct swsusp_info *header;
658 
659 	*flags_p = swsusp_header->flags;
660 	if (IS_ERR(resume_bdev)) {
661 		pr_debug("PM: Image device not initialised\n");
662 		return PTR_ERR(resume_bdev);
663 	}
664 
665 	memset(&snapshot, 0, sizeof(struct snapshot_handle));
666 	error = snapshot_write_next(&snapshot, PAGE_SIZE);
667 	if (error < PAGE_SIZE)
668 		return error < 0 ? error : -EFAULT;
669 	header = (struct swsusp_info *)data_of(snapshot);
670 	error = get_swap_reader(&handle, swsusp_header->image);
671 	if (!error)
672 		error = swap_read_page(&handle, header, NULL);
673 	if (!error)
674 		error = load_image(&handle, &snapshot, header->pages - 1);
675 	release_swap_reader(&handle);
676 
677 	if (!error)
678 		pr_debug("PM: Image successfully loaded\n");
679 	else
680 		pr_debug("PM: Error %d resuming\n", error);
681 	return error;
682 }
683 
684 /**
685  *      swsusp_check - Check for swsusp signature in the resume device
686  */
687 
688 int swsusp_check(void)
689 {
690 	int error;
691 
692 	resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
693 	if (!IS_ERR(resume_bdev)) {
694 		set_blocksize(resume_bdev, PAGE_SIZE);
695 		memset(swsusp_header, 0, PAGE_SIZE);
696 		error = bio_read_page(swsusp_resume_block,
697 					swsusp_header, NULL);
698 		if (error)
699 			goto put;
700 
701 		if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) {
702 			memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
703 			/* Reset swap signature now */
704 			error = bio_write_page(swsusp_resume_block,
705 						swsusp_header, NULL);
706 		} else {
707 			error = -EINVAL;
708 		}
709 
710 put:
711 		if (error)
712 			blkdev_put(resume_bdev, FMODE_READ);
713 		else
714 			pr_debug("PM: Signature found, resuming\n");
715 	} else {
716 		error = PTR_ERR(resume_bdev);
717 	}
718 
719 	if (error)
720 		pr_debug("PM: Error %d checking image file\n", error);
721 
722 	return error;
723 }
724 
725 /**
726  *	swsusp_close - close swap device.
727  */
728 
729 void swsusp_close(fmode_t mode)
730 {
731 	if (IS_ERR(resume_bdev)) {
732 		pr_debug("PM: Image device not initialised\n");
733 		return;
734 	}
735 
736 	blkdev_put(resume_bdev, mode);
737 }
738 
739 static int swsusp_header_init(void)
740 {
741 	swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL);
742 	if (!swsusp_header)
743 		panic("Could not allocate memory for swsusp_header\n");
744 	return 0;
745 }
746 
747 core_initcall(swsusp_header_init);
748