2 * linux/kernel/power/swsusp.c
4 * This file implements an architecture-independent
5 * machine suspend feature using almost exclusively high-level routines
7 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
8 * Copyright (C) 1998,2001-2004 Pavel Machek <pavel@suse.cz>
10 * This file is released under the GPLv2.
12 * I'd like to thank the following people for their work:
14 * Pavel Machek <pavel@ucw.cz>:
15 * Modifications, defectiveness pointing, being with me at the very beginning,
16 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
18 * Steve Doddi <dirk@loth.demon.co.uk>:
19 * Support the possibility of hardware state restoring.
21 * Raph <grey.havens@earthling.net>:
22 * Support for preserving states of network devices and virtual console
23 * (including X and svgatextmode)
25 * Kurt Garloff <garloff@suse.de>:
26 * Straightened the critical function in order to prevent compilers from
27 * playing tricks with local variables.
29 * Andreas Mohr <a.mohr@mailto.de>
31 * Alex Badea <vampire@go.ro>:
34 * Andreas Steinmetz <ast@domdv.de>:
35 * Added encrypted suspend option
37 * More state savers are welcome. Especially for the scsi layer...
39 * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
42 #include <linux/module.h>
44 #include <linux/suspend.h>
45 #include <linux/smp_lock.h>
46 #include <linux/file.h>
47 #include <linux/utsname.h>
48 #include <linux/version.h>
49 #include <linux/delay.h>
50 #include <linux/reboot.h>
51 #include <linux/bitops.h>
52 #include <linux/vt_kern.h>
53 #include <linux/kbd_kern.h>
54 #include <linux/keyboard.h>
55 #include <linux/spinlock.h>
56 #include <linux/genhd.h>
57 #include <linux/kernel.h>
58 #include <linux/major.h>
59 #include <linux/swap.h>
61 #include <linux/device.h>
62 #include <linux/buffer_head.h>
63 #include <linux/swapops.h>
64 #include <linux/bootmem.h>
65 #include <linux/syscalls.h>
66 #include <linux/console.h>
67 #include <linux/highmem.h>
68 #include <linux/bio.h>
69 #include <linux/mount.h>
71 #include <asm/uaccess.h>
72 #include <asm/mmu_context.h>
73 #include <asm/pgtable.h>
74 #include <asm/tlbflush.h>
77 #include <linux/random.h>
78 #include <linux/crypto.h>
79 #include <asm/scatterlist.h>
87 /* References to section boundaries */
88 extern const void __nosave_begin
, __nosave_end
;
90 /* Variables to be preserved over suspend */
91 static int nr_copy_pages_check
;
93 extern char resume_file
[];
95 /* Local variables that should not be affected by save */
96 static unsigned int nr_copy_pages __nosavedata
= 0;
98 /* Suspend pagedir is allocated before final copy, therefore it
99 must be freed after resume
101 Warning: this is evil. There are actually two pagedirs at time of
102 resume. One is "pagedir_save", which is empty frame allocated at
103 time of suspend, that must be freed. Second is "pagedir_nosave",
104 allocated at time of resume, that travels through memory not to
105 collide with anything.
107 Warning: this is even more evil than it seems. Pagedirs this file
108 talks about are completely different from page directories used by
111 suspend_pagedir_t
*pagedir_nosave __nosavedata
= NULL
;
112 static suspend_pagedir_t
*pagedir_save
;
114 #define SWSUSP_SIG "S1SUSPEND"
116 static struct swsusp_header
{
117 char reserved
[PAGE_SIZE
- 20 - MAXKEY
- MAXIV
- sizeof(swp_entry_t
)];
118 u8 key_iv
[MAXKEY
+MAXIV
];
119 swp_entry_t swsusp_info
;
122 } __attribute__((packed
, aligned(PAGE_SIZE
))) swsusp_header
;
124 static struct swsusp_info swsusp_info
;
127 * XXX: We try to keep some more pages free so that I/O operations succeed
128 * without paging. Might this be more?
130 #define PAGES_FOR_IO 512
136 /* We memorize in swapfile_used what swap devices are used for suspension */
137 #define SWAPFILE_UNUSED 0
138 #define SWAPFILE_SUSPEND 1 /* This is the suspending device */
139 #define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */
141 static unsigned short swapfile_used
[MAX_SWAPFILES
];
142 static unsigned short root_swap
;
144 static int write_page(unsigned long addr
, swp_entry_t
* loc
);
145 static int bio_read_page(pgoff_t page_off
, void * page
);
147 static u8 key_iv
[MAXKEY
+MAXIV
];
149 #ifdef CONFIG_SWSUSP_ENCRYPT
151 static int crypto_init(int mode
, void **mem
)
156 struct crypto_tfm
*tfm
;
158 modemsg
= mode
? "suspend not possible" : "resume not possible";
160 tfm
= crypto_alloc_tfm(CIPHER
, CRYPTO_TFM_MODE_CBC
);
162 printk(KERN_ERR
"swsusp: no tfm, %s\n", modemsg
);
167 if(MAXKEY
< crypto_tfm_alg_min_keysize(tfm
)) {
168 printk(KERN_ERR
"swsusp: key buffer too small, %s\n", modemsg
);
174 get_random_bytes(key_iv
, MAXKEY
+MAXIV
);
176 len
= crypto_tfm_alg_max_keysize(tfm
);
180 if (crypto_cipher_setkey(tfm
, key_iv
, len
)) {
181 printk(KERN_ERR
"swsusp: key setup failure, %s\n", modemsg
);
182 error
= -EKEYREJECTED
;
186 len
= crypto_tfm_alg_ivsize(tfm
);
189 printk(KERN_ERR
"swsusp: iv buffer too small, %s\n", modemsg
);
194 crypto_cipher_set_iv(tfm
, key_iv
+MAXKEY
, len
);
200 fail
: crypto_free_tfm(tfm
);
/**
 *	crypto_exit - Release a cipher transform handle.
 *	@mem:	Opaque pointer that is really a struct crypto_tfm *.
 *
 *	Hands the transform to crypto_free_tfm().
 */
static __inline__ void crypto_exit(void *mem)
{
	struct crypto_tfm *tfm = mem;

	crypto_free_tfm(tfm);
}
209 static __inline__
int crypto_write(struct pbe
*p
, void *mem
)
212 struct scatterlist src
, dst
;
214 src
.page
= virt_to_page(p
->address
);
216 src
.length
= PAGE_SIZE
;
217 dst
.page
= virt_to_page((void *)&swsusp_header
);
219 dst
.length
= PAGE_SIZE
;
221 error
= crypto_cipher_encrypt((struct crypto_tfm
*)mem
, &dst
, &src
,
225 error
= write_page((unsigned long)&swsusp_header
,
230 static __inline__
int crypto_read(struct pbe
*p
, void *mem
)
233 struct scatterlist src
, dst
;
235 error
= bio_read_page(swp_offset(p
->swap_address
), (void *)p
->address
);
238 src
.length
= PAGE_SIZE
;
240 dst
.length
= PAGE_SIZE
;
241 src
.page
= dst
.page
= virt_to_page((void *)p
->address
);
243 error
= crypto_cipher_decrypt((struct crypto_tfm
*)mem
, &dst
,
249 static __inline__
int crypto_init(int mode
, void *mem
)
254 static __inline__
void crypto_exit(void *mem
)
258 static __inline__
int crypto_write(struct pbe
*p
, void *mem
)
260 return write_page(p
->address
, &(p
->swap_address
));
263 static __inline__
int crypto_read(struct pbe
*p
, void *mem
)
265 return bio_read_page(swp_offset(p
->swap_address
), (void *)p
->address
);
269 static int mark_swapfiles(swp_entry_t prev
)
273 rw_swap_page_sync(READ
,
274 swp_entry(root_swap
, 0),
275 virt_to_page((unsigned long)&swsusp_header
));
276 if (!memcmp("SWAP-SPACE",swsusp_header
.sig
, 10) ||
277 !memcmp("SWAPSPACE2",swsusp_header
.sig
, 10)) {
278 memcpy(swsusp_header
.orig_sig
,swsusp_header
.sig
, 10);
279 memcpy(swsusp_header
.sig
,SWSUSP_SIG
, 10);
280 memcpy(swsusp_header
.key_iv
, key_iv
, MAXKEY
+MAXIV
);
281 swsusp_header
.swsusp_info
= prev
;
282 error
= rw_swap_page_sync(WRITE
,
283 swp_entry(root_swap
, 0),
284 virt_to_page((unsigned long)
287 pr_debug("swsusp: Partition is not swap space.\n");
294 * Check whether the swap device is the specified resume
295 * device, irrespective of whether they are specified by
298 * (Thus, device inode aliasing is allowed. You can say /dev/hda4
299 * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
300 * and they'll be considered the same device. This is *necessary* for
301 * devfs, since the resume code can only recognize the form /dev/hda4,
302 * but the suspend code would see the long name.)
304 static int is_resume_device(const struct swap_info_struct
*swap_info
)
306 struct file
*file
= swap_info
->swap_file
;
307 struct inode
*inode
= file
->f_dentry
->d_inode
;
309 return S_ISBLK(inode
->i_mode
) &&
310 swsusp_resume_device
== MKDEV(imajor(inode
), iminor(inode
));
313 static int swsusp_swap_check(void) /* This is called before saving image */
317 len
=strlen(resume_file
);
320 spin_lock(&swap_lock
);
321 for (i
=0; i
<MAX_SWAPFILES
; i
++) {
322 if (!(swap_info
[i
].flags
& SWP_WRITEOK
)) {
323 swapfile_used
[i
]=SWAPFILE_UNUSED
;
326 printk(KERN_WARNING
"resume= option should be used to set suspend device" );
327 if (root_swap
== 0xFFFF) {
328 swapfile_used
[i
] = SWAPFILE_SUSPEND
;
331 swapfile_used
[i
] = SWAPFILE_IGNORED
;
333 /* we ignore all swap devices that are not the resume_file */
334 if (is_resume_device(&swap_info
[i
])) {
335 swapfile_used
[i
] = SWAPFILE_SUSPEND
;
338 swapfile_used
[i
] = SWAPFILE_IGNORED
;
343 spin_unlock(&swap_lock
);
344 return (root_swap
!= 0xffff) ? 0 : -ENODEV
;
348 * This is called after saving image so modification
349 * will be lost after resume... and that's what we want.
350 * we make the device unusable. A new call to
351 * lock_swapdevices can unlock the devices.
353 static void lock_swapdevices(void)
357 spin_lock(&swap_lock
);
358 for (i
= 0; i
< MAX_SWAPFILES
; i
++)
359 if (swapfile_used
[i
] == SWAPFILE_IGNORED
) {
360 swap_info
[i
].flags
^= SWP_WRITEOK
;
362 spin_unlock(&swap_lock
);
366 * write_page - Write one page to a fresh swap location.
367 * @addr: Address we're writing.
368 * @loc: Place to store the entry we used.
370 * Allocate a new swap entry and 'sync' it. Note we discard -EIO
371 * errors. That is an artifact left over from swsusp. It did not
372 * check the return of rw_swap_page_sync() at all, since most pages
373 * written back to swap would return -EIO.
374 * This is a partial improvement, since we will at least return other
375 * errors, though we need to eventually fix the damn code.
377 static int write_page(unsigned long addr
, swp_entry_t
* loc
)
382 entry
= get_swap_page();
383 if (swp_offset(entry
) &&
384 swapfile_used
[swp_type(entry
)] == SWAPFILE_SUSPEND
) {
385 error
= rw_swap_page_sync(WRITE
, entry
,
397 * data_free - Free the swap entries used by the saved image.
399 * Walk the list of used swap entries and free each one.
400 * This is only used for cleanup when suspend fails.
402 static void data_free(void)
407 for_each_pbe(p
, pagedir_nosave
) {
408 entry
= p
->swap_address
;
417 * data_write - Write saved image to swap.
419 * Walk the list of pages in the image and sync each one to swap.
421 static int data_write(void)
423 int error
= 0, i
= 0;
424 unsigned int mod
= nr_copy_pages
/ 100;
428 if ((error
= crypto_init(1, &tfm
)))
434 printk( "Writing data to swap (%d pages)... ", nr_copy_pages
);
435 for_each_pbe (p
, pagedir_nosave
) {
437 printk( "\b\b\b\b%3d%%", i
/ mod
);
438 if ((error
= crypto_write(p
, tfm
))) {
444 printk("\b\b\b\bdone\n");
449 static void dump_info(void)
451 pr_debug(" swsusp: Version: %u\n",swsusp_info
.version_code
);
452 pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info
.num_physpages
);
453 pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info
.uts
.sysname
);
454 pr_debug(" swsusp: UTS Node: %s\n",swsusp_info
.uts
.nodename
);
455 pr_debug(" swsusp: UTS Release: %s\n",swsusp_info
.uts
.release
);
456 pr_debug(" swsusp: UTS Version: %s\n",swsusp_info
.uts
.version
);
457 pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info
.uts
.machine
);
458 pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info
.uts
.domainname
);
459 pr_debug(" swsusp: CPUs: %d\n",swsusp_info
.cpus
);
460 pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info
.image_pages
);
461 pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info
.pagedir_pages
);
464 static void init_header(void)
466 memset(&swsusp_info
, 0, sizeof(swsusp_info
));
467 swsusp_info
.version_code
= LINUX_VERSION_CODE
;
468 swsusp_info
.num_physpages
= num_physpages
;
469 memcpy(&swsusp_info
.uts
, &system_utsname
, sizeof(system_utsname
));
471 swsusp_info
.suspend_pagedir
= pagedir_nosave
;
472 swsusp_info
.cpus
= num_online_cpus();
473 swsusp_info
.image_pages
= nr_copy_pages
;
476 static int close_swap(void)
482 error
= write_page((unsigned long)&swsusp_info
, &entry
);
485 error
= mark_swapfiles(entry
);
492 * free_pagedir_entries - Free pages used by the page directory.
494 * This is used during suspend for error recovery.
497 static void free_pagedir_entries(void)
501 for (i
= 0; i
< swsusp_info
.pagedir_pages
; i
++)
502 swap_free(swsusp_info
.pagedir
[i
]);
507 * write_pagedir - Write the array of pages holding the page directory.
508 * @last: Last swap entry we write (needed for header).
511 static int write_pagedir(void)
517 printk( "Writing pagedir...");
518 for_each_pb_page (pbe
, pagedir_nosave
) {
519 if ((error
= write_page((unsigned long)pbe
, &swsusp_info
.pagedir
[n
++])))
523 swsusp_info
.pagedir_pages
= n
;
524 printk("done (%u pages)\n", n
);
529 * write_suspend_image - Write entire image and metadata.
532 static int write_suspend_image(void)
537 if ((error
= data_write()))
540 if ((error
= write_pagedir()))
543 if ((error
= close_swap()))
546 memset(key_iv
, 0, MAXKEY
+MAXIV
);
549 free_pagedir_entries();
556 #ifdef CONFIG_HIGHMEM
557 struct highmem_page
{
560 struct highmem_page
*next
;
563 static struct highmem_page
*highmem_copy
;
565 static int save_highmem_zone(struct zone
*zone
)
567 unsigned long zone_pfn
;
568 mark_free_pages(zone
);
569 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
) {
571 struct highmem_page
*save
;
573 unsigned long pfn
= zone_pfn
+ zone
->zone_start_pfn
;
579 page
= pfn_to_page(pfn
);
581 * This condition results from rvmalloc() sans vmalloc_32()
582 * and architectural memory reservations. This should be
583 * corrected eventually when the cases giving rise to this
584 * are better understood.
586 if (PageReserved(page
)) {
587 printk("highmem reserved page?!\n");
590 BUG_ON(PageNosave(page
));
591 if (PageNosaveFree(page
))
593 save
= kmalloc(sizeof(struct highmem_page
), GFP_ATOMIC
);
596 save
->next
= highmem_copy
;
598 save
->data
= (void *) get_zeroed_page(GFP_ATOMIC
);
603 kaddr
= kmap_atomic(page
, KM_USER0
);
604 memcpy(save
->data
, kaddr
, PAGE_SIZE
);
605 kunmap_atomic(kaddr
, KM_USER0
);
610 #endif /* CONFIG_HIGHMEM */
613 static int save_highmem(void)
615 #ifdef CONFIG_HIGHMEM
619 pr_debug("swsusp: Saving Highmem\n");
620 for_each_zone (zone
) {
621 if (is_highmem(zone
))
622 res
= save_highmem_zone(zone
);
630 static int restore_highmem(void)
632 #ifdef CONFIG_HIGHMEM
633 printk("swsusp: Restoring Highmem\n");
634 while (highmem_copy
) {
635 struct highmem_page
*save
= highmem_copy
;
637 highmem_copy
= save
->next
;
639 kaddr
= kmap_atomic(save
->page
, KM_USER0
);
640 memcpy(kaddr
, save
->data
, PAGE_SIZE
);
641 kunmap_atomic(kaddr
, KM_USER0
);
642 free_page((long) save
->data
);
650 static int pfn_is_nosave(unsigned long pfn
)
652 unsigned long nosave_begin_pfn
= __pa(&__nosave_begin
) >> PAGE_SHIFT
;
653 unsigned long nosave_end_pfn
= PAGE_ALIGN(__pa(&__nosave_end
)) >> PAGE_SHIFT
;
654 return (pfn
>= nosave_begin_pfn
) && (pfn
< nosave_end_pfn
);
658 * saveable - Determine whether a page should be cloned or not.
661 * We save a page if it's Reserved, and not in the range of pages
662 * statically defined as 'unsaveable', or if it isn't reserved, and
663 * isn't part of a free chunk of pages.
666 static int saveable(struct zone
* zone
, unsigned long * zone_pfn
)
668 unsigned long pfn
= *zone_pfn
+ zone
->zone_start_pfn
;
674 page
= pfn_to_page(pfn
);
675 BUG_ON(PageReserved(page
) && PageNosave(page
));
676 if (PageNosave(page
))
678 if (PageReserved(page
) && pfn_is_nosave(pfn
)) {
679 pr_debug("[nosave pfn 0x%lx]", pfn
);
682 if (PageNosaveFree(page
))
688 static void count_data_pages(void)
691 unsigned long zone_pfn
;
695 for_each_zone (zone
) {
696 if (is_highmem(zone
))
698 mark_free_pages(zone
);
699 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
)
700 nr_copy_pages
+= saveable(zone
, &zone_pfn
);
705 static void copy_data_pages(void)
708 unsigned long zone_pfn
;
709 struct pbe
* pbe
= pagedir_nosave
;
711 pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages
);
712 for_each_zone (zone
) {
713 if (is_highmem(zone
))
715 mark_free_pages(zone
);
716 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
) {
717 if (saveable(zone
, &zone_pfn
)) {
719 page
= pfn_to_page(zone_pfn
+ zone
->zone_start_pfn
);
721 pbe
->orig_address
= (long) page_address(page
);
722 /* copy_page is not usable for copying task structs. */
723 memcpy((void *)pbe
->address
, (void *)pbe
->orig_address
, PAGE_SIZE
);
733 * calc_nr - Determine the number of pages needed for a pbe list.
736 static int calc_nr(int nr_copy
)
738 return nr_copy
+ (nr_copy
+PBES_PER_PAGE
-2)/(PBES_PER_PAGE
-1);
742 * free_pagedir - free pages allocated with alloc_pagedir()
745 static inline void free_pagedir(struct pbe
*pblist
)
750 pbe
= (pblist
+ PB_PAGE_SKIP
)->next
;
751 free_page((unsigned long)pblist
);
757 * fill_pb_page - Create a list of PBEs on a given memory page
760 static inline void fill_pb_page(struct pbe
*pbpage
)
765 pbpage
+= PB_PAGE_SKIP
;
768 while (++p
< pbpage
);
772 * create_pbe_list - Create a list of PBEs on top of a given chain
773 * of memory pages allocated with alloc_pagedir()
776 static void create_pbe_list(struct pbe
*pblist
, unsigned nr_pages
)
778 struct pbe
*pbpage
, *p
;
779 unsigned num
= PBES_PER_PAGE
;
781 for_each_pb_page (pbpage
, pblist
) {
785 fill_pb_page(pbpage
);
786 num
+= PBES_PER_PAGE
;
789 for (num
-= PBES_PER_PAGE
- 1, p
= pbpage
; num
< nr_pages
; p
++, num
++)
793 pr_debug("create_pbe_list(): initialized %d PBEs\n", num
);
797 * alloc_pagedir - Allocate the page directory.
799 * First, determine exactly how many pages we need and
802 * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
803 * struct pbe elements (pbes) and the last element in the page points
806 * On each page we set up a list of struct_pbe elements.
809 static struct pbe
* alloc_pagedir(unsigned nr_pages
)
812 struct pbe
*pblist
, *pbe
;
817 pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages
);
818 pblist
= (struct pbe
*)get_zeroed_page(GFP_ATOMIC
| __GFP_COLD
);
819 for (pbe
= pblist
, num
= PBES_PER_PAGE
; pbe
&& num
< nr_pages
;
820 pbe
= pbe
->next
, num
+= PBES_PER_PAGE
) {
822 pbe
->next
= (struct pbe
*)get_zeroed_page(GFP_ATOMIC
| __GFP_COLD
);
824 if (!pbe
) { /* get_zeroed_page() failed */
825 free_pagedir(pblist
);
832 * free_image_pages - Free pages allocated for snapshot
835 static void free_image_pages(void)
839 for_each_pbe (p
, pagedir_save
) {
841 ClearPageNosave(virt_to_page(p
->address
));
842 free_page(p
->address
);
849 * alloc_image_pages - Allocate pages for the snapshot.
852 static int alloc_image_pages(void)
856 for_each_pbe (p
, pagedir_save
) {
857 p
->address
= get_zeroed_page(GFP_ATOMIC
| __GFP_COLD
);
860 SetPageNosave(virt_to_page(p
->address
));
865 /* Free pages we allocated for suspend. Suspend pages are allocated
866 * before atomic copy, so we need to free them after resume.
868 void swsusp_free(void)
870 BUG_ON(PageNosave(virt_to_page(pagedir_save
)));
871 BUG_ON(PageNosaveFree(virt_to_page(pagedir_save
)));
873 free_pagedir(pagedir_save
);
878 * enough_free_mem - Make sure we have enough free memory to snapshot.
880 * Returns TRUE or FALSE after checking the number of available
884 static int enough_free_mem(void)
886 if (nr_free_pages() < (nr_copy_pages
+ PAGES_FOR_IO
)) {
887 pr_debug("swsusp: Not enough free pages: Have %d\n",
896 * enough_swap - Make sure we have enough swap to save the image.
898 * Returns TRUE or FALSE after checking the total amount of swap
901 * FIXME: si_swapinfo(&i) returns all swap devices information.
902 * We should only consider resume_device.
905 static int enough_swap(void)
910 if (i
.freeswap
< (nr_copy_pages
+ PAGES_FOR_IO
)) {
911 pr_debug("swsusp: Not enough swap. Need %ld\n",i
.freeswap
);
917 static int swsusp_alloc(void)
921 pagedir_nosave
= NULL
;
922 nr_copy_pages
= calc_nr(nr_copy_pages
);
923 nr_copy_pages_check
= nr_copy_pages
;
925 pr_debug("suspend: (pages needed: %d + %d free: %d)\n",
926 nr_copy_pages
, PAGES_FOR_IO
, nr_free_pages());
928 if (!enough_free_mem())
934 if (MAX_PBES
< nr_copy_pages
/ PBES_PER_PAGE
+
935 !!(nr_copy_pages
% PBES_PER_PAGE
))
938 if (!(pagedir_save
= alloc_pagedir(nr_copy_pages
))) {
939 printk(KERN_ERR
"suspend: Allocating pagedir failed.\n");
942 create_pbe_list(pagedir_save
, nr_copy_pages
);
943 pagedir_nosave
= pagedir_save
;
944 if ((error
= alloc_image_pages())) {
945 printk(KERN_ERR
"suspend: Allocating image pages failed.\n");
953 static int suspend_prepare_image(void)
957 pr_debug("swsusp: critical section: \n");
958 if (save_highmem()) {
959 printk(KERN_CRIT
"Suspend machine: Not enough free pages for highmem\n");
966 printk("swsusp: Need to copy %u pages\n", nr_copy_pages
);
968 error
= swsusp_alloc();
972 /* During allocating of suspend pagedir, new cold pages may appear.
979 * End of critical section. From now on, we can write to memory,
980 * but we should not touch disk. This specially means we must _not_
981 * touch swap space! Except we must write out our image of course.
984 printk("swsusp: critical section/: done (%d pages copied)\n", nr_copy_pages
);
989 /* It is important _NOT_ to umount filesystems at this point. We want
990 * them synced (in case something goes wrong) but we DO not want to mark
991 * filesystem clean: it is not. (And it does not matter, if we resume
992 * correctly, we'll mark system clean, anyway.)
994 int swsusp_write(void)
999 error
= write_suspend_image();
1000 /* This will unlock ignored swap devices since writing is finished */
1007 extern asmlinkage
int swsusp_arch_suspend(void);
1008 extern asmlinkage
int swsusp_arch_resume(void);
1011 asmlinkage
int swsusp_save(void)
1013 return suspend_prepare_image();
1016 int swsusp_suspend(void)
1019 if ((error
= arch_prepare_suspend()))
1021 local_irq_disable();
1022 /* At this point, device_suspend() has been called, but *not*
1023 * device_power_down(). We *must* device_power_down() now.
1024 * Otherwise, drivers for some devices (e.g. interrupt controllers)
1025 * become desynchronized with the actual state of the hardware
1026 * at resume time, and evil weirdness ensues.
1028 if ((error
= device_power_down(PMSG_FREEZE
))) {
1029 printk(KERN_ERR
"Some devices failed to power down, aborting suspend\n");
1034 if ((error
= swsusp_swap_check())) {
1035 printk(KERN_ERR
"swsusp: cannot find swap device, try swapon -a.\n");
1041 save_processor_state();
1042 if ((error
= swsusp_arch_suspend()))
1043 printk(KERN_ERR
"Error %d suspending\n", error
);
1044 /* Restore control flow magically appears here */
1045 restore_processor_state();
1046 BUG_ON (nr_copy_pages_check
!= nr_copy_pages
);
1053 int swsusp_resume(void)
1056 local_irq_disable();
1057 if (device_power_down(PMSG_FREEZE
))
1058 printk(KERN_ERR
"Some devices failed to power down, very bad\n");
1059 /* We'll ignore saved state, but this gets preempt count (etc) right */
1060 save_processor_state();
1061 error
= swsusp_arch_resume();
1062 /* Code below is only ever reached in case of failure. Otherwise
1063 * execution continues at place where swsusp_arch_suspend was called
1066 restore_processor_state();
1068 touch_softlockup_watchdog();
1075 * On resume, for storing the PBE list and the image,
1076 * we can only use memory pages that do not conflict with the pages
1077 * which had been used before suspend.
1079 * We don't know which pages are usable until we allocate them.
1081 * Allocated but unusable (ie eaten) memory pages are linked together
1082 * to create a list, so that we can free them easily
1084 * We could have used a type other than (void *)
1085 * for this purpose, but ...
1087 static void **eaten_memory
= NULL
;
1089 static inline void eat_page(void *page
)
1094 eaten_memory
= page
;
1098 unsigned long get_usable_page(unsigned gfp_mask
)
1102 m
= get_zeroed_page(gfp_mask
);
1103 while (!PageNosaveFree(virt_to_page(m
))) {
1104 eat_page((void *)m
);
1105 m
= get_zeroed_page(gfp_mask
);
1112 void free_eaten_memory(void)
1120 m
= (unsigned long)c
;
1125 eaten_memory
= NULL
;
1126 pr_debug("swsusp: %d unused pages freed\n", i
);
1130 * check_pagedir - We ensure here that pages that the PBEs point to
1131 * won't collide with pages where we're going to restore from the loaded
1135 static int check_pagedir(struct pbe
*pblist
)
1139 /* This is necessary, so that we can free allocated pages
1140 * in case of failure
1142 for_each_pbe (p
, pblist
)
1145 for_each_pbe (p
, pblist
) {
1146 p
->address
= get_usable_page(GFP_ATOMIC
);
1154 * swsusp_pagedir_relocate - It is possible, that some memory pages
1155 * occupied by the list of PBEs collide with pages where we're going to
1156 * restore from the loaded pages later. We relocate them here.
1159 static struct pbe
* swsusp_pagedir_relocate(struct pbe
*pblist
)
1162 unsigned long zone_pfn
;
1163 struct pbe
*pbpage
, *tail
, *p
;
1165 int rel
= 0, error
= 0;
1167 if (!pblist
) /* a sanity check */
1170 pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n",
1171 swsusp_info
.pagedir_pages
);
1173 /* Set page flags */
1175 for_each_zone (zone
) {
1176 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
)
1177 SetPageNosaveFree(pfn_to_page(zone_pfn
+
1178 zone
->zone_start_pfn
));
1181 /* Clear orig addresses */
1183 for_each_pbe (p
, pblist
)
1184 ClearPageNosaveFree(virt_to_page(p
->orig_address
));
1186 tail
= pblist
+ PB_PAGE_SKIP
;
1188 /* Relocate colliding pages */
1190 for_each_pb_page (pbpage
, pblist
) {
1191 if (!PageNosaveFree(virt_to_page((unsigned long)pbpage
))) {
1192 m
= (void *)get_usable_page(GFP_ATOMIC
| __GFP_COLD
);
1197 memcpy(m
, (void *)pbpage
, PAGE_SIZE
);
1198 if (pbpage
== pblist
)
1199 pblist
= (struct pbe
*)m
;
1201 tail
->next
= (struct pbe
*)m
;
1203 eat_page((void *)pbpage
);
1204 pbpage
= (struct pbe
*)m
;
1206 /* We have to link the PBEs again */
1208 for (p
= pbpage
; p
< pbpage
+ PB_PAGE_SKIP
; p
++)
1209 if (p
->next
) /* needed to save the end */
1214 tail
= pbpage
+ PB_PAGE_SKIP
;
1218 printk("\nswsusp: Out of memory\n\n");
1219 free_pagedir(pblist
);
1220 free_eaten_memory();
1222 /* Is this even worth handling? It should never ever happen, and we
1223 have just lost user's state, anyway... */
1225 printk("swsusp: Relocated %d pages\n", rel
);
1231 * Using bio to read from swap.
1232 * This code requires a bit more work than just using buffer heads
1233 * but, it is the recommended way for 2.5/2.6.
1234 * The following are to signal the beginning and end of I/O. Bios
1235 * finish asynchronously, while we want them to happen synchronously.
1236 * A simple atomic_t, and a wait loop take care of this problem.
1239 static atomic_t io_done
= ATOMIC_INIT(0);
1241 static int end_io(struct bio
* bio
, unsigned int num
, int err
)
1243 if (!test_bit(BIO_UPTODATE
, &bio
->bi_flags
))
1244 panic("I/O error reading memory image");
1245 atomic_set(&io_done
, 0);
1249 static struct block_device
* resume_bdev
;
1252 * submit - submit BIO request.
1253 * @rw: READ or WRITE.
1254 * @page_off: physical offset of page.
1255 * @page: page we're reading or writing.
1257 * Straight from the textbook - allocate and initialize the bio.
1258 * If we're writing, make sure the page is marked as dirty.
1259 * Then submit it and wait.
1262 static int submit(int rw
, pgoff_t page_off
, void * page
)
1267 bio
= bio_alloc(GFP_ATOMIC
, 1);
1270 bio
->bi_sector
= page_off
* (PAGE_SIZE
>> 9);
1272 bio
->bi_bdev
= resume_bdev
;
1273 bio
->bi_end_io
= end_io
;
1275 if (bio_add_page(bio
, virt_to_page(page
), PAGE_SIZE
, 0) < PAGE_SIZE
) {
1276 printk("swsusp: ERROR: adding page to bio at %ld\n",page_off
);
1282 bio_set_pages_dirty(bio
);
1284 atomic_set(&io_done
, 1);
1285 submit_bio(rw
| (1 << BIO_RW_SYNC
), bio
);
1286 while (atomic_read(&io_done
))
1294 static int bio_read_page(pgoff_t page_off
, void * page
)
1296 return submit(READ
, page_off
, page
);
1299 static int bio_write_page(pgoff_t page_off
, void * page
)
1301 return submit(WRITE
, page_off
, page
);
1305 * Sanity check whether this image makes sense with this kernel/swap context.
1306 * I really don't think that it's foolproof, but it is better than nothing.
1309 static const char * sanity_check(void)
1312 if (swsusp_info
.version_code
!= LINUX_VERSION_CODE
)
1313 return "kernel version";
1314 if (swsusp_info
.num_physpages
!= num_physpages
)
1315 return "memory size";
1316 if (strcmp(swsusp_info
.uts
.sysname
,system_utsname
.sysname
))
1317 return "system type";
1318 if (strcmp(swsusp_info
.uts
.release
,system_utsname
.release
))
1319 return "kernel release";
1320 if (strcmp(swsusp_info
.uts
.version
,system_utsname
.version
))
1322 if (strcmp(swsusp_info
.uts
.machine
,system_utsname
.machine
))
1325 /* We can't use number of online CPUs when we use hotplug to remove them ;-))) */
1326 if (swsusp_info
.cpus
!= num_possible_cpus())
1327 return "number of cpus";
1333 static int check_header(void)
1335 const char * reason
= NULL
;
1338 if ((error
= bio_read_page(swp_offset(swsusp_header
.swsusp_info
), &swsusp_info
)))
1341 /* Is this same machine? */
1342 if ((reason
= sanity_check())) {
1343 printk(KERN_ERR
"swsusp: Resume mismatch: %s\n",reason
);
1346 nr_copy_pages
= swsusp_info
.image_pages
;
1350 static int check_sig(void)
1354 memset(&swsusp_header
, 0, sizeof(swsusp_header
));
1355 if ((error
= bio_read_page(0, &swsusp_header
)))
1357 if (!memcmp(SWSUSP_SIG
, swsusp_header
.sig
, 10)) {
1358 memcpy(swsusp_header
.sig
, swsusp_header
.orig_sig
, 10);
1359 memcpy(key_iv
, swsusp_header
.key_iv
, MAXKEY
+MAXIV
);
1360 memset(swsusp_header
.key_iv
, 0, MAXKEY
+MAXIV
);
1363 * Reset swap signature now.
1365 error
= bio_write_page(0, &swsusp_header
);
1370 pr_debug("swsusp: Signature found, resuming\n");
1375 * data_read - Read image pages from swap.
1377 * You do not need to check for overlaps, check_pagedir()
1381 static int data_read(struct pbe
*pblist
)
1386 int mod
= swsusp_info
.image_pages
/ 100;
1389 if ((error
= crypto_init(0, &tfm
)))
1395 printk("swsusp: Reading image data (%lu pages): ",
1396 swsusp_info
.image_pages
);
1398 for_each_pbe (p
, pblist
) {
1400 printk("\b\b\b\b%3d%%", i
/ mod
);
1402 if ((error
= crypto_read(p
, tfm
))) {
1409 printk("\b\b\b\bdone\n");
1415 * read_pagedir - Read page backup list pages from swap
1418 static int read_pagedir(struct pbe
*pblist
)
1420 struct pbe
*pbpage
, *p
;
1427 printk("swsusp: Reading pagedir (%lu pages)\n",
1428 swsusp_info
.pagedir_pages
);
1430 for_each_pb_page (pbpage
, pblist
) {
1431 unsigned long offset
= swp_offset(swsusp_info
.pagedir
[i
++]);
1435 p
= (pbpage
+ PB_PAGE_SKIP
)->next
;
1436 error
= bio_read_page(offset
, (void *)pbpage
);
1437 (pbpage
+ PB_PAGE_SKIP
)->next
= p
;
1444 free_pagedir(pblist
);
1446 BUG_ON(i
!= swsusp_info
.pagedir_pages
);
1452 static int check_suspend_image(void)
1456 if ((error
= check_sig()))
1459 if ((error
= check_header()))
1465 static int read_suspend_image(void)
1470 if (!(p
= alloc_pagedir(nr_copy_pages
)))
1473 if ((error
= read_pagedir(p
)))
1476 create_pbe_list(p
, nr_copy_pages
);
1478 if (!(pagedir_nosave
= swsusp_pagedir_relocate(p
)))
1481 /* Allocate memory for the image and read the data from swap */
1483 error
= check_pagedir(pagedir_nosave
);
1486 error
= data_read(pagedir_nosave
);
1488 if (error
) { /* We fail cleanly */
1489 free_eaten_memory();
1490 for_each_pbe (p
, pagedir_nosave
)
1492 free_page(p
->address
);
1495 free_pagedir(pagedir_nosave
);
1501 * swsusp_check - Check for saved image in swap
1504 int swsusp_check(void)
1508 resume_bdev
= open_by_devnum(swsusp_resume_device
, FMODE_READ
);
1509 if (!IS_ERR(resume_bdev
)) {
1510 set_blocksize(resume_bdev
, PAGE_SIZE
);
1511 error
= check_suspend_image();
1513 blkdev_put(resume_bdev
);
1515 error
= PTR_ERR(resume_bdev
);
1518 pr_debug("swsusp: resume file found\n");
1520 pr_debug("swsusp: Error %d check for resume file\n", error
);
1525 * swsusp_read - Read saved image from swap.
1528 int swsusp_read(void)
1532 if (IS_ERR(resume_bdev
)) {
1533 pr_debug("swsusp: block device not initialised\n");
1534 return PTR_ERR(resume_bdev
);
1537 error
= read_suspend_image();
1538 blkdev_put(resume_bdev
);
1539 memset(key_iv
, 0, MAXKEY
+MAXIV
);
1542 pr_debug("swsusp: Reading resume file was successful\n");
1544 pr_debug("swsusp: Error %d resuming\n", error
);
1549 * swsusp_close - close swap device.
1552 void swsusp_close(void)
1554 if (IS_ERR(resume_bdev
)) {
1555 pr_debug("swsusp: block device not initialised\n");
1559 blkdev_put(resume_bdev
);