2 * linux/kernel/power/snapshot.c
4 * This file provides system snapshot/restore functionality.
6 * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
8 * This file is released under the GPLv2, and is based on swsusp.c.
13 #include <linux/version.h>
14 #include <linux/module.h>
16 #include <linux/suspend.h>
17 #include <linux/smp_lock.h>
18 #include <linux/delay.h>
19 #include <linux/bitops.h>
20 #include <linux/spinlock.h>
21 #include <linux/kernel.h>
23 #include <linux/device.h>
24 #include <linux/bootmem.h>
25 #include <linux/syscalls.h>
26 #include <linux/console.h>
27 #include <linux/highmem.h>
29 #include <asm/uaccess.h>
30 #include <asm/mmu_context.h>
31 #include <asm/pgtable.h>
32 #include <asm/tlbflush.h>
37 /* List of PBEs used for creating and restoring the suspend image */
38 struct pbe
*restore_pblist
;
40 static unsigned int nr_copy_pages
;
41 static unsigned int nr_meta_pages
;
42 static unsigned long *buffer
;
45 unsigned int count_highmem_pages(void)
48 unsigned long zone_pfn
;
52 if (is_highmem(zone
)) {
53 mark_free_pages(zone
);
54 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; zone_pfn
++) {
56 unsigned long pfn
= zone_pfn
+ zone
->zone_start_pfn
;
59 page
= pfn_to_page(pfn
);
60 if (PageReserved(page
))
62 if (PageNosaveFree(page
))
73 struct highmem_page
*next
;
76 static struct highmem_page
*highmem_copy
;
78 static int save_highmem_zone(struct zone
*zone
)
80 unsigned long zone_pfn
;
81 mark_free_pages(zone
);
82 for (zone_pfn
= 0; zone_pfn
< zone
->spanned_pages
; ++zone_pfn
) {
84 struct highmem_page
*save
;
86 unsigned long pfn
= zone_pfn
+ zone
->zone_start_pfn
;
92 page
= pfn_to_page(pfn
);
94 * This condition results from rvmalloc() sans vmalloc_32()
95 * and architectural memory reservations. This should be
96 * corrected eventually when the cases giving rise to this
97 * are better understood.
99 if (PageReserved(page
))
101 BUG_ON(PageNosave(page
));
102 if (PageNosaveFree(page
))
104 save
= kmalloc(sizeof(struct highmem_page
), GFP_ATOMIC
);
107 save
->next
= highmem_copy
;
109 save
->data
= (void *) get_zeroed_page(GFP_ATOMIC
);
114 kaddr
= kmap_atomic(page
, KM_USER0
);
115 memcpy(save
->data
, kaddr
, PAGE_SIZE
);
116 kunmap_atomic(kaddr
, KM_USER0
);
122 int save_highmem(void)
127 pr_debug("swsusp: Saving Highmem");
129 for_each_zone (zone
) {
130 if (is_highmem(zone
))
131 res
= save_highmem_zone(zone
);
139 int restore_highmem(void)
141 printk("swsusp: Restoring Highmem\n");
142 while (highmem_copy
) {
143 struct highmem_page
*save
= highmem_copy
;
145 highmem_copy
= save
->next
;
147 kaddr
= kmap_atomic(save
->page
, KM_USER0
);
148 memcpy(kaddr
, save
->data
, PAGE_SIZE
);
149 kunmap_atomic(kaddr
, KM_USER0
);
150 free_page((long) save
->data
);
156 static inline unsigned int count_highmem_pages(void) {return 0;}
157 static inline int save_highmem(void) {return 0;}
158 static inline int restore_highmem(void) {return 0;}
162 * @safe_needed - on resume, for storing the PBE list and the image,
163 * we can only use memory pages that do not conflict with the pages
164 * used before suspend.
166 * The unsafe pages are marked with the PG_nosave_free flag
167 * and we count them using unsafe_pages
172 #define PG_UNSAFE_CLEAR 1
173 #define PG_UNSAFE_KEEP 0
175 static unsigned int unsafe_pages
;
177 static void *alloc_image_page(gfp_t gfp_mask
, int safe_needed
)
181 res
= (void *)get_zeroed_page(gfp_mask
);
183 while (res
&& PageNosaveFree(virt_to_page(res
))) {
184 /* The page is unsafe, mark it for swsusp_free() */
185 SetPageNosave(virt_to_page(res
));
187 res
= (void *)get_zeroed_page(gfp_mask
);
190 SetPageNosave(virt_to_page(res
));
191 SetPageNosaveFree(virt_to_page(res
));
/*
 * get_safe_page - wrapper around alloc_image_page().
 * @gfp_mask: allocation flags, forwarded unchanged.
 *
 * Calls alloc_image_page() with PG_SAFE as its safe_needed argument and
 * returns the resulting pointer cast to unsigned long.  The result is not
 * checked here, so whatever alloc_image_page() returns on failure is passed
 * straight through to the caller.
 */
196 unsigned long get_safe_page(gfp_t gfp_mask
)
198 return (unsigned long)alloc_image_page(gfp_mask
, PG_SAFE
);
202 * free_image_page - free page represented by @addr, allocated with
203 * alloc_image_page (page flags set by it must be cleared)
/*
 * free_image_page - undo the page-flag marking and release one page.
 * @addr: kernel virtual address of the page to release.
 * @clear_nosave_free: when non-zero the NosaveFree flag is cleared too;
 *	callers in this file pass PG_UNSAFE_CLEAR (1) or PG_UNSAFE_KEEP (0).
 *
 * The Nosave flag is always cleared.  The NosaveFree flag is cleared only on
 * request.  The page is then handed to free_page().
 */
206 static inline void free_image_page(void *addr
, int clear_nosave_free
)
208 ClearPageNosave(virt_to_page(addr
));
209 if (clear_nosave_free
)
210 ClearPageNosaveFree(virt_to_page(addr
));
211 free_page((unsigned long)addr
);
215 * pfn_is_nosave - check if given pfn is in the 'nosave' section
/*
 * pfn_is_nosave - test whether @pfn lies inside the nosave range.
 * @pfn: page frame number to test.
 *
 * The range is derived from the physical addresses of the __nosave_begin and
 * __nosave_end symbols.  The start is shifted down by PAGE_SHIFT; the end is
 * first rounded up with PAGE_ALIGN() so that a partially covered last page is
 * included.  Returns non-zero when nosave_begin_pfn <= @pfn < nosave_end_pfn,
 * zero otherwise.
 */
218 static inline int pfn_is_nosave(unsigned long pfn
)
220 unsigned long nosave_begin_pfn
= __pa(&__nosave_begin
) >> PAGE_SHIFT
;
221 unsigned long nosave_end_pfn
= PAGE_ALIGN(__pa(&__nosave_end
)) >> PAGE_SHIFT
;
222 return (pfn
>= nosave_begin_pfn
) && (pfn
< nosave_end_pfn
);
226 * saveable_page - Determine whether a page should be cloned or not.
229 * We save a page if it isn't Nosave, and is not in the range of pages
230 * statically defined as 'unsaveable', and it
231 * isn't a part of a free chunk of pages.
234 static struct page
*saveable_page(unsigned long pfn
)
241 page
= pfn_to_page(pfn
);
243 if (PageNosave(page
))
245 if (PageReserved(page
) && pfn_is_nosave(pfn
))
247 if (PageNosaveFree(page
))
253 unsigned int count_data_pages(void)
256 unsigned long pfn
, max_zone_pfn
;
259 for_each_zone (zone
) {
260 if (is_highmem(zone
))
262 mark_free_pages(zone
);
263 max_zone_pfn
= zone
->zone_start_pfn
+ zone
->spanned_pages
;
264 for (pfn
= zone
->zone_start_pfn
; pfn
< max_zone_pfn
; pfn
++)
265 n
+= !!saveable_page(pfn
);
270 static inline void copy_data_page(long *dst
, long *src
)
274 /* copy_page and memcpy are not usable for copying task structs. */
275 for (n
= PAGE_SIZE
/ sizeof(long); n
; n
--)
279 static void copy_data_pages(struct pbe
*pblist
)
282 unsigned long pfn
, max_zone_pfn
;
286 for_each_zone (zone
) {
287 if (is_highmem(zone
))
289 mark_free_pages(zone
);
290 max_zone_pfn
= zone
->zone_start_pfn
+ zone
->spanned_pages
;
291 for (pfn
= zone
->zone_start_pfn
; pfn
< max_zone_pfn
; pfn
++) {
292 struct page
*page
= saveable_page(pfn
);
295 void *ptr
= page_address(page
);
298 copy_data_page((void *)pbe
->address
, ptr
);
299 pbe
->orig_address
= (unsigned long)ptr
;
308 * free_pagedir - free pages allocated with alloc_pagedir()
311 static void free_pagedir(struct pbe
*pblist
, int clear_nosave_free
)
316 pbe
= (pblist
+ PB_PAGE_SKIP
)->next
;
317 free_image_page(pblist
, clear_nosave_free
);
323 * fill_pb_page - Create a list of PBEs on a given memory page
326 static inline void fill_pb_page(struct pbe
*pbpage
, unsigned int n
)
334 while (++p
< pbpage
);
338 * create_pbe_list - Create a list of PBEs on top of a given chain
339 * of memory pages allocated with alloc_pagedir()
341 * This function assumes that pages allocated by alloc_image_page() will
345 static inline void create_pbe_list(struct pbe
*pblist
, unsigned int nr_pages
)
348 unsigned int num
= PBES_PER_PAGE
;
350 for_each_pb_page (pbpage
, pblist
) {
354 fill_pb_page(pbpage
, PBES_PER_PAGE
);
355 num
+= PBES_PER_PAGE
;
358 num
-= PBES_PER_PAGE
;
359 fill_pb_page(pbpage
, nr_pages
- num
);
364 * alloc_pagedir - Allocate the page directory.
366 * First, determine exactly how many pages we need and
369 * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
370 * struct pbe elements (pbes) and the last element in the page points
373 * On each page we set up a list of struct_pbe elements.
376 static struct pbe
*alloc_pagedir(unsigned int nr_pages
, gfp_t gfp_mask
,
380 struct pbe
*pblist
, *pbe
;
385 pblist
= alloc_image_page(gfp_mask
, safe_needed
);
387 for (num
= PBES_PER_PAGE
; num
< nr_pages
; num
+= PBES_PER_PAGE
) {
389 free_pagedir(pblist
, PG_UNSAFE_CLEAR
);
393 pbe
->next
= alloc_image_page(gfp_mask
, safe_needed
);
396 create_pbe_list(pblist
, nr_pages
);
401 * Free pages we allocated for suspend. Suspend pages are allocated
402 * before atomic copy, so we need to free them after resume.
405 void swsusp_free(void)
408 unsigned long pfn
, max_zone_pfn
;
410 for_each_zone(zone
) {
411 max_zone_pfn
= zone
->zone_start_pfn
+ zone
->spanned_pages
;
412 for (pfn
= zone
->zone_start_pfn
; pfn
< max_zone_pfn
; pfn
++)
413 if (pfn_valid(pfn
)) {
414 struct page
*page
= pfn_to_page(pfn
);
416 if (PageNosave(page
) && PageNosaveFree(page
)) {
417 ClearPageNosave(page
);
418 ClearPageNosaveFree(page
);
419 free_page((long) page_address(page
));
425 restore_pblist
= NULL
;
431 * enough_free_mem - Make sure we have enough free memory to snapshot.
433 * Returns TRUE or FALSE after checking the number of available
437 static int enough_free_mem(unsigned int nr_pages
)
443 if (!is_highmem(zone
))
444 n
+= zone
->free_pages
;
445 pr_debug("swsusp: available memory: %u pages\n", n
);
446 return n
> (nr_pages
+ PAGES_FOR_IO
+
447 (nr_pages
+ PBES_PER_PAGE
- 1) / PBES_PER_PAGE
);
450 static int alloc_data_pages(struct pbe
*pblist
, gfp_t gfp_mask
, int safe_needed
)
454 for_each_pbe (p
, pblist
) {
455 p
->address
= (unsigned long)alloc_image_page(gfp_mask
, safe_needed
);
462 static struct pbe
*swsusp_alloc(unsigned int nr_pages
)
466 pblist
= alloc_pagedir(nr_pages
, GFP_ATOMIC
| __GFP_COLD
, PG_ANY
);
468 printk(KERN_ERR
"suspend: Allocating pagedir failed.\n");
472 if (alloc_data_pages(pblist
, GFP_ATOMIC
| __GFP_COLD
, PG_ANY
)) {
473 printk(KERN_ERR
"suspend: Allocating image pages failed.\n");
481 asmlinkage
int swsusp_save(void)
483 unsigned int nr_pages
;
485 pr_debug("swsusp: critical section: \n");
488 nr_pages
= count_data_pages();
489 printk("swsusp: Need to copy %u pages\n", nr_pages
);
491 pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
493 (nr_pages
+ PBES_PER_PAGE
- 1) / PBES_PER_PAGE
,
494 PAGES_FOR_IO
, nr_free_pages());
496 if (!enough_free_mem(nr_pages
)) {
497 printk(KERN_ERR
"swsusp: Not enough free memory\n");
501 restore_pblist
= swsusp_alloc(nr_pages
);
505 /* While allocating the suspend pagedir, new cold pages may appear.
509 copy_data_pages(restore_pblist
);
512 * End of critical section. From now on, we can write to memory,
513 * but we should not touch disk. This especially means we must _not_
514 * touch swap space! Except we must write out our image of course.
517 nr_copy_pages
= nr_pages
;
518 nr_meta_pages
= (nr_pages
* sizeof(long) + PAGE_SIZE
- 1) >> PAGE_SHIFT
;
520 printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages
);
/*
 * init_header - fill in the image header structure.
 * @info: header to initialise; it is zeroed first, so any field not assigned
 *	below ends up as 0.
 *
 * Records the kernel version code, num_physpages, a copy of system_utsname
 * and the number of online CPUs.  check_header() in this file compares
 * the version code, num_physpages and uts fields against the running kernel.
 * image_pages is set to nr_copy_pages and pages to
 * nr_copy_pages + nr_meta_pages + 1.
 * NOTE(review): the extra 1 presumably accounts for the header page itself;
 * load_header() subtracts it again when recomputing nr_meta_pages - confirm.
 * size is the total page count converted to bytes (pages << PAGE_SHIFT).
 */
524 static void init_header(struct swsusp_info
*info
)
526 memset(info
, 0, sizeof(struct swsusp_info
));
527 info
->version_code
= LINUX_VERSION_CODE
;
528 info
->num_physpages
= num_physpages
;
529 memcpy(&info
->uts
, &system_utsname
, sizeof(system_utsname
));
530 info
->cpus
= num_online_cpus();
531 info
->image_pages
= nr_copy_pages
;
532 info
->pages
= nr_copy_pages
+ nr_meta_pages
+ 1;
533 info
->size
= info
->pages
;
534 info
->size
<<= PAGE_SHIFT
;
538 * pack_orig_addresses - the .orig_address fields of the PBEs from the
539 * list starting at @pbe are stored in the array @buf[] (1 page)
542 static inline struct pbe
*pack_orig_addresses(unsigned long *buf
, struct pbe
*pbe
)
546 for (j
= 0; j
< PAGE_SIZE
/ sizeof(long) && pbe
; j
++) {
547 buf
[j
] = pbe
->orig_address
;
551 for (; j
< PAGE_SIZE
/ sizeof(long); j
++)
557 * snapshot_read_next - used for reading the system memory snapshot.
559 * On the first call to it @handle should point to a zeroed
560 * snapshot_handle structure. The structure gets updated and a pointer
561 * to it should be passed to this function every next time.
563 * The @count parameter should contain the number of bytes the caller
564 * wants to read from the snapshot. It must not be zero.
566 * On success the function returns a positive number. Then, the caller
567 * is allowed to read up to the returned number of bytes from the memory
568 * location computed by the data_of() macro. The number returned
569 * may be smaller than @count, but this only happens if the read would
570 * cross a page boundary otherwise.
572 * The function returns 0 to indicate the end of data stream condition,
573 * and a negative number is returned on error. In such cases the
574 * structure pointed to by @handle is not updated and should not be used
578 int snapshot_read_next(struct snapshot_handle
*handle
, size_t count
)
580 if (handle
->cur
> nr_meta_pages
+ nr_copy_pages
)
583 /* This makes the buffer be freed by swsusp_free() */
584 buffer
= alloc_image_page(GFP_ATOMIC
, PG_ANY
);
588 if (!handle
->offset
) {
589 init_header((struct swsusp_info
*)buffer
);
590 handle
->buffer
= buffer
;
591 handle
->pbe
= restore_pblist
;
593 if (handle
->prev
< handle
->cur
) {
594 if (handle
->cur
<= nr_meta_pages
) {
595 handle
->pbe
= pack_orig_addresses(buffer
, handle
->pbe
);
597 handle
->pbe
= restore_pblist
;
599 handle
->buffer
= (void *)handle
->pbe
->address
;
600 handle
->pbe
= handle
->pbe
->next
;
602 handle
->prev
= handle
->cur
;
604 handle
->buf_offset
= handle
->cur_offset
;
605 if (handle
->cur_offset
+ count
>= PAGE_SIZE
) {
606 count
= PAGE_SIZE
- handle
->cur_offset
;
607 handle
->cur_offset
= 0;
610 handle
->cur_offset
+= count
;
612 handle
->offset
+= count
;
617 * mark_unsafe_pages - mark the pages that cannot be used for storing
618 * the image during resume, because they conflict with the pages that
619 * had been used before suspend
622 static int mark_unsafe_pages(struct pbe
*pblist
)
625 unsigned long pfn
, max_zone_pfn
;
628 if (!pblist
) /* a sanity check */
631 /* Clear page flags */
632 for_each_zone (zone
) {
633 max_zone_pfn
= zone
->zone_start_pfn
+ zone
->spanned_pages
;
634 for (pfn
= zone
->zone_start_pfn
; pfn
< max_zone_pfn
; pfn
++)
636 ClearPageNosaveFree(pfn_to_page(pfn
));
639 /* Mark orig addresses */
640 for_each_pbe (p
, pblist
) {
641 if (virt_addr_valid(p
->orig_address
))
642 SetPageNosaveFree(virt_to_page(p
->orig_address
));
652 static void copy_page_backup_list(struct pbe
*dst
, struct pbe
*src
)
654 /* We assume both lists contain the same number of elements */
656 dst
->orig_address
= src
->orig_address
;
662 static int check_header(struct swsusp_info
*info
)
666 if (info
->version_code
!= LINUX_VERSION_CODE
)
667 reason
= "kernel version";
668 if (info
->num_physpages
!= num_physpages
)
669 reason
= "memory size";
670 if (strcmp(info
->uts
.sysname
,system_utsname
.sysname
))
671 reason
= "system type";
672 if (strcmp(info
->uts
.release
,system_utsname
.release
))
673 reason
= "kernel release";
674 if (strcmp(info
->uts
.version
,system_utsname
.version
))
676 if (strcmp(info
->uts
.machine
,system_utsname
.machine
))
679 printk(KERN_ERR
"swsusp: Resume mismatch: %s\n", reason
);
686 * load_header - check the image header and copy data from it
689 static int load_header(struct snapshot_handle
*handle
,
690 struct swsusp_info
*info
)
695 error
= check_header(info
);
697 pblist
= alloc_pagedir(info
->image_pages
, GFP_ATOMIC
, PG_ANY
);
700 restore_pblist
= pblist
;
701 handle
->pbe
= pblist
;
702 nr_copy_pages
= info
->image_pages
;
703 nr_meta_pages
= info
->pages
- info
->image_pages
- 1;
709 * unpack_orig_addresses - copy the elements of @buf[] (1 page) to
710 * the PBEs in the list starting at @pbe
713 static inline struct pbe
*unpack_orig_addresses(unsigned long *buf
,
718 for (j
= 0; j
< PAGE_SIZE
/ sizeof(long) && pbe
; j
++) {
719 pbe
->orig_address
= buf
[j
];
726 * prepare_image - use metadata contained in the PBE list
727 * pointed to by restore_pblist to mark the pages that will
728 * be overwritten in the process of restoring the system
729 * memory state from the image ("unsafe" pages) and allocate
730 * memory for the image
732 * The idea is to allocate the PBE list first and then
733 * allocate as many pages as it's needed for the image data,
734 * but not to assign these pages to the PBEs initially.
735 * Instead, we just mark them as allocated and create a list
736 * of "safe" pages which will be used later
740 struct safe_page
*next
;
741 char padding
[PAGE_SIZE
- sizeof(void *)];
744 static struct safe_page
*safe_pages
;
746 static int prepare_image(struct snapshot_handle
*handle
)
749 unsigned int nr_pages
= nr_copy_pages
;
750 struct pbe
*p
, *pblist
= NULL
;
753 error
= mark_unsafe_pages(p
);
755 pblist
= alloc_pagedir(nr_pages
, GFP_ATOMIC
, PG_SAFE
);
757 copy_page_backup_list(pblist
, p
);
758 free_pagedir(p
, PG_UNSAFE_KEEP
);
763 if (!error
&& nr_pages
> unsafe_pages
) {
764 nr_pages
-= unsafe_pages
;
766 struct safe_page
*ptr
;
768 ptr
= (struct safe_page
*)get_zeroed_page(GFP_ATOMIC
);
773 if (!PageNosaveFree(virt_to_page(ptr
))) {
774 /* The page is "safe", add it to the list */
775 ptr
->next
= safe_pages
;
778 /* Mark the page as allocated */
779 SetPageNosave(virt_to_page(ptr
));
780 SetPageNosaveFree(virt_to_page(ptr
));
784 restore_pblist
= pblist
;
792 static void *get_buffer(struct snapshot_handle
*handle
)
794 struct pbe
*pbe
= handle
->pbe
, *last
= handle
->last_pbe
;
795 struct page
*page
= virt_to_page(pbe
->orig_address
);
797 if (PageNosave(page
) && PageNosaveFree(page
)) {
799 * We have allocated the "original" page frame and we can
800 * use it directly to store the read page
803 if (last
&& last
->next
)
805 return (void *)pbe
->orig_address
;
808 * The "original" page frame has not been allocated and we have to
809 * use a "safe" page frame to store the read page
811 pbe
->address
= (unsigned long)safe_pages
;
812 safe_pages
= safe_pages
->next
;
815 handle
->last_pbe
= pbe
;
816 return (void *)pbe
->address
;
820 * snapshot_write_next - used for writing the system memory snapshot.
822 * On the first call to it @handle should point to a zeroed
823 * snapshot_handle structure. The structure gets updated and a pointer
824 * to it should be passed to this function every next time.
826 * The @count parameter should contain the number of bytes the caller
827 * wants to write to the image. It must not be zero.
829 * On success the function returns a positive number. Then, the caller
830 * is allowed to write up to the returned number of bytes to the memory
831 * location computed by the data_of() macro. The number returned
832 * may be smaller than @count, but this only happens if the write would
833 * cross a page boundary otherwise.
835 * The function returns 0 to indicate the "end of file" condition,
836 * and a negative number is returned on error. In such cases the
837 * structure pointed to by @handle is not updated and should not be used
841 int snapshot_write_next(struct snapshot_handle
*handle
, size_t count
)
845 if (handle
->prev
&& handle
->cur
> nr_meta_pages
+ nr_copy_pages
)
848 /* This makes the buffer be freed by swsusp_free() */
849 buffer
= alloc_image_page(GFP_ATOMIC
, PG_ANY
);
854 handle
->buffer
= buffer
;
855 handle
->sync_read
= 1;
856 if (handle
->prev
< handle
->cur
) {
858 error
= load_header(handle
,
859 (struct swsusp_info
*)buffer
);
862 } else if (handle
->prev
<= nr_meta_pages
) {
863 handle
->pbe
= unpack_orig_addresses(buffer
,
866 error
= prepare_image(handle
);
869 handle
->pbe
= restore_pblist
;
870 handle
->last_pbe
= NULL
;
871 handle
->buffer
= get_buffer(handle
);
872 handle
->sync_read
= 0;
875 handle
->pbe
= handle
->pbe
->next
;
876 handle
->buffer
= get_buffer(handle
);
877 handle
->sync_read
= 0;
879 handle
->prev
= handle
->cur
;
881 handle
->buf_offset
= handle
->cur_offset
;
882 if (handle
->cur_offset
+ count
>= PAGE_SIZE
) {
883 count
= PAGE_SIZE
- handle
->cur_offset
;
884 handle
->cur_offset
= 0;
887 handle
->cur_offset
+= count
;
889 handle
->offset
+= count
;
/*
 * snapshot_image_loaded - report whether the image has been loaded.
 * @handle: snapshot handle that was passed to snapshot_write_next().
 *
 * Returns 1 only when all of the following hold, and 0 otherwise:
 *   - handle->pbe is non-NULL,
 *   - handle->pbe->next is NULL (the handle sits on the last PBE),
 *   - nr_copy_pages is non-zero,
 *   - handle->cur is greater than nr_meta_pages + nr_copy_pages.
 */
893 int snapshot_image_loaded(struct snapshot_handle
*handle
)
895 return !(!handle
->pbe
|| handle
->pbe
->next
|| !nr_copy_pages
||
896 handle
->cur
<= nr_meta_pages
+ nr_copy_pages
);