/*
 * kexec.c - kexec system call
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2. See the file COPYING for more details.
 */
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/kexec.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/ioport.h>
#include <linux/hardirq.h>
#include <linux/elf.h>
#include <linux/elfcore.h>

#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/semaphore.h>
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t *crash_notes;
/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
	.name  = "Crash kernel",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
int kexec_should_crash(struct task_struct *p)
{
	if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops)
		return 1;
	return 0;
}
/*
 * When kexec transitions to the new kernel there is a one-to-one
 * mapping between physical and virtual addresses.  On processors
 * where you can disable the MMU this is trivial, and easy.  For
 * others it is still a simple predictable page table to setup.
 *
 * In that environment kexec copies the new kernel to its final
 * resting place.  This means I can only support memory whose
 * physical address can fit in an unsigned long.  In particular
 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
 * If the assembly stub has more restrictive requirements
 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
 * defined more restrictively in <asm/kexec.h>.
 *
 * The code for the transition from the current kernel to the
 * new kernel is placed in the control_code_buffer, whose size
 * is given by KEXEC_CONTROL_CODE_SIZE.  In the best case only a single
 * page of memory is necessary, but some architectures require more.
 * Because this memory must be identity mapped in the transition from
 * virtual to physical addresses it must live in the range
 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
 * modifiable.
 *
 * The assembly stub in the control code buffer is passed a linked list
 * of descriptor pages detailing the source pages of the new kernel,
 * and the destination addresses of those source pages.  As this data
 * structure is not used in the context of the current OS, it must
 * be self-contained.
 *
 * The code has been made to work with highmem pages and will use a
 * destination page in its final resting place (if it happens
 * to allocate it).  The end product of this is that most of the
 * physical address space, and most of RAM, can be used.
 *
 * Future directions include:
 *  - allocating a page table with the control code buffer identity
 *    mapped, to simplify machine_kexec and make kexec_on_panic more
 *    reliable.
 */

/*
 * KIMAGE_NO_DEST is an impossible destination address, used for
 * allocating pages whose destination address we do not care about.
 */
#define KIMAGE_NO_DEST (-1UL)
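
/*
 * Illustrative sketch, not part of the original file: the descriptor
 * list handed to the assembly stub is a sequence of kimage_entry_t
 * values, each a page-aligned physical address tagged with one of the
 * IND_* flags in its low bits.  The addresses below are made up.
 *
 *	0x01000000 | IND_DESTINATION	start copying to 0x01000000
 *	0x37ac2000 | IND_SOURCE		page to copy to 0x01000000
 *	0x37ac3000 | IND_SOURCE		page to copy to 0x01001000
 *	0x37ac7000 | IND_INDIRECTION	list continues in this page
 *	...
 *	IND_DONE			end of the list
 */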

static int kimage_is_destination_range(struct kimage *image,
					unsigned long start, unsigned long end);
static struct page *kimage_alloc_page(struct kimage *image,
					gfp_t gfp_mask,
					unsigned long dest);

static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
			   unsigned long nr_segments,
			   struct kexec_segment __user *segments)
{
	size_t segment_bytes;
	struct kimage *image;
	unsigned long i;
	int result;
	/* Allocate a controlling structure */
	result = -ENOMEM;
	image = kzalloc(sizeof(*image), GFP_KERNEL);
	if (!image)
		goto out;

	image->entry = &image->head;
	image->last_entry = &image->head;
	image->control_page = ~0; /* By default this does not apply */
	image->start = entry;
	image->type = KEXEC_TYPE_DEFAULT;

	/* Initialize the list of control pages */
	INIT_LIST_HEAD(&image->control_pages);

	/* Initialize the list of destination pages */
	INIT_LIST_HEAD(&image->dest_pages);

	/* Initialize the list of unusable pages */
	INIT_LIST_HEAD(&image->unuseable_pages);

	/* Read in the segments */
	image->nr_segments = nr_segments;
	segment_bytes = nr_segments * sizeof(*segments);
	result = copy_from_user(image->segment, segments, segment_bytes);
	if (result)
		goto out;

	/*
	 * Verify we have good destination addresses.  The caller is
	 * responsible for making certain we don't attempt to load
	 * the new image into invalid or reserved areas of RAM.  This
	 * just verifies it is an address we can use.
	 *
	 * Since the kernel does everything in page size chunks ensure
	 * the destination addresses are page aligned.  Too many
	 * special cases crop up when we don't do this.  The most
	 * insidious is getting overlapping destination addresses
	 * simply because addresses are changed to page size
	 * granularity.
	 */
	result = -EADDRNOTAVAIL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend   = mstart + image->segment[i].memsz;
		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
			goto out;
		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
			goto out;
	}

	/* Verify our destination addresses do not overlap.
	 * If we allowed overlapping destination addresses
	 * through very weird things can happen with no
	 * easy explanation as one segment stops on another.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;
		unsigned long j;

		mstart = image->segment[i].mem;
		mend   = mstart + image->segment[i].memsz;
		for (j = 0; j < i; j++) {
			unsigned long pstart, pend;
			pstart = image->segment[j].mem;
			pend   = pstart + image->segment[j].memsz;
			/* Do the segments overlap ? */
			if ((mend > pstart) && (mstart < pend))
				goto out;
		}
	}

	/* Ensure our buffer sizes are strictly less than
	 * our memory sizes.  This should always be the case,
	 * and it is easier to check up front than to be surprised
	 * later on.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		if (image->segment[i].bufsz > image->segment[i].memsz)
			goto out;
	}

	result = 0;
out:
	if (result == 0)
		*rimage = image;
	else
		kfree(image);

	return result;
}
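
/*
 * Illustrative sketch, not part of the original file: with made-up
 * addresses and 4K pages, a segment at mem = 0x100000, memsz = 0x20000
 * and a second at mem = 0x110000, memsz = 0x20000 both pass the
 * alignment test above, but the overlap test rejects the pair because
 * mend 0x130000 > pstart 0x100000 and mstart 0x110000 < pend 0x120000.
 */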

static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
				unsigned long nr_segments,
				struct kexec_segment __user *segments)
{
	int result;
	struct kimage *image;

	/* Allocate and initialize a controlling structure */
	image = NULL;
	result = do_kimage_alloc(&image, entry, nr_segments, segments);
	if (result)
		goto out;

	/*
	 * Find a location for the control code buffer, and add it to
	 * the vector of segments so that its pages will also be
	 * counted as destination pages.
	 */
	result = -ENOMEM;
	image->control_code_page = kimage_alloc_control_pages(image,
					get_order(KEXEC_CONTROL_CODE_SIZE));
	if (!image->control_code_page) {
		printk(KERN_ERR "Could not allocate control_code_buffer\n");
		goto out;
	}

	result = 0;
out:
	if (result == 0)
		*rimage = image;
	else
		kfree(image);

	return result;
}

static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
				unsigned long nr_segments,
				struct kexec_segment __user *segments)
{
	int result;
	struct kimage *image;
	unsigned long i;

	image = NULL;
	/* Verify we have a valid entry point */
	if ((entry < crashk_res.start) || (entry > crashk_res.end)) {
		result = -EADDRNOTAVAIL;
		goto out;
	}

	/* Allocate and initialize a controlling structure */
	result = do_kimage_alloc(&image, entry, nr_segments, segments);
	if (result)
		goto out;

	/* Enable the special crash kernel control page
	 * allocation policy.
	 */
	image->control_page = crashk_res.start;
	image->type = KEXEC_TYPE_CRASH;

	/*
	 * Verify we have good destination addresses.  Normally
	 * the caller is responsible for making certain we don't
	 * attempt to load the new image into invalid or reserved
	 * areas of RAM.  But crash kernels are preloaded into a
	 * reserved area of RAM.  We must ensure the addresses
	 * are in the reserved area otherwise preloading the
	 * kernel could corrupt things.
	 */
	result = -EADDRNOTAVAIL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz - 1;
		/* Ensure we are within the crash kernel limits */
		if ((mstart < crashk_res.start) || (mend > crashk_res.end))
			goto out;
	}

	/*
	 * Find a location for the control code buffer, and add it to
	 * the vector of segments so that its pages will also be
	 * counted as destination pages.
	 */
	result = -ENOMEM;
	image->control_code_page = kimage_alloc_control_pages(image,
					get_order(KEXEC_CONTROL_CODE_SIZE));
	if (!image->control_code_page) {
		printk(KERN_ERR "Could not allocate control_code_buffer\n");
		goto out;
	}

	result = 0;
out:
	if (result == 0)
		*rimage = image;
	else
		kfree(image);

	return result;
}

static int kimage_is_destination_range(struct kimage *image,
					unsigned long start,
					unsigned long end)
{
	unsigned long i;

	for (i = 0; i < image->nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		if ((end > mstart) && (start < mend))
			return 1;
	}

	return 0;
}

static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *pages;

	pages = alloc_pages(gfp_mask, order);
	if (pages) {
		unsigned int count, i;
		pages->mapping = NULL;
		set_page_private(pages, order);
		count = 1 << order;
		for (i = 0; i < count; i++)
			SetPageReserved(pages + i);
	}

	return pages;
}

static void kimage_free_pages(struct page *page)
{
	unsigned int order, count, i;

	order = page_private(page);
	count = 1 << order;
	for (i = 0; i < count; i++)
		ClearPageReserved(page + i);
	__free_pages(page, order);
}

static void kimage_free_page_list(struct list_head *list)
{
	struct list_head *pos, *next;

	list_for_each_safe(pos, next, list) {
		struct page *page;

		page = list_entry(pos, struct page, lru);
		list_del(&page->lru);
		kimage_free_pages(page);
	}
}

static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
							unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * The only case where we really need more than one of
	 * these are for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * At worst this runs in O(N) of the image size.
	 */
	struct list_head extra_pages;
	struct page *pages;
	unsigned int count;

	count = 1 << order;
	INIT_LIST_HEAD(&extra_pages);

	/* Loop while I can allocate a page and the page allocated
	 * is a destination page.
	 */
	do {
		unsigned long pfn, epfn, addr, eaddr;

		pages = kimage_alloc_pages(GFP_KERNEL, order);
		if (!pages)
			break;
		pfn   = page_to_pfn(pages);
		epfn  = pfn + count;
		addr  = pfn << PAGE_SHIFT;
		eaddr = epfn << PAGE_SHIFT;
		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
		    kimage_is_destination_range(image, addr, eaddr)) {
			list_add(&pages->lru, &extra_pages);
			pages = NULL;
		}
	} while (!pages);

	if (pages) {
		/* Remember the allocated page... */
		list_add(&pages->lru, &image->control_pages);

		/* Because the page is already in its destination
		 * location we will never allocate another page at
		 * that address.  Therefore kimage_alloc_pages
		 * will not return it (again) and we don't need
		 * to give it an entry in image->segment[].
		 */
	}
	/* Deal with the destination pages I have inadvertently allocated.
	 *
	 * Ideally I would convert multi-page allocations into single
	 * page allocations, and add everything to image->dest_pages.
	 *
	 * For now it is simpler to just free the pages.
	 */
	kimage_free_page_list(&extra_pages);

	return pages;
}

static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
							unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * Control pages are also the only pages we must allocate
	 * when loading a crash kernel.  All of the other pages
	 * are specified by the segments and we just memcpy
	 * into them directly.
	 *
	 * The only case where we really need more than one of
	 * these are for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * Given the low demand this implements a very simple
	 * allocator that finds the first hole of the appropriate
	 * size in the reserved memory region, and allocates all
	 * of the memory up to and including the hole.
	 */
	unsigned long hole_start, hole_end, size;
	struct page *pages;

	pages = NULL;
	size = (1 << order) << PAGE_SHIFT;
	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
	hole_end   = hole_start + size - 1;
	while (hole_end <= crashk_res.end) {
		unsigned long i;

		if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
			break;
		if (hole_end > crashk_res.end)
			break;
		/* See if I overlap any of the segments */
		for (i = 0; i < image->nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend   = mstart + image->segment[i].memsz - 1;
			if ((hole_end >= mstart) && (hole_start <= mend)) {
				/* Advance the hole to the end of the segment */
				hole_start = (mend + (size - 1)) & ~(size - 1);
				hole_end   = hole_start + size - 1;
				break;
			}
		}
		/* If I don't overlap any segments I have found my hole! */
		if (i == image->nr_segments) {
			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
			break;
		}
	}
	if (pages)
		image->control_page = hole_end;

	return pages;
}
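
/*
 * Illustrative sketch, not part of the original file: the rounding above
 * with made-up numbers.  For order = 1 and 4K pages, size = 0x2000, so a
 * control_page of 0x01001000 gives
 *	hole_start = (0x01001000 + 0x1fff) & ~0x1fff = 0x01002000
 *	hole_end   = 0x01003fff
 * and a segment ending at mend = 0x01002fff pushes the hole to
 *	hole_start = (0x01002fff + 0x1fff) & ~0x1fff = 0x01004000.
 */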

struct page *kimage_alloc_control_pages(struct kimage *image,
					 unsigned int order)
{
	struct page *pages = NULL;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		pages = kimage_alloc_normal_control_pages(image, order);
		break;
	case KEXEC_TYPE_CRASH:
		pages = kimage_alloc_crash_control_pages(image, order);
		break;
	}

	return pages;
}

static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
	if (*image->entry != 0)
		image->entry++;

	if (image->entry == image->last_entry) {
		kimage_entry_t *ind_page;
		struct page *page;

		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
		if (!page)
			return -ENOMEM;

		ind_page = page_address(page);
		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
		image->entry = ind_page;
		image->last_entry = ind_page +
				((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
	}
	*image->entry = entry;
	image->entry++;
	*image->entry = 0;

	return 0;
}

static int kimage_set_destination(struct kimage *image,
				   unsigned long destination)
{
	int result;

	destination &= PAGE_MASK;
	result = kimage_add_entry(image, destination | IND_DESTINATION);
	if (result == 0)
		image->destination = destination;

	return result;
}

static int kimage_add_page(struct kimage *image, unsigned long page)
{
	int result;

	page &= PAGE_MASK;
	result = kimage_add_entry(image, page | IND_SOURCE);
	if (result == 0)
		image->destination += PAGE_SIZE;

	return result;
}

static void kimage_free_extra_pages(struct kimage *image)
{
	/* Walk through and free any extra destination pages I may have */
	kimage_free_page_list(&image->dest_pages);

	/* Walk through and free any unusable pages I have cached */
	kimage_free_page_list(&image->unuseable_pages);
}

static int kimage_terminate(struct kimage *image)
{
	if (*image->entry != 0)
		image->entry++;

	*image->entry = IND_DONE;

	return 0;
}

#define for_each_kimage_entry(image, ptr, entry) \
	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
		ptr = (entry & IND_INDIRECTION) ? \
			phys_to_virt(entry & PAGE_MASK) : ptr + 1)
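
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * debugging helper (kimage_show_entries is an assumed name, not an
 * existing kernel function) showing how the list built by
 * kimage_add_entry() is walked with for_each_kimage_entry().
 */
#if 0
static void kimage_show_entries(struct kimage *image)
{
	kimage_entry_t *ptr, entry;

	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_DESTINATION)
			printk(KERN_DEBUG "destination %08lx\n", entry & PAGE_MASK);
		else if (entry & IND_INDIRECTION)
			printk(KERN_DEBUG "indirection %08lx\n", entry & PAGE_MASK);
		else if (entry & IND_SOURCE)
			printk(KERN_DEBUG "source      %08lx\n", entry & PAGE_MASK);
	}
}
#endif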

static void kimage_free_entry(kimage_entry_t entry)
{
	struct page *page;

	page = pfn_to_page(entry >> PAGE_SHIFT);
	kimage_free_pages(page);
}

static void kimage_free(struct kimage *image)
{
	kimage_entry_t *ptr, entry;
	kimage_entry_t ind = 0;

	if (!image)
		return;

	kimage_free_extra_pages(image);
	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_INDIRECTION) {
			/* Free the previous indirection page */
			if (ind & IND_INDIRECTION)
				kimage_free_entry(ind);
			/* Save this indirection page until we are
			 * done with it.
			 */
			ind = entry;
		}
		else if (entry & IND_SOURCE)
			kimage_free_entry(entry);
	}
	/* Free the final indirection page */
	if (ind & IND_INDIRECTION)
		kimage_free_entry(ind);

	/* Handle any machine specific cleanup */
	machine_kexec_cleanup(image);

	/* Free the kexec control pages... */
	kimage_free_page_list(&image->control_pages);
	kfree(image);
}

static kimage_entry_t *kimage_dst_used(struct kimage *image,
					unsigned long page)
{
	kimage_entry_t *ptr, entry;
	unsigned long destination = 0;

	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_DESTINATION)
			destination = entry & PAGE_MASK;
		else if (entry & IND_SOURCE) {
			if (page == destination)
				return ptr;
			destination += PAGE_SIZE;
		}
	}

	return NULL;
}

static struct page *kimage_alloc_page(struct kimage *image,
					gfp_t gfp_mask,
					unsigned long destination)
{
	/*
	 * Here we implement safeguards to ensure that a source page
	 * is not copied to its destination page before the data on
	 * the destination page is no longer useful.
	 *
	 * To do this we maintain the invariant that a source page is
	 * either its own destination page, or it is not a
	 * destination page at all.
	 *
	 * That is slightly stronger than required, but the proof
	 * that no problems will occur is trivial, and the
	 * implementation is simple to verify.
	 *
	 * When allocating all pages normally this algorithm will run
	 * in O(N) time, but in the worst case it will run in O(N^2)
	 * time.  If the runtime is a problem the data structures can
	 * be fixed up.
	 */
	struct page *page;
	unsigned long addr;

	/*
	 * Walk through the list of destination pages, and see if I
	 * have a match.
	 */
	list_for_each_entry(page, &image->dest_pages, lru) {
		addr = page_to_pfn(page) << PAGE_SHIFT;
		if (addr == destination) {
			list_del(&page->lru);
			return page;
		}
	}
	page = NULL;
	while (1) {
		kimage_entry_t *old;

		/* Allocate a page, if we run out of memory give up */
		page = kimage_alloc_pages(gfp_mask, 0);
		if (!page)
			return NULL;
		/* If the page cannot be used file it away */
		if (page_to_pfn(page) >
				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
			list_add(&page->lru, &image->unuseable_pages);
			continue;
		}
		addr = page_to_pfn(page) << PAGE_SHIFT;

		/* If it is the destination page we want, use it */
		if (addr == destination)
			break;

		/* If the page is not a destination page use it */
		if (!kimage_is_destination_range(image, addr,
						  addr + PAGE_SIZE))
			break;

		/*
		 * I know that the page is someone's destination page.
		 * See if there is already a source page for this
		 * destination page.  And if so swap the source pages.
		 */
		old = kimage_dst_used(image, addr);
		if (old) {
			unsigned long old_addr;
			struct page *old_page;

			old_addr = *old & PAGE_MASK;
			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
			copy_highpage(page, old_page);
			*old = addr | (*old & ~PAGE_MASK);

			/* The old page I have found cannot be a
			 * destination page, so return it.
			 */
			addr = old_addr;
			page = old_page;
			break;
		}
		else {
			/* Place the page on the destination list; I
			 * will use it later.
			 */
			list_add(&page->lru, &image->dest_pages);
		}
	}

	return page;
}
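
/*
 * Illustrative sketch, not part of the original file: suppose (made-up
 * addresses) the allocator above returns page 0x2000 while 0x2000 is the
 * destination recorded for an already allocated source page at 0x9000.
 * kimage_dst_used() finds that entry, the contents of 0x9000 are copied
 * into 0x2000 with copy_highpage(), the entry is retargeted to 0x2000,
 * and 0x9000, which is no longer anyone's destination, is handed back to
 * the caller.  This preserves the invariant described above.
 */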

static int kimage_load_normal_segment(struct kimage *image,
					struct kexec_segment *segment)
{
	unsigned long maddr;
	unsigned long ubytes, mbytes;
	int result;
	unsigned char __user *buf;

	result = 0;
	buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;

	result = kimage_set_destination(image, maddr);
	if (result < 0)
		goto out;

	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		result = kimage_add_page(image, page_to_pfn(page)
								<< PAGE_SHIFT);
		if (result < 0)
			goto out;

		ptr = kmap(page);
		/* Start with a clear page */
		memset(ptr, 0, PAGE_SIZE);
		ptr += maddr & ~PAGE_MASK;
		mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
		if (mchunk > mbytes)
			mchunk = mbytes;

		uchunk = mchunk;
		if (uchunk > ubytes)
			uchunk = ubytes;

		result = copy_from_user(ptr, buf, uchunk);
		kunmap(page);
		if (result) {
			result = (result < 0) ? result : -EIO;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		buf    += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_crash_segment(struct kimage *image,
					struct kexec_segment *segment)
{
	/* For crash dump kernels we simply copy the data from
	 * user space to its destination.
	 * We do things a page at a time for the sake of kmap.
	 */
	unsigned long maddr;
	unsigned long ubytes, mbytes;
	int result;
	unsigned char __user *buf;

	result = 0;
	buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;
	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = pfn_to_page(maddr >> PAGE_SHIFT);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		ptr = kmap(page);
		ptr += maddr & ~PAGE_MASK;
		mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
		if (mchunk > mbytes)
			mchunk = mbytes;

		uchunk = mchunk;
		if (uchunk > ubytes) {
			uchunk = ubytes;
			/* Zero the trailing part of the page */
			memset(ptr + uchunk, 0, mchunk - uchunk);
		}
		result = copy_from_user(ptr, buf, uchunk);
		kexec_flush_icache_page(page);
		kunmap(page);
		if (result) {
			result = (result < 0) ? result : -EIO;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		buf    += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_segment(struct kimage *image,
				struct kexec_segment *segment)
{
	int result = -ENOMEM;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		result = kimage_load_normal_segment(image, segment);
		break;
	case KEXEC_TYPE_CRASH:
		result = kimage_load_crash_segment(image, segment);
		break;
	}

	return result;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down.  Preventing ongoing DMAs, and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number
 *   and then copies the image to its final destination and
 *   jumps into the image at entry.
 *
 * kexec does not sync, or unmount filesystems so if you need
 * that to happen you need to do that yourself.
 */
struct kimage *kexec_image;
struct kimage *kexec_crash_image;

/*
 * A home grown binary mutex.
 * Nothing can wait so this mutex is safe to use
 * in interrupt context :)
 */
static int kexec_lock;
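
/*
 * Illustrative sketch, not part of the original file: roughly how user
 * space drives this system call.  There is no libc wrapper, so loaders
 * such as kexec-tools go through syscall(); kernel_buf, kernel_len,
 * dest_paddr and page_align() below are placeholders.
 *
 *	struct kexec_segment seg = {
 *		.buf   = kernel_buf,		image bytes in user memory
 *		.bufsz = kernel_len,
 *		.mem   = dest_paddr,		page-aligned physical destination
 *		.memsz = page_align(kernel_len),
 *	};
 *	syscall(__NR_kexec_load, entry, 1, &seg, KEXEC_ARCH_DEFAULT);
 *
 * Passing KEXEC_ON_CRASH in flags instead targets the crashk_res region
 * and installs the result as kexec_crash_image.
 */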

asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
				struct kexec_segment __user *segments,
				unsigned long flags)
{
	struct kimage **dest_image, *image;
	int locked;
	int result;

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT))
		return -EPERM;

	/*
	 * Verify we have a legal set of flags
	 * This leaves us room for future extensions.
	 */
	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
		return -EINVAL;

	/* Verify we are on the appropriate architecture */
	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
		((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
		return -EINVAL;

	/* Put an artificial cap on the number
	 * of segments passed to kexec_load.
	 */
	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	image = NULL;
	result = 0;

	/* Because we write directly to the reserved memory
	 * region when loading crash kernels we need a mutex here to
	 * prevent multiple crash kernels from attempting to load
	 * simultaneously, and to prevent a crash kernel from loading
	 * over the top of an in-use crash kernel.
	 *
	 * KISS: always take the mutex.
	 */
	locked = xchg(&kexec_lock, 1);
	if (locked)
		return -EBUSY;

	dest_image = &kexec_image;
	if (flags & KEXEC_ON_CRASH)
		dest_image = &kexec_crash_image;
	if (nr_segments > 0) {
		unsigned long i;

		/* Loading another kernel to reboot into */
		if ((flags & KEXEC_ON_CRASH) == 0)
			result = kimage_normal_alloc(&image, entry,
							nr_segments, segments);
		/* Loading another kernel to switch to if this one crashes */
		else if (flags & KEXEC_ON_CRASH) {
			/* Free any current crash dump kernel before
			 * we corrupt it.
			 */
			kimage_free(xchg(&kexec_crash_image, NULL));
			result = kimage_crash_alloc(&image, entry,
							nr_segments, segments);
		}
		if (result)
			goto out;

		result = machine_kexec_prepare(image);
		if (result)
			goto out;

		for (i = 0; i < nr_segments; i++) {
			result = kimage_load_segment(image, &image->segment[i]);
			if (result)
				goto out;
		}
		result = kimage_terminate(image);
		if (result)
			goto out;
	}
	/* Install the new kernel, and uninstall the old */
	image = xchg(dest_image, image);

out:
	locked = xchg(&kexec_lock, 0); /* Release the mutex */
	BUG_ON(!locked);
	kimage_free(image);

	return result;
}

#ifdef CONFIG_COMPAT
asmlinkage long compat_sys_kexec_load(unsigned long entry,
				unsigned long nr_segments,
				struct compat_kexec_segment __user *segments,
				unsigned long flags)
{
	struct compat_kexec_segment in;
	struct kexec_segment out, __user *ksegments;
	unsigned long i, result;

	/* Don't allow clients that don't understand the native
	 * architecture to do anything.
	 */
	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
		return -EINVAL;

	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
	for (i = 0; i < nr_segments; i++) {
		result = copy_from_user(&in, &segments[i], sizeof(in));
		if (result)
			return -EFAULT;

		out.buf   = compat_ptr(in.buf);
		out.bufsz = in.bufsz;
		out.mem   = in.mem;
		out.memsz = in.memsz;

		result = copy_to_user(&ksegments[i], &out, sizeof(out));
		if (result)
			return -EFAULT;
	}

	return sys_kexec_load(entry, nr_segments, ksegments, flags);
}
#endif

void crash_kexec(struct pt_regs *regs)
{
	int locked;

	/* Take the kexec_lock here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	locked = xchg(&kexec_lock, 1);
	if (!locked) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;
			crash_setup_regs(&fixed_regs, regs);
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		locked = xchg(&kexec_lock, 0);
		BUG_ON(!locked);
	}
}

static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
			    size_t data_len)
{
	struct elf_note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = data_len;
	note.n_type   = type;
	memcpy(buf, &note, sizeof(note));
	buf += (sizeof(note) + 3)/4;
	memcpy(buf, name, note.n_namesz);
	buf += (note.n_namesz + 3)/4;
	memcpy(buf, data, note.n_descsz);
	buf += (note.n_descsz + 3)/4;

	return buf;
}
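
/*
 * Illustrative sketch, not part of the original file: the buffer layout
 * produced by append_elf_note() above, with each piece rounded up to a
 * 4-byte boundary:
 *
 *	struct elf_note { n_namesz, n_descsz, n_type }
 *	name  (n_namesz bytes, padded to a multiple of 4)
 *	desc  (n_descsz bytes, padded to a multiple of 4)
 *
 * final_note() below terminates the sequence with an all-zero header.
 */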

static void final_note(u32 *buf)
{
	struct elf_note note;

	note.n_namesz = 0;
	note.n_descsz = 0;
	note.n_type   = 0;
	memcpy(buf, &note, sizeof(note));
}

void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *buf;

	if ((cpu < 0) || (cpu >= NR_CPUS))
		return;

	/* Using ELF notes here is opportunistic.
	 * I need a well defined structure format
	 * for the data I pass, and I need tags
	 * on the data to indicate what information I have
	 * squirrelled away.  ELF notes happen to provide
	 * all of that, so there is no need to invent something new.
	 */
	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.pr_pid = current->pid;
	elf_core_copy_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
				&prstatus, sizeof(prstatus));
	final_note(buf);
}

static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	crash_notes = alloc_percpu(note_buf_t);
	if (!crash_notes) {
		printk("Kexec: Memory allocation for saving cpu register"
			" states failed\n");
		return -ENOMEM;
	}
	return 0;
}
module_init(crash_notes_memory_init)