4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Platform specific implementation code
32 #include <sys/types.h>
33 #include <sys/promif.h>
34 #include <sys/prom_isa.h>
35 #include <sys/prom_plat.h>
37 #include <vm/hat_sfmmu.h>
38 #include <sys/iommu.h>
40 #include <sys/cpuvar.h>
41 #include <sys/intreg.h>
48 #include <sys/clock.h>
50 #include <sys/panic.h>
51 #include <vm/seg_kmem.h>
52 #include <sys/cpu_module.h>
53 #include <sys/callb.h>
54 #include <sys/machsystm.h>
55 #include <sys/vmsystm.h>
56 #include <sys/systm.h>
57 #include <sys/archsystm.h>
58 #include <sys/stack.h>
59 #include <sys/fs/ufs_fs.h>
60 #include <sys/memlist.h>
61 #include <sys/bootconf.h>
62 #include <sys/thread.h>
63 #include <vm/vm_dep.h>
65 extern void cpr_clear_bitmaps(void);
66 extern int cpr_setbit(pfn_t ppn
, int mapflag
);
67 extern int cpr_clrbit(pfn_t ppn
, int mapflag
);
68 extern pgcnt_t
cpr_scan_kvseg(int mapflag
, bitfunc_t bitfunc
, struct seg
*seg
);
69 extern pgcnt_t
cpr_count_seg_pages(int mapflag
, bitfunc_t bitfunc
);
70 extern void dtlb_wr_entry(uint_t
, tte_t
*, uint64_t *);
71 extern void itlb_wr_entry(uint_t
, tte_t
*, uint64_t *);
73 static int i_cpr_storage_desc_alloc(csd_t
**, pgcnt_t
*, csd_t
**, int);
74 static void i_cpr_storage_desc_init(csd_t
*, pgcnt_t
, csd_t
*);
75 static caddr_t
i_cpr_storage_data_alloc(pgcnt_t
, pgcnt_t
*, int);
76 static int cpr_dump_sensitive(vnode_t
*, csd_t
*);
77 static void i_cpr_clear_entries(uint64_t, uint64_t);
78 static void i_cpr_xcall(xcfunc_t
);
80 void i_cpr_storage_free(void);
82 extern void *i_cpr_data_page
;
83 extern int cpr_test_mode
;
84 extern int cpr_nbitmaps
;
85 extern char cpr_default_path
[];
86 extern caddr_t textva
, datava
;
88 static struct cpr_map_info cpr_prom_retain
[CPR_PROM_RETAIN_CNT
];
89 caddr_t cpr_vaddr
= NULL
;
91 static uint_t sensitive_pages_saved
;
92 static uint_t sensitive_size_saved
;
94 caddr_t i_cpr_storage_data_base
;
95 caddr_t i_cpr_storage_data_end
;
96 csd_t
*i_cpr_storage_desc_base
;
97 csd_t
*i_cpr_storage_desc_end
; /* one byte beyond last used descp */
98 csd_t
*i_cpr_storage_desc_last_used
; /* last used descriptor */
99 caddr_t sensitive_write_ptr
; /* position for next storage write */
101 size_t i_cpr_sensitive_bytes_dumped
;
102 pgcnt_t i_cpr_sensitive_pgs_dumped
;
103 pgcnt_t i_cpr_storage_data_sz
; /* in pages */
104 pgcnt_t i_cpr_storage_desc_pgcnt
; /* in pages */
106 ushort_t cpr_mach_type
= CPR_MACHTYPE_4U
;
107 static csu_md_t m_info
;
110 #define MAX_STORAGE_RETRY 3
111 #define MAX_STORAGE_ALLOC_RETRY 3
112 #define INITIAL_ALLOC_PCNT 40 /* starting allocation percentage */
113 #define INTEGRAL 100 /* to get 1% precision */
115 #define EXTRA_RATE 2 /* add EXTRA_RATE% extra space */
116 #define EXTRA_DESCS 10
118 #define CPR_NO_STORAGE_DESC 1
119 #define CPR_NO_STORAGE_DATA 2
126 * CPR miscellaneous support routines
128 #define cpr_open(path, mode, vpp) (vn_open(path, UIO_SYSSPACE, \
129 mode, 0600, vpp, CRCREAT, 0))
130 #define cpr_rdwr(rw, vp, basep, cnt) (vn_rdwr(rw, vp, (caddr_t)(basep), \
131 cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
135 * definitions for saving/restoring prom pages
137 static void *ppage_buf
;
138 static pgcnt_t ppage_count
;
139 static pfn_t
*pphys_list
;
140 static size_t pphys_list_size
;
142 typedef void (*tlb_rw_t
)(uint_t
, tte_t
*, uint64_t *);
143 typedef void (*tlb_filter_t
)(int, tte_t
*, uint64_t, void *);
146 * private struct for tlb handling
148 struct cpr_trans_info
{
155 uint64_t skip
; /* assumes TLB <= 64 locked entries */
157 typedef struct cpr_trans_info cti_t
;
161 * special handling for tlb info
163 #define WITHIN_OFW(va) \
164 (((va) > (uint64_t)OFW_START_ADDR) && ((va) < (uint64_t)OFW_END_ADDR))
166 #define WITHIN_NUCLEUS(va, base) \
167 (((va) >= (base)) && \
168 (((va) + MMU_PAGESIZE) <= ((base) + MMU_PAGESIZE4M)))
170 #define IS_BIGKTSB(va) \
172 ((va) >= (uint64_t)ktsb_base) && \
173 ((va) < (uint64_t)(ktsb_base + ktsb_sz)))
178 * the text from this file is linked to follow cpr_resume_setup.o;
179 * only add text between here and i_cpr_end_jumpback when it needs
180 * to be called during resume before we switch back to the kernel
181 * trap table. all the text in this range must fit within a page.
186 * each time a machine is reset, the prom uses an inconsistent set of phys
187 * pages and the cif cookie may differ as well. so prior to restoring the
188 * original prom, we have to use to use the new/tmp prom's translations
189 * when requesting prom services.
191 * cif_handler starts out as the original prom cookie, and that gets used
192 * by client_handler() to jump into the prom. here we splice-in a wrapper
193 * routine by writing cif_handler; client_handler() will now jump to the
194 * wrapper which switches the %tba to the new/tmp prom's trap table then
195 * jumps to the new cookie.
198 i_cpr_cif_setup(int action
)
200 extern void *i_cpr_orig_cif
, *cif_handler
;
201 extern int i_cpr_cif_wrapper(void *);
204 * save the original cookie and change the current cookie to the
205 * wrapper routine. later we just restore the original cookie.
207 if (action
== CIF_SPLICE
) {
208 i_cpr_orig_cif
= cif_handler
;
209 cif_handler
= (void *)i_cpr_cif_wrapper
;
210 } else if (action
== CIF_UNLINK
)
211 cif_handler
= i_cpr_orig_cif
;
216 * launch slave cpus into kernel text, pause them,
217 * and restore the original prom pages
222 extern void restart_other_cpu(int);
225 uint64_t kctx
= kcontextreg
;
228 * Do not allow setting page size codes in MMU primary context
229 * register while using cif wrapper. This is needed to work
230 * around OBP incorrect handling of this MMU register.
235 * reset cpu_ready_set so x_calls work properly
237 CPUSET_ZERO(cpu_ready_set
);
238 CPUSET_ADD(cpu_ready_set
, getprocessorid());
241 * setup cif to use the cookie from the new/tmp prom
242 * and setup tmp handling for calling prom services.
244 i_cpr_cif_setup(CIF_SPLICE
);
247 * at this point, only the nucleus and a few cpr pages are
248 * mapped in. once we switch to the kernel trap table,
249 * we can access the rest of kernel space.
251 prom_set_traptable(&trap_table
);
256 mutex_enter(&cpu_lock
);
258 * All of the slave cpus are not ready at this time,
259 * yet the cpu structures have various cpu_flags set;
260 * clear cpu_flags and mutex_ready.
261 * Since we are coming up from a CPU suspend, the slave cpus
264 for (cp
= CPU
->cpu_next
; cp
!= CPU
; cp
= cp
->cpu_next
) {
265 cp
->cpu_flags
= CPU_FROZEN
;
266 cp
->cpu_m
.mutex_ready
= 0;
269 for (cp
= CPU
->cpu_next
; cp
!= CPU
; cp
= cp
->cpu_next
)
270 restart_other_cpu(cp
->cpu_id
);
272 pause_cpus(NULL
, NULL
);
273 mutex_exit(&cpu_lock
);
275 i_cpr_xcall(i_cpr_clear_entries
);
277 i_cpr_clear_entries(0, 0);
280 * now unlink the cif wrapper; WARNING: do not call any
281 * prom_xxx() routines until after prom pages are restored.
283 i_cpr_cif_setup(CIF_UNLINK
);
285 (void) i_cpr_prom_pages(CPR_PROM_RESTORE
);
287 /* allow setting page size codes in MMU primary context register */
/*
 * end marker for jumpback page;
 * this symbol is used to check the size of i_cpr_resume_setup()
 * and the above text.  For simplicity, the Makefile needs to
 * link i_cpr_resume_setup.o and cpr_impl.o consecutively.
 */
void
i_cpr_end_jumpback(void)
{
}
305 * scan tlb entries with reader; when valid entries are found,
306 * the filter routine will selectively save/clear them
309 i_cpr_scan_tlb(cti_t
*ctip
)
315 for (tlb_index
= ctip
->index
; tlb_index
>= 0; tlb_index
--) {
316 (*ctip
->reader
)((uint_t
)tlb_index
, &tte
, &va_tag
);
317 if (va_tag
&& TTE_IS_VALID(&tte
))
318 (*ctip
->filter
)(tlb_index
, &tte
, va_tag
, ctip
);
324 * filter for locked tlb entries that reference the text/data nucleus
325 * and any bigktsb's; these will be reinstalled by cprboot on all cpus
329 i_cpr_lnb(int index
, tte_t
*ttep
, uint64_t va_tag
, void *ctrans
)
334 * record tlb data at ctip->dst; the target tlb index starts
335 * at the highest tlb offset and moves towards 0. the prom
336 * reserves both dtlb and itlb index 0. any selected entry
337 * also gets marked to prevent being flushed during resume
339 if (TTE_IS_LOCKED(ttep
) && (va_tag
== (uint64_t)textva
||
340 va_tag
== (uint64_t)datava
|| IS_BIGKTSB(va_tag
))) {
342 while ((1 << ctip
->index
) & ctip
->skip
)
344 ASSERT(ctip
->index
> 0);
345 ASSERT(ctip
->dst
< ctip
->tail
);
346 ctip
->dst
->tte
.ll
= ttep
->ll
;
347 ctip
->dst
->va_tag
= va_tag
;
348 ctip
->dst
->index
= ctip
->index
--;
356 * some tlb entries are stale, filter for unlocked entries
357 * within the prom virt range and clear them
360 i_cpr_ufw(int index
, tte_t
*ttep
, uint64_t va_tag
, void *ctrans
)
365 if (!TTE_IS_LOCKED(ttep
) && WITHIN_OFW(va_tag
)) {
367 bzero(&clr
, sizeof (clr
));
368 (*ctip
->writer
)((uint_t
)index
, &clr
.tte
, &clr
.va_tag
);
374 * some of the entries installed by cprboot are needed only on a
375 * short-term basis and need to be flushed to avoid clogging the tlbs.
376 * scan the dtte/itte arrays for items marked as temporary and clear
377 * dtlb/itlb entries using wrfunc.
380 i_cpr_clear_tmp(sutlb_t
*listp
, int max
, tlb_rw_t wrfunc
)
384 bzero(&clr
, sizeof (clr
));
385 for (tail
= listp
+ max
; listp
< tail
&& listp
->va_tag
; listp
++) {
387 (*wrfunc
)((uint_t
)listp
->index
, &clr
.tte
, &clr
.va_tag
);
394 i_cpr_clear_entries(uint64_t arg1
, uint64_t arg2
)
396 extern void demap_all(void);
399 i_cpr_clear_tmp(m_info
.dtte
, CPR_MAX_TLB
, dtlb_wr_entry
);
400 i_cpr_clear_tmp(m_info
.itte
, CPR_MAX_TLB
, itlb_wr_entry
);
403 * for newer cpus that implement DEMAP_ALL_TYPE, demap_all is
404 * a second label for vtag_flushall. the call is made using
405 * vtag_flushall() instead of demap_all() due to runtime and
406 * krtld results with both older and newer cpu modules.
408 if (&demap_all
!= 0) {
414 * for older V9 cpus, scan tlbs and clear stale entries
416 bzero(&cti
, sizeof (cti
));
417 cti
.filter
= i_cpr_ufw
;
419 cti
.index
= cpunodes
[CPU
->cpu_id
].dtlb_size
- 1;
420 cti
.reader
= dtlb_rd_entry
;
421 cti
.writer
= dtlb_wr_entry
;
422 i_cpr_scan_tlb(&cti
);
424 cti
.index
= cpunodes
[CPU
->cpu_id
].itlb_size
- 1;
425 cti
.reader
= itlb_rd_entry
;
426 cti
.writer
= itlb_wr_entry
;
427 i_cpr_scan_tlb(&cti
);
432 * craft tlb info for tmp use during resume; this data gets used by
433 * cprboot to install tlb entries. we also mark each struct as tmp
434 * so those tlb entries will get flushed after switching to the kernel
435 * trap table. no data needs to be recorded for vaddr when it falls
436 * within the nucleus since we've already recorded nucleus ttes and
437 * a 8K tte would conflict with a 4MB tte. eg: the cpr module
438 * text/data may have been loaded into the text/data nucleus.
441 i_cpr_make_tte(cti_t
*ctip
, void *vaddr
, caddr_t nbase
)
446 if (WITHIN_NUCLEUS((caddr_t
)vaddr
, nbase
))
449 while ((1 << ctip
->index
) & ctip
->skip
)
451 ASSERT(ctip
->index
> 0);
452 ASSERT(ctip
->dst
< ctip
->tail
);
455 * without any global service available to lookup
456 * a tte by vaddr, we craft our own here:
458 ppn
= va_to_pfn(vaddr
);
459 rw
= (nbase
== datava
) ? TTE_HWWR_INT
: 0;
460 ctip
->dst
->tte
.tte_inthi
= TTE_VALID_INT
| TTE_PFN_INTHI(ppn
);
461 ctip
->dst
->tte
.tte_intlo
= TTE_PFN_INTLO(ppn
) | TTE_LCK_INT
|
462 TTE_CP_INT
| TTE_PRIV_INT
| rw
;
463 ctip
->dst
->va_tag
= ((uintptr_t)vaddr
& MMU_PAGEMASK
);
464 ctip
->dst
->index
= ctip
->index
--;
471 i_cpr_xcall(xcfunc_t func
)
473 uint_t pil
, reset_pil
;
480 setpil(XCALL_PIL
- 1);
482 xc_some(cpu_ready_set
, func
, 0, 0);
489 * restart paused slave cpus
492 i_cpr_machdep_setup(void)
495 CPR_DEBUG(CPR_DEBUG1
, "MP restarted...\n");
496 mutex_enter(&cpu_lock
);
498 mutex_exit(&cpu_lock
);
/*
 * Stop all interrupt activities in the system
 * (raise pil to block all device interrupts)
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
/*
 * Set machine up to take interrupts
 * (drop pil back to base level)
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
523 * record cpu nodes and ids
526 i_cpr_save_cpu_info(void)
528 struct sun4u_cpu_info
*scip
;
534 ASSERT(scip
< &m_info
.sci
[NCPU
]);
535 scip
->cpu_id
= cp
->cpu_id
;
536 scip
->node
= cpunodes
[cp
->cpu_id
].nodeid
;
538 } while ((cp
= cp
->cpu_next
) != CPU
);
543 * Write necessary machine dependent information to cpr state file,
544 * eg. sun4u mmu ctx secondary for the current running process (cpr) ...
547 i_cpr_write_machdep(vnode_t
*vp
)
549 extern uint_t
getpstate(), getwstate();
550 extern uint_t i_cpr_tstack_size
;
551 const char ustr
[] = ": unix-tte 2drop false ;";
559 * ustr[] is used as temporary forth words during
560 * slave startup sequence, see sfmmu_mp_startup()
563 cmach
.md_magic
= (uint_t
)CPR_MACHDEP_MAGIC
;
564 cmach
.md_size
= sizeof (m_info
) + sizeof (ustr
);
566 if (rc
= cpr_write(vp
, (caddr_t
)&cmach
, sizeof (cmach
))) {
567 cpr_err(CE_WARN
, "Failed to write descriptor.");
572 * m_info is now cleared in i_cpr_dump_setup()
574 m_info
.ksb
= (uint32_t)STACK_BIAS
;
575 m_info
.kpstate
= (uint16_t)getpstate();
576 m_info
.kwstate
= (uint16_t)getwstate();
577 CPR_DEBUG(CPR_DEBUG1
, "stack bias 0x%x, pstate 0x%x, wstate 0x%x\n",
578 m_info
.ksb
, m_info
.kpstate
, m_info
.kwstate
);
580 ltp
= &ttolwp(curthread
)->lwp_qsav
;
581 m_info
.qsav_pc
= (cpr_ext
)ltp
->val
[0];
582 m_info
.qsav_sp
= (cpr_ext
)ltp
->val
[1];
585 * Set secondary context to INVALID_CONTEXT to force the HAT
586 * to re-setup the MMU registers and locked TTEs it needs for
589 m_info
.mmu_ctx_sec
= INVALID_CONTEXT
;
590 m_info
.mmu_ctx_pri
= KCONTEXT
;
592 tinfo
= (uintptr_t)curthread
;
593 m_info
.thrp
= (cpr_ptr
)tinfo
;
595 tinfo
= (uintptr_t)i_cpr_resume_setup
;
596 m_info
.func
= (cpr_ptr
)tinfo
;
599 * i_cpr_data_page is comprised of a 4K stack area and a few
600 * trailing data symbols; the page is shared by the prom and
601 * kernel during resume. the stack size is recorded here
602 * and used by cprboot to set %sp
604 tinfo
= (uintptr_t)&i_cpr_data_page
;
605 m_info
.tmp_stack
= (cpr_ptr
)tinfo
;
606 m_info
.tmp_stacksize
= i_cpr_tstack_size
;
608 m_info
.test_mode
= cpr_test_mode
;
610 i_cpr_save_cpu_info();
612 if (rc
= cpr_write(vp
, (caddr_t
)&m_info
, sizeof (m_info
))) {
613 cpr_err(CE_WARN
, "Failed to write machdep info.");
617 fmt
= "error writing %s forth info";
618 if (rc
= cpr_write(vp
, (caddr_t
)ustr
, sizeof (ustr
)))
619 cpr_err(CE_WARN
, fmt
, "unix-tte");
626 * Save miscellaneous information which needs to be written to the
627 * state file. This information is required to re-initialize
628 * kernel/prom handshaking.
631 i_cpr_save_machdep_info(void)
633 CPR_DEBUG(CPR_DEBUG5
, "jumpback size = 0x%lx\n",
634 (uintptr_t)&i_cpr_end_jumpback
-
635 (uintptr_t)i_cpr_resume_setup
);
638 * Verify the jumpback code all falls in one page.
640 if (((uintptr_t)&i_cpr_end_jumpback
& MMU_PAGEMASK
) !=
641 ((uintptr_t)i_cpr_resume_setup
& MMU_PAGEMASK
))
642 cpr_err(CE_PANIC
, "jumpback code exceeds one page.");
647 * cpu0 should contain bootcpu info
656 i_cpr_bootcpuid(void)
662 * Return the virtual address of the mapping area
665 i_cpr_map_setup(void)
668 * Allocate a virtual memory range spanned by an hmeblk.
669 * This would be 8 hments or 64k bytes. Starting VA
670 * must be 64k (8-page) aligned.
672 cpr_vaddr
= vmem_xalloc(heap_arena
,
673 mmu_ptob(NHMENTS
), mmu_ptob(NHMENTS
),
674 0, 0, NULL
, NULL
, VM_NOSLEEP
);
679 * create tmp locked tlb entries for a group of phys pages;
681 * i_cpr_mapin/i_cpr_mapout should always be called in pairs,
682 * otherwise would fill up a tlb with locked entries
685 i_cpr_mapin(caddr_t vaddr
, uint_t pages
, pfn_t ppn
)
688 extern pfn_t curthreadpfn
;
689 extern int curthreadremapped
;
691 curthreadremapped
= (ppn
<= curthreadpfn
&& curthreadpfn
< ppn
+ pages
);
693 for (; pages
--; ppn
++, vaddr
+= MMU_PAGESIZE
) {
694 tte
.tte_inthi
= TTE_VALID_INT
| TTE_PFN_INTHI(ppn
);
695 tte
.tte_intlo
= TTE_PFN_INTLO(ppn
) | TTE_LCK_INT
|
696 TTE_CP_INT
| TTE_PRIV_INT
| TTE_HWWR_INT
;
697 sfmmu_dtlb_ld_kva(vaddr
, &tte
);
702 i_cpr_mapout(caddr_t vaddr
, uint_t pages
)
704 extern int curthreadremapped
;
706 if (curthreadremapped
&& vaddr
<= (caddr_t
)curthread
&&
707 (caddr_t
)curthread
< vaddr
+ pages
* MMU_PAGESIZE
)
708 curthreadremapped
= 0;
710 for (; pages
--; vaddr
+= MMU_PAGESIZE
)
711 vtag_flushpage(vaddr
, (uint64_t)ksfmmup
);
715 * We're done using the mapping area; release virtual space
718 i_cpr_map_destroy(void)
720 vmem_free(heap_arena
, cpr_vaddr
, mmu_ptob(NHMENTS
));
/*
 * NOTE(review): no body lines survived extraction; on sun4u this
 * appears to be a no-op stub (x-call state needs no per-cycle
 * handling here) — confirm against the original file.
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}
732 * This function takes care of pages which are not in kas or need to be
733 * taken care of in a special way. For example, panicbuf pages are not
734 * in kas and their pages are allocated via prom_retain().
737 i_cpr_count_special_kpages(int mapflag
, bitfunc_t bitfunc
)
739 struct cpr_map_info
*pri
, *tail
;
740 pgcnt_t pages
, total
= 0;
744 * Save information about prom retained panicbuf pages
746 if (bitfunc
== cpr_setbit
) {
747 pri
= &cpr_prom_retain
[CPR_PANICBUF
];
748 pri
->virt
= (cpr_ptr
)panicbuf
;
749 pri
->phys
= va_to_pa(panicbuf
);
750 pri
->size
= sizeof (panicbuf
);
754 * Go through the prom_retain array to tag those pages.
756 tail
= &cpr_prom_retain
[CPR_PROM_RETAIN_CNT
];
757 for (pri
= cpr_prom_retain
; pri
< tail
; pri
++) {
758 pages
= mmu_btopr(pri
->size
);
759 for (pfn
= ADDR_TO_PN(pri
->phys
); pages
--; pfn
++) {
760 if (pf_is_memory(pfn
)) {
761 if (bitfunc
== cpr_setbit
) {
762 if ((*bitfunc
)(pfn
, mapflag
) == 0)
775 * Free up memory-related resources here. We start by freeing buffers
776 * allocated during suspend initialization. Also, free up the mapping
777 * resources allocated in cpr_init().
780 i_cpr_free_memory_resources(void)
782 (void) i_cpr_prom_pages(CPR_PROM_FREE
);
784 i_cpr_storage_free();
789 * Derived from cpr_write_statefile().
790 * Save the sensitive pages to the storage area and do bookkeeping
791 * using the sensitive descriptors. Each descriptor will contain no more
792 * than CPR_MAXCONTIG amount of contiguous pages to match the max amount
793 * of pages that statefile gets written to disk at each write.
794 * XXX The CPR_MAXCONTIG can be changed to the size of the compression
798 i_cpr_save_to_storage(void)
800 sensitive_size_saved
= 0;
801 sensitive_pages_saved
= 0;
802 sensitive_write_ptr
= i_cpr_storage_data_base
;
803 return (cpr_contig_pages(NULL
, SAVE_TO_STORAGE
));
808 * This routine allocates space to save the sensitive kernel pages,
809 * i.e. kernel data nucleus, kvalloc and kvseg segments.
810 * It's assumed that those segments are the only areas that can be
811 * contaminated by memory allocations during statefile dumping.
812 * The space allocated here contains:
813 * A list of descriptors describing the saved sensitive pages.
814 * The storage area for saving the compressed sensitive kernel pages.
815 * Since storage pages are allocated from segkmem, they need to be
816 * excluded when saving.
819 i_cpr_save_sensitive_kpages(void)
821 static const char pages_fmt
[] = "\n%s %s allocs\n"
822 " spages %ld, vpages %ld, diff %ld\n";
825 pgcnt_t pages
, spages
, vpages
;
830 * Tag sensitive kpages. Allocate space for storage descriptors
831 * and storage data area based on the resulting bitmaps.
832 * Note: The storage space will be part of the sensitive
833 * segment, so we need to tag kpages here before the storage
834 * is actually allocated just so their space won't be accounted
835 * for. They will not be part of the statefile although those
836 * pages will be claimed by cprboot.
840 spages
= i_cpr_count_sensitive_kpages(REGULAR_BITMAP
, cpr_setbit
);
841 vpages
= cpr_count_volatile_pages(REGULAR_BITMAP
, cpr_clrbit
);
842 pages
= spages
- vpages
;
844 str
= "i_cpr_save_sensitive_kpages:";
845 CPR_DEBUG(CPR_DEBUG7
, pages_fmt
, "before", str
, spages
, vpages
, pages
);
848 * Allocate space to save the clean sensitive kpages
850 for (retry_cnt
= 0; retry_cnt
< MAX_STORAGE_ALLOC_RETRY
; retry_cnt
++) {
852 * Alloc on first pass or realloc if we are retrying because
853 * of insufficient storage for sensitive pages
855 if (retry_cnt
== 0 || error
== ENOMEM
) {
856 if (i_cpr_storage_data_base
) {
857 kmem_free(i_cpr_storage_data_base
,
858 mmu_ptob(i_cpr_storage_data_sz
));
859 i_cpr_storage_data_base
= NULL
;
860 i_cpr_storage_data_sz
= 0;
862 addr
= i_cpr_storage_data_alloc(pages
,
863 &i_cpr_storage_data_sz
, retry_cnt
);
865 CPR_DEBUG(CPR_DEBUG7
,
866 "\n%s can't allocate data storage space!\n",
870 i_cpr_storage_data_base
= addr
;
871 i_cpr_storage_data_end
=
872 addr
+ mmu_ptob(i_cpr_storage_data_sz
);
876 * Allocate on first pass, only realloc if retry is because of
877 * insufficient descriptors, but reset contents on each pass
878 * (desc_alloc resets contents as well)
880 if (retry_cnt
== 0 || error
== -1) {
881 error
= i_cpr_storage_desc_alloc(
882 &i_cpr_storage_desc_base
, &i_cpr_storage_desc_pgcnt
,
883 &i_cpr_storage_desc_end
, retry_cnt
);
887 i_cpr_storage_desc_init(i_cpr_storage_desc_base
,
888 i_cpr_storage_desc_pgcnt
, i_cpr_storage_desc_end
);
892 * We are ready to save the sensitive kpages to storage.
893 * We cannot trust what's tagged in the bitmaps anymore
894 * after storage allocations. Clear up the bitmaps and
895 * retag the sensitive kpages again. The storage pages
896 * should be untagged.
901 i_cpr_count_sensitive_kpages(REGULAR_BITMAP
, cpr_setbit
);
902 vpages
= cpr_count_volatile_pages(REGULAR_BITMAP
, cpr_clrbit
);
904 CPR_DEBUG(CPR_DEBUG7
, pages_fmt
, "after ", str
,
905 spages
, vpages
, spages
- vpages
);
908 * Returns 0 on success, -1 if too few descriptors, and
909 * ENOMEM if not enough space to save sensitive pages
911 CPR_DEBUG(CPR_DEBUG1
, "compressing pages to storage...\n");
912 error
= i_cpr_save_to_storage();
914 /* Saving to storage succeeded */
915 CPR_DEBUG(CPR_DEBUG1
, "compressed %d pages\n",
916 sensitive_pages_saved
);
918 } else if (error
== -1)
919 CPR_DEBUG(CPR_DEBUG1
, "%s too few descriptors\n", str
);
928 * Estimate how much memory we will need to save
929 * the sensitive pages with compression.
932 i_cpr_storage_data_alloc(pgcnt_t pages
, pgcnt_t
*alloc_pages
, int retry_cnt
)
934 pgcnt_t alloc_pcnt
, last_pcnt
;
938 str
= "i_cpr_storage_data_alloc:";
939 if (retry_cnt
== 0) {
941 * common compression ratio is about 3:1
942 * initial storage allocation is estimated at 40%
943 * to cover the majority of cases
945 alloc_pcnt
= INITIAL_ALLOC_PCNT
;
946 *alloc_pages
= (pages
* alloc_pcnt
) / INTEGRAL
;
947 CPR_DEBUG(CPR_DEBUG7
, "%s sensitive pages: %ld\n", str
, pages
);
948 CPR_DEBUG(CPR_DEBUG7
,
949 "%s initial est pages: %ld, alloc %ld%%\n",
950 str
, *alloc_pages
, alloc_pcnt
);
953 * calculate the prior compression percentage (x100)
954 * from the last attempt to save sensitive pages
956 ASSERT(sensitive_pages_saved
!= 0);
957 last_pcnt
= (mmu_btopr(sensitive_size_saved
) * INTEGRAL
) /
958 sensitive_pages_saved
;
959 CPR_DEBUG(CPR_DEBUG7
, "%s last ratio %ld%%\n", str
, last_pcnt
);
962 * new estimated storage size is based on
963 * the larger ratio + 5% for each retry:
964 * pages * (last + [5%, 10%])
966 alloc_pcnt
= MAX(last_pcnt
, INITIAL_ALLOC_PCNT
) +
968 *alloc_pages
= (pages
* alloc_pcnt
) / INTEGRAL
;
969 CPR_DEBUG(CPR_DEBUG7
, "%s Retry est pages: %ld, alloc %ld%%\n",
970 str
, *alloc_pages
, alloc_pcnt
);
973 addr
= kmem_alloc(mmu_ptob(*alloc_pages
), KM_NOSLEEP
);
974 CPR_DEBUG(CPR_DEBUG7
, "%s alloc %ld pages\n", str
, *alloc_pages
);
980 i_cpr_storage_free(void)
982 /* Free descriptors */
983 if (i_cpr_storage_desc_base
) {
984 kmem_free(i_cpr_storage_desc_base
,
985 mmu_ptob(i_cpr_storage_desc_pgcnt
));
986 i_cpr_storage_desc_base
= NULL
;
987 i_cpr_storage_desc_pgcnt
= 0;
992 if (i_cpr_storage_data_base
) {
993 kmem_free(i_cpr_storage_data_base
,
994 mmu_ptob(i_cpr_storage_data_sz
));
995 i_cpr_storage_data_base
= NULL
;
996 i_cpr_storage_data_sz
= 0;
1002 * This routine is derived from cpr_compress_and_write().
1003 * 1. Do bookkeeping in the descriptor for the contiguous sensitive chunk.
1004 * 2. Compress and save the clean sensitive pages into the storage area.
1007 i_cpr_compress_and_save(int chunks
, pfn_t spfn
, pgcnt_t pages
)
1009 extern char *cpr_compress_pages(cpd_t
*, pgcnt_t
, int);
1010 extern caddr_t i_cpr_storage_data_end
;
1011 uint_t remaining
, datalen
;
1019 * Fill next empty storage descriptor
1021 descp
= i_cpr_storage_desc_base
+ chunks
- 1;
1022 if (descp
>= i_cpr_storage_desc_end
) {
1023 CPR_DEBUG(CPR_DEBUG1
, "ran out of descriptors, base 0x%p, "
1024 "chunks %d, end 0x%p, descp 0x%p\n",
1025 (void *)i_cpr_storage_desc_base
, chunks
,
1026 (void *)i_cpr_storage_desc_end
, (void *)descp
);
1029 ASSERT(descp
->csd_dirty_spfn
== (uint_t
)-1);
1030 i_cpr_storage_desc_last_used
= descp
;
1032 descp
->csd_dirty_spfn
= spfn
;
1033 descp
->csd_dirty_npages
= pages
;
1035 i_cpr_mapin(CPR
->c_mapping_area
, pages
, spfn
);
1038 * try compressing pages and copy cpd fields
1039 * pfn is copied for debug use
1042 datap
= cpr_compress_pages(&cpd
, pages
, C_COMPRESSING
);
1043 datalen
= cpd
.cpd_length
;
1044 descp
->csd_clean_compressed
= (cpd
.cpd_flag
& CPD_COMPRESS
);
1046 descp
->csd_usum
= cpd
.cpd_usum
;
1047 descp
->csd_csum
= cpd
.cpd_csum
;
1053 * Save the raw or compressed data to the storage area pointed to by
1054 * sensitive_write_ptr. Make sure the storage space is big enough to
1055 * hold the result. Otherwise roll back to increase the storage space.
1057 descp
->csd_clean_sva
= (cpr_ptr
)sensitive_write_ptr
;
1058 descp
->csd_clean_sz
= datalen
;
1059 if ((sensitive_write_ptr
+ datalen
) < i_cpr_storage_data_end
) {
1060 extern void cprbcopy(void *, void *, size_t);
1062 cprbcopy(datap
, sensitive_write_ptr
, datalen
);
1063 sensitive_size_saved
+= datalen
;
1064 sensitive_pages_saved
+= descp
->csd_dirty_npages
;
1065 sensitive_write_ptr
+= datalen
;
1067 remaining
= (i_cpr_storage_data_end
- sensitive_write_ptr
);
1068 CPR_DEBUG(CPR_DEBUG1
, "i_cpr_compress_and_save: The storage "
1069 "space is too small!\ngot %d, want %d\n\n",
1070 remaining
, (remaining
+ datalen
));
1073 * Check to see if the content of the sensitive pages that we
1074 * just copied have changed during this small time window.
1076 test_usum
= checksum32(CPR
->c_mapping_area
, mmu_ptob(pages
));
1077 descp
->csd_usum
= cpd
.cpd_usum
;
1078 if (test_usum
!= descp
->csd_usum
) {
1079 CPR_DEBUG(CPR_DEBUG1
, "\nWARNING: "
1080 "i_cpr_compress_and_save: "
1081 "Data in the range of pfn 0x%lx to pfn "
1082 "0x%lx has changed after they are saved "
1083 "into storage.", spfn
, (spfn
+ pages
- 1));
1089 i_cpr_mapout(CPR
->c_mapping_area
, pages
);
1095 * This routine is derived from cpr_count_kpages().
1096 * It goes through kernel data nucleus and segkmem segments to select
1097 * pages in use and mark them in the corresponding bitmap.
1100 i_cpr_count_sensitive_kpages(int mapflag
, bitfunc_t bitfunc
)
1102 pgcnt_t kdata_cnt
= 0, segkmem_cnt
= 0;
1103 extern caddr_t e_moddata
;
1104 extern struct seg kvalloc
;
1105 extern struct seg kmem64
;
1109 * Kernel data nucleus pages
1111 size
= e_moddata
- s_data
;
1112 kdata_cnt
+= cpr_count_pages(s_data
, size
,
1113 mapflag
, bitfunc
, DBG_SHOWRANGE
);
1116 * kvseg and kvalloc pages
1118 segkmem_cnt
+= cpr_scan_kvseg(mapflag
, bitfunc
, &kvseg
);
1119 segkmem_cnt
+= cpr_count_pages(kvalloc
.s_base
, kvalloc
.s_size
,
1120 mapflag
, bitfunc
, DBG_SHOWRANGE
);
1122 /* segment to support kernel memory usage above 32-bit space (4GB) */
1124 segkmem_cnt
+= cpr_count_pages(kmem64
.s_base
, kmem64
.s_size
,
1125 mapflag
, bitfunc
, DBG_SHOWRANGE
);
1127 CPR_DEBUG(CPR_DEBUG7
, "\ni_cpr_count_sensitive_kpages:\n"
1128 "\tkdata_cnt %ld + segkmem_cnt %ld = %ld pages\n",
1129 kdata_cnt
, segkmem_cnt
, kdata_cnt
+ segkmem_cnt
);
1131 return (kdata_cnt
+ segkmem_cnt
);
1136 i_cpr_count_storage_pages(int mapflag
, bitfunc_t bitfunc
)
1140 if (i_cpr_storage_desc_base
) {
1141 count
+= cpr_count_pages((caddr_t
)i_cpr_storage_desc_base
,
1142 (size_t)mmu_ptob(i_cpr_storage_desc_pgcnt
),
1143 mapflag
, bitfunc
, DBG_SHOWRANGE
);
1145 if (i_cpr_storage_data_base
) {
1146 count
+= cpr_count_pages(i_cpr_storage_data_base
,
1147 (size_t)mmu_ptob(i_cpr_storage_data_sz
),
1148 mapflag
, bitfunc
, DBG_SHOWRANGE
);
1155 * Derived from cpr_write_statefile().
1156 * Allocate (or reallocate after exhausting the supply) descriptors for each
1157 * chunk of contiguous sensitive kpages.
1160 i_cpr_storage_desc_alloc(csd_t
**basepp
, pgcnt_t
*pgsp
, csd_t
**endpp
,
1167 char *str
= "i_cpr_storage_desc_alloc:";
1170 * On initial allocation, add some extra to cover overhead caused
1171 * by the allocation for the storage area later.
1174 chunks
= cpr_contig_pages(NULL
, STORAGE_DESC_ALLOC
) +
1176 npages
= mmu_btopr(sizeof (**basepp
) * (pgcnt_t
)chunks
);
1177 CPR_DEBUG(CPR_DEBUG7
, "%s chunks %d, ", str
, chunks
);
1179 CPR_DEBUG(CPR_DEBUG7
, "%s retry %d: ", str
, retry
);
1182 /* Free old descriptors, if any */
1184 kmem_free((caddr_t
)*basepp
, mmu_ptob(*pgsp
));
1186 descp
= *basepp
= kmem_alloc(mmu_ptob(npages
), KM_NOSLEEP
);
1187 if (descp
== NULL
) {
1188 CPR_DEBUG(CPR_DEBUG7
, "%s no space for descriptors!\n", str
);
1193 len
= mmu_ptob(npages
);
1194 end
= *endpp
= descp
+ (len
/ (sizeof (**basepp
)));
1195 CPR_DEBUG(CPR_DEBUG7
, "npages 0x%lx, len 0x%lx, items 0x%lx\n\t*basepp "
1196 "%p, *endpp %p\n", npages
, len
, (len
/ (sizeof (**basepp
))),
1197 (void *)*basepp
, (void *)*endpp
);
1198 i_cpr_storage_desc_init(descp
, npages
, end
);
1203 i_cpr_storage_desc_init(csd_t
*descp
, pgcnt_t npages
, csd_t
*end
)
1205 size_t len
= mmu_ptob(npages
);
1207 /* Initialize the descriptors to something impossible. */
1211 * This condition is tested by an ASSERT
1213 for (; descp
< end
; descp
++)
1214 descp
->csd_dirty_spfn
= (uint_t
)-1;
1219 i_cpr_dump_sensitive_kpages(vnode_t
*vp
)
1222 uint_t spin_cnt
= 0;
1226 * These following two variables need to be reinitialized
1227 * for each cpr cycle.
1229 i_cpr_sensitive_bytes_dumped
= 0;
1230 i_cpr_sensitive_pgs_dumped
= 0;
1232 if (i_cpr_storage_desc_base
) {
1233 for (descp
= i_cpr_storage_desc_base
;
1234 descp
<= i_cpr_storage_desc_last_used
; descp
++) {
1235 if (error
= cpr_dump_sensitive(vp
, descp
))
1238 if ((spin_cnt
& 0x5F) == 1)
1244 CPR_DEBUG(CPR_DEBUG7
, "\ni_cpr_dump_sensitive_kpages: dumped %ld\n",
1245 i_cpr_sensitive_pgs_dumped
);
1251 * 1. Fill the cpr page descriptor with the info of the dirty pages
1253 * write the descriptor out. It will be used at resume.
1254 * 2. Write the clean data in stead of the dirty data out.
1255 * Note: to save space, the clean data is already compressed.
1258 cpr_dump_sensitive(vnode_t
*vp
, csd_t
*descp
)
1262 cpd_t cpd
; /* cpr page descriptor */
1264 pgcnt_t dirty_npages
;
1267 int clean_compressed
;
1268 extern uchar_t cpr_pagecopy
[];
1270 dirty_spfn
= descp
->csd_dirty_spfn
;
1271 dirty_npages
= descp
->csd_dirty_npages
;
1272 clean_sva
= (caddr_t
)descp
->csd_clean_sva
;
1273 clean_sz
= descp
->csd_clean_sz
;
1274 clean_compressed
= descp
->csd_clean_compressed
;
1276 /* Fill cpr page descriptor. */
1277 cpd
.cpd_magic
= (uint_t
)CPR_PAGE_MAGIC
;
1278 cpd
.cpd_pfn
= dirty_spfn
;
1279 cpd
.cpd_flag
= 0; /* must init to zero */
1280 cpd
.cpd_pages
= dirty_npages
;
1283 if ((cpd
.cpd_usum
= descp
->csd_usum
) != 0)
1284 cpd
.cpd_flag
|= CPD_USUM
;
1285 if ((cpd
.cpd_csum
= descp
->csd_csum
) != 0)
1286 cpd
.cpd_flag
|= CPD_CSUM
;
1289 STAT
->cs_dumped_statefsz
+= mmu_ptob(dirty_npages
);
1292 * The sensitive kpages are usually saved with compression
1293 * unless compression could not reduce the size of the data.
1294 * If user choose not to have the statefile compressed,
1295 * we need to decompress the data back before dumping it to disk.
1297 if (CPR
->c_flags
& C_COMPRESSING
) {
1298 cpd
.cpd_length
= clean_sz
;
1300 if (clean_compressed
)
1301 cpd
.cpd_flag
|= CPD_COMPRESS
;
1303 if (clean_compressed
) {
1304 cpd
.cpd_length
= decompress(clean_sva
, cpr_pagecopy
,
1305 clean_sz
, mmu_ptob(dirty_npages
));
1306 datap
= (caddr_t
)cpr_pagecopy
;
1307 ASSERT(cpd
.cpd_length
== mmu_ptob(dirty_npages
));
1309 cpd
.cpd_length
= clean_sz
;
1315 /* Write cpr page descriptor */
1316 error
= cpr_write(vp
, (caddr_t
)&cpd
, sizeof (cpd
));
1318 CPR_DEBUG(CPR_DEBUG7
, "descp: %p\n", (void *)descp
);
1320 debug_enter("cpr_dump_sensitive: cpr_write() page "
1321 "descriptor failed!\n");
1326 i_cpr_sensitive_bytes_dumped
+= sizeof (cpd_t
);
1328 /* Write page data */
1329 error
= cpr_write(vp
, (caddr_t
)datap
, cpd
.cpd_length
);
1331 CPR_DEBUG(CPR_DEBUG7
, "error: %x\n", error
);
1332 CPR_DEBUG(CPR_DEBUG7
, "descp: %p\n", (void *)descp
);
1333 CPR_DEBUG(CPR_DEBUG7
, "cpr_write(%p, %p , %lx)\n",
1334 (void *)vp
, (void *)datap
, cpd
.cpd_length
);
1336 debug_enter("cpr_dump_sensitive: cpr_write() data failed!\n");
1341 i_cpr_sensitive_bytes_dumped
+= cpd
.cpd_length
;
1342 i_cpr_sensitive_pgs_dumped
+= dirty_npages
;
1349 * Sanity check to make sure that we have dumped right amount
1350 * of pages from different sources to statefile.
1353 i_cpr_check_pgs_dumped(uint_t pgs_expected
, uint_t regular_pgs_dumped
)
1355 uint_t total_pgs_dumped
;
1357 total_pgs_dumped
= regular_pgs_dumped
+ i_cpr_sensitive_pgs_dumped
;
1359 CPR_DEBUG(CPR_DEBUG7
, "\ncheck_pgs: reg %d + sens %ld = %d, "
1360 "expect %d\n\n", regular_pgs_dumped
, i_cpr_sensitive_pgs_dumped
,
1361 total_pgs_dumped
, pgs_expected
);
1363 if (pgs_expected
== total_pgs_dumped
)
1371 i_cpr_reusefini(void)
1379 if (cpr_reusable_mode
)
1380 cpr_reusable_mode
= 0;
1382 if (rc
= cpr_open_deffile(FREAD
|FWRITE
, &vp
)) {
1384 cpr_err(CE_CONT
, "uadmin A_FREEZE AD_REUSEFINI "
1385 "(uadmin %d %d)\nmust be done with / mounted "
1386 "writeable.\n", A_FREEZE
, AD_REUSEFINI
);
1391 cdef
= kmem_alloc(sizeof (*cdef
), KM_SLEEP
);
1392 rc
= cpr_rdwr(UIO_READ
, vp
, cdef
, sizeof (*cdef
));
1395 cpr_err(CE_WARN
, "Failed reading %s, errno = %d",
1396 cpr_default_path
, rc
);
1397 } else if (cdef
->mini
.magic
!= CPR_DEFAULT_MAGIC
) {
1398 cpr_err(CE_WARN
, "bad magic number in %s, cannot restore "
1399 "prom values for %s", cpr_default_path
,
1400 cpr_enumerate_promprops(&bufp
, &size
));
1401 kmem_free(bufp
, size
);
1405 * clean up prom properties
1407 rc
= cpr_update_nvram(cdef
->props
);
1410 * invalidate the disk copy and turn off reusable
1412 cdef
->mini
.magic
= 0;
1413 cdef
->mini
.reusable
= 0;
1414 if (rc
= cpr_rdwr(UIO_WRITE
, vp
,
1415 &cdef
->mini
, sizeof (cdef
->mini
))) {
1416 cpr_err(CE_WARN
, "Failed writing %s, errno %d",
1417 cpr_default_path
, rc
);
1422 (void) VOP_CLOSE(vp
, FREAD
|FWRITE
, 1, (offset_t
)0, CRED(), NULL
);
1424 kmem_free(cdef
, sizeof (*cdef
));
1431 i_cpr_reuseinit(void)
1435 if (rc
= cpr_default_setup(1))
1439 * We need to validate default file
1441 rc
= cpr_validate_definfo(1);
1443 cpr_reusable_mode
= 1;
1444 else if (rc
== EROFS
) {
1445 cpr_err(CE_NOTE
, "reuseinit must be performed "
1446 "while / is mounted writeable");
1449 (void) cpr_default_setup(0);
1456 i_cpr_check_cprinfo(void)
1462 if (rc
= cpr_open_deffile(FREAD
, &vp
)) {
1464 cpr_err(CE_NOTE
, "cprinfo file does not "
1465 "exist. You must run 'uadmin %d %d' "
1466 "command while / is mounted writeable,\n"
1467 "then reboot and run 'uadmin %d %d' "
1468 "to create a reusable statefile",
1469 A_FREEZE
, AD_REUSEINIT
, A_FREEZE
, AD_REUSABLE
);
1473 rc
= cpr_rdwr(UIO_READ
, vp
, &mini
, sizeof (mini
));
1474 (void) VOP_CLOSE(vp
, FREAD
, 1, (offset_t
)0, CRED(), NULL
);
1478 cpr_err(CE_WARN
, "Failed reading %s, errno = %d",
1479 cpr_default_path
, rc
);
1480 } else if (mini
.magic
!= CPR_DEFAULT_MAGIC
) {
1481 cpr_err(CE_CONT
, "bad magic number in cprinfo file.\n"
1482 "You must run 'uadmin %d %d' while / is mounted "
1483 "writeable, then reboot and run 'uadmin %d %d' "
1484 "to create a reusable statefile\n",
1485 A_FREEZE
, AD_REUSEINIT
, A_FREEZE
, AD_REUSABLE
);
1494 i_cpr_reusable_supported(void)
1501 * find prom phys pages and alloc space for a tmp copy
1504 i_cpr_find_ppages(void)
1507 struct memlist
*pmem
;
1508 pgcnt_t npages
, pcnt
, scnt
, vcnt
;
1509 pfn_t ppn
, plast
, *dst
;
1512 cpr_clear_bitmaps();
1513 mapflag
= REGULAR_BITMAP
;
1516 * there should be a page_t for each phys page used by the kernel;
1517 * set a bit for each phys page not tracked by a page_t
1520 memlist_read_lock();
1521 for (pmem
= phys_install
; pmem
; pmem
= pmem
->ml_next
) {
1522 npages
= mmu_btop(pmem
->ml_size
);
1523 ppn
= mmu_btop(pmem
->ml_address
);
1524 for (plast
= ppn
+ npages
; ppn
< plast
; ppn
++) {
1525 if (page_numtopp_nolock(ppn
))
1527 (void) cpr_setbit(ppn
, mapflag
);
1531 memlist_read_unlock();
1534 * clear bits for phys pages in each segment
1536 scnt
= cpr_count_seg_pages(mapflag
, cpr_clrbit
);
1539 * set bits for phys pages referenced by the promvp vnode;
1540 * these pages are mostly comprised of forthdebug words
1543 for (pp
= promvp
.v_pages
; pp
; ) {
1544 if (cpr_setbit(pp
->p_offset
, mapflag
) == 0)
1547 if (pp
== promvp
.v_pages
)
1552 * total number of prom pages are:
1553 * (non-page_t pages - seg pages + vnode pages)
1555 ppage_count
= pcnt
- scnt
+ vcnt
;
1556 CPR_DEBUG(CPR_DEBUG1
,
1557 "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
1558 pcnt
, scnt
, vcnt
, ppage_count
);
1561 * alloc array of pfn_t to store phys page list
1563 pphys_list_size
= ppage_count
* sizeof (pfn_t
);
1564 pphys_list
= kmem_alloc(pphys_list_size
, KM_NOSLEEP
);
1565 if (pphys_list
== NULL
) {
1566 cpr_err(CE_WARN
, "cannot alloc pphys_list");
1571 * phys pages referenced in the bitmap should be
1572 * those used by the prom; scan bitmap and save
1573 * a list of prom phys page numbers
1576 memlist_read_lock();
1577 for (pmem
= phys_install
; pmem
; pmem
= pmem
->ml_next
) {
1578 npages
= mmu_btop(pmem
->ml_size
);
1579 ppn
= mmu_btop(pmem
->ml_address
);
1580 for (plast
= ppn
+ npages
; ppn
< plast
; ppn
++) {
1581 if (cpr_isset(ppn
, mapflag
)) {
1582 ASSERT(dst
< (pphys_list
+ ppage_count
));
1587 memlist_read_unlock();
1590 * allocate space to store prom pages
1592 ppage_buf
= kmem_alloc(mmu_ptob(ppage_count
), KM_NOSLEEP
);
1593 if (ppage_buf
== NULL
) {
1594 kmem_free(pphys_list
, pphys_list_size
);
1596 cpr_err(CE_WARN
, "cannot alloc ppage_buf");
1605 * save prom pages to kmem pages
1608 i_cpr_save_ppages(void)
1610 pfn_t
*pphys
, *plast
;
1614 * map in each prom page and copy to a kmem page
1617 plast
= pphys_list
+ ppage_count
;
1618 for (pphys
= pphys_list
; pphys
< plast
; pphys
++) {
1619 i_cpr_mapin(cpr_vaddr
, 1, *pphys
);
1620 bcopy(cpr_vaddr
, dst
, MMU_PAGESIZE
);
1621 i_cpr_mapout(cpr_vaddr
, 1);
1622 dst
+= MMU_PAGESIZE
;
1625 CPR_DEBUG(CPR_DEBUG1
, "saved %ld prom pages\n", ppage_count
);
1630 * restore prom pages from kmem pages
1633 i_cpr_restore_ppages(void)
1635 pfn_t
*pphys
, *plast
;
1641 * map in each prom page and copy from a kmem page
1644 plast
= pphys_list
+ ppage_count
;
1645 for (pphys
= pphys_list
; pphys
< plast
; pphys
++) {
1646 i_cpr_mapin(cpr_vaddr
, 1, *pphys
);
1647 bcopy(src
, cpr_vaddr
, MMU_PAGESIZE
);
1648 i_cpr_mapout(cpr_vaddr
, 1);
1649 src
+= MMU_PAGESIZE
;
1654 CPR_DEBUG(CPR_DEBUG1
, "restored %ld prom pages\n", ppage_count
);
1659 * save/restore prom pages or free related allocs
1662 i_cpr_prom_pages(int action
)
1666 if (action
== CPR_PROM_SAVE
) {
1667 if (ppage_buf
== NULL
) {
1668 ASSERT(pphys_list
== NULL
);
1669 if (error
= i_cpr_find_ppages())
1671 i_cpr_save_ppages();
1673 } else if (action
== CPR_PROM_RESTORE
) {
1674 i_cpr_restore_ppages();
1675 } else if (action
== CPR_PROM_FREE
) {
1677 ASSERT(pphys_list_size
);
1678 kmem_free(pphys_list
, pphys_list_size
);
1680 pphys_list_size
= 0;
1683 ASSERT(ppage_count
);
1684 kmem_free(ppage_buf
, mmu_ptob(ppage_count
));
1685 CPR_DEBUG(CPR_DEBUG1
, "freed %ld prom pages\n",
1696 * record tlb data for the nucleus, bigktsb's, and the cpr module;
1697 * this data is later used by cprboot to install dtlb/itlb entries.
1698 * when we jump into the cpr module during the resume phase, those
1699 * mappings are needed until switching to the kernel trap table.
1700 * to make the dtte/itte info available during resume, we need
1701 * the info recorded prior to saving sensitive pages, otherwise
1702 * all the data would appear as NULLs.
1705 i_cpr_save_tlbinfo(void)
1710 * during resume - shortly after jumping into the cpr module,
1711 * sfmmu_load_mmustate() will overwrite any dtlb entry at any
1712 * index used for TSBs; skip is set so that any saved tte will
1713 * target other tlb offsets and prevent being lost during
1714 * resume. now scan the dtlb and save locked entries,
1715 * then add entries for the tmp stack / data page and the
1716 * cpr thread structure.
1718 cti
.dst
= m_info
.dtte
;
1719 cti
.tail
= cti
.dst
+ CPR_MAX_TLB
;
1720 cti
.reader
= dtlb_rd_entry
;
1722 cti
.filter
= i_cpr_lnb
;
1723 cti
.index
= cpunodes
[CPU
->cpu_id
].dtlb_size
- 1;
1725 if (utsb_dtlb_ttenum
!= -1)
1726 cti
.skip
= (1 << utsb_dtlb_ttenum
);
1728 if (utsb4m_dtlb_ttenum
!= -1)
1729 cti
.skip
|= (1 << utsb4m_dtlb_ttenum
);
1731 i_cpr_scan_tlb(&cti
);
1732 i_cpr_make_tte(&cti
, &i_cpr_data_page
, datava
);
1733 i_cpr_make_tte(&cti
, curthread
, datava
);
1736 * scan itlb and save locked entries; add an entry for
1737 * the first text page of the cpr module; cprboot will
1738 * jump to that page after restoring kernel pages.
1740 cti
.dst
= m_info
.itte
;
1741 cti
.tail
= cti
.dst
+ CPR_MAX_TLB
;
1742 cti
.reader
= itlb_rd_entry
;
1743 cti
.index
= cpunodes
[CPU
->cpu_id
].itlb_size
- 1;
1745 i_cpr_scan_tlb(&cti
);
1746 i_cpr_make_tte(&cti
, (void *)i_cpr_resume_setup
, textva
);
1752 i_cpr_dump_setup(vnode_t
*vp
)
1755 * zero out m_info and add info to dtte/itte arrays
1757 bzero(&m_info
, sizeof (m_info
));
1758 i_cpr_save_tlbinfo();
1764 i_cpr_is_supported(int sleeptype
)
1766 char es_prop
[] = "energystar-v2";
1769 extern int cpr_supported_override
;
1770 extern int cpr_platform_enable
;
1772 if (sleeptype
!= CPR_TODISK
)
1776 * The next statement tests if a specific platform has turned off
1779 if (cpr_supported_override
)
1783 * Do not inspect energystar-v* property if a platform has
1784 * specifically turned on cpr support
1786 if (cpr_platform_enable
)
1789 node
= prom_rootnode();
1790 if (prom_getproplen(node
, es_prop
) != -1)
1792 last
= strlen(es_prop
) - 1;
1793 es_prop
[last
] = '3';
1794 return (prom_getproplen(node
, es_prop
) != -1);
1799 * the actual size of the statefile data isn't known until after all the
1800 * compressed pages are written; even the inode size doesn't reflect the
1801 * data size since there are usually many extra fs blocks. for recording
1802 * the actual data size, the first sector of the statefile is copied to
1803 * a tmp buf, and the copy is later updated and flushed to disk.
1806 i_cpr_blockzero(char *base
, char **bufpp
, int *blkno
, vnode_t
*vp
)
1808 extern int cpr_flush_write(vnode_t
*);
1809 static char cpr_sector
[DEV_BSIZE
];
1810 cpr_ext bytes
, *dst
;
1813 * this routine is called after cdd_t and csu_md_t are copied
1814 * to cpr_buf; mini-hack alert: the save/update method creates
1815 * a dependency on the combined struct size being >= one sector
1816 * or DEV_BSIZE; since introduction in Sol2.7, csu_md_t size is
1817 * over 1K bytes and will probably grow with any changes.
1819 * copy when vp is NULL, flush when non-NULL
1822 ASSERT((*bufpp
- base
) >= DEV_BSIZE
);
1823 bcopy(base
, cpr_sector
, sizeof (cpr_sector
));
1826 bytes
= dbtob(*blkno
);
1827 dst
= &((cdd_t
*)cpr_sector
)->cdd_filesize
;
1828 bcopy(&bytes
, dst
, sizeof (bytes
));
1829 bcopy(cpr_sector
, base
, sizeof (cpr_sector
));
1830 *bufpp
= base
+ sizeof (cpr_sector
);
1831 *blkno
= cpr_statefile_offset();
1832 CPR_DEBUG(CPR_DEBUG1
, "statefile data size: %ld\n\n", bytes
);
1833 return (cpr_flush_write(vp
));
1839 * Allocate bitmaps according to the phys_install list.
1842 i_cpr_bitmap_setup(void)
1844 struct memlist
*pmem
;
1850 * The number of bitmap descriptors will be the count of
1851 * phys_install ranges plus 1 for a trailing NULL struct.
1854 for (pmem
= phys_install
; pmem
; pmem
= pmem
->ml_next
)
1857 if (cpr_nbitmaps
> (CPR_MAX_BMDESC
- 1)) {
1858 cpr_err(CE_WARN
, "too many physical memory ranges %d, max %d",
1859 cpr_nbitmaps
, CPR_MAX_BMDESC
- 1);
1863 /* Alloc an array of bitmap descriptors. */
1864 dp
= kmem_zalloc(cpr_nbitmaps
* sizeof (*dp
), KM_NOSLEEP
);
1869 tail
= dp
+ cpr_nbitmaps
;
1872 for (pmem
= phys_install
; pmem
; pmem
= pmem
->ml_next
) {
1873 size
= BITMAP_BYTES(pmem
->ml_size
);
1874 space
= kmem_zalloc(size
* 2, KM_NOSLEEP
);
1878 dp
->cbd_magic
= CPR_BITMAP_MAGIC
;
1879 dp
->cbd_spfn
= mmu_btop(pmem
->ml_address
);
1880 dp
->cbd_epfn
= mmu_btop(pmem
->ml_address
+ pmem
->ml_size
) - 1;
1881 dp
->cbd_size
= size
;
1882 dp
->cbd_reg_bitmap
= (cpr_ptr
)space
;
1883 dp
->cbd_vlt_bitmap
= (cpr_ptr
)((caddr_t
)space
+ size
);
1887 /* set magic for the last descriptor */
1888 ASSERT(dp
== (tail
- 1));
1889 dp
->cbd_magic
= CPR_BITMAP_MAGIC
;
1896 i_cpr_bitmap_cleanup(void)
1900 if (CPR
->c_bmda
== NULL
)
1902 for (dp
= CPR
->c_bmda
; dp
->cbd_size
; dp
++)
1903 kmem_free((void *)dp
->cbd_reg_bitmap
, dp
->cbd_size
* 2);
1904 kmem_free(CPR
->c_bmda
, cpr_nbitmaps
* sizeof (*CPR
->c_bmda
));
/*
 * A "regular" and "volatile" bitmap are created for each range of
 * physical memory. The volatile maps are used to count and track pages
 * susceptible to heap corruption - caused by drivers that allocate mem
 * during VOP_DUMP(); the regular maps are used for all the other non-
 * susceptible pages. Before writing the bitmaps to the statefile,
 * each bitmap pair gets merged to simplify handling within cprboot.
 */
int
i_cpr_alloc_bitmaps(void)
{
	int err;

	memlist_read_lock();
	err = i_cpr_bitmap_setup();
	memlist_read_unlock();
	if (err)
		i_cpr_bitmap_cleanup();
	return (err);
}
1934 * Power down the system.
1937 i_cpr_power_down(int sleeptype
)
1940 char *wordexists
= "p\" power-off\" find nip swap l! ";
1941 char *req
= "power-off";
1943 ASSERT(sleeptype
== CPR_TODISK
);
1946 * is_defined has value -1 when defined
1948 prom_interpret(wordexists
, (uintptr_t)&is_defined
, 0, 0, 0, 0);
1950 CPR_DEBUG(CPR_DEBUG1
, "\ncpr: %s...\n", req
);
1951 prom_interpret(req
, 0, 0, 0, 0, 0);
1954 * Only returns if failed
1960 i_cpr_stop_other_cpus(void)
1966 * Save context for the specified CPU
1970 i_cpr_save_context(void *arg
)
1980 i_cpr_pre_resume_cpus(void)
1989 i_cpr_post_resume_cpus(void)
2001 i_cpr_alloc_cpus(void)
2009 i_cpr_free_cpus(void)
2015 i_cpr_save_configuration(dev_info_t
*dip
)
2018 * this is a no-op on sparc
2024 i_cpr_restore_configuration(dev_info_t
*dip
)
2027 * this is a no-op on sparc