5285 pass in cpu_pause_func via pause_cpus
[illumos-gate.git] / usr / src / uts / sun4u / os / cpr_impl.c
blob5825fe1ba1bd9161ce5b3dc405c0c40351e6c000
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Platform specific implementation code
30 #define SUNDDI_IMPL
32 #include <sys/types.h>
33 #include <sys/promif.h>
34 #include <sys/prom_isa.h>
35 #include <sys/prom_plat.h>
36 #include <sys/mmu.h>
37 #include <vm/hat_sfmmu.h>
38 #include <sys/iommu.h>
39 #include <sys/scb.h>
40 #include <sys/cpuvar.h>
41 #include <sys/intreg.h>
42 #include <sys/pte.h>
43 #include <vm/hat.h>
44 #include <vm/page.h>
45 #include <vm/as.h>
46 #include <sys/cpr.h>
47 #include <sys/kmem.h>
48 #include <sys/clock.h>
49 #include <sys/kmem.h>
50 #include <sys/panic.h>
51 #include <vm/seg_kmem.h>
52 #include <sys/cpu_module.h>
53 #include <sys/callb.h>
54 #include <sys/machsystm.h>
55 #include <sys/vmsystm.h>
56 #include <sys/systm.h>
57 #include <sys/archsystm.h>
58 #include <sys/stack.h>
59 #include <sys/fs/ufs_fs.h>
60 #include <sys/memlist.h>
61 #include <sys/bootconf.h>
62 #include <sys/thread.h>
63 #include <vm/vm_dep.h>
65 extern void cpr_clear_bitmaps(void);
66 extern int cpr_setbit(pfn_t ppn, int mapflag);
67 extern int cpr_clrbit(pfn_t ppn, int mapflag);
68 extern pgcnt_t cpr_scan_kvseg(int mapflag, bitfunc_t bitfunc, struct seg *seg);
69 extern pgcnt_t cpr_count_seg_pages(int mapflag, bitfunc_t bitfunc);
70 extern void dtlb_wr_entry(uint_t, tte_t *, uint64_t *);
71 extern void itlb_wr_entry(uint_t, tte_t *, uint64_t *);
73 static int i_cpr_storage_desc_alloc(csd_t **, pgcnt_t *, csd_t **, int);
74 static void i_cpr_storage_desc_init(csd_t *, pgcnt_t, csd_t *);
75 static caddr_t i_cpr_storage_data_alloc(pgcnt_t, pgcnt_t *, int);
76 static int cpr_dump_sensitive(vnode_t *, csd_t *);
77 static void i_cpr_clear_entries(uint64_t, uint64_t);
78 static void i_cpr_xcall(xcfunc_t);
80 void i_cpr_storage_free(void);
82 extern void *i_cpr_data_page;
83 extern int cpr_test_mode;
84 extern int cpr_nbitmaps;
85 extern char cpr_default_path[];
86 extern caddr_t textva, datava;
88 static struct cpr_map_info cpr_prom_retain[CPR_PROM_RETAIN_CNT];
89 caddr_t cpr_vaddr = NULL;
91 static uint_t sensitive_pages_saved;
92 static uint_t sensitive_size_saved;
94 caddr_t i_cpr_storage_data_base;
95 caddr_t i_cpr_storage_data_end;
96 csd_t *i_cpr_storage_desc_base;
97 csd_t *i_cpr_storage_desc_end; /* one byte beyond last used descp */
98 csd_t *i_cpr_storage_desc_last_used; /* last used descriptor */
99 caddr_t sensitive_write_ptr; /* position for next storage write */
101 size_t i_cpr_sensitive_bytes_dumped;
102 pgcnt_t i_cpr_sensitive_pgs_dumped;
103 pgcnt_t i_cpr_storage_data_sz; /* in pages */
104 pgcnt_t i_cpr_storage_desc_pgcnt; /* in pages */
106 ushort_t cpr_mach_type = CPR_MACHTYPE_4U;
107 static csu_md_t m_info;
110 #define MAX_STORAGE_RETRY 3
111 #define MAX_STORAGE_ALLOC_RETRY 3
112 #define INITIAL_ALLOC_PCNT 40 /* starting allocation percentage */
113 #define INTEGRAL 100 /* to get 1% precision */
115 #define EXTRA_RATE 2 /* add EXTRA_RATE% extra space */
116 #define EXTRA_DESCS 10
118 #define CPR_NO_STORAGE_DESC 1
119 #define CPR_NO_STORAGE_DATA 2
121 #define CIF_SPLICE 0
122 #define CIF_UNLINK 1
126 * CPR miscellaneous support routines
128 #define cpr_open(path, mode, vpp) (vn_open(path, UIO_SYSSPACE, \
129 mode, 0600, vpp, CRCREAT, 0))
130 #define cpr_rdwr(rw, vp, basep, cnt) (vn_rdwr(rw, vp, (caddr_t)(basep), \
131 cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
132 (ssize_t *)NULL))
135 * definitions for saving/restoring prom pages
137 static void *ppage_buf;
138 static pgcnt_t ppage_count;
139 static pfn_t *pphys_list;
140 static size_t pphys_list_size;
142 typedef void (*tlb_rw_t)(uint_t, tte_t *, uint64_t *);
143 typedef void (*tlb_filter_t)(int, tte_t *, uint64_t, void *);
146 * private struct for tlb handling
148 struct cpr_trans_info {
149 sutlb_t *dst;
150 sutlb_t *tail;
151 tlb_rw_t reader;
152 tlb_rw_t writer;
153 tlb_filter_t filter;
154 int index;
155 uint64_t skip; /* assumes TLB <= 64 locked entries */
157 typedef struct cpr_trans_info cti_t;
161 * special handling for tlb info
163 #define WITHIN_OFW(va) \
164 (((va) > (uint64_t)OFW_START_ADDR) && ((va) < (uint64_t)OFW_END_ADDR))
166 #define WITHIN_NUCLEUS(va, base) \
167 (((va) >= (base)) && \
168 (((va) + MMU_PAGESIZE) <= ((base) + MMU_PAGESIZE4M)))
170 #define IS_BIGKTSB(va) \
171 (enable_bigktsb && \
172 ((va) >= (uint64_t)ktsb_base) && \
173 ((va) < (uint64_t)(ktsb_base + ktsb_sz)))
177 * WARNING:
178 * the text from this file is linked to follow cpr_resume_setup.o;
179 * only add text between here and i_cpr_end_jumpback when it needs
180 * to be called during resume before we switch back to the kernel
181 * trap table. all the text in this range must fit within a page.
186 * each time a machine is reset, the prom uses an inconsistent set of phys
187 * pages and the cif cookie may differ as well. so prior to restoring the
188 * original prom, we have to use to use the new/tmp prom's translations
189 * when requesting prom services.
191 * cif_handler starts out as the original prom cookie, and that gets used
192 * by client_handler() to jump into the prom. here we splice-in a wrapper
193 * routine by writing cif_handler; client_handler() will now jump to the
194 * wrapper which switches the %tba to the new/tmp prom's trap table then
195 * jumps to the new cookie.
197 void
198 i_cpr_cif_setup(int action)
200 extern void *i_cpr_orig_cif, *cif_handler;
201 extern int i_cpr_cif_wrapper(void *);
204 * save the original cookie and change the current cookie to the
205 * wrapper routine. later we just restore the original cookie.
207 if (action == CIF_SPLICE) {
208 i_cpr_orig_cif = cif_handler;
209 cif_handler = (void *)i_cpr_cif_wrapper;
210 } else if (action == CIF_UNLINK)
211 cif_handler = i_cpr_orig_cif;
216 * launch slave cpus into kernel text, pause them,
217 * and restore the original prom pages
219 void
220 i_cpr_mp_setup(void)
/*
 * Resume-time MP setup: relaunch the slave cpus into kernel text,
 * pause them, then restore the original prom pages.  Runs while only
 * the nucleus and a few cpr pages are mapped, until the kernel trap
 * table is re-installed below.
 */
222 extern void restart_other_cpu(int);
223 cpu_t *cp;
225 uint64_t kctx = kcontextreg;
228 * Do not allow setting page size codes in MMU primary context
229 * register while using cif wrapper. This is needed to work
230 * around OBP incorrect handling of this MMU register.
232 kcontextreg = 0;
235 * reset cpu_ready_set so x_calls work properly
237 CPUSET_ZERO(cpu_ready_set);
238 CPUSET_ADD(cpu_ready_set, getprocessorid());
241 * setup cif to use the cookie from the new/tmp prom
242 * and setup tmp handling for calling prom services.
244 i_cpr_cif_setup(CIF_SPLICE);
247 * at this point, only the nucleus and a few cpr pages are
248 * mapped in. once we switch to the kernel trap table,
249 * we can access the rest of kernel space.
251 prom_set_traptable(&trap_table);
253 if (ncpus > 1) {
254 sfmmu_init_tsbs();
256 mutex_enter(&cpu_lock);
258 * All of the slave cpus are not ready at this time,
259 * yet the cpu structures have various cpu_flags set;
260 * clear cpu_flags and mutex_ready.
261 * Since we are coming up from a CPU suspend, the slave cpus
262 * are frozen.
264 for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next) {
265 cp->cpu_flags = CPU_FROZEN;
266 cp->cpu_m.mutex_ready = 0;
269 for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next)
270 restart_other_cpu(cp->cpu_id);
/*
 * NOTE(review): pause_cpus() is called with a second NULL argument;
 * this matches the "pass in cpu_pause_func via pause_cpus" change --
 * NULL presumably selects the default pause function.  Confirm against
 * the current pause_cpus() prototype.
 */
272 pause_cpus(NULL, NULL);
273 mutex_exit(&cpu_lock);
275 i_cpr_xcall(i_cpr_clear_entries);
276 } else
277 i_cpr_clear_entries(0, 0);
280 * now unlink the cif wrapper; WARNING: do not call any
281 * prom_xxx() routines until after prom pages are restored.
283 i_cpr_cif_setup(CIF_UNLINK);
285 (void) i_cpr_prom_pages(CPR_PROM_RESTORE);
287 /* allow setting page size codes in MMU primary context register */
288 kcontextreg = kctx;
293 * end marker for jumpback page;
294 * this symbol is used to check the size of i_cpr_resume_setup()
295 * and the above text. For simplicity, the Makefile needs to
296 * link i_cpr_resume_setup.o and cpr_impl.o consecutively.
void
i_cpr_end_jumpback(void)
{
	/*
	 * Intentionally empty: this symbol only marks the end of the
	 * jumpback text so its total size can be checked at runtime.
	 */
}
305 * scan tlb entries with reader; when valid entries are found,
306 * the filter routine will selectively save/clear them
308 static void
309 i_cpr_scan_tlb(cti_t *ctip)
311 uint64_t va_tag;
312 int tlb_index;
313 tte_t tte;
315 for (tlb_index = ctip->index; tlb_index >= 0; tlb_index--) {
316 (*ctip->reader)((uint_t)tlb_index, &tte, &va_tag);
317 if (va_tag && TTE_IS_VALID(&tte))
318 (*ctip->filter)(tlb_index, &tte, va_tag, ctip);
324 * filter for locked tlb entries that reference the text/data nucleus
325 * and any bigktsb's; these will be reinstalled by cprboot on all cpus
327 /* ARGSUSED */
328 static void
329 i_cpr_lnb(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
/*
 * Filter callback for i_cpr_scan_tlb(): records locked tlb entries
 * that map the text/data nucleus or a bigktsb into the ctip->dst
 * array so cprboot can reinstall them on every cpu.
 */
331 cti_t *ctip;
334 * record tlb data at ctip->dst; the target tlb index starts
335 * at the highest tlb offset and moves towards 0. the prom
336 * reserves both dtlb and itlb index 0. any selected entry
337 * also gets marked to prevent being flushed during resume
339 if (TTE_IS_LOCKED(ttep) && (va_tag == (uint64_t)textva ||
340 va_tag == (uint64_t)datava || IS_BIGKTSB(va_tag))) {
341 ctip = ctrans;
/* skip any target slots whose bit is set in the skip mask */
342 while ((1 << ctip->index) & ctip->skip)
343 ctip->index--;
344 ASSERT(ctip->index > 0);
345 ASSERT(ctip->dst < ctip->tail);
346 ctip->dst->tte.ll = ttep->ll;
347 ctip->dst->va_tag = va_tag;
/* assign the current target slot, then move to the next lower one */
348 ctip->dst->index = ctip->index--;
/* tmp == 0: keep this entry across the trap-table switch */
349 ctip->dst->tmp = 0;
350 ctip->dst++;
356 * some tlb entries are stale, filter for unlocked entries
357 * within the prom virt range and clear them
359 static void
360 i_cpr_ufw(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
362 sutlb_t clr;
363 cti_t *ctip;
365 if (!TTE_IS_LOCKED(ttep) && WITHIN_OFW(va_tag)) {
366 ctip = ctrans;
367 bzero(&clr, sizeof (clr));
368 (*ctip->writer)((uint_t)index, &clr.tte, &clr.va_tag);
374 * some of the entries installed by cprboot are needed only on a
375 * short-term basis and need to be flushed to avoid clogging the tlbs.
376 * scan the dtte/itte arrays for items marked as temporary and clear
377 * dtlb/itlb entries using wrfunc.
379 static void
380 i_cpr_clear_tmp(sutlb_t *listp, int max, tlb_rw_t wrfunc)
382 sutlb_t clr, *tail;
384 bzero(&clr, sizeof (clr));
385 for (tail = listp + max; listp < tail && listp->va_tag; listp++) {
386 if (listp->tmp)
387 (*wrfunc)((uint_t)listp->index, &clr.tte, &clr.va_tag);
392 /* ARGSUSED */
393 static void
394 i_cpr_clear_entries(uint64_t arg1, uint64_t arg2)
/*
 * Per-cpu cleanup after resume (run via xcall, or directly on a
 * uniprocessor): flush the temporary tlb entries cprboot installed,
 * then demap stale translations.
 */
396 extern void demap_all(void);
397 cti_t cti;
399 i_cpr_clear_tmp(m_info.dtte, CPR_MAX_TLB, dtlb_wr_entry);
400 i_cpr_clear_tmp(m_info.itte, CPR_MAX_TLB, itlb_wr_entry);
403 * for newer cpus that implement DEMAP_ALL_TYPE, demap_all is
404 * a second label for vtag_flushall. the call is made using
405 * vtag_flushall() instead of demap_all() due to runtime and
406 * krtld results with both older and newer cpu modules.
/*
 * NOTE(review): the address test below presumably checks whether the
 * demap_all label was linked in by the cpu module -- confirm how the
 * symbol is resolved for older cpu modules.
 */
408 if (&demap_all != 0) {
409 vtag_flushall();
410 return;
414 * for older V9 cpus, scan tlbs and clear stale entries
416 bzero(&cti, sizeof (cti));
417 cti.filter = i_cpr_ufw;
/* dtlb pass: clear unlocked entries within the prom range */
419 cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
420 cti.reader = dtlb_rd_entry;
421 cti.writer = dtlb_wr_entry;
422 i_cpr_scan_tlb(&cti);
/* itlb pass: same filter, instruction tlb */
424 cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
425 cti.reader = itlb_rd_entry;
426 cti.writer = itlb_wr_entry;
427 i_cpr_scan_tlb(&cti);
432 * craft tlb info for tmp use during resume; this data gets used by
433 * cprboot to install tlb entries. we also mark each struct as tmp
434 * so those tlb entries will get flushed after switching to the kernel
435 * trap table. no data needs to be recorded for vaddr when it falls
436 * within the nucleus since we've already recorded nucleus ttes and
437 * a 8K tte would conflict with a 4MB tte. eg: the cpr module
438 * text/data may have been loaded into the text/data nucleus.
440 static void
441 i_cpr_make_tte(cti_t *ctip, void *vaddr, caddr_t nbase)
/*
 * Craft a temporary locked tte for vaddr and append it to the
 * ctip->dst array; entries are marked tmp so they get flushed after
 * the switch back to the kernel trap table.  Addresses already inside
 * the nucleus are skipped (a 8K tte would conflict with the 4M tte).
 */
443 pfn_t ppn;
444 uint_t rw;
446 if (WITHIN_NUCLEUS((caddr_t)vaddr, nbase))
447 return;
/* skip any target tlb slots whose bit is set in the skip mask */
449 while ((1 << ctip->index) & ctip->skip)
450 ctip->index--;
451 ASSERT(ctip->index > 0);
452 ASSERT(ctip->dst < ctip->tail);
455 * without any global service available to lookup
456 * a tte by vaddr, we craft our own here:
458 ppn = va_to_pfn(vaddr);
/* writable only when mapping into the data nucleus */
459 rw = (nbase == datava) ? TTE_HWWR_INT : 0;
460 ctip->dst->tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
461 ctip->dst->tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
462 TTE_CP_INT | TTE_PRIV_INT | rw;
463 ctip->dst->va_tag = ((uintptr_t)vaddr & MMU_PAGEMASK);
464 ctip->dst->index = ctip->index--;
/* tmp == 1: flush this entry after the trap-table switch */
465 ctip->dst->tmp = 1;
466 ctip->dst++;
470 static void
471 i_cpr_xcall(xcfunc_t func)
473 uint_t pil, reset_pil;
475 pil = getpil();
476 if (pil < XCALL_PIL)
477 reset_pil = 0;
478 else {
479 reset_pil = 1;
480 setpil(XCALL_PIL - 1);
482 xc_some(cpu_ready_set, func, 0, 0);
483 if (reset_pil)
484 setpil(pil);
489 * restart paused slave cpus
491 void
492 i_cpr_machdep_setup(void)
494 if (ncpus > 1) {
495 CPR_DEBUG(CPR_DEBUG1, "MP restarted...\n");
496 mutex_enter(&cpu_lock);
497 start_cpus();
498 mutex_exit(&cpu_lock);
504 * Stop all interrupt activities in the system
void
i_cpr_stop_intr(void)
{
	/* raise this cpu to spl7 to block interrupt activity */
	(void) spl7();
}
513 * Set machine up to take interrupts
void
i_cpr_enable_intr(void)
{
	/* drop back to spl0 so interrupts can be taken again */
	(void) spl0();
}
523 * record cpu nodes and ids
525 static void
526 i_cpr_save_cpu_info(void)
528 struct sun4u_cpu_info *scip;
529 cpu_t *cp;
531 scip = m_info.sci;
532 cp = CPU;
533 do {
534 ASSERT(scip < &m_info.sci[NCPU]);
535 scip->cpu_id = cp->cpu_id;
536 scip->node = cpunodes[cp->cpu_id].nodeid;
537 scip++;
538 } while ((cp = cp->cpu_next) != CPU);
543 * Write necessary machine dependent information to cpr state file,
544 * eg. sun4u mmu ctx secondary for the current running process (cpr) ...
547 i_cpr_write_machdep(vnode_t *vp)
/*
 * Write the machine dependent section of the cpr state file to vp:
 * a cmd_t descriptor, the m_info snapshot (stack bias, pstate/wstate,
 * quiesce-save pc/sp, mmu contexts, resume entry point, tmp stack),
 * and the forth words used during slave startup.  Returns 0 on
 * success or the error from cpr_write().
 */
549 extern uint_t getpstate(), getwstate();
550 extern uint_t i_cpr_tstack_size;
551 const char ustr[] = ": unix-tte 2drop false ;";
552 uintptr_t tinfo;
553 label_t *ltp;
554 cmd_t cmach;
555 char *fmt;
556 int rc;
559 * ustr[] is used as temporary forth words during
560 * slave startup sequence, see sfmmu_mp_startup()
563 cmach.md_magic = (uint_t)CPR_MACHDEP_MAGIC;
564 cmach.md_size = sizeof (m_info) + sizeof (ustr);
/* assignment-in-condition is this file's error-check idiom */
566 if (rc = cpr_write(vp, (caddr_t)&cmach, sizeof (cmach))) {
567 cpr_err(CE_WARN, "Failed to write descriptor.");
568 return (rc);
572 * m_info is now cleared in i_cpr_dump_setup()
574 m_info.ksb = (uint32_t)STACK_BIAS;
575 m_info.kpstate = (uint16_t)getpstate();
576 m_info.kwstate = (uint16_t)getwstate();
577 CPR_DEBUG(CPR_DEBUG1, "stack bias 0x%x, pstate 0x%x, wstate 0x%x\n",
578 m_info.ksb, m_info.kpstate, m_info.kwstate);
/* pc/sp saved at quiesce time; resume returns through these */
580 ltp = &ttolwp(curthread)->lwp_qsav;
581 m_info.qsav_pc = (cpr_ext)ltp->val[0];
582 m_info.qsav_sp = (cpr_ext)ltp->val[1];
585 * Set secondary context to INVALID_CONTEXT to force the HAT
586 * to re-setup the MMU registers and locked TTEs it needs for
587 * TLB miss handling.
589 m_info.mmu_ctx_sec = INVALID_CONTEXT;
590 m_info.mmu_ctx_pri = KCONTEXT;
592 tinfo = (uintptr_t)curthread;
593 m_info.thrp = (cpr_ptr)tinfo;
595 tinfo = (uintptr_t)i_cpr_resume_setup;
596 m_info.func = (cpr_ptr)tinfo;
599 * i_cpr_data_page is comprised of a 4K stack area and a few
600 * trailing data symbols; the page is shared by the prom and
601 * kernel during resume. the stack size is recorded here
602 * and used by cprboot to set %sp
604 tinfo = (uintptr_t)&i_cpr_data_page;
605 m_info.tmp_stack = (cpr_ptr)tinfo;
606 m_info.tmp_stacksize = i_cpr_tstack_size;
608 m_info.test_mode = cpr_test_mode;
610 i_cpr_save_cpu_info();
612 if (rc = cpr_write(vp, (caddr_t)&m_info, sizeof (m_info))) {
613 cpr_err(CE_WARN, "Failed to write machdep info.");
614 return (rc);
617 fmt = "error writing %s forth info";
618 if (rc = cpr_write(vp, (caddr_t)ustr, sizeof (ustr)))
619 cpr_err(CE_WARN, fmt, "unix-tte");
621 return (rc);
626 * Save miscellaneous information which needs to be written to the
627 * state file. This information is required to re-initialize
628 * kernel/prom handshaking.
630 void
631 i_cpr_save_machdep_info(void)
633 CPR_DEBUG(CPR_DEBUG5, "jumpback size = 0x%lx\n",
634 (uintptr_t)&i_cpr_end_jumpback -
635 (uintptr_t)i_cpr_resume_setup);
638 * Verify the jumpback code all falls in one page.
640 if (((uintptr_t)&i_cpr_end_jumpback & MMU_PAGEMASK) !=
641 ((uintptr_t)i_cpr_resume_setup & MMU_PAGEMASK))
642 cpr_err(CE_PANIC, "jumpback code exceeds one page.");
647 * cpu0 should contain bootcpu info
649 cpu_t *
650 i_cpr_bootcpu(void)
652 return (&cpu0);
655 processorid_t
656 i_cpr_bootcpuid(void)
658 return (0);
662 * Return the virtual address of the mapping area
664 caddr_t
665 i_cpr_map_setup(void)
668 * Allocate a virtual memory range spanned by an hmeblk.
669 * This would be 8 hments or 64k bytes. Starting VA
670 * must be 64k (8-page) aligned.
672 cpr_vaddr = vmem_xalloc(heap_arena,
673 mmu_ptob(NHMENTS), mmu_ptob(NHMENTS),
674 0, 0, NULL, NULL, VM_NOSLEEP);
675 return (cpr_vaddr);
679 * create tmp locked tlb entries for a group of phys pages;
681 * i_cpr_mapin/i_cpr_mapout should always be called in pairs,
682 * otherwise would fill up a tlb with locked entries
684 void
685 i_cpr_mapin(caddr_t vaddr, uint_t pages, pfn_t ppn)
/*
 * Install temporary locked dtlb entries mapping [vaddr, vaddr+pages)
 * to physical pages starting at ppn.  Must be paired with
 * i_cpr_mapout() or the tlb fills up with locked entries.
 */
687 tte_t tte;
688 extern pfn_t curthreadpfn;
689 extern int curthreadremapped;
/* flag when the current thread's page falls inside this remap range */
691 curthreadremapped = (ppn <= curthreadpfn && curthreadpfn < ppn + pages);
693 for (; pages--; ppn++, vaddr += MMU_PAGESIZE) {
694 tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
695 tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
696 TTE_CP_INT | TTE_PRIV_INT | TTE_HWWR_INT;
697 sfmmu_dtlb_ld_kva(vaddr, &tte);
701 void
702 i_cpr_mapout(caddr_t vaddr, uint_t pages)
704 extern int curthreadremapped;
706 if (curthreadremapped && vaddr <= (caddr_t)curthread &&
707 (caddr_t)curthread < vaddr + pages * MMU_PAGESIZE)
708 curthreadremapped = 0;
710 for (; pages--; vaddr += MMU_PAGESIZE)
711 vtag_flushpage(vaddr, (uint64_t)ksfmmup);
715 * We're done using the mapping area; release virtual space
717 void
718 i_cpr_map_destroy(void)
720 vmem_free(heap_arena, cpr_vaddr, mmu_ptob(NHMENTS));
721 cpr_vaddr = NULL;
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
	/* no-op on sun4u; retained to satisfy the cpr interface */
}
732 * This function takes care of pages which are not in kas or need to be
733 * taken care of in a special way. For example, panicbuf pages are not
734 * in kas and their pages are allocated via prom_retain().
736 pgcnt_t
737 i_cpr_count_special_kpages(int mapflag, bitfunc_t bitfunc)
/*
 * Count (and, when bitfunc is cpr_setbit, tag) pages that are not in
 * kas but still need special handling -- e.g. panicbuf pages, which
 * are allocated via prom_retain().  Returns the number of pages
 * newly accounted for.
 */
739 struct cpr_map_info *pri, *tail;
740 pgcnt_t pages, total = 0;
741 pfn_t pfn;
744 * Save information about prom retained panicbuf pages
746 if (bitfunc == cpr_setbit) {
747 pri = &cpr_prom_retain[CPR_PANICBUF];
748 pri->virt = (cpr_ptr)panicbuf;
749 pri->phys = va_to_pa(panicbuf);
750 pri->size = sizeof (panicbuf);
754 * Go through the prom_retain array to tag those pages.
756 tail = &cpr_prom_retain[CPR_PROM_RETAIN_CNT];
757 for (pri = cpr_prom_retain; pri < tail; pri++) {
758 pages = mmu_btopr(pri->size);
759 for (pfn = ADDR_TO_PN(pri->phys); pages--; pfn++) {
760 if (pf_is_memory(pfn)) {
/* setbit returning 0 means the bit was not already set */
761 if (bitfunc == cpr_setbit) {
762 if ((*bitfunc)(pfn, mapflag) == 0)
763 total++;
764 } else
765 total++;
770 return (total);
775 * Free up memory-related resources here. We start by freeing buffers
776 * allocated during suspend initialization. Also, free up the mapping
777 * resources allocated in cpr_init().
779 void
780 i_cpr_free_memory_resources(void)
/*
 * Release suspend-time memory resources: saved prom pages, the
 * temporary mapping area, and the sensitive-page storage buffers.
 */
782 (void) i_cpr_prom_pages(CPR_PROM_FREE);
783 i_cpr_map_destroy();
784 i_cpr_storage_free();
789 * Derived from cpr_write_statefile().
790 * Save the sensitive pages to the storage area and do bookkeeping
791 * using the sensitive descriptors. Each descriptor will contain no more
792 * than CPR_MAXCONTIG amount of contiguous pages to match the max amount
793 * of pages that statefile gets written to disk at each write.
794 * XXX The CPR_MAXCONTIG can be changed to the size of the compression
795 * scratch area.
797 static int
798 i_cpr_save_to_storage(void)
/*
 * Reset the storage bookkeeping globals and compress the sensitive
 * pages into the storage area via cpr_contig_pages().  Per the
 * caller's contract: returns 0 on success, -1 when descriptors run
 * out, ENOMEM when the data area is too small.
 */
800 sensitive_size_saved = 0;
801 sensitive_pages_saved = 0;
802 sensitive_write_ptr = i_cpr_storage_data_base;
803 return (cpr_contig_pages(NULL, SAVE_TO_STORAGE));
808 * This routine allocates space to save the sensitive kernel pages,
809 * i.e. kernel data nucleus, kvalloc and kvseg segments.
810 * It's assumed that those segments are the only areas that can be
811 * contaminated by memory allocations during statefile dumping.
812 * The space allocated here contains:
813 * A list of descriptors describing the saved sensitive pages.
814 * The storage area for saving the compressed sensitive kernel pages.
815 * Since storage pages are allocated from segkmem, they need to be
816 * excluded when saving.
819 i_cpr_save_sensitive_kpages(void)
/*
 * Allocate storage (descriptors + data area) for the sensitive kernel
 * pages and compress them into it, retrying up to
 * MAX_STORAGE_ALLOC_RETRY times with larger allocations.  Returns 0
 * on success or ENOMEM when storage cannot be made large enough.
 */
821 static const char pages_fmt[] = "\n%s %s allocs\n"
822 " spages %ld, vpages %ld, diff %ld\n";
823 int retry_cnt;
824 int error = 0;
825 pgcnt_t pages, spages, vpages;
826 caddr_t addr;
827 char *str;
830 * Tag sensitive kpages. Allocate space for storage descriptors
831 * and storage data area based on the resulting bitmaps.
832 * Note: The storage space will be part of the sensitive
833 * segment, so we need to tag kpages here before the storage
834 * is actually allocated just so their space won't be accounted
835 * for. They will not be part of the statefile although those
836 * pages will be claimed by cprboot.
838 cpr_clear_bitmaps();
840 spages = i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
841 vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
842 pages = spages - vpages;
844 str = "i_cpr_save_sensitive_kpages:";
845 CPR_DEBUG(CPR_DEBUG7, pages_fmt, "before", str, spages, vpages, pages);
848 * Allocate space to save the clean sensitive kpages
850 for (retry_cnt = 0; retry_cnt < MAX_STORAGE_ALLOC_RETRY; retry_cnt++) {
852 * Alloc on first pass or realloc if we are retrying because
853 * of insufficient storage for sensitive pages
/* error == ENOMEM from the previous pass: grow the data area */
855 if (retry_cnt == 0 || error == ENOMEM) {
856 if (i_cpr_storage_data_base) {
857 kmem_free(i_cpr_storage_data_base,
858 mmu_ptob(i_cpr_storage_data_sz));
859 i_cpr_storage_data_base = NULL;
860 i_cpr_storage_data_sz = 0;
862 addr = i_cpr_storage_data_alloc(pages,
863 &i_cpr_storage_data_sz, retry_cnt);
864 if (addr == NULL) {
865 CPR_DEBUG(CPR_DEBUG7,
866 "\n%s can't allocate data storage space!\n",
867 str);
868 return (ENOMEM);
870 i_cpr_storage_data_base = addr;
871 i_cpr_storage_data_end =
872 addr + mmu_ptob(i_cpr_storage_data_sz);
876 * Allocate on first pass, only realloc if retry is because of
877 * insufficient descriptors, but reset contents on each pass
878 * (desc_alloc resets contents as well)
/* error == -1 from the previous pass: grow the descriptor array */
880 if (retry_cnt == 0 || error == -1) {
881 error = i_cpr_storage_desc_alloc(
882 &i_cpr_storage_desc_base, &i_cpr_storage_desc_pgcnt,
883 &i_cpr_storage_desc_end, retry_cnt);
884 if (error != 0)
885 return (error);
886 } else {
887 i_cpr_storage_desc_init(i_cpr_storage_desc_base,
888 i_cpr_storage_desc_pgcnt, i_cpr_storage_desc_end);
892 * We are ready to save the sensitive kpages to storage.
893 * We cannot trust what's tagged in the bitmaps anymore
894 * after storage allocations. Clear up the bitmaps and
895 * retag the sensitive kpages again. The storage pages
896 * should be untagged.
898 cpr_clear_bitmaps();
900 spages =
901 i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
902 vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
904 CPR_DEBUG(CPR_DEBUG7, pages_fmt, "after ", str,
905 spages, vpages, spages - vpages);
908 * Returns 0 on success, -1 if too few descriptors, and
909 * ENOMEM if not enough space to save sensitive pages
911 CPR_DEBUG(CPR_DEBUG1, "compressing pages to storage...\n");
912 error = i_cpr_save_to_storage();
913 if (error == 0) {
914 /* Saving to storage succeeded */
915 CPR_DEBUG(CPR_DEBUG1, "compressed %d pages\n",
916 sensitive_pages_saved);
917 break;
918 } else if (error == -1)
919 CPR_DEBUG(CPR_DEBUG1, "%s too few descriptors\n", str);
/* retries exhausted on -1: report it to callers as ENOMEM */
921 if (error == -1)
922 error = ENOMEM;
923 return (error);
928 * Estimate how much memory we will need to save
929 * the sensitive pages with compression.
931 static caddr_t
932 i_cpr_storage_data_alloc(pgcnt_t pages, pgcnt_t *alloc_pages, int retry_cnt)
/*
 * Allocate the data area for compressed sensitive pages.  The size
 * is an estimate: INITIAL_ALLOC_PCNT of `pages' on the first try,
 * then grown using the measured compression ratio plus 5% per retry.
 * Stores the page count through alloc_pages; returns the KM_NOSLEEP
 * allocation (may be NULL).
 */
934 pgcnt_t alloc_pcnt, last_pcnt;
935 caddr_t addr;
936 char *str;
938 str = "i_cpr_storage_data_alloc:";
940 if (retry_cnt == 0) {
941 * common compression ratio is about 3:1
942 * initial storage allocation is estimated at 40%
943 * to cover the majority of cases
945 alloc_pcnt = INITIAL_ALLOC_PCNT;
946 *alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
947 CPR_DEBUG(CPR_DEBUG7, "%s sensitive pages: %ld\n", str, pages);
948 CPR_DEBUG(CPR_DEBUG7,
949 "%s initial est pages: %ld, alloc %ld%%\n",
950 str, *alloc_pages, alloc_pcnt);
951 } else {
953 * calculate the prior compression percentage (x100)
954 * from the last attempt to save sensitive pages
956 ASSERT(sensitive_pages_saved != 0);
957 last_pcnt = (mmu_btopr(sensitive_size_saved) * INTEGRAL) /
958 sensitive_pages_saved;
959 CPR_DEBUG(CPR_DEBUG7, "%s last ratio %ld%%\n", str, last_pcnt);
962 * new estimated storage size is based on
963 * the larger ratio + 5% for each retry:
964 * pages * (last + [5%, 10%])
966 alloc_pcnt = MAX(last_pcnt, INITIAL_ALLOC_PCNT) +
967 (retry_cnt * 5);
968 *alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
969 CPR_DEBUG(CPR_DEBUG7, "%s Retry est pages: %ld, alloc %ld%%\n",
970 str, *alloc_pages, alloc_pcnt);
973 addr = kmem_alloc(mmu_ptob(*alloc_pages), KM_NOSLEEP);
974 CPR_DEBUG(CPR_DEBUG7, "%s alloc %ld pages\n", str, *alloc_pages);
975 return (addr);
979 void
980 i_cpr_storage_free(void)
982 /* Free descriptors */
983 if (i_cpr_storage_desc_base) {
984 kmem_free(i_cpr_storage_desc_base,
985 mmu_ptob(i_cpr_storage_desc_pgcnt));
986 i_cpr_storage_desc_base = NULL;
987 i_cpr_storage_desc_pgcnt = 0;
991 /* Data storage */
992 if (i_cpr_storage_data_base) {
993 kmem_free(i_cpr_storage_data_base,
994 mmu_ptob(i_cpr_storage_data_sz));
995 i_cpr_storage_data_base = NULL;
996 i_cpr_storage_data_sz = 0;
1002 * This routine is derived from cpr_compress_and_write().
1003 * 1. Do bookkeeping in the descriptor for the contiguous sensitive chunk.
1004 * 2. Compress and save the clean sensitive pages into the storage area.
1007 i_cpr_compress_and_save(int chunks, pfn_t spfn, pgcnt_t pages)
/*
 * Compress one contiguous chunk of sensitive pages (spfn, pages) into
 * the storage area and record it in descriptor number `chunks'.
 * Returns 0 on success, -1 when descriptors are exhausted, ENOMEM
 * when the storage data area is too small.
 */
1009 extern char *cpr_compress_pages(cpd_t *, pgcnt_t, int);
1010 extern caddr_t i_cpr_storage_data_end;
1011 uint_t remaining, datalen;
1012 uint32_t test_usum;
1013 char *datap;
1014 csd_t *descp;
1015 cpd_t cpd;
1016 int error;
1019 * Fill next empty storage descriptor
1021 descp = i_cpr_storage_desc_base + chunks - 1;
1022 if (descp >= i_cpr_storage_desc_end) {
1023 CPR_DEBUG(CPR_DEBUG1, "ran out of descriptors, base 0x%p, "
1024 "chunks %d, end 0x%p, descp 0x%p\n",
1025 (void *)i_cpr_storage_desc_base, chunks,
1026 (void *)i_cpr_storage_desc_end, (void *)descp);
1027 return (-1);
/* descriptor must still hold the impossible init value */
1029 ASSERT(descp->csd_dirty_spfn == (uint_t)-1);
1030 i_cpr_storage_desc_last_used = descp;
1032 descp->csd_dirty_spfn = spfn;
1033 descp->csd_dirty_npages = pages;
/* temporarily map the chunk so it can be read and compressed */
1035 i_cpr_mapin(CPR->c_mapping_area, pages, spfn);
1038 * try compressing pages and copy cpd fields
1039 * pfn is copied for debug use
1041 cpd.cpd_pfn = spfn;
1042 datap = cpr_compress_pages(&cpd, pages, C_COMPRESSING);
1043 datalen = cpd.cpd_length;
1044 descp->csd_clean_compressed = (cpd.cpd_flag & CPD_COMPRESS);
1045 #ifdef DEBUG
1046 descp->csd_usum = cpd.cpd_usum;
1047 descp->csd_csum = cpd.cpd_csum;
1048 #endif
1050 error = 0;
1053 * Save the raw or compressed data to the storage area pointed to by
1054 * sensitive_write_ptr. Make sure the storage space is big enough to
1055 * hold the result. Otherwise roll back to increase the storage space.
1057 descp->csd_clean_sva = (cpr_ptr)sensitive_write_ptr;
1058 descp->csd_clean_sz = datalen;
1059 if ((sensitive_write_ptr + datalen) < i_cpr_storage_data_end) {
1060 extern void cprbcopy(void *, void *, size_t);
1062 cprbcopy(datap, sensitive_write_ptr, datalen);
1063 sensitive_size_saved += datalen;
1064 sensitive_pages_saved += descp->csd_dirty_npages;
1065 sensitive_write_ptr += datalen;
1066 } else {
1067 remaining = (i_cpr_storage_data_end - sensitive_write_ptr);
1068 CPR_DEBUG(CPR_DEBUG1, "i_cpr_compress_and_save: The storage "
1069 "space is too small!\ngot %d, want %d\n\n",
1070 remaining, (remaining + datalen));
1071 #ifdef DEBUG
1073 * Check to see if the content of the sensitive pages that we
1074 * just copied have changed during this small time window.
1076 test_usum = checksum32(CPR->c_mapping_area, mmu_ptob(pages));
1077 descp->csd_usum = cpd.cpd_usum;
1078 if (test_usum != descp->csd_usum) {
1079 CPR_DEBUG(CPR_DEBUG1, "\nWARNING: "
1080 "i_cpr_compress_and_save: "
1081 "Data in the range of pfn 0x%lx to pfn "
1082 "0x%lx has changed after they are saved "
1083 "into storage.", spfn, (spfn + pages - 1));
1085 #endif
1086 error = ENOMEM;
/* always undo the temporary mapping (paired with i_cpr_mapin above) */
1089 i_cpr_mapout(CPR->c_mapping_area, pages);
1090 return (error);
1095 * This routine is derived from cpr_count_kpages().
1096 * It goes through kernel data nucleus and segkmem segments to select
1097 * pages in use and mark them in the corresponding bitmap.
1099 pgcnt_t
1100 i_cpr_count_sensitive_kpages(int mapflag, bitfunc_t bitfunc)
/*
 * Count (and via bitfunc, tag or untag) the sensitive kernel pages:
 * the data nucleus, kvseg/kvalloc, and the kmem64 segment when
 * present.  Returns the combined page count.
 */
1102 pgcnt_t kdata_cnt = 0, segkmem_cnt = 0;
1103 extern caddr_t e_moddata;
1104 extern struct seg kvalloc;
1105 extern struct seg kmem64;
1106 size_t size;
1109 * Kernel data nucleus pages
1111 size = e_moddata - s_data;
1112 kdata_cnt += cpr_count_pages(s_data, size,
1113 mapflag, bitfunc, DBG_SHOWRANGE);
1116 * kvseg and kvalloc pages
1118 segkmem_cnt += cpr_scan_kvseg(mapflag, bitfunc, &kvseg);
1119 segkmem_cnt += cpr_count_pages(kvalloc.s_base, kvalloc.s_size,
1120 mapflag, bitfunc, DBG_SHOWRANGE);
1122 /* segment to support kernel memory usage above 32-bit space (4GB) */
1123 if (kmem64.s_base)
1124 segkmem_cnt += cpr_count_pages(kmem64.s_base, kmem64.s_size,
1125 mapflag, bitfunc, DBG_SHOWRANGE);
1127 CPR_DEBUG(CPR_DEBUG7, "\ni_cpr_count_sensitive_kpages:\n"
1128 "\tkdata_cnt %ld + segkmem_cnt %ld = %ld pages\n",
1129 kdata_cnt, segkmem_cnt, kdata_cnt + segkmem_cnt);
1131 return (kdata_cnt + segkmem_cnt);
1135 pgcnt_t
1136 i_cpr_count_storage_pages(int mapflag, bitfunc_t bitfunc)
1138 pgcnt_t count = 0;
1140 if (i_cpr_storage_desc_base) {
1141 count += cpr_count_pages((caddr_t)i_cpr_storage_desc_base,
1142 (size_t)mmu_ptob(i_cpr_storage_desc_pgcnt),
1143 mapflag, bitfunc, DBG_SHOWRANGE);
1145 if (i_cpr_storage_data_base) {
1146 count += cpr_count_pages(i_cpr_storage_data_base,
1147 (size_t)mmu_ptob(i_cpr_storage_data_sz),
1148 mapflag, bitfunc, DBG_SHOWRANGE);
1150 return (count);
1155 * Derived from cpr_write_statefile().
1156 * Allocate (or reallocate after exhausting the supply) descriptors for each
1157 * chunk of contiguous sensitive kpages.
1159 static int
1160 i_cpr_storage_desc_alloc(csd_t **basepp, pgcnt_t *pgsp, csd_t **endpp,
1161 int retry)
/*
 * (Re)allocate the array of storage descriptors, one per contiguous
 * chunk of sensitive kpages; each retry grows the array by one page.
 * On success the base/end pointers and page count are stored through
 * the out parameters and the array is re-initialized; returns 0, or
 * ENOMEM when the KM_NOSLEEP allocation fails.
 */
1163 pgcnt_t npages;
1164 int chunks;
1165 csd_t *descp, *end;
1166 size_t len;
1167 char *str = "i_cpr_storage_desc_alloc:";
1170 * On initial allocation, add some extra to cover overhead caused
1171 * by the allocation for the storage area later.
1173 if (retry == 0) {
1174 chunks = cpr_contig_pages(NULL, STORAGE_DESC_ALLOC) +
1175 EXTRA_DESCS;
1176 npages = mmu_btopr(sizeof (**basepp) * (pgcnt_t)chunks);
1177 CPR_DEBUG(CPR_DEBUG7, "%s chunks %d, ", str, chunks);
1178 } else {
1179 CPR_DEBUG(CPR_DEBUG7, "%s retry %d: ", str, retry);
/* each retry simply adds one more page of descriptors */
1180 npages = *pgsp + 1;
1182 /* Free old descriptors, if any */
1183 if (*basepp)
1184 kmem_free((caddr_t)*basepp, mmu_ptob(*pgsp));
1186 descp = *basepp = kmem_alloc(mmu_ptob(npages), KM_NOSLEEP);
1187 if (descp == NULL) {
1188 CPR_DEBUG(CPR_DEBUG7, "%s no space for descriptors!\n", str);
1189 return (ENOMEM);
1192 *pgsp = npages;
1193 len = mmu_ptob(npages);
1194 end = *endpp = descp + (len / (sizeof (**basepp)));
1195 CPR_DEBUG(CPR_DEBUG7, "npages 0x%lx, len 0x%lx, items 0x%lx\n\t*basepp "
1196 "%p, *endpp %p\n", npages, len, (len / (sizeof (**basepp))),
1197 (void *)*basepp, (void *)*endpp);
1198 i_cpr_storage_desc_init(descp, npages, end);
1199 return (0);
1202 static void
1203 i_cpr_storage_desc_init(csd_t *descp, pgcnt_t npages, csd_t *end)
1205 size_t len = mmu_ptob(npages);
1207 /* Initialize the descriptors to something impossible. */
1208 bzero(descp, len);
1209 #ifdef DEBUG
1211 * This condition is tested by an ASSERT
1213 for (; descp < end; descp++)
1214 descp->csd_dirty_spfn = (uint_t)-1;
1215 #endif
1219 i_cpr_dump_sensitive_kpages(vnode_t *vp)
1221 int error = 0;
1222 uint_t spin_cnt = 0;
1223 csd_t *descp;
1226 * These following two variables need to be reinitialized
1227 * for each cpr cycle.
1229 i_cpr_sensitive_bytes_dumped = 0;
1230 i_cpr_sensitive_pgs_dumped = 0;
1232 if (i_cpr_storage_desc_base) {
1233 for (descp = i_cpr_storage_desc_base;
1234 descp <= i_cpr_storage_desc_last_used; descp++) {
1235 if (error = cpr_dump_sensitive(vp, descp))
1236 return (error);
1237 spin_cnt++;
1238 if ((spin_cnt & 0x5F) == 1)
1239 cpr_spinning_bar();
1241 prom_printf(" \b");
1244 CPR_DEBUG(CPR_DEBUG7, "\ni_cpr_dump_sensitive_kpages: dumped %ld\n",
1245 i_cpr_sensitive_pgs_dumped);
1246 return (0);
1251 * 1. Fill the cpr page descriptor with the info of the dirty pages
1252 * and
1253 * write the descriptor out. It will be used at resume.
1254 * 2. Write the clean data in stead of the dirty data out.
1255 * Note: to save space, the clean data is already compressed.
1257 static int
1258 cpr_dump_sensitive(vnode_t *vp, csd_t *descp)
1260 int error = 0;
1261 caddr_t datap;
1262 cpd_t cpd; /* cpr page descriptor */
1263 pfn_t dirty_spfn;
1264 pgcnt_t dirty_npages;
1265 size_t clean_sz;
1266 caddr_t clean_sva;
1267 int clean_compressed;
1268 extern uchar_t cpr_pagecopy[];
1270 dirty_spfn = descp->csd_dirty_spfn;
1271 dirty_npages = descp->csd_dirty_npages;
1272 clean_sva = (caddr_t)descp->csd_clean_sva;
1273 clean_sz = descp->csd_clean_sz;
1274 clean_compressed = descp->csd_clean_compressed;
1276 /* Fill cpr page descriptor. */
1277 cpd.cpd_magic = (uint_t)CPR_PAGE_MAGIC;
1278 cpd.cpd_pfn = dirty_spfn;
1279 cpd.cpd_flag = 0; /* must init to zero */
1280 cpd.cpd_pages = dirty_npages;
1282 #ifdef DEBUG
1283 if ((cpd.cpd_usum = descp->csd_usum) != 0)
1284 cpd.cpd_flag |= CPD_USUM;
1285 if ((cpd.cpd_csum = descp->csd_csum) != 0)
1286 cpd.cpd_flag |= CPD_CSUM;
1287 #endif
1289 STAT->cs_dumped_statefsz += mmu_ptob(dirty_npages);
1292 * The sensitive kpages are usually saved with compression
1293 * unless compression could not reduce the size of the data.
1294 * If user choose not to have the statefile compressed,
1295 * we need to decompress the data back before dumping it to disk.
1297 if (CPR->c_flags & C_COMPRESSING) {
1298 cpd.cpd_length = clean_sz;
1299 datap = clean_sva;
1300 if (clean_compressed)
1301 cpd.cpd_flag |= CPD_COMPRESS;
1302 } else {
1303 if (clean_compressed) {
1304 cpd.cpd_length = decompress(clean_sva, cpr_pagecopy,
1305 clean_sz, mmu_ptob(dirty_npages));
1306 datap = (caddr_t)cpr_pagecopy;
1307 ASSERT(cpd.cpd_length == mmu_ptob(dirty_npages));
1308 } else {
1309 cpd.cpd_length = clean_sz;
1310 datap = clean_sva;
1312 cpd.cpd_csum = 0;
1315 /* Write cpr page descriptor */
1316 error = cpr_write(vp, (caddr_t)&cpd, sizeof (cpd));
1317 if (error) {
1318 CPR_DEBUG(CPR_DEBUG7, "descp: %p\n", (void *)descp);
1319 #ifdef DEBUG
1320 debug_enter("cpr_dump_sensitive: cpr_write() page "
1321 "descriptor failed!\n");
1322 #endif
1323 return (error);
1326 i_cpr_sensitive_bytes_dumped += sizeof (cpd_t);
1328 /* Write page data */
1329 error = cpr_write(vp, (caddr_t)datap, cpd.cpd_length);
1330 if (error) {
1331 CPR_DEBUG(CPR_DEBUG7, "error: %x\n", error);
1332 CPR_DEBUG(CPR_DEBUG7, "descp: %p\n", (void *)descp);
1333 CPR_DEBUG(CPR_DEBUG7, "cpr_write(%p, %p , %lx)\n",
1334 (void *)vp, (void *)datap, cpd.cpd_length);
1335 #ifdef DEBUG
1336 debug_enter("cpr_dump_sensitive: cpr_write() data failed!\n");
1337 #endif
1338 return (error);
1341 i_cpr_sensitive_bytes_dumped += cpd.cpd_length;
1342 i_cpr_sensitive_pgs_dumped += dirty_npages;
1344 return (error);
1349 * Sanity check to make sure that we have dumped right amount
1350 * of pages from different sources to statefile.
1353 i_cpr_check_pgs_dumped(uint_t pgs_expected, uint_t regular_pgs_dumped)
1355 uint_t total_pgs_dumped;
1357 total_pgs_dumped = regular_pgs_dumped + i_cpr_sensitive_pgs_dumped;
1359 CPR_DEBUG(CPR_DEBUG7, "\ncheck_pgs: reg %d + sens %ld = %d, "
1360 "expect %d\n\n", regular_pgs_dumped, i_cpr_sensitive_pgs_dumped,
1361 total_pgs_dumped, pgs_expected);
1363 if (pgs_expected == total_pgs_dumped)
1364 return (0);
1366 return (EINVAL);
1371 i_cpr_reusefini(void)
1373 struct vnode *vp;
1374 cdef_t *cdef;
1375 size_t size;
1376 char *bufp;
1377 int rc;
1379 if (cpr_reusable_mode)
1380 cpr_reusable_mode = 0;
1382 if (rc = cpr_open_deffile(FREAD|FWRITE, &vp)) {
1383 if (rc == EROFS) {
1384 cpr_err(CE_CONT, "uadmin A_FREEZE AD_REUSEFINI "
1385 "(uadmin %d %d)\nmust be done with / mounted "
1386 "writeable.\n", A_FREEZE, AD_REUSEFINI);
1388 return (rc);
1391 cdef = kmem_alloc(sizeof (*cdef), KM_SLEEP);
1392 rc = cpr_rdwr(UIO_READ, vp, cdef, sizeof (*cdef));
1394 if (rc) {
1395 cpr_err(CE_WARN, "Failed reading %s, errno = %d",
1396 cpr_default_path, rc);
1397 } else if (cdef->mini.magic != CPR_DEFAULT_MAGIC) {
1398 cpr_err(CE_WARN, "bad magic number in %s, cannot restore "
1399 "prom values for %s", cpr_default_path,
1400 cpr_enumerate_promprops(&bufp, &size));
1401 kmem_free(bufp, size);
1402 rc = EINVAL;
1403 } else {
1405 * clean up prom properties
1407 rc = cpr_update_nvram(cdef->props);
1408 if (rc == 0) {
1410 * invalidate the disk copy and turn off reusable
1412 cdef->mini.magic = 0;
1413 cdef->mini.reusable = 0;
1414 if (rc = cpr_rdwr(UIO_WRITE, vp,
1415 &cdef->mini, sizeof (cdef->mini))) {
1416 cpr_err(CE_WARN, "Failed writing %s, errno %d",
1417 cpr_default_path, rc);
1422 (void) VOP_CLOSE(vp, FREAD|FWRITE, 1, (offset_t)0, CRED(), NULL);
1423 VN_RELE(vp);
1424 kmem_free(cdef, sizeof (*cdef));
1426 return (rc);
1431 i_cpr_reuseinit(void)
1433 int rc = 0;
1435 if (rc = cpr_default_setup(1))
1436 return (rc);
1439 * We need to validate default file
1441 rc = cpr_validate_definfo(1);
1442 if (rc == 0)
1443 cpr_reusable_mode = 1;
1444 else if (rc == EROFS) {
1445 cpr_err(CE_NOTE, "reuseinit must be performed "
1446 "while / is mounted writeable");
1449 (void) cpr_default_setup(0);
1451 return (rc);
1456 i_cpr_check_cprinfo(void)
1458 struct vnode *vp;
1459 cmini_t mini;
1460 int rc = 0;
1462 if (rc = cpr_open_deffile(FREAD, &vp)) {
1463 if (rc == ENOENT)
1464 cpr_err(CE_NOTE, "cprinfo file does not "
1465 "exist. You must run 'uadmin %d %d' "
1466 "command while / is mounted writeable,\n"
1467 "then reboot and run 'uadmin %d %d' "
1468 "to create a reusable statefile",
1469 A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1470 return (rc);
1473 rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
1474 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
1475 VN_RELE(vp);
1477 if (rc) {
1478 cpr_err(CE_WARN, "Failed reading %s, errno = %d",
1479 cpr_default_path, rc);
1480 } else if (mini.magic != CPR_DEFAULT_MAGIC) {
1481 cpr_err(CE_CONT, "bad magic number in cprinfo file.\n"
1482 "You must run 'uadmin %d %d' while / is mounted "
1483 "writeable, then reboot and run 'uadmin %d %d' "
1484 "to create a reusable statefile\n",
1485 A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1486 rc = EINVAL;
1489 return (rc);
/*
 * Reusable statefiles are supported on this platform.
 */
int
i_cpr_reusable_supported(void)
{
	return (1);
}
1501 * find prom phys pages and alloc space for a tmp copy
1503 static int
1504 i_cpr_find_ppages(void)
1506 struct page *pp;
1507 struct memlist *pmem;
1508 pgcnt_t npages, pcnt, scnt, vcnt;
1509 pfn_t ppn, plast, *dst;
1510 int mapflag;
1512 cpr_clear_bitmaps();
1513 mapflag = REGULAR_BITMAP;
1516 * there should be a page_t for each phys page used by the kernel;
1517 * set a bit for each phys page not tracked by a page_t
1519 pcnt = 0;
1520 memlist_read_lock();
1521 for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
1522 npages = mmu_btop(pmem->ml_size);
1523 ppn = mmu_btop(pmem->ml_address);
1524 for (plast = ppn + npages; ppn < plast; ppn++) {
1525 if (page_numtopp_nolock(ppn))
1526 continue;
1527 (void) cpr_setbit(ppn, mapflag);
1528 pcnt++;
1531 memlist_read_unlock();
1534 * clear bits for phys pages in each segment
1536 scnt = cpr_count_seg_pages(mapflag, cpr_clrbit);
1539 * set bits for phys pages referenced by the promvp vnode;
1540 * these pages are mostly comprised of forthdebug words
1542 vcnt = 0;
1543 for (pp = promvp.v_pages; pp; ) {
1544 if (cpr_setbit(pp->p_offset, mapflag) == 0)
1545 vcnt++;
1546 pp = pp->p_vpnext;
1547 if (pp == promvp.v_pages)
1548 break;
1552 * total number of prom pages are:
1553 * (non-page_t pages - seg pages + vnode pages)
1555 ppage_count = pcnt - scnt + vcnt;
1556 CPR_DEBUG(CPR_DEBUG1,
1557 "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
1558 pcnt, scnt, vcnt, ppage_count);
1561 * alloc array of pfn_t to store phys page list
1563 pphys_list_size = ppage_count * sizeof (pfn_t);
1564 pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
1565 if (pphys_list == NULL) {
1566 cpr_err(CE_WARN, "cannot alloc pphys_list");
1567 return (ENOMEM);
1571 * phys pages referenced in the bitmap should be
1572 * those used by the prom; scan bitmap and save
1573 * a list of prom phys page numbers
1575 dst = pphys_list;
1576 memlist_read_lock();
1577 for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
1578 npages = mmu_btop(pmem->ml_size);
1579 ppn = mmu_btop(pmem->ml_address);
1580 for (plast = ppn + npages; ppn < plast; ppn++) {
1581 if (cpr_isset(ppn, mapflag)) {
1582 ASSERT(dst < (pphys_list + ppage_count));
1583 *dst++ = ppn;
1587 memlist_read_unlock();
1590 * allocate space to store prom pages
1592 ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
1593 if (ppage_buf == NULL) {
1594 kmem_free(pphys_list, pphys_list_size);
1595 pphys_list = NULL;
1596 cpr_err(CE_WARN, "cannot alloc ppage_buf");
1597 return (ENOMEM);
1600 return (0);
1605 * save prom pages to kmem pages
1607 static void
1608 i_cpr_save_ppages(void)
1610 pfn_t *pphys, *plast;
1611 caddr_t dst;
1614 * map in each prom page and copy to a kmem page
1616 dst = ppage_buf;
1617 plast = pphys_list + ppage_count;
1618 for (pphys = pphys_list; pphys < plast; pphys++) {
1619 i_cpr_mapin(cpr_vaddr, 1, *pphys);
1620 bcopy(cpr_vaddr, dst, MMU_PAGESIZE);
1621 i_cpr_mapout(cpr_vaddr, 1);
1622 dst += MMU_PAGESIZE;
1625 CPR_DEBUG(CPR_DEBUG1, "saved %ld prom pages\n", ppage_count);
1630 * restore prom pages from kmem pages
1632 static void
1633 i_cpr_restore_ppages(void)
1635 pfn_t *pphys, *plast;
1636 caddr_t src;
1638 dcache_flushall();
1641 * map in each prom page and copy from a kmem page
1643 src = ppage_buf;
1644 plast = pphys_list + ppage_count;
1645 for (pphys = pphys_list; pphys < plast; pphys++) {
1646 i_cpr_mapin(cpr_vaddr, 1, *pphys);
1647 bcopy(src, cpr_vaddr, MMU_PAGESIZE);
1648 i_cpr_mapout(cpr_vaddr, 1);
1649 src += MMU_PAGESIZE;
1652 dcache_flushall();
1654 CPR_DEBUG(CPR_DEBUG1, "restored %ld prom pages\n", ppage_count);
1659 * save/restore prom pages or free related allocs
1662 i_cpr_prom_pages(int action)
1664 int error;
1666 if (action == CPR_PROM_SAVE) {
1667 if (ppage_buf == NULL) {
1668 ASSERT(pphys_list == NULL);
1669 if (error = i_cpr_find_ppages())
1670 return (error);
1671 i_cpr_save_ppages();
1673 } else if (action == CPR_PROM_RESTORE) {
1674 i_cpr_restore_ppages();
1675 } else if (action == CPR_PROM_FREE) {
1676 if (pphys_list) {
1677 ASSERT(pphys_list_size);
1678 kmem_free(pphys_list, pphys_list_size);
1679 pphys_list = NULL;
1680 pphys_list_size = 0;
1682 if (ppage_buf) {
1683 ASSERT(ppage_count);
1684 kmem_free(ppage_buf, mmu_ptob(ppage_count));
1685 CPR_DEBUG(CPR_DEBUG1, "freed %ld prom pages\n",
1686 ppage_count);
1687 ppage_buf = NULL;
1688 ppage_count = 0;
1691 return (0);
1696 * record tlb data for the nucleus, bigktsb's, and the cpr module;
1697 * this data is later used by cprboot to install dtlb/itlb entries.
1698 * when we jump into the cpr module during the resume phase, those
1699 * mappings are needed until switching to the kernel trap table.
1700 * to make the dtte/itte info available during resume, we need
1701 * the info recorded prior to saving sensitive pages, otherwise
1702 * all the data would appear as NULLs.
1704 static void
1705 i_cpr_save_tlbinfo(void)
1707 cti_t cti = {0};
1710 * during resume - shortly after jumping into the cpr module,
1711 * sfmmu_load_mmustate() will overwrite any dtlb entry at any
1712 * index used for TSBs; skip is set so that any saved tte will
1713 * target other tlb offsets and prevent being lost during
1714 * resume. now scan the dtlb and save locked entries,
1715 * then add entries for the tmp stack / data page and the
1716 * cpr thread structure.
1718 cti.dst = m_info.dtte;
1719 cti.tail = cti.dst + CPR_MAX_TLB;
1720 cti.reader = dtlb_rd_entry;
1721 cti.writer = NULL;
1722 cti.filter = i_cpr_lnb;
1723 cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
1725 if (utsb_dtlb_ttenum != -1)
1726 cti.skip = (1 << utsb_dtlb_ttenum);
1728 if (utsb4m_dtlb_ttenum != -1)
1729 cti.skip |= (1 << utsb4m_dtlb_ttenum);
1731 i_cpr_scan_tlb(&cti);
1732 i_cpr_make_tte(&cti, &i_cpr_data_page, datava);
1733 i_cpr_make_tte(&cti, curthread, datava);
1736 * scan itlb and save locked entries; add an entry for
1737 * the first text page of the cpr module; cprboot will
1738 * jump to that page after restoring kernel pages.
1740 cti.dst = m_info.itte;
1741 cti.tail = cti.dst + CPR_MAX_TLB;
1742 cti.reader = itlb_rd_entry;
1743 cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
1744 cti.skip = 0;
1745 i_cpr_scan_tlb(&cti);
1746 i_cpr_make_tte(&cti, (void *)i_cpr_resume_setup, textva);
1750 /* ARGSUSED */
1752 i_cpr_dump_setup(vnode_t *vp)
1755 * zero out m_info and add info to dtte/itte arrays
1757 bzero(&m_info, sizeof (m_info));
1758 i_cpr_save_tlbinfo();
1759 return (0);
1764 i_cpr_is_supported(int sleeptype)
1766 char es_prop[] = "energystar-v2";
1767 pnode_t node;
1768 int last;
1769 extern int cpr_supported_override;
1770 extern int cpr_platform_enable;
1772 if (sleeptype != CPR_TODISK)
1773 return (0);
1776 * The next statement tests if a specific platform has turned off
1777 * cpr support.
1779 if (cpr_supported_override)
1780 return (0);
1783 * Do not inspect energystar-v* property if a platform has
1784 * specifically turned on cpr support
1786 if (cpr_platform_enable)
1787 return (1);
1789 node = prom_rootnode();
1790 if (prom_getproplen(node, es_prop) != -1)
1791 return (1);
1792 last = strlen(es_prop) - 1;
1793 es_prop[last] = '3';
1794 return (prom_getproplen(node, es_prop) != -1);
1799 * the actual size of the statefile data isn't known until after all the
1800 * compressed pages are written; even the inode size doesn't reflect the
1801 * data size since there are usually many extra fs blocks. for recording
1802 * the actual data size, the first sector of the statefile is copied to
1803 * a tmp buf, and the copy is later updated and flushed to disk.
1806 i_cpr_blockzero(char *base, char **bufpp, int *blkno, vnode_t *vp)
1808 extern int cpr_flush_write(vnode_t *);
1809 static char cpr_sector[DEV_BSIZE];
1810 cpr_ext bytes, *dst;
1813 * this routine is called after cdd_t and csu_md_t are copied
1814 * to cpr_buf; mini-hack alert: the save/update method creates
1815 * a dependency on the combined struct size being >= one sector
1816 * or DEV_BSIZE; since introduction in Sol2.7, csu_md_t size is
1817 * over 1K bytes and will probably grow with any changes.
1819 * copy when vp is NULL, flush when non-NULL
1821 if (vp == NULL) {
1822 ASSERT((*bufpp - base) >= DEV_BSIZE);
1823 bcopy(base, cpr_sector, sizeof (cpr_sector));
1824 return (0);
1825 } else {
1826 bytes = dbtob(*blkno);
1827 dst = &((cdd_t *)cpr_sector)->cdd_filesize;
1828 bcopy(&bytes, dst, sizeof (bytes));
1829 bcopy(cpr_sector, base, sizeof (cpr_sector));
1830 *bufpp = base + sizeof (cpr_sector);
1831 *blkno = cpr_statefile_offset();
1832 CPR_DEBUG(CPR_DEBUG1, "statefile data size: %ld\n\n", bytes);
1833 return (cpr_flush_write(vp));
1839 * Allocate bitmaps according to the phys_install list.
1841 static int
1842 i_cpr_bitmap_setup(void)
1844 struct memlist *pmem;
1845 cbd_t *dp, *tail;
1846 void *space;
1847 size_t size;
1850 * The number of bitmap descriptors will be the count of
1851 * phys_install ranges plus 1 for a trailing NULL struct.
1853 cpr_nbitmaps = 1;
1854 for (pmem = phys_install; pmem; pmem = pmem->ml_next)
1855 cpr_nbitmaps++;
1857 if (cpr_nbitmaps > (CPR_MAX_BMDESC - 1)) {
1858 cpr_err(CE_WARN, "too many physical memory ranges %d, max %d",
1859 cpr_nbitmaps, CPR_MAX_BMDESC - 1);
1860 return (EFBIG);
1863 /* Alloc an array of bitmap descriptors. */
1864 dp = kmem_zalloc(cpr_nbitmaps * sizeof (*dp), KM_NOSLEEP);
1865 if (dp == NULL) {
1866 cpr_nbitmaps = 0;
1867 return (ENOMEM);
1869 tail = dp + cpr_nbitmaps;
1871 CPR->c_bmda = dp;
1872 for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
1873 size = BITMAP_BYTES(pmem->ml_size);
1874 space = kmem_zalloc(size * 2, KM_NOSLEEP);
1875 if (space == NULL)
1876 return (ENOMEM);
1877 ASSERT(dp < tail);
1878 dp->cbd_magic = CPR_BITMAP_MAGIC;
1879 dp->cbd_spfn = mmu_btop(pmem->ml_address);
1880 dp->cbd_epfn = mmu_btop(pmem->ml_address + pmem->ml_size) - 1;
1881 dp->cbd_size = size;
1882 dp->cbd_reg_bitmap = (cpr_ptr)space;
1883 dp->cbd_vlt_bitmap = (cpr_ptr)((caddr_t)space + size);
1884 dp++;
1887 /* set magic for the last descriptor */
1888 ASSERT(dp == (tail - 1));
1889 dp->cbd_magic = CPR_BITMAP_MAGIC;
1891 return (0);
1895 void
1896 i_cpr_bitmap_cleanup(void)
1898 cbd_t *dp;
1900 if (CPR->c_bmda == NULL)
1901 return;
1902 for (dp = CPR->c_bmda; dp->cbd_size; dp++)
1903 kmem_free((void *)dp->cbd_reg_bitmap, dp->cbd_size * 2);
1904 kmem_free(CPR->c_bmda, cpr_nbitmaps * sizeof (*CPR->c_bmda));
1905 CPR->c_bmda = NULL;
1906 cpr_nbitmaps = 0;
/*
 * A "regular" and "volatile" bitmap are created for each range of
 * physical memory. The volatile maps are used to count and track pages
 * susceptible to heap corruption - caused by drivers that allocate mem
 * during VOP_DUMP(); the regular maps are used for all the other non-
 * susceptible pages. Before writing the bitmaps to the statefile,
 * each bitmap pair gets merged to simplify handling within cprboot.
 */
int
i_cpr_alloc_bitmaps(void)
{
	int err;

	memlist_read_lock();
	err = i_cpr_bitmap_setup();
	memlist_read_unlock();
	if (err)
		i_cpr_bitmap_cleanup();
	return (err);
}
1934 * Power down the system.
1937 i_cpr_power_down(int sleeptype)
1939 int is_defined = 0;
1940 char *wordexists = "p\" power-off\" find nip swap l! ";
1941 char *req = "power-off";
1943 ASSERT(sleeptype == CPR_TODISK);
1946 * is_defined has value -1 when defined
1948 prom_interpret(wordexists, (uintptr_t)&is_defined, 0, 0, 0, 0);
1949 if (is_defined) {
1950 CPR_DEBUG(CPR_DEBUG1, "\ncpr: %s...\n", req);
1951 prom_interpret(req, 0, 0, 0, 0, 0);
1954 * Only returns if failed
1956 return (EIO);
/*
 * Quiesce all other CPUs prior to checkpointing.
 */
void
i_cpr_stop_other_cpus(void)
{
	stop_other_cpus();
}
1966 * Save context for the specified CPU
1968 /* ARGSUSED */
1969 void *
1970 i_cpr_save_context(void *arg)
1973 * Not yet
1975 ASSERT(0);
1976 return (NULL);
/*
 * Per-CPU pre-resume hook; not implemented on sun4u.
 */
void
i_cpr_pre_resume_cpus(void)
{
	/*
	 * Not yet
	 */
	ASSERT(0);
}
/*
 * Per-CPU post-resume hook; not implemented on sun4u.
 */
void
i_cpr_post_resume_cpus(void)
{
	/*
	 * Not yet
	 */
	ASSERT(0);
}
/*
 * nothing to do
 */
void
i_cpr_alloc_cpus(void)
{
}
/*
 * nothing to do
 */
void
i_cpr_free_cpus(void)
{
}
2013 /* ARGSUSED */
2014 void
2015 i_cpr_save_configuration(dev_info_t *dip)
2018 * this is a no-op on sparc
2022 /* ARGSUSED */
2023 void
2024 i_cpr_restore_configuration(dev_info_t *dip)
2027 * this is a no-op on sparc