4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Fill in and write out the cpr state file
28 * 1. Allocate and write headers, ELF and cpr dump header
29 * 2. Allocate bitmaps according to phys_install
30 * 3. Tag kernel pages into corresponding bitmap
31 * 4. Write bitmaps to state file
32 * 5. Write actual physical page data to state file
35 #include <sys/types.h>
36 #include <sys/systm.h>
38 #include <sys/memlist.h>
40 #include <sys/vnode.h>
41 #include <sys/fs/ufs_inode.h>
42 #include <sys/errno.h>
43 #include <sys/cmn_err.h>
44 #include <sys/debug.h>
47 #include <vm/seg_kmem.h>
48 #include <vm/seg_kpm.h>
53 #include <sys/panic.h>
54 #include <sys/thread.h>
57 /* Local defines and variables */
58 #define BTOb(bytes) ((bytes) << 3) /* Bytes to bits, log2(NBBY) */
59 #define bTOB(bits) ((bits) >> 3) /* bits to Bytes, log2(NBBY) */
62 static uint_t cpr_pages_tobe_dumped
;
63 static uint_t cpr_regular_pgs_dumped
;
64 static int cpr_dump_regular_pages(vnode_t
*);
65 static int cpr_count_upages(int, bitfunc_t
);
66 static int cpr_compress_and_write(vnode_t
*, uint_t
, pfn_t
, pgcnt_t
);
69 int cpr_flush_write(vnode_t
*);
71 int cpr_contig_pages(vnode_t
*, int);
73 void cpr_clear_bitmaps();
75 extern size_t cpr_get_devsize(dev_t
);
76 extern int i_cpr_dump_setup(vnode_t
*);
77 extern int i_cpr_blockzero(char *, char **, int *, vnode_t
*);
78 extern int cpr_test_mode
;
79 int cpr_setbit(pfn_t
, int);
80 int cpr_clrbit(pfn_t
, int);
84 char *cpr_buf
, *cpr_buf_end
;
85 int cpr_buf_blocks
; /* size of cpr_buf in blocks */
86 size_t cpr_buf_size
; /* size of cpr_buf in bytes */
87 size_t cpr_bitmap_size
;
90 char *cpr_pagedata
; /* page buffer for compression / tmp copy */
91 size_t cpr_pagedata_size
; /* page buffer size in bytes */
94 static char *cpr_wptr
; /* keep track of where to write to next */
95 static int cpr_file_bn
; /* cpr state-file block offset */
96 static int cpr_disk_writes_ok
;
97 static size_t cpr_dev_space
= 0;
100 char cpr_pagecopy
[CPR_MAXCONTIG
* MMU_PAGESIZE
];
104 * On some platforms bcopy may modify the thread structure
105 * during bcopy (eg, to prevent cpu migration). If the
106 * range we are currently writing out includes our own
107 * thread structure then it will be snapshotted by bcopy
108 * including those modified members - and the updates made
109 * on exit from bcopy will no longer be seen when we later
110 * restore the mid-bcopy kthread_t. So if the range we
111 * need to copy overlaps with our thread structure we will
112 * use a simple byte copy.
115 cprbcopy(void *from
, void *to
, size_t bytes
)
117 extern int curthreadremapped
;
120 kthrend
= (caddr_t
)curthread
+ sizeof (kthread_t
) - 1;
121 if (curthreadremapped
|| (kthrend
>= (caddr_t
)from
&&
122 kthrend
< (caddr_t
)from
+ bytes
+ sizeof (kthread_t
) - 1)) {
123 caddr_t src
= from
, dst
= to
;
128 bcopy(from
, to
, bytes
);
133 * Allocate pages for buffers used in writing out the statefile
138 char *allocerr
= "Unable to allocate memory for cpr buffer";
142 * set the cpr write buffer size to at least the historic
143 * size (128k) or large enough to store the both the early
144 * set of statefile structures (well under 0x800) plus the
145 * bitmaps, and roundup to the next pagesize.
147 size
= PAGE_ROUNDUP(dbtob(4) + cpr_bitmap_size
);
148 cpr_buf_size
= MAX(size
, CPRBUFSZ
);
149 cpr_buf_blocks
= btodb(cpr_buf_size
);
150 cpr_buf
= kmem_alloc(cpr_buf_size
, KM_NOSLEEP
);
151 if (cpr_buf
== NULL
) {
152 cpr_err(CE_WARN
, allocerr
);
155 cpr_buf_end
= cpr_buf
+ cpr_buf_size
;
157 cpr_pagedata_size
= mmu_ptob(CPR_MAXCONTIG
+ 1);
158 cpr_pagedata
= kmem_alloc(cpr_pagedata_size
, KM_NOSLEEP
);
159 if (cpr_pagedata
== NULL
) {
160 kmem_free(cpr_buf
, cpr_buf_size
);
162 cpr_err(CE_WARN
, allocerr
);
171 * Set bitmap size in bytes based on phys_install.
174 cpr_set_bitmap_size(void)
176 struct memlist
*pmem
;
180 for (pmem
= phys_install
; pmem
; pmem
= pmem
->ml_next
)
181 size
+= pmem
->ml_size
;
182 memlist_read_unlock();
183 cpr_bitmap_size
= BITMAP_BYTES(size
);
188 * CPR dump header contains the following information:
189 * 1. header magic -- unique to cpr state file
190 * 2. kernel return pc & ppn for resume
191 * 3. current thread info
192 * 4. debug level and test mode
193 * 5. number of bitmaps allocated
194 * 6. number of page records
197 cpr_write_header(vnode_t
*vp
)
199 extern ushort_t cpr_mach_type
;
200 struct cpr_dump_desc cdump
;
201 pgcnt_t bitmap_pages
;
202 pgcnt_t kpages
, vpages
, upages
;
203 pgcnt_t
cpr_count_kpages(int mapflag
, bitfunc_t bitfunc
);
205 cdump
.cdd_magic
= (uint_t
)CPR_DUMP_MAGIC
;
206 cdump
.cdd_version
= CPR_VERSION
;
207 cdump
.cdd_machine
= cpr_mach_type
;
208 cdump
.cdd_debug
= cpr_debug
;
209 cdump
.cdd_test_mode
= cpr_test_mode
;
210 cdump
.cdd_bitmaprec
= cpr_nbitmaps
;
215 * Remember how many pages we plan to save to statefile.
216 * This information will be used for sanity checks.
217 * Untag those pages that will not be saved to statefile.
219 kpages
= cpr_count_kpages(REGULAR_BITMAP
, cpr_setbit
);
220 vpages
= cpr_count_volatile_pages(REGULAR_BITMAP
, cpr_clrbit
);
221 upages
= cpr_count_upages(REGULAR_BITMAP
, cpr_setbit
);
222 cdump
.cdd_dumppgsize
= kpages
- vpages
+ upages
;
223 cpr_pages_tobe_dumped
= cdump
.cdd_dumppgsize
;
224 CPR_DEBUG(CPR_DEBUG7
,
225 "\ncpr_write_header: kpages %ld - vpages %ld + upages %ld = %d\n",
226 kpages
, vpages
, upages
, cdump
.cdd_dumppgsize
);
229 * Some pages contain volatile data (cpr_buf and storage area for
230 * sensitive kpages), which are no longer needed after the statefile
231 * is dumped to disk. We have already untagged them from regular
232 * bitmaps. Now tag them into the volatile bitmaps. The pages in
233 * volatile bitmaps will be claimed during resume, and the resumed
234 * kernel will free them.
236 (void) cpr_count_volatile_pages(VOLATILE_BITMAP
, cpr_setbit
);
238 bitmap_pages
= mmu_btopr(cpr_bitmap_size
);
241 * Export accurate statefile size for statefile allocation retry.
242 * statefile_size = all the headers + total pages +
243 * number of pages used by the bitmaps.
244 * Roundup will be done in the file allocation code.
246 STAT
->cs_nocomp_statefsz
= sizeof (cdd_t
) + sizeof (cmd_t
) +
247 (sizeof (cbd_t
) * cdump
.cdd_bitmaprec
) +
248 (sizeof (cpd_t
) * cdump
.cdd_dumppgsize
) +
249 mmu_ptob(cdump
.cdd_dumppgsize
+ bitmap_pages
);
252 * If the estimated statefile is not big enough,
253 * go retry now to save un-necessary operations.
255 if (!(CPR
->c_flags
& C_COMPRESSING
) &&
256 (STAT
->cs_nocomp_statefsz
> STAT
->cs_est_statefsz
)) {
257 if (cpr_debug
& (CPR_DEBUG1
| CPR_DEBUG7
))
258 prom_printf("cpr_write_header: "
259 "STAT->cs_nocomp_statefsz > "
260 "STAT->cs_est_statefsz\n");
264 /* now write cpr dump descriptor */
265 return (cpr_write(vp
, (caddr_t
)&cdump
, sizeof (cdd_t
)));
270 * CPR dump tail record contains the following information:
271 * 1. header magic -- unique to cpr state file
272 * 2. all misc info that needs to be passed to cprboot or resumed kernel
275 cpr_write_terminator(vnode_t
*vp
)
277 cpr_term
.magic
= (uint_t
)CPR_TERM_MAGIC
;
278 cpr_term
.va
= (cpr_ptr
)&cpr_term
;
279 cpr_term
.pfn
= (cpr_ext
)va_to_pfn(&cpr_term
);
281 /* count the last one (flush) */
282 cpr_term
.real_statef_size
= STAT
->cs_real_statefsz
+
283 btod(cpr_wptr
- cpr_buf
) * DEV_BSIZE
;
285 CPR_DEBUG(CPR_DEBUG9
, "cpr_dump: Real Statefile Size: %ld\n",
286 STAT
->cs_real_statefsz
);
288 cpr_tod_get(&cpr_term
.tm_shutdown
);
290 return (cpr_write(vp
, (caddr_t
)&cpr_term
, sizeof (cpr_term
)));
294 * Write bitmap descriptor array, followed by merged bitmaps.
297 cpr_write_bitmap(vnode_t
*vp
)
299 char *rmap
, *vmap
, *dst
, *tail
;
305 if (err
= cpr_write(vp
, (caddr_t
)dp
, cpr_nbitmaps
* sizeof (*dp
)))
309 * merge regular and volatile bitmaps into tmp space
312 for (; dp
->cbd_size
; dp
++) {
313 rmap
= (char *)dp
->cbd_reg_bitmap
;
314 vmap
= (char *)dp
->cbd_vlt_bitmap
;
315 for (size
= dp
->cbd_size
; size
; size
-= bytes
) {
316 bytes
= min(size
, sizeof (cpr_pagecopy
));
317 tail
= &cpr_pagecopy
[bytes
];
318 for (dst
= cpr_pagecopy
; dst
< tail
; dst
++)
319 *dst
= *rmap
++ | *vmap
++;
320 if (err
= cpr_write(vp
, cpr_pagecopy
, bytes
))
330 cpr_write_statefile(vnode_t
*vp
)
333 extern int i_cpr_check_pgs_dumped();
334 void flush_windows(void);
341 * to get an accurate view of kas, we need to untag sensitive
342 * pages *before* dumping them because the disk driver makes
343 * allocations and changes kas along the way. The remaining
344 * pages referenced in the bitmaps are dumped out later as
347 str
= "cpr_write_statefile:";
348 spages
= i_cpr_count_sensitive_kpages(REGULAR_BITMAP
, cpr_clrbit
);
349 CPR_DEBUG(CPR_DEBUG7
, "%s untag %ld sens pages\n", str
, spages
);
352 * now it's OK to call a driver that makes allocations
354 cpr_disk_writes_ok
= 1;
357 * now write out the clean sensitive kpages
358 * according to the sensitive descriptors
360 error
= i_cpr_dump_sensitive_kpages(vp
);
362 CPR_DEBUG(CPR_DEBUG7
,
363 "%s cpr_dump_sensitive_kpages() failed!\n", str
);
368 * cpr_dump_regular_pages() counts cpr_regular_pgs_dumped
370 error
= cpr_dump_regular_pages(vp
);
372 CPR_DEBUG(CPR_DEBUG7
,
373 "%s cpr_dump_regular_pages() failed!\n", str
);
378 * sanity check to verify the right number of pages were dumped
380 error
= i_cpr_check_pgs_dumped(cpr_pages_tobe_dumped
,
381 cpr_regular_pgs_dumped
);
384 prom_printf("\n%s page count mismatch!\n", str
);
397 * creates the CPR state file, the following sections are
398 * written out in sequence:
399 * - writes the cpr dump header
400 * - writes the memory usage bitmaps
401 * - writes the platform dependent info
402 * - writes the remaining user pages
403 * - writes the kernel pages
409 cpr_dump(vnode_t
*vp
)
414 if (cpr_buf
== NULL
) {
415 ASSERT(cpr_pagedata
== NULL
);
416 if (error
= cpr_alloc_bufs())
419 /* point to top of internal buffer */
422 /* initialize global variables used by the write operation */
423 cpr_file_bn
= cpr_statefile_offset();
426 /* allocate bitmaps */
427 if (CPR
->c_bmda
== NULL
) {
428 if (error
= i_cpr_alloc_bitmaps()) {
429 cpr_err(CE_WARN
, "cannot allocate bitmaps");
434 if (error
= i_cpr_prom_pages(CPR_PROM_SAVE
))
437 if (error
= i_cpr_dump_setup(vp
))
441 * set internal cross checking; we dont want to call
442 * a disk driver that makes allocations until after
443 * sensitive pages are saved
445 cpr_disk_writes_ok
= 0;
448 * 1253112: heap corruption due to memory allocation when dumpping
450 * Theoretically on Sun4u only the kernel data nucleus, kvalloc and
451 * kvseg segments can be contaminated should memory allocations happen
452 * during sddump, which is not supposed to happen after the system
453 * is quiesced. Let's call the kernel pages that tend to be affected
454 * 'sensitive kpages' here. To avoid saving inconsistent pages, we
455 * will allocate some storage space to save the clean sensitive pages
456 * aside before statefile dumping takes place. Since there may not be
457 * much memory left at this stage, the sensitive pages will be
458 * compressed before they are saved into the storage area.
460 if (error
= i_cpr_save_sensitive_kpages()) {
461 CPR_DEBUG(CPR_DEBUG7
,
462 "cpr_dump: save_sensitive_kpages failed!\n");
467 * since all cpr allocations are done (space for sensitive kpages,
468 * bitmaps, cpr_buf), kas is stable, and now we can accurately
469 * count regular and sensitive kpages.
471 if (error
= cpr_write_header(vp
)) {
472 CPR_DEBUG(CPR_DEBUG7
,
473 "cpr_dump: cpr_write_header() failed!\n");
477 if (error
= i_cpr_write_machdep(vp
))
480 if (error
= i_cpr_blockzero(cpr_buf
, &cpr_wptr
, NULL
, NULL
))
483 if (error
= cpr_write_bitmap(vp
))
486 if (error
= cpr_write_statefile(vp
)) {
487 CPR_DEBUG(CPR_DEBUG7
,
488 "cpr_dump: cpr_write_statefile() failed!\n");
492 if (error
= cpr_write_terminator(vp
))
495 if (error
= cpr_flush_write(vp
))
498 if (error
= i_cpr_blockzero(cpr_buf
, &cpr_wptr
, &cpr_file_bn
, vp
))
508 * cpr_xwalk() is called many 100x with a range within kvseg or kvseg_reloc;
509 * a page-count from each range is accumulated at arg->pages.
512 cpr_xwalk(void *arg
, void *base
, size_t size
)
514 struct cpr_walkinfo
*cwip
= arg
;
516 cwip
->pages
+= cpr_count_pages(base
, size
,
517 cwip
->mapflag
, cwip
->bitfunc
, DBG_DONTSHOWRANGE
);
523 * cpr_walk() is called many 100x with a range within kvseg or kvseg_reloc;
524 * a page-count from each range is accumulated at arg->pages.
527 cpr_walk(void *arg
, void *base
, size_t size
)
530 caddr_t addr_end
= addr
+ size
;
533 * If we are about to start walking the range of addresses we
534 * carved out of the kernel heap for the large page heap walk
535 * heap_lp_arena to find what segments are actually populated
537 if (SEGKMEM_USE_LARGEPAGES
&&
538 addr
== heap_lp_base
&& addr_end
== heap_lp_end
&&
539 vmem_size(heap_lp_arena
, VMEM_ALLOC
) < size
) {
540 vmem_walk(heap_lp_arena
, VMEM_ALLOC
, cpr_xwalk
, arg
);
542 cpr_xwalk(arg
, base
, size
);
548 * faster scan of kvseg using vmem_walk() to visit
552 cpr_scan_kvseg(int mapflag
, bitfunc_t bitfunc
, struct seg
*seg
)
554 struct cpr_walkinfo cwinfo
;
556 bzero(&cwinfo
, sizeof (cwinfo
));
557 cwinfo
.mapflag
= mapflag
;
558 cwinfo
.bitfunc
= bitfunc
;
560 vmem_walk(heap_arena
, VMEM_ALLOC
, cpr_walk
, &cwinfo
);
562 if (cpr_debug
& CPR_DEBUG7
) {
563 prom_printf("walked %d sub-ranges, total pages %ld\n",
564 cwinfo
.ranges
, mmu_btop(cwinfo
.size
));
565 cpr_show_range(seg
->s_base
, seg
->s_size
,
566 mapflag
, bitfunc
, cwinfo
.pages
);
569 return (cwinfo
.pages
);
574 * cpr_walk_kpm() is called for every used area within the large
575 * segkpm virtual address window. A page-count is accumulated at
579 cpr_walk_kpm(void *arg
, void *base
, size_t size
)
581 struct cpr_walkinfo
*cwip
= arg
;
583 cwip
->pages
+= cpr_count_pages(base
, size
,
584 cwip
->mapflag
, cwip
->bitfunc
, DBG_DONTSHOWRANGE
);
591 * faster scan of segkpm using hat_kpm_walk() to visit only used ranges.
595 cpr_scan_segkpm(int mapflag
, bitfunc_t bitfunc
, struct seg
*seg
)
597 struct cpr_walkinfo cwinfo
;
602 bzero(&cwinfo
, sizeof (cwinfo
));
603 cwinfo
.mapflag
= mapflag
;
604 cwinfo
.bitfunc
= bitfunc
;
605 hat_kpm_walk(cpr_walk_kpm
, &cwinfo
);
607 if (cpr_debug
& CPR_DEBUG7
) {
608 prom_printf("walked %d sub-ranges, total pages %ld\n",
609 cwinfo
.ranges
, mmu_btop(cwinfo
.size
));
610 cpr_show_range(segkpm
->s_base
, segkpm
->s_size
,
611 mapflag
, bitfunc
, cwinfo
.pages
);
614 return (cwinfo
.pages
);
619 * Sparsely filled kernel segments are registered in kseg_table for
620 * easier lookup. See also block comment for cpr_count_seg_pages.
623 #define KSEG_SEG_ADDR 0 /* address of struct seg */
624 #define KSEG_PTR_ADDR 1 /* address of pointer to struct seg */
627 struct seg
**st_seg
; /* segment pointer or segment address */
628 pgcnt_t (*st_fcn
)(int, bitfunc_t
, struct seg
*); /* function to call */
629 int st_addrtype
; /* address type in st_seg */
632 ksegtbl_entry_t kseg_table
[] = {
633 {(struct seg
**)&kvseg
, cpr_scan_kvseg
, KSEG_SEG_ADDR
},
634 {&segkpm
, cpr_scan_segkpm
, KSEG_PTR_ADDR
},
640 * Compare seg with each entry in kseg_table; when there is a match
641 * return the entry pointer, otherwise return NULL.
643 static ksegtbl_entry_t
*
644 cpr_sparse_seg_check(struct seg
*seg
)
646 ksegtbl_entry_t
*ste
= &kseg_table
[0];
649 for (; ste
->st_seg
; ste
++) {
650 tseg
= (ste
->st_addrtype
== KSEG_PTR_ADDR
) ?
651 *ste
->st_seg
: (struct seg
*)ste
->st_seg
;
657 return ((ksegtbl_entry_t
*)NULL
);
662 * Count pages within each kernel segment; call cpr_sparse_seg_check()
663 * to find out whether a sparsely filled segment needs special
664 * treatment (e.g. kvseg).
665 * Todo: A "segop_cpr" like segop_dump should be introduced, the cpr
666 * module shouldn't need to know segment details like if it is
667 * sparsely filled or not (makes kseg_table obsolete).
670 cpr_count_seg_pages(int mapflag
, bitfunc_t bitfunc
)
674 ksegtbl_entry_t
*ste
;
677 for (segp
= AS_SEGFIRST(&kas
); segp
; segp
= AS_SEGNEXT(&kas
, segp
)) {
678 if (ste
= cpr_sparse_seg_check(segp
)) {
679 pages
+= (ste
->st_fcn
)(mapflag
, bitfunc
, segp
);
681 pages
+= cpr_count_pages(segp
->s_base
,
682 segp
->s_size
, mapflag
, bitfunc
, DBG_SHOWRANGE
);
691 * count kernel pages within kas and any special ranges
694 cpr_count_kpages(int mapflag
, bitfunc_t bitfunc
)
699 * Some pages need to be taken care of differently.
700 * eg: panicbuf pages of sun4m are not in kas but they need
701 * to be saved. On sun4u, the physical pages of panicbuf are
702 * allocated via prom_retain().
704 kas_cnt
= i_cpr_count_special_kpages(mapflag
, bitfunc
);
705 kas_cnt
+= cpr_count_seg_pages(mapflag
, bitfunc
);
707 CPR_DEBUG(CPR_DEBUG9
, "cpr_count_kpages: kas_cnt=%ld\n", kas_cnt
);
708 CPR_DEBUG(CPR_DEBUG7
, "\ncpr_count_kpages: %ld pages, 0x%lx bytes\n",
709 kas_cnt
, mmu_ptob(kas_cnt
));
716 * Set a bit corresponding to the arg phys page number;
717 * returns 0 when the ppn is valid and the corresponding
718 * map bit was clear, otherwise returns 1.
721 cpr_setbit(pfn_t ppn
, int mapflag
)
728 for (dp
= CPR
->c_bmda
; dp
->cbd_size
; dp
++) {
729 if (PPN_IN_RANGE(ppn
, dp
)) {
730 bitmap
= DESC_TO_MAP(dp
, mapflag
);
731 rel
= ppn
- dp
->cbd_spfn
;
732 if ((clr
= isclr(bitmap
, rel
)) != 0)
743 * Clear a bit corresponding to the arg phys page number.
746 cpr_clrbit(pfn_t ppn
, int mapflag
)
753 for (dp
= CPR
->c_bmda
; dp
->cbd_size
; dp
++) {
754 if (PPN_IN_RANGE(ppn
, dp
)) {
755 bitmap
= DESC_TO_MAP(dp
, mapflag
);
756 rel
= ppn
- dp
->cbd_spfn
;
757 if ((set
= isset(bitmap
, rel
)) != 0)
769 cpr_nobit(pfn_t ppn
, int mapflag
)
776 * Lookup a bit corresponding to the arg phys page number.
779 cpr_isset(pfn_t ppn
, int mapflag
)
785 for (dp
= CPR
->c_bmda
; dp
->cbd_size
; dp
++) {
786 if (PPN_IN_RANGE(ppn
, dp
)) {
787 bitmap
= DESC_TO_MAP(dp
, mapflag
);
788 rel
= ppn
- dp
->cbd_spfn
;
789 return (isset(bitmap
, rel
));
798 * Go thru all pages and pick up any page not caught during the invalidation
799 * stage. This is also used to save pages with cow lock or phys page lock held
800 * (none zero p_lckcnt or p_cowcnt)
803 cpr_count_upages(int mapflag
, bitfunc_t bitfunc
)
806 pgcnt_t dcnt
= 0, tcnt
= 0;
809 page0
= pp
= page_first();
812 if (pp
->p_vnode
== NULL
|| PP_ISKAS(pp
) ||
813 PP_ISFREE(pp
) && PP_ISAGED(pp
))
816 pfn
= page_pptonum(pp
);
817 if (pf_is_memory(pfn
)) {
819 if ((*bitfunc
)(pfn
, mapflag
) == 0)
820 dcnt
++; /* dirty count */
822 } while ((pp
= page_next(pp
)) != page0
);
824 STAT
->cs_upage2statef
= dcnt
;
825 CPR_DEBUG(CPR_DEBUG9
, "cpr_count_upages: dirty=%ld total=%ld\n",
827 CPR_DEBUG(CPR_DEBUG7
, "cpr_count_upages: %ld pages, 0x%lx bytes\n",
828 dcnt
, mmu_ptob(dcnt
));
829 page0
= NULL
; /* for Lint */
835 * try compressing pages based on cflag,
836 * and for DEBUG kernels, verify uncompressed data checksum;
838 * this routine replaces common code from
839 * i_cpr_compress_and_save() and cpr_compress_and_write()
842 cpr_compress_pages(cpd_t
*dp
, pgcnt_t pages
, int cflag
)
844 size_t nbytes
, clen
, len
;
848 nbytes
= mmu_ptob(pages
);
851 * set length to the original uncompressed data size;
852 * always init cpd_flag to zero
854 dp
->cpd_length
= nbytes
;
859 * Make a copy of the uncompressed data so we can checksum it.
860 * Compress that copy so the checksum works at the other end
862 cprbcopy(CPR
->c_mapping_area
, cpr_pagecopy
, nbytes
);
863 dp
->cpd_usum
= checksum32(cpr_pagecopy
, nbytes
);
864 dp
->cpd_flag
|= CPD_USUM
;
865 datap
= cpr_pagecopy
;
867 datap
= CPR
->c_mapping_area
;
872 * try compressing the raw data to cpr_pagedata;
873 * if there was a size reduction: record the new length,
874 * flag the compression, and point to the compressed data.
878 clen
= compress(datap
, cpr_pagedata
, nbytes
);
880 dp
->cpd_flag
|= CPD_COMPRESS
;
881 dp
->cpd_length
= clen
;
882 datap
= cpr_pagedata
;
884 dp
->cpd_csum
= checksum32(datap
, clen
);
885 dp
->cpd_flag
|= CPD_CSUM
;
888 * decompress the data back to a scratch area
889 * and compare the new checksum with the original
890 * checksum to verify the compression.
892 bzero(cpr_pagecopy
, sizeof (cpr_pagecopy
));
893 len
= decompress(datap
, cpr_pagecopy
,
894 clen
, sizeof (cpr_pagecopy
));
895 test_sum
= checksum32(cpr_pagecopy
, len
);
896 ASSERT(test_sum
== dp
->cpd_usum
);
906 * 1. Prepare cpr page descriptor and write it to file
907 * 2. Compress page data and write it out
910 cpr_compress_and_write(vnode_t
*vp
, uint_t va
, pfn_t pfn
, pgcnt_t npg
)
914 cpd_t cpd
; /* cpr page descriptor */
915 extern void i_cpr_mapin(caddr_t
, uint_t
, pfn_t
);
916 extern void i_cpr_mapout(caddr_t
, uint_t
);
918 i_cpr_mapin(CPR
->c_mapping_area
, npg
, pfn
);
920 CPR_DEBUG(CPR_DEBUG3
, "mapped-in %ld pages, vaddr 0x%p, pfn 0x%lx\n",
921 npg
, (void *)CPR
->c_mapping_area
, pfn
);
924 * Fill cpr page descriptor.
926 cpd
.cpd_magic
= (uint_t
)CPR_PAGE_MAGIC
;
930 STAT
->cs_dumped_statefsz
+= mmu_ptob(npg
);
932 datap
= cpr_compress_pages(&cpd
, npg
, CPR
->c_flags
& C_COMPRESSING
);
934 /* Write cpr page descriptor */
935 error
= cpr_write(vp
, (caddr_t
)&cpd
, sizeof (cpd_t
));
937 /* Write compressed page data */
938 error
= cpr_write(vp
, (caddr_t
)datap
, cpd
.cpd_length
);
941 * Unmap the pages for tlb and vac flushing
943 i_cpr_mapout(CPR
->c_mapping_area
, npg
);
946 CPR_DEBUG(CPR_DEBUG1
,
947 "cpr_compress_and_write: vp 0x%p va 0x%x ", (void *)vp
, va
);
948 CPR_DEBUG(CPR_DEBUG1
, "pfn 0x%lx blk %d err %d\n",
949 pfn
, cpr_file_bn
, error
);
951 cpr_regular_pgs_dumped
+= npg
;
959 cpr_write(vnode_t
*vp
, caddr_t buffer
, size_t size
)
961 caddr_t fromp
= buffer
;
962 size_t bytes
, wbytes
;
965 if (cpr_dev_space
== 0) {
966 if (vp
->v_type
== VBLK
) {
967 cpr_dev_space
= cpr_get_devsize(vp
->v_rdev
);
968 ASSERT(cpr_dev_space
);
970 cpr_dev_space
= 1; /* not used in this case */
974 * break the write into multiple part if request is large,
975 * calculate count up to buf page boundary, then write it out.
979 bytes
= MIN(size
, cpr_buf_end
- cpr_wptr
);
980 cprbcopy(fromp
, cpr_wptr
, bytes
);
984 if (cpr_wptr
< cpr_buf_end
)
985 return (0); /* buffer not full yet */
986 ASSERT(cpr_wptr
== cpr_buf_end
);
988 wbytes
= dbtob(cpr_file_bn
+ cpr_buf_blocks
);
989 if (vp
->v_type
== VBLK
) {
990 if (wbytes
> cpr_dev_space
)
993 if (wbytes
> VTOI(vp
)->i_size
)
997 CPR_DEBUG(CPR_DEBUG3
,
998 "cpr_write: frmp=%p wptr=%p cnt=%lx...",
999 (void *)fromp
, (void *)cpr_wptr
, bytes
);
1001 * cross check, this should not happen!
1003 if (cpr_disk_writes_ok
== 0) {
1004 prom_printf("cpr_write: disk write too early!\n");
1009 error
= fop_dump(vp
, cpr_buf
, cpr_file_bn
, cpr_buf_blocks
,
1012 CPR_DEBUG(CPR_DEBUG3
, "done\n");
1014 STAT
->cs_real_statefsz
+= cpr_buf_size
;
1017 cpr_err(CE_WARN
, "cpr_write error %d", error
);
1020 cpr_file_bn
+= cpr_buf_blocks
; /* Increment block count */
1021 cpr_wptr
= cpr_buf
; /* back to top of buffer */
1028 cpr_flush_write(vnode_t
*vp
)
1034 * Calculate remaining blocks in buffer, rounded up to nearest
1037 nblk
= btod(cpr_wptr
- cpr_buf
);
1040 error
= fop_dump(vp
, (caddr_t
)cpr_buf
, cpr_file_bn
, nblk
, NULL
);
1043 cpr_file_bn
+= nblk
;
1045 CPR_DEBUG(CPR_DEBUG2
, "cpr_flush_write: error (%d)\n",
1051 cpr_clear_bitmaps(void)
1055 for (dp
= CPR
->c_bmda
; dp
->cbd_size
; dp
++) {
1056 bzero((void *)dp
->cbd_reg_bitmap
,
1057 (size_t)dp
->cbd_size
* 2);
1059 CPR_DEBUG(CPR_DEBUG7
, "\ncleared reg and vlt bitmaps\n");
1063 cpr_contig_pages(vnode_t
*vp
, int flag
)
1065 int chunks
= 0, error
= 0;
1066 pgcnt_t i
, j
, totbit
;
1069 uint_t spin_cnt
= 0;
1070 extern int i_cpr_compress_and_save();
1072 for (dp
= CPR
->c_bmda
; dp
->cbd_size
; dp
++) {
1073 spfn
= dp
->cbd_spfn
;
1074 totbit
= BTOb(dp
->cbd_size
);
1075 i
= 0; /* Beginning of bitmap */
1077 while (i
< totbit
) {
1078 while ((j
< CPR_MAXCONTIG
) && ((j
+ i
) < totbit
)) {
1079 if (isset((char *)dp
->cbd_reg_bitmap
, j
+i
))
1081 else /* not contiguous anymore */
1087 if (flag
== SAVE_TO_STORAGE
) {
1088 error
= i_cpr_compress_and_save(
1089 chunks
, spfn
+ i
, j
);
1092 } else if (flag
== WRITE_TO_STATEFILE
) {
1093 error
= cpr_compress_and_write(vp
, 0,
1099 if ((spin_cnt
& 0x5F) == 1)
1106 if (j
!= CPR_MAXCONTIG
) {
1107 /* Stopped on a non-tagged page */
1115 if (flag
== STORAGE_DESC_ALLOC
)
1123 cpr_show_range(caddr_t vaddr
, size_t size
,
1124 int mapflag
, bitfunc_t bitfunc
, pgcnt_t count
)
1126 char *action
, *bname
;
1128 bname
= (mapflag
== REGULAR_BITMAP
) ? "regular" : "volatile";
1129 if (bitfunc
== cpr_setbit
)
1131 else if (bitfunc
== cpr_clrbit
)
1135 prom_printf("range (0x%p, 0x%p), %s bitmap, %s %ld\n",
1136 (void *)vaddr
, (void *)(vaddr
+ size
), bname
, action
, count
);
1141 cpr_count_pages(caddr_t sva
, size_t size
,
1142 int mapflag
, bitfunc_t bitfunc
, int showrange
)
1148 eva
= sva
+ PAGE_ROUNDUP(size
);
1149 for (va
= sva
; va
< eva
; va
+= MMU_PAGESIZE
) {
1150 pfn
= va_to_pfn(va
);
1151 if (pfn
!= PFN_INVALID
&& pf_is_memory(pfn
)) {
1152 if ((*bitfunc
)(pfn
, mapflag
) == 0)
1157 if ((cpr_debug
& CPR_DEBUG7
) && showrange
== DBG_SHOWRANGE
)
1158 cpr_show_range(sva
, size
, mapflag
, bitfunc
, count
);
1165 cpr_count_volatile_pages(int mapflag
, bitfunc_t bitfunc
)
1170 count
+= cpr_count_pages(cpr_buf
, cpr_buf_size
,
1171 mapflag
, bitfunc
, DBG_SHOWRANGE
);
1174 count
+= cpr_count_pages(cpr_pagedata
, cpr_pagedata_size
,
1175 mapflag
, bitfunc
, DBG_SHOWRANGE
);
1177 count
+= i_cpr_count_storage_pages(mapflag
, bitfunc
);
1179 CPR_DEBUG(CPR_DEBUG7
, "cpr_count_vpages: %ld pages, 0x%lx bytes\n",
1180 count
, mmu_ptob(count
));
1186 cpr_dump_regular_pages(vnode_t
*vp
)
1190 cpr_regular_pgs_dumped
= 0;
1191 error
= cpr_contig_pages(vp
, WRITE_TO_STATEFILE
);
1193 CPR_DEBUG(CPR_DEBUG7
, "cpr_dump_regular_pages() done.\n");