usr/src/uts/common/cpr/cpr_misc.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
 */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/cpuvar.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/callb.h>
#include <sys/fs/ufs_inode.h>
#include <vm/anon.h>
#include <sys/fs/swapnode.h>	/* for swapfs_minfree */
#include <sys/kmem.h>
#include <sys/cpr.h>
#include <sys/conf.h>
#include <sys/machclock.h>
/*
 * CPR miscellaneous support routines
 */
#define	cpr_open(path, mode, vpp)	(vn_open(path, UIO_SYSSPACE, \
    mode, 0600, vpp, CRCREAT, 0))
#define	cpr_rdwr(rw, vp, basep, cnt)	(vn_rdwr(rw, vp, (caddr_t)(basep), \
    cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
    (ssize_t *)NULL))
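/*
 * Note: both wrappers perform kernel-space (UIO_SYSSPACE) I/O with the
 * kernel credential, create files with mode 0600, and discard the
 * residual byte count.
 */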
extern void clkset(time_t);
extern cpu_t *i_cpr_bootcpu(void);
extern caddr_t i_cpr_map_setup(void);
extern void i_cpr_free_memory_resources(void);

extern kmutex_t cpr_slock;
extern size_t cpr_buf_size;
extern char *cpr_buf;
extern size_t cpr_pagedata_size;
extern char *cpr_pagedata;
extern int cpr_bufs_allocated;
extern int cpr_bitmaps_allocated;
#if defined(__sparc)
static struct cprconfig cprconfig;
static int cprconfig_loaded = 0;
static int cpr_statefile_ok(vnode_t *, int);
static int cpr_p_online(cpu_t *, int);
static void cpr_save_mp_state(void);
#endif

int cpr_is_ufs(struct vfs *);
int cpr_is_zfs(struct vfs *);
char cpr_default_path[] = CPR_DEFAULT;

#define	COMPRESS_PERCENT 40	/* approx compression ratio in percent */
#define	SIZE_RATE	115	/* increase size by 15% */
#define	INTEGRAL	100	/* for integer math */
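/*
 * These constants drive the integer-math size estimates below:
 * a compressed statefile estimate of (ksize * COMPRESS_PERCENT) / INTEGRAL
 * and a retry growth of (i_size * SIZE_RATE) / INTEGRAL, i.e. +15%.
 */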
/*
 * cmn_err() followed by a 1/4 second delay; this gives the
 * logging service a chance to flush messages and helps avoid
 * intermixing output from prom_printf().
 */
/*PRINTFLIKE2*/
void
cpr_err(int ce, const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vcmn_err(ce, fmt, adx);
	va_end(adx);
	drv_usecwait(MICROSEC >> 2);
}
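/*
 * Start a suspend/resume cycle: grab cpr_slock so only one cpr operation
 * runs at a time, reset the CPR state, and record the requested function
 * code in CPR->c_fcn.
 */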
int
cpr_init(int fcn)
{
	/*
	 * Allow only one suspend/resume process.
	 */
	if (mutex_tryenter(&cpr_slock) == 0)
		return (EBUSY);

	CPR->c_flags = 0;
	CPR->c_substate = 0;
	CPR->c_cprboot_magic = 0;
	CPR->c_alloc_cnt = 0;

	CPR->c_fcn = fcn;
	if (fcn == AD_CPR_REUSABLE)
		CPR->c_flags |= C_REUSABLE;
	else
		CPR->c_flags |= C_SUSPENDING;
	if (fcn == AD_SUSPEND_TO_RAM || fcn == DEV_SUSPEND_TO_RAM) {
		return (0);
	}
#if defined(__sparc)
	if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ)
		CPR->c_flags |= C_COMPRESSING;
	/*
	 * reserve CPR_MAXCONTIG virtual pages for cpr_dump()
	 */
	CPR->c_mapping_area = i_cpr_map_setup();
	if (CPR->c_mapping_area == 0) {		/* no space in kernelmap */
		cpr_err(CE_CONT, "Unable to alloc from kernelmap.\n");
		mutex_exit(&cpr_slock);
		return (EAGAIN);
	}
	if (cpr_debug & CPR_DEBUG3)
		cpr_err(CE_CONT, "Reserved virtual range from 0x%p for writing "
		    "kas\n", (void *)CPR->c_mapping_area);
#endif

	return (0);
}
/*
 * This routine releases any resources used during the checkpoint.
 */
void
cpr_done(void)
{
	cpr_stat_cleanup();
	i_cpr_bitmap_cleanup();

	/*
	 * Free pages used by cpr buffers.
	 */
	if (cpr_buf) {
		kmem_free(cpr_buf, cpr_buf_size);
		cpr_buf = NULL;
	}
	if (cpr_pagedata) {
		kmem_free(cpr_pagedata, cpr_pagedata_size);
		cpr_pagedata = NULL;
	}

	i_cpr_free_memory_resources();
	mutex_exit(&cpr_slock);
	cpr_err(CE_CONT, "System has been resumed.\n");
}
#if defined(__sparc)
/*
 * reads config data into cprconfig
 */
static int
cpr_get_config(void)
{
	static char config_path[] = CPR_CONFIG;
	struct cprconfig *cf = &cprconfig;
	struct vnode *vp;
	char *fmt;
	int err;

	if (cprconfig_loaded)
		return (0);

	fmt = "cannot %s config file \"%s\", error %d\n";
	if (err = vn_open(config_path, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0)) {
		cpr_err(CE_CONT, fmt, "open", config_path, err);
		return (err);
	}

	err = cpr_rdwr(UIO_READ, vp, cf, sizeof (*cf));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	if (err) {
		cpr_err(CE_CONT, fmt, "read", config_path, err);
		return (err);
	}

	if (cf->cf_magic == CPR_CONFIG_MAGIC)
		cprconfig_loaded = 1;
	else {
		cpr_err(CE_CONT, "invalid config file \"%s\", "
		    "rerun pmconfig(1M)\n", config_path);
		err = EINVAL;
	}

	return (err);
}
/*
 * concat fs and path fields of the cprconfig structure;
 * returns pointer to the base of static data
 */
static char *
cpr_cprconfig_to_path(void)
{
	static char full_path[MAXNAMELEN];
	struct cprconfig *cf = &cprconfig;
	char *ptr;

	/*
	 * build /fs/path without extra '/'
	 */
	(void) strcpy(full_path, cf->cf_fs);
	if (strcmp(cf->cf_fs, "/"))
		(void) strcat(full_path, "/");
	ptr = cf->cf_path;
	if (*ptr == '/')
		ptr++;
	(void) strcat(full_path, ptr);
	return (full_path);
}
/*
 * Verify that the information in the configuration file regarding the
 * location for the statefile is still valid, depending on cf_type.
 * for CFT_UFS, cf_fs must still be a mounted filesystem, it must be
 *	mounted on the same device as when pmconfig was last run,
 *	and the translation of that device to a node in the prom's
 *	device tree must be the same as when pmconfig was last run.
 * for CFT_SPEC and CFT_ZVOL, cf_path must be the path to a block
 *	special file, it must have no file system mounted on it,
 *	and the translation of that device to a node in the prom's
 *	device tree must be the same as when pmconfig was last run.
 */
static int
cpr_verify_statefile_path(void)
{
	struct cprconfig *cf = &cprconfig;
	static const char long_name[] = "Statefile pathname is too long.\n";
	static const char lookup_fmt[] = "Lookup failed for "
	    "cpr statefile device %s.\n";
	static const char path_chg_fmt[] = "Device path for statefile "
	    "has changed from %s to %s.\t%s\n";
	static const char rerun[] = "Please rerun pmconfig(1m).";
	struct vfs *vfsp = NULL, *vfsp_save = rootvfs;
	ufsvfs_t *ufsvfsp = (ufsvfs_t *)rootvfs->vfs_data;
	ufsvfs_t *ufsvfsp_save = ufsvfsp;
	int error;
	struct vnode *vp;
	char *slash, *tail, *longest;
	char *errstr;
	int found = 0;
	union {
		char un_devpath[OBP_MAXPATHLEN];
		char un_sfpath[MAXNAMELEN];
	} un;
#define	devpath	un.un_devpath
#define	sfpath	un.un_sfpath

	ASSERT(cprconfig_loaded);
	/*
	 * We need not worry about locking or the timing of releasing
	 * the vnode, since we are single-threaded now.
	 */

	switch (cf->cf_type) {
	case CFT_SPEC:
		error = i_devname_to_promname(cf->cf_devfs, devpath,
		    OBP_MAXPATHLEN);
		if (error || strcmp(devpath, cf->cf_dev_prom)) {
			cpr_err(CE_CONT, path_chg_fmt,
			    cf->cf_dev_prom, devpath, rerun);
			return (error);
		}
		/*FALLTHROUGH*/
	case CFT_ZVOL:
		if (strlen(cf->cf_path) > sizeof (sfpath)) {
			cpr_err(CE_CONT, long_name);
			return (ENAMETOOLONG);
		}
		if ((error = lookupname(cf->cf_devfs,
		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
			cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
			return (error);
		}
		if (vp->v_type != VBLK)
			errstr = "statefile must be a block device";
		else if (vfs_devismounted(vp->v_rdev))
			errstr = "statefile device must not "
			    "have a file system mounted on it";
		else if (IS_SWAPVP(vp))
			errstr = "statefile device must not "
			    "be configured as swap file";
		else
			errstr = NULL;

		VN_RELE(vp);
		if (errstr) {
			cpr_err(CE_CONT, "%s.\n", errstr);
			return (ENOTSUP);
		}

		return (error);
	case CFT_UFS:
		break;		/* don't indent all the original code */
	default:
		cpr_err(CE_PANIC, "invalid cf_type");
	}
	/*
	 * The original code for the UFS statefile
	 */
	if (strlen(cf->cf_fs) + strlen(cf->cf_path) + 2 > sizeof (sfpath)) {
		cpr_err(CE_CONT, long_name);
		return (ENAMETOOLONG);
	}

	bzero(sfpath, sizeof (sfpath));
	(void) strcpy(sfpath, cpr_cprconfig_to_path());

	if (*sfpath != '/') {
		cpr_err(CE_CONT, "Statefile pathname %s "
		    "must begin with a /\n", sfpath);
		return (EINVAL);
	}

	/*
	 * Find the longest prefix of the statefile pathname which
	 * is the mountpoint of a filesystem.  This string must
	 * match the cf_fs field we read from the config file.  Otherwise
	 * the user has changed things without running pmconfig.
	 */
	tail = longest = sfpath + 1;	/* pt beyond the leading "/" */
	while ((slash = strchr(tail, '/')) != NULL) {
		*slash = '\0';	  /* temporarily terminate the string */
		if ((error = lookupname(sfpath,
		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
			*slash = '/';
			cpr_err(CE_CONT, "A directory in the "
			    "statefile path %s was not found.\n", sfpath);
			VN_RELE(vp);

			return (error);
		}

		vfs_list_read_lock();
		vfsp = rootvfs;
		do {
			ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
			if (ufsvfsp != NULL && ufsvfsp->vfs_root == vp) {
				found = 1;
				break;
			}
			vfsp = vfsp->vfs_next;
		} while (vfsp != rootvfs);
		vfs_list_unlock();

		/*
		 * If we have found a filesystem mounted on the current
		 * path prefix, remember the end of the string in
		 * "longest".  If it happens to be the exact fs
		 * saved in the configuration file, save the current
		 * ufsvfsp so we can make additional checks further down.
		 */
		if (found) {
			longest = slash;
			if (strcmp(cf->cf_fs, sfpath) == 0) {
				ufsvfsp_save = ufsvfsp;
				vfsp_save = vfsp;
			}
			found = 0;
		}

		VN_RELE(vp);
		*slash = '/';
		tail = slash + 1;
	}
	*longest = '\0';
	if (cpr_is_ufs(vfsp_save) == 0 || strcmp(cf->cf_fs, sfpath)) {
		cpr_err(CE_CONT, "Filesystem containing "
		    "the statefile when pmconfig was run (%s) has "
		    "changed to %s. %s\n", cf->cf_fs, sfpath, rerun);
		return (EINVAL);
	}
	if ((error = lookupname(cf->cf_devfs,
	    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
		cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
		return (error);
	}

	if (ufsvfsp_save->vfs_devvp->v_rdev != vp->v_rdev) {
		cpr_err(CE_CONT, "Filesystem containing "
		    "statefile no longer mounted on device %s. "
		    "See power.conf(4).", cf->cf_devfs);
		VN_RELE(vp);
		return (ENXIO);
	}
	VN_RELE(vp);

	error = i_devname_to_promname(cf->cf_devfs, devpath, OBP_MAXPATHLEN);
	if (error || strcmp(devpath, cf->cf_dev_prom)) {
		cpr_err(CE_CONT, path_chg_fmt,
		    cf->cf_dev_prom, devpath, rerun);
		return (error);
	}

	return (0);
}
/*
 * Make sure that the statefile can be used as a block special statefile
 * (meaning that it exists and has nothing mounted on it)
 * Returns errno if not a valid statefile.
 */
int
cpr_check_spec_statefile(void)
{
	int err;

	if (err = cpr_get_config())
		return (err);
	ASSERT(cprconfig.cf_type == CFT_SPEC ||
	    cprconfig.cf_type == CFT_ZVOL);

	if (cprconfig.cf_devfs == NULL)
		return (ENXIO);

	return (cpr_verify_statefile_path());
}
int
cpr_alloc_statefile(int alloc_retry)
{
	register int rc = 0;
	char *str;

	/*
	 * Statefile size validation.  If this is the first checkpoint,
	 * disk block allocation will be done; otherwise, just do a file
	 * size check.  If statefile allocation is being retried, C_VP
	 * will already be inited.
	 */
	if (alloc_retry) {
		str = "\n-->Retrying statefile allocation...";
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG7))
			prom_printf(str);
		if (C_VP->v_type != VBLK)
			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL, NULL);
	} else {
		/*
		 * Open an existing file for writing; the state file needs
		 * to be pre-allocated since we can't and don't want to do
		 * allocation during checkpoint (too much of the OS is
		 * disabled).
		 *    - do a preliminary size check here; if it is too small,
		 *	allocate more space internally and retry.
		 *    - check the vp to make sure it's the right type.
		 */
		char *path = cpr_build_statefile_path();

		if (path == NULL)
			return (ENXIO);
		else if (rc = cpr_verify_statefile_path())
			return (rc);

		if (rc = vn_open(path, UIO_SYSSPACE,
		    FCREAT|FWRITE, 0600, &C_VP, CRCREAT, 0)) {
			cpr_err(CE_WARN, "cannot open statefile %s", path);
			return (rc);
		}
	}

	/*
	 * Only ufs and block special statefiles supported
	 */
	if (C_VP->v_type != VREG && C_VP->v_type != VBLK) {
		cpr_err(CE_CONT,
		    "Statefile must be regular file or block special file.");
		return (EACCES);
	}

	if (rc = cpr_statefile_ok(C_VP, alloc_retry))
		return (rc);

	if (C_VP->v_type != VBLK) {
		/*
		 * sync out the fs change due to the statefile reservation.
		 */
		(void) VFS_SYNC(C_VP->v_vfsp, 0, CRED());

		/*
		 * Validate disk block allocation for the state file.
		 * Ask the file system to prepare itself for the dump
		 * operation.
		 */
		if (rc = VOP_DUMPCTL(C_VP, DUMP_ALLOC, NULL, NULL)) {
			cpr_err(CE_CONT, "Error allocating "
			    "blocks for cpr statefile.");
			return (rc);
		}
	}
	return (0);
}
/*
 * Lookup device size and return available space in bytes.
 * NOTE: Since prop_op(9E) can't tell the difference between a character
 * and a block reference, it is ok to ask for "Size" instead of "Nblocks".
 */
size_t
cpr_get_devsize(dev_t dev)
{
	size_t bytes = 0;

	bytes = cdev_Size(dev);
	if (bytes == 0)
		bytes = cdev_size(dev);

	if (bytes > CPR_SPEC_OFFSET)
		bytes -= CPR_SPEC_OFFSET;
	else
		bytes = 0;

	return (bytes);
}
/*
 * increase statefile size
 */
static int
cpr_grow_statefile(vnode_t *vp, u_longlong_t newsize)
{
	extern uchar_t cpr_pagecopy[];
	struct inode *ip = VTOI(vp);
	u_longlong_t offset;
	int error, increase;
	ssize_t resid;

	rw_enter(&ip->i_contents, RW_READER);
	increase = (ip->i_size < newsize);
	offset = ip->i_size;
	rw_exit(&ip->i_contents);

	if (increase == 0)
		return (0);

	/*
	 * write to each logical block to reserve disk space
	 */
	error = 0;
	cpr_pagecopy[0] = '1';
	for (; offset < newsize; offset += ip->i_fs->fs_bsize) {
		if (error = vn_rdwr(UIO_WRITE, vp, (caddr_t)cpr_pagecopy,
		    ip->i_fs->fs_bsize, (offset_t)offset, UIO_SYSSPACE, 0,
		    (rlim64_t)MAXOFF_T, CRED(), &resid)) {
			if (error == ENOSPC) {
				cpr_err(CE_WARN, "error %d while reserving "
				    "disk space for statefile %s\n"
				    "wanted %lld bytes, file is %lld short",
				    error, cpr_cprconfig_to_path(),
				    newsize, newsize - offset);
			}
			break;
		}
	}
	return (error);
}
/*
 * do a simple estimate of the space needed to hold the statefile
 * taking compression into account, but be fairly conservative
 * so we have a better chance of completing; when dump fails,
 * the retry cost is fairly high.
 *
 * Do disk block allocation for the state file if no space has
 * been allocated yet.  Since the state file will not be removed,
 * allocation should only be done once.
 */
static int
cpr_statefile_ok(vnode_t *vp, int alloc_retry)
{
	extern size_t cpr_bitmap_size;
	struct inode *ip = VTOI(vp);
	const int UCOMP_RATE = 20; /* comp. ratio*10 for user pages */
	u_longlong_t size, isize, ksize, raw_data;
	char *str, *est_fmt;
	size_t space;
	int error;

	/*
	 * number of pages short for swapping.
	 */
	STAT->cs_nosw_pages = k_anoninfo.ani_mem_resv;
	if (STAT->cs_nosw_pages < 0)
		STAT->cs_nosw_pages = 0;

	str = "cpr_statefile_ok:";

	CPR_DEBUG(CPR_DEBUG9, "Phys swap: max=%lu resv=%lu\n",
	    k_anoninfo.ani_max, k_anoninfo.ani_phys_resv);
	CPR_DEBUG(CPR_DEBUG9, "Mem swap: max=%ld resv=%lu\n",
	    MAX(availrmem - swapfs_minfree, 0),
	    k_anoninfo.ani_mem_resv);
	CPR_DEBUG(CPR_DEBUG9, "Total available swap: %ld\n",
	    CURRENT_TOTAL_AVAILABLE_SWAP);
	/*
	 * try increasing filesize by 15%
	 */
	if (alloc_retry) {
		/*
		 * block device doesn't get any bigger
		 */
		if (vp->v_type == VBLK) {
			if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
				prom_printf(
				    "Retry statefile on special file\n");
			return (ENOMEM);
		} else {
			rw_enter(&ip->i_contents, RW_READER);
			size = (ip->i_size * SIZE_RATE) / INTEGRAL;
			rw_exit(&ip->i_contents);
		}
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("Retry statefile size = %lld\n", size);
	} else {
		u_longlong_t cpd_size;
		pgcnt_t npages, nback;
		int ndvram;

		ndvram = 0;
		(void) callb_execute_class(CB_CL_CPR_FB,
		    (int)(uintptr_t)&ndvram);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("ndvram size = %d\n", ndvram);

		/*
		 * estimate 1 cpd_t for every (CPR_MAXCONTIG / 2) pages
		 */
		npages = cpr_count_kpages(REGULAR_BITMAP, cpr_nobit);
		cpd_size = sizeof (cpd_t) * (npages / (CPR_MAXCONTIG / 2));
		raw_data = cpd_size + cpr_bitmap_size;
		ksize = ndvram + mmu_ptob(npages);

		est_fmt = "%s estimated size with "
		    "%scompression %lld, ksize %lld\n";
		nback = mmu_ptob(STAT->cs_nosw_pages);
		if (CPR->c_flags & C_COMPRESSING) {
			size = ((ksize * COMPRESS_PERCENT) / INTEGRAL) +
			    raw_data + ((nback * 10) / UCOMP_RATE);
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "", size, ksize);
		} else {
			size = ksize + raw_data + nback;
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "no ",
			    size, ksize);
		}
	}
	/*
	 * All this is much simpler for a block device
	 */
	if (vp->v_type == VBLK) {
		space = cpr_get_devsize(vp->v_rdev);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("statefile dev size %lu\n", space);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("%s Estimated statefile size %llu, "
			    "space %lu\n", str, size, space);
		if (size > space) {
			cpr_err(CE_CONT, "Statefile partition too small.");
			return (ENOMEM);
		}
		return (0);
	} else {
		if (CPR->c_alloc_cnt++ > C_MAX_ALLOC_RETRY) {
			cpr_err(CE_CONT, "Statefile allocation retry failed\n");
			return (ENOMEM);
		}

		/*
		 * Estimate space needed for the state file.
		 *
		 * State file size in bytes:
		 *	kernel size + non-cache pte seg +
		 *	bitmap size + cpr state file headers size
		 * (round up to fs->fs_bsize)
		 */
		size = blkroundup(ip->i_fs, size);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		error = cpr_grow_statefile(vp, size);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)) {
			rw_enter(&ip->i_contents, RW_READER);
			isize = ip->i_size;
			rw_exit(&ip->i_contents);
			prom_printf("%s Estimated statefile size %lld, "
			    "i_size %lld\n", str, size, isize);
		}

		return (error);
	}
}
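/*
 * Close the statefile: release any dump blocks reserved via VOP_DUMPCTL
 * (unless in reusable mode), then close and release the vnode.
 */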
void
cpr_statef_close(void)
{
	if (C_VP) {
		if (!cpr_reusable_mode)
			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL, NULL);
		(void) VOP_CLOSE(C_VP, FWRITE, 1, (offset_t)0, CRED(), NULL);
		VN_RELE(C_VP);
		C_VP = 0;
	}
}
/*
 * open cpr default file and display error
 */
int
cpr_open_deffile(int mode, vnode_t **vpp)
{
	int error;

	if (error = cpr_open(cpr_default_path, mode, vpp))
		cpr_err(CE_CONT, "cannot open \"%s\", error %d\n",
		    cpr_default_path, error);
	return (error);
}
/*
 * write cdef_t to disk.  This contains the original values of prom
 * properties that we modify.  We fill in the magic number of the file
 * here as a signal to the booter code that the state file is valid.
 * Be sure the file gets synced, since we may be shutting down the OS.
 */
int
cpr_write_deffile(cdef_t *cdef)
{
	struct vnode *vp;
	char *str;
	int rc;

	if (rc = cpr_open_deffile(FCREAT|FWRITE, &vp))
		return (rc);

	if (rc = cpr_rdwr(UIO_WRITE, vp, cdef, sizeof (*cdef)))
		str = "write";
	else if (rc = VOP_FSYNC(vp, FSYNC, CRED(), NULL))
		str = "fsync";
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);

	if (rc) {
		cpr_err(CE_WARN, "%s error %d, file \"%s\"",
		    str, rc, cpr_default_path);
	}
	return (rc);
}
/*
 * Clear the magic number in the defaults file.  This tells the booter
 * program that the state file is not current and thus prevents
 * any attempt to restore from an obsolete state file.
 */
void
cpr_clear_definfo(void)
{
	struct vnode *vp;
	cmini_t mini;

	if ((CPR->c_cprboot_magic != CPR_DEFAULT_MAGIC) ||
	    cpr_open_deffile(FCREAT|FWRITE, &vp))
		return;
	mini.magic = mini.reusable = 0;
	(void) cpr_rdwr(UIO_WRITE, vp, &mini, sizeof (mini));
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
}
/*
 * If the cpr default file is invalid, then we must not be in reusable mode;
 * if it is valid, it tells us our mode.
 */
int
cpr_get_reusable_mode(void)
{
	struct vnode *vp;
	cmini_t mini;
	int rc;

	if (cpr_open(cpr_default_path, FREAD, &vp))
		return (0);

	rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	if (rc == 0 && mini.magic == CPR_DEFAULT_MAGIC)
		return (mini.reusable);

	return (0);
}
#endif
/*
 * clock/time related routines
 */
static time_t	cpr_time_stamp;
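/*
 * Snapshot the current TOD clock into a 32-bit cpr_time_t under tod_lock.
 */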
void
cpr_tod_get(cpr_time_t *ctp)
{
	timestruc_t ts;

	mutex_enter(&tod_lock);
	ts = TODOP_GET(tod_ops);
	mutex_exit(&tod_lock);
	ctp->tv_sec = (time32_t)ts.tv_sec;
	ctp->tv_nsec = (int32_t)ts.tv_nsec;
}

void
cpr_tod_status_set(int tod_flag)
{
	mutex_enter(&tod_lock);
	tod_status_set(tod_flag);
	mutex_exit(&tod_lock);
}

void
cpr_save_time(void)
{
	cpr_time_stamp = gethrestime_sec();
}

/*
 * correct time based on saved time stamp or hardware clock
 */
void
cpr_restore_time(void)
{
	clkset(cpr_time_stamp);
}
#if defined(__sparc)
/*
 * CPU ONLINE/OFFLINE CODE
 */
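/*
 * Record the current set of online cpus, make sure the boot cpu is
 * online, and take every other active cpu offline for the suspend.
 */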
int
cpr_mp_offline(void)
{
	cpu_t *cp, *bootcpu;
	int rc = 0;
	int brought_up_boot = 0;

	/*
	 * Do nothing for UP.
	 */
	if (ncpus == 1)
		return (0);

	mutex_enter(&cpu_lock);

	cpr_save_mp_state();

	bootcpu = i_cpr_bootcpu();
	if (!CPU_ACTIVE(bootcpu)) {
		if ((rc = cpr_p_online(bootcpu, CPU_CPR_ONLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
		brought_up_boot = 1;
	}

	cp = cpu_list;
	do {
		if (cp == bootcpu)
			continue;
		if (cp->cpu_flags & CPU_OFFLINE)
			continue;
		if ((rc = cpr_p_online(cp, CPU_CPR_OFFLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	} while ((cp = cp->cpu_next) != cpu_list);
	if (brought_up_boot && (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)))
		prom_printf("changed cpu %p to state %d\n",
		    (void *)bootcpu, CPU_CPR_ONLINE);
	mutex_exit(&cpu_lock);

	return (rc);
}
int
cpr_mp_online(void)
{
	cpu_t *cp, *bootcpu = CPU;
	int rc = 0;

	/*
	 * Do nothing for UP.
	 */
	if (ncpus == 1)
		return (0);

	/*
	 * cpr_save_mp_state() sets CPU_CPR_ONLINE in cpu_cpr_flags
	 * to indicate a cpu was online at the time of cpr_suspend();
	 * now restart those cpus that were marked as CPU_CPR_ONLINE
	 * and actually are offline.
	 */
	mutex_enter(&cpu_lock);
	for (cp = bootcpu->cpu_next; cp != bootcpu; cp = cp->cpu_next) {
		/*
		 * Clear the CPU_FROZEN flag in all cases.
		 */
		cp->cpu_flags &= ~CPU_FROZEN;

		if (CPU_CPR_IS_OFFLINE(cp))
			continue;
		if (CPU_ACTIVE(cp))
			continue;
		if ((rc = cpr_p_online(cp, CPU_CPR_ONLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	}

	/*
	 * turn off the boot cpu if it was offlined
	 */
	if (CPU_CPR_IS_OFFLINE(bootcpu)) {
		if ((rc = cpr_p_online(bootcpu, CPU_CPR_OFFLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	}
	mutex_exit(&cpu_lock);
	return (0);
}
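/*
 * Remember which cpus are online at suspend time by setting CPU_CPR_ONLINE
 * in cpu_cpr_flags; cpr_mp_online() uses this to restore the same set of
 * cpus after resume.
 */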
static void
cpr_save_mp_state(void)
{
	cpu_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	cp = cpu_list;
	do {
		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
		if (CPU_ACTIVE(cp))
			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
	} while ((cp = cp->cpu_next) != cpu_list);
}
/*
 * change cpu to online/offline
 */
static int
cpr_p_online(cpu_t *cp, int state)
{
	int rc;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (state) {
	case CPU_CPR_ONLINE:
		rc = cpu_online(cp);
		break;
	case CPU_CPR_OFFLINE:
		rc = cpu_offline(cp, CPU_FORCED);
		break;
	}
	if (rc) {
		cpr_err(CE_WARN, "Failed to change processor %d to "
		    "state %d, (errno %d)", cp->cpu_id, state, rc);
	}
	return (rc);
}
/*
 * Construct the pathname of the state file and return a pointer to
 * caller.  Read the config file to get the mount point of the
 * filesystem and the pathname within fs.
 */
char *
cpr_build_statefile_path(void)
{
	struct cprconfig *cf = &cprconfig;

	if (cpr_get_config())
		return (NULL);

	switch (cf->cf_type) {
	case CFT_UFS:
		if (strlen(cf->cf_path) + strlen(cf->cf_fs) >= MAXNAMELEN - 1) {
			cpr_err(CE_CONT, "Statefile path is too long.\n");
			return (NULL);
		}
		return (cpr_cprconfig_to_path());
	case CFT_ZVOL:
		/*FALLTHROUGH*/
	case CFT_SPEC:
		return (cf->cf_devfs);
	default:
		cpr_err(CE_PANIC, "invalid statefile type");
		/*NOTREACHED*/
		return (NULL);
	}
}
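/*
 * Returns non-zero when the configured statefile is a raw block special
 * device (CFT_SPEC) rather than a ufs file or a zvol.
 */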
int
cpr_statefile_is_spec(void)
{
	if (cpr_get_config())
		return (0);
	return (cprconfig.cf_type == CFT_SPEC);
}
char *
cpr_get_statefile_prom_path(void)
{
	struct cprconfig *cf = &cprconfig;

	ASSERT(cprconfig_loaded);
	ASSERT(cf->cf_magic == CPR_CONFIG_MAGIC);
	ASSERT(cf->cf_type == CFT_SPEC || cf->cf_type == CFT_ZVOL);
	return (cf->cf_dev_prom);
}
/*
 * XXX The following routines need to be in the vfs source code.
 */
int
cpr_is_ufs(struct vfs *vfsp)
{
	char *fsname;

	fsname = vfssw[vfsp->vfs_fstype].vsw_name;
	return (strcmp(fsname, "ufs") == 0);
}

int
cpr_is_zfs(struct vfs *vfsp)
{
	char *fsname;

	fsname = vfssw[vfsp->vfs_fstype].vsw_name;
	return (strcmp(fsname, "zfs") == 0);
}
/*
 * This is a list of file systems that are allowed to be writeable when a
 * reusable statefile checkpoint is taken.  They must not have any state that
 * cannot be restored to consistency by simply rebooting using the checkpoint.
 * (In contrast to ufs and pcfs which have disk state that could get
 * out of sync with the in-kernel data).
 */
int
cpr_reusable_mount_check(void)
{
	struct vfs *vfsp;
	char *fsname;
	char **cpp;
	static char *cpr_writeok_fss[] = {
		"autofs", "devfs", "fd", "lofs", "mntfs", "namefs", "nfs",
		"proc", "tmpfs", "ctfs", "objfs", "dev", NULL
	};

	vfs_list_read_lock();
	vfsp = rootvfs;
	do {
		if (vfsp->vfs_flag & VFS_RDONLY) {
			vfsp = vfsp->vfs_next;
			continue;
		}
		fsname = vfssw[vfsp->vfs_fstype].vsw_name;
		for (cpp = cpr_writeok_fss; *cpp; cpp++) {
			if (strcmp(fsname, *cpp) == 0)
				break;
		}
		/*
		 * if the inner loop reached the NULL terminator,
		 * the current fs-type does not match any OK-type
		 */
		if (*cpp == NULL) {
			cpr_err(CE_CONT, "a filesystem of type %s is "
			    "mounted read/write.\nReusable statefile requires "
			    "no writeable filesystem of this type be mounted\n",
			    fsname);
			vfs_list_unlock();
			return (EINVAL);
		}
		vfsp = vfsp->vfs_next;
	} while (vfsp != rootvfs);
	vfs_list_unlock();
	return (0);
}
/*
 * return statefile offset in DEV_BSIZE units
 */
int
cpr_statefile_offset(void)
{
	return (cprconfig.cf_type != CFT_UFS ? btod(CPR_SPEC_OFFSET) : 0);
}

/*
 * Force a fresh read of the cprinfo per uadmin 3 call
 */
void
cpr_forget_cprconfig(void)
{
	cprconfig_loaded = 0;
}
#endif