uts: make emu10k non-verbose
[unleashed.git] / kernel / cpr / cpr_main.c
blob593466b5d0dbb729ee2a3e2327dc05e41625ddf6
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * This module contains the guts of checkpoint-resume mechanism.
28 * All code in this module is platform independent.
31 #include <sys/types.h>
32 #include <sys/errno.h>
33 #include <sys/callb.h>
34 #include <sys/processor.h>
35 #include <sys/machsystm.h>
36 #include <sys/clock.h>
37 #include <sys/vfs.h>
38 #include <sys/kmem.h>
39 #include <nfs/lm.h>
40 #include <sys/systm.h>
41 #include <sys/cpr.h>
42 #include <sys/bootconf.h>
43 #include <sys/cyclic.h>
44 #include <sys/filio.h>
45 #include <sys/fs/ufs_filio.h>
46 #include <sys/epm.h>
47 #include <sys/modctl.h>
48 #include <sys/reboot.h>
49 #include <sys/kdi.h>
50 #include <sys/promif.h>
51 #include <sys/srn.h>
52 #include <sys/cpr_impl.h>
54 #define PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)
56 extern struct cpr_terminator cpr_term;
58 extern int cpr_alloc_statefile(int);
59 extern void cpr_start_kernel_threads(void);
60 extern void cpr_abbreviate_devpath(char *, char *);
61 extern void cpr_convert_promtime(cpr_time_t *);
62 extern void cpr_send_notice(void);
63 extern void cpr_set_bitmap_size(void);
64 extern void cpr_stat_init();
65 extern void cpr_statef_close(void);
66 extern void flush_windows(void);
67 extern void (*srn_signal)(int, int);
68 extern void init_cpu_syscall(struct cpu *);
69 extern void i_cpr_pre_resume_cpus();
70 extern void i_cpr_post_resume_cpus();
71 extern int cpr_is_ufs(struct vfs *);
73 extern int pm_powering_down;
74 extern kmutex_t srn_clone_lock;
75 extern int srn_inuse;
77 static int cpr_suspend(int);
78 static int cpr_resume(int);
79 static void cpr_suspend_init(int);
80 #if defined(__x86)
81 static int cpr_suspend_cpus(void);
82 static void cpr_resume_cpus(void);
83 #endif
84 static int cpr_all_online(void);
85 static void cpr_restore_offline(void);
87 cpr_time_t wholecycle_tv;
88 int cpr_suspend_succeeded;
89 pfn_t curthreadpfn;
90 int curthreadremapped;
92 extern cpuset_t cpu_ready_set;
94 extern processorid_t i_cpr_bootcpuid(void);
95 extern cpu_t *i_cpr_bootcpu(void);
96 extern void tsc_adjust_delta(hrtime_t tdelta);
97 extern void tsc_resume(void);
98 extern int tsc_resume_in_cyclic;
101 * Set this variable to 1, to have device drivers resume in an
102 * uniprocessor environment. This is to allow drivers that assume
103 * that they resume on a UP machine to continue to work. Should be
104 * deprecated once the broken drivers are fixed
106 int cpr_resume_uniproc = 0;
109 * save or restore abort_enable; this prevents a drop
110 * to kadb or prom during cpr_resume_devices() when
111 * there is no kbd present; see abort_sequence_enter()
113 static void
114 cpr_sae(int stash)
116 static int saved_ae = -1;
118 if (stash) {
119 saved_ae = abort_enable;
120 abort_enable = 0;
121 } else if (saved_ae != -1) {
122 abort_enable = saved_ae;
123 saved_ae = -1;
129 * The main switching point for cpr, this routine starts the ckpt
130 * and state file saving routines; on resume the control is
131 * returned back to here and it then calls the resume routine.
134 cpr_main(int sleeptype)
136 int rc, rc2;
137 label_t saveq;
138 klwp_t *tlwp = ttolwp(curthread);
140 if (sleeptype == CPR_TODISK) {
141 if ((rc = cpr_default_setup(1)) != 0)
142 return (rc);
143 ASSERT(tlwp);
144 saveq = tlwp->lwp_qsav;
147 if (sleeptype == CPR_TORAM) {
148 rc = cpr_suspend(sleeptype);
149 PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
150 if (rc == 0) {
151 int i_cpr_power_down(int sleeptype);
154 * From this point on, we should be at a high
155 * spl, interrupts disabled, and all but one
156 * cpu's paused (effectively UP/single threaded).
157 * So this is were we want to put ASSERTS()
158 * to let us know otherwise.
160 ASSERT(cpus_paused());
163 * Now do the work of actually putting this
164 * machine to sleep!
166 rc = i_cpr_power_down(sleeptype);
167 if (rc == 0) {
168 PMD(PMD_SX, ("back from successful suspend\n"))
171 * We do care about the return value from cpr_resume
172 * at this point, as it will tell us if one of the
173 * resume functions failed (cpr_resume_devices())
174 * However, for this to return and _not_ panic, means
175 * that we must be in one of the test functions. So
176 * check for that and return an appropriate message.
178 rc2 = cpr_resume(sleeptype);
179 if (rc2 != 0) {
180 ASSERT(cpr_test_point > 0);
181 cmn_err(CE_NOTE,
182 "cpr_resume returned non-zero: %d\n", rc2);
183 PMD(PMD_SX, ("cpr_resume rets %x\n", rc2))
185 ASSERT(!cpus_paused());
186 } else {
187 PMD(PMD_SX, ("failed suspend, resuming\n"))
188 rc = cpr_resume(sleeptype);
190 return (rc);
193 * Remember where we are for resume after reboot
195 if (!setjmp(&tlwp->lwp_qsav)) {
197 * try to checkpoint the system, if failed return back
198 * to userland, otherwise power off.
200 rc = cpr_suspend(sleeptype);
201 if (rc || cpr_reusable_mode) {
203 * We don't really want to go down, or
204 * something went wrong in suspend, do what we can
205 * to put the system back to an operable state then
206 * return back to userland.
208 PMD(PMD_SX, ("failed suspend, resuming\n"))
209 (void) cpr_resume(sleeptype);
210 PMD(PMD_SX, ("back from failed suspend resume\n"))
212 } else {
214 * This is the resumed side of longjmp, restore the previous
215 * longjmp pointer if there is one so this will be transparent
216 * to the world.
217 * This path is only for CPR_TODISK, where we reboot
219 ASSERT(sleeptype == CPR_TODISK);
220 tlwp->lwp_qsav = saveq;
221 CPR->c_flags &= ~C_SUSPENDING;
222 CPR->c_flags |= C_RESUMING;
225 * resume the system back to the original state
227 rc = cpr_resume(sleeptype);
228 PMD(PMD_SX, ("back from successful suspend; resume rets %x\n",
229 rc))
232 (void) cpr_default_setup(0);
234 return (rc);
238 #if defined(__sparc)
241 * check/disable or re-enable UFS logging
243 static void
244 cpr_log_status(int enable, int *svstat, vnode_t *vp)
246 int cmd, status, error;
247 char *str, *able;
248 fiolog_t fl;
249 refstr_t *mntpt;
251 str = "cpr_log_status";
252 bzero(&fl, sizeof (fl));
253 fl.error = FIOLOG_ENONE;
256 * when disabling, first get and save logging status (0 or 1)
258 if (enable == 0) {
259 if (error = fop_ioctl(vp, _FIOISLOG,
260 (uintptr_t)&status, FKIOCTL, CRED(), NULL, NULL)) {
261 mntpt = vfs_getmntpoint(vp->v_vfsp);
262 prom_printf("%s: \"%s\", cant get logging "
263 "status, error %d\n", str, refstr_value(mntpt),
264 error);
265 refstr_rele(mntpt);
266 return;
268 *svstat = status;
269 if (cpr_debug & CPR_DEBUG5) {
270 mntpt = vfs_getmntpoint(vp->v_vfsp);
271 errp("%s: \"%s\", logging status = %d\n",
272 str, refstr_value(mntpt), status);
273 refstr_rele(mntpt);
276 able = "disable";
277 cmd = _FIOLOGDISABLE;
278 } else {
279 able = "enable";
280 cmd = _FIOLOGENABLE;
284 * disable or re-enable logging when the saved status is 1
286 if (*svstat == 1) {
287 error = fop_ioctl(vp, cmd, (uintptr_t)&fl,
288 FKIOCTL, CRED(), NULL, NULL);
289 if (error) {
290 mntpt = vfs_getmntpoint(vp->v_vfsp);
291 prom_printf("%s: \"%s\", cant %s logging, error %d\n",
292 str, refstr_value(mntpt), able, error);
293 refstr_rele(mntpt);
294 } else {
295 if (cpr_debug & CPR_DEBUG5) {
296 mntpt = vfs_getmntpoint(vp->v_vfsp);
297 errp("%s: \"%s\", logging is now %sd\n",
298 str, refstr_value(mntpt), able);
299 refstr_rele(mntpt);
305 * when enabling logging, reset the saved status
306 * to unknown for next time
308 if (enable)
309 *svstat = -1;
313 * enable/disable UFS logging on filesystems containing cpr_default_path
314 * and cpr statefile. since the statefile can be on any fs, that fs
315 * needs to be handled separately. this routine and cprboot expect that
316 * CPR_CONFIG and CPR_DEFAULT both reside on the same fs, rootfs. cprboot
317 * is loaded from the device with rootfs and uses the same device to open
318 * both CPR_CONFIG and CPR_DEFAULT (see common/support.c). moving either
319 * file outside of rootfs would cause errors during cprboot, plus cpr and
320 * fsck problems with the new fs if logging were enabled.
323 static int
324 cpr_ufs_logging(int enable)
326 static int def_status = -1, sf_status = -1;
327 struct vfs *vfsp;
328 char *fname;
329 vnode_t *vp;
330 int error;
332 if (cpr_reusable_mode)
333 return (0);
335 if (error = cpr_open_deffile(FREAD, &vp))
336 return (error);
337 vfsp = vp->v_vfsp;
338 if (!cpr_is_ufs(vfsp)) {
339 (void) fop_close(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
340 VN_RELE(vp);
341 return (0);
344 cpr_log_status(enable, &def_status, vp);
345 (void) fop_close(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
346 VN_RELE(vp);
348 fname = cpr_build_statefile_path();
349 if (fname == NULL)
350 return (ENOENT);
351 if (error = vn_open(fname, UIO_SYSSPACE, FCREAT|FWRITE,
352 0600, &vp, CRCREAT, 0)) {
353 prom_printf("cpr_ufs_logging: cant open/create \"%s\", "
354 "error %d\n", fname, error);
355 return (error);
359 * check logging status for the statefile if it resides
360 * on a different fs and the type is a regular file
362 if (vp->v_vfsp != vfsp && vp->v_type == VREG)
363 cpr_log_status(enable, &sf_status, vp);
364 (void) fop_close(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
365 VN_RELE(vp);
367 return (0);
369 #endif
373 * Check if klmmod is loaded and call a lock manager service; if klmmod
374 * is not loaded, the services aren't needed and a call would trigger a
375 * modload, which would block since another thread would never run.
377 static void
378 cpr_lock_mgr(void (*service)(void))
380 if (mod_find_by_filename(NULL, "misc/klmmod") != NULL)
381 (*service)();
385 cpr_suspend_cpus(void)
387 int ret = 0;
388 extern void *i_cpr_save_context(void *arg);
390 mutex_enter(&cpu_lock);
393 * the machine could not have booted without a bootcpu
395 ASSERT(i_cpr_bootcpu() != NULL);
398 * bring all the offline cpus online
400 if ((ret = cpr_all_online())) {
401 mutex_exit(&cpu_lock);
402 return (ret);
406 * Set the affinity to be the boot processor
407 * This is cleared in either cpr_resume_cpus() or cpr_unpause_cpus()
409 affinity_set(i_cpr_bootcpuid());
411 ASSERT(CPU->cpu_id == 0);
413 PMD(PMD_SX, ("curthread running on bootcpu\n"))
416 * pause all other running CPUs and save the CPU state at the sametime
418 pause_cpus(NULL, i_cpr_save_context);
420 mutex_exit(&cpu_lock);
422 return (0);
426 * Take the system down to a checkpointable state and write
427 * the state file, the following are sequentially executed:
429 * - Request all user threads to stop themselves
430 * - push out and invalidate user pages
431 * - bring statefile inode incore to prevent a miss later
432 * - request all daemons to stop
433 * - check and make sure all threads are stopped
434 * - sync the file system
435 * - suspend all devices
436 * - block intrpts
437 * - dump system state and memory to state file
438 * - SPARC code will not be called with CPR_TORAM, caller filters
440 static int
441 cpr_suspend(int sleeptype)
443 #if defined(__sparc)
444 int sf_realloc, nverr;
445 #endif
446 int rc = 0;
447 int skt_rc = 0;
449 PMD(PMD_SX, ("cpr_suspend %x\n", sleeptype))
450 cpr_set_substate(C_ST_SUSPEND_BEGIN);
452 cpr_suspend_init(sleeptype);
454 cpr_save_time();
456 cpr_tod_get(&wholecycle_tv);
457 CPR_STAT_EVENT_START("Suspend Total");
459 i_cpr_alloc_cpus();
461 #if defined(__sparc)
462 ASSERT(sleeptype == CPR_TODISK);
463 if (!cpr_reusable_mode) {
465 * We need to validate default file before fs
466 * functionality is disabled.
468 if (rc = cpr_validate_definfo(0))
469 return (rc);
471 i_cpr_save_machdep_info();
472 #endif
474 PMD(PMD_SX, ("cpr_suspend: stop scans\n"))
475 /* Stop PM scans ASAP */
476 (void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT);
478 pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND,
479 NULL, NULL, PM_DEP_WAIT, NULL, 0);
481 #if defined(__sparc)
482 ASSERT(sleeptype == CPR_TODISK);
483 cpr_set_substate(C_ST_MP_OFFLINE);
484 if (rc = cpr_mp_offline())
485 return (rc);
486 #endif
488 * Ask Xorg to suspend the frame buffer, and wait for it to happen
490 mutex_enter(&srn_clone_lock);
491 if (srn_signal) {
492 PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
493 "SRN_SUSPEND_REQ)\n"))
494 srn_inuse = 1; /* because *(srn_signal) cv_waits */
495 (*srn_signal)(SRN_TYPE_APM, SRN_SUSPEND_REQ);
496 srn_inuse = 0;
497 } else {
498 PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
500 mutex_exit(&srn_clone_lock);
503 * Ask the user threads to stop by themselves, but
504 * if they don't or can't after 3 retries, we give up on CPR.
505 * The 3 retry is not a random number because 2 is possible if
506 * a thread has been forked before the parent thread is stopped.
508 CPR_DEBUG(CPR_DEBUG1, "\nstopping user threads...");
509 CPR_STAT_EVENT_START(" stop users");
510 cpr_set_substate(C_ST_STOP_USER_THREADS);
511 PMD(PMD_SX, ("cpr_suspend: stop user threads\n"))
512 if (rc = cpr_stop_user_threads())
513 return (rc);
514 CPR_STAT_EVENT_END(" stop users");
515 CPR_DEBUG(CPR_DEBUG1, "done\n");
517 PMD(PMD_SX, ("cpr_suspend: save direct levels\n"))
518 pm_save_direct_levels();
521 * User threads are stopped. We will start communicating with the
522 * user via prom_printf (some debug output may have already happened)
523 * so let anybody who cares know about this (bug 4096122)
525 (void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT);
527 PMD(PMD_SX, ("cpr_suspend: send notice\n"))
528 #ifndef DEBUG
529 cpr_send_notice();
530 if (cpr_debug)
531 prom_printf("\n");
532 #endif
534 PMD(PMD_SX, ("cpr_suspend: POST USER callback\n"))
535 (void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT);
538 * Reattach any drivers which originally exported the
539 * no-involuntary-power-cycles property. We need to do this before
540 * stopping kernel threads because modload is implemented using
541 * a kernel thread.
543 cpr_set_substate(C_ST_PM_REATTACH_NOINVOL);
544 PMD(PMD_SX, ("cpr_suspend: reattach noinvol\n"))
545 if (!pm_reattach_noinvol())
546 return (ENXIO);
548 #if defined(__sparc)
549 ASSERT(sleeptype == CPR_TODISK);
551 * if ufs logging is enabled, we need to disable before
552 * stopping kernel threads so that ufs delete and roll
553 * threads can do the work.
555 cpr_set_substate(C_ST_DISABLE_UFS_LOGGING);
556 if (rc = cpr_ufs_logging(0))
557 return (rc);
560 * Use sync_all to swap out all user pages and find out how much
561 * extra space needed for user pages that don't have back store
562 * space left.
564 CPR_STAT_EVENT_START(" swapout upages");
565 vfs_sync(SYNC_ALL);
566 CPR_STAT_EVENT_END(" swapout upages");
568 cpr_set_bitmap_size();
570 alloc_statefile:
572 * If our last state was C_ST_DUMP_NOSPC, we're trying to
573 * realloc the statefile, otherwise this is the first attempt.
575 sf_realloc = (CPR->c_substate == C_ST_DUMP_NOSPC) ? 1 : 0;
577 CPR_STAT_EVENT_START(" alloc statefile");
578 cpr_set_substate(C_ST_STATEF_ALLOC);
579 if (rc = cpr_alloc_statefile(sf_realloc)) {
580 if (sf_realloc)
581 errp("realloc failed\n");
582 return (rc);
584 CPR_STAT_EVENT_END(" alloc statefile");
587 * Sync the filesystem to preserve its integrity.
589 * This sync is also used to flush out all B_DELWRI buffers
590 * (fs cache) which are mapped and neither dirty nor referenced
591 * before cpr_invalidate_pages destroys them.
592 * fsflush does similar thing.
594 sync();
597 * destroy all clean file mapped kernel pages
599 CPR_STAT_EVENT_START(" clean pages");
600 CPR_DEBUG(CPR_DEBUG1, ("cleaning up mapped pages..."));
601 (void) callb_execute_class(CB_CL_CPR_VM, CB_CODE_CPR_CHKPT);
602 CPR_DEBUG(CPR_DEBUG1, ("done\n"));
603 CPR_STAT_EVENT_END(" clean pages");
604 #endif
608 * Hooks needed by lock manager prior to suspending.
609 * Refer to code for more comments.
611 PMD(PMD_SX, ("cpr_suspend: lock mgr\n"))
612 cpr_lock_mgr(lm_cprsuspend);
615 * Now suspend all the devices
617 CPR_STAT_EVENT_START(" stop drivers");
618 CPR_DEBUG(CPR_DEBUG1, "suspending drivers...");
619 cpr_set_substate(C_ST_SUSPEND_DEVICES);
620 pm_powering_down = 1;
621 PMD(PMD_SX, ("cpr_suspend: suspending devices\n"))
622 rc = cpr_suspend_devices(ddi_root_node());
623 pm_powering_down = 0;
624 if (rc)
625 return (rc);
626 CPR_DEBUG(CPR_DEBUG1, "done\n");
627 CPR_STAT_EVENT_END(" stop drivers");
630 * Stop all daemon activities
632 cpr_set_substate(C_ST_STOP_KERNEL_THREADS);
633 PMD(PMD_SX, ("cpr_suspend: stopping kernel threads\n"))
634 if (skt_rc = cpr_stop_kernel_threads())
635 return (skt_rc);
637 PMD(PMD_SX, ("cpr_suspend: POST KERNEL callback\n"))
638 (void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT);
640 PMD(PMD_SX, ("cpr_suspend: reattach noinvol fini\n"))
641 pm_reattach_noinvol_fini();
643 cpr_sae(1);
645 PMD(PMD_SX, ("cpr_suspend: CPR CALLOUT callback\n"))
646 (void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);
648 if (sleeptype == CPR_TODISK) {
650 * It's safer to do tod_get before we disable all intr.
652 CPR_STAT_EVENT_START(" write statefile");
656 * it's time to ignore the outside world, stop the real time
657 * clock and disable any further intrpt activity.
659 PMD(PMD_SX, ("cpr_suspend: handle xc\n"))
660 i_cpr_handle_xc(1); /* turn it on to disable xc assertion */
662 mutex_enter(&cpu_lock);
663 PMD(PMD_SX, ("cpr_suspend: cyclic suspend\n"))
664 cyclic_suspend();
665 mutex_exit(&cpu_lock);
668 * Due to the different methods of resuming the system between
669 * CPR_TODISK (boot cprboot on SPARC, which reloads kernel image)
670 * and CPR_TORAM (restart via reset into existing kernel image)
671 * cpus are not suspended and restored in the SPARC case, since it
672 * is necessary to restart the cpus and pause them before restoring
673 * the OBP image
676 #if defined(__x86)
678 /* pause aux cpus */
679 PMD(PMD_SX, ("pause aux cpus\n"))
681 cpr_set_substate(C_ST_MP_PAUSED);
683 if ((rc = cpr_suspend_cpus()) != 0)
684 return (rc);
685 #endif
687 PMD(PMD_SX, ("cpr_suspend: stop intr\n"))
688 i_cpr_stop_intr();
689 CPR_DEBUG(CPR_DEBUG1, "interrupt is stopped\n");
692 * Since we will now disable the mechanism that causes prom_printfs
693 * to power up (if needed) the console fb/monitor, we assert that
694 * it must be up now.
696 ASSERT(pm_cfb_is_up());
697 PMD(PMD_SX, ("cpr_suspend: prom suspend prepost\n"))
698 prom_suspend_prepost();
700 #if defined(__sparc)
702 * getting ready to write ourself out, flush the register
703 * windows to make sure that our stack is good when we
704 * come back on the resume side.
706 flush_windows();
707 #endif
710 * For S3, we're done
712 if (sleeptype == CPR_TORAM) {
713 PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
714 cpr_set_substate(C_ST_NODUMP);
715 return (rc);
717 #if defined(__sparc)
719 * FATAL: NO MORE MEMORY ALLOCATION ALLOWED AFTER THIS POINT!!!
721 * The system is quiesced at this point, we are ready to either dump
722 * to the state file for a extended sleep or a simple shutdown for
723 * systems with non-volatile memory.
727 * special handling for reusable:
729 if (cpr_reusable_mode) {
730 cpr_set_substate(C_ST_SETPROPS_1);
731 if (nverr = cpr_set_properties(1))
732 return (nverr);
735 cpr_set_substate(C_ST_DUMP);
736 rc = cpr_dump(C_VP);
739 * if any error occurred during dump, more
740 * special handling for reusable:
742 if (rc && cpr_reusable_mode) {
743 cpr_set_substate(C_ST_SETPROPS_0);
744 if (nverr = cpr_set_properties(0))
745 return (nverr);
748 if (rc == ENOSPC) {
749 cpr_set_substate(C_ST_DUMP_NOSPC);
750 (void) cpr_resume(sleeptype);
751 goto alloc_statefile;
752 } else if (rc == 0) {
753 if (cpr_reusable_mode) {
754 cpr_set_substate(C_ST_REUSABLE);
755 longjmp(&ttolwp(curthread)->lwp_qsav);
756 } else
757 rc = cpr_set_properties(1);
759 #endif
760 PMD(PMD_SX, ("cpr_suspend: return %d\n", rc))
761 return (rc);
764 void
765 cpr_resume_cpus(void)
768 * this is a cut down version of start_other_cpus()
769 * just do the initialization to wake the other cpus
772 #if defined(__x86)
774 * Initialize our syscall handlers
776 init_cpu_syscall(CPU);
778 #endif
780 i_cpr_pre_resume_cpus();
783 * Restart the paused cpus
785 mutex_enter(&cpu_lock);
786 start_cpus();
787 mutex_exit(&cpu_lock);
789 i_cpr_post_resume_cpus();
791 mutex_enter(&cpu_lock);
793 * clear the affinity set in cpr_suspend_cpus()
795 affinity_clear();
798 * offline all the cpus that were brought online during suspend
800 cpr_restore_offline();
802 mutex_exit(&cpu_lock);
805 void
806 cpr_unpause_cpus(void)
809 * Now restore the system back to what it was before we suspended
812 PMD(PMD_SX, ("cpr_unpause_cpus: restoring system\n"))
814 mutex_enter(&cpu_lock);
816 * Restart the paused cpus
818 start_cpus();
821 * clear the affinity set in cpr_suspend_cpus()
823 affinity_clear();
826 * offline all the cpus that were brought online during suspend
828 cpr_restore_offline();
830 mutex_exit(&cpu_lock);
834 * Bring the system back up from a checkpoint, at this point
835 * the VM has been minimally restored by boot, the following
836 * are executed sequentially:
838 * - machdep setup and enable interrupts (mp startup if it's mp)
839 * - resume all devices
840 * - restart daemons
841 * - put all threads back on run queue
843 static int
844 cpr_resume(int sleeptype)
846 cpr_time_t pwron_tv, *ctp;
847 char *str;
848 int rc = 0;
851 * The following switch is used to resume the system
852 * that was suspended to a different level.
854 CPR_DEBUG(CPR_DEBUG1, "\nEntering cpr_resume...\n");
855 PMD(PMD_SX, ("cpr_resume %x\n", sleeptype))
858 * Note:
860 * The rollback labels rb_xyz do not represent the cpr resume
861 * state when event 'xyz' has happened. Instead they represent
862 * the state during cpr suspend when event 'xyz' was being
863 * entered (and where cpr suspend failed). The actual call that
864 * failed may also need to be partially rolled back, since they
865 * aren't atomic in most cases. In other words, rb_xyz means
866 * "roll back all cpr suspend events that happened before 'xyz',
867 * and the one that caused the failure, if necessary."
869 switch (CPR->c_substate) {
870 #if defined(__sparc)
871 case C_ST_DUMP:
873 * This is most likely a full-fledged cpr_resume after
874 * a complete and successful cpr suspend. Just roll back
875 * everything.
877 ASSERT(sleeptype == CPR_TODISK);
878 break;
880 case C_ST_REUSABLE:
881 case C_ST_DUMP_NOSPC:
882 case C_ST_SETPROPS_0:
883 case C_ST_SETPROPS_1:
885 * C_ST_REUSABLE and C_ST_DUMP_NOSPC are the only two
886 * special switch cases here. The other two do not have
887 * any state change during cpr_suspend() that needs to
888 * be rolled back. But these are exit points from
889 * cpr_suspend, so theoretically (or in the future), it
890 * is possible that a need for roll back of a state
891 * change arises between these exit points.
893 ASSERT(sleeptype == CPR_TODISK);
894 goto rb_dump;
895 #endif
897 case C_ST_NODUMP:
898 PMD(PMD_SX, ("cpr_resume: NODUMP\n"))
899 goto rb_nodump;
901 case C_ST_STOP_KERNEL_THREADS:
902 PMD(PMD_SX, ("cpr_resume: STOP_KERNEL_THREADS\n"))
903 goto rb_stop_kernel_threads;
905 case C_ST_SUSPEND_DEVICES:
906 PMD(PMD_SX, ("cpr_resume: SUSPEND_DEVICES\n"))
907 goto rb_suspend_devices;
909 #if defined(__sparc)
910 case C_ST_STATEF_ALLOC:
911 ASSERT(sleeptype == CPR_TODISK);
912 goto rb_statef_alloc;
914 case C_ST_DISABLE_UFS_LOGGING:
915 ASSERT(sleeptype == CPR_TODISK);
916 goto rb_disable_ufs_logging;
917 #endif
919 case C_ST_PM_REATTACH_NOINVOL:
920 PMD(PMD_SX, ("cpr_resume: REATTACH_NOINVOL\n"))
921 goto rb_pm_reattach_noinvol;
923 case C_ST_STOP_USER_THREADS:
924 PMD(PMD_SX, ("cpr_resume: STOP_USER_THREADS\n"))
925 goto rb_stop_user_threads;
927 #if defined(__sparc)
928 case C_ST_MP_OFFLINE:
929 PMD(PMD_SX, ("cpr_resume: MP_OFFLINE\n"))
930 goto rb_mp_offline;
931 #endif
933 #if defined(__x86)
934 case C_ST_MP_PAUSED:
935 PMD(PMD_SX, ("cpr_resume: MP_PAUSED\n"))
936 goto rb_mp_paused;
937 #endif
940 default:
941 PMD(PMD_SX, ("cpr_resume: others\n"))
942 goto rb_others;
945 rb_all:
947 * perform platform-dependent initialization
949 if (cpr_suspend_succeeded)
950 i_cpr_machdep_setup();
953 * system did not really go down if we jump here
955 rb_dump:
957 * IMPORTANT: SENSITIVE RESUME SEQUENCE
959 * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!!
961 rb_nodump:
963 * If we did suspend to RAM, we didn't generate a dump
965 PMD(PMD_SX, ("cpr_resume: CPR DMA callback\n"))
966 (void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME);
967 if (cpr_suspend_succeeded) {
968 PMD(PMD_SX, ("cpr_resume: CPR RPC callback\n"))
969 (void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
972 prom_resume_prepost();
973 #if !defined(__sparc)
975 * Need to sync the software clock with the hardware clock.
976 * On Sparc, this occurs in the sparc-specific cbe. However
977 * on x86 this needs to be handled _before_ we bring other cpu's
978 * back online. So we call a resume function in timestamp.c
980 if (tsc_resume_in_cyclic == 0)
981 tsc_resume();
983 #endif
985 #if defined(__sparc)
986 if (cpr_suspend_succeeded && (boothowto & RB_DEBUG))
987 kdi_dvec_cpr_restart();
988 #endif
991 #if defined(__x86)
992 rb_mp_paused:
993 PT(PT_RMPO);
994 PMD(PMD_SX, ("resume aux cpus\n"))
996 if (cpr_suspend_succeeded) {
997 cpr_resume_cpus();
998 } else {
999 cpr_unpause_cpus();
1001 #endif
1004 * let the tmp callout catch up.
1006 PMD(PMD_SX, ("cpr_resume: CPR CALLOUT callback\n"))
1007 (void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME);
1009 i_cpr_enable_intr();
1011 mutex_enter(&cpu_lock);
1012 PMD(PMD_SX, ("cpr_resume: cyclic resume\n"))
1013 cyclic_resume();
1014 mutex_exit(&cpu_lock);
1016 PMD(PMD_SX, ("cpr_resume: handle xc\n"))
1017 i_cpr_handle_xc(0); /* turn it off to allow xc assertion */
1019 PMD(PMD_SX, ("cpr_resume: CPR POST KERNEL callback\n"))
1020 (void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME);
1023 * statistics gathering
1025 if (cpr_suspend_succeeded) {
1027 * Prevent false alarm in tod_validate() due to tod
1028 * value change between suspend and resume
1030 cpr_tod_status_set(TOD_CPR_RESUME_DONE);
1032 cpr_convert_promtime(&pwron_tv);
1034 ctp = &cpr_term.tm_shutdown;
1035 if (sleeptype == CPR_TODISK)
1036 CPR_STAT_EVENT_END_TMZ(" write statefile", ctp);
1037 CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp);
1039 CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv);
1041 str = " prom time";
1042 CPR_STAT_EVENT_START_TMZ(str, &pwron_tv);
1043 ctp = &cpr_term.tm_cprboot_start;
1044 CPR_STAT_EVENT_END_TMZ(str, ctp);
1046 str = " read statefile";
1047 CPR_STAT_EVENT_START_TMZ(str, ctp);
1048 ctp = &cpr_term.tm_cprboot_end;
1049 CPR_STAT_EVENT_END_TMZ(str, ctp);
1052 rb_stop_kernel_threads:
1054 * Put all threads back to where they belong; get the kernel
1055 * daemons straightened up too. Note that the callback table
1056 * locked during cpr_stop_kernel_threads() is released only
1057 * in cpr_start_kernel_threads(). Ensure modunloading is
1058 * disabled before starting kernel threads, we don't want
1059 * modunload thread to start changing device tree underneath.
1061 PMD(PMD_SX, ("cpr_resume: modunload disable\n"))
1062 modunload_disable();
1063 PMD(PMD_SX, ("cpr_resume: start kernel threads\n"))
1064 cpr_start_kernel_threads();
1066 rb_suspend_devices:
1067 CPR_DEBUG(CPR_DEBUG1, "resuming devices...");
1068 CPR_STAT_EVENT_START(" start drivers");
1070 PMD(PMD_SX,
1071 ("cpr_resume: rb_suspend_devices: cpr_resume_uniproc = %d\n",
1072 cpr_resume_uniproc))
1074 #if defined(__x86)
1076 * If cpr_resume_uniproc is set, then pause all the other cpus
1077 * apart from the current cpu, so that broken drivers that think
1078 * that they are on a uniprocessor machine will resume
1080 if (cpr_resume_uniproc) {
1081 mutex_enter(&cpu_lock);
1082 pause_cpus(NULL, NULL);
1083 mutex_exit(&cpu_lock);
1085 #endif
1088 * The policy here is to continue resume everything we can if we did
1089 * not successfully finish suspend; and panic if we are coming back
1090 * from a fully suspended system.
1092 PMD(PMD_SX, ("cpr_resume: resume devices\n"))
1093 rc = cpr_resume_devices(ddi_root_node(), 0);
1095 cpr_sae(0);
1097 str = "Failed to resume one or more devices.";
1099 if (rc) {
1100 if (CPR->c_substate == C_ST_DUMP ||
1101 (sleeptype == CPR_TORAM &&
1102 CPR->c_substate == C_ST_NODUMP)) {
1103 if (cpr_test_point == FORCE_SUSPEND_TO_RAM) {
1104 PMD(PMD_SX, ("cpr_resume: resume device "
1105 "warn\n"))
1106 cpr_err(CE_WARN, str);
1107 } else {
1108 PMD(PMD_SX, ("cpr_resume: resume device "
1109 "panic\n"))
1110 cpr_err(CE_PANIC, str);
1112 } else {
1113 PMD(PMD_SX, ("cpr_resume: resume device warn\n"))
1114 cpr_err(CE_WARN, str);
1118 CPR_STAT_EVENT_END(" start drivers");
1119 CPR_DEBUG(CPR_DEBUG1, "done\n");
1121 #if defined(__x86)
1123 * If cpr_resume_uniproc is set, then unpause all the processors
1124 * that were paused before resuming the drivers
1126 if (cpr_resume_uniproc) {
1127 mutex_enter(&cpu_lock);
1128 start_cpus();
1129 mutex_exit(&cpu_lock);
1131 #endif
1134 * If we had disabled modunloading in this cpr resume cycle (i.e. we
1135 * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable
1136 * modunloading now.
1138 if (CPR->c_substate != C_ST_SUSPEND_DEVICES) {
1139 PMD(PMD_SX, ("cpr_resume: modload enable\n"))
1140 modunload_enable();
1144 * Hooks needed by lock manager prior to resuming.
1145 * Refer to code for more comments.
1147 PMD(PMD_SX, ("cpr_resume: lock mgr\n"))
1148 cpr_lock_mgr(lm_cprresume);
1150 #if defined(__sparc)
1152 * This is a partial (half) resume during cpr suspend, we
1153 * haven't yet given up on the suspend. On return from here,
1154 * cpr_suspend() will try to reallocate and retry the suspend.
1156 if (CPR->c_substate == C_ST_DUMP_NOSPC) {
1157 return (0);
1160 if (sleeptype == CPR_TODISK) {
1161 rb_statef_alloc:
1162 cpr_statef_close();
1164 rb_disable_ufs_logging:
1166 * if ufs logging was disabled, re-enable
1168 (void) cpr_ufs_logging(1);
1170 #endif
1172 rb_pm_reattach_noinvol:
1174 * When pm_reattach_noinvol() succeeds, modunload_thread will
1175 * remain disabled until after cpr suspend passes the
1176 * C_ST_STOP_KERNEL_THREADS state. If any failure happens before
1177 * cpr suspend reaches this state, we'll need to enable modunload
1178 * thread during rollback.
1180 if (CPR->c_substate == C_ST_DISABLE_UFS_LOGGING ||
1181 CPR->c_substate == C_ST_STATEF_ALLOC ||
1182 CPR->c_substate == C_ST_SUSPEND_DEVICES ||
1183 CPR->c_substate == C_ST_STOP_KERNEL_THREADS) {
1184 PMD(PMD_SX, ("cpr_resume: reattach noinvol fini\n"))
1185 pm_reattach_noinvol_fini();
1188 PMD(PMD_SX, ("cpr_resume: CPR POST USER callback\n"))
1189 (void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME);
1190 PMD(PMD_SX, ("cpr_resume: CPR PROMPRINTF callback\n"))
1191 (void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME);
1193 PMD(PMD_SX, ("cpr_resume: restore direct levels\n"))
1194 pm_restore_direct_levels();
1196 rb_stop_user_threads:
1197 CPR_DEBUG(CPR_DEBUG1, "starting user threads...");
1198 PMD(PMD_SX, ("cpr_resume: starting user threads\n"))
1199 cpr_start_user_threads();
1200 CPR_DEBUG(CPR_DEBUG1, "done\n");
1202 * Ask Xorg to resume the frame buffer, and wait for it to happen
1204 mutex_enter(&srn_clone_lock);
1205 if (srn_signal) {
1206 PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
1207 "SRN_NORMAL_RESUME)\n"))
1208 srn_inuse = 1; /* because (*srn_signal) cv_waits */
1209 (*srn_signal)(SRN_TYPE_APM, SRN_NORMAL_RESUME);
1210 srn_inuse = 0;
1211 } else {
1212 PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
1214 mutex_exit(&srn_clone_lock);
1216 #if defined(__sparc)
1217 rb_mp_offline:
1218 if (cpr_mp_online())
1219 cpr_err(CE_WARN, "Failed to online all the processors.");
1220 #endif
1222 rb_others:
1223 PMD(PMD_SX, ("cpr_resume: dep thread\n"))
1224 pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL,
1225 PM_DEP_WAIT, NULL, 0);
1227 PMD(PMD_SX, ("cpr_resume: CPR PM callback\n"))
1228 (void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME);
1230 if (cpr_suspend_succeeded) {
1231 cpr_stat_record_events();
1234 #if defined(__sparc)
1235 if (sleeptype == CPR_TODISK && !cpr_reusable_mode)
1236 cpr_clear_definfo();
1237 #endif
1239 i_cpr_free_cpus();
1240 CPR_DEBUG(CPR_DEBUG1, "Sending SIGTHAW...");
1241 PMD(PMD_SX, ("cpr_resume: SIGTHAW\n"))
1242 cpr_signal_user(SIGTHAW);
1243 CPR_DEBUG(CPR_DEBUG1, "done\n");
1245 CPR_STAT_EVENT_END("Resume Total");
1247 CPR_STAT_EVENT_START_TMZ("WHOLE CYCLE", &wholecycle_tv);
1248 CPR_STAT_EVENT_END("WHOLE CYCLE");
1250 if (cpr_debug & CPR_DEBUG1)
1251 cmn_err(CE_CONT, "\nThe system is back where you left!\n");
1253 CPR_STAT_EVENT_START("POST CPR DELAY");
1255 #ifdef CPR_STAT
1256 ctp = &cpr_term.tm_shutdown;
1257 CPR_STAT_EVENT_START_TMZ("PWROFF TIME", ctp);
1258 CPR_STAT_EVENT_END_TMZ("PWROFF TIME", &pwron_tv);
1260 CPR_STAT_EVENT_PRINT();
1261 #endif /* CPR_STAT */
1263 PMD(PMD_SX, ("cpr_resume returns %x\n", rc))
1264 return (rc);
1267 static void
1268 cpr_suspend_init(int sleeptype)
1270 cpr_time_t *ctp;
1272 cpr_stat_init();
1275 * If cpr_suspend() failed before cpr_dump() gets a chance
1276 * to reinitialize the terminator of the statefile,
1277 * the values of the old terminator will still linger around.
1278 * Since the terminator contains information that we need to
1279 * decide whether suspend succeeded or not, we need to
1280 * reinitialize it as early as possible.
1282 cpr_term.real_statef_size = 0;
1283 ctp = &cpr_term.tm_shutdown;
1284 bzero(ctp, sizeof (*ctp));
1285 ctp = &cpr_term.tm_cprboot_start;
1286 bzero(ctp, sizeof (*ctp));
1287 ctp = &cpr_term.tm_cprboot_end;
1288 bzero(ctp, sizeof (*ctp));
1290 if (sleeptype == CPR_TODISK) {
1292 * Lookup the physical address of our thread structure.
1293 * This should never be invalid and the entire thread structure
1294 * is expected to reside within the same pfn.
1296 curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread);
1297 ASSERT(curthreadpfn != PFN_INVALID);
1298 ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat,
1299 (caddr_t)curthread + sizeof (kthread_t) - 1));
1302 cpr_suspend_succeeded = 0;
1306 * bring all the offline cpus online
1308 static int
1309 cpr_all_online(void)
1311 int rc = 0;
1313 #ifdef __sparc
1315 * do nothing
1317 #else
1319 cpu_t *cp;
1321 ASSERT(MUTEX_HELD(&cpu_lock));
1323 cp = cpu_list;
1324 do {
1325 cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1326 if (!CPU_ACTIVE(cp)) {
1327 if ((rc = cpu_online(cp)) != 0)
1328 break;
1329 CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
1331 } while ((cp = cp->cpu_next) != cpu_list);
1333 if (rc) {
1335 * an online operation failed so offline the cpus
1336 * that were onlined above to restore the system
1337 * to its original state
1339 cpr_restore_offline();
1341 #endif
1342 return (rc);
1346 * offline all the cpus that were brought online by cpr_all_online()
1348 static void
1349 cpr_restore_offline(void)
1352 #ifdef __sparc
1354 * do nothing
1356 #else
1358 cpu_t *cp;
1359 int rc = 0;
1361 ASSERT(MUTEX_HELD(&cpu_lock));
1363 cp = cpu_list;
1364 do {
1365 if (CPU_CPR_IS_ONLINE(cp)) {
1366 rc = cpu_offline(cp, 0);
1368 * this offline should work, since the cpu was
1369 * offline originally and was successfully onlined
1370 * by cpr_all_online()
1372 ASSERT(rc == 0);
1373 cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1375 } while ((cp = cp->cpu_next) != cpu_list);
1377 #endif