kernel/cpr/cpr_main.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25
  26 /*
  27  * This module contains the guts of checkpoint-resume mechanism.
  28  * All code in this module is platform independent.
  29  */
  30
  31 #include <sys/types.h>
  32 #include <sys/errno.h>
  33 #include <sys/callb.h>
  34 #include <sys/processor.h>
  35 #include <sys/machsystm.h>
  36 #include <sys/clock.h>
  37 #include <sys/vfs.h>
  38 #include <sys/kmem.h>
  39 #include <nfs/lm.h>
  40 #include <sys/systm.h>
  41 #include <sys/cpr.h>
  42 #include <sys/bootconf.h>
  43 #include <sys/cyclic.h>
  44 #include <sys/filio.h>
  45 #include <sys/fs/ufs_filio.h>
  46 #include <sys/epm.h>
  47 #include <sys/modctl.h>
  48 #include <sys/reboot.h>
  49 #include <sys/kdi.h>
  50 #include <sys/promif.h>
  51 #include <sys/srn.h>
  52 #include <sys/cpr_impl.h>
  53
/*
 * PPM(dip): the devi_pm_ppm member of the DEVI() view of dip, cast to
 * a dev_info_t pointer.
 */
#define PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)

/*
 * Timestamps recorded around shutdown and boot; cpr_resume() reads
 * tm_shutdown, tm_cprboot_start and tm_cprboot_end from it for statistics.
 */
extern struct cpr_terminator cpr_term;

/* Routines and data defined outside this file. */
extern int cpr_alloc_statefile(int);
extern void cpr_start_kernel_threads(void);
extern void cpr_abbreviate_devpath(char *, char *);
extern void cpr_convert_promtime(cpr_time_t *);
extern void cpr_send_notice(void);
extern void cpr_set_bitmap_size(void);
extern void cpr_stat_init();
extern void cpr_statef_close(void);
extern void flush_windows(void);
/* Optional hook; callers in this file test it for NULL before calling. */
extern void (*srn_signal)(int, int);
extern void init_cpu_syscall(struct cpu *);
extern void i_cpr_pre_resume_cpus();
extern void i_cpr_post_resume_cpus();
extern int cpr_is_ufs(struct vfs *);

/* Set to 1 by cpr_suspend() only while cpr_suspend_devices() runs. */
extern int pm_powering_down;
/* Held around every use of srn_signal and srn_inuse in this file. */
extern kmutex_t srn_clone_lock;
extern int srn_inuse;

/*
 * Forward declarations for routines defined later in this file.
 * NOTE(review): cpr_suspend_cpus() and cpr_resume_cpus() are declared
 * static here for x86 only, while their definitions below are
 * unconditional and omit the static keyword.
 */
static int cpr_suspend(int);
static int cpr_resume(int);
static void cpr_suspend_init(int);
#if defined(__x86)
static int cpr_suspend_cpus(void);
static void cpr_resume_cpus(void);
#endif
static int cpr_all_online(void);
static void cpr_restore_offline(void);

/*
 * Filled in by cpr_tod_get() at the start of cpr_suspend() and used as the
 * start time of the "WHOLE CYCLE" statistic in cpr_resume().
 */
cpr_time_t wholecycle_tv;
/*
 * Read by cpr_resume() to tell a resume after a completed suspend apart
 * from the rollback of a failed one.
 */
int cpr_suspend_succeeded;
/* NOTE(review): not referenced in the part of the file reviewed here. */
pfn_t curthreadpfn;
int curthreadremapped;

extern cpuset_t cpu_ready_set;

/* Boot cpu lookup and TSC resume support, defined outside this file. */
extern processorid_t i_cpr_bootcpuid(void);
extern cpu_t *i_cpr_bootcpu(void);
extern void tsc_adjust_delta(hrtime_t tdelta);
extern void tsc_resume(void);
/* When 0, cpr_resume() calls tsc_resume() directly. */
extern int tsc_resume_in_cyclic;
  99
/*
 * Set this variable to 1 to have device drivers resume in a
 * uniprocessor environment.  This allows drivers that assume
 * they resume on a UP machine to continue to work.  It should be
 * deprecated once the broken drivers are fixed.
 *
 * On x86, cpr_resume() reads this flag and, when it is set, pauses the
 * other cpus around the call to cpr_resume_devices().
 */
int cpr_resume_uniproc = 0;
 107
 108 /*
 109  * save or restore abort_enable;  this prevents a drop
 110  * to kadb or prom during cpr_resume_devices() when
 111  * there is no kbd present;  see abort_sequence_enter()
 112  */
 113 static void
 114 cpr_sae(int stash)
 115 {
 116         static int saved_ae = -1;
 117
 118         if (stash) {
 119                 saved_ae = abort_enable;
 120                 abort_enable = 0;
 121         } else if (saved_ae != -1) {
 122                 abort_enable = saved_ae;
 123                 saved_ae = -1;
 124         }
 125 }
 126
 127
 128 /*
 129  * The main switching point for cpr, this routine starts the ckpt
 130  * and state file saving routines; on resume the control is
 131  * returned back to here and it then calls the resume routine.
 132  */
 133 int
 134 cpr_main(int sleeptype)
 135 {
 136         int rc, rc2;
 137         label_t saveq;
 138         klwp_t *tlwp = ttolwp(curthread);
 139
 140         if (sleeptype == CPR_TODISK) {
 141                 if ((rc = cpr_default_setup(1)) != 0)
 142                         return (rc);
 143                 ASSERT(tlwp);
 144                 saveq = tlwp->lwp_qsav;
 145         }
 146
 147         if (sleeptype == CPR_TORAM) {
 148                 rc = cpr_suspend(sleeptype);
 149                 PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
 150                 if (rc == 0) {
 151                         int i_cpr_power_down(int sleeptype);
 152
 153                         /*
 154                          * From this point on, we should be at a high
 155                          * spl, interrupts disabled, and all but one
 156                          * cpu's paused (effectively UP/single threaded).
 157                          * So this is were we want to put ASSERTS()
 158                          * to let us know otherwise.
 159                          */
 160                         ASSERT(cpus_paused());
 161
 162                         /*
 163                          * Now do the work of actually putting this
 164                          * machine to sleep!
 165                          */
 166                         rc = i_cpr_power_down(sleeptype);
 167                         if (rc == 0) {
 168                                 PMD(PMD_SX, ("back from successful suspend\n"))
 169                         }
 170                         /*
 171                          * We do care about the return value from cpr_resume
 172                          * at this point, as it will tell us if one of the
 173                          * resume functions failed (cpr_resume_devices())
 174                          * However, for this to return and _not_ panic, means
 175                          * that we must be in one of the test functions.  So
 176                          * check for that and return an appropriate message.
 177                          */
 178                         rc2 = cpr_resume(sleeptype);
 179                         if (rc2 != 0) {
 180                                 ASSERT(cpr_test_point > 0);
 181                                 cmn_err(CE_NOTE,
 182                                     "cpr_resume returned non-zero: %d\n", rc2);
 183                                 PMD(PMD_SX, ("cpr_resume rets %x\n", rc2))
 184                         }
 185                         ASSERT(!cpus_paused());
 186                 } else {
 187                         PMD(PMD_SX, ("failed suspend, resuming\n"))
 188                         rc = cpr_resume(sleeptype);
 189                 }
 190                 return (rc);
 191         }
 192         /*
 193          * Remember where we are for resume after reboot
 194          */
 195         if (!setjmp(&tlwp->lwp_qsav)) {
 196                 /*
 197                  * try to checkpoint the system, if failed return back
 198                  * to userland, otherwise power off.
 199                  */
 200                 rc = cpr_suspend(sleeptype);
 201                 if (rc || cpr_reusable_mode) {
 202                         /*
 203                          * We don't really want to go down, or
 204                          * something went wrong in suspend, do what we can
 205                          * to put the system back to an operable state then
 206                          * return back to userland.
 207                          */
 208                         PMD(PMD_SX, ("failed suspend, resuming\n"))
 209                         (void) cpr_resume(sleeptype);
 210                         PMD(PMD_SX, ("back from failed suspend resume\n"))
 211                 }
 212         } else {
 213                 /*
 214                  * This is the resumed side of longjmp, restore the previous
 215                  * longjmp pointer if there is one so this will be transparent
 216                  * to the world.
 217                  * This path is only for CPR_TODISK, where we reboot
 218                  */
 219                 ASSERT(sleeptype == CPR_TODISK);
 220                 tlwp->lwp_qsav = saveq;
 221                 CPR->c_flags &= ~C_SUSPENDING;
 222                 CPR->c_flags |= C_RESUMING;
 223
 224                 /*
 225                  * resume the system back to the original state
 226                  */
 227                 rc = cpr_resume(sleeptype);
 228                 PMD(PMD_SX, ("back from successful suspend; resume rets %x\n",
 229                     rc))
 230         }
 231
 232         (void) cpr_default_setup(0);
 233
 234         return (rc);
 235 }
 236
 237
 238
 239
 240 /*
 241  * Check if klmmod is loaded and call a lock manager service; if klmmod
 242  * is not loaded, the services aren't needed and a call would trigger a
 243  * modload, which would block since another thread would never run.
 244  */
 245 static void
 246 cpr_lock_mgr(void (*service)(void))
 247 {
 248         if (mod_find_by_filename(NULL, "misc/klmmod") != NULL)
 249                 (*service)();
 250 }
 251
 252 int
 253 cpr_suspend_cpus(void)
 254 {
 255         int     ret = 0;
 256         extern void *i_cpr_save_context(void *arg);
 257
 258         mutex_enter(&cpu_lock);
 259
 260         /*
 261          * the machine could not have booted without a bootcpu
 262          */
 263         ASSERT(i_cpr_bootcpu() != NULL);
 264
 265         /*
 266          * bring all the offline cpus online
 267          */
 268         if ((ret = cpr_all_online())) {
 269                 mutex_exit(&cpu_lock);
 270                 return (ret);
 271         }
 272
 273         /*
 274          * Set the affinity to be the boot processor
 275          * This is cleared in either cpr_resume_cpus() or cpr_unpause_cpus()
 276          */
 277         affinity_set(i_cpr_bootcpuid());
 278
 279         ASSERT(CPU->cpu_id == 0);
 280
 281         PMD(PMD_SX, ("curthread running on bootcpu\n"))
 282
 283         /*
 284          * pause all other running CPUs and save the CPU state at the sametime
 285          */
 286         pause_cpus(NULL, i_cpr_save_context);
 287
 288         mutex_exit(&cpu_lock);
 289
 290         return (0);
 291 }
 292
/*
 * Take the system down to a checkpointable state and write
 * the state file, the following are sequentially executed:
 *
 *    - Request all user threads to stop themselves
 *    - push out and invalidate user pages
 *    - bring statefile inode incore to prevent a miss later
 *    - request all daemons to stop
 *    - check and make sure all threads are stopped
 *    - sync the file system
 *    - suspend all devices
 *    - block intrpts
 *    - dump system state and memory to state file
 *    - SPARC code will not be called with CPR_TORAM, caller filters
 *
 * NOTE(review): the list above describes the overall checkpoint design.
 * Several of its steps (pushing out user pages, the statefile inode,
 * syncing the file system, writing the dump) have no corresponding call
 * in the body below.
 *
 * sleeptype: CPR_TORAM or CPR_TODISK.
 *
 * Returns 0 on success.  On failure it returns immediately, without
 * undoing earlier steps, with one of:
 *    - the non-zero result of cpr_stop_user_threads(),
 *      cpr_suspend_devices(), cpr_stop_kernel_threads() or (x86 only)
 *      cpr_suspend_cpus();
 *    - ENXIO when pm_reattach_noinvol() returns 0.
 *
 * cpr_set_substate() is called before each major stage.  cpr_resume()
 * switches on CPR->c_substate using the same C_ST_* values to pick its
 * rollback entry point, so presumably the substate recorded here is what
 * tells cpr_resume() how far suspend got -- confirm against
 * cpr_set_substate().
 */
static int
cpr_suspend(int sleeptype)
{
        int     rc = 0;
        int     skt_rc = 0;

        PMD(PMD_SX, ("cpr_suspend %x\n", sleeptype))
        cpr_set_substate(C_ST_SUSPEND_BEGIN);

        cpr_suspend_init(sleeptype);

        cpr_save_time();

        /* start time of the "WHOLE CYCLE" statistic closed in cpr_resume() */
        cpr_tod_get(&wholecycle_tv);
        CPR_STAT_EVENT_START("Suspend Total");

        /* paired with i_cpr_free_cpus() in cpr_resume() */
        i_cpr_alloc_cpus();


        PMD(PMD_SX, ("cpr_suspend: stop scans\n"))
        /* Stop PM scans ASAP */
        (void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT);

        pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND,
            NULL, NULL, PM_DEP_WAIT, NULL, 0);

        /*
         * Ask Xorg to suspend the frame buffer, and wait for it to happen
         */
        mutex_enter(&srn_clone_lock);
        if (srn_signal) {
                PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
                    "SRN_SUSPEND_REQ)\n"))
                srn_inuse = 1;  /* because (*srn_signal) cv_waits */
                (*srn_signal)(SRN_TYPE_APM, SRN_SUSPEND_REQ);
                srn_inuse = 0;
        } else {
                PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
        }
        mutex_exit(&srn_clone_lock);

        /*
         * Ask the user threads to stop by themselves, but
         * if they don't or can't after 3 retries, we give up on CPR.
         * The 3 retry is not a random number because 2 is possible if
         * a thread has been forked before the parent thread is stopped.
         */
        CPR_DEBUG(CPR_DEBUG1, "\nstopping user threads...");
        CPR_STAT_EVENT_START("  stop users");
        cpr_set_substate(C_ST_STOP_USER_THREADS);
        PMD(PMD_SX, ("cpr_suspend: stop user threads\n"))
        /* assignment is intentional: rc carries the failure code out */
        if (rc = cpr_stop_user_threads())
                return (rc);
        CPR_STAT_EVENT_END("  stop users");
        CPR_DEBUG(CPR_DEBUG1, "done\n");

        PMD(PMD_SX, ("cpr_suspend: save direct levels\n"))
        pm_save_direct_levels();

        /*
         * User threads are stopped.  We will start communicating with the
         * user via prom_printf (some debug output may have already happened)
         * so let anybody who cares know about this (bug 4096122)
         */
        (void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT);

        PMD(PMD_SX, ("cpr_suspend: send notice\n"))
        /* the notice is only sent in non-DEBUG builds */
#ifndef DEBUG
        cpr_send_notice();
        if (cpr_debug)
                prom_printf("\n");
#endif

        PMD(PMD_SX, ("cpr_suspend: POST USER callback\n"))
        (void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT);

        /*
         * Reattach any drivers which originally exported the
         * no-involuntary-power-cycles property.  We need to do this before
         * stopping kernel threads because modload is implemented using
         * a kernel thread.
         */
        cpr_set_substate(C_ST_PM_REATTACH_NOINVOL);
        PMD(PMD_SX, ("cpr_suspend: reattach noinvol\n"))
        if (!pm_reattach_noinvol())
                return (ENXIO);



        /*
         * Hooks needed by lock manager prior to suspending.
         * Refer to code for more comments.
         */
        PMD(PMD_SX, ("cpr_suspend: lock mgr\n"))
        cpr_lock_mgr(lm_cprsuspend);

        /*
         * Now suspend all the devices
         */
        CPR_STAT_EVENT_START("  stop drivers");
        CPR_DEBUG(CPR_DEBUG1, "suspending drivers...");
        cpr_set_substate(C_ST_SUSPEND_DEVICES);
        /* pm_powering_down is raised only for the duration of the call */
        pm_powering_down = 1;
        PMD(PMD_SX, ("cpr_suspend: suspending devices\n"))
        rc = cpr_suspend_devices(ddi_root_node());
        pm_powering_down = 0;
        if (rc)
                return (rc);
        CPR_DEBUG(CPR_DEBUG1, "done\n");
        CPR_STAT_EVENT_END("  stop drivers");

        /*
         * Stop all daemon activities
         */
        cpr_set_substate(C_ST_STOP_KERNEL_THREADS);
        PMD(PMD_SX, ("cpr_suspend: stopping kernel threads\n"))
        /* assignment is intentional: skt_rc carries the failure code out */
        if (skt_rc = cpr_stop_kernel_threads())
                return (skt_rc);

        PMD(PMD_SX, ("cpr_suspend: POST KERNEL callback\n"))
        (void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT);

        PMD(PMD_SX, ("cpr_suspend: reattach noinvol fini\n"))
        pm_reattach_noinvol_fini();

        /* stash and clear abort_enable; cpr_resume() restores it */
        cpr_sae(1);

        PMD(PMD_SX, ("cpr_suspend: CPR CALLOUT callback\n"))
        (void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);

        if (sleeptype == CPR_TODISK) {
                /*
                 * It's safer to do tod_get before we disable all intr.
                 */
                CPR_STAT_EVENT_START("  write statefile");
        }

        /*
         * it's time to ignore the outside world, stop the real time
         * clock and disable any further intrpt activity.
         */
        PMD(PMD_SX, ("cpr_suspend: handle xc\n"))
        i_cpr_handle_xc(1);     /* turn it on to disable xc assertion */

        mutex_enter(&cpu_lock);
        PMD(PMD_SX, ("cpr_suspend: cyclic suspend\n"))
        cyclic_suspend();
        mutex_exit(&cpu_lock);

        /*
         * Due to the different methods of resuming the system between
         * CPR_TODISK (boot cprboot on SPARC, which reloads kernel image)
         * and CPR_TORAM (restart via reset into existing kernel image)
         * cpus are not suspended and restored in the SPARC case, since it
         * is necessary to restart the cpus and pause them before restoring
         * the OBP image
         */

#if defined(__x86)

        /* pause aux cpus */
        PMD(PMD_SX, ("pause aux cpus\n"))

        cpr_set_substate(C_ST_MP_PAUSED);

        if ((rc = cpr_suspend_cpus()) != 0)
                return (rc);
#endif

        PMD(PMD_SX, ("cpr_suspend: stop intr\n"))
        i_cpr_stop_intr();
        CPR_DEBUG(CPR_DEBUG1, "interrupt is stopped\n");

        /*
         * Since we will now disable the mechanism that causes prom_printfs
         * to power up (if needed) the console fb/monitor, we assert that
         * it must be up now.
         */
        ASSERT(pm_cfb_is_up());
        PMD(PMD_SX, ("cpr_suspend: prom suspend prepost\n"))
        prom_suspend_prepost();


        /*
         * For S3, we're done
         */
        if (sleeptype == CPR_TORAM) {
                PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
                /* cpr_resume() enters at rb_nodump for this substate */
                cpr_set_substate(C_ST_NODUMP);
                return (rc);
        }
        PMD(PMD_SX, ("cpr_suspend: return %d\n", rc))
        return (rc);
}
 502
/*
 * Restart the cpus paused by cpr_suspend_cpus() after a suspend that
 * actually completed; cpr_resume() calls this when cpr_suspend_succeeded
 * is set and calls cpr_unpause_cpus() otherwise.
 *
 * Compared with cpr_unpause_cpus(), this also (re)initializes the syscall
 * handlers of the current cpu on x86 and brackets start_cpus() with the
 * platform hooks i_cpr_pre_resume_cpus() and i_cpr_post_resume_cpus().
 */
void
cpr_resume_cpus(void)
{
        /*
         * this is a cut down version of start_other_cpus()
         * just do the initialization to wake the other cpus
         */

#if defined(__x86)
        /*
         * Initialize our syscall handlers
         */
        init_cpu_syscall(CPU);

#endif

        i_cpr_pre_resume_cpus();

        /*
         * Restart the paused cpus
         */
        mutex_enter(&cpu_lock);
        start_cpus();
        mutex_exit(&cpu_lock);

        /* called with cpu_lock dropped, unlike the steps around it */
        i_cpr_post_resume_cpus();

        mutex_enter(&cpu_lock);
        /*
         * clear the affinity set in cpr_suspend_cpus()
         */
        affinity_clear();

        /*
         * offline all the cpus that were brought online during suspend
         */
        cpr_restore_offline();

        mutex_exit(&cpu_lock);
}
 543
/*
 * Undo cpr_suspend_cpus() when the suspend did not complete;
 * cpr_resume() calls this when cpr_suspend_succeeded is not set.
 *
 * Under a single hold of cpu_lock: restart the paused cpus, drop the
 * boot-cpu affinity, and take the cpus that were onlined for suspend
 * back offline.
 */
void
cpr_unpause_cpus(void)
{
        /*
         * Now restore the system back to what it was before we suspended
         */

        PMD(PMD_SX, ("cpr_unpause_cpus: restoring system\n"))

        mutex_enter(&cpu_lock);
        /*
         * Restart the paused cpus
         */
        start_cpus();

        /*
         * clear the affinity set in cpr_suspend_cpus()
         */
        affinity_clear();

        /*
         * offline all the cpus that were brought online during suspend
         */
        cpr_restore_offline();

        mutex_exit(&cpu_lock);
}
 571
 572 /*
 573  * Bring the system back up from a checkpoint, at this point
 574  * the VM has been minimally restored by boot, the following
 575  * are executed sequentially:
 576  *
 577  *    - machdep setup and enable interrupts (mp startup if it's mp)
 578  *    - resume all devices
 579  *    - restart daemons
 580  *    - put all threads back on run queue
 581  */
 582 static int
 583 cpr_resume(int sleeptype)
 584 {
 585         cpr_time_t pwron_tv, *ctp;
 586         char *str;
 587         int rc = 0;
 588
 589         /*
 590          * The following switch is used to resume the system
 591          * that was suspended to a different level.
 592          */
 593         CPR_DEBUG(CPR_DEBUG1, "\nEntering cpr_resume...\n");
 594         PMD(PMD_SX, ("cpr_resume %x\n", sleeptype))
 595
 596         /*
 597          * Note:
 598          *
 599          * The rollback labels rb_xyz do not represent the cpr resume
 600          * state when event 'xyz' has happened. Instead they represent
 601          * the state during cpr suspend when event 'xyz' was being
 602          * entered (and where cpr suspend failed). The actual call that
 603          * failed may also need to be partially rolled back, since they
 604          * aren't atomic in most cases.  In other words, rb_xyz means
 605          * "roll back all cpr suspend events that happened before 'xyz',
 606          * and the one that caused the failure, if necessary."
 607          */
 608         switch (CPR->c_substate) {
 609
 610         case C_ST_NODUMP:
 611                 PMD(PMD_SX, ("cpr_resume: NODUMP\n"))
 612                 goto rb_nodump;
 613
 614         case C_ST_STOP_KERNEL_THREADS:
 615                 PMD(PMD_SX, ("cpr_resume: STOP_KERNEL_THREADS\n"))
 616                 goto rb_stop_kernel_threads;
 617
 618         case C_ST_SUSPEND_DEVICES:
 619                 PMD(PMD_SX, ("cpr_resume: SUSPEND_DEVICES\n"))
 620                 goto rb_suspend_devices;
 621
 622
 623         case C_ST_PM_REATTACH_NOINVOL:
 624                 PMD(PMD_SX, ("cpr_resume: REATTACH_NOINVOL\n"))
 625                 goto rb_pm_reattach_noinvol;
 626
 627         case C_ST_STOP_USER_THREADS:
 628                 PMD(PMD_SX, ("cpr_resume: STOP_USER_THREADS\n"))
 629                 goto rb_stop_user_threads;
 630
 631
 632 #if defined(__x86)
 633         case C_ST_MP_PAUSED:
 634                 PMD(PMD_SX, ("cpr_resume: MP_PAUSED\n"))
 635                 goto rb_mp_paused;
 636 #endif
 637
 638
 639         default:
 640                 PMD(PMD_SX, ("cpr_resume: others\n"))
 641                 goto rb_others;
 642         }
 643
 644 rb_all:
 645         /*
 646          * perform platform-dependent initialization
 647          */
 648         if (cpr_suspend_succeeded)
 649                 i_cpr_machdep_setup();
 650
 651         /*
 652          * system did not really go down if we jump here
 653          */
 654 rb_dump:
 655         /*
 656          * IMPORTANT:  SENSITIVE RESUME SEQUENCE
 657          *
 658          * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!!
 659          */
 660 rb_nodump:
 661         /*
 662          * If we did suspend to RAM, we didn't generate a dump
 663          */
 664         PMD(PMD_SX, ("cpr_resume: CPR DMA callback\n"))
 665         (void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME);
 666         if (cpr_suspend_succeeded) {
 667                 PMD(PMD_SX, ("cpr_resume: CPR RPC callback\n"))
 668                 (void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
 669         }
 670
 671         prom_resume_prepost();
 672         /*
 673          * Need to sync the software clock with the hardware clock.
 674          * On Sparc, this occurs in the sparc-specific cbe.  However
 675          * on x86 this needs to be handled _before_ we bring other cpu's
 676          * back online.  So we call a resume function in timestamp.c
 677          */
 678         if (tsc_resume_in_cyclic == 0)
 679                 tsc_resume();
 680
 681
 682
 683
 684 #if defined(__x86)
 685 rb_mp_paused:
 686         PT(PT_RMPO);
 687         PMD(PMD_SX, ("resume aux cpus\n"))
 688
 689         if (cpr_suspend_succeeded) {
 690                 cpr_resume_cpus();
 691         } else {
 692                 cpr_unpause_cpus();
 693         }
 694 #endif
 695
 696         /*
 697          * let the tmp callout catch up.
 698          */
 699         PMD(PMD_SX, ("cpr_resume: CPR CALLOUT callback\n"))
 700         (void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME);
 701
 702         i_cpr_enable_intr();
 703
 704         mutex_enter(&cpu_lock);
 705         PMD(PMD_SX, ("cpr_resume: cyclic resume\n"))
 706         cyclic_resume();
 707         mutex_exit(&cpu_lock);
 708
 709         PMD(PMD_SX, ("cpr_resume: handle xc\n"))
 710         i_cpr_handle_xc(0);     /* turn it off to allow xc assertion */
 711
 712         PMD(PMD_SX, ("cpr_resume: CPR POST KERNEL callback\n"))
 713         (void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME);
 714
 715         /*
 716          * statistics gathering
 717          */
 718         if (cpr_suspend_succeeded) {
 719                 /*
 720                  * Prevent false alarm in tod_validate() due to tod
 721                  * value change between suspend and resume
 722                  */
 723                 cpr_tod_status_set(TOD_CPR_RESUME_DONE);
 724
 725                 cpr_convert_promtime(&pwron_tv);
 726
 727                 ctp = &cpr_term.tm_shutdown;
 728                 if (sleeptype == CPR_TODISK)
 729                         CPR_STAT_EVENT_END_TMZ("  write statefile", ctp);
 730                 CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp);
 731
 732                 CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv);
 733
 734                 str = "  prom time";
 735                 CPR_STAT_EVENT_START_TMZ(str, &pwron_tv);
 736                 ctp = &cpr_term.tm_cprboot_start;
 737                 CPR_STAT_EVENT_END_TMZ(str, ctp);
 738
 739                 str = "  read statefile";
 740                 CPR_STAT_EVENT_START_TMZ(str, ctp);
 741                 ctp = &cpr_term.tm_cprboot_end;
 742                 CPR_STAT_EVENT_END_TMZ(str, ctp);
 743         }
 744
 745 rb_stop_kernel_threads:
 746         /*
 747          * Put all threads back to where they belong; get the kernel
 748          * daemons straightened up too. Note that the callback table
 749          * locked during cpr_stop_kernel_threads() is released only
 750          * in cpr_start_kernel_threads(). Ensure modunloading is
 751          * disabled before starting kernel threads, we don't want
 752          * modunload thread to start changing device tree underneath.
 753          */
 754         PMD(PMD_SX, ("cpr_resume: modunload disable\n"))
 755         modunload_disable();
 756         PMD(PMD_SX, ("cpr_resume: start kernel threads\n"))
 757         cpr_start_kernel_threads();
 758
 759 rb_suspend_devices:
 760         CPR_DEBUG(CPR_DEBUG1, "resuming devices...");
 761         CPR_STAT_EVENT_START("  start drivers");
 762
 763         PMD(PMD_SX,
 764             ("cpr_resume: rb_suspend_devices: cpr_resume_uniproc = %d\n",
 765             cpr_resume_uniproc))
 766
 767 #if defined(__x86)
 768         /*
 769          * If cpr_resume_uniproc is set, then pause all the other cpus
 770          * apart from the current cpu, so that broken drivers that think
 771          * that they are on a uniprocessor machine will resume
 772          */
 773         if (cpr_resume_uniproc) {
 774                 mutex_enter(&cpu_lock);
 775                 pause_cpus(NULL, NULL);
 776                 mutex_exit(&cpu_lock);
 777         }
 778 #endif
 779
 780         /*
 781          * The policy here is to continue resume everything we can if we did
 782          * not successfully finish suspend; and panic if we are coming back
 783          * from a fully suspended system.
 784          */
 785         PMD(PMD_SX, ("cpr_resume: resume devices\n"))
 786         rc = cpr_resume_devices(ddi_root_node(), 0);
 787
 788         cpr_sae(0);
 789
 790         str = "Failed to resume one or more devices.";
 791
 792         if (rc) {
 793                 if (CPR->c_substate == C_ST_DUMP ||
 794                     (sleeptype == CPR_TORAM &&
 795                     CPR->c_substate == C_ST_NODUMP)) {
 796                         if (cpr_test_point == FORCE_SUSPEND_TO_RAM) {
 797                                 PMD(PMD_SX, ("cpr_resume: resume device "
 798                                     "warn\n"))
 799                                 cpr_err(CE_WARN, str);
 800                         } else {
 801                                 PMD(PMD_SX, ("cpr_resume: resume device "
 802                                     "panic\n"))
 803                                 cpr_err(CE_PANIC, str);
 804                         }
 805                 } else {
 806                         PMD(PMD_SX, ("cpr_resume: resume device warn\n"))
 807                         cpr_err(CE_WARN, str);
 808                 }
 809         }
 810
 811         CPR_STAT_EVENT_END("  start drivers");
 812         CPR_DEBUG(CPR_DEBUG1, "done\n");
 813
 814 #if defined(__x86)
 815         /*
 816          * If cpr_resume_uniproc is set, then unpause all the processors
 817          * that were paused before resuming the drivers
 818          */
 819         if (cpr_resume_uniproc) {
 820                 mutex_enter(&cpu_lock);
 821                 start_cpus();
 822                 mutex_exit(&cpu_lock);
 823         }
 824 #endif
 825
 826         /*
 827          * If we had disabled modunloading in this cpr resume cycle (i.e. we
 828          * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable
 829          * modunloading now.
 830          */
 831         if (CPR->c_substate != C_ST_SUSPEND_DEVICES) {
 832                 PMD(PMD_SX, ("cpr_resume: modload enable\n"))
 833                 modunload_enable();
 834         }
 835
 836         /*
 837          * Hooks needed by lock manager prior to resuming.
 838          * Refer to code for more comments.
 839          */
 840         PMD(PMD_SX, ("cpr_resume: lock mgr\n"))
 841         cpr_lock_mgr(lm_cprresume);
 842
 843
 844 rb_pm_reattach_noinvol:
 845         /*
 846          * When pm_reattach_noinvol() succeeds, modunload_thread will
 847          * remain disabled until after cpr suspend passes the
 848          * C_ST_STOP_KERNEL_THREADS state. If any failure happens before
 849          * cpr suspend reaches this state, we'll need to enable modunload
 850          * thread during rollback.
 851          */
 852         if (CPR->c_substate == C_ST_DISABLE_UFS_LOGGING ||
 853             CPR->c_substate == C_ST_STATEF_ALLOC ||
 854             CPR->c_substate == C_ST_SUSPEND_DEVICES ||
 855             CPR->c_substate == C_ST_STOP_KERNEL_THREADS) {
 856                 PMD(PMD_SX, ("cpr_resume: reattach noinvol fini\n"))
 857                 pm_reattach_noinvol_fini();
 858         }
 859
 860         PMD(PMD_SX, ("cpr_resume: CPR POST USER callback\n"))
 861         (void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME);
 862         PMD(PMD_SX, ("cpr_resume: CPR PROMPRINTF callback\n"))
 863         (void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME);
 864
 865         PMD(PMD_SX, ("cpr_resume: restore direct levels\n"))
 866         pm_restore_direct_levels();
 867
 868 rb_stop_user_threads:
 869         CPR_DEBUG(CPR_DEBUG1, "starting user threads...");
 870         PMD(PMD_SX, ("cpr_resume: starting user threads\n"))
 871         cpr_start_user_threads();
 872         CPR_DEBUG(CPR_DEBUG1, "done\n");
 873         /*
 874          * Ask Xorg to resume the frame buffer, and wait for it to happen
 875          */
 876         mutex_enter(&srn_clone_lock);
 877         if (srn_signal) {
 878                 PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
 879                     "SRN_NORMAL_RESUME)\n"))
 880                 srn_inuse = 1;          /* because (*srn_signal) cv_waits */
 881                 (*srn_signal)(SRN_TYPE_APM, SRN_NORMAL_RESUME);
 882                 srn_inuse = 0;
 883         } else {
 884                 PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
 885         }
 886         mutex_exit(&srn_clone_lock);
 887
 888
 889 rb_others:
 890         PMD(PMD_SX, ("cpr_resume: dep thread\n"))
 891         pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL,
 892             PM_DEP_WAIT, NULL, 0);
 893
 894         PMD(PMD_SX, ("cpr_resume: CPR PM callback\n"))
 895         (void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME);
 896
 897         if (cpr_suspend_succeeded) {
 898                 cpr_stat_record_events();
 899         }
 900
 901
 902         i_cpr_free_cpus();
 903         CPR_DEBUG(CPR_DEBUG1, "Sending SIGTHAW...");
 904         PMD(PMD_SX, ("cpr_resume: SIGTHAW\n"))
 905         cpr_signal_user(SIGTHAW);
 906         CPR_DEBUG(CPR_DEBUG1, "done\n");
 907
 908         CPR_STAT_EVENT_END("Resume Total");
 909
 910         CPR_STAT_EVENT_START_TMZ("WHOLE CYCLE", &wholecycle_tv);
 911         CPR_STAT_EVENT_END("WHOLE CYCLE");
 912
 913         if (cpr_debug & CPR_DEBUG1)
 914                 cmn_err(CE_CONT, "\nThe system is back where you left!\n");
 915
 916         CPR_STAT_EVENT_START("POST CPR DELAY");
 917
 918 #ifdef CPR_STAT
 919         ctp = &cpr_term.tm_shutdown;
 920         CPR_STAT_EVENT_START_TMZ("PWROFF TIME", ctp);
 921         CPR_STAT_EVENT_END_TMZ("PWROFF TIME", &pwron_tv);
 922
 923         CPR_STAT_EVENT_PRINT();
 924 #endif /* CPR_STAT */
 925
 926         PMD(PMD_SX, ("cpr_resume returns %x\n", rc))
 927         return (rc);
 928 }
 929
 930 /*
 931  * Set up for a suspend attempt: call cpr_stat_init(), clear the
 932  * statefile terminator fields and the success flag, and for
 933  * CPR_TODISK record the pfn of the current thread structure.
 934  */
 935 static void
 936 cpr_suspend_init(int sleeptype)
 937 {
 938         cpr_stat_init();
 939
 940         /*
 941          * A previous cpr_suspend() may have failed before cpr_dump()
 942          * rewrote the statefile terminator, leaving old values behind.
 943          * The terminator is consulted to decide whether suspend
 944          * succeeded, so wipe its size and timestamps right away.
 945          */
 946         cpr_term.real_statef_size = 0;
 947         bzero(&cpr_term.tm_shutdown, sizeof (cpr_time_t));
 948         bzero(&cpr_term.tm_cprboot_start, sizeof (cpr_time_t));
 949         bzero(&cpr_term.tm_cprboot_end, sizeof (cpr_time_t));
 950
 951         cpr_suspend_succeeded = 0;
 952
 953         /* the thread pfn is only looked up for CPR_TODISK */
 954         if (sleeptype != CPR_TODISK)
 955                 return;
 956
 957         /*
 958          * Find the pfn backing our thread structure.  It should never
 959          * be invalid, and the whole structure is expected to sit
 960          * within that single pfn.
 961          */
 962         curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread);
 963         ASSERT(curthreadpfn != PFN_INVALID);
 964         ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat,
 965             (caddr_t)curthread + sizeof (kthread_t) - 1));
 966 }
 967
 968 /*
 969  * Online every cpu that is not currently active, tagging each one
 970  * with CPU_CPR_ONLINE.  If any cpu_online() call fails, the cpus
 971  * tagged so far are offlined again via cpr_restore_offline() and the
 972  * failing return code is handed back; otherwise 0 is returned.
 973  * The caller must hold cpu_lock.
 974  */
 975 static int
 976 cpr_all_online(void)
 977 {
 978         cpu_t   *cpu;
 979         int     error;
 980
 981         ASSERT(MUTEX_HELD(&cpu_lock));
 982
 983         cpu = cpu_list;
 984         do {
 985                 /* start from a clean tag before deciding about this cpu */
 986                 cpu->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
 987                 if (CPU_ACTIVE(cpu))
 988                         continue;
 989
 990                 error = cpu_online(cpu);
 991                 if (error != 0) {
 992                         /* undo the onlines done above */
 993                         cpr_restore_offline();
 994                         return (error);
 995                 }
 996                 CPU_SET_CPR_FLAGS(cpu, CPU_CPR_ONLINE);
 997         } while ((cpu = cpu->cpu_next) != cpu_list);
 998
 999         return (0);
1000 }
1001
1002 /*
1003  * Undo cpr_all_online(): walk the cpu list and offline each cpu for
1004  * which CPU_CPR_IS_ONLINE() holds, then clear its CPU_CPR_ONLINE flag.
1005  * A failure of cpu_offline() is checked only by ASSERT.  The caller
1006  * must hold cpu_lock.
1007  */
1008 static void
1009 cpr_restore_offline(void)
1010 {
1011         cpu_t   *cpu;
1012         int     error;
1013
1014         ASSERT(MUTEX_HELD(&cpu_lock));
1015
1016         cpu = cpu_list;
1017         do {
1018                 if (!CPU_CPR_IS_ONLINE(cpu))
1019                         continue;
1020
1021                 /*
1022                  * The cpu was offline before cpr_all_online() brought
1023                  * it up successfully, so taking it back down is
1024                  * expected to succeed.
1025                  */
1026                 error = cpu_offline(cpu, 0);
1027                 ASSERT(error == 0);
1028                 cpu->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1029         } while ((cpu = cpu->cpu_next) != cpu_list);
1030 }