sys/kern/kern_shutdown.c

   1 /*-
   2  * Copyright (c) 1986, 1988, 1991, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  * (c) UNIX System Laboratories, Inc.
   5  * All or some portions of this file are derived from material licensed
   6  * to the University of California by American Telephone and Telegraph
   7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   8  * the permission of UNIX System Laboratories, Inc.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 3. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  *      @(#)kern_shutdown.c     8.3 (Berkeley) 1/21/94
  35  * $FreeBSD: src/sys/kern/kern_shutdown.c,v 1.72.2.12 2002/02/21 19:15:10 dillon Exp $
  36  */
  37
  38 #include "opt_ddb.h"
  39 #include "opt_ddb_trace.h"
  40 #include "opt_panic.h"
  41 #include "use_gpio.h"
  42
  43 #include <sys/param.h>
  44 #include <sys/systm.h>
  45 #include <sys/eventhandler.h>
  46 #include <sys/buf.h>
  47 #include <sys/disk.h>
  48 #include <sys/diskslice.h>
  49 #include <sys/reboot.h>
  50 #include <sys/proc.h>
  51 #include <sys/priv.h>
  52 #include <sys/fcntl.h>          /* FREAD        */
  53 #include <sys/stat.h>           /* S_IFCHR      */
  54 #include <sys/vnode.h>
  55 #include <sys/kernel.h>
  56 #include <sys/kerneldump.h>
  57 #include <sys/kthread.h>
  58 #include <sys/malloc.h>
  59 #include <sys/mount.h>
  60 #include <sys/queue.h>
  61 #include <sys/sysctl.h>
  62 #include <sys/vkernel.h>
  63 #include <sys/conf.h>
  64 #include <sys/sysproto.h>
  65 #include <sys/device.h>
  66 #include <sys/cons.h>
  67 #include <sys/kbio.h>
  68 #include <sys/shm.h>
  69 #include <sys/kern_syscall.h>
  70 #include <vm/vm_map.h>
  71 #include <vm/pmap.h>
  72
  73 #include <sys/thread2.h>
  74 #include <sys/buf2.h>
  75 #include <sys/mplock2.h>
  76
  77 #include <machine/cpu.h>
  78 #include <machine/clock.h>
  79 #include <machine/md_var.h>
  80 #include <machine/smp.h>                /* smp_active_mask, cpuid */
  81 #include <machine/vmparam.h>
  82 #include <machine/thread.h>
  83
  84 #include <sys/signalvar.h>
  85
  86 #include <sys/wdog.h>
  87 #include <dev/acpica/acpi_pvpanic/panic_notifier.h>
  88 #include <dev/misc/gpio/gpio.h>
  89
  90 #ifndef PANIC_REBOOT_WAIT_TIME
  91 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
  92 #endif
  93
  94 /*
  95  * Note that stdarg.h and the ANSI style va_start macro are used for both
  96  * ANSI and traditional C compilers.  We use the machine version to stay
  97  * within the confines of the kernel header files.
  98  */
  99 #include <machine/stdarg.h>
 100
 101 #ifdef DDB
     /*
      * Panic-time debugger knobs.  They only exist in kernels built with DDB
      * and are both exported read-write under the debug sysctl tree.
      */
 102 #include <ddb/ddb.h>
     /*
      * Non-zero: panic() enters the debugger.  The compiled-in default is
      * off when DDB_UNATTENDED is defined and on otherwise.
      */
 103 #ifdef DDB_UNATTENDED
 104 int debugger_on_panic = 0;
 105 #else
 106 int debugger_on_panic = 1;
 107 #endif
 108 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW,
 109         &debugger_on_panic, 0, "Run debugger on kernel panic");
 110
     /*
      * Non-zero: panic() prints a backtrace the first time it is entered.
      * The compiled-in default is on when DDB_TRACE is defined.
      */
 111 #ifdef DDB_TRACE
 112 int trace_on_panic = 1;
 113 #else
 114 int trace_on_panic = 0;
 115 #endif
 116 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RW,
 117         &trace_on_panic, 0, "Print stack trace on kernel panic");
 118 #endif
 119
     /*
      * While this is zero panic() adds RB_NOSYNC to its boot options, so
      * boot() skips the filesystem sync.
      */
 120 static int sync_on_panic = 0;
 121 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW,
 122         &sync_on_panic, 0, "Do a sync before rebooting from a panic");
 123
     /* Parent node for the _kern_shutdown sysctls declared later in the file. */
 124 SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment");
 125
 126 /*
 127  * panicstr is NULL until the first call to panic(), which points it at
 128  * the formatted panic message.  A non-NULL value is used as the flag
      * that the kernel has already panicked.
 129  */
 130 const char *panicstr;
 131
 132 int dumping;                            /* system is dumping */
 133 static struct dumperinfo dumper;        /* selected dumper */
 134
 135 globaldata_t panic_cpu_gd;              /* which cpu took the panic */
     /*
      * Copy of the panicking thread's token array and the number of tokens
      * it held, saved by the first panic() before the tokens are released.
      */
 136 struct lwkt_tokref panic_tokens[LWKT_MAXTOKENS];
 137 int panic_tokens_count;
 138
 139 int bootverbose = 0;                    /* note: assignment to force non-bss */
 140 SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW,
 141            &bootverbose, 0, "Verbose kernel messages");
 142
     /* boot() neither syncs filesystems nor dumps while cold is non-zero. */
 143 int cold = 1;                           /* note: assignment to force non-bss */
 144 int dumplo;                             /* OBSOLETE - savecore compat */
 145 u_int64_t dumplo64;
 146
     /* Forward declarations of the file-local helpers defined below. */
 147 static void boot (int) __dead2;
 148 static int setdumpdev (cdev_t dev);
 149 static void poweroff_wait (void *, int);
 150 static void print_uptime (void);
 151 static void shutdown_halt (void *junk, int howto);
 152 static void shutdown_panic (void *junk, int howto);
 153 static void shutdown_reset (void *junk, int howto);
 154 static int shutdown_busycount1(struct buf *bp, void *info);
 155 static int shutdown_busycount2(struct buf *bp, void *info);
 156 static void shutdown_cleanup_proc(struct proc *p);
 157
 158 /* register various local shutdown events */
 159 static void
 160 shutdown_conf(void *unused)
 161 {
 162         EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST);
 163         EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100);
 164         EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100);
 165         EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200);
 166 }
 167
 168 SYSINIT(shutdown_conf, SI_BOOT2_MACHDEP, SI_ORDER_ANY, shutdown_conf, NULL);
 169
 170 /* ARGSUSED */
 171
 172 /*
 173  * The system call that results in a reboot
 174  *
 175  * MPALMOSTSAFE
 176  */
 177 int
 178 sys_reboot(struct reboot_args *uap)
 179 {
 180         struct thread *td = curthread;
 181         int error;
 182
 183         if ((error = priv_check(td, PRIV_REBOOT)))
 184                 return (error);
 185
 186         get_mplock();
 187         boot(uap->opt);
 188         rel_mplock();
 189         return (0);
 190 }
 191
 192 /*
 193  * Called by events that want to shut down.. e.g  <CTL><ALT><DEL> on a PC
 194  */
 195 static int shutdown_howto = 0;
 196
 197 void
 198 shutdown_nice(int howto)
 199 {
 200         shutdown_howto = howto;
 201
 202         /* Send a signal to init(8) and have it shutdown the world */
 203         if (initproc != NULL) {
 204                 ksignal(initproc, SIGINT);
 205         } else {
 206                 /* No init(8) running, so simply reboot */
 207                 boot(RB_NOSYNC);
 208         }
 209         return;
 210 }
 211 static int      waittime = -1;  /* < 0 until boot() starts its sync; set to 0 there so the sync runs once */
 212 struct pcb      dumppcb;        /* context saved by savectx() in panic() */
 213 struct thread   *dumpthread;    /* set to curthread by panic() */
 214
 215 static void
 216 print_uptime(void)
 217 {
 218         int f;
 219         struct timespec ts;
 220
 221         getnanouptime(&ts);
 222         kprintf("Uptime: ");
 223         f = 0;
 224         if (ts.tv_sec >= 86400) {
 225                 kprintf("%ldd", ts.tv_sec / 86400);
 226                 ts.tv_sec %= 86400;
 227                 f = 1;
 228         }
 229         if (f || ts.tv_sec >= 3600) {
 230                 kprintf("%ldh", ts.tv_sec / 3600);
 231                 ts.tv_sec %= 3600;
 232                 f = 1;
 233         }
 234         if (f || ts.tv_sec >= 60) {
 235                 kprintf("%ldm", ts.tv_sec / 60);
 236                 ts.tv_sec %= 60;
 237                 f = 1;
 238         }
 239         kprintf("%lds\n", ts.tv_sec);
 240 }
 241
 242 /*
 243  * Go through the rigmarole of shutting down.
 244  * This used to be in machdep.c but I'll be damned if I could see
 245  * anything machine dependent in it.
      *
      * howto is a mask of RB_* flags; flags stashed in shutdown_howto by
      * shutdown_nice() are merged in.  In order, the function:
      *   1. raises its own priority and, unless panicking, moves to cpu 0;
      *   2. invokes the shutdown_pre_sync handlers;
      *   3. unless panicking, drops filesystem references held by the
      *      current process, proc0 and init;
      *   4. syncs the filesystems (and unmounts them when the sync succeeded
      *      and we are not panicking) unless RB_NOSYNC is set, the system
      *      is still cold, or a sync was already started;
      *   5. dumps when RB_DUMP is set without RB_HALT and not cold;
      *   6. invokes the shutdown_post_sync and shutdown_final handlers.
      *
      * Declared __dead2: it ends in an endless loop and does not return.
 246  */
 247 static void
 248 boot(int howto)
 249 {
 250         /*
 251          * Get rid of any user scheduler baggage and then give
 252          * us a high priority.
 253          */
 254         if (curthread->td_release)
 255                 curthread->td_release(curthread);
 256         lwkt_setpri_self(TDPRI_MAX);
 257
 258         /* collect extra flags that shutdown_nice might have set */
 259         howto |= shutdown_howto;
 260
 261         /*
 262          * We really want to shutdown on the BSP.  Subsystems such as ACPI
 263          * can't power-down the box otherwise.
              *
              * The migration to cpu 0 is skipped when panicking
              * (panicstr != NULL).
 264          */
 265         if (!CPUMASK_ISUP(smp_active_mask)) {
 266                 kprintf("boot() called on cpu#%d\n", mycpu->gd_cpuid);
 267         }
 268         if (panicstr == NULL && mycpu->gd_cpuid != 0) {
 269                 kprintf("Switching to cpu #0 for shutdown\n");
 270                 lwkt_setcpu_self(globaldata_find(0));
 271         }
 272         /*
 273          * Do any callouts that should be done BEFORE syncing the filesystems.
 274          */
 275         EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
 276
 277         /*
 278          * Try to get rid of any remaining FS references.  The calling
 279          * process, proc0, and init may still hold references.  The
 280          * VFS cache subsystem may still hold a root reference to root.
 281          *
 282          * XXX this needs work.  We really need to SIGSTOP all remaining
 283          * processes in order to avoid blowups due to proc0's filesystem
 284          * references going away.  For now just make sure that the init
 285          * process is stopped.
 286          */
 287         if (panicstr == NULL) {
 288                 shutdown_cleanup_proc(curproc);
 289                 shutdown_cleanup_proc(&proc0);
 290                 if (initproc) {
 291                         if (initproc != curproc) {
                                     /*
                                      * Stop init, then sleep hz / 20 ticks
                                      * before touching its references.
                                      */
 292                                 ksignal(initproc, SIGSTOP);
 293                                 tsleep(boot, 0, "shutdn", hz / 20);
 294                         }
 295                         shutdown_cleanup_proc(initproc);
 296                 }
 297                 vfs_cache_setroot(NULL, NULL);
 298         }
 299
 300         /*
 301          * Now sync filesystems
              *
              * waittime starts out negative and is set to 0 below, so this
              * block is entered at most once even if boot() runs again.
 302          */
 303         if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
 304                 int iter, nbusy, pbusy;
 305                 int zcount;
 306
 307                 waittime = 0;
 308                 zcount = 0;
 309                 kprintf("\nsyncing disks... ");
 310
 311                 sys_sync(NULL);
 312
 313                 /*
 314                  * With soft updates, some buffers that are written will be
 315                  * remarked as dirty until other buffers are written.
 316                  *
 317                  * sys_sync() usually runs asynchronously, to give us a
 318                  * better chance of syncing the rest of the filesystems when
 319                  * one or more of them are stuck.
                      *
                      * Loop state:
                      *   nbusy  - busy buffer count from the current scan
                      *   pbusy  - count from the previous scan
                      *   zcount - consecutive scans that found nothing busy;
                      *            three in a row end the loop successfully
                      * Scans with iter <= 10 use the pass 1 counter, later
                      * scans use the pass 2 counter.
 320                  */
 321                 for (iter = pbusy = 0; iter < 20 + zcount; iter++) {
 322                         if (iter <= 10)
 323                                 nbusy = scan_all_buffers(shutdown_busycount1,
 324                                                          &iter);
 325                         else
 326                                 nbusy = scan_all_buffers(shutdown_busycount2,
 327                                                          &iter);
 328                         kprintf("%d ", nbusy);
 329                         if (nbusy == 0) {
 330                                 if (++zcount == 3)
 331                                         break;
 332                         } else {
 333                                 zcount = 0;
 334                         }
 335
 336                         /*
 337                          * There could be a lot to sync, only allow iter to
 338                          * proceed while there is progress.
                              *
                              * On progress iter is wound back to the start of
                              * the pass it is currently in (10 or 0).
 339                          */
 340                         if (nbusy < pbusy) {
 341                                 if (iter > 10)
 342                                         iter = 10;
 343                                 else
 344                                         iter = 0;
 345                         }
 346                         pbusy = nbusy;
 347
 348                         /*
 349                          * XXX:
 350                          * Process soft update work queue if buffers don't sync
 351                          * after 6 iterations by permitting the syncer to run.
 352                          */
 353                         if (iter > 5)
 354                                 bio_ops_sync(NULL);
 355
                             /* Sync again; the sleep grows with iter. */
 356                         sys_sync(NULL);
 357                         tsleep(boot, 0, "shutdn", hz * iter / 20 + 1);
 358                 }
 359                 kprintf("\n");
 360
 361                 if (zcount < 3) {
 362                         /*
 363                          * Failed to sync all blocks. Indicate this and don't
 364                          * unmount filesystems (thus forcing an fsck on reboot).
 365                          */
 366                         kprintf("giving up on %d buffers\n", nbusy);
 367 #ifdef DDB
 368                         if (debugger_on_panic)
 369                                 Debugger("busy buffer problem");
 370 #endif /* DDB */
 371                         tsleep(boot, 0, "shutdn", hz * 5 + 1);
 372                 } else {
 373                         kprintf("done\n");
 374
 375                         /*
 376                          * Unmount filesystems
                              *
                              * Skipped when panicking.
 377                          */
 378                         if (panicstr == NULL)
 379                                 vfs_unmountall();
 380                 }
 381                 tsleep(boot, 0, "shutdn", hz / 10 + 1);
 382         }
 383
 384         print_uptime();
 385
 386         /*
 387          * Dump before doing post_sync shutdown ops
              *
              * The critical section entered here is never exited; this
              * function does not return.  A dump is only taken when RB_DUMP
              * is set, RB_HALT is clear and the system is no longer cold.
 388          */
 389         crit_enter();
 390         if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold) {
 391                 dumpsys();
 392         }
 393
 394         /*
 395          * Ok, now do things that assume all filesystem activity has
 396          * been completed.  This will also call the device shutdown
 397          * methods.
 398          */
 399         EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
 400
 401         /* Now that we're going to really halt the system... */
 402         EVENTHANDLER_INVOKE(shutdown_final, howto);
 403
 404         for(;;) ;       /* safety against shutdown_reset not working */
 405         /* NOTREACHED */
 406 }
 407
 408 /*
 409  * Pass 1 - Figure out if there are any busy or dirty buffers still present.
 410  *
 411  *      We ignore TMPFS mounts in this pass.
 412  */
 413 static int
 414 shutdown_busycount1(struct buf *bp, void *info __unused)
 415 {
 416         struct vnode *vp;
 417
 418         if ((vp = bp->b_vp) != NULL && vp->v_tag == VT_TMPFS)
 419                 return (0);
 420         if ((bp->b_flags & B_INVAL) == 0 && BUF_LOCKINUSE(bp))
 421                 return(1);
 422         if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI)
 423                 return (1);
 424         return (0);
 425 }
 426
 427 /*
 428  * Pass 2 - only run after pass 1 has completed or has given up
 429  *
 430  *      We ignore TMPFS, NFS, MFS, and SMBFS mounts in this pass.
 431  */
 432 static int
 433 shutdown_busycount2(struct buf *bp, void *info)
 434 {
 435         struct vnode *vp;
 436         int *iterp = info;
 437         const char *mpath;
 438
 439         /*
 440          * Ignore tmpfs and nfs mounts
 441          */
 442         if ((vp = bp->b_vp) != NULL) {
 443                 if (vp->v_tag == VT_TMPFS)
 444                         return (0);
 445                 if (vp->v_tag == VT_NFS)
 446                         return (0);
 447                 if (vp->v_tag == VT_MFS)
 448                         return (0);
 449                 if (vp->v_tag == VT_SMBFS)
 450                         return (0);
 451         }
 452
 453         /*
 454          * Only count buffers stuck on I/O, ignore everything else
 455          */
 456         if (((bp->b_flags & B_INVAL) == 0 && BUF_LOCKINUSE(bp)) ||
 457             ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
 458                 /*
 459                  * Only count buffers undergoing write I/O
 460                  * on the related vnode.
 461                  */
 462                 if (bp->b_vp == NULL ||
 463                     bio_track_active(&bp->b_vp->v_track_write) == 0) {
 464                         return (0);
 465                 }
 466                 if (*iterp > 15) {
 467                         mpath = "?";
 468                         if (bp->b_vp->v_mount)
 469                                 mpath = bp->b_vp->v_mount->mnt_stat.f_mntonname;
 470
 471                         kprintf("%p on %s, flags:%08x, loffset:%jd, "
 472                                 "doffset:%jd\n",
 473                                 bp,
 474                                 mpath,
 475                                 bp->b_flags,
 476                                 (intmax_t)bp->b_loffset,
 477                                 (intmax_t)bp->b_bio2.bio_offset);
 478                 }
 479                 return(1);
 480         }
 481         return(0);
 482 }
 483
 484 /*
 485  * If the shutdown was a clean halt, behave accordingly.
 486  */
 487 static void
 488 shutdown_halt(void *junk, int howto)
 489 {
 490         if (howto & RB_HALT) {
 491                 kprintf("\n");
 492                 kprintf("The operating system has halted.\n");
 493 #ifdef _KERNEL_VIRTUAL
 494                 cpu_halt();
 495 #else
 496                 kprintf("Please press any key to reboot.\n\n");
 497                 switch (cngetc()) {
 498                 case -1:                /* No console, just die */
 499                         cpu_halt();
 500                         /* NOTREACHED */
 501                 default:
 502                         howto &= ~RB_HALT;
 503                         break;
 504                 }
 505 #endif
 506         }
 507 }
 508
 509 /*
 510  * Check to see if the system paniced, pause and then reboot
 511  * according to the specified delay.
 512  */
 513 static void
 514 shutdown_panic(void *junk, int howto)
 515 {
 516         int loop;
 517         int c;
 518
 519         if (howto & RB_DUMP) {
 520                 if (PANIC_REBOOT_WAIT_TIME != 0) {
 521                         if (PANIC_REBOOT_WAIT_TIME != -1) {
 522                                 kprintf("Automatic reboot in %d seconds - "
 523                                        "press a key on the console to abort\n",
 524                                         PANIC_REBOOT_WAIT_TIME);
 525                                 for (loop = PANIC_REBOOT_WAIT_TIME * 10;
 526                                      loop > 0; --loop) {
 527                                         DELAY(1000 * 100); /* 1/10th second */
 528                                         /* Did user type a key? */
 529                                         c = cncheckc();
 530                                         if (c != -1 && c != NOKEY)
 531                                                 break;
 532                                 }
 533                                 if (!loop)
 534                                         return;
 535                         }
 536                 } else { /* zero time specified - reboot NOW */
 537                         return;
 538                 }
 539                 kprintf("--> Press a key on the console to reboot,\n");
 540                 kprintf("--> or switch off the system now.\n");
 541                 cngetc();
 542         }
 543 }
 544
 545 /*
 546  * Everything done, now reset
 547  */
 548 static void
 549 shutdown_reset(void *junk, int howto)
 550 {
 551         kprintf("Rebooting...\n");
 552         DELAY(1000000); /* wait 1 sec for kprintf's to complete and be read */
 553         /* cpu_boot(howto); */ /* doesn't do anything at the moment */
 554         cpu_reset();
 555         /* NOTREACHED */ /* assuming reset worked */
 556 }
 557
 558 /*
 559  * Try to remove FS references in the specified process.  This function
 560  * is used during shutdown
 561  */
 562 static
 563 void
 564 shutdown_cleanup_proc(struct proc *p)
 565 {
 566         struct filedesc *fdp;
 567         struct vmspace *vm;
 568
 569         if (p == NULL)
 570                 return;
 571         if ((fdp = p->p_fd) != NULL) {
 572                 kern_closefrom(0);
 573                 if (fdp->fd_cdir) {
 574                         cache_drop(&fdp->fd_ncdir);
 575                         vrele(fdp->fd_cdir);
 576                         fdp->fd_cdir = NULL;
 577                 }
 578                 if (fdp->fd_rdir) {
 579                         cache_drop(&fdp->fd_nrdir);
 580                         vrele(fdp->fd_rdir);
 581                         fdp->fd_rdir = NULL;
 582                 }
 583                 if (fdp->fd_jdir) {
 584                         cache_drop(&fdp->fd_njdir);
 585                         vrele(fdp->fd_jdir);
 586                         fdp->fd_jdir = NULL;
 587                 }
 588         }
 589         if (p->p_vkernel)
 590                 vkernel_exit(p);
 591         if (p->p_textvp) {
 592                 vrele(p->p_textvp);
 593                 p->p_textvp = NULL;
 594         }
 595         vm = p->p_vmspace;
 596         if (vm != NULL) {
 597                 pmap_remove_pages(vmspace_pmap(vm),
 598                                   VM_MIN_USER_ADDRESS,
 599                                   VM_MAX_USER_ADDRESS);
 600                 vm_map_remove(&vm->vm_map,
 601                               VM_MIN_USER_ADDRESS,
 602                               VM_MAX_USER_ADDRESS);
 603         }
 604 }
 605
 606 /*
 607  * Magic number for savecore
 608  *
 609  * exported (symorder) and used at least by savecore(8)
 610  *
 611  * Mark it as used so that gcc doesn't optimize it away.
      *
      * Neither dumpmag nor dumpsize is referenced by the code visible in
      * this part of the file, hence the __used__ attribute on both.
 612  */
 613 __attribute__((__used__))
 614         static u_long const dumpmag = 0x8fca0101UL;
 615
 616 __attribute__((__used__))
 617         static int      dumpsize = 0;           /* also for savecore */
 618
     /*
      * Exported read-write as the do_dump sysctl under _machdep.
      * NOTE(review): presumably consulted by the dump path, which is not
      * visible in this part of the file - confirm.
      */
 619 static int      dodump = 1;
 620
 621 SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0,
 622     "Try to perform coredump on kernel panic");
 623
 624 void
 625 mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver,
 626     uint64_t dumplen, uint32_t blksz)
 627 {
 628         bzero(kdh, sizeof(*kdh));
 629         strncpy(kdh->magic, magic, sizeof(kdh->magic));
 630         strncpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
 631         kdh->version = htod32(KERNELDUMPVERSION);
 632         kdh->architectureversion = htod32(archver);
 633         kdh->dumplength = htod64(dumplen);
 634         kdh->dumptime = htod64(time_second);
 635         kdh->blocksize = htod32(blksz);
 636         strncpy(kdh->hostname, hostname, sizeof(kdh->hostname));
 637         strncpy(kdh->versionstring, version, sizeof(kdh->versionstring));
 638         if (panicstr != NULL)
 639                 strncpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
 640         kdh->parity = kerneldump_parity(kdh);
 641 }
 642
 643 static int
 644 setdumpdev(cdev_t dev)
 645 {
 646         int error;
 647         int doopen;
 648
 649         if (dev == NULL) {
 650                 disk_dumpconf(NULL, 0/*off*/);
 651                 dumpdev = NULL;
 652                 return (0);
 653         }
 654
 655         /*
 656          * We have to open the device before we can perform ioctls on it,
 657          * or the slice/label data may not be present.  Device opens are
 658          * usually tracked by specfs, but the dump device can be set in
 659          * early boot and may not be open so this is somewhat of a hack.
 660          */
 661         doopen = (dev->si_sysref.refcnt == 1);
 662         if (doopen) {
 663                 error = dev_dopen(dev, FREAD, S_IFCHR, proc0.p_ucred, NULL);
 664                 if (error)
 665                         return (error);
 666         }
 667         error = disk_dumpconf(dev, 1/*on*/);
 668         if (error == 0)
 669                 dumpdev = dev;
 670
 671         return error;
 672 }
 673
 674 /* ARGSUSED */
 675 static void dump_conf (void *dummy);
 676 static void
 677 dump_conf(void *dummy)
 678 {
 679         char *path;
 680         cdev_t dev;
 681         int _dummy;
 682
 683         path = kmalloc(MNAMELEN, M_TEMP, M_WAITOK);
 684         if (TUNABLE_STR_FETCH("dumpdev", path, MNAMELEN) != 0) {
 685                 /*
 686                  * Make sure all disk devices created so far have also been
 687                  * probed, and also make sure that the newly created device
 688                  * nodes for probed disks are ready, too.
 689                  *
 690                  * XXX - Delay an additional 2 seconds to help drivers which
 691                  *       pickup devices asynchronously and are not caught by
 692                  *       CAM's initial probe.
 693                  */
 694                 sync_devs();
 695                 tsleep(&_dummy, 0, "syncer", hz*2);
 696
 697                 dev = kgetdiskbyname(path);
 698                 if (dev != NULL)
 699                         dumpdev = dev;
 700         }
 701         kfree(path, M_TEMP);
 702         if (setdumpdev(dumpdev) != 0)
 703                 dumpdev = NULL;
 704 }
 705
 706 SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL);
 707
 708 static int
 709 sysctl_kern_dumpdev(SYSCTL_HANDLER_ARGS)
 710 {
 711         int error;
 712         udev_t ndumpdev;
 713
 714         ndumpdev = dev2udev(dumpdev);
 715         error = sysctl_handle_opaque(oidp, &ndumpdev, sizeof ndumpdev, req);
 716         if (error == 0 && req->newptr != NULL)
 717                 error = setdumpdev(udev2dev(ndumpdev, 0));
 718         return (error);
 719 }
 720
 721 SYSCTL_PROC(_kern, KERN_DUMPDEV, dumpdev, CTLTYPE_OPAQUE|CTLFLAG_RW,
 722         0, sizeof dumpdev, sysctl_kern_dumpdev, "T,udev_t", "");
 723
 724 static struct panicerinfo *panic_notifier;
 725
 726 int
 727 set_panic_notifier(struct panicerinfo *info)
 728 {
 729         if (info == NULL)
 730                 panic_notifier = NULL;
 731         else if (panic_notifier != NULL)
 732                 return 1;
 733         else
 734                 panic_notifier = info;
 735
 736         return 0;
 737 }
 738
 739 /*
 740  * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
 741  * and then reboots.  If we are called twice, then we avoid trying to sync
 742  * the disks as this often leads to recursive panics.
      *
      * fmt and the variable arguments form a kprintf-style message.  The
      * function ends by calling boot(), which is declared __dead2, so it is
      * not expected to return.
 743  */
 744 void
 745 panic(const char *fmt, ...)
 746 {
 747         int bootopt, newpanic;
 748         globaldata_t gd = mycpu;
 749         thread_t td = gd->gd_curthread;
 750         __va_list ap;
             /* static: panicstr is left pointing at this buffer (see below) */
 751         static char buf[256];
 752
 753         /*
 754          * If a panic occurs on multiple cpus before the first is able to
 755          * halt the other cpus, only one cpu is allowed to take the panic.
 756          * Attempt to be verbose about this situation but if the kprintf()
 757          * itself panics don't let us overrun the kernel stack.
 758          *
 759          * Be very nasty about descheduling our thread at the lowest
 760          * level possible in an attempt to freeze the thread without
 761          * inducing further panics.
 762          *
 763          * Bumping gd_trap_nesting_level will also bypass assertions in
 764          * lwkt_switch() and allow us to switch away even if we are a
 765          * FAST interrupt or IPI.
 766          *
 767          * The setting of panic_cpu_gd also determines how kprintf()
 768          * spin-locks itself.  DDB can set panic_cpu_gd as well.
 769          */
 770         for (;;) {
 771                 globaldata_t xgd = panic_cpu_gd;
 772
 773                 /*
 774                  * Someone else got the panic cpu
                      *
                      * This cpu parks its thread in the inner loop forever.
                      * The message is suppressed once the nesting level
                      * reaches 25.
 775                  */
 776                 if (xgd && xgd != gd) {
 777                         crit_enter();
 778                         ++mycpu->gd_trap_nesting_level;
 779                         if (mycpu->gd_trap_nesting_level < 25) {
 780                                 kprintf("SECONDARY PANIC ON CPU %d THREAD %p\n",
 781                                         mycpu->gd_cpuid, td);
 782                         }
 783                         td->td_release = NULL;  /* be a grinch */
 784                         for (;;) {
 785                                 lwkt_deschedule_self(td);
 786                                 lwkt_switch();
 787                         }
 788                         /* NOT REACHED */
 789                         /* --mycpu->gd_trap_nesting_level */
 790                         /* crit_exit() */
 791                 }
 792
 793                 /*
 794                  * Reentrant panic
                      *
                      * This cpu already owns the panic; carry on.
 795                  */
 796                 if (xgd && xgd == gd)
 797                         break;
 798
 799                 /*
 800                  * We got it
                      *
                      * panic_cpu_gd was NULL when sampled; claim it
                      * atomically and retry the loop if the claim fails.
 801                  */
 802                 if (atomic_cmpset_ptr(&panic_cpu_gd, NULL, gd))
 803                         break;
 804         }
 805         /*
 806          * Try to get the system into a working state.  Save information
 807          * we are about to destroy.
              *
              * The token snapshot is only taken on the first panic
              * (panicstr still NULL).
 808          */
 809         kvcreinitspin();
 810         if (panicstr == NULL) {
 811                 bcopy(td->td_toks_array, panic_tokens, sizeof(panic_tokens));
 812                 panic_tokens_count = td->td_toks_stop - &td->td_toks_base;
 813         }
 814         lwkt_relalltokens(td);
 815         td->td_toks_stop = &td->td_toks_base;
 816         if (gd->gd_spinlocks)
 817                 kprintf("panic with %d spinlocks held\n", gd->gd_spinlocks);
 818         gd->gd_spinlocks = 0;
 819
 820         /*
 821          * Setup
              *
              * RB_NOSYNC is added when sync_on_panic is zero and also on any
              * panic after the first.  newpanic is 1 only for the first
              * panic.
 822          */
 823         bootopt = RB_AUTOBOOT | RB_DUMP;
 824         if (sync_on_panic == 0)
 825                 bootopt |= RB_NOSYNC;
 826         newpanic = 0;
 827         if (panicstr) {
 828                 bootopt |= RB_NOSYNC;
 829         } else {
 830                 panicstr = fmt;
 831                 newpanic = 1;
 832         }
 833
 834         /*
 835          * Format the panic string.
              *
              * On a first panic panicstr was set to fmt just above; once the
              * message has been formatted it is repointed at buf.
 836          */
 837         __va_start(ap, fmt);
 838         kvsnprintf(buf, sizeof(buf), fmt, ap);
 839         if (panicstr == fmt)
 840                 panicstr = buf;
 841         __va_end(ap);
             /* Call the registered notifier, if any, before printing. */
 842         if (panic_notifier != NULL)
 843                 panic_notifier->notifier(panic_notifier->arg);
 844         kprintf("panic: %s\n", buf);
 845         /* two separate prints in case of an unmapped page and trap */
 846         kprintf("cpuid = %d\n", mycpu->gd_cpuid);
 847
 848 #if (NGPIO > 0) && defined(ERROR_LED_ON_PANIC)
 849         led_switch("error", 1);
 850 #endif
 851
 852 #if defined(WDOG_DISABLE_ON_PANIC)
 853         wdog_disable();
 854 #endif
 855
 856         /*
 857          * Make sure kgdb knows who we are, there won't be a stoppcbs[]
 858          * entry since our cpu wasn't stopped.
 859          */
 860         savectx(&dumppcb);
 861         dumpthread = curthread;
 862
 863         /*
 864          * Enter the debugger or fall through & dump.  Entering the
 865          * debugger will stop cpus.  If not entering the debugger stop
 866          * cpus here.
 867          *
 868          * Limit the trace history to leave more panic data on a
 869          * potentially row-limited console.
              *
              * Note the "else" inside the #if: with DDB compiled in, the
              * stop_cpus() call below is the else-branch of the
              * debugger_on_panic test; without DDB it only depends on
              * newpanic.
 870          */
 871
 872 #if defined(DDB)
 873         if (newpanic && trace_on_panic)
 874                 print_backtrace(6);
 875         if (debugger_on_panic)
 876                 Debugger("panic");
 877         else
 878 #endif
 879         if (newpanic)
 880                 stop_cpus(mycpu->gd_other_cpus);
 881         boot(bootopt);
 882 }
 883
 884 /*
 885  * Support for poweroff delay.
 886  */
 887 #ifndef POWEROFF_DELAY
 888 # define POWEROFF_DELAY 5000
 889 #endif
 890 static int poweroff_delay = POWEROFF_DELAY;
 891
 892 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
 893         &poweroff_delay, 0, "");
 894
 895 static void
 896 poweroff_wait(void *junk, int howto)
 897 {
 898         if(!(howto & RB_POWEROFF) || poweroff_delay <= 0)
 899                 return;
 900         DELAY(poweroff_delay * 1000);
 901 }
 902
 903 /*
 904  * Some system processes (e.g. syncer) need to be stopped at appropriate
 905  * points in their main loops prior to a system shutdown, so that they
 906  * won't interfere with the shutdown process (e.g. by holding a disk buf
 907  * to cause sync to fail).  For each of these system processes, register
 908  * shutdown_kproc() as a handler for one of shutdown events.
 909  */
 910 static int kproc_shutdown_wait = 60;
 911 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
 912     &kproc_shutdown_wait, 0, "");
 913
 914 void
 915 shutdown_kproc(void *arg, int howto)
 916 {
 917         struct thread *td;
 918         struct proc *p;
 919         int error;
 920
 921         if (panicstr)
 922                 return;
 923
 924         td = (struct thread *)arg;
 925         if ((p = td->td_proc) != NULL) {
 926             kprintf("Waiting (max %d seconds) for system process `%s' to stop...",
 927                 kproc_shutdown_wait, p->p_comm);
 928         } else {
 929             kprintf("Waiting (max %d seconds) for system thread %s to stop...",
 930                 kproc_shutdown_wait, td->td_comm);
 931         }
 932         error = suspend_kproc(td, kproc_shutdown_wait * hz);
 933
 934         if (error == EWOULDBLOCK)
 935                 kprintf("timed out\n");
 936         else
 937                 kprintf("stopped\n");
 938 }
 939
 940 /* Registration of dumpers */
 941 int
 942 set_dumper(struct dumperinfo *di)
 943 {
 944         if (di == NULL) {
 945                 bzero(&dumper, sizeof(dumper));
 946                 return 0;
 947         }
 948
 949         if (dumper.dumper != NULL)
 950                 return (EBUSY);
 951
 952         dumper = *di;
 953         return 0;
 954 }
 955
 956 void
 957 dumpsys(void)
 958 {
 959 #if defined (_KERNEL_VIRTUAL)
 960         /* vkernels don't support dumps */
 961         kprintf("vkernels don't support dumps\n");
 962         return;
 963 #endif
 964         /*
 965          * If there is a dumper registered and we aren't dumping already, call
 966          * the machine dependent dumpsys (md_dumpsys) to do the hard work.
 967          *
 968          * XXX: while right now the md_dumpsys() of x86 and x86_64 could be
 969          *      factored out completely into here, I rather keep them machine
 970          *      dependent in case we ever add a platform which does not share
 971          *      the same dumpsys() code, such as arm.
 972          */
 973         if (dumper.dumper != NULL && !dumping) {
 974                 dumping++;
 975                 md_dumpsys(&dumper);
 976         }
 977 }
 978
 979 int dump_stop_usertds = 0;
 980
 981 static
 982 void
 983 need_user_resched_remote(void *dummy)
 984 {
 985         need_user_resched();
 986 }
 987
 988 void
 989 dump_reactivate_cpus(void)
 990 {
 991         globaldata_t gd;
 992         int cpu, seq;
 993
 994         dump_stop_usertds = 1;
 995
 996         need_user_resched();
 997
 998         for (cpu = 0; cpu < ncpus; cpu++) {
 999                 gd = globaldata_find(cpu);
1000                 seq = lwkt_send_ipiq(gd, need_user_resched_remote, NULL);
1001                 lwkt_wait_ipiq(gd, seq);
1002         }
1003
1004         restart_cpus(stopped_cpus);
1005 }