4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
/*
 * zoneadmd manages zones; one zoneadmd process is launched for each
 * non-global zone on the system.  This daemon juggles four jobs:
 *
 * - Implement setup and teardown of the zone "virtual platform": mount and
 *   unmount filesystems; create and destroy network interfaces; communicate
 *   with devfsadmd to lay out devices for the zone; instantiate the zone
 *   console device; configure process runtime attributes such as resource
 *   controls, pool bindings, fine-grained privileges.
 *
 * - Launch the zone's init(1M) process.
 *
 * - Implement a door server; clients (like zoneadm) connect to the door
 *   server and request zone state changes.  The kernel is also a client of
 *   this door server.  A request to halt or reboot the zone which originates
 *   *inside* the zone results in a door upcall from the kernel into zoneadmd.
 *
 *   One minor problem is that messages emitted by zoneadmd need to be passed
 *   back to the zoneadm process making the request.  These messages need to
 *   be rendered in the client's locale; so, this is passed in as part of the
 *   request.  The exception is the kernel upcall to zoneadmd, in which case
 *   messages are syslog'd.
 *
 *   To make all of this work, the Makefile adds -a to xgettext to extract *all*
 *   strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
 *   strings which do not need to be translated.
 *
 * - Act as a console server for zlogin -C processes; see comments in zcons.c
 *   for more information about the zone console architecture.
 *
 * A chief design constraint of zoneadmd is that it should be restartable in
 * the case that the administrator kills it off, or it suffers a fatal error,
 * without the running zone being impacted; this is akin to being able to
 * reboot the service processor of a server without affecting the OS instance.
 */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>

#include <bsm/adt.h>
#include <bsm/adt_event.h>

#include <alloca.h>
#include <assert.h>
#include <errno.h>
#include <door.h>
#include <fcntl.h>
#include <locale.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <synch.h>
#include <syslog.h>
#include <unistd.h>
#include <wait.h>
#include <zone.h>
#include <libbrand.h>
#include <sys/brand.h>
#include <libcontract.h>
#include <libcontract_priv.h>
#include <sys/brand.h>
#include <sys/contract/process.h>
#include <sys/ctfs.h>
#include <libdladm.h>
#include <sys/dls_mgmt.h>
#include <libscf.h>

#include <libzonecfg.h>
#include <zonestat_impl.h>
#include "zoneadmd.h"
110 static char *progname
;
111 char *zone_name
; /* zone which we are managing */
112 char pool_name
[MAXNAMELEN
];
113 char default_brand
[MAXNAMELEN
];
114 char brand_name
[MAXNAMELEN
];
115 boolean_t zone_isnative
;
116 boolean_t zone_iscluster
;
117 boolean_t zone_islabeled
;
118 boolean_t shutdown_in_progress
;
119 static zoneid_t zone_id
;
120 dladm_handle_t dld_handle
= NULL
;
122 static char pre_statechg_hook
[2 * MAXPATHLEN
];
123 static char post_statechg_hook
[2 * MAXPATHLEN
];
124 char query_hook
[2 * MAXPATHLEN
];
128 mutex_t lock
= DEFAULTMUTEX
; /* to serialize stuff */
129 mutex_t msglock
= DEFAULTMUTEX
; /* for calling setlocale() */
131 static sema_t scratch_sem
; /* for scratch zones */
133 static char zone_door_path
[MAXPATHLEN
];
134 static int zone_door
= -1;
136 boolean_t in_death_throes
= B_FALSE
; /* daemon is dying */
137 boolean_t bringup_failure_recovery
= B_FALSE
; /* ignore certain failures */
139 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
140 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
143 #define DEFAULT_LOCALE "C"
146 z_cmd_name(zone_cmd_t zcmd
)
148 /* This list needs to match the enum in sys/zone.h */
149 static const char *zcmdstr
[] = {
150 "ready", "boot", "forceboot", "reboot", "halt",
151 "note_uninstalling", "mount", "forcemount", "unmount",
155 if (zcmd
>= sizeof (zcmdstr
) / sizeof (*zcmdstr
))
158 return (zcmdstr
[(int)zcmd
]);
/*
 * Return a pointer to the final path component of "execfullname".
 *
 * Trailing '/' characters are stripped in place (each is overwritten with a
 * NUL), so the caller's buffer must be writable.  If the string contains no
 * '/', the string itself is returned.
 *
 * NOTE(review): the loop structure and the trailing-slash handling were
 * missing from the extracted text and are restored here.
 */
static char *
get_execbasename(char *execfullname)
{
	char *last_slash, *execbasename;

	/* guard against '/' at end of command invocation */
	for (;;) {
		last_slash = strrchr(execfullname, '/');
		if (last_slash == NULL) {
			execbasename = execfullname;
			break;
		}

		execbasename = last_slash + 1;
		if (*execbasename != '\0')
			break;

		/* Path ended in '/': chop it off and look again. */
		*last_slash = '\0';
	}

	return (execbasename);
}
187 (void) fprintf(stderr
, gettext("Usage: %s -z zonename\n"), progname
);
188 (void) fprintf(stderr
,
189 gettext("\tNote: %s should not be run directly.\n"), progname
);
200 localize_msg(char *locale
, const char *msg
)
204 (void) mutex_lock(&msglock
);
205 (void) setlocale(LC_MESSAGES
, locale
);
207 (void) setlocale(LC_MESSAGES
, DEFAULT_LOCALE
);
208 (void) mutex_unlock(&msglock
);
214 zerror(zlog_t
*zlogp
, boolean_t use_strerror
, const char *fmt
, ...)
217 char buf
[MAXPATHLEN
* 2]; /* enough space for err msg with a path */
219 int saved_errno
= errno
;
223 if (zlogp
== &logsys
)
224 (void) snprintf(buf
, sizeof (buf
), "[zone '%s'] ",
228 bp
= &(buf
[strlen(buf
)]);
231 * In theory, the locale pointer should be set to either "C" or a
232 * char array, so it should never be NULL
234 assert(zlogp
->locale
!= NULL
);
235 /* Locale is per process, but we are multi-threaded... */
236 fmt
= localize_msg(zlogp
->locale
, fmt
);
238 va_start(alist
, fmt
);
239 (void) vsnprintf(bp
, sizeof (buf
) - (bp
- buf
), fmt
, alist
);
241 bp
= &(buf
[strlen(buf
)]);
243 (void) snprintf(bp
, sizeof (buf
) - (bp
- buf
), ": %s",
244 strerror(saved_errno
));
245 if (zlogp
== &logsys
) {
246 (void) syslog(LOG_ERR
, "%s", buf
);
247 } else if (zlogp
->logfile
!= NULL
) {
248 (void) fprintf(zlogp
->logfile
, "%s\n", buf
);
253 buflen
= snprintf(zlogp
->log
, zlogp
->loglen
, "%s\n", buf
);
254 copylen
= MIN(buflen
, zlogp
->loglen
);
255 zlogp
->log
+= copylen
;
256 zlogp
->loglen
-= copylen
;
261 * Emit a warning for any boot arguments which are unrecognized. Since
262 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
263 * put the arguments into an argv style array, use getopt to process them,
264 * and put the resultant argument string back into outargs.
266 * During the filtering, we pull out any arguments which are truly "boot"
267 * arguments, leaving only those which are to be passed intact to the
268 * progenitor process. The one we support at the moment is -i, which
269 * indicates to the kernel which program should be launched as 'init'.
271 * A return of Z_INVAL indicates specifically that the arguments are
272 * not valid; this is a non-fatal error. Except for Z_OK, all other return
273 * values are treated as fatal.
276 filter_bootargs(zlog_t
*zlogp
, const char *inargs
, char *outargs
,
277 char *init_file
, char *badarg
)
279 int argc
= 0, argc_save
;
282 char *arg
, *lasts
, **argv
= NULL
, **argv_save
;
283 char zonecfg_args
[BOOTARGS_MAX
];
284 char scratchargs
[BOOTARGS_MAX
], *sargs
;
287 bzero(outargs
, BOOTARGS_MAX
);
288 bzero(badarg
, BOOTARGS_MAX
);
291 * If the user didn't specify transient boot arguments, check
292 * to see if there were any specified in the zone configuration,
293 * and use them if applicable.
295 if (inargs
== NULL
|| inargs
[0] == '\0') {
296 zone_dochandle_t handle
;
297 if ((handle
= zonecfg_init_handle()) == NULL
) {
298 zerror(zlogp
, B_TRUE
,
299 "getting zone configuration handle");
300 return (Z_BAD_HANDLE
);
302 err
= zonecfg_get_snapshot_handle(zone_name
, handle
);
304 zerror(zlogp
, B_FALSE
,
305 "invalid configuration snapshot");
306 zonecfg_fini_handle(handle
);
307 return (Z_BAD_HANDLE
);
310 bzero(zonecfg_args
, sizeof (zonecfg_args
));
311 (void) zonecfg_get_bootargs(handle
, zonecfg_args
,
312 sizeof (zonecfg_args
));
313 inargs
= zonecfg_args
;
314 zonecfg_fini_handle(handle
);
317 if (strlen(inargs
) >= BOOTARGS_MAX
) {
318 zerror(zlogp
, B_FALSE
, "boot argument string too long");
322 (void) strlcpy(scratchargs
, inargs
, sizeof (scratchargs
));
324 while ((arg
= strtok_r(sargs
, " \t", &lasts
)) != NULL
) {
329 if ((argv
= calloc(argc
+ 1, sizeof (char *))) == NULL
) {
330 zerror(zlogp
, B_FALSE
, "memory allocation failed");
337 (void) strlcpy(scratchargs
, inargs
, sizeof (scratchargs
));
340 while ((arg
= strtok_r(sargs
, " \t", &lasts
)) != NULL
) {
342 if ((argv
[i
] = strdup(arg
)) == NULL
) {
344 zerror(zlogp
, B_FALSE
, "memory allocation failed");
351 * We preserve compatibility with the Solaris system boot behavior,
354 * # reboot kernel/unix -s -m verbose
356 * In this example, kernel/unix tells the booter what file to
357 * boot. We don't want reboot in a zone to be gratuitously different,
358 * so we silently ignore the boot file, if necessary.
363 assert(argv
[0][0] != ' ');
364 assert(argv
[0][0] != '\t');
366 if (argv
[0][0] != '-' && argv
[0][0] != '\0') {
374 while ((c
= getopt(argc
, argv
, "fi:m:s")) != -1) {
378 * -i is handled by the runtime and is not passed
381 (void) strlcpy(init_file
, optarg
, MAXPATHLEN
);
384 /* This has already been processed by zoneadm */
388 /* These pass through unmolested */
389 (void) snprintf(outargs
, BOOTARGS_MAX
,
390 "%s -%c %s ", outargs
, c
, optarg
? optarg
: "");
394 * We warn about unknown arguments but pass them
395 * along anyway-- if someone wants to develop their
396 * own init replacement, they can pass it whatever
400 (void) snprintf(outargs
, BOOTARGS_MAX
,
401 "%s -%c", outargs
, optopt
);
402 (void) snprintf(badarg
, BOOTARGS_MAX
,
403 "%s -%c", badarg
, optopt
);
409 * For Solaris Zones we warn about and discard non-option arguments.
410 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar
411 * to the kernel, we concat up all the other remaining boot args.
412 * and warn on them as a group.
416 while (optind
< argc
) {
417 (void) snprintf(badarg
, BOOTARGS_MAX
, "%s%s%s",
418 badarg
, strlen(badarg
) > 0 ? " " : "",
422 zerror(zlogp
, B_FALSE
, "WARNING: Unused or invalid boot "
423 "arguments `%s'.", badarg
);
427 for (i
= 0; i
< argc_save
; i
++) {
428 if (argv_save
[i
] != NULL
)
437 mkzonedir(zlog_t
*zlogp
)
441 * We must create and lock everyone but root out of ZONES_TMPDIR
442 * since anyone can open any UNIX domain socket, regardless of
443 * its file system permissions. Sigh...
445 if (mkdir(ZONES_TMPDIR
, S_IRWXU
) < 0 && errno
!= EEXIST
) {
446 zerror(zlogp
, B_TRUE
, "could not mkdir '%s'", ZONES_TMPDIR
);
450 if ((stat(ZONES_TMPDIR
, &st
) < 0) || !S_ISDIR(st
.st_mode
)) {
451 zerror(zlogp
, B_TRUE
, "'%s' is not a directory", ZONES_TMPDIR
);
454 (void) chmod(ZONES_TMPDIR
, S_IRWXU
);
459 * Run the brand's pre-state change callback, if it exists.
462 brand_prestatechg(zlog_t
*zlogp
, int state
, int cmd
)
464 char cmdbuf
[2 * MAXPATHLEN
];
467 if (pre_statechg_hook
[0] == '\0')
470 altroot
= zonecfg_get_root();
471 if (snprintf(cmdbuf
, sizeof (cmdbuf
), "%s %d %d %s", pre_statechg_hook
,
472 state
, cmd
, altroot
) > sizeof (cmdbuf
))
475 if (do_subproc(zlogp
, cmdbuf
, NULL
) != 0)
482 * Run the brand's post-state change callback, if it exists.
485 brand_poststatechg(zlog_t
*zlogp
, int state
, int cmd
)
487 char cmdbuf
[2 * MAXPATHLEN
];
490 if (post_statechg_hook
[0] == '\0')
493 altroot
= zonecfg_get_root();
494 if (snprintf(cmdbuf
, sizeof (cmdbuf
), "%s %d %d %s", post_statechg_hook
,
495 state
, cmd
, altroot
) > sizeof (cmdbuf
))
498 if (do_subproc(zlogp
, cmdbuf
, NULL
) != 0)
505 * Notify zonestatd of the new zone. If zonestatd is not running, this
509 notify_zonestatd(zoneid_t zoneid
)
515 fd
= open(ZS_DOOR_PATH
, O_RDONLY
);
519 cmd
[0] = ZSD_CMD_NEW_ZONE
;
521 params
.data_ptr
= (char *)&cmd
;
522 params
.data_size
= sizeof (cmd
);
523 params
.desc_ptr
= NULL
;
527 (void) door_call(fd
, ¶ms
);
532 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
533 * 'true' if this is being invoked as part of the processing for the "mount"
537 zone_ready(zlog_t
*zlogp
, zone_mnt_t mount_cmd
, int zstate
)
541 if (brand_prestatechg(zlogp
, zstate
, Z_READY
) != 0)
544 if ((err
= zonecfg_create_snapshot(zone_name
)) != Z_OK
) {
545 zerror(zlogp
, B_FALSE
, "unable to create snapshot: %s",
546 zonecfg_strerror(err
));
550 if ((zone_id
= vplat_create(zlogp
, mount_cmd
)) == -1) {
551 if ((err
= zonecfg_destroy_snapshot(zone_name
)) != Z_OK
)
552 zerror(zlogp
, B_FALSE
, "destroying snapshot: %s",
553 zonecfg_strerror(err
));
556 if (vplat_bringup(zlogp
, mount_cmd
, zone_id
) != 0) {
557 bringup_failure_recovery
= B_TRUE
;
558 (void) vplat_teardown(NULL
, (mount_cmd
!= Z_MNT_BOOT
), B_FALSE
);
559 if ((err
= zonecfg_destroy_snapshot(zone_name
)) != Z_OK
)
560 zerror(zlogp
, B_FALSE
, "destroying snapshot: %s",
561 zonecfg_strerror(err
));
565 if (brand_poststatechg(zlogp
, zstate
, Z_READY
) != 0)
572 * If something goes wrong, we up the zones's state to the target
573 * state, READY, and then invoke the hook as if we're halting.
575 (void) brand_poststatechg(zlogp
, ZONE_STATE_READY
, Z_HALT
);
585 fd
= open64(CTFS_ROOT
"/process/template", O_RDWR
);
590 * For now, zoneadmd doesn't do anything with the contract.
591 * Deliver no events, don't inherit, and allow it to be orphaned.
593 err
|= ct_tmpl_set_critical(fd
, 0);
594 err
|= ct_tmpl_set_informative(fd
, 0);
595 err
|= ct_pr_tmpl_set_fatal(fd
, CT_PR_EV_HWERR
);
596 err
|= ct_pr_tmpl_set_param(fd
, CT_PR_PGRPONLY
| CT_PR_REGENT
);
597 if (err
|| ct_tmpl_activate(fd
)) {
605 typedef struct fs_callback
{
612 mount_early_fs(void *data
, const char *spec
, const char *dir
,
613 const char *fstype
, const char *opt
)
615 zlog_t
*zlogp
= ((fs_callback_t
*)data
)->zlogp
;
616 zoneid_t zoneid
= ((fs_callback_t
*)data
)->zoneid
;
617 boolean_t mount_cmd
= ((fs_callback_t
*)data
)->mount_cmd
;
618 char rootpath
[MAXPATHLEN
];
625 /* determine the zone rootpath */
627 char zonepath
[MAXPATHLEN
];
628 char luroot
[MAXPATHLEN
];
630 if (zone_get_zonepath(zone_name
,
631 zonepath
, sizeof (zonepath
)) != Z_OK
) {
632 zerror(zlogp
, B_FALSE
, "unable to determine zone path");
636 (void) snprintf(luroot
, sizeof (luroot
), "%s/lu", zonepath
);
637 resolve_lofs(zlogp
, luroot
, sizeof (luroot
));
638 (void) strlcpy(rootpath
, luroot
, sizeof (rootpath
));
640 if (zone_get_rootpath(zone_name
,
641 rootpath
, sizeof (rootpath
)) != Z_OK
) {
642 zerror(zlogp
, B_FALSE
, "unable to determine zone root");
647 if ((rv
= valid_mount_path(zlogp
, rootpath
, spec
, dir
, fstype
)) < 0) {
648 zerror(zlogp
, B_FALSE
, "%s%s is not a valid mount point",
652 /* The mount point path doesn't exist, create it now. */
653 if (make_one_dir(zlogp
, rootpath
, dir
,
654 DEFAULT_DIR_MODE
, DEFAULT_DIR_USER
,
655 DEFAULT_DIR_GROUP
) != 0) {
656 zerror(zlogp
, B_FALSE
, "failed to create mount point");
661 * Now this might seem weird, but we need to invoke
662 * valid_mount_path() again. Why? Because it checks
663 * to make sure that the mount point path is canonical,
664 * which it can only do if the path exists, so now that
665 * we've created the path we have to verify it again.
667 if ((rv
= valid_mount_path(zlogp
, rootpath
, spec
, dir
,
669 zerror(zlogp
, B_FALSE
,
670 "%s%s is not a valid mount point", rootpath
, dir
);
675 if ((tmpl_fd
= init_template()) == -1) {
676 zerror(zlogp
, B_TRUE
, "failed to create contract");
680 if ((child
= fork()) == -1) {
681 (void) ct_tmpl_clear(tmpl_fd
);
682 (void) close(tmpl_fd
);
683 zerror(zlogp
, B_TRUE
, "failed to fork");
686 } else if (child
== 0) { /* child */
687 char opt_buf
[MAX_MNTOPT_STR
];
691 (void) ct_tmpl_clear(tmpl_fd
);
693 * Even though there are no procs running in the zone, we
694 * do this for paranoia's sake.
698 if (zone_enter(zoneid
) == -1) {
703 * The mount() system call is incredibly annoying.
704 * If options are specified, we need to copy them
705 * into a temporary buffer since the mount() system
706 * call will overwrite the options string. It will
707 * also fail if the new option string it wants to
708 * write is bigger than the one we passed in, so
709 * you must pass in a buffer of the maximum possible
710 * option string length. sigh.
712 (void) strlcpy(opt_buf
, opt
, sizeof (opt_buf
));
714 optlen
= MAX_MNTOPT_STR
;
715 mflag
= MS_OPTIONSTR
;
717 if (mount(spec
, dir
, mflag
, fstype
, NULL
, 0, opt
, optlen
) != 0)
723 if (contract_latest(&ct
) == -1)
725 (void) ct_tmpl_clear(tmpl_fd
);
726 (void) close(tmpl_fd
);
727 if (waitpid(child
, &child_status
, 0) != child
) {
728 /* unexpected: we must have been signalled */
729 (void) contract_abandon_id(ct
);
732 (void) contract_abandon_id(ct
);
733 if (WEXITSTATUS(child_status
) != 0) {
734 errno
= WEXITSTATUS(child_status
);
735 zerror(zlogp
, B_TRUE
, "mount of %s failed", dir
);
743 * If retstr is not NULL, the output of the subproc is returned in the str,
744 * otherwise it is output using zerror(). Any memory allocated for retstr
745 * should be freed by the caller.
748 do_subproc(zlog_t
*zlogp
, char *cmdbuf
, char **retstr
)
750 char buf
[1024]; /* arbitrary large amount */
756 if (retstr
!= NULL
) {
757 if ((*retstr
= malloc(1024)) == NULL
) {
758 zerror(zlogp
, B_FALSE
, "out of memory");
767 file
= popen(cmdbuf
, "r");
769 zerror(zlogp
, B_TRUE
, "could not launch: %s", cmdbuf
);
773 while (fgets(inbuf
, 1024, file
) != NULL
) {
774 if (retstr
== NULL
) {
775 if (zlogp
!= &logsys
)
776 zerror(zlogp
, B_FALSE
, "%s", inbuf
);
781 if ((p
= realloc(*retstr
, rd_cnt
+ 1024)) == NULL
) {
782 zerror(zlogp
, B_FALSE
, "out of memory");
788 inbuf
= *retstr
+ rd_cnt
;
791 status
= pclose(file
);
793 if (WIFSIGNALED(status
)) {
794 zerror(zlogp
, B_FALSE
, "%s unexpectedly terminated due to "
795 "signal %d", cmdbuf
, WTERMSIG(status
));
798 assert(WIFEXITED(status
));
799 if (WEXITSTATUS(status
) == ZEXIT_EXEC
) {
800 zerror(zlogp
, B_FALSE
, "failed to exec %s", cmdbuf
);
803 return (WEXITSTATUS(status
));
807 zone_bootup(zlog_t
*zlogp
, const char *bootargs
, int zstate
)
811 char zpath
[MAXPATHLEN
], initpath
[MAXPATHLEN
], init_file
[MAXPATHLEN
];
812 char nbootargs
[BOOTARGS_MAX
];
813 char cmdbuf
[MAXPATHLEN
];
816 zone_iptype_t iptype
;
817 boolean_t links_loaded
= B_FALSE
;
818 dladm_status_t status
;
819 char errmsg
[DLADM_STRSIZE
];
821 boolean_t restart_init
;
823 if (brand_prestatechg(zlogp
, zstate
, Z_BOOT
) != 0)
826 if ((zoneid
= getzoneidbyname(zone_name
)) == -1) {
827 zerror(zlogp
, B_TRUE
, "unable to get zoneid");
833 cb
.mount_cmd
= B_FALSE
;
835 /* Get a handle to the brand info for this zone */
836 if ((bh
= brand_open(brand_name
)) == NULL
) {
837 zerror(zlogp
, B_FALSE
, "unable to determine zone brand");
842 * Get the list of filesystems to mount from the brand
843 * configuration. These mounts are done via a thread that will
844 * enter the zone, so they are done from within the context of the
847 if (brand_platform_iter_mounts(bh
, mount_early_fs
, &cb
) != 0) {
848 zerror(zlogp
, B_FALSE
, "unable to mount filesystems");
854 * Get the brand's boot callback if it exists.
856 if (zone_get_zonepath(zone_name
, zpath
, sizeof (zpath
)) != Z_OK
) {
857 zerror(zlogp
, B_FALSE
, "unable to determine zone path");
861 (void) strcpy(cmdbuf
, EXEC_PREFIX
);
862 if (brand_get_boot(bh
, zone_name
, zpath
, cmdbuf
+ EXEC_LEN
,
863 sizeof (cmdbuf
) - EXEC_LEN
) != 0) {
864 zerror(zlogp
, B_FALSE
,
865 "unable to determine branded zone's boot callback");
870 /* Get the path for this zone's init(1M) (or equivalent) process. */
871 if (brand_get_initname(bh
, init_file
, MAXPATHLEN
) != 0) {
872 zerror(zlogp
, B_FALSE
,
873 "unable to determine zone's init(1M) location");
878 /* See if this zone's brand should restart init if it dies. */
879 restart_init
= brand_restartinit(bh
);
883 err
= filter_bootargs(zlogp
, bootargs
, nbootargs
, init_file
,
886 eventstream_write(Z_EVT_ZONE_BADARGS
);
887 else if (err
!= Z_OK
)
890 assert(init_file
[0] != '\0');
892 /* Try to anticipate possible problems: Make sure init is executable. */
893 if (zone_get_rootpath(zone_name
, zpath
, sizeof (zpath
)) != Z_OK
) {
894 zerror(zlogp
, B_FALSE
, "unable to determine zone root");
898 (void) snprintf(initpath
, sizeof (initpath
), "%s%s", zpath
, init_file
);
900 if (stat(initpath
, &st
) == -1) {
901 zerror(zlogp
, B_TRUE
, "could not stat %s", initpath
);
905 if ((st
.st_mode
& S_IXUSR
) == 0) {
906 zerror(zlogp
, B_FALSE
, "%s is not executable", initpath
);
911 * Exclusive stack zones interact with the dlmgmtd running in the
912 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
913 * booting, and loads its datalinks from the zone's datalink
914 * configuration file.
916 if (vplat_get_iptype(zlogp
, &iptype
) == 0 && iptype
== ZS_EXCLUSIVE
) {
917 status
= dladm_zone_boot(dld_handle
, zoneid
);
918 if (status
!= DLADM_STATUS_OK
) {
919 zerror(zlogp
, B_FALSE
, "unable to load zone datalinks: "
920 " %s", dladm_status2str(status
, errmsg
));
923 links_loaded
= B_TRUE
;
927 * If there is a brand 'boot' callback, execute it now to give the
928 * brand one last chance to do any additional setup before the zone
931 if ((strlen(cmdbuf
) > EXEC_LEN
) &&
932 (do_subproc(zlogp
, cmdbuf
, NULL
) != Z_OK
)) {
933 zerror(zlogp
, B_FALSE
, "%s failed", cmdbuf
);
937 if (zone_setattr(zoneid
, ZONE_ATTR_INITNAME
, init_file
, 0) == -1) {
938 zerror(zlogp
, B_TRUE
, "could not set zone boot file");
942 if (zone_setattr(zoneid
, ZONE_ATTR_BOOTARGS
, nbootargs
, 0) == -1) {
943 zerror(zlogp
, B_TRUE
, "could not set zone boot arguments");
947 if (!restart_init
&& zone_setattr(zoneid
, ZONE_ATTR_INITNORESTART
,
949 zerror(zlogp
, B_TRUE
, "could not set zone init-no-restart");
954 * Inform zonestatd of a new zone so that it can install a door for
955 * the zone to contact it.
957 notify_zonestatd(zone_id
);
959 if (zone_boot(zoneid
) == -1) {
960 zerror(zlogp
, B_TRUE
, "unable to boot zone");
964 if (brand_poststatechg(zlogp
, zstate
, Z_BOOT
) != 0)
971 * If something goes wrong, we up the zones's state to the target
972 * state, RUNNING, and then invoke the hook as if we're halting.
974 (void) brand_poststatechg(zlogp
, ZONE_STATE_RUNNING
, Z_HALT
);
976 (void) dladm_zone_halt(dld_handle
, zoneid
);
981 zone_halt(zlog_t
*zlogp
, boolean_t unmount_cmd
, boolean_t rebooting
, int zstate
)
985 if (brand_prestatechg(zlogp
, zstate
, Z_HALT
) != 0)
988 if (vplat_teardown(zlogp
, unmount_cmd
, rebooting
) != 0) {
989 if (!bringup_failure_recovery
)
990 zerror(zlogp
, B_FALSE
, "unable to destroy zone");
994 if ((err
= zonecfg_destroy_snapshot(zone_name
)) != Z_OK
)
995 zerror(zlogp
, B_FALSE
, "destroying snapshot: %s",
996 zonecfg_strerror(err
));
998 if (brand_poststatechg(zlogp
, zstate
, Z_HALT
) != 0)
1005 zone_graceful_shutdown(zlog_t
*zlogp
)
1009 char cmdbuf
[MAXPATHLEN
];
1010 brand_handle_t bh
= NULL
;
1011 char zpath
[MAXPATHLEN
];
1016 if (shutdown_in_progress
) {
1017 zerror(zlogp
, B_FALSE
, "shutdown already in progress");
1021 if ((zoneid
= getzoneidbyname(zone_name
)) == -1) {
1022 zerror(zlogp
, B_TRUE
, "unable to get zoneid");
1026 /* Get a handle to the brand info for this zone */
1027 if ((bh
= brand_open(brand_name
)) == NULL
) {
1028 zerror(zlogp
, B_FALSE
, "unable to determine zone brand");
1032 if (zone_get_zonepath(zone_name
, zpath
, sizeof (zpath
)) != Z_OK
) {
1033 zerror(zlogp
, B_FALSE
, "unable to determine zone path");
1039 * If there is a brand 'shutdown' callback, execute it now to give the
1040 * brand a chance to cleanup any custom configuration.
1042 (void) strcpy(cmdbuf
, EXEC_PREFIX
);
1043 if (brand_get_shutdown(bh
, zone_name
, zpath
, cmdbuf
+ EXEC_LEN
,
1044 sizeof (cmdbuf
) - EXEC_LEN
) != 0 || strlen(cmdbuf
) <= EXEC_LEN
) {
1045 (void) strcat(cmdbuf
, SHUTDOWN_DEFAULT
);
1049 if ((tmpl_fd
= init_template()) == -1) {
1050 zerror(zlogp
, B_TRUE
, "failed to create contract");
1054 if ((child
= fork()) == -1) {
1055 (void) ct_tmpl_clear(tmpl_fd
);
1056 (void) close(tmpl_fd
);
1057 zerror(zlogp
, B_TRUE
, "failed to fork");
1059 } else if (child
== 0) {
1060 (void) ct_tmpl_clear(tmpl_fd
);
1061 if (zone_enter(zoneid
) == -1) {
1064 _exit(execl("/bin/sh", "sh", "-c", cmdbuf
, (char *)NULL
));
1067 if (contract_latest(&ct
) == -1)
1069 (void) ct_tmpl_clear(tmpl_fd
);
1070 (void) close(tmpl_fd
);
1072 if (waitpid(child
, &child_status
, 0) != child
) {
1073 /* unexpected: we must have been signalled */
1074 (void) contract_abandon_id(ct
);
1078 (void) contract_abandon_id(ct
);
1079 if (WEXITSTATUS(child_status
) != 0) {
1080 errno
= WEXITSTATUS(child_status
);
1081 zerror(zlogp
, B_FALSE
, "unable to shutdown zone");
1085 shutdown_in_progress
= B_TRUE
;
1091 zone_wait_shutdown(zlog_t
*zlogp
)
1093 zone_state_t zstate
;
1094 uint64_t *tm
= NULL
;
1095 scf_simple_prop_t
*prop
= NULL
;
1100 /* Get default stop timeout from SMF framework */
1101 timeout
= SHUTDOWN_WAIT
;
1102 if ((prop
= scf_simple_prop_get(NULL
, SHUTDOWN_FMRI
, "stop",
1103 SCF_PROPERTY_TIMEOUT
)) != NULL
) {
1104 if ((tm
= scf_simple_prop_next_count(prop
)) != NULL
) {
1108 scf_simple_prop_free(prop
);
1111 /* allow time for zone to shutdown cleanly */
1112 for (tries
= 0; tries
< timeout
; tries
++) {
1114 if (zone_get_state(zone_name
, &zstate
) == Z_OK
&&
1115 zstate
== ZONE_STATE_INSTALLED
) {
1122 zerror(zlogp
, B_FALSE
, "unable to shutdown zone");
1124 shutdown_in_progress
= B_FALSE
;
1132 * Generate AUE_zone_state for a command that boots a zone.
1135 audit_put_record(zlog_t
*zlogp
, ucred_t
*uc
, int return_val
,
1138 adt_session_data_t
*ah
;
1139 adt_event_data_t
*event
;
1140 int pass_fail
, fail_reason
;
1142 if (!adt_audit_enabled())
1145 if (return_val
== 0) {
1146 pass_fail
= ADT_SUCCESS
;
1147 fail_reason
= ADT_SUCCESS
;
1149 pass_fail
= ADT_FAILURE
;
1150 fail_reason
= ADT_FAIL_VALUE_PROGRAM
;
1153 if (adt_start_session(&ah
, NULL
, 0)) {
1154 zerror(zlogp
, B_TRUE
, gettext("audit failure."));
1157 if (adt_set_from_ucred(ah
, uc
, ADT_NEW
)) {
1158 zerror(zlogp
, B_TRUE
, gettext("audit failure."));
1159 (void) adt_end_session(ah
);
1163 event
= adt_alloc_event(ah
, ADT_zone_state
);
1164 if (event
== NULL
) {
1165 zerror(zlogp
, B_TRUE
, gettext("audit failure."));
1166 (void) adt_end_session(ah
);
1169 event
->adt_zone_state
.zonename
= zone_name
;
1170 event
->adt_zone_state
.new_state
= new_state
;
1172 if (adt_put_event(event
, pass_fail
, fail_reason
))
1173 zerror(zlogp
, B_TRUE
, gettext("audit failure."));
1175 adt_free_event(event
);
1177 (void) adt_end_session(ah
);
1181 * The main routine for the door server that deals with zone state transitions.
1185 server(void *cookie
, char *args
, size_t alen
, door_desc_t
*dp
,
1189 const priv_set_t
*eset
;
1191 zone_state_t zstate
;
1193 zone_cmd_arg_t
*zargp
;
1195 boolean_t kernelcall
;
1199 zoneid_t zoneid
= -1;
1202 zone_cmd_rval_t
*rvalp
;
1203 size_t rlen
= getpagesize(); /* conservative */
1206 boolean_t wait_shut
= B_FALSE
;
1208 /* LINTED E_BAD_PTR_CAST_ALIGN */
1209 zargp
= (zone_cmd_arg_t
*)args
;
1212 * When we get the door unref message, we've fdetach'd the door, and
1213 * it is time for us to shut down zoneadmd.
1215 if (zargp
== DOOR_UNREF_DATA
) {
1217 * See comment at end of main() for info on the last rites.
1222 if (zargp
== NULL
) {
1223 (void) door_return(NULL
, 0, 0, 0);
1226 rvalp
= alloca(rlen
);
1228 zlog
.logfile
= NULL
;
1229 zlog
.buflen
= zlog
.loglen
= rlen
- sizeof (zone_cmd_rval_t
) + 1;
1230 zlog
.buf
= rvalp
->errbuf
;
1231 zlog
.log
= zlog
.buf
;
1232 /* defer initialization of zlog.locale until after credential check */
1235 if (alen
!= sizeof (zone_cmd_arg_t
)) {
1237 * This really shouldn't be happening.
1239 zerror(&logsys
, B_FALSE
, "argument size (%d bytes) "
1240 "unexpected (expected %d bytes)", alen
,
1241 sizeof (zone_cmd_arg_t
));
1246 if (door_ucred(&uc
) != 0) {
1247 zerror(&logsys
, B_TRUE
, "door_ucred");
1250 eset
= ucred_getprivset(uc
, PRIV_EFFECTIVE
);
1251 if (ucred_getzoneid(uc
) != GLOBAL_ZONEID
||
1252 (eset
!= NULL
? !priv_ismember(eset
, PRIV_SYS_CONFIG
) :
1253 ucred_geteuid(uc
) != 0)) {
1254 zerror(&logsys
, B_FALSE
, "insufficient privileges");
1258 kernelcall
= ucred_getpid(uc
) == 0;
1261 * This is safe because we only use a zlog_t throughout the
1262 * duration of a door call; i.e., by the time the pointer
1263 * might become invalid, the door call would be over.
1265 zlog
.locale
= kernelcall
? DEFAULT_LOCALE
: zargp
->locale
;
1267 (void) mutex_lock(&lock
);
1270 * Once we start to really die off, we don't want more connections.
1272 if (in_death_throes
) {
1273 (void) mutex_unlock(&lock
);
1275 (void) door_return(NULL
, 0, 0, 0);
1280 * Check for validity of command.
1282 if (cmd
!= Z_READY
&& cmd
!= Z_BOOT
&& cmd
!= Z_FORCEBOOT
&&
1283 cmd
!= Z_REBOOT
&& cmd
!= Z_SHUTDOWN
&& cmd
!= Z_HALT
&&
1284 cmd
!= Z_NOTE_UNINSTALLING
&& cmd
!= Z_MOUNT
&&
1285 cmd
!= Z_FORCEMOUNT
&& cmd
!= Z_UNMOUNT
) {
1286 zerror(&logsys
, B_FALSE
, "invalid command %d", (int)cmd
);
1290 if (kernelcall
&& (cmd
!= Z_HALT
&& cmd
!= Z_REBOOT
)) {
1294 zerror(&logsys
, B_FALSE
, "received unexpected kernel upcall %d",
1299 * We ignore the possibility of someone calling zone_create(2)
1300 * explicitly; all requests must come through zoneadmd.
1302 if (zone_get_state(zone_name
, &zstate
) != Z_OK
) {
1304 * Something terribly wrong happened
1306 zerror(&logsys
, B_FALSE
, "unable to determine state of zone");
1312 * Kernel-initiated requests may lose their validity if the
1313 * zone_t the kernel was referring to has gone away.
1315 if ((zoneid
= getzoneidbyname(zone_name
)) == -1 ||
1316 zone_getattr(zoneid
, ZONE_ATTR_UNIQID
, &uniqid
,
1317 sizeof (uniqid
)) == -1 || uniqid
!= zargp
->uniqid
) {
1319 * We're not talking about the same zone. The request
1320 * must have arrived too late. Return error.
1325 zlogp
= &logsys
; /* Log errors to syslog */
1329 * If we are being asked to forcibly mount or boot a zone, we
1330 * pretend that an INCOMPLETE zone is actually INSTALLED.
1332 if (zstate
== ZONE_STATE_INCOMPLETE
&&
1333 (cmd
== Z_FORCEBOOT
|| cmd
== Z_FORCEMOUNT
))
1334 zstate
= ZONE_STATE_INSTALLED
;
1337 case ZONE_STATE_CONFIGURED
:
1338 case ZONE_STATE_INCOMPLETE
:
1340 * Not our area of expertise; we just print a nice message
1343 zerror(zlogp
, B_FALSE
,
1344 "%s operation is invalid for zones in state '%s'",
1345 z_cmd_name(cmd
), zone_state_str(zstate
));
1348 case ZONE_STATE_INSTALLED
:
1351 rval
= zone_ready(zlogp
, Z_MNT_BOOT
, zstate
);
1353 eventstream_write(Z_EVT_ZONE_READIED
);
1357 eventstream_write(Z_EVT_ZONE_BOOTING
);
1358 if ((rval
= zone_ready(zlogp
, Z_MNT_BOOT
, zstate
))
1360 rval
= zone_bootup(zlogp
, zargp
->bootbuf
,
1363 audit_put_record(zlogp
, uc
, rval
, "boot");
1365 bringup_failure_recovery
= B_TRUE
;
1366 (void) zone_halt(zlogp
, B_FALSE
, B_FALSE
,
1368 eventstream_write(Z_EVT_ZONE_BOOTFAILED
);
1373 if (kernelcall
) /* Invalid; can't happen */
1376 * We could have two clients racing to halt this
1377 * zone; the second client loses, but its request
1378 * doesn't fail, since the zone is now in the desired
1381 zerror(zlogp
, B_FALSE
, "zone is already halted");
1385 if (kernelcall
) /* Invalid; can't happen */
1387 zerror(zlogp
, B_FALSE
, "%s operation is invalid "
1388 "for zones in state '%s'", z_cmd_name(cmd
),
1389 zone_state_str(zstate
));
1392 case Z_NOTE_UNINSTALLING
:
1393 if (kernelcall
) /* Invalid; can't happen */
1396 * Tell the console to print out a message about this.
1397 * Once it does, we will be in_death_throes.
1399 eventstream_write(Z_EVT_ZONE_UNINSTALLING
);
1403 if (kernelcall
) /* Invalid; can't happen */
1405 if (!zone_isnative
&& !zone_iscluster
&&
1408 * -U mounts the zone without lofs mounting
1409 * zone file systems back into the scratch
1410 * zone. This is required when mounting
1411 * non-native branded zones.
1413 (void) strlcpy(zargp
->bootbuf
, "-U",
1417 rval
= zone_ready(zlogp
,
1418 strcmp(zargp
->bootbuf
, "-U") == 0 ?
1419 Z_MNT_UPDATE
: Z_MNT_SCRATCH
, zstate
);
1423 eventstream_write(Z_EVT_ZONE_READIED
);
1426 * Get a handle to the default brand info.
1427 * We must always use the default brand file system
1428 * list when mounting the zone.
1430 if ((bh
= brand_open(default_brand
)) == NULL
) {
1436 * Get the list of filesystems to mount from
1437 * the brand configuration. These mounts are done
1438 * via a thread that will enter the zone, so they
1439 * are done from within the context of the zone.
1442 cb
.zoneid
= zone_id
;
1443 cb
.mount_cmd
= B_TRUE
;
1444 rval
= brand_platform_iter_mounts(bh
,
1445 mount_early_fs
, &cb
);
1450 * Ordinarily, /dev/fd would be mounted inside the zone
1451 * by svc:/system/filesystem/usr:default, but since
1452 * we're not booting the zone, we need to do this
1456 rval
= mount_early_fs(&cb
,
1457 "fd", "/dev/fd", "fd", NULL
);
1460 if (kernelcall
) /* Invalid; can't happen */
1462 zerror(zlogp
, B_FALSE
, "zone is already unmounted");
1468 case ZONE_STATE_READY
:
1472 * We could have two clients racing to ready this
1473 * zone; the second client loses, but its request
1474 * doesn't fail, since the zone is now in the desired
1477 zerror(zlogp
, B_FALSE
, "zone is already ready");
1481 (void) strlcpy(boot_args
, zargp
->bootbuf
,
1482 sizeof (boot_args
));
1483 eventstream_write(Z_EVT_ZONE_BOOTING
);
1484 rval
= zone_bootup(zlogp
, zargp
->bootbuf
, zstate
);
1485 audit_put_record(zlogp
, uc
, rval
, "boot");
1487 bringup_failure_recovery
= B_TRUE
;
1488 (void) zone_halt(zlogp
, B_FALSE
, B_TRUE
,
1490 eventstream_write(Z_EVT_ZONE_BOOTFAILED
);
1492 boot_args
[0] = '\0';
1495 if (kernelcall
) /* Invalid; can't happen */
1497 if ((rval
= zone_halt(zlogp
, B_FALSE
, B_FALSE
, zstate
))
1500 eventstream_write(Z_EVT_ZONE_HALTED
);
1504 case Z_NOTE_UNINSTALLING
:
1507 if (kernelcall
) /* Invalid; can't happen */
1509 zerror(zlogp
, B_FALSE
, "%s operation is invalid "
1510 "for zones in state '%s'", z_cmd_name(cmd
),
1511 zone_state_str(zstate
));
1517 case ZONE_STATE_MOUNTED
:
1520 if (kernelcall
) /* Invalid; can't happen */
1522 rval
= zone_halt(zlogp
, B_TRUE
, B_FALSE
, zstate
);
1524 eventstream_write(Z_EVT_ZONE_HALTED
);
1525 (void) sema_post(&scratch_sem
);
1529 if (kernelcall
) /* Invalid; can't happen */
1531 zerror(zlogp
, B_FALSE
, "%s operation is invalid "
1532 "for zones in state '%s'", z_cmd_name(cmd
),
1533 zone_state_str(zstate
));
1539 case ZONE_STATE_RUNNING
:
1540 case ZONE_STATE_SHUTTING_DOWN
:
1541 case ZONE_STATE_DOWN
:
1544 if ((rval
= zone_halt(zlogp
, B_FALSE
, B_TRUE
, zstate
))
1547 if ((rval
= zone_ready(zlogp
, Z_MNT_BOOT
, zstate
)) == 0)
1548 eventstream_write(Z_EVT_ZONE_READIED
);
1550 eventstream_write(Z_EVT_ZONE_HALTED
);
1554 * We could have two clients racing to boot this
1555 * zone; the second client loses, but its request
1556 * doesn't fail, since the zone is now in the desired
1559 zerror(zlogp
, B_FALSE
, "zone is already booted");
1563 if ((rval
= zone_halt(zlogp
, B_FALSE
, B_FALSE
, zstate
))
1566 eventstream_write(Z_EVT_ZONE_HALTED
);
1569 (void) strlcpy(boot_args
, zargp
->bootbuf
,
1570 sizeof (boot_args
));
1571 eventstream_write(Z_EVT_ZONE_REBOOTING
);
1572 if ((rval
= zone_halt(zlogp
, B_FALSE
, B_TRUE
, zstate
))
1574 eventstream_write(Z_EVT_ZONE_BOOTFAILED
);
1575 boot_args
[0] = '\0';
1578 if ((rval
= zone_ready(zlogp
, Z_MNT_BOOT
, zstate
))
1580 eventstream_write(Z_EVT_ZONE_BOOTFAILED
);
1581 boot_args
[0] = '\0';
1584 rval
= zone_bootup(zlogp
, zargp
->bootbuf
, zstate
);
1585 audit_put_record(zlogp
, uc
, rval
, "reboot");
1587 (void) zone_halt(zlogp
, B_FALSE
, B_TRUE
,
1589 eventstream_write(Z_EVT_ZONE_BOOTFAILED
);
1591 boot_args
[0] = '\0';
1594 if ((rval
= zone_graceful_shutdown(zlogp
)) == 0) {
1598 case Z_NOTE_UNINSTALLING
:
1601 zerror(zlogp
, B_FALSE
, "%s operation is invalid "
1602 "for zones in state '%s'", z_cmd_name(cmd
),
1603 zone_state_str(zstate
));
1613 * Because the state of the zone may have changed, we make sure
1614 * to wake the console poller, which is in charge of initiating
1615 * the shutdown procedure as necessary.
1617 eventstream_write(Z_EVT_NULL
);
1620 (void) mutex_unlock(&lock
);
1622 /* Wait for the Z_SHUTDOWN commands to complete */
1624 rval
= zone_wait_shutdown(zlogp
);
1634 (void) door_return((char *)rvalp
, rlen
, NULL
, 0);
1639 setup_door(zlog_t
*zlogp
)
1641 if ((zone_door
= door_create(server
, NULL
,
1642 DOOR_UNREF
| DOOR_REFUSE_DESC
| DOOR_NO_CANCEL
)) < 0) {
1643 zerror(zlogp
, B_TRUE
, "%s failed", "door_create");
1646 (void) fdetach(zone_door_path
);
1648 if (fattach(zone_door
, zone_door_path
) != 0) {
1649 zerror(zlogp
, B_TRUE
, "fattach to %s failed", zone_door_path
);
1650 (void) door_revoke(zone_door
);
1651 (void) fdetach(zone_door_path
);
1659 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1660 * is where zoneadmd itself will check to see that another instance of
1661 * zoneadmd isn't already controlling this zone.
1663 * The idea here is that we want to open the path to which we will
1664 * attach our door, lock it, and then make sure that no-one has beat us
1665 * to fattach(3c)ing onto it.
1667 * fattach(3c) is really a mount, so there are actually two possible
1668 * vnodes we could be dealing with. Our strategy is as follows:
1670 * - If the file we opened is a regular file (common case):
1671 * There is no fattach(3c)ed door, so we have a chance of becoming
1672 * the managing zoneadmd. We attempt to lock the file: if it is
1673 * already locked, that means someone else raced us here, so we
1674 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1675 * that beat us to it.
1677 * - If the file we opened is a namefs file:
1678 * This means there is already an established door fattach(3c)'ed
1679 * to the rendezvous path. We've lost the race, so we give up.
1680 * Note that in this case we also try to grab the file lock, and
1681 * will succeed in acquiring it since the vnode locked by the
1682 * "winning" zoneadmd was a regular one, and the one we locked was
1683 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1684 * we just return to zoneadm(1m) which knows to retry.
1687 make_daemon_exclusive(zlog_t
*zlogp
)
1693 zone_state_t zstate
;
1696 if ((err
= zone_get_state(zone_name
, &zstate
)) != Z_OK
) {
1697 zerror(zlogp
, B_FALSE
, "failed to get zone state: %s",
1698 zonecfg_strerror(err
));
1701 if ((doorfd
= open(zone_door_path
, O_CREAT
|O_RDWR
,
1702 S_IREAD
|S_IWRITE
)) < 0) {
1703 zerror(zlogp
, B_TRUE
, "failed to open %s", zone_door_path
);
1706 if (fstat(doorfd
, &st
) < 0) {
1707 zerror(zlogp
, B_TRUE
, "failed to stat %s", zone_door_path
);
1711 * Lock the file to synchronize with other zoneadmd
1713 flock
.l_type
= F_WRLCK
;
1714 flock
.l_whence
= SEEK_SET
;
1715 flock
.l_start
= (off_t
)0;
1716 flock
.l_len
= (off_t
)0;
1717 if (fcntl(doorfd
, F_SETLK
, &flock
) < 0) {
1719 * Someone else raced us here and grabbed the lock file
1720 * first. A warning here is inappropriate since nothing
1726 if (strcmp(st
.st_fstype
, "namefs") == 0) {
1727 struct door_info info
;
1730 * There is already something fattach()'ed to this file.
1731 * Lets see what the door is up to.
1733 if (door_info(doorfd
, &info
) == 0 && info
.di_target
!= -1) {
1735 * Another zoneadmd process seems to be in
1736 * control of the situation and we don't need to
1737 * be here. A warning here is inappropriate
1738 * since nothing went wrong.
1740 * If the door has been revoked, the zoneadmd
1741 * process currently managing the zone is going
1742 * away. We'll return control to zoneadm(1m)
1743 * which will try again (by which time zoneadmd
1744 * will hopefully have exited).
1750 * If we got this far, there's a fattach(3c)'ed door
1751 * that belongs to a process that has exited, which can
1752 * happen if the previous zoneadmd died unexpectedly.
1754 * Let user know that something is amiss, but that we can
1755 * recover; if the zone is in the installed state, then don't
1756 * message, since having a running zoneadmd isn't really
1757 * expected/needed. We want to keep occurences of this message
1758 * limited to times when zoneadmd is picking back up from a
1759 * zoneadmd that died while the zone was in some non-trivial
1762 if (zstate
> ZONE_STATE_INSTALLED
) {
1763 zerror(zlogp
, B_FALSE
,
1764 "zone '%s': WARNING: zone is in state '%s', but "
1765 "zoneadmd does not appear to be available; "
1766 "restarted zoneadmd to recover.",
1767 zone_name
, zone_state_str(zstate
));
1770 (void) fdetach(zone_door_path
);
1771 (void) close(doorfd
);
1776 (void) close(doorfd
);
1781 * Setup the brand's pre and post state change callbacks, as well as the
1782 * query callback, if any of these exist.
1785 brand_callback_init(brand_handle_t bh
, char *zone_name
)
1787 char zpath
[MAXPATHLEN
];
1789 if (zone_get_zonepath(zone_name
, zpath
, sizeof (zpath
)) != Z_OK
)
1792 (void) strlcpy(pre_statechg_hook
, EXEC_PREFIX
,
1793 sizeof (pre_statechg_hook
));
1795 if (brand_get_prestatechange(bh
, zone_name
, zpath
,
1796 pre_statechg_hook
+ EXEC_LEN
,
1797 sizeof (pre_statechg_hook
) - EXEC_LEN
) != 0)
1800 if (strlen(pre_statechg_hook
) <= EXEC_LEN
)
1801 pre_statechg_hook
[0] = '\0';
1803 (void) strlcpy(post_statechg_hook
, EXEC_PREFIX
,
1804 sizeof (post_statechg_hook
));
1806 if (brand_get_poststatechange(bh
, zone_name
, zpath
,
1807 post_statechg_hook
+ EXEC_LEN
,
1808 sizeof (post_statechg_hook
) - EXEC_LEN
) != 0)
1811 if (strlen(post_statechg_hook
) <= EXEC_LEN
)
1812 post_statechg_hook
[0] = '\0';
1814 (void) strlcpy(query_hook
, EXEC_PREFIX
,
1815 sizeof (query_hook
));
1817 if (brand_get_query(bh
, zone_name
, zpath
, query_hook
+ EXEC_LEN
,
1818 sizeof (query_hook
) - EXEC_LEN
) != 0)
1821 if (strlen(query_hook
) <= EXEC_LEN
)
1822 query_hook
[0] = '\0';
1828 main(int argc
, char *argv
[])
1832 priv_set_t
*privset
;
1833 zone_state_t zstate
;
1834 char parents_locale
[MAXPATHLEN
];
1847 size_t shstatelen
= getpagesize();
1854 progname
= get_execbasename(argv
[0]);
1857 * Make sure stderr is unbuffered
1859 (void) setbuffer(stderr
, NULL
, 0);
1862 * Get out of the way of mounted filesystems, since we will daemonize
1868 * Use the default system umask per PSARC 1998/110 rather than
1869 * anything that may have been set by the caller.
1871 (void) umask(CMASK
);
1874 * Initially we want to use our parent's locale.
1876 (void) setlocale(LC_ALL
, "");
1877 (void) textdomain(TEXT_DOMAIN
);
1878 (void) strlcpy(parents_locale
, setlocale(LC_MESSAGES
, NULL
),
1879 sizeof (parents_locale
));
1882 * This zlog_t is used for writing to stderr
1884 errlog
.logfile
= stderr
;
1885 errlog
.buflen
= errlog
.loglen
= 0;
1886 errlog
.buf
= errlog
.log
= NULL
;
1887 errlog
.locale
= parents_locale
;
1890 * We start off writing to stderr until we're ready to daemonize.
1897 while ((opt
= getopt(argc
, argv
, "R:z:")) != EOF
) {
1900 zonecfg_set_root(optarg
);
1910 if (zone_name
== NULL
)
1914 * Because usage() prints directly to stderr, it has gettext()
1915 * wrapping, which depends on the locale. But since zerror() calls
1916 * localize() which tweaks the locale, it is not safe to call zerror()
1917 * until after the last call to usage(). Fortunately, the last call
1918 * to usage() is just above and the first call to zerror() is just
1919 * below. Don't mess this up.
1921 if (strcmp(zone_name
, GLOBAL_ZONENAME
) == 0) {
1922 zerror(zlogp
, B_FALSE
, "cannot manage the %s zone",
1927 if (zone_get_id(zone_name
, &zid
) != 0) {
1928 zerror(zlogp
, B_FALSE
, "could not manage %s: %s", zone_name
,
1929 zonecfg_strerror(Z_NO_ZONE
));
1933 if ((err
= zone_get_state(zone_name
, &zstate
)) != Z_OK
) {
1934 zerror(zlogp
, B_FALSE
, "failed to get zone state: %s",
1935 zonecfg_strerror(err
));
1938 if (zstate
< ZONE_STATE_INCOMPLETE
) {
1939 zerror(zlogp
, B_FALSE
,
1940 "cannot manage a zone which is in state '%s'",
1941 zone_state_str(zstate
));
1945 if (zonecfg_default_brand(default_brand
,
1946 sizeof (default_brand
)) != Z_OK
) {
1947 zerror(zlogp
, B_FALSE
, "unable to determine default brand");
1951 /* Get a handle to the brand info for this zone */
1952 if (zone_get_brand(zone_name
, brand_name
, sizeof (brand_name
))
1954 zerror(zlogp
, B_FALSE
, "unable to determine zone brand");
1957 zone_isnative
= (strcmp(brand_name
, NATIVE_BRAND_NAME
) == 0);
1958 zone_islabeled
= (strcmp(brand_name
, LABELED_BRAND_NAME
) == 0);
1961 * In the alternate root environment, the only supported
1962 * operations are mount and unmount. In this case, just treat
1963 * the zone as native if it is cluster. Cluster zones can be
1964 * native for the purpose of LU or upgrade, and the cluster
1965 * brand may not exist in the miniroot (such as in net install
1968 if (strcmp(brand_name
, CLUSTER_BRAND_NAME
) == 0) {
1969 zone_iscluster
= B_TRUE
;
1970 if (zonecfg_in_alt_root()) {
1971 (void) strlcpy(brand_name
, default_brand
,
1972 sizeof (brand_name
));
1975 zone_iscluster
= B_FALSE
;
1978 if ((bh
= brand_open(brand_name
)) == NULL
) {
1979 zerror(zlogp
, B_FALSE
, "unable to open zone brand");
1983 /* Get state change brand hooks. */
1984 if (brand_callback_init(bh
, zone_name
) == -1) {
1985 zerror(zlogp
, B_TRUE
,
1986 "failed to initialize brand state change hooks");
1994 * Check that we have all privileges. It would be nice to pare
1995 * this down, but this is at least a first cut.
1997 if ((privset
= priv_allocset()) == NULL
) {
1998 zerror(zlogp
, B_TRUE
, "%s failed", "priv_allocset");
2002 if (getppriv(PRIV_EFFECTIVE
, privset
) != 0) {
2003 zerror(zlogp
, B_TRUE
, "%s failed", "getppriv");
2004 priv_freeset(privset
);
2008 if (priv_isfullset(privset
) == B_FALSE
) {
2009 zerror(zlogp
, B_FALSE
, "You lack sufficient privilege to "
2010 "run this command (all privs required)");
2011 priv_freeset(privset
);
2014 priv_freeset(privset
);
2016 if (mkzonedir(zlogp
) != 0)
2020 * Pre-fork: setup shared state
2022 if ((shstate
= (void *)mmap(NULL
, shstatelen
,
2023 PROT_READ
|PROT_WRITE
, MAP_SHARED
|MAP_ANON
, -1, (off_t
)0)) ==
2025 zerror(zlogp
, B_TRUE
, "%s failed", "mmap");
2028 if (sema_init(&shstate
->sem
, 0, USYNC_PROCESS
, NULL
) != 0) {
2029 zerror(zlogp
, B_TRUE
, "%s failed", "sema_init()");
2030 (void) munmap((char *)shstate
, shstatelen
);
2033 shstate
->log
.logfile
= NULL
;
2034 shstate
->log
.buflen
= shstatelen
- sizeof (*shstate
);
2035 shstate
->log
.loglen
= shstate
->log
.buflen
;
2036 shstate
->log
.buf
= (char *)shstate
+ sizeof (*shstate
);
2037 shstate
->log
.log
= shstate
->log
.buf
;
2038 shstate
->log
.locale
= parents_locale
;
2039 shstate
->status
= -1;
2042 * We need a SIGCHLD handler so the sema_wait() below will wake
2043 * up if the child dies without doing a sema_post().
2045 (void) sigset(SIGCHLD
, sigchld
);
2047 * We must mask SIGCHLD until after we've coped with the fork
2048 * sufficiently to deal with it; otherwise we can race and
2049 * receive the signal before pid has been initialized
2050 * (yes, this really happens).
2052 (void) sigemptyset(&block_cld
);
2053 (void) sigaddset(&block_cld
, SIGCHLD
);
2054 (void) sigprocmask(SIG_BLOCK
, &block_cld
, NULL
);
2057 * The parent only needs stderr after the fork, so close other fd's
2058 * that we inherited from zoneadm so that the parent doesn't have those
2059 * open while waiting. The child will close the rest after the fork.
2063 if ((ctfd
= init_template()) == -1) {
2064 zerror(zlogp
, B_TRUE
, "failed to create contract");
2069 * Do not let another thread localize a message while we are forking.
2071 (void) mutex_lock(&msglock
);
2073 (void) mutex_unlock(&msglock
);
2076 * In all cases (parent, child, and in the event of an error) we
2077 * don't want to cause creation of contracts on subsequent fork()s.
2079 (void) ct_tmpl_clear(ctfd
);
2083 zerror(zlogp
, B_TRUE
, "could not fork");
2086 } else if (pid
> 0) { /* parent */
2087 (void) sigprocmask(SIG_UNBLOCK
, &block_cld
, NULL
);
2089 * This marks a window of vulnerability in which we receive
2090 * the SIGCLD before falling into sema_wait (normally we would
2091 * get woken up from sema_wait with EINTR upon receipt of
2092 * SIGCLD). So we may need to use some other scheme like
2093 * sema_posting in the sigcld handler.
2096 (void) sema_wait(&shstate
->sem
);
2097 (void) sema_destroy(&shstate
->sem
);
2098 if (shstate
->status
!= 0)
2099 (void) waitpid(pid
, NULL
, WNOHANG
);
2101 * It's ok if we die with SIGPIPE. It's not like we could have
2102 * done anything about it.
2104 (void) fprintf(stderr
, "%s", shstate
->log
.buf
);
2105 _exit(shstate
->status
== 0 ? 0 : 1);
2109 * The child charges on.
2111 (void) sigset(SIGCHLD
, SIG_DFL
);
2112 (void) sigprocmask(SIG_UNBLOCK
, &block_cld
, NULL
);
2115 * SIGPIPE can be delivered if we write to a socket for which the
2116 * peer endpoint is gone. That can lead to too-early termination
2117 * of zoneadmd, and that's not good eats.
2119 (void) sigset(SIGPIPE
, SIG_IGN
);
2123 zlogp
= &shstate
->log
;
2126 * We don't need stdout/stderr from now on.
2131 * Initialize the syslog zlog_t. This needs to be done after
2132 * the call to closefrom().
2134 logsys
.buf
= logsys
.log
= NULL
;
2135 logsys
.buflen
= logsys
.loglen
= 0;
2136 logsys
.logfile
= NULL
;
2137 logsys
.locale
= DEFAULT_LOCALE
;
2139 openlog("zoneadmd", LOG_PID
, LOG_DAEMON
);
2142 * The eventstream is used to publish state changes in the zone
2143 * from the door threads to the console I/O poller.
2145 if (eventstream_init() == -1) {
2146 zerror(zlogp
, B_TRUE
, "unable to create eventstream");
2150 (void) snprintf(zone_door_path
, sizeof (zone_door_path
),
2151 "%s" ZONE_DOOR_PATH
, zonecfg_get_root(), zone_name
);
2154 * See if another zoneadmd is running for this zone. If not, then we
2155 * can now modify system state.
2157 if (make_daemon_exclusive(zlogp
) == -1)
2162 * Create/join a new session; we need to be careful of what we do with
2163 * the console from now on so we don't end up being the session leader
2164 * for the terminal we're going to be handing out.
2169 * This thread shouldn't be receiving any signals; in particular,
2170 * SIGCHLD should be received by the thread doing the fork().
2172 (void) sigfillset(&blockset
);
2173 (void) thr_sigsetmask(SIG_BLOCK
, &blockset
, NULL
);
2176 * Setup the console device and get ready to serve the console;
2177 * once this has completed, we're ready to let console clients
2178 * make an attempt to connect (they will block until
2179 * serve_console_sock() below gets called, and any pending
2180 * connection is accept()ed).
2182 if (!zonecfg_in_alt_root() && init_console(zlogp
) < 0)
2186 * Take the lock now, so that when the door server gets going, we
2187 * are guaranteed that it won't take a request until we are sure
2188 * that everything is completely set up. See the child_out: label
2189 * below to see why this matters.
2191 (void) mutex_lock(&lock
);
2193 /* Init semaphore for scratch zones. */
2194 if (sema_init(&scratch_sem
, 0, USYNC_THREAD
, NULL
) == -1) {
2195 zerror(zlogp
, B_TRUE
,
2196 "failed to initialize semaphore for scratch zone");
2200 /* open the dladm handle */
2201 if (dladm_open(&dld_handle
) != DLADM_STATUS_OK
) {
2202 zerror(zlogp
, B_FALSE
, "failed to open dladm handle");
2207 * Note: door setup must occur *after* the console is setup.
2208 * This is so that as zlogin tests the door to see if zoneadmd
2209 * is ready yet, we know that the console will get serviced
2210 * once door_info() indicates that the door is "up".
2212 if (setup_door(zlogp
) == -1)
2216 * Things seem OK so far; tell the parent process that we're done
2217 * with setup tasks. This will cause the parent to exit, signalling
2218 * to zoneadm, zlogin, or whatever forked it that we are ready to
2221 shstate
->status
= 0;
2222 (void) sema_post(&shstate
->sem
);
2223 (void) munmap((char *)shstate
, shstatelen
);
2226 (void) mutex_unlock(&lock
);
2229 * zlogp is now invalid, so reset it to the syslog logger.
2234 * Now that we are free of any parents, switch to the default locale.
2236 (void) setlocale(LC_ALL
, DEFAULT_LOCALE
);
2239 * At this point the setup portion of main() is basically done, so
2240 * we reuse this thread to manage the zone console. When
2241 * serve_console() has returned, we are past the point of no return
2242 * in the life of this zoneadmd.
2244 if (zonecfg_in_alt_root()) {
2246 * This is just awful, but mounted scratch zones don't (and
2247 * can't) have consoles. We just wait for unmount instead.
2249 while (sema_wait(&scratch_sem
) == EINTR
)
2252 serve_console(zlogp
);
2253 assert(in_death_throes
);
2257 * This is the next-to-last part of the exit interlock. Upon calling
2258 * fdetach(), the door will go unreferenced; once any
2259 * outstanding requests (like the door thread doing Z_HALT) are
2260 * done, the door will get an UNREF notification; when it handles
2261 * the UNREF, the door server will cause the exit. It's possible
2262 * that fdetach() can fail because the file is in use, in which
2263 * case we'll retry the operation.
2265 assert(!MUTEX_HELD(&lock
));
2267 if ((fdetach(zone_door_path
) == 0) || (errno
!= EBUSY
))
2277 if (shstate
!= NULL
) {
2278 shstate
->status
= -1;
2279 (void) sema_post(&shstate
->sem
);
2280 (void) munmap((char *)shstate
, shstatelen
);
2284 * This might trigger an unref notification, but if so,
2285 * we are still holding the lock, so our call to exit will
2286 * ultimately win the race and will publish the right exit
2289 if (zone_door
!= -1) {
2290 assert(MUTEX_HELD(&lock
));
2291 (void) door_revoke(zone_door
);
2292 (void) fdetach(zone_door_path
);
2295 if (dld_handle
!= NULL
)
2296 dladm_close(dld_handle
);
2298 return (1); /* return from main() forcibly exits an MT process */