4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
27 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
31 * This module contains functions used to bring up and tear down the
32 * Virtual Platform: [un]mounting file-systems, [un]plumbing network
33 * interfaces, [un]configuring devices, establishing resource controls,
34 * and creating/destroying the zone in the kernel. These actions, on
35 * the way up, ready the zone; on the way down, they halt the zone.
36 * See the much longer block comment at the beginning of zoneadmd.c
37 * for a bigger picture of how the whole program functions.
39 * This module also has primary responsibility for the layout of "scratch
40 * zones." These are mounted, but inactive, zones that are used during
41 * operating system upgrade and potentially other administrative action. The
42 * scratch zone environment is similar to the miniroot environment. The zone's
43 * actual root is mounted read-write on /a, and the standard paths (/usr,
44 * /sbin, /lib) all lead to read-only copies of the running system's binaries.
45 * This allows the administrative tools to manipulate the zone using "-R /a"
46 * without relying on any binaries in the zone itself.
48 * If the scratch zone is on an alternate root (Live Upgrade [LU] boot
49 * environment), then we must resolve the lofs mounts used there to uncover
50 * writable (unshared) resources. Shared resources, though, are always
51 * read-only. In addition, if the "same" zone with a different root path is
52 * currently running, then "/b" inside the zone points to the running zone's
53 * root. This allows LU to synchronize configuration files during the upgrade
56 * To construct this environment, this module creates a tmpfs mount on
57 * $ZONEPATH/lu. Inside this scratch area, the miniroot-like environment as
58 * described above is constructed on the fly. The zone is then created using
59 * $ZONEPATH/lu as the root.
61 * Note that scratch zones are inactive. The zone's bits are not running and
62 * likely cannot be run correctly until upgrade is done. Init is not running
63 * there, nor is SMF. Because of this, the "mounted" state of a scratch zone
64 * is not a part of the usual halt/ready/boot state machine.
67 #include <sys/param.h>
68 #include <sys/mount.h>
69 #include <sys/mntent.h>
70 #include <sys/socket.h>
71 #include <sys/utsname.h>
72 #include <sys/types.h>
74 #include <sys/sockio.h>
75 #include <sys/stropts.h>
77 #include <sys/systeminfo.h>
80 #include <libdllink.h>
81 #include <libdlvlan.h>
84 #include <arpa/inet.h>
85 #include <netinet/in.h>
86 #include <net/route.h>
100 #include <libdevinfo.h>
103 #include <libcontract.h>
104 #include <libcontract_priv.h>
105 #include <uuid/uuid.h>
107 #include <sys/mntio.h>
108 #include <sys/mnttab.h>
109 #include <sys/fs/autofs.h> /* for _autofssys() */
110 #include <sys/fs/lofs_info.h>
111 #include <sys/fs/zfs.h>
114 #include <sys/pool.h>
115 #include <sys/priocntl.h>
117 #include <libbrand.h>
118 #include <sys/brand.h>
119 #include <libzonecfg.h>
122 #include "zoneadmd.h"
123 #include <tsol/label.h>
124 #include <libtsnet.h>
125 #include <sys/priv.h>
126 #include <libinetutil.h>
128 #define V4_ADDR_LEN 32
129 #define V6_ADDR_LEN 128
131 #define RESOURCE_DEFAULT_OPTS \
132 MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
134 #define DFSTYPES "/etc/dfs/fstypes"
135 #define MAXTNZLEN 2048
137 #define ALT_MOUNT(mount_cmd) ((mount_cmd) != Z_MNT_BOOT)
139 /* a reasonable estimate for the number of lwps per process */
140 #define LWPS_PER_PROCESS 10
142 /* for routing socket */
143 static int rts_seqno
= 0;
145 /* mangled zone name when mounting in an alternate root environment */
146 static char kernzone
[ZONENAME_MAX
];
148 /* array of cached mount entries for resolve_lofs */
149 static struct mnttab
*resolve_lofs_mnts
, *resolve_lofs_mnt_max
;
151 /* for Trusted Extensions */
152 static tsol_zcent_t
*get_zone_label(zlog_t
*, priv_set_t
*);
153 static int tsol_mounts(zlog_t
*, char *, char *);
154 static void tsol_unmounts(zlog_t
*, char *);
156 static m_label_t
*zlabel
= NULL
;
157 static m_label_t
*zid_label
= NULL
;
158 static priv_set_t
*zprivs
= NULL
;
160 /* from libsocket, not in any header file */
161 extern int getnetmaskbyaddr(struct in_addr
, struct in_addr
*);
164 extern char query_hook
[];
167 * For each "net" resource configured in zonecfg, we track a zone_addr_list_t
168 * node in a linked list that is sorted by linkid. The list is constructed as
169 * the xml configuration file is parsed, and the information
170 * contained in each node is added to the kernel before the zone is
171 * booted, to be retrieved and applied from within the exclusive-IP NGZ
174 typedef struct zone_addr_list
{
175 struct zone_addr_list
*za_next
;
176 datalink_id_t za_linkid
; /* datalink_id_t of interface */
177 struct zone_nwiftab za_nwiftab
; /* address, defrouter properties */
181 * An optimization for build_mnttable: reallocate (and potentially copy the
182 * data) only once every N times through the loop.
184 #define MNTTAB_HUNK 32
186 /* some handy macros */
187 #define SIN(s) ((struct sockaddr_in *)s)
188 #define SIN6(s) ((struct sockaddr_in6 *)s)
191 * Private autofs system call
193 extern int _autofssys(int, void *);
196 autofs_cleanup(zoneid_t zoneid
)
199 * Ask autofs to unmount all trigger nodes in the given zone.
201 return (_autofssys(AUTOFS_UNMOUNTALL
, (void *)zoneid
));
205 free_mnttable(struct mnttab
*mnt_array
, uint_t nelem
)
209 if (mnt_array
== NULL
)
211 for (i
= 0; i
< nelem
; i
++) {
212 free(mnt_array
[i
].mnt_mountp
);
213 free(mnt_array
[i
].mnt_fstype
);
214 free(mnt_array
[i
].mnt_special
);
215 free(mnt_array
[i
].mnt_mntopts
);
216 assert(mnt_array
[i
].mnt_time
== NULL
);
222 * Build the mount table for the zone rooted at "zroot", storing the resulting
223 * array of struct mnttabs in "mnt_arrayp" and the number of elements in the
227 build_mnttable(zlog_t
*zlogp
, const char *zroot
, size_t zrootlen
, FILE *mnttab
,
228 struct mnttab
**mnt_arrayp
, uint_t
*nelemp
)
239 while (getmntent(mnttab
, &mnt
) == 0) {
240 struct mnttab
*tmp_array
;
242 if (strncmp(mnt
.mnt_mountp
, zroot
, zrootlen
) != 0)
244 if (nmnt
% MNTTAB_HUNK
== 0) {
245 tmp_array
= realloc(mnts
,
246 (nmnt
+ MNTTAB_HUNK
) * sizeof (*mnts
));
247 if (tmp_array
== NULL
) {
248 free_mnttable(mnts
, nmnt
);
256 * Zero out any fields we're not using.
258 (void) memset(mnp
, 0, sizeof (*mnp
));
260 if (mnt
.mnt_special
!= NULL
)
261 mnp
->mnt_special
= strdup(mnt
.mnt_special
);
262 if (mnt
.mnt_mntopts
!= NULL
)
263 mnp
->mnt_mntopts
= strdup(mnt
.mnt_mntopts
);
264 mnp
->mnt_mountp
= strdup(mnt
.mnt_mountp
);
265 mnp
->mnt_fstype
= strdup(mnt
.mnt_fstype
);
266 if ((mnt
.mnt_special
!= NULL
&& mnp
->mnt_special
== NULL
) ||
267 (mnt
.mnt_mntopts
!= NULL
&& mnp
->mnt_mntopts
== NULL
) ||
268 mnp
->mnt_mountp
== NULL
|| mnp
->mnt_fstype
== NULL
) {
269 zerror(zlogp
, B_TRUE
, "memory allocation failed");
270 free_mnttable(mnts
, nmnt
);
280 * This is an optimization. The resolve_lofs function is used quite frequently
281 * to manipulate file paths, and on a machine with a large number of zones,
282 * there will be a huge number of mounted file systems. Thus, we trigger a
283 * reread of the list of mount points
286 lofs_discard_mnttab(void)
288 free_mnttable(resolve_lofs_mnts
,
289 resolve_lofs_mnt_max
- resolve_lofs_mnts
);
290 resolve_lofs_mnts
= resolve_lofs_mnt_max
= NULL
;
294 lofs_read_mnttab(zlog_t
*zlogp
)
299 if ((mnttab
= fopen(MNTTAB
, "r")) == NULL
)
301 if (build_mnttable(zlogp
, "", 0, mnttab
, &resolve_lofs_mnts
,
303 (void) fclose(mnttab
);
306 (void) fclose(mnttab
);
307 resolve_lofs_mnt_max
= resolve_lofs_mnts
+ nmnts
;
312 * This function loops over potential loopback mounts and symlinks in a given
313 * path and resolves them all down to an absolute path.
316 resolve_lofs(zlog_t
*zlogp
, char *path
, size_t pathlen
)
320 char tmppath
[MAXPATHLEN
];
321 boolean_t outside_altroot
;
323 if ((len
= resolvepath(path
, tmppath
, sizeof (tmppath
))) == -1)
326 (void) strlcpy(path
, tmppath
, sizeof (tmppath
));
328 /* This happens once per zoneadmd operation. */
329 if (resolve_lofs_mnts
== NULL
&& lofs_read_mnttab(zlogp
) == -1)
332 altroot
= zonecfg_get_root();
333 arlen
= strlen(altroot
);
334 outside_altroot
= B_FALSE
;
338 /* Search in reverse order to find longest match */
339 for (mnp
= resolve_lofs_mnt_max
- 1; mnp
>= resolve_lofs_mnts
;
341 if (mnp
->mnt_fstype
== NULL
||
342 mnp
->mnt_mountp
== NULL
||
343 mnp
->mnt_special
== NULL
)
345 len
= strlen(mnp
->mnt_mountp
);
346 if (strncmp(mnp
->mnt_mountp
, path
, len
) == 0 &&
347 (path
[len
] == '/' || path
[len
] == '\0'))
350 if (mnp
< resolve_lofs_mnts
)
352 /* If it's not a lofs then we're done */
353 if (strcmp(mnp
->mnt_fstype
, MNTTYPE_LOFS
) != 0)
355 if (outside_altroot
) {
357 int olen
= sizeof (MNTOPT_RO
) - 1;
360 * If we run into a read-only mount outside of the
361 * alternate root environment, then the user doesn't
362 * want this path to be made read-write.
364 if (mnp
->mnt_mntopts
!= NULL
&&
365 (cp
= strstr(mnp
->mnt_mntopts
, MNTOPT_RO
)) !=
367 (cp
== mnp
->mnt_mntopts
|| cp
[-1] == ',') &&
368 (cp
[olen
] == '\0' || cp
[olen
] == ',')) {
371 } else if (arlen
> 0 &&
372 (strncmp(mnp
->mnt_special
, altroot
, arlen
) != 0 ||
373 (mnp
->mnt_special
[arlen
] != '\0' &&
374 mnp
->mnt_special
[arlen
] != '/'))) {
375 outside_altroot
= B_TRUE
;
377 /* use temporary buffer because new path might be longer */
378 (void) snprintf(tmppath
, sizeof (tmppath
), "%s%s",
379 mnp
->mnt_special
, path
+ len
);
380 if ((len
= resolvepath(tmppath
, path
, pathlen
)) == -1)
387 * For a regular mount, check if a replacement lofs mount is needed because the
388 * referenced device is already mounted somewhere.
391 check_lofs_needed(zlog_t
*zlogp
, struct zone_fstab
*fsptr
)
394 zone_fsopt_t
*optptr
, *onext
;
396 /* This happens once per zoneadmd operation. */
397 if (resolve_lofs_mnts
== NULL
&& lofs_read_mnttab(zlogp
) == -1)
401 * If this special node isn't already in use, then it's ours alone;
402 * no need to worry about conflicting mounts.
404 for (mnp
= resolve_lofs_mnts
; mnp
< resolve_lofs_mnt_max
;
406 if (strcmp(mnp
->mnt_special
, fsptr
->zone_fs_special
) == 0)
409 if (mnp
>= resolve_lofs_mnt_max
)
413 * Convert this duplicate mount into a lofs mount.
415 (void) strlcpy(fsptr
->zone_fs_special
, mnp
->mnt_mountp
,
416 sizeof (fsptr
->zone_fs_special
));
417 (void) strlcpy(fsptr
->zone_fs_type
, MNTTYPE_LOFS
,
418 sizeof (fsptr
->zone_fs_type
));
419 fsptr
->zone_fs_raw
[0] = '\0';
422 * Discard all but one of the original options and set that to our
423 * default set of options used for resources.
425 optptr
= fsptr
->zone_fs_options
;
426 if (optptr
== NULL
) {
427 optptr
= malloc(sizeof (*optptr
));
428 if (optptr
== NULL
) {
429 zerror(zlogp
, B_TRUE
, "cannot mount %s",
434 while ((onext
= optptr
->zone_fsopt_next
) != NULL
) {
435 optptr
->zone_fsopt_next
= onext
->zone_fsopt_next
;
439 (void) strcpy(optptr
->zone_fsopt_opt
, RESOURCE_DEFAULT_OPTS
);
440 optptr
->zone_fsopt_next
= NULL
;
441 fsptr
->zone_fs_options
= optptr
;
446 make_one_dir(zlog_t
*zlogp
, const char *prefix
, const char *subdir
, mode_t mode
,
447 uid_t userid
, gid_t groupid
)
449 char path
[MAXPATHLEN
];
452 if (snprintf(path
, sizeof (path
), "%s%s", prefix
, subdir
) >
454 zerror(zlogp
, B_FALSE
, "pathname %s%s is too long", prefix
,
459 if (lstat(path
, &st
) == 0) {
461 * We don't check the file mode since presumably the zone
462 * administrator may have had good reason to change the mode,
463 * and we don't need to second guess him.
465 if (!S_ISDIR(st
.st_mode
)) {
466 if (S_ISREG(st
.st_mode
)) {
468 * Allow readonly mounts of /etc/ files; this
469 * is needed most by Trusted Extensions.
471 if (strncmp(subdir
, "/etc/",
472 strlen("/etc/")) != 0) {
473 zerror(zlogp
, B_FALSE
,
474 "%s is not in /etc", path
);
478 zerror(zlogp
, B_FALSE
,
479 "%s is not a directory", path
);
486 if (mkdirp(path
, mode
) != 0) {
488 zerror(zlogp
, B_FALSE
, "Could not mkdir %s.\nIt is on "
489 "a read-only file system in this local zone.\nMake "
490 "sure %s exists in the global zone.", path
, subdir
);
492 zerror(zlogp
, B_TRUE
, "mkdirp of %s failed", path
);
496 (void) chown(path
, userid
, groupid
);
501 free_remote_fstypes(char **types
)
507 for (i
= 0; types
[i
] != NULL
; i
++)
513 get_remote_fstypes(zlog_t
*zlogp
)
517 char buf
[MAXPATHLEN
];
518 char fstype
[MAXPATHLEN
];
522 if ((fp
= fopen(DFSTYPES
, "r")) == NULL
) {
523 zerror(zlogp
, B_TRUE
, "failed to open %s", DFSTYPES
);
527 * Count the number of lines
529 while (fgets(buf
, sizeof (buf
), fp
) != NULL
)
531 if (lines
== 0) /* didn't read anything; empty file */
535 * Allocate enough space for a NULL-terminated array.
537 types
= calloc(lines
+ 1, sizeof (char *));
539 zerror(zlogp
, B_TRUE
, "memory allocation failed");
543 while (fgets(buf
, sizeof (buf
), fp
) != NULL
) {
544 /* LINTED - fstype is big enough to hold buf */
545 if (sscanf(buf
, "%s", fstype
) == 0) {
546 zerror(zlogp
, B_FALSE
, "unable to parse %s", DFSTYPES
);
547 free_remote_fstypes(types
);
551 types
[i
] = strdup(fstype
);
552 if (types
[i
] == NULL
) {
553 zerror(zlogp
, B_TRUE
, "memory allocation failed");
554 free_remote_fstypes(types
);
566 is_remote_fstype(const char *fstype
, char *const *remote_fstypes
)
570 if (remote_fstypes
== NULL
)
572 for (i
= 0; remote_fstypes
[i
] != NULL
; i
++) {
573 if (strcmp(remote_fstypes
[i
], fstype
) == 0)
580 * This converts a zone root path (normally of the form .../root) to a Live
581 * Upgrade scratch zone root (of the form .../lu).
584 root_to_lu(zlog_t
*zlogp
, char *zroot
, size_t zrootlen
, boolean_t isresolved
)
586 if (!isresolved
&& zonecfg_in_alt_root())
587 resolve_lofs(zlogp
, zroot
, zrootlen
);
588 (void) strcpy(strrchr(zroot
, '/') + 1, "lu");
592 * The general strategy for unmounting filesystems is as follows:
594 * - Remote filesystems may be dead, and attempting to contact them as
595 * part of a regular unmount may hang forever; we want to always try to
596 * forcibly unmount such filesystems and only fall back to regular
597 * unmounts if the filesystem doesn't support forced unmounts.
599 * - We don't want to unnecessarily corrupt metadata on local
600 * filesystems (ie UFS), so we want to start off with graceful unmounts,
601 * and only escalate to doing forced unmounts if we get stuck.
603 * We start off walking backwards through the mount table. This doesn't
604 * give us strict ordering but ensures that we try to unmount submounts
605 * first. We thus limit the number of failed umount2(2) calls.
607 * The mechanism for determining if we're stuck is to count the number
608 * of failed unmounts each iteration through the mount table. This
609 * gives us an upper bound on the number of filesystems which remain
610 * mounted (autofs trigger nodes are dealt with separately). If at the
611 * end of one unmount+autofs_cleanup cycle we still have the same number
612 * of mounts that we started out with, we're stuck and try a forced
613 * unmount. If that fails (filesystem doesn't support forced unmounts)
614 * then we bail and are unable to teardown the zone. If it succeeds,
615 * we're no longer stuck so we continue with our policy of trying
616 * graceful mounts first.
618 * Zone must be down (ie, no processes or threads active).
621 unmount_filesystems(zlog_t
*zlogp
, zoneid_t zoneid
, boolean_t unmount_cmd
)
627 char zroot
[MAXPATHLEN
+ 1];
629 uint_t oldcount
= UINT_MAX
;
630 boolean_t stuck
= B_FALSE
;
631 char **remote_fstypes
= NULL
;
633 if (zone_get_rootpath(zone_name
, zroot
, sizeof (zroot
)) != Z_OK
) {
634 zerror(zlogp
, B_FALSE
, "unable to determine zone root");
638 root_to_lu(zlogp
, zroot
, sizeof (zroot
), B_FALSE
);
640 (void) strcat(zroot
, "/");
641 zrootlen
= strlen(zroot
);
644 * For Trusted Extensions unmount each higher level zone's mount
645 * of our zone's /export/home
648 tsol_unmounts(zlogp
, zone_name
);
650 if ((mnttab
= fopen(MNTTAB
, "r")) == NULL
) {
651 zerror(zlogp
, B_TRUE
, "failed to open %s", MNTTAB
);
655 * Use our hacky mntfs ioctl so we see everything, even mounts with
658 if (ioctl(fileno(mnttab
), MNTIOC_SHOWHIDDEN
, NULL
) < 0) {
659 zerror(zlogp
, B_TRUE
, "unable to configure %s", MNTTAB
);
665 * Build the list of remote fstypes so we know which ones we
666 * should forcibly unmount.
668 remote_fstypes
= get_remote_fstypes(zlogp
);
669 for (; /* ever */; ) {
679 * MNTTAB gives us a way to walk through mounted
680 * filesystems; we need to be able to walk them in
681 * reverse order, so we build a list of all mounted
684 if (build_mnttable(zlogp
, zroot
, zrootlen
, mnttab
, &mnts
,
689 for (i
= 0; i
< nmnt
; i
++) {
690 mnp
= &mnts
[nmnt
- i
- 1]; /* access in reverse order */
691 path
= mnp
->mnt_mountp
;
694 * Try forced unmount first for remote filesystems.
696 * Not all remote filesystems support forced unmounts,
697 * so if this fails (ENOTSUP) we'll continue on
698 * and try a regular unmount.
700 if (is_remote_fstype(mnp
->mnt_fstype
, remote_fstypes
)) {
701 if (umount2(path
, MS_FORCE
) == 0)
705 * Try forced unmount if we're stuck.
708 if (umount2(path
, MS_FORCE
) == 0) {
713 * The first failure indicates a
714 * mount we won't be able to get
715 * rid of automatically, so we
719 zerror(zlogp
, B_FALSE
,
720 "unable to unmount '%s'", path
);
721 free_mnttable(mnts
, nmnt
);
726 * Try regular unmounts for everything else.
728 if (!unmounted
&& umount2(path
, 0) != 0)
731 free_mnttable(mnts
, nmnt
);
735 if (newcount
>= oldcount
) {
737 * Last round didn't unmount anything; we're stuck and
738 * should start trying forced unmounts.
745 * Autofs doesn't let you unmount its trigger nodes from
746 * userland so we have to tell the kernel to cleanup for us.
748 if (autofs_cleanup(zoneid
) != 0) {
749 zerror(zlogp
, B_TRUE
, "unable to remove autofs nodes");
756 free_remote_fstypes(remote_fstypes
);
757 (void) fclose(mnttab
);
758 return (error
? -1 : 0);
762 fs_compare(const void *m1
, const void *m2
)
764 struct zone_fstab
*i
= (struct zone_fstab
*)m1
;
765 struct zone_fstab
*j
= (struct zone_fstab
*)m2
;
767 return (strcmp(i
->zone_fs_dir
, j
->zone_fs_dir
));
771 * Fork and exec (and wait for) the mentioned binary with the provided
772 * arguments. Returns (-1) if something went wrong with fork(2) or exec(2),
773 * returns the exit status otherwise.
775 * If we were unable to exec the provided pathname (for whatever
776 * reason), we return the special token ZEXIT_EXEC. The current value
777 * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the
778 * consumers of this function; any future consumers must make sure this
782 forkexec(zlog_t
*zlogp
, const char *path
, char *const argv
[])
785 int child_status
= 0;
788 * Do not let another thread localize a message while we are forking.
790 (void) mutex_lock(&msglock
);
792 (void) mutex_unlock(&msglock
);
793 if (child_pid
== -1) {
794 zerror(zlogp
, B_TRUE
, "could not fork for %s", argv
[0]);
796 } else if (child_pid
== 0) {
798 /* redirect stdin, stdout & stderr to /dev/null */
799 (void) open("/dev/null", O_RDONLY
); /* stdin */
800 (void) open("/dev/null", O_WRONLY
); /* stdout */
801 (void) open("/dev/null", O_WRONLY
); /* stderr */
802 (void) execv(path
, argv
);
804 * Since we are in the child, there is no point calling zerror()
805 * since there is nobody waiting to consume it. So exit with a
806 * special code that the parent will recognize and call zerror()
812 (void) waitpid(child_pid
, &child_status
, 0);
815 if (WIFSIGNALED(child_status
)) {
816 zerror(zlogp
, B_FALSE
, "%s unexpectedly terminated due to "
817 "signal %d", path
, WTERMSIG(child_status
));
820 assert(WIFEXITED(child_status
));
821 if (WEXITSTATUS(child_status
) == ZEXIT_EXEC
) {
822 zerror(zlogp
, B_FALSE
, "failed to exec %s", path
);
825 return (WEXITSTATUS(child_status
));
829 isregfile(const char *path
)
833 if (stat64(path
, &st
) == -1)
836 return (S_ISREG(st
.st_mode
));
840 dofsck(zlog_t
*zlogp
, const char *fstype
, const char *rawdev
)
842 char cmdbuf
[MAXPATHLEN
];
847 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but
848 * that would cost us an extra fork/exec without buying us anything.
850 if (snprintf(cmdbuf
, sizeof (cmdbuf
), "/usr/lib/fs/%s/fsck", fstype
)
851 >= sizeof (cmdbuf
)) {
852 zerror(zlogp
, B_FALSE
, "file-system type %s too long", fstype
);
857 * If it doesn't exist, that's OK: we verified this previously
860 if (isregfile(cmdbuf
) == -1)
866 argv
[3] = (char *)rawdev
;
869 status
= forkexec(zlogp
, cmdbuf
, argv
);
870 if (status
== 0 || status
== -1)
872 zerror(zlogp
, B_FALSE
, "fsck of '%s' failed with exit status %d; "
873 "run fsck manually", rawdev
, status
);
878 domount(zlog_t
*zlogp
, const char *fstype
, const char *opts
,
879 const char *special
, const char *directory
)
881 char cmdbuf
[MAXPATHLEN
];
886 * We could alternatively have called /usr/sbin/mount -F <fstype>, but
887 * that would cost us an extra fork/exec without buying us anything.
889 if (snprintf(cmdbuf
, sizeof (cmdbuf
), "/usr/lib/fs/%s/mount", fstype
)
890 >= sizeof (cmdbuf
)) {
891 zerror(zlogp
, B_FALSE
, "file-system type %s too long", fstype
);
895 if (opts
[0] == '\0') {
896 argv
[1] = (char *)special
;
897 argv
[2] = (char *)directory
;
901 argv
[2] = (char *)opts
;
902 argv
[3] = (char *)special
;
903 argv
[4] = (char *)directory
;
907 status
= forkexec(zlogp
, cmdbuf
, argv
);
908 if (status
== 0 || status
== -1)
911 zerror(zlogp
, B_FALSE
, "\"%s %s %s\" "
912 "failed with exit code %d",
913 cmdbuf
, special
, directory
, status
);
915 zerror(zlogp
, B_FALSE
, "\"%s -o %s %s %s\" "
916 "failed with exit code %d",
917 cmdbuf
, opts
, special
, directory
, status
);
922 * Check if a given mount point path exists.
923 * If it does, make sure it doesn't contain any symlinks.
924 * Note that if "leaf" is false we're checking an intermediate
925 * component of the mount point path, so it must be a directory.
926 * If "leaf" is true, then we're checking the entire mount point
927 * path, so the mount point itself can be anything aside from a
930 * If the path is invalid then a negative value is returned. If the
931 * path exists and is a valid mount point path then 0 is returned.
932 * If the path doesn't exist return a positive value.
935 valid_mount_point(zlog_t
*zlogp
, const char *path
, const boolean_t leaf
)
938 char respath
[MAXPATHLEN
];
941 if (lstat(path
, &statbuf
) != 0) {
944 zerror(zlogp
, B_TRUE
, "can't stat %s", path
);
947 if (S_ISLNK(statbuf
.st_mode
)) {
948 zerror(zlogp
, B_FALSE
, "%s is a symlink", path
);
951 if (!leaf
&& !S_ISDIR(statbuf
.st_mode
)) {
952 zerror(zlogp
, B_FALSE
, "%s is not a directory", path
);
955 if ((res
= resolvepath(path
, respath
, sizeof (respath
))) == -1) {
956 zerror(zlogp
, B_TRUE
, "unable to resolve path %s", path
);
960 if (strcmp(path
, respath
) != 0) {
962 * We don't like ".."s, "."s, or "//"s throwing us off
964 zerror(zlogp
, B_FALSE
, "%s is not a canonical path", path
);
971 * Validate a mount point path. A valid mount point path is an
972 * absolute path that either doesn't exist, or, if it does exists it
973 * must be an absolute canonical path that doesn't have any symbolic
974 * links in it. The target of a mount point path can be any filesystem
975 * object. (Different filesystems can support different mount points,
976 * for example "lofs" and "mntfs" both support files and directories
977 * while "ufs" just supports directories.)
979 * If the path is invalid then a negative value is returned. If the
980 * path exists and is a valid mount point path then 0 is returned.
981 * If the path doesn't exist return a positive value.
984 valid_mount_path(zlog_t
*zlogp
, const char *rootpath
, const char *spec
,
985 const char *dir
, const char *fstype
)
987 char abspath
[MAXPATHLEN
], *slashp
, *slashp_next
;
991 * Sanity check the target mount point path.
992 * It must be a non-null string that starts with a '/'.
995 /* Something went wrong. */
996 zerror(zlogp
, B_FALSE
, "invalid mount directory, "
997 "type: \"%s\", special: \"%s\", dir: \"%s\"",
1003 * Join rootpath and dir. Make sure abspath ends with '/', this
1004 * is added to all paths (even non-directory paths) to allow us
1005 * to detect the end of paths below. If the path already ends
1006 * in a '/', then that's ok too (although we'll fail the
1007 * cannonical path check in valid_mount_point()).
1009 if (snprintf(abspath
, sizeof (abspath
),
1010 "%s%s/", rootpath
, dir
) >= sizeof (abspath
)) {
1011 zerror(zlogp
, B_FALSE
, "pathname %s%s is too long",
1017 * Starting with rootpath, verify the mount path one component
1018 * at a time. Continue until we've evaluated all of abspath.
1020 slashp
= &abspath
[strlen(rootpath
)];
1021 assert(*slashp
== '/');
1023 slashp_next
= strchr(slashp
+ 1, '/');
1025 if (slashp_next
!= NULL
) {
1026 /* This is an intermediary mount path component. */
1027 rv
= valid_mount_point(zlogp
, abspath
, B_FALSE
);
1029 /* This is the last component of the mount path. */
1030 rv
= valid_mount_point(zlogp
, abspath
, B_TRUE
);
1035 } while ((slashp
= slashp_next
) != NULL
);
1040 mount_one_dev_device_cb(void *arg
, const char *match
, const char *name
)
1042 di_prof_t prof
= arg
;
1045 return (di_prof_add_dev(prof
, match
));
1046 return (di_prof_add_map(prof
, match
, name
));
1050 mount_one_dev_symlink_cb(void *arg
, const char *source
, const char *target
)
1052 di_prof_t prof
= arg
;
1054 return (di_prof_add_symlink(prof
, source
, target
));
1058 vplat_get_iptype(zlog_t
*zlogp
, zone_iptype_t
*iptypep
)
1060 zone_dochandle_t handle
;
1062 if ((handle
= zonecfg_init_handle()) == NULL
) {
1063 zerror(zlogp
, B_TRUE
, "getting zone configuration handle");
1066 if (zonecfg_get_snapshot_handle(zone_name
, handle
) != Z_OK
) {
1067 zerror(zlogp
, B_FALSE
, "invalid configuration");
1068 zonecfg_fini_handle(handle
);
1071 if (zonecfg_get_iptype(handle
, iptypep
) != Z_OK
) {
1072 zerror(zlogp
, B_FALSE
, "invalid ip-type configuration");
1073 zonecfg_fini_handle(handle
);
1076 zonecfg_fini_handle(handle
);
1081 * Apply the standard lists of devices/symlinks/mappings and the user-specified
1082 * list of devices (via zonecfg) to the /dev filesystem. The filesystem will
1083 * use these as a profile/filter to determine what exists in /dev.
1086 mount_one_dev(zlog_t
*zlogp
, char *devpath
, zone_mnt_t mount_cmd
)
1088 char brand
[MAXNAMELEN
];
1089 zone_dochandle_t handle
= NULL
;
1090 brand_handle_t bh
= NULL
;
1091 struct zone_devtab ztab
;
1092 di_prof_t prof
= NULL
;
1095 zone_iptype_t iptype
;
1096 const char *curr_iptype
;
1098 if (di_prof_init(devpath
, &prof
)) {
1099 zerror(zlogp
, B_TRUE
, "failed to initialize profile");
1104 * Get a handle to the brand info for this zone.
1105 * If we are mounting the zone, then we must always use the default
1106 * brand device mounts.
1108 if (ALT_MOUNT(mount_cmd
)) {
1109 (void) strlcpy(brand
, default_brand
, sizeof (brand
));
1111 (void) strlcpy(brand
, brand_name
, sizeof (brand
));
1114 if ((bh
= brand_open(brand
)) == NULL
) {
1115 zerror(zlogp
, B_FALSE
, "unable to determine zone brand");
1119 if (vplat_get_iptype(zlogp
, &iptype
) < 0) {
1120 zerror(zlogp
, B_TRUE
, "unable to determine ip-type");
1125 curr_iptype
= "shared";
1128 curr_iptype
= "exclusive";
1132 if (brand_platform_iter_devices(bh
, zone_name
,
1133 mount_one_dev_device_cb
, prof
, curr_iptype
) != 0) {
1134 zerror(zlogp
, B_TRUE
, "failed to add standard device");
1138 if (brand_platform_iter_link(bh
,
1139 mount_one_dev_symlink_cb
, prof
) != 0) {
1140 zerror(zlogp
, B_TRUE
, "failed to add standard symlink");
1144 /* Add user-specified devices and directories */
1145 if ((handle
= zonecfg_init_handle()) == NULL
) {
1146 zerror(zlogp
, B_FALSE
, "can't initialize zone handle");
1149 if (err
= zonecfg_get_handle(zone_name
, handle
)) {
1150 zerror(zlogp
, B_FALSE
, "can't get handle for zone "
1151 "%s: %s", zone_name
, zonecfg_strerror(err
));
1154 if (err
= zonecfg_setdevent(handle
)) {
1155 zerror(zlogp
, B_FALSE
, "%s: %s", zone_name
,
1156 zonecfg_strerror(err
));
1159 while (zonecfg_getdevent(handle
, &ztab
) == Z_OK
) {
1160 if (di_prof_add_dev(prof
, ztab
.zone_dev_match
)) {
1161 zerror(zlogp
, B_TRUE
, "failed to add "
1162 "user-specified device");
1166 (void) zonecfg_enddevent(handle
);
1168 /* Send profile to kernel */
1169 if (di_prof_commit(prof
)) {
1170 zerror(zlogp
, B_TRUE
, "failed to commit profile");
1180 zonecfg_fini_handle(handle
);
1187 mount_one(zlog_t
*zlogp
, struct zone_fstab
*fsptr
, const char *rootpath
,
1188 zone_mnt_t mount_cmd
)
1190 char path
[MAXPATHLEN
];
1191 char optstr
[MAX_MNTOPT_STR
];
1192 zone_fsopt_t
*optptr
;
1195 if ((rv
= valid_mount_path(zlogp
, rootpath
, fsptr
->zone_fs_special
,
1196 fsptr
->zone_fs_dir
, fsptr
->zone_fs_type
)) < 0) {
1197 zerror(zlogp
, B_FALSE
, "%s%s is not a valid mount point",
1198 rootpath
, fsptr
->zone_fs_dir
);
1200 } else if (rv
> 0) {
1201 /* The mount point path doesn't exist, create it now. */
1202 if (make_one_dir(zlogp
, rootpath
, fsptr
->zone_fs_dir
,
1203 DEFAULT_DIR_MODE
, DEFAULT_DIR_USER
,
1204 DEFAULT_DIR_GROUP
) != 0) {
1205 zerror(zlogp
, B_FALSE
, "failed to create mount point");
1210 * Now this might seem weird, but we need to invoke
1211 * valid_mount_path() again. Why? Because it checks
1212 * to make sure that the mount point path is canonical,
1213 * which it can only do if the path exists, so now that
1214 * we've created the path we have to verify it again.
1216 if ((rv
= valid_mount_path(zlogp
, rootpath
,
1217 fsptr
->zone_fs_special
, fsptr
->zone_fs_dir
,
1218 fsptr
->zone_fs_type
)) < 0) {
1219 zerror(zlogp
, B_FALSE
,
1220 "%s%s is not a valid mount point",
1221 rootpath
, fsptr
->zone_fs_dir
);
1226 (void) snprintf(path
, sizeof (path
), "%s%s", rootpath
,
1227 fsptr
->zone_fs_dir
);
1230 * In general the strategy here is to do just as much verification as
1231 * necessary to avoid crashing or otherwise doing something bad; if the
1232 * administrator initiated the operation via zoneadm(1m), he'll get
1233 * auto-verification which will let him know what's wrong. If he
1234 * modifies the zone configuration of a running zone and doesn't attempt
1235 * to verify that it's OK we won't crash but won't bother trying to be
1236 * too helpful either. zoneadm verify is only a couple keystrokes away.
1238 if (!zonecfg_valid_fs_type(fsptr
->zone_fs_type
)) {
1239 zerror(zlogp
, B_FALSE
, "cannot mount %s on %s: "
1240 "invalid file-system type %s", fsptr
->zone_fs_special
,
1241 fsptr
->zone_fs_dir
, fsptr
->zone_fs_type
);
1246 * If we're looking at an alternate root environment, then construct
1247 * read-only loopback mounts as necessary. Note that any special
1248 * paths for lofs zone mounts in an alternate root must have
1249 * already been pre-pended with any alternate root path by the
1252 if (zonecfg_in_alt_root()) {
1255 if (stat64(fsptr
->zone_fs_special
, &st
) != -1 &&
1256 S_ISBLK(st
.st_mode
)) {
1258 * If we're going to mount a block device we need
1259 * to check if that device is already mounted
1260 * somewhere else, and if so, do a lofs mount
1261 * of the device instead of a direct mount
1263 if (check_lofs_needed(zlogp
, fsptr
) == -1)
1265 } else if (strcmp(fsptr
->zone_fs_type
, MNTTYPE_LOFS
) == 0) {
1267 * For lofs mounts, the special node is inside the
1268 * alternate root. We need lofs resolution for
1269 * this case in order to get at the underlying
1272 resolve_lofs(zlogp
, fsptr
->zone_fs_special
,
1273 sizeof (fsptr
->zone_fs_special
));
1278 * Run 'fsck -m' if there's a device to fsck.
1280 if (fsptr
->zone_fs_raw
[0] != '\0' &&
1281 dofsck(zlogp
, fsptr
->zone_fs_type
, fsptr
->zone_fs_raw
) != 0) {
1283 } else if (isregfile(fsptr
->zone_fs_special
) == 1 &&
1284 dofsck(zlogp
, fsptr
->zone_fs_type
, fsptr
->zone_fs_special
) != 0) {
1289 * Build up mount option string.
1292 if (fsptr
->zone_fs_options
!= NULL
) {
1293 (void) strlcpy(optstr
, fsptr
->zone_fs_options
->zone_fsopt_opt
,
1295 for (optptr
= fsptr
->zone_fs_options
->zone_fsopt_next
;
1296 optptr
!= NULL
; optptr
= optptr
->zone_fsopt_next
) {
1297 (void) strlcat(optstr
, ",", sizeof (optstr
));
1298 (void) strlcat(optstr
, optptr
->zone_fsopt_opt
,
1303 if ((rv
= domount(zlogp
, fsptr
->zone_fs_type
, optstr
,
1304 fsptr
->zone_fs_special
, path
)) != 0)
1308 * The mount succeeded. If this was not a mount of /dev then
1311 if (strcmp(fsptr
->zone_fs_type
, MNTTYPE_DEV
) != 0)
1315 * We just mounted an instance of a /dev filesystem, so now we
1316 * need to configure it.
1318 return (mount_one_dev(zlogp
, path
, mount_cmd
));
/*
 * Release the per-entry mount option lists of a zone_fstab array.
 *
 * fsarray - array of file system entries; a NULL pointer is tested for
 *           before any entry is touched.
 * nelem   - number of entries in fsarray.
 *
 * Each entry's zone_fs_options list is handed to
 * zonecfg_free_fs_option_list().
 *
 * NOTE(review): the statements following the NULL test and following the
 * loop are not part of this listing; presumably the function returns early
 * on NULL and frees the array itself at the end - confirm.
 */
1322 free_fs_data(struct zone_fstab
*fsarray
, uint_t nelem
)
1326 if (fsarray
== NULL
)
1328 for (i
= 0; i
< nelem
; i
++)
1329 zonecfg_free_fs_option_list(fsarray
[i
].zone_fs_options
);
1334 * This function initiates the creation of a small Solaris Environment for
1335 * scratch zone. The Environment creation process is split up into two
1336 * functions(build_mounted_pre_var() and build_mounted_post_var()). It
1337 * is done this way because:
1338 * We need to have both /etc and /var in the root of the scratchzone.
1339 * We loopback mount zone's own /etc and /var into the root of the
1340 * scratch zone. Unlike /etc, /var can be a separate filesystem. So we
1341 * need to delay the mount of /var till the zone's root gets populated.
1342 * So mounting of localdirs[] (/etc and /var) has been moved to the
1343 * build_mounted_post_var() which gets called only after the zone
1344 * specific filesystems are mounted.
1346 * Note that the scratch zone we set up for updating the zone (Z_MNT_UPDATE)
1347 * does not loopback mount the zone's own /etc and /var into the root of the
/*
 * First phase of building the scratch zone environment under <zonepath>/lu.
 *
 * zlogp     - log handle passed to zerror(), resolve_lofs() and domount().
 * rootpath  - in/out: zone root path; lofs-resolved on entry and finally
 *             overwritten (bounded by rootlen) with the path of the "a"
 *             directory under luroot.
 * rootlen   - size of the rootpath buffer.
 * zonepath  - zone path used to form luroot.
 * luroot    - out: receives "<zonepath>/lu" (then lofs-resolved).
 * lurootlen - size of the luroot buffer.
 *
 * Steps visible in this listing, in order:
 *  - create the symlink <luroot>/bin -> ./usr/bin (result ignored);
 *  - mkdir (mode 0755) every entry of mkdirs[] under luroot, logging
 *    "cannot create" on failure;
 *  - fetch the zone uuid, save a strdup() copy of the current zonecfg root,
 *    switch the root to "" and look the zone up by uuid; when a root path
 *    is found and differs from rootpath, create <luroot>/b and lofs-mount
 *    the found root with RESOURCE_DEFAULT_OPTS; then restore the saved root;
 *  - open the scratch map file under luroot, truncate it, and add an entry
 *    for zone_name/kernzone rooted at "/";
 *  - lofs-mount rootpath on <luroot>/a and copy that path into rootpath.
 *
 * The caller in mount_filesystems() treats a zero (false) result as failure.
 *
 * NOTE(review): several lines of the original (return statements, the
 * mount target of the <luroot>/b mount, release of the strdup() copy) are
 * absent from this listing, so exact error paths cannot be confirmed here.
 */
1351 build_mounted_pre_var(zlog_t
*zlogp
, char *rootpath
,
1352 size_t rootlen
, const char *zonepath
, char *luroot
, size_t lurootlen
)
1354 char tmp
[MAXPATHLEN
], fromdir
[MAXPATHLEN
];
1356 static const char *mkdirs
[] = {
1357 "/system", "/system/contract", "/system/object", "/proc",
1358 "/dev", "/tmp", "/a", NULL
1364 resolve_lofs(zlogp
, rootpath
, rootlen
);
1365 (void) snprintf(luroot
, lurootlen
, "%s/lu", zonepath
);
1366 resolve_lofs(zlogp
, luroot
, lurootlen
);
1367 (void) snprintf(tmp
, sizeof (tmp
), "%s/bin", luroot
);
1368 (void) symlink("./usr/bin", tmp
);
1371 * These are mostly special mount points; not handled here. (See
1372 * zone_mount_early.)
1374 for (cpp
= mkdirs
; *cpp
!= NULL
; cpp
++) {
1375 (void) snprintf(tmp
, sizeof (tmp
), "%s%s", luroot
, *cpp
);
1376 if (mkdir(tmp
, 0755) != 0) {
1377 zerror(zlogp
, B_TRUE
, "cannot create %s", tmp
);
1382 * This is here to support lucopy. If there's an instance of this same
1383 * zone on the current running system, then we mount its root up as
1384 * read-only inside the scratch zone.
1386 (void) zonecfg_get_uuid(zone_name
, uuid
);
1387 altstr
= strdup(zonecfg_get_root());
1388 if (altstr
== NULL
) {
1389 zerror(zlogp
, B_TRUE
, "memory allocation failed");
1392 zonecfg_set_root("");
1393 (void) strlcpy(tmp
, zone_name
, sizeof (tmp
));
1394 (void) zonecfg_get_name_by_uuid(uuid
, tmp
, sizeof (tmp
));
1395 if (zone_get_rootpath(tmp
, fromdir
, sizeof (fromdir
)) == Z_OK
&&
1396 strcmp(fromdir
, rootpath
) != 0) {
1397 (void) snprintf(tmp
, sizeof (tmp
), "%s/b", luroot
);
1398 if (mkdir(tmp
, 0755) != 0) {
1399 zerror(zlogp
, B_TRUE
, "cannot create %s", tmp
);
1402 if (domount(zlogp
, MNTTYPE_LOFS
, RESOURCE_DEFAULT_OPTS
, fromdir
,
1404 zerror(zlogp
, B_TRUE
, "cannot mount %s on %s", tmp
,
1409 zonecfg_set_root(altstr
);
1412 if ((fp
= zonecfg_open_scratch(luroot
, B_TRUE
)) == NULL
) {
1413 zerror(zlogp
, B_TRUE
, "cannot open zone mapfile");
1416 (void) ftruncate(fileno(fp
), 0);
1417 if (zonecfg_add_scratch(fp
, zone_name
, kernzone
, "/") == -1) {
1418 zerror(zlogp
, B_TRUE
, "cannot add zone mapfile entry");
1420 zonecfg_close_scratch(fp
);
1421 (void) snprintf(tmp
, sizeof (tmp
), "%s/a", luroot
);
1422 if (domount(zlogp
, MNTTYPE_LOFS
, "", rootpath
, tmp
) != 0)
1424 (void) strlcpy(rootpath
, tmp
, rootlen
);
/*
 * Final phase of building the scratch zone environment; called from
 * mount_filesystems() after the zone's own file systems have been mounted.
 *
 * zlogp     - log handle.
 * mount_cmd - Z_MNT_SCRATCH or Z_MNT_UPDATE select which directory tables
 *             are used (see below).
 * rootpath  - zone root path as left by build_mounted_pre_var().
 * (a further parameter is declared on a line not present in this listing;
 *  the caller passes luroot in that position.)
 *
 * Three passes are visible:
 *  1. Only for Z_MNT_SCRATCH: for each localdirs[] entry, build a target
 *     path under luroot and a source path in fromdir, mkdir the target
 *     (0755) and lofs-mount the source on it with empty options.
 *  2. For each entry of upd_loopdirs[] (Z_MNT_UPDATE) or scr_loopdirs[]
 *     (otherwise): mkdir under luroot, tolerating EEXIST; lstat the result
 *     and skip anything that is not a directory; lofs-mount the same path
 *     from the running system with RESOURCE_DEFAULT_OPTS.
 *  3. For each entry of upd_tmpdirs[] or scr_tmpdirs[]: mkdir only when
 *     mount_cmd is Z_MNT_SCRATCH, force mode 01777 on "/tmp", then mount
 *     tmpfs (special "swap") on it.
 *
 * The caller treats a zero (false) result as failure.
 *
 * NOTE(review): return statements and parts of several argument lists are
 * on lines missing from this listing; confirm error paths against the
 * full source.
 */
1430 build_mounted_post_var(zlog_t
*zlogp
, zone_mnt_t mount_cmd
, char *rootpath
,
1433 char tmp
[MAXPATHLEN
], fromdir
[MAXPATHLEN
];
1435 const char **loopdirs
;
1436 const char **tmpdirs
;
1437 static const char *localdirs
[] = {
1438 "/etc", "/var", NULL
1440 static const char *scr_loopdirs
[] = {
1441 "/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform",
1444 static const char *upd_loopdirs
[] = {
1445 "/etc", "/kernel", "/lib", "/opt", "/platform", "/sbin",
1446 "/usr", "/var", NULL
1448 static const char *scr_tmpdirs
[] = {
1449 "/tmp", "/var/run", NULL
1451 static const char *upd_tmpdirs
[] = {
1452 "/tmp", "/var/run", "/var/tmp", NULL
1456 if (mount_cmd
== Z_MNT_SCRATCH
) {
1458 * These are mounted read-write from the zone undergoing
1459 * upgrade. We must be careful not to 'leak' things from the
1460 * main system into the zone, and this accomplishes that goal.
1462 for (cpp
= localdirs
; *cpp
!= NULL
; cpp
++) {
1463 (void) snprintf(tmp
, sizeof (tmp
), "%s%s", luroot
,
1465 (void) snprintf(fromdir
, sizeof (fromdir
), "%s%s",
1467 if (mkdir(tmp
, 0755) != 0) {
1468 zerror(zlogp
, B_TRUE
, "cannot create %s", tmp
);
1471 if (domount(zlogp
, MNTTYPE_LOFS
, "", fromdir
, tmp
)
1473 zerror(zlogp
, B_TRUE
, "cannot mount %s on %s",
1480 if (mount_cmd
== Z_MNT_UPDATE
)
1481 loopdirs
= upd_loopdirs
;
1483 loopdirs
= scr_loopdirs
;
1486 * These are things mounted read-only from the running system because
1487 * they contain binaries that must match system.
1489 for (cpp
= loopdirs
; *cpp
!= NULL
; cpp
++) {
1490 (void) snprintf(tmp
, sizeof (tmp
), "%s%s", luroot
, *cpp
);
1491 if (mkdir(tmp
, 0755) != 0) {
1492 if (errno
!= EEXIST
) {
1493 zerror(zlogp
, B_TRUE
, "cannot create %s", tmp
);
1496 if (lstat(tmp
, &st
) != 0) {
1497 zerror(zlogp
, B_TRUE
, "cannot stat %s", tmp
);
1501 * Ignore any non-directories encountered. These are
1502 * things that have been converted into symlinks
1503 * (/etc/fs and /etc/lib) and no longer need a lofs
1506 if (!S_ISDIR(st
.st_mode
))
1509 if (domount(zlogp
, MNTTYPE_LOFS
, RESOURCE_DEFAULT_OPTS
, *cpp
,
1511 zerror(zlogp
, B_TRUE
, "cannot mount %s on %s", tmp
,
1517 if (mount_cmd
== Z_MNT_UPDATE
)
1518 tmpdirs
= upd_tmpdirs
;
1520 tmpdirs
= scr_tmpdirs
;
1523 * These are things with tmpfs mounted inside.
1525 for (cpp
= tmpdirs
; *cpp
!= NULL
; cpp
++) {
1526 (void) snprintf(tmp
, sizeof (tmp
), "%s%s", luroot
, *cpp
);
1527 if (mount_cmd
== Z_MNT_SCRATCH
&& mkdir(tmp
, 0755) != 0 &&
1529 zerror(zlogp
, B_TRUE
, "cannot create %s", tmp
);
1534 * We could set the mode for /tmp when we do the mkdir but
1535 * since that can be modified by the umask we will just set
1536 * the correct mode for /tmp now.
1538 if (strcmp(*cpp
, "/tmp") == 0 && chmod(tmp
, 01777) != 0) {
1539 zerror(zlogp
, B_TRUE
, "cannot chmod %s", tmp
);
1543 if (domount(zlogp
, MNTTYPE_TMPFS
, "", "swap", tmp
) != 0) {
1544 zerror(zlogp
, B_TRUE
, "cannot mount swap on %s", *cpp
);
/*
 * State handed to plat_gmount_cb() through its opaque data argument.
 * pgcd_fs_tab points at the caller's growing zone_fstab array pointer so
 * that the callback can publish a reallocated array.
 * NOTE(review): plat_gmount_cb() also reads members pgcd_zlogp and
 * pgcd_num_fs; their declarations are on lines absent from this listing.
 */
1551 typedef struct plat_gmount_cb_data
{
1553 struct zone_fstab
**pgcd_fs_tab
;
1555 } plat_gmount_cb_data_t
;
1558 * plat_gmount_cb() is a callback function invoked by libbrand to iterate
1559 * through all global brand platform mounts.
/*
 * Append one brand-defined global mount to the file system table.
 *
 * data   - really a plat_gmount_cb_data_t *, carrying the log handle and
 *          pointers to the caller's table pointer and entry count.
 * spec   - mount special; copied into zone_fs_special.
 * dir    - mount point; copied into zone_fs_dir.
 * fstype - file system type; copied into zone_fs_type.
 * opt    - optional mount option; when non-NULL it is added to the new
 *          entry with zonecfg_add_fs_option().
 *
 * The table is resized with realloc() to num_fs entries and the entry at
 * index num_fs - 1 is filled in: zone_fs_raw is set to the empty string,
 * the three strings are copied with strlcpy() (silently truncating), and
 * zone_fs_options starts out NULL. The table pointer and count are written
 * back through the callback struct.
 *
 * NOTE(review): the increment of num_fs, the assignment of the realloc()
 * result to fs_ptr and the return statements are on lines missing from
 * this listing - confirm against the full source.
 */
1562 plat_gmount_cb(void *data
, const char *spec
, const char *dir
,
1563 const char *fstype
, const char *opt
)
1565 plat_gmount_cb_data_t
*cp
= data
;
1566 zlog_t
*zlogp
= cp
->pgcd_zlogp
;
1567 struct zone_fstab
*fs_ptr
= *cp
->pgcd_fs_tab
;
1568 int num_fs
= *cp
->pgcd_num_fs
;
1569 struct zone_fstab
*fsp
, *tmp_ptr
;
1572 if ((tmp_ptr
= realloc(fs_ptr
, num_fs
* sizeof (*tmp_ptr
))) == NULL
) {
1573 zerror(zlogp
, B_TRUE
, "memory allocation failed");
1578 fsp
= &fs_ptr
[num_fs
- 1];
1580 /* update the callback struct passed in */
1581 *cp
->pgcd_fs_tab
= fs_ptr
;
1582 *cp
->pgcd_num_fs
= num_fs
;
1584 fsp
->zone_fs_raw
[0] = '\0';
1585 (void) strlcpy(fsp
->zone_fs_special
, spec
,
1586 sizeof (fsp
->zone_fs_special
));
1587 (void) strlcpy(fsp
->zone_fs_dir
, dir
, sizeof (fsp
->zone_fs_dir
));
1588 (void) strlcpy(fsp
->zone_fs_type
, fstype
, sizeof (fsp
->zone_fs_type
));
1589 fsp
->zone_fs_options
= NULL
;
1590 if ((opt
!= NULL
) &&
1591 (zonecfg_add_fs_option(fsp
, (char *)opt
) != Z_OK
)) {
1592 zerror(zlogp
, B_FALSE
, "error adding property");
/*
 * Append the file systems listed in the zone configuration to the table.
 *
 * handle    - zone configuration handle to iterate with
 *             zonecfg_setfsent()/zonecfg_getfsent()/zonecfg_endfsent().
 * zlogp     - log handle.
 * fs_tabp   - in/out: pointer to the table pointer.
 * num_fsp   - in/out: pointer to the entry count.
 * mount_cmd - when ALT_MOUNT(mount_cmd) is true, entries of type
 *             MNTTYPE_ZFS are skipped.
 *
 * For each accepted entry the table is grown with realloc() and the new
 * slot receives copies of zone_fs_dir, zone_fs_raw and zone_fs_type
 * (strlcpy, silently truncating). The zone_fs_options pointer is copied
 * as-is from the iterator entry rather than duplicated. zone_fs_special is
 * built by concatenation: for MNTTYPE_LOFS entries the zonecfg_get_root()
 * prefix comes first, followed by the configured special.
 *
 * On allocation failure the iteration is closed with zonecfg_endfsent()
 * before the error path continues.
 *
 * NOTE(review): the count increment, the write-back through fs_tabp and
 * num_fsp, and all return statements are on lines missing from this
 * listing; the return convention cannot be confirmed from here.
 */
1600 mount_filesystems_fsent(zone_dochandle_t handle
, zlog_t
*zlogp
,
1601 struct zone_fstab
**fs_tabp
, int *num_fsp
, zone_mnt_t mount_cmd
)
1603 struct zone_fstab
*tmp_ptr
, *fs_ptr
, *fsp
, fstab
;
1609 if (zonecfg_setfsent(handle
) != Z_OK
) {
1610 zerror(zlogp
, B_FALSE
, "invalid configuration");
1613 while (zonecfg_getfsent(handle
, &fstab
) == Z_OK
) {
1615 * ZFS filesystems will not be accessible under an alternate
1616 * root, since the pool will not be known. Ignore them in this
1619 if (ALT_MOUNT(mount_cmd
) &&
1620 strcmp(fstab
.zone_fs_type
, MNTTYPE_ZFS
) == 0)
1624 if ((tmp_ptr
= realloc(fs_ptr
,
1625 num_fs
* sizeof (*tmp_ptr
))) == NULL
) {
1626 zerror(zlogp
, B_TRUE
, "memory allocation failed");
1627 (void) zonecfg_endfsent(handle
);
1630 /* update the pointers passed in */
1635 fsp
= &fs_ptr
[num_fs
- 1];
1636 (void) strlcpy(fsp
->zone_fs_dir
,
1637 fstab
.zone_fs_dir
, sizeof (fsp
->zone_fs_dir
));
1638 (void) strlcpy(fsp
->zone_fs_raw
, fstab
.zone_fs_raw
,
1639 sizeof (fsp
->zone_fs_raw
));
1640 (void) strlcpy(fsp
->zone_fs_type
, fstab
.zone_fs_type
,
1641 sizeof (fsp
->zone_fs_type
));
1642 fsp
->zone_fs_options
= fstab
.zone_fs_options
;
1645 * For all lofs mounts, make sure that the 'special'
1646 * entry points inside the alternate root. The
1647 * source path for a lofs mount in a given zone needs
1648 * to be relative to the root of the boot environment
1649 * that contains the zone. Note that we don't do this
1650 * for non-lofs mounts since they will have a device
1651 * as a backing store and device paths must always be
1652 * specified relative to the current boot environment.
1654 fsp
->zone_fs_special
[0] = '\0';
1655 if (strcmp(fsp
->zone_fs_type
, MNTTYPE_LOFS
) == 0) {
1656 (void) strlcat(fsp
->zone_fs_special
, zonecfg_get_root(),
1657 sizeof (fsp
->zone_fs_special
));
1659 (void) strlcat(fsp
->zone_fs_special
, fstab
.zone_fs_special
,
1660 sizeof (fsp
->zone_fs_special
));
1662 (void) zonecfg_endfsent(handle
);
/*
 * Mount every file system belonging to the zone.
 *
 * zlogp     - log handle.
 * mount_cmd - kind of mount being performed; ALT_MOUNT(mount_cmd) selects
 *             the scratch-zone layout and Z_MNT_BOOT additionally triggers
 *             tsol_mounts().
 *
 * Sequence visible in this listing:
 *  - require the zone to be in the READY or MOUNTED state;
 *  - obtain zonepath and rootpath, and a snapshot configuration handle;
 *  - pick the brand (default_brand for alternate mounts, otherwise
 *    brand_name), open it, and collect its global platform mounts through
 *    brand_platform_iter_gmounts()/plat_gmount_cb();
 *  - append the configured file systems via mount_filesystems_fsent() and
 *    release the configuration handle;
 *  - for alternate mounts, run build_mounted_pre_var(), which rewrites
 *    rootpath;
 *  - qsort() the table with fs_compare and mount each entry with
 *    mount_one(); for alternate mounts an entry whose directory is "/dev"
 *    is mounted with the trailing "/a" temporarily removed from rootpath;
 *  - for alternate mounts, run build_mounted_post_var();
 *  - for Z_MNT_BOOT, call tsol_mounts();
 *  - release the table with free_fs_data().
 *
 * NOTE(review): slen is computed as strlen(rootpath) - 2 before the
 * assert() that checks for the "/a" suffix; a rootpath shorter than two
 * characters would wrap the size_t. This relies on
 * build_mounted_pre_var() having produced a path ending in "/a".
 *
 * NOTE(review): return statements, goto targets and the brand handle
 * release are on lines missing from this listing.
 */
1667 mount_filesystems(zlog_t
*zlogp
, zone_mnt_t mount_cmd
)
1669 char rootpath
[MAXPATHLEN
];
1670 char zonepath
[MAXPATHLEN
];
1671 char brand
[MAXNAMELEN
];
1672 char luroot
[MAXPATHLEN
];
1674 struct zone_fstab
*fs_ptr
= NULL
;
1675 zone_dochandle_t handle
= NULL
;
1676 zone_state_t zstate
;
1678 plat_gmount_cb_data_t cb
;
1680 if (zone_get_state(zone_name
, &zstate
) != Z_OK
||
1681 (zstate
!= ZONE_STATE_READY
&& zstate
!= ZONE_STATE_MOUNTED
)) {
1682 zerror(zlogp
, B_FALSE
,
1683 "zone must be in '%s' or '%s' state to mount file-systems",
1684 zone_state_str(ZONE_STATE_READY
),
1685 zone_state_str(ZONE_STATE_MOUNTED
));
1689 if (zone_get_zonepath(zone_name
, zonepath
, sizeof (zonepath
)) != Z_OK
) {
1690 zerror(zlogp
, B_TRUE
, "unable to determine zone path");
1694 if (zone_get_rootpath(zone_name
, rootpath
, sizeof (rootpath
)) != Z_OK
) {
1695 zerror(zlogp
, B_TRUE
, "unable to determine zone root");
1699 if ((handle
= zonecfg_init_handle()) == NULL
) {
1700 zerror(zlogp
, B_TRUE
, "getting zone configuration handle");
1703 if (zonecfg_get_snapshot_handle(zone_name
, handle
) != Z_OK
||
1704 zonecfg_setfsent(handle
) != Z_OK
) {
1705 zerror(zlogp
, B_FALSE
, "invalid configuration");
1710 * If we are mounting the zone, then we must always use the default
1711 * brand global mounts.
1713 if (ALT_MOUNT(mount_cmd
)) {
1714 (void) strlcpy(brand
, default_brand
, sizeof (brand
));
1716 (void) strlcpy(brand
, brand_name
, sizeof (brand
));
1719 /* Get a handle to the brand info for this zone */
1720 if ((bh
= brand_open(brand
)) == NULL
) {
1721 zerror(zlogp
, B_FALSE
, "unable to determine zone brand");
1722 zonecfg_fini_handle(handle
);
1727 * Get the list of global filesystems to mount from the brand
1730 cb
.pgcd_zlogp
= zlogp
;
1731 cb
.pgcd_fs_tab
= &fs_ptr
;
1732 cb
.pgcd_num_fs
= &num_fs
;
1733 if (brand_platform_iter_gmounts(bh
, zonepath
,
1734 plat_gmount_cb
, &cb
) != 0) {
1735 zerror(zlogp
, B_FALSE
, "unable to mount filesystems");
1737 zonecfg_fini_handle(handle
);
1743 * Iterate through the rest of the filesystems. Sort them all,
1744 * then mount them in sorted order. This is to make sure the
1745 * higher level directories (e.g., /usr) get mounted before
1746 * any beneath them (e.g., /usr/local).
1748 if (mount_filesystems_fsent(handle
, zlogp
, &fs_ptr
, &num_fs
,
1752 zonecfg_fini_handle(handle
);
1756 * Normally when we mount a zone all the zone filesystems
1757 * get mounted relative to rootpath, which is usually
1758 * <zonepath>/root. But when mounting a zone for administration
1759 * purposes via the zone "mount" state, build_mounted_pre_var()
1760 * updates rootpath to be <zonepath>/lu/a so we'll mount all
1761 * the zones filesystems there instead.
1763 * build_mounted_pre_var() and build_mounted_post_var() will
1764 * also do some extra work to create directories and lofs mount
1765 * a bunch of global zone file system paths into <zonepath>/lu.
1767 * This allows us to be able to enter the zone (now rooted at
1768 * <zonepath>/lu) and run the upgrade/patch tools that are in the
1769 * global zone and have them upgrade the to-be-modified zone's
1770 * files mounted on /a. (Which mirrors the existing standard
1771 * upgrade environment.)
1773 * There is of course one catch. When doing the upgrade
1774 * we need <zoneroot>/lu/dev to be the /dev filesystem
1775 * for the zone and we don't want to have any /dev filesystem
1776 * mounted at <zoneroot>/lu/a/dev. Since /dev is specified
1777 * as a normal zone filesystem by default we'll try to mount
1778 * it at <zoneroot>/lu/a/dev, so we have to detect this
1779 * case and instead mount it at <zoneroot>/lu/dev.
1781 * All this work is done in three phases:
1782 * 1) Create and populate lu directory (build_mounted_pre_var()).
1783 * 2) Mount the required filesystems as per the zone configuration.
1784 * 3) Set up the rest of the scratch zone environment
1785 * (build_mounted_post_var()).
1787 if (ALT_MOUNT(mount_cmd
) && !build_mounted_pre_var(zlogp
,
1788 rootpath
, sizeof (rootpath
), zonepath
, luroot
, sizeof (luroot
)))
1791 qsort(fs_ptr
, num_fs
, sizeof (*fs_ptr
), fs_compare
);
1793 for (i
= 0; i
< num_fs
; i
++) {
1794 if (ALT_MOUNT(mount_cmd
) &&
1795 strcmp(fs_ptr
[i
].zone_fs_dir
, "/dev") == 0) {
1796 size_t slen
= strlen(rootpath
) - 2;
1799 * By default we'll try to mount /dev as /a/dev
1800 * but /dev is special and always goes at the top
1801 * so strip the trailing '/a' from the rootpath.
1803 assert(strcmp(&rootpath
[slen
], "/a") == 0);
1804 rootpath
[slen
] = '\0';
1805 if (mount_one(zlogp
, &fs_ptr
[i
], rootpath
, mount_cmd
)
1808 rootpath
[slen
] = '/';
1811 if (mount_one(zlogp
, &fs_ptr
[i
], rootpath
, mount_cmd
) != 0)
1814 if (ALT_MOUNT(mount_cmd
) &&
1815 !build_mounted_post_var(zlogp
, mount_cmd
, rootpath
, luroot
))
1819 * For Trusted Extensions cross-mount each lower level /export/home
1821 if (mount_cmd
== Z_MNT_BOOT
&&
1822 tsol_mounts(zlogp
, zone_name
, rootpath
) != 0)
1825 free_fs_data(fs_ptr
, num_fs
);
1828 * Everything looks fine.
1834 zonecfg_fini_handle(handle
);
1835 free_fs_data(fs_ptr
, num_fs
);
/*
 * Convert a decimal prefix length into a network mask.
 *
 * prefixstr    - NUL-terminated decimal prefix length (e.g. "24").
 * maxprefixlen - largest acceptable prefix length, in bits, for the
 *                address family.
 * maskstr      - output buffer of at least maxprefixlen / 8 bytes (rounded
 *                up). The caller must zero it beforehand, because the
 *                partial final byte is OR-ed into place.
 *
 * Returns 0 on success, or 1 when prefixstr is empty, is not entirely a
 * decimal number, or lies outside 0..maxprefixlen. Unlike atoi(), strtol()
 * lets malformed strings such as "" or "24x" be rejected instead of being
 * silently treated as 0 or 24.
 *
 * The caller makes sure neither pointer parameter is NULL.
 */
static int
addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr)
{
	char *endp;
	long prefixlen;

	errno = 0;
	prefixlen = strtol(prefixstr, &endp, 10);
	if (endp == prefixstr || *endp != '\0' || errno != 0 ||
	    prefixlen < 0 || prefixlen > maxprefixlen)
		return (1);

	/* Whole bytes of the mask are all ones. */
	while (prefixlen >= 8) {
		*maskstr++ = 0xFF;
		prefixlen -= 8;
	}

	/* Set the remaining high-order bits of the partial byte, if any. */
	while (prefixlen > 0) {
		*maskstr |= 1 << (8 - prefixlen);
		prefixlen--;
	}
	return (0);
}
1861 * Tear down all interfaces belonging to the given zone. This should
1862 * be called with the zone in a state other than "running", so that
1863 * interfaces can't be assigned to the zone after this returns.
1865 * If anything goes wrong, log an error message and return an error.
/*
 * Remove every logical network interface currently bound to zone_id.
 *
 * zlogp   - log handle.
 * zone_id - id of the zone whose interfaces are to be removed.
 *
 * Visible procedure: open an AF_INET datagram socket; ask for the number
 * of interfaces (SIOCGLIFNUM, family AF_UNSPEC, flags LIFC_NOXMIT |
 * LIFC_ALLZONES); allocate num_ifs lifreq slots and fetch the list
 * (SIOCGLIFCONF). For every returned entry a socket of that entry's
 * address family is opened into s, the interface name is copied into a
 * zeroed lifreq, its zone is queried (SIOCGLIFZONE) and, when it equals
 * zone_id, the interface is removed (SIOCLIFREMOVEIF).
 *
 * ret_code starts at 0; how it is updated and returned is on lines not
 * present in this listing.
 *
 * NOTE(review): s is reassigned inside the loop; the close() calls and
 * the release of buf are on lines missing from this listing - confirm no
 * descriptor or memory is leaked on any path.
 * NOTE(review): the strncpy() into lifrl.lifr_name is bounded by the full
 * destination size; termination relies on the preceding memset() together
 * with the source name being shorter than the field - confirm.
 */
1868 unconfigure_shared_network_interfaces(zlog_t
*zlogp
, zoneid_t zone_id
)
1871 struct lifconf lifc
;
1872 struct lifreq
*lifrp
, lifrl
;
1873 int64_t lifc_flags
= LIFC_NOXMIT
| LIFC_ALLZONES
;
1874 int num_ifs
, s
, i
, ret_code
= 0;
1878 if ((s
= socket(AF_INET
, SOCK_DGRAM
, 0)) < 0) {
1879 zerror(zlogp
, B_TRUE
, "could not get socket");
1883 lifn
.lifn_family
= AF_UNSPEC
;
1884 lifn
.lifn_flags
= (int)lifc_flags
;
1885 if (ioctl(s
, SIOCGLIFNUM
, (char *)&lifn
) < 0) {
1886 zerror(zlogp
, B_TRUE
,
1887 "could not determine number of network interfaces");
1891 num_ifs
= lifn
.lifn_count
;
1892 bufsize
= num_ifs
* sizeof (struct lifreq
);
1893 if ((buf
= malloc(bufsize
)) == NULL
) {
1894 zerror(zlogp
, B_TRUE
, "memory allocation failed");
1898 lifc
.lifc_family
= AF_UNSPEC
;
1899 lifc
.lifc_flags
= (int)lifc_flags
;
1900 lifc
.lifc_len
= bufsize
;
1901 lifc
.lifc_buf
= buf
;
1902 if (ioctl(s
, SIOCGLIFCONF
, (char *)&lifc
) < 0) {
1903 zerror(zlogp
, B_TRUE
, "could not get configured network "
1908 lifrp
= lifc
.lifc_req
;
1909 for (i
= lifc
.lifc_len
/ sizeof (struct lifreq
); i
> 0; i
--, lifrp
++) {
1911 if ((s
= socket(lifrp
->lifr_addr
.ss_family
, SOCK_DGRAM
, 0)) <
1913 zerror(zlogp
, B_TRUE
, "%s: could not get socket",
1918 (void) memset(&lifrl
, 0, sizeof (lifrl
));
1919 (void) strncpy(lifrl
.lifr_name
, lifrp
->lifr_name
,
1920 sizeof (lifrl
.lifr_name
));
1921 if (ioctl(s
, SIOCGLIFZONE
, (caddr_t
)&lifrl
) < 0) {
1924 * Interface may have been removed by admin or
1925 * another zone halting.
1928 zerror(zlogp
, B_TRUE
,
1929 "%s: could not determine the zone to which this "
1930 "network interface is bound", lifrl
.lifr_name
);
1934 if (lifrl
.lifr_zoneid
== zone_id
) {
1935 if (ioctl(s
, SIOCLIFREMOVEIF
, (caddr_t
)&lifrl
) < 0) {
1936 zerror(zlogp
, B_TRUE
,
1937 "%s: could not remove network interface",
/*
 * File-scope scratch storage for the routing socket query issued by
 * who_is_using(). The union overlays the socket address flavours that
 * function fills in (it accesses members sa, sin and sin6 of the objects
 * so_dst and so_ifp); the trailing hdr member belongs to the rtmsg object
 * that who_is_using() writes to and reads from the routing socket.
 * NOTE(review): the sa member, the declarators of so_dst/so_ifp, and the
 * remainder of the rtmsg declaration (including its space member) are on
 * lines missing from this listing.
 */
1952 static union sockunion
{
1954 struct sockaddr_in sin
;
1955 struct sockaddr_dl sdl
;
1956 struct sockaddr_in6 sin6
;
1960 struct rt_msghdr hdr
;
/*
 * Return the size of the concrete socket address structure that sa refers
 * to, selected by sa->sa_family: one of struct sockaddr_in,
 * struct sockaddr_dl or struct sockaddr_in6, with sizeof (struct sockaddr)
 * as the last alternative.
 * NOTE(review): the case labels are on lines missing from this listing;
 * presumably AF_INET, AF_LINK, AF_INET6 and default, in that order -
 * confirm.
 */
1965 salen(struct sockaddr
*sa
)
1967 switch (sa
->sa_family
) {
1969 return (sizeof (struct sockaddr_in
));
1971 return (sizeof (struct sockaddr_dl
));
1973 return (sizeof (struct sockaddr_in6
));
1975 return (sizeof (struct sockaddr
));
/*
 * Round a positive length up to the next multiple of sizeof (long); any
 * value that is zero or negative yields sizeof (long). The OR-based
 * rounding is only correct when sizeof (long) is a power of two. The
 * argument is evaluated more than once, so it must be free of side
 * effects.
 */
1979 #define ROUNDUP_LONG(a) \
1980 ((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long))
1983 * Look up which zone is using a given IP address. The address in question
1984 * is expected to have been stuffed into the structure to which lifr points
1985 * via a previous SIOCGLIFADDR ioctl().
1987 * This is done using black router socket magic.
1989 * Return the name of the zone on success or NULL on failure.
1991 * This is a lot of code for a simple task; a new ioctl request to take care
1992 * of this might be a useful RFE.
/*
 * Determine which zone holds the IP address stored in lifr->lifr_addr.
 *
 * zlogp - log handle.
 * lifr  - request whose lifr_addr holds the address in question; its
 *         lifr_name is overwritten temporarily and restored, and its
 *         lifr_zoneid is filled in by SIOCGLIFZONE.
 *
 * Visible procedure:
 *  - open a PF_ROUTE raw socket;
 *  - copy the IPv4 or IPv6 address into so_dst and mark so_ifp AF_LINK;
 *  - build an RTM_GET message (flags RTF_UP | RTF_HOST, addrs
 *    RTA_IFP | RTA_DST, a fresh sequence number) holding both socket
 *    addresses, each padded with ROUNDUP_LONG(), and write it;
 *  - read replies until one carries this sequence number and process id;
 *  - validate the version, length, errno and RTA_IFP presence of the
 *    reply, then walk its address list looking for an AF_LINK entry with
 *    a non-empty name;
 *  - place that interface name into lifr->lifr_name, issue SIOCGLIFZONE,
 *    and put the caller's original name back;
 *  - translate lifr_zoneid with getzonenamebyid() into the static buffer
 *    answer, falling back to a "%d" rendering.
 *
 * The result is built in function-static storage, so a later call
 * overwrites it.
 *
 * NOTE(review): lifr->lifr_name is written with strncpy() of
 * ifp->sdl_nlen bytes and then indexed at [ifp->sdl_nlen] with no visible
 * check that sdl_nlen is smaller than LIFNAMSIZ - confirm the bound.
 * NOTE(review): the restoring strlcpy() is bounded by
 * sizeof (save_if_name), which is LIFNAMSIZ as declared here.
 * NOTE(review): return statements and any close() of the routing socket
 * are on lines missing from this listing - confirm the descriptor is
 * released on every path.
 */
1996 who_is_using(zlog_t
*zlogp
, struct lifreq
*lifr
)
1998 static char answer
[ZONENAME_MAX
];
2001 char *cp
= rtmsg
.space
;
2002 struct sockaddr_dl
*ifp
= NULL
;
2003 struct sockaddr
*sa
;
2004 char save_if_name
[LIFNAMSIZ
];
2009 if ((s
= socket(PF_ROUTE
, SOCK_RAW
, 0)) < 0) {
2010 zerror(zlogp
, B_TRUE
, "could not get routing socket");
2014 if (lifr
->lifr_addr
.ss_family
== AF_INET
) {
2015 struct sockaddr_in
*sin4
;
2017 so_dst
.sa
.sa_family
= AF_INET
;
2018 sin4
= (struct sockaddr_in
*)&lifr
->lifr_addr
;
2019 so_dst
.sin
.sin_addr
= sin4
->sin_addr
;
2021 struct sockaddr_in6
*sin6
;
2023 so_dst
.sa
.sa_family
= AF_INET6
;
2024 sin6
= (struct sockaddr_in6
*)&lifr
->lifr_addr
;
2025 so_dst
.sin6
.sin6_addr
= sin6
->sin6_addr
;
2028 so_ifp
.sa
.sa_family
= AF_LINK
;
2030 (void) memset(&rtmsg
, 0, sizeof (rtmsg
));
2031 rtmsg
.hdr
.rtm_type
= RTM_GET
;
2032 rtmsg
.hdr
.rtm_flags
= RTF_UP
| RTF_HOST
;
2033 rtmsg
.hdr
.rtm_version
= RTM_VERSION
;
2034 rtmsg
.hdr
.rtm_seq
= ++rts_seqno
;
2035 rtmsg
.hdr
.rtm_addrs
= RTA_IFP
| RTA_DST
;
2037 l
= ROUNDUP_LONG(salen(&so_dst
.sa
));
2038 (void) memmove(cp
, &(so_dst
), l
);
2040 l
= ROUNDUP_LONG(salen(&so_ifp
.sa
));
2041 (void) memmove(cp
, &(so_ifp
), l
);
2044 rtmsg
.hdr
.rtm_msglen
= l
= cp
- (char *)&rtmsg
;
2046 if ((rlen
= write(s
, &rtmsg
, l
)) < 0) {
2047 zerror(zlogp
, B_TRUE
, "writing to routing socket");
2049 } else if (rlen
< (int)rtmsg
.hdr
.rtm_msglen
) {
2050 zerror(zlogp
, B_TRUE
,
2051 "write to routing socket got only %d for len\n", rlen
);
2055 l
= read(s
, &rtmsg
, sizeof (rtmsg
));
2056 } while (l
> 0 && (rtmsg
.hdr
.rtm_seq
!= rts_seqno
||
2057 rtmsg
.hdr
.rtm_pid
!= pid
));
2059 zerror(zlogp
, B_TRUE
, "reading from routing socket");
2063 if (rtmsg
.hdr
.rtm_version
!= RTM_VERSION
) {
2064 zerror(zlogp
, B_FALSE
,
2065 "routing message version %d not understood",
2066 rtmsg
.hdr
.rtm_version
);
2069 if (rtmsg
.hdr
.rtm_msglen
!= (ushort_t
)l
) {
2070 zerror(zlogp
, B_FALSE
, "message length mismatch, "
2071 "expected %d bytes, returned %d bytes",
2072 rtmsg
.hdr
.rtm_msglen
, l
);
2075 if (rtmsg
.hdr
.rtm_errno
!= 0) {
2076 errno
= rtmsg
.hdr
.rtm_errno
;
2077 zerror(zlogp
, B_TRUE
, "RTM_GET routing socket message");
2080 if ((rtmsg
.hdr
.rtm_addrs
& RTA_IFP
) == 0) {
2081 zerror(zlogp
, B_FALSE
, "network interface not found");
2084 cp
= ((char *)(&rtmsg
.hdr
+ 1));
2085 for (i
= 1; i
!= 0; i
<<= 1) {
2086 /* LINTED E_BAD_PTR_CAST_ALIGN */
2087 sa
= (struct sockaddr
*)cp
;
2089 if ((i
& rtmsg
.hdr
.rtm_addrs
) != 0)
2090 cp
+= ROUNDUP_LONG(salen(sa
));
2093 if (sa
->sa_family
== AF_LINK
&&
2094 ((struct sockaddr_dl
*)sa
)->sdl_nlen
!= 0)
2095 ifp
= (struct sockaddr_dl
*)sa
;
2099 zerror(zlogp
, B_FALSE
, "network interface could not be "
2105 * We need to set the I/F name to what we got above, then do the
2106 * appropriate ioctl to get its zone name. But lifr->lifr_name is
2107 * used by the calling function to do a REMOVEIF, so if we leave the
2108 * "good" zone's I/F name in place, *that* I/F will be removed instead
2109 * of the bad one. So we save the old (bad) I/F name before over-
2110 * writing it and doing the ioctl, then restore it after the ioctl.
2112 (void) strlcpy(save_if_name
, lifr
->lifr_name
, sizeof (save_if_name
));
2113 (void) strncpy(lifr
->lifr_name
, ifp
->sdl_data
, ifp
->sdl_nlen
);
2114 lifr
->lifr_name
[ifp
->sdl_nlen
] = '\0';
2115 i
= ioctl(s
, SIOCGLIFZONE
, lifr
);
2116 (void) strlcpy(lifr
->lifr_name
, save_if_name
, sizeof (save_if_name
));
2118 zerror(zlogp
, B_TRUE
,
2119 "%s: could not determine the zone network interface "
2120 "belongs to", lifr
->lifr_name
);
2123 if (getzonenamebyid(lifr
->lifr_zoneid
, answer
, sizeof (answer
)) < 0)
2124 (void) snprintf(answer
, sizeof (answer
), "%d",
2127 if (strlen(answer
) > 0)
2133 * Configures a single interface: a new virtual interface is added, based on
2134 * the physical interface nwiftabptr->zone_nwif_physical, with the address
2135 * specified in nwiftabptr->zone_nwif_address, for zone zone_id. Note that
2136 * the "address" can be an IPv6 address (with a /prefixlength required), an
2137 * IPv4 address (with a /prefixlength optional), or a name; for the latter,
2138 * an IPv4 name-to-address resolution will be attempted.
2140 * If anything goes wrong, we log a detailed error message, attempt to tear
2141 * down whatever we set up and return an error.
/*
 * Plumb one logical interface for a shared-stack zone.
 *
 * zlogp      - log handle.
 * zone_id    - zone that is to own the new logical interface.
 * nwiftabptr - configuration entry giving the physical interface name,
 *              the address (optionally followed by /prefixlength) and an
 *              optional default router.
 *
 * Visible procedure:
 *  - validate and parse the address with zonecfg_valid_net_address() and
 *    open a datagram socket of the resulting family;
 *  - keep the parsed address in laddr, then add a logical interface on
 *    the physical link with a zeroed address (SIOCLIFADDIF); a failure
 *    here is reported as a warning through &logsys;
 *  - assign the interface to zone_id (SIOCSLIFZONE);
 *  - unless the physical interface is "lo0", work out a netmask: for IPv4
 *    from the prefix length when one was given, otherwise from
 *    getnetmaskbyaddr(); for IPv6 from the prefix length; then apply it
 *    with SIOCSLIFNETMASK;
 *  - set the address (SIOCSLIFADDR), read the flags, add IFF_UP and write
 *    them back; when that fails with EADDRNOTAVAIL, who_is_using() is
 *    consulted so the message can name the zone holding the address;
 *  - when no netmask could be determined for a non-loopback interface,
 *    log a warning that includes the netmask actually in effect when it
 *    can be obtained;
 *  - when a default router is configured, run /usr/sbin/route through
 *    forkexec(), tolerating an EEXIST status;
 *  - the last visible statement removes the logical interface again
 *    (SIOCLIFREMOVEIF); presumably this is the failure path - confirm.
 *
 * Callers compare the result against Z_OK.
 *
 * NOTE(review): the IPv6 branch passes slashp + 1 to addr2netmask() with
 * no visible NULL test; this relies on zonecfg_valid_net_address()
 * rejecting IPv6 addresses that lack a prefix length - confirm.
 * NOTE(review): return statements, goto targets and close() calls are on
 * lines missing from this listing.
 */
2144 configure_one_interface(zlog_t
*zlogp
, zoneid_t zone_id
,
2145 struct zone_nwiftab
*nwiftabptr
)
2148 struct sockaddr_in netmask4
;
2149 struct sockaddr_in6 netmask6
;
2150 struct sockaddr_storage laddr
;
2153 char *slashp
= strchr(nwiftabptr
->zone_nwif_address
, '/');
2155 boolean_t got_netmask
= B_FALSE
;
2156 boolean_t is_loopback
= B_FALSE
;
2157 char addrstr4
[INET_ADDRSTRLEN
];
2160 res
= zonecfg_valid_net_address(nwiftabptr
->zone_nwif_address
, &lifr
);
2162 zerror(zlogp
, B_FALSE
, "%s: %s", zonecfg_strerror(res
),
2163 nwiftabptr
->zone_nwif_address
);
2166 af
= lifr
.lifr_addr
.ss_family
;
2168 in4
= ((struct sockaddr_in
*)(&lifr
.lifr_addr
))->sin_addr
;
2169 if ((s
= socket(af
, SOCK_DGRAM
, 0)) < 0) {
2170 zerror(zlogp
, B_TRUE
, "could not get socket");
2175 * This is a similar kind of "hack" like in addif() to get around
2176 * the problem of SIOCLIFADDIF. The problem is that this ioctl
2177 * does not include the netmask when adding a logical interface.
2178 * To get around this problem, we first add the logical interface
2179 * with a 0 address. After that, we set the netmask if provided.
2180 * Finally we set the interface address.
2182 laddr
= lifr
.lifr_addr
;
2183 (void) strlcpy(lifr
.lifr_name
, nwiftabptr
->zone_nwif_physical
,
2184 sizeof (lifr
.lifr_name
));
2185 (void) memset(&lifr
.lifr_addr
, 0, sizeof (lifr
.lifr_addr
));
2187 if (ioctl(s
, SIOCLIFADDIF
, (caddr_t
)&lifr
) < 0) {
2189 * Here, we know that the interface can't be brought up.
2190 * A similar warning message was already printed out to
2191 * the console by zoneadm(1M) so instead we log the
2192 * message to syslog and continue.
2194 zerror(&logsys
, B_TRUE
, "WARNING: skipping network interface "
2195 "'%s' which may not be present/plumbed in the "
2196 "global zone.", lifr
.lifr_name
);
2201 /* Preserve literal IPv4 address for later potential printing. */
2203 (void) inet_ntop(AF_INET
, &in4
, addrstr4
, INET_ADDRSTRLEN
);
2205 lifr
.lifr_zoneid
= zone_id
;
2206 if (ioctl(s
, SIOCSLIFZONE
, (caddr_t
)&lifr
) < 0) {
2207 zerror(zlogp
, B_TRUE
, "%s: could not place network interface "
2208 "into zone", lifr
.lifr_name
);
2213 * Loopback interface will use the default netmask assigned, if no
2216 if (strcmp(nwiftabptr
->zone_nwif_physical
, "lo0") == 0) {
2217 is_loopback
= B_TRUE
;
2219 if (af
== AF_INET
) {
2221 * The IPv4 netmask can be determined either
2222 * directly if a prefix length was supplied with
2223 * the address or via the netmasks database. Not
2224 * being able to determine it is a common failure,
2225 * but it often is not fatal to operation of the
2226 * interface. In that case, a warning will be
2227 * printed after the rest of the interface's
2228 * parameters have been configured.
2230 (void) memset(&netmask4
, 0, sizeof (netmask4
));
2231 if (slashp
!= NULL
) {
2232 if (addr2netmask(slashp
+ 1, V4_ADDR_LEN
,
2233 (uchar_t
*)&netmask4
.sin_addr
) != 0) {
2235 zerror(zlogp
, B_FALSE
,
2236 "%s: invalid prefix length in %s",
2238 nwiftabptr
->zone_nwif_address
);
2241 got_netmask
= B_TRUE
;
2242 } else if (getnetmaskbyaddr(in4
,
2243 &netmask4
.sin_addr
) == 0) {
2244 got_netmask
= B_TRUE
;
2247 netmask4
.sin_family
= af
;
2248 (void) memcpy(&lifr
.lifr_addr
, &netmask4
,
2252 (void) memset(&netmask6
, 0, sizeof (netmask6
));
2253 if (addr2netmask(slashp
+ 1, V6_ADDR_LEN
,
2254 (uchar_t
*)&netmask6
.sin6_addr
) != 0) {
2256 zerror(zlogp
, B_FALSE
,
2257 "%s: invalid prefix length in %s",
2259 nwiftabptr
->zone_nwif_address
);
2262 got_netmask
= B_TRUE
;
2263 netmask6
.sin6_family
= af
;
2264 (void) memcpy(&lifr
.lifr_addr
, &netmask6
,
2268 ioctl(s
, SIOCSLIFNETMASK
, (caddr_t
)&lifr
) < 0) {
2269 zerror(zlogp
, B_TRUE
, "%s: could not set netmask",
2274 /* Set the interface address */
2275 lifr
.lifr_addr
= laddr
;
2276 if (ioctl(s
, SIOCSLIFADDR
, (caddr_t
)&lifr
) < 0) {
2277 zerror(zlogp
, B_TRUE
,
2278 "%s: could not set IP address to %s",
2279 lifr
.lifr_name
, nwiftabptr
->zone_nwif_address
);
2283 if (ioctl(s
, SIOCGLIFFLAGS
, (caddr_t
)&lifr
) < 0) {
2284 zerror(zlogp
, B_TRUE
, "%s: could not get flags",
2288 lifr
.lifr_flags
|= IFF_UP
;
2289 if (ioctl(s
, SIOCSLIFFLAGS
, (caddr_t
)&lifr
) < 0) {
2290 int save_errno
= errno
;
2294 * If we failed with something other than EADDRNOTAVAIL,
2295 * then skip to the end. Otherwise, look up our address,
2296 * then call a function to determine which zone is already
2297 * using that address.
2299 if (errno
!= EADDRNOTAVAIL
) {
2300 zerror(zlogp
, B_TRUE
,
2301 "%s: could not bring network interface up",
2305 if (ioctl(s
, SIOCGLIFADDR
, (caddr_t
)&lifr
) < 0) {
2306 zerror(zlogp
, B_TRUE
, "%s: could not get address",
2310 zone_using
= who_is_using(zlogp
, &lifr
);
2312 if (zone_using
== NULL
)
2313 zerror(zlogp
, B_TRUE
,
2314 "%s: could not bring network interface up",
2317 zerror(zlogp
, B_TRUE
, "%s: could not bring network "
2318 "interface up: address in use by zone '%s'",
2319 lifr
.lifr_name
, zone_using
);
2323 if (!got_netmask
&& !is_loopback
) {
2325 * A common, but often non-fatal problem, is that the system
2326 * cannot find the netmask for an interface address. This is
2327 * often caused by it being only in /etc/inet/netmasks, but
2328 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not
2329 * in that. This doesn't show up at boot because the netmask
2330 * is obtained from /etc/inet/netmasks when no network
2331 * interfaces are up, but isn't consulted when NIS/NIS+ is
2332 * available. We warn the user here that something like this
2333 * has happened and we're just running with a default and
2334 * possible incorrect netmask.
2336 char buffer
[INET6_ADDRSTRLEN
];
2338 const char *nomatch
= "no matching subnet found in netmasks(4)";
2341 addr
= &((struct sockaddr_in
*)
2342 (&lifr
.lifr_addr
))->sin_addr
;
2344 addr
= &((struct sockaddr_in6
*)
2345 (&lifr
.lifr_addr
))->sin6_addr
;
2348 * Find out what netmask the interface is going to be using.
2349 * If we just brought up an IPMP data address on an underlying
2350 * interface above, the address will have already migrated, so
2351 * the SIOCGLIFNETMASK won't be able to find it (but we need
2352 * to bring the address up to get the actual netmask). Just
2353 * omit printing the actual netmask in this corner-case.
2355 if (ioctl(s
, SIOCGLIFNETMASK
, (caddr_t
)&lifr
) < 0 ||
2356 inet_ntop(af
, addr
, buffer
, sizeof (buffer
)) == NULL
) {
2357 zerror(zlogp
, B_FALSE
, "WARNING: %s; using default.",
2360 zerror(zlogp
, B_FALSE
,
2361 "WARNING: %s: %s: %s; using default of %s.",
2362 lifr
.lifr_name
, nomatch
, addrstr4
, buffer
);
2367 * If a default router was specified for this interface
2368 * set the route now. Ignore if already set.
2370 if (strlen(nwiftabptr
->zone_nwif_defrouter
) > 0) {
2377 argv
[3] = nwiftabptr
->zone_nwif_physical
;
2378 argv
[4] = "default";
2379 argv
[5] = nwiftabptr
->zone_nwif_defrouter
;
2382 status
= forkexec(zlogp
, "/usr/sbin/route", argv
);
2383 if (status
!= 0 && status
!= EEXIST
)
2384 zerror(zlogp
, B_FALSE
, "Unable to set route for "
2385 "interface %s to %s\n",
2386 nwiftabptr
->zone_nwif_physical
,
2387 nwiftabptr
->zone_nwif_defrouter
);
2393 (void) ioctl(s
, SIOCLIFREMOVEIF
, (caddr_t
)&lifr
);
2399 * Sets up network interfaces based on information from the zone configuration.
2400 * IPv4 and IPv6 loopback interfaces are set up "for free", modeling the global
2403 * If anything goes wrong, we log a general error message, attempt to tear down
2404 * whatever we set up, and return an error.
/*
 * Configure all shared-stack network interfaces of the zone named by the
 * file-scope zone_name.
 *
 * zlogp - log handle.
 *
 * Visible procedure: resolve the zone id; obtain a snapshot configuration
 * handle; iterate the configured network entries, calling
 * configure_one_interface() for each and releasing the iterator and the
 * handle when one fails; release the handle; consult is_system_labeled()
 * (per the comment in the body, labeled zones share the loopback
 * interface, so it is not plumbed for them); otherwise plumb "lo0" twice
 * through configure_one_interface(), first with 127.0.0.1 and then with
 * ::1/128, with no default router set.
 *
 * NOTE(review): the loop construct around zonecfg_getnwifent() and all
 * return statements are on lines missing from this listing.
 */
2407 configure_shared_network_interfaces(zlog_t
*zlogp
)
2409 zone_dochandle_t handle
;
2410 struct zone_nwiftab nwiftab
, loopback_iftab
;
2413 if ((zoneid
= getzoneidbyname(zone_name
)) == ZONE_ID_UNDEFINED
) {
2414 zerror(zlogp
, B_TRUE
, "unable to get zoneid");
2418 if ((handle
= zonecfg_init_handle()) == NULL
) {
2419 zerror(zlogp
, B_TRUE
, "getting zone configuration handle");
2422 if (zonecfg_get_snapshot_handle(zone_name
, handle
) != Z_OK
) {
2423 zerror(zlogp
, B_FALSE
, "invalid configuration");
2424 zonecfg_fini_handle(handle
);
2427 if (zonecfg_setnwifent(handle
) == Z_OK
) {
2429 if (zonecfg_getnwifent(handle
, &nwiftab
) != Z_OK
)
2431 if (configure_one_interface(zlogp
, zoneid
, &nwiftab
) !=
2433 (void) zonecfg_endnwifent(handle
);
2434 zonecfg_fini_handle(handle
);
2438 (void) zonecfg_endnwifent(handle
);
2440 zonecfg_fini_handle(handle
);
2441 if (is_system_labeled()) {
2443 * Labeled zones share the loopback interface
2444 * so it is not plumbed for shared stack instances.
2448 (void) strlcpy(loopback_iftab
.zone_nwif_physical
, "lo0",
2449 sizeof (loopback_iftab
.zone_nwif_physical
));
2450 (void) strlcpy(loopback_iftab
.zone_nwif_address
, "127.0.0.1",
2451 sizeof (loopback_iftab
.zone_nwif_address
));
2452 loopback_iftab
.zone_nwif_defrouter
[0] = '\0';
2453 if (configure_one_interface(zlogp
, zoneid
, &loopback_iftab
) != Z_OK
)
2456 /* Always plumb up the IPv6 loopback interface. */
2457 (void) strlcpy(loopback_iftab
.zone_nwif_address
, "::1/128",
2458 sizeof (loopback_iftab
.zone_nwif_address
));
2459 if (configure_one_interface(zlogp
, zoneid
, &loopback_iftab
) != Z_OK
)
2465 zdlerror(zlog_t
*zlogp
, dladm_status_t err
, const char *dlname
, const char *str
)
2467 char errmsg
[DLADM_STRSIZE
];
2469 (void) dladm_status2str(err
, errmsg
);
2470 zerror(zlogp
, B_FALSE
, "%s '%s': %s", str
, dlname
, errmsg
);
2474 add_datalink(zlog_t
*zlogp
, char *zone_name
, datalink_id_t linkid
, char *dlname
)
2477 boolean_t cpuset
, poolset
;
2480 /* First check if it's in use by global zone. */
2481 if (zonecfg_ifname_exists(AF_INET
, dlname
) ||
2482 zonecfg_ifname_exists(AF_INET6
, dlname
)) {
2483 zerror(zlogp
, B_FALSE
, "WARNING: skipping network interface "
2484 "'%s' which is used in the global zone", dlname
);
2488 /* Set zoneid of this link. */
2489 err
= dladm_set_linkprop(dld_handle
, linkid
, "zone", &zone_name
, 1,
2491 if (err
!= DLADM_STATUS_OK
) {
2492 zdlerror(zlogp
, err
, dlname
,
2493 "WARNING: unable to add network interface");
2498 * Set the pool of this link if the zone has a pool and
2499 * neither the cpus nor the pool datalink property is
2502 err
= dladm_linkprop_is_set(dld_handle
, linkid
, DLADM_PROP_VAL_CURRENT
,
2504 if (err
!= DLADM_STATUS_OK
) {
2505 zdlerror(zlogp
, err
, dlname
,
2506 "WARNING: unable to check if cpus link property is set");
2508 err
= dladm_linkprop_is_set(dld_handle
, linkid
, DLADM_PROP_VAL_CURRENT
,
2510 if (err
!= DLADM_STATUS_OK
) {
2511 zdlerror(zlogp
, err
, dlname
,
2512 "WARNING: unable to check if pool link property is set");
2515 if ((strlen(pool_name
) != 0) && !cpuset
&& !poolset
) {
2517 err
= dladm_set_linkprop(dld_handle
, linkid
, "pool",
2518 &poolp
, 1, DLADM_OPT_ACTIVE
);
2519 if (err
!= DLADM_STATUS_OK
) {
2520 zerror(zlogp
, B_FALSE
, "WARNING: unable to set "
2521 "pool %s to datalink %s", pool_name
, dlname
);
2522 bzero(pool_name
, sizeof (pool_name
));
2525 bzero(pool_name
, sizeof (pool_name
));
2531 sockaddr_to_str(sa_family_t af
, const struct sockaddr
*sockaddr
,
2532 char *straddr
, size_t len
)
2534 struct sockaddr_in
*sin
;
2535 struct sockaddr_in6
*sin6
;
2536 const char *str
= NULL
;
2538 if (af
== AF_INET
) {
2539 /* LINTED E_BAD_PTR_CAST_ALIGN */
2540 sin
= SIN(sockaddr
);
2541 str
= inet_ntop(AF_INET
, (void *)&sin
->sin_addr
, straddr
, len
);
2542 } else if (af
== AF_INET6
) {
2543 /* LINTED E_BAD_PTR_CAST_ALIGN */
2544 sin6
= SIN6(sockaddr
);
2545 str
= inet_ntop(AF_INET6
, (void *)&sin6
->sin6_addr
, straddr
,
2549 return (str
!= NULL
);
/*
 * Work out a default prefix length for the IPv4 address in "sin".
 *
 * getnetmaskbyaddr() is consulted first; if it supplies a netmask, that
 * mask is converted to a prefix length with mask2plen().  Otherwise the
 * historical classful default is used (class A: 8, B: 16, C: 24), and 0 is
 * returned for anything else.
 */
static int
ipv4_prefixlen(struct sockaddr_in *sin)
{
	struct sockaddr_storage mask;
	struct sockaddr_in *m;
	in_addr_t haddr;

	/*
	 * Zero the whole storage so mask2plen() never looks at stack
	 * garbage beyond the fields filled in below.
	 */
	bzero(&mask, sizeof (mask));
	m = (struct sockaddr_in *)&mask;
	m->sin_family = AF_INET;
	if (getnetmaskbyaddr(sin->sin_addr, &m->sin_addr) == 0)
		return (mask2plen((struct sockaddr *)&mask));

	/*
	 * The IN_CLASSx() macros operate on host byte order, so convert
	 * from network order once with ntohl() and reuse the result.
	 */
	haddr = ntohl(sin->sin_addr.s_addr);
	if (IN_CLASSA(haddr))
		return (8);
	if (IN_CLASSB(haddr))
		return (16);
	if (IN_CLASSC(haddr))
		return (24);

	return (0);
}
2573 zone_setattr_network(int type
, zoneid_t zoneid
, datalink_id_t linkid
,
2574 void *buf
, size_t bufsize
)
2576 zone_net_data_t
*zndata
;
2580 znsize
= sizeof (*zndata
) + bufsize
;
2581 zndata
= calloc(1, znsize
);
2584 zndata
->zn_type
= type
;
2585 zndata
->zn_len
= bufsize
;
2586 zndata
->zn_linkid
= linkid
;
2587 bcopy(buf
, zndata
->zn_val
, zndata
->zn_len
);
2588 err
= zone_setattr(zoneid
, ZONE_ATTR_NETWORK
, zndata
, znsize
);
2594 add_net_for_linkid(zlog_t
*zlogp
, zoneid_t zoneid
, zone_addr_list_t
*start
)
2597 char **astr
, *address
;
2598 dladm_status_t dlstatus
;
2599 char *ip_nospoof
= "ip-nospoof";
2600 int nnet
, naddr
, err
= 0, j
;
2601 size_t zlen
, cpleft
;
2602 zone_addr_list_t
*ptr
, *end
;
2603 char tmp
[INET6_ADDRSTRLEN
], *maskstr
;
2605 struct in6_addr
*routes
= NULL
;
2607 datalink_id_t linkid
;
2609 assert(start
!= NULL
);
2610 naddr
= 0; /* number of addresses */
2611 nnet
= 0; /* number of net resources */
2612 linkid
= start
->za_linkid
;
2613 for (ptr
= start
; ptr
!= NULL
&& ptr
->za_linkid
== linkid
;
2614 ptr
= ptr
->za_next
) {
2618 zlen
= nnet
* (INET6_ADDRSTRLEN
+ 1);
2619 astr
= calloc(1, nnet
* sizeof (uintptr_t));
2620 zaddr
= calloc(1, zlen
);
2621 if (astr
== NULL
|| zaddr
== NULL
) {
2628 for (ptr
= start
; ptr
!= end
; ptr
= ptr
->za_next
) {
2629 address
= ptr
->za_nwiftab
.zone_nwif_allowed_address
;
2630 if (address
[0] == '\0')
2632 (void) snprintf(tmp
, sizeof (tmp
), "%s", address
);
2634 * Validate the data. zonecfg_valid_net_address() clobbers
2635 * the /<mask> in the address string.
2637 if (zonecfg_valid_net_address(address
, &lifr
) != Z_OK
) {
2638 zerror(zlogp
, B_FALSE
, "invalid address [%s]\n",
2644 * convert any hostnames to numeric address strings.
2646 if (!sockaddr_to_str(lifr
.lifr_addr
.ss_family
,
2647 (const struct sockaddr
*)&lifr
.lifr_addr
, cp
, cpleft
)) {
2652 * make a copy of the numeric string for the data needed
2653 * by the "allowed-ips" datalink property.
2655 astr
[j
] = strdup(cp
);
2656 if (astr
[j
] == NULL
) {
2662 * compute the default netmask from the address, if necessary
2664 if ((maskstr
= strchr(tmp
, '/')) == NULL
) {
2667 if (lifr
.lifr_addr
.ss_family
== AF_INET
) {
2668 prefixlen
= ipv4_prefixlen(
2669 SIN(&lifr
.lifr_addr
));
2671 struct sockaddr_in6
*sin6
;
2673 sin6
= SIN6(&lifr
.lifr_addr
);
2674 if (IN6_IS_ADDR_LINKLOCAL(&sin6
->sin6_addr
))
2679 (void) snprintf(tmp
, sizeof (tmp
), "%d", prefixlen
);
2684 /* append the "/<netmask>" */
2685 (void) strlcat(cp
, "/", cpleft
);
2686 (void) strlcat(cp
, maskstr
, cpleft
);
2687 (void) strlcat(cp
, ",", cpleft
);
2688 cp
+= strnlen(cp
, zlen
);
2689 cpleft
= &zaddr
[INET6_ADDRSTRLEN
] - cp
;
2691 naddr
= j
; /* the actual number of addresses in the net resource */
2692 assert(naddr
<= nnet
);
2695 * zonecfg has already verified that the defrouter property can only
2696 * be set if there is at least one address defined for the net resource.
2697 * If j is 0, there are no addresses defined, and therefore no routers
2698 * to configure, and we are done at that point.
2703 /* over-write last ',' with '\0' */
2704 zaddr
[strnlen(zaddr
, zlen
) + 1] = '\0';
2707 * First make sure L3 protection is not already set on the link.
2709 dlstatus
= dladm_linkprop_is_set(dld_handle
, linkid
, DLADM_OPT_ACTIVE
,
2710 "protection", &is_set
);
2711 if (dlstatus
!= DLADM_STATUS_OK
) {
2713 zerror(zlogp
, B_FALSE
, "unable to check if protection is set");
2718 zerror(zlogp
, B_FALSE
, "Protection is already set");
2721 dlstatus
= dladm_linkprop_is_set(dld_handle
, linkid
, DLADM_OPT_ACTIVE
,
2722 "allowed-ips", &is_set
);
2723 if (dlstatus
!= DLADM_STATUS_OK
) {
2725 zerror(zlogp
, B_FALSE
, "unable to check if allowed-ips is set");
2729 zerror(zlogp
, B_FALSE
, "allowed-ips is already set");
2735 * Enable ip-nospoof for the link, and add address to the allowed-ips
2738 dlstatus
= dladm_set_linkprop(dld_handle
, linkid
, "protection",
2739 &ip_nospoof
, 1, DLADM_OPT_ACTIVE
);
2740 if (dlstatus
!= DLADM_STATUS_OK
) {
2741 zerror(zlogp
, B_FALSE
, "could not set protection\n");
2745 dlstatus
= dladm_set_linkprop(dld_handle
, linkid
, "allowed-ips",
2746 astr
, naddr
, DLADM_OPT_ACTIVE
);
2747 if (dlstatus
!= DLADM_STATUS_OK
) {
2748 zerror(zlogp
, B_FALSE
, "could not set allowed-ips\n");
2753 /* now set the address in the data-store */
2754 err
= zone_setattr_network(ZONE_NETWORK_ADDRESS
, zoneid
, linkid
,
2755 zaddr
, strnlen(zaddr
, zlen
) + 1);
2760 * add the defaultrouters
2762 routes
= calloc(1, nnet
* sizeof (*routes
));
2764 for (ptr
= start
; ptr
!= end
; ptr
= ptr
->za_next
) {
2765 address
= ptr
->za_nwiftab
.zone_nwif_defrouter
;
2766 if (address
[0] == '\0')
2768 if (strchr(address
, '/') == NULL
&& strchr(address
, ':') != 0) {
2770 * zonecfg_valid_net_address() expects numeric IPv6
2771 * addresses to have a CIDR format netmask.
2773 (void) snprintf(tmp
, sizeof (tmp
), "/%d", V6_ADDR_LEN
);
2774 (void) strlcat(address
, tmp
, INET6_ADDRSTRLEN
);
2776 if (zonecfg_valid_net_address(address
, &lifr
) != Z_OK
) {
2777 zerror(zlogp
, B_FALSE
,
2778 "invalid router [%s]\n", address
);
2782 if (lifr
.lifr_addr
.ss_family
== AF_INET6
) {
2783 routes
[j
] = SIN6(&lifr
.lifr_addr
)->sin6_addr
;
2785 IN6_INADDR_TO_V4MAPPED(&SIN(&lifr
.lifr_addr
)->sin_addr
,
2792 err
= zone_setattr_network(ZONE_NETWORK_DEFROUTER
, zoneid
,
2793 linkid
, routes
, j
* sizeof (*routes
));
2797 for (j
= 0; j
< naddr
; j
++)
2806 add_net(zlog_t
*zlogp
, zoneid_t zoneid
, zone_addr_list_t
*zalist
)
2808 zone_addr_list_t
*ptr
;
2809 datalink_id_t linkid
;
2815 linkid
= zalist
->za_linkid
;
2817 err
= add_net_for_linkid(zlogp
, zoneid
, zalist
);
2821 for (ptr
= zalist
; ptr
!= NULL
; ptr
= ptr
->za_next
) {
2822 if (ptr
->za_linkid
== linkid
)
2824 linkid
= ptr
->za_linkid
;
2825 err
= add_net_for_linkid(zlogp
, zoneid
, ptr
);
2833 * Add "new" to the list of network interfaces to be configured by
2834 * add_net on zone boot in "old". The list of interfaces in "old" is
2835 * sorted by datalink_id_t, with interfaces sorted FIFO for a given
2838 * Returns the merged list of IP interfaces containing "old" and "new"
2840 static zone_addr_list_t
*
2841 add_ip_interface(zone_addr_list_t
*old
, zone_addr_list_t
*new)
2843 zone_addr_list_t
*ptr
, *next
;
2844 datalink_id_t linkid
= new->za_linkid
;
2850 for (ptr
= old
; ptr
!= NULL
; ptr
= ptr
->za_next
) {
2851 if (ptr
->za_linkid
== linkid
)
2855 /* linkid does not already exist, add to the beginning */
2860 * adding to the middle of the list; ptr points at the first
2861 * occurrence of linkid. Find the last occurrence.
2863 while ((next
= ptr
->za_next
) != NULL
) {
2864 if (next
->za_linkid
!= linkid
)
2868 /* insert new after ptr */
2869 new->za_next
= next
;
2875 free_ip_interface(zone_addr_list_t
*zalist
)
2877 zone_addr_list_t
*ptr
, *new;
2879 for (ptr
= zalist
; ptr
!= NULL
; ) {
2887 * Add the kernel access control information for the interface names.
2888 * If anything goes wrong, we log a general error message, attempt to tear down
2889 * whatever we set up, and return an error.
2892 configure_exclusive_network_interfaces(zlog_t
*zlogp
, zoneid_t zoneid
)
2894 zone_dochandle_t handle
;
2895 struct zone_nwiftab nwiftab
;
2896 char rootpath
[MAXPATHLEN
];
2897 char path
[MAXPATHLEN
];
2898 datalink_id_t linkid
;
2899 di_prof_t prof
= NULL
;
2900 boolean_t added
= B_FALSE
;
2901 zone_addr_list_t
*zalist
= NULL
, *new;
2903 if ((handle
= zonecfg_init_handle()) == NULL
) {
2904 zerror(zlogp
, B_TRUE
, "getting zone configuration handle");
2907 if (zonecfg_get_snapshot_handle(zone_name
, handle
) != Z_OK
) {
2908 zerror(zlogp
, B_FALSE
, "invalid configuration");
2909 zonecfg_fini_handle(handle
);
2913 if (zonecfg_setnwifent(handle
) != Z_OK
) {
2914 zonecfg_fini_handle(handle
);
2919 if (zonecfg_getnwifent(handle
, &nwiftab
) != Z_OK
)
2923 if (zone_get_devroot(zone_name
, rootpath
,
2924 sizeof (rootpath
)) != Z_OK
) {
2925 (void) zonecfg_endnwifent(handle
);
2926 zonecfg_fini_handle(handle
);
2927 zerror(zlogp
, B_TRUE
,
2928 "unable to determine dev root");
2931 (void) snprintf(path
, sizeof (path
), "%s%s", rootpath
,
2933 if (di_prof_init(path
, &prof
) != 0) {
2934 (void) zonecfg_endnwifent(handle
);
2935 zonecfg_fini_handle(handle
);
2936 zerror(zlogp
, B_TRUE
,
2937 "failed to initialize profile");
2943 * Create the /dev entry for backward compatibility.
2944 * Only create the /dev entry if it's not in use.
2945 * Note that the zone still boots when the assigned
2946 * interface is inaccessible, used by others, etc.
2947 * Also, when vanity naming is used, some interface do
2948 * do not have corresponding /dev node names (for example,
2949 * vanity named aggregations). The /dev entry is not
2950 * created in that case. The /dev/net entry is always
2953 if (dladm_name2info(dld_handle
, nwiftab
.zone_nwif_physical
,
2954 &linkid
, NULL
, NULL
, NULL
) == DLADM_STATUS_OK
&&
2955 add_datalink(zlogp
, zone_name
, linkid
,
2956 nwiftab
.zone_nwif_physical
) == 0) {
2959 (void) zonecfg_endnwifent(handle
);
2960 zonecfg_fini_handle(handle
);
2961 zerror(zlogp
, B_TRUE
, "failed to add network device");
2964 /* set up the new IP interface, and add them all later */
2965 new = malloc(sizeof (*new));
2967 zerror(zlogp
, B_TRUE
, "no memory for %s",
2968 nwiftab
.zone_nwif_physical
);
2969 zonecfg_fini_handle(handle
);
2970 free_ip_interface(zalist
);
2972 bzero(new, sizeof (*new));
2973 new->za_nwiftab
= nwiftab
;
2974 new->za_linkid
= linkid
;
2975 zalist
= add_ip_interface(zalist
, new);
2977 if (zalist
!= NULL
) {
2978 if ((errno
= add_net(zlogp
, zoneid
, zalist
)) != 0) {
2979 (void) zonecfg_endnwifent(handle
);
2980 zonecfg_fini_handle(handle
);
2981 zerror(zlogp
, B_TRUE
, "failed to add address");
2982 free_ip_interface(zalist
);
2985 free_ip_interface(zalist
);
2987 (void) zonecfg_endnwifent(handle
);
2988 zonecfg_fini_handle(handle
);
2990 if (prof
!= NULL
&& added
) {
2991 if (di_prof_commit(prof
) != 0) {
2992 zerror(zlogp
, B_TRUE
, "failed to commit profile");
3003 remove_datalink_pool(zlog_t
*zlogp
, zoneid_t zoneid
)
3006 zone_iptype_t iptype
;
3008 datalink_id_t
*dllink
, *dllinks
= NULL
;
3011 if (strlen(pool_name
) == 0)
3014 if (zone_getattr(zoneid
, ZONE_ATTR_FLAGS
, &flags
,
3015 sizeof (flags
)) < 0) {
3016 if (vplat_get_iptype(zlogp
, &iptype
) < 0) {
3017 zerror(zlogp
, B_FALSE
, "unable to determine ip-type");
3021 if (flags
& ZF_NET_EXCL
)
3022 iptype
= ZS_EXCLUSIVE
;
3027 if (iptype
== ZS_EXCLUSIVE
) {
3029 * Get the datalink count and for each datalink,
3030 * attempt to clear the pool property and clear
3033 if (zone_list_datalink(zoneid
, &dlnum
, NULL
) != 0) {
3034 zerror(zlogp
, B_TRUE
, "unable to count network "
3042 if ((dllinks
= malloc(dlnum
* sizeof (datalink_id_t
)))
3044 zerror(zlogp
, B_TRUE
, "memory allocation failed");
3047 if (zone_list_datalink(zoneid
, &dlnum
, dllinks
) != 0) {
3048 zerror(zlogp
, B_TRUE
, "unable to list network "
3053 bzero(pool_name
, sizeof (pool_name
));
3054 for (i
= 0, dllink
= dllinks
; i
< dlnum
; i
++, dllink
++) {
3055 err
= dladm_set_linkprop(dld_handle
, *dllink
, "pool",
3056 NULL
, 0, DLADM_OPT_ACTIVE
);
3057 if (err
!= DLADM_STATUS_OK
) {
3058 zerror(zlogp
, B_TRUE
,
3059 "WARNING: unable to clear pool");
3068 remove_datalink_protect(zlog_t
*zlogp
, zoneid_t zoneid
)
3071 zone_iptype_t iptype
;
3073 dladm_status_t dlstatus
;
3074 datalink_id_t
*dllink
, *dllinks
= NULL
;
3076 if (zone_getattr(zoneid
, ZONE_ATTR_FLAGS
, &flags
,
3077 sizeof (flags
)) < 0) {
3078 if (vplat_get_iptype(zlogp
, &iptype
) < 0) {
3079 zerror(zlogp
, B_FALSE
, "unable to determine ip-type");
3083 if (flags
& ZF_NET_EXCL
)
3084 iptype
= ZS_EXCLUSIVE
;
3089 if (iptype
!= ZS_EXCLUSIVE
)
3093 * Get the datalink count and for each datalink,
3094 * attempt to clear the pool property and clear
3097 if (zone_list_datalink(zoneid
, &dlnum
, NULL
) != 0) {
3098 zerror(zlogp
, B_TRUE
, "unable to count network interfaces");
3105 if ((dllinks
= malloc(dlnum
* sizeof (datalink_id_t
))) == NULL
) {
3106 zerror(zlogp
, B_TRUE
, "memory allocation failed");
3109 if (zone_list_datalink(zoneid
, &dlnum
, dllinks
) != 0) {
3110 zerror(zlogp
, B_TRUE
, "unable to list network interfaces");
3115 for (i
= 0, dllink
= dllinks
; i
< dlnum
; i
++, dllink
++) {
3116 char dlerr
[DLADM_STRSIZE
];
3118 dlstatus
= dladm_set_linkprop(dld_handle
, *dllink
,
3119 "protection", NULL
, 0, DLADM_OPT_ACTIVE
);
3120 if (dlstatus
== DLADM_STATUS_NOTFOUND
) {
3121 /* datalink does not belong to the GZ */
3124 if (dlstatus
!= DLADM_STATUS_OK
) {
3125 zerror(zlogp
, B_FALSE
,
3126 dladm_status2str(dlstatus
, dlerr
));
3130 dlstatus
= dladm_set_linkprop(dld_handle
, *dllink
,
3131 "allowed-ips", NULL
, 0, DLADM_OPT_ACTIVE
);
3132 if (dlstatus
!= DLADM_STATUS_OK
) {
3133 zerror(zlogp
, B_FALSE
,
3134 dladm_status2str(dlstatus
, dlerr
));
3144 unconfigure_exclusive_network_interfaces(zlog_t
*zlogp
, zoneid_t zoneid
)
3149 * The kernel shutdown callback for the dls module should have removed
3150 * all datalinks from this zone. If any remain, then there's a
3153 if (zone_list_datalink(zoneid
, &dlnum
, NULL
) != 0) {
3154 zerror(zlogp
, B_TRUE
, "unable to list network interfaces");
3158 zerror(zlogp
, B_FALSE
,
3159 "datalinks remain in zone after shutdown");
3166 tcp_abort_conn(zlog_t
*zlogp
, zoneid_t zoneid
,
3167 const struct sockaddr_storage
*local
, const struct sockaddr_storage
*remote
)
3170 struct strioctl ioc
;
3171 tcp_ioc_abort_conn_t conn
;
3174 conn
.ac_local
= *local
;
3175 conn
.ac_remote
= *remote
;
3176 conn
.ac_start
= TCPS_SYN_SENT
;
3177 conn
.ac_end
= TCPS_TIME_WAIT
;
3178 conn
.ac_zoneid
= zoneid
;
3180 ioc
.ic_cmd
= TCP_IOC_ABORT_CONN
;
3181 ioc
.ic_timout
= -1; /* infinite timeout */
3182 ioc
.ic_len
= sizeof (conn
);
3183 ioc
.ic_dp
= (char *)&conn
;
3185 if ((fd
= open("/dev/tcp", O_RDONLY
)) < 0) {
3186 zerror(zlogp
, B_TRUE
, "unable to open %s", "/dev/tcp");
3190 error
= ioctl(fd
, I_STR
, &ioc
);
3192 if (error
== 0 || errno
== ENOENT
) /* ENOENT is not an error */
3198 tcp_abort_connections(zlog_t
*zlogp
, zoneid_t zoneid
)
3200 struct sockaddr_storage l
, r
;
3201 struct sockaddr_in
*local
, *remote
;
3202 struct sockaddr_in6
*local6
, *remote6
;
3206 * Abort IPv4 connections.
3208 bzero(&l
, sizeof (*local
));
3209 local
= (struct sockaddr_in
*)&l
;
3210 local
->sin_family
= AF_INET
;
3211 local
->sin_addr
.s_addr
= INADDR_ANY
;
3212 local
->sin_port
= 0;
3214 bzero(&r
, sizeof (*remote
));
3215 remote
= (struct sockaddr_in
*)&r
;
3216 remote
->sin_family
= AF_INET
;
3217 remote
->sin_addr
.s_addr
= INADDR_ANY
;
3218 remote
->sin_port
= 0;
3220 if ((error
= tcp_abort_conn(zlogp
, zoneid
, &l
, &r
)) != 0)
3224 * Abort IPv6 connections.
3226 bzero(&l
, sizeof (*local6
));
3227 local6
= (struct sockaddr_in6
*)&l
;
3228 local6
->sin6_family
= AF_INET6
;
3229 local6
->sin6_port
= 0;
3230 local6
->sin6_addr
= in6addr_any
;
3232 bzero(&r
, sizeof (*remote6
));
3233 remote6
= (struct sockaddr_in6
*)&r
;
3234 remote6
->sin6_family
= AF_INET6
;
3235 remote6
->sin6_port
= 0;
3236 remote6
->sin6_addr
= in6addr_any
;
3238 if ((error
= tcp_abort_conn(zlogp
, zoneid
, &l
, &r
)) != 0)
3244 get_privset(zlog_t
*zlogp
, priv_set_t
*privs
, zone_mnt_t mount_cmd
)
3247 zone_dochandle_t handle
;
3248 char *privname
= NULL
;
3250 if ((handle
= zonecfg_init_handle()) == NULL
) {
3251 zerror(zlogp
, B_TRUE
, "getting zone configuration handle");
3254 if (zonecfg_get_snapshot_handle(zone_name
, handle
) != Z_OK
) {
3255 zerror(zlogp
, B_FALSE
, "invalid configuration");
3256 zonecfg_fini_handle(handle
);
3260 if (ALT_MOUNT(mount_cmd
)) {
3261 zone_iptype_t iptype
;
3262 const char *curr_iptype
;
3264 if (zonecfg_get_iptype(handle
, &iptype
) != Z_OK
) {
3265 zerror(zlogp
, B_TRUE
, "unable to determine ip-type");
3266 zonecfg_fini_handle(handle
);
3272 curr_iptype
= "shared";
3275 curr_iptype
= "exclusive";
3279 if (zonecfg_default_privset(privs
, curr_iptype
) == Z_OK
) {
3280 zonecfg_fini_handle(handle
);
3283 zerror(zlogp
, B_FALSE
,
3284 "failed to determine the zone's default privilege set");
3285 zonecfg_fini_handle(handle
);
3289 switch (zonecfg_get_privset(handle
, privs
, &privname
)) {
3293 case Z_PRIV_PROHIBITED
:
3294 zerror(zlogp
, B_FALSE
, "privilege \"%s\" is not permitted "
3295 "within the zone's privilege set", privname
);
3297 case Z_PRIV_REQUIRED
:
3298 zerror(zlogp
, B_FALSE
, "required privilege \"%s\" is missing "
3299 "from the zone's privilege set", privname
);
3301 case Z_PRIV_UNKNOWN
:
3302 zerror(zlogp
, B_FALSE
, "unknown privilege \"%s\" specified "
3303 "in the zone's privilege set", privname
);
3306 zerror(zlogp
, B_FALSE
, "failed to determine the zone's "
3312 zonecfg_fini_handle(handle
);
3317 get_rctls(zlog_t
*zlogp
, char **bufp
, size_t *bufsizep
)
3319 nvlist_t
*nvl
= NULL
;
3320 char *nvl_packed
= NULL
;
3321 size_t nvl_size
= 0;
3322 nvlist_t
**nvlv
= NULL
;
3325 zone_dochandle_t handle
;
3326 struct zone_rctltab rctltab
;
3327 rctlblk_t
*rctlblk
= NULL
;
3334 if ((handle
= zonecfg_init_handle()) == NULL
) {
3335 zerror(zlogp
, B_TRUE
, "getting zone configuration handle");
3338 if (zonecfg_get_snapshot_handle(zone_name
, handle
) != Z_OK
) {
3339 zerror(zlogp
, B_FALSE
, "invalid configuration");
3340 zonecfg_fini_handle(handle
);
3344 rctltab
.zone_rctl_valptr
= NULL
;
3345 if (nvlist_alloc(&nvl
, NV_UNIQUE_NAME
, 0) != 0) {
3346 zerror(zlogp
, B_TRUE
, "%s failed", "nvlist_alloc");
3351 * Allow the administrator to control both the maximum number of
3352 * process table slots and the maximum number of lwps with just the
3353 * max-processes property. If only the max-processes property is set,
3354 * we add a max-lwps property with a limit derived from max-processes.
3356 if (zonecfg_get_aliased_rctl(handle
, ALIAS_MAXPROCS
, &maxprocs
)
3358 zonecfg_get_aliased_rctl(handle
, ALIAS_MAXLWPS
, &maxlwps
)
3360 if (zonecfg_set_aliased_rctl(handle
, ALIAS_MAXLWPS
,
3361 maxprocs
* LWPS_PER_PROCESS
) != Z_OK
) {
3362 zerror(zlogp
, B_FALSE
, "unable to set max-lwps alias");
3367 if (zonecfg_setrctlent(handle
) != Z_OK
) {
3368 zerror(zlogp
, B_FALSE
, "%s failed", "zonecfg_setrctlent");
3372 if ((rctlblk
= malloc(rctlblk_size())) == NULL
) {
3373 zerror(zlogp
, B_TRUE
, "memory allocation failed");
3376 while (zonecfg_getrctlent(handle
, &rctltab
) == Z_OK
) {
3377 struct zone_rctlvaltab
*rctlval
;
3379 const char *name
= rctltab
.zone_rctl_name
;
3381 /* zoneadm should have already warned about unknown rctls. */
3382 if (!zonecfg_is_rctl(name
)) {
3383 zonecfg_free_rctl_value_list(rctltab
.zone_rctl_valptr
);
3384 rctltab
.zone_rctl_valptr
= NULL
;
3388 for (rctlval
= rctltab
.zone_rctl_valptr
; rctlval
!= NULL
;
3389 rctlval
= rctlval
->zone_rctlval_next
) {
3392 if (count
== 0) { /* ignore */
3393 continue; /* Nothing to free */
3395 if ((nvlv
= malloc(sizeof (*nvlv
) * count
)) == NULL
)
3398 for (rctlval
= rctltab
.zone_rctl_valptr
; rctlval
!= NULL
;
3399 rctlval
= rctlval
->zone_rctlval_next
, i
++) {
3400 if (nvlist_alloc(&nvlv
[i
], NV_UNIQUE_NAME
, 0) != 0) {
3401 zerror(zlogp
, B_TRUE
, "%s failed",
3405 if (zonecfg_construct_rctlblk(rctlval
, rctlblk
)
3407 zerror(zlogp
, B_FALSE
, "invalid rctl value: "
3408 "(priv=%s,limit=%s,action=%s)",
3409 rctlval
->zone_rctlval_priv
,
3410 rctlval
->zone_rctlval_limit
,
3411 rctlval
->zone_rctlval_action
);
3414 if (!zonecfg_valid_rctl(name
, rctlblk
)) {
3415 zerror(zlogp
, B_FALSE
,
3416 "(priv=%s,limit=%s,action=%s) is not a "
3417 "valid value for rctl '%s'",
3418 rctlval
->zone_rctlval_priv
,
3419 rctlval
->zone_rctlval_limit
,
3420 rctlval
->zone_rctlval_action
,
3424 if (nvlist_add_uint64(nvlv
[i
], "privilege",
3425 rctlblk_get_privilege(rctlblk
)) != 0) {
3426 zerror(zlogp
, B_FALSE
, "%s failed",
3427 "nvlist_add_uint64");
3430 if (nvlist_add_uint64(nvlv
[i
], "limit",
3431 rctlblk_get_value(rctlblk
)) != 0) {
3432 zerror(zlogp
, B_FALSE
, "%s failed",
3433 "nvlist_add_uint64");
3436 if (nvlist_add_uint64(nvlv
[i
], "action",
3437 (uint_t
)rctlblk_get_local_action(rctlblk
, NULL
))
3439 zerror(zlogp
, B_FALSE
, "%s failed",
3440 "nvlist_add_uint64");
3444 zonecfg_free_rctl_value_list(rctltab
.zone_rctl_valptr
);
3445 rctltab
.zone_rctl_valptr
= NULL
;
3446 if (nvlist_add_nvlist_array(nvl
, (char *)name
, nvlv
, count
)
3448 zerror(zlogp
, B_FALSE
, "%s failed",
3449 "nvlist_add_nvlist_array");
3452 for (i
= 0; i
< count
; i
++)
3453 nvlist_free(nvlv
[i
]);
3458 (void) zonecfg_endrctlent(handle
);
3460 if (rctlcount
== 0) {
3464 if (nvlist_pack(nvl
, &nvl_packed
, &nvl_size
, NV_ENCODE_NATIVE
, 0)
3466 zerror(zlogp
, B_FALSE
, "%s failed", "nvlist_pack");
3472 *bufsizep
= nvl_size
;
3476 zonecfg_free_rctl_value_list(rctltab
.zone_rctl_valptr
);
3477 if (error
&& nvl_packed
!= NULL
)
3484 zonecfg_fini_handle(handle
);
3489 get_implicit_datasets(zlog_t
*zlogp
, char **retstr
)
3491 char cmdbuf
[2 * MAXPATHLEN
];
3493 if (query_hook
[0] == '\0')
3496 if (snprintf(cmdbuf
, sizeof (cmdbuf
), "%s datasets", query_hook
)
3500 if (do_subproc(zlogp
, cmdbuf
, retstr
) != 0)
3507 get_datasets(zlog_t
*zlogp
, char **bufp
, size_t *bufsizep
)
3509 zone_dochandle_t handle
;
3510 struct zone_dstab dstab
;
3511 size_t total
, offset
, len
;
3514 char *implicit_datasets
= NULL
;
3515 int implicit_len
= 0;
3520 if ((handle
= zonecfg_init_handle()) == NULL
) {
3521 zerror(zlogp
, B_TRUE
, "getting zone configuration handle");
3524 if (zonecfg_get_snapshot_handle(zone_name
, handle
) != Z_OK
) {
3525 zerror(zlogp
, B_FALSE
, "invalid configuration");
3526 zonecfg_fini_handle(handle
);
3530 if (get_implicit_datasets(zlogp
, &implicit_datasets
) != 0) {
3531 zerror(zlogp
, B_FALSE
, "getting implicit datasets failed");
3535 if (zonecfg_setdsent(handle
) != Z_OK
) {
3536 zerror(zlogp
, B_FALSE
, "%s failed", "zonecfg_setdsent");
3541 while (zonecfg_getdsent(handle
, &dstab
) == Z_OK
)
3542 total
+= strlen(dstab
.zone_dataset_name
) + 1;
3543 (void) zonecfg_enddsent(handle
);
3545 if (implicit_datasets
!= NULL
)
3546 implicit_len
= strlen(implicit_datasets
);
3547 if (implicit_len
> 0)
3548 total
+= implicit_len
+ 1;
3555 if ((str
= malloc(total
)) == NULL
) {
3556 zerror(zlogp
, B_TRUE
, "memory allocation failed");
3560 if (zonecfg_setdsent(handle
) != Z_OK
) {
3561 zerror(zlogp
, B_FALSE
, "%s failed", "zonecfg_setdsent");
3565 while (zonecfg_getdsent(handle
, &dstab
) == Z_OK
) {
3566 len
= strlen(dstab
.zone_dataset_name
);
3567 (void) strlcpy(str
+ offset
, dstab
.zone_dataset_name
,
3570 if (offset
< total
- 1)
3571 str
[offset
++] = ',';
3573 (void) zonecfg_enddsent(handle
);
3575 if (implicit_len
> 0)
3576 (void) strlcpy(str
+ offset
, implicit_datasets
, total
- offset
);
3583 if (error
!= 0 && str
!= NULL
)
3586 zonecfg_fini_handle(handle
);
3587 if (implicit_datasets
!= NULL
)
3588 free(implicit_datasets
);
3594 validate_datasets(zlog_t
*zlogp
)
3596 zone_dochandle_t handle
;
3597 struct zone_dstab dstab
;
3599 libzfs_handle_t
*hdl
;
3601 if ((handle
= zonecfg_init_handle()) == NULL
) {
3602 zerror(zlogp
, B_TRUE
, "getting zone configuration handle");
3605 if (zonecfg_get_snapshot_handle(zone_name
, handle
) != Z_OK
) {
3606 zerror(zlogp
, B_FALSE
, "invalid configuration");
3607 zonecfg_fini_handle(handle
);
3611 if (zonecfg_setdsent(handle
) != Z_OK
) {
3612 zerror(zlogp
, B_FALSE
, "invalid configuration");
3613 zonecfg_fini_handle(handle
);
3617 if ((hdl
= libzfs_init()) == NULL
) {
3618 zerror(zlogp
, B_FALSE
, "opening ZFS library");
3619 zonecfg_fini_handle(handle
);
3623 while (zonecfg_getdsent(handle
, &dstab
) == Z_OK
) {
3625 if ((zhp
= zfs_open(hdl
, dstab
.zone_dataset_name
,
3626 ZFS_TYPE_FILESYSTEM
)) == NULL
) {
3627 zerror(zlogp
, B_FALSE
, "cannot open ZFS dataset '%s'",
3628 dstab
.zone_dataset_name
);
3629 zonecfg_fini_handle(handle
);
3635 * Automatically set the 'zoned' property. We check the value
3636 * first because we'll get EPERM if it is already set.
3638 if (!zfs_prop_get_int(zhp
, ZFS_PROP_ZONED
) &&
3639 zfs_prop_set(zhp
, zfs_prop_to_name(ZFS_PROP_ZONED
),
3641 zerror(zlogp
, B_FALSE
, "cannot set 'zoned' "
3642 "property for ZFS dataset '%s'\n",
3643 dstab
.zone_dataset_name
);
3644 zonecfg_fini_handle(handle
);
3652 (void) zonecfg_enddsent(handle
);
3654 zonecfg_fini_handle(handle
);
3661 * Return true if the path is its own zfs file system. We determine this
3662 * by stat-ing the path to see if it is zfs and stat-ing the parent to see
3663 * if it is a different fs.
3666 is_zonepath_zfs(char *zonepath
)
3671 struct statvfs64 buf1
, buf2
;
3673 if (statvfs64(zonepath
, &buf1
) != 0)
3676 if (strcmp(buf1
.f_basetype
, "zfs") != 0)
3679 if ((path
= strdup(zonepath
)) == NULL
)
3682 parent
= dirname(path
);
3683 res
= statvfs64(parent
, &buf2
);
3689 if (buf1
.f_fsid
== buf2
.f_fsid
)
3696 * Verify the MAC label in the root dataset for the zone.
3697 * If the label exists, it must match the label configured for the zone.
3698 * Otherwise if there's no label on the dataset, create one here.
3702 validate_rootds_label(zlog_t
*zlogp
, char *rootpath
, m_label_t
*zone_sl
)
3706 libzfs_handle_t
*hdl
;
3708 char zonepath
[MAXPATHLEN
];
3709 char ds_hexsl
[MAXNAMELEN
];
3711 if (!is_system_labeled())
3714 if (zone_get_zonepath(zone_name
, zonepath
, sizeof (zonepath
)) != Z_OK
) {
3715 zerror(zlogp
, B_TRUE
, "unable to determine zone path");
3719 if (!is_zonepath_zfs(zonepath
))
3722 if ((hdl
= libzfs_init()) == NULL
) {
3723 zerror(zlogp
, B_FALSE
, "opening ZFS library");
3727 if ((zhp
= zfs_path_to_zhandle(hdl
, rootpath
,
3728 ZFS_TYPE_FILESYSTEM
)) == NULL
) {
3729 zerror(zlogp
, B_FALSE
, "cannot open ZFS dataset for path '%s'",
3735 /* Get the mlslabel property if it exists. */
3736 if ((zfs_prop_get(zhp
, ZFS_PROP_MLSLABEL
, ds_hexsl
, MAXNAMELEN
,
3737 NULL
, NULL
, 0, B_TRUE
) != 0) ||
3738 (strcmp(ds_hexsl
, ZFS_MLSLABEL_DEFAULT
) == 0)) {
3742 * No label on the dataset (or default only); create one.
3743 * (Only do this automatic labeling for the labeled brand.)
3745 if (strcmp(brand_name
, LABELED_BRAND_NAME
) != 0) {
3750 error
= l_to_str_internal(zone_sl
, &str2
);
3757 if ((error
= zfs_prop_set(zhp
,
3758 zfs_prop_to_name(ZFS_PROP_MLSLABEL
), str2
)) != 0) {
3759 zerror(zlogp
, B_FALSE
, "cannot set 'mlslabel' "
3760 "property for root dataset at '%s'\n", rootpath
);
3766 /* Convert the retrieved dataset label to binary form. */
3767 error
= hexstr_to_label(ds_hexsl
, &ds_sl
);
3769 zerror(zlogp
, B_FALSE
, "invalid 'mlslabel' "
3770 "property on root dataset at '%s'\n", rootpath
);
3771 goto out
; /* exit with error */
3775 * Perform a MAC check by comparing the zone label with the
3778 error
= (!blequal(zone_sl
, &ds_sl
));
3780 zerror(zlogp
, B_FALSE
, "Rootpath dataset has mismatched label");
3789 * Mount lower level home directories into/from current zone
3790 * Share exported directories specified in dfstab for zone
3793 tsol_mounts(zlog_t
*zlogp
, char *zone_name
, char *rootpath
)
3795 zoneid_t
*zids
= NULL
;
3796 priv_set_t
*zid_privs
;
3797 const priv_impl_info_t
*ip
= NULL
;
3798 uint_t nzents_saved
;
3801 char readonly
[] = "ro";
3802 struct zone_fstab lower_fstab
;
3805 if (!is_system_labeled())
3808 if (zid_label
== NULL
) {
3809 zid_label
= m_label_alloc(MAC_LABEL
);
3810 if (zid_label
== NULL
)
3814 /* Make sure our zone has an /export/home dir */
3815 (void) make_one_dir(zlogp
, rootpath
, "/export/home",
3816 DEFAULT_DIR_MODE
, DEFAULT_DIR_USER
, DEFAULT_DIR_GROUP
);
3818 lower_fstab
.zone_fs_raw
[0] = '\0';
3819 (void) strlcpy(lower_fstab
.zone_fs_type
, MNTTYPE_LOFS
,
3820 sizeof (lower_fstab
.zone_fs_type
));
3821 lower_fstab
.zone_fs_options
= NULL
;
3822 (void) zonecfg_add_fs_option(&lower_fstab
, readonly
);
3825 * Get the list of zones from the kernel
3827 if (zone_list(NULL
, &nzents
) != 0) {
3828 zerror(zlogp
, B_TRUE
, "unable to list zones");
3829 zonecfg_free_fs_option_list(lower_fstab
.zone_fs_options
);
3834 zonecfg_free_fs_option_list(lower_fstab
.zone_fs_options
);
3838 zids
= malloc(nzents
* sizeof (zoneid_t
));
3840 zerror(zlogp
, B_TRUE
, "memory allocation failed");
3843 nzents_saved
= nzents
;
3845 if (zone_list(zids
, &nzents
) != 0) {
3846 zerror(zlogp
, B_TRUE
, "unable to list zones");
3847 zonecfg_free_fs_option_list(lower_fstab
.zone_fs_options
);
3851 if (nzents
!= nzents_saved
) {
3852 /* list changed, try again */
3857 ip
= getprivimplinfo();
3858 if ((zid_privs
= priv_allocset()) == NULL
) {
3859 zerror(zlogp
, B_TRUE
, "%s failed", "priv_allocset");
3860 zonecfg_free_fs_option_list(
3861 lower_fstab
.zone_fs_options
);
3866 for (i
= 0; i
< nzents
; i
++) {
3867 char zid_name
[ZONENAME_MAX
];
3868 zone_state_t zid_state
;
3869 char zid_rpath
[MAXPATHLEN
];
3870 struct stat stat_buf
;
3872 if (zids
[i
] == GLOBAL_ZONEID
)
3875 if (getzonenamebyid(zids
[i
], zid_name
, ZONENAME_MAX
) == -1)
3879 * Do special setup for the zone we are booting
3881 if (strcmp(zid_name
, zone_name
) == 0) {
3882 struct zone_fstab autofs_fstab
;
3883 char map_path
[MAXPATHLEN
];
3887 * Create auto_home_<zone> map for this zone
3888 * in the global zone. The non-global zone entry
3889 * will be created by automount when the zone
3893 (void) snprintf(autofs_fstab
.zone_fs_special
,
3894 MAXPATHLEN
, "auto_home_%s", zid_name
);
3896 (void) snprintf(autofs_fstab
.zone_fs_dir
, MAXPATHLEN
,
3897 "/zone/%s/home", zid_name
);
3899 (void) snprintf(map_path
, sizeof (map_path
),
3900 "/etc/%s", autofs_fstab
.zone_fs_special
);
3902 * If the map file doesn't exist create a template
3904 if ((fd
= open(map_path
, O_RDWR
| O_CREAT
| O_EXCL
,
3905 S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IROTH
)) != -1) {
3907 char map_rec
[MAXPATHLEN
];
3909 len
= snprintf(map_rec
, sizeof (map_rec
),
3910 "+%s\n*\t-fstype=lofs\t:%s/export/home/&\n",
3911 autofs_fstab
.zone_fs_special
, rootpath
);
3912 (void) write(fd
, map_rec
, len
);
3917 * Mount auto_home_<zone> in the global zone if absent.
3918 * If it's already of type autofs, then
3919 * don't mount it again.
3921 if ((stat(autofs_fstab
.zone_fs_dir
, &stat_buf
) == -1) ||
3922 strcmp(stat_buf
.st_fstype
, MNTTYPE_AUTOFS
) != 0) {
3923 char optstr
[] = "indirect,ignore,nobrowse";
3925 (void) make_one_dir(zlogp
, "",
3926 autofs_fstab
.zone_fs_dir
, DEFAULT_DIR_MODE
,
3927 DEFAULT_DIR_USER
, DEFAULT_DIR_GROUP
);
3930 * Mount will fail if automounter has already
3931 * processed the auto_home_<zonename> map
3933 (void) domount(zlogp
, MNTTYPE_AUTOFS
, optstr
,
3934 autofs_fstab
.zone_fs_special
,
3935 autofs_fstab
.zone_fs_dir
);
3941 if (zone_get_state(zid_name
, &zid_state
) != Z_OK
||
3942 (zid_state
!= ZONE_STATE_READY
&&
3943 zid_state
!= ZONE_STATE_RUNNING
))
3944 /* Skip over zones without mounted filesystems */
3947 if (zone_getattr(zids
[i
], ZONE_ATTR_SLBL
, zid_label
,
3948 sizeof (m_label_t
)) < 0)
3949 /* Skip over zones with unspecified label */
3952 if (zone_getattr(zids
[i
], ZONE_ATTR_ROOT
, zid_rpath
,
3953 sizeof (zid_rpath
)) == -1)
3954 /* Skip over zones with bad path */
3957 if (zone_getattr(zids
[i
], ZONE_ATTR_PRIVSET
, zid_privs
,
3958 sizeof (priv_chunk_t
) * ip
->priv_setsize
) == -1)
3959 /* Skip over zones with bad privs */
3963 * Reading down is valid according to our label model
3964 * but some customers want to disable it because it
3965 * allows execute down and other possible attacks.
3966 * Therefore, we restrict this feature to zones that
3967 * have the NET_MAC_AWARE privilege which is required
3968 * for NFS read-down semantics.
3970 if ((bldominates(zlabel
, zid_label
)) &&
3971 (priv_ismember(zprivs
, PRIV_NET_MAC_AWARE
))) {
3973 * Our zone dominates this one.
3974 * Create a lofs mount from lower zone's /export/home
3976 (void) snprintf(lower_fstab
.zone_fs_dir
, MAXPATHLEN
,
3977 "%s/zone/%s/export/home", rootpath
, zid_name
);
3980 * If the target is already an LOFS mount
3981 * then don't do it again.
3983 if ((stat(lower_fstab
.zone_fs_dir
, &stat_buf
) == -1) ||
3984 strcmp(stat_buf
.st_fstype
, MNTTYPE_LOFS
) != 0) {
3986 if (snprintf(lower_fstab
.zone_fs_special
,
3987 MAXPATHLEN
, "%s/export",
3988 zid_rpath
) > MAXPATHLEN
)
3992 * Make sure the lower-level home exists
3994 if (make_one_dir(zlogp
,
3995 lower_fstab
.zone_fs_special
, "/home",
3996 DEFAULT_DIR_MODE
, DEFAULT_DIR_USER
,
3997 DEFAULT_DIR_GROUP
) != 0)
4000 (void) strlcat(lower_fstab
.zone_fs_special
,
4001 "/home", MAXPATHLEN
);
4004 * Mount can fail because the lower-level
4005 * zone may have already done a mount up.
4007 (void) mount_one(zlogp
, &lower_fstab
, "",
4010 } else if ((bldominates(zid_label
, zlabel
)) &&
4011 (priv_ismember(zid_privs
, PRIV_NET_MAC_AWARE
))) {
4013 * This zone dominates our zone.
4014 * Create a lofs mount from our zone's /export/home
4016 if (snprintf(lower_fstab
.zone_fs_dir
, MAXPATHLEN
,
4017 "%s/zone/%s/export/home", zid_rpath
,
4018 zone_name
) > MAXPATHLEN
)
4022 * If the target is already an LOFS mount
4023 * then don't do it again.
4025 if ((stat(lower_fstab
.zone_fs_dir
, &stat_buf
) == -1) ||
4026 strcmp(stat_buf
.st_fstype
, MNTTYPE_LOFS
) != 0) {
4028 (void) snprintf(lower_fstab
.zone_fs_special
,
4029 MAXPATHLEN
, "%s/export/home", rootpath
);
4032 * Mount can fail because the higher-level
4033 * zone may have already done a mount down.
4035 (void) mount_one(zlogp
, &lower_fstab
, "",
4040 zonecfg_free_fs_option_list(lower_fstab
.zone_fs_options
);
4041 priv_freeset(zid_privs
);
4045 * Now share any exported directories from this zone.
4046 * Each zone can have its own dfstab.
4049 argv
[0] = "zoneshare";
4051 argv
[2] = zone_name
;
4054 (void) forkexec(zlogp
, "/usr/lib/zones/zoneshare", argv
);
4055 /* Don't check for errors since they don't affect the zone */
4061 * Unmount lofs mounts from higher level zones
4062 * Unshare nfs exported directories
4065 tsol_unmounts(zlog_t
*zlogp
, char *zone_name
)
4067 zoneid_t
*zids
= NULL
;
4068 uint_t nzents_saved
;
4072 char path
[MAXPATHLEN
];
4074 if (!is_system_labeled())
4078 * Get the list of zones from the kernel
4080 if (zone_list(NULL
, &nzents
) != 0) {
4084 if (zid_label
== NULL
) {
4085 zid_label
= m_label_alloc(MAC_LABEL
);
4086 if (zid_label
== NULL
)
4094 zids
= malloc(nzents
* sizeof (zoneid_t
));
4096 zerror(zlogp
, B_TRUE
, "memory allocation failed");
4099 nzents_saved
= nzents
;
4101 if (zone_list(zids
, &nzents
) != 0) {
4105 if (nzents
!= nzents_saved
) {
4106 /* list changed, try again */
4111 for (i
= 0; i
< nzents
; i
++) {
4112 char zid_name
[ZONENAME_MAX
];
4113 zone_state_t zid_state
;
4114 char zid_rpath
[MAXPATHLEN
];
4116 if (zids
[i
] == GLOBAL_ZONEID
)
4119 if (getzonenamebyid(zids
[i
], zid_name
, ZONENAME_MAX
) == -1)
4123 * Skip the zone we are halting
4125 if (strcmp(zid_name
, zone_name
) == 0)
4128 if ((zone_getattr(zids
[i
], ZONE_ATTR_STATUS
, &zid_state
,
4129 sizeof (zid_state
)) < 0) ||
4130 (zid_state
< ZONE_IS_READY
))
4131 /* Skip over zones without mounted filesystems */
4134 if (zone_getattr(zids
[i
], ZONE_ATTR_SLBL
, zid_label
,
4135 sizeof (m_label_t
)) < 0)
4136 /* Skip over zones with unspecified label */
4139 if (zone_getattr(zids
[i
], ZONE_ATTR_ROOT
, zid_rpath
,
4140 sizeof (zid_rpath
)) == -1)
4141 /* Skip over zones with bad path */
4144 if (zlabel
!= NULL
&& bldominates(zid_label
, zlabel
)) {
4146 * This zone dominates our zone.
4147 * Unmount the lofs mount of our zone's /export/home
4150 if (snprintf(path
, MAXPATHLEN
,
4151 "%s/zone/%s/export/home", zid_rpath
,
4152 zone_name
) > MAXPATHLEN
)
4155 /* Skip over mount failures */
4156 (void) umount(path
);
4162 * Unmount global zone autofs trigger for this zone
4164 (void) snprintf(path
, MAXPATHLEN
, "/zone/%s/home", zone_name
);
4165 /* Skip over mount failures */
4166 (void) umount(path
);
4169 * Next unshare any exported directories from this zone.
4172 argv
[0] = "zoneunshare";
4174 argv
[2] = zone_name
;
4177 (void) forkexec(zlogp
, "/usr/lib/zones/zoneunshare", argv
);
4178 /* Don't check for errors since they don't affect the zone */
4181 * Finally, deallocate any devices in the zone.
4184 argv
[0] = "deallocate";
4186 argv
[2] = zone_name
;
4189 (void) forkexec(zlogp
, "/usr/sbin/deallocate", argv
);
4190 /* Don't check for errors since they don't affect the zone */
4194 * Fetch the Trusted Extensions label and multi-level ports (MLPs) for
4197 static tsol_zcent_t
*
4198 get_zone_label(zlog_t
*zlogp
, priv_set_t
*privs
)
4201 tsol_zcent_t
*zcent
= NULL
;
4202 char line
[MAXTNZLEN
];
4204 if ((fp
= fopen(TNZONECFG_PATH
, "r")) == NULL
) {
4205 zerror(zlogp
, B_TRUE
, "%s", TNZONECFG_PATH
);
4209 while (fgets(line
, sizeof (line
), fp
) != NULL
) {
4211 * Check for malformed database
4213 if (strlen(line
) == MAXTNZLEN
- 1)
4215 if ((zcent
= tsol_sgetzcent(line
, NULL
, NULL
)) == NULL
)
4217 if (strcmp(zcent
->zc_name
, zone_name
) == 0)
4219 tsol_freezcent(zcent
);
4224 if (zcent
== NULL
) {
4225 zerror(zlogp
, B_FALSE
, "zone requires a label assignment. "
4226 "See tnzonecfg(4)");
4229 zlabel
= m_label_alloc(MAC_LABEL
);
4231 * Save this zone's privileges for later read-down processing
4233 if ((zprivs
= priv_allocset()) == NULL
) {
4234 zerror(zlogp
, B_TRUE
, "%s failed", "priv_allocset");
4237 priv_copyset(privs
, zprivs
);
4244 * Add the Trusted Extensions multi-level ports for this zone.
4247 set_mlps(zlog_t
*zlogp
, zoneid_t zoneid
, tsol_zcent_t
*zcent
)
4252 if (!is_system_labeled())
4255 tsme
.tsme_zoneid
= zoneid
;
4256 tsme
.tsme_flags
= 0;
4257 for (mlp
= zcent
->zc_private_mlp
; !TSOL_MLP_END(mlp
); mlp
++) {
4258 tsme
.tsme_mlp
= *mlp
;
4259 if (tnmlp(TNDB_LOAD
, &tsme
) != 0) {
4260 zerror(zlogp
, B_TRUE
, "cannot set zone-specific MLP "
4261 "on %d-%d/%d", mlp
->mlp_port
,
4262 mlp
->mlp_port_upper
, mlp
->mlp_ipp
);
4266 tsme
.tsme_flags
= TSOL_MEF_SHARED
;
4267 for (mlp
= zcent
->zc_shared_mlp
; !TSOL_MLP_END(mlp
); mlp
++) {
4268 tsme
.tsme_mlp
= *mlp
;
4269 if (tnmlp(TNDB_LOAD
, &tsme
) != 0) {
4270 zerror(zlogp
, B_TRUE
, "cannot set shared MLP "
4271 "on %d-%d/%d", mlp
->mlp_port
,
4272 mlp
->mlp_port_upper
, mlp
->mlp_ipp
);
4278 remove_mlps(zlog_t
*zlogp
, zoneid_t zoneid
)
4282 if (!is_system_labeled())
4285 (void) memset(&tsme
, 0, sizeof (tsme
));
4286 tsme
.tsme_zoneid
= zoneid
;
4287 if (tnmlp(TNDB_FLUSH
, &tsme
) != 0)
4288 zerror(zlogp
, B_TRUE
, "cannot flush MLPs");
4292 prtmount(const struct mnttab
*fs
, void *x
) {
4293 zerror((zlog_t
*)x
, B_FALSE
, " %s", fs
->mnt_mountp
);
4298 * Look for zones running on the main system that are using this root (or any
4299 * subdirectory of it). Return B_TRUE and print an error if a conflicting zone
4300 * is found or if we can't tell.
4303 duplicate_zone_root(zlog_t
*zlogp
, const char *rootpath
)
4305 zoneid_t
*zids
= NULL
;
4309 char zroot
[MAXPATHLEN
];
4310 char zonename
[ZONENAME_MAX
];
4314 zids
= malloc(nzids
* sizeof (*zids
));
4316 zerror(zlogp
, B_TRUE
, "memory allocation failed");
4319 if (zone_list(zids
, &nzids
) == 0)
4324 rlen
= strlen(rootpath
);
4327 * Ignore errors; they just mean that the zone has disappeared
4328 * while we were busy.
4330 if (zone_getattr(zids
[--nzids
], ZONE_ATTR_ROOT
, zroot
,
4331 sizeof (zroot
)) == -1)
4333 zlen
= strlen(zroot
);
4336 if (strncmp(rootpath
, zroot
, zlen
) == 0 &&
4337 (zroot
[zlen
] == '\0' || zroot
[zlen
] == '/') &&
4338 (rootpath
[zlen
] == '\0' || rootpath
[zlen
] == '/')) {
4339 if (getzonenamebyid(zids
[nzids
], zonename
,
4340 sizeof (zonename
)) == -1)
4341 (void) snprintf(zonename
, sizeof (zonename
),
4342 "id %d", (int)zids
[nzids
]);
4343 zerror(zlogp
, B_FALSE
,
4344 "zone root %s already in use by zone %s",
4345 rootpath
, zonename
);
4355 * Search for loopback mounts that use this same source node (same device and
4356 * inode). Return B_TRUE if there is one or if we can't tell.
4359 duplicate_reachable_path(zlog_t
*zlogp
, const char *rootpath
)
4361 struct stat64 rst
, zst
;
4364 if (stat64(rootpath
, &rst
) == -1) {
4365 zerror(zlogp
, B_TRUE
, "can't stat %s", rootpath
);
4368 if (resolve_lofs_mnts
== NULL
&& lofs_read_mnttab(zlogp
) == -1)
4370 for (mnp
= resolve_lofs_mnts
; mnp
< resolve_lofs_mnt_max
; mnp
++) {
4371 if (mnp
->mnt_fstype
== NULL
||
4372 strcmp(MNTTYPE_LOFS
, mnp
->mnt_fstype
) != 0)
4374 /* We're looking at a loopback mount. Stat it. */
4375 if (mnp
->mnt_special
!= NULL
&&
4376 stat64(mnp
->mnt_special
, &zst
) != -1 &&
4377 rst
.st_dev
== zst
.st_dev
&& rst
.st_ino
== zst
.st_ino
) {
4378 zerror(zlogp
, B_FALSE
,
4379 "zone root %s is reachable through %s",
4380 rootpath
, mnp
->mnt_mountp
);
4388 * Set memory cap and pool info for the zone's resource management
4392 setup_zone_rm(zlog_t
*zlogp
, char *zone_name
, zoneid_t zoneid
)
4396 struct zone_mcaptab mcap
;
4397 char sched
[MAXNAMELEN
];
4398 zone_dochandle_t handle
= NULL
;
4401 if ((handle
= zonecfg_init_handle()) == NULL
) {
4402 zerror(zlogp
, B_TRUE
, "getting zone configuration handle");
4403 return (Z_BAD_HANDLE
);
4406 if ((res
= zonecfg_get_snapshot_handle(zone_name
, handle
)) != Z_OK
) {
4407 zerror(zlogp
, B_FALSE
, "invalid configuration");
4408 zonecfg_fini_handle(handle
);
4413 * If a memory cap is configured, set the cap in the kernel using
4414 * zone_setattr() and make sure the rcapd SMF service is enabled.
4416 if (zonecfg_getmcapent(handle
, &mcap
) == Z_OK
) {
4420 num
= (uint64_t)strtoull(mcap
.zone_physmem_cap
, NULL
, 10);
4421 if (zone_setattr(zoneid
, ZONE_ATTR_PHYS_MCAP
, &num
, 0) == -1) {
4422 zerror(zlogp
, B_TRUE
, "could not set zone memory cap");
4423 zonecfg_fini_handle(handle
);
4427 if (zonecfg_enable_rcapd(smf_err
, sizeof (smf_err
)) != Z_OK
) {
4428 zerror(zlogp
, B_FALSE
, "enabling system/rcap service "
4429 "failed: %s", smf_err
);
4430 zonecfg_fini_handle(handle
);
4435 /* Get the scheduling class set in the zone configuration. */
4436 if (zonecfg_get_sched_class(handle
, sched
, sizeof (sched
)) == Z_OK
&&
4437 strlen(sched
) > 0) {
4438 if (zone_setattr(zoneid
, ZONE_ATTR_SCHED_CLASS
, sched
,
4439 strlen(sched
)) == -1)
4440 zerror(zlogp
, B_TRUE
, "WARNING: unable to set the "
4441 "default scheduling class");
4443 } else if (zonecfg_get_aliased_rctl(handle
, ALIAS_SHARES
, &tmp
)
4446 * If the zone has the zone.cpu-shares rctl set then we want to
4447 * use the Fair Share Scheduler (FSS) for processes in the
4448 * zone. Check what scheduling class the zone would be running
4449 * in by default so we can print a warning and modify the class
4450 * if we wouldn't be using FSS.
4452 char class_name
[PC_CLNMSZ
];
4454 if (zonecfg_get_dflt_sched_class(handle
, class_name
,
4455 sizeof (class_name
)) != Z_OK
) {
4456 zerror(zlogp
, B_FALSE
, "WARNING: unable to determine "
4457 "the zone's scheduling class");
4459 } else if (strcmp("FSS", class_name
) != 0) {
4460 zerror(zlogp
, B_FALSE
, "WARNING: The zone.cpu-shares "
4461 "rctl is set but\nFSS is not the default "
4462 "scheduling class for\nthis zone. FSS will be "
4463 "used for processes\nin the zone but to get the "
4464 "full benefit of FSS,\nit should be the default "
4465 "scheduling class.\nSee dispadmin(1M) for more "
4468 if (zone_setattr(zoneid
, ZONE_ATTR_SCHED_CLASS
, "FSS",
4469 strlen("FSS")) == -1)
4470 zerror(zlogp
, B_TRUE
, "WARNING: unable to set "
4471 "zone scheduling class to FSS");
4476 * The next few blocks of code attempt to set up temporary pools as
4477 * well as persistent pools. In all cases we call the functions
4478 * unconditionally. Within each funtion the code will check if the
4479 * zone is actually configured for a temporary pool or persistent pool
4480 * and just return if there is nothing to do.
4482 * If we are rebooting we want to attempt to reuse any temporary pool
4483 * that was previously set up. zonecfg_bind_tmp_pool() will do the
4484 * right thing in all cases (reuse or create) based on the current
4487 if ((res
= zonecfg_bind_tmp_pool(handle
, zoneid
, pool_err
,
4488 sizeof (pool_err
))) != Z_OK
) {
4489 if (res
== Z_POOL
|| res
== Z_POOL_CREATE
|| res
== Z_POOL_BIND
)
4490 zerror(zlogp
, B_FALSE
, "%s: %s\ndedicated-cpu setting "
4491 "cannot be instantiated", zonecfg_strerror(res
),
4494 zerror(zlogp
, B_FALSE
, "could not bind zone to "
4495 "temporary pool: %s", zonecfg_strerror(res
));
4496 zonecfg_fini_handle(handle
);
4497 return (Z_POOL_BIND
);
4501 * Check if we need to warn about poold not being enabled.
4503 if (zonecfg_warn_poold(handle
)) {
4504 zerror(zlogp
, B_FALSE
, "WARNING: A range of dedicated-cpus has "
4505 "been specified\nbut the dynamic pool service is not "
4506 "enabled.\nThe system will not dynamically adjust the\n"
4507 "processor allocation within the specified range\n"
4508 "until svc:/system/pools/dynamic is enabled.\n"
4512 /* The following is a warning, not an error. */
4513 if ((res
= zonecfg_bind_pool(handle
, zoneid
, pool_err
,
4514 sizeof (pool_err
))) != Z_OK
) {
4515 if (res
== Z_POOL_BIND
)
4516 zerror(zlogp
, B_FALSE
, "WARNING: unable to bind to "
4517 "pool '%s'; using default pool.", pool_err
);
4518 else if (res
== Z_POOL
)
4519 zerror(zlogp
, B_FALSE
, "WARNING: %s: %s",
4520 zonecfg_strerror(res
), pool_err
);
4522 zerror(zlogp
, B_FALSE
, "WARNING: %s",
4523 zonecfg_strerror(res
));
4526 /* Update saved pool name in case it has changed */
4527 (void) zonecfg_get_poolname(handle
, zone_name
, pool_name
,
4528 sizeof (pool_name
));
4530 zonecfg_fini_handle(handle
);
4535 report_prop_err(zlog_t
*zlogp
, const char *name
, const char *value
, int res
)
4539 zerror(zlogp
, B_FALSE
, "%s property value is too large.", name
);
4542 case Z_INVALID_PROPERTY
:
4543 zerror(zlogp
, B_FALSE
, "%s property value \"%s\" is not valid",
4548 zerror(zlogp
, B_TRUE
, "fetching property %s: %d", name
, res
);
4554 * Sets the hostid of the new zone based on its configured value. The zone's
4555 * zone_t structure must already exist in kernel memory. 'zlogp' refers to the
4556 * log used to report errors and warnings and must be non-NULL. 'zone_namep'
4557 * is the name of the new zone and must be non-NULL. 'zoneid' is the numeric
4558 * ID of the new zone.
4560 * This function returns zero on success and a nonzero error code on failure.
4563 setup_zone_hostid(zone_dochandle_t handle
, zlog_t
*zlogp
, zoneid_t zoneid
)
4566 char hostidp
[HW_HOSTID_LEN
];
4567 unsigned int hostid
;
4569 res
= zonecfg_get_hostid(handle
, hostidp
, sizeof (hostidp
));
4571 if (res
== Z_BAD_PROPERTY
) {
4573 } else if (res
!= Z_OK
) {
4574 report_prop_err(zlogp
, "hostid", hostidp
, res
);
4578 hostid
= (unsigned int)strtoul(hostidp
, NULL
, 16);
4579 if ((res
= zone_setattr(zoneid
, ZONE_ATTR_HOSTID
, &hostid
,
4580 sizeof (hostid
))) != 0) {
4581 zerror(zlogp
, B_TRUE
,
4582 "zone hostid is not valid: %s: %d", hostidp
, res
);
4590 setup_zone_fs_allowed(zone_dochandle_t handle
, zlog_t
*zlogp
, zoneid_t zoneid
)
4592 char fsallowedp
[ZONE_FS_ALLOWED_MAX
];
4595 res
= zonecfg_get_fs_allowed(handle
, fsallowedp
, sizeof (fsallowedp
));
4597 if (res
== Z_BAD_PROPERTY
) {
4599 } else if (res
!= Z_OK
) {
4600 report_prop_err(zlogp
, "fs-allowed", fsallowedp
, res
);
4604 if (zone_setattr(zoneid
, ZONE_ATTR_FS_ALLOWED
, &fsallowedp
,
4605 sizeof (fsallowedp
)) != 0) {
4606 zerror(zlogp
, B_TRUE
,
4607 "fs-allowed couldn't be set: %s: %d", fsallowedp
, res
);
4615 setup_zone_attrs(zlog_t
*zlogp
, char *zone_namep
, zoneid_t zoneid
)
4617 zone_dochandle_t handle
;
4620 if ((handle
= zonecfg_init_handle()) == NULL
) {
4621 zerror(zlogp
, B_TRUE
, "getting zone configuration handle");
4622 return (Z_BAD_HANDLE
);
4624 if ((res
= zonecfg_get_snapshot_handle(zone_namep
, handle
)) != Z_OK
) {
4625 zerror(zlogp
, B_FALSE
, "invalid configuration");
4629 if ((res
= setup_zone_hostid(handle
, zlogp
, zoneid
)) != Z_OK
)
4632 if ((res
= setup_zone_fs_allowed(handle
, zlogp
, zoneid
)) != Z_OK
)
4636 zonecfg_fini_handle(handle
);
4641 vplat_create(zlog_t
*zlogp
, zone_mnt_t mount_cmd
)
4645 char rootpath
[MAXPATHLEN
];
4646 char *rctlbuf
= NULL
;
4647 size_t rctlbufsz
= 0;
4648 char *zfsbuf
= NULL
;
4649 size_t zfsbufsz
= 0;
4650 zoneid_t zoneid
= -1;
4654 tsol_zcent_t
*zcent
= NULL
;
4658 zone_iptype_t iptype
;
4660 if (zone_get_rootpath(zone_name
, rootpath
, sizeof (rootpath
)) != Z_OK
) {
4661 zerror(zlogp
, B_TRUE
, "unable to determine zone root");
4664 if (zonecfg_in_alt_root())
4665 resolve_lofs(zlogp
, rootpath
, sizeof (rootpath
));
4667 if (vplat_get_iptype(zlogp
, &iptype
) < 0) {
4668 zerror(zlogp
, B_TRUE
, "unable to determine ip-type");
4676 flags
= ZCF_NET_EXCL
;
4680 if ((privs
= priv_allocset()) == NULL
) {
4681 zerror(zlogp
, B_TRUE
, "%s failed", "priv_allocset");
4684 priv_emptyset(privs
);
4685 if (get_privset(zlogp
, privs
, mount_cmd
) != 0)
4688 if (mount_cmd
== Z_MNT_BOOT
&&
4689 get_rctls(zlogp
, &rctlbuf
, &rctlbufsz
) != 0) {
4690 zerror(zlogp
, B_FALSE
, "Unable to get list of rctls");
4694 if (get_datasets(zlogp
, &zfsbuf
, &zfsbufsz
) != 0) {
4695 zerror(zlogp
, B_FALSE
, "Unable to get list of ZFS datasets");
4699 if (mount_cmd
== Z_MNT_BOOT
&& is_system_labeled()) {
4700 zcent
= get_zone_label(zlogp
, privs
);
4701 if (zcent
!= NULL
) {
4702 match
= zcent
->zc_match
;
4703 doi
= zcent
->zc_doi
;
4704 *zlabel
= zcent
->zc_label
;
4708 if (validate_rootds_label(zlogp
, rootpath
, zlabel
) != 0)
4715 * We must do this scan twice. First, we look for zones running on the
4716 * main system that are using this root (or any subdirectory of it).
4717 * Next, we reduce to the shortest path and search for loopback mounts
4718 * that use this same source node (same device and inode).
4720 if (duplicate_zone_root(zlogp
, rootpath
))
4722 if (duplicate_reachable_path(zlogp
, rootpath
))
4725 if (ALT_MOUNT(mount_cmd
)) {
4726 root_to_lu(zlogp
, rootpath
, sizeof (rootpath
), B_TRUE
);
4729 * Forge up a special root for this zone. When a zone is
4730 * mounted, we can't let the zone have its own root because the
4731 * tools that will be used in this "scratch zone" need access
4732 * to both the zone's resources and the running machine's
4735 * Note that the mkdir here also catches read-only filesystems.
4737 if (mkdir(rootpath
, 0755) != 0 && errno
!= EEXIST
) {
4738 zerror(zlogp
, B_TRUE
, "cannot create %s", rootpath
);
4741 if (domount(zlogp
, "tmpfs", "", "swap", rootpath
) != 0)
4745 if (zonecfg_in_alt_root()) {
4747 * If we are mounting up a zone in an alternate root partition,
4748 * then we have some additional work to do before starting the
4749 * zone. First, resolve the root path down so that we're not
4750 * fooled by duplicates. Then forge up an internal name for
4753 if ((fp
= zonecfg_open_scratch("", B_TRUE
)) == NULL
) {
4754 zerror(zlogp
, B_TRUE
, "cannot open mapfile");
4757 if (zonecfg_lock_scratch(fp
) != 0) {
4758 zerror(zlogp
, B_TRUE
, "cannot lock mapfile");
4761 if (zonecfg_find_scratch(fp
, zone_name
, zonecfg_get_root(),
4763 zerror(zlogp
, B_FALSE
, "scratch zone already running");
4766 /* This is the preferred name */
4767 (void) snprintf(kernzone
, sizeof (kernzone
), "SUNWlu-%s",
4770 while (zonecfg_reverse_scratch(fp
, kernzone
, NULL
, 0, NULL
,
4772 /* This is just an arbitrary name; note "." usage */
4773 (void) snprintf(kernzone
, sizeof (kernzone
),
4774 "SUNWlu.%08lX%08lX", random(), random());
4780 if ((zoneid
= zone_create(kzone
, rootpath
, privs
, rctlbuf
,
4781 rctlbufsz
, zfsbuf
, zfsbufsz
, &xerr
, match
, doi
, zlabel
,
4783 if (xerr
== ZE_AREMOUNTS
) {
4784 if (zonecfg_find_mounts(rootpath
, NULL
, NULL
) < 1) {
4785 zerror(zlogp
, B_FALSE
,
4786 "An unknown file-system is mounted on "
4787 "a subdirectory of %s", rootpath
);
4790 zerror(zlogp
, B_FALSE
,
4791 "These file-systems are mounted on "
4792 "subdirectories of %s:", rootpath
);
4793 (void) zonecfg_find_mounts(rootpath
,
4796 } else if (xerr
== ZE_CHROOTED
) {
4797 zerror(zlogp
, B_FALSE
, "%s: "
4798 "cannot create a zone from a chrooted "
4799 "environment", "zone_create");
4800 } else if (xerr
== ZE_LABELINUSE
) {
4801 char zonename
[ZONENAME_MAX
];
4802 (void) getzonenamebyid(getzoneidbylabel(zlabel
),
4803 zonename
, ZONENAME_MAX
);
4804 zerror(zlogp
, B_FALSE
, "The zone label is already "
4805 "used by the zone '%s'.", zonename
);
4807 zerror(zlogp
, B_TRUE
, "%s failed", "zone_create");
4812 if (zonecfg_in_alt_root() &&
4813 zonecfg_add_scratch(fp
, zone_name
, kernzone
,
4814 zonecfg_get_root()) == -1) {
4815 zerror(zlogp
, B_TRUE
, "cannot add mapfile entry");
4820 * The following actions are not performed when merely mounting a zone
4821 * for administrative use.
4823 if (mount_cmd
== Z_MNT_BOOT
) {
4825 struct brand_attr attr
;
4826 char modname
[MAXPATHLEN
];
4828 if (setup_zone_attrs(zlogp
, zone_name
, zoneid
) != Z_OK
)
4831 if ((bh
= brand_open(brand_name
)) == NULL
) {
4832 zerror(zlogp
, B_FALSE
,
4833 "unable to determine brand name");
4837 if (!is_system_labeled() &&
4838 (strcmp(brand_name
, LABELED_BRAND_NAME
) == 0)) {
4840 zerror(zlogp
, B_FALSE
,
4841 "cannot boot labeled zone on unlabeled system");
4846 * If this brand requires any kernel support, now is the time to
4847 * get it loaded and initialized.
4849 if (brand_get_modname(bh
, modname
, MAXPATHLEN
) < 0) {
4851 zerror(zlogp
, B_FALSE
,
4852 "unable to determine brand kernel module");
4857 if (strlen(modname
) > 0) {
4858 (void) strlcpy(attr
.ba_brandname
, brand_name
,
4859 sizeof (attr
.ba_brandname
));
4860 (void) strlcpy(attr
.ba_modname
, modname
,
4861 sizeof (attr
.ba_modname
));
4862 if (zone_setattr(zoneid
, ZONE_ATTR_BRAND
, &attr
,
4863 sizeof (attr
) != 0)) {
4864 zerror(zlogp
, B_TRUE
,
4865 "could not set zone brand attribute.");
4870 if (setup_zone_rm(zlogp
, zone_name
, zoneid
) != Z_OK
)
4873 set_mlps(zlogp
, zoneid
, zcent
);
4881 (void) zone_shutdown(zoneid
);
4882 (void) zone_destroy(zoneid
);
4884 if (rctlbuf
!= NULL
)
4886 priv_freeset(privs
);
4888 zonecfg_close_scratch(fp
);
4889 lofs_discard_mnttab();
4891 tsol_freezcent(zcent
);
4896 * Enter the zone and write a /etc/zones/index file there. This allows
4897 * libzonecfg (and thus zoneadm) to report the UUID and potentially other zone
4898 * details from inside the zone.
4901 write_index_file(zoneid_t zoneid
)
4905 struct zoneent
*zep
;
4910 char uuidstr
[UUID_PRINTABLE_STRING_LENGTH
];
4912 /* Locate the zone entry in the global zone's index file */
4913 if ((zef
= setzoneent()) == NULL
)
4915 while ((zep
= getzoneent_private(zef
)) != NULL
) {
4916 if (strcmp(zep
->zone_name
, zone_name
) == 0)
4924 if ((tmpl_fd
= init_template()) == -1) {
4929 if ((child
= fork()) == -1) {
4930 (void) ct_tmpl_clear(tmpl_fd
);
4931 (void) close(tmpl_fd
);
4936 /* parent waits for child to finish */
4939 if (contract_latest(&ct
) == -1)
4941 (void) ct_tmpl_clear(tmpl_fd
);
4942 (void) close(tmpl_fd
);
4943 (void) waitpid(child
, NULL
, 0);
4944 (void) contract_abandon_id(ct
);
4948 /* child enters zone and sets up index file */
4949 (void) ct_tmpl_clear(tmpl_fd
);
4950 if (zone_enter(zoneid
) != -1) {
4951 (void) mkdir(ZONE_CONFIG_ROOT
, ZONE_CONFIG_MODE
);
4952 (void) chown(ZONE_CONFIG_ROOT
, ZONE_CONFIG_UID
,
4954 fd
= open(ZONE_INDEX_FILE
, O_WRONLY
|O_CREAT
|O_TRUNC
,
4956 if (fd
!= -1 && (zet
= fdopen(fd
, "w")) != NULL
) {
4957 (void) fchown(fd
, ZONE_INDEX_UID
, ZONE_INDEX_GID
);
4958 if (uuid_is_null(zep
->zone_uuid
))
4961 uuid_unparse(zep
->zone_uuid
, uuidstr
);
4962 (void) fprintf(zet
, "%s:%s:/:%s\n", zep
->zone_name
,
4963 zone_state_str(zep
->zone_state
),
4972 vplat_bringup(zlog_t
*zlogp
, zone_mnt_t mount_cmd
, zoneid_t zoneid
)
4974 char zonepath
[MAXPATHLEN
];
4976 if (mount_cmd
== Z_MNT_BOOT
&& validate_datasets(zlogp
) != 0) {
4977 lofs_discard_mnttab();
4982 * Before we try to mount filesystems we need to create the
4983 * attribute backing store for /dev
4985 if (zone_get_zonepath(zone_name
, zonepath
, sizeof (zonepath
)) != Z_OK
) {
4986 lofs_discard_mnttab();
4989 resolve_lofs(zlogp
, zonepath
, sizeof (zonepath
));
4991 /* Make /dev directory owned by root, grouped sys */
4992 if (make_one_dir(zlogp
, zonepath
, "/dev", DEFAULT_DIR_MODE
,
4994 lofs_discard_mnttab();
4998 if (mount_filesystems(zlogp
, mount_cmd
) != 0) {
4999 lofs_discard_mnttab();
5003 if (mount_cmd
== Z_MNT_BOOT
) {
5004 zone_iptype_t iptype
;
5006 if (vplat_get_iptype(zlogp
, &iptype
) < 0) {
5007 zerror(zlogp
, B_TRUE
, "unable to determine ip-type");
5008 lofs_discard_mnttab();
5014 /* Always do this to make lo0 get configured */
5015 if (configure_shared_network_interfaces(zlogp
) != 0) {
5016 lofs_discard_mnttab();
5021 if (configure_exclusive_network_interfaces(zlogp
,
5024 lofs_discard_mnttab();
5031 write_index_file(zoneid
);
5033 lofs_discard_mnttab();
5038 lu_root_teardown(zlog_t
*zlogp
)
5040 char zroot
[MAXPATHLEN
];
5042 if (zone_get_rootpath(zone_name
, zroot
, sizeof (zroot
)) != Z_OK
) {
5043 zerror(zlogp
, B_FALSE
, "unable to determine zone root");
5046 root_to_lu(zlogp
, zroot
, sizeof (zroot
), B_FALSE
);
5049 * At this point, the processes are gone, the filesystems (save the
5050 * root) are unmounted, and the zone is on death row. But there may
5051 * still be creds floating about in the system that reference the
5052 * zone_t, and which pin down zone_rootvp causing this call to fail
5053 * with EBUSY. Thus, we try for a little while before just giving up.
5054 * (How I wish this were not true, and umount2 just did the right
5055 * thing, or tmpfs supported MS_FORCE This is a gross hack.)
5057 if (umount2(zroot
, MS_FORCE
) != 0) {
5058 if (errno
== ENOTSUP
&& umount2(zroot
, 0) == 0)
5060 if (errno
== EBUSY
) {
5063 while (--tries
>= 0) {
5065 if (umount2(zroot
, 0) == 0)
5071 zerror(zlogp
, B_TRUE
, "unable to unmount '%s'", zroot
);
5077 * Only zones in an alternate root environment have scratch zone
5080 if (zonecfg_in_alt_root()) {
5084 if ((fp
= zonecfg_open_scratch("", B_FALSE
)) == NULL
) {
5085 zerror(zlogp
, B_TRUE
, "cannot open mapfile");
5089 if (zonecfg_lock_scratch(fp
) != 0)
5090 zerror(zlogp
, B_TRUE
, "cannot lock mapfile");
5091 else if (zonecfg_delete_scratch(fp
, kernzone
) != 0)
5092 zerror(zlogp
, B_TRUE
, "cannot delete map entry");
5095 zonecfg_close_scratch(fp
);
5103 vplat_teardown(zlog_t
*zlogp
, boolean_t unmount_cmd
, boolean_t rebooting
)
5109 char zpath
[MAXPATHLEN
];
5110 char cmdbuf
[MAXPATHLEN
];
5111 brand_handle_t bh
= NULL
;
5112 dladm_status_t status
;
5113 char errmsg
[DLADM_STRSIZE
];
5117 if (zonecfg_in_alt_root()) {
5120 if ((fp
= zonecfg_open_scratch("", B_FALSE
)) == NULL
) {
5121 zerror(zlogp
, B_TRUE
, "unable to open map file");
5124 if (zonecfg_find_scratch(fp
, zone_name
, zonecfg_get_root(),
5125 kernzone
, sizeof (kernzone
)) != 0) {
5126 zerror(zlogp
, B_FALSE
, "unable to find scratch zone");
5127 zonecfg_close_scratch(fp
);
5130 zonecfg_close_scratch(fp
);
5134 if ((zoneid
= getzoneidbyname(kzone
)) == ZONE_ID_UNDEFINED
) {
5135 if (!bringup_failure_recovery
)
5136 zerror(zlogp
, B_TRUE
, "unable to get zoneid");
5138 (void) lu_root_teardown(zlogp
);
5142 if (remove_datalink_pool(zlogp
, zoneid
) != 0) {
5143 zerror(zlogp
, B_FALSE
, "unable clear datalink pool property");
5147 if (remove_datalink_protect(zlogp
, zoneid
) != 0) {
5148 zerror(zlogp
, B_FALSE
,
5149 "unable clear datalink protect property");
5154 * The datalinks assigned to the zone will be removed from the NGZ as
5155 * part of zone_shutdown() so that we need to remove protect/pool etc.
5156 * before zone_shutdown(). Even if the shutdown itself fails, the zone
5157 * will not be able to violate any constraints applied because the
5158 * datalinks are no longer available to the zone.
5160 if (zone_shutdown(zoneid
) != 0) {
5161 zerror(zlogp
, B_TRUE
, "unable to shutdown zone");
5165 /* Get the zonepath of this zone */
5166 if (zone_get_zonepath(zone_name
, zpath
, sizeof (zpath
)) != Z_OK
) {
5167 zerror(zlogp
, B_FALSE
, "unable to determine zone path");
5171 /* Get a handle to the brand info for this zone */
5172 if ((bh
= brand_open(brand_name
)) == NULL
) {
5173 zerror(zlogp
, B_FALSE
, "unable to determine zone brand");
5177 * If there is a brand 'halt' callback, execute it now to give the
5178 * brand a chance to cleanup any custom configuration.
5180 (void) strcpy(cmdbuf
, EXEC_PREFIX
);
5181 if (brand_get_halt(bh
, zone_name
, zpath
, cmdbuf
+ EXEC_LEN
,
5182 sizeof (cmdbuf
) - EXEC_LEN
) < 0) {
5184 zerror(zlogp
, B_FALSE
, "unable to determine branded zone's "
5190 if ((strlen(cmdbuf
) > EXEC_LEN
) &&
5191 (do_subproc(zlogp
, cmdbuf
, NULL
) != Z_OK
)) {
5192 zerror(zlogp
, B_FALSE
, "%s failed", cmdbuf
);
5197 zone_iptype_t iptype
;
5199 if (zone_getattr(zoneid
, ZONE_ATTR_FLAGS
, &flags
,
5200 sizeof (flags
)) < 0) {
5201 if (vplat_get_iptype(zlogp
, &iptype
) < 0) {
5202 zerror(zlogp
, B_TRUE
, "unable to determine "
5207 if (flags
& ZF_NET_EXCL
)
5208 iptype
= ZS_EXCLUSIVE
;
5215 if (unconfigure_shared_network_interfaces(zlogp
,
5217 zerror(zlogp
, B_FALSE
, "unable to unconfigure "
5218 "network interfaces in zone");
5223 if (unconfigure_exclusive_network_interfaces(zlogp
,
5225 zerror(zlogp
, B_FALSE
, "unable to unconfigure "
5226 "network interfaces in zone");
5229 status
= dladm_zone_halt(dld_handle
, zoneid
);
5230 if (status
!= DLADM_STATUS_OK
) {
5231 zerror(zlogp
, B_FALSE
, "unable to notify "
5232 "dlmgmtd of zone halt: %s",
5233 dladm_status2str(status
, errmsg
));
5239 if (!unmount_cmd
&& tcp_abort_connections(zlogp
, zoneid
) != 0) {
5240 zerror(zlogp
, B_TRUE
, "unable to abort TCP connections");
5244 if (unmount_filesystems(zlogp
, zoneid
, unmount_cmd
) != 0) {
5245 zerror(zlogp
, B_FALSE
,
5246 "unable to unmount file systems in zone");
5251 * If we are rebooting then we normally don't want to destroy an
5252 * existing temporary pool at this point so that we can just reuse it
5253 * when the zone boots back up. However, it is also possible we were
5254 * running with a temporary pool and the zone configuration has been
5255 * modified to no longer use a temporary pool. In that case we need
5256 * to destroy the temporary pool now. This case looks like the case
5257 * where we never had a temporary pool configured but
5258 * zonecfg_destroy_tmp_pool will do the right thing either way.
5261 boolean_t destroy_tmp_pool
= B_TRUE
;
5264 struct zone_psettab pset_tab
;
5265 zone_dochandle_t handle
;
5267 if ((handle
= zonecfg_init_handle()) != NULL
&&
5268 zonecfg_get_handle(zone_name
, handle
) == Z_OK
&&
5269 zonecfg_lookup_pset(handle
, &pset_tab
) == Z_OK
)
5270 destroy_tmp_pool
= B_FALSE
;
5272 zonecfg_fini_handle(handle
);
5275 if (destroy_tmp_pool
) {
5276 if ((res
= zonecfg_destroy_tmp_pool(zone_name
, pool_err
,
5277 sizeof (pool_err
))) != Z_OK
) {
5279 zerror(zlogp
, B_FALSE
, pool_err
);
5284 remove_mlps(zlogp
, zoneid
);
5286 if (zone_destroy(zoneid
) != 0) {
5287 zerror(zlogp
, B_TRUE
, "unable to destroy zone");
5292 * Special teardown for alternate boot environments: remove the tmpfs
5293 * root for the zone and then remove it from the map file.
5295 if (unmount_cmd
&& lu_root_teardown(zlogp
) != 0)
5298 lofs_discard_mnttab();
5302 lofs_discard_mnttab();