2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
11 * Copyright (c) 2006 Victor Balada Diaz <victor@bsdes.net>
12 * All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * $FreeBSD: src/sys/kern/kern_jail.c,v 1.6.2.3 2001/08/17 01:00:26 rwatson Exp $
41 #include "opt_inet6.h"
43 #include <sys/param.h>
44 #include <sys/types.h>
45 #include <sys/kernel.h>
46 #include <sys/systm.h>
47 #include <sys/errno.h>
48 #include <sys/sysmsg.h>
49 #include <sys/malloc.h>
50 #include <sys/nlookup.h>
51 #include <sys/namecache.h>
55 #include <sys/socket.h>
56 #include <sys/sysctl.h>
57 #include <sys/kern_syscall.h>
59 #include <netinet/in.h>
60 #include <netinet6/in6_var.h>
62 static struct prison
*prison_find(int);
63 static void prison_ipcache_init(struct prison
*);
65 __read_mostly
static prison_cap_t prison_default_caps
;
67 MALLOC_DEFINE(M_PRISON
, "prison", "Prison structures");
69 SYSCTL_NODE(, OID_AUTO
, jail
, CTLFLAG_RW
, 0,
70 "All jails settings");
72 SYSCTL_NODE(_jail
, OID_AUTO
, defaults
, CTLFLAG_RW
, 0,
73 "Default options for jails");
75 /*#define PRISON_DEBUG*/
77 __read_mostly
static int prison_debug
;
78 SYSCTL_INT(_jail
, OID_AUTO
, debug
, CTLFLAG_RW
, &prison_debug
, 0,
82 SYSCTL_BIT64(_jail_defaults
, OID_AUTO
, set_hostname_allowed
, CTLFLAG_RW
,
83 &prison_default_caps
, 1, PRISON_CAP_SYS_SET_HOSTNAME
,
84 "Processes in jail can set their hostnames");
86 SYSCTL_BIT64(_jail_defaults
, OID_AUTO
, socket_unixiproute_only
, CTLFLAG_RW
,
87 &prison_default_caps
, 0, PRISON_CAP_NET_UNIXIPROUTE
,
88 "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
90 SYSCTL_BIT64(_jail_defaults
, OID_AUTO
, sysvipc_allowed
, CTLFLAG_RW
,
91 &prison_default_caps
, 0, PRISON_CAP_SYS_SYSVIPC
,
92 "Processes in jail can use System V IPC primitives");
94 SYSCTL_BIT64(_jail_defaults
, OID_AUTO
, chflags_allowed
, CTLFLAG_RW
,
95 &prison_default_caps
, 0, PRISON_CAP_VFS_CHFLAGS
,
96 "Processes in jail can alter system file flags");
98 SYSCTL_BIT64(_jail_defaults
, OID_AUTO
, allow_raw_sockets
, CTLFLAG_RW
,
99 &prison_default_caps
, 0, PRISON_CAP_NET_RAW_SOCKETS
,
100 "Process in jail can create raw sockets");
102 SYSCTL_BIT64(_jail_defaults
, OID_AUTO
, allow_listen_override
, CTLFLAG_RW
,
103 &prison_default_caps
, 0, PRISON_CAP_NET_LISTEN_OVERRIDE
,
104 "Process in jail can override host wildcard listen");
106 SYSCTL_BIT64(_jail_defaults
, OID_AUTO
, vfs_mount_nullfs
, CTLFLAG_RW
,
107 &prison_default_caps
, 0, PRISON_CAP_VFS_MOUNT_NULLFS
,
108 "Process in jail can mount nullfs(5) filesystems");
110 SYSCTL_BIT64(_jail_defaults
, OID_AUTO
, vfs_mount_tmpfs
, CTLFLAG_RW
,
111 &prison_default_caps
, 0, PRISON_CAP_VFS_MOUNT_TMPFS
,
112 "Process in jail can mount tmpfs(5) filesystems");
114 SYSCTL_BIT64(_jail_defaults
, OID_AUTO
, vfs_mount_devfs
, CTLFLAG_RW
,
115 &prison_default_caps
, 0, PRISON_CAP_VFS_MOUNT_DEVFS
,
116 "Process in jail can mount devfs(5) filesystems");
118 SYSCTL_BIT64(_jail_defaults
, OID_AUTO
, vfs_mount_procfs
, CTLFLAG_RW
,
119 &prison_default_caps
, 0, PRISON_CAP_VFS_MOUNT_PROCFS
,
120 "Process in jail can mount procfs(5) filesystems");
122 SYSCTL_BIT64(_jail_defaults
, OID_AUTO
, vfs_mount_fusefs
, CTLFLAG_RW
,
123 &prison_default_caps
, 0, PRISON_CAP_VFS_MOUNT_FUSEFS
,
124 "Process in jail can mount fuse filesystems");
126 static int lastprid
= 0;
127 static int prisoncount
= 0;
129 static struct lock jail_lock
=
130 LOCK_INITIALIZER("jail", 0, LK_CANRECURSE
);
132 LIST_HEAD(prisonlist
, prison
);
133 static struct prisonlist allprison
= LIST_HEAD_INITIALIZER(&allprison
);
136 kern_jail_attach(int jid
)
138 struct proc
*p
= curthread
->td_proc
;
143 pr
= prison_find(jid
);
147 error
= kern_chroot(&pr
->pr_root
);
152 lwkt_gettoken(&p
->p_token
);
155 p
->p_flags
|= P_JAILED
;
156 caps_set_locked(p
, SYSCAP_RESTRICTEDROOT
, __SYSCAP_ALL
);
157 lwkt_reltoken(&p
->p_token
);
163 assign_prison_id(struct prison
*pr
)
168 tryprid
= lastprid
+ 1;
169 if (tryprid
== JAIL_MAX
)
172 lockmgr(&jail_lock
, LK_EXCLUSIVE
);
174 LIST_FOREACH(tpr
, &allprison
, pr_list
) {
175 if (tpr
->pr_id
!= tryprid
)
178 if (tryprid
== JAIL_MAX
) {
179 lockmgr(&jail_lock
, LK_RELEASE
);
184 pr
->pr_id
= lastprid
= tryprid
;
185 lockmgr(&jail_lock
, LK_RELEASE
);
191 kern_jail(struct prison
*pr
, struct jail
*j
)
194 struct nlookupdata nd
;
196 error
= nlookup_init(&nd
, j
->path
, UIO_USERSPACE
, NLC_FOLLOW
);
201 error
= nlookup(&nd
);
206 cache_copy(&nd
.nl_nch
, &pr
->pr_root
);
208 varsymset_init(&pr
->pr_varsymset
, NULL
);
209 prison_ipcache_init(pr
);
211 error
= assign_prison_id(pr
);
213 varsymset_clean(&pr
->pr_varsymset
);
218 lockmgr(&jail_lock
, LK_EXCLUSIVE
);
219 LIST_INSERT_HEAD(&allprison
, pr
, pr_list
);
221 lockmgr(&jail_lock
, LK_RELEASE
);
223 error
= prison_sysctl_create(pr
);
227 error
= kern_jail_attach(pr
->pr_id
);
235 prison_sysctl_done(pr
);
238 lockmgr(&jail_lock
, LK_EXCLUSIVE
);
239 LIST_REMOVE(pr
, pr_list
);
241 lockmgr(&jail_lock
, LK_RELEASE
);
242 varsymset_clean(&pr
->pr_varsymset
);
250 * jail_args(syscallarg(struct jail *) jail)
255 sys_jail(struct sysmsg
*sysmsg
, const struct jail_args
*uap
)
258 struct jail_ip_storage
*jip
;
263 sysmsg
->sysmsg_result
= -1;
265 error
= caps_priv_check_self(SYSCAP_NOJAIL_CREATE
);
269 error
= copyin(uap
->jail
, &jversion
, sizeof(jversion
));
273 pr
= kmalloc(sizeof(*pr
), M_PRISON
, M_WAITOK
| M_ZERO
);
274 SLIST_INIT(&pr
->pr_ips
);
275 lockmgr(&jail_lock
, LK_EXCLUSIVE
);
279 /* Single IPv4 jails. */
282 struct sockaddr_in ip4addr
;
284 error
= copyin(uap
->jail
, &jv0
, sizeof(jv0
));
289 j
.hostname
= jv0
.hostname
;
291 jip
= kmalloc(sizeof(*jip
), M_PRISON
, M_WAITOK
| M_ZERO
);
292 ip4addr
.sin_family
= AF_INET
;
293 ip4addr
.sin_addr
.s_addr
= htonl(jv0
.ip_number
);
294 memcpy(&jip
->ip
, &ip4addr
, sizeof(ip4addr
));
295 SLIST_INSERT_HEAD(&pr
->pr_ips
, jip
, entries
);
301 * DragonFly multi noIP/IPv4/IPv6 jails
303 * NOTE: This version is unsupported by FreeBSD
304 * (which uses version 2 instead).
307 error
= copyin(uap
->jail
, &j
, sizeof(j
));
311 for (int i
= 0; i
< j
.n_ips
; i
++) {
312 jip
= kmalloc(sizeof(*jip
), M_PRISON
,
314 SLIST_INSERT_HEAD(&pr
->pr_ips
, jip
, entries
);
315 error
= copyin(&j
.ips
[i
], &jip
->ip
,
316 sizeof(struct sockaddr_storage
));
326 error
= copyinstr(j
.hostname
, &pr
->pr_host
, sizeof(pr
->pr_host
), 0);
330 /* Use default capabilities as a template */
331 pr
->pr_caps
= prison_default_caps
;
333 error
= kern_jail(pr
, &j
);
337 sysmsg
->sysmsg_result
= pr
->pr_id
;
338 lockmgr(&jail_lock
, LK_RELEASE
);
344 while (!SLIST_EMPTY(&pr
->pr_ips
)) {
345 jip
= SLIST_FIRST(&pr
->pr_ips
);
346 SLIST_REMOVE_HEAD(&pr
->pr_ips
, entries
);
347 kfree(jip
, M_PRISON
);
349 lockmgr(&jail_lock
, LK_RELEASE
);
356 * int jail_attach(int jid);
361 sys_jail_attach(struct sysmsg
*sysmsg
, const struct jail_attach_args
*uap
)
365 error
= caps_priv_check_self(SYSCAP_NOJAIL_ATTACH
);
368 lockmgr(&jail_lock
, LK_EXCLUSIVE
);
369 error
= kern_jail_attach(uap
->jid
);
370 lockmgr(&jail_lock
, LK_RELEASE
);
375 prison_ipcache_init(struct prison
*pr
)
377 struct jail_ip_storage
*jis
;
378 struct sockaddr_in
*ip4
;
379 struct sockaddr_in6
*ip6
;
381 lockmgr(&jail_lock
, LK_EXCLUSIVE
);
382 SLIST_FOREACH(jis
, &pr
->pr_ips
, entries
) {
383 switch (jis
->ip
.ss_family
) {
385 ip4
= (struct sockaddr_in
*)&jis
->ip
;
386 if ((ntohl(ip4
->sin_addr
.s_addr
) >> IN_CLASSA_NSHIFT
) ==
388 /* loopback address */
389 if (pr
->local_ip4
== NULL
)
393 if (pr
->nonlocal_ip4
== NULL
)
394 pr
->nonlocal_ip4
= ip4
;
399 ip6
= (struct sockaddr_in6
*)&jis
->ip
;
400 if (IN6_IS_ADDR_LOOPBACK(&ip6
->sin6_addr
)) {
401 /* loopback address */
402 if (pr
->local_ip6
== NULL
)
406 if (pr
->nonlocal_ip6
== NULL
)
407 pr
->nonlocal_ip6
= ip6
;
412 lockmgr(&jail_lock
, LK_RELEASE
);
416 * Replaces INADDR_LOOPBACK with a valid jail address.
417 * ip is in network byte order.
418 * Returns 1 if the ip is among the jail's valid ips.
419 * Returns 0 if it is not among the jail's valid ips, or
420 * if INADDR_LOOPBACK could not be replaced with a valid
424 prison_replace_wildcards(struct thread
*td
, struct sockaddr
*ip
)
426 struct sockaddr_in
*ip4
= (struct sockaddr_in
*)ip
;
427 struct sockaddr_in6
*ip6
= (struct sockaddr_in6
*)ip
;
430 if (td
->td_proc
== NULL
|| td
->td_ucred
== NULL
)
432 if ((pr
= td
->td_ucred
->cr_prison
) == NULL
)
435 if ((ip
->sa_family
== AF_INET
&&
436 ip4
->sin_addr
.s_addr
== htonl(INADDR_ANY
)) ||
437 (ip
->sa_family
== AF_INET6
&&
438 IN6_IS_ADDR_UNSPECIFIED(&ip6
->sin6_addr
)))
440 if ((ip
->sa_family
== AF_INET
&&
441 ip4
->sin_addr
.s_addr
== htonl(INADDR_LOOPBACK
)) ||
442 (ip
->sa_family
== AF_INET6
&&
443 IN6_IS_ADDR_LOOPBACK(&ip6
->sin6_addr
))) {
444 if (!prison_get_local(pr
, ip
->sa_family
, ip
) &&
445 !prison_get_nonlocal(pr
, ip
->sa_family
, ip
))
450 if (jailed_ip(pr
, ip
))
456 * Convert the localhost IP to the actual jail IP
459 prison_remote_ip(struct thread
*td
, struct sockaddr
*ip
)
461 struct sockaddr_in
*ip4
= (struct sockaddr_in
*)ip
;
462 struct sockaddr_in6
*ip6
= (struct sockaddr_in6
*)ip
;
465 if (td
== NULL
|| td
->td_proc
== NULL
|| td
->td_ucred
== NULL
)
467 if ((pr
= td
->td_ucred
->cr_prison
) == NULL
)
469 if ((ip
->sa_family
== AF_INET
&&
470 ip4
->sin_addr
.s_addr
== htonl(INADDR_LOOPBACK
)) ||
471 (ip
->sa_family
== AF_INET6
&&
472 IN6_IS_ADDR_LOOPBACK(&ip6
->sin6_addr
))) {
473 if (!prison_get_local(pr
, ip
->sa_family
, ip
) &&
474 !prison_get_nonlocal(pr
, ip
->sa_family
, ip
))
483 * Convert the jail IP back to localhost
485 * Used by getsockname() and getpeername() to convert the in-jail loopback
486 * address back to LOCALHOST. For example, 127.0.0.2 -> 127.0.0.1. The
487 * idea is that programs running inside the jail should be unaware that they
488 * are using a different loopback IP than the host.
490 __read_mostly
static struct in6_addr sin6_localhost
= IN6ADDR_LOOPBACK_INIT
;
493 prison_local_ip(struct thread
*td
, struct sockaddr
*ip
)
495 struct sockaddr_in
*ip4
= (struct sockaddr_in
*)ip
;
496 struct sockaddr_in6
*ip6
= (struct sockaddr_in6
*)ip
;
499 if (td
== NULL
|| td
->td_proc
== NULL
|| td
->td_ucred
== NULL
)
501 if ((pr
= td
->td_ucred
->cr_prison
) == NULL
)
503 if (ip
->sa_family
== AF_INET
&& pr
->local_ip4
&&
504 pr
->local_ip4
->sin_addr
.s_addr
== ip4
->sin_addr
.s_addr
&&
505 pr
->local_ip4
->sin_addr
.s_addr
!= htonl(INADDR_LOOPBACK
)) {
506 ip4
->sin_addr
.s_addr
= htonl(INADDR_LOOPBACK
);
509 if (ip
->sa_family
== AF_INET6
&& pr
->local_ip6
&&
510 bcmp(&pr
->local_ip6
->sin6_addr
, &ip6
->sin6_addr
,
511 sizeof(ip6
->sin6_addr
)) == 0) {
512 bcopy(&sin6_localhost
, &ip6
->sin6_addr
, sizeof(ip6
->sin6_addr
));
519 * Prison get non loopback ip:
520 * - af is the address family of the ip we want (AF_INET|AF_INET6).
521 * - If ip != NULL, put the first IP address that is not a loopback address
524 * ip is in network byte order and we don't touch it unless we find a valid ip.
525 * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
526 * or NULL. This struct may not be modified.
529 prison_get_nonlocal(struct prison
*pr
, sa_family_t af
, struct sockaddr
*ip
)
531 struct sockaddr_in
*ip4
= (struct sockaddr_in
*)ip
;
532 struct sockaddr_in6
*ip6
= (struct sockaddr_in6
*)ip
;
534 /* Check if it is cached */
537 if (ip4
!= NULL
&& pr
->nonlocal_ip4
!= NULL
)
538 ip4
->sin_addr
.s_addr
= pr
->nonlocal_ip4
->sin_addr
.s_addr
;
539 return (struct sockaddr
*)pr
->nonlocal_ip4
;
542 if (ip6
!= NULL
&& pr
->nonlocal_ip6
!= NULL
)
543 ip6
->sin6_addr
= pr
->nonlocal_ip6
->sin6_addr
;
544 return (struct sockaddr
*)pr
->nonlocal_ip6
;
552 * Prison get loopback ip.
553 * - af is the address family of the ip we want (AF_INET|AF_INET6).
554 * - If ip != NULL, put the first IP address that is a loopback address
557 * ip is in network byte order and we don't touch it unless we find a valid ip.
558 * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
559 * or NULL. This struct may not be modified.
562 prison_get_local(struct prison
*pr
, sa_family_t af
, struct sockaddr
*ip
)
564 struct sockaddr_in
*ip4
= (struct sockaddr_in
*)ip
;
565 struct sockaddr_in6
*ip6
= (struct sockaddr_in6
*)ip
;
567 /* Check if it is cached */
570 if (ip4
!= NULL
&& pr
->local_ip4
!= NULL
)
571 ip4
->sin_addr
.s_addr
= pr
->local_ip4
->sin_addr
.s_addr
;
572 return (struct sockaddr
*)pr
->local_ip4
;
575 if (ip6
!= NULL
&& pr
->local_ip6
!= NULL
)
576 ip6
->sin6_addr
= pr
->local_ip6
->sin6_addr
;
577 return (struct sockaddr
*)pr
->local_ip6
;
584 /* Check if the IP is among ours, if it is return 1, else 0 */
586 jailed_ip(struct prison
*pr
, const struct sockaddr
*ip
)
588 const struct jail_ip_storage
*jis
;
589 const struct sockaddr_in
*jip4
, *ip4
;
590 const struct sockaddr_in6
*jip6
, *ip6
;
594 ip4
= (const struct sockaddr_in
*)ip
;
595 ip6
= (const struct sockaddr_in6
*)ip
;
597 lockmgr(&jail_lock
, LK_EXCLUSIVE
);
598 SLIST_FOREACH(jis
, &pr
->pr_ips
, entries
) {
599 switch (ip
->sa_family
) {
601 jip4
= (const struct sockaddr_in
*) &jis
->ip
;
602 if (jip4
->sin_family
== AF_INET
&&
603 ip4
->sin_addr
.s_addr
== jip4
->sin_addr
.s_addr
) {
604 lockmgr(&jail_lock
, LK_RELEASE
);
609 jip6
= (const struct sockaddr_in6
*) &jis
->ip
;
610 if (jip6
->sin6_family
== AF_INET6
&&
611 IN6_ARE_ADDR_EQUAL(&ip6
->sin6_addr
,
613 lockmgr(&jail_lock
, LK_RELEASE
);
619 lockmgr(&jail_lock
, LK_RELEASE
);
625 prison_if(struct ucred
*cred
, struct sockaddr
*sa
)
628 struct sockaddr_in
*sai
= (struct sockaddr_in
*) sa
;
630 pr
= cred
->cr_prison
;
632 if (((sai
->sin_family
!= AF_INET
) && (sai
->sin_family
!= AF_INET6
))
633 && PRISON_CAP_ISSET(pr
->pr_caps
, PRISON_CAP_NET_UNIXIPROUTE
))
635 else if ((sai
->sin_family
!= AF_INET
) && (sai
->sin_family
!= AF_INET6
))
637 else if (jailed_ip(pr
, sa
))
643 * Returns a prison instance, or NULL on failure.
645 static struct prison
*
646 prison_find(int prid
)
650 lockmgr(&jail_lock
, LK_EXCLUSIVE
);
651 LIST_FOREACH(pr
, &allprison
, pr_list
) {
652 if (pr
->pr_id
== prid
)
655 lockmgr(&jail_lock
, LK_RELEASE
);
661 sysctl_jail_list(SYSCTL_HANDLER_ARGS
)
663 struct thread
*td
= curthread
;
664 struct jail_ip_storage
*jip
;
666 struct sockaddr_in6
*jsin6
;
668 struct sockaddr_in
*jsin
;
671 unsigned int jlssize
, jlsused
;
673 char *jls
; /* Jail list */
674 char *oip
; /* Output ip */
675 char *fullpath
, *freepath
;
679 if (jailed(td
->td_ucred
))
688 jlssize
= (count
* 1024);
689 jls
= kmalloc(jlssize
+ 1, M_TEMP
, M_WAITOK
| M_ZERO
);
690 if (count
< prisoncount
) {
696 lockmgr(&jail_lock
, LK_EXCLUSIVE
);
697 LIST_FOREACH(pr
, &allprison
, pr_list
) {
698 error
= cache_fullpath(lp
->lwp_proc
, &pr
->pr_root
, NULL
,
699 &fullpath
, &freepath
, 0);
702 if (jlsused
&& jlsused
< jlssize
)
703 jls
[jlsused
++] = '\n';
704 count
= ksnprintf(jls
+ jlsused
, (jlssize
- jlsused
),
706 pr
->pr_id
, pr
->pr_host
, fullpath
);
707 kfree(freepath
, M_TEMP
);
713 SLIST_FOREACH(jip
, &pr
->pr_ips
, entries
) {
714 char buf
[INET_ADDRSTRLEN
];
716 jsin
= (struct sockaddr_in
*)&jip
->ip
;
718 switch(jsin
->sin_family
) {
720 oip
= kinet_ntoa(jsin
->sin_addr
, buf
);
724 jsin6
= (struct sockaddr_in6
*)&jip
->ip
;
725 oip
= ip6_sprintf(&jsin6
->sin6_addr
);
733 if ((jlssize
- jlsused
) < (strlen(oip
) + 1)) {
737 count
= ksnprintf(jls
+ jlsused
, (jlssize
- jlsused
),
747 * pr_id <SPC> hostname1 <SPC> PATH1 <SPC> IP1 <SPC> IP2\npr_id...
749 error
= SYSCTL_OUT(req
, jls
, jlsused
);
751 lockmgr(&jail_lock
, LK_RELEASE
);
757 SYSCTL_OID(_jail
, OID_AUTO
, list
, CTLTYPE_STRING
| CTLFLAG_RD
, NULL
, 0,
758 sysctl_jail_list
, "A", "List of active jails");
761 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS
)
765 injail
= jailed(req
->td
->td_ucred
);
766 error
= SYSCTL_OUT(req
, &injail
, sizeof(injail
));
771 SYSCTL_PROC(_jail
, OID_AUTO
, jailed
,
772 CTLTYPE_INT
| CTLFLAG_RD
| CTLFLAG_NOLOCK
, NULL
, 0,
773 sysctl_jail_jailed
, "I", "Process in jail?");
779 prison_hold(struct prison
*pr
)
781 atomic_add_int(&pr
->pr_ref
, 1);
783 if (prison_debug
> 0) {
794 prison_free(struct prison
*pr
)
796 struct jail_ip_storage
*jls
;
799 if (prison_debug
> 0) {
804 KKASSERT(pr
->pr_ref
> 0);
805 if (atomic_fetchadd_int(&pr
->pr_ref
, -1) != 1)
809 * The global jail lock is needed on the last ref to adjust
812 lockmgr(&jail_lock
, LK_EXCLUSIVE
);
814 lockmgr(&jail_lock
, LK_RELEASE
);
817 LIST_REMOVE(pr
, pr_list
);
823 while (!SLIST_EMPTY(&pr
->pr_ips
)) {
824 jls
= SLIST_FIRST(&pr
->pr_ips
);
825 SLIST_REMOVE_HEAD(&pr
->pr_ips
, entries
);
826 kfree(jls
, M_PRISON
);
828 lockmgr(&jail_lock
, LK_RELEASE
);
830 if (pr
->pr_linux
!= NULL
)
831 kfree(pr
->pr_linux
, M_PRISON
);
832 varsymset_clean(&pr
->pr_varsymset
);
834 /* Release the sysctl tree */
835 prison_sysctl_done(pr
);
837 cache_drop(&pr
->pr_root
);
842 * Check if permission for a specific privilege is granted within jail.
847 prison_priv_check(struct ucred
*cred
, int cap
)
849 struct prison
*pr
= cred
->cr_prison
;
854 switch (cap
& ~__SYSCAP_XFLAGS
) {
855 case SYSCAP_RESTRICTEDROOT
: /* meta group 1 */
856 /* RESTRICTEDROOT fallbacks disallowed in jails */
858 case SYSCAP_SENSITIVEROOT
: /* meta group 2 */
859 case SYSCAP_NOEXEC
: /* meta group 3 */
860 case SYSCAP_NOCRED
: /* meta group 4 */
862 case SYSCAP_NOJAIL
: /* meta group 5 */
863 /* all jail ops disallowed in jails */
865 case SYSCAP_NONET
: /* meta group 6 */
867 case SYSCAP_NONET_SENSITIVE
: /* meta group 7 */
868 /* all sensitive network ops disallowed in jails */
870 case SYSCAP_NOVFS
: /* meta group 8 */
871 case SYSCAP_NOVFS_SENSITIVE
: /* meta group 9 */
872 case SYSCAP_NOMOUNT
: /* meta group 10 */
873 case SYSCAP_NO11
: /* meta group 11 */
874 case SYSCAP_NO12
: /* meta group 12 */
875 case SYSCAP_NO13
: /* meta group 13 */
876 case SYSCAP_NO14
: /* meta group 14 */
877 case SYSCAP_NO15
: /* meta group 15 */
880 /* ----- */ /* group 1 - disallowed */
882 case SYSCAP_NOPROC_TRESPASS
: /* group 2 allowed */
883 case SYSCAP_NOPROC_SETLOGIN
:
884 case SYSCAP_NOPROC_SETRLIMIT
:
885 case SYSCAP_NOSYSCTL_WR
:
886 case SYSCAP_NOVARSYM_SYS
:
887 case SYSCAP_NOSETHOSTNAME
:
888 case SYSCAP_NOQUOTA_WR
:
889 case SYSCAP_NODEBUG_UNPRIV
:
891 case SYSCAP_NOSCHED_CPUSET
:
892 case SYSCAP_NOSETTIME
:
895 case SYSCAP_NOEXEC_SUID
: /* group 3 allowed */
896 case SYSCAP_NOEXEC_SGID
:
899 case SYSCAP_NOCRED_SETUID
: /* group 4 allowed */
900 case SYSCAP_NOCRED_SETGID
:
901 case SYSCAP_NOCRED_SETEUID
:
902 case SYSCAP_NOCRED_SETEGID
:
903 case SYSCAP_NOCRED_SETREUID
:
904 case SYSCAP_NOCRED_SETREGID
:
905 case SYSCAP_NOCRED_SETRESUID
:
906 case SYSCAP_NOCRED_SETRESGID
:
907 case SYSCAP_NOCRED_SETGROUPS
:
910 case SYSCAP_NOJAIL_CREATE
: /* group 5 disallowed */
911 case SYSCAP_NOJAIL_ATTACH
:
914 case SYSCAP_NONET_RESPORT
: /* group 6 mostly allowed */
916 * Allow reserved ports
919 case SYSCAP_NONET_RAW
:
921 * Conditionally allow creating raw sockets in jail.
923 if (PRISON_CAP_ISSET(pr
->pr_caps
,
924 PRISON_CAP_NET_RAW_SOCKETS
))
929 /* ----- */ /* group 7 - disallowed */
931 case SYSCAP_NOVFS_SYSFLAGS
: /* group 8 - allowed */
932 case SYSCAP_NOVFS_CHOWN
:
933 case SYSCAP_NOVFS_CHMOD
:
934 case SYSCAP_NOVFS_LINK
:
935 case SYSCAP_NOVFS_CHFLAGS_DEV
:
936 case SYSCAP_NOVFS_SETATTR
:
937 case SYSCAP_NOVFS_SETGID
:
938 case SYSCAP_NOVFS_GENERATION
:
939 case SYSCAP_NOVFS_RETAINSUGID
:
942 case SYSCAP_NOVFS_MKNOD_BAD
: /* group 9 - allowed */
943 case SYSCAP_NOVFS_MKNOD_WHT
:
944 case SYSCAP_NOVFS_MKNOD_DIR
:
945 case SYSCAP_NOVFS_MKNOD_DEV
:
946 case SYSCAP_NOVFS_IOCTL
:
947 case SYSCAP_NOVFS_CHROOT
:
948 case SYSCAP_NOVFS_REVOKE
:
951 case SYSCAP_NOMOUNT_NULLFS
: /* group 10 - conditional */
952 if (PRISON_CAP_ISSET(pr
->pr_caps
, PRISON_CAP_VFS_MOUNT_NULLFS
))
956 case SYSCAP_NOMOUNT_DEVFS
:
957 if (PRISON_CAP_ISSET(pr
->pr_caps
, PRISON_CAP_VFS_MOUNT_DEVFS
))
961 case SYSCAP_NOMOUNT_TMPFS
:
962 if (PRISON_CAP_ISSET(pr
->pr_caps
, PRISON_CAP_VFS_MOUNT_TMPFS
))
966 case SYSCAP_NOMOUNT_PROCFS
:
967 if (PRISON_CAP_ISSET(pr
->pr_caps
, PRISON_CAP_VFS_MOUNT_PROCFS
))
971 case SYSCAP_NOMOUNT_FUSE
:
972 if (PRISON_CAP_ISSET(pr
->pr_caps
, PRISON_CAP_VFS_MOUNT_FUSEFS
))
976 case SYSCAP_NOMOUNT_UMOUNT
:
980 /* otherwise disallow */
987 * Create a per-jail sysctl tree to control the prison
990 prison_sysctl_create(struct prison
*pr
)
994 ksnprintf(id_str
, 6, "%d", pr
->pr_id
);
996 pr
->pr_sysctl_ctx
= (struct sysctl_ctx_list
*) kmalloc(
997 sizeof(struct sysctl_ctx_list
), M_PRISON
, M_WAITOK
| M_ZERO
);
999 sysctl_ctx_init(pr
->pr_sysctl_ctx
);
1001 /* Main jail node */
1002 pr
->pr_sysctl_tree
= SYSCTL_ADD_NODE(pr
->pr_sysctl_ctx
,
1003 SYSCTL_STATIC_CHILDREN(_jail
),
1004 OID_AUTO
, id_str
, CTLFLAG_RD
, 0,
1005 "Jail specific settings");
1007 SYSCTL_ADD_BIT64(pr
->pr_sysctl_ctx
, SYSCTL_CHILDREN(pr
->pr_sysctl_tree
),
1008 OID_AUTO
, "sys_set_hostname", CTLFLAG_RW
,
1009 &pr
->pr_caps
, 0, PRISON_CAP_SYS_SET_HOSTNAME
,
1010 "Processes in jail can set their hostnames");
1012 SYSCTL_ADD_BIT64(pr
->pr_sysctl_ctx
, SYSCTL_CHILDREN(pr
->pr_sysctl_tree
),
1013 OID_AUTO
, "sys_sysvipc", CTLFLAG_RW
,
1014 &pr
->pr_caps
, 0, PRISON_CAP_SYS_SYSVIPC
,
1015 "Processes in jail can use System V IPC primitives");
1017 SYSCTL_ADD_BIT64(pr
->pr_sysctl_ctx
, SYSCTL_CHILDREN(pr
->pr_sysctl_tree
),
1018 OID_AUTO
, "net_unixiproute", CTLFLAG_RW
,
1019 &pr
->pr_caps
, 0, PRISON_CAP_NET_UNIXIPROUTE
,
1020 "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
1022 SYSCTL_ADD_BIT64(pr
->pr_sysctl_ctx
, SYSCTL_CHILDREN(pr
->pr_sysctl_tree
),
1023 OID_AUTO
, "net_raw_sockets", CTLFLAG_RW
,
1024 &pr
->pr_caps
, 0, PRISON_CAP_NET_RAW_SOCKETS
,
1025 "Process in jail can create raw sockets");
1027 SYSCTL_ADD_BIT64(pr
->pr_sysctl_ctx
, SYSCTL_CHILDREN(pr
->pr_sysctl_tree
),
1028 OID_AUTO
, "allow_listen_override", CTLFLAG_RW
,
1029 &pr
->pr_caps
, 0, PRISON_CAP_NET_LISTEN_OVERRIDE
,
1030 "Process in jail can create raw sockets");
1032 SYSCTL_ADD_BIT64(pr
->pr_sysctl_ctx
, SYSCTL_CHILDREN(pr
->pr_sysctl_tree
),
1033 OID_AUTO
, "vfs_chflags", CTLFLAG_RW
,
1034 &pr
->pr_caps
, 0, PRISON_CAP_VFS_CHFLAGS
,
1035 "Process in jail can override host wildcard listen");
1037 SYSCTL_ADD_BIT64(pr
->pr_sysctl_ctx
, SYSCTL_CHILDREN(pr
->pr_sysctl_tree
),
1038 OID_AUTO
, "vfs_mount_nullfs", CTLFLAG_RW
,
1039 &pr
->pr_caps
, 0, PRISON_CAP_VFS_MOUNT_NULLFS
,
1040 "Processes in jail can mount nullfs(5) filesystems");
1042 SYSCTL_ADD_BIT64(pr
->pr_sysctl_ctx
, SYSCTL_CHILDREN(pr
->pr_sysctl_tree
),
1043 OID_AUTO
, "vfs_mount_tmpfs", CTLFLAG_RW
,
1044 &pr
->pr_caps
, 0, PRISON_CAP_VFS_MOUNT_TMPFS
,
1045 "Processes in jail can mount tmpfs(5) filesystems");
1047 SYSCTL_ADD_BIT64(pr
->pr_sysctl_ctx
, SYSCTL_CHILDREN(pr
->pr_sysctl_tree
),
1048 OID_AUTO
, "vfs_mount_devfs", CTLFLAG_RW
,
1049 &pr
->pr_caps
, 0, PRISON_CAP_VFS_MOUNT_DEVFS
,
1050 "Processes in jail can mount devfs(5) filesystems");
1052 SYSCTL_ADD_BIT64(pr
->pr_sysctl_ctx
, SYSCTL_CHILDREN(pr
->pr_sysctl_tree
),
1053 OID_AUTO
, "vfs_mount_procfs", CTLFLAG_RW
,
1054 &pr
->pr_caps
, 0, PRISON_CAP_VFS_MOUNT_PROCFS
,
1055 "Processes in jail can mount procfs(5) filesystems");
1057 SYSCTL_ADD_BIT64(pr
->pr_sysctl_ctx
, SYSCTL_CHILDREN(pr
->pr_sysctl_tree
),
1058 OID_AUTO
, "vfs_mount_fusefs", CTLFLAG_RW
,
1059 &pr
->pr_caps
, 0, PRISON_CAP_VFS_MOUNT_FUSEFS
,
1060 "Processes in jail can mount fuse filesystems");
1066 prison_sysctl_done(struct prison
*pr
)
1068 if (pr
->pr_sysctl_tree
) {
1069 sysctl_ctx_free(pr
->pr_sysctl_ctx
);
1070 kfree(pr
->pr_sysctl_ctx
, M_PRISON
);
1071 pr
->pr_sysctl_tree
= NULL
;