Merge branch 'merges' of git://repo.or.cz/unleashed into merges
[unleashed.git] / usr / src / lib / libipadm / common / ipadm_ngz.c
blob66f066b939440bbd4921db0e936104c44724df02
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <priv_utils.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <strings.h>
32 #include <sys/param.h>
33 #include <sys/stat.h>
34 #include <unistd.h>
35 #include <zone.h>
36 #include <libipadm.h>
37 #include <libdladm.h>
38 #include <libdllink.h>
39 #include <net/route.h>
40 #include <netinet/in.h>
41 #include <net/route.h>
42 #include <errno.h>
43 #include <inet/ip.h>
44 #include <string.h>
45 #include <libinetutil.h>
46 #include <unistd.h>
47 #include <libipadm_impl.h>
48 #include <sys/brand.h>
/*
 * Round `a' up to the next multiple of sizeof (long).  A value that is not
 * positive yields sizeof (long).  The routing-message builders in this file
 * use it to size and step over the socket addresses they pack.
 */
#define	ROUNDUP_LONG(a) \
	((a) <= 0 ? sizeof (long) : ((((a) - 1) | (sizeof (long) - 1)) + 1))

/* Mask-length value that makes i_ipadm_rtioctl4() request a host route. */
#define	HOST_MASK	0xffffffffU
54 typedef struct ngz_walk_data_s {
55 ipadm_handle_t ngz_iph;
56 zoneid_t ngz_zoneid;
57 char *ngz_ifname;
58 boolean_t ngz_s10c;
59 ipadm_status_t ngz_ipstatus;
60 persist_cb_t ngz_persist_if;
61 } ngz_walk_data_t;
64 * Tell the kernel to add, delete or change a route
66 static void
67 i_ipadm_rtioctl4(int rtsock,
68 int action, /* RTM_DELETE, etc */
69 in_addr_t dst,
70 in_addr_t gate,
71 uint_t masklen,
72 char *ifname,
73 uint8_t metric,
74 int flags)
76 static int rt_sock_seqno = 0;
77 struct {
78 struct rt_msghdr w_rtm;
79 struct sockaddr_in w_dst;
80 struct sockaddr_in w_gate;
81 uint8_t w_space[512];
82 } w;
83 struct sockaddr_in w_mask;
84 struct sockaddr_dl w_ifp;
85 uint8_t *cp;
86 long cc;
88 again:
89 (void) memset(&w, 0, sizeof (w));
90 (void) memset(&w_mask, 0, sizeof (w_mask));
91 (void) memset(&w_ifp, 0, sizeof (w_ifp));
92 cp = w.w_space;
93 w.w_rtm.rtm_msglen = sizeof (struct rt_msghdr) +
94 2 * ROUNDUP_LONG(sizeof (struct sockaddr_in));
95 w.w_rtm.rtm_version = RTM_VERSION;
96 w.w_rtm.rtm_type = action;
97 w.w_rtm.rtm_flags = (flags | RTF_ZONE);
98 w.w_rtm.rtm_seq = ++rt_sock_seqno;
99 w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
100 if (metric != 0 || action == RTM_CHANGE) {
101 w.w_rtm.rtm_rmx.rmx_hopcount = metric;
102 w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
104 w.w_dst.sin_family = AF_INET;
105 w.w_dst.sin_addr.s_addr = dst;
106 w.w_gate.sin_family = AF_INET;
107 w.w_gate.sin_addr.s_addr = gate;
108 if (masklen == HOST_MASK) {
109 w.w_rtm.rtm_flags |= RTF_HOST;
110 } else {
111 struct sockaddr_storage m4;
113 w.w_rtm.rtm_addrs |= RTA_NETMASK;
114 w_mask.sin_family = AF_INET;
115 if (plen2mask(masklen, AF_INET, (struct sockaddr *)&m4) != 0) {
116 return;
118 w_mask.sin_addr = ((struct sockaddr_in *)&m4)->sin_addr;
119 (void) memmove(cp, &w_mask, sizeof (w_mask));
120 cp += ROUNDUP_LONG(sizeof (struct sockaddr_in));
121 w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_in));
123 w_ifp.sdl_family = AF_LINK;
124 w.w_rtm.rtm_addrs |= RTA_IFP;
125 w_ifp.sdl_index = if_nametoindex(ifname);
126 (void) memmove(cp, &w_ifp, sizeof (w_ifp));
127 w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_dl));
129 cc = write(rtsock, &w, w.w_rtm.rtm_msglen);
130 if (cc < 0) {
131 if (errno == ESRCH && (action == RTM_CHANGE ||
132 action == RTM_DELETE)) {
133 if (action == RTM_CHANGE) {
134 action = RTM_ADD;
135 goto again;
137 return;
139 return;
140 } else if (cc != w.w_rtm.rtm_msglen) {
141 return;
145 static void
146 i_ipadm_rtioctl6(int rtsock,
147 int action, /* RTM_DELETE, etc */
148 in6_addr_t dst,
149 in6_addr_t gate,
150 uint_t prefix_length,
151 char *ifname,
152 int flags)
154 static int rt_sock_seqno = 0;
155 struct {
156 struct rt_msghdr w_rtm;
157 struct sockaddr_in6 w_dst;
158 struct sockaddr_in6 w_gate;
159 uint8_t w_space[512];
160 } w;
161 struct sockaddr_in6 w_mask;
162 struct sockaddr_dl w_ifp;
163 uint8_t *cp;
164 long cc;
166 again:
167 (void) memset(&w, 0, sizeof (w));
168 (void) memset(&w_mask, 0, sizeof (w_mask));
169 (void) memset(&w_ifp, 0, sizeof (w_ifp));
170 cp = w.w_space;
171 w.w_rtm.rtm_msglen = sizeof (struct rt_msghdr) +
172 2 * ROUNDUP_LONG(sizeof (struct sockaddr_in6));
173 w.w_rtm.rtm_version = RTM_VERSION;
174 w.w_rtm.rtm_type = action;
175 w.w_rtm.rtm_flags = (flags | RTF_ZONE);
176 w.w_rtm.rtm_seq = ++rt_sock_seqno;
177 w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
178 w.w_dst.sin6_family = AF_INET6;
179 w.w_dst.sin6_addr = dst;
180 w.w_gate.sin6_family = AF_INET6;
181 w.w_gate.sin6_addr = gate;
182 if (prefix_length == IPV6_ABITS) {
183 w.w_rtm.rtm_flags |= RTF_HOST;
184 } else {
185 struct sockaddr_storage m6;
187 w.w_rtm.rtm_addrs |= RTA_NETMASK;
188 w_mask.sin6_family = AF_INET6;
189 if (plen2mask(prefix_length, AF_INET6,
190 (struct sockaddr *)&m6) != 0) {
191 return;
193 w_mask.sin6_addr = ((struct sockaddr_in6 *)&m6)->sin6_addr;
194 (void) memmove(cp, &w_mask, sizeof (w_mask));
195 cp += ROUNDUP_LONG(sizeof (struct sockaddr_in6));
196 w.w_rtm.rtm_msglen +=
197 ROUNDUP_LONG(sizeof (struct sockaddr_in6));
199 w_ifp.sdl_family = AF_LINK;
200 w.w_rtm.rtm_addrs |= RTA_IFP;
201 w_ifp.sdl_index = if_nametoindex(ifname);
202 (void) memmove(cp, &w_ifp, sizeof (w_ifp));
203 w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_dl));
205 cc = write(rtsock, &w, w.w_rtm.rtm_msglen);
206 if (cc < 0) {
207 if (errno == ESRCH && (action == RTM_CHANGE ||
208 action == RTM_DELETE)) {
209 if (action == RTM_CHANGE) {
210 action = RTM_ADD;
211 goto again;
213 return;
215 return;
216 } else if (cc != w.w_rtm.rtm_msglen) {
217 return;
222 * Return TRUE if running in a Solaris 10 Container.
224 static boolean_t
225 i_ipadm_zone_is_s10c(zoneid_t zoneid)
227 char brand[MAXNAMELEN];
229 if (zone_getattr(zoneid, ZONE_ATTR_BRAND, brand, sizeof (brand)) < 0)
230 return (B_FALSE);
231 return (strcmp(brand, NATIVE_BRAND_NAME) != 0);
235 * Configure addresses on link. `buf' is a string of comma-separated
236 * IP addresses.
238 static ipadm_status_t
239 i_ipadm_ngz_addr(ipadm_handle_t iph, char *link, char *buf)
241 ipadm_status_t ipstatus;
242 ipadm_addrobj_t ipaddr;
243 char *cp;
245 for (cp = strtok(buf, ","); cp != NULL; cp = strtok(NULL, ",")) {
246 ipstatus = ipadm_create_addrobj(IPADM_ADDR_STATIC, link,
247 &ipaddr);
248 if (ipstatus != IPADM_SUCCESS)
249 return (ipstatus);
251 * ipadm_set_addr does the appropriate name resolution and
252 * sets up the ipadm_static_addr field.
254 ipstatus = ipadm_set_addr(ipaddr, cp, AF_UNSPEC);
255 if (ipstatus != IPADM_SUCCESS) {
256 ipadm_destroy_addrobj(ipaddr);
257 return (ipstatus);
260 ipstatus = ipadm_create_addr(iph, ipaddr,
261 (IPADM_OPT_ACTIVE | IPADM_OPT_UP));
262 if (ipstatus != IPADM_SUCCESS) {
263 ipadm_destroy_addrobj(ipaddr);
264 return (ipstatus);
266 ipadm_destroy_addrobj(ipaddr);
268 return (IPADM_SUCCESS);
272 * The (*persist_if)() will set up persistent information for the interface,
273 * based on what interface families are required, so just resolve the
274 * address and inform the callback about the linkname, and required address
275 * families.
277 static ipadm_status_t
278 i_ipadm_ngz_persist_if(char *link, char *buf,
279 void (*ngz_persist_if)(char *, boolean_t, boolean_t))
281 char *cp, *slashp, addr[INET6_ADDRSTRLEN];
282 ipadm_status_t ipstatus;
283 struct sockaddr_storage ss;
284 boolean_t v4 = B_FALSE;
285 boolean_t v6 = B_FALSE;
287 for (cp = strtok(buf, ","); cp != NULL; cp = strtok(NULL, ",")) {
288 /* remove the /<masklen> that's always added by zoneadmd */
289 slashp = strchr(cp, '/');
290 (void) strlcpy(addr, cp, (slashp - cp + 1));
292 /* resolve the address to find the family */
293 bzero(&ss, sizeof (ss));
294 ipstatus = i_ipadm_resolve_addr(addr, AF_UNSPEC, &ss);
295 if (ipstatus != IPADM_SUCCESS)
296 return (ipstatus);
297 switch (ss.ss_family) {
298 case AF_INET:
299 v4 = B_TRUE;
300 break;
301 case AF_INET6:
302 v6 = B_TRUE;
303 break;
304 default:
305 return (IPADM_BAD_ADDR);
308 (*ngz_persist_if)(link, v4, v6);
309 return (IPADM_SUCCESS);
312 static void
313 i_ipadm_create_ngz_route(int rtsock, char *link, uint8_t *buf, size_t buflen)
315 struct in6_addr defrouter;
316 boolean_t isv6;
317 struct in_addr gw4;
318 uint8_t *cp;
319 const in6_addr_t ipv6_all_zeros = { { { 0, 0, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0 } } };
322 if (rtsock == -1)
323 return;
325 for (cp = buf; cp < buf + buflen; cp += sizeof (defrouter)) {
326 bcopy(cp, &defrouter, sizeof (defrouter));
327 if (IN6_IS_ADDR_UNSPECIFIED(&defrouter))
328 break;
329 isv6 = !IN6_IS_ADDR_V4MAPPED(&defrouter);
330 if (isv6) {
331 i_ipadm_rtioctl6(rtsock, RTM_ADD, ipv6_all_zeros,
332 defrouter, 0, link, RTF_GATEWAY);
333 } else {
334 IN6_V4MAPPED_TO_INADDR(&defrouter, &gw4);
335 i_ipadm_rtioctl4(rtsock, RTM_ADD, INADDR_ANY,
336 gw4.s_addr, 0, link, 0, RTF_GATEWAY);
342 * Wrapper function to zone_getattr() for retrieving from-gz attributes that
343 * were made availabe for exclusive IP non-global zones by zoneadmd from teh
344 * global zone.
346 static ipadm_status_t
347 i_ipadm_zone_get_network(zoneid_t zoneid, datalink_id_t linkid, int type,
348 void *buf, size_t *bufsize)
350 zone_net_data_t *zndata;
352 zndata = calloc(1, sizeof (*zndata) + *bufsize);
353 if (zndata == NULL)
354 return (IPADM_NO_MEMORY);
355 zndata->zn_type = type;
356 zndata->zn_linkid = linkid;
357 zndata->zn_len = *bufsize;
359 if (zone_getattr(zoneid, ZONE_ATTR_NETWORK, zndata,
360 sizeof (*zndata) + *bufsize) < 0) {
361 return (ipadm_errno2status(errno));
363 *bufsize = zndata->zn_len;
364 bcopy(zndata->zn_val, buf, *bufsize);
365 return (IPADM_SUCCESS);
369 * Callback function that configures a single datalink in a non-global zone.
371 static int
372 i_ipadm_zone_network_attr(dladm_handle_t dh, datalink_id_t linkid, void *arg)
374 ngz_walk_data_t *nwd = arg;
375 zoneid_t zoneid = nwd->ngz_zoneid;
376 uint8_t buf[PIPE_BUF];
377 dladm_status_t dlstatus;
378 ipadm_status_t ipstatus;
379 char link[MAXLINKNAMELEN];
380 ipadm_handle_t iph = nwd->ngz_iph;
381 int rtsock = iph->iph_rtsock;
382 char *ifname = nwd->ngz_ifname;
383 boolean_t s10c = nwd->ngz_s10c;
384 boolean_t is_ipmgmtd = (iph->iph_flags & IPH_IPMGMTD);
385 size_t bufsize = sizeof (buf);
387 bzero(buf, bufsize);
388 ipstatus = i_ipadm_zone_get_network(zoneid, linkid,
389 ZONE_NETWORK_ADDRESS, buf, &bufsize);
390 if (ipstatus != IPADM_SUCCESS)
391 goto fail;
393 dlstatus = dladm_datalink_id2info(dh, linkid, NULL, NULL,
394 NULL, link, sizeof (link));
395 if (dlstatus != DLADM_STATUS_OK)
396 return (DLADM_WALK_CONTINUE);
399 * if ifname has been specified, then skip interfaces that don't match
401 if (ifname != NULL && strcmp(ifname, link) != 0)
402 return (DLADM_WALK_CONTINUE);
405 * Plumb the interface and configure addresses on for S10 Containers.
406 * We need to always do this for S10C because ipadm persistent
407 * configuration is not available in S10C. For ipkg zones,
408 * we skip the actual plumbing/configuration, but will call the
409 * (*ngz_persist_if)() callback to create the persistent state for the
410 * interface. The interface will be configured in ipkg zones when
411 * ipadm_enable_if() is invoked to restore persistent configuration.
413 if (is_ipmgmtd && !s10c) {
414 (void) i_ipadm_ngz_persist_if(link, (char *)buf,
415 nwd->ngz_persist_if);
416 return (DLADM_WALK_CONTINUE);
418 ipstatus = i_ipadm_ngz_addr(iph, link, (char *)buf);
419 if (ipstatus != IPADM_SUCCESS)
420 goto fail;
422 /* apply any default router information. */
423 bufsize = sizeof (buf);
424 bzero(buf, bufsize);
425 ipstatus = i_ipadm_zone_get_network(zoneid, linkid,
426 ZONE_NETWORK_DEFROUTER, buf, &bufsize);
427 if (ipstatus != IPADM_SUCCESS)
428 goto fail;
430 i_ipadm_create_ngz_route(rtsock, link, buf, bufsize);
432 return (DLADM_WALK_CONTINUE);
433 fail:
434 if (ifname != NULL) {
435 nwd->ngz_ipstatus = ipstatus;
436 return (DLADM_WALK_TERMINATE);
438 return (DLADM_WALK_CONTINUE);
442 * ipmgmt_net_from_gz_init() initializes exclusive-IP stack non-global zones by
443 * extracting configuration that has been saved in the kernel and applying
444 * that information to the appropriate datalinks for the zone. If an ifname
445 * argument is passed in, only the selected IP interface corresponding to
446 * datalink will be initialized, otherwise all datalinks will be plumbed for IP
447 * and IP address and route information will be configured.
449 ipadm_status_t
450 ipadm_init_net_from_gz(ipadm_handle_t iph, char *ifname,
451 void (*persist_if)(char *, boolean_t, boolean_t))
453 ngz_walk_data_t nwd;
454 uint64_t flags;
455 dladm_handle_t dlh = iph->iph_dlh;
456 datalink_id_t linkid;
458 if (iph->iph_zoneid == GLOBAL_ZONEID)
459 return (IPADM_NOTSUP);
461 if (ifname != NULL &&
462 i_ipadm_get_flags(iph, ifname, AF_INET, &flags) != IPADM_SUCCESS &&
463 i_ipadm_get_flags(iph, ifname, AF_INET6, &flags) != IPADM_SUCCESS)
464 return (IPADM_ENXIO);
466 if (ifname != NULL && !(flags & IFF_L3PROTECT))
467 return (IPADM_SUCCESS); /* nothing to initialize */
469 nwd.ngz_iph = iph;
470 nwd.ngz_zoneid = iph->iph_zoneid;
471 nwd.ngz_ifname = ifname;
472 nwd.ngz_persist_if = persist_if;
473 nwd.ngz_s10c = i_ipadm_zone_is_s10c(iph->iph_zoneid);
474 nwd.ngz_ipstatus = IPADM_SUCCESS;
475 if (ifname != NULL) {
476 if (dladm_name2info(dlh, ifname, &linkid, NULL, NULL,
477 NULL) != DLADM_STATUS_OK) {
478 return (IPADM_ENXIO);
480 (void) i_ipadm_zone_network_attr(dlh, linkid, &nwd);
481 } else {
482 (void) dladm_walk_datalink_id(i_ipadm_zone_network_attr, dlh,
483 &nwd, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE,
484 DLADM_OPT_PERSIST);
486 return (nwd.ngz_ipstatus);