2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3 * Copyright (c) 2003 Ryan McBride. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24 * THE POSSIBILITY OF SUCH DAMAGE.
27 * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28 * $DragonFly: src/sys/netinet/ip_carp.c,v 1.7 2008/03/07 11:34:20 sephe Exp $
32 /*#include "opt_bpf.h"*/
34 #include "opt_inet6.h"
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <machine/limits.h>
42 #include <sys/malloc.h>
44 #include <sys/module.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/signalvar.h>
50 #include <sys/filio.h>
51 #include <sys/sockio.h>
52 #include <sys/in_cksum.h>
53 #include <sys/socket.h>
54 #include <sys/vnode.h>
56 #include <machine/stdarg.h>
59 #include <net/ethernet.h>
61 #include <net/if_dl.h>
62 #include <net/if_types.h>
63 #include <net/route.h>
64 #include <net/if_clone.h>
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
83 #include <crypto/sha1.h>
84 #include <netinet/ip_carp.h>
87 #define CARP_IFNAME "carp"
88 static MALLOC_DEFINE(M_CARP
, "CARP", "CARP interfaces");
89 static MALLOC_DEFINE(M_IFNET
, "IFNET", "IFNET CARP?");
90 SYSCTL_DECL(_net_inet_carp
);
93 struct ifnet
*sc_ifp
; /* Interface clue */
94 struct ifnet
*sc_carpdev
; /* Pointer to parent interface */
95 struct in_ifaddr
*sc_ia
; /* primary iface address */
96 struct ip_moptions sc_imo
;
98 struct in6_ifaddr
*sc_ia6
; /* primary iface address v6 */
99 struct ip6_moptions sc_im6o
;
101 TAILQ_ENTRY(carp_softc
) sc_list
;
103 enum { INIT
= 0, BACKUP
, MASTER
} sc_state
;
108 int sc_sendad_errors
;
109 #define CARP_SENDAD_MAX_ERRORS 3
110 int sc_sendad_success
;
111 #define CARP_SENDAD_MIN_SUCCESS 3
117 int sc_advbase
; /* seconds */
119 u_int64_t sc_counter
;
122 #define CARP_HMAC_PAD 64
123 unsigned char sc_key
[CARP_KEY_LEN
];
124 unsigned char sc_pad
[CARP_HMAC_PAD
];
127 struct callout sc_ad_tmo
; /* advertisement timeout */
128 struct callout sc_md_tmo
; /* master down timeout */
129 struct callout sc_md6_tmo
; /* master down timeout */
131 LIST_ENTRY(carp_softc
) sc_next
; /* Interface clue */
133 #define SC2IFP(sc) ((sc)->sc_ifp)
135 int carp_suppress_preempt
= 0;
136 int carp_opts
[CARPCTL_MAXID
] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
137 SYSCTL_INT(_net_inet_carp
, CARPCTL_ALLOW
, allow
, CTLFLAG_RW
,
138 &carp_opts
[CARPCTL_ALLOW
], 0, "Accept incoming CARP packets");
139 SYSCTL_INT(_net_inet_carp
, CARPCTL_PREEMPT
, preempt
, CTLFLAG_RW
,
140 &carp_opts
[CARPCTL_PREEMPT
], 0, "high-priority backup preemption mode");
141 SYSCTL_INT(_net_inet_carp
, CARPCTL_LOG
, log
, CTLFLAG_RW
,
142 &carp_opts
[CARPCTL_LOG
], 0, "log bad carp packets");
143 SYSCTL_INT(_net_inet_carp
, CARPCTL_ARPBALANCE
, arpbalance
, CTLFLAG_RW
,
144 &carp_opts
[CARPCTL_ARPBALANCE
], 0, "balance arp responses");
145 SYSCTL_INT(_net_inet_carp
, OID_AUTO
, suppress_preempt
, CTLFLAG_RD
,
146 &carp_suppress_preempt
, 0, "Preemption is suppressed");
148 struct carpstats carpstats
;
149 SYSCTL_STRUCT(_net_inet_carp
, CARPCTL_STATS
, stats
, CTLFLAG_RW
,
150 &carpstats
, carpstats
,
151 "CARP statistics (struct carpstats, netinet/ip_carp.h)");
154 TAILQ_HEAD(, carp_softc
) vhif_vrs
;
157 struct ifnet
*vhif_ifp
;
158 struct lock vhif_lock
;
161 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */
162 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
/*
 * Locking helpers built on the lockmgr lock embedded in struct carp_if
 * (vhif_lock).
 *
 * CARP_LOCK_INIT/CARP_LOCK/CARP_UNLOCK take a carp_if pointer.  The
 * CARP_SC* forms take a carp_softc and reach the same lock through
 * SC2CIF(), which dereferences sc->sc_carpdev->if_carp.
 *
 * CARP_LOCK_DESTROY, CARP_LOCK_ASSERT and CARP_SCLOCK_ASSERT perform no
 * work and do not evaluate their argument.
 *
 * None of the expansions ends in a semicolon: the ';' written at the call
 * site terminates the statement.  With a ';' inside the macro a use such as
 *
 *	if (cond)
 *		CARP_LOCK(cif);
 *	else
 *		...
 *
 * expands to two statements and the `else' no longer has an `if' to bind to.
 */
#define	CARP_LOCK_INIT(cif)	lockinit(&(cif)->vhif_lock, "carp_if", 0, LK_NOWAIT)
#define	CARP_LOCK_DESTROY(cif)	do { } while (0)
#define	CARP_LOCK_ASSERT(cif)	do { } while (0)
#define	CARP_LOCK(cif)		lockmgr(&(cif)->vhif_lock, LK_EXCLUSIVE)
#define	CARP_UNLOCK(cif)	lockmgr(&(cif)->vhif_lock, LK_RELEASE)

#define	CARP_SCLOCK(sc)		lockmgr(&SC2CIF(sc)->vhif_lock, LK_EXCLUSIVE)
#define	CARP_SCUNLOCK(sc)	lockmgr(&SC2CIF(sc)->vhif_lock, LK_RELEASE)
#define	CARP_SCLOCK_ASSERT(sc)	do { } while (0)
174 #define CARP_LOG(...) do { \
175 if (carp_opts[CARPCTL_LOG] > 0) \
176 log(LOG_INFO, __VA_ARGS__); \
179 #define CARP_DEBUG(...) do { \
180 if (carp_opts[CARPCTL_LOG] > 1) \
181 log(LOG_DEBUG, __VA_ARGS__); \
184 static void carp_hmac_prepare(struct carp_softc
*);
185 static void carp_hmac_generate(struct carp_softc
*, u_int32_t
*,
187 static int carp_hmac_verify(struct carp_softc
*, u_int32_t
*,
189 static void carp_setroute(struct carp_softc
*, int);
190 static void carp_input_c(struct mbuf
*, struct carp_header
*, sa_family_t
);
191 static int carp_clone_create(struct if_clone
*, int);
192 static void carp_clone_destroy(struct ifnet
*);
193 static void carpdetach(struct carp_softc
*, int);
194 static int carp_prepare_ad(struct mbuf
*, struct carp_softc
*,
195 struct carp_header
*);
196 static void carp_send_ad_all(void);
197 static void carp_send_ad(void *);
198 static void carp_send_ad_locked(struct carp_softc
*);
199 static void carp_send_arp(struct carp_softc
*);
200 static void carp_master_down(void *);
201 static void carp_master_down_locked(struct carp_softc
*);
202 static int carp_ioctl(struct ifnet
*, u_long
, caddr_t
, struct ucred
*);
203 static int carp_looutput(struct ifnet
*, struct mbuf
*, struct sockaddr
*,
205 static void carp_start(struct ifnet
*);
206 static void carp_setrun(struct carp_softc
*, sa_family_t
);
207 static void carp_set_state(struct carp_softc
*, int);
208 static int carp_addrcount(struct carp_if
*, struct in_ifaddr
*, int);
209 enum { CARP_COUNT_MASTER
, CARP_COUNT_RUNNING
};
211 static void carp_multicast_cleanup(struct carp_softc
*);
212 static int carp_set_addr(struct carp_softc
*, struct sockaddr_in
*);
213 static int carp_del_addr(struct carp_softc
*, struct sockaddr_in
*);
214 static void carp_carpdev_state_locked(struct carp_if
*);
215 static void carp_sc_state_locked(struct carp_softc
*);
217 static void carp_send_na(struct carp_softc
*);
218 static int carp_set_addr6(struct carp_softc
*, struct sockaddr_in6
*);
219 static int carp_del_addr6(struct carp_softc
*, struct sockaddr_in6
*);
220 static void carp_multicast6_cleanup(struct carp_softc
*);
223 static LIST_HEAD(, carp_softc
) carpif_list
;
225 struct if_clone carp_cloner
= IF_CLONE_INITIALIZER(CARP_IFNAME
, carp_clone_create
, carp_clone_destroy
, 0, IF_MAXUNIT
);
227 static eventhandler_tag if_detach_event_tag
;
/*
 * Run in_cksum() over the first `len' bytes of the mbuf chain `m' and
 * hand back its result as a 16-bit value.
 */
static __inline u_int16_t
carp_cksum(struct mbuf *m, int len)
{
	u_int16_t sum;

	sum = in_cksum(m, len);
	return (sum);
}
236 carp_hmac_prepare(struct carp_softc
*sc
)
238 u_int8_t version
= CARP_VERSION
, type
= CARP_ADVERTISEMENT
;
239 u_int8_t vhid
= sc
->sc_vhid
& 0xff;
240 struct ifaddr_container
*ifac
;
249 /* XXX: possible race here */
251 /* compute ipad from key */
252 bzero(sc
->sc_pad
, sizeof(sc
->sc_pad
));
253 bcopy(sc
->sc_key
, sc
->sc_pad
, sizeof(sc
->sc_key
));
254 for (i
= 0; i
< sizeof(sc
->sc_pad
); i
++)
255 sc
->sc_pad
[i
] ^= 0x36;
257 /* precompute first part of inner hash */
258 SHA1Init(&sc
->sc_sha1
);
259 SHA1Update(&sc
->sc_sha1
, sc
->sc_pad
, sizeof(sc
->sc_pad
));
260 SHA1Update(&sc
->sc_sha1
, (void *)&version
, sizeof(version
));
261 SHA1Update(&sc
->sc_sha1
, (void *)&type
, sizeof(type
));
262 SHA1Update(&sc
->sc_sha1
, (void *)&vhid
, sizeof(vhid
));
264 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
265 struct ifaddr
*ifa
= ifac
->ifa
;
267 if (ifa
->ifa_addr
->sa_family
== AF_INET
)
268 SHA1Update(&sc
->sc_sha1
,
269 (void *)&ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
,
270 sizeof(struct in_addr
));
274 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
275 struct ifaddr
*ifa
= ifac
->ifa
;
277 if (ifa
->ifa_addr
->sa_family
== AF_INET6
) {
278 in6
= ifatoia6(ifa
)->ia_addr
.sin6_addr
;
279 in6_clearscope(&in6
);
280 SHA1Update(&sc
->sc_sha1
, (void *)&in6
, sizeof(in6
));
285 /* convert ipad to opad */
286 for (i
= 0; i
< sizeof(sc
->sc_pad
); i
++)
287 sc
->sc_pad
[i
] ^= 0x36 ^ 0x5c;
294 carp_hmac_generate(struct carp_softc
*sc
, u_int32_t counter
[2],
295 unsigned char md
[20])
299 /* fetch first half of inner hash */
300 bcopy(&sc
->sc_sha1
, &sha1ctx
, sizeof(sha1ctx
));
302 SHA1Update(&sha1ctx
, (void *)counter
, sizeof(sc
->sc_counter
));
303 SHA1Final(md
, &sha1ctx
);
307 SHA1Update(&sha1ctx
, sc
->sc_pad
, sizeof(sc
->sc_pad
));
308 SHA1Update(&sha1ctx
, md
, 20);
309 SHA1Final(md
, &sha1ctx
);
/*
 * Check the digest carried in a received advertisement.
 *
 * The expected digest for `counter' is recomputed with carp_hmac_generate()
 * and compared against the received digest `md' (20 bytes).
 *
 * Returns 0 when the two digests are identical, non-zero otherwise.
 *
 * `md' comes straight from the network, so the comparison deliberately
 * inspects all 20 bytes and accumulates the differences instead of
 * stopping at the first mismatch; the time taken therefore does not reveal
 * how many leading bytes of a forged digest were correct.
 */
static int
carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
    unsigned char md[20])
{
	unsigned char md2[20];
	unsigned char diff = 0;
	unsigned int i;

	CARP_SCLOCK_ASSERT(sc);

	carp_hmac_generate(sc, counter, md2);

	/* OR together the XOR of every byte pair; no early exit. */
	for (i = 0; i < sizeof(md2); i++)
		diff |= md[i] ^ md2[i];

	return (diff != 0);
}
326 carp_setroute(struct carp_softc
*sc
, int cmd
)
328 struct ifaddr_container
*ifac
;
331 CARP_SCLOCK_ASSERT(sc
);
334 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
335 struct ifaddr
*ifa
= ifac
->ifa
;
337 if (ifa
->ifa_addr
->sa_family
== AF_INET
&&
338 sc
->sc_carpdev
!= NULL
) {
339 int count
= carp_addrcount(
340 (struct carp_if
*)sc
->sc_carpdev
->if_carp
,
341 ifatoia(ifa
), CARP_COUNT_MASTER
);
343 if ((cmd
== RTM_ADD
&& count
== 1) ||
344 (cmd
== RTM_DELETE
&& count
== 0))
345 rtinit(ifa
, cmd
, RTF_UP
| RTF_HOST
);
348 if (ifa
->ifa_addr
->sa_family
== AF_INET6
) {
361 carp_clone_create(struct if_clone
*ifc
, int unit
)
364 struct carp_softc
*sc
;
367 MALLOC(sc
, struct carp_softc
*, sizeof(*sc
), M_CARP
, M_WAITOK
|M_ZERO
);
368 ifp
= SC2IFP(sc
) = kmalloc(sizeof(struct ifnet
), M_IFNET
, M_WAITOK
|M_ZERO
);
370 sc
->sc_flags_backup
= 0;
372 sc
->sc_advbase
= CARP_DFLTINTV
;
373 sc
->sc_vhid
= -1; /* required setting */
375 sc
->sc_init_counter
= 1;
376 sc
->sc_naddrs
= sc
->sc_naddrs6
= 0; /* M_ZERO? */
379 sc
->sc_im6o
.im6o_multicast_hlim
= CARP_DFLTTL
;
382 /* sc->sc_imo.imo_membership = kmalloc((sizeof(struct in_multi) * IP_MAX_MEMBERSHIPS), M_CARP,M_WAITOK);*/
384 sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS;
385 sc->sc_imo.imo_multicast_vif = -1;
387 callout_init(&sc
->sc_ad_tmo
);
388 callout_init(&sc
->sc_md_tmo
);
389 callout_init(&sc
->sc_md6_tmo
);
392 if_initname(ifp
, CARP_IFNAME
, unit
);
393 ifp
->if_mtu
= ETHERMTU
;
394 ifp
->if_flags
= IFF_LOOPBACK
;
395 ifp
->if_ioctl
= carp_ioctl
;
396 ifp
->if_output
= carp_looutput
;
397 ifp
->if_start
= carp_start
;
398 ifp
->if_type
= IFT_CARP
;
399 ifp
->if_snd
.ifq_maxlen
= ifqmaxlen
;
401 if_attach(ifp
, NULL
);
402 bpfattach(ifp
, DLT_NULL
, sizeof(u_int
));
405 LIST_INSERT_HEAD(&carpif_list
, sc
, sc_next
);
412 carp_clone_destroy(struct ifnet
*ifp
)
414 struct carp_softc
*sc
= ifp
->if_softc
;
418 carpdetach(sc
, 1); /* Returns unlocked. */
421 LIST_REMOVE(sc
, sc_next
);
425 /* if_free_type(ifp, IFT_ETHER);*/
426 /* kfree(sc->sc_imo.imo_membership, M_CARP); */
431 * This function can be called on CARP interface destroy path,
432 * and in case of the removal of the underlying interface as
433 * well. We differentiate these two cases. In the latter case
434 * we do not cleanup our multicast memberships, since they
435 * are already freed. Also, in the latter case we do not
436 * release the lock on return, because the function will be
437 * called once more, for another CARP instance on the same
441 carpdetach(struct carp_softc
*sc
, int unlock
)
445 callout_stop(&sc
->sc_ad_tmo
);
446 callout_stop(&sc
->sc_md_tmo
);
447 callout_stop(&sc
->sc_md6_tmo
);
450 carp_suppress_preempt
--;
453 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
)
454 carp_suppress_preempt
--;
455 sc
->sc_sendad_errors
= 0;
457 carp_set_state(sc
, INIT
);
458 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
461 carp_multicast_cleanup(sc
);
463 carp_multicast6_cleanup(sc
);
466 if (sc
->sc_carpdev
!= NULL
) {
467 cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
468 CARP_LOCK_ASSERT(cif
);
469 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
470 if (!--cif
->vhif_nvrs
) {
471 ifpromisc(sc
->sc_carpdev
, 0);
472 sc
->sc_carpdev
->if_carp
= NULL
;
473 CARP_LOCK_DESTROY(cif
);
477 sc
->sc_carpdev
= NULL
;
481 /* Detach an interface from the carp. */
483 carp_ifdetach(void *arg __unused
, struct ifnet
*ifp
)
485 struct carp_if
*cif
= (struct carp_if
*)ifp
->if_carp
;
486 struct carp_softc
*sc
, *nextsc
;
492 * XXX: At the end of for() cycle the lock will be destroyed.
495 for (sc
= TAILQ_FIRST(&cif
->vhif_vrs
); sc
; sc
= nextsc
) {
496 nextsc
= TAILQ_NEXT(sc
, sc_list
);
503 * process input packet.
504 * we have rearranged checks order compared to the rfc,
505 * but it seems more efficient this way or not possible otherwise.
508 carp_input(struct mbuf
*m
, int hlen
)
510 struct ip
*ip
= mtod(m
, struct ip
*);
511 struct carp_header
*ch
;
514 carpstats
.carps_ipackets
++;
516 if (!carp_opts
[CARPCTL_ALLOW
]) {
521 /* check if received on a valid carp interface */
522 if (m
->m_pkthdr
.rcvif
->if_carp
== NULL
) {
523 carpstats
.carps_badif
++;
524 CARP_LOG("carp_input: packet received on non-carp "
526 m
->m_pkthdr
.rcvif
->if_xname
);
531 /* verify that the IP TTL is 255. */
532 if (ip
->ip_ttl
!= CARP_DFLTTL
) {
533 carpstats
.carps_badttl
++;
534 CARP_LOG("carp_input: received ttl %d != 255i on %s\n",
536 m
->m_pkthdr
.rcvif
->if_xname
);
541 iplen
= ip
->ip_hl
<< 2;
543 if (m
->m_pkthdr
.len
< iplen
+ sizeof(*ch
)) {
544 carpstats
.carps_badlen
++;
545 CARP_LOG("carp_input: received len %zd < "
546 "sizeof(struct carp_header)\n",
547 m
->m_len
- sizeof(struct ip
));
552 if (iplen
+ sizeof(*ch
) < m
->m_len
) {
553 if ((m
= m_pullup(m
, iplen
+ sizeof(*ch
))) == NULL
) {
554 carpstats
.carps_hdrops
++;
555 CARP_LOG("carp_input: pullup failed\n");
558 ip
= mtod(m
, struct ip
*);
560 ch
= (struct carp_header
*)((char *)ip
+ iplen
);
563 * verify that the received packet length is
564 * equal to the CARP header
566 len
= iplen
+ sizeof(*ch
);
567 if (len
> m
->m_pkthdr
.len
) {
568 carpstats
.carps_badlen
++;
569 CARP_LOG("carp_input: packet too short %d on %s\n",
571 m
->m_pkthdr
.rcvif
->if_xname
);
576 if ((m
= m_pullup(m
, len
)) == NULL
) {
577 carpstats
.carps_hdrops
++;
580 ip
= mtod(m
, struct ip
*);
581 ch
= (struct carp_header
*)((char *)ip
+ iplen
);
583 /* verify the CARP checksum */
585 if (carp_cksum(m
, len
- iplen
)) {
586 carpstats
.carps_badsum
++;
587 CARP_LOG("carp_input: checksum failed on %s\n",
588 m
->m_pkthdr
.rcvif
->if_xname
);
594 carp_input_c(m
, ch
, AF_INET
);
599 carp6_input(struct mbuf
**mp
, int *offp
, int proto
)
601 struct mbuf
*m
= *mp
;
602 struct ip6_hdr
*ip6
= mtod(m
, struct ip6_hdr
*);
603 struct carp_header
*ch
;
606 carpstats
.carps_ipackets6
++;
608 if (!carp_opts
[CARPCTL_ALLOW
]) {
610 return (IPPROTO_DONE
);
613 /* check if received on a valid carp interface */
614 if (m
->m_pkthdr
.rcvif
->if_carp
== NULL
) {
615 carpstats
.carps_badif
++;
616 CARP_LOG("carp6_input: packet received on non-carp "
618 m
->m_pkthdr
.rcvif
->if_xname
);
620 return (IPPROTO_DONE
);
623 /* verify that the IP TTL is 255 */
624 if (ip6
->ip6_hlim
!= CARP_DFLTTL
) {
625 carpstats
.carps_badttl
++;
626 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
628 m
->m_pkthdr
.rcvif
->if_xname
);
630 return (IPPROTO_DONE
);
633 /* verify that we have a complete carp packet */
635 IP6_EXTHDR_GET(ch
, struct carp_header
*, m
, *offp
, sizeof(*ch
));
637 carpstats
.carps_badlen
++;
638 CARP_LOG("carp6_input: packet size %u too small\n", len
);
639 return (IPPROTO_DONE
);
643 /* verify the CARP checksum */
645 if (carp_cksum(m
, sizeof(*ch
))) {
646 carpstats
.carps_badsum
++;
647 CARP_LOG("carp6_input: checksum failed, on %s\n",
648 m
->m_pkthdr
.rcvif
->if_xname
);
650 return (IPPROTO_DONE
);
654 carp_input_c(m
, ch
, AF_INET6
);
655 return (IPPROTO_DONE
);
660 carp_input_c(struct mbuf
*m
, struct carp_header
*ch
, sa_family_t af
)
662 struct ifnet
*ifp
= m
->m_pkthdr
.rcvif
;
663 struct carp_softc
*sc
;
664 u_int64_t tmp_counter
;
665 struct timeval sc_tv
, ch_tv
;
667 /* verify that the VHID is valid on the receiving interface */
668 CARP_LOCK(ifp
->if_carp
);
669 TAILQ_FOREACH(sc
, &((struct carp_if
*)ifp
->if_carp
)->vhif_vrs
, sc_list
)
670 if (sc
->sc_vhid
== ch
->carp_vhid
)
673 if (!sc
|| !((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
))) {
674 carpstats
.carps_badvhid
++;
675 CARP_UNLOCK(ifp
->if_carp
);
680 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
681 SC2IFP(sc
)->if_ipackets
++;
682 SC2IFP(sc
)->if_ibytes
+= m
->m_pkthdr
.len
;
684 if (SC2IFP(sc
)->if_bpf
) {
685 struct ip
*ip
= mtod(m
, struct ip
*);
687 /* BPF wants net byte order */
688 ip
->ip_len
= htons(ip
->ip_len
+ (ip
->ip_hl
<< 2));
689 ip
->ip_off
= htons(ip
->ip_off
);
690 bpf_mtap(SC2IFP(sc
)->if_bpf
, m
);
693 /* verify the CARP version. */
694 if (ch
->carp_version
!= CARP_VERSION
) {
695 carpstats
.carps_badver
++;
696 SC2IFP(sc
)->if_ierrors
++;
697 CARP_UNLOCK(ifp
->if_carp
);
698 CARP_LOG("%s; invalid version %d\n",
699 SC2IFP(sc
)->if_xname
,
705 /* verify the hash */
706 if (carp_hmac_verify(sc
, ch
->carp_counter
, ch
->carp_md
)) {
707 carpstats
.carps_badauth
++;
708 SC2IFP(sc
)->if_ierrors
++;
709 CARP_UNLOCK(ifp
->if_carp
);
710 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc
)->if_xname
);
715 tmp_counter
= ntohl(ch
->carp_counter
[0]);
716 tmp_counter
= tmp_counter
<<32;
717 tmp_counter
+= ntohl(ch
->carp_counter
[1]);
719 /* XXX Replay protection goes here */
721 sc
->sc_init_counter
= 0;
722 sc
->sc_counter
= tmp_counter
;
724 sc_tv
.tv_sec
= sc
->sc_advbase
;
725 if (carp_suppress_preempt
&& sc
->sc_advskew
< 240)
726 sc_tv
.tv_usec
= 240 * 1000000 / 256;
728 sc_tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
729 ch_tv
.tv_sec
= ch
->carp_advbase
;
730 ch_tv
.tv_usec
= ch
->carp_advskew
* 1000000 / 256;
732 switch (sc
->sc_state
) {
737 * If we receive an advertisement from a master who's going to
738 * be more frequent than us, go into BACKUP state.
740 if (timevalcmp(&sc_tv
, &ch_tv
, >) ||
741 timevalcmp(&sc_tv
, &ch_tv
, ==)) {
742 callout_stop(&sc
->sc_ad_tmo
);
743 CARP_DEBUG("%s: MASTER -> BACKUP "
744 "(more frequent advertisement received)\n",
745 SC2IFP(sc
)->if_xname
);
746 carp_set_state(sc
, BACKUP
);
748 carp_setroute(sc
, RTM_DELETE
);
753 * If we're pre-empting masters who advertise slower than us,
754 * and this one claims to be slower, treat him as down.
756 if (carp_opts
[CARPCTL_PREEMPT
] &&
757 timevalcmp(&sc_tv
, &ch_tv
, <)) {
758 CARP_DEBUG("%s: BACKUP -> MASTER "
759 "(preempting a slower master)\n",
760 SC2IFP(sc
)->if_xname
);
761 carp_master_down_locked(sc
);
766 * If the master is going to advertise at such a low frequency
767 * that he's guaranteed to time out, we'd might as well just
768 * treat him as timed out now.
770 sc_tv
.tv_sec
= sc
->sc_advbase
* 3;
771 if (timevalcmp(&sc_tv
, &ch_tv
, <)) {
772 CARP_DEBUG("%s: BACKUP -> MASTER "
773 "(master timed out)\n",
774 SC2IFP(sc
)->if_xname
);
775 carp_master_down_locked(sc
);
780 * Otherwise, we reset the counter and wait for the next
787 CARP_UNLOCK(ifp
->if_carp
);
794 carp_prepare_ad(struct mbuf
*m
, struct carp_softc
*sc
, struct carp_header
*ch
)
797 struct ifnet
*ifp
= SC2IFP(sc
);
799 if (sc
->sc_init_counter
) {
800 /* this could also be seconds since unix epoch */
801 sc
->sc_counter
= karc4random();
802 sc
->sc_counter
= sc
->sc_counter
<< 32;
803 sc
->sc_counter
+= karc4random();
807 ch
->carp_counter
[0] = htonl((sc
->sc_counter
>>32)&0xffffffff);
808 ch
->carp_counter
[1] = htonl(sc
->sc_counter
&0xffffffff);
810 carp_hmac_generate(sc
, ch
->carp_counter
, ch
->carp_md
);
812 /* Tag packet for carp_output */
813 mtag
= m_tag_get(PACKET_TAG_CARP
, sizeof(struct ifnet
*), MB_DONTWAIT
);
816 SC2IFP(sc
)->if_oerrors
++;
819 bcopy(&ifp
, (caddr_t
)(mtag
+ 1), sizeof(struct ifnet
*));
820 m_tag_prepend(m
, mtag
);
826 carp_send_ad_all(void)
828 struct carp_softc
*sc
;
830 LIST_FOREACH(sc
, &carpif_list
, sc_next
) {
831 if (sc
->sc_carpdev
== NULL
)
834 if ((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
) &&
835 sc
->sc_state
== MASTER
)
836 carp_send_ad_locked(sc
);
842 carp_send_ad(void *v
)
844 struct carp_softc
*sc
= v
;
847 carp_send_ad_locked(sc
);
852 carp_send_ad_locked(struct carp_softc
*sc
)
854 struct carp_header ch
;
856 struct carp_header
*ch_ptr
;
858 int len
, advbase
, advskew
;
861 /* bow out if we've lost our UPness or RUNNINGuiness */
862 if (!((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
))) {
866 advbase
= sc
->sc_advbase
;
867 if (!carp_suppress_preempt
|| sc
->sc_advskew
> 240)
868 advskew
= sc
->sc_advskew
;
872 tv
.tv_usec
= advskew
* 1000000 / 256;
875 ch
.carp_version
= CARP_VERSION
;
876 ch
.carp_type
= CARP_ADVERTISEMENT
;
877 ch
.carp_vhid
= sc
->sc_vhid
;
878 ch
.carp_advbase
= advbase
;
879 ch
.carp_advskew
= advskew
;
880 ch
.carp_authlen
= 7; /* XXX DEFINE */
881 ch
.carp_pad1
= 0; /* must be zero */
888 MGETHDR(m
, M_NOWAIT
, MT_HEADER
);
890 SC2IFP(sc
)->if_oerrors
++;
891 carpstats
.carps_onomem
++;
892 /* XXX maybe less ? */
893 if (advbase
!= 255 || advskew
!= 255)
894 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
898 len
= sizeof(*ip
) + sizeof(ch
);
899 m
->m_pkthdr
.len
= len
;
900 m
->m_pkthdr
.rcvif
= NULL
;
902 MH_ALIGN(m
, m
->m_len
);
903 m
->m_flags
|= M_MCAST
;
904 ip
= mtod(m
, struct ip
*);
905 ip
->ip_v
= IPVERSION
;
906 ip
->ip_hl
= sizeof(*ip
) >> 2;
907 ip
->ip_tos
= IPTOS_LOWDELAY
;
909 ip
->ip_id
= ip_newid();
911 ip
->ip_ttl
= CARP_DFLTTL
;
912 ip
->ip_p
= IPPROTO_CARP
;
914 ip
->ip_src
.s_addr
= sc
->sc_ia
->ia_addr
.sin_addr
.s_addr
;
915 ip
->ip_dst
.s_addr
= htonl(INADDR_CARP_GROUP
);
917 ch_ptr
= (struct carp_header
*)(&ip
[1]);
918 bcopy(&ch
, ch_ptr
, sizeof(ch
));
919 if (carp_prepare_ad(m
, sc
, ch_ptr
))
922 m
->m_data
+= sizeof(*ip
);
923 ch_ptr
->carp_cksum
= carp_cksum(m
, len
- sizeof(*ip
));
924 m
->m_data
-= sizeof(*ip
);
926 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
927 SC2IFP(sc
)->if_opackets
++;
928 SC2IFP(sc
)->if_obytes
+= len
;
929 carpstats
.carps_opackets
++;
931 if (ip_output(m
, NULL
, NULL
, IP_RAWOUTPUT
, &sc
->sc_imo
, NULL
)) {
932 SC2IFP(sc
)->if_oerrors
++;
933 if (sc
->sc_sendad_errors
< INT_MAX
)
934 sc
->sc_sendad_errors
++;
935 if (sc
->sc_sendad_errors
== CARP_SENDAD_MAX_ERRORS
) {
936 carp_suppress_preempt
++;
937 if (carp_suppress_preempt
== 1) {
943 sc
->sc_sendad_success
= 0;
945 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
) {
946 if (++sc
->sc_sendad_success
>=
947 CARP_SENDAD_MIN_SUCCESS
) {
948 carp_suppress_preempt
--;
949 sc
->sc_sendad_errors
= 0;
952 sc
->sc_sendad_errors
= 0;
960 MGETHDR(m
, M_NOWAIT
, MT_HEADER
);
962 SC2IFP(sc
)->if_oerrors
++;
963 carpstats
.carps_onomem
++;
964 /* XXX maybe less ? */
965 if (advbase
!= 255 || advskew
!= 255)
966 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
970 len
= sizeof(*ip6
) + sizeof(ch
);
971 m
->m_pkthdr
.len
= len
;
972 m
->m_pkthdr
.rcvif
= NULL
;
974 MH_ALIGN(m
, m
->m_len
);
975 m
->m_flags
|= M_MCAST
;
976 ip6
= mtod(m
, struct ip6_hdr
*);
977 bzero(ip6
, sizeof(*ip6
));
978 ip6
->ip6_vfc
|= IPV6_VERSION
;
979 ip6
->ip6_hlim
= CARP_DFLTTL
;
980 ip6
->ip6_nxt
= IPPROTO_CARP
;
981 bcopy(&sc
->sc_ia6
->ia_addr
.sin6_addr
, &ip6
->ip6_src
,
982 sizeof(struct in6_addr
));
983 /* set the multicast destination */
985 ip6
->ip6_dst
.s6_addr16
[0] = htons(0xff02);
986 ip6
->ip6_dst
.s6_addr8
[15] = 0x12;
987 if (in6_setscope(&ip6
->ip6_dst
, sc
->sc_carpdev
, NULL
) != 0) {
988 SC2IFP(sc
)->if_oerrors
++;
990 CARP_LOG("%s: in6_setscope failed\n", __func__
);
994 ch_ptr
= (struct carp_header
*)(&ip6
[1]);
995 bcopy(&ch
, ch_ptr
, sizeof(ch
));
996 if (carp_prepare_ad(m
, sc
, ch_ptr
))
999 m
->m_data
+= sizeof(*ip6
);
1000 ch_ptr
->carp_cksum
= carp_cksum(m
, len
- sizeof(*ip6
));
1001 m
->m_data
-= sizeof(*ip6
);
1003 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
1004 SC2IFP(sc
)->if_opackets
++;
1005 SC2IFP(sc
)->if_obytes
+= len
;
1006 carpstats
.carps_opackets6
++;
1008 if (ip6_output(m
, NULL
, NULL
, 0, &sc
->sc_im6o
, NULL
, NULL
)) {
1009 SC2IFP(sc
)->if_oerrors
++;
1010 if (sc
->sc_sendad_errors
< INT_MAX
)
1011 sc
->sc_sendad_errors
++;
1012 if (sc
->sc_sendad_errors
== CARP_SENDAD_MAX_ERRORS
) {
1013 carp_suppress_preempt
++;
1014 if (carp_suppress_preempt
== 1) {
1020 sc
->sc_sendad_success
= 0;
1022 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
) {
1023 if (++sc
->sc_sendad_success
>=
1024 CARP_SENDAD_MIN_SUCCESS
) {
1025 carp_suppress_preempt
--;
1026 sc
->sc_sendad_errors
= 0;
1029 sc
->sc_sendad_errors
= 0;
1034 if (advbase
!= 255 || advskew
!= 255)
1035 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
1041 * Broadcast a gratuitous ARP request containing
1042 * the virtual router MAC address for each IP address
1043 * associated with the virtual router.
1046 carp_send_arp(struct carp_softc
*sc
)
1048 struct ifaddr_container
*ifac
;
1050 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
1051 struct ifaddr
*ifa
= ifac
->ifa
;
1053 if (ifa
->ifa_addr
->sa_family
!= AF_INET
)
1055 lwkt_serialize_enter(sc
->sc_carpdev
->if_serializer
);
1056 arp_ifinit2(sc
->sc_carpdev
, ifa
, IF_LLADDR(sc
->sc_ifp
));
1057 lwkt_serialize_exit(sc
->sc_carpdev
->if_serializer
);
1059 DELAY(1000); /* XXX */
1065 carp_send_na(struct carp_softc
*sc
)
1067 struct ifaddr_container
*ifac
;
1068 struct in6_addr
*in6
;
1069 static struct in6_addr mcast
= IN6ADDR_LINKLOCAL_ALLNODES_INIT
;
1071 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
1072 struct ifaddr
*ifa
= ifac
->ifa
;
1074 if (ifa
->ifa_addr
->sa_family
!= AF_INET6
)
1077 in6
= &ifatoia6(ifa
)->ia_addr
.sin6_addr
;
1078 nd6_na_output(sc
->sc_carpdev
, &mcast
, in6
,
1079 ND_NA_FLAG_OVERRIDE
, 1, NULL
);
1080 DELAY(1000); /* XXX */
1086 carp_addrcount(struct carp_if
*cif
, struct in_ifaddr
*ia
, int type
)
1088 struct carp_softc
*vh
;
1091 CARP_LOCK_ASSERT(cif
);
1093 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1094 if ((type
== CARP_COUNT_RUNNING
&&
1095 (SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
)) ||
1096 (type
== CARP_COUNT_MASTER
&& vh
->sc_state
== MASTER
)) {
1097 struct ifaddr_container
*ifac
;
1099 TAILQ_FOREACH(ifac
, &SC2IFP(vh
)->if_addrheads
[mycpuid
],
1101 struct ifaddr
*ifa
= ifac
->ifa
;
1103 if (ifa
->ifa_addr
->sa_family
== AF_INET
&&
1104 ia
->ia_addr
.sin_addr
.s_addr
==
1105 ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
)
1114 carp_iamatch(void *v
, struct in_ifaddr
*ia
,
1115 struct in_addr
*isaddr
, u_int8_t
**enaddr
)
1117 struct carp_if
*cif
= v
;
1118 struct carp_softc
*vh
;
1119 int index
, count
= 0;
1123 if (carp_opts
[CARPCTL_ARPBALANCE
]) {
1125 * XXX proof of concept implementation.
1126 * We use the source ip to decide which virtual host should
1127 * handle the request. If we're master of that virtual host,
1128 * then we respond, otherwise, just drop the arp packet on
1131 count
= carp_addrcount(cif
, ia
, CARP_COUNT_RUNNING
);
1133 /* should never reach this */
1138 /* this should be a hash, like pf_hash() */
1139 index
= ntohl(isaddr
->s_addr
) % count
;
1142 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1143 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
)) {
1144 struct ifaddr_container
*ifac
;
1146 TAILQ_FOREACH(ifac
, &SC2IFP(vh
)->if_addrheads
[mycpuid
], ifa_link
) {
1147 struct ifaddr
*ifa
= ifac
->ifa
;
1149 if (ifa
->ifa_addr
->sa_family
==
1151 ia
->ia_addr
.sin_addr
.s_addr
==
1152 ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
) {
1153 if (count
== index
) {
1156 *enaddr
= IF_LLADDR(vh
->sc_ifp
);
1170 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1171 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1172 vh
->sc_state
== MASTER
) {
1173 *enaddr
= IF_LLADDR(vh
->sc_ifp
);
1185 carp_iamatch6(void *v
, struct in6_addr
*taddr
)
1187 struct carp_if
*cif
= v
;
1188 struct carp_softc
*vh
;
1191 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1192 struct ifaddr_container
*ifac
;
1194 TAILQ_FOREACH(ifac
, &SC2IFP(vh
)->if_addrheads
[mycpuid
], ifa_link
) {
1195 struct ifaddr
*ifa
= ifac
->ifa
;
1197 if (IN6_ARE_ADDR_EQUAL(taddr
,
1198 &ifatoia6(ifa
)->ia_addr
.sin6_addr
) &&
1199 (SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1200 vh
->sc_state
== MASTER
) {
1212 carp_macmatch6(void *v
, struct mbuf
*m
, const struct in6_addr
*taddr
)
1215 struct carp_if
*cif
= v
;
1216 struct carp_softc
*sc
;
1219 TAILQ_FOREACH(sc
, &cif
->vhif_vrs
, sc_list
) {
1220 struct ifaddr_container
*ifac
;
1222 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
1223 struct ifaddr
*ifa
= ifac
->ifa
;
1225 if (IN6_ARE_ADDR_EQUAL(taddr
,
1226 &ifatoia6(ifa
)->ia_addr
.sin6_addr
) &&
1227 (SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
)) {
1228 struct ifnet
*ifp
= SC2IFP(sc
);
1229 mtag
= m_tag_get(PACKET_TAG_CARP
,
1230 sizeof(struct ifnet
*), MB_DONTWAIT
);
1232 /* better a bit than nothing */
1234 return (IF_LLADDR(sc
->sc_ifp
));
1236 bcopy(&ifp
, (caddr_t
)(mtag
+ 1),
1237 sizeof(struct ifnet
*));
1238 m_tag_prepend(m
, mtag
);
1241 return (IF_LLADDR(sc
->sc_ifp
));
1252 carp_forus(void *v
, void *dhost
)
1254 struct carp_if
*cif
= v
;
1255 struct carp_softc
*vh
;
1256 u_int8_t
*ena
= dhost
;
1259 * XXX: See here for check on MAC adr is not for virtual use
1263 if (ena
[0] || ena
[1] || ena
[2] != 0x5e || ena
[3] || ena
[4] != 1)
1269 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
)
1270 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1271 vh
->sc_state
== MASTER
&&
1272 !bcmp(dhost
, IF_LLADDR(vh
->sc_ifp
), ETHER_ADDR_LEN
)) {
1274 return (SC2IFP(vh
));
1282 carp_master_down(void *v
)
1284 struct carp_softc
*sc
= v
;
1286 lwkt_serialize_enter(sc
->sc_ifp
->if_serializer
);
1287 carp_master_down_locked(sc
);
1288 lwkt_serialize_exit(sc
->sc_ifp
->if_serializer
);
1292 carp_master_down_locked(struct carp_softc
*sc
)
1295 CARP_SCLOCK_ASSERT(sc
);
1297 switch (sc
->sc_state
) {
1299 kprintf("%s: master_down event in INIT state\n",
1300 SC2IFP(sc
)->if_xname
);
1305 carp_set_state(sc
, MASTER
);
1306 carp_send_ad_locked(sc
);
1312 carp_setroute(sc
, RTM_ADD
);
1318 * When in backup state, af indicates whether to reset the master down timer
1319 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
/*
 * carp_setrun: recompute IFF_RUNNING for sc and (re)arm the timer that
 * matches its current state.
 *
 *   sc - carp softc to update.
 *   af - address family selector for the master-down timers.
 *
 * Visible behaviour:
 *  - With no parent device (sc_carpdev == NULL) IFF_RUNNING is cleared and
 *    the state is forced to INIT.
 *  - IFF_RUNNING is set when the interface is IFF_UP, sc_vhid > 0 and at
 *    least one IPv4 or IPv6 address is configured; otherwise it is cleared
 *    and routes are removed (RTM_DELETE).
 *  - The switch on sc_state either preempts to MASTER (when
 *    carp_opts[CARPCTL_PREEMPT] is set and carp_suppress_preempt is zero) or
 *    drops to BACKUP, arms sc_md_tmo / sc_md6_tmo with carp_master_down, or
 *    arms the advertisement timer sc_ad_tmo.
 *
 * NOTE(review): case labels, the declaration of tv, and the final argument
 * line of the last callout_reset() are missing from this listing.
 */
1322 carp_setrun(struct carp_softc
*sc
, sa_family_t af
)
1326 if (sc
->sc_carpdev
== NULL
) {
1327 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1328 carp_set_state(sc
, INIT
);
1332 if (SC2IFP(sc
)->if_flags
& IFF_UP
&&
1333 sc
->sc_vhid
> 0 && (sc
->sc_naddrs
|| sc
->sc_naddrs6
))
1334 SC2IFP(sc
)->if_flags
|= IFF_RUNNING
;
1336 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1337 carp_setroute(sc
, RTM_DELETE
);
1341 switch (sc
->sc_state
) {
1343 if (carp_opts
[CARPCTL_PREEMPT
] && !carp_suppress_preempt
) {
1344 carp_send_ad_locked(sc
);
1349 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1350 SC2IFP(sc
)->if_xname
);
1351 carp_set_state(sc
, MASTER
);
1352 carp_setroute(sc
, RTM_ADD
);
1354 CARP_DEBUG("%s: INIT -> BACKUP\n", SC2IFP(sc
)->if_xname
);
1355 carp_set_state(sc
, BACKUP
);
1356 carp_setroute(sc
, RTM_DELETE
);
/*
 * Master-down timeout: three advertisement intervals plus the skew
 * expressed as advskew/256 of a second.
 */
1361 callout_stop(&sc
->sc_ad_tmo
);
1362 tv
.tv_sec
= 3 * sc
->sc_advbase
;
1363 tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
1367 callout_reset(&sc
->sc_md_tmo
, tvtohz_high(&tv
),
1368 carp_master_down
, sc
);
1373 callout_reset(&sc
->sc_md6_tmo
, tvtohz_high(&tv
),
1374 carp_master_down
, sc
);
1379 callout_reset(&sc
->sc_md_tmo
, tvtohz_high(&tv
),
1380 carp_master_down
, sc
);
1382 callout_reset(&sc
->sc_md6_tmo
, tvtohz_high(&tv
),
1383 carp_master_down
, sc
);
/* Advertisement interval: advbase seconds plus advskew/256 of a second. */
1388 tv
.tv_sec
= sc
->sc_advbase
;
1389 tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
1390 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
/*
 * carp_multicast_cleanup: drop the IPv4 multicast memberships recorded in
 * sc->sc_imo.
 *
 * Every non-NULL imo_membership[n] is released with in_delmulti() and the
 * slot is cleared; afterwards the membership count is reset to 0 and the
 * multicast interface pointer is cleared.
 *
 * NOTE(review): the loop header that steps n is missing from this listing;
 * only the initialisation of n from imo_num_memberships is visible.
 */
1397 carp_multicast_cleanup(struct carp_softc
*sc
)
1399 struct ip_moptions
*imo
= &sc
->sc_imo
;
1400 u_int16_t n
= imo
->imo_num_memberships
;
1402 /* Clean up our own multicast memberships */
1404 if (imo
->imo_membership
[n
] != NULL
) {
1405 in_delmulti(imo
->imo_membership
[n
]);
1406 imo
->imo_membership
[n
] = NULL
;
1409 imo
->imo_num_memberships
= 0;
1410 imo
->imo_multicast_ifp
= NULL
;
/*
 * carp_multicast6_cleanup: drop the IPv6 multicast memberships recorded in
 * sc->sc_im6o.
 *
 * Repeatedly takes the first entry of im6o_memberships, unlinks it and hands
 * it to in6_leavegroup() until the list is empty, then clears the multicast
 * interface pointer.
 */
1415 carp_multicast6_cleanup(struct carp_softc
*sc
)
1417 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1419 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1420 struct in6_multi_mship
*imm
=
1421 LIST_FIRST(&im6o
->im6o_memberships
);
1423 LIST_REMOVE(imm
, i6mm_chain
);
1424 in6_leavegroup(imm
);
1426 im6o
->im6o_multicast_ifp
= NULL
;
/*
 * carp_set_addr: bind an IPv4 address to carp interface sc and attach sc to
 * the physical interface whose subnet contains that address.
 *
 *   sc  - carp softc being configured.
 *   sin - the IPv4 address to add.
 *
 * Visible steps:
 *  - An all-zero address is handled first: the state is forced to INIT when
 *    the interface is not IFF_UP, and IFF_UP is set.
 *  - in_ifaddrhead is scanned for an address on a different,
 *    multicast-capable interface whose subnet contains the new address; an
 *    exact address match is tested as well.
 *  - EADDRNOTAVAIL is returned when no suitable interface results or when
 *    the multicast options are already bound to another interface.
 *  - On the first membership the INADDR_CARP_GROUP multicast group is joined
 *    on ifp and the multicast options (ifp, TTL, loop) are filled in.
 *  - If ifp has no carp_if yet, one is allocated with malloc type M_CARP and
 *    ifp is switched to promiscuous mode; otherwise the existing list is
 *    checked for another softc carrying the same vhid.
 *  - sc is linked into cif->vhif_vrs (the comment in the code says the list
 *    is kept in order), IFF_UP is set and carp_sc_state_locked() re-evaluates
 *    the state.
 *  - The trailing in_delmulti() undoes the group join; presumably this is
 *    the error-cleanup path.
 *
 * NOTE(review): declarations of ifp/own/error, most error returns, lock
 * calls, labels and else-branches are missing from this listing; the
 * description above covers only what is visible.
 */
1431 carp_set_addr(struct carp_softc
*sc
, struct sockaddr_in
*sin
)
1434 struct carp_if
*cif
;
1435 struct in_ifaddr
*ia
, *ia_if
;
1436 struct ip_moptions
*imo
= &sc
->sc_imo
;
1437 struct in_addr addr
;
/*
 * NOTE(review): sin_addr.s_addr is a network-order field, so this is a
 * network-to-host conversion; ntohl() would state the intent, although both
 * macros perform the same byte swap.
 */
1438 u_long iaddr
= htonl(sin
->sin_addr
.s_addr
);
1441 if (sin
->sin_addr
.s_addr
== 0)
1443 if (!(SC2IFP(sc
)->if_flags
& IFF_UP
))
1445 carp_set_state(sc
, INIT
);
1449 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1454 /* we have to do it by hands to check we won't match on us */
1455 ia_if
= NULL
; own
= 0;
1456 TAILQ_FOREACH(ia
, &in_ifaddrhead
, ia_link
) {
1457 /* and, yeah, we need a multicast-capable iface too */
1458 if (ia
->ia_ifp
!= SC2IFP(sc
) &&
1459 (ia
->ia_ifp
->if_flags
& IFF_MULTICAST
) &&
1460 (iaddr
& ia
->ia_subnetmask
) == ia
->ia_subnet
) {
1463 if (sin
->sin_addr
.s_addr
==
1464 ia
->ia_addr
.sin_addr
.s_addr
)
1471 return (EADDRNOTAVAIL
);
1476 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0 ||
1477 (imo
->imo_multicast_ifp
&& imo
->imo_multicast_ifp
!= ifp
))
1478 return (EADDRNOTAVAIL
);
/* First address on this softc: join the CARP multicast group on ifp. */
1480 if (imo
->imo_num_memberships
== 0) {
1481 addr
.s_addr
= htonl(INADDR_CARP_GROUP
);
1482 if ((imo
->imo_membership
[0] = in_addmulti(&addr
, ifp
)) == NULL
)
1484 imo
->imo_num_memberships
++;
1485 imo
->imo_multicast_ifp
= ifp
;
1486 imo
->imo_multicast_ttl
= CARP_DFLTTL
;
1487 imo
->imo_multicast_loop
= 0;
/* No carp_if on the parent yet: allocate one and go promiscuous. */
1490 if (!ifp
->if_carp
) {
1492 MALLOC(cif
, struct carp_if
*, sizeof(*cif
), M_CARP
,
1494 if ((error
= ifpromisc(ifp
, 1))) {
1499 CARP_LOCK_INIT(cif
);
1501 cif
->vhif_ifp
= ifp
;
1502 TAILQ_INIT(&cif
->vhif_vrs
);
1506 struct carp_softc
*vr
;
1508 cif
= (struct carp_if
*)ifp
->if_carp
;
/* Look for a different softc that already uses this vhid. */
1510 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1511 if (vr
!= sc
&& vr
->sc_vhid
== sc
->sc_vhid
) {
1518 sc
->sc_carpdev
= ifp
;
1520 { /* XXX prevent endless loop if already in queue */
1521 struct carp_softc
*vr
, *after
= NULL
;
1523 cif
= (struct carp_if
*)ifp
->if_carp
;
1525 /* XXX: cif should not change, right? So we still hold the lock */
1526 CARP_LOCK_ASSERT(cif
);
1528 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
) {
1531 if (vr
->sc_vhid
< sc
->sc_vhid
)
1536 /* We're trying to keep things in order */
1537 if (after
== NULL
) {
1538 TAILQ_INSERT_TAIL(&cif
->vhif_vrs
, sc
, sc_list
);
1540 TAILQ_INSERT_AFTER(&cif
->vhif_vrs
, after
, sc
, sc_list
);
1547 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1552 carp_sc_state_locked(sc
);
1560 in_delmulti(imo
->imo_membership
[--imo
->imo_num_memberships
]);
/*
 * carp_del_addr: remove one IPv4 address from carp interface sc.
 *
 *   sc  - carp softc losing an address.
 *   sin - the address being removed (not referenced in the visible lines).
 *
 * When the IPv4 address count drops to zero the advertisement timer is
 * stopped, IFF_UP and IFF_RUNNING are cleared, the CARP multicast group is
 * left, and sc is unlinked from the parent's vhif_vrs list.  When that was
 * the last softc on the parent (vhif_nvrs reaches zero) the parent's if_carp
 * pointer is cleared and the carp_if is destroyed and freed.
 *
 * Fix: the carp_if is allocated in carp_set_addr() with malloc type M_CARP
 * but was released here with M_IFADDR.  The free now names M_CARP so that
 * the allocation and release use the same malloc type.
 *
 * NOTE(review): the return type, braces, lock calls and return statement are
 * missing from this listing and are left untouched.
 */
1566 carp_del_addr(struct carp_softc
*sc
, struct sockaddr_in
*sin
)
1570 if (!--sc
->sc_naddrs
) {
1571 struct carp_if
*cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1572 struct ip_moptions
*imo
= &sc
->sc_imo
;
1575 callout_stop(&sc
->sc_ad_tmo
);
1576 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
1577 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1579 in_delmulti(imo
->imo_membership
[--imo
->imo_num_memberships
]);
1580 imo
->imo_multicast_ifp
= NULL
;
1581 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
/* Last softc on the parent: tear down the shared carp_if. */
1582 if (!--cif
->vhif_nvrs
) {
1583 sc
->sc_carpdev
->if_carp
= NULL
;
1584 CARP_LOCK_DESTROY(cif
);
/* Must match the malloc type used when cif was allocated (M_CARP). */
1585 FREE(cif
, M_CARP
);
/*
 * carp_set_addr6: bind an IPv6 address to carp interface sc and attach sc to
 * the physical interface whose prefix contains that address.
 *
 *   sc   - carp softc being configured.
 *   sin6 - the IPv6 address to add.
 *
 * Visible steps:
 *  - The unspecified address is handled first: the state is forced to INIT
 *    when the interface is not IFF_UP, and IFF_UP is set.
 *  - The in6_ifaddr chain is scanned; each candidate is compared word by
 *    word under its prefix mask, and must sit on a different,
 *    multicast-capable interface.  An exact address match is tested too.
 *  - EADDRNOTAVAIL is returned when no suitable interface results or when
 *    the multicast options are already bound to another interface.
 *  - For the first IPv6 address two groups are joined on ifp: ff02::12 and
 *    the solicited-node group built from the low 32 bits of the address.
 *  - If ifp has no carp_if yet, one is allocated with malloc type M_CARP and
 *    ifp is switched to promiscuous mode; otherwise the existing list is
 *    checked for another softc carrying the same vhid.
 *  - sc is linked into cif->vhif_vrs (the comment in the code says the list
 *    is kept in order), IFF_UP is set and carp_sc_state_locked() re-evaluates
 *    the state.
 *  - The trailing loop leaves all joined groups when no IPv6 address is
 *    configured; presumably this is the error-cleanup path.
 *
 * NOTE(review): declarations of ifp/own/error/i, most error returns, lock
 * calls, labels and else-branches are missing from this listing.
 */
1596 carp_set_addr6(struct carp_softc
*sc
, struct sockaddr_in6
*sin6
)
1599 struct carp_if
*cif
;
1600 struct in6_ifaddr
*ia
, *ia_if
;
1601 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1602 struct in6_multi_mship
*imm
;
1603 struct in6_addr in6
;
1606 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
)) {
1607 if (!(SC2IFP(sc
)->if_flags
& IFF_UP
))
1608 carp_set_state(sc
, INIT
);
1610 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1615 /* we have to do it by hands to check we won't match on us */
1616 ia_if
= NULL
; own
= 0;
1617 for (ia
= in6_ifaddr
; ia
; ia
= ia
->ia_next
) {
/* Compare the masked address with the masked candidate, 32 bits at a time. */
1620 for (i
= 0; i
< 4; i
++) {
1621 if ((sin6
->sin6_addr
.s6_addr32
[i
] &
1622 ia
->ia_prefixmask
.sin6_addr
.s6_addr32
[i
]) !=
1623 (ia
->ia_addr
.sin6_addr
.s6_addr32
[i
] &
1624 ia
->ia_prefixmask
.sin6_addr
.s6_addr32
[i
]))
1627 /* and, yeah, we need a multicast-capable iface too */
1628 if (ia
->ia_ifp
!= SC2IFP(sc
) &&
1629 (ia
->ia_ifp
->if_flags
& IFF_MULTICAST
) &&
1633 if (IN6_ARE_ADDR_EQUAL(&sin6
->sin6_addr
,
1634 &ia
->ia_addr
.sin6_addr
))
1640 return (EADDRNOTAVAIL
);
1644 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0 ||
1645 (im6o
->im6o_multicast_ifp
&& im6o
->im6o_multicast_ifp
!= ifp
))
1646 return (EADDRNOTAVAIL
);
1648 if (!sc
->sc_naddrs6
) {
1649 im6o
->im6o_multicast_ifp
= ifp
;
1651 /* join CARP multicast address */
1652 bzero(&in6
, sizeof(in6
));
1653 in6
.s6_addr16
[0] = htons(0xff02);
1654 in6
.s6_addr8
[15] = 0x12;
1655 if (in6_setscope(&in6
, ifp
, NULL
) != 0)
1657 if ((imm
= in6_joingroup(ifp
, &in6
, &error
)) == NULL
)
1659 LIST_INSERT_HEAD(&im6o
->im6o_memberships
, imm
, i6mm_chain
);
1661 /* join solicited multicast address */
1662 bzero(&in6
, sizeof(in6
));
1663 in6
.s6_addr16
[0] = htons(0xff02);
1664 in6
.s6_addr32
[1] = 0;
1665 in6
.s6_addr32
[2] = htonl(1);
1666 in6
.s6_addr32
[3] = sin6
->sin6_addr
.s6_addr32
[3];
1667 in6
.s6_addr8
[12] = 0xff;
1668 if (in6_setscope(&in6
, ifp
, NULL
) != 0)
1670 if ((imm
= in6_joingroup(ifp
, &in6
, &error
)) == NULL
)
1672 LIST_INSERT_HEAD(&im6o
->im6o_memberships
, imm
, i6mm_chain
);
/* No carp_if on the parent yet: allocate one and go promiscuous. */
1675 if (!ifp
->if_carp
) {
1676 MALLOC(cif
, struct carp_if
*, sizeof(*cif
), M_CARP
,
1678 if ((error
= ifpromisc(ifp
, 1))) {
1683 CARP_LOCK_INIT(cif
);
1685 cif
->vhif_ifp
= ifp
;
1686 TAILQ_INIT(&cif
->vhif_vrs
);
1690 struct carp_softc
*vr
;
1692 cif
= (struct carp_if
*)ifp
->if_carp
;
/* Look for a different softc that already uses this vhid. */
1694 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1695 if (vr
!= sc
&& vr
->sc_vhid
== sc
->sc_vhid
) {
1702 sc
->sc_carpdev
= ifp
;
1704 { /* XXX prevent endless loop if already in queue */
1705 struct carp_softc
*vr
, *after
= NULL
;
1707 cif
= (struct carp_if
*)ifp
->if_carp
;
1708 CARP_LOCK_ASSERT(cif
);
1710 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
) {
1713 if (vr
->sc_vhid
< sc
->sc_vhid
)
1718 /* We're trying to keep things in order */
1719 if (after
== NULL
) {
1720 TAILQ_INSERT_TAIL(&cif
->vhif_vrs
, sc
, sc_list
);
1722 TAILQ_INSERT_AFTER(&cif
->vhif_vrs
, after
, sc
, sc_list
);
1729 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1732 carp_sc_state_locked(sc
);
1740 /* clean up multicast memberships */
1741 if (!sc
->sc_naddrs6
) {
1742 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1743 imm
= LIST_FIRST(&im6o
->im6o_memberships
);
1744 LIST_REMOVE(imm
, i6mm_chain
);
1745 in6_leavegroup(imm
);
/*
 * carp_del_addr6: remove one IPv6 address from carp interface sc.
 *
 *   sc   - carp softc losing an address.
 *   sin6 - the address being removed (not referenced in the visible lines).
 *
 * When the IPv6 address count drops to zero the advertisement timer is
 * stopped, IFF_UP and IFF_RUNNING are cleared, every joined IPv6 multicast
 * group is left, and sc is unlinked from the parent's vhif_vrs list.  When
 * that was the last softc on the parent (vhif_nvrs reaches zero) the carp_if
 * is destroyed, the parent's if_carp pointer is cleared and the memory is
 * freed.
 *
 * Fix: the carp_if is allocated in carp_set_addr6() with malloc type M_CARP
 * but was released here with M_IFADDR.  The free now names M_CARP so that
 * the allocation and release use the same malloc type.
 *
 * NOTE(review): the return type, braces, lock calls and return statement are
 * missing from this listing and are left untouched.
 */
1752 carp_del_addr6(struct carp_softc
*sc
, struct sockaddr_in6
*sin6
)
1756 if (!--sc
->sc_naddrs6
) {
1757 struct carp_if
*cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1758 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1761 callout_stop(&sc
->sc_ad_tmo
);
1762 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
1763 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
/* Leave every IPv6 group this softc joined. */
1765 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1766 struct in6_multi_mship
*imm
=
1767 LIST_FIRST(&im6o
->im6o_memberships
);
1769 LIST_REMOVE(imm
, i6mm_chain
);
1770 in6_leavegroup(imm
);
1772 im6o
->im6o_multicast_ifp
= NULL
;
1773 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
/* Last softc on the parent: tear down the shared carp_if. */
1774 if (!--cif
->vhif_nvrs
) {
1775 CARP_LOCK_DESTROY(cif
);
1776 sc
->sc_carpdev
->if_carp
= NULL
;
/* Must match the malloc type used when cif was allocated (M_CARP). */
1777 FREE(cif
, M_CARP
);
/*
 * carp_ioctl: ioctl handler for a carp interface.
 *
 *   ifp   - the carp interface; its if_softc is the carp softc.
 *   cmd   - ioctl command (the case labels are missing from this listing).
 *   addr  - command argument, viewed as struct ifaddr, struct ifaliasreq or
 *           struct ifreq depending on the command.
 *   creds - caller credentials (not referenced in the visible lines).
 *
 * Visible groups of work, in order:
 *  - three address-family switches that set IFF_UP and call
 *    carp_set_addr()/carp_set_addr6(), or call
 *    carp_del_addr()/carp_del_addr6(), returning EAFNOSUPPORT otherwise;
 *  - an interface-flags handler that stops all three timers, sends a final
 *    advertisement when MASTER and drops to INIT when IFF_UP is being
 *    cleared, or sets IFF_UP when coming up from INIT;
 *  - a "set parameters" path: privilege check via suser(), copyin of a
 *    struct carpreq, optional forced state change, validation and
 *    installation of vhid (which also rewrites the link-layer address to
 *    00:00:5e:00:01:<vhid>), advbase/advskew, and the key;
 *  - a "get parameters" path: fills a zeroed struct carpreq from sc and
 *    copies it out; suser() is called before the key is copied, presumably
 *    so that only privileged callers receive it;
 *  - a final carp_hmac_prepare(sc), whose guarding condition is not visible.
 *
 * NOTE(review): the second and third address switches select on
 * ifa->ifa_addr->sa_family while passing &ifra->ifra_addr to the helpers, and
 * the second copies ifa->ifa_addr to ifa->ifa_dstaddr although its argument
 * is used as an ifaliasreq.  If addr is really an ifaliasreq for those
 * commands, reading it through the ifaddr view looks wrong -- confirm
 * against the missing case labels.
 */
1787 carp_ioctl(struct ifnet
*ifp
, u_long cmd
, caddr_t addr
, struct ucred
*creds
)
1789 struct carp_softc
*sc
= ifp
->if_softc
, *vr
;
1790 struct carpreq carpr
;
1793 struct ifaliasreq
*ifra
;
1794 int locked
= 0, error
= 0;
/* Three typed views of the same argument buffer. */
1796 ifa
= (struct ifaddr
*)addr
;
1797 ifra
= (struct ifaliasreq
*)addr
;
1798 ifr
= (struct ifreq
*)addr
;
1803 switch (ifa
->ifa_addr
->sa_family
) {
1806 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1807 bcopy(ifa
->ifa_addr
, ifa
->ifa_dstaddr
,
1808 sizeof(struct sockaddr
));
1809 error
= carp_set_addr(sc
, satosin(ifa
->ifa_addr
));
1814 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1815 error
= carp_set_addr6(sc
, satosin6(ifa
->ifa_addr
));
1819 error
= EAFNOSUPPORT
;
1825 switch (ifa
->ifa_addr
->sa_family
) {
1828 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1829 bcopy(ifa
->ifa_addr
, ifa
->ifa_dstaddr
,
1830 sizeof(struct sockaddr
));
1831 error
= carp_set_addr(sc
, satosin(&ifra
->ifra_addr
));
1836 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1837 error
= carp_set_addr6(sc
, satosin6(&ifra
->ifra_addr
));
1841 error
= EAFNOSUPPORT
;
1847 switch (ifa
->ifa_addr
->sa_family
) {
1850 error
= carp_del_addr(sc
, satosin(&ifra
->ifra_addr
));
1855 error
= carp_del_addr6(sc
, satosin6(&ifra
->ifra_addr
));
1859 error
= EAFNOSUPPORT
;
1865 if (sc
->sc_carpdev
) {
/* Going down: stop timers, announce once more if MASTER, fall to INIT. */
1869 if (sc
->sc_state
!= INIT
&& !(ifr
->ifr_flags
& IFF_UP
)) {
1870 callout_stop(&sc
->sc_ad_tmo
);
1871 callout_stop(&sc
->sc_md_tmo
);
1872 callout_stop(&sc
->sc_md6_tmo
);
1873 if (sc
->sc_state
== MASTER
)
1874 carp_send_ad_locked(sc
);
1875 carp_set_state(sc
, INIT
);
1877 } else if (sc
->sc_state
== INIT
&& (ifr
->ifr_flags
& IFF_UP
)) {
1878 SC2IFP(sc
)->if_flags
|= IFF_UP
;
/* Parameter update: privileged, argument copied in from user space. */
1884 error
= suser(curthread
);
1887 if ((error
= copyin(ifr
->ifr_data
, &carpr
, sizeof carpr
)))
1890 if (sc
->sc_carpdev
) {
1894 if (sc
->sc_state
!= INIT
&& carpr
.carpr_state
!= sc
->sc_state
) {
1895 switch (carpr
.carpr_state
) {
1897 callout_stop(&sc
->sc_ad_tmo
);
1898 carp_set_state(sc
, BACKUP
);
1900 carp_setroute(sc
, RTM_DELETE
);
1903 carp_master_down_locked(sc
);
/* vhid: range-checked against 255 and against other softcs on the parent. */
1909 if (carpr
.carpr_vhid
> 0) {
1910 if (carpr
.carpr_vhid
> 255) {
1914 if (sc
->sc_carpdev
) {
1915 struct carp_if
*cif
;
1916 cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1917 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1919 vr
->sc_vhid
== carpr
.carpr_vhid
)
/* Install the vhid and derive the link-layer address 00:00:5e:00:01:<vhid>. */
1922 sc
->sc_vhid
= carpr
.carpr_vhid
;
1923 IF_LLADDR(sc
->sc_ifp
)[0] = 0;
1924 IF_LLADDR(sc
->sc_ifp
)[1] = 0;
1925 IF_LLADDR(sc
->sc_ifp
)[2] = 0x5e;
1926 IF_LLADDR(sc
->sc_ifp
)[3] = 0;
1927 IF_LLADDR(sc
->sc_ifp
)[4] = 1;
1928 IF_LLADDR(sc
->sc_ifp
)[5] = sc
->sc_vhid
;
1931 if (carpr
.carpr_advbase
> 0 || carpr
.carpr_advskew
> 0) {
1932 if (carpr
.carpr_advskew
>= 255) {
1936 if (carpr
.carpr_advbase
> 255) {
1940 sc
->sc_advbase
= carpr
.carpr_advbase
;
1941 sc
->sc_advskew
= carpr
.carpr_advskew
;
1944 bcopy(carpr
.carpr_key
, sc
->sc_key
, sizeof(sc
->sc_key
));
1954 /* XXX: lockless read */
1955 bzero(&carpr
, sizeof(carpr
));
1956 carpr
.carpr_state
= sc
->sc_state
;
1957 carpr
.carpr_vhid
= sc
->sc_vhid
;
1958 carpr
.carpr_advbase
= sc
->sc_advbase
;
1959 carpr
.carpr_advskew
= sc
->sc_advskew
;
1960 error
= suser(curthread
);
1962 bcopy(sc
->sc_key
, carpr
.carpr_key
,
1963 sizeof(carpr
.carpr_key
));
1964 error
= copyout(&carpr
, ifr
->ifr_data
, sizeof(carpr
));
1974 carp_hmac_prepare(sc
);
1980 * XXX: this is looutput. We should eventually use it from there.
/*
 * carp_looutput: loop a packet back through ifp.
 *
 *   ifp - interface to account the packet on and loop it through.
 *   m   - packet; must carry a packet header (M_ASSERTPKTHDR).
 *   dst - destination address; only its family is used.
 *   (a further parameter, rt, is used below but its declaration line is
 *   missing from this listing.)
 *
 * Returns 0 for a blackhole route, EHOSTUNREACH or ENETUNREACH for a reject
 * route (host vs. network route), EAFNOSUPPORT from the family switch, and
 * otherwise the result of if_simloop().
 *
 * NOTE(review): the declaration of af, the case labels of the family switch
 * and the statement before the reject return are not visible here.
 */
1983 carp_looutput(struct ifnet
*ifp
, struct mbuf
*m
, struct sockaddr
*dst
,
1988 M_ASSERTPKTHDR(m
); /* check if we have the packet header */
1990 if (rt
&& rt
->rt_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) {
1992 return (rt
->rt_flags
& RTF_BLACKHOLE
? 0 :
1993 rt
->rt_flags
& RTF_HOST
? EHOSTUNREACH
: ENETUNREACH
);
1997 ifp
->if_obytes
+= m
->m_pkthdr
.len
;
1999 /* BPF writes need to be handled specially. */
/* For AF_UNSPEC the real family is stored in sa_data and copied back. */
2000 if (dst
->sa_family
== AF_UNSPEC
) {
2001 bcopy(dst
->sa_data
, &af
, sizeof(af
));
2002 dst
->sa_family
= af
;
2006 switch (dst
->sa_family
) {
2014 return (EAFNOSUPPORT
);
2017 return(if_simloop(ifp
, m
, dst
->sa_family
, 0));
2021 * Start output on carp interface. This function should never be called.
/*
 * carp_start: start-output hook for the carp interface.  The visible body
 * only prints "<ifname>: start called"; the accompanying comment in the file
 * states this function should never be called.
 */
2024 carp_start(struct ifnet
*ifp
)
2027 kprintf("%s: start called\n", ifp
->if_xname
);
/*
 * carp_output: rewrite the source link-layer address of an outgoing frame
 * that was tagged as belonging to a carp interface.
 *
 *   ifp - physical interface the frame leaves on; its if_type selects the
 *         rewrite.
 *   m   - the frame; a PACKET_TAG_CARP tag stores the carp ifnet pointer
 *         immediately after the tag header.
 *   sa  - destination address; its family is switched on.
 *   (the remaining parameter line is missing from this listing.)
 *
 * For the visible interface-type arm the Ethernet source address is set to
 * 00:00:5e:00:01:<vhid> of the tagged carp softc.  Unsupported interface
 * types log a message and return EOPNOTSUPP.
 *
 * NOTE(review): the handling of a missing tag (mtag == NULL), the case
 * labels and the success return are not visible here.
 */
2032 carp_output(struct ifnet
*ifp
, struct mbuf
*m
, struct sockaddr
*sa
,
2036 struct carp_softc
*sc
;
2037 struct ifnet
*carp_ifp
;
2042 switch (sa
->sa_family
) {
2055 mtag
= m_tag_find(m
, PACKET_TAG_CARP
, NULL
);
/* The tag payload is a single struct ifnet pointer. */
2059 bcopy(mtag
+ 1, &carp_ifp
, sizeof(struct ifnet
*));
2060 sc
= carp_ifp
->if_softc
;
2062 /* Set the source MAC address to Virtual Router MAC Address */
2063 switch (ifp
->if_type
) {
2066 struct ether_header
*eh
;
2068 eh
= mtod(m
, struct ether_header
*);
2069 eh
->ether_shost
[0] = 0;
2070 eh
->ether_shost
[1] = 0;
2071 eh
->ether_shost
[2] = 0x5e;
2072 eh
->ether_shost
[3] = 0;
2073 eh
->ether_shost
[4] = 1;
2074 eh
->ether_shost
[5] = sc
->sc_vhid
;
2078 kprintf("%s: carp is not supported for this interface type\n",
2080 return (EOPNOTSUPP
);
/*
 * carp_set_state: record a new CARP state for sc and publish the matching
 * link state.  The caller must hold the softc lock (CARP_SCLOCK_ASSERT).
 *
 *   sc    - carp softc to update.
 *   state - new state value.
 *
 * When the state actually changes, sc_state is updated, if_link_state is set
 * to one of LINK_STATE_DOWN / LINK_STATE_UP / LINK_STATE_UNKNOWN, and
 * rt_ifmsg() is called for the interface.
 *
 * NOTE(review): the early-exit statement for an unchanged state and the case
 * labels mapping states to link states are missing from this listing.
 */
2088 carp_set_state(struct carp_softc
*sc
, int state
)
2092 CARP_SCLOCK_ASSERT(sc
);
2094 if (sc
->sc_state
== state
)
2097 sc
->sc_state
= state
;
2100 SC2IFP(sc
)->if_link_state
= LINK_STATE_DOWN
;
2103 SC2IFP(sc
)->if_link_state
= LINK_STATE_UP
;
2106 SC2IFP(sc
)->if_link_state
= LINK_STATE_UNKNOWN
;
2109 rt_ifmsg(SC2IFP(sc
));
/*
 * carp_carpdev_state: entry point taking the parent interface's struct
 * carp_if as an untyped pointer and forwarding it to
 * carp_carpdev_state_locked().
 *
 * NOTE(review): any locking around the call is not visible in this listing.
 */
2113 carp_carpdev_state(void *v
)
2115 struct carp_if
*cif
= v
;
2118 carp_carpdev_state_locked(cif
);
/*
 * carp_carpdev_state_locked: re-evaluate every carp softc attached to cif by
 * calling carp_sc_state_locked() on each entry of cif->vhif_vrs.
 */
2123 carp_carpdev_state_locked(struct carp_if
*cif
)
2125 struct carp_softc
*sc
;
2127 TAILQ_FOREACH(sc
, &cif
->vhif_vrs
, sc_list
)
2128 carp_sc_state_locked(sc
);
/*
 * carp_sc_state_locked: bring sc in line with the state of its parent
 * device.  The caller must hold the softc lock (CARP_SCLOCK_ASSERT).
 *
 * Parent not IFF_UP: the current interface flags are saved in
 * sc_flags_backup, IFF_UP and IFF_RUNNING are cleared, all three timers are
 * stopped, the state drops to INIT, and -- once per softc, tracked by
 * sc_suppress -- the global carp_suppress_preempt counter is incremented.
 *
 * Otherwise: the saved flags are OR-ed back in, the state is set to INIT,
 * and a previously counted suppression is released (counter decremented,
 * sc_suppress cleared).
 *
 * NOTE(review): the else keyword, the statements run when the counter first
 * reaches 1, and any follow-up call after the state reset are missing from
 * this listing.
 */
2132 carp_sc_state_locked(struct carp_softc
*sc
)
2134 CARP_SCLOCK_ASSERT(sc
);
2136 if ( !(sc
->sc_carpdev
->if_flags
& IFF_UP
)) {
2137 sc
->sc_flags_backup
= SC2IFP(sc
)->if_flags
;
2138 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
2139 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
2140 callout_stop(&sc
->sc_ad_tmo
);
2141 callout_stop(&sc
->sc_md_tmo
);
2142 callout_stop(&sc
->sc_md6_tmo
);
2143 carp_set_state(sc
, INIT
);
/* Count this softc towards preemption suppression only once. */
2145 if (!sc
->sc_suppress
) {
2146 carp_suppress_preempt
++;
2147 if (carp_suppress_preempt
== 1) {
2153 sc
->sc_suppress
= 1;
2155 SC2IFP(sc
)->if_flags
|= sc
->sc_flags_backup
;
2156 carp_set_state(sc
, INIT
);
2158 if (sc
->sc_suppress
)
2159 carp_suppress_preempt
--;
2160 sc
->sc_suppress
= 0;
/*
 * carp_modevent: module event handler for the carp module.
 *
 *   mod  - module handle (not referenced in the visible lines).
 *   type - event type; the case labels are missing from this listing.
 *   data - event argument (not referenced in the visible lines).
 *
 * One arm registers carp_ifdetach for ifnet_departure_event, checks the
 * returned tag against NULL, initialises carpif_list and attaches the
 * carp_cloner interface cloner.  Another arm deregisters the event handler
 * and detaches the cloner.  Presumably these are the load and unload events
 * respectively -- confirm against the full file.
 */
2167 carp_modevent(module_t mod
, int type
, void *data
)
2171 if_detach_event_tag
= EVENTHANDLER_REGISTER(ifnet_departure_event
,
2172 carp_ifdetach
, NULL
, EVENTHANDLER_PRI_ANY
);
2173 if (if_detach_event_tag
== NULL
)
2176 LIST_INIT(&carpif_list
);
2177 if_clone_attach(&carp_cloner
);
2181 EVENTHANDLER_DEREGISTER(ifnet_departure_event
, if_detach_event_tag
);
2182 if_clone_detach(&carp_cloner
);
/*
 * Module glue: carp_mod is the moduledata_t record for this module (its
 * initialiser fields are missing from this listing), and DECLARE_MODULE
 * registers it under the name "carp" at SI_SUB_PSEUDO / SI_ORDER_ANY.
 */
2192 static moduledata_t carp_mod
= {
2198 DECLARE_MODULE(carp
, carp_mod
, SI_SUB_PSEUDO
, SI_ORDER_ANY
);