2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3 * Copyright (c) 2003 Ryan McBride. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24 * THE POSSIBILITY OF SUCH DAMAGE.
27 * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28 * $DragonFly: src/sys/netinet/ip_carp.c,v 1.10 2008/07/27 10:06:57 sephe Exp $
33 #include "opt_inet6.h"
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/in_cksum.h>
39 #include <sys/limits.h>
41 #include <sys/malloc.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
50 #include <machine/stdarg.h>
51 #include <crypto/sha1.h>
54 #include <net/ethernet.h>
56 #include <net/if_dl.h>
57 #include <net/if_types.h>
58 #include <net/route.h>
59 #include <net/if_clone.h>
62 #include <netinet/in.h>
63 #include <netinet/in_var.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/ip.h>
66 #include <netinet/ip_var.h>
67 #include <netinet/if_ether.h>
71 #include <netinet/icmp6.h>
72 #include <netinet/ip6.h>
73 #include <netinet6/ip6_var.h>
74 #include <netinet6/scope6_var.h>
75 #include <netinet6/nd6.h>
78 #include <netinet/ip_carp.h>
80 #define CARP_IFNAME "carp"
81 static MALLOC_DEFINE(M_CARP
, "CARP", "CARP interfaces");
82 static MALLOC_DEFINE(M_IFNET
, "IFNET", "IFNET CARP?");
83 SYSCTL_DECL(_net_inet_carp
);
86 struct ifnet
*sc_ifp
; /* Interface clue */
87 struct ifnet
*sc_carpdev
; /* parent interface */
88 struct in_ifaddr
*sc_ia
; /* primary iface address */
89 struct ip_moptions sc_imo
;
91 struct in6_ifaddr
*sc_ia6
; /* primary iface address v6 */
92 struct ip6_moptions sc_im6o
;
94 TAILQ_ENTRY(carp_softc
) sc_list
;
96 enum { INIT
= 0, BACKUP
, MASTER
}
102 int sc_sendad_errors
;
103 #define CARP_SENDAD_MAX_ERRORS 3
104 int sc_sendad_success
;
105 #define CARP_SENDAD_MIN_SUCCESS 3
111 int sc_advbase
; /* seconds */
116 #define CARP_HMAC_PAD 64
117 unsigned char sc_key
[CARP_KEY_LEN
];
118 unsigned char sc_pad
[CARP_HMAC_PAD
];
121 struct callout sc_ad_tmo
; /* advertisement timeout */
122 struct callout sc_md_tmo
; /* master down timeout */
123 struct callout sc_md6_tmo
; /* master down timeout */
125 LIST_ENTRY(carp_softc
) sc_next
; /* link in the file-wide carpif_list */
127 #define SC2IFP(sc) ((sc)->sc_ifp)
130 TAILQ_HEAD(, carp_softc
) vhif_vrs
;
133 struct ifnet
*vhif_ifp
;
134 struct lock vhif_lock
;
137 enum { CARP_COUNT_MASTER
, CARP_COUNT_RUNNING
};
139 int carp_suppress_preempt
= 0;
140 int carp_opts
[CARPCTL_MAXID
] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
141 SYSCTL_INT(_net_inet_carp
, CARPCTL_ALLOW
, allow
, CTLFLAG_RW
,
142 &carp_opts
[CARPCTL_ALLOW
], 0, "Accept incoming CARP packets");
143 SYSCTL_INT(_net_inet_carp
, CARPCTL_PREEMPT
, preempt
, CTLFLAG_RW
,
144 &carp_opts
[CARPCTL_PREEMPT
], 0, "high-priority backup preemption mode");
145 SYSCTL_INT(_net_inet_carp
, CARPCTL_LOG
, log
, CTLFLAG_RW
,
146 &carp_opts
[CARPCTL_LOG
], 0, "log bad carp packets");
147 SYSCTL_INT(_net_inet_carp
, CARPCTL_ARPBALANCE
, arpbalance
, CTLFLAG_RW
,
148 &carp_opts
[CARPCTL_ARPBALANCE
], 0, "balance arp responses");
149 SYSCTL_INT(_net_inet_carp
, OID_AUTO
, suppress_preempt
, CTLFLAG_RD
,
150 &carp_suppress_preempt
, 0, "Preemption is suppressed");
152 struct carpstats carpstats
;
153 SYSCTL_STRUCT(_net_inet_carp
, CARPCTL_STATS
, stats
, CTLFLAG_RW
,
154 &carpstats
, carpstats
,
155 "CARP statistics (struct carpstats, netinet/ip_carp.h)");
157 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */
158 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
/*
 * Locking helpers.  The CARP_LOCK* family takes a struct carp_if pointer
 * and operates on its vhif_lock; the CARP_SCLOCK* family takes a softc and
 * reaches the same lock through SC2CIF().  The *_DESTROY and *_ASSERT
 * variants are no-ops.
 *
 * None of the macros carries a trailing semicolon: the caller supplies it,
 * so every macro expands to exactly one statement and can be used as the
 * body of an unbraced if/else.
 */
#define CARP_LOCK_INIT(cif)	lockinit(&(cif)->vhif_lock, "carp_if", 0, 0)
#define CARP_LOCK_DESTROY(cif)	do { } while (0)
#define CARP_LOCK_ASSERT(cif)	do { } while (0)
#define CARP_LOCK(cif)		lockmgr(&(cif)->vhif_lock, LK_EXCLUSIVE)
#define CARP_UNLOCK(cif)	lockmgr(&(cif)->vhif_lock, LK_RELEASE)

#define CARP_SCLOCK(sc)		lockmgr(&SC2CIF(sc)->vhif_lock, LK_EXCLUSIVE)
#define CARP_SCUNLOCK(sc)	lockmgr(&SC2CIF(sc)->vhif_lock, LK_RELEASE)
#define CARP_SCLOCK_ASSERT(sc)	do { } while (0)
170 #define CARP_LOG(...) do { \
171 if (carp_opts[CARPCTL_LOG] > 0) \
172 log(LOG_INFO, __VA_ARGS__); \
175 #define CARP_DEBUG(...) do { \
176 if (carp_opts[CARPCTL_LOG] > 1) \
177 log(LOG_DEBUG, __VA_ARGS__); \
180 static void carp_hmac_prepare(struct carp_softc
*);
181 static void carp_hmac_generate(struct carp_softc
*, uint32_t *,
183 static int carp_hmac_verify(struct carp_softc
*, uint32_t *,
185 static void carp_setroute(struct carp_softc
*, int);
186 static void carp_input_c(struct mbuf
*, struct carp_header
*, sa_family_t
);
187 static int carp_clone_create(struct if_clone
*, int);
188 static void carp_clone_destroy(struct ifnet
*);
189 static void carpdetach(struct carp_softc
*, int);
190 static int carp_prepare_ad(struct mbuf
*, struct carp_softc
*,
191 struct carp_header
*);
192 static void carp_send_ad_all(void);
193 static void carp_send_ad(void *);
194 static void carp_send_ad_locked(struct carp_softc
*);
195 static void carp_send_arp(struct carp_softc
*);
196 static void carp_master_down(void *);
197 static void carp_master_down_locked(struct carp_softc
*);
198 static int carp_ioctl(struct ifnet
*, u_long
, caddr_t
, struct ucred
*);
199 static int carp_looutput(struct ifnet
*, struct mbuf
*, struct sockaddr
*,
201 static void carp_start(struct ifnet
*);
202 static void carp_setrun(struct carp_softc
*, sa_family_t
);
203 static void carp_set_state(struct carp_softc
*, int);
204 static int carp_addrcount(struct carp_if
*, struct in_ifaddr
*, int);
206 static void carp_multicast_cleanup(struct carp_softc
*);
207 static int carp_set_addr(struct carp_softc
*, struct sockaddr_in
*);
208 static int carp_del_addr(struct carp_softc
*, struct sockaddr_in
*);
209 static void carp_carpdev_state_locked(struct carp_if
*);
210 static void carp_sc_state_locked(struct carp_softc
*);
212 static void carp_send_na(struct carp_softc
*);
213 static int carp_set_addr6(struct carp_softc
*, struct sockaddr_in6
*);
214 static int carp_del_addr6(struct carp_softc
*, struct sockaddr_in6
*);
215 static void carp_multicast6_cleanup(struct carp_softc
*);
218 static LIST_HEAD(, carp_softc
) carpif_list
;
220 static struct if_clone carp_cloner
=
221 IF_CLONE_INITIALIZER(CARP_IFNAME
, carp_clone_create
, carp_clone_destroy
,
224 static eventhandler_tag carp_ifdetach_event
;
/*
 * Thin wrapper: hands the mbuf chain and byte count to in_cksum() and
 * returns its result as a 16-bit value.
 */
static __inline uint16_t
carp_cksum(struct mbuf *m, int len)
{
	uint16_t sum;

	sum = in_cksum(m, len);
	return (sum);
}
233 carp_hmac_prepare(struct carp_softc
*sc
)
235 uint8_t version
= CARP_VERSION
, type
= CARP_ADVERTISEMENT
;
236 uint8_t vhid
= sc
->sc_vhid
& 0xff;
237 struct ifaddr_container
*ifac
;
246 /* XXX: possible race here */
248 /* compute ipad from key */
249 bzero(sc
->sc_pad
, sizeof(sc
->sc_pad
));
250 bcopy(sc
->sc_key
, sc
->sc_pad
, sizeof(sc
->sc_key
));
251 for (i
= 0; i
< sizeof(sc
->sc_pad
); i
++)
252 sc
->sc_pad
[i
] ^= 0x36;
254 /* precompute first part of inner hash */
255 SHA1Init(&sc
->sc_sha1
);
256 SHA1Update(&sc
->sc_sha1
, sc
->sc_pad
, sizeof(sc
->sc_pad
));
257 SHA1Update(&sc
->sc_sha1
, (void *)&version
, sizeof(version
));
258 SHA1Update(&sc
->sc_sha1
, (void *)&type
, sizeof(type
));
259 SHA1Update(&sc
->sc_sha1
, (void *)&vhid
, sizeof(vhid
));
261 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
262 struct ifaddr
*ifa
= ifac
->ifa
;
264 if (ifa
->ifa_addr
->sa_family
== AF_INET
)
265 SHA1Update(&sc
->sc_sha1
,
266 (void *)&ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
,
267 sizeof(struct in_addr
));
271 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
272 struct ifaddr
*ifa
= ifac
->ifa
;
274 if (ifa
->ifa_addr
->sa_family
== AF_INET6
) {
275 in6
= ifatoia6(ifa
)->ia_addr
.sin6_addr
;
276 in6_clearscope(&in6
);
277 SHA1Update(&sc
->sc_sha1
, (void *)&in6
, sizeof(in6
));
282 /* convert ipad to opad */
283 for (i
= 0; i
< sizeof(sc
->sc_pad
); i
++)
284 sc
->sc_pad
[i
] ^= 0x36 ^ 0x5c;
291 carp_hmac_generate(struct carp_softc
*sc
, uint32_t counter
[2],
292 unsigned char md
[20])
296 /* fetch first half of inner hash */
297 bcopy(&sc
->sc_sha1
, &sha1ctx
, sizeof(sha1ctx
));
299 SHA1Update(&sha1ctx
, (void *)counter
, sizeof(sc
->sc_counter
));
300 SHA1Final(md
, &sha1ctx
);
304 SHA1Update(&sha1ctx
, sc
->sc_pad
, sizeof(sc
->sc_pad
));
305 SHA1Update(&sha1ctx
, md
, 20);
306 SHA1Final(md
, &sha1ctx
);
/*
 * Check the digest carried in a received advertisement.
 *
 * Recomputes the HMAC for `counter' with carp_hmac_generate() and compares
 * it with the 20-byte digest `md' taken from the packet.
 *
 * Returns 0 when the digests match and non-zero when they differ, so
 * callers can keep treating the result like a bcmp() return value.
 */
static int
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
	unsigned char md2[20];
	unsigned char diff = 0;
	unsigned int i;

	CARP_SCLOCK_ASSERT(sc);

	carp_hmac_generate(sc, counter, md2);

	/*
	 * Compare in constant time: fold every byte difference into `diff'
	 * instead of stopping at the first mismatch, so the time taken does
	 * not reveal how many leading bytes of a forged digest were right.
	 */
	for (i = 0; i < sizeof(md2); i++)
		diff |= md[i] ^ md2[i];

	return (diff != 0);
}
323 carp_setroute(struct carp_softc
*sc
, int cmd
)
325 struct ifaddr_container
*ifac
;
328 CARP_SCLOCK_ASSERT(sc
);
331 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
332 struct ifaddr
*ifa
= ifac
->ifa
;
334 if (ifa
->ifa_addr
->sa_family
== AF_INET
&&
335 sc
->sc_carpdev
!= NULL
) {
336 int count
= carp_addrcount(
337 (struct carp_if
*)sc
->sc_carpdev
->if_carp
,
338 ifatoia(ifa
), CARP_COUNT_MASTER
);
340 if ((cmd
== RTM_ADD
&& count
== 1) ||
341 (cmd
== RTM_DELETE
&& count
== 0))
342 rtinit(ifa
, cmd
, RTF_UP
| RTF_HOST
);
345 if (ifa
->ifa_addr
->sa_family
== AF_INET6
) {
358 carp_clone_create(struct if_clone
*ifc
, int unit
)
361 struct carp_softc
*sc
;
364 MALLOC(sc
, struct carp_softc
*, sizeof(*sc
), M_CARP
, M_WAITOK
|M_ZERO
);
365 ifp
= SC2IFP(sc
) = kmalloc(sizeof(struct ifnet
), M_IFNET
, M_WAITOK
|M_ZERO
);
367 sc
->sc_flags_backup
= 0;
369 sc
->sc_advbase
= CARP_DFLTINTV
;
370 sc
->sc_vhid
= -1; /* required setting */
372 sc
->sc_init_counter
= 1;
373 sc
->sc_naddrs
= sc
->sc_naddrs6
= 0; /* M_ZERO? */
376 sc
->sc_im6o
.im6o_multicast_hlim
= CARP_DFLTTL
;
379 /* sc->sc_imo.imo_membership = kmalloc((sizeof(struct in_multi) * IP_MAX_MEMBERSHIPS), M_CARP,M_WAITOK);*/
381 sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS;
382 sc->sc_imo.imo_multicast_vif = -1;
384 callout_init(&sc
->sc_ad_tmo
);
385 callout_init(&sc
->sc_md_tmo
);
386 callout_init(&sc
->sc_md6_tmo
);
389 if_initname(ifp
, CARP_IFNAME
, unit
);
390 ifp
->if_mtu
= ETHERMTU
;
391 ifp
->if_flags
= IFF_LOOPBACK
;
392 ifp
->if_ioctl
= carp_ioctl
;
393 ifp
->if_output
= carp_looutput
;
394 ifp
->if_start
= carp_start
;
395 ifp
->if_type
= IFT_CARP
;
396 ifp
->if_snd
.ifq_maxlen
= ifqmaxlen
;
398 if_attach(ifp
, NULL
);
399 bpfattach(ifp
, DLT_NULL
, sizeof(u_int
));
402 LIST_INSERT_HEAD(&carpif_list
, sc
, sc_next
);
409 carp_clone_destroy(struct ifnet
*ifp
)
411 struct carp_softc
*sc
= ifp
->if_softc
;
415 carpdetach(sc
, 1); /* Returns unlocked. */
418 LIST_REMOVE(sc
, sc_next
);
422 /* if_free_type(ifp, IFT_ETHER);*/
423 /* kfree(sc->sc_imo.imo_membership, M_CARP); */
428 * This function can be called on CARP interface destroy path,
429 * and in case of the removal of the underlying interface as
430 * well. We differentiate these two cases. In the latter case
431 * we do not cleanup our multicast memberships, since they
432 * are already freed. Also, in the latter case we do not
433 * release the lock on return, because the function will be
434 * called once more, for another CARP instance on the same
438 carpdetach(struct carp_softc
*sc
, int unlock
)
442 callout_stop(&sc
->sc_ad_tmo
);
443 callout_stop(&sc
->sc_md_tmo
);
444 callout_stop(&sc
->sc_md6_tmo
);
447 carp_suppress_preempt
--;
450 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
)
451 carp_suppress_preempt
--;
452 sc
->sc_sendad_errors
= 0;
454 carp_set_state(sc
, INIT
);
455 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
458 carp_multicast_cleanup(sc
);
460 carp_multicast6_cleanup(sc
);
463 if (sc
->sc_carpdev
!= NULL
) {
464 cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
465 CARP_LOCK_ASSERT(cif
);
466 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
467 if (!--cif
->vhif_nvrs
) {
468 ifpromisc(sc
->sc_carpdev
, 0);
469 sc
->sc_carpdev
->if_carp
= NULL
;
470 CARP_LOCK_DESTROY(cif
);
474 sc
->sc_carpdev
= NULL
;
478 /* Detach an interface from the carp. */
480 carp_ifdetach(void *arg __unused
, struct ifnet
*ifp
)
482 struct carp_if
*cif
= (struct carp_if
*)ifp
->if_carp
;
483 struct carp_softc
*sc
, *nextsc
;
489 * XXX: At the end of for() cycle the lock will be destroyed.
492 for (sc
= TAILQ_FIRST(&cif
->vhif_vrs
); sc
; sc
= nextsc
) {
493 nextsc
= TAILQ_NEXT(sc
, sc_list
);
500 * process input packet.
501 * we have rearranged checks order compared to the rfc,
502 * but it seems more efficient this way or not possible otherwise.
505 carp_input(struct mbuf
*m
, ...)
507 struct ip
*ip
= mtod(m
, struct ip
*);
508 struct carp_header
*ch
;
509 int iplen
, len
, hlen
;
513 hlen
= __va_arg(ap
, int);
516 carpstats
.carps_ipackets
++;
518 if (!carp_opts
[CARPCTL_ALLOW
]) {
523 /* check if received on a valid carp interface */
524 if (m
->m_pkthdr
.rcvif
->if_carp
== NULL
) {
525 carpstats
.carps_badif
++;
526 CARP_LOG("carp_input: packet received on non-carp "
528 m
->m_pkthdr
.rcvif
->if_xname
);
533 /* verify that the IP TTL is 255. */
534 if (ip
->ip_ttl
!= CARP_DFLTTL
) {
535 carpstats
.carps_badttl
++;
536 CARP_LOG("carp_input: received ttl %d != 255i on %s\n",
538 m
->m_pkthdr
.rcvif
->if_xname
);
543 iplen
= ip
->ip_hl
<< 2;
545 if (m
->m_pkthdr
.len
< iplen
+ sizeof(*ch
)) {
546 carpstats
.carps_badlen
++;
547 CARP_LOG("carp_input: received len %zd < "
548 "sizeof(struct carp_header)\n",
549 m
->m_len
- sizeof(struct ip
));
554 if (iplen
+ sizeof(*ch
) < m
->m_len
) {
555 if ((m
= m_pullup(m
, iplen
+ sizeof(*ch
))) == NULL
) {
556 carpstats
.carps_hdrops
++;
557 CARP_LOG("carp_input: pullup failed\n");
560 ip
= mtod(m
, struct ip
*);
562 ch
= (struct carp_header
*)((char *)ip
+ iplen
);
565 * verify that the received packet is long enough to hold
566 * the IP header followed by the CARP header
568 len
= iplen
+ sizeof(*ch
);
569 if (len
> m
->m_pkthdr
.len
) {
570 carpstats
.carps_badlen
++;
571 CARP_LOG("carp_input: packet too short %d on %s\n",
573 m
->m_pkthdr
.rcvif
->if_xname
);
578 if ((m
= m_pullup(m
, len
)) == NULL
) {
579 carpstats
.carps_hdrops
++;
582 ip
= mtod(m
, struct ip
*);
583 ch
= (struct carp_header
*)((char *)ip
+ iplen
);
585 /* verify the CARP checksum */
587 if (carp_cksum(m
, len
- iplen
)) {
588 carpstats
.carps_badsum
++;
589 CARP_LOG("carp_input: checksum failed on %s\n",
590 m
->m_pkthdr
.rcvif
->if_xname
);
596 carp_input_c(m
, ch
, AF_INET
);
601 carp6_input(struct mbuf
**mp
, int *offp
, int proto
)
603 struct mbuf
*m
= *mp
;
604 struct ip6_hdr
*ip6
= mtod(m
, struct ip6_hdr
*);
605 struct carp_header
*ch
;
608 carpstats
.carps_ipackets6
++;
610 if (!carp_opts
[CARPCTL_ALLOW
]) {
612 return (IPPROTO_DONE
);
615 /* check if received on a valid carp interface */
616 if (m
->m_pkthdr
.rcvif
->if_carp
== NULL
) {
617 carpstats
.carps_badif
++;
618 CARP_LOG("carp6_input: packet received on non-carp "
620 m
->m_pkthdr
.rcvif
->if_xname
);
622 return (IPPROTO_DONE
);
625 /* verify that the IPv6 hop limit is 255 */
626 if (ip6
->ip6_hlim
!= CARP_DFLTTL
) {
627 carpstats
.carps_badttl
++;
628 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
630 m
->m_pkthdr
.rcvif
->if_xname
);
632 return (IPPROTO_DONE
);
635 /* verify that we have a complete carp packet */
637 IP6_EXTHDR_GET(ch
, struct carp_header
*, m
, *offp
, sizeof(*ch
));
639 carpstats
.carps_badlen
++;
640 CARP_LOG("carp6_input: packet size %u too small\n", len
);
641 return (IPPROTO_DONE
);
645 /* verify the CARP checksum */
647 if (carp_cksum(m
, sizeof(*ch
))) {
648 carpstats
.carps_badsum
++;
649 CARP_LOG("carp6_input: checksum failed, on %s\n",
650 m
->m_pkthdr
.rcvif
->if_xname
);
652 return (IPPROTO_DONE
);
656 carp_input_c(m
, ch
, AF_INET6
);
657 return (IPPROTO_DONE
);
662 carp_input_c(struct mbuf
*m
, struct carp_header
*ch
, sa_family_t af
)
664 struct ifnet
*ifp
= m
->m_pkthdr
.rcvif
;
665 struct carp_softc
*sc
;
666 uint64_t tmp_counter
;
667 struct timeval sc_tv
, ch_tv
;
669 /* verify that the VHID is valid on the receiving interface */
670 CARP_LOCK(ifp
->if_carp
);
671 TAILQ_FOREACH(sc
, &((struct carp_if
*)ifp
->if_carp
)->vhif_vrs
, sc_list
)
672 if (sc
->sc_vhid
== ch
->carp_vhid
)
675 if (!sc
|| !((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
))) {
676 carpstats
.carps_badvhid
++;
677 CARP_UNLOCK(ifp
->if_carp
);
682 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
683 SC2IFP(sc
)->if_ipackets
++;
684 SC2IFP(sc
)->if_ibytes
+= m
->m_pkthdr
.len
;
686 if (SC2IFP(sc
)->if_bpf
) {
687 struct ip
*ip
= mtod(m
, struct ip
*);
689 /* BPF wants net byte order */
690 ip
->ip_len
= htons(ip
->ip_len
+ (ip
->ip_hl
<< 2));
691 ip
->ip_off
= htons(ip
->ip_off
);
692 bpf_mtap(SC2IFP(sc
)->if_bpf
, m
);
695 /* verify the CARP version. */
696 if (ch
->carp_version
!= CARP_VERSION
) {
697 carpstats
.carps_badver
++;
698 SC2IFP(sc
)->if_ierrors
++;
699 CARP_UNLOCK(ifp
->if_carp
);
700 CARP_LOG("%s; invalid version %d\n",
701 SC2IFP(sc
)->if_xname
,
707 /* verify the hash */
708 if (carp_hmac_verify(sc
, ch
->carp_counter
, ch
->carp_md
)) {
709 carpstats
.carps_badauth
++;
710 SC2IFP(sc
)->if_ierrors
++;
711 CARP_UNLOCK(ifp
->if_carp
);
712 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc
)->if_xname
);
717 tmp_counter
= ntohl(ch
->carp_counter
[0]);
718 tmp_counter
= tmp_counter
<<32;
719 tmp_counter
+= ntohl(ch
->carp_counter
[1]);
721 /* XXX Replay protection goes here */
723 sc
->sc_init_counter
= 0;
724 sc
->sc_counter
= tmp_counter
;
726 sc_tv
.tv_sec
= sc
->sc_advbase
;
727 if (carp_suppress_preempt
&& sc
->sc_advskew
< 240)
728 sc_tv
.tv_usec
= 240 * 1000000 / 256;
730 sc_tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
731 ch_tv
.tv_sec
= ch
->carp_advbase
;
732 ch_tv
.tv_usec
= ch
->carp_advskew
* 1000000 / 256;
734 switch (sc
->sc_state
) {
739 * If we receive an advertisement from a master who's going to
740 * be more frequent than us, go into BACKUP state.
742 if (timevalcmp(&sc_tv
, &ch_tv
, >) ||
743 timevalcmp(&sc_tv
, &ch_tv
, ==)) {
744 callout_stop(&sc
->sc_ad_tmo
);
745 CARP_DEBUG("%s: MASTER -> BACKUP "
746 "(more frequent advertisement received)\n",
747 SC2IFP(sc
)->if_xname
);
748 carp_set_state(sc
, BACKUP
);
750 carp_setroute(sc
, RTM_DELETE
);
755 * If we're pre-empting masters who advertise slower than us,
756 * and this one claims to be slower, treat him as down.
758 if (carp_opts
[CARPCTL_PREEMPT
] &&
759 timevalcmp(&sc_tv
, &ch_tv
, <)) {
760 CARP_DEBUG("%s: BACKUP -> MASTER "
761 "(preempting a slower master)\n",
762 SC2IFP(sc
)->if_xname
);
763 carp_master_down_locked(sc
);
768 * If the master is going to advertise at such a low frequency
769 * that he's guaranteed to time out, we might as well just
770 * treat him as timed out now.
772 sc_tv
.tv_sec
= sc
->sc_advbase
* 3;
773 if (timevalcmp(&sc_tv
, &ch_tv
, <)) {
774 CARP_DEBUG("%s: BACKUP -> MASTER "
775 "(master timed out)\n",
776 SC2IFP(sc
)->if_xname
);
777 carp_master_down_locked(sc
);
782 * Otherwise, we reset the counter and wait for the next
789 CARP_UNLOCK(ifp
->if_carp
);
796 carp_prepare_ad(struct mbuf
*m
, struct carp_softc
*sc
, struct carp_header
*ch
)
799 struct ifnet
*ifp
= SC2IFP(sc
);
801 if (sc
->sc_init_counter
) {
802 /* this could also be seconds since unix epoch */
803 sc
->sc_counter
= karc4random();
804 sc
->sc_counter
= sc
->sc_counter
<< 32;
805 sc
->sc_counter
+= karc4random();
809 ch
->carp_counter
[0] = htonl((sc
->sc_counter
>>32)&0xffffffff);
810 ch
->carp_counter
[1] = htonl(sc
->sc_counter
&0xffffffff);
812 carp_hmac_generate(sc
, ch
->carp_counter
, ch
->carp_md
);
814 /* Tag packet for carp_output */
815 mtag
= m_tag_get(PACKET_TAG_CARP
, sizeof(struct ifnet
*), MB_DONTWAIT
);
818 SC2IFP(sc
)->if_oerrors
++;
821 bcopy(&ifp
, (caddr_t
)(mtag
+ 1), sizeof(struct ifnet
*));
822 m_tag_prepend(m
, mtag
);
828 carp_send_ad_all(void)
830 struct carp_softc
*sc
;
832 LIST_FOREACH(sc
, &carpif_list
, sc_next
) {
833 if (sc
->sc_carpdev
== NULL
)
836 if ((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
) &&
837 sc
->sc_state
== MASTER
)
838 carp_send_ad_locked(sc
);
844 carp_send_ad(void *v
)
846 struct carp_softc
*sc
= v
;
849 carp_send_ad_locked(sc
);
854 carp_send_ad_locked(struct carp_softc
*sc
)
856 struct carp_header ch
;
858 struct carp_header
*ch_ptr
;
860 int len
, advbase
, advskew
;
863 /* bow out if we've lost our UPness or RUNNINGuiness */
864 if (!((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
))) {
868 advbase
= sc
->sc_advbase
;
869 if (!carp_suppress_preempt
|| sc
->sc_advskew
> 240)
870 advskew
= sc
->sc_advskew
;
874 tv
.tv_usec
= advskew
* 1000000 / 256;
877 ch
.carp_version
= CARP_VERSION
;
878 ch
.carp_type
= CARP_ADVERTISEMENT
;
879 ch
.carp_vhid
= sc
->sc_vhid
;
880 ch
.carp_advbase
= advbase
;
881 ch
.carp_advskew
= advskew
;
882 ch
.carp_authlen
= 7; /* XXX DEFINE */
883 ch
.carp_pad1
= 0; /* must be zero */
890 MGETHDR(m
, M_NOWAIT
, MT_HEADER
);
892 SC2IFP(sc
)->if_oerrors
++;
893 carpstats
.carps_onomem
++;
894 /* XXX maybe less ? */
895 if (advbase
!= 255 || advskew
!= 255)
896 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
900 len
= sizeof(*ip
) + sizeof(ch
);
901 m
->m_pkthdr
.len
= len
;
902 m
->m_pkthdr
.rcvif
= NULL
;
904 MH_ALIGN(m
, m
->m_len
);
905 m
->m_flags
|= M_MCAST
;
906 ip
= mtod(m
, struct ip
*);
907 ip
->ip_v
= IPVERSION
;
908 ip
->ip_hl
= sizeof(*ip
) >> 2;
909 ip
->ip_tos
= IPTOS_LOWDELAY
;
911 ip
->ip_id
= ip_newid();
913 ip
->ip_ttl
= CARP_DFLTTL
;
914 ip
->ip_p
= IPPROTO_CARP
;
916 ip
->ip_src
.s_addr
= sc
->sc_ia
->ia_addr
.sin_addr
.s_addr
;
917 ip
->ip_dst
.s_addr
= htonl(INADDR_CARP_GROUP
);
919 ch_ptr
= (struct carp_header
*)(&ip
[1]);
920 bcopy(&ch
, ch_ptr
, sizeof(ch
));
921 if (carp_prepare_ad(m
, sc
, ch_ptr
))
924 m
->m_data
+= sizeof(*ip
);
925 ch_ptr
->carp_cksum
= carp_cksum(m
, len
- sizeof(*ip
));
926 m
->m_data
-= sizeof(*ip
);
928 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
929 SC2IFP(sc
)->if_opackets
++;
930 SC2IFP(sc
)->if_obytes
+= len
;
931 carpstats
.carps_opackets
++;
933 if (ip_output(m
, NULL
, NULL
, IP_RAWOUTPUT
, &sc
->sc_imo
, NULL
)) {
934 SC2IFP(sc
)->if_oerrors
++;
935 if (sc
->sc_sendad_errors
< INT_MAX
)
936 sc
->sc_sendad_errors
++;
937 if (sc
->sc_sendad_errors
== CARP_SENDAD_MAX_ERRORS
) {
938 carp_suppress_preempt
++;
939 if (carp_suppress_preempt
== 1) {
945 sc
->sc_sendad_success
= 0;
947 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
) {
948 if (++sc
->sc_sendad_success
>=
949 CARP_SENDAD_MIN_SUCCESS
) {
950 carp_suppress_preempt
--;
951 sc
->sc_sendad_errors
= 0;
954 sc
->sc_sendad_errors
= 0;
962 MGETHDR(m
, M_NOWAIT
, MT_HEADER
);
964 SC2IFP(sc
)->if_oerrors
++;
965 carpstats
.carps_onomem
++;
966 /* XXX maybe less ? */
967 if (advbase
!= 255 || advskew
!= 255)
968 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
972 len
= sizeof(*ip6
) + sizeof(ch
);
973 m
->m_pkthdr
.len
= len
;
974 m
->m_pkthdr
.rcvif
= NULL
;
976 MH_ALIGN(m
, m
->m_len
);
977 m
->m_flags
|= M_MCAST
;
978 ip6
= mtod(m
, struct ip6_hdr
*);
979 bzero(ip6
, sizeof(*ip6
));
980 ip6
->ip6_vfc
|= IPV6_VERSION
;
981 ip6
->ip6_hlim
= CARP_DFLTTL
;
982 ip6
->ip6_nxt
= IPPROTO_CARP
;
983 bcopy(&sc
->sc_ia6
->ia_addr
.sin6_addr
, &ip6
->ip6_src
,
984 sizeof(struct in6_addr
));
985 /* set the multicast destination */
987 ip6
->ip6_dst
.s6_addr16
[0] = htons(0xff02);
988 ip6
->ip6_dst
.s6_addr8
[15] = 0x12;
989 if (in6_setscope(&ip6
->ip6_dst
, sc
->sc_carpdev
, NULL
) != 0) {
990 SC2IFP(sc
)->if_oerrors
++;
992 CARP_LOG("%s: in6_setscope failed\n", __func__
);
996 ch_ptr
= (struct carp_header
*)(&ip6
[1]);
997 bcopy(&ch
, ch_ptr
, sizeof(ch
));
998 if (carp_prepare_ad(m
, sc
, ch_ptr
))
1001 m
->m_data
+= sizeof(*ip6
);
1002 ch_ptr
->carp_cksum
= carp_cksum(m
, len
- sizeof(*ip6
));
1003 m
->m_data
-= sizeof(*ip6
);
1005 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
1006 SC2IFP(sc
)->if_opackets
++;
1007 SC2IFP(sc
)->if_obytes
+= len
;
1008 carpstats
.carps_opackets6
++;
1010 if (ip6_output(m
, NULL
, NULL
, 0, &sc
->sc_im6o
, NULL
, NULL
)) {
1011 SC2IFP(sc
)->if_oerrors
++;
1012 if (sc
->sc_sendad_errors
< INT_MAX
)
1013 sc
->sc_sendad_errors
++;
1014 if (sc
->sc_sendad_errors
== CARP_SENDAD_MAX_ERRORS
) {
1015 carp_suppress_preempt
++;
1016 if (carp_suppress_preempt
== 1) {
1022 sc
->sc_sendad_success
= 0;
1024 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
) {
1025 if (++sc
->sc_sendad_success
>=
1026 CARP_SENDAD_MIN_SUCCESS
) {
1027 carp_suppress_preempt
--;
1028 sc
->sc_sendad_errors
= 0;
1031 sc
->sc_sendad_errors
= 0;
1036 if (advbase
!= 255 || advskew
!= 255)
1037 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
1043 * Broadcast a gratuitous ARP request containing
1044 * the virtual router MAC address for each IP address
1045 * associated with the virtual router.
1048 carp_send_arp(struct carp_softc
*sc
)
1050 struct ifaddr_container
*ifac
;
1052 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
1053 struct ifaddr
*ifa
= ifac
->ifa
;
1055 if (ifa
->ifa_addr
->sa_family
!= AF_INET
)
1057 arp_ifinit2(sc
->sc_carpdev
, ifa
, IF_LLADDR(sc
->sc_ifp
));
1059 DELAY(1000); /* XXX */
1065 carp_send_na(struct carp_softc
*sc
)
1067 struct ifaddr_container
*ifac
;
1068 struct in6_addr
*in6
;
1069 static struct in6_addr mcast
= IN6ADDR_LINKLOCAL_ALLNODES_INIT
;
1071 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
1072 struct ifaddr
*ifa
= ifac
->ifa
;
1074 if (ifa
->ifa_addr
->sa_family
!= AF_INET6
)
1077 in6
= &ifatoia6(ifa
)->ia_addr
.sin6_addr
;
1078 nd6_na_output(sc
->sc_carpdev
, &mcast
, in6
,
1079 ND_NA_FLAG_OVERRIDE
, 1, NULL
);
1080 DELAY(1000); /* XXX */
1086 carp_addrcount(struct carp_if
*cif
, struct in_ifaddr
*ia
, int type
)
1088 struct carp_softc
*vh
;
1091 CARP_LOCK_ASSERT(cif
);
1093 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1094 if ((type
== CARP_COUNT_RUNNING
&&
1095 (SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
)) ||
1096 (type
== CARP_COUNT_MASTER
&& vh
->sc_state
== MASTER
)) {
1097 struct ifaddr_container
*ifac
;
1099 TAILQ_FOREACH(ifac
, &SC2IFP(vh
)->if_addrheads
[mycpuid
],
1101 struct ifaddr
*ifa
= ifac
->ifa
;
1103 if (ifa
->ifa_addr
->sa_family
== AF_INET
&&
1104 ia
->ia_addr
.sin_addr
.s_addr
==
1105 ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
)
1114 carp_iamatch(void *v
, struct in_ifaddr
*ia
,
1115 struct in_addr
*isaddr
, uint8_t **enaddr
)
1117 struct carp_if
*cif
= v
;
1118 struct carp_softc
*vh
;
1119 int index
, count
= 0;
1123 if (carp_opts
[CARPCTL_ARPBALANCE
]) {
1125 * XXX proof of concept implementation.
1126 * We use the source ip to decide which virtual host should
1127 * handle the request. If we're master of that virtual host,
1128 * then we respond, otherwise, just drop the arp packet on
1131 count
= carp_addrcount(cif
, ia
, CARP_COUNT_RUNNING
);
1133 /* should never reach this */
1138 /* this should be a hash, like pf_hash() */
1139 index
= ntohl(isaddr
->s_addr
) % count
;
1142 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1143 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
)) {
1144 struct ifaddr_container
*ifac
;
1146 TAILQ_FOREACH(ifac
, &SC2IFP(vh
)->if_addrheads
[mycpuid
], ifa_link
) {
1147 struct ifaddr
*ifa
= ifac
->ifa
;
1149 if (ifa
->ifa_addr
->sa_family
==
1151 ia
->ia_addr
.sin_addr
.s_addr
==
1152 ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
) {
1153 if (count
== index
) {
1156 *enaddr
= IF_LLADDR(vh
->sc_ifp
);
1170 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1171 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1172 vh
->sc_state
== MASTER
) {
1173 *enaddr
= IF_LLADDR(vh
->sc_ifp
);
1185 carp_iamatch6(void *v
, struct in6_addr
*taddr
)
1187 struct carp_if
*cif
= v
;
1188 struct carp_softc
*vh
;
1191 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1192 struct ifaddr_container
*ifac
;
1194 TAILQ_FOREACH(ifac
, &SC2IFP(vh
)->if_addrheads
[mycpuid
], ifa_link
) {
1195 struct ifaddr
*ifa
= ifac
->ifa
;
1197 if (IN6_ARE_ADDR_EQUAL(taddr
,
1198 &ifatoia6(ifa
)->ia_addr
.sin6_addr
) &&
1199 (SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1200 vh
->sc_state
== MASTER
) {
1212 carp_macmatch6(void *v
, struct mbuf
*m
, const struct in6_addr
*taddr
)
1215 struct carp_if
*cif
= v
;
1216 struct carp_softc
*sc
;
1219 TAILQ_FOREACH(sc
, &cif
->vhif_vrs
, sc_list
) {
1220 struct ifaddr_container
*ifac
;
1222 TAILQ_FOREACH(ifac
, &SC2IFP(sc
)->if_addrheads
[mycpuid
], ifa_link
) {
1223 struct ifaddr
*ifa
= ifac
->ifa
;
1225 if (IN6_ARE_ADDR_EQUAL(taddr
,
1226 &ifatoia6(ifa
)->ia_addr
.sin6_addr
) &&
1227 (SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
)) {
1228 struct ifnet
*ifp
= SC2IFP(sc
);
1229 mtag
= m_tag_get(PACKET_TAG_CARP
,
1230 sizeof(struct ifnet
*), MB_DONTWAIT
);
1232 /* better a bit than nothing */
1234 return (IF_LLADDR(sc
->sc_ifp
));
1236 bcopy(&ifp
, (caddr_t
)(mtag
+ 1),
1237 sizeof(struct ifnet
*));
1238 m_tag_prepend(m
, mtag
);
1241 return (IF_LLADDR(sc
->sc_ifp
));
1252 carp_forus(void *v
, void *dhost
)
1254 struct carp_if
*cif
= v
;
1255 struct carp_softc
*vh
;
1256 uint8_t *ena
= dhost
;
1259 * XXX: See here for check on MAC adr is not for virtual use
1263 if (ena
[0] || ena
[1] || ena
[2] != 0x5e || ena
[3] || ena
[4] != 1)
1269 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
)
1270 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1271 vh
->sc_state
== MASTER
&&
1272 !bcmp(dhost
, IF_LLADDR(vh
->sc_ifp
), ETHER_ADDR_LEN
)) {
1274 return (SC2IFP(vh
));
1282 carp_master_down(void *v
)
1284 struct carp_softc
*sc
= v
;
1286 lwkt_serialize_enter(sc
->sc_ifp
->if_serializer
);
1287 carp_master_down_locked(sc
);
1288 lwkt_serialize_exit(sc
->sc_ifp
->if_serializer
);
1292 carp_master_down_locked(struct carp_softc
*sc
)
1295 CARP_SCLOCK_ASSERT(sc
);
1297 switch (sc
->sc_state
) {
1299 kprintf("%s: master_down event in INIT state\n",
1300 SC2IFP(sc
)->if_xname
);
1305 carp_set_state(sc
, MASTER
);
1306 carp_send_ad_locked(sc
);
1312 carp_setroute(sc
, RTM_ADD
);
1318 * When in backup state, af indicates whether to reset the master down timer
1319 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1322 carp_setrun(struct carp_softc
*sc
, sa_family_t af
)
1326 if (sc
->sc_carpdev
== NULL
) {
1327 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1328 carp_set_state(sc
, INIT
);
1332 if (SC2IFP(sc
)->if_flags
& IFF_UP
&&
1333 sc
->sc_vhid
> 0 && (sc
->sc_naddrs
|| sc
->sc_naddrs6
))
1334 SC2IFP(sc
)->if_flags
|= IFF_RUNNING
;
1336 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1337 carp_setroute(sc
, RTM_DELETE
);
1341 switch (sc
->sc_state
) {
1343 if (carp_opts
[CARPCTL_PREEMPT
] && !carp_suppress_preempt
) {
1344 carp_send_ad_locked(sc
);
1349 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1350 SC2IFP(sc
)->if_xname
);
1351 carp_set_state(sc
, MASTER
);
1352 carp_setroute(sc
, RTM_ADD
);
1354 CARP_DEBUG("%s: INIT -> BACKUP\n", SC2IFP(sc
)->if_xname
);
1355 carp_set_state(sc
, BACKUP
);
1356 carp_setroute(sc
, RTM_DELETE
);
1361 callout_stop(&sc
->sc_ad_tmo
);
1362 tv
.tv_sec
= 3 * sc
->sc_advbase
;
1363 tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
1367 callout_reset(&sc
->sc_md_tmo
, tvtohz_high(&tv
),
1368 carp_master_down
, sc
);
1373 callout_reset(&sc
->sc_md6_tmo
, tvtohz_high(&tv
),
1374 carp_master_down
, sc
);
1379 callout_reset(&sc
->sc_md_tmo
, tvtohz_high(&tv
),
1380 carp_master_down
, sc
);
1382 callout_reset(&sc
->sc_md6_tmo
, tvtohz_high(&tv
),
1383 carp_master_down
, sc
);
1388 tv
.tv_sec
= sc
->sc_advbase
;
1389 tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
1390 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
1397 carp_multicast_cleanup(struct carp_softc
*sc
)
1399 struct ip_moptions
*imo
= &sc
->sc_imo
;
1400 uint16_t n
= imo
->imo_num_memberships
;
1402 /* Clean up our own multicast memberships */
1404 if (imo
->imo_membership
[n
] != NULL
) {
1405 in_delmulti(imo
->imo_membership
[n
]);
1406 imo
->imo_membership
[n
] = NULL
;
1409 imo
->imo_num_memberships
= 0;
1410 imo
->imo_multicast_ifp
= NULL
;
1415 carp_multicast6_cleanup(struct carp_softc
*sc
)
1417 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1419 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1420 struct in6_multi_mship
*imm
=
1421 LIST_FIRST(&im6o
->im6o_memberships
);
1423 LIST_REMOVE(imm
, i6mm_chain
);
1424 in6_leavegroup(imm
);
1426 im6o
->im6o_multicast_ifp
= NULL
;
1431 carp_set_addr(struct carp_softc
*sc
, struct sockaddr_in
*sin
)
1434 struct carp_if
*cif
;
1435 struct in_ifaddr
*ia
, *ia_if
;
1436 struct in_ifaddr_container
*iac
;
1437 struct ip_moptions
*imo
= &sc
->sc_imo
;
1438 struct in_addr addr
;
1439 u_long iaddr
= htonl(sin
->sin_addr
.s_addr
);
1442 if (sin
->sin_addr
.s_addr
== 0)
1444 if (!(SC2IFP(sc
)->if_flags
& IFF_UP
))
1446 carp_set_state(sc
, INIT
);
1450 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1455 /* we have to do it by hands to check we won't match on us */
1456 ia_if
= NULL
; own
= 0;
1457 TAILQ_FOREACH(iac
, &in_ifaddrheads
[mycpuid
], ia_link
) {
1460 /* and, yeah, we need a multicast-capable iface too */
1461 if (ia
->ia_ifp
!= SC2IFP(sc
) &&
1462 (ia
->ia_ifp
->if_flags
& IFF_MULTICAST
) &&
1463 (iaddr
& ia
->ia_subnetmask
) == ia
->ia_subnet
) {
1466 if (sin
->sin_addr
.s_addr
==
1467 ia
->ia_addr
.sin_addr
.s_addr
)
1474 return (EADDRNOTAVAIL
);
1479 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0 ||
1480 (imo
->imo_multicast_ifp
&& imo
->imo_multicast_ifp
!= ifp
))
1481 return (EADDRNOTAVAIL
);
1483 if (imo
->imo_num_memberships
== 0) {
1484 addr
.s_addr
= htonl(INADDR_CARP_GROUP
);
1485 if ((imo
->imo_membership
[0] = in_addmulti(&addr
, ifp
)) == NULL
)
1487 imo
->imo_num_memberships
++;
1488 imo
->imo_multicast_ifp
= ifp
;
1489 imo
->imo_multicast_ttl
= CARP_DFLTTL
;
1490 imo
->imo_multicast_loop
= 0;
1493 if (!ifp
->if_carp
) {
1495 MALLOC(cif
, struct carp_if
*, sizeof(*cif
), M_CARP
,
1497 if ((error
= ifpromisc(ifp
, 1))) {
1502 CARP_LOCK_INIT(cif
);
1504 cif
->vhif_ifp
= ifp
;
1505 TAILQ_INIT(&cif
->vhif_vrs
);
1509 struct carp_softc
*vr
;
1511 cif
= (struct carp_if
*)ifp
->if_carp
;
1513 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1514 if (vr
!= sc
&& vr
->sc_vhid
== sc
->sc_vhid
) {
1521 sc
->sc_carpdev
= ifp
;
1523 { /* XXX prevent endless loop if already in queue */
1524 struct carp_softc
*vr
, *after
= NULL
;
1526 cif
= (struct carp_if
*)ifp
->if_carp
;
1528 /* XXX: cif should not change, right? So we still hold the lock */
1529 CARP_LOCK_ASSERT(cif
);
1531 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
) {
1534 if (vr
->sc_vhid
< sc
->sc_vhid
)
1539 /* We're trying to keep things in order */
1540 if (after
== NULL
) {
1541 TAILQ_INSERT_TAIL(&cif
->vhif_vrs
, sc
, sc_list
);
1543 TAILQ_INSERT_AFTER(&cif
->vhif_vrs
, after
, sc
, sc_list
);
1550 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1555 carp_sc_state_locked(sc
);
1563 in_delmulti(imo
->imo_membership
[--imo
->imo_num_memberships
]);
1569 carp_del_addr(struct carp_softc
*sc
, struct sockaddr_in
*sin
)
1573 if (!--sc
->sc_naddrs
) {
1574 struct carp_if
*cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1575 struct ip_moptions
*imo
= &sc
->sc_imo
;
1578 callout_stop(&sc
->sc_ad_tmo
);
1579 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
1580 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1582 in_delmulti(imo
->imo_membership
[--imo
->imo_num_memberships
]);
1583 imo
->imo_multicast_ifp
= NULL
;
1584 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
1585 if (!--cif
->vhif_nvrs
) {
1586 sc
->sc_carpdev
->if_carp
= NULL
;
1587 CARP_LOCK_DESTROY(cif
);
1588 FREE(cif
, M_IFADDR
);
1599 carp_set_addr6(struct carp_softc
*sc
, struct sockaddr_in6
*sin6
)
1602 struct carp_if
*cif
;
1603 struct in6_ifaddr
*ia
, *ia_if
;
1604 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1605 struct in6_multi_mship
*imm
;
1606 struct in6_addr in6
;
1609 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
)) {
1610 if (!(SC2IFP(sc
)->if_flags
& IFF_UP
))
1611 carp_set_state(sc
, INIT
);
1613 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1618 /* we have to do it by hands to check we won't match on us */
1619 ia_if
= NULL
; own
= 0;
1620 for (ia
= in6_ifaddr
; ia
; ia
= ia
->ia_next
) {
1623 for (i
= 0; i
< 4; i
++) {
1624 if ((sin6
->sin6_addr
.s6_addr32
[i
] &
1625 ia
->ia_prefixmask
.sin6_addr
.s6_addr32
[i
]) !=
1626 (ia
->ia_addr
.sin6_addr
.s6_addr32
[i
] &
1627 ia
->ia_prefixmask
.sin6_addr
.s6_addr32
[i
]))
1630 /* and, yeah, we need a multicast-capable iface too */
1631 if (ia
->ia_ifp
!= SC2IFP(sc
) &&
1632 (ia
->ia_ifp
->if_flags
& IFF_MULTICAST
) &&
1636 if (IN6_ARE_ADDR_EQUAL(&sin6
->sin6_addr
,
1637 &ia
->ia_addr
.sin6_addr
))
1643 return (EADDRNOTAVAIL
);
1647 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0 ||
1648 (im6o
->im6o_multicast_ifp
&& im6o
->im6o_multicast_ifp
!= ifp
))
1649 return (EADDRNOTAVAIL
);
1651 if (!sc
->sc_naddrs6
) {
1652 im6o
->im6o_multicast_ifp
= ifp
;
1654 /* join CARP multicast address */
1655 bzero(&in6
, sizeof(in6
));
1656 in6
.s6_addr16
[0] = htons(0xff02);
1657 in6
.s6_addr8
[15] = 0x12;
1658 if (in6_setscope(&in6
, ifp
, NULL
) != 0)
1660 if ((imm
= in6_joingroup(ifp
, &in6
, &error
)) == NULL
)
1662 LIST_INSERT_HEAD(&im6o
->im6o_memberships
, imm
, i6mm_chain
);
1664 /* join solicited multicast address */
1665 bzero(&in6
, sizeof(in6
));
1666 in6
.s6_addr16
[0] = htons(0xff02);
1667 in6
.s6_addr32
[1] = 0;
1668 in6
.s6_addr32
[2] = htonl(1);
1669 in6
.s6_addr32
[3] = sin6
->sin6_addr
.s6_addr32
[3];
1670 in6
.s6_addr8
[12] = 0xff;
1671 if (in6_setscope(&in6
, ifp
, NULL
) != 0)
1673 if ((imm
= in6_joingroup(ifp
, &in6
, &error
)) == NULL
)
1675 LIST_INSERT_HEAD(&im6o
->im6o_memberships
, imm
, i6mm_chain
);
1678 if (!ifp
->if_carp
) {
1679 MALLOC(cif
, struct carp_if
*, sizeof(*cif
), M_CARP
,
1681 if ((error
= ifpromisc(ifp
, 1))) {
1686 CARP_LOCK_INIT(cif
);
1688 cif
->vhif_ifp
= ifp
;
1689 TAILQ_INIT(&cif
->vhif_vrs
);
1693 struct carp_softc
*vr
;
1695 cif
= (struct carp_if
*)ifp
->if_carp
;
1697 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1698 if (vr
!= sc
&& vr
->sc_vhid
== sc
->sc_vhid
) {
1705 sc
->sc_carpdev
= ifp
;
1707 { /* XXX prevent endless loop if already in queue */
1708 struct carp_softc
*vr
, *after
= NULL
;
1710 cif
= (struct carp_if
*)ifp
->if_carp
;
1711 CARP_LOCK_ASSERT(cif
);
1713 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
) {
1716 if (vr
->sc_vhid
< sc
->sc_vhid
)
1721 /* We're trying to keep things in order */
1722 if (after
== NULL
) {
1723 TAILQ_INSERT_TAIL(&cif
->vhif_vrs
, sc
, sc_list
);
1725 TAILQ_INSERT_AFTER(&cif
->vhif_vrs
, after
, sc
, sc_list
);
1732 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1735 carp_sc_state_locked(sc
);
1743 /* clean up multicast memberships */
1744 if (!sc
->sc_naddrs6
) {
1745 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1746 imm
= LIST_FIRST(&im6o
->im6o_memberships
);
1747 LIST_REMOVE(imm
, i6mm_chain
);
1748 in6_leavegroup(imm
);
1755 carp_del_addr6(struct carp_softc
*sc
, struct sockaddr_in6
*sin6
)
1759 if (!--sc
->sc_naddrs6
) {
1760 struct carp_if
*cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1761 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1764 callout_stop(&sc
->sc_ad_tmo
);
1765 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
1766 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1768 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1769 struct in6_multi_mship
*imm
=
1770 LIST_FIRST(&im6o
->im6o_memberships
);
1772 LIST_REMOVE(imm
, i6mm_chain
);
1773 in6_leavegroup(imm
);
1775 im6o
->im6o_multicast_ifp
= NULL
;
1776 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
1777 if (!--cif
->vhif_nvrs
) {
1778 CARP_LOCK_DESTROY(cif
);
1779 sc
->sc_carpdev
->if_carp
= NULL
;
1780 FREE(cif
, M_IFADDR
);
1790 carp_ioctl(struct ifnet
*ifp
, u_long cmd
, caddr_t addr
, struct ucred
*creds
)
1792 struct carp_softc
*sc
= ifp
->if_softc
, *vr
;
1793 struct carpreq carpr
;
1796 struct ifaliasreq
*ifra
;
1797 int locked
= 0, error
= 0;
1799 ifa
= (struct ifaddr
*)addr
;
1800 ifra
= (struct ifaliasreq
*)addr
;
1801 ifr
= (struct ifreq
*)addr
;
1806 switch (ifa
->ifa_addr
->sa_family
) {
1809 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1810 bcopy(ifa
->ifa_addr
, ifa
->ifa_dstaddr
,
1811 sizeof(struct sockaddr
));
1812 error
= carp_set_addr(sc
, satosin(ifa
->ifa_addr
));
1817 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1818 error
= carp_set_addr6(sc
, satosin6(ifa
->ifa_addr
));
1822 error
= EAFNOSUPPORT
;
1828 switch (ifa
->ifa_addr
->sa_family
) {
1831 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1832 bcopy(ifa
->ifa_addr
, ifa
->ifa_dstaddr
,
1833 sizeof(struct sockaddr
));
1834 error
= carp_set_addr(sc
, satosin(&ifra
->ifra_addr
));
1839 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1840 error
= carp_set_addr6(sc
, satosin6(&ifra
->ifra_addr
));
1844 error
= EAFNOSUPPORT
;
1850 switch (ifa
->ifa_addr
->sa_family
) {
1853 error
= carp_del_addr(sc
, satosin(&ifra
->ifra_addr
));
1858 error
= carp_del_addr6(sc
, satosin6(&ifra
->ifra_addr
));
1862 error
= EAFNOSUPPORT
;
1868 if (sc
->sc_carpdev
) {
1872 if (sc
->sc_state
!= INIT
&& !(ifr
->ifr_flags
& IFF_UP
)) {
1873 callout_stop(&sc
->sc_ad_tmo
);
1874 callout_stop(&sc
->sc_md_tmo
);
1875 callout_stop(&sc
->sc_md6_tmo
);
1876 if (sc
->sc_state
== MASTER
)
1877 carp_send_ad_locked(sc
);
1878 carp_set_state(sc
, INIT
);
1880 } else if (sc
->sc_state
== INIT
&& (ifr
->ifr_flags
& IFF_UP
)) {
1881 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1887 error
= suser(curthread
);
1890 if ((error
= copyin(ifr
->ifr_data
, &carpr
, sizeof carpr
)))
1893 if (sc
->sc_carpdev
) {
1897 if (sc
->sc_state
!= INIT
&& carpr
.carpr_state
!= sc
->sc_state
) {
1898 switch (carpr
.carpr_state
) {
1900 callout_stop(&sc
->sc_ad_tmo
);
1901 carp_set_state(sc
, BACKUP
);
1903 carp_setroute(sc
, RTM_DELETE
);
1906 carp_master_down_locked(sc
);
1912 if (carpr
.carpr_vhid
> 0) {
1913 if (carpr
.carpr_vhid
> 255) {
1917 if (sc
->sc_carpdev
) {
1918 struct carp_if
*cif
;
1919 cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1920 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1922 vr
->sc_vhid
== carpr
.carpr_vhid
)
1925 sc
->sc_vhid
= carpr
.carpr_vhid
;
1926 IF_LLADDR(sc
->sc_ifp
)[0] = 0;
1927 IF_LLADDR(sc
->sc_ifp
)[1] = 0;
1928 IF_LLADDR(sc
->sc_ifp
)[2] = 0x5e;
1929 IF_LLADDR(sc
->sc_ifp
)[3] = 0;
1930 IF_LLADDR(sc
->sc_ifp
)[4] = 1;
1931 IF_LLADDR(sc
->sc_ifp
)[5] = sc
->sc_vhid
;
1934 if (carpr
.carpr_advbase
> 0 || carpr
.carpr_advskew
> 0) {
1935 if (carpr
.carpr_advskew
>= 255) {
1939 if (carpr
.carpr_advbase
> 255) {
1943 sc
->sc_advbase
= carpr
.carpr_advbase
;
1944 sc
->sc_advskew
= carpr
.carpr_advskew
;
1947 bcopy(carpr
.carpr_key
, sc
->sc_key
, sizeof(sc
->sc_key
));
1957 /* XXX: lockless read */
1958 bzero(&carpr
, sizeof(carpr
));
1959 carpr
.carpr_state
= sc
->sc_state
;
1960 carpr
.carpr_vhid
= sc
->sc_vhid
;
1961 carpr
.carpr_advbase
= sc
->sc_advbase
;
1962 carpr
.carpr_advskew
= sc
->sc_advskew
;
1963 error
= suser(curthread
);
1965 bcopy(sc
->sc_key
, carpr
.carpr_key
,
1966 sizeof(carpr
.carpr_key
));
1967 error
= copyout(&carpr
, ifr
->ifr_data
, sizeof(carpr
));
1977 carp_hmac_prepare(sc
);
1983 * XXX: this is looutput. We should eventually use it from there.
1986 carp_looutput(struct ifnet
*ifp
, struct mbuf
*m
, struct sockaddr
*dst
,
1991 M_ASSERTPKTHDR(m
); /* check if we have the packet header */
1993 if (rt
&& rt
->rt_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) {
1995 return (rt
->rt_flags
& RTF_BLACKHOLE
? 0 :
1996 rt
->rt_flags
& RTF_HOST
? EHOSTUNREACH
: ENETUNREACH
);
2000 ifp
->if_obytes
+= m
->m_pkthdr
.len
;
2002 /* BPF writes need to be handled specially. */
2003 if (dst
->sa_family
== AF_UNSPEC
) {
2004 bcopy(dst
->sa_data
, &af
, sizeof(af
));
2005 dst
->sa_family
= af
;
2009 switch (dst
->sa_family
) {
2017 return (EAFNOSUPPORT
);
2020 return(if_simloop(ifp
, m
, dst
->sa_family
, 0));
2024 * Start output on carp interface. This function should never be called.
2027 carp_start(struct ifnet
*ifp
)
2030 kprintf("%s: start called\n", ifp
->if_xname
);
2035 carp_output(struct ifnet
*ifp
, struct mbuf
*m
, struct sockaddr
*sa
,
2039 struct carp_softc
*sc
;
2040 struct ifnet
*carp_ifp
;
2045 switch (sa
->sa_family
) {
2058 mtag
= m_tag_find(m
, PACKET_TAG_CARP
, NULL
);
2062 bcopy(mtag
+ 1, &carp_ifp
, sizeof(struct ifnet
*));
2063 sc
= carp_ifp
->if_softc
;
2065 /* Set the source MAC address to Virtual Router MAC Address */
2066 switch (ifp
->if_type
) {
2069 struct ether_header
*eh
;
2071 eh
= mtod(m
, struct ether_header
*);
2072 eh
->ether_shost
[0] = 0;
2073 eh
->ether_shost
[1] = 0;
2074 eh
->ether_shost
[2] = 0x5e;
2075 eh
->ether_shost
[3] = 0;
2076 eh
->ether_shost
[4] = 1;
2077 eh
->ether_shost
[5] = sc
->sc_vhid
;
2081 kprintf("%s: carp is not supported for this interface type\n",
2083 return (EOPNOTSUPP
);
2091 carp_set_state(struct carp_softc
*sc
, int state
)
2095 CARP_SCLOCK_ASSERT(sc
);
2097 if (sc
->sc_state
== state
)
2100 sc
->sc_state
= state
;
2103 SC2IFP(sc
)->if_link_state
= LINK_STATE_DOWN
;
2106 SC2IFP(sc
)->if_link_state
= LINK_STATE_UP
;
2109 SC2IFP(sc
)->if_link_state
= LINK_STATE_UNKNOWN
;
2112 rt_ifmsg(SC2IFP(sc
));
2116 carp_carpdev_state(void *v
)
2118 struct carp_if
*cif
= v
;
2121 carp_carpdev_state_locked(cif
);
2126 carp_carpdev_state_locked(struct carp_if
*cif
)
2128 struct carp_softc
*sc
;
2130 TAILQ_FOREACH(sc
, &cif
->vhif_vrs
, sc_list
)
2131 carp_sc_state_locked(sc
);
2135 carp_sc_state_locked(struct carp_softc
*sc
)
2137 CARP_SCLOCK_ASSERT(sc
);
2139 if ( !(sc
->sc_carpdev
->if_flags
& IFF_UP
)) {
2140 sc
->sc_flags_backup
= SC2IFP(sc
)->if_flags
;
2141 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
2142 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
2143 callout_stop(&sc
->sc_ad_tmo
);
2144 callout_stop(&sc
->sc_md_tmo
);
2145 callout_stop(&sc
->sc_md6_tmo
);
2146 carp_set_state(sc
, INIT
);
2148 if (!sc
->sc_suppress
) {
2149 carp_suppress_preempt
++;
2150 if (carp_suppress_preempt
== 1) {
2156 sc
->sc_suppress
= 1;
2158 SC2IFP(sc
)->if_flags
|= sc
->sc_flags_backup
;
2159 carp_set_state(sc
, INIT
);
2161 if (sc
->sc_suppress
)
2162 carp_suppress_preempt
--;
2163 sc
->sc_suppress
= 0;
2170 carp_modevent(module_t mod
, int type
, void *data
)
2174 LIST_INIT(&carpif_list
);
2175 carp_ifdetach_event
=
2176 EVENTHANDLER_REGISTER(ifnet_detach_event
, carp_ifdetach
, NULL
,
2177 EVENTHANDLER_PRI_ANY
);
2178 if_clone_attach(&carp_cloner
);
2182 EVENTHANDLER_DEREGISTER(ifnet_detach_event
,
2183 carp_ifdetach_event
);
2184 if_clone_detach(&carp_cloner
);
2194 static moduledata_t carp_mod
= {
2199 DECLARE_MODULE(carp
, carp_mod
, SI_SUB_PSEUDO
, SI_ORDER_ANY
);