2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3 * Copyright (c) 2003 Ryan McBride. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24 * THE POSSIBILITY OF SUCH DAMAGE.
27 * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28 * $DragonFly: src/sys/netinet/ip_carp.c,v 1.6 2008/01/11 11:59:40 sephe Exp $
32 /*#include "opt_bpf.h"*/
34 #include "opt_inet6.h"
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <machine/limits.h>
42 #include <sys/malloc.h>
44 #include <sys/module.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/signalvar.h>
50 #include <sys/filio.h>
51 #include <sys/sockio.h>
52 #include <sys/in_cksum.h>
53 #include <sys/socket.h>
54 #include <sys/vnode.h>
56 #include <machine/stdarg.h>
59 #include <net/ethernet.h>
61 #include <net/if_dl.h>
62 #include <net/if_types.h>
63 #include <net/route.h>
64 #include <net/if_clone.h>
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
83 #include <crypto/sha1.h>
84 #include <netinet/ip_carp.h>
/*
 * Name prefix used when the cloner names interfaces (see if_initname in
 * carp_clone_create), followed by the two malloc types this file allocates
 * from: M_CARP for the softc and M_IFNET for the ifnet it owns.
 */
87 #define CARP_IFNAME "carp"
88 static MALLOC_DEFINE(M_CARP
, "CARP", "CARP interfaces");
89 static MALLOC_DEFINE(M_IFNET
, "IFNET", "IFNET CARP?");
/* Parent sysctl node that the SYSCTL_INT/SYSCTL_STRUCT entries in this file hang off. */
90 SYSCTL_DECL(_net_inet_carp
);
/*
 * Members of the per-interface CARP state (the softc).  The line that opens
 * the structure is not visible in this view; only the member list is.
 */
93 struct ifnet
*sc_ifp
; /* Interface clue */
94 struct ifnet
*sc_carpdev
; /* Pointer to parent interface */
95 struct in_ifaddr
*sc_ia
; /* primary iface address */
96 struct ip_moptions sc_imo
;
98 struct in6_ifaddr
*sc_ia6
; /* primary iface address v6 */
99 struct ip6_moptions sc_im6o
;
/* Linkage on the parent interface list (carp_if.vhif_vrs). */
101 TAILQ_ENTRY(carp_softc
) sc_list
;
/* Protocol state machine position; INIT is the zero value. */
103 enum { INIT
= 0, BACKUP
, MASTER
} sc_state
;
/*
 * Advertisement send failure bookkeeping used by carp_send_ad_locked:
 * reaching CARP_SENDAD_MAX_ERRORS failures bumps carp_suppress_preempt,
 * and CARP_SENDAD_MIN_SUCCESS later successes undo that.
 */
108 int sc_sendad_errors
;
109 #define CARP_SENDAD_MAX_ERRORS 3
110 int sc_sendad_success
;
111 #define CARP_SENDAD_MIN_SUCCESS 3
117 int sc_advbase
; /* seconds */
/* 64-bit counter sent in advertisements and mixed into the HMAC. */
119 u_int64_t sc_counter
;
/*
 * sc_key is the shared secret; sc_pad is scratch space that
 * carp_hmac_prepare fills with the key XOR 0x36 and then flips to
 * the key XOR 0x5c.
 */
122 #define CARP_HMAC_PAD 64
123 unsigned char sc_key
[CARP_KEY_LEN
];
124 unsigned char sc_pad
[CARP_HMAC_PAD
];
127 struct callout sc_ad_tmo
; /* advertisement timeout */
128 struct callout sc_md_tmo
; /* master down timeout */
129 struct callout sc_md6_tmo
; /* master down timeout, second timer (presumably the v6 one; see carp_setrun) */
131 LIST_ENTRY(carp_softc
) sc_next
; /* linkage on the global carpif_list */
/* Accessor for the ifnet owned by a softc. */
133 #define SC2IFP(sc) ((sc)->sc_ifp)
/*
 * Global tunables and statistics, exported under the _net_inet_carp
 * sysctl node.
 *
 * carp_suppress_preempt is a counter, not a flag: carp_send_ad_locked
 * increments it when a softc accumulates CARP_SENDAD_MAX_ERRORS send
 * failures and decrements it again after enough successes.  It is exported
 * read-only (CTLFLAG_RD).
 *
 * carp_opts is indexed by the CARPCTL_* constants; the initializer supplies
 * the defaults for each slot.
 */
135 int carp_suppress_preempt
= 0;
136 int carp_opts
[CARPCTL_MAXID
] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
137 SYSCTL_INT(_net_inet_carp
, CARPCTL_ALLOW
, allow
, CTLFLAG_RW
,
138 &carp_opts
[CARPCTL_ALLOW
], 0, "Accept incoming CARP packets");
139 SYSCTL_INT(_net_inet_carp
, CARPCTL_PREEMPT
, preempt
, CTLFLAG_RW
,
140 &carp_opts
[CARPCTL_PREEMPT
], 0, "high-priority backup preemption mode");
/* The log knob is a level: CARP_LOG fires when it is > 0, CARP_DEBUG when it is > 1. */
141 SYSCTL_INT(_net_inet_carp
, CARPCTL_LOG
, log
, CTLFLAG_RW
,
142 &carp_opts
[CARPCTL_LOG
], 0, "log bad carp packets");
143 SYSCTL_INT(_net_inet_carp
, CARPCTL_ARPBALANCE
, arpbalance
, CTLFLAG_RW
,
144 &carp_opts
[CARPCTL_ARPBALANCE
], 0, "balance arp responses");
145 SYSCTL_INT(_net_inet_carp
, OID_AUTO
, suppress_preempt
, CTLFLAG_RD
,
146 &carp_suppress_preempt
, 0, "Preemption is suppressed");
/* Packet and error counters bumped by the input and output paths. */
148 struct carpstats carpstats
;
149 SYSCTL_STRUCT(_net_inet_carp
, CARPCTL_STATS
, stats
, CTLFLAG_RW
,
150 &carpstats
, carpstats
,
151 "CARP statistics (struct carpstats, netinet/ip_carp.h)");
/*
 * Members of the per-parent-interface CARP record (reached through
 * ifnet.if_carp, see SC2CIF).  The line that opens the structure is not
 * visible in this view.
 */
/* Every carp_softc attached to this parent, linked through sc_list. */
154 TAILQ_HEAD(, carp_softc
) vhif_vrs
;
157 struct ifnet
*vhif_ifp
;
/* Lock taken by the CARP_LOCK and CARP_SCLOCK macros. */
158 struct lock vhif_lock
;
/*
 * Locking and logging helpers.
 *
 * CARP_LOCK and CARP_UNLOCK take a carp_if pointer; CARP_SCLOCK and
 * CARP_SCUNLOCK take a softc and reach the same lock through SC2CIF, so
 * they dereference sc_carpdev and must only be used once that is set.
 * The DESTROY and ASSERT variants expand to an empty statement.
 *
 * NOTE(review): every lock macro body already ends in a semicolon, so a
 * call written as CARP_LOCK(x); expands to two statements.  That is
 * harmless at statement level but would break an unbraced if/else.
 *
 * CARP_LOG emits at LOG_INFO when carp_opts[CARPCTL_LOG] > 0 and
 * CARP_DEBUG emits at LOG_DEBUG when it is > 1.  The closing line of each
 * do-block is not visible in this view.
 */
161 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */
162 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
164 #define CARP_LOCK_INIT(cif) lockinit(&(cif)->vhif_lock, "carp_if", 0, LK_NOWAIT);
165 #define CARP_LOCK_DESTROY(cif) ;
166 #define CARP_LOCK_ASSERT(cif) ;
167 #define CARP_LOCK(cif) lockmgr(&(cif)->vhif_lock, LK_EXCLUSIVE);
168 #define CARP_UNLOCK(cif) lockmgr(&(cif)->vhif_lock, LK_RELEASE);
170 #define CARP_SCLOCK(sc) lockmgr(&SC2CIF(sc)->vhif_lock, LK_EXCLUSIVE);
171 #define CARP_SCUNLOCK(sc) lockmgr(&SC2CIF(sc)->vhif_lock, LK_RELEASE);
172 #define CARP_SCLOCK_ASSERT(sc) ;
174 #define CARP_LOG(...) do { \
175 if (carp_opts[CARPCTL_LOG] > 0) \
176 log(LOG_INFO, __VA_ARGS__); \
179 #define CARP_DEBUG(...) do { \
180 if (carp_opts[CARPCTL_LOG] > 1) \
181 log(LOG_DEBUG, __VA_ARGS__); \
/*
 * Forward declarations for the file-local functions.  The _locked variants
 * are the workers that the callout entry points (carp_send_ad,
 * carp_master_down) call into.  Several declarations continue on lines that
 * are not visible in this view.
 */
184 static void carp_hmac_prepare(struct carp_softc
*);
185 static void carp_hmac_generate(struct carp_softc
*, u_int32_t
*,
187 static int carp_hmac_verify(struct carp_softc
*, u_int32_t
*,
189 static void carp_setroute(struct carp_softc
*, int);
190 static void carp_input_c(struct mbuf
*, struct carp_header
*, sa_family_t
);
191 static int carp_clone_create(struct if_clone
*, int);
192 static void carp_clone_destroy(struct ifnet
*);
193 static void carpdetach(struct carp_softc
*, int);
194 static int carp_prepare_ad(struct mbuf
*, struct carp_softc
*,
195 struct carp_header
*);
196 static void carp_send_ad_all(void);
197 static void carp_send_ad(void *);
198 static void carp_send_ad_locked(struct carp_softc
*);
199 static void carp_send_arp(struct carp_softc
*);
200 static void carp_master_down(void *);
201 static void carp_master_down_locked(struct carp_softc
*);
202 static int carp_ioctl(struct ifnet
*, u_long
, caddr_t
, struct ucred
*);
203 static int carp_looutput(struct ifnet
*, struct mbuf
*, struct sockaddr
*,
205 static void carp_start(struct ifnet
*);
206 static void carp_setrun(struct carp_softc
*, sa_family_t
);
207 static void carp_set_state(struct carp_softc
*, int);
208 static int carp_addrcount(struct carp_if
*, struct in_ifaddr
*, int);
/* Selector values for the last argument of carp_addrcount. */
209 enum { CARP_COUNT_MASTER
, CARP_COUNT_RUNNING
};
211 static void carp_multicast_cleanup(struct carp_softc
*);
212 static int carp_set_addr(struct carp_softc
*, struct sockaddr_in
*);
213 static int carp_del_addr(struct carp_softc
*, struct sockaddr_in
*);
214 static void carp_carpdev_state_locked(struct carp_if
*);
215 static void carp_sc_state_locked(struct carp_softc
*);
217 static void carp_send_na(struct carp_softc
*);
218 static int carp_set_addr6(struct carp_softc
*, struct sockaddr_in6
*);
219 static int carp_del_addr6(struct carp_softc
*, struct sockaddr_in6
*);
220 static void carp_multicast6_cleanup(struct carp_softc
*);
/*
 * File-scope state: the list of every softc created by the cloner (linked
 * through sc_next), the cloner descriptor that binds the interface name to
 * the create and destroy callbacks, and the saved event handler tag.
 */
223 static LIST_HEAD(, carp_softc
) carpif_list
;
225 struct if_clone carp_cloner
= IF_CLONE_INITIALIZER(CARP_IFNAME
, carp_clone_create
, carp_clone_destroy
, 0, IF_MAXUNIT
);
227 static eventhandler_tag if_detach_event_tag
;
/*
 * Thin wrapper: returns in_cksum(m, len) unchanged.  Callers in this file
 * treat a non-zero result on a received packet as a checksum failure.
 */
229 static __inline u_int16_t
230 carp_cksum(struct mbuf
*m
, int len
)
232 return (in_cksum(m
, len
));
/*
 * Precompute the part of the HMAC that does not change between
 * advertisements and leave it in sc->sc_sha1.
 *
 * Visible steps:
 *   1. sc_pad = key, zero padded, each byte XOR 0x36.
 *   2. sc_sha1 = SHA1 state after hashing sc_pad, the version byte, the
 *      type byte and the low 8 bits of sc_vhid.
 *   3. Every AF_INET address on the carp interface is hashed in, then every
 *      AF_INET6 address (copied and passed through in6_clearscope first).
 *   4. sc_pad is flipped in place to key XOR 0x5c, ready for the outer hash
 *      done in carp_hmac_generate.
 *
 * Addresses are hashed in list order, so both peers must agree on it.
 * Local declarations and any locking around the body are not visible in
 * this view.
 */
236 carp_hmac_prepare(struct carp_softc
*sc
)
238 u_int8_t version
= CARP_VERSION
, type
= CARP_ADVERTISEMENT
;
239 u_int8_t vhid
= sc
->sc_vhid
& 0xff;
249 /* XXX: possible race here */
251 /* compute ipad from key */
252 bzero(sc
->sc_pad
, sizeof(sc
->sc_pad
));
253 bcopy(sc
->sc_key
, sc
->sc_pad
, sizeof(sc
->sc_key
));
254 for (i
= 0; i
< sizeof(sc
->sc_pad
); i
++)
255 sc
->sc_pad
[i
] ^= 0x36;
257 /* precompute first part of inner hash */
258 SHA1Init(&sc
->sc_sha1
);
259 SHA1Update(&sc
->sc_sha1
, sc
->sc_pad
, sizeof(sc
->sc_pad
));
260 SHA1Update(&sc
->sc_sha1
, (void *)&version
, sizeof(version
));
261 SHA1Update(&sc
->sc_sha1
, (void *)&type
, sizeof(type
));
262 SHA1Update(&sc
->sc_sha1
, (void *)&vhid
, sizeof(vhid
));
/* First pass over the address list: IPv4 addresses only. */
264 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
265 if (ifa
->ifa_addr
->sa_family
== AF_INET
)
266 SHA1Update(&sc
->sc_sha1
,
267 (void *)&ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
,
268 sizeof(struct in_addr
));
/* Second pass: IPv6 addresses, hashed from a scope-cleared local copy. */
272 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
273 if (ifa
->ifa_addr
->sa_family
== AF_INET6
) {
274 in6
= ifatoia6(ifa
)->ia_addr
.sin6_addr
;
275 in6_clearscope(&in6
);
276 SHA1Update(&sc
->sc_sha1
, (void *)&in6
, sizeof(in6
));
281 /* convert ipad to opad */
/* XOR with (0x36 ^ 0x5c) removes the inner pad and applies the outer one in one step. */
282 for (i
= 0; i
< sizeof(sc
->sc_pad
); i
++)
283 sc
->sc_pad
[i
] ^= 0x36 ^ 0x5c;
/*
 * Produce the 20-byte digest for one advertisement.
 *
 * sc      - softc whose sc_sha1 and sc_pad were set up by carp_hmac_prepare
 * counter - two 32-bit words as they appear in the packet
 * md      - 20-byte output buffer
 *
 * Works on a private copy of the precomputed state, so sc_sha1 is left
 * untouched.  The counter is appended (sizeof(sc->sc_counter) bytes, which
 * matches the two 32-bit words) and the inner digest is finalized into md.
 * The visible tail then hashes sc_pad followed by that digest and finalizes
 * into md again; the re-initialisation of the context between the two
 * rounds is on a line that is not visible in this view.
 */
290 carp_hmac_generate(struct carp_softc
*sc
, u_int32_t counter
[2],
291 unsigned char md
[20])
295 /* fetch first half of inner hash */
296 bcopy(&sc
->sc_sha1
, &sha1ctx
, sizeof(sha1ctx
));
298 SHA1Update(&sha1ctx
, (void *)counter
, sizeof(sc
->sc_counter
));
299 SHA1Final(md
, &sha1ctx
);
303 SHA1Update(&sha1ctx
, sc
->sc_pad
, sizeof(sc
->sc_pad
));
304 SHA1Update(&sha1ctx
, md
, 20);
305 SHA1Final(md
, &sha1ctx
);
/*
 * Recompute the digest for counter and compare it with the received md.
 *
 * Returns the bcmp result: 0 when the digests match, non-zero otherwise.
 * The caller (carp_input_c) therefore treats a true return as a failure.
 *
 * NOTE(review): bcmp is not a constant-time comparison; consider whether a
 * timing-safe compare is warranted for an authentication check.
 */
309 carp_hmac_verify(struct carp_softc
*sc
, u_int32_t counter
[2],
310 unsigned char md
[20])
312 unsigned char md2
[20];
314 CARP_SCLOCK_ASSERT(sc
);
316 carp_hmac_generate(sc
, counter
, md2
);
318 return (bcmp(md
, md2
, sizeof(md2
)));
/*
 * Add or delete host routes for the addresses on a carp interface.
 *
 * cmd is RTM_ADD or RTM_DELETE.  For each AF_INET address (only when the
 * softc has a parent device) the number of MASTER softcs on the parent that
 * carry the same address is counted, and rtinit is called only when this
 * softc is the first master to add it (count == 1) or the last one to drop
 * it (count == 0).  The AF_INET6 branch starts here but its body is not
 * visible in this view.
 */
322 carp_setroute(struct carp_softc
*sc
, int cmd
)
327 CARP_SCLOCK_ASSERT(sc
);
330 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
331 if (ifa
->ifa_addr
->sa_family
== AF_INET
&&
332 sc
->sc_carpdev
!= NULL
) {
333 int count
= carp_addrcount(
334 (struct carp_if
*)sc
->sc_carpdev
->if_carp
,
335 ifatoia(ifa
), CARP_COUNT_MASTER
);
337 if ((cmd
== RTM_ADD
&& count
== 1) ||
338 (cmd
== RTM_DELETE
&& count
== 0))
339 rtinit(ifa
, cmd
, RTF_UP
| RTF_HOST
);
342 if (ifa
->ifa_addr
->sa_family
== AF_INET6
) {
/*
 * Cloner create callback: allocate and attach one carp interface.
 *
 * Allocates the softc (M_CARP) and a separate ifnet (M_IFNET), both zeroed
 * and with M_WAITOK, fills in defaults, initialises the three callouts,
 * sets up the ifnet methods, attaches it to the interface and BPF layers
 * and links the softc on carpif_list.
 *
 * NOTE(review): both allocations pass M_ZERO, so the explicit zero
 * assignments below (sc_flags_backup, sc_naddrs, sc_naddrs6) are redundant;
 * the original author asks the same question in the inline comment.
 */
355 carp_clone_create(struct if_clone
*ifc
, int unit
)
358 struct carp_softc
*sc
;
361 MALLOC(sc
, struct carp_softc
*, sizeof(*sc
), M_CARP
, M_WAITOK
|M_ZERO
);
362 ifp
= SC2IFP(sc
) = kmalloc(sizeof(struct ifnet
), M_IFNET
, M_WAITOK
|M_ZERO
);
364 sc
->sc_flags_backup
= 0;
366 sc
->sc_advbase
= CARP_DFLTINTV
;
/* carp_setrun only marks the interface running once sc_vhid > 0, so -1 keeps it idle until configured. */
367 sc
->sc_vhid
= -1; /* required setting */
/* Tells carp_prepare_ad to seed sc_counter randomly on first use. */
369 sc
->sc_init_counter
= 1;
370 sc
->sc_naddrs
= sc
->sc_naddrs6
= 0; /* M_ZERO? */
373 sc
->sc_im6o
.im6o_multicast_hlim
= CARP_DFLTTL
;
376 /* sc->sc_imo.imo_membership = kmalloc((sizeof(struct in_multi) * IP_MAX_MEMBERSHIPS), M_CARP,M_WAITOK);*/
378 sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS;
379 sc->sc_imo.imo_multicast_vif = -1;
381 callout_init(&sc
->sc_ad_tmo
);
382 callout_init(&sc
->sc_md_tmo
);
383 callout_init(&sc
->sc_md6_tmo
);
386 if_initname(ifp
, CARP_IFNAME
, unit
);
387 ifp
->if_mtu
= ETHERMTU
;
388 ifp
->if_flags
= IFF_LOOPBACK
;
389 ifp
->if_ioctl
= carp_ioctl
;
390 ifp
->if_output
= carp_looutput
;
391 ifp
->if_start
= carp_start
;
392 ifp
->if_type
= IFT_CARP
;
393 ifp
->if_snd
.ifq_maxlen
= ifqmaxlen
;
395 if_attach(ifp
, NULL
);
396 bpfattach(ifp
, DLT_NULL
, sizeof(u_int
));
399 LIST_INSERT_HEAD(&carpif_list
, sc
, sc_next
);
/*
 * Cloner destroy callback.  Recovers the softc from ifp->if_softc, detaches
 * it from its parent via carpdetach (asking it to unlock on return) and
 * unlinks it from carpif_list.  The steps that detach and free the ifnet
 * and softc are not visible in this view.
 */
406 carp_clone_destroy(struct ifnet
*ifp
)
408 struct carp_softc
*sc
= ifp
->if_softc
;
412 carpdetach(sc
, 1); /* Returns unlocked. */
415 LIST_REMOVE(sc
, sc_next
);
419 /* if_free_type(ifp, IFT_ETHER);*/
420 /* kfree(sc->sc_imo.imo_membership, M_CARP); */
/*
 * carpdetach: tear a softc away from its parent interface.
 *
 * Stops all three callouts, rebalances carp_suppress_preempt for any
 * suppression this softc was responsible for, forces the state to INIT,
 * clears IFF_UP and drops multicast memberships.  If a parent is set, the
 * softc is removed from the parent list; when it was the last one, the
 * parent leaves promiscuous mode and its if_carp pointer is cleared.
 *
 * The conditions guarding the first decrement and the multicast cleanup
 * calls, and the use of the unlock argument, are on lines that are not
 * visible in this view.
 */
425 * This function can be called on CARP interface destroy path,
426 * and in case of the removal of the underlying interface as
427 * well. We differentiate these two cases. In the latter case
428 * we do not cleanup our multicast memberships, since they
429 * are already freed. Also, in the latter case we do not
430 * release the lock on return, because the function will be
431 * called once more, for another CARP instance on the same
435 carpdetach(struct carp_softc
*sc
, int unlock
)
439 callout_stop(&sc
->sc_ad_tmo
);
440 callout_stop(&sc
->sc_md_tmo
);
441 callout_stop(&sc
->sc_md6_tmo
);
444 carp_suppress_preempt
--;
/* Undo the increment carp_send_ad_locked made when this softc hit the error limit. */
447 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
)
448 carp_suppress_preempt
--;
449 sc
->sc_sendad_errors
= 0;
451 carp_set_state(sc
, INIT
);
452 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
455 carp_multicast_cleanup(sc
);
457 carp_multicast6_cleanup(sc
);
460 if (sc
->sc_carpdev
!= NULL
) {
461 cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
462 CARP_LOCK_ASSERT(cif
);
463 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
/* Last softc on this parent: release the promiscuous reference and detach the carp_if. */
464 if (!--cif
->vhif_nvrs
) {
465 ifpromisc(sc
->sc_carpdev
, 0);
466 sc
->sc_carpdev
->if_carp
= NULL
;
467 CARP_LOCK_DESTROY(cif
);
471 sc
->sc_carpdev
= NULL
;
/*
 * Handler invoked with the parent interface being detached (the first
 * argument is unused).  Walks every softc on the parent list, fetching the
 * successor before the loop body runs, presumably because the body removes
 * the current entry.  The loop body and any check for a NULL if_carp are
 * not visible in this view.
 */
475 /* Detach an interface from the carp. */
477 carp_ifdetach(void *arg __unused
, struct ifnet
*ifp
)
479 struct carp_if
*cif
= (struct carp_if
*)ifp
->if_carp
;
480 struct carp_softc
*sc
, *nextsc
;
486 * XXX: At the end of for() cycle the lock will be destroyed.
489 for (sc
= TAILQ_FIRST(&cif
->vhif_vrs
); sc
; sc
= nextsc
) {
490 nextsc
= TAILQ_NEXT(sc
, sc_list
);
/*
 * carp_input: IPv4 entry point for received CARP packets.
 *
 * Bumps carps_ipackets, then applies these checks in order, counting and
 * logging each failure:
 *   - carp_opts[CARPCTL_ALLOW] must be set
 *   - the receiving interface must have if_carp set        (carps_badif)
 *   - ip_ttl must equal CARP_DFLTTL                         (carps_badttl)
 *   - the packet must hold the IP header plus a carp_header (carps_badlen)
 *   - m_pullup must succeed                                 (carps_hdrops)
 *   - carp_cksum over the CARP part must be zero            (carps_badsum)
 * and finally hands the packet to carp_input_c with AF_INET.
 * The statements that free the mbuf and return on each failure are not
 * visible in this view.
 *
 * NOTE(review): the ttl log format contains a stray character after 255
 * (it reads 255i).
 * NOTE(review): the first pullup is guarded by
 * iplen + sizeof(*ch) < m->m_len, which looks inverted for a pullup guard;
 * the unconditional m_pullup(m, len) further down covers the short case.
 * NOTE(review): the badlen log prints m->m_len - sizeof(struct ip) although
 * the test is on m_pkthdr.len and iplen.
 */
497 * process input packet.
498 * we have rearranged checks order compared to the rfc,
499 * but it seems more efficient this way or not possible otherwise.
502 carp_input(struct mbuf
*m
, int hlen
)
504 struct ip
*ip
= mtod(m
, struct ip
*);
505 struct carp_header
*ch
;
508 carpstats
.carps_ipackets
++;
510 if (!carp_opts
[CARPCTL_ALLOW
]) {
515 /* check if received on a valid carp interface */
516 if (m
->m_pkthdr
.rcvif
->if_carp
== NULL
) {
517 carpstats
.carps_badif
++;
518 CARP_LOG("carp_input: packet received on non-carp "
520 m
->m_pkthdr
.rcvif
->if_xname
);
525 /* verify that the IP TTL is 255. */
526 if (ip
->ip_ttl
!= CARP_DFLTTL
) {
527 carpstats
.carps_badttl
++;
528 CARP_LOG("carp_input: received ttl %d != 255i on %s\n",
530 m
->m_pkthdr
.rcvif
->if_xname
);
/* ip_hl counts 32-bit words; convert to bytes. */
535 iplen
= ip
->ip_hl
<< 2;
537 if (m
->m_pkthdr
.len
< iplen
+ sizeof(*ch
)) {
538 carpstats
.carps_badlen
++;
539 CARP_LOG("carp_input: received len %zd < "
540 "sizeof(struct carp_header)\n",
541 m
->m_len
- sizeof(struct ip
));
546 if (iplen
+ sizeof(*ch
) < m
->m_len
) {
547 if ((m
= m_pullup(m
, iplen
+ sizeof(*ch
))) == NULL
) {
548 carpstats
.carps_hdrops
++;
549 CARP_LOG("carp_input: pullup failed\n");
/* m_pullup may have replaced the mbuf, so the header pointer is refreshed. */
552 ip
= mtod(m
, struct ip
*);
554 ch
= (struct carp_header
*)((char *)ip
+ iplen
);
557 * verify that the received packet length is
558 * equal to the CARP header
560 len
= iplen
+ sizeof(*ch
);
561 if (len
> m
->m_pkthdr
.len
) {
562 carpstats
.carps_badlen
++;
563 CARP_LOG("carp_input: packet too short %d on %s\n",
565 m
->m_pkthdr
.rcvif
->if_xname
);
570 if ((m
= m_pullup(m
, len
)) == NULL
) {
571 carpstats
.carps_hdrops
++;
574 ip
= mtod(m
, struct ip
*);
575 ch
= (struct carp_header
*)((char *)ip
+ iplen
);
577 /* verify the CARP checksum */
579 if (carp_cksum(m
, len
- iplen
)) {
580 carpstats
.carps_badsum
++;
581 CARP_LOG("carp_input: checksum failed on %s\n",
582 m
->m_pkthdr
.rcvif
->if_xname
);
588 carp_input_c(m
, ch
, AF_INET
);
/*
 * IPv6 entry point for received CARP packets.
 *
 * Mirrors carp_input: bumps carps_ipackets6, then checks the allow knob,
 * that the receiving interface has if_carp set, that the hop limit equals
 * CARP_DFLTTL, that a full carp_header can be obtained at *offp
 * (IP6_EXTHDR_GET) and that the checksum over the header is zero, before
 * calling carp_input_c with AF_INET6.  Every visible exit returns
 * IPPROTO_DONE.  The mbuf frees on the failure paths are not visible in
 * this view.
 */
593 carp6_input(struct mbuf
**mp
, int *offp
, int proto
)
595 struct mbuf
*m
= *mp
;
596 struct ip6_hdr
*ip6
= mtod(m
, struct ip6_hdr
*);
597 struct carp_header
*ch
;
600 carpstats
.carps_ipackets6
++;
602 if (!carp_opts
[CARPCTL_ALLOW
]) {
604 return (IPPROTO_DONE
);
607 /* check if received on a valid carp interface */
608 if (m
->m_pkthdr
.rcvif
->if_carp
== NULL
) {
609 carpstats
.carps_badif
++;
610 CARP_LOG("carp6_input: packet received on non-carp "
612 m
->m_pkthdr
.rcvif
->if_xname
);
614 return (IPPROTO_DONE
);
617 /* verify that the IP TTL is 255 */
618 if (ip6
->ip6_hlim
!= CARP_DFLTTL
) {
619 carpstats
.carps_badttl
++;
620 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
622 m
->m_pkthdr
.rcvif
->if_xname
);
624 return (IPPROTO_DONE
);
627 /* verify that we have a complete carp packet */
629 IP6_EXTHDR_GET(ch
, struct carp_header
*, m
, *offp
, sizeof(*ch
));
631 carpstats
.carps_badlen
++;
632 CARP_LOG("carp6_input: packet size %u too small\n", len
);
633 return (IPPROTO_DONE
);
637 /* verify the CARP checksum */
639 if (carp_cksum(m
, sizeof(*ch
))) {
640 carpstats
.carps_badsum
++;
641 CARP_LOG("carp6_input: checksum failed, on %s\n",
642 m
->m_pkthdr
.rcvif
->if_xname
);
644 return (IPPROTO_DONE
);
648 carp_input_c(m
, ch
, AF_INET6
);
649 return (IPPROTO_DONE
);
/*
 * Address-family independent processing of a validated advertisement.
 *
 * Under the parent CARP lock: finds the softc whose sc_vhid matches the
 * packet (it must be IFF_UP and IFF_RUNNING, else carps_badvhid), updates
 * the carp interface counters, taps BPF, checks the protocol version
 * (carps_badver) and the HMAC (carps_badauth), stores the received counter
 * in sc_counter and clears sc_init_counter.  It then builds two timevals,
 * sc_tv for our own advertisement interval and ch_tv for the sender, and
 * acts on sc_state:
 *   - a master that hears an interval shorter than or equal to its own
 *     stops advertising, becomes BACKUP and deletes its routes;
 *   - a backup with preemption enabled, or one that hears an interval more
 *     than three times its own base, calls carp_master_down_locked.
 * The lock is released before returning.  The case labels, break and
 * return statements and mbuf frees are not visible in this view.
 *
 * NOTE(review): the BPF tap reads and rewrites the mbuf as a struct ip with
 * no visible test of af; confirm this is not reached for AF_INET6 packets.
 */
654 carp_input_c(struct mbuf
*m
, struct carp_header
*ch
, sa_family_t af
)
656 struct ifnet
*ifp
= m
->m_pkthdr
.rcvif
;
657 struct carp_softc
*sc
;
658 u_int64_t tmp_counter
;
659 struct timeval sc_tv
, ch_tv
;
661 /* verify that the VHID is valid on the receiving interface */
662 CARP_LOCK(ifp
->if_carp
);
663 TAILQ_FOREACH(sc
, &((struct carp_if
*)ifp
->if_carp
)->vhif_vrs
, sc_list
)
664 if (sc
->sc_vhid
== ch
->carp_vhid
)
667 if (!sc
|| !((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
))) {
668 carpstats
.carps_badvhid
++;
669 CARP_UNLOCK(ifp
->if_carp
);
674 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
675 SC2IFP(sc
)->if_ipackets
++;
676 SC2IFP(sc
)->if_ibytes
+= m
->m_pkthdr
.len
;
678 if (SC2IFP(sc
)->if_bpf
) {
679 struct ip
*ip
= mtod(m
, struct ip
*);
681 /* BPF wants net byte order */
682 ip
->ip_len
= htons(ip
->ip_len
+ (ip
->ip_hl
<< 2));
683 ip
->ip_off
= htons(ip
->ip_off
);
684 bpf_mtap(SC2IFP(sc
)->if_bpf
, m
);
687 /* verify the CARP version. */
688 if (ch
->carp_version
!= CARP_VERSION
) {
689 carpstats
.carps_badver
++;
690 SC2IFP(sc
)->if_ierrors
++;
691 CARP_UNLOCK(ifp
->if_carp
);
692 CARP_LOG("%s; invalid version %d\n",
693 SC2IFP(sc
)->if_xname
,
699 /* verify the hash */
700 if (carp_hmac_verify(sc
, ch
->carp_counter
, ch
->carp_md
)) {
701 carpstats
.carps_badauth
++;
702 SC2IFP(sc
)->if_ierrors
++;
703 CARP_UNLOCK(ifp
->if_carp
);
704 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc
)->if_xname
);
/* Rebuild the 64-bit counter from its two network-order 32-bit halves, high word first. */
709 tmp_counter
= ntohl(ch
->carp_counter
[0]);
710 tmp_counter
= tmp_counter
<<32;
711 tmp_counter
+= ntohl(ch
->carp_counter
[1]);
713 /* XXX Replay protection goes here */
715 sc
->sc_init_counter
= 0;
716 sc
->sc_counter
= tmp_counter
;
/*
 * Intervals are advbase seconds plus advskew/256 of a second.  While
 * preemption is suppressed and our skew is below 240, 240 is used for our
 * side instead.
 */
718 sc_tv
.tv_sec
= sc
->sc_advbase
;
719 if (carp_suppress_preempt
&& sc
->sc_advskew
< 240)
720 sc_tv
.tv_usec
= 240 * 1000000 / 256;
722 sc_tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
723 ch_tv
.tv_sec
= ch
->carp_advbase
;
724 ch_tv
.tv_usec
= ch
->carp_advskew
* 1000000 / 256;
726 switch (sc
->sc_state
) {
731 * If we receive an advertisement from a master who's going to
732 * be more frequent than us, go into BACKUP state.
734 if (timevalcmp(&sc_tv
, &ch_tv
, >) ||
735 timevalcmp(&sc_tv
, &ch_tv
, ==)) {
736 callout_stop(&sc
->sc_ad_tmo
);
737 CARP_DEBUG("%s: MASTER -> BACKUP "
738 "(more frequent advertisement received)\n",
739 SC2IFP(sc
)->if_xname
);
740 carp_set_state(sc
, BACKUP
);
742 carp_setroute(sc
, RTM_DELETE
);
747 * If we're pre-empting masters who advertise slower than us,
748 * and this one claims to be slower, treat him as down.
750 if (carp_opts
[CARPCTL_PREEMPT
] &&
751 timevalcmp(&sc_tv
, &ch_tv
, <)) {
752 CARP_DEBUG("%s: BACKUP -> MASTER "
753 "(preempting a slower master)\n",
754 SC2IFP(sc
)->if_xname
);
755 carp_master_down_locked(sc
);
760 * If the master is going to advertise at such a low frequency
761 * that he's guaranteed to time out, we'd might as well just
762 * treat him as timed out now.
764 sc_tv
.tv_sec
= sc
->sc_advbase
* 3;
765 if (timevalcmp(&sc_tv
, &ch_tv
, <)) {
766 CARP_DEBUG("%s: BACKUP -> MASTER "
767 "(master timed out)\n",
768 SC2IFP(sc
)->if_xname
);
769 carp_master_down_locked(sc
);
774 * Otherwise, we reset the counter and wait for the next
781 CARP_UNLOCK(ifp
->if_carp
);
/*
 * Fill in the per-packet authentication fields of an outgoing
 * advertisement and tag the mbuf.
 *
 * m  - packet being built
 * sc - sending softc
 * ch - CARP header inside m
 *
 * When sc_init_counter is set, sc_counter is seeded from two karc4random
 * values (high word first); the handling of the other case is not visible
 * in this view.  The counter is written to the header as two network-order
 * words, the HMAC is generated into ch->carp_md, and a PACKET_TAG_CARP tag
 * carrying the carp ifnet pointer is prepended to m.  Callers treat a
 * non-zero return as failure; the failure test after m_tag_get and the
 * return statements are not visible in this view.
 */
788 carp_prepare_ad(struct mbuf
*m
, struct carp_softc
*sc
, struct carp_header
*ch
)
791 struct ifnet
*ifp
= SC2IFP(sc
);
793 if (sc
->sc_init_counter
) {
794 /* this could also be seconds since unix epoch */
795 sc
->sc_counter
= karc4random();
796 sc
->sc_counter
= sc
->sc_counter
<< 32;
797 sc
->sc_counter
+= karc4random();
801 ch
->carp_counter
[0] = htonl((sc
->sc_counter
>>32)&0xffffffff);
802 ch
->carp_counter
[1] = htonl(sc
->sc_counter
&0xffffffff);
804 carp_hmac_generate(sc
, ch
->carp_counter
, ch
->carp_md
);
806 /* Tag packet for carp_output */
807 mtag
= m_tag_get(PACKET_TAG_CARP
, sizeof(struct ifnet
*), MB_DONTWAIT
);
810 SC2IFP(sc
)->if_oerrors
++;
/* The tag payload (just past the m_tag header) holds the pointer value itself. */
813 bcopy(&ifp
, (caddr_t
)(mtag
+ 1), sizeof(struct ifnet
*));
814 m_tag_prepend(m
, mtag
);
/*
 * Send an advertisement from every softc on carpif_list that has a parent
 * device, is IFF_UP and IFF_RUNNING, and is currently MASTER.  The action
 * taken for softcs without a parent (presumably skipping them) and any
 * locking around the call are not visible in this view.
 */
820 carp_send_ad_all(void)
822 struct carp_softc
*sc
;
824 LIST_FOREACH(sc
, &carpif_list
, sc_next
) {
825 if (sc
->sc_carpdev
== NULL
)
828 if ((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
) &&
829 sc
->sc_state
== MASTER
)
830 carp_send_ad_locked(sc
);
/*
 * Callout-style entry point: v is the softc.  Delegates to
 * carp_send_ad_locked; any lock acquisition around the call is not visible
 * in this view.
 */
836 carp_send_ad(void *v
)
838 struct carp_softc
*sc
= v
;
841 carp_send_ad_locked(sc
);
/*
 * Build and transmit one advertisement for sc, then re-arm sc_ad_tmo.
 *
 * Flow of the visible code:
 *   - Bail out if the carp interface is not both IFF_UP and IFF_RUNNING.
 *   - Choose advbase and advskew (our own skew unless preemption is
 *     suppressed and the skew is not above 240) and derive the timer value.
 *   - Fill a template carp_header on the stack.
 *   - IPv4 section: allocate an mbuf header, build the IP header
 *     (TTL CARP_DFLTTL, protocol IPPROTO_CARP, destination
 *     INADDR_CARP_GROUP, source from sc_ia), copy the template in, call
 *     carp_prepare_ad, checksum the CARP part by temporarily advancing
 *     m_data past the IP header, update counters and call ip_output with
 *     IP_RAWOUTPUT and the multicast options in sc_imo.
 *   - IPv6 section: the same with an ip6_hdr, destination ff02::12 scoped
 *     to the parent via in6_setscope, and ip6_output with sc_im6o.
 *   - After each send, sc_sendad_errors and sc_sendad_success drive
 *     carp_suppress_preempt as described on the softc fields.
 *   - Unless both advbase and advskew are 255, sc_ad_tmo is reset.
 * On mbuf allocation failure if_oerrors and carps_onomem are bumped and
 * the timer is still re-armed.
 *
 * The conditions selecting the IPv4 and IPv6 sections, the else branches,
 * the callout_reset callback arguments and the error exits are on lines
 * that are not visible in this view.
 */
846 carp_send_ad_locked(struct carp_softc
*sc
)
848 struct carp_header ch
;
850 struct carp_header
*ch_ptr
;
852 int len
, advbase
, advskew
;
855 /* bow out if we've lost our UPness or RUNNINGuiness */
856 if (!((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
))) {
860 advbase
= sc
->sc_advbase
;
861 if (!carp_suppress_preempt
|| sc
->sc_advskew
> 240)
862 advskew
= sc
->sc_advskew
;
866 tv
.tv_usec
= advskew
* 1000000 / 256;
869 ch
.carp_version
= CARP_VERSION
;
870 ch
.carp_type
= CARP_ADVERTISEMENT
;
871 ch
.carp_vhid
= sc
->sc_vhid
;
872 ch
.carp_advbase
= advbase
;
873 ch
.carp_advskew
= advskew
;
874 ch
.carp_authlen
= 7; /* XXX DEFINE */
875 ch
.carp_pad1
= 0; /* must be zero */
882 MGETHDR(m
, M_NOWAIT
, MT_HEADER
);
884 SC2IFP(sc
)->if_oerrors
++;
885 carpstats
.carps_onomem
++;
886 /* XXX maybe less ? */
887 if (advbase
!= 255 || advskew
!= 255)
888 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
892 len
= sizeof(*ip
) + sizeof(ch
);
893 m
->m_pkthdr
.len
= len
;
894 m
->m_pkthdr
.rcvif
= NULL
;
896 MH_ALIGN(m
, m
->m_len
);
897 m
->m_flags
|= M_MCAST
;
898 ip
= mtod(m
, struct ip
*);
899 ip
->ip_v
= IPVERSION
;
900 ip
->ip_hl
= sizeof(*ip
) >> 2;
901 ip
->ip_tos
= IPTOS_LOWDELAY
;
903 ip
->ip_id
= ip_newid();
905 ip
->ip_ttl
= CARP_DFLTTL
;
906 ip
->ip_p
= IPPROTO_CARP
;
908 ip
->ip_src
.s_addr
= sc
->sc_ia
->ia_addr
.sin_addr
.s_addr
;
909 ip
->ip_dst
.s_addr
= htonl(INADDR_CARP_GROUP
);
911 ch_ptr
= (struct carp_header
*)(&ip
[1]);
912 bcopy(&ch
, ch_ptr
, sizeof(ch
));
913 if (carp_prepare_ad(m
, sc
, ch_ptr
))
916 m
->m_data
+= sizeof(*ip
);
917 ch_ptr
->carp_cksum
= carp_cksum(m
, len
- sizeof(*ip
));
918 m
->m_data
-= sizeof(*ip
);
920 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
921 SC2IFP(sc
)->if_opackets
++;
922 SC2IFP(sc
)->if_obytes
+= len
;
923 carpstats
.carps_opackets
++;
925 if (ip_output(m
, NULL
, NULL
, IP_RAWOUTPUT
, &sc
->sc_imo
, NULL
)) {
926 SC2IFP(sc
)->if_oerrors
++;
927 if (sc
->sc_sendad_errors
< INT_MAX
)
928 sc
->sc_sendad_errors
++;
929 if (sc
->sc_sendad_errors
== CARP_SENDAD_MAX_ERRORS
) {
930 carp_suppress_preempt
++;
931 if (carp_suppress_preempt
== 1) {
937 sc
->sc_sendad_success
= 0;
939 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
) {
940 if (++sc
->sc_sendad_success
>=
941 CARP_SENDAD_MIN_SUCCESS
) {
942 carp_suppress_preempt
--;
943 sc
->sc_sendad_errors
= 0;
946 sc
->sc_sendad_errors
= 0;
954 MGETHDR(m
, M_NOWAIT
, MT_HEADER
);
956 SC2IFP(sc
)->if_oerrors
++;
957 carpstats
.carps_onomem
++;
958 /* XXX maybe less ? */
959 if (advbase
!= 255 || advskew
!= 255)
960 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
964 len
= sizeof(*ip6
) + sizeof(ch
);
965 m
->m_pkthdr
.len
= len
;
966 m
->m_pkthdr
.rcvif
= NULL
;
968 MH_ALIGN(m
, m
->m_len
);
969 m
->m_flags
|= M_MCAST
;
970 ip6
= mtod(m
, struct ip6_hdr
*);
971 bzero(ip6
, sizeof(*ip6
));
972 ip6
->ip6_vfc
|= IPV6_VERSION
;
973 ip6
->ip6_hlim
= CARP_DFLTTL
;
974 ip6
->ip6_nxt
= IPPROTO_CARP
;
975 bcopy(&sc
->sc_ia6
->ia_addr
.sin6_addr
, &ip6
->ip6_src
,
976 sizeof(struct in6_addr
));
977 /* set the multicast destination */
979 ip6
->ip6_dst
.s6_addr16
[0] = htons(0xff02);
980 ip6
->ip6_dst
.s6_addr8
[15] = 0x12;
981 if (in6_setscope(&ip6
->ip6_dst
, sc
->sc_carpdev
, NULL
) != 0) {
982 SC2IFP(sc
)->if_oerrors
++;
984 CARP_LOG("%s: in6_setscope failed\n", __func__
);
988 ch_ptr
= (struct carp_header
*)(&ip6
[1]);
989 bcopy(&ch
, ch_ptr
, sizeof(ch
));
990 if (carp_prepare_ad(m
, sc
, ch_ptr
))
993 m
->m_data
+= sizeof(*ip6
);
994 ch_ptr
->carp_cksum
= carp_cksum(m
, len
- sizeof(*ip6
));
995 m
->m_data
-= sizeof(*ip6
);
997 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
998 SC2IFP(sc
)->if_opackets
++;
999 SC2IFP(sc
)->if_obytes
+= len
;
1000 carpstats
.carps_opackets6
++;
1002 if (ip6_output(m
, NULL
, NULL
, 0, &sc
->sc_im6o
, NULL
, NULL
)) {
1003 SC2IFP(sc
)->if_oerrors
++;
1004 if (sc
->sc_sendad_errors
< INT_MAX
)
1005 sc
->sc_sendad_errors
++;
1006 if (sc
->sc_sendad_errors
== CARP_SENDAD_MAX_ERRORS
) {
1007 carp_suppress_preempt
++;
1008 if (carp_suppress_preempt
== 1) {
1014 sc
->sc_sendad_success
= 0;
1016 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
) {
1017 if (++sc
->sc_sendad_success
>=
1018 CARP_SENDAD_MIN_SUCCESS
) {
1019 carp_suppress_preempt
--;
1020 sc
->sc_sendad_errors
= 0;
1023 sc
->sc_sendad_errors
= 0;
1028 if (advbase
!= 255 || advskew
!= 255)
1029 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
/*
 * carp_send_arp: for each AF_INET address on the carp interface, call
 * arp_ifinit2 on the parent device, passing the link-layer address of the
 * carp interface, while holding the serializer of the parent.  A DELAY of
 * 1000 follows each call.  The statement taken for non-AF_INET entries
 * (presumably a continue) is not visible in this view.
 */
1035 * Broadcast a gratuitous ARP request containing
1036 * the virtual router MAC address for each IP address
1037 * associated with the virtual router.
1040 carp_send_arp(struct carp_softc
*sc
)
1044 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
1046 if (ifa
->ifa_addr
->sa_family
!= AF_INET
)
1048 lwkt_serialize_enter(sc
->sc_carpdev
->if_serializer
);
1049 arp_ifinit2(sc
->sc_carpdev
, ifa
, IF_LLADDR(sc
->sc_ifp
));
1050 lwkt_serialize_exit(sc
->sc_carpdev
->if_serializer
);
1052 DELAY(1000); /* XXX */
/*
 * IPv6 counterpart of carp_send_arp: for each AF_INET6 address on the carp
 * interface, send a neighbor advertisement out of the parent device to the
 * link-local all-nodes group with ND_NA_FLAG_OVERRIDE set, then DELAY 1000.
 * The statement taken for non-AF_INET6 entries is not visible in this view.
 */
1058 carp_send_na(struct carp_softc
*sc
)
1061 struct in6_addr
*in6
;
1062 static struct in6_addr mcast
= IN6ADDR_LINKLOCAL_ALLNODES_INIT
;
1064 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
1066 if (ifa
->ifa_addr
->sa_family
!= AF_INET6
)
1069 in6
= &ifatoia6(ifa
)->ia_addr
.sin6_addr
;
1070 nd6_na_output(sc
->sc_carpdev
, &mcast
, in6
,
1071 ND_NA_FLAG_OVERRIDE
, 1, NULL
);
1072 DELAY(1000); /* XXX */
/*
 * Examine the softcs on cif that carry the IPv4 address in ia.
 *
 * type selects which softcs are considered:
 *   CARP_COUNT_RUNNING - those that are IFF_UP and IFF_RUNNING
 *   CARP_COUNT_MASTER  - those whose state is MASTER
 * For each selected softc, every AF_INET address on its interface is
 * compared with ia.  The counting statement and the return are not visible
 * in this view; callers (carp_setroute, carp_iamatch) use the result as a
 * count of matches.
 */
1078 carp_addrcount(struct carp_if
*cif
, struct in_ifaddr
*ia
, int type
)
1080 struct carp_softc
*vh
;
1084 CARP_LOCK_ASSERT(cif
);
1086 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1087 if ((type
== CARP_COUNT_RUNNING
&&
1088 (SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
)) ||
1089 (type
== CARP_COUNT_MASTER
&& vh
->sc_state
== MASTER
)) {
1090 TAILQ_FOREACH(ifa
, &SC2IFP(vh
)->if_addrlist
,
1092 if (ifa
->ifa_addr
->sa_family
== AF_INET
&&
1093 ia
->ia_addr
.sin_addr
.s_addr
==
1094 ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
)
/*
 * Decide whether this host should answer an ARP request for the address in
 * ia, and if so which link-layer address to answer with (stored through
 * enaddr).
 *
 * v is the carp_if of the receiving interface; isaddr is the source address
 * of the request.
 *
 * With carp_opts[CARPCTL_ARPBALANCE] set, the running softcs carrying the
 * address are counted, the host-order source address modulo that count
 * picks an index, and the softc at that index supplies the address.
 * Otherwise the first softc that is IFF_UP, IFF_RUNNING and MASTER supplies
 * it.  The zero-count guard, the MASTER test inside the balanced branch,
 * the index bookkeeping and the return values are not visible in this view.
 */
1103 carp_iamatch(void *v
, struct in_ifaddr
*ia
,
1104 struct in_addr
*isaddr
, u_int8_t
**enaddr
)
1106 struct carp_if
*cif
= v
;
1107 struct carp_softc
*vh
;
1108 int index
, count
= 0;
1113 if (carp_opts
[CARPCTL_ARPBALANCE
]) {
1115 * XXX proof of concept implementation.
1116 * We use the source ip to decide which virtual host should
1117 * handle the request. If we're master of that virtual host,
1118 * then we respond, otherwise, just drop the arp packet on
1121 count
= carp_addrcount(cif
, ia
, CARP_COUNT_RUNNING
);
1123 /* should never reach this */
1128 /* this should be a hash, like pf_hash() */
1129 index
= ntohl(isaddr
->s_addr
) % count
;
1132 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1133 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
)) {
1134 TAILQ_FOREACH(ifa
, &SC2IFP(vh
)->if_addrlist
,
1136 if (ifa
->ifa_addr
->sa_family
==
1138 ia
->ia_addr
.sin_addr
.s_addr
==
1139 ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
) {
1140 if (count
== index
) {
1143 *enaddr
= IF_LLADDR(vh
->sc_ifp
);
1157 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1158 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1159 vh
->sc_state
== MASTER
) {
1160 *enaddr
= IF_LLADDR(vh
->sc_ifp
);
/*
 * IPv6 lookup: search every address of every softc on the carp_if passed in
 * v for one equal to taddr whose softc is IFF_UP, IFF_RUNNING and MASTER.
 * What is returned for a hit or a miss is not visible in this view.
 *
 * NOTE(review): the visible comparison reads the entry through ifatoia6
 * with no visible test that the entry is AF_INET6; confirm a guard exists
 * on the lines not shown.
 */
1172 carp_iamatch6(void *v
, struct in6_addr
*taddr
)
1174 struct carp_if
*cif
= v
;
1175 struct carp_softc
*vh
;
1179 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1180 TAILQ_FOREACH(ifa
, &SC2IFP(vh
)->if_addrlist
, ifa_list
) {
1181 if (IN6_ARE_ADDR_EQUAL(taddr
,
1182 &ifatoia6(ifa
)->ia_addr
.sin6_addr
) &&
1183 (SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1184 vh
->sc_state
== MASTER
) {
/*
 * Find the softc on the carp_if passed in v that carries the IPv6 address
 * taddr and is IFF_UP and IFF_RUNNING, tag mbuf m with PACKET_TAG_CARP
 * carrying the pointer to that carp ifnet, and return the link-layer
 * address of the carp interface.
 *
 * If the tag allocation fails the link-layer address is still returned,
 * just without the tag (see the inline remark).  Unlike carp_iamatch6, the
 * visible condition does not require MASTER state.  The value returned when
 * nothing matches is not visible in this view.
 */
1196 carp_macmatch6(void *v
, struct mbuf
*m
, const struct in6_addr
*taddr
)
1199 struct carp_if
*cif
= v
;
1200 struct carp_softc
*sc
;
1204 TAILQ_FOREACH(sc
, &cif
->vhif_vrs
, sc_list
) {
1205 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
1206 if (IN6_ARE_ADDR_EQUAL(taddr
,
1207 &ifatoia6(ifa
)->ia_addr
.sin6_addr
) &&
1208 (SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
)) {
1209 struct ifnet
*ifp
= SC2IFP(sc
);
1210 mtag
= m_tag_get(PACKET_TAG_CARP
,
1211 sizeof(struct ifnet
*), MB_DONTWAIT
);
1213 /* better a bit than nothing */
1215 return (IF_LLADDR(sc
->sc_ifp
));
1217 bcopy(&ifp
, (caddr_t
)(mtag
+ 1),
1218 sizeof(struct ifnet
*));
1219 m_tag_prepend(m
, mtag
);
1222 return (IF_LLADDR(sc
->sc_ifp
));
/*
 * Map a destination Ethernet address to the carp interface that owns it.
 *
 * v is the carp_if of the receiving interface and dhost points at the
 * destination address.  Addresses whose first five bytes are not
 * 00:00:5e:00:01 are rejected up front.  Otherwise the first softc that is
 * IFF_UP, IFF_RUNNING, MASTER and whose link-layer address equals dhost
 * (ETHER_ADDR_LEN bytes) has its ifnet returned.  The values returned on
 * rejection or on no match are not visible in this view.
 */
1233 carp_forus(void *v
, void *dhost
)
1235 struct carp_if
*cif
= v
;
1236 struct carp_softc
*vh
;
1237 u_int8_t
*ena
= dhost
;
1240 * XXX: See here for check on MAC adr is not for virtual use
/* True when any of the first five bytes differs from 00:00:5e:00:01. */
1244 if (ena
[0] || ena
[1] || ena
[2] != 0x5e || ena
[3] || ena
[4] != 1)
1250 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
)
1251 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1252 vh
->sc_state
== MASTER
&&
1253 !bcmp(dhost
, IF_LLADDR(vh
->sc_ifp
), ETHER_ADDR_LEN
)) {
1255 return (SC2IFP(vh
));
/*
 * Callout handler armed by carp_setrun on sc_md_tmo and sc_md6_tmo; v is
 * the softc.  Runs carp_master_down_locked while holding the serializer of
 * the carp interface.
 *
 * NOTE(review): this takes the ifnet serializer rather than the CARP lock
 * that the _locked naming and CARP_SCLOCK_ASSERT in the callee suggest;
 * confirm which lock is intended to protect the state change.
 */
1263 carp_master_down(void *v
)
1265 struct carp_softc
*sc
= v
;
1267 lwkt_serialize_enter(sc
->sc_ifp
->if_serializer
);
1268 carp_master_down_locked(sc
);
1269 lwkt_serialize_exit(sc
->sc_ifp
->if_serializer
);
/*
 * React to the master being considered down, dispatching on sc_state.
 *
 * Visible actions: an INIT-state event is reported with kprintf; the
 * take-over path sets the state to MASTER, sends an advertisement at once
 * and adds the routes.  The case labels, the remaining take-over steps and
 * the break statements are not visible in this view.
 */
1273 carp_master_down_locked(struct carp_softc
*sc
)
1276 CARP_SCLOCK_ASSERT(sc
);
1278 switch (sc
->sc_state
) {
1280 kprintf("%s: master_down event in INIT state\n",
1281 SC2IFP(sc
)->if_xname
);
1286 carp_set_state(sc
, MASTER
);
1287 carp_send_ad_locked(sc
);
1293 carp_setroute(sc
, RTM_ADD
);
1299 * When in backup state, af indicates whether to reset the master down timer
1300 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
/*
 * Re-evaluate whether the softc should be running and (re)arm its timers.
 *
 * Without a parent device the interface is marked not running and the state
 * is forced to INIT.  Otherwise IFF_RUNNING is set only when the interface
 * is IFF_UP, sc_vhid is positive and at least one IPv4 or IPv6 address is
 * configured; if not, IFF_RUNNING is cleared and routes are deleted.
 *
 * Then, by state:
 *   INIT   - with preemption enabled and not suppressed, advertise and
 *            become MASTER (adding routes); otherwise become BACKUP
 *            (deleting routes).
 *   BACKUP - stop the advertisement timer and arm the master-down timers
 *            for 3 * advbase seconds plus advskew/256 of a second.  Which
 *            of sc_md_tmo and sc_md6_tmo is armed depends on af; the
 *            selecting statements are not visible in this view.
 *   MASTER - arm sc_ad_tmo for advbase seconds plus advskew/256 of a
 *            second.
 * The case labels, returns and the callback passed to the final
 * callout_reset are not visible in this view.
 */
1303 carp_setrun(struct carp_softc
*sc
, sa_family_t af
)
1307 if (sc
->sc_carpdev
== NULL
) {
1308 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1309 carp_set_state(sc
, INIT
);
1313 if (SC2IFP(sc
)->if_flags
& IFF_UP
&&
1314 sc
->sc_vhid
> 0 && (sc
->sc_naddrs
|| sc
->sc_naddrs6
))
1315 SC2IFP(sc
)->if_flags
|= IFF_RUNNING
;
1317 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1318 carp_setroute(sc
, RTM_DELETE
);
1322 switch (sc
->sc_state
) {
1324 if (carp_opts
[CARPCTL_PREEMPT
] && !carp_suppress_preempt
) {
1325 carp_send_ad_locked(sc
);
1330 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1331 SC2IFP(sc
)->if_xname
);
1332 carp_set_state(sc
, MASTER
);
1333 carp_setroute(sc
, RTM_ADD
);
1335 CARP_DEBUG("%s: INIT -> BACKUP\n", SC2IFP(sc
)->if_xname
);
1336 carp_set_state(sc
, BACKUP
);
1337 carp_setroute(sc
, RTM_DELETE
);
1342 callout_stop(&sc
->sc_ad_tmo
);
/* Master-down interval: three advertisement periods plus our skew. */
1343 tv
.tv_sec
= 3 * sc
->sc_advbase
;
1344 tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
1348 callout_reset(&sc
->sc_md_tmo
, tvtohz_high(&tv
),
1349 carp_master_down
, sc
);
1354 callout_reset(&sc
->sc_md6_tmo
, tvtohz_high(&tv
),
1355 carp_master_down
, sc
);
1360 callout_reset(&sc
->sc_md_tmo
, tvtohz_high(&tv
),
1361 carp_master_down
, sc
);
1363 callout_reset(&sc
->sc_md6_tmo
, tvtohz_high(&tv
),
1364 carp_master_down
, sc
);
1369 tv
.tv_sec
= sc
->sc_advbase
;
1370 tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
1371 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
1378 carp_multicast_cleanup(struct carp_softc
*sc
)
1380 struct ip_moptions
*imo
= &sc
->sc_imo
;
1381 u_int16_t n
= imo
->imo_num_memberships
;
1383 /* Clean up our own multicast memberships */
1385 if (imo
->imo_membership
[n
] != NULL
) {
1386 in_delmulti(imo
->imo_membership
[n
]);
1387 imo
->imo_membership
[n
] = NULL
;
1390 imo
->imo_num_memberships
= 0;
1391 imo
->imo_multicast_ifp
= NULL
;
1396 carp_multicast6_cleanup(struct carp_softc
*sc
)
1398 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1400 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1401 struct in6_multi_mship
*imm
=
1402 LIST_FIRST(&im6o
->im6o_memberships
);
1404 LIST_REMOVE(imm
, i6mm_chain
);
1405 in6_leavegroup(imm
);
1407 im6o
->im6o_multicast_ifp
= NULL
;
1412 carp_set_addr(struct carp_softc
*sc
, struct sockaddr_in
*sin
)
1415 struct carp_if
*cif
;
1416 struct in_ifaddr
*ia
, *ia_if
;
1417 struct ip_moptions
*imo
= &sc
->sc_imo
;
1418 struct in_addr addr
;
1419 u_long iaddr
= htonl(sin
->sin_addr
.s_addr
);
1422 if (sin
->sin_addr
.s_addr
== 0)
1424 if (!(SC2IFP(sc
)->if_flags
& IFF_UP
))
1426 carp_set_state(sc
, INIT
);
1430 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1435 /* we have to do it by hands to check we won't match on us */
1436 ia_if
= NULL
; own
= 0;
1437 TAILQ_FOREACH(ia
, &in_ifaddrhead
, ia_link
) {
1438 /* and, yeah, we need a multicast-capable iface too */
1439 if (ia
->ia_ifp
!= SC2IFP(sc
) &&
1440 (ia
->ia_ifp
->if_flags
& IFF_MULTICAST
) &&
1441 (iaddr
& ia
->ia_subnetmask
) == ia
->ia_subnet
) {
1444 if (sin
->sin_addr
.s_addr
==
1445 ia
->ia_addr
.sin_addr
.s_addr
)
1452 return (EADDRNOTAVAIL
);
1457 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0 ||
1458 (imo
->imo_multicast_ifp
&& imo
->imo_multicast_ifp
!= ifp
))
1459 return (EADDRNOTAVAIL
);
1461 if (imo
->imo_num_memberships
== 0) {
1462 addr
.s_addr
= htonl(INADDR_CARP_GROUP
);
1463 if ((imo
->imo_membership
[0] = in_addmulti(&addr
, ifp
)) == NULL
)
1465 imo
->imo_num_memberships
++;
1466 imo
->imo_multicast_ifp
= ifp
;
1467 imo
->imo_multicast_ttl
= CARP_DFLTTL
;
1468 imo
->imo_multicast_loop
= 0;
1471 if (!ifp
->if_carp
) {
1473 MALLOC(cif
, struct carp_if
*, sizeof(*cif
), M_CARP
,
1475 if ((error
= ifpromisc(ifp
, 1))) {
1480 CARP_LOCK_INIT(cif
);
1482 cif
->vhif_ifp
= ifp
;
1483 TAILQ_INIT(&cif
->vhif_vrs
);
1487 struct carp_softc
*vr
;
1489 cif
= (struct carp_if
*)ifp
->if_carp
;
1491 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1492 if (vr
!= sc
&& vr
->sc_vhid
== sc
->sc_vhid
) {
1499 sc
->sc_carpdev
= ifp
;
1501 { /* XXX prevent endless loop if already in queue */
1502 struct carp_softc
*vr
, *after
= NULL
;
1504 cif
= (struct carp_if
*)ifp
->if_carp
;
1506 /* XXX: cif should not change, right? So we still hold the lock */
1507 CARP_LOCK_ASSERT(cif
);
1509 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
) {
1512 if (vr
->sc_vhid
< sc
->sc_vhid
)
1517 /* We're trying to keep things in order */
1518 if (after
== NULL
) {
1519 TAILQ_INSERT_TAIL(&cif
->vhif_vrs
, sc
, sc_list
);
1521 TAILQ_INSERT_AFTER(&cif
->vhif_vrs
, after
, sc
, sc_list
);
1528 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1533 carp_sc_state_locked(sc
);
1541 in_delmulti(imo
->imo_membership
[--imo
->imo_num_memberships
]);
1547 carp_del_addr(struct carp_softc
*sc
, struct sockaddr_in
*sin
)
1551 if (!--sc
->sc_naddrs
) {
1552 struct carp_if
*cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1553 struct ip_moptions
*imo
= &sc
->sc_imo
;
1556 callout_stop(&sc
->sc_ad_tmo
);
1557 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
1558 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1560 in_delmulti(imo
->imo_membership
[--imo
->imo_num_memberships
]);
1561 imo
->imo_multicast_ifp
= NULL
;
1562 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
1563 if (!--cif
->vhif_nvrs
) {
1564 sc
->sc_carpdev
->if_carp
= NULL
;
1565 CARP_LOCK_DESTROY(cif
);
1566 FREE(cif
, M_IFADDR
);
1577 carp_set_addr6(struct carp_softc
*sc
, struct sockaddr_in6
*sin6
)
1580 struct carp_if
*cif
;
1581 struct in6_ifaddr
*ia
, *ia_if
;
1582 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1583 struct in6_multi_mship
*imm
;
1584 struct in6_addr in6
;
1587 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
)) {
1588 if (!(SC2IFP(sc
)->if_flags
& IFF_UP
))
1589 carp_set_state(sc
, INIT
);
1591 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1596 /* we have to do it by hands to check we won't match on us */
1597 ia_if
= NULL
; own
= 0;
1598 for (ia
= in6_ifaddr
; ia
; ia
= ia
->ia_next
) {
1601 for (i
= 0; i
< 4; i
++) {
1602 if ((sin6
->sin6_addr
.s6_addr32
[i
] &
1603 ia
->ia_prefixmask
.sin6_addr
.s6_addr32
[i
]) !=
1604 (ia
->ia_addr
.sin6_addr
.s6_addr32
[i
] &
1605 ia
->ia_prefixmask
.sin6_addr
.s6_addr32
[i
]))
1608 /* and, yeah, we need a multicast-capable iface too */
1609 if (ia
->ia_ifp
!= SC2IFP(sc
) &&
1610 (ia
->ia_ifp
->if_flags
& IFF_MULTICAST
) &&
1614 if (IN6_ARE_ADDR_EQUAL(&sin6
->sin6_addr
,
1615 &ia
->ia_addr
.sin6_addr
))
1621 return (EADDRNOTAVAIL
);
1625 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0 ||
1626 (im6o
->im6o_multicast_ifp
&& im6o
->im6o_multicast_ifp
!= ifp
))
1627 return (EADDRNOTAVAIL
);
1629 if (!sc
->sc_naddrs6
) {
1630 im6o
->im6o_multicast_ifp
= ifp
;
1632 /* join CARP multicast address */
1633 bzero(&in6
, sizeof(in6
));
1634 in6
.s6_addr16
[0] = htons(0xff02);
1635 in6
.s6_addr8
[15] = 0x12;
1636 if (in6_setscope(&in6
, ifp
, NULL
) != 0)
1638 if ((imm
= in6_joingroup(ifp
, &in6
, &error
)) == NULL
)
1640 LIST_INSERT_HEAD(&im6o
->im6o_memberships
, imm
, i6mm_chain
);
1642 /* join solicited multicast address */
1643 bzero(&in6
, sizeof(in6
));
1644 in6
.s6_addr16
[0] = htons(0xff02);
1645 in6
.s6_addr32
[1] = 0;
1646 in6
.s6_addr32
[2] = htonl(1);
1647 in6
.s6_addr32
[3] = sin6
->sin6_addr
.s6_addr32
[3];
1648 in6
.s6_addr8
[12] = 0xff;
1649 if (in6_setscope(&in6
, ifp
, NULL
) != 0)
1651 if ((imm
= in6_joingroup(ifp
, &in6
, &error
)) == NULL
)
1653 LIST_INSERT_HEAD(&im6o
->im6o_memberships
, imm
, i6mm_chain
);
1656 if (!ifp
->if_carp
) {
1657 MALLOC(cif
, struct carp_if
*, sizeof(*cif
), M_CARP
,
1659 if ((error
= ifpromisc(ifp
, 1))) {
1664 CARP_LOCK_INIT(cif
);
1666 cif
->vhif_ifp
= ifp
;
1667 TAILQ_INIT(&cif
->vhif_vrs
);
1671 struct carp_softc
*vr
;
1673 cif
= (struct carp_if
*)ifp
->if_carp
;
1675 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1676 if (vr
!= sc
&& vr
->sc_vhid
== sc
->sc_vhid
) {
1683 sc
->sc_carpdev
= ifp
;
1685 { /* XXX prevent endless loop if already in queue */
1686 struct carp_softc
*vr
, *after
= NULL
;
1688 cif
= (struct carp_if
*)ifp
->if_carp
;
1689 CARP_LOCK_ASSERT(cif
);
1691 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
) {
1694 if (vr
->sc_vhid
< sc
->sc_vhid
)
1699 /* We're trying to keep things in order */
1700 if (after
== NULL
) {
1701 TAILQ_INSERT_TAIL(&cif
->vhif_vrs
, sc
, sc_list
);
1703 TAILQ_INSERT_AFTER(&cif
->vhif_vrs
, after
, sc
, sc_list
);
1710 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1713 carp_sc_state_locked(sc
);
1721 /* clean up multicast memberships */
1722 if (!sc
->sc_naddrs6
) {
1723 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1724 imm
= LIST_FIRST(&im6o
->im6o_memberships
);
1725 LIST_REMOVE(imm
, i6mm_chain
);
1726 in6_leavegroup(imm
);
1733 carp_del_addr6(struct carp_softc
*sc
, struct sockaddr_in6
*sin6
)
1737 if (!--sc
->sc_naddrs6
) {
1738 struct carp_if
*cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1739 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1742 callout_stop(&sc
->sc_ad_tmo
);
1743 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
1744 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1746 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1747 struct in6_multi_mship
*imm
=
1748 LIST_FIRST(&im6o
->im6o_memberships
);
1750 LIST_REMOVE(imm
, i6mm_chain
);
1751 in6_leavegroup(imm
);
1753 im6o
->im6o_multicast_ifp
= NULL
;
1754 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
1755 if (!--cif
->vhif_nvrs
) {
1756 CARP_LOCK_DESTROY(cif
);
1757 sc
->sc_carpdev
->if_carp
= NULL
;
1758 FREE(cif
, M_IFADDR
);
1768 carp_ioctl(struct ifnet
*ifp
, u_long cmd
, caddr_t addr
, struct ucred
*creds
)
1770 struct carp_softc
*sc
= ifp
->if_softc
, *vr
;
1771 struct carpreq carpr
;
1774 struct ifaliasreq
*ifra
;
1775 int locked
= 0, error
= 0;
1777 ifa
= (struct ifaddr
*)addr
;
1778 ifra
= (struct ifaliasreq
*)addr
;
1779 ifr
= (struct ifreq
*)addr
;
1784 switch (ifa
->ifa_addr
->sa_family
) {
1787 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1788 bcopy(ifa
->ifa_addr
, ifa
->ifa_dstaddr
,
1789 sizeof(struct sockaddr
));
1790 error
= carp_set_addr(sc
, satosin(ifa
->ifa_addr
));
1795 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1796 error
= carp_set_addr6(sc
, satosin6(ifa
->ifa_addr
));
1800 error
= EAFNOSUPPORT
;
1806 switch (ifa
->ifa_addr
->sa_family
) {
1809 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1810 bcopy(ifa
->ifa_addr
, ifa
->ifa_dstaddr
,
1811 sizeof(struct sockaddr
));
1812 error
= carp_set_addr(sc
, satosin(&ifra
->ifra_addr
));
1817 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1818 error
= carp_set_addr6(sc
, satosin6(&ifra
->ifra_addr
));
1822 error
= EAFNOSUPPORT
;
1828 switch (ifa
->ifa_addr
->sa_family
) {
1831 error
= carp_del_addr(sc
, satosin(&ifra
->ifra_addr
));
1836 error
= carp_del_addr6(sc
, satosin6(&ifra
->ifra_addr
));
1840 error
= EAFNOSUPPORT
;
1846 if (sc
->sc_carpdev
) {
1850 if (sc
->sc_state
!= INIT
&& !(ifr
->ifr_flags
& IFF_UP
)) {
1851 callout_stop(&sc
->sc_ad_tmo
);
1852 callout_stop(&sc
->sc_md_tmo
);
1853 callout_stop(&sc
->sc_md6_tmo
);
1854 if (sc
->sc_state
== MASTER
)
1855 carp_send_ad_locked(sc
);
1856 carp_set_state(sc
, INIT
);
1858 } else if (sc
->sc_state
== INIT
&& (ifr
->ifr_flags
& IFF_UP
)) {
1859 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1865 error
= suser(curthread
);
1868 if ((error
= copyin(ifr
->ifr_data
, &carpr
, sizeof carpr
)))
1871 if (sc
->sc_carpdev
) {
1875 if (sc
->sc_state
!= INIT
&& carpr
.carpr_state
!= sc
->sc_state
) {
1876 switch (carpr
.carpr_state
) {
1878 callout_stop(&sc
->sc_ad_tmo
);
1879 carp_set_state(sc
, BACKUP
);
1881 carp_setroute(sc
, RTM_DELETE
);
1884 carp_master_down_locked(sc
);
1890 if (carpr
.carpr_vhid
> 0) {
1891 if (carpr
.carpr_vhid
> 255) {
1895 if (sc
->sc_carpdev
) {
1896 struct carp_if
*cif
;
1897 cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1898 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1900 vr
->sc_vhid
== carpr
.carpr_vhid
)
1903 sc
->sc_vhid
= carpr
.carpr_vhid
;
1904 IF_LLADDR(sc
->sc_ifp
)[0] = 0;
1905 IF_LLADDR(sc
->sc_ifp
)[1] = 0;
1906 IF_LLADDR(sc
->sc_ifp
)[2] = 0x5e;
1907 IF_LLADDR(sc
->sc_ifp
)[3] = 0;
1908 IF_LLADDR(sc
->sc_ifp
)[4] = 1;
1909 IF_LLADDR(sc
->sc_ifp
)[5] = sc
->sc_vhid
;
1912 if (carpr
.carpr_advbase
> 0 || carpr
.carpr_advskew
> 0) {
1913 if (carpr
.carpr_advskew
>= 255) {
1917 if (carpr
.carpr_advbase
> 255) {
1921 sc
->sc_advbase
= carpr
.carpr_advbase
;
1922 sc
->sc_advskew
= carpr
.carpr_advskew
;
1925 bcopy(carpr
.carpr_key
, sc
->sc_key
, sizeof(sc
->sc_key
));
1935 /* XXX: lockless read */
1936 bzero(&carpr
, sizeof(carpr
));
1937 carpr
.carpr_state
= sc
->sc_state
;
1938 carpr
.carpr_vhid
= sc
->sc_vhid
;
1939 carpr
.carpr_advbase
= sc
->sc_advbase
;
1940 carpr
.carpr_advskew
= sc
->sc_advskew
;
1941 error
= suser(curthread
);
1943 bcopy(sc
->sc_key
, carpr
.carpr_key
,
1944 sizeof(carpr
.carpr_key
));
1945 error
= copyout(&carpr
, ifr
->ifr_data
, sizeof(carpr
));
1955 carp_hmac_prepare(sc
);
1961 * XXX: this is looutput. We should eventually use it from there.
/*
 * Loopback-style output routine for the CARP interface.
 *
 * Visible behaviour:
 *  - asserts that m carries a packet header;
 *  - for a route flagged RTF_REJECT or RTF_BLACKHOLE, returns 0 for a
 *    blackhole route, otherwise EHOSTUNREACH for a host route and
 *    ENETUNREACH for a network route;
 *  - adds the packet length to ifp->if_obytes;
 *  - for AF_UNSPEC destinations (BPF writes), takes the real address family
 *    from the first bytes of dst->sa_data and stores it in dst->sa_family;
 *  - returns EAFNOSUPPORT for an address family the switch rejects;
 *  - otherwise hands the mbuf to if_simloop() and returns its result.
 *
 * NOTE(review): the listing omits several lines of this function (the rt
 * parameter, the af declaration, the case labels of the switch); confirm
 * the accepted families and the mbuf handling on the error paths against
 * the complete source.
 */
1964 carp_looutput(struct ifnet
*ifp
, struct mbuf
*m
, struct sockaddr
*dst
,
1969 M_ASSERTPKTHDR(m
); /* check if we have the packet header */
1971 if (rt
&& rt
->rt_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) {
1973 return (rt
->rt_flags
& RTF_BLACKHOLE
? 0 :
1974 rt
->rt_flags
& RTF_HOST
? EHOSTUNREACH
: ENETUNREACH
);
1978 ifp
->if_obytes
+= m
->m_pkthdr
.len
;
1980 /* BPF writes need to be handled specially. */
1981 if (dst
->sa_family
== AF_UNSPEC
) {
1982 bcopy(dst
->sa_data
, &af
, sizeof(af
));
1983 dst
->sa_family
= af
;
1987 switch (dst
->sa_family
) {
1995 return (EAFNOSUPPORT
);
1998 return(if_simloop(ifp
, m
, dst
->sa_family
, 0));
/*
 * if_start hook of the CARP interface.  Output never goes through this
 * path, so the function has nothing to do; debug kernels log the
 * unexpected call.
 */
static void
carp_start(struct ifnet *ifp)
{
#ifdef DEBUG
	kprintf("%s: start called\n", ifp->if_xname);
#endif
}
/*
 * Output hook: rewrite the link-level source address of packets that were
 * tagged as belonging to a CARP virtual host.
 *
 * Visible behaviour:
 *  - dispatches on sa->sa_family;
 *  - looks up the PACKET_TAG_CARP mbuf tag and copies the struct ifnet
 *    pointer stored right behind the tag header into carp_ifp; that
 *    interface's if_softc is the carp_softc used below;
 *  - dispatches on ifp->if_type; for the Ethernet-style branch the source
 *    MAC in the Ethernet header at the start of the mbuf data is
 *    overwritten with 00:00:5e:00:01:<vhid>;
 *  - for an unsupported interface type a message is printed and
 *    EOPNOTSUPP is returned.
 *
 * NOTE(review): the case labels of both switches, the handling of a
 * missing tag and the final return value are not part of this listing;
 * confirm them against the complete source.
 */
2013 carp_output(struct ifnet
*ifp
, struct mbuf
*m
, struct sockaddr
*sa
,
2017 struct carp_softc
*sc
;
2018 struct ifnet
*carp_ifp
;
2023 switch (sa
->sa_family
) {
2036 mtag
= m_tag_find(m
, PACKET_TAG_CARP
, NULL
);
2040 bcopy(mtag
+ 1, &carp_ifp
, sizeof(struct ifnet
*));
2041 sc
= carp_ifp
->if_softc
;
2043 /* Set the source MAC address to Virtual Router MAC Address */
2044 switch (ifp
->if_type
) {
2047 struct ether_header
*eh
;
2049 eh
= mtod(m
, struct ether_header
*);
2050 eh
->ether_shost
[0] = 0;
2051 eh
->ether_shost
[1] = 0;
2052 eh
->ether_shost
[2] = 0x5e;
2053 eh
->ether_shost
[3] = 0;
2054 eh
->ether_shost
[4] = 1;
2055 eh
->ether_shost
[5] = sc
->sc_vhid
;
2059 kprintf("%s: carp is not supported for this interface type\n",
2061 return (EOPNOTSUPP
);
2069 carp_set_state(struct carp_softc
*sc
, int state
)
2073 CARP_SCLOCK_ASSERT(sc
);
2075 if (sc
->sc_state
== state
)
2078 sc
->sc_state
= state
;
2081 SC2IFP(sc
)->if_link_state
= LINK_STATE_DOWN
;
2084 SC2IFP(sc
)->if_link_state
= LINK_STATE_UP
;
2087 SC2IFP(sc
)->if_link_state
= LINK_STATE_UNKNOWN
;
2090 rt_ifmsg(SC2IFP(sc
));
/*
 * Parent interface state change notification.
 *
 * v is the struct carp_if of the parent interface.  Takes the carp_if
 * lock around carp_carpdev_state_locked(), which re-evaluates every
 * virtual host attached to it.
 */
void
carp_carpdev_state(void *v)
{
	struct carp_if *parent = v;

	CARP_LOCK(parent);
	carp_carpdev_state_locked(parent);
	CARP_UNLOCK(parent);
}
2104 carp_carpdev_state_locked(struct carp_if
*cif
)
2106 struct carp_softc
*sc
;
2108 TAILQ_FOREACH(sc
, &cif
->vhif_vrs
, sc_list
)
2109 carp_sc_state_locked(sc
);
2113 carp_sc_state_locked(struct carp_softc
*sc
)
2115 CARP_SCLOCK_ASSERT(sc
);
2117 if ( !(sc
->sc_carpdev
->if_flags
& IFF_UP
)) {
2118 sc
->sc_flags_backup
= SC2IFP(sc
)->if_flags
;
2119 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
2120 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
2121 callout_stop(&sc
->sc_ad_tmo
);
2122 callout_stop(&sc
->sc_md_tmo
);
2123 callout_stop(&sc
->sc_md6_tmo
);
2124 carp_set_state(sc
, INIT
);
2126 if (!sc
->sc_suppress
) {
2127 carp_suppress_preempt
++;
2128 if (carp_suppress_preempt
== 1) {
2134 sc
->sc_suppress
= 1;
2136 SC2IFP(sc
)->if_flags
|= sc
->sc_flags_backup
;
2137 carp_set_state(sc
, INIT
);
2139 if (sc
->sc_suppress
)
2140 carp_suppress_preempt
--;
2141 sc
->sc_suppress
= 0;
/*
 * Module event handler.
 *
 * Visible behaviour: one branch registers carp_ifdetach for
 * ifnet_departure_event (storing the tag in if_detach_event_tag and
 * checking it for NULL), initialises carpif_list and attaches the
 * interface cloner; another branch deregisters the event handler and
 * detaches the cloner.
 *
 * NOTE(review): the case labels on `type' and the return statements are
 * not part of this listing -- presumably the two branches are module load
 * and unload; confirm against the complete source.
 */
2148 carp_modevent(module_t mod
, int type
, void *data
)
2152 if_detach_event_tag
= EVENTHANDLER_REGISTER(ifnet_departure_event
,
2153 carp_ifdetach
, NULL
, EVENTHANDLER_PRI_ANY
);
2154 if (if_detach_event_tag
== NULL
)
2157 LIST_INIT(&carpif_list
);
2158 if_clone_attach(&carp_cloner
);
2162 EVENTHANDLER_DEREGISTER(ifnet_departure_event
, if_detach_event_tag
);
2163 if_clone_detach(&carp_cloner
);
/*
 * Module glue: carp_mod is the moduledata_t passed to DECLARE_MODULE,
 * which declares the "carp" module at SI_SUB_PSEUDO / SI_ORDER_ANY.
 * NOTE(review): the initializer members of carp_mod are not part of this
 * listing.
 */
2173 static moduledata_t carp_mod
= {
2179 DECLARE_MODULE(carp
, carp_mod
, SI_SUB_PSEUDO
, SI_ORDER_ANY
);