2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3 * Copyright (c) 2003 Ryan McBride. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24 * THE POSSIBILITY OF SUCH DAMAGE.
27 * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28 * $DragonFly: src/sys/netinet/ip_carp.c,v 1.3 2007/08/27 16:15:42 hasso Exp $
32 /*#include "opt_bpf.h"*/
34 #include "opt_inet6.h"
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <machine/limits.h>
42 #include <sys/malloc.h>
44 #include <sys/module.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/signalvar.h>
50 #include <sys/filio.h>
51 #include <sys/sockio.h>
52 #include <sys/in_cksum.h>
53 #include <sys/socket.h>
54 #include <sys/vnode.h>
56 #include <machine/stdarg.h>
59 #include <net/ethernet.h>
61 #include <net/if_dl.h>
62 #include <net/if_types.h>
63 #include <net/route.h>
66 #include <netinet/in.h>
67 #include <netinet/in_var.h>
68 #include <netinet/in_systm.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip_var.h>
71 #include <netinet/if_ether.h>
75 #include <netinet/icmp6.h>
76 #include <netinet/ip6.h>
77 #include <netinet6/ip6_var.h>
78 #include <netinet6/scope6_var.h>
79 #include <netinet6/nd6.h>
82 #include <crypto/sha1.h>
83 #include <netinet/ip_carp.h>
86 #define CARP_IFNAME "carp"
87 static MALLOC_DEFINE(M_CARP
, "CARP", "CARP interfaces");
88 static MALLOC_DEFINE(M_IFNET
, "IFNET", "IFNET CARP?");
89 SYSCTL_DECL(_net_inet_carp
);
92 struct ifnet
*sc_ifp
; /* Interface clue */
93 struct ifnet
*sc_carpdev
; /* Pointer to parent interface */
94 struct in_ifaddr
*sc_ia
; /* primary iface address */
95 struct ip_moptions sc_imo
;
97 struct in6_ifaddr
*sc_ia6
; /* primary iface address v6 */
98 struct ip6_moptions sc_im6o
;
100 TAILQ_ENTRY(carp_softc
) sc_list
;
102 enum { INIT
= 0, BACKUP
, MASTER
} sc_state
;
107 int sc_sendad_errors
;
108 #define CARP_SENDAD_MAX_ERRORS 3
109 int sc_sendad_success
;
110 #define CARP_SENDAD_MIN_SUCCESS 3
116 int sc_advbase
; /* seconds */
118 u_int64_t sc_counter
;
121 #define CARP_HMAC_PAD 64
122 unsigned char sc_key
[CARP_KEY_LEN
];
123 unsigned char sc_pad
[CARP_HMAC_PAD
];
126 struct callout sc_ad_tmo
; /* advertisement timeout */
127 struct callout sc_md_tmo
; /* master down timeout */
128 struct callout sc_md6_tmo
; /* master down timeout */
130 LIST_ENTRY(carp_softc
) sc_next
; /* Interface clue */
132 #define SC2IFP(sc) ((sc)->sc_ifp)
134 int carp_suppress_preempt
= 0;
135 int carp_opts
[CARPCTL_MAXID
] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
136 SYSCTL_INT(_net_inet_carp
, CARPCTL_ALLOW
, allow
, CTLFLAG_RW
,
137 &carp_opts
[CARPCTL_ALLOW
], 0, "Accept incoming CARP packets");
138 SYSCTL_INT(_net_inet_carp
, CARPCTL_PREEMPT
, preempt
, CTLFLAG_RW
,
139 &carp_opts
[CARPCTL_PREEMPT
], 0, "high-priority backup preemption mode");
140 SYSCTL_INT(_net_inet_carp
, CARPCTL_LOG
, log
, CTLFLAG_RW
,
141 &carp_opts
[CARPCTL_LOG
], 0, "log bad carp packets");
142 SYSCTL_INT(_net_inet_carp
, CARPCTL_ARPBALANCE
, arpbalance
, CTLFLAG_RW
,
143 &carp_opts
[CARPCTL_ARPBALANCE
], 0, "balance arp responses");
144 SYSCTL_INT(_net_inet_carp
, OID_AUTO
, suppress_preempt
, CTLFLAG_RD
,
145 &carp_suppress_preempt
, 0, "Preemption is suppressed");
147 struct carpstats carpstats
;
148 SYSCTL_STRUCT(_net_inet_carp
, CARPCTL_STATS
, stats
, CTLFLAG_RW
,
149 &carpstats
, carpstats
,
150 "CARP statistics (struct carpstats, netinet/ip_carp.h)");
153 TAILQ_HEAD(, carp_softc
) vhif_vrs
;
156 struct ifnet
*vhif_ifp
;
157 struct lock vhif_lock
;
160 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */
161 #define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
/*
 * Lock wrappers for the per-parent-interface state (struct carp_if),
 * built on lockinit()/lockmgr() over cif->vhif_lock.
 *
 * None of these macros ends in a semicolon: the caller supplies it, so
 * every use expands to exactly one statement and remains valid in an
 * unbraced if/else.  The *_DESTROY and *_ASSERT variants are no-ops that
 * do not evaluate their argument.
 *
 * NOTE(review): the lock is initialised with LK_NOWAIT and the result of
 * lockmgr() is discarded wherever CARP_LOCK/CARP_SCLOCK is used as a
 * statement -- confirm that an acquisition cannot fail here.
 */
#define	CARP_LOCK_INIT(cif)	lockinit(&(cif)->vhif_lock, "carp_if", 0, LK_NOWAIT)
#define	CARP_LOCK_DESTROY(cif)	((void)0)
#define	CARP_LOCK_ASSERT(cif)	((void)0)
#define	CARP_LOCK(cif)		lockmgr(&(cif)->vhif_lock, LK_EXCLUSIVE)
#define	CARP_UNLOCK(cif)	lockmgr(&(cif)->vhif_lock, LK_RELEASE)

/* Same operations, reaching the carp_if through a softc via SC2CIF(). */
#define	CARP_SCLOCK(sc)		lockmgr(&SC2CIF(sc)->vhif_lock, LK_EXCLUSIVE)
#define	CARP_SCUNLOCK(sc)	lockmgr(&SC2CIF(sc)->vhif_lock, LK_RELEASE)
#define	CARP_SCLOCK_ASSERT(sc)	((void)0)
173 #define CARP_LOG(...) do { \
174 if (carp_opts[CARPCTL_LOG] > 0) \
175 log(LOG_INFO, __VA_ARGS__); \
178 #define CARP_DEBUG(...) do { \
179 if (carp_opts[CARPCTL_LOG] > 1) \
180 log(LOG_DEBUG, __VA_ARGS__); \
183 static void carp_hmac_prepare(struct carp_softc
*);
184 static void carp_hmac_generate(struct carp_softc
*, u_int32_t
*,
186 static int carp_hmac_verify(struct carp_softc
*, u_int32_t
*,
188 static void carp_setroute(struct carp_softc
*, int);
189 static void carp_input_c(struct mbuf
*, struct carp_header
*, sa_family_t
);
190 static int carp_clone_create(struct if_clone
*, int);
191 static void carp_clone_destroy(struct ifnet
*);
192 static void carpdetach(struct carp_softc
*, int);
193 static int carp_prepare_ad(struct mbuf
*, struct carp_softc
*,
194 struct carp_header
*);
195 static void carp_send_ad_all(void);
196 static void carp_send_ad(void *);
197 static void carp_send_ad_locked(struct carp_softc
*);
198 static void carp_send_arp(struct carp_softc
*);
199 static void carp_master_down(void *);
200 static void carp_master_down_locked(struct carp_softc
*);
201 static int carp_ioctl(struct ifnet
*, u_long
, caddr_t
, struct ucred
*);
202 static int carp_looutput(struct ifnet
*, struct mbuf
*, struct sockaddr
*,
204 static void carp_start(struct ifnet
*);
205 static void carp_setrun(struct carp_softc
*, sa_family_t
);
206 static void carp_set_state(struct carp_softc
*, int);
207 static int carp_addrcount(struct carp_if
*, struct in_ifaddr
*, int);
208 enum { CARP_COUNT_MASTER
, CARP_COUNT_RUNNING
};
210 static void carp_multicast_cleanup(struct carp_softc
*);
211 static int carp_set_addr(struct carp_softc
*, struct sockaddr_in
*);
212 static int carp_del_addr(struct carp_softc
*, struct sockaddr_in
*);
213 static void carp_carpdev_state_locked(struct carp_if
*);
214 static void carp_sc_state_locked(struct carp_softc
*);
216 static void carp_send_na(struct carp_softc
*);
217 static int carp_set_addr6(struct carp_softc
*, struct sockaddr_in6
*);
218 static int carp_del_addr6(struct carp_softc
*, struct sockaddr_in6
*);
219 static void carp_multicast6_cleanup(struct carp_softc
*);
222 static LIST_HEAD(, carp_softc
) carpif_list
;
224 struct if_clone carp_cloner
= IF_CLONE_INITIALIZER(CARP_IFNAME
, carp_clone_create
, carp_clone_destroy
, 0, IF_MAXUNIT
);
226 static eventhandler_tag if_detach_event_tag
;
/*
 * Thin wrapper around in_cksum(): hands the mbuf chain m and byte count
 * len straight through and returns the result as a u_int16_t.
 */
static __inline u_int16_t
carp_cksum(struct mbuf *m, int len)
{
	u_int16_t sum;

	sum = in_cksum(m, len);
	return (sum);
}
235 carp_hmac_prepare(struct carp_softc
*sc
)
237 u_int8_t version
= CARP_VERSION
, type
= CARP_ADVERTISEMENT
;
238 u_int8_t vhid
= sc
->sc_vhid
& 0xff;
248 /* XXX: possible race here */
250 /* compute ipad from key */
251 bzero(sc
->sc_pad
, sizeof(sc
->sc_pad
));
252 bcopy(sc
->sc_key
, sc
->sc_pad
, sizeof(sc
->sc_key
));
253 for (i
= 0; i
< sizeof(sc
->sc_pad
); i
++)
254 sc
->sc_pad
[i
] ^= 0x36;
256 /* precompute first part of inner hash */
257 SHA1Init(&sc
->sc_sha1
);
258 SHA1Update(&sc
->sc_sha1
, sc
->sc_pad
, sizeof(sc
->sc_pad
));
259 SHA1Update(&sc
->sc_sha1
, (void *)&version
, sizeof(version
));
260 SHA1Update(&sc
->sc_sha1
, (void *)&type
, sizeof(type
));
261 SHA1Update(&sc
->sc_sha1
, (void *)&vhid
, sizeof(vhid
));
263 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
264 if (ifa
->ifa_addr
->sa_family
== AF_INET
)
265 SHA1Update(&sc
->sc_sha1
,
266 (void *)&ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
,
267 sizeof(struct in_addr
));
271 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
272 if (ifa
->ifa_addr
->sa_family
== AF_INET6
) {
273 in6
= ifatoia6(ifa
)->ia_addr
.sin6_addr
;
274 in6_clearscope(&in6
);
275 SHA1Update(&sc
->sc_sha1
, (void *)&in6
, sizeof(in6
));
280 /* convert ipad to opad */
281 for (i
= 0; i
< sizeof(sc
->sc_pad
); i
++)
282 sc
->sc_pad
[i
] ^= 0x36 ^ 0x5c;
289 carp_hmac_generate(struct carp_softc
*sc
, u_int32_t counter
[2],
290 unsigned char md
[20])
294 /* fetch first half of inner hash */
295 bcopy(&sc
->sc_sha1
, &sha1ctx
, sizeof(sha1ctx
));
297 SHA1Update(&sha1ctx
, (void *)counter
, sizeof(sc
->sc_counter
));
298 SHA1Final(md
, &sha1ctx
);
302 SHA1Update(&sha1ctx
, sc
->sc_pad
, sizeof(sc
->sc_pad
));
303 SHA1Update(&sha1ctx
, md
, 20);
304 SHA1Final(md
, &sha1ctx
);
/*
 * Check the HMAC carried by a received advertisement.
 *
 *   sc      - softc handed to carp_hmac_generate() to produce the
 *             reference digest
 *   counter - the two 32-bit counter words from the packet
 *   md      - the 20-byte digest from the packet
 *
 * Returns 0 when md matches the locally generated digest and non-zero
 * otherwise, the same convention as the bcmp() call this replaces.
 *
 * md comes from the network, so the comparison deliberately visits all
 * 20 bytes and ORs the differences together rather than stopping at the
 * first mismatch; the time taken therefore does not reveal how many
 * leading bytes of a forged digest were correct.
 */
static int
carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
    unsigned char md[20])
{
	unsigned char md2[20];
	unsigned char diff = 0;
	size_t i;

	CARP_SCLOCK_ASSERT(sc);

	carp_hmac_generate(sc, counter, md2);

	for (i = 0; i < sizeof(md2); i++)
		diff |= (unsigned char)(md[i] ^ md2[i]);

	return (diff != 0);
}
321 carp_setroute(struct carp_softc
*sc
, int cmd
)
326 CARP_SCLOCK_ASSERT(sc
);
329 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
330 if (ifa
->ifa_addr
->sa_family
== AF_INET
&&
331 sc
->sc_carpdev
!= NULL
) {
332 int count
= carp_addrcount(
333 (struct carp_if
*)sc
->sc_carpdev
->if_carp
,
334 ifatoia(ifa
), CARP_COUNT_MASTER
);
336 if ((cmd
== RTM_ADD
&& count
== 1) ||
337 (cmd
== RTM_DELETE
&& count
== 0))
338 rtinit(ifa
, cmd
, RTF_UP
| RTF_HOST
);
341 if (ifa
->ifa_addr
->sa_family
== AF_INET6
) {
354 carp_clone_create(struct if_clone
*ifc
, int unit
)
357 struct carp_softc
*sc
;
360 MALLOC(sc
, struct carp_softc
*, sizeof(*sc
), M_CARP
, M_WAITOK
|M_ZERO
);
361 ifp
= SC2IFP(sc
) = kmalloc(sizeof(struct ifnet
), M_IFNET
, M_WAITOK
|M_ZERO
);
368 sc
->sc_flags_backup
= 0;
370 sc
->sc_advbase
= CARP_DFLTINTV
;
371 sc
->sc_vhid
= -1; /* required setting */
373 sc
->sc_init_counter
= 1;
374 sc
->sc_naddrs
= sc
->sc_naddrs6
= 0; /* M_ZERO? */
377 sc
->sc_im6o
.im6o_multicast_hlim
= CARP_DFLTTL
;
380 /* sc->sc_imo.imo_membership = kmalloc((sizeof(struct in_multi) * IP_MAX_MEMBERSHIPS), M_CARP,M_WAITOK);*/
382 sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS;
383 sc->sc_imo.imo_multicast_vif = -1;
385 callout_init(&sc
->sc_ad_tmo
);
386 callout_init(&sc
->sc_md_tmo
);
387 callout_init(&sc
->sc_md6_tmo
);
390 if_initname(ifp
, CARP_IFNAME
, unit
);
391 ifp
->if_mtu
= ETHERMTU
;
392 ifp
->if_flags
= IFF_LOOPBACK
;
393 ifp
->if_ioctl
= carp_ioctl
;
394 ifp
->if_output
= carp_looutput
;
395 ifp
->if_start
= carp_start
;
396 ifp
->if_type
= IFT_CARP
;
397 ifp
->if_snd
.ifq_maxlen
= ifqmaxlen
;
399 if_attach(ifp
, NULL
);
400 bpfattach(ifp
, DLT_NULL
, sizeof(u_int
));
403 LIST_INSERT_HEAD(&carpif_list
, sc
, sc_next
);
410 carp_clone_destroy(struct ifnet
*ifp
)
412 struct carp_softc
*sc
= ifp
->if_softc
;
416 carpdetach(sc
, 1); /* Returns unlocked. */
419 LIST_REMOVE(sc
, sc_next
);
423 /* if_free_type(ifp, IFT_ETHER);*/
424 /* kfree(sc->sc_imo.imo_membership, M_CARP); */
429 * This function can be called on CARP interface destroy path,
430 * and in case of the removal of the underlying interface as
431 * well. We differentiate these two cases. In the latter case
432 * we do not cleanup our multicast memberships, since they
433 * are already freed. Also, in the latter case we do not
434 * release the lock on return, because the function will be
435 * called once more, for another CARP instance on the same
439 carpdetach(struct carp_softc
*sc
, int unlock
)
443 callout_stop(&sc
->sc_ad_tmo
);
444 callout_stop(&sc
->sc_md_tmo
);
445 callout_stop(&sc
->sc_md6_tmo
);
448 carp_suppress_preempt
--;
451 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
)
452 carp_suppress_preempt
--;
453 sc
->sc_sendad_errors
= 0;
455 carp_set_state(sc
, INIT
);
456 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
459 carp_multicast_cleanup(sc
);
461 carp_multicast6_cleanup(sc
);
464 if (sc
->sc_carpdev
!= NULL
) {
465 cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
466 CARP_LOCK_ASSERT(cif
);
467 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
468 if (!--cif
->vhif_nvrs
) {
469 ifpromisc(sc
->sc_carpdev
, 0);
470 sc
->sc_carpdev
->if_carp
= NULL
;
471 CARP_LOCK_DESTROY(cif
);
475 sc
->sc_carpdev
= NULL
;
479 /* Detach an interface from the carp. */
481 carp_ifdetach(void *arg __unused
, struct ifnet
*ifp
)
483 struct carp_if
*cif
= (struct carp_if
*)ifp
->if_carp
;
484 struct carp_softc
*sc
, *nextsc
;
490 * XXX: At the end of for() cycle the lock will be destroyed.
493 for (sc
= TAILQ_FIRST(&cif
->vhif_vrs
); sc
; sc
= nextsc
) {
494 nextsc
= TAILQ_NEXT(sc
, sc_list
);
501 * process input packet.
502 * we have rearranged checks order compared to the rfc,
503 * but it seems more efficient this way or not possible otherwise.
506 carp_input(struct mbuf
*m
, int hlen
)
508 struct ip
*ip
= mtod(m
, struct ip
*);
509 struct carp_header
*ch
;
512 carpstats
.carps_ipackets
++;
514 if (!carp_opts
[CARPCTL_ALLOW
]) {
519 /* check if received on a valid carp interface */
520 if (m
->m_pkthdr
.rcvif
->if_carp
== NULL
) {
521 carpstats
.carps_badif
++;
522 CARP_LOG("carp_input: packet received on non-carp "
524 m
->m_pkthdr
.rcvif
->if_xname
);
529 /* verify that the IP TTL is 255. */
530 if (ip
->ip_ttl
!= CARP_DFLTTL
) {
531 carpstats
.carps_badttl
++;
532 CARP_LOG("carp_input: received ttl %d != 255i on %s\n",
534 m
->m_pkthdr
.rcvif
->if_xname
);
539 iplen
= ip
->ip_hl
<< 2;
541 if (m
->m_pkthdr
.len
< iplen
+ sizeof(*ch
)) {
542 carpstats
.carps_badlen
++;
543 CARP_LOG("carp_input: received len %zd < "
544 "sizeof(struct carp_header)\n",
545 m
->m_len
- sizeof(struct ip
));
550 if (iplen
+ sizeof(*ch
) < m
->m_len
) {
551 if ((m
= m_pullup(m
, iplen
+ sizeof(*ch
))) == NULL
) {
552 carpstats
.carps_hdrops
++;
553 CARP_LOG("carp_input: pullup failed\n");
556 ip
= mtod(m
, struct ip
*);
558 ch
= (struct carp_header
*)((char *)ip
+ iplen
);
561 * verify that the received packet length is
562 * equal to the CARP header
564 len
= iplen
+ sizeof(*ch
);
565 if (len
> m
->m_pkthdr
.len
) {
566 carpstats
.carps_badlen
++;
567 CARP_LOG("carp_input: packet too short %d on %s\n",
569 m
->m_pkthdr
.rcvif
->if_xname
);
574 if ((m
= m_pullup(m
, len
)) == NULL
) {
575 carpstats
.carps_hdrops
++;
578 ip
= mtod(m
, struct ip
*);
579 ch
= (struct carp_header
*)((char *)ip
+ iplen
);
581 /* verify the CARP checksum */
583 if (carp_cksum(m
, len
- iplen
)) {
584 carpstats
.carps_badsum
++;
585 CARP_LOG("carp_input: checksum failed on %s\n",
586 m
->m_pkthdr
.rcvif
->if_xname
);
592 carp_input_c(m
, ch
, AF_INET
);
597 carp6_input(struct mbuf
**mp
, int *offp
, int proto
)
599 struct mbuf
*m
= *mp
;
600 struct ip6_hdr
*ip6
= mtod(m
, struct ip6_hdr
*);
601 struct carp_header
*ch
;
604 carpstats
.carps_ipackets6
++;
606 if (!carp_opts
[CARPCTL_ALLOW
]) {
608 return (IPPROTO_DONE
);
611 /* check if received on a valid carp interface */
612 if (m
->m_pkthdr
.rcvif
->if_carp
== NULL
) {
613 carpstats
.carps_badif
++;
614 CARP_LOG("carp6_input: packet received on non-carp "
616 m
->m_pkthdr
.rcvif
->if_xname
);
618 return (IPPROTO_DONE
);
621 /* verify that the IP TTL is 255 */
622 if (ip6
->ip6_hlim
!= CARP_DFLTTL
) {
623 carpstats
.carps_badttl
++;
624 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
626 m
->m_pkthdr
.rcvif
->if_xname
);
628 return (IPPROTO_DONE
);
631 /* verify that we have a complete carp packet */
633 IP6_EXTHDR_GET(ch
, struct carp_header
*, m
, *offp
, sizeof(*ch
));
635 carpstats
.carps_badlen
++;
636 CARP_LOG("carp6_input: packet size %u too small\n", len
);
637 return (IPPROTO_DONE
);
641 /* verify the CARP checksum */
643 if (carp_cksum(m
, sizeof(*ch
))) {
644 carpstats
.carps_badsum
++;
645 CARP_LOG("carp6_input: checksum failed, on %s\n",
646 m
->m_pkthdr
.rcvif
->if_xname
);
648 return (IPPROTO_DONE
);
652 carp_input_c(m
, ch
, AF_INET6
);
653 return (IPPROTO_DONE
);
658 carp_input_c(struct mbuf
*m
, struct carp_header
*ch
, sa_family_t af
)
660 struct ifnet
*ifp
= m
->m_pkthdr
.rcvif
;
661 struct carp_softc
*sc
;
662 u_int64_t tmp_counter
;
663 struct timeval sc_tv
, ch_tv
;
665 /* verify that the VHID is valid on the receiving interface */
666 CARP_LOCK(ifp
->if_carp
);
667 TAILQ_FOREACH(sc
, &((struct carp_if
*)ifp
->if_carp
)->vhif_vrs
, sc_list
)
668 if (sc
->sc_vhid
== ch
->carp_vhid
)
671 if (!sc
|| !((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
))) {
672 carpstats
.carps_badvhid
++;
673 CARP_UNLOCK(ifp
->if_carp
);
678 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
679 SC2IFP(sc
)->if_ipackets
++;
680 SC2IFP(sc
)->if_ibytes
+= m
->m_pkthdr
.len
;
682 if (SC2IFP(sc
)->if_bpf
) {
683 struct ip
*ip
= mtod(m
, struct ip
*);
685 /* BPF wants net byte order */
686 ip
->ip_len
= htons(ip
->ip_len
+ (ip
->ip_hl
<< 2));
687 ip
->ip_off
= htons(ip
->ip_off
);
688 bpf_mtap(SC2IFP(sc
)->if_bpf
, m
);
691 /* verify the CARP version. */
692 if (ch
->carp_version
!= CARP_VERSION
) {
693 carpstats
.carps_badver
++;
694 SC2IFP(sc
)->if_ierrors
++;
695 CARP_UNLOCK(ifp
->if_carp
);
696 CARP_LOG("%s; invalid version %d\n",
697 SC2IFP(sc
)->if_xname
,
703 /* verify the hash */
704 if (carp_hmac_verify(sc
, ch
->carp_counter
, ch
->carp_md
)) {
705 carpstats
.carps_badauth
++;
706 SC2IFP(sc
)->if_ierrors
++;
707 CARP_UNLOCK(ifp
->if_carp
);
708 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc
)->if_xname
);
713 tmp_counter
= ntohl(ch
->carp_counter
[0]);
714 tmp_counter
= tmp_counter
<<32;
715 tmp_counter
+= ntohl(ch
->carp_counter
[1]);
717 /* XXX Replay protection goes here */
719 sc
->sc_init_counter
= 0;
720 sc
->sc_counter
= tmp_counter
;
722 sc_tv
.tv_sec
= sc
->sc_advbase
;
723 if (carp_suppress_preempt
&& sc
->sc_advskew
< 240)
724 sc_tv
.tv_usec
= 240 * 1000000 / 256;
726 sc_tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
727 ch_tv
.tv_sec
= ch
->carp_advbase
;
728 ch_tv
.tv_usec
= ch
->carp_advskew
* 1000000 / 256;
730 switch (sc
->sc_state
) {
735 * If we receive an advertisement from a master who's going to
736 * be more frequent than us, go into BACKUP state.
738 if (timevalcmp(&sc_tv
, &ch_tv
, >) ||
739 timevalcmp(&sc_tv
, &ch_tv
, ==)) {
740 callout_stop(&sc
->sc_ad_tmo
);
741 CARP_DEBUG("%s: MASTER -> BACKUP "
742 "(more frequent advertisement received)\n",
743 SC2IFP(sc
)->if_xname
);
744 carp_set_state(sc
, BACKUP
);
746 carp_setroute(sc
, RTM_DELETE
);
751 * If we're pre-empting masters who advertise slower than us,
752 * and this one claims to be slower, treat him as down.
754 if (carp_opts
[CARPCTL_PREEMPT
] &&
755 timevalcmp(&sc_tv
, &ch_tv
, <)) {
756 CARP_DEBUG("%s: BACKUP -> MASTER "
757 "(preempting a slower master)\n",
758 SC2IFP(sc
)->if_xname
);
759 carp_master_down_locked(sc
);
764 * If the master is going to advertise at such a low frequency
765 * that he's guaranteed to time out, we might as well just
766 * treat him as timed out now.
768 sc_tv
.tv_sec
= sc
->sc_advbase
* 3;
769 if (timevalcmp(&sc_tv
, &ch_tv
, <)) {
770 CARP_DEBUG("%s: BACKUP -> MASTER "
771 "(master timed out)\n",
772 SC2IFP(sc
)->if_xname
);
773 carp_master_down_locked(sc
);
778 * Otherwise, we reset the counter and wait for the next
785 CARP_UNLOCK(ifp
->if_carp
);
792 carp_prepare_ad(struct mbuf
*m
, struct carp_softc
*sc
, struct carp_header
*ch
)
795 struct ifnet
*ifp
= SC2IFP(sc
);
797 if (sc
->sc_init_counter
) {
798 /* this could also be seconds since unix epoch */
799 sc
->sc_counter
= karc4random();
800 sc
->sc_counter
= sc
->sc_counter
<< 32;
801 sc
->sc_counter
+= karc4random();
805 ch
->carp_counter
[0] = htonl((sc
->sc_counter
>>32)&0xffffffff);
806 ch
->carp_counter
[1] = htonl(sc
->sc_counter
&0xffffffff);
808 carp_hmac_generate(sc
, ch
->carp_counter
, ch
->carp_md
);
810 /* Tag packet for carp_output */
811 mtag
= m_tag_get(PACKET_TAG_CARP
, sizeof(struct ifnet
*), M_NOWAIT
);
814 SC2IFP(sc
)->if_oerrors
++;
817 bcopy(&ifp
, (caddr_t
)(mtag
+ 1), sizeof(struct ifnet
*));
818 m_tag_prepend(m
, mtag
);
824 carp_send_ad_all(void)
826 struct carp_softc
*sc
;
828 LIST_FOREACH(sc
, &carpif_list
, sc_next
) {
829 if (sc
->sc_carpdev
== NULL
)
832 if ((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
) &&
833 sc
->sc_state
== MASTER
)
834 carp_send_ad_locked(sc
);
840 carp_send_ad(void *v
)
842 struct carp_softc
*sc
= v
;
845 carp_send_ad_locked(sc
);
850 carp_send_ad_locked(struct carp_softc
*sc
)
852 struct carp_header ch
;
854 struct carp_header
*ch_ptr
;
856 int len
, advbase
, advskew
;
859 /* bow out if we've lost our UPness or RUNNINGuiness */
860 if (!((SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
))) {
864 advbase
= sc
->sc_advbase
;
865 if (!carp_suppress_preempt
|| sc
->sc_advskew
> 240)
866 advskew
= sc
->sc_advskew
;
870 tv
.tv_usec
= advskew
* 1000000 / 256;
873 ch
.carp_version
= CARP_VERSION
;
874 ch
.carp_type
= CARP_ADVERTISEMENT
;
875 ch
.carp_vhid
= sc
->sc_vhid
;
876 ch
.carp_advbase
= advbase
;
877 ch
.carp_advskew
= advskew
;
878 ch
.carp_authlen
= 7; /* XXX DEFINE */
879 ch
.carp_pad1
= 0; /* must be zero */
886 MGETHDR(m
, M_NOWAIT
, MT_HEADER
);
888 SC2IFP(sc
)->if_oerrors
++;
889 carpstats
.carps_onomem
++;
890 /* XXX maybe less ? */
891 if (advbase
!= 255 || advskew
!= 255)
892 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
896 len
= sizeof(*ip
) + sizeof(ch
);
897 m
->m_pkthdr
.len
= len
;
898 m
->m_pkthdr
.rcvif
= NULL
;
900 MH_ALIGN(m
, m
->m_len
);
901 m
->m_flags
|= M_MCAST
;
902 ip
= mtod(m
, struct ip
*);
903 ip
->ip_v
= IPVERSION
;
904 ip
->ip_hl
= sizeof(*ip
) >> 2;
905 ip
->ip_tos
= IPTOS_LOWDELAY
;
907 ip
->ip_id
= ip_newid();
909 ip
->ip_ttl
= CARP_DFLTTL
;
910 ip
->ip_p
= IPPROTO_CARP
;
912 ip
->ip_src
.s_addr
= sc
->sc_ia
->ia_addr
.sin_addr
.s_addr
;
913 ip
->ip_dst
.s_addr
= htonl(INADDR_CARP_GROUP
);
915 ch_ptr
= (struct carp_header
*)(&ip
[1]);
916 bcopy(&ch
, ch_ptr
, sizeof(ch
));
917 if (carp_prepare_ad(m
, sc
, ch_ptr
))
920 m
->m_data
+= sizeof(*ip
);
921 ch_ptr
->carp_cksum
= carp_cksum(m
, len
- sizeof(*ip
));
922 m
->m_data
-= sizeof(*ip
);
924 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
925 SC2IFP(sc
)->if_opackets
++;
926 SC2IFP(sc
)->if_obytes
+= len
;
927 carpstats
.carps_opackets
++;
929 if (ip_output(m
, NULL
, NULL
, IP_RAWOUTPUT
, &sc
->sc_imo
, NULL
)) {
930 SC2IFP(sc
)->if_oerrors
++;
931 if (sc
->sc_sendad_errors
< INT_MAX
)
932 sc
->sc_sendad_errors
++;
933 if (sc
->sc_sendad_errors
== CARP_SENDAD_MAX_ERRORS
) {
934 carp_suppress_preempt
++;
935 if (carp_suppress_preempt
== 1) {
941 sc
->sc_sendad_success
= 0;
943 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
) {
944 if (++sc
->sc_sendad_success
>=
945 CARP_SENDAD_MIN_SUCCESS
) {
946 carp_suppress_preempt
--;
947 sc
->sc_sendad_errors
= 0;
950 sc
->sc_sendad_errors
= 0;
958 MGETHDR(m
, M_NOWAIT
, MT_HEADER
);
960 SC2IFP(sc
)->if_oerrors
++;
961 carpstats
.carps_onomem
++;
962 /* XXX maybe less ? */
963 if (advbase
!= 255 || advskew
!= 255)
964 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
968 len
= sizeof(*ip6
) + sizeof(ch
);
969 m
->m_pkthdr
.len
= len
;
970 m
->m_pkthdr
.rcvif
= NULL
;
972 MH_ALIGN(m
, m
->m_len
);
973 m
->m_flags
|= M_MCAST
;
974 ip6
= mtod(m
, struct ip6_hdr
*);
975 bzero(ip6
, sizeof(*ip6
));
976 ip6
->ip6_vfc
|= IPV6_VERSION
;
977 ip6
->ip6_hlim
= CARP_DFLTTL
;
978 ip6
->ip6_nxt
= IPPROTO_CARP
;
979 bcopy(&sc
->sc_ia6
->ia_addr
.sin6_addr
, &ip6
->ip6_src
,
980 sizeof(struct in6_addr
));
981 /* set the multicast destination */
983 ip6
->ip6_dst
.s6_addr16
[0] = htons(0xff02);
984 ip6
->ip6_dst
.s6_addr8
[15] = 0x12;
985 if (in6_setscope(&ip6
->ip6_dst
, sc
->sc_carpdev
, NULL
) != 0) {
986 SC2IFP(sc
)->if_oerrors
++;
988 CARP_LOG("%s: in6_setscope failed\n", __func__
);
992 ch_ptr
= (struct carp_header
*)(&ip6
[1]);
993 bcopy(&ch
, ch_ptr
, sizeof(ch
));
994 if (carp_prepare_ad(m
, sc
, ch_ptr
))
997 m
->m_data
+= sizeof(*ip6
);
998 ch_ptr
->carp_cksum
= carp_cksum(m
, len
- sizeof(*ip6
));
999 m
->m_data
-= sizeof(*ip6
);
1001 getmicrotime(&SC2IFP(sc
)->if_lastchange
);
1002 SC2IFP(sc
)->if_opackets
++;
1003 SC2IFP(sc
)->if_obytes
+= len
;
1004 carpstats
.carps_opackets6
++;
1006 if (ip6_output(m
, NULL
, NULL
, 0, &sc
->sc_im6o
, NULL
, NULL
)) {
1007 SC2IFP(sc
)->if_oerrors
++;
1008 if (sc
->sc_sendad_errors
< INT_MAX
)
1009 sc
->sc_sendad_errors
++;
1010 if (sc
->sc_sendad_errors
== CARP_SENDAD_MAX_ERRORS
) {
1011 carp_suppress_preempt
++;
1012 if (carp_suppress_preempt
== 1) {
1018 sc
->sc_sendad_success
= 0;
1020 if (sc
->sc_sendad_errors
>= CARP_SENDAD_MAX_ERRORS
) {
1021 if (++sc
->sc_sendad_success
>=
1022 CARP_SENDAD_MIN_SUCCESS
) {
1023 carp_suppress_preempt
--;
1024 sc
->sc_sendad_errors
= 0;
1027 sc
->sc_sendad_errors
= 0;
1032 if (advbase
!= 255 || advskew
!= 255)
1033 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
1039 * Broadcast a gratuitous ARP request containing
1040 * the virtual router MAC address for each IP address
1041 * associated with the virtual router.
1044 carp_send_arp(struct carp_softc
*sc
)
1048 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
1050 if (ifa
->ifa_addr
->sa_family
!= AF_INET
)
1052 lwkt_serialize_enter(sc
->sc_carpdev
->if_serializer
);
1053 arp_ifinit2(sc
->sc_carpdev
, ifa
, IF_LLADDR(sc
->sc_ifp
));
1054 lwkt_serialize_exit(sc
->sc_carpdev
->if_serializer
);
1056 DELAY(1000); /* XXX */
1062 carp_send_na(struct carp_softc
*sc
)
1065 struct in6_addr
*in6
;
1066 static struct in6_addr mcast
= IN6ADDR_LINKLOCAL_ALLNODES_INIT
;
1068 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
1070 if (ifa
->ifa_addr
->sa_family
!= AF_INET6
)
1073 in6
= &ifatoia6(ifa
)->ia_addr
.sin6_addr
;
1074 nd6_na_output(sc
->sc_carpdev
, &mcast
, in6
,
1075 ND_NA_FLAG_OVERRIDE
, 1, NULL
);
1076 DELAY(1000); /* XXX */
1082 carp_addrcount(struct carp_if
*cif
, struct in_ifaddr
*ia
, int type
)
1084 struct carp_softc
*vh
;
1088 CARP_LOCK_ASSERT(cif
);
1090 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1091 if ((type
== CARP_COUNT_RUNNING
&&
1092 (SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
)) ||
1093 (type
== CARP_COUNT_MASTER
&& vh
->sc_state
== MASTER
)) {
1094 TAILQ_FOREACH(ifa
, &SC2IFP(vh
)->if_addrlist
,
1096 if (ifa
->ifa_addr
->sa_family
== AF_INET
&&
1097 ia
->ia_addr
.sin_addr
.s_addr
==
1098 ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
)
1107 carp_iamatch(void *v
, struct in_ifaddr
*ia
,
1108 struct in_addr
*isaddr
, u_int8_t
**enaddr
)
1110 struct carp_if
*cif
= v
;
1111 struct carp_softc
*vh
;
1112 int index
, count
= 0;
1117 if (carp_opts
[CARPCTL_ARPBALANCE
]) {
1119 * XXX proof of concept implementation.
1120 * We use the source ip to decide which virtual host should
1121 * handle the request. If we're master of that virtual host,
1122 * then we respond, otherwise, just drop the arp packet on
1125 count
= carp_addrcount(cif
, ia
, CARP_COUNT_RUNNING
);
1127 /* should never reach this */
1132 /* this should be a hash, like pf_hash() */
1133 index
= ntohl(isaddr
->s_addr
) % count
;
1136 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1137 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
)) {
1138 TAILQ_FOREACH(ifa
, &SC2IFP(vh
)->if_addrlist
,
1140 if (ifa
->ifa_addr
->sa_family
==
1142 ia
->ia_addr
.sin_addr
.s_addr
==
1143 ifatoia(ifa
)->ia_addr
.sin_addr
.s_addr
) {
1144 if (count
== index
) {
1147 *enaddr
= IF_LLADDR(vh
->sc_ifp
);
1161 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1162 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1163 vh
->sc_state
== MASTER
) {
1164 *enaddr
= IF_LLADDR(vh
->sc_ifp
);
1176 carp_iamatch6(void *v
, struct in6_addr
*taddr
)
1178 struct carp_if
*cif
= v
;
1179 struct carp_softc
*vh
;
1183 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
) {
1184 TAILQ_FOREACH(ifa
, &SC2IFP(vh
)->if_addrlist
, ifa_list
) {
1185 if (IN6_ARE_ADDR_EQUAL(taddr
,
1186 &ifatoia6(ifa
)->ia_addr
.sin6_addr
) &&
1187 (SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1188 vh
->sc_state
== MASTER
) {
1200 carp_macmatch6(void *v
, struct mbuf
*m
, const struct in6_addr
*taddr
)
1203 struct carp_if
*cif
= v
;
1204 struct carp_softc
*sc
;
1208 TAILQ_FOREACH(sc
, &cif
->vhif_vrs
, sc_list
) {
1209 TAILQ_FOREACH(ifa
, &SC2IFP(sc
)->if_addrlist
, ifa_list
) {
1210 if (IN6_ARE_ADDR_EQUAL(taddr
,
1211 &ifatoia6(ifa
)->ia_addr
.sin6_addr
) &&
1212 (SC2IFP(sc
)->if_flags
& IFF_UP
) && (SC2IFP(sc
)->if_flags
& IFF_RUNNING
)) {
1213 struct ifnet
*ifp
= SC2IFP(sc
);
1214 mtag
= m_tag_get(PACKET_TAG_CARP
,
1215 sizeof(struct ifnet
*), M_NOWAIT
);
1217 /* better a bit than nothing */
1219 return (IF_LLADDR(sc
->sc_ifp
));
1221 bcopy(&ifp
, (caddr_t
)(mtag
+ 1),
1222 sizeof(struct ifnet
*));
1223 m_tag_prepend(m
, mtag
);
1226 return (IF_LLADDR(sc
->sc_ifp
));
1237 carp_forus(void *v
, void *dhost
)
1239 struct carp_if
*cif
= v
;
1240 struct carp_softc
*vh
;
1241 u_int8_t
*ena
= dhost
;
1244 * XXX: See here for check on MAC adr is not for virtual use
1248 if (ena
[0] || ena
[1] || ena
[2] != 0x5e || ena
[3] || ena
[4] != 1)
1254 TAILQ_FOREACH(vh
, &cif
->vhif_vrs
, sc_list
)
1255 if ((SC2IFP(vh
)->if_flags
& IFF_UP
) && (SC2IFP(vh
)->if_flags
& IFF_RUNNING
) &&
1256 vh
->sc_state
== MASTER
&&
1257 !bcmp(dhost
, IF_LLADDR(vh
->sc_ifp
), ETHER_ADDR_LEN
)) {
1259 return (SC2IFP(vh
));
1267 carp_master_down(void *v
)
1269 struct carp_softc
*sc
= v
;
1271 lwkt_serialize_enter(sc
->sc_ifp
->if_serializer
);
1272 carp_master_down_locked(sc
);
1273 lwkt_serialize_exit(sc
->sc_ifp
->if_serializer
);
1277 carp_master_down_locked(struct carp_softc
*sc
)
1280 CARP_SCLOCK_ASSERT(sc
);
1282 switch (sc
->sc_state
) {
1284 kprintf("%s: master_down event in INIT state\n",
1285 SC2IFP(sc
)->if_xname
);
1290 carp_set_state(sc
, MASTER
);
1291 carp_send_ad_locked(sc
);
1297 carp_setroute(sc
, RTM_ADD
);
1303 * When in backup state, af indicates whether to reset the master down timer
1304 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1307 carp_setrun(struct carp_softc
*sc
, sa_family_t af
)
1311 if (sc
->sc_carpdev
== NULL
) {
1312 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1313 carp_set_state(sc
, INIT
);
1317 if (SC2IFP(sc
)->if_flags
& IFF_UP
&&
1318 sc
->sc_vhid
> 0 && (sc
->sc_naddrs
|| sc
->sc_naddrs6
))
1319 SC2IFP(sc
)->if_flags
|= IFF_RUNNING
;
1321 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1322 carp_setroute(sc
, RTM_DELETE
);
1326 switch (sc
->sc_state
) {
1328 if (carp_opts
[CARPCTL_PREEMPT
] && !carp_suppress_preempt
) {
1329 carp_send_ad_locked(sc
);
1334 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1335 SC2IFP(sc
)->if_xname
);
1336 carp_set_state(sc
, MASTER
);
1337 carp_setroute(sc
, RTM_ADD
);
1339 CARP_DEBUG("%s: INIT -> BACKUP\n", SC2IFP(sc
)->if_xname
);
1340 carp_set_state(sc
, BACKUP
);
1341 carp_setroute(sc
, RTM_DELETE
);
1346 callout_stop(&sc
->sc_ad_tmo
);
1347 tv
.tv_sec
= 3 * sc
->sc_advbase
;
1348 tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
1352 callout_reset(&sc
->sc_md_tmo
, tvtohz_high(&tv
),
1353 carp_master_down
, sc
);
1358 callout_reset(&sc
->sc_md6_tmo
, tvtohz_high(&tv
),
1359 carp_master_down
, sc
);
1364 callout_reset(&sc
->sc_md_tmo
, tvtohz_high(&tv
),
1365 carp_master_down
, sc
);
1367 callout_reset(&sc
->sc_md6_tmo
, tvtohz_high(&tv
),
1368 carp_master_down
, sc
);
1373 tv
.tv_sec
= sc
->sc_advbase
;
1374 tv
.tv_usec
= sc
->sc_advskew
* 1000000 / 256;
1375 callout_reset(&sc
->sc_ad_tmo
, tvtohz_high(&tv
),
1382 carp_multicast_cleanup(struct carp_softc
*sc
)
1384 struct ip_moptions
*imo
= &sc
->sc_imo
;
1385 u_int16_t n
= imo
->imo_num_memberships
;
1387 /* Clean up our own multicast memberships */
1389 if (imo
->imo_membership
[n
] != NULL
) {
1390 in_delmulti(imo
->imo_membership
[n
]);
1391 imo
->imo_membership
[n
] = NULL
;
1394 imo
->imo_num_memberships
= 0;
1395 imo
->imo_multicast_ifp
= NULL
;
1400 carp_multicast6_cleanup(struct carp_softc
*sc
)
1402 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1404 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1405 struct in6_multi_mship
*imm
=
1406 LIST_FIRST(&im6o
->im6o_memberships
);
1408 LIST_REMOVE(imm
, i6mm_chain
);
1409 in6_leavegroup(imm
);
1411 im6o
->im6o_multicast_ifp
= NULL
;
1416 carp_set_addr(struct carp_softc
*sc
, struct sockaddr_in
*sin
)
1419 struct carp_if
*cif
;
1420 struct in_ifaddr
*ia
, *ia_if
;
1421 struct ip_moptions
*imo
= &sc
->sc_imo
;
1422 struct in_addr addr
;
1423 u_long iaddr
= htonl(sin
->sin_addr
.s_addr
);
1426 if (sin
->sin_addr
.s_addr
== 0)
1428 if (!(SC2IFP(sc
)->if_flags
& IFF_UP
))
1430 carp_set_state(sc
, INIT
);
1434 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1439 /* we have to do it by hands to check we won't match on us */
1440 ia_if
= NULL
; own
= 0;
1441 TAILQ_FOREACH(ia
, &in_ifaddrhead
, ia_link
) {
1442 /* and, yeah, we need a multicast-capable iface too */
1443 if (ia
->ia_ifp
!= SC2IFP(sc
) &&
1444 (ia
->ia_ifp
->if_flags
& IFF_MULTICAST
) &&
1445 (iaddr
& ia
->ia_subnetmask
) == ia
->ia_subnet
) {
1448 if (sin
->sin_addr
.s_addr
==
1449 ia
->ia_addr
.sin_addr
.s_addr
)
1456 return (EADDRNOTAVAIL
);
1461 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0 ||
1462 (imo
->imo_multicast_ifp
&& imo
->imo_multicast_ifp
!= ifp
))
1463 return (EADDRNOTAVAIL
);
1465 if (imo
->imo_num_memberships
== 0) {
1466 addr
.s_addr
= htonl(INADDR_CARP_GROUP
);
1467 if ((imo
->imo_membership
[0] = in_addmulti(&addr
, ifp
)) == NULL
)
1469 imo
->imo_num_memberships
++;
1470 imo
->imo_multicast_ifp
= ifp
;
1471 imo
->imo_multicast_ttl
= CARP_DFLTTL
;
1472 imo
->imo_multicast_loop
= 0;
1475 if (!ifp
->if_carp
) {
1477 MALLOC(cif
, struct carp_if
*, sizeof(*cif
), M_CARP
,
1483 if ((error
= ifpromisc(ifp
, 1))) {
1488 CARP_LOCK_INIT(cif
);
1490 cif
->vhif_ifp
= ifp
;
1491 TAILQ_INIT(&cif
->vhif_vrs
);
1495 struct carp_softc
*vr
;
1497 cif
= (struct carp_if
*)ifp
->if_carp
;
1499 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1500 if (vr
!= sc
&& vr
->sc_vhid
== sc
->sc_vhid
) {
1507 sc
->sc_carpdev
= ifp
;
1509 { /* XXX prevent endless loop if already in queue */
1510 struct carp_softc
*vr
, *after
= NULL
;
1512 cif
= (struct carp_if
*)ifp
->if_carp
;
1514 /* XXX: cif should not change, right? So we still hold the lock */
1515 CARP_LOCK_ASSERT(cif
);
1517 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
) {
1520 if (vr
->sc_vhid
< sc
->sc_vhid
)
1525 /* We're trying to keep things in order */
1526 if (after
== NULL
) {
1527 TAILQ_INSERT_TAIL(&cif
->vhif_vrs
, sc
, sc_list
);
1529 TAILQ_INSERT_AFTER(&cif
->vhif_vrs
, after
, sc
, sc_list
);
1536 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1541 carp_sc_state_locked(sc
);
1549 in_delmulti(imo
->imo_membership
[--imo
->imo_num_memberships
]);
1555 carp_del_addr(struct carp_softc
*sc
, struct sockaddr_in
*sin
)
1559 if (!--sc
->sc_naddrs
) {
1560 struct carp_if
*cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1561 struct ip_moptions
*imo
= &sc
->sc_imo
;
1564 callout_stop(&sc
->sc_ad_tmo
);
1565 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
1566 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1568 in_delmulti(imo
->imo_membership
[--imo
->imo_num_memberships
]);
1569 imo
->imo_multicast_ifp
= NULL
;
1570 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
1571 if (!--cif
->vhif_nvrs
) {
1572 sc
->sc_carpdev
->if_carp
= NULL
;
1573 CARP_LOCK_DESTROY(cif
);
1574 FREE(cif
, M_IFADDR
);
1585 carp_set_addr6(struct carp_softc
*sc
, struct sockaddr_in6
*sin6
)
1588 struct carp_if
*cif
;
1589 struct in6_ifaddr
*ia
, *ia_if
;
1590 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1591 struct in6_multi_mship
*imm
;
1592 struct in6_addr in6
;
1595 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
)) {
1596 if (!(SC2IFP(sc
)->if_flags
& IFF_UP
))
1597 carp_set_state(sc
, INIT
);
1599 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1604 /* we have to do it by hands to check we won't match on us */
1605 ia_if
= NULL
; own
= 0;
1606 for (ia
= in6_ifaddr
; ia
; ia
= ia
->ia_next
) {
1609 for (i
= 0; i
< 4; i
++) {
1610 if ((sin6
->sin6_addr
.s6_addr32
[i
] &
1611 ia
->ia_prefixmask
.sin6_addr
.s6_addr32
[i
]) !=
1612 (ia
->ia_addr
.sin6_addr
.s6_addr32
[i
] &
1613 ia
->ia_prefixmask
.sin6_addr
.s6_addr32
[i
]))
1616 /* and, yeah, we need a multicast-capable iface too */
1617 if (ia
->ia_ifp
!= SC2IFP(sc
) &&
1618 (ia
->ia_ifp
->if_flags
& IFF_MULTICAST
) &&
1622 if (IN6_ARE_ADDR_EQUAL(&sin6
->sin6_addr
,
1623 &ia
->ia_addr
.sin6_addr
))
1629 return (EADDRNOTAVAIL
);
1633 if (ifp
== NULL
|| (ifp
->if_flags
& IFF_MULTICAST
) == 0 ||
1634 (im6o
->im6o_multicast_ifp
&& im6o
->im6o_multicast_ifp
!= ifp
))
1635 return (EADDRNOTAVAIL
);
1637 if (!sc
->sc_naddrs6
) {
1638 im6o
->im6o_multicast_ifp
= ifp
;
1640 /* join CARP multicast address */
1641 bzero(&in6
, sizeof(in6
));
1642 in6
.s6_addr16
[0] = htons(0xff02);
1643 in6
.s6_addr8
[15] = 0x12;
1644 if (in6_setscope(&in6
, ifp
, NULL
) != 0)
1646 if ((imm
= in6_joingroup(ifp
, &in6
, &error
)) == NULL
)
1648 LIST_INSERT_HEAD(&im6o
->im6o_memberships
, imm
, i6mm_chain
);
1650 /* join solicited multicast address */
1651 bzero(&in6
, sizeof(in6
));
1652 in6
.s6_addr16
[0] = htons(0xff02);
1653 in6
.s6_addr32
[1] = 0;
1654 in6
.s6_addr32
[2] = htonl(1);
1655 in6
.s6_addr32
[3] = sin6
->sin6_addr
.s6_addr32
[3];
1656 in6
.s6_addr8
[12] = 0xff;
1657 if (in6_setscope(&in6
, ifp
, NULL
) != 0)
1659 if ((imm
= in6_joingroup(ifp
, &in6
, &error
)) == NULL
)
1661 LIST_INSERT_HEAD(&im6o
->im6o_memberships
, imm
, i6mm_chain
);
1664 if (!ifp
->if_carp
) {
1665 MALLOC(cif
, struct carp_if
*, sizeof(*cif
), M_CARP
,
1671 if ((error
= ifpromisc(ifp
, 1))) {
1676 CARP_LOCK_INIT(cif
);
1678 cif
->vhif_ifp
= ifp
;
1679 TAILQ_INIT(&cif
->vhif_vrs
);
1683 struct carp_softc
*vr
;
1685 cif
= (struct carp_if
*)ifp
->if_carp
;
1687 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1688 if (vr
!= sc
&& vr
->sc_vhid
== sc
->sc_vhid
) {
1695 sc
->sc_carpdev
= ifp
;
1697 { /* XXX prevent endless loop if already in queue */
1698 struct carp_softc
*vr
, *after
= NULL
;
1700 cif
= (struct carp_if
*)ifp
->if_carp
;
1701 CARP_LOCK_ASSERT(cif
);
1703 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
) {
1706 if (vr
->sc_vhid
< sc
->sc_vhid
)
1711 /* We're trying to keep things in order */
1712 if (after
== NULL
) {
1713 TAILQ_INSERT_TAIL(&cif
->vhif_vrs
, sc
, sc_list
);
1715 TAILQ_INSERT_AFTER(&cif
->vhif_vrs
, after
, sc
, sc_list
);
1722 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1725 carp_sc_state_locked(sc
);
1733 /* clean up multicast memberships */
1734 if (!sc
->sc_naddrs6
) {
1735 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1736 imm
= LIST_FIRST(&im6o
->im6o_memberships
);
1737 LIST_REMOVE(imm
, i6mm_chain
);
1738 in6_leavegroup(imm
);
1745 carp_del_addr6(struct carp_softc
*sc
, struct sockaddr_in6
*sin6
)
1749 if (!--sc
->sc_naddrs6
) {
1750 struct carp_if
*cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1751 struct ip6_moptions
*im6o
= &sc
->sc_im6o
;
1754 callout_stop(&sc
->sc_ad_tmo
);
1755 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
1756 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
1758 while (!LIST_EMPTY(&im6o
->im6o_memberships
)) {
1759 struct in6_multi_mship
*imm
=
1760 LIST_FIRST(&im6o
->im6o_memberships
);
1762 LIST_REMOVE(imm
, i6mm_chain
);
1763 in6_leavegroup(imm
);
1765 im6o
->im6o_multicast_ifp
= NULL
;
1766 TAILQ_REMOVE(&cif
->vhif_vrs
, sc
, sc_list
);
1767 if (!--cif
->vhif_nvrs
) {
1768 CARP_LOCK_DESTROY(cif
);
1769 sc
->sc_carpdev
->if_carp
= NULL
;
1770 FREE(cif
, M_IFADDR
);
1780 carp_ioctl(struct ifnet
*ifp
, u_long cmd
, caddr_t addr
, struct ucred
*creds
)
1782 struct carp_softc
*sc
= ifp
->if_softc
, *vr
;
1783 struct carpreq carpr
;
1786 struct ifaliasreq
*ifra
;
1787 int locked
= 0, error
= 0;
1789 ifa
= (struct ifaddr
*)addr
;
1790 ifra
= (struct ifaliasreq
*)addr
;
1791 ifr
= (struct ifreq
*)addr
;
1796 switch (ifa
->ifa_addr
->sa_family
) {
1799 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1800 bcopy(ifa
->ifa_addr
, ifa
->ifa_dstaddr
,
1801 sizeof(struct sockaddr
));
1802 error
= carp_set_addr(sc
, satosin(ifa
->ifa_addr
));
1807 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1808 error
= carp_set_addr6(sc
, satosin6(ifa
->ifa_addr
));
1812 error
= EAFNOSUPPORT
;
1818 switch (ifa
->ifa_addr
->sa_family
) {
1821 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1822 bcopy(ifa
->ifa_addr
, ifa
->ifa_dstaddr
,
1823 sizeof(struct sockaddr
));
1824 error
= carp_set_addr(sc
, satosin(&ifra
->ifra_addr
));
1829 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1830 error
= carp_set_addr6(sc
, satosin6(&ifra
->ifra_addr
));
1834 error
= EAFNOSUPPORT
;
1840 switch (ifa
->ifa_addr
->sa_family
) {
1843 error
= carp_del_addr(sc
, satosin(&ifra
->ifra_addr
));
1848 error
= carp_del_addr6(sc
, satosin6(&ifra
->ifra_addr
));
1852 error
= EAFNOSUPPORT
;
1858 if (sc
->sc_carpdev
) {
1862 if (sc
->sc_state
!= INIT
&& !(ifr
->ifr_flags
& IFF_UP
)) {
1863 callout_stop(&sc
->sc_ad_tmo
);
1864 callout_stop(&sc
->sc_md_tmo
);
1865 callout_stop(&sc
->sc_md6_tmo
);
1866 if (sc
->sc_state
== MASTER
)
1867 carp_send_ad_locked(sc
);
1868 carp_set_state(sc
, INIT
);
1870 } else if (sc
->sc_state
== INIT
&& (ifr
->ifr_flags
& IFF_UP
)) {
1871 SC2IFP(sc
)->if_flags
|= IFF_UP
;
1877 error
= suser(curthread
);
1880 if ((error
= copyin(ifr
->ifr_data
, &carpr
, sizeof carpr
)))
1883 if (sc
->sc_carpdev
) {
1887 if (sc
->sc_state
!= INIT
&& carpr
.carpr_state
!= sc
->sc_state
) {
1888 switch (carpr
.carpr_state
) {
1890 callout_stop(&sc
->sc_ad_tmo
);
1891 carp_set_state(sc
, BACKUP
);
1893 carp_setroute(sc
, RTM_DELETE
);
1896 carp_master_down_locked(sc
);
1902 if (carpr
.carpr_vhid
> 0) {
1903 if (carpr
.carpr_vhid
> 255) {
1907 if (sc
->sc_carpdev
) {
1908 struct carp_if
*cif
;
1909 cif
= (struct carp_if
*)sc
->sc_carpdev
->if_carp
;
1910 TAILQ_FOREACH(vr
, &cif
->vhif_vrs
, sc_list
)
1912 vr
->sc_vhid
== carpr
.carpr_vhid
)
1915 sc
->sc_vhid
= carpr
.carpr_vhid
;
1916 IF_LLADDR(sc
->sc_ifp
)[0] = 0;
1917 IF_LLADDR(sc
->sc_ifp
)[1] = 0;
1918 IF_LLADDR(sc
->sc_ifp
)[2] = 0x5e;
1919 IF_LLADDR(sc
->sc_ifp
)[3] = 0;
1920 IF_LLADDR(sc
->sc_ifp
)[4] = 1;
1921 IF_LLADDR(sc
->sc_ifp
)[5] = sc
->sc_vhid
;
1924 if (carpr
.carpr_advbase
> 0 || carpr
.carpr_advskew
> 0) {
1925 if (carpr
.carpr_advskew
>= 255) {
1929 if (carpr
.carpr_advbase
> 255) {
1933 sc
->sc_advbase
= carpr
.carpr_advbase
;
1934 sc
->sc_advskew
= carpr
.carpr_advskew
;
1937 bcopy(carpr
.carpr_key
, sc
->sc_key
, sizeof(sc
->sc_key
));
1947 /* XXX: lockless read */
1948 bzero(&carpr
, sizeof(carpr
));
1949 carpr
.carpr_state
= sc
->sc_state
;
1950 carpr
.carpr_vhid
= sc
->sc_vhid
;
1951 carpr
.carpr_advbase
= sc
->sc_advbase
;
1952 carpr
.carpr_advskew
= sc
->sc_advskew
;
1953 error
= suser(curthread
);
1955 bcopy(sc
->sc_key
, carpr
.carpr_key
,
1956 sizeof(carpr
.carpr_key
));
1957 error
= copyout(&carpr
, ifr
->ifr_data
, sizeof(carpr
));
1967 carp_hmac_prepare(sc
);
1973 * XXX: this is looutput. We should eventually use it from there.
1976 carp_looutput(struct ifnet
*ifp
, struct mbuf
*m
, struct sockaddr
*dst
,
1981 M_ASSERTPKTHDR(m
); /* check if we have the packet header */
1983 if (rt
&& rt
->rt_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) {
1985 return (rt
->rt_flags
& RTF_BLACKHOLE
? 0 :
1986 rt
->rt_flags
& RTF_HOST
? EHOSTUNREACH
: ENETUNREACH
);
1990 ifp
->if_obytes
+= m
->m_pkthdr
.len
;
1992 /* BPF writes need to be handled specially. */
1993 if (dst
->sa_family
== AF_UNSPEC
) {
1994 bcopy(dst
->sa_data
, &af
, sizeof(af
));
1995 dst
->sa_family
= af
;
1999 switch (dst
->sa_family
) {
2007 return (EAFNOSUPPORT
);
2010 return(if_simloop(ifp
, m
, dst
->sa_family
, 0));
2014 * Start output on carp interface. This function should never be called.
2017 carp_start(struct ifnet
*ifp
)
2020 kprintf("%s: start called\n", ifp
->if_xname
);
2025 carp_output(struct ifnet
*ifp
, struct mbuf
*m
, struct sockaddr
*sa
,
2029 struct carp_softc
*sc
;
2030 struct ifnet
*carp_ifp
;
2035 switch (sa
->sa_family
) {
2048 mtag
= m_tag_find(m
, PACKET_TAG_CARP
, NULL
);
2052 bcopy(mtag
+ 1, &carp_ifp
, sizeof(struct ifnet
*));
2053 sc
= carp_ifp
->if_softc
;
2055 /* Set the source MAC address to Virtual Router MAC Address */
2056 switch (ifp
->if_type
) {
2059 struct ether_header
*eh
;
2061 eh
= mtod(m
, struct ether_header
*);
2062 eh
->ether_shost
[0] = 0;
2063 eh
->ether_shost
[1] = 0;
2064 eh
->ether_shost
[2] = 0x5e;
2065 eh
->ether_shost
[3] = 0;
2066 eh
->ether_shost
[4] = 1;
2067 eh
->ether_shost
[5] = sc
->sc_vhid
;
2071 kprintf("%s: carp is not supported for this interface type\n",
2073 return (EOPNOTSUPP
);
2081 carp_set_state(struct carp_softc
*sc
, int state
)
2085 CARP_SCLOCK_ASSERT(sc
);
2087 if (sc
->sc_state
== state
)
2090 sc
->sc_state
= state
;
2093 SC2IFP(sc
)->if_link_state
= LINK_STATE_DOWN
;
2096 SC2IFP(sc
)->if_link_state
= LINK_STATE_UP
;
2099 SC2IFP(sc
)->if_link_state
= LINK_STATE_UNKNOWN
;
2102 rt_ifmsg(SC2IFP(sc
));
2106 carp_carpdev_state(void *v
)
2108 struct carp_if
*cif
= v
;
2111 carp_carpdev_state_locked(cif
);
2116 carp_carpdev_state_locked(struct carp_if
*cif
)
2118 struct carp_softc
*sc
;
2120 TAILQ_FOREACH(sc
, &cif
->vhif_vrs
, sc_list
)
2121 carp_sc_state_locked(sc
);
2125 carp_sc_state_locked(struct carp_softc
*sc
)
2127 CARP_SCLOCK_ASSERT(sc
);
2129 if ( !(sc
->sc_carpdev
->if_flags
& IFF_UP
)) {
2130 sc
->sc_flags_backup
= SC2IFP(sc
)->if_flags
;
2131 SC2IFP(sc
)->if_flags
&= ~IFF_UP
;
2132 SC2IFP(sc
)->if_flags
&= ~IFF_RUNNING
;
2133 callout_stop(&sc
->sc_ad_tmo
);
2134 callout_stop(&sc
->sc_md_tmo
);
2135 callout_stop(&sc
->sc_md6_tmo
);
2136 carp_set_state(sc
, INIT
);
2138 if (!sc
->sc_suppress
) {
2139 carp_suppress_preempt
++;
2140 if (carp_suppress_preempt
== 1) {
2146 sc
->sc_suppress
= 1;
2148 SC2IFP(sc
)->if_flags
|= sc
->sc_flags_backup
;
2149 carp_set_state(sc
, INIT
);
2151 if (sc
->sc_suppress
)
2152 carp_suppress_preempt
--;
2153 sc
->sc_suppress
= 0;
2160 carp_modevent(module_t mod
, int type
, void *data
)
2164 if_detach_event_tag
= EVENTHANDLER_REGISTER(ifnet_departure_event
,
2165 carp_ifdetach
, NULL
, EVENTHANDLER_PRI_ANY
);
2166 if (if_detach_event_tag
== NULL
)
2169 LIST_INIT(&carpif_list
);
2170 if_clone_attach(&carp_cloner
);
2174 EVENTHANDLER_DEREGISTER(ifnet_departure_event
, if_detach_event_tag
);
2175 if_clone_detach(&carp_cloner
);
2185 static moduledata_t carp_mod
= {
2191 DECLARE_MODULE(carp
, carp_mod
, SI_SUB_PSEUDO
, SI_ORDER_ANY
);