/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $
 */

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_carp.h"

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/msgport2.h>
#include <sys/sockio.h>
#include <sys/thread2.h>

#include <machine/inttypes.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/ifq_var.h>
#include <net/route.h>
#include <net/bpf.h>
#include <net/netisr2.h>
#include <net/netmsg2.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip_carp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>

#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#endif

#ifdef INET6
#include <netinet6/nd6.h>
#endif /* INET6 */

#include <net/pf/pfvar.h>
#include <net/pf/if_pfsync.h>

#define PFSYNCNAME	"pfsync"

#define PFSYNC_MINMTU	\
    (sizeof(struct pfsync_header) + sizeof(struct pf_state))

#ifdef PFSYNCDEBUG
#define DPRINTF(x)	do { if (pfsyncdebug) kprintf x ; } while (0)
int pfsyncdebug;
#else
#define DPRINTF(x)
#endif

struct pfsync_softc *pfsyncif = NULL;
struct pfsyncstats pfsyncstats;

void	pfsyncattach(int);
static int	pfsync_clone_destroy(struct ifnet *);
static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
void	pfsync_setmtu(struct pfsync_softc *, int);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
void	pfsyncstart(struct ifnet *, struct ifaltq_subque *);

struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
int	pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
int	pfsync_sendout(struct pfsync_softc *);
int	pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
void	pfsync_timeout(void *);
void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulkfail(void *);

static struct in_multi *pfsync_in_addmulti(struct ifnet *);
static void	pfsync_in_delmulti(struct in_multi *);

static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;

int	pfsync_sync_ok;

struct if_clone pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy,
    1, 1);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}

static int
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;

	lwkt_gettoken(&pf_token);

	sc = kmalloc(sizeof(*sc), M_PFSYNC, M_WAITOK | M_ZERO);
	pfsync_sync_ok = 1;
	sc->sc_mbuf = NULL;
	sc->sc_mbuf_net = NULL;
	sc->sc_mbuf_tdb = NULL;
	sc->sc_statep.s = NULL;
	sc->sc_statep_net.s = NULL;
	sc->sc_statep_tdb.t = NULL;
	sc->sc_maxupdates = 128;
	sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
	sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
	sc->sc_ureq_received = 0;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_send_next = NULL;
	sc->sc_bulk_terminator = NULL;
	sc->sc_bulk_send_cpu = 0;
	sc->sc_bulk_terminator_cpu = 0;
	sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS;
	lwkt_reltoken(&pf_token);
	ifp = &sc->sc_if;
	ksnprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	if_initname(ifp, ifc->ifc_name, unit);
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
	ifp->if_hdrlen = PFSYNC_HDRLEN;
	ifp->if_baudrate = IF_Mbps(100);
	ifp->if_softc = sc;
	pfsync_setmtu(sc, MCLBYTES);
	callout_init(&sc->sc_tmo);
	/* callout_init(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */
	callout_init(&sc->sc_bulk_tmo);
	callout_init(&sc->sc_bulkfail_tmo);
	if_attach(ifp, NULL);

	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif
	lwkt_gettoken(&pf_token);

	lwkt_reltoken(&pf_token);
	return (0);
}

static int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct netmsg_base msg;

	lwkt_gettoken(&pf_token);
	lwkt_reltoken(&pf_token);

	struct pfsync_softc *sc = ifp->if_softc;
	callout_stop(&sc->sc_tmo);
	/* callout_stop(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */
	callout_stop(&sc->sc_bulk_tmo);
	callout_stop(&sc->sc_bulkfail_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1);
#endif

	/* Unpend async sendouts. */
	netmsg_init(&msg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler);
	netisr_domsg(&msg, 0);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif
	if_detach(ifp);
	lwkt_gettoken(&pf_token);
	LIST_REMOVE(sc, sc_next);
	kfree(sc, M_PFSYNC);
	lwkt_reltoken(&pf_token);

	return 0;
}

/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
	ifsq_purge(ifsq);
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = kmalloc(sizeof(struct pf_state_scrub), M_PFSYNC,
		    M_NOWAIT | M_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

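/*
 * Export a kernel pf_state into the wire-format pfsync_state record,
 * converting multi-byte fields to network byte order.  Counterpart of
 * pfsync_state_import() below.
 */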
void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	bzero(sp, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(time_second - st->creation);
	sp->expire = pf_state_expires(st);
	if (sp->expire <= time_second)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(sp->expire - time_second);

	sp->direction = st->direction;
	sp->log = st->log;
	sp->cpuid = st->cpuid;
	sp->pickup_mode = st->pickup_mode;
	sp->timeout = st->timeout;
	sp->state_flags = st->state_flags;
	if (st->src_node)
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->nat_src_node)
		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;

	bcopy(&st->id, &sp->id, sizeof(sp->id));
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	if (st->nat_rule.ptr == NULL)
		sp->nat_rule = htonl(-1);
	else
		sp->nat_rule = htonl(st->nat_rule.ptr->nr);

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
}

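/*
 * Import a wire-format pfsync_state (from a peer, or from userland when
 * PFSYNC_SI_IOCTL is set) and insert it into the local state table.
 * A separate stack-side state key is allocated only when the record was
 * translated, i.e. when addresses or ports differ between the wire and
 * stack sides.
 */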
int
pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
		kprintf("pfsync_insert_net_state: invalid creator id:"
		    " %08x\n", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync_insert_net_state: "
			    "unknown interface: %s\n", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = M_WAITOK | M_NULLOK | M_ZERO;
	else
		pool_flags = M_WAITOK | M_ZERO;

	if ((st = kmalloc(sizeof(struct pf_state), M_PFSYNC, pool_flags)) == NULL)
		goto cleanup;
	lockinit(&st->lk, "pfstlk", 0, 0);

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->proto = sp->proto;
	skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->proto = sp->proto;
		sks->af = sp->af;
	}

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_second - ntohl(sp->creation);
	st->expire = time_second;
	if (sp->expire) {
		/* XXX No adaptive scaling. */
		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
	}

	st->expire = ntohl(sp->expire) + time_second;
	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = sp->state_flags;
	if (!(flags & PFSYNC_SI_IOCTL))
		st->sync_flags = PFSTATE_FROMSYNC;

	bcopy(sp->id, &st->id, sizeof(st->id));
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->nat_rule.ptr = NULL;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = 0;

	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		goto cleanup_state;
	}

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		kfree(skw, M_PFSYNC);
	if (sks != NULL)
		kfree(sks, M_PFSYNC);

cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			kfree(st->dst.scrub, M_PFSYNC);
		if (st->src.scrub)
			kfree(st->src.scrub, M_PFSYNC);
		kfree(st, M_PFSYNC);
	}
	return (error);
}

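/*
 * Receive handler for IPPROTO_PFSYNC packets.  Validates the IP and
 * pfsync headers (interface, TTL, version, action code) and then
 * processes the batched records for the given action.  As the comments
 * below note, this path is not wired up yet and assumes pf_token is
 * held by the caller.
 */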
void
pfsync_input(struct mbuf *m, ...)
{
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_softc *sc = pfsyncif;
	struct pf_state *st;
	struct pf_state_key *sk;
	struct pf_state_item *si;
	struct pf_state_cmp id_key;
	struct pfsync_state *sp;
	struct pfsync_state_upd *up;
	struct pfsync_state_del *dp;
	struct pfsync_state_clr *cp;
	struct pfsync_state_upd_req *rup;
	struct pfsync_state_bus *bus;
#ifdef IPSEC
	struct pfsync_tdb *pt;
#endif
	struct in_addr src;
	struct mbuf *mp;
	int iplen, action, error, i, count, offp, sfail, stale = 0;
	u_int8_t flags = 0;

	/* This function is not yet called from anywhere */
	/* Still we assume for safety that pf_token must be held */
	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (!sc || !sc->sc_sync_ifp || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	iplen = ip->ip_hl << 2;

	if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
		pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (iplen + sizeof(*ph) > m->m_len) {
		if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
			pfsyncstats.pfsyncs_hdrops++;
			goto done;
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + iplen);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	action = ph->action;
	count = ph->count;

	/* make sure it's a valid action code */
	if (action >= PFSYNC_ACT_MAX) {
		pfsyncstats.pfsyncs_badact++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	src = ip->ip_src;

	if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags |= PFSYNC_SI_CKSUM;

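	/*
	 * Dispatch on the action code.  INS/UPD/DEL carry full
	 * pfsync_state records; UPD_C/DEL_C are the compressed variants
	 * keyed by (id, creatorid); UREQ asks the peer to resend states
	 * (an all-zero id/creatorid requests a full bulk update); BUS
	 * brackets a bulk update.  TDB_UPD (IPsec SA sync) is only
	 * compiled in under IPSEC.
	 */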
	switch (action) {
	case PFSYNC_ACT_CLR: {
		struct pf_state *nexts;
		struct pf_state_key *nextsk;
		struct pfi_kif *kif;
		globaldata_t save_gd = mycpu;
		int nn;
		u_int32_t creatorid;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*cp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		cp = (struct pfsync_state_clr *)(mp->m_data + offp);
		creatorid = cp->creatorid;

		crit_enter();
		if (cp->ifname[0] == '\0') {
			lwkt_gettoken(&pf_token);
			for (nn = 0; nn < ncpus; ++nn) {
				lwkt_setcpu_self(globaldata_find(nn));
				for (st = RB_MIN(pf_state_tree_id,
				    &tree_id[nn]); st; st = nexts) {
					nexts = RB_NEXT(pf_state_tree_id,
					    &tree_id[nn], st);
					if (st->creatorid == creatorid) {
						st->sync_flags |=
						    PFSTATE_FROMSYNC;
						pf_unlink_state(st);
					}
				}
			}
			lwkt_setcpu_self(save_gd);
			lwkt_reltoken(&pf_token);
		} else {
			if ((kif = pfi_kif_get(cp->ifname)) == NULL) {
				crit_exit();
				return;
			}
			/* XXX correct? */
			lwkt_gettoken(&pf_token);
			for (nn = 0; nn < ncpus; ++nn) {
				lwkt_setcpu_self(globaldata_find(nn));
				for (sk = RB_MIN(pf_state_tree,
				    &pf_statetbl[nn]); sk; sk = nextsk) {
					nextsk = RB_NEXT(pf_state_tree,
					    &pf_statetbl[nn], sk);
					TAILQ_FOREACH(si, &sk->states, entry) {
						if (si->s->creatorid ==
						    creatorid) {
							si->s->sync_flags |=
							    PFSTATE_FROMSYNC;
							pf_unlink_state(si->s);
						}
					}
				}
			}
			lwkt_setcpu_self(save_gd);
			lwkt_reltoken(&pf_token);
		}
		crit_exit();

		break;
	}
	case PFSYNC_ACT_INS:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST ||
			    sp->direction > PF_OUT ||
			    (sp->af != AF_INET && sp->af != AF_INET6)) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: PFSYNC_ACT_INS: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			if ((error = pfsync_state_import(sp, flags))) {
				if (error == ENOMEM) {
					crit_exit();
					goto done;
				}
			}
		}
		crit_exit();
		break;
	case PFSYNC_ACT_UPD:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			int flags = PFSYNC_FLAG_STALE;

			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: PFSYNC_ACT_UPD: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			bcopy(sp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				/* insert the update */
				if (pfsync_state_import(sp, flags))
					pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			sk = st->key[PF_SK_WIRE];	/* XXX right one? */
			sfail = 0;
			if (sk->proto == IPPROTO_TCP) {
				/*
				 * The state should never go backwards except
				 * for syn-proxy states.  Neither should the
				 * sequence window slide backwards.
				 */
				if (st->src.state > sp->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    sp->src.state >= PF_TCPS_PROXY_SRC))
					sfail = 1;
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(sp->src.seqlo)))
					sfail = 3;
				else if (st->dst.state > sp->dst.state) {
					/* There might still be useful
					 * information about the src state here,
					 * so import that part of the update,
					 * then "fail" so we send the updated
					 * state back to the peer who is missing
					 * what we know. */
					pf_state_peer_ntoh(&sp->src, &st->src);
					/* XXX do anything with timeouts? */
					sfail = 7;
					flags = 0;
				} else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
					sfail = 4;
			} else {
				/*
				 * Non-TCP protocol state machines always go
				 * forwards
				 */
				if (st->src.state > sp->src.state)
					sfail = 5;
				else if (st->dst.state > sp->dst.state)
					sfail = 6;
			}
			if (sfail) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: %s stale update "
					    "(%d) id: %016jx "
					    "creatorid: %08x\n",
					    (sfail < 7 ? "ignoring" : "partial"),
					    sfail, (uintmax_t)be64toh(st->id),
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_stale++;

				if (!(sp->sync_flags & PFSTATE_STALE)) {
					/* we have a better state, send it */
					if (sc->sc_mbuf != NULL && !stale)
						pfsync_sendout(sc);
					stale++;
					if (!st->sync_flags)
						pfsync_pack_state(
						    PFSYNC_ACT_UPD, st, flags);
				}
				continue;
			}
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->src, &st->src);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = ntohl(sp->expire) + time_second;
			st->timeout = sp->timeout;
		}
		if (stale && sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		crit_exit();
		break;
	/*
	 * It's not strictly necessary for us to support the "uncompressed"
	 * delete action, but it's relatively simple and maintains consistency.
	 */
	case PFSYNC_ACT_DEL:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			bcopy(sp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			st->sync_flags |= PFSTATE_FROMSYNC;
			pf_unlink_state(st);
		}
		crit_exit();
		break;
	case PFSYNC_ACT_UPD_C: {
		int update_requested = 0;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*up), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
		    i < count; i++, up++) {
			/* check for invalid values */
			if (up->timeout >= PFTM_MAX ||
			    up->src.state > PF_TCPS_PROXY_DST ||
			    up->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: "
					    "PFSYNC_ACT_UPD_C: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			bcopy(up->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = up->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				/* We don't have this state. Ask for it. */
				error = pfsync_request_update(up, &src);
				if (error == ENOMEM) {
					crit_exit();
					goto done;
				}
				update_requested = 1;
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			sk = st->key[PF_SK_WIRE];	/* XXX right one? */
			sfail = 0;
			if (sk->proto == IPPROTO_TCP) {
				/*
				 * The state should never go backwards except
				 * for syn-proxy states.  Neither should the
				 * sequence window slide backwards.
				 */
				if (st->src.state > up->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    up->src.state >= PF_TCPS_PROXY_SRC))
					sfail = 1;
				else if (st->dst.state > up->dst.state)
					sfail = 2;
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(up->src.seqlo)))
					sfail = 3;
				else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
					sfail = 4;
			} else {
				/*
				 * Non-TCP protocol state machines always go
				 * forwards
				 */
				if (st->src.state > up->src.state)
					sfail = 5;
				else if (st->dst.state > up->dst.state)
					sfail = 6;
			}
			if (sfail) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: ignoring stale update "
					    "(%d) id: %016" PRIx64 " "
					    "creatorid: %08x\n", sfail,
					    be64toh(st->id),
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_stale++;

				/* we have a better state, send it out */
				if ((!stale || update_requested) &&
				    sc->sc_mbuf != NULL) {
					pfsync_sendout(sc);
					update_requested = 0;
				}
				stale++;
				if (!st->sync_flags)
					pfsync_pack_state(PFSYNC_ACT_UPD, st,
					    PFSYNC_FLAG_STALE);
				continue;
			}
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->src, &st->src);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = ntohl(up->expire) + time_second;
			st->timeout = up->timeout;
		}
		if ((update_requested || stale) && sc->sc_mbuf)
			pfsync_sendout(sc);
		crit_exit();
		break;
	}
	case PFSYNC_ACT_DEL_C:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*dp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
		    i < count; i++, dp++) {
			bcopy(dp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = dp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			st->sync_flags |= PFSTATE_FROMSYNC;
			pf_unlink_state(st);
		}
		crit_exit();
		break;
	case PFSYNC_ACT_INS_F:
	case PFSYNC_ACT_DEL_F:
		/* not implemented */
		break;
	case PFSYNC_ACT_UREQ:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*rup), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		if (sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		for (i = 0,
		    rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
		    i < count; i++, rup++) {
			bcopy(rup->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = rup->creatorid;

			if (id_key.id == 0 && id_key.creatorid == 0) {
				sc->sc_ureq_received = mycpu->gd_time_seconds;
				if (sc->sc_bulk_send_next == NULL) {
					if (++sc->sc_bulk_send_cpu >= ncpus)
						sc->sc_bulk_send_cpu = 0;
					sc->sc_bulk_send_next =
					    TAILQ_FIRST(&state_list[sc->sc_bulk_send_cpu]);
				}
				sc->sc_bulk_terminator =
				    sc->sc_bulk_send_next;
				sc->sc_bulk_terminator_cpu =
				    sc->sc_bulk_send_cpu;
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received "
					    "bulk update request\n");
				pfsync_send_bus(sc, PFSYNC_BUS_START);
				lwkt_reltoken(&pf_token);
				callout_init(&sc->sc_bulk_tmo);
				lwkt_gettoken(&pf_token);
			} else {
				st = pf_find_state_byid(&id_key);
				if (st == NULL) {
					pfsyncstats.pfsyncs_badstate++;
					continue;
				}
				if (!st->sync_flags)
					pfsync_pack_state(PFSYNC_ACT_UPD,
					    st, 0);
			}
		}
		if (sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		crit_exit();
		break;
	case PFSYNC_ACT_BUS:
		/* If we're not waiting for a bulk update, who cares. */
		if (sc->sc_ureq_sent == 0)
			break;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*bus), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		bus = (struct pfsync_state_bus *)(mp->m_data + offp);
		switch (bus->status) {
		case PFSYNC_BUS_START:
			lwkt_reltoken(&pf_token);
			callout_reset(&sc->sc_bulkfail_tmo,
			    pf_pool_limits[PF_LIMIT_STATES].limit /
			    (PFSYNC_BULKPACKETS * sc->sc_maxcount),
			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
			lwkt_gettoken(&pf_token);
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: received bulk "
				    "update start\n");
			break;
		case PFSYNC_BUS_END:
			if (mycpu->gd_time_seconds - ntohl(bus->endtime) >=
			    sc->sc_ureq_sent) {
				/* that's it, we're happy */
				sc->sc_ureq_sent = 0;
				sc->sc_bulk_tries = 0;
				lwkt_reltoken(&pf_token);
				callout_stop(&sc->sc_bulkfail_tmo);
				lwkt_gettoken(&pf_token);
#if NCARP > 0
				if (!pfsync_sync_ok) {
					lwkt_reltoken(&pf_token);
					carp_group_demote_adj(&sc->sc_if, -1);
					lwkt_gettoken(&pf_token);
				}
#endif
				pfsync_sync_ok = 1;
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received valid "
					    "bulk update end\n");
			} else {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received invalid "
					    "bulk update end: bad timestamp\n");
			}
			break;
		}
		break;
#ifdef IPSEC
	case PFSYNC_ACT_TDB_UPD:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*pt), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		crit_enter();
		for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp);
		    i < count; i++, pt++)
			pfsync_update_net_tdb(pt);
		crit_exit();
		break;
#endif
	}

done:
	if (m)
		m_freem(m);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);
	return (0);
}

/* ARGSUSED */
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet *sifp;
	int error;

	lwkt_gettoken(&pf_token);

	switch (cmd) {
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP)
			ifp->if_flags |= IFF_RUNNING;
		else
			ifp->if_flags &= ~IFF_RUNNING;
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < PFSYNC_MINMTU) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}
		if (ifr->ifr_mtu > MCLBYTES)
			ifr->ifr_mtu = MCLBYTES;
		crit_enter();
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout(sc);
		pfsync_setmtu(sc, ifr->ifr_mtu);
		crit_exit();
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_ifp)
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		lwkt_reltoken(&pf_token);
		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
			return (error);
		lwkt_gettoken(&pf_token);
		break;
	case SIOCSETPFSYNC:
		if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0) {
			lwkt_reltoken(&pf_token);
			return (error);
		}
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) {
			lwkt_reltoken(&pf_token);
			return (error);
		}

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			sc->sc_sync_ifp = NULL;
			if (sc->sc_mbuf_net != NULL) {
				/* Don't keep stale pfsync packets around. */
				crit_enter();
				m_freem(sc->sc_mbuf_net);
				sc->sc_mbuf_net = NULL;
				sc->sc_statep_net.s = NULL;
				crit_exit();
			}
			if (imo->imo_num_memberships > 0) {
				pfsync_in_delmulti(
				    imo->imo_membership[--imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
			break;
		}

		/*
		 * XXX not that MPSAFE; pfsync needs serious rework
		 */
		ifnet_deserialize_all(ifp);
		ifnet_lock();
		sifp = ifunit(pfsyncr.pfsyncr_syncdev);
		ifnet_unlock();
		ifnet_serialize_all(ifp);

		if (sifp == NULL) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}

		crit_enter();
		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_ifp != NULL &&
		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout(sc);
		sc->sc_sync_ifp = sifp;

		pfsync_setmtu(sc, sc->sc_if.if_mtu);

		if (imo->imo_num_memberships > 0) {
			pfsync_in_delmulti(
			    imo->imo_membership[--imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}

		if (sc->sc_sync_ifp &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_ifp = NULL;
				lwkt_reltoken(&pf_token);
				crit_exit();
				return (EADDRNOTAVAIL);
			}

			if ((imo->imo_membership[0] =
			    pfsync_in_addmulti(sc->sc_sync_ifp)) == NULL) {
				sc->sc_sync_ifp = NULL;
				lwkt_reltoken(&pf_token);
				crit_exit();
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_ifp;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
		}

		if (sc->sc_sync_ifp ||
		    sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
			/* Request a full state table update. */
			sc->sc_ureq_sent = mycpu->gd_time_seconds;
#if NCARP > 0
			if (pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, 1);
#endif
			pfsync_sync_ok = 0;
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: requesting bulk update\n");
			lwkt_reltoken(&pf_token);
			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
			lwkt_gettoken(&pf_token);
			error = pfsync_request_update(NULL, NULL);
			if (error == ENOMEM) {
				lwkt_reltoken(&pf_token);
				crit_exit();
				return (ENOMEM);
			}
			pfsync_sendout(sc);
		}
		crit_exit();

		break;

	default:
		lwkt_reltoken(&pf_token);
		return (ENOTTY);
	}

	lwkt_reltoken(&pf_token);
	return (0);
}

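/*
 * Derive the pfsync interface MTU from the requested MTU, clamped to the
 * sync interface's MTU, and compute sc_maxcount, the number of full state
 * records that fit in one packet.  The cap of 254 is presumably chosen
 * because the header's count field is 8 bits and pfsync_pack_state() uses
 * the value 255 as a "no matching record" sentinel.
 */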
void
pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
{
	int mtu;

	if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
		mtu = sc->sc_sync_ifp->if_mtu;
	else
		mtu = mtu_req;

	sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
	    sizeof(struct pfsync_state);
	if (sc->sc_maxcount > 254)
		sc->sc_maxcount = 254;
	sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
	    sc->sc_maxcount * sizeof(struct pfsync_state);
}

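/*
 * Allocate an mbuf pre-sized for a worst-case batch of records of the
 * given action type.  *sp is pointed just past the pfsync header, where
 * the caller appends records, and sc_tmo is armed so that a partially
 * filled packet is flushed by pfsync_timeout() within a second.
 */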
struct mbuf *
pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
{
	struct pfsync_header *h;
	struct mbuf *m;
	int len;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	MGETHDR(m, M_WAITOK, MT_DATA);
	if (m == NULL) {
		IFNET_STAT_INC(&sc->sc_if, oerrors, 1);
		return (NULL);
	}

	switch (action) {
	case PFSYNC_ACT_CLR:
		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_clr);
		break;
	case PFSYNC_ACT_UPD_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_DEL_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_UREQ:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_BUS:
		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_bus);
		break;
	case PFSYNC_ACT_TDB_UPD:
		len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) +
		    sizeof(struct pfsync_header);
		break;
	default:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
		    sizeof(struct pfsync_header);
		break;
	}

	if (len > MHLEN) {
		MCLGET(m, M_WAITOK);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			IFNET_STAT_INC(&sc->sc_if, oerrors, 1);
			return (NULL);
		}
		m->m_data += (MCLBYTES - len) & ~(sizeof(long) - 1);
	} else
		MH_ALIGN(m, len);

	m->m_pkthdr.rcvif = NULL;
	m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
	h = mtod(m, struct pfsync_header *);
	h->version = PFSYNC_VERSION;
	h->af = 0;
	h->count = 0;
	h->action = action;

	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
	lwkt_reltoken(&pf_token);
	callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
	    LIST_FIRST(&pfsync_list));
	lwkt_gettoken(&pf_token);
	return (m);
}

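/*
 * Queue one state action into the pending packet, opening a new packet
 * if none exists or if the open one carries a different action code.
 * Repeated updates for the same state are merged in place until
 * sc_maxupdates is reached.  With a sync interface configured, UPD/DEL
 * are additionally staged as compressed UPD_C/DEL_C records for the
 * wire; the packet is flushed as soon as it fills up.
 */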
int
pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
{
	struct ifnet *ifp = NULL;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_header *h, *h_net;
	struct pfsync_state *sp = NULL;
	struct pfsync_state_upd *up = NULL;
	struct pfsync_state_del *dp = NULL;
	int ret = 0;
	u_int8_t i = 255, newaction = 0;

	if (sc == NULL)
		return (0);
	ifp = &sc->sc_if;

	/*
	 * If a packet falls in the forest and there's nobody around to
	 * hear, does it make a sound?
	 */
	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
	    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
		/* Don't leave any stale pfsync packets hanging around. */
		if (sc->sc_mbuf != NULL) {
			m_freem(sc->sc_mbuf);
			sc->sc_mbuf = NULL;
			sc->sc_statep.s = NULL;
		}
		return (0);
	}

	if (action >= PFSYNC_ACT_MAX)
		return (EINVAL);

	crit_enter();
	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
		    (void *)&sc->sc_statep.s)) == NULL) {
			crit_exit();
			return (ENOMEM);
		}
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
	} else {
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		if (h->action != action) {
			pfsync_sendout(sc);
			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
			    (void *)&sc->sc_statep.s)) == NULL) {
				crit_exit();
				return (ENOMEM);
			}
			h = mtod(sc->sc_mbuf, struct pfsync_header *);
		} else {
			/*
			 * If it's an update, look in the packet to see if
			 * we already have an update for the state.
			 */
			if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
				struct pfsync_state *usp =
				    (void *)((char *)h + PFSYNC_HDRLEN);

				for (i = 0; i < h->count; i++) {
					if (!memcmp(usp->id, &st->id,
					    PFSYNC_ID_LEN) &&
					    usp->creatorid == st->creatorid) {
						sp = usp;
						sp->updates++;
						break;
					}
					usp++;
				}
			}
		}
	}

	st->pfsync_time = mycpu->gd_time_seconds;

	if (sp == NULL) {
		/* not a "duplicate" update */
		i = 255;
		sp = sc->sc_statep.s++;
		sc->sc_mbuf->m_pkthdr.len =
		    sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
		h->count++;
		bzero(sp, sizeof(*sp));

		pfsync_state_export(sp, st);

		if (flags & PFSYNC_FLAG_STALE)
			sp->sync_flags |= PFSTATE_STALE;
	} else {
		pf_state_peer_hton(&st->src, &sp->src);
		pf_state_peer_hton(&st->dst, &sp->dst);

		if (st->expire <= time_second)
			sp->expire = htonl(0);
		else
			sp->expire = htonl(st->expire - time_second);
	}

	/* do we need to build "compressed" actions for network transfer? */
	if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
		switch (action) {
		case PFSYNC_ACT_UPD:
			newaction = PFSYNC_ACT_UPD_C;
			break;
		case PFSYNC_ACT_DEL:
			newaction = PFSYNC_ACT_DEL_C;
			break;
		default:
			/* by default we just send the uncompressed states */
			break;
		}
	}

	if (newaction) {
		if (sc->sc_mbuf_net == NULL) {
			if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
			    (void *)&sc->sc_statep_net.s)) == NULL) {
				crit_exit();
				return (ENOMEM);
			}
		}
		h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);

		switch (newaction) {
		case PFSYNC_ACT_UPD_C:
			if (i != 255) {
				up = (void *)((char *)h_net +
				    PFSYNC_HDRLEN + (i * sizeof(*up)));
				up->updates++;
			} else {
				h_net->count++;
				sc->sc_mbuf_net->m_pkthdr.len =
				    sc->sc_mbuf_net->m_len += sizeof(*up);
				up = sc->sc_statep_net.u++;

				bzero(up, sizeof(*up));
				bcopy(&st->id, up->id, sizeof(up->id));
				up->creatorid = st->creatorid;
			}
			up->timeout = st->timeout;
			up->expire = sp->expire;
			up->src = sp->src;
			up->dst = sp->dst;
			break;
		case PFSYNC_ACT_DEL_C:
			sc->sc_mbuf_net->m_pkthdr.len =
			    sc->sc_mbuf_net->m_len += sizeof(*dp);
			dp = sc->sc_statep_net.d++;
			h_net->count++;

			bzero(dp, sizeof(*dp));
			bcopy(&st->id, dp->id, sizeof(dp->id));
			dp->creatorid = st->creatorid;
			break;
		}
	}

	if (h->count == sc->sc_maxcount ||
	    (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
		ret = pfsync_sendout(sc);

	crit_exit();
	return (ret);
}

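/*
 * Append an update request for (id, creatorid) to the pending UREQ
 * packet.  A NULL 'up' leaves the request zeroed, which the receiver
 * interprets as a request for a full bulk update.
 */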
int
pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
{
	struct pfsync_header *h;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_state_upd_req *rup;
	int ret = 0;

	if (sc == NULL)
		return (0);

	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
		    (void *)&sc->sc_statep.s)) == NULL)
			return (ENOMEM);
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
	} else {
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		if (h->action != PFSYNC_ACT_UREQ) {
			pfsync_sendout(sc);
			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
			    (void *)&sc->sc_statep.s)) == NULL)
				return (ENOMEM);
			h = mtod(sc->sc_mbuf, struct pfsync_header *);
		}
	}

	if (src != NULL)
		sc->sc_sendaddr = *src;
	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
	h->count++;
	rup = sc->sc_statep.r++;
	bzero(rup, sizeof(*rup));
	if (up != NULL) {
		bcopy(up->id, rup->id, sizeof(rup->id));
		rup->creatorid = up->creatorid;
	}

	if (h->count == sc->sc_maxcount)
		ret = pfsync_sendout(sc);

	return (ret);
}

int
pfsync_clear_states(u_int32_t creatorid, char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_state_clr *cp;
	int ret;

	if (sc == NULL)
		return (0);

	crit_enter();
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);
	if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
	    (void *)&sc->sc_statep.c)) == NULL) {
		crit_exit();
		return (ENOMEM);
	}
	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
	cp = sc->sc_statep.c;
	cp->creatorid = creatorid;
	if (ifname != NULL)
		strlcpy(cp->ifname, ifname, IFNAMSIZ);

	ret = (pfsync_sendout(sc));
	crit_exit();
	return (ret);
}

void
pfsync_timeout(void *v)
{
	struct pfsync_softc *sc = v;

	crit_enter();
	pfsync_sendout(sc);
	crit_exit();
}

void
pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
{
	struct pfsync_state_bus *bus;

	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);

	if (pfsync_sync_ok &&
	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
	    (void *)&sc->sc_statep.b)) != NULL) {
		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
		bus = sc->sc_statep.b;
		bus->creatorid = pf_status.hostid;
		bus->status = status;
		bus->endtime = htonl(mycpu->gd_time_seconds -
		    sc->sc_ureq_received);
		pfsync_sendout(sc);
	}
}

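/*
 * Callout handler that walks the per-cpu state lists and repacks every
 * syncable state not sent since the bulk request arrived, at most
 * sc_maxcount * PFSYNC_BULKPACKETS states per pass.  The walk resumes
 * from sc_bulk_send_next on later passes and finishes with a
 * PFSYNC_BUS_END message once it wraps around to the saved terminator.
 */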
void
pfsync_bulk_update(void *v)
{
	struct pfsync_softc *sc = v;
	int i = 0;
	int cpu;
	struct pf_state *state;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	crit_enter();
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);

	/*
	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
	 * been sent since the latest request was made.
	 */
	state = sc->sc_bulk_send_next;
	cpu = sc->sc_bulk_send_cpu;
	if (state)
		do {
			/* send state update if syncable and not already sent */
			if (!state->sync_flags
			    && state->timeout < PFTM_MAX
			    && state->pfsync_time <= sc->sc_ureq_received) {
				pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
				i++;
			}

			/* figure next state to send */
			state = TAILQ_NEXT(state, entry_list);

			/* wrap to start of list if we hit the end */
			if (state == NULL) {
				if (++cpu >= ncpus)
					cpu = 0;
				state = TAILQ_FIRST(&state_list[cpu]);
			}
		} while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
		    cpu != sc->sc_bulk_terminator_cpu &&
		    state != sc->sc_bulk_terminator);

	if (state == NULL || (cpu == sc->sc_bulk_terminator_cpu &&
	    state == sc->sc_bulk_terminator)) {
		/* we're done */
		pfsync_send_bus(sc, PFSYNC_BUS_END);
		sc->sc_ureq_received = 0;
		sc->sc_bulk_send_next = NULL;
		sc->sc_bulk_terminator = NULL;
		sc->sc_bulk_send_cpu = 0;
		sc->sc_bulk_terminator_cpu = 0;
		lwkt_reltoken(&pf_token);
		callout_stop(&sc->sc_bulk_tmo);
		lwkt_gettoken(&pf_token);
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync: bulk update complete\n");
	} else {
		/* look again for more in a bit */
		lwkt_reltoken(&pf_token);
		callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout,
		    LIST_FIRST(&pfsync_list));
		lwkt_gettoken(&pf_token);
		sc->sc_bulk_send_next = state;
		sc->sc_bulk_send_cpu = cpu;
	}
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);
	crit_exit();
}

void
pfsync_bulkfail(void *v)
{
	struct pfsync_softc *sc = v;
	int error;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again in a bit */
		lwkt_reltoken(&pf_token);
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
		    LIST_FIRST(&pfsync_list));
		lwkt_gettoken(&pf_token);
		crit_enter();
		error = pfsync_request_update(NULL, NULL);
		if (error == ENOMEM) {
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: cannot allocate mbufs for "
				    "bulk update\n");
		} else
			pfsync_sendout(sc);
		crit_exit();
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1);
#endif
		pfsync_sync_ok = 1;
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync: failed to receive "
			    "bulk update status\n");
		lwkt_reltoken(&pf_token);
		callout_stop(&sc->sc_bulkfail_tmo);
		lwkt_gettoken(&pf_token);
	}
}

static void
pfsync_sendout_handler(netmsg_t nmsg)
{
	struct netmsg_genpkt *msg = (struct netmsg_genpkt *)nmsg;

	pfsync_sendout_mbuf(msg->arg1, msg->m);
}

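/*
 * Detach the pending packet from the softc, tap it to bpf, and hand it
 * to the netisr thread for transmission via pfsync_sendout_handler().
 * When a compressed network copy exists (sc_mbuf_net), it is sent in
 * place of the full local packet.
 */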
int
pfsync_sendout(struct pfsync_softc *sc)
{
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct netmsg_genpkt *msg;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	lwkt_reltoken(&pf_token);
	callout_stop(&sc->sc_tmo);
	lwkt_gettoken(&pf_token);

	if (sc->sc_mbuf == NULL)
		return (0);
	m = sc->sc_mbuf;
	sc->sc_mbuf = NULL;
	sc->sc_statep.s = NULL;

#if NBPFILTER > 0
	if (ifp->if_bpf) {
		bpf_gettoken();
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		bpf_reltoken();
	}
#endif

	if (sc->sc_mbuf_net) {
		m_freem(m);
		m = sc->sc_mbuf_net;
		sc->sc_mbuf_net = NULL;
		sc->sc_statep_net.s = NULL;
	}

	msg = &m->m_hdr.mh_genmsg;
	netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0,
	    pfsync_sendout_handler);
	msg->m = m;
	msg->arg1 = sc;
	netisr_sendmsg(&msg->base, 0);

	return (0);
}

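/*
 * Prepend an IP header and transmit: multicast to the pfsync group by
 * default, or unicast when an explicit sync peer address is configured.
 * Runs from the netisr thread.
 */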
int
pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
{
	struct sockaddr sa;
	struct ip *ip;

	if (sc->sc_sync_ifp ||
	    sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
		M_PREPEND(m, sizeof(struct ip), M_WAITOK);
		if (m == NULL) {
			pfsyncstats.pfsyncs_onomem++;
			return (0);
		}
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_id = htons(ip_randomid());
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_sum = 0;

		bzero(&sa, sizeof(sa));
		ip->ip_src.s_addr = INADDR_ANY;

		if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
			m->m_flags |= M_MCAST;
		ip->ip_dst = sc->sc_sendaddr;
		sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;

		pfsyncstats.pfsyncs_opackets++;

		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
			pfsyncstats.pfsyncs_oerrors++;
	} else
		m_freem(m);

	return (0);
}

static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;
	struct pfsync_softc *pfs_if, *tmp;

	lwkt_gettoken(&pf_token);

	switch (type) {
	case MOD_LOAD:
		LIST_INIT(&pfsync_list);
		lwkt_reltoken(&pf_token);
		if_clone_attach(&pfsync_cloner);
		lwkt_gettoken(&pf_token);
		/* Override the function pointer for pf_ioctl.c */
		break;

	case MOD_UNLOAD:
		lwkt_reltoken(&pf_token);
		if_clone_detach(&pfsync_cloner);
		lwkt_gettoken(&pf_token);
		LIST_FOREACH_MUTABLE(pfs_if, &pfsync_list, sc_next, tmp) {
			pfsync_clone_destroy(&pfs_if->sc_if);
		}
		break;

	default:
		error = EINVAL;
		break;
	}

	lwkt_reltoken(&pf_token);
	return error;
}

static moduledata_t pfsync_mod = {
	"pfsync",
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 44

DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);

static void
pfsync_in_addmulti_dispatch(netmsg_t nmsg)
{
	struct lwkt_msg *lmsg = &nmsg->lmsg;
	struct ifnet *ifp = lmsg->u.ms_resultp;
	struct in_addr addr;

	addr.s_addr = INADDR_PFSYNC_GROUP;
	lmsg->u.ms_resultp = in_addmulti(&addr, ifp);

	lwkt_replymsg(lmsg, 0);
}

static struct in_multi *
pfsync_in_addmulti(struct ifnet *ifp)
{
	struct netmsg_base nmsg;
	struct lwkt_msg *lmsg = &nmsg.lmsg;

	netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0,
	    pfsync_in_addmulti_dispatch);
	lmsg->u.ms_resultp = ifp;

	lwkt_domsg(netisr_cpuport(0), lmsg, 0);
	return lmsg->u.ms_resultp;
}

static void
pfsync_in_delmulti_dispatch(netmsg_t nmsg)
{
	struct lwkt_msg *lmsg = &nmsg->lmsg;

	in_delmulti(lmsg->u.ms_resultp);
	lwkt_replymsg(lmsg, 0);
}

static void
pfsync_in_delmulti(struct in_multi *inm)
{
	struct netmsg_base nmsg;
	struct lwkt_msg *lmsg = &nmsg.lmsg;

	netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0,
	    pfsync_in_delmulti_dispatch);
	lmsg->u.ms_resultp = inm;

	lwkt_domsg(netisr_cpuport(0), lmsg, 0);
}