sys/vfs/hammer2: Fix comment on bmradix in freemap
[dragonfly.git] / sys / net / pf / if_pfsync.c
blobb015bd526f8331cf5883f8e23806e50aaee27544
1 /*
2 * Copyright (c) 2002 Michael Shalayeff
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24 * THE POSSIBILITY OF SUCH DAMAGE.
26 * $OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_carp.h"
32 #include "use_bpf.h"
34 #include <sys/param.h>
35 #include <sys/endian.h>
36 #include <sys/proc.h>
37 #include <sys/priv.h>
38 #include <sys/systm.h>
39 #include <sys/time.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/kernel.h>
43 #include <sys/malloc.h>
44 #include <sys/module.h>
45 #include <sys/msgport2.h>
46 #include <sys/sockio.h>
47 #include <sys/thread2.h>
49 #include <machine/inttypes.h>
51 #include <net/if.h>
52 #include <net/if_types.h>
53 #include <net/ifq_var.h>
54 #include <net/route.h>
55 #include <net/bpf.h>
56 #include <net/netisr2.h>
57 #include <net/netmsg2.h>
58 #include <netinet/in.h>
59 #include <netinet/if_ether.h>
60 #include <netinet/ip_carp.h>
61 #include <netinet/tcp.h>
62 #include <netinet/tcp_seq.h>
64 #ifdef INET
65 #include <netinet/in_systm.h>
66 #include <netinet/in_var.h>
67 #include <netinet/ip.h>
68 #include <netinet/ip_var.h>
69 #endif
71 #ifdef INET6
72 #include <netinet6/nd6.h>
73 #endif /* INET6 */
75 #include <net/pf/pfvar.h>
76 #include <net/pf/if_pfsync.h>
/*
 * Module-wide constants, globals and forward declarations for the
 * pfsync(4) state-synchronization pseudo-interface.
 *
 * NOTE(review): this text is a lossy extraction -- the leading decimal
 * tokens are original line numbers and blank/brace-only lines were
 * dropped; verify against the repository source.
 */
78 #define PFSYNCNAME "pfsync"
/* Smallest usable MTU: one header plus one full state record. */
80 #define PFSYNC_MINMTU \
81 (sizeof(struct pfsync_header) + sizeof(struct pf_state))
83 #ifdef PFSYNCDEBUG
84 #define DPRINTF(x) do { if (pfsyncdebug) kprintf x ; } while (0)
85 int pfsyncdebug;
86 #else
87 #define DPRINTF(x)
88 #endif
/* Singleton softc pointer and global statistics counters. */
90 struct pfsync_softc *pfsyncif = NULL;
91 struct pfsyncstats pfsyncstats;
93 void pfsyncattach(int);
94 static int pfsync_clone_destroy(struct ifnet *);
95 static int pfsync_clone_create(struct if_clone *, int, caddr_t, caddr_t);
96 void pfsync_setmtu(struct pfsync_softc *, int);
97 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
98 struct pf_state_peer *);
99 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
100 struct rtentry *);
101 int pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
102 void pfsyncstart(struct ifnet *, struct ifaltq_subque *);
104 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
105 int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
106 int pfsync_sendout(struct pfsync_softc *);
107 int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
108 void pfsync_timeout(void *);
109 void pfsync_send_bus(struct pfsync_softc *, u_int8_t);
110 void pfsync_bulk_update(void *);
111 void pfsync_bulkfail(void *);
113 static struct in_multi *pfsync_in_addmulti(struct ifnet *);
114 static void pfsync_in_delmulti(struct in_multi *);
116 static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
117 static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;
/* Nonzero once a bulk update has completed (or none was needed). */
119 int pfsync_sync_ok;
121 struct if_clone pfsync_cloner =
122 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy, 1 ,1);
125 void
126 pfsyncattach(int npfsync)
128 if_clone_attach(&pfsync_cloner);
/*
 * if_clone create handler: allocate and zero the softc, initialize the
 * update/bulk-transfer bookkeeping, configure the pseudo-interface
 * (IFT_PFSYNC, MTU derived from MCLBYTES) and attach it, registering
 * with BPF and the "carp" interface group when compiled in.
 * Always returns 0.
 *
 * NOTE(review): lossy extraction -- leading decimal tokens are original
 * line numbers; blank/brace-only lines were dropped.
 */
131 static int
132 pfsync_clone_create(struct if_clone *ifc, int unit,
133 caddr_t params __unused, caddr_t data __unused)
135 struct pfsync_softc *sc;
136 struct ifnet *ifp;
138 lwkt_gettoken(&pf_token);
140 pfsync_sync_ok = 1;
142 sc = kmalloc(sizeof(*sc), M_PFSYNC, M_WAITOK | M_ZERO);
143 sc->sc_mbuf = NULL;
144 sc->sc_mbuf_net = NULL;
145 sc->sc_mbuf_tdb = NULL;
146 sc->sc_statep.s = NULL;
147 sc->sc_statep_net.s = NULL;
148 sc->sc_statep_tdb.t = NULL;
/* Default: coalesce up to 128 updates per state before forcing a send. */
149 sc->sc_maxupdates = 128;
150 sc->sc_sync_peer.s_addr =htonl(INADDR_PFSYNC_GROUP);
151 sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
152 sc->sc_ureq_received = 0;
153 sc->sc_ureq_sent = 0;
154 sc->sc_bulk_send_next = NULL;
155 sc->sc_bulk_terminator = NULL;
156 sc->sc_bulk_send_cpu = 0;
157 sc->sc_bulk_terminator_cpu = 0;
158 sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS;
159 lwkt_reltoken(&pf_token);
161 ifp = &sc->sc_if;
162 if_initname(ifp, ifc->ifc_name, unit);
163 ifp->if_ioctl = pfsyncioctl;
164 ifp->if_output = pfsyncoutput;
165 ifp->if_start = pfsyncstart;
166 ifp->if_type = IFT_PFSYNC;
167 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
168 ifp->if_hdrlen = PFSYNC_HDRLEN;
169 ifp->if_baudrate = IF_Mbps(100);
170 ifp->if_softc = sc;
/* Size sc_maxcount/MTU from a cluster-sized packet. */
172 pfsync_setmtu(sc, MCLBYTES);
173 callout_init(&sc->sc_tmo);
174 /* callout_init(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */
175 callout_init(&sc->sc_bulk_tmo);
176 callout_init(&sc->sc_bulkfail_tmo);
178 if_attach(ifp, NULL);
179 #if NBPF > 0
180 bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN);
181 #endif
183 #ifdef CARP
184 if_addgroup(ifp, "carp");
185 #endif
/* Re-take the token only around the global-list insertion. */
187 lwkt_gettoken(&pf_token);
188 LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
189 lwkt_reltoken(&pf_token);
191 return (0);
/*
 * if_clone destroy handler: stop all timers, drain pending async
 * sendouts through the netisr, detach from BPF and the ifnet layer,
 * unlink the softc from the global list and free it.  Returns 0.
 *
 * NOTE(review): lossy extraction -- leading decimal tokens are original
 * line numbers; blank/brace-only lines were dropped.
 */
194 static int
195 pfsync_clone_destroy(struct ifnet *ifp)
197 struct netmsg_base msg;
/* NOTE(review): this get/rel pair with no body looks like a no-op
 * (perhaps a serialization point?) -- confirm intent upstream. */
199 lwkt_gettoken(&pf_token);
200 lwkt_reltoken(&pf_token);
202 struct pfsync_softc *sc = ifp->if_softc;
203 callout_stop(&sc->sc_tmo);
204 /* callout_stop(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */
205 callout_stop(&sc->sc_bulk_tmo);
206 callout_stop(&sc->sc_bulkfail_tmo);
207 #ifdef CARP
208 if (!pfsync_sync_ok)
209 carp_group_demote_adj(&sc->sc_if, -1);
210 #endif
212 /* Unpend async sendouts. */
213 netmsg_init(&msg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler);
214 netisr_domsg(&msg, 0);
216 #if NBPF > 0
217 bpfdetach(ifp);
218 #endif
219 if_detach(ifp);
220 lwkt_gettoken(&pf_token);
221 LIST_REMOVE(sc, sc_next);
222 kfree(sc, M_PFSYNC);
223 lwkt_reltoken(&pf_token);
225 return 0;
/*
 * Start output on the pfsync interface.  pfsync never transmits
 * packets from its own send queue, so anything queued is discarded.
 */
void
pfsyncstart(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
	ifsq_purge(ifsq);
}
239 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
240 struct pf_state_peer *d)
242 if (s->scrub.scrub_flag && d->scrub == NULL) {
243 d->scrub = kmalloc(sizeof(struct pf_state_scrub), M_PFSYNC, M_NOWAIT|M_ZERO);
245 if (d->scrub == NULL)
246 return (ENOMEM);
249 return (0);
/*
 * Serialize an in-kernel pf_state into the wire-format pfsync_state
 * record (network byte order) for transmission to a sync peer: copies
 * both state keys, interface name, timing (creation age / remaining
 * expiry), direction/log/timeout flags, rule numbers and counters.
 *
 * NOTE(review): lossy extraction -- leading decimal tokens are original
 * line numbers; blank/brace-only lines were dropped.
 */
252 void
253 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
255 bzero(sp, sizeof(struct pfsync_state));
257 /* copy from state key */
258 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
259 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
260 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
261 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
262 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
263 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
264 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
265 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
266 sp->proto = st->key[PF_SK_WIRE]->proto;
267 sp->af = st->key[PF_SK_WIRE]->af;
269 /* copy from state */
270 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
271 bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
/* creation is sent as "age in seconds", expire as "seconds left". */
272 sp->creation = htonl(time_second - st->creation);
273 sp->expire = pf_state_expires(st);
274 if (sp->expire <= time_second)
275 sp->expire = htonl(0);
276 else
277 sp->expire = htonl(sp->expire - time_second);
279 sp->direction = st->direction;
280 sp->log = st->log;
281 sp->cpuid = st->cpuid;
282 sp->pickup_mode = st->pickup_mode;
283 sp->timeout = st->timeout;
284 sp->state_flags = st->state_flags;
285 if (st->src_node)
286 sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
287 if (st->nat_src_node)
288 sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
290 bcopy(&st->id, &sp->id, sizeof(sp->id));
291 sp->creatorid = st->creatorid;
292 pf_state_peer_hton(&st->src, &sp->src);
293 pf_state_peer_hton(&st->dst, &sp->dst);
/* Rule numbers: -1 (all ones) means "no rule of this kind". */
295 if (st->rule.ptr == NULL)
296 sp->rule = htonl(-1);
297 else
298 sp->rule = htonl(st->rule.ptr->nr);
299 if (st->anchor.ptr == NULL)
300 sp->anchor = htonl(-1);
301 else
302 sp->anchor = htonl(st->anchor.ptr->nr);
303 if (st->nat_rule.ptr == NULL)
304 sp->nat_rule = htonl(-1);
305 else
306 sp->nat_rule = htonl(st->nat_rule.ptr->nr);
308 pf_state_counter_hton(st->packets[0], sp->packets[0]);
309 pf_state_counter_hton(st->packets[1], sp->packets[1]);
310 pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
311 pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
/*
 * Import a wire-format pfsync_state received from a peer (or via ioctl)
 * into the local state table: validate the creator id and interface,
 * bind the state to a filter rule (by number when checksums match,
 * otherwise the default rule), allocate state + key(s) + scrub memory,
 * convert fields to host representation and insert the state.
 * Returns 0 on success, EINVAL on bad input, ENOMEM on allocation
 * failure (via the goto-cleanup chain below).
 *
 * NOTE(review): lossy extraction -- leading decimal tokens are original
 * line numbers; blank/brace-only lines were dropped.
 */
316 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
318 struct pf_state *st = NULL;
319 struct pf_state_key *skw = NULL, *sks = NULL;
320 struct pf_rule *r = NULL;
321 struct pfi_kif *kif;
322 int pool_flags;
323 int error;
325 if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
326 kprintf("pfsync_insert_net_state: invalid creator id:"
327 " %08x\n", ntohl(sp->creatorid));
328 return (EINVAL);
331 if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
332 if (pf_status.debug >= PF_DEBUG_MISC)
333 kprintf("pfsync_insert_net_state: "
334 "unknown interface: %s\n", sp->ifname);
335 if (flags & PFSYNC_SI_IOCTL)
336 return (EINVAL);
337 return (0); /* skip this state */
341 * If the ruleset checksums match or the state is coming from the ioctl,
342 * it's safe to associate the state with the rule of that number.
344 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
345 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
346 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
347 r = pf_main_ruleset.rules[
348 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
349 else
350 r = &pf_default_rule;
352 if ((r->max_states && r->states_cur >= r->max_states))
353 goto cleanup;
/* ioctl callers may tolerate NULL from kmalloc; network path must not. */
355 if (flags & PFSYNC_SI_IOCTL)
356 pool_flags = M_WAITOK | M_NULLOK | M_ZERO;
357 else
358 pool_flags = M_WAITOK | M_ZERO;
360 if ((st = kmalloc(sizeof(struct pf_state), M_PFSYNC, pool_flags)) == NULL)
361 goto cleanup;
362 lockinit(&st->lk, "pfstlk", 0, 0);
364 if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
365 goto cleanup;
/* Only allocate a separate stack-side key when it differs from wire. */
367 if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
368 &sp->key[PF_SK_STACK].addr[0], sp->af) ||
369 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
370 &sp->key[PF_SK_STACK].addr[1], sp->af) ||
371 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
372 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
373 if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
374 goto cleanup;
375 } else
376 sks = skw;
378 /* allocate memory for scrub info */
379 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
380 pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
381 goto cleanup;
383 /* copy to state key(s) */
384 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
385 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
386 skw->port[0] = sp->key[PF_SK_WIRE].port[0];
387 skw->port[1] = sp->key[PF_SK_WIRE].port[1];
388 skw->proto = sp->proto;
389 skw->af = sp->af;
390 if (sks != skw) {
391 sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
392 sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
393 sks->port[0] = sp->key[PF_SK_STACK].port[0];
394 sks->port[1] = sp->key[PF_SK_STACK].port[1];
395 sks->proto = sp->proto;
396 sks->af = sp->af;
399 /* copy to state */
400 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
401 st->creation = time_second - ntohl(sp->creation);
402 st->expire = time_second;
403 if (sp->expire) {
404 /* XXX No adaptive scaling. */
405 st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
/* NOTE(review): the original line numbering (405 -> 408) suggests the
 * statement below follows unconditionally and overrides the adjusted
 * value above -- verify intent against the repository source. */
408 st->expire = ntohl(sp->expire) + time_second;
409 st->direction = sp->direction;
410 st->log = sp->log;
411 st->timeout = sp->timeout;
412 st->state_flags = sp->state_flags;
413 if (!(flags & PFSYNC_SI_IOCTL))
414 st->sync_flags = PFSTATE_FROMSYNC;
416 bcopy(sp->id, &st->id, sizeof(st->id));
417 st->creatorid = sp->creatorid;
418 pf_state_peer_ntoh(&sp->src, &st->src);
419 pf_state_peer_ntoh(&sp->dst, &st->dst);
421 st->rule.ptr = r;
422 st->nat_rule.ptr = NULL;
423 st->anchor.ptr = NULL;
424 st->rt_kif = NULL;
426 st->pfsync_time = 0;
429 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
430 r->states_cur++;
431 r->states_tot++;
433 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
434 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
435 r->states_cur--;
436 goto cleanup_state;
439 return (0);
441 cleanup:
442 error = ENOMEM;
443 if (skw == sks)
444 sks = NULL;
445 if (skw != NULL)
446 kfree(skw, M_PFSYNC);
447 if (sks != NULL)
448 kfree(sks, M_PFSYNC);
/* cleanup falls through into cleanup_state on purpose. */
450 cleanup_state: /* pf_state_insert frees the state keys */
451 if (st) {
452 if (st->dst.scrub)
453 kfree(st->dst.scrub, M_PFSYNC);
454 if (st->src.scrub)
455 kfree(st->src.scrub, M_PFSYNC);
456 kfree(st, M_PFSYNC);
458 return (error);
/*
 * pfsync protocol input path: validate the received packet (sync
 * interface, TTL 255, header length, version, action code) and then
 * dispatch per action -- CLR, INS, UPD, DEL, compressed UPD_C/DEL_C,
 * update requests (UREQ) and bulk-update status (BUS).
 *
 * NOTE(review): lossy extraction -- leading decimal tokens are original
 * line numbers; blank/brace-only lines were dropped.
 */
461 void
462 pfsync_input(struct mbuf *m, ...)
464 struct ip *ip = mtod(m, struct ip *);
465 struct pfsync_header *ph;
466 struct pfsync_softc *sc = pfsyncif;
467 struct pf_state *st;
468 struct pf_state_key *sk;
469 struct pf_state_item *si;
470 struct pf_state_cmp id_key;
471 struct pfsync_state *sp;
472 struct pfsync_state_upd *up;
473 struct pfsync_state_del *dp;
474 struct pfsync_state_clr *cp;
475 struct pfsync_state_upd_req *rup;
476 struct pfsync_state_bus *bus;
477 struct in_addr src;
478 struct mbuf *mp;
479 int iplen, action, error, i, count, offp, sfail, stale = 0;
480 u_int8_t flags = 0;
482 /* This function is not yet called from anywhere */
483 /* Still we assume for safety that pf_token must be held */
484 ASSERT_LWKT_TOKEN_HELD(&pf_token);
486 pfsyncstats.pfsyncs_ipackets++;
488 /* verify that we have a sync interface configured */
489 if (!sc || !sc->sc_sync_ifp || !pf_status.running)
490 goto done;
492 /* verify that the packet came in on the right interface */
493 if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
494 pfsyncstats.pfsyncs_badif++;
495 goto done;
498 /* verify that the IP TTL is 255. */
499 if (ip->ip_ttl != PFSYNC_DFLTTL) {
500 pfsyncstats.pfsyncs_badttl++;
501 goto done;
504 iplen = ip->ip_hl << 2;
506 if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
507 pfsyncstats.pfsyncs_hdrops++;
508 goto done;
511 if (iplen + sizeof(*ph) > m->m_len) {
512 if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
513 pfsyncstats.pfsyncs_hdrops++;
514 goto done;
516 ip = mtod(m, struct ip *);
518 ph = (struct pfsync_header *)((char *)ip + iplen);
520 /* verify the version */
521 if (ph->version != PFSYNC_VERSION) {
522 pfsyncstats.pfsyncs_badver++;
523 goto done;
526 action = ph->action;
527 count = ph->count;
529 /* make sure it's a valid action code */
530 if (action >= PFSYNC_ACT_MAX) {
531 pfsyncstats.pfsyncs_badact++;
532 goto done;
535 /* Cheaper to grab this now than having to mess with mbufs later */
536 src = ip->ip_src;
/* Matching ruleset checksum lets us trust rule numbers in records. */
538 if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
539 flags |= PFSYNC_SI_CKSUM;
541 switch (action) {
542 case PFSYNC_ACT_CLR: {
543 struct pf_state *nexts;
544 struct pf_state_key *nextsk;
545 struct pfi_kif *kif;
546 globaldata_t save_gd = mycpu;
547 int nn;
549 u_int32_t creatorid;
550 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
551 sizeof(*cp), &offp)) == NULL) {
552 pfsyncstats.pfsyncs_badlen++;
553 return;
555 cp = (struct pfsync_state_clr *)(mp->m_data + offp);
556 creatorid = cp->creatorid;
558 crit_enter();
/* Empty ifname: clear matching states on every CPU's id tree. */
559 if (cp->ifname[0] == '\0') {
560 lwkt_gettoken(&pf_token);
561 for (nn = 0; nn < ncpus; ++nn) {
562 lwkt_setcpu_self(globaldata_find(nn));
563 for (st = RB_MIN(pf_state_tree_id,
564 &tree_id[nn]);
565 st; st = nexts) {
566 nexts = RB_NEXT(pf_state_tree_id,
/* NOTE(review): 'n' is not declared in this scope -- the loop index is
 * 'nn'; this likely should read &tree_id[nn].  Verify against source. */
567 &tree_id[n], st);
568 if (st->creatorid == creatorid) {
569 st->sync_flags |=
570 PFSTATE_FROMSYNC;
571 pf_unlink_state(st);
575 lwkt_setcpu_self(save_gd);
576 lwkt_reltoken(&pf_token);
577 } else {
578 if ((kif = pfi_kif_get(cp->ifname)) == NULL) {
579 crit_exit();
580 return;
582 /* XXX correct? */
583 lwkt_gettoken(&pf_token);
584 for (nn = 0; nn < ncpus; ++nn) {
585 lwkt_setcpu_self(globaldata_find(nn));
586 for (sk = RB_MIN(pf_state_tree,
587 &pf_statetbl[nn]);
589 sk = nextsk) {
590 nextsk = RB_NEXT(pf_state_tree,
/* NOTE(review): same issue as above -- 'n' undeclared; presumably
 * &pf_statetbl[nn]. */
591 &pf_statetbl[n], sk);
592 TAILQ_FOREACH(si, &sk->states, entry) {
593 if (si->s->creatorid ==
594 creatorid) {
595 si->s->sync_flags |=
596 PFSTATE_FROMSYNC;
597 pf_unlink_state(si->s);
602 lwkt_setcpu_self(save_gd);
603 lwkt_reltoken(&pf_token);
605 crit_exit();
607 break;
609 case PFSYNC_ACT_INS:
610 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
611 count * sizeof(*sp), &offp)) == NULL) {
612 pfsyncstats.pfsyncs_badlen++;
613 return;
616 crit_enter();
617 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
618 i < count; i++, sp++) {
619 /* check for invalid values */
620 if (sp->timeout >= PFTM_MAX ||
621 sp->src.state > PF_TCPS_PROXY_DST ||
622 sp->dst.state > PF_TCPS_PROXY_DST ||
623 sp->direction > PF_OUT ||
624 (sp->af != AF_INET && sp->af != AF_INET6)) {
625 if (pf_status.debug >= PF_DEBUG_MISC)
626 kprintf("pfsync_insert: PFSYNC_ACT_INS: "
627 "invalid value\n");
628 pfsyncstats.pfsyncs_badval++;
629 continue;
632 if ((error = pfsync_state_import(sp, flags))) {
633 if (error == ENOMEM) {
634 crit_exit();
635 goto done;
639 crit_exit();
640 break;
641 case PFSYNC_ACT_UPD:
642 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
643 count * sizeof(*sp), &offp)) == NULL) {
644 pfsyncstats.pfsyncs_badlen++;
645 return;
648 crit_enter();
649 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
650 i < count; i++, sp++) {
/* NOTE(review): this inner 'flags' shadows the function-scope
 * u_int8_t flags used by pfsync_state_import below -- appears
 * deliberate (per-record flag), but worth confirming. */
651 int flags = PFSYNC_FLAG_STALE;
653 /* check for invalid values */
654 if (sp->timeout >= PFTM_MAX ||
655 sp->src.state > PF_TCPS_PROXY_DST ||
656 sp->dst.state > PF_TCPS_PROXY_DST) {
657 if (pf_status.debug >= PF_DEBUG_MISC)
658 kprintf("pfsync_insert: PFSYNC_ACT_UPD: "
659 "invalid value\n");
660 pfsyncstats.pfsyncs_badval++;
661 continue;
664 bcopy(sp->id, &id_key.id, sizeof(id_key.id));
665 id_key.creatorid = sp->creatorid;
667 st = pf_find_state_byid(&id_key);
668 if (st == NULL) {
669 /* insert the update */
670 if (pfsync_state_import(sp, flags))
671 pfsyncstats.pfsyncs_badstate++;
672 continue;
674 sk = st->key[PF_SK_WIRE]; /* XXX right one? */
675 sfail = 0;
676 if (sk->proto == IPPROTO_TCP) {
678 * The state should never go backwards except
679 * for syn-proxy states. Neither should the
680 * sequence window slide backwards.
682 if (st->src.state > sp->src.state &&
683 (st->src.state < PF_TCPS_PROXY_SRC ||
684 sp->src.state >= PF_TCPS_PROXY_SRC))
685 sfail = 1;
686 else if (SEQ_GT(st->src.seqlo,
687 ntohl(sp->src.seqlo)))
688 sfail = 3;
689 else if (st->dst.state > sp->dst.state) {
690 /* There might still be useful
691 * information about the src state here,
692 * so import that part of the update,
693 * then "fail" so we send the updated
694 * state back to the peer who is missing
695 * our what we know. */
696 pf_state_peer_ntoh(&sp->src, &st->src);
697 /* XXX do anything with timeouts? */
698 sfail = 7;
699 flags = 0;
700 } else if (st->dst.state >= TCPS_SYN_SENT &&
701 SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
702 sfail = 4;
703 } else {
705 * Non-TCP protocol state machine always go
706 * forwards
708 if (st->src.state > sp->src.state)
709 sfail = 5;
710 else if (st->dst.state > sp->dst.state)
711 sfail = 6;
713 if (sfail) {
714 if (pf_status.debug >= PF_DEBUG_MISC)
715 kprintf("pfsync: %s stale update "
716 "(%d) id: %016jx "
717 "creatorid: %08x\n",
718 (sfail < 7 ? "ignoring"
719 : "partial"), sfail,
720 (uintmax_t)be64toh(st->id),
721 ntohl(st->creatorid));
722 pfsyncstats.pfsyncs_stale++;
724 if (!(sp->sync_flags & PFSTATE_STALE)) {
725 /* we have a better state, send it */
726 if (sc->sc_mbuf != NULL && !stale)
727 pfsync_sendout(sc);
728 stale++;
729 if (!st->sync_flags)
730 pfsync_pack_state(
731 PFSYNC_ACT_UPD, st, flags);
733 continue;
735 pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
736 pf_state_peer_ntoh(&sp->src, &st->src);
737 pf_state_peer_ntoh(&sp->dst, &st->dst);
738 st->expire = ntohl(sp->expire) + time_second;
739 st->timeout = sp->timeout;
741 if (stale && sc->sc_mbuf != NULL)
742 pfsync_sendout(sc);
743 crit_exit();
744 break;
746 * It's not strictly necessary for us to support the "uncompressed"
747 * delete action, but it's relatively simple and maintains consistency.
749 case PFSYNC_ACT_DEL:
750 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
751 count * sizeof(*sp), &offp)) == NULL) {
752 pfsyncstats.pfsyncs_badlen++;
753 return;
756 crit_enter();
757 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
758 i < count; i++, sp++) {
759 bcopy(sp->id, &id_key.id, sizeof(id_key.id));
760 id_key.creatorid = sp->creatorid;
762 st = pf_find_state_byid(&id_key);
763 if (st == NULL) {
764 pfsyncstats.pfsyncs_badstate++;
765 continue;
767 st->sync_flags |= PFSTATE_FROMSYNC;
768 pf_unlink_state(st);
770 crit_exit();
771 break;
772 case PFSYNC_ACT_UPD_C: {
773 int update_requested = 0;
775 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
776 count * sizeof(*up), &offp)) == NULL) {
777 pfsyncstats.pfsyncs_badlen++;
778 return;
781 crit_enter();
782 for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
783 i < count; i++, up++) {
784 /* check for invalid values */
785 if (up->timeout >= PFTM_MAX ||
786 up->src.state > PF_TCPS_PROXY_DST ||
787 up->dst.state > PF_TCPS_PROXY_DST) {
788 if (pf_status.debug >= PF_DEBUG_MISC)
789 kprintf("pfsync_insert: "
790 "PFSYNC_ACT_UPD_C: "
791 "invalid value\n");
792 pfsyncstats.pfsyncs_badval++;
793 continue;
796 bcopy(up->id, &id_key.id, sizeof(id_key.id));
797 id_key.creatorid = up->creatorid;
799 st = pf_find_state_byid(&id_key);
800 if (st == NULL) {
801 /* We don't have this state. Ask for it. */
802 error = pfsync_request_update(up, &src);
803 if (error == ENOMEM) {
804 crit_exit();
805 goto done;
807 update_requested = 1;
808 pfsyncstats.pfsyncs_badstate++;
809 continue;
811 sk = st->key[PF_SK_WIRE]; /* XXX right one? */
812 sfail = 0;
813 if (sk->proto == IPPROTO_TCP) {
815 * The state should never go backwards except
816 * for syn-proxy states. Neither should the
817 * sequence window slide backwards.
819 if (st->src.state > up->src.state &&
820 (st->src.state < PF_TCPS_PROXY_SRC ||
821 up->src.state >= PF_TCPS_PROXY_SRC))
822 sfail = 1;
823 else if (st->dst.state > up->dst.state)
824 sfail = 2;
825 else if (SEQ_GT(st->src.seqlo,
826 ntohl(up->src.seqlo)))
827 sfail = 3;
828 else if (st->dst.state >= TCPS_SYN_SENT &&
829 SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
830 sfail = 4;
831 } else {
833 * Non-TCP protocol state machine always go
834 * forwards
836 if (st->src.state > up->src.state)
837 sfail = 5;
838 else if (st->dst.state > up->dst.state)
839 sfail = 6;
841 if (sfail) {
842 if (pf_status.debug >= PF_DEBUG_MISC)
843 kprintf("pfsync: ignoring stale update "
844 "(%d) id: %016" PRIx64 " "
845 "creatorid: %08x\n", sfail,
846 be64toh(st->id),
847 ntohl(st->creatorid));
848 pfsyncstats.pfsyncs_stale++;
850 /* we have a better state, send it out */
851 if ((!stale || update_requested) &&
852 sc->sc_mbuf != NULL) {
853 pfsync_sendout(sc);
854 update_requested = 0;
856 stale++;
857 if (!st->sync_flags)
858 pfsync_pack_state(PFSYNC_ACT_UPD, st,
859 PFSYNC_FLAG_STALE);
860 continue;
862 pfsync_alloc_scrub_memory(&up->dst, &st->dst);
863 pf_state_peer_ntoh(&up->src, &st->src);
864 pf_state_peer_ntoh(&up->dst, &st->dst);
865 st->expire = ntohl(up->expire) + time_second;
866 st->timeout = up->timeout;
868 if ((update_requested || stale) && sc->sc_mbuf)
869 pfsync_sendout(sc);
870 crit_exit();
871 break;
873 case PFSYNC_ACT_DEL_C:
874 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
875 count * sizeof(*dp), &offp)) == NULL) {
876 pfsyncstats.pfsyncs_badlen++;
877 return;
880 crit_enter();
881 for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
882 i < count; i++, dp++) {
883 bcopy(dp->id, &id_key.id, sizeof(id_key.id));
884 id_key.creatorid = dp->creatorid;
886 st = pf_find_state_byid(&id_key);
887 if (st == NULL) {
888 pfsyncstats.pfsyncs_badstate++;
889 continue;
891 st->sync_flags |= PFSTATE_FROMSYNC;
892 pf_unlink_state(st);
894 crit_exit();
895 break;
896 case PFSYNC_ACT_INS_F:
897 case PFSYNC_ACT_DEL_F:
898 /* not implemented */
899 break;
900 case PFSYNC_ACT_UREQ:
901 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
902 count * sizeof(*rup), &offp)) == NULL) {
903 pfsyncstats.pfsyncs_badlen++;
904 return;
907 crit_enter();
908 if (sc->sc_mbuf != NULL)
909 pfsync_sendout(sc);
910 for (i = 0,
911 rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
912 i < count; i++, rup++) {
913 bcopy(rup->id, &id_key.id, sizeof(id_key.id));
914 id_key.creatorid = rup->creatorid;
/* All-zero id+creatorid requests a full (bulk) table update. */
916 if (id_key.id == 0 && id_key.creatorid == 0) {
917 sc->sc_ureq_received = mycpu->gd_time_seconds;
918 if (sc->sc_bulk_send_next == NULL) {
919 if (++sc->sc_bulk_send_cpu >= ncpus)
920 sc->sc_bulk_send_cpu = 0;
921 sc->sc_bulk_send_next =
922 TAILQ_FIRST(&state_list[sc->sc_bulk_send_cpu]);
924 sc->sc_bulk_terminator =
925 sc->sc_bulk_send_next;
926 sc->sc_bulk_terminator_cpu =
927 sc->sc_bulk_send_cpu;
928 if (pf_status.debug >= PF_DEBUG_MISC)
929 kprintf("pfsync: received "
930 "bulk update request\n");
931 pfsync_send_bus(sc, PFSYNC_BUS_START);
932 lwkt_reltoken(&pf_token);
933 callout_init(&sc->sc_bulk_tmo);
934 lwkt_gettoken(&pf_token);
935 } else {
936 st = pf_find_state_byid(&id_key);
937 if (st == NULL) {
938 pfsyncstats.pfsyncs_badstate++;
939 continue;
941 if (!st->sync_flags)
942 pfsync_pack_state(PFSYNC_ACT_UPD,
943 st, 0);
946 if (sc->sc_mbuf != NULL)
947 pfsync_sendout(sc);
948 crit_exit();
949 break;
950 case PFSYNC_ACT_BUS:
951 /* If we're not waiting for a bulk update, who cares. */
952 if (sc->sc_ureq_sent == 0)
953 break;
955 if ((mp = m_pulldown(m, iplen + sizeof(*ph),
956 sizeof(*bus), &offp)) == NULL) {
957 pfsyncstats.pfsyncs_badlen++;
958 return;
960 bus = (struct pfsync_state_bus *)(mp->m_data + offp);
961 switch (bus->status) {
962 case PFSYNC_BUS_START:
963 lwkt_reltoken(&pf_token);
964 callout_reset(&sc->sc_bulkfail_tmo,
965 pf_pool_limits[PF_LIMIT_STATES].limit /
966 (PFSYNC_BULKPACKETS * sc->sc_maxcount),
967 pfsync_bulkfail, LIST_FIRST(&pfsync_list));
968 lwkt_gettoken(&pf_token);
969 if (pf_status.debug >= PF_DEBUG_MISC)
970 kprintf("pfsync: received bulk "
971 "update start\n");
972 break;
973 case PFSYNC_BUS_END:
/* Accept the end marker only if it postdates our request. */
974 if (mycpu->gd_time_seconds - ntohl(bus->endtime) >=
975 sc->sc_ureq_sent) {
976 /* that's it, we're happy */
977 sc->sc_ureq_sent = 0;
978 sc->sc_bulk_tries = 0;
979 lwkt_reltoken(&pf_token);
980 callout_stop(&sc->sc_bulkfail_tmo);
981 lwkt_gettoken(&pf_token);
982 #ifdef CARP
983 if (!pfsync_sync_ok) {
984 lwkt_reltoken(&pf_token);
985 carp_group_demote_adj(&sc->sc_if, -1);
986 lwkt_gettoken(&pf_token);
988 #endif
989 pfsync_sync_ok = 1;
990 if (pf_status.debug >= PF_DEBUG_MISC)
991 kprintf("pfsync: received valid "
992 "bulk update end\n");
993 } else {
994 if (pf_status.debug >= PF_DEBUG_MISC)
995 kprintf("pfsync: received invalid "
996 "bulk update end: bad timestamp\n");
998 break;
1000 break;
1003 done:
1004 if (m)
1005 m_freem(m);
/*
 * Output handler for the pfsync pseudo-interface.  Nothing may be
 * transmitted directly through it, so the packet is dropped and
 * success is reported to the caller.
 */
int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);
	return (0);
}
/*
 * ioctl handler for pfsync(4): interface up/down and MTU changes,
 * plus SIOCGETPFSYNC/SIOCSETPFSYNC for reading and configuring the
 * sync device, sync peer and max-updates parameters.  Configuring a
 * sync device (re)joins the pfsync multicast group and kicks off a
 * bulk state-table update request.  Runs under pf_token.
 *
 * NOTE(review): lossy extraction -- leading decimal tokens are original
 * line numbers; blank/brace-only lines were dropped.
 */
1016 /* ARGSUSED */
1018 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
1020 struct pfsync_softc *sc = ifp->if_softc;
1021 struct ifreq *ifr = (struct ifreq *)data;
1022 struct ip_moptions *imo = &sc->sc_imo;
1023 struct pfsyncreq pfsyncr;
1024 struct ifnet *sifp;
1025 int error;
1027 lwkt_gettoken(&pf_token);
1029 switch (cmd) {
1030 case SIOCSIFADDR:
1031 case SIOCAIFADDR:
1032 case SIOCSIFDSTADDR:
1033 case SIOCSIFFLAGS:
1034 if (ifp->if_flags & IFF_UP)
1035 ifp->if_flags |= IFF_RUNNING;
1036 else
1037 ifp->if_flags &= ~IFF_RUNNING;
1038 break;
1039 case SIOCSIFMTU:
1040 if (ifr->ifr_mtu < PFSYNC_MINMTU) {
1041 lwkt_reltoken(&pf_token);
1042 return (EINVAL);
1044 if (ifr->ifr_mtu > MCLBYTES)
1045 ifr->ifr_mtu = MCLBYTES;
1046 crit_enter();
/* Shrinking the MTU: flush pending output sized for the old MTU. */
1047 if (ifr->ifr_mtu < ifp->if_mtu)
1048 pfsync_sendout(sc);
1049 pfsync_setmtu(sc, ifr->ifr_mtu);
1050 crit_exit();
1051 break;
1052 case SIOCGETPFSYNC:
1053 bzero(&pfsyncr, sizeof(pfsyncr));
1054 if (sc->sc_sync_ifp)
1055 strlcpy(pfsyncr.pfsyncr_syncdev,
1056 sc->sc_sync_ifp->if_xname, IFNAMSIZ);
1057 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1058 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
/* Drop the token around copyout (may fault/sleep). */
1059 lwkt_reltoken(&pf_token);
1060 if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
1061 return (error);
1062 lwkt_gettoken(&pf_token);
1063 break;
1064 case SIOCSETPFSYNC:
1065 if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0) {
1066 lwkt_reltoken(&pf_token);
1067 return (error);
1069 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) {
1070 lwkt_reltoken(&pf_token);
1071 return (error);
/* Zero peer address means "use the pfsync multicast group". */
1074 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1075 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1076 else
1077 sc->sc_sync_peer.s_addr =
1078 pfsyncr.pfsyncr_syncpeer.s_addr;
1080 if (pfsyncr.pfsyncr_maxupdates > 255) {
1081 lwkt_reltoken(&pf_token);
1082 return (EINVAL);
1084 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
/* Empty syncdev: detach from the current sync interface. */
1086 if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1087 sc->sc_sync_ifp = NULL;
1088 if (sc->sc_mbuf_net != NULL) {
1089 /* Don't keep stale pfsync packets around. */
1090 crit_enter();
1091 m_freem(sc->sc_mbuf_net);
1092 sc->sc_mbuf_net = NULL;
1093 sc->sc_statep_net.s = NULL;
1094 crit_exit();
1096 if (imo->imo_num_memberships > 0) {
1097 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1098 imo->imo_multicast_ifp = NULL;
1100 break;
1104 * XXX not that MPSAFE; pfsync needs serious rework
1106 ifnet_deserialize_all(ifp);
1107 ifnet_lock();
1108 sifp = ifunit(pfsyncr.pfsyncr_syncdev);
1109 ifnet_unlock();
1110 ifnet_serialize_all(ifp);
1112 if (sifp == NULL) {
1113 lwkt_reltoken(&pf_token);
1114 return (EINVAL);
1117 crit_enter();
/* Flush pending output before switching to a smaller-MTU device. */
1118 if (sifp->if_mtu < sc->sc_if.if_mtu ||
1119 (sc->sc_sync_ifp != NULL &&
1120 sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
1121 sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1122 pfsync_sendout(sc);
1123 sc->sc_sync_ifp = sifp;
1125 pfsync_setmtu(sc, sc->sc_if.if_mtu);
1127 if (imo->imo_num_memberships > 0) {
1128 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1129 imo->imo_multicast_ifp = NULL;
/* Multicast peer: join the pfsync group on the sync device. */
1132 if (sc->sc_sync_ifp &&
1133 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1134 if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
1135 sc->sc_sync_ifp = NULL;
1136 lwkt_reltoken(&pf_token);
1137 crit_exit();
1138 return (EADDRNOTAVAIL);
1141 if ((imo->imo_membership[0] =
1142 pfsync_in_addmulti(sc->sc_sync_ifp)) == NULL) {
1143 sc->sc_sync_ifp = NULL;
1144 lwkt_reltoken(&pf_token);
1145 crit_exit();
1146 return (ENOBUFS);
1148 imo->imo_num_memberships++;
1149 imo->imo_multicast_ifp = sc->sc_sync_ifp;
1150 imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1151 imo->imo_multicast_loop = 0;
1154 if (sc->sc_sync_ifp ||
1155 sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
1156 /* Request a full state table update. */
1157 sc->sc_ureq_sent = mycpu->gd_time_seconds;
1158 #ifdef CARP
1159 if (pfsync_sync_ok)
1160 carp_group_demote_adj(&sc->sc_if, 1);
1161 #endif
1162 pfsync_sync_ok = 0;
1163 if (pf_status.debug >= PF_DEBUG_MISC)
1164 kprintf("pfsync: requesting bulk update\n");
1165 lwkt_reltoken(&pf_token);
1166 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
1167 pfsync_bulkfail, LIST_FIRST(&pfsync_list));
1168 lwkt_gettoken(&pf_token);
1169 error = pfsync_request_update(NULL, NULL);
1170 if (error == ENOMEM) {
1171 lwkt_reltoken(&pf_token);
1172 crit_exit();
1173 return (ENOMEM);
1175 pfsync_sendout(sc);
1177 crit_exit();
1179 break;
1181 default:
1182 lwkt_reltoken(&pf_token);
1183 return (ENOTTY);
1186 lwkt_reltoken(&pf_token);
1187 return (0);
1190 void
1191 pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
1193 int mtu;
1195 if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
1196 mtu = sc->sc_sync_ifp->if_mtu;
1197 else
1198 mtu = mtu_req;
1200 sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
1201 sizeof(struct pfsync_state);
1202 if (sc->sc_maxcount > 254)
1203 sc->sc_maxcount = 254;
1204 sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
1205 sc->sc_maxcount * sizeof(struct pfsync_state);
1208 struct mbuf *
1209 pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
1211 struct pfsync_header *h;
1212 struct mbuf *m;
1213 int len;
1215 ASSERT_LWKT_TOKEN_HELD(&pf_token);
1217 MGETHDR(m, M_WAITOK, MT_DATA);
1218 if (m == NULL) {
1219 IFNET_STAT_INC(&sc->sc_if, oerrors, 1);
1220 return (NULL);
1223 switch (action) {
1224 case PFSYNC_ACT_CLR:
1225 len = sizeof(struct pfsync_header) +
1226 sizeof(struct pfsync_state_clr);
1227 break;
1228 case PFSYNC_ACT_UPD_C:
1229 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
1230 sizeof(struct pfsync_header);
1231 break;
1232 case PFSYNC_ACT_DEL_C:
1233 len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
1234 sizeof(struct pfsync_header);
1235 break;
1236 case PFSYNC_ACT_UREQ:
1237 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
1238 sizeof(struct pfsync_header);
1239 break;
1240 case PFSYNC_ACT_BUS:
1241 len = sizeof(struct pfsync_header) +
1242 sizeof(struct pfsync_state_bus);
1243 break;
1244 case PFSYNC_ACT_TDB_UPD:
1245 len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) +
1246 sizeof(struct pfsync_header);
1247 break;
1248 default:
1249 len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
1250 sizeof(struct pfsync_header);
1251 break;
1254 if (len > MHLEN) {
1255 MCLGET(m, M_WAITOK);
1256 if ((m->m_flags & M_EXT) == 0) {
1257 m_free(m);
1258 IFNET_STAT_INC(&sc->sc_if, oerrors, 1);
1259 return (NULL);
1261 m->m_data += rounddown2(MCLBYTES - len, sizeof(long));
1262 } else
1263 MH_ALIGN(m, len);
1265 m->m_pkthdr.rcvif = NULL;
1266 m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
1267 h = mtod(m, struct pfsync_header *);
1268 h->version = PFSYNC_VERSION;
1269 h->af = 0;
1270 h->count = 0;
1271 h->action = action;
1273 *sp = (void *)((char *)h + PFSYNC_HDRLEN);
1274 lwkt_reltoken(&pf_token);
1275 callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
1276 LIST_FIRST(&pfsync_list));
1277 lwkt_gettoken(&pf_token);
1278 return (m);
1282 pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
1284 struct ifnet *ifp = NULL;
1285 struct pfsync_softc *sc = pfsyncif;
1286 struct pfsync_header *h, *h_net;
1287 struct pfsync_state *sp = NULL;
1288 struct pfsync_state_upd *up = NULL;
1289 struct pfsync_state_del *dp = NULL;
1290 int ret = 0;
1291 u_int8_t i = 255, newaction = 0;
1293 if (sc == NULL)
1294 return (0);
1295 ifp = &sc->sc_if;
1298 * If a packet falls in the forest and there's nobody around to
1299 * hear, does it make a sound?
1301 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
1302 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1303 /* Don't leave any stale pfsync packets hanging around. */
1304 if (sc->sc_mbuf != NULL) {
1305 m_freem(sc->sc_mbuf);
1306 sc->sc_mbuf = NULL;
1307 sc->sc_statep.s = NULL;
1309 return (0);
1312 if (action >= PFSYNC_ACT_MAX)
1313 return (EINVAL);
1315 crit_enter();
1316 if (sc->sc_mbuf == NULL) {
1317 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1318 (void *)&sc->sc_statep.s)) == NULL) {
1319 crit_exit();
1320 return (ENOMEM);
1322 h = mtod(sc->sc_mbuf, struct pfsync_header *);
1323 } else {
1324 h = mtod(sc->sc_mbuf, struct pfsync_header *);
1325 if (h->action != action) {
1326 pfsync_sendout(sc);
1327 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1328 (void *)&sc->sc_statep.s)) == NULL) {
1329 crit_exit();
1330 return (ENOMEM);
1332 h = mtod(sc->sc_mbuf, struct pfsync_header *);
1333 } else {
1335 * If it's an update, look in the packet to see if
1336 * we already have an update for the state.
1338 if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
1339 struct pfsync_state *usp =
1340 (void *)((char *)h + PFSYNC_HDRLEN);
1342 for (i = 0; i < h->count; i++) {
1343 if (!memcmp(usp->id, &st->id,
1344 PFSYNC_ID_LEN) &&
1345 usp->creatorid == st->creatorid) {
1346 sp = usp;
1347 sp->updates++;
1348 break;
1350 usp++;
1356 st->pfsync_time = mycpu->gd_time_seconds;
1358 if (sp == NULL) {
1359 /* not a "duplicate" update */
1360 i = 255;
1361 sp = sc->sc_statep.s++;
1362 sc->sc_mbuf->m_pkthdr.len =
1363 sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
1364 h->count++;
1365 bzero(sp, sizeof(*sp));
1367 pfsync_state_export(sp, st);
1369 if (flags & PFSYNC_FLAG_STALE)
1370 sp->sync_flags |= PFSTATE_STALE;
1371 } else {
1372 pf_state_peer_hton(&st->src, &sp->src);
1373 pf_state_peer_hton(&st->dst, &sp->dst);
1375 if (st->expire <= time_second)
1376 sp->expire = htonl(0);
1377 else
1378 sp->expire = htonl(st->expire - time_second);
1381 /* do we need to build "compressed" actions for network transfer? */
1382 if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
1383 switch (action) {
1384 case PFSYNC_ACT_UPD:
1385 newaction = PFSYNC_ACT_UPD_C;
1386 break;
1387 case PFSYNC_ACT_DEL:
1388 newaction = PFSYNC_ACT_DEL_C;
1389 break;
1390 default:
1391 /* by default we just send the uncompressed states */
1392 break;
1396 if (newaction) {
1397 if (sc->sc_mbuf_net == NULL) {
1398 if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
1399 (void *)&sc->sc_statep_net.s)) == NULL) {
1400 crit_exit();
1401 return (ENOMEM);
1404 h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);
1406 switch (newaction) {
1407 case PFSYNC_ACT_UPD_C:
1408 if (i != 255) {
1409 up = (void *)((char *)h_net +
1410 PFSYNC_HDRLEN + (i * sizeof(*up)));
1411 up->updates++;
1412 } else {
1413 h_net->count++;
1414 sc->sc_mbuf_net->m_pkthdr.len =
1415 sc->sc_mbuf_net->m_len += sizeof(*up);
1416 up = sc->sc_statep_net.u++;
1418 bzero(up, sizeof(*up));
1419 bcopy(&st->id, up->id, sizeof(up->id));
1420 up->creatorid = st->creatorid;
1422 up->timeout = st->timeout;
1423 up->expire = sp->expire;
1424 up->src = sp->src;
1425 up->dst = sp->dst;
1426 break;
1427 case PFSYNC_ACT_DEL_C:
1428 sc->sc_mbuf_net->m_pkthdr.len =
1429 sc->sc_mbuf_net->m_len += sizeof(*dp);
1430 dp = sc->sc_statep_net.d++;
1431 h_net->count++;
1433 bzero(dp, sizeof(*dp));
1434 bcopy(&st->id, dp->id, sizeof(dp->id));
1435 dp->creatorid = st->creatorid;
1436 break;
1440 if (h->count == sc->sc_maxcount ||
1441 (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
1442 ret = pfsync_sendout(sc);
1444 crit_exit();
1445 return (ret);
1449 pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
1451 struct pfsync_header *h;
1452 struct pfsync_softc *sc = pfsyncif;
1453 struct pfsync_state_upd_req *rup;
1454 int ret = 0;
1456 if (sc == NULL)
1457 return (0);
1459 if (sc->sc_mbuf == NULL) {
1460 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1461 (void *)&sc->sc_statep.s)) == NULL)
1462 return (ENOMEM);
1463 h = mtod(sc->sc_mbuf, struct pfsync_header *);
1464 } else {
1465 h = mtod(sc->sc_mbuf, struct pfsync_header *);
1466 if (h->action != PFSYNC_ACT_UREQ) {
1467 pfsync_sendout(sc);
1468 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1469 (void *)&sc->sc_statep.s)) == NULL)
1470 return (ENOMEM);
1471 h = mtod(sc->sc_mbuf, struct pfsync_header *);
1475 if (src != NULL)
1476 sc->sc_sendaddr = *src;
1477 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
1478 h->count++;
1479 rup = sc->sc_statep.r++;
1480 bzero(rup, sizeof(*rup));
1481 if (up != NULL) {
1482 bcopy(up->id, rup->id, sizeof(rup->id));
1483 rup->creatorid = up->creatorid;
1486 if (h->count == sc->sc_maxcount)
1487 ret = pfsync_sendout(sc);
1489 return (ret);
1493 pfsync_clear_states(u_int32_t creatorid, char *ifname)
1495 struct pfsync_softc *sc = pfsyncif;
1496 struct pfsync_state_clr *cp;
1497 int ret;
1499 if (sc == NULL)
1500 return (0);
1502 crit_enter();
1503 if (sc->sc_mbuf != NULL)
1504 pfsync_sendout(sc);
1505 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
1506 (void *)&sc->sc_statep.c)) == NULL) {
1507 crit_exit();
1508 return (ENOMEM);
1510 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
1511 cp = sc->sc_statep.c;
1512 cp->creatorid = creatorid;
1513 if (ifname != NULL)
1514 strlcpy(cp->ifname, ifname, IFNAMSIZ);
1516 ret = (pfsync_sendout(sc));
1517 crit_exit();
1518 return (ret);
/*
 * Callout handler: flush whatever pfsync packet is currently pending.
 */
void
pfsync_timeout(void *v)
{
        struct pfsync_softc *sc = v;

        crit_enter();
        pfsync_sendout(sc);
        crit_exit();
}
1531 void
1532 pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
1534 struct pfsync_state_bus *bus;
1536 if (sc->sc_mbuf != NULL)
1537 pfsync_sendout(sc);
1539 if (pfsync_sync_ok &&
1540 (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
1541 (void *)&sc->sc_statep.b)) != NULL) {
1542 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
1543 bus = sc->sc_statep.b;
1544 bus->creatorid = pf_status.hostid;
1545 bus->status = status;
1546 bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received);
1547 pfsync_sendout(sc);
1551 void
1552 pfsync_bulk_update(void *v)
1554 struct pfsync_softc *sc = v;
1555 int i = 0;
1556 int cpu;
1557 struct pf_state *state;
1559 ASSERT_LWKT_TOKEN_HELD(&pf_token);
1561 crit_enter();
1562 if (sc->sc_mbuf != NULL)
1563 pfsync_sendout(sc);
1566 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
1567 * been sent since the latest request was made.
1569 state = sc->sc_bulk_send_next;
1570 cpu = sc->sc_bulk_send_cpu;
1571 if (state)
1572 do {
1573 /* send state update if syncable and not already sent */
1574 if (!state->sync_flags
1575 && state->timeout < PFTM_MAX
1576 && state->pfsync_time <= sc->sc_ureq_received) {
1577 pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
1578 i++;
1581 /* figure next state to send */
1582 state = TAILQ_NEXT(state, entry_list);
1584 /* wrap to start of list if we hit the end */
1585 if (state == NULL) {
1586 if (++cpu >= ncpus)
1587 cpu = 0;
1588 state = TAILQ_FIRST(&state_list[cpu]);
1590 } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
1591 cpu != sc->sc_bulk_terminator_cpu &&
1592 state != sc->sc_bulk_terminator);
1594 if (state == NULL || (cpu == sc->sc_bulk_terminator_cpu &&
1595 state == sc->sc_bulk_terminator)) {
1596 /* we're done */
1597 pfsync_send_bus(sc, PFSYNC_BUS_END);
1598 sc->sc_ureq_received = 0;
1599 sc->sc_bulk_send_next = NULL;
1600 sc->sc_bulk_terminator = NULL;
1601 sc->sc_bulk_send_cpu = 0;
1602 sc->sc_bulk_terminator_cpu = 0;
1603 lwkt_reltoken(&pf_token);
1604 callout_stop(&sc->sc_bulk_tmo);
1605 lwkt_gettoken(&pf_token);
1606 if (pf_status.debug >= PF_DEBUG_MISC)
1607 kprintf("pfsync: bulk update complete\n");
1608 } else {
1609 /* look again for more in a bit */
1610 lwkt_reltoken(&pf_token);
1611 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout,
1612 LIST_FIRST(&pfsync_list));
1613 lwkt_gettoken(&pf_token);
1614 sc->sc_bulk_send_next = state;
1615 sc->sc_bulk_send_cpu = cpu;
1617 if (sc->sc_mbuf != NULL)
1618 pfsync_sendout(sc);
1619 crit_exit();
1622 void
1623 pfsync_bulkfail(void *v)
1625 struct pfsync_softc *sc = v;
1626 int error;
1628 ASSERT_LWKT_TOKEN_HELD(&pf_token);
1630 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
1631 /* Try again in a bit */
1632 lwkt_reltoken(&pf_token);
1633 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
1634 LIST_FIRST(&pfsync_list));
1635 lwkt_gettoken(&pf_token);
1636 crit_enter();
1637 error = pfsync_request_update(NULL, NULL);
1638 if (error == ENOMEM) {
1639 if (pf_status.debug >= PF_DEBUG_MISC)
1640 kprintf("pfsync: cannot allocate mbufs for "
1641 "bulk update\n");
1642 } else
1643 pfsync_sendout(sc);
1644 crit_exit();
1645 } else {
1646 /* Pretend like the transfer was ok */
1647 sc->sc_ureq_sent = 0;
1648 sc->sc_bulk_tries = 0;
1649 #ifdef CARP
1650 if (!pfsync_sync_ok)
1651 carp_group_demote_adj(&sc->sc_if, -1);
1652 #endif
1653 pfsync_sync_ok = 1;
1654 if (pf_status.debug >= PF_DEBUG_MISC)
1655 kprintf("pfsync: failed to receive "
1656 "bulk update status\n");
1657 lwkt_reltoken(&pf_token);
1658 callout_stop(&sc->sc_bulkfail_tmo);
1659 lwkt_gettoken(&pf_token);
1663 static void
1664 pfsync_sendout_handler(netmsg_t nmsg)
1666 struct netmsg_genpkt *msg = (struct netmsg_genpkt *)nmsg;
1668 pfsync_sendout_mbuf(msg->arg1, msg->m);
1672 pfsync_sendout(struct pfsync_softc *sc)
1674 #if NBPF > 0
1675 struct ifnet *ifp = &sc->sc_if;
1676 #endif
1677 struct mbuf *m;
1678 struct netmsg_genpkt *msg;
1680 ASSERT_LWKT_TOKEN_HELD(&pf_token);
1682 lwkt_reltoken(&pf_token);
1683 callout_stop(&sc->sc_tmo);
1684 lwkt_gettoken(&pf_token);
1686 if (sc->sc_mbuf == NULL)
1687 return (0);
1688 m = sc->sc_mbuf;
1689 sc->sc_mbuf = NULL;
1690 sc->sc_statep.s = NULL;
1692 #if NBPF > 0
1693 if (ifp->if_bpf) {
1694 bpf_gettoken();
1695 if (ifp->if_bpf)
1696 bpf_mtap(ifp->if_bpf, m);
1697 bpf_reltoken();
1699 #endif
1701 if (sc->sc_mbuf_net) {
1702 m_freem(m);
1703 m = sc->sc_mbuf_net;
1704 sc->sc_mbuf_net = NULL;
1705 sc->sc_statep_net.s = NULL;
1708 msg = &m->m_hdr.mh_genmsg;
1709 netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0,
1710 pfsync_sendout_handler);
1711 msg->m = m;
1712 msg->arg1 = sc;
1713 netisr_sendmsg(&msg->base, 0);
1715 return (0);
1719 pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
1721 struct sockaddr sa;
1722 struct ip *ip;
1724 if (sc->sc_sync_ifp ||
1725 sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
1726 M_PREPEND(m, sizeof(struct ip), M_WAITOK);
1727 if (m == NULL) {
1728 pfsyncstats.pfsyncs_onomem++;
1729 return (0);
1731 ip = mtod(m, struct ip *);
1732 ip->ip_v = IPVERSION;
1733 ip->ip_hl = sizeof(*ip) >> 2;
1734 ip->ip_tos = IPTOS_LOWDELAY;
1735 ip->ip_len = htons(m->m_pkthdr.len);
1736 ip->ip_id = htons(ip_randomid());
1737 ip->ip_off = htons(IP_DF);
1738 ip->ip_ttl = PFSYNC_DFLTTL;
1739 ip->ip_p = IPPROTO_PFSYNC;
1740 ip->ip_sum = 0;
1742 bzero(&sa, sizeof(sa));
1743 ip->ip_src.s_addr = INADDR_ANY;
1745 if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
1746 m->m_flags |= M_MCAST;
1747 ip->ip_dst = sc->sc_sendaddr;
1748 sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;
1750 pfsyncstats.pfsyncs_opackets++;
1752 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
1753 pfsyncstats.pfsyncs_oerrors++;
1754 } else
1755 m_freem(m);
1757 return (0);
1760 static int
1761 pfsync_modevent(module_t mod, int type, void *data)
1763 int error = 0;
1765 struct pfsync_softc *pfs_if, *tmp;
1767 lwkt_gettoken(&pf_token);
1769 switch (type) {
1770 case MOD_LOAD:
1771 LIST_INIT(&pfsync_list);
1772 lwkt_reltoken(&pf_token);
1773 if_clone_attach(&pfsync_cloner);
1774 lwkt_gettoken(&pf_token);
1775 /* Override the function pointer for pf_ioctl.c */
1776 break;
1778 case MOD_UNLOAD:
1779 lwkt_reltoken(&pf_token);
1780 if_clone_detach(&pfsync_cloner);
1781 lwkt_gettoken(&pf_token);
1782 LIST_FOREACH_MUTABLE(pfs_if, &pfsync_list, sc_next, tmp) {
1783 pfsync_clone_destroy(&pfs_if->sc_if);
1785 break;
1787 default:
1788 error = EINVAL;
1789 break;
1792 lwkt_reltoken(&pf_token);
1793 return error;
1796 static moduledata_t pfsync_mod = {
1797 "pfsync",
1798 pfsync_modevent,
1802 #define PFSYNC_MODVER 44
1804 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
1805 MODULE_VERSION(pfsync, PFSYNC_MODVER);
1806 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
1808 static void
1809 pfsync_in_addmulti_dispatch(netmsg_t nmsg)
1811 struct lwkt_msg *lmsg = &nmsg->lmsg;
1812 struct ifnet *ifp = lmsg->u.ms_resultp;
1813 struct in_addr addr;
1815 addr.s_addr = INADDR_PFSYNC_GROUP;
1816 lmsg->u.ms_resultp = in_addmulti(&addr, ifp);
1818 lwkt_replymsg(lmsg, 0);
1821 static struct in_multi *
1822 pfsync_in_addmulti(struct ifnet *ifp)
1824 struct netmsg_base nmsg;
1825 struct lwkt_msg *lmsg = &nmsg.lmsg;
1827 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0,
1828 pfsync_in_addmulti_dispatch);
1829 lmsg->u.ms_resultp = ifp;
1831 lwkt_domsg(netisr_cpuport(0), lmsg, 0);
1832 return lmsg->u.ms_resultp;
1835 static void
1836 pfsync_in_delmulti_dispatch(netmsg_t nmsg)
1838 struct lwkt_msg *lmsg = &nmsg->lmsg;
1840 in_delmulti(lmsg->u.ms_resultp);
1841 lwkt_replymsg(lmsg, 0);
1844 static void
1845 pfsync_in_delmulti(struct in_multi *inm)
1847 struct netmsg_base nmsg;
1848 struct lwkt_msg *lmsg = &nmsg.lmsg;
1850 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0,
1851 pfsync_in_delmulti_dispatch);
1852 lmsg->u.ms_resultp = inm;
1854 lwkt_domsg(netisr_cpuport(0), lmsg, 0);