16875 ndpd/ipmgmtd addrconf race
[illumos-gate.git] / usr / src / uts / common / io / pfmod.c
blobe6c241eafaac22981685d1232c15995522ac0c07
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * STREAMS Packet Filter Module
29 * This module applies a filter to messages arriving on its read
30 * queue, passing on messages that the filter accepts and discarding
31 * the others. It supports ioctls for setting the filter.
33 * On the write side, the module simply passes everything through
34 * unchanged.
36 * Based on SunOS 4.x version. This version has minor changes:
37 * - general SVR4 porting stuff
38 * - change name and prefixes from "nit" buffer to streams buffer
39 * - multithreading assumes configured as D_MTQPAIR
42 #include <sys/types.h>
43 #include <sys/sysmacros.h>
44 #include <sys/errno.h>
45 #include <sys/debug.h>
46 #include <sys/time.h>
47 #include <sys/stropts.h>
48 #include <sys/stream.h>
49 #include <sys/conf.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/kmem.h>
53 #include <sys/strsun.h>
54 #include <sys/pfmod.h>
55 #include <sys/modctl.h>
58 * Expanded version of the Packetfilt structure that includes
59 * some additional fields that aid filter execution efficiency.
61 struct epacketfilt {
62 struct Pf_ext_packetfilt pf;
63 #define pf_Priority pf.Pf_Priority
64 #define pf_FilterLen pf.Pf_FilterLen
65 #define pf_Filter pf.Pf_Filter
66 /* pointer to word immediately past end of filter */
67 ushort_t *pf_FilterEnd;
68 /* length in bytes of packet prefix the filter examines */
69 ushort_t pf_PByteLen;
73 * (Internal) packet descriptor for FilterPacket
75 struct packdesc {
76 ushort_t *pd_hdr; /* header starting address */
77 uint_t pd_hdrlen; /* header length in shorts */
78 ushort_t *pd_body; /* body starting address */
79 uint_t pd_bodylen; /* body length in shorts */
84 * Function prototypes.
86 static int pfopen(queue_t *, dev_t *, int, int, cred_t *);
87 static int pfclose(queue_t *, int, cred_t *);
88 static void pfioctl(queue_t *wq, mblk_t *mp);
89 static int FilterPacket(struct packdesc *, struct epacketfilt *);
90 static int pfwput(queue_t *, mblk_t *);
91 static int pfrput(queue_t *, mblk_t *);
93 static struct module_info pf_minfo = {
94 22, /* mi_idnum */
95 "pfmod", /* mi_idname */
96 0, /* mi_minpsz */
97 INFPSZ, /* mi_maxpsz */
98 0, /* mi_hiwat */
99 0 /* mi_lowat */
102 static struct qinit pf_rinit = {
103 pfrput, /* qi_putp */
104 NULL,
105 pfopen, /* qi_qopen */
106 pfclose, /* qi_qclose */
107 NULL, /* qi_qadmin */
108 &pf_minfo, /* qi_minfo */
109 NULL /* qi_mstat */
112 static struct qinit pf_winit = {
113 pfwput, /* qi_putp */
114 NULL, /* qi_srvp */
115 NULL, /* qi_qopen */
116 NULL, /* qi_qclose */
117 NULL, /* qi_qadmin */
118 &pf_minfo, /* qi_minfo */
119 NULL /* qi_mstat */
122 static struct streamtab pf_info = {
123 &pf_rinit, /* st_rdinit */
124 &pf_winit, /* st_wrinit */
125 NULL, /* st_muxrinit */
126 NULL /* st_muxwinit */
129 static struct fmodsw fsw = {
130 "pfmod",
131 &pf_info,
132 D_MTQPAIR | D_MP
135 static struct modlstrmod modlstrmod = {
136 &mod_strmodops, "streams packet filter module", &fsw
139 static struct modlinkage modlinkage = {
140 MODREV_1, &modlstrmod, NULL
144 _init(void)
146 return (mod_install(&modlinkage));
150 _fini(void)
152 return (mod_remove(&modlinkage));
156 _info(struct modinfo *modinfop)
158 return (mod_info(&modlinkage, modinfop));
161 /*ARGSUSED*/
162 static int
163 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
165 struct epacketfilt *pfp;
167 ASSERT(rq);
169 if (sflag != MODOPEN)
170 return (EINVAL);
172 if (rq->q_ptr)
173 return (0);
176 * Allocate and initialize per-Stream structure.
178 pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP);
179 rq->q_ptr = WR(rq)->q_ptr = (char *)pfp;
181 qprocson(rq);
183 return (0);
186 /* ARGSUSED */
187 static int
188 pfclose(queue_t *rq, int flags __unused, cred_t *credp __unused)
190 struct epacketfilt *pfp = (struct epacketfilt *)rq->q_ptr;
192 ASSERT(pfp);
194 qprocsoff(rq);
196 kmem_free(pfp, sizeof (struct epacketfilt));
197 rq->q_ptr = WR(rq)->q_ptr = NULL;
199 return (0);
203 * Write-side put procedure. Its main task is to detect ioctls.
204 * Other message types are passed on through.
206 static int
207 pfwput(queue_t *wq, mblk_t *mp)
209 switch (mp->b_datap->db_type) {
210 case M_IOCTL:
211 pfioctl(wq, mp);
212 break;
214 default:
215 putnext(wq, mp);
216 break;
218 return (0);
222 * Read-side put procedure. It's responsible for applying the
223 * packet filter and passing upstream message on or discarding it
224 * depending upon the results.
226 * Upstream messages can start with zero or more M_PROTO mblks
227 * which are skipped over before executing the packet filter
228 * on any remaining M_DATA mblks.
230 static int
231 pfrput(queue_t *rq, mblk_t *mp)
233 struct epacketfilt *pfp = (struct epacketfilt *)rq->q_ptr;
234 mblk_t *mbp, *mpp;
235 struct packdesc pd;
236 int need;
238 ASSERT(pfp);
240 switch (DB_TYPE(mp)) {
241 case M_PROTO:
242 case M_DATA:
244 * Skip over protocol information and find the start
245 * of the message body, saving the overall message
246 * start in mpp.
248 for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont)
252 * Null body (exclusive of M_PROTO blocks) ==> accept.
253 * Note that a null body is not the same as an empty body.
255 if (mp == NULL) {
256 putnext(rq, mpp);
257 break;
261 * Pull the packet up to the length required by
262 * the filter. Note that doing so destroys sharing
263 * relationships, which is unfortunate, since the
264 * results of pulling up here are likely to be useful
265 * for shared messages applied to a filter on a sibling
266 * stream.
268 * Most packet sources will provide the packet in two
269 * logical pieces: an initial header in a single mblk,
270 * and a body in a sequence of mblks hooked to the
271 * header. We're prepared to deal with variant forms,
272 * but in any case, the pullup applies only to the body
273 * part.
275 mbp = mp->b_cont;
276 need = pfp->pf_PByteLen;
277 if (mbp && (MBLKL(mbp) < need)) {
278 int len = msgdsize(mbp);
280 /* XXX discard silently on pullupmsg failure */
281 if (pullupmsg(mbp, MIN(need, len)) == 0) {
282 freemsg(mpp);
283 break;
288 * Misalignment (not on short boundary) ==> reject.
290 if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) ||
291 (mbp != NULL &&
292 ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) {
293 freemsg(mpp);
294 break;
298 * These assignments are distasteful, but necessary,
299 * since the packet filter wants to work in terms of
300 * shorts. Odd bytes at the end of header or data can't
301 * participate in the filtering operation.
303 pd.pd_hdr = (ushort_t *)mp->b_rptr;
304 pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t);
305 if (mbp) {
306 pd.pd_body = (ushort_t *)mbp->b_rptr;
307 pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) /
308 sizeof (ushort_t);
309 } else {
310 pd.pd_body = NULL;
311 pd.pd_bodylen = 0;
315 * Apply the filter.
317 if (FilterPacket(&pd, pfp))
318 putnext(rq, mpp);
319 else
320 freemsg(mpp);
322 break;
324 default:
325 putnext(rq, mp);
326 break;
328 return (0);
332 * Handle write-side M_IOCTL messages.
334 static void
335 pfioctl(queue_t *wq, mblk_t *mp)
337 struct epacketfilt *pfp = (struct epacketfilt *)wq->q_ptr;
338 struct Pf_ext_packetfilt *upfp;
339 struct packetfilt *opfp;
340 ushort_t *fwp;
341 int arg;
342 int maxoff = 0;
343 int maxoffreg = 0;
344 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
345 int error;
347 switch (iocp->ioc_cmd) {
348 case PFIOCSETF:
350 * Verify argument length. Since the size of packet filter
351 * got increased (ENMAXFILTERS was bumped up to 2047), to
352 * maintain backwards binary compatibility, we need to
353 * check for both possible sizes.
355 switch (iocp->ioc_count) {
356 case sizeof (struct Pf_ext_packetfilt):
357 error = miocpullup(mp,
358 sizeof (struct Pf_ext_packetfilt));
359 if (error != 0) {
360 miocnak(wq, mp, 0, error);
361 return;
363 upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr;
364 if (upfp->Pf_FilterLen > PF_MAXFILTERS) {
365 miocnak(wq, mp, 0, EINVAL);
366 return;
369 bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt));
370 pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
371 break;
373 case sizeof (struct packetfilt):
374 error = miocpullup(mp, sizeof (struct packetfilt));
375 if (error != 0) {
376 miocnak(wq, mp, 0, error);
377 return;
379 opfp = (struct packetfilt *)mp->b_cont->b_rptr;
380 /* this strange comparison keeps gcc from complaining */
381 if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) {
382 miocnak(wq, mp, 0, EINVAL);
383 return;
386 pfp->pf.Pf_Priority = opfp->Pf_Priority;
387 pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen;
389 bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter,
390 sizeof (opfp->Pf_Filter));
391 pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
392 break;
394 default:
395 miocnak(wq, mp, 0, EINVAL);
396 return;
400 * Find and record maximum byte offset that the
401 * filter users. We use this when executing the
402 * filter to determine how much of the packet
403 * body to pull up. This code depends on the
404 * filter encoding.
406 for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) {
407 arg = *fwp & ((1 << ENF_NBPA) - 1);
408 switch (arg) {
409 default:
410 if ((arg -= ENF_PUSHWORD) > maxoff)
411 maxoff = arg;
412 break;
414 case ENF_LOAD_OFFSET:
415 /* Point to the offset */
416 fwp++;
417 if (*fwp > maxoffreg)
418 maxoffreg = *fwp;
419 break;
421 case ENF_PUSHLIT:
422 case ENF_BRTR:
423 case ENF_BRFL:
424 /* Skip over the literal. */
425 fwp++;
426 break;
428 case ENF_PUSHZERO:
429 case ENF_PUSHONE:
430 case ENF_PUSHFFFF:
431 case ENF_PUSHFF00:
432 case ENF_PUSH00FF:
433 case ENF_NOPUSH:
434 case ENF_POP:
435 break;
440 * Convert word offset to length in bytes.
442 pfp->pf_PByteLen = (maxoff + maxoffreg + 1) * sizeof (ushort_t);
443 miocack(wq, mp, 0, 0);
444 break;
446 default:
447 putnext(wq, mp);
448 break;
/*
 * Filter-execution tracing.  enprintf() takes a single, parenthesized
 * printf argument list.  It expands to a printf call when INNERDEBUG
 * is defined and to nothing otherwise.  Uncomment the definitions
 * below to turn debugging on.
 */
/* #define	DEBUG		1 */
/* #define	INNERDEBUG	1 */

#if defined(INNERDEBUG)
#define	enprintf(a)	printf a
#else	/* !INNERDEBUG */
#define	enprintf(a)
#endif	/* INNERDEBUG */
462 * Apply the packet filter given by pfp to the packet given by
463 * pp. Return nonzero iff the filter accepts the packet.
465 * The packet comes in two pieces, a header and a body, since
466 * that's the most convenient form for our caller. The header
467 * is in contiguous memory, whereas the body is in a mbuf.
468 * Our caller will have adjusted the mbuf chain so that its first
469 * min(MLEN, length(body)) bytes are guaranteed contiguous. For
470 * the sake of efficiency (and some laziness) the filter is prepared
471 * to examine only these two contiguous pieces. Furthermore, it
472 * assumes that the header length is even, so that there's no need
473 * to glue the last byte of header to the first byte of data.
476 #define opx(i) ((i) >> ENF_NBPA)
478 static int
479 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp)
481 int maxhdr = pp->pd_hdrlen;
482 int maxword = maxhdr + pp->pd_bodylen;
483 ushort_t *sp;
484 ushort_t *fp;
485 ushort_t *fpe;
486 unsigned op;
487 unsigned arg;
488 unsigned offreg = 0;
489 ushort_t stack[ENMAXFILTERS+1];
491 fp = &pfp->pf_Filter[0];
492 fpe = pfp->pf_FilterEnd;
494 enprintf(("FilterPacket(%p, %p, %p, %p):\n", pp, pfp, fp, fpe));
497 * Push TRUE on stack to start. The stack size is chosen such
498 * that overflow can't occur -- each operation can push at most
499 * one item on the stack, and the stack size equals the maximum
500 * program length.
502 sp = &stack[ENMAXFILTERS];
503 *sp = 1;
505 while (fp < fpe) {
506 op = *fp >> ENF_NBPA;
507 arg = *fp & ((1 << ENF_NBPA) - 1);
508 fp++;
510 switch (arg) {
511 default:
512 arg -= ENF_PUSHWORD;
514 * Since arg is unsigned,
515 * if it were less than ENF_PUSHWORD before,
516 * it would now be huge.
518 if (arg + offreg < maxhdr)
519 *--sp = pp->pd_hdr[arg + offreg];
520 else if (arg + offreg < maxword)
521 *--sp = pp->pd_body[arg - maxhdr + offreg];
522 else {
523 enprintf(("=>0(len)\n"));
524 return (0);
526 break;
527 case ENF_PUSHLIT:
528 *--sp = *fp++;
529 break;
530 case ENF_PUSHZERO:
531 *--sp = 0;
532 break;
533 case ENF_PUSHONE:
534 *--sp = 1;
535 break;
536 case ENF_PUSHFFFF:
537 *--sp = 0xffff;
538 break;
539 case ENF_PUSHFF00:
540 *--sp = 0xff00;
541 break;
542 case ENF_PUSH00FF:
543 *--sp = 0x00ff;
544 break;
545 case ENF_LOAD_OFFSET:
546 offreg = *fp++;
547 break;
548 case ENF_BRTR:
549 if (*sp != 0)
550 fp += *fp;
551 else
552 fp++;
553 if (fp >= fpe) {
554 enprintf(("BRTR: fp>=fpe\n"));
555 return (0);
557 break;
558 case ENF_BRFL:
559 if (*sp == 0)
560 fp += *fp;
561 else
562 fp++;
563 if (fp >= fpe) {
564 enprintf(("BRFL: fp>=fpe\n"));
565 return (0);
567 break;
568 case ENF_POP:
569 ++sp;
570 if (sp > &stack[ENMAXFILTERS]) {
571 enprintf(("stack underflow\n"));
572 return (0);
574 break;
575 case ENF_NOPUSH:
576 break;
579 if (sp < &stack[2]) { /* check stack overflow: small yellow zone */
580 enprintf(("=>0(--sp)\n"));
581 return (0);
584 if (op == ENF_NOP)
585 continue;
588 * all non-NOP operators binary, must have at least two operands
589 * on stack to evaluate.
591 if (sp > &stack[ENMAXFILTERS-2]) {
592 enprintf(("=>0(sp++)\n"));
593 return (0);
596 arg = *sp++;
597 switch (op) {
598 default:
599 enprintf(("=>0(def)\n"));
600 return (0);
601 case opx(ENF_AND):
602 *sp &= arg;
603 break;
604 case opx(ENF_OR):
605 *sp |= arg;
606 break;
607 case opx(ENF_XOR):
608 *sp ^= arg;
609 break;
610 case opx(ENF_EQ):
611 *sp = (*sp == arg);
612 break;
613 case opx(ENF_NEQ):
614 *sp = (*sp != arg);
615 break;
616 case opx(ENF_LT):
617 *sp = (*sp < arg);
618 break;
619 case opx(ENF_LE):
620 *sp = (*sp <= arg);
621 break;
622 case opx(ENF_GT):
623 *sp = (*sp > arg);
624 break;
625 case opx(ENF_GE):
626 *sp = (*sp >= arg);
627 break;
629 /* short-circuit operators */
631 case opx(ENF_COR):
632 if (*sp++ == arg) {
633 enprintf(("=>COR %x\n", *sp));
634 return (1);
636 break;
637 case opx(ENF_CAND):
638 if (*sp++ != arg) {
639 enprintf(("=>CAND %x\n", *sp));
640 return (0);
642 break;
643 case opx(ENF_CNOR):
644 if (*sp++ == arg) {
645 enprintf(("=>COR %x\n", *sp));
646 return (0);
648 break;
649 case opx(ENF_CNAND):
650 if (*sp++ != arg) {
651 enprintf(("=>CNAND %x\n", *sp));
652 return (1);
654 break;
657 enprintf(("=>%x\n", *sp));
658 return (*sp);