Merge commit 'cb41b9c565d4eec9e1f06e24d429696f59f2f07d'
[unleashed.git] / usr / src / uts / common / io / pfmod.c
blob9a7d66e995f2534af3a6454cb23efe529d36fe0a
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
29 * STREAMS Packet Filter Module
31 * This module applies a filter to messages arriving on its read
32 * queue, passing on messages that the filter accepts and discarding
33 * the others. It supports ioctls for setting the filter.
35 * On the write side, the module simply passes everything through
36 * unchanged.
38 * Based on SunOS 4.x version. This version has minor changes:
39 * - general SVR4 porting stuff
40 * - change name and prefixes from "nit" buffer to streams buffer
41 * - multithreading assumes configured as D_MTQPAIR
44 #include <sys/types.h>
45 #include <sys/sysmacros.h>
46 #include <sys/errno.h>
47 #include <sys/debug.h>
48 #include <sys/time.h>
49 #include <sys/stropts.h>
50 #include <sys/stream.h>
51 #include <sys/conf.h>
52 #include <sys/ddi.h>
53 #include <sys/sunddi.h>
54 #include <sys/kmem.h>
55 #include <sys/strsun.h>
56 #include <sys/pfmod.h>
57 #include <sys/modctl.h>
60 * Expanded version of the Packetfilt structure that includes
61 * some additional fields that aid filter execution efficiency.
63 struct epacketfilt {
64 struct Pf_ext_packetfilt pf;
65 #define pf_Priority pf.Pf_Priority
66 #define pf_FilterLen pf.Pf_FilterLen
67 #define pf_Filter pf.Pf_Filter
68 /* pointer to word immediately past end of filter */
69 ushort_t *pf_FilterEnd;
70 /* length in bytes of packet prefix the filter examines */
71 ushort_t pf_PByteLen;
75 * (Internal) packet descriptor for FilterPacket
77 struct packdesc {
78 ushort_t *pd_hdr; /* header starting address */
79 uint_t pd_hdrlen; /* header length in shorts */
80 ushort_t *pd_body; /* body starting address */
81 uint_t pd_bodylen; /* body length in shorts */
86 * Function prototypes.
88 static int pfopen(queue_t *, dev_t *, int, int, cred_t *);
89 static int pfclose(queue_t *);
90 static void pfioctl(queue_t *wq, mblk_t *mp);
91 static int FilterPacket(struct packdesc *, struct epacketfilt *);
93 * To save instructions, since STREAMS ignores the return value
94 * from these functions, they are defined as void here. Kind of icky, but...
96 static void pfwput(queue_t *, mblk_t *);
97 static void pfrput(queue_t *, mblk_t *);
99 static struct module_info pf_minfo = {
100 22, /* mi_idnum */
101 "pfmod", /* mi_idname */
102 0, /* mi_minpsz */
103 INFPSZ, /* mi_maxpsz */
104 0, /* mi_hiwat */
105 0 /* mi_lowat */
108 static struct qinit pf_rinit = {
109 (int (*)())pfrput, /* qi_putp */
110 NULL,
111 pfopen, /* qi_qopen */
112 pfclose, /* qi_qclose */
113 NULL, /* qi_qadmin */
114 &pf_minfo, /* qi_minfo */
115 NULL /* qi_mstat */
118 static struct qinit pf_winit = {
119 (int (*)())pfwput, /* qi_putp */
120 NULL, /* qi_srvp */
121 NULL, /* qi_qopen */
122 NULL, /* qi_qclose */
123 NULL, /* qi_qadmin */
124 &pf_minfo, /* qi_minfo */
125 NULL /* qi_mstat */
128 static struct streamtab pf_info = {
129 &pf_rinit, /* st_rdinit */
130 &pf_winit, /* st_wrinit */
131 NULL, /* st_muxrinit */
132 NULL /* st_muxwinit */
135 static struct fmodsw fsw = {
136 "pfmod",
137 &pf_info,
138 D_MTQPAIR | D_MP
141 static struct modlstrmod modlstrmod = {
142 &mod_strmodops, "streams packet filter module", &fsw
145 static struct modlinkage modlinkage = {
146 MODREV_1, &modlstrmod, NULL
150 _init(void)
152 return (mod_install(&modlinkage));
156 _fini(void)
158 return (mod_remove(&modlinkage));
162 _info(struct modinfo *modinfop)
164 return (mod_info(&modlinkage, modinfop));
167 /*ARGSUSED*/
168 static int
169 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
171 struct epacketfilt *pfp;
173 ASSERT(rq);
175 if (sflag != MODOPEN)
176 return (EINVAL);
178 if (rq->q_ptr)
179 return (0);
182 * Allocate and initialize per-Stream structure.
184 pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP);
185 rq->q_ptr = WR(rq)->q_ptr = (char *)pfp;
187 qprocson(rq);
189 return (0);
192 static int
193 pfclose(queue_t *rq)
195 struct epacketfilt *pfp = (struct epacketfilt *)rq->q_ptr;
197 ASSERT(pfp);
199 qprocsoff(rq);
201 kmem_free(pfp, sizeof (struct epacketfilt));
202 rq->q_ptr = WR(rq)->q_ptr = NULL;
204 return (0);
208 * Write-side put procedure. Its main task is to detect ioctls.
209 * Other message types are passed on through.
211 static void
212 pfwput(queue_t *wq, mblk_t *mp)
214 switch (mp->b_datap->db_type) {
215 case M_IOCTL:
216 pfioctl(wq, mp);
217 break;
219 default:
220 putnext(wq, mp);
221 break;
226 * Read-side put procedure. It's responsible for applying the
227 * packet filter and passing upstream message on or discarding it
228 * depending upon the results.
230 * Upstream messages can start with zero or more M_PROTO mblks
231 * which are skipped over before executing the packet filter
232 * on any remaining M_DATA mblks.
234 static void
235 pfrput(queue_t *rq, mblk_t *mp)
237 struct epacketfilt *pfp = (struct epacketfilt *)rq->q_ptr;
238 mblk_t *mbp, *mpp;
239 struct packdesc pd;
240 int need;
242 ASSERT(pfp);
244 switch (DB_TYPE(mp)) {
245 case M_PROTO:
246 case M_DATA:
248 * Skip over protocol information and find the start
249 * of the message body, saving the overall message
250 * start in mpp.
252 for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont)
256 * Null body (exclusive of M_PROTO blocks) ==> accept.
257 * Note that a null body is not the same as an empty body.
259 if (mp == NULL) {
260 putnext(rq, mpp);
261 break;
265 * Pull the packet up to the length required by
266 * the filter. Note that doing so destroys sharing
267 * relationships, which is unfortunate, since the
268 * results of pulling up here are likely to be useful
269 * for shared messages applied to a filter on a sibling
270 * stream.
272 * Most packet sources will provide the packet in two
273 * logical pieces: an initial header in a single mblk,
274 * and a body in a sequence of mblks hooked to the
275 * header. We're prepared to deal with variant forms,
276 * but in any case, the pullup applies only to the body
277 * part.
279 mbp = mp->b_cont;
280 need = pfp->pf_PByteLen;
281 if (mbp && (MBLKL(mbp) < need)) {
282 int len = msgdsize(mbp);
284 /* XXX discard silently on pullupmsg failure */
285 if (pullupmsg(mbp, MIN(need, len)) == 0) {
286 freemsg(mpp);
287 break;
292 * Misalignment (not on short boundary) ==> reject.
294 if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) ||
295 (mbp != NULL &&
296 ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) {
297 freemsg(mpp);
298 break;
302 * These assignments are distasteful, but necessary,
303 * since the packet filter wants to work in terms of
304 * shorts. Odd bytes at the end of header or data can't
305 * participate in the filtering operation.
307 pd.pd_hdr = (ushort_t *)mp->b_rptr;
308 pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t);
309 if (mbp) {
310 pd.pd_body = (ushort_t *)mbp->b_rptr;
311 pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) /
312 sizeof (ushort_t);
313 } else {
314 pd.pd_body = NULL;
315 pd.pd_bodylen = 0;
319 * Apply the filter.
321 if (FilterPacket(&pd, pfp))
322 putnext(rq, mpp);
323 else
324 freemsg(mpp);
326 break;
328 default:
329 putnext(rq, mp);
330 break;
336 * Handle write-side M_IOCTL messages.
338 static void
339 pfioctl(queue_t *wq, mblk_t *mp)
341 struct epacketfilt *pfp = (struct epacketfilt *)wq->q_ptr;
342 struct Pf_ext_packetfilt *upfp;
343 struct packetfilt *opfp;
344 ushort_t *fwp;
345 int arg;
346 int maxoff = 0;
347 int maxoffreg = 0;
348 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
349 int error;
351 switch (iocp->ioc_cmd) {
352 case PFIOCSETF:
354 * Verify argument length. Since the size of packet filter
355 * got increased (ENMAXFILTERS was bumped up to 2047), to
356 * maintain backwards binary compatibility, we need to
357 * check for both possible sizes.
359 switch (iocp->ioc_count) {
360 case sizeof (struct Pf_ext_packetfilt):
361 error = miocpullup(mp,
362 sizeof (struct Pf_ext_packetfilt));
363 if (error != 0) {
364 miocnak(wq, mp, 0, error);
365 return;
367 upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr;
368 if (upfp->Pf_FilterLen > PF_MAXFILTERS) {
369 miocnak(wq, mp, 0, EINVAL);
370 return;
373 bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt));
374 pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
375 break;
377 case sizeof (struct packetfilt):
378 error = miocpullup(mp, sizeof (struct packetfilt));
379 if (error != 0) {
380 miocnak(wq, mp, 0, error);
381 return;
383 opfp = (struct packetfilt *)mp->b_cont->b_rptr;
384 /* this strange comparison keeps gcc from complaining */
385 if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) {
386 miocnak(wq, mp, 0, EINVAL);
387 return;
390 pfp->pf.Pf_Priority = opfp->Pf_Priority;
391 pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen;
393 bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter,
394 sizeof (opfp->Pf_Filter));
395 pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
396 break;
398 default:
399 miocnak(wq, mp, 0, EINVAL);
400 return;
404 * Find and record maximum byte offset that the
405 * filter users. We use this when executing the
406 * filter to determine how much of the packet
407 * body to pull up. This code depends on the
408 * filter encoding.
410 for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) {
411 arg = *fwp & ((1 << ENF_NBPA) - 1);
412 switch (arg) {
413 default:
414 if ((arg -= ENF_PUSHWORD) > maxoff)
415 maxoff = arg;
416 break;
418 case ENF_LOAD_OFFSET:
419 /* Point to the offset */
420 fwp++;
421 if (*fwp > maxoffreg)
422 maxoffreg = *fwp;
423 break;
425 case ENF_PUSHLIT:
426 case ENF_BRTR:
427 case ENF_BRFL:
428 /* Skip over the literal. */
429 fwp++;
430 break;
432 case ENF_PUSHZERO:
433 case ENF_PUSHONE:
434 case ENF_PUSHFFFF:
435 case ENF_PUSHFF00:
436 case ENF_PUSH00FF:
437 case ENF_NOPUSH:
438 case ENF_POP:
439 break;
444 * Convert word offset to length in bytes.
446 pfp->pf_PByteLen = (maxoff + maxoffreg + 1) * sizeof (ushort_t);
447 miocack(wq, mp, 0, 0);
448 break;
450 default:
451 putnext(wq, mp);
452 break;
/*
 * Filter tracing.  enprintf takes a parenthesized printf argument list,
 * e.g. enprintf(("x=%d\n", x)).  It expands to nothing unless
 * INNERDEBUG is defined, in which case it becomes a printf call.
 * To enable tracing, define DEBUG and INNERDEBUG to 1 here.
 */
#ifndef	INNERDEBUG
#define	enprintf(a)
#else
#define	enprintf(a)	printf a
#endif
466 * Apply the packet filter given by pfp to the packet given by
467 * pp. Return nonzero iff the filter accepts the packet.
469 * The packet comes in two pieces, a header and a body, since
470 * that's the most convenient form for our caller. The header
471 * is in contiguous memory, whereas the body is in a mbuf.
472 * Our caller will have adjusted the mbuf chain so that its first
473 * min(MLEN, length(body)) bytes are guaranteed contiguous. For
474 * the sake of efficiency (and some laziness) the filter is prepared
475 * to examine only these two contiguous pieces. Furthermore, it
476 * assumes that the header length is even, so that there's no need
477 * to glue the last byte of header to the first byte of data.
480 #define opx(i) ((i) >> ENF_NBPA)
482 static int
483 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp)
485 int maxhdr = pp->pd_hdrlen;
486 int maxword = maxhdr + pp->pd_bodylen;
487 ushort_t *sp;
488 ushort_t *fp;
489 ushort_t *fpe;
490 unsigned op;
491 unsigned arg;
492 unsigned offreg = 0;
493 ushort_t stack[ENMAXFILTERS+1];
495 fp = &pfp->pf_Filter[0];
496 fpe = pfp->pf_FilterEnd;
498 enprintf(("FilterPacket(%p, %p, %p, %p):\n", pp, pfp, fp, fpe));
501 * Push TRUE on stack to start. The stack size is chosen such
502 * that overflow can't occur -- each operation can push at most
503 * one item on the stack, and the stack size equals the maximum
504 * program length.
506 sp = &stack[ENMAXFILTERS];
507 *sp = 1;
509 while (fp < fpe) {
510 op = *fp >> ENF_NBPA;
511 arg = *fp & ((1 << ENF_NBPA) - 1);
512 fp++;
514 switch (arg) {
515 default:
516 arg -= ENF_PUSHWORD;
518 * Since arg is unsigned,
519 * if it were less than ENF_PUSHWORD before,
520 * it would now be huge.
522 if (arg + offreg < maxhdr)
523 *--sp = pp->pd_hdr[arg + offreg];
524 else if (arg + offreg < maxword)
525 *--sp = pp->pd_body[arg - maxhdr + offreg];
526 else {
527 enprintf(("=>0(len)\n"));
528 return (0);
530 break;
531 case ENF_PUSHLIT:
532 *--sp = *fp++;
533 break;
534 case ENF_PUSHZERO:
535 *--sp = 0;
536 break;
537 case ENF_PUSHONE:
538 *--sp = 1;
539 break;
540 case ENF_PUSHFFFF:
541 *--sp = 0xffff;
542 break;
543 case ENF_PUSHFF00:
544 *--sp = 0xff00;
545 break;
546 case ENF_PUSH00FF:
547 *--sp = 0x00ff;
548 break;
549 case ENF_LOAD_OFFSET:
550 offreg = *fp++;
551 break;
552 case ENF_BRTR:
553 if (*sp != 0)
554 fp += *fp;
555 else
556 fp++;
557 if (fp >= fpe) {
558 enprintf(("BRTR: fp>=fpe\n"));
559 return (0);
561 break;
562 case ENF_BRFL:
563 if (*sp == 0)
564 fp += *fp;
565 else
566 fp++;
567 if (fp >= fpe) {
568 enprintf(("BRFL: fp>=fpe\n"));
569 return (0);
571 break;
572 case ENF_POP:
573 ++sp;
574 if (sp > &stack[ENMAXFILTERS]) {
575 enprintf(("stack underflow\n"));
576 return (0);
578 break;
579 case ENF_NOPUSH:
580 break;
583 if (sp < &stack[2]) { /* check stack overflow: small yellow zone */
584 enprintf(("=>0(--sp)\n"));
585 return (0);
588 if (op == ENF_NOP)
589 continue;
592 * all non-NOP operators binary, must have at least two operands
593 * on stack to evaluate.
595 if (sp > &stack[ENMAXFILTERS-2]) {
596 enprintf(("=>0(sp++)\n"));
597 return (0);
600 arg = *sp++;
601 switch (op) {
602 default:
603 enprintf(("=>0(def)\n"));
604 return (0);
605 case opx(ENF_AND):
606 *sp &= arg;
607 break;
608 case opx(ENF_OR):
609 *sp |= arg;
610 break;
611 case opx(ENF_XOR):
612 *sp ^= arg;
613 break;
614 case opx(ENF_EQ):
615 *sp = (*sp == arg);
616 break;
617 case opx(ENF_NEQ):
618 *sp = (*sp != arg);
619 break;
620 case opx(ENF_LT):
621 *sp = (*sp < arg);
622 break;
623 case opx(ENF_LE):
624 *sp = (*sp <= arg);
625 break;
626 case opx(ENF_GT):
627 *sp = (*sp > arg);
628 break;
629 case opx(ENF_GE):
630 *sp = (*sp >= arg);
631 break;
633 /* short-circuit operators */
635 case opx(ENF_COR):
636 if (*sp++ == arg) {
637 enprintf(("=>COR %x\n", *sp));
638 return (1);
640 break;
641 case opx(ENF_CAND):
642 if (*sp++ != arg) {
643 enprintf(("=>CAND %x\n", *sp));
644 return (0);
646 break;
647 case opx(ENF_CNOR):
648 if (*sp++ == arg) {
649 enprintf(("=>COR %x\n", *sp));
650 return (0);
652 break;
653 case opx(ENF_CNAND):
654 if (*sp++ != arg) {
655 enprintf(("=>CNAND %x\n", *sp));
656 return (1);
658 break;
661 enprintf(("=>%x\n", *sp));
662 return (*sp);