Kill st_fstype member.
[linux-2.6/linux-mips.git] / net / sched / cls_rsvp.h
blobbe4471d78c5b364bc81b61337223bed7bb83b3be
1 /*
2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 Compared to the general packet classification problem,
14 RSVP needs only several relatively simple rules:
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
22 IMPLEMENTATION.
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
36 NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
40 NOTE 2. Two "port id"s seem to be redundant, rfc2207 requires
41 only one "Generalized Port Identifier". So that for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
43 should be wildcard.
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. Look also at note 4 about tunnels below.
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with "tunnelhdr" value not zero,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 F.e. DPI can select ETH_P_IP (and necessary flags to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
65 Well, as result, despite its simplicity, we get a pretty
66 powerful classification engine. */
68 #include <linux/config.h>
/*
 * Root state of one classifier instance (stored in tp->root): tunnel ID
 * allocation state, the handle generator and the first-level session table.
 */
70 struct rsvp_head
/* Bitmap with one bit per tunnel ID (256 bits); bits are set by tunnel_bts(). */
72 u32 tmap[256/32];
/* Counter advanced in steps of 0x10000 by gen_handle() to build new handles. */
73 u32 hgenerator;
/* Tunnel ID most recently tried by gen_tunnel() / tested by tunnel_bts(). */
74 u8 tgenerator;
/* First-level hash table of session lists, indexed by hash_dst(). */
75 struct rsvp_session *ht[256];
/*
 * One RSVP session: identified by destination address, protocol, tunnel ID
 * and the destination port identifier (dpi). Owns the second-level table of
 * filters.
 */
78 struct rsvp_session
/* Next session in the same rsvp_head.ht[] bucket. */
80 struct rsvp_session *next;
/* Destination address, RSVP_DST_LEN 32-bit words. */
81 u32 dst[RSVP_DST_LEN];
/* Destination port identifier: (key, mask, offset) checked in rsvp_classify(). */
82 struct tc_rsvp_gpi dpi;
83 u8 protocol;
/* Tunnel ID this session belongs to; rsvp_classify() starts with 0. */
84 u8 tunnelid;
85 /* 16 (src,sport) hash slots, and one wildcard source slot */
86 struct rsvp_filter *ht[16+1];
/*
 * One RSVP flow (filter) inside a session: selected by source address and
 * source port identifier (spi), carrying the classification result.
 */
90 struct rsvp_filter
/* Next filter in the same rsvp_session.ht[] bucket. */
92 struct rsvp_filter *next;
/* Source address; left zeroed for wildcard filters (bucket 16). */
93 u32 src[RSVP_DST_LEN];
/* Source port identifier: (key, mask, offset) checked in rsvp_classify(). */
94 struct tc_rsvp_gpi spi;
/*
 * Non-zero marks a tunnel filter: rsvp_classify() then uses res.classid as
 * the tunnel ID and restarts the lookup on the inner header.
 */
95 u8 tunnelhdr;
/* Classification result copied to the caller on a match. */
97 struct tcf_result res;
98 #ifdef CONFIG_NET_CLS_POLICE
/* Optional policer consulted through RSVP_POLICE(). */
99 struct tcf_police *police;
100 #endif
/*
 * Filter handle: bits 0-7 are the session bucket, bits 8-15 the filter
 * bucket (see rsvp_get()), the upper 16 bits come from gen_handle().
 */
102 u32 handle;
/* Back pointer to the owning session. */
103 struct rsvp_session *sess;
106 static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid)
108 unsigned h = dst[RSVP_DST_LEN-1];
109 h ^= h>>16;
110 h ^= h>>8;
111 return (h ^ protocol ^ tunnelid) & 0xFF;
114 static __inline__ unsigned hash_src(u32 *src)
116 unsigned h = src[RSVP_DST_LEN-1];
117 h ^= h>>16;
118 h ^= h>>8;
119 h ^= h>>4;
120 return h & 0xF;
/*
 * RSVP_POLICE(): policing hook used inside the filter loops of
 * rsvp_classify(); it expects `f` and `skb` to be in scope.
 * If the filter has a policer, tcf_police() is run on the packet:
 *   - a negative result skips to the next filter (`continue`),
 *   - a non-zero positive result is returned from the enclosing function,
 *   - zero falls through to normal match handling.
 * Expands to nothing when CONFIG_NET_CLS_POLICE is not defined.
 */
123 #ifdef CONFIG_NET_CLS_POLICE
124 #define RSVP_POLICE() \
125 if (f->police) { \
126 int pol_res = tcf_police(skb, f->police); \
127 if (pol_res < 0) continue; \
128 if (pol_res) return pol_res; \
130 #else
131 #define RSVP_POLICE()
132 #endif
/*
 * Classify one packet against the RSVP session/filter tables.
 *
 * The session is looked up by (dst, protocol, tunnelid, DPI) in bucket
 * hash_dst(); inside the matching session the filter is looked up by
 * (src, SPI) in bucket hash_src(), then in the wildcard bucket ht[16].
 * On a match f->res is copied to *res.
 *
 * Returns 0 on a match whose filter has tunnelhdr == 0, a non-zero policer
 * verdict if RSVP_POLICE() returns one, and -1 when the header is
 * misaligned (non-i386/m68k only), the IPv4 packet is a fragment, or
 * nothing matches.
 */
135 static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
136 struct tcf_result *res)
138 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
139 struct rsvp_session *s;
140 struct rsvp_filter *f;
141 unsigned h1, h2;
142 u32 *dst, *src;
143 u8 protocol;
/* Tunnel ID is 0 for the outermost lookup; set from a tunnel filter below. */
144 u8 tunnelid = 0;
145 u8 *xprt;
/* RSVP_DST_LEN == 4 selects the IPv6 variant of this template. */
146 #if RSVP_DST_LEN == 4
147 struct ipv6hdr *nhptr = skb->nh.ipv6h;
148 #else
149 struct iphdr *nhptr = skb->nh.iph;
150 #endif
/* Refuse headers that are not 4-byte aligned on all but i386 and m68k. */
152 #if !defined( __i386__) && !defined(__mc68000__)
153 if ((unsigned long)nhptr & 3)
154 return -1;
155 #endif
/* Re-entered with a new nhptr and tunnelid after a tunnel filter matched. */
157 restart:
159 #if RSVP_DST_LEN == 4
160 src = &nhptr->saddr.s6_addr32[0];
161 dst = &nhptr->daddr.s6_addr32[0];
162 protocol = nhptr->nexthdr;
163 xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
164 #else
165 src = &nhptr->saddr;
166 dst = &nhptr->daddr;
167 protocol = nhptr->protocol;
168 xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
/* IPv4 fragments (MF set or non-zero offset) are never classified. */
169 if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
170 return -1;
171 #endif
173 h1 = hash_dst(dst, protocol, tunnelid);
174 h2 = hash_src(src);
/*
 * NOTE(review): the 32-bit loads at xprt+dpi.offset / xprt+spi.offset below
 * are not checked against the packet length in this function -- confirm the
 * offsets are bounded elsewhere.
 */
176 for (s = sht[h1]; s; s = s->next) {
177 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
178 protocol == s->protocol &&
179 !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
180 #if RSVP_DST_LEN == 4
181 && dst[0] == s->dst[0]
182 && dst[1] == s->dst[1]
183 && dst[2] == s->dst[2]
184 #endif
185 && tunnelid == s->tunnelid) {
/* Session found: try the exact-source bucket first. */
187 for (f = s->ht[h2]; f; f = f->next) {
188 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
189 !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
190 #if RSVP_DST_LEN == 4
191 && src[0] == f->src[0]
192 && src[1] == f->src[1]
193 && src[2] == f->src[2]
194 #endif
196 *res = f->res;
/* May `continue` to the next filter or return the policer verdict. */
198 RSVP_POLICE();
/* Also the jump target of the wildcard loop below; f is the matched filter. */
200 matched:
201 if (f->tunnelhdr == 0)
202 return 0;
/*
 * Tunnel filter: classid carries the tunnel ID; reposition nhptr using
 * tunnelhdr and repeat the whole lookup with the tunnel ID as extra key.
 */
204 tunnelid = f->res.classid;
205 nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
206 goto restart;
210 /* And wildcard bucket... */
/* Wildcard filters are not compared against src or SPI; the first one that passes policing wins. */
211 for (f = s->ht[16]; f; f = f->next) {
212 *res = f->res;
213 RSVP_POLICE();
214 goto matched;
/* A session matched but none of its filters did: later sessions are not tried. */
216 return -1;
219 return -1;
222 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
224 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
225 struct rsvp_session *s;
226 struct rsvp_filter *f;
227 unsigned h1 = handle&0xFF;
228 unsigned h2 = (handle>>8)&0xFF;
230 if (h2 > 16)
231 return 0;
233 for (s = sht[h1]; s; s = s->next) {
234 for (f = s->ht[h2]; f; f = f->next) {
235 if (f->handle == handle)
236 return (unsigned long)f;
239 return 0;
/*
 * Counterpart of rsvp_get(); intentionally does nothing in this classifier.
 */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* nothing to do */
}
246 static int rsvp_init(struct tcf_proto *tp)
248 struct rsvp_head *data;
250 MOD_INC_USE_COUNT;
251 data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
252 if (data) {
253 memset(data, 0, sizeof(struct rsvp_head));
254 tp->root = data;
255 return 0;
257 MOD_DEC_USE_COUNT;
258 return -ENOBUFS;
261 static void rsvp_destroy(struct tcf_proto *tp)
263 struct rsvp_head *data = xchg(&tp->root, NULL);
264 struct rsvp_session **sht;
265 int h1, h2;
267 if (data == NULL)
268 return;
270 sht = data->ht;
272 for (h1=0; h1<256; h1++) {
273 struct rsvp_session *s;
275 while ((s = sht[h1]) != NULL) {
276 sht[h1] = s->next;
278 for (h2=0; h2<=16; h2++) {
279 struct rsvp_filter *f;
281 while ((f = s->ht[h2]) != NULL) {
282 unsigned long cl;
284 s->ht[h2] = f->next;
285 if ((cl = __cls_set_class(&f->res.class, 0)) != 0)
286 tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
287 #ifdef CONFIG_NET_CLS_POLICE
288 tcf_police_release(f->police);
289 #endif
290 kfree(f);
293 kfree(s);
296 kfree(data);
297 MOD_DEC_USE_COUNT;
300 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
302 struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
303 unsigned h = f->handle;
304 struct rsvp_session **sp;
305 struct rsvp_session *s = f->sess;
306 int i;
308 for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
309 if (*fp == f) {
310 unsigned long cl;
313 tcf_tree_lock(tp);
314 *fp = f->next;
315 tcf_tree_unlock(tp);
317 if ((cl = cls_set_class(tp, &f->res.class, 0)) != 0)
318 tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
320 #ifdef CONFIG_NET_CLS_POLICE
321 tcf_police_release(f->police);
322 #endif
324 kfree(f);
326 /* Strip tree */
328 for (i=0; i<=16; i++)
329 if (s->ht[i])
330 return 0;
332 /* OK, session has no flows */
333 for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
334 *sp; sp = &(*sp)->next) {
335 if (*sp == s) {
336 tcf_tree_lock(tp);
337 *sp = s->next;
338 tcf_tree_unlock(tp);
340 kfree(s);
341 return 0;
345 return 0;
348 return 0;
351 static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
353 struct rsvp_head *data = tp->root;
354 int i = 0xFFFF;
356 while (i-- > 0) {
357 u32 h;
358 if ((data->hgenerator += 0x10000) == 0)
359 data->hgenerator = 0x10000;
360 h = data->hgenerator|salt;
361 if (rsvp_get(tp, h) == 0)
362 return h;
364 return 0;
367 static int tunnel_bts(struct rsvp_head *data)
369 int n = data->tgenerator>>5;
370 u32 b = 1<<(data->tgenerator&0x1F);
372 if (data->tmap[n]&b)
373 return 0;
374 data->tmap[n] |= b;
375 return 1;
378 static void tunnel_recycle(struct rsvp_head *data)
380 struct rsvp_session **sht = data->ht;
381 u32 tmap[256/32];
382 int h1, h2;
384 memset(tmap, 0, sizeof(tmap));
386 for (h1=0; h1<256; h1++) {
387 struct rsvp_session *s;
388 for (s = sht[h1]; s; s = s->next) {
389 for (h2=0; h2<=16; h2++) {
390 struct rsvp_filter *f;
392 for (f = s->ht[h2]; f; f = f->next) {
393 if (f->tunnelhdr == 0)
394 continue;
395 data->tgenerator = f->res.classid;
396 tunnel_bts(data);
402 memcpy(data->tmap, tmap, sizeof(tmap));
405 static u32 gen_tunnel(struct rsvp_head *data)
407 int i, k;
409 for (k=0; k<2; k++) {
410 for (i=255; i>0; i--) {
411 if (++data->tgenerator == 0)
412 data->tgenerator = 1;
413 if (tunnel_bts(data))
414 return data->tgenerator;
416 tunnel_recycle(data);
418 return 0;
421 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
422 u32 handle,
423 struct rtattr **tca,
424 unsigned long *arg)
426 struct rsvp_head *data = tp->root;
427 struct rsvp_filter *f, **fp;
428 struct rsvp_session *s, **sp;
429 struct tc_rsvp_pinfo *pinfo = NULL;
430 struct rtattr *opt = tca[TCA_OPTIONS-1];
431 struct rtattr *tb[TCA_RSVP_MAX];
432 unsigned h1, h2;
433 u32 *dst;
434 int err;
436 if (opt == NULL)
437 return handle ? -EINVAL : 0;
439 if (rtattr_parse(tb, TCA_RSVP_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
440 return -EINVAL;
442 if ((f = (struct rsvp_filter*)*arg) != NULL) {
443 /* Node exists: adjust only classid */
445 if (f->handle != handle && handle)
446 return -EINVAL;
447 if (tb[TCA_RSVP_CLASSID-1]) {
448 unsigned long cl;
450 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
451 cl = cls_set_class(tp, &f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
452 if (cl)
453 tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
455 #ifdef CONFIG_NET_CLS_POLICE
456 if (tb[TCA_RSVP_POLICE-1]) {
457 struct tcf_police *police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);
459 tcf_tree_lock(tp);
460 police = xchg(&f->police, police);
461 tcf_tree_unlock(tp);
463 tcf_police_release(police);
465 #endif
466 return 0;
469 /* Now more serious part... */
470 if (handle)
471 return -EINVAL;
472 if (tb[TCA_RSVP_DST-1] == NULL)
473 return -EINVAL;
475 f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
476 if (f == NULL)
477 return -ENOBUFS;
479 memset(f, 0, sizeof(*f));
480 h2 = 16;
481 if (tb[TCA_RSVP_SRC-1]) {
482 err = -EINVAL;
483 if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
484 goto errout;
485 memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
486 h2 = hash_src(f->src);
488 if (tb[TCA_RSVP_PINFO-1]) {
489 err = -EINVAL;
490 if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
491 goto errout;
492 pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
493 f->spi = pinfo->spi;
494 f->tunnelhdr = pinfo->tunnelhdr;
496 if (tb[TCA_RSVP_CLASSID-1]) {
497 err = -EINVAL;
498 if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
499 goto errout;
500 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
503 err = -EINVAL;
504 if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
505 goto errout;
506 dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
507 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
509 err = -ENOMEM;
510 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
511 goto errout;
513 if (f->tunnelhdr) {
514 err = -EINVAL;
515 if (f->res.classid > 255)
516 goto errout;
518 err = -ENOMEM;
519 if (f->res.classid == 0 &&
520 (f->res.classid = gen_tunnel(data)) == 0)
521 goto errout;
524 for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
525 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
526 pinfo->protocol == s->protocol &&
527 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
528 #if RSVP_DST_LEN == 4
529 && dst[0] == s->dst[0]
530 && dst[1] == s->dst[1]
531 && dst[2] == s->dst[2]
532 #endif
533 && pinfo->tunnelid == s->tunnelid) {
535 insert:
536 /* OK, we found appropriate session */
538 fp = &s->ht[h2];
540 f->sess = s;
541 if (f->tunnelhdr == 0)
542 cls_set_class(tp, &f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
543 #ifdef CONFIG_NET_CLS_POLICE
544 if (tb[TCA_RSVP_POLICE-1])
545 f->police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);
546 #endif
548 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
549 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
550 break;
551 f->next = *fp;
552 wmb();
553 *fp = f;
555 *arg = (unsigned long)f;
556 return 0;
560 /* No session found. Create new one. */
562 err = -ENOBUFS;
563 s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
564 if (s == NULL)
565 goto errout;
566 memset(s, 0, sizeof(*s));
567 memcpy(s->dst, dst, sizeof(s->dst));
568 s->dpi = pinfo->dpi;
569 s->protocol = pinfo->protocol;
570 s->tunnelid = pinfo->tunnelid;
571 for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
572 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
573 break;
575 s->next = *sp;
576 wmb();
577 *sp = s;
579 goto insert;
581 errout:
582 if (f)
583 kfree(f);
584 return err;
587 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
589 struct rsvp_head *head = tp->root;
590 unsigned h, h1;
592 if (arg->stop)
593 return;
595 for (h = 0; h < 256; h++) {
596 struct rsvp_session *s;
598 for (s = head->ht[h]; s; s = s->next) {
599 for (h1 = 0; h1 <= 16; h1++) {
600 struct rsvp_filter *f;
602 for (f = s->ht[h1]; f; f = f->next) {
603 if (arg->count < arg->skip) {
604 arg->count++;
605 continue;
607 if (arg->fn(tp, (unsigned long)f, arg) < 0) {
608 arg->stop = 1;
609 break;
611 arg->count++;
618 #ifdef CONFIG_RTNETLINK
619 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
620 struct sk_buff *skb, struct tcmsg *t)
622 struct rsvp_filter *f = (struct rsvp_filter*)fh;
623 struct rsvp_session *s;
624 unsigned char *b = skb->tail;
625 struct rtattr *rta;
626 struct tc_rsvp_pinfo pinfo;
628 if (f == NULL)
629 return skb->len;
630 s = f->sess;
632 t->tcm_handle = f->handle;
635 rta = (struct rtattr*)b;
636 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
638 RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
639 pinfo.dpi = s->dpi;
640 pinfo.spi = f->spi;
641 pinfo.protocol = s->protocol;
642 pinfo.tunnelid = s->tunnelid;
643 pinfo.tunnelhdr = f->tunnelhdr;
644 RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
645 if (f->res.classid)
646 RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
647 if (((f->handle>>8)&0xFF) != 16)
648 RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
649 #ifdef CONFIG_NET_CLS_POLICE
650 if (f->police) {
651 struct rtattr * p_rta = (struct rtattr*)skb->tail;
653 RTA_PUT(skb, TCA_RSVP_POLICE, 0, NULL);
655 if (tcf_police_dump(skb, f->police) < 0)
656 goto rtattr_failure;
658 p_rta->rta_len = skb->tail - (u8*)p_rta;
660 #endif
662 rta->rta_len = skb->tail - b;
663 #ifdef CONFIG_NET_CLS_POLICE
664 if (f->police) {
665 if (qdisc_copy_stats(skb, &f->police->stats))
666 goto rtattr_failure;
668 #endif
669 return skb->len;
671 rtattr_failure:
672 skb_trim(skb, b - skb->data);
673 return -1;
675 #endif
/*
 * Operations table registered with the classifier core (see init_module()).
 * RSVP_OPS and RSVP_ID are expected to be defined by the file that includes
 * this template. The initializer is positional; the slot meanings follow
 * struct tcf_proto_ops, which is declared elsewhere -- the comments below
 * only name what is stored in each slot.
 */
677 struct tcf_proto_ops RSVP_OPS = {
/* First slot left NULL -- presumably a list link filled in on registration; confirm. */
678 NULL,
/* Classifier identifier supplied by the including file. */
679 RSVP_ID,
680 rsvp_classify,
681 rsvp_init,
682 rsvp_destroy,
684 rsvp_get,
685 rsvp_put,
686 rsvp_change,
687 rsvp_delete,
688 rsvp_walk,
/* The dump handler only exists when rtnetlink support is configured. */
689 #ifdef CONFIG_RTNETLINK
690 rsvp_dump
691 #else
692 NULL
693 #endif
696 #ifdef MODULE
697 int init_module(void)
699 return register_tcf_proto_ops(&RSVP_OPS);
702 void cleanup_module(void)
704 unregister_tcf_proto_ops(&RSVP_OPS);
706 #endif