initial commit with v2.6.9
[linux-2.6.9-moxart.git] / net / sched / cls_rsvp.h
blobf70fad175a6f1b52511c84ee278f068133f821de
1 /*
2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 Compared to the general packet classification problem,
14 RSVP needs only several relatively simple rules:
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
22 IMPLEMENTATION.
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
36 NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
40 NOTE 2. Two "port id"s seem to be redundant, rfc2207 requires
41 only one "Generalized Port Identifier". So that for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
43 should be wildcard.
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. Look also at note 4 about tunnels below.
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with "tunnelhdr" value not zero,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 F.e. DPI can select ETH_P_IP (and necessary flags to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
65 Well, as a result, despite its simplicity, we get a pretty
66 powerful classification engine. */
68 #include <linux/config.h>
70 struct rsvp_head
72 u32 tmap[256/32];
73 u32 hgenerator;
74 u8 tgenerator;
75 struct rsvp_session *ht[256];
78 struct rsvp_session
80 struct rsvp_session *next;
81 u32 dst[RSVP_DST_LEN];
82 struct tc_rsvp_gpi dpi;
83 u8 protocol;
84 u8 tunnelid;
85 /* 16 (src,sport) hash slots, and one wildcard source slot */
86 struct rsvp_filter *ht[16+1];
90 struct rsvp_filter
92 struct rsvp_filter *next;
93 u32 src[RSVP_DST_LEN];
94 struct tc_rsvp_gpi spi;
95 u8 tunnelhdr;
97 struct tcf_result res;
98 #ifdef CONFIG_NET_CLS_POLICE
99 struct tcf_police *police;
100 #endif
102 u32 handle;
103 struct rsvp_session *sess;
106 static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid)
108 unsigned h = dst[RSVP_DST_LEN-1];
109 h ^= h>>16;
110 h ^= h>>8;
111 return (h ^ protocol ^ tunnelid) & 0xFF;
114 static __inline__ unsigned hash_src(u32 *src)
116 unsigned h = src[RSVP_DST_LEN-1];
117 h ^= h>>16;
118 h ^= h>>8;
119 h ^= h>>4;
120 return h & 0xF;
/*
 * Policing hook for the filter loops of rsvp_classify().
 *
 * The expansion relies on `f` and `skb` being in scope and must sit
 * directly inside a loop over filters: a negative tcf_police() result
 * executes `continue` (try the next filter), any other non-zero result is
 * returned from the enclosing function.  Because of that `continue`, the
 * body must NOT be wrapped in do { } while (0).
 */
#ifdef CONFIG_NET_CLS_POLICE
#define RSVP_POLICE() \
if (f->police) { \
	int police_verdict = tcf_police(skb, f->police); \
	if (police_verdict < 0) \
		continue; \
	if (police_verdict) \
		return police_verdict; \
}
#else
#define RSVP_POLICE()
#endif
135 static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
136 struct tcf_result *res)
138 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
139 struct rsvp_session *s;
140 struct rsvp_filter *f;
141 unsigned h1, h2;
142 u32 *dst, *src;
143 u8 protocol;
144 u8 tunnelid = 0;
145 u8 *xprt;
146 #if RSVP_DST_LEN == 4
147 struct ipv6hdr *nhptr = skb->nh.ipv6h;
148 #else
149 struct iphdr *nhptr = skb->nh.iph;
150 #endif
152 restart:
154 #if RSVP_DST_LEN == 4
155 src = &nhptr->saddr.s6_addr32[0];
156 dst = &nhptr->daddr.s6_addr32[0];
157 protocol = nhptr->nexthdr;
158 xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
159 #else
160 src = &nhptr->saddr;
161 dst = &nhptr->daddr;
162 protocol = nhptr->protocol;
163 xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
164 if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
165 return -1;
166 #endif
168 h1 = hash_dst(dst, protocol, tunnelid);
169 h2 = hash_src(src);
171 for (s = sht[h1]; s; s = s->next) {
172 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
173 protocol == s->protocol &&
174 !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
175 #if RSVP_DST_LEN == 4
176 && dst[0] == s->dst[0]
177 && dst[1] == s->dst[1]
178 && dst[2] == s->dst[2]
179 #endif
180 && tunnelid == s->tunnelid) {
182 for (f = s->ht[h2]; f; f = f->next) {
183 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
184 !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
185 #if RSVP_DST_LEN == 4
186 && src[0] == f->src[0]
187 && src[1] == f->src[1]
188 && src[2] == f->src[2]
189 #endif
191 *res = f->res;
193 RSVP_POLICE();
195 matched:
196 if (f->tunnelhdr == 0)
197 return 0;
199 tunnelid = f->res.classid;
200 nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
201 goto restart;
205 /* And wildcard bucket... */
206 for (f = s->ht[16]; f; f = f->next) {
207 *res = f->res;
208 RSVP_POLICE();
209 goto matched;
211 return -1;
214 return -1;
217 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
219 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
220 struct rsvp_session *s;
221 struct rsvp_filter *f;
222 unsigned h1 = handle&0xFF;
223 unsigned h2 = (handle>>8)&0xFF;
225 if (h2 > 16)
226 return 0;
228 for (s = sht[h1]; s; s = s->next) {
229 for (f = s->ht[h2]; f; f = f->next) {
230 if (f->handle == handle)
231 return (unsigned long)f;
234 return 0;
/* The ->put hook: intentionally empty, nothing is done for this classifier. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
}
241 static int rsvp_init(struct tcf_proto *tp)
243 struct rsvp_head *data;
245 data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
246 if (data) {
247 memset(data, 0, sizeof(struct rsvp_head));
248 tp->root = data;
249 return 0;
251 return -ENOBUFS;
254 static void rsvp_destroy(struct tcf_proto *tp)
256 struct rsvp_head *data = xchg(&tp->root, NULL);
257 struct rsvp_session **sht;
258 int h1, h2;
260 if (data == NULL)
261 return;
263 sht = data->ht;
265 for (h1=0; h1<256; h1++) {
266 struct rsvp_session *s;
268 while ((s = sht[h1]) != NULL) {
269 sht[h1] = s->next;
271 for (h2=0; h2<=16; h2++) {
272 struct rsvp_filter *f;
274 while ((f = s->ht[h2]) != NULL) {
275 unsigned long cl;
277 s->ht[h2] = f->next;
278 if ((cl = __cls_set_class(&f->res.class, 0)) != 0)
279 tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
280 #ifdef CONFIG_NET_CLS_POLICE
281 tcf_police_release(f->police,TCA_ACT_UNBIND);
282 #endif
283 kfree(f);
286 kfree(s);
289 kfree(data);
292 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
294 struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
295 unsigned h = f->handle;
296 struct rsvp_session **sp;
297 struct rsvp_session *s = f->sess;
298 int i;
300 for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
301 if (*fp == f) {
302 unsigned long cl;
305 tcf_tree_lock(tp);
306 *fp = f->next;
307 tcf_tree_unlock(tp);
309 if ((cl = cls_set_class(tp, &f->res.class, 0)) != 0)
310 tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
312 #ifdef CONFIG_NET_CLS_POLICE
313 tcf_police_release(f->police,TCA_ACT_UNBIND);
314 #endif
316 kfree(f);
318 /* Strip tree */
320 for (i=0; i<=16; i++)
321 if (s->ht[i])
322 return 0;
324 /* OK, session has no flows */
325 for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
326 *sp; sp = &(*sp)->next) {
327 if (*sp == s) {
328 tcf_tree_lock(tp);
329 *sp = s->next;
330 tcf_tree_unlock(tp);
332 kfree(s);
333 return 0;
337 return 0;
340 return 0;
343 static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
345 struct rsvp_head *data = tp->root;
346 int i = 0xFFFF;
348 while (i-- > 0) {
349 u32 h;
350 if ((data->hgenerator += 0x10000) == 0)
351 data->hgenerator = 0x10000;
352 h = data->hgenerator|salt;
353 if (rsvp_get(tp, h) == 0)
354 return h;
356 return 0;
359 static int tunnel_bts(struct rsvp_head *data)
361 int n = data->tgenerator>>5;
362 u32 b = 1<<(data->tgenerator&0x1F);
364 if (data->tmap[n]&b)
365 return 0;
366 data->tmap[n] |= b;
367 return 1;
370 static void tunnel_recycle(struct rsvp_head *data)
372 struct rsvp_session **sht = data->ht;
373 u32 tmap[256/32];
374 int h1, h2;
376 memset(tmap, 0, sizeof(tmap));
378 for (h1=0; h1<256; h1++) {
379 struct rsvp_session *s;
380 for (s = sht[h1]; s; s = s->next) {
381 for (h2=0; h2<=16; h2++) {
382 struct rsvp_filter *f;
384 for (f = s->ht[h2]; f; f = f->next) {
385 if (f->tunnelhdr == 0)
386 continue;
387 data->tgenerator = f->res.classid;
388 tunnel_bts(data);
394 memcpy(data->tmap, tmap, sizeof(tmap));
397 static u32 gen_tunnel(struct rsvp_head *data)
399 int i, k;
401 for (k=0; k<2; k++) {
402 for (i=255; i>0; i--) {
403 if (++data->tgenerator == 0)
404 data->tgenerator = 1;
405 if (tunnel_bts(data))
406 return data->tgenerator;
408 tunnel_recycle(data);
410 return 0;
413 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
414 u32 handle,
415 struct rtattr **tca,
416 unsigned long *arg)
418 struct rsvp_head *data = tp->root;
419 struct rsvp_filter *f, **fp;
420 struct rsvp_session *s, **sp;
421 struct tc_rsvp_pinfo *pinfo = NULL;
422 struct rtattr *opt = tca[TCA_OPTIONS-1];
423 struct rtattr *tb[TCA_RSVP_MAX];
424 unsigned h1, h2;
425 u32 *dst;
426 int err;
428 if (opt == NULL)
429 return handle ? -EINVAL : 0;
431 if (rtattr_parse(tb, TCA_RSVP_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
432 return -EINVAL;
434 if ((f = (struct rsvp_filter*)*arg) != NULL) {
435 /* Node exists: adjust only classid */
437 if (f->handle != handle && handle)
438 return -EINVAL;
439 if (tb[TCA_RSVP_CLASSID-1]) {
440 unsigned long cl;
442 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
443 cl = cls_set_class(tp, &f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
444 if (cl)
445 tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
447 #ifdef CONFIG_NET_CLS_POLICE
448 if (tb[TCA_RSVP_POLICE-1]) {
449 struct tcf_police *police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);
451 tcf_tree_lock(tp);
452 police = xchg(&f->police, police);
453 tcf_tree_unlock(tp);
455 tcf_police_release(police,TCA_ACT_UNBIND);
457 #endif
458 return 0;
461 /* Now more serious part... */
462 if (handle)
463 return -EINVAL;
464 if (tb[TCA_RSVP_DST-1] == NULL)
465 return -EINVAL;
467 f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
468 if (f == NULL)
469 return -ENOBUFS;
471 memset(f, 0, sizeof(*f));
472 h2 = 16;
473 if (tb[TCA_RSVP_SRC-1]) {
474 err = -EINVAL;
475 if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
476 goto errout;
477 memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
478 h2 = hash_src(f->src);
480 if (tb[TCA_RSVP_PINFO-1]) {
481 err = -EINVAL;
482 if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
483 goto errout;
484 pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
485 f->spi = pinfo->spi;
486 f->tunnelhdr = pinfo->tunnelhdr;
488 if (tb[TCA_RSVP_CLASSID-1]) {
489 err = -EINVAL;
490 if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
491 goto errout;
492 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
495 err = -EINVAL;
496 if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
497 goto errout;
498 dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
499 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
501 err = -ENOMEM;
502 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
503 goto errout;
505 if (f->tunnelhdr) {
506 err = -EINVAL;
507 if (f->res.classid > 255)
508 goto errout;
510 err = -ENOMEM;
511 if (f->res.classid == 0 &&
512 (f->res.classid = gen_tunnel(data)) == 0)
513 goto errout;
516 for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
517 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
518 pinfo && pinfo->protocol == s->protocol &&
519 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
520 #if RSVP_DST_LEN == 4
521 && dst[0] == s->dst[0]
522 && dst[1] == s->dst[1]
523 && dst[2] == s->dst[2]
524 #endif
525 && pinfo->tunnelid == s->tunnelid) {
527 insert:
528 /* OK, we found appropriate session */
530 fp = &s->ht[h2];
532 f->sess = s;
533 if (f->tunnelhdr == 0)
534 cls_set_class(tp, &f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
535 #ifdef CONFIG_NET_CLS_POLICE
536 if (tb[TCA_RSVP_POLICE-1])
537 f->police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);
538 #endif
540 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
541 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
542 break;
543 f->next = *fp;
544 wmb();
545 *fp = f;
547 *arg = (unsigned long)f;
548 return 0;
552 /* No session found. Create new one. */
554 err = -ENOBUFS;
555 s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
556 if (s == NULL)
557 goto errout;
558 memset(s, 0, sizeof(*s));
559 memcpy(s->dst, dst, sizeof(s->dst));
561 if (pinfo) {
562 s->dpi = pinfo->dpi;
563 s->protocol = pinfo->protocol;
564 s->tunnelid = pinfo->tunnelid;
566 for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
567 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
568 break;
570 s->next = *sp;
571 wmb();
572 *sp = s;
574 goto insert;
576 errout:
577 if (f)
578 kfree(f);
579 return err;
582 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
584 struct rsvp_head *head = tp->root;
585 unsigned h, h1;
587 if (arg->stop)
588 return;
590 for (h = 0; h < 256; h++) {
591 struct rsvp_session *s;
593 for (s = head->ht[h]; s; s = s->next) {
594 for (h1 = 0; h1 <= 16; h1++) {
595 struct rsvp_filter *f;
597 for (f = s->ht[h1]; f; f = f->next) {
598 if (arg->count < arg->skip) {
599 arg->count++;
600 continue;
602 if (arg->fn(tp, (unsigned long)f, arg) < 0) {
603 arg->stop = 1;
604 break;
606 arg->count++;
613 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
614 struct sk_buff *skb, struct tcmsg *t)
616 struct rsvp_filter *f = (struct rsvp_filter*)fh;
617 struct rsvp_session *s;
618 unsigned char *b = skb->tail;
619 struct rtattr *rta;
620 struct tc_rsvp_pinfo pinfo;
622 if (f == NULL)
623 return skb->len;
624 s = f->sess;
626 t->tcm_handle = f->handle;
629 rta = (struct rtattr*)b;
630 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
632 RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
633 pinfo.dpi = s->dpi;
634 pinfo.spi = f->spi;
635 pinfo.protocol = s->protocol;
636 pinfo.tunnelid = s->tunnelid;
637 pinfo.tunnelhdr = f->tunnelhdr;
638 RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
639 if (f->res.classid)
640 RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
641 if (((f->handle>>8)&0xFF) != 16)
642 RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
643 #ifdef CONFIG_NET_CLS_POLICE
644 if (f->police) {
645 struct rtattr * p_rta = (struct rtattr*)skb->tail;
647 RTA_PUT(skb, TCA_RSVP_POLICE, 0, NULL);
649 if (tcf_police_dump(skb, f->police) < 0)
650 goto rtattr_failure;
652 p_rta->rta_len = skb->tail - (u8*)p_rta;
654 #endif
656 rta->rta_len = skb->tail - b;
657 #ifdef CONFIG_NET_CLS_POLICE
658 if (f->police) {
659 if (qdisc_copy_stats(skb, &f->police->stats,
660 f->police->stats_lock))
661 goto rtattr_failure;
663 #endif
664 return skb->len;
666 rtattr_failure:
667 skb_trim(skb, b - skb->data);
668 return -1;
671 static struct tcf_proto_ops RSVP_OPS = {
672 .next = NULL,
673 .kind = RSVP_ID,
674 .classify = rsvp_classify,
675 .init = rsvp_init,
676 .destroy = rsvp_destroy,
677 .get = rsvp_get,
678 .put = rsvp_put,
679 .change = rsvp_change,
680 .delete = rsvp_delete,
681 .walk = rsvp_walk,
682 .dump = rsvp_dump,
683 .owner = THIS_MODULE,
686 static int __init init_rsvp(void)
688 return register_tcf_proto_ops(&RSVP_OPS);
691 static void __exit exit_rsvp(void)
693 unregister_tcf_proto_ops(&RSVP_OPS);
696 module_init(init_rsvp)
697 module_exit(exit_rsvp)