/*
 * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

/*
   Compared to the general packet classification problem,
   RSVP needs only several relatively simple rules:

   * (dst, protocol) are always specified,
     so that we are able to hash them.
   * src may be exact, or may be wildcard, so that
     we can keep a hash table plus one wildcard entry.
   * source port (or flow label) is important only if src is given.

   IMPLEMENTATION.

   We use a two level hash table: the top level is keyed by
   destination address and protocol ID, every bucket contains a list
   of "rsvp sessions", identified by destination address, protocol and
   DPI (= "Destination Port ID"): triple (key, mask, offset).

   Every bucket has a smaller hash table keyed by source address
   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
   Every bucket is again a list of "RSVP flows", selected by
   source address and SPI (= "Source Port ID" here, rather than
   "security parameter index"): triple (key, mask, offset).


   NOTE 1. All packets with IPv6 extension headers (except AH and ESP)
   and all fragmented packets go to the best-effort traffic class.


   NOTE 2. Two "port id"s seem to be redundant; rfc2207 requires
   only one "Generalized Port Identifier". So for classic
   ah, esp (and udp, tcp) both *pi should coincide or one of them
   should be wildcard.

   At first sight, this redundancy is just a waste of CPU
   resources. But DPI and SPI add the possibility to assign different
   priorities to GPIs. Look also at note 4 about tunnels below.


   NOTE 3. One complication is the case of tunneled packets.
   We implement it as follows: if the first lookup
   matches a special session with a non-zero "tunnelhdr" value,
   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
   In this case, we pull tunnelhdr bytes and restart the lookup
   with the tunnel ID added to the list of keys. Simple and stupid 8)8)
   It's enough for PIMREG and IPIP.


   NOTE 4. Two GPIs make it possible to parse even GRE packets.
   E.g. DPI can select ETH_P_IP (and the necessary flags to make
   tunnelhdr correct) in the GRE protocol field, and SPI matches the
   GRE key. Is it not nice? 8)8)


   Well, as a result, despite its simplicity, we get a pretty
   powerful classification engine.  */
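/* A note on filter handles, as can be read off rsvp_get() and gen_handle()
 * below: the low byte is the destination hash bucket h1, the next byte is
 * the source hash slot h2 (16 = wildcard source), and the upper 16 bits are
 * filled from the per-table "hgenerator" counter to make the handle unique.
 */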
#include <linux/config.h>

struct rsvp_head
{
	u32			tmap[256/32];
	u32			hgenerator;
	u8			tgenerator;
	struct rsvp_session	*ht[256];
};
struct rsvp_session
{
	struct rsvp_session	*next;
	u32			dst[RSVP_DST_LEN];
	struct tc_rsvp_gpi 	dpi;
	u8			protocol;
	u8			tunnelid;
	/* 16 (src,sport) hash slots, and one wildcard source slot */
	struct rsvp_filter	*ht[16+1];
};
struct rsvp_filter
{
	struct rsvp_filter	*next;
	u32			src[RSVP_DST_LEN];
	struct tc_rsvp_gpi	spi;
	u8			tunnelhdr;

	struct tcf_result	res;
#ifdef CONFIG_NET_CLS_POLICE
	struct tcf_police	*police;
#endif

	u32			handle;
	struct rsvp_session	*sess;
};
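/* Both hash functions fold only the last 32-bit word of the address (the
 * whole IPv4 address, or the low 32 bits of an IPv6 address): hash_dst()
 * also mixes in the protocol and tunnel ID to pick one of the 256 session
 * buckets, hash_src() picks one of the 16 per-session source slots.
 */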
static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid)
{
	unsigned h = dst[RSVP_DST_LEN-1];
	h ^= h>>16;
	h ^= h>>8;
	return (h ^ protocol ^ tunnelid) & 0xFF;
}

static __inline__ unsigned hash_src(u32 *src)
{
	unsigned h = src[RSVP_DST_LEN-1];
	h ^= h>>16;
	h ^= h>>8;
	h ^= h>>4;
	return h & 0xF;
}
#ifdef CONFIG_NET_CLS_POLICE
#define RSVP_POLICE() \
if (f->police) {						\
	int pol_res = tcf_police(skb, f->police);		\
	if (pol_res < 0) continue;				\
	if (pol_res) return pol_res;				\
}
#else
#define RSVP_POLICE()
#endif
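/* Classification proper: hash the destination into a session bucket, match
 * the session on (dst, protocol, tunnelid, DPI), then look up the filter on
 * (src, SPI) in the per-session table, falling back to the wildcard slot
 * ht[16]. A match with a non-zero tunnelhdr does not classify the packet;
 * instead the outer header is stripped and the lookup restarts with the
 * tunnel ID mixed into the keys, as described in NOTE 3 above.
 */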
static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
			 struct tcf_result *res)
{
	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
	struct rsvp_session *s;
	struct rsvp_filter *f;
	unsigned h1, h2;
	u32 *dst, *src;
	u8 protocol;
	u8 tunnelid = 0;
	u8 *xprt;
#if RSVP_DST_LEN == 4
	struct ipv6hdr *nhptr = skb->nh.ipv6h;
#else
	struct iphdr *nhptr = skb->nh.iph;
#endif

#if !defined( __i386__) && !defined(__mc68000__)
	if ((unsigned long)nhptr & 3)
		return -1;
#endif

restart:

#if RSVP_DST_LEN == 4
	src = &nhptr->saddr.s6_addr32[0];
	dst = &nhptr->daddr.s6_addr32[0];
	protocol = nhptr->nexthdr;
	xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
#else
	src = &nhptr->saddr;
	dst = &nhptr->daddr;
	protocol = nhptr->protocol;
	xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
	if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
		return -1;
#endif

	h1 = hash_dst(dst, protocol, tunnelid);
	h2 = hash_src(src);

	for (s = sht[h1]; s; s = s->next) {
		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
		    protocol == s->protocol &&
		    !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
#if RSVP_DST_LEN == 4
		    && dst[0] == s->dst[0]
		    && dst[1] == s->dst[1]
		    && dst[2] == s->dst[2]
#endif
		    && tunnelid == s->tunnelid) {

			for (f = s->ht[h2]; f; f = f->next) {
				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
				    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
#if RSVP_DST_LEN == 4
				    && src[0] == f->src[0]
				    && src[1] == f->src[1]
				    && src[2] == f->src[2]
#endif
				    ) {

					RSVP_POLICE();

matched:
					if (f->tunnelhdr == 0) {
						*res = f->res;
						return 0;
					} else {
						tunnelid = f->res.classid;
						nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
						goto restart;
					}
				}
			}

			/* And wildcard bucket... */
			for (f = s->ht[16]; f; f = f->next) {
				RSVP_POLICE();
				goto matched;
			}
			return -1;
		}
	}
	return -1;
}
static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
{
	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
	struct rsvp_session *s;
	struct rsvp_filter *f;
	unsigned h1 = handle&0xFF;
	unsigned h2 = (handle>>8)&0xFF;

	if (h2 > 16)
		return 0;

	for (s = sht[h1]; s; s = s->next) {
		for (f = s->ht[h2]; f; f = f->next) {
			if (f->handle == handle)
				return (unsigned long)f;
		}
	}
	return 0;
}
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
}
static int rsvp_init(struct tcf_proto *tp)
{
	struct rsvp_head *data;

	MOD_INC_USE_COUNT;
	data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
	if (data) {
		memset(data, 0, sizeof(struct rsvp_head));
		tp->root = data;
		return 0;
	}
	MOD_DEC_USE_COUNT;
	return -ENOBUFS;
}
static void rsvp_destroy(struct tcf_proto *tp)
{
	struct rsvp_head *data = xchg(&tp->root, NULL);
	struct rsvp_session **sht;
	int h1, h2;

	if (data == NULL)
		return;

	sht = data->ht;

	for (h1=0; h1<256; h1++) {
		struct rsvp_session *s;

		while ((s = sht[h1]) != NULL) {
			sht[h1] = s->next;

			for (h2=0; h2<=16; h2++) {
				struct rsvp_filter *f;

				while ((f = s->ht[h2]) != NULL) {
					unsigned long cl;

					s->ht[h2] = f->next;
					if ((cl = cls_set_class(&f->res.class, 0)) != 0)
						tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
#ifdef CONFIG_NET_CLS_POLICE
					tcf_police_release(f->police);
#endif
					kfree(f);
				}
			}
			kfree(s);
		}
	}
	kfree(data);
	MOD_DEC_USE_COUNT;
}
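/* Delete one filter: unlink it from its source slot, release its class
 * binding and (optionally) its policer, and if the owning session is left
 * with no filters at all, unlink and free the session too. synchronize_bh()
 * lets classification running in bottom-half context finish with the entry
 * before it is freed.
 */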
static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
{
	struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
	unsigned h = f->handle;
	struct rsvp_session **sp;
	struct rsvp_session *s = f->sess;
	int i;

	for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
		if (*fp == f) {
			unsigned long cl;

			*fp = f->next;
			synchronize_bh();

			if ((cl = cls_set_class(&f->res.class, 0)) != 0)
				tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);

#ifdef CONFIG_NET_CLS_POLICE
			tcf_police_release(f->police);
#endif

			kfree(f);

			/* Strip tree */

			for (i=0; i<=16; i++)
				if (s->ht[i])
					return 0;

			/* OK, session has no flows */
			for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
			     *sp; sp = &(*sp)->next) {
				if (*sp == s) {
					*sp = s->next;
					synchronize_bh();

					kfree(s);
					return 0;
				}
			}

			return 0;
		}
	}
	return 0;
}
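/* Generate a new filter handle: the low 16 bits are the caller-supplied
 * salt (h1 | h2<<8, see the note on handles above), the upper 16 bits come
 * from the wrapping hgenerator counter. Up to 0xFFFF candidates are tried
 * against rsvp_get() before giving up with 0.
 */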
static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
{
	struct rsvp_head *data = tp->root;
	int i = 0xFFFF;

	while (i-- > 0) {
		u32 h;
		if ((data->hgenerator += 0x10000) == 0)
			data->hgenerator = 0x10000;
		h = data->hgenerator|salt;
		if (rsvp_get(tp, h) == 0)
			return h;
	}
	return 0;
}
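/* Tunnel IDs (1..255) are allocated from the "tmap" bitmap: tunnel_bts()
 * is a test-and-set on the bit for the current tgenerator value,
 * tunnel_recycle() rescans the installed tunnel filters when the ID space
 * looks exhausted, and gen_tunnel() makes up to two passes over the range
 * before giving up.
 */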
static int tunnel_bts(struct rsvp_head *data)
{
	int n = data->tgenerator>>5;
	u32 b = 1<<(data->tgenerator&0x1F);

	if (data->tmap[n]&b)
		return 0;
	data->tmap[n] |= b;
	return 1;
}
static void tunnel_recycle(struct rsvp_head *data)
{
	struct rsvp_session **sht = data->ht;
	u32 tmap[256/32];
	int h1, h2;

	memset(tmap, 0, sizeof(tmap));

	for (h1=0; h1<256; h1++) {
		struct rsvp_session *s;
		for (s = sht[h1]; s; s = s->next) {
			for (h2=0; h2<=16; h2++) {
				struct rsvp_filter *f;

				for (f = s->ht[h2]; f; f = f->next) {
					if (f->tunnelhdr == 0)
						continue;
					data->tgenerator = f->res.classid;
					tunnel_bts(data);
				}
			}
		}
	}

	memcpy(data->tmap, tmap, sizeof(tmap));
}
static u32 gen_tunnel(struct rsvp_head *data)
{
	int i, k;

	for (k=0; k<2; k++) {
		for (i=255; i>0; i--) {
			if (++data->tgenerator == 0)
				data->tgenerator = 1;
			if (tunnel_bts(data))
				return data->tgenerator;
		}
		tunnel_recycle(data);
	}
	return 0;
}
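/* Add or modify a filter. For an existing node only the classid and the
 * policer may be changed. A new node is built from the TCA_RSVP_SRC/PINFO/
 * CLASSID/DST attributes, gets a fresh handle (and, for tunnel filters, a
 * tunnel ID), and is then linked into a matching session, creating the
 * session first if none exists. The insertion point is chosen from the
 * port masks, so entries with wider (more specific) masks tend to be
 * checked first.
 */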
static int rsvp_change(struct tcf_proto *tp, unsigned long base,
		       u32 handle,
		       struct rtattr **tca,
		       unsigned long *arg)
{
	struct rsvp_head *data = tp->root;
	struct rsvp_filter *f, **fp;
	struct rsvp_session *s, **sp;
	struct tc_rsvp_pinfo *pinfo = NULL;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_RSVP_MAX];
	unsigned h1, h2;
	u32 *dst;
	int err;

	if (opt == NULL)
		return handle ? -EINVAL : 0;

	if (rtattr_parse(tb, TCA_RSVP_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
		return -EINVAL;

	if ((f = (struct rsvp_filter*)*arg) != NULL) {
		/* Node exists: adjust only classid */

		if (f->handle != handle && handle)
			return -EINVAL;
		if (tb[TCA_RSVP_CLASSID-1]) {
			unsigned long cl;

			f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
			cl = cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
			if (cl)
				tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
		}
#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_RSVP_POLICE-1]) {
			struct tcf_police *police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);

			police = xchg(&f->police, police);
			synchronize_bh();

			tcf_police_release(police);
		}
#endif
		return 0;
	}

	/* Now more serious part... */
	if (handle)
		return -EINVAL;
	if (tb[TCA_RSVP_DST-1] == NULL)
		return -EINVAL;

	f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
	if (f == NULL)
		return -ENOBUFS;

	memset(f, 0, sizeof(*f));
	h2 = 16;
	if (tb[TCA_RSVP_SRC-1]) {
		err = -EINVAL;
		if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
			goto errout;
		memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
		h2 = hash_src(f->src);
	}
	if (tb[TCA_RSVP_PINFO-1]) {
		err = -EINVAL;
		if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
			goto errout;
		pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
		f->spi = pinfo->spi;
		f->tunnelhdr = pinfo->tunnelhdr;
	}
	if (tb[TCA_RSVP_CLASSID-1]) {
		err = -EINVAL;
		if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
			goto errout;
		f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
	}

	err = -EINVAL;
	if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
		goto errout;
	dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);

	err = -ENOMEM;
	if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
		goto errout;

	if (f->tunnelhdr) {
		err = -EINVAL;
		if (f->res.classid > 255)
			goto errout;

		err = -ENOMEM;
		if (f->res.classid == 0 &&
		    (f->res.classid = gen_tunnel(data)) == 0)
			goto errout;
	}

	for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
		    pinfo->protocol == s->protocol &&
		    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
#if RSVP_DST_LEN == 4
		    && dst[0] == s->dst[0]
		    && dst[1] == s->dst[1]
		    && dst[2] == s->dst[2]
#endif
		    && pinfo->tunnelid == s->tunnelid) {

insert:
			/* OK, we found appropriate session */

			fp = &s->ht[h2];

			f->sess = s;
			if (f->tunnelhdr == 0)
				cls_set_class(&f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
#ifdef CONFIG_NET_CLS_POLICE
			if (tb[TCA_RSVP_POLICE-1])
				f->police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);
#endif

			for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
				if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
					break;
			f->next = *fp;
			wmb();
			*fp = f;

			*arg = (unsigned long)f;
			return 0;
		}
	}

	/* No session found. Create new one. */

	err = -ENOBUFS;
	s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
	if (s == NULL)
		goto errout;
	memset(s, 0, sizeof(*s));
	memcpy(s->dst, dst, sizeof(s->dst));
	s->dpi = pinfo->dpi;
	s->protocol = pinfo->protocol;
	s->tunnelid = pinfo->tunnelid;
	for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
		if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
			break;
	}
	s->next = *sp;
	wmb();
	*sp = s;

	goto insert;

errout:
	if (f)
		kfree(f);
	return err;
}
static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct rsvp_head *head = tp->root;
	unsigned h, h1;

	if (arg->stop)
		return;

	for (h = 0; h < 256; h++) {
		struct rsvp_session *s;

		for (s = head->ht[h]; s; s = s->next) {
			for (h1 = 0; h1 <= 16; h1++) {
				struct rsvp_filter *f;

				for (f = s->ht[h1]; f; f = f->next) {
					if (arg->count < arg->skip) {
						arg->count++;
						continue;
					}
					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
						arg->stop = 1;
						break;
					}
					arg->count++;
				}
			}
		}
	}
}
#ifdef CONFIG_RTNETLINK
static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
		     struct sk_buff *skb, struct tcmsg *t)
{
	struct rsvp_filter *f = (struct rsvp_filter*)fh;
	struct rsvp_session *s;
	unsigned char *b = skb->tail;
	struct rtattr *rta;
	struct tc_rsvp_pinfo pinfo;

	if (f == NULL)
		return skb->len;
	s = f->sess;

	t->tcm_handle = f->handle;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);

	RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
	pinfo.dpi = s->dpi;
	pinfo.spi = f->spi;
	pinfo.protocol = s->protocol;
	pinfo.tunnelid = s->tunnelid;
	pinfo.tunnelhdr = f->tunnelhdr;
	RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
	if (f->res.classid)
		RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
	if (((f->handle>>8)&0xFF) != 16)
		RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
#ifdef CONFIG_NET_CLS_POLICE
	if (f->police) {
		struct rtattr * p_rta = (struct rtattr*)skb->tail;

		RTA_PUT(skb, TCA_RSVP_POLICE, 0, NULL);

		if (tcf_police_dump(skb, f->police) < 0)
			goto rtattr_failure;

		p_rta->rta_len = skb->tail - (u8*)p_rta;
	}
#endif

	rta->rta_len = skb->tail - b;
#ifdef CONFIG_NET_CLS_POLICE
	if (f->police) {
		RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), &f->police->stats);
	}
#endif
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
#endif
struct tcf_proto_ops RSVP_OPS = {
	NULL,
	RSVP_ID,
	rsvp_classify,
	rsvp_init,
	rsvp_destroy,

	rsvp_get,
	rsvp_put,
	rsvp_change,
	rsvp_delete,
	rsvp_walk,
#ifdef CONFIG_RTNETLINK
	rsvp_dump
#else
	NULL
#endif
};
#ifdef MODULE
int init_module(void)
{
	return register_tcf_proto_ops(&RSVP_OPS);
}

void cleanup_module(void)
{
	unregister_tcf_proto_ops(&RSVP_OPS);
}
#endif