/*
 * net/sched/police.c	Input police filter.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		J Hadi Salim (action changes)
 */
13 #include <asm/uaccess.h>
14 #include <asm/system.h>
15 #include <linux/bitops.h>
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/module.h>
31 #include <linux/rtnetlink.h>
32 #include <linux/init.h>
34 #include <net/act_api.h>
/* Token cost of transmitting L bytes at the configured rate: index the
 * precomputed rate table by the packet length scaled down by cell_log. */
#define L2T(p,L)   ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
/* Same lookup against the peak-rate table. */
#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
/* Recover the policer private state from a generic tc_action. */
#define PRIV(a) ((struct tcf_police *) (a)->priv)

/* use generic hash table */
#define MY_TAB_SIZE	16
#define MY_TAB_MASK	15	/* MY_TAB_SIZE - 1: mask form of the modulus */
/* Open-chained hash table of all live policers, keyed by policer index. */
static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
/* Policer hash table lock */
static DEFINE_RWLOCK(police_lock);
/* Each policer is serialized by its individual spinlock */

/* Map a policer index to a hash bucket in tcf_police_ht[].
 * NOTE(review): the function body is elided in this excerpt; presumably it
 * reduces 'index' with MY_TAB_MASK -- confirm against the full source. */
static __inline__ unsigned tcf_police_hash(u32 index)
/* Find the policer with the given index, or NULL.  Walks one hash chain
 * under the table read-lock.
 * NOTE(review): excerpt is incomplete -- the opening brace, the declaration
 * of 'p', the loop break/return and the final return are elided. */
static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
	read_lock(&police_lock);
	for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
		if (p->index == index)
	read_unlock(&police_lock);
#ifdef CONFIG_NET_CLS_ACT
/*
 * Dump every policer in the hash table into a netlink reply (used for both
 * RTM_GETACTION listing and RTM_DELACTION flushing, selected by 'type').
 * NOTE(review): excerpt is incomplete -- the opening brace, declarations of
 * 'p' and 'r', the cb->args skip/resume bookkeeping, the else branch pairing
 * the two tcf_action_dump_1() calls, the error paths and the returns are all
 * elided here.
 */
static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
			      int type, struct tc_action *a)
	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;

	read_lock(&police_lock);
	for (i = 0; i < MY_TAB_SIZE; i++) {
		/* NOTE(review): the loop index is passed through the hash
		 * again; confirm this still visits every bucket once. */
		p = tcf_police_ht[tcf_police_hash(i)];
		for (; p; p = p->next) {
			/* Remember the attribute start so it can be rolled
			 * back (skb_trim) or have its length patched. */
			r = (struct rtattr *) skb->tail;
			RTA_PUT(skb, a->order, 0, NULL);
			if (type == RTM_DELACTION)
				err = tcf_action_dump_1(skb, a, 0, 1);
			err = tcf_action_dump_1(skb, a, 0, 0);
			/* error path: undo the partially written attribute */
			skb_trim(skb, (u8 *)r - skb->data);
			/* success: fix up the nested attribute's length */
			r->rta_len = skb->tail - (u8 *)r;
	read_unlock(&police_lock);
	/* rtattr-failure path: roll the message back to 'r' */
	skb_trim(skb, (u8 *)r - skb->data);
/* tc_action_ops.lookup hook: locate a policer by index and, if found, bind
 * it into 'a'.
 * NOTE(review): the return type line, the found/not-found handling and the
 * returns are elided in this excerpt. */
tcf_hash_search(struct tc_action *a, u32 index)
	struct tcf_police *p = tcf_police_lookup(index);
/* Allocate a fresh, currently unused policer index.
 * NOTE(review): the body of the do-loop (incrementing and wrapping the
 * idx_gen counter) and the final return are elided in this excerpt; only the
 * loop condition survives -- it retries while the candidate collides with an
 * existing policer. */
static inline u32 tcf_police_new_index(void)
	} while (tcf_police_lookup(idx_gen));
/*
 * Unlink a policer from its hash chain and release its resources
 * (rate estimator and both rate tables).
 * NOTE(review): excerpt is incomplete -- the opening brace, the chain-match
 * test inside the for loop (the unlink itself, between the write_lock/unlock
 * pair), the kfree/return and the "not found" handling are elided.
 */
void tcf_police_destroy(struct tcf_police *p)
	unsigned h = tcf_police_hash(p->index);
	struct tcf_police **p1p;

	/* Scan the bucket for the link pointing at 'p'. */
	for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
		write_lock_bh(&police_lock);
		write_unlock_bh(&police_lock);
#ifdef CONFIG_NET_ESTIMATOR
		gen_kill_estimator(&p->bstats, &p->rate_est);
		/* Drop references on the configured rate tables. */
		qdisc_put_rtab(p->R_tab);
		qdisc_put_rtab(p->P_tab);
#ifdef CONFIG_NET_CLS_ACT
/*
 * tc_action_ops.init hook: parse the netlink TLVs in 'rta', then either bind
 * to an existing policer (by parm->index) or allocate and configure a new
 * one and insert it into the hash table.
 * NOTE(review): excerpt is heavily elided -- the opening brace, 'ret'/'h'
 * declarations, most error returns / goto-failure paths, the existing-policer
 * (bind/ovr) branch bodies and the closing brace are missing.
 */
static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
				 struct tc_action *a, int ovr, int bind)
	struct rtattr *tb[TCA_POLICE_MAX];
	struct tc_police *parm;
	struct tcf_police *p;
	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;

	if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)

	if (tb[TCA_POLICE_TBF-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))

	parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);

	/* NOTE(review): the size check on TCA_POLICE_RESULT appears twice,
	 * verbatim -- the second copy looks like an accidental duplicate
	 * (or should be validating a different attribute); verify against
	 * the full source. */
	if (tb[TCA_POLICE_RESULT-1] != NULL &&
	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
	if (tb[TCA_POLICE_RESULT-1] != NULL &&
	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))

	/* Reuse an existing policer when the caller supplied its index. */
	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {

	p = kmalloc(sizeof(*p), GFP_KERNEL);
	memset(p, 0, sizeof(*p));

	spin_lock_init(&p->lock);
	p->stats_lock = &p->lock;

	if (parm->rate.rate) {
		R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
		if (parm->peakrate.rate) {
			P_tab = qdisc_get_rtab(&parm->peakrate,
					       tb[TCA_POLICE_PEAKRATE-1]);
			/* NOTE(review): the local 'P_tab' was just assigned,
			 * yet 'p->P_tab' is tested here -- this looks like
			 * the wrong variable (the failure check would never
			 * see the fresh qdisc_get_rtab() result).  Strong
			 * candidate for a bug fix: test P_tab instead. */
			if (p->P_tab == NULL) {
				qdisc_put_rtab(R_tab);

	/* No failure allowed after this point */
	spin_lock_bh(&p->lock);
	/* Swap in the new tables, dropping any previous ones. */
	qdisc_put_rtab(p->R_tab);
	qdisc_put_rtab(p->P_tab);

	if (tb[TCA_POLICE_RESULT-1])
		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
	/* Start with a full bucket of tokens. */
	p->toks = p->burst = parm->burst;
	/* Default MTU when none was given: largest length one rate-table
	 * cell can represent. */
	p->mtu = 255<<p->R_tab->rate.cell_log;
	p->ptoks = L2T_P(p, p->mtu);
	p->action = parm->action;
#ifdef CONFIG_NET_ESTIMATOR
	if (tb[TCA_POLICE_AVRATE-1])
		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
	gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
	spin_unlock_bh(&p->lock);
	if (ret != ACT_P_CREATED)

	/* Newly created policer: stamp it and link it into the table. */
	PSCHED_GET_TIME(p->t_c);
	p->index = parm->index ? : tcf_police_new_index();
	h = tcf_police_hash(p->index);
	write_lock_bh(&police_lock);
	p->next = tcf_police_ht[h];
	tcf_police_ht[h] = p;
	write_unlock_bh(&police_lock);

	if (ret == ACT_P_CREATED)
/* tc_action_ops.cleanup hook: drop this action's reference on the policer.
 * NOTE(review): the braces and a probable NULL-guard / fallback return
 * around the release call are elided in this excerpt. */
static int tcf_act_police_cleanup(struct tc_action *a, int bind)
	struct tcf_police *p = PRIV(a);

	return tcf_police_release(p, bind);
/*
 * tc_action_ops.act hook: token-bucket police one packet and return the
 * configured verdict (conform/exceed actions).
 * NOTE(review): excerpt is incomplete -- the opening brace, declarations of
 * 'now'/'toks'/'ptoks', the spin_lock on entry, the toks-capping branch
 * body, the state commit (t_c/toks/ptoks update) and every return statement
 * are elided.
 */
static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a,
			  struct tcf_result *res)
	struct sk_buff *skb = *pskb;
	struct tcf_police *p = PRIV(a);

	/* Account the packet (lock presumed held here). */
	p->bstats.bytes += skb->len;

#ifdef CONFIG_NET_ESTIMATOR
	/* Average-rate policing: over the EWMA limit is an overlimit. */
	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
		p->qstats.overlimits++;
		spin_unlock(&p->lock);

	if (skb->len <= p->mtu) {
		/* No rate table configured: pure result/drop policer. */
		if (p->R_tab == NULL) {
			spin_unlock(&p->lock);

		/* Classic TBF: refill tokens for the elapsed time, capped
		 * by the burst, then charge this packet's cost. */
		PSCHED_GET_TIME(now);
		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
		/* Peak-rate bucket, capped at one MTU's worth of tokens. */
		ptoks = toks + p->ptoks;
		if (ptoks > (long)L2T_P(p, p->mtu))
			ptoks = (long)L2T_P(p, p->mtu);
		ptoks -= L2T_P(p, skb->len);
		if (toks > (long)p->burst)
		toks -= L2T(p, skb->len);
		/* Conforms only if neither bucket went negative. */
		if ((toks|ptoks) >= 0) {
			spin_unlock(&p->lock);

	/* Exceed path: count the overlimit and return p->action. */
	p->qstats.overlimits++;
	spin_unlock(&p->lock);
/* tc_action_ops.dump hook: serialize the policer's configuration into the
 * skb as TCA_POLICE_* attributes.
 * NOTE(review): excerpt is incomplete -- the return type line, braces, the
 * conditionals guarding the rate/peakrate/result/avrate emissions, the
 * success return and the rtattr_failure label/return are elided. */
tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
	unsigned char *b = skb->tail;
	struct tc_police opt;
	struct tcf_police *p = PRIV(a);

	opt.index = p->index;
	opt.action = p->action;
	opt.burst = p->burst;
	/* Report refcounts net of the caller's own references. */
	opt.refcnt = p->refcnt - ref;
	opt.bindcnt = p->bindcnt - bind;
	/* Rate present: copy it; otherwise (elided else) zero the field. */
	opt.rate = p->R_tab->rate;
	memset(&opt.rate, 0, sizeof(opt.rate));
	opt.peakrate = p->P_tab->rate;
	memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
	RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
#ifdef CONFIG_NET_ESTIMATOR
	RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
	/* rtattr-failure path: roll the message back to 'b'. */
	skb_trim(skb, b - skb->data);
/* Module metadata for the policing action. */
MODULE_AUTHOR("Alexey Kuznetsov");
MODULE_DESCRIPTION("Policing actions");
MODULE_LICENSE("GPL");
/* Operations vector registering the policer with the generic action API.
 * NOTE(review): the .kind initializer (presumably "police") and the closing
 * brace of the initializer are elided in this excerpt. */
static struct tc_action_ops act_police_ops = {
	.type		=	TCA_ID_POLICE,
	.capab		=	TCA_CAP_NONE,
	.owner		=	THIS_MODULE,
	.act		=	tcf_act_police,
	.dump		=	tcf_act_police_dump,
	.cleanup	=	tcf_act_police_cleanup,
	.lookup		=	tcf_hash_search,
	.init		=	tcf_act_police_locate,
	.walk		=	tcf_generic_walker
/* Module init: register the police action ops with the action subsystem.
 * NOTE(review): the storage-class/return-type line and braces are elided. */
police_init_module(void)
	return tcf_register_action(&act_police_ops);
/* Module exit: unregister the police action ops.
 * NOTE(review): the storage-class/return-type line and braces are elided. */
police_cleanup_module(void)
	tcf_unregister_action(&act_police_ops);
/* Hook the init/exit functions into the module loader. */
module_init(police_init_module);
module_exit(police_cleanup_module);
/*
 * Classifier-embedded (non-CONFIG_NET_CLS_ACT) variant of policer setup:
 * parse the TCA_POLICE_* TLVs, reuse an existing policer by index or build
 * a new one, insert it into the hash table, and return it (NULL on error).
 * NOTE(review): excerpt is heavily elided -- the opening brace, the 'h'
 * declaration, most error returns/goto-failure targets, the existing-policer
 * branch body, refcount initialization and the final returns are missing.
 */
struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
	struct tcf_police *p;
	struct rtattr *tb[TCA_POLICE_MAX];
	struct tc_police *parm;

	if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)

	if (tb[TCA_POLICE_TBF-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))

	parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);

	/* Reuse an existing policer when the caller supplied its index. */
	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {

	p = kmalloc(sizeof(*p), GFP_KERNEL);
	memset(p, 0, sizeof(*p));

	spin_lock_init(&p->lock);
	p->stats_lock = &p->lock;
	if (parm->rate.rate) {
		p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
		if (p->R_tab == NULL)
		if (parm->peakrate.rate) {
			p->P_tab = qdisc_get_rtab(&parm->peakrate,
						  tb[TCA_POLICE_PEAKRATE-1]);
			/* Here the p->P_tab test matches the assignment
			 * above (contrast with tcf_act_police_locate). */
			if (p->P_tab == NULL)
	if (tb[TCA_POLICE_RESULT-1]) {
		if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
#ifdef CONFIG_NET_ESTIMATOR
	if (tb[TCA_POLICE_AVRATE-1]) {
		if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
	/* Start with a full bucket of tokens. */
	p->toks = p->burst = parm->burst;
	/* Default MTU when none was given: largest length one rate-table
	 * cell can represent. */
	p->mtu = 255<<p->R_tab->rate.cell_log;
	p->ptoks = L2T_P(p, p->mtu);
	PSCHED_GET_TIME(p->t_c);
	p->index = parm->index ? : tcf_police_new_index();
	p->action = parm->action;
#ifdef CONFIG_NET_ESTIMATOR
	gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
	/* Link the new policer into its hash bucket. */
	h = tcf_police_hash(p->index);
	write_lock_bh(&police_lock);
	p->next = tcf_police_ht[h];
	tcf_police_ht[h] = p;
	write_unlock_bh(&police_lock);

	/* failure path: release whatever rate table was acquired */
	qdisc_put_rtab(p->R_tab);
/*
 * Classifier-embedded policing of one packet; mirrors tcf_act_police()
 * (token-bucket metering, returns the configured verdict).
 * NOTE(review): excerpt is incomplete -- the opening brace, declarations of
 * 'now'/'toks'/'ptoks', the spin_lock on entry, the toks-capping branch
 * body, the state commit and every return statement are elided.
 */
int tcf_police(struct sk_buff *skb, struct tcf_police *p)
	/* Account the packet (lock presumed held here). */
	p->bstats.bytes += skb->len;

#ifdef CONFIG_NET_ESTIMATOR
	/* Average-rate policing: over the EWMA limit is an overlimit. */
	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
		p->qstats.overlimits++;
		spin_unlock(&p->lock);

	if (skb->len <= p->mtu) {
		/* No rate table configured: pure result/drop policer. */
		if (p->R_tab == NULL) {
			spin_unlock(&p->lock);

		/* Refill tokens for elapsed time, capped by burst, then
		 * charge this packet's cost against both buckets. */
		PSCHED_GET_TIME(now);
		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
		ptoks = toks + p->ptoks;
		if (ptoks > (long)L2T_P(p, p->mtu))
			ptoks = (long)L2T_P(p, p->mtu);
		ptoks -= L2T_P(p, skb->len);
		if (toks > (long)p->burst)
		toks -= L2T(p, skb->len);
		/* Conforms only if neither bucket went negative. */
		if ((toks|ptoks) >= 0) {
			spin_unlock(&p->lock);

	/* Exceed path: count the overlimit and return p->action. */
	p->qstats.overlimits++;
	spin_unlock(&p->lock);
/* Serialize a classifier-embedded policer's configuration into the skb as
 * TCA_POLICE_* attributes; mirrors tcf_act_police_dump() without the
 * refcnt/bindcnt fields.
 * NOTE(review): excerpt is incomplete -- braces, the conditionals guarding
 * the rate/peakrate/result/avrate emissions, the success return and the
 * rtattr_failure label/return are elided. */
int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
	unsigned char *b = skb->tail;
	struct tc_police opt;

	opt.index = p->index;
	opt.action = p->action;
	opt.burst = p->burst;
	/* Rate present: copy it; otherwise (elided else) zero the field. */
	opt.rate = p->R_tab->rate;
	memset(&opt.rate, 0, sizeof(opt.rate));
	opt.peakrate = p->P_tab->rate;
	memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
	RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
#ifdef CONFIG_NET_ESTIMATOR
	RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
	/* rtattr-failure path: roll the message back to 'b'. */
	skb_trim(skb, b - skb->data);
/* Dump the policer's statistics (basic, rate-estimator, queue) through the
 * gnet_stats compat interface.
 * NOTE(review): excerpt is incomplete -- the opening brace, the declaration
 * of dump handle 'd', the error-path gotos/returns and the success return
 * are elided. */
int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, p->stats_lock, &d) < 0)

	if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
#ifdef CONFIG_NET_ESTIMATOR
	    gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &p->qstats) < 0)

	if (gnet_stats_finish_copy(&d) < 0)
/* Export the classifier-embedded policer API for modular qdiscs and
 * classifiers.
 * NOTE(review): tcf_police_hash/tcf_police_lookup are declared __inline__
 * above yet exported here -- confirm the full source provides out-of-line
 * definitions for these exports. */
EXPORT_SYMBOL(tcf_police);
EXPORT_SYMBOL(tcf_police_destroy);
EXPORT_SYMBOL(tcf_police_dump);
EXPORT_SYMBOL(tcf_police_dump_stats);
EXPORT_SYMBOL(tcf_police_hash);
EXPORT_SYMBOL(tcf_police_ht);
EXPORT_SYMBOL(tcf_police_locate);
EXPORT_SYMBOL(tcf_police_lookup);
EXPORT_SYMBOL(tcf_police_new_index);