1 /* $KAME: altq_subr.c,v 1.23 2004/04/20 16:10:06 itojun Exp $ */
2 /* $DragonFly: src/sys/net/altq/altq_subr.c,v 1.12 2008/05/14 11:59:23 sephe Exp $ */
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include "opt_inet6.h"
34 #include <sys/param.h>
35 #include <sys/malloc.h>
37 #include <sys/systm.h>
39 #include <sys/socket.h>
40 #include <sys/socketvar.h>
41 #include <sys/kernel.h>
42 #include <sys/callout.h>
43 #include <sys/errno.h>
44 #include <sys/syslog.h>
45 #include <sys/sysctl.h>
46 #include <sys/queue.h>
47 #include <sys/thread2.h>
50 #include <net/if_dl.h>
51 #include <net/if_types.h>
52 #include <net/ifq_var.h>
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
58 #include <netinet/ip6.h>
60 #include <netinet/tcp.h>
61 #include <netinet/udp.h>
63 #include <net/pf/pfvar.h>
64 #include <net/altq/altq.h>
66 /* machine dependent clock related includes */
68 #include <machine/clock.h> /* for tsc_frequency */
69 #include <machine/md_var.h> /* for cpu_feature */
70 #include <machine/specialreg.h> /* for CPUID_TSC */
74 * internal function prototypes
/*
 * Forward declarations of the file-local helpers, followed by the
 * file-scope state shared by the routines below.
 */
76 static void tbr_timeout(void *);
77 static int altq_enable_locked(struct ifaltq
*);
78 static int altq_disable_locked(struct ifaltq
*);
79 static int altq_detach_locked(struct ifaltq
*);
80 static int tbr_set_locked(struct ifaltq
*, struct tb_profile
*);
/*
 * Function pointer taking (struct mbuf *, int); it starts out NULL and is
 * not assigned anywhere in the code visible here.
 */
82 int (*altq_input
)(struct mbuf
*, int) = NULL
;
/*
 * tbr_timer is compared against 0 before the callout is armed in
 * tbr_set_locked() and is cleared again by tbr_timeout().
 */
83 static int tbr_timer
= 0; /* token bucket regulator timer */
/* callout used to schedule tbr_timeout() */
84 static struct callout tbr_callout
;
/* when non-zero, altq_pfattach() enables the discipline after attaching it */
86 int pfaltq_running
; /* keep track of running state */
/* malloc type handed to kmalloc() when a token bucket regulator is created */
88 MALLOC_DEFINE(M_ALTQ
, "altq", "ALTQ structures");
91 * alternate queueing support routines
/*
 * altq_lookup: find the discipline attached to an interface send queue.
 *
 * name - interface name, resolved with ifunit()
 * type - expected discipline type; ALTQT_NONE never matches
 *
 * Returns the send queue altq_disc pointer when the interface exists and
 * its altq_type equals a non-ALTQT_NONE type.  The result for the
 * no-match case is on lines not visible here.
 */
94 /* look up the queue state by the interface name and the queueing type. */
96 altq_lookup(const char *name
, int type
)
100 if ((ifp
= ifunit(name
)) != NULL
) {
/* a type of ALTQT_NONE is rejected even if the queue type is also NONE */
101 if (type
!= ALTQT_NONE
&& ifp
->if_snd
.altq_type
== type
)
102 return (ifp
->if_snd
.altq_disc
);
/*
 * altq_attach: install a queueing discipline on an interface queue.
 *
 * ifq        - queue to modify; must pass ifq_is_ready()
 * type       - discipline type stored in altq_type
 * discipline - opaque discipline state stored in altq_disc
 * enqueue    - enqueue callback
 * dequeue    - dequeue callback (takes the polled mbuf and the operation)
 * request    - control request callback
 * classify   - packet classification callback
 *
 * The body also stores a value named clfier into altq_clfier.
 * NOTE(review): its parameter declaration is not visible in this
 * listing - confirm against the full file.
 */
109 altq_attach(struct ifaltq
*ifq
, int type
, void *discipline
,
110 int (*enqueue
)(struct ifaltq
*, struct mbuf
*, struct altq_pktattr
*),
111 struct mbuf
*(*dequeue
)(struct ifaltq
*, struct mbuf
*, int),
112 int (*request
)(struct ifaltq
*, int, void *),
114 void *(*classify
)(struct ifaltq
*, struct mbuf
*,
115 struct altq_pktattr
*))
/* refuse queues that are not ready (the refusal itself is not visible) */
117 if (!ifq_is_ready(ifq
))
/* record the discipline and all of its callbacks on the queue */
120 ifq
->altq_type
= type
;
121 ifq
->altq_disc
= discipline
;
122 ifq
->altq_enqueue
= enqueue
;
123 ifq
->altq_dequeue
= dequeue
;
124 ifq
->altq_request
= request
;
125 ifq
->altq_clfier
= clfier
;
126 ifq
->altq_classify
= classify
;
/* keep only the CANTCHANGE and ENABLED bits; every other flag is cleared */
127 ifq
->altq_flags
&= (ALTQF_CANTCHANGE
|ALTQF_ENABLED
);
/*
 * altq_detach_locked: remove the discipline from an interface queue.
 *
 * Three guards run first: the queue must be ready, must not be enabled,
 * and must have a discipline attached.  The value returned by each guard
 * is on lines not visible here.  Past the guards the queue is switched
 * back with ifq_set_classic() and the discipline fields are cleared.
 *
 * NOTE(review): the _locked suffix suggests the caller already holds the
 * queue lock - confirm against altq_detach().
 */
132 altq_detach_locked(struct ifaltq
*ifq
)
134 if (!ifq_is_ready(ifq
))
/* an enabled discipline has to be disabled before it can be detached */
136 if (ifq_is_enabled(ifq
))
/* nothing attached */
138 if (!ifq_is_attached(ifq
))
141 ifq_set_classic(ifq
);
142 ifq
->altq_type
= ALTQT_NONE
;
143 ifq
->altq_disc
= NULL
;
144 ifq
->altq_clfier
= NULL
;
145 ifq
->altq_classify
= NULL
;
/* only the CANTCHANGE bit survives a detach */
146 ifq
->altq_flags
&= ALTQF_CANTCHANGE
;
/*
 * altq_detach: public wrapper around altq_detach_locked().
 *
 * NOTE(review): only the call itself is visible in this listing; any
 * locking around it and the return of error should be confirmed against
 * the full file.
 */
151 altq_detach(struct ifaltq
*ifq
)
156 error
= altq_detach_locked(ifq
);
/*
 * altq_enable_locked: turn on the attached discipline for a queue.
 *
 * Guards on ifq_is_ready() and ifq_is_enabled() come first (their results
 * are not visible here).  The queue is then purged and asserted empty
 * before ALTQF_ENABLED is set.
 */
162 altq_enable_locked(struct ifaltq
*ifq
)
164 if (!ifq_is_ready(ifq
))
/* already enabled */
166 if (ifq_is_enabled(ifq
))
/* drop whatever is queued so the queue is empty when the flag is set */
169 ifq_purge_locked(ifq
);
170 KKASSERT(ifq
->ifq_len
== 0);
172 ifq
->altq_flags
|= ALTQF_ENABLED
;
/* classification is switched on only when a classifier was attached */
173 if (ifq
->altq_clfier
!= NULL
)
174 ifq
->altq_flags
|= ALTQF_CLASSIFY
;
/*
 * altq_enable: public wrapper around altq_enable_locked().
 *
 * NOTE(review): only the call itself is visible in this listing; any
 * locking around it and the return of error should be confirmed against
 * the full file.
 */
179 altq_enable(struct ifaltq
*ifq
)
184 error
= altq_enable_locked(ifq
);
/*
 * altq_disable_locked: turn off the discipline on a queue.
 *
 * A queue that is not enabled is handled by an early guard (its result is
 * not visible here).  Otherwise the queue is purged, asserted empty, and
 * both ALTQF_ENABLED and ALTQF_CLASSIFY are cleared.
 */
190 altq_disable_locked(struct ifaltq
*ifq
)
192 if (!ifq_is_enabled(ifq
))
/* drop whatever is queued before the flags are cleared */
195 ifq_purge_locked(ifq
);
196 KKASSERT(ifq
->ifq_len
== 0);
197 ifq
->altq_flags
&= ~(ALTQF_ENABLED
|ALTQF_CLASSIFY
);
/*
 * altq_disable: public wrapper around altq_disable_locked().
 *
 * NOTE(review): only the call itself is visible in this listing; any
 * locking around it and the return of error should be confirmed against
 * the full file.
 */
202 altq_disable(struct ifaltq
*ifq
)
207 error
= altq_disable_locked(ifq
);
213 * internal representation of token bucket parameters
214 * rate: byte_per_unittime << 32
215 * (((bits_per_sec) / 8) << 32) / machclk_freq
/*
 * Fixed-point helpers for the token bucket values.  TBR_SCALE widens its
 * argument to int64_t and shifts it left by TBR_SHIFT; TBR_UNSCALE shifts
 * right by the same amount without any cast.  TBR_SHIFT is defined on a
 * line not visible here.
 */
220 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
221 #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
/*
 * tbr_dequeue: dequeue from ifq subject to its token bucket regulator.
 *
 * ifq     - queue whose regulator is consulted
 * mpolled - mbuf obtained from an earlier poll, or NULL; asserted to be
 *           NULL or equal to the mbuf obtained here
 * op      - ALTDQ_POLL or ALTDQ_REMOVE (the two values tested below)
 *
 * Tokens are kept in TBR_SCALE units.  When the bucket is empty it is
 * refilled from the time elapsed since tbr_last and capped at tbr_depth;
 * a successful ALTDQ_REMOVE charges the packet length against the bucket.
 */
224 tbr_dequeue(struct ifaltq
*ifq
, struct mbuf
*mpolled
, int op
)
226 struct tb_regulator
*tbr
;
233 if (op
== ALTDQ_REMOVE
&& tbr
->tbr_lastop
== ALTDQ_POLL
) {
234 /* if this is a remove after poll, bypass tbr check */
236 /* refill only when the bucket is empty (token is zero or negative) */
237 if (tbr
->tbr_token
<= 0) {
238 now
= read_machclk();
239 interval
= now
- tbr
->tbr_last
;
/* a gap of at least tbr_filluptime refills the bucket completely */
240 if (interval
>= tbr
->tbr_filluptime
)
241 tbr
->tbr_token
= tbr
->tbr_depth
;
/* otherwise credit interval * rate and clamp to the bucket depth */
243 tbr
->tbr_token
+= interval
* tbr
->tbr_rate
;
244 if (tbr
->tbr_token
> tbr
->tbr_depth
)
245 tbr
->tbr_token
= tbr
->tbr_depth
;
249 /* if the bucket is still empty after the refill, do not allow dequeue */
250 if (tbr
->tbr_token
<= 0) {
/* an enabled discipline supplies the packet through its dequeue callback */
256 if (ifq_is_enabled(ifq
)) {
257 m
= (*ifq
->altq_dequeue
)(ifq
, mpolled
, op
);
258 } else if (op
== ALTDQ_POLL
) {
262 KKASSERT(mpolled
== NULL
|| mpolled
== m
);
/* only an actual removal consumes tokens; a poll is free */
265 if (m
!= NULL
&& op
== ALTDQ_REMOVE
)
266 tbr
->tbr_token
-= TBR_SCALE(m_pktlen(m
));
/* remembered so that a remove right after a poll skips the token check */
267 tbr
->tbr_lastop
= op
;
273 * set a token bucket regulator.
274 * if the specified rate is zero, the token bucket regulator is deleted.
/*
 * tbr_set_locked: create, replace or delete the token bucket regulator
 * of a queue.
 *
 * ifq     - queue whose altq_tbr pointer is updated
 * profile - rate and depth of the bucket; a rate of 0 deletes the
 *           existing regulator
 *
 * NOTE(review): several lines of this function (return values, the
 * release of the old regulator, else branches) are not visible in this
 * listing.
 */
277 tbr_set_locked(struct ifaltq
*ifq
, struct tb_profile
*profile
)
279 struct tb_regulator
*tbr
, *otbr
;
/*
 * machclk_freq is tested twice; the body of the first test is not visible
 * here.  The second test reports that no clock is available.
 */
281 if (machclk_freq
== 0)
283 if (machclk_freq
== 0) {
284 kprintf("%s: no cpu clock available!\n", __func__
);
288 if (profile
->rate
== 0) {
289 /* delete this tbr */
290 if ((tbr
= ifq
->altq_tbr
) == NULL
)
292 ifq
->altq_tbr
= NULL
;
/* zero-filled allocation from the M_ALTQ malloc type */
297 tbr
= kmalloc(sizeof(*tbr
), M_ALTQ
, M_WAITOK
| M_ZERO
);
/* the rate is divided by 8 before scaling, then by the clock frequency */
298 tbr
->tbr_rate
= TBR_SCALE(profile
->rate
/ 8) / machclk_freq
;
299 tbr
->tbr_depth
= TBR_SCALE(profile
->depth
);
/* clock ticks needed to fill an empty bucket; guarded against rate 0 */
300 if (tbr
->tbr_rate
> 0)
301 tbr
->tbr_filluptime
= tbr
->tbr_depth
/ tbr
->tbr_rate
;
303 tbr
->tbr_filluptime
= 0xffffffffffffffffLL
;
/* a new regulator starts with a full bucket */
304 tbr
->tbr_token
= tbr
->tbr_depth
;
305 tbr
->tbr_last
= read_machclk();
306 tbr
->tbr_lastop
= ALTDQ_REMOVE
;
/* remember the previous regulator before installing the new one */
308 otbr
= ifq
->altq_tbr
;
309 ifq
->altq_tbr
= tbr
; /* set the new tbr */
/* arm the periodic timeout (1 tick) when tbr_timer is still 0 */
313 else if (tbr_timer
== 0) {
314 callout_reset(&tbr_callout
, 1, tbr_timeout
, NULL
);
/*
 * tbr_set: public wrapper around tbr_set_locked().
 *
 * NOTE(review): only the call itself is visible in this listing; any
 * locking around it and the return of error should be confirmed against
 * the full file.
 */
321 tbr_set(struct ifaltq
*ifq
, struct tb_profile
*profile
)
326 error
= tbr_set_locked(ifq
, profile
);
332 * tbr_timeout goes through the interface list, and kicks the drivers
/*
 * tbr_timeout: callout handler that restarts transmission on interfaces
 * using a token bucket regulator.
 *
 * arg - callout argument; the callers visible here pass NULL
 *
 * Walks the global ifnet list.  For an interface whose send queue is not
 * empty and which has an if_start routine, if_start is called between
 * ifnet_serialize_tx() and ifnet_deserialize_tx().
 */
336 tbr_timeout(void *arg
)
343 for (ifp
= TAILQ_FIRST(&ifnet
); ifp
; ifp
= TAILQ_NEXT(ifp
, if_list
)) {
/* test for an interface without a regulator (the action is not visible) */
344 if (ifp
->if_snd
.altq_tbr
== NULL
)
347 if (!ifq_is_empty(&ifp
->if_snd
) && ifp
->if_start
!= NULL
) {
348 ifnet_serialize_tx(ifp
);
349 (*ifp
->if_start
)(ifp
);
350 ifnet_deserialize_tx(ifp
);
/*
 * Re-arm for one tick later.
 * NOTE(review): the condition choosing between re-arming and clearing
 * tbr_timer is not visible in this listing.
 */
355 callout_reset(&tbr_callout
, 1, tbr_timeout
, NULL
);
357 tbr_timer
= 0; /* don't need tbr_timer anymore */
361 * get token bucket regulator profile
/*
 * tbr_get: report the token bucket regulator settings of a queue.
 *
 * ifq     - queue whose altq_tbr is read
 * profile - output; depth is written below
 *
 * The conversions undo the scaling applied in tbr_set_locked().
 */
364 tbr_get(struct ifaltq
*ifq
, struct tb_profile
*profile
)
366 struct tb_regulator
*tbr
;
/* no regulator attached (the handling is on lines not visible here) */
368 if ((tbr
= ifq
->altq_tbr
) == NULL
) {
/*
 * Multiplies the stored rate by 8 and by the clock frequency, then
 * unscales.
 * NOTE(review): the assignment target is on a line not visible here,
 * presumably profile->rate.
 */
373 (u_int
)TBR_UNSCALE(tbr
->tbr_rate
* 8 * machclk_freq
);
374 profile
->depth
= (u_int
)TBR_UNSCALE(tbr
->tbr_depth
);
380 * attach a discipline to the interface. if one already exists, it is
/*
 * altq_pfattach: attach the discipline described by a pf_altq record to
 * its interface.
 *
 * a - record naming the interface (ifname), the scheduler type, the
 *     discipline state (altq_disc) and the token bucket parameters
 *     (ifbandwidth, tbrsize)
 *
 * The scheduler-specific attach routine is selected by a->scheduler.
 * On success the discipline is enabled when pfaltq_running is set, and
 * the token bucket regulator is configured when the queue is enabled.
 *
 * NOTE(review): the case labels, the early-exit results and the
 * assignment of ifq are on lines not visible in this listing.
 */
384 altq_pfattach(struct pf_altq
*a
)
/* early checks: no scheduler selected, or no discipline state */
390 if (a
->scheduler
== ALTQT_NONE
)
393 if (a
->altq_disc
== NULL
)
396 ifp
= ifunit(a
->ifname
);
/* dispatch to the attach routine of the selected scheduler */
403 switch (a
->scheduler
) {
406 error
= cbq_pfattach(a
, ifq
);
411 error
= priq_pfattach(a
, ifq
);
416 error
= hfsc_pfattach(a
, ifq
);
421 error
= fairq_pfattach(a
, ifq
);
429 /* if the state is running, enable altq */
430 if (error
== 0 && pfaltq_running
&& ifq
->altq_type
!= ALTQT_NONE
&&
431 !ifq_is_enabled(ifq
))
432 error
= altq_enable_locked(ifq
);
434 /* if altq is enabled at this point, set the token bucket regulator */
435 if (error
== 0 && ifq_is_enabled(ifq
)) {
436 struct tb_profile tb
;
438 tb
.rate
= a
->ifbandwidth
;
439 tb
.depth
= a
->tbrsize
;
440 error
= tbr_set_locked(ifq
, &tb
);
448 * detach a discipline from the interface.
449 * it is possible that the discipline was already overridden by another
/*
 * altq_pfdetach: detach the discipline described by a pf_altq record
 * from its interface.
 *
 * a - record naming the interface (ifname) and the discipline state
 *     (altq_disc)
 *
 * The discipline is disabled first when the queue is enabled, then
 * detached.
 *
 * NOTE(review): the early-exit results, the assignment of ifq and any
 * check of the disable result are on lines not visible in this listing.
 */
453 altq_pfdetach(struct pf_altq
*a
)
459 ifp
= ifunit(a
->ifname
);
464 /* if this discipline is no longer referenced, just return */
465 if (a
->altq_disc
== NULL
)
/* the queue currently carries a different discipline than this record */
470 if (a
->altq_disc
!= ifq
->altq_disc
)
473 if (ifq_is_enabled(ifq
))
474 error
= altq_disable_locked(ifq
);
476 error
= altq_detach_locked(ifq
);
484 * add a discipline or a queue
/*
 * altq_add: add a discipline, or a queue when the record names one.
 *
 * a - pf_altq record; a non-empty qname forwards to altq_add_queue(),
 *     otherwise a->scheduler selects the add_altq routine
 *
 * Panics when machclk_freq is still 0 at the second check.
 *
 * NOTE(review): the case labels, the default case and the final return
 * are on lines not visible in this listing.
 */
487 altq_add(struct pf_altq
*a
)
/* a non-empty queue name means this record describes a queue */
491 if (a
->qname
[0] != 0)
492 return (altq_add_queue(a
));
/*
 * machclk_freq is tested twice; the body of the first test is not visible
 * here.
 */
494 if (machclk_freq
== 0)
496 if (machclk_freq
== 0)
497 panic("altq_add: no cpu clock");
499 switch (a
->scheduler
) {
502 error
= cbq_add_altq(a
);
507 error
= priq_add_altq(a
);
512 error
= hfsc_add_altq(a
);
517 error
= fairq_add_altq(a
);
528 * remove a discipline or a queue
/*
 * altq_remove: remove a discipline, or a queue when the record names one.
 *
 * a - pf_altq record; a non-empty qname forwards to altq_remove_queue(),
 *     otherwise a->scheduler selects the remove_altq routine
 *
 * NOTE(review): the case labels, the default case and the final return
 * are on lines not visible in this listing.
 */
531 altq_remove(struct pf_altq
*a
)
/* a non-empty queue name means this record describes a queue */
535 if (a
->qname
[0] != 0)
536 return (altq_remove_queue(a
));
538 switch (a
->scheduler
) {
541 error
= cbq_remove_altq(a
);
546 error
= priq_remove_altq(a
);
551 error
= hfsc_remove_altq(a
);
556 error
= fairq_remove_altq(a
);
567 * add a queue to the discipline
/*
 * altq_add_queue: add a queue to a discipline by dispatching on
 * a->scheduler to the matching add_queue routine.
 *
 * NOTE(review): the case labels, the default case and the final return
 * are on lines not visible in this listing.
 */
570 altq_add_queue(struct pf_altq
*a
)
574 switch (a
->scheduler
) {
577 error
= cbq_add_queue(a
);
582 error
= priq_add_queue(a
);
587 error
= hfsc_add_queue(a
);
592 error
= fairq_add_queue(a
);
603 * remove a queue from the discipline
/*
 * altq_remove_queue: remove a queue from a discipline by dispatching on
 * a->scheduler to the matching remove_queue routine.
 *
 * NOTE(review): the case labels, the default case and the final return
 * are on lines not visible in this listing.
 */
606 altq_remove_queue(struct pf_altq
*a
)
610 switch (a
->scheduler
) {
613 error
= cbq_remove_queue(a
);
618 error
= priq_remove_queue(a
);
623 error
= hfsc_remove_queue(a
);
628 error
= fairq_remove_queue(a
);
639 * get queue statistics
/*
 * altq_getqstats: fetch queue statistics by dispatching on a->scheduler
 * to the matching getqstats routine.
 *
 * a      - pf_altq record identifying the queue
 * ubuf   - destination buffer, passed through unchanged
 * nbytes - size argument, passed through unchanged
 *
 * NOTE(review): ubuf is presumably a user-space buffer and nbytes an
 * in/out length - confirm against the scheduler routines.  The case
 * labels, the default case and the final return are on lines not visible
 * in this listing.
 */
642 altq_getqstats(struct pf_altq
*a
, void *ubuf
, int *nbytes
)
646 switch (a
->scheduler
) {
649 error
= cbq_getqstats(a
, ubuf
, nbytes
);
654 error
= priq_getqstats(a
, ubuf
, nbytes
);
659 error
= hfsc_getqstats(a
, ubuf
, nbytes
);
664 error
= fairq_getqstats(a
, ubuf
, nbytes
);
675 * read and write diffserv field in IPv4 or IPv6 header
/*
 * read_dsfield: extract the diffserv field from the IPv4 or IPv6 header
 * referenced by pktattr.
 *
 * m       - mbuf chain expected to contain pktattr->pattr_hdr
 * pktattr - packet attributes; pattr_af selects IPv4 or IPv6 parsing and
 *           pattr_hdr points at the IP header
 *
 * ds_field starts at 0.  A version mismatch returns 0.  When the header
 * pointer cannot be found in the chain, pattr_af is reset to AF_UNSPEC
 * and a message is printed.
 *
 * NOTE(review): the IPv4 version test, some early returns and the final
 * return are on lines not visible in this listing.
 */
678 read_dsfield(struct mbuf
*m
, struct altq_pktattr
*pktattr
)
681 uint8_t ds_field
= 0;
/* only AF_INET and AF_INET6 attributes are handled */
683 if (pktattr
== NULL
||
684 (pktattr
->pattr_af
!= AF_INET
&& pktattr
->pattr_af
!= AF_INET6
))
687 /* verify that pattr_hdr is within the mbuf data */
688 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
689 if ((pktattr
->pattr_hdr
>= m0
->m_data
) &&
690 (pktattr
->pattr_hdr
< m0
->m_data
+ m0
->m_len
))
694 /* ick, pattr_hdr is stale */
695 pktattr
->pattr_af
= AF_UNSPEC
;
697 kprintf("read_dsfield: can't locate header!\n");
702 if (pktattr
->pattr_af
== AF_INET
) {
703 struct ip
*ip
= (struct ip
*)pktattr
->pattr_hdr
;
706 return ((uint8_t)0); /* version mismatch! */
/* IPv4: the whole TOS byte is returned */
707 ds_field
= ip
->ip_tos
;
710 else if (pktattr
->pattr_af
== AF_INET6
) {
711 struct ip6_hdr
*ip6
= (struct ip6_hdr
*)pktattr
->pattr_hdr
;
/* host-order ip6_flow word: top 4 bits are the version */
714 flowlabel
= ntohl(ip6
->ip6_flow
);
715 if ((flowlabel
>> 28) != 6)
716 return ((uint8_t)0); /* version mismatch! */
/* bits 20-27 of the word form the 8-bit field returned for IPv6 */
717 ds_field
= (flowlabel
>> 20) & 0xff;
/*
 * write_dsfield: store a new diffserv value in the IPv4 or IPv6 header
 * referenced by pktattr.
 *
 * m       - mbuf chain expected to contain pktattr->pattr_hdr
 * pktattr - packet attributes; pattr_af selects IPv4 or IPv6 handling
 * dsfield - new value; for IPv4 its low two bits are OR-ed with those of
 *           the old byte
 *
 * For IPv4 the header checksum is adjusted incrementally.  When the
 * header pointer cannot be found in the chain, pattr_af is reset to
 * AF_UNSPEC and a message is printed.
 *
 * NOTE(review): the IPv4 version test, the read of the old TOS byte and
 * some early returns are on lines not visible in this listing.
 */
724 write_dsfield(struct mbuf
*m
, struct altq_pktattr
*pktattr
, uint8_t dsfield
)
/* only AF_INET and AF_INET6 attributes are handled */
728 if (pktattr
== NULL
||
729 (pktattr
->pattr_af
!= AF_INET
&& pktattr
->pattr_af
!= AF_INET6
))
732 /* verify that pattr_hdr is within the mbuf data */
733 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
734 if ((pktattr
->pattr_hdr
>= m0
->m_data
) &&
735 (pktattr
->pattr_hdr
< m0
->m_data
+ m0
->m_len
))
739 /* ick, pattr_hdr is stale */
740 pktattr
->pattr_af
= AF_UNSPEC
;
742 kprintf("write_dsfield: can't locate header!\n");
747 if (pktattr
->pattr_af
== AF_INET
) {
748 struct ip
*ip
= (struct ip
*)pktattr
->pattr_hdr
;
753 return; /* version mismatch! */
/* carry over the low two bits of the old byte */
755 dsfield
|= old
& 3; /* leave CU bits */
758 ip
->ip_tos
= dsfield
;
760 * update checksum (from RFC1624)
761 * HC' = ~(~HC + ~m + m')
/* start from the complement of the stored checksum in host order */
763 sum
= ~ntohs(ip
->ip_sum
) & 0xffff;
/* add the complement of the old byte and the new byte */
764 sum
+= 0xff00 + (~old
& 0xff) + dsfield
;
/* fold the carries back into the low 16 bits */
765 sum
= (sum
>> 16) + (sum
& 0xffff);
766 sum
+= (sum
>> 16); /* add carry */
768 ip
->ip_sum
= htons(~sum
& 0xffff);
771 else if (pktattr
->pattr_af
== AF_INET6
) {
772 struct ip6_hdr
*ip6
= (struct ip6_hdr
*)pktattr
->pattr_hdr
;
775 flowlabel
= ntohl(ip6
->ip6_flow
);
776 if ((flowlabel
>> 28) != 6)
777 return; /* version mismatch! */
/* the mask clears bits 22-27 and keeps bits 20-21 before the OR */
778 flowlabel
= (flowlabel
& 0xf03fffff) | (dsfield
<< 20);
779 ip6
->ip6_flow
= htonl(flowlabel
);
785 * high resolution clock support taking advantage of a machine dependent
786 * high resolution time counter (e.g., timestamp counter of intel pentium).
788 * - 64-bit-long monotonically-increasing counter
789 * - frequency range is 100M-4GHz (CPU speed)
/*
 * Machine clock state.  Both values start at 0 and are filled in by the
 * clock initialisation code below.  A machclk_freq of 0 is treated as
 * "no clock known yet" by tbr_set_locked() and altq_add().
 */
791 /* if pcc is not available or disabled, emulate 256MHz using microtime() */
/* shift applied to a microsecond count to emulate the clock (2^8 = 256) */
792 #define MACHCLK_SHIFT 8
/* clock frequency in Hz, as printed by the initialisation code */
795 uint64_t machclk_freq
= 0;
/* machclk_freq / hz, i.e. clock counts per scheduler tick */
796 uint32_t machclk_per_tick
= 0;
/*
 * Machine clock initialisation body.
 *
 * NOTE(review): the function header is on a line not visible in this
 * listing; the name init_machclk is taken from the tsleep() wait message
 * below.
 *
 * Initialises the TBR callout, decides whether the hardware counter is
 * used (machclk_usepcc), and sets machclk_freq and machclk_per_tick.
 * With machclk_usepcc == 0 the clock is emulated from microtime().
 * Otherwise the frequency comes from tsc_frequency or, when still
 * unknown, is measured across a sleep of about one second.
 */
801 callout_init(&tbr_callout
);
/* NOTE(review): the bodies of these preprocessor branches are not visible */
805 #if !defined(__i386__) || defined(ALTQ_NOPCC)
807 #elif defined(__DragonFly__) && defined(SMP)
809 #elif defined(__i386__)
810 /* check if TSC is available */
811 if (machclk_usepcc
== 1 && (cpu_feature
& CPUID_TSC
) == 0)
815 if (machclk_usepcc
== 0) {
816 /* emulate 256MHz using microtime() */
/* 1000000 microseconds per second, scaled by 2^MACHCLK_SHIFT */
817 machclk_freq
= 1000000LLU << MACHCLK_SHIFT
;
818 machclk_per_tick
= machclk_freq
/ hz
;
820 kprintf("altq: emulate %lluHz cpu clock\n", machclk_freq
);
826 * if the clock frequency (of Pentium TSC or Alpha PCC) is
827 * accessible, just use it.
830 if (cpu_feature
& CPUID_TSC
)
831 machclk_freq
= (uint64_t)tsc_frequency
;
835 * if we don't know the clock frequency, measure it.
837 if (machclk_freq
== 0) {
839 struct timeval tv_start
, tv_end
;
840 uint64_t start
, end
, diff
;
/* sample wall-clock time and the counter, sleep, then sample again */
843 microtime(&tv_start
);
844 start
= read_machclk();
845 timo
= hz
; /* 1 sec */
846 tsleep(&wait
, PCATCH
, "init_machclk", timo
);
848 end
= read_machclk();
/* elapsed wall-clock time in microseconds */
849 diff
= (uint64_t)(tv_end
.tv_sec
- tv_start
.tv_sec
) * 1000000
850 + tv_end
.tv_usec
- tv_start
.tv_usec
;
/* NOTE(review): no visible guard against diff being 0 before this division */
852 machclk_freq
= (end
- start
) * 1000000 / diff
;
855 machclk_per_tick
= machclk_freq
/ hz
;
858 kprintf("altq: CPU clock: %lluHz\n", machclk_freq
);
867 if (machclk_usepcc
) {
868 #ifdef _RDTSC_SUPPORTED_
871 panic("read_machclk");
877 val
= (((uint64_t)(tv
.tv_sec
- boottime
.tv_sec
) * 1000000
878 + tv
.tv_usec
) << MACHCLK_SHIFT
);