More minor IPI work.
[dragonfly/vkernel-mp.git] sys/net/altq/altq_subr.c
/*	$KAME: altq_subr.c,v 1.23 2004/04/20 16:10:06 itojun Exp $	*/
/*	$DragonFly: src/sys/net/altq/altq_subr.c,v 1.9 2006/12/23 00:44:55 swildner Exp $ */

/*
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/thread2.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/ifq_var.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <net/pf/pfvar.h>
#include <net/altq/altq.h>

/* machine dependent clock related includes */
#if defined(__i386__)
#include <machine/clock.h>		/* for tsc_freq */
#include <machine/md_var.h>		/* for cpu_feature */
#include <machine/specialreg.h>		/* for CPUID_TSC */
#endif /* __i386__ */
/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
int (*altq_input)(struct mbuf *, int) = NULL;
static int tbr_timer = 0;	/* token bucket regulator timer */
static struct callout tbr_callout;

int pfaltq_running;	/* keep track of running state */

MALLOC_DEFINE(M_ALTQ, "altq", "ALTQ structures");
/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(const char *name, int type)
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}
	return (NULL);
}
int
altq_attach(struct ifaltq *ifq, int type, void *discipline,
    int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *),
    struct mbuf *(*dequeue)(struct ifaltq *, struct mbuf *, int),
    int (*request)(struct ifaltq *, int, void *),
    void *clfier,
    void *(*classify)(struct ifaltq *, struct mbuf *,
		      struct altq_pktattr *))
{
	if (!ifq_is_ready(ifq))
		return ENXIO;

	ifq->altq_type     = type;
	ifq->altq_disc     = discipline;
	ifq->altq_enqueue  = enqueue;
	ifq->altq_dequeue  = dequeue;
	ifq->altq_request  = request;
	ifq->altq_clfier   = clfier;
	ifq->altq_classify = classify;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
	return 0;
}
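
/*
 * Illustrative sketch (added comment, not part of the original source): a
 * queueing discipline would typically register itself from its setup path
 * with a call of this shape, passing its own handlers.  The "foo"
 * discipline, its ALTQT_FOO type and its foo_* handlers are hypothetical.
 *
 *	error = altq_attach(&ifp->if_snd, ALTQT_FOO, foo_state,
 *	    foo_enqueue, foo_dequeue, foo_request, NULL, NULL);
 *	if (error == 0)
 *		error = altq_enable(&ifp->if_snd);
 */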
int
altq_detach(struct ifaltq *ifq)
{
	if (!ifq_is_ready(ifq))
		return ENXIO;
	if (ifq_is_enabled(ifq))
		return EBUSY;
	if (!ifq_is_attached(ifq))
		return (0);

	ifq_set_classic(ifq);
	ifq->altq_type     = ALTQT_NONE;
	ifq->altq_disc     = NULL;
	ifq->altq_clfier   = NULL;
	ifq->altq_classify = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	return 0;
}

int
altq_enable(struct ifaltq *ifq)
{
	if (!ifq_is_ready(ifq))
		return ENXIO;
	if (ifq_is_enabled(ifq))
		return 0;

	crit_enter();
	ifq_purge(ifq);
	KKASSERT(ifq->ifq_len == 0);
	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	crit_exit();

	return 0;
}

int
altq_disable(struct ifaltq *ifq)
{
	if (!ifq_is_enabled(ifq))
		return 0;

	crit_enter();
	ifq_purge(ifq);
	KKASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	crit_exit();
	return 0;
}
/*
 * internal representation of token bucket parameters
 *	rate:	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
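
/*
 * Illustrative worked example (added comment, numbers are hypothetical):
 * with machclk_freq = 1,000,000,000 (a 1 GHz machine clock) and a profile
 * rate of 100 Mbit/s, tbr_set() below computes
 *
 *	tbr_rate = TBR_SCALE(100000000 / 8) / machclk_freq
 *	         = (12500000 << 32) / 1000000000
 *	         ~= 53687091		(scaled bytes per machclk tick)
 *
 * and tbr_get() recovers the profile as
 *	TBR_UNSCALE(tbr_rate * 8 * machclk_freq) ~= 100 Mbit/s.
 */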
struct mbuf *
tbr_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	uint64_t now;

	crit_enter();
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0) {
			crit_exit();
			return (NULL);
		}
	}

	if (ifq_is_enabled(ifq)) {
		m = (*ifq->altq_dequeue)(ifq, mpolled, op);
	} else if (op == ALTDQ_POLL) {
		IF_POLL(ifq, m);
	} else {
		IF_DEQUEUE(ifq, m);
		KKASSERT(mpolled == NULL || mpolled == m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	crit_exit();
	return (m);
}
/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(struct ifaltq *ifq, struct tb_profile *profile)
{
	struct tb_regulator *tbr, *otbr;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		kprintf("tbr_set: no cpu clock available!\n");
		return (ENXIO);
	}

	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL)
			return (ENOENT);
		ifq->altq_tbr = NULL;
		kfree(tbr, M_ALTQ);
		return (0);
	}

	tbr = kmalloc(sizeof(*tbr), M_ALTQ, M_WAITOK | M_ZERO);
	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		kfree(otbr, M_ALTQ);
	else if (tbr_timer == 0) {
		callout_reset(&tbr_callout, 1, tbr_timeout, NULL);
		tbr_timer = 1;
	}
	return (0);
}
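
/*
 * Illustrative worked example (added comment, numbers are hypothetical):
 * continuing the 100 Mbit/s profile on a 1 GHz machine clock from the
 * comment above, a bucket depth of 15000 bytes gives
 *
 *	tbr_depth      = TBR_SCALE(15000)	~= 6.44e13
 *	tbr_filluptime = tbr_depth / tbr_rate	~= 1200000 ticks ~= 1.2 ms
 *
 * i.e. the time to refill the bucket completely at 12.5 Mbyte/s, which is
 * the threshold tbr_dequeue() uses to short-circuit the token update.
 */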
/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 */
static void
tbr_timeout(void *arg)
{
	struct ifnet *ifp;
	int active;

	active = 0;
	crit_enter();
	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
		if (ifp->if_snd.altq_tbr == NULL)
			continue;
		active++;
		if (!ifq_is_empty(&ifp->if_snd) && ifp->if_start != NULL) {
			lwkt_serialize_enter(ifp->if_serializer);
			(*ifp->if_start)(ifp);
			lwkt_serialize_exit(ifp->if_serializer);
		}
	}
	crit_exit();
	if (active > 0)
		callout_reset(&tbr_callout, 1, tbr_timeout, NULL);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}

/*
 * get token bucket regulator profile
 */
int
tbr_get(struct ifaltq *ifq, struct tb_profile *profile)
{
	struct tb_regulator *tbr;

	if ((tbr = ifq->altq_tbr) == NULL) {
		profile->rate = 0;
		profile->depth = 0;
	} else {
		profile->rate =
		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
	}
	return (0);
}
/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 */
int
altq_pfattach(struct pf_altq *a)
{
	struct ifnet *ifp;
	struct tb_profile tb;
	int error = 0;

	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	ifp = ifunit(a->ifname);

	/* if the state is running, enable altq */
	if (error == 0 && pfaltq_running &&
	    ifp != NULL && ifp->if_snd.altq_type != ALTQT_NONE &&
	    !ifq_is_enabled(&ifp->if_snd))
		error = altq_enable(&ifp->if_snd);

	/* if altq is already enabled, reset the tokenbucket regulator */
	if (error == 0 && ifp != NULL && ifq_is_enabled(&ifp->if_snd)) {
		tb.rate = a->ifbandwidth;
		tb.depth = a->tbrsize;
		crit_enter();
		error = tbr_set(&ifp->if_snd, &tb);
		crit_exit();
	}

	return (error);
}
/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

	crit_enter();
	if (ifq_is_enabled(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	crit_exit();

	return (error);
}
/*
 * add a discipline or a queue
 */
int
altq_add(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no cpu clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}
/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
uint8_t
read_dsfield(struct mbuf *m, struct altq_pktattr *pktattr)
{
	struct mbuf *m0;
	uint8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((uint8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	}
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		kprintf("read_dsfield: can't locate header!\n");
#endif
		return ((uint8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((uint8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		uint32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((uint8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}
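
/*
 * Added explanatory comment: in the host-order ip6_flow word read above,
 * bits 31-28 carry the IP version, bits 27-20 the traffic class (the
 * diffserv byte) and bits 19-0 the flow label:
 *
 *	version = (flowlabel >> 28) & 0x0f;
 *	dsfield = (flowlabel >> 20) & 0xff;
 *	flowid  =  flowlabel & 0x000fffff;
 *
 * write_dsfield() below rewrites the same bit positions.
 */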
void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, uint8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	}
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		kprintf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		uint8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);	/* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		uint32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
}
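
/*
 * Added explanatory comment on the RFC 1624 update above: only the TOS byte
 * changes, so the old and new 16-bit field values are m = old and
 * m' = dsfield (zero-extended), and ~m = 0xff00 + (~old & 0xff), which is
 * exactly the quantity the code adds together with m'.  The two
 * (sum >> 16) steps fold the carries back into 16 bits before the final
 * one's complement is stored in ip_sum.
 */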
/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8

int machclk_usepcc;
uint32_t machclk_freq = 0;
uint32_t machclk_per_tick = 0;
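
/*
 * Added explanatory comment: with MACHCLK_SHIFT = 8 the emulated clock set
 * up in init_machclk() below runs at
 *
 *	machclk_freq = 1000000 << MACHCLK_SHIFT = 256,000,000 Hz
 *
 * and read_machclk() synthesizes ticks by shifting the microsecond count
 * from microtime() left by the same 8 bits, so one microsecond always
 * corresponds to 256 emulated ticks.
 */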
void
init_machclk(void)
{
	callout_init(&tbr_callout);

	machclk_usepcc = 1;

#if !defined(__i386__) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#elif defined(__DragonFly__) && defined(SMP)
	machclk_usepcc = 0;
#elif defined(__i386__)
	/* check if TSC is available */
	if (machclk_usepcc == 1 && (cpu_feature & CPUID_TSC) == 0)
		machclk_usepcc = 0;
#endif

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		kprintf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#ifdef __i386__
	machclk_freq = tsc_freq;
#else
#error "machclk_freq interface not implemented"
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int wait;
		struct timeval tv_start, tv_end;
		uint64_t start, end, diff;
		int timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		tsleep(&wait, PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (uint64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	kprintf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}
uint64_t
read_machclk(void)
{
	uint64_t val;

	if (machclk_usepcc) {
#if defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv;

		microtime(&tv);
		val = (((uint64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}
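
/*
 * Illustrative sketch (added comment, helper name is hypothetical): once
 * machclk_freq is known, a consumer could convert a tick delta from
 * read_machclk() back into microseconds with
 *
 *	static __inline uint64_t
 *	machclk_to_usec(uint64_t ticks)
 *	{
 *		return (ticks * 1000000 / machclk_freq);
 *	}
 *
 * which is simply the inverse of the calibration step in init_machclk().
 */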