1 /* $KAME: altq_hfsc.c,v 1.25 2004/04/17 10:54:48 kjc Exp $ */
2 /* $DragonFly: src/sys/net/altq/altq_hfsc.c,v 1.9 2008/05/14 11:59:23 sephe Exp $ */
5 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
7 * Permission to use, copy, modify, and distribute this software and
8 * its documentation is hereby granted (including for commercial or
9 * for-profit use), provided that both the copyright notice and this
10 * permission notice appear in all copies of the software, derivative
11 * works, or modified versions, and any portions thereof.
13 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
14 * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
15 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
16 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
28 * Carnegie Mellon encourages (but does not require) users of this
29 * software to return any improvements or extensions that they make,
30 * and to grant Carnegie Mellon the rights to redistribute these
31 * changes without encumbrance.
34 * H-FSC is described in Proceedings of SIGCOMM'97,
35 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
36 * Real-Time and Priority Service"
37 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
39 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
40 * when a class has an upperlimit, the fit-time is computed from the
41 * upperlimit service curve. the link-sharing scheduler does not schedule
42 * a class whose fit-time exceeds the current time.
47 #include "opt_inet6.h"
49 #ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */
51 #include <sys/param.h>
52 #include <sys/malloc.h>
54 #include <sys/socket.h>
55 #include <sys/systm.h>
56 #include <sys/errno.h>
57 #include <sys/queue.h>
58 #include <sys/thread.h>
61 #include <net/ifq_var.h>
62 #include <netinet/in.h>
64 #include <net/pf/pfvar.h>
65 #include <net/altq/altq.h>
66 #include <net/altq/altq_hfsc.h>
68 #include <sys/thread2.h>
73 static int hfsc_clear_interface(struct hfsc_if
*);
74 static int hfsc_request(struct ifaltq
*, int, void *);
75 static void hfsc_purge(struct hfsc_if
*);
76 static struct hfsc_class
*hfsc_class_create(struct hfsc_if
*,
77 struct service_curve
*,
78 struct service_curve
*,
79 struct service_curve
*,
80 struct hfsc_class
*, int, int, int);
81 static int hfsc_class_destroy(struct hfsc_class
*);
82 static struct hfsc_class
*hfsc_nextclass(struct hfsc_class
*);
83 static int hfsc_enqueue(struct ifaltq
*, struct mbuf
*,
84 struct altq_pktattr
*);
85 static struct mbuf
*hfsc_dequeue(struct ifaltq
*, struct mbuf
*, int);
87 static int hfsc_addq(struct hfsc_class
*, struct mbuf
*);
88 static struct mbuf
*hfsc_getq(struct hfsc_class
*);
89 static struct mbuf
*hfsc_pollq(struct hfsc_class
*);
90 static void hfsc_purgeq(struct hfsc_class
*);
92 static void update_cfmin(struct hfsc_class
*);
93 static void set_active(struct hfsc_class
*, int);
94 static void set_passive(struct hfsc_class
*);
96 static void init_ed(struct hfsc_class
*, int);
97 static void update_ed(struct hfsc_class
*, int);
98 static void update_d(struct hfsc_class
*, int);
99 static void init_vf(struct hfsc_class
*, int);
100 static void update_vf(struct hfsc_class
*, int, uint64_t);
101 static ellist_t
*ellist_alloc(void);
102 static void ellist_destroy(ellist_t
*);
103 static void ellist_insert(struct hfsc_class
*);
104 static void ellist_remove(struct hfsc_class
*);
105 static void ellist_update(struct hfsc_class
*);
106 struct hfsc_class
*ellist_get_mindl(ellist_t
*, uint64_t);
107 static actlist_t
*actlist_alloc(void);
108 static void actlist_destroy(actlist_t
*);
109 static void actlist_insert(struct hfsc_class
*);
110 static void actlist_remove(struct hfsc_class
*);
111 static void actlist_update(struct hfsc_class
*);
113 static struct hfsc_class
*actlist_firstfit(struct hfsc_class
*, uint64_t);
115 static __inline
uint64_t seg_x2y(uint64_t, uint64_t);
116 static __inline
uint64_t seg_y2x(uint64_t, uint64_t);
117 static __inline
uint64_t m2sm(u_int
);
118 static __inline
uint64_t m2ism(u_int
);
119 static __inline
uint64_t d2dx(u_int
);
120 static u_int
sm2m(uint64_t);
121 static u_int
dx2d(uint64_t);
123 static void sc2isc(struct service_curve
*, struct internal_sc
*);
124 static void rtsc_init(struct runtime_sc
*, struct internal_sc
*,
126 static uint64_t rtsc_y2x(struct runtime_sc
*, uint64_t);
127 static uint64_t rtsc_x2y(struct runtime_sc
*, uint64_t);
128 static void rtsc_min(struct runtime_sc
*, struct internal_sc
*,
131 static void get_class_stats(struct hfsc_classstats
*, struct hfsc_class
*);
132 static struct hfsc_class
*clh_to_clp(struct hfsc_if
*, uint32_t);
137 #define is_a_parent_class(cl) ((cl)->cl_children != NULL)
139 #define HT_INFINITY 0xffffffffffffffffLL /* infinite time value */
142 hfsc_pfattach(struct pf_altq
*a
, struct ifaltq
*ifq
)
144 return altq_attach(ifq
, ALTQT_HFSC
, a
->altq_disc
,
145 hfsc_enqueue
, hfsc_dequeue
, hfsc_request
, NULL
, NULL
);
149 hfsc_add_altq(struct pf_altq
*a
)
154 if ((ifp
= ifunit(a
->ifname
)) == NULL
)
156 if (!ifq_is_ready(&ifp
->if_snd
))
159 hif
= kmalloc(sizeof(struct hfsc_if
), M_ALTQ
, M_WAITOK
| M_ZERO
);
161 hif
->hif_eligible
= ellist_alloc();
162 hif
->hif_ifq
= &ifp
->if_snd
;
163 ifq_purge(&ifp
->if_snd
);
165 /* keep the state in pf_altq */
172 hfsc_remove_altq(struct pf_altq
*a
)
176 if ((hif
= a
->altq_disc
) == NULL
)
180 hfsc_clear_interface(hif
);
181 hfsc_class_destroy(hif
->hif_rootclass
);
183 ellist_destroy(hif
->hif_eligible
);
191 hfsc_add_queue_locked(struct pf_altq
*a
, struct hfsc_if
*hif
)
193 struct hfsc_class
*cl
, *parent
;
194 struct hfsc_opts
*opts
;
195 struct service_curve rtsc
, lssc
, ulsc
;
197 KKASSERT(a
->qid
!= 0);
199 opts
= &a
->pq_u
.hfsc_opts
;
201 if (a
->parent_qid
== HFSC_NULLCLASS_HANDLE
&& hif
->hif_rootclass
== NULL
)
203 else if ((parent
= clh_to_clp(hif
, a
->parent_qid
)) == NULL
)
206 if (clh_to_clp(hif
, a
->qid
) != NULL
)
209 rtsc
.m1
= opts
->rtsc_m1
;
210 rtsc
.d
= opts
->rtsc_d
;
211 rtsc
.m2
= opts
->rtsc_m2
;
212 lssc
.m1
= opts
->lssc_m1
;
213 lssc
.d
= opts
->lssc_d
;
214 lssc
.m2
= opts
->lssc_m2
;
215 ulsc
.m1
= opts
->ulsc_m1
;
216 ulsc
.d
= opts
->ulsc_d
;
217 ulsc
.m2
= opts
->ulsc_m2
;
219 cl
= hfsc_class_create(hif
, &rtsc
, &lssc
, &ulsc
, parent
, a
->qlimit
,
220 opts
->flags
, a
->qid
);
228 hfsc_add_queue(struct pf_altq
*a
)
237 /* XXX not MP safe */
238 if ((hif
= a
->altq_disc
) == NULL
)
243 error
= hfsc_add_queue_locked(a
, hif
);
250 hfsc_remove_queue_locked(struct pf_altq
*a
, struct hfsc_if
*hif
)
252 struct hfsc_class
*cl
;
254 if ((cl
= clh_to_clp(hif
, a
->qid
)) == NULL
)
257 return (hfsc_class_destroy(cl
));
261 hfsc_remove_queue(struct pf_altq
*a
)
267 /* XXX not MP safe */
268 if ((hif
= a
->altq_disc
) == NULL
)
273 error
= hfsc_remove_queue_locked(a
, hif
);
280 hfsc_getqstats(struct pf_altq
*a
, void *ubuf
, int *nbytes
)
283 struct hfsc_class
*cl
;
284 struct hfsc_classstats stats
;
288 if (*nbytes
< sizeof(stats
))
291 /* XXX not MP safe */
292 if ((hif
= altq_lookup(a
->ifname
, ALTQT_HFSC
)) == NULL
)
298 if ((cl
= clh_to_clp(hif
, a
->qid
)) == NULL
) {
303 get_class_stats(&stats
, cl
);
307 if ((error
= copyout((caddr_t
)&stats
, ubuf
, sizeof(stats
))) != 0)
309 *nbytes
= sizeof(stats
);
314 * bring the interface back to the initial state by discarding
315 * all the filters and classes except the root class.
318 hfsc_clear_interface(struct hfsc_if
*hif
)
320 struct hfsc_class
*cl
;
322 if (hif
->hif_rootclass
== NULL
)
326 /* clear out the classes */
327 while ((cl
= hif
->hif_rootclass
->cl_children
) != NULL
) {
329 * remove the first leaf class found in the hierarchy
332 for (; cl
!= NULL
; cl
= hfsc_nextclass(cl
)) {
333 if (!is_a_parent_class(cl
)) {
334 hfsc_class_destroy(cl
);
344 hfsc_request(struct ifaltq
*ifq
, int req
, void *arg
)
346 struct hfsc_if
*hif
= (struct hfsc_if
*)ifq
->altq_disc
;
358 /* discard all the queued packets on the interface */
360 hfsc_purge(struct hfsc_if
*hif
)
362 struct hfsc_class
*cl
;
364 for (cl
= hif
->hif_rootclass
; cl
!= NULL
; cl
= hfsc_nextclass(cl
)) {
365 if (!qempty(cl
->cl_q
))
368 if (ifq_is_enabled(hif
->hif_ifq
))
369 hif
->hif_ifq
->ifq_len
= 0;
373 hfsc_class_create(struct hfsc_if
*hif
, struct service_curve
*rsc
,
374 struct service_curve
*fsc
, struct service_curve
*usc
,
375 struct hfsc_class
*parent
, int qlimit
, int flags
, int qid
)
377 struct hfsc_class
*cl
, *p
;
380 if (hif
->hif_classes
>= HFSC_MAX_CLASSES
)
384 if (flags
& HFCF_RED
) {
386 kprintf("hfsc_class_create: RED not configured for HFSC!\n");
392 cl
= kmalloc(sizeof(*cl
), M_ALTQ
, M_WAITOK
| M_ZERO
);
393 cl
->cl_q
= kmalloc(sizeof(*cl
->cl_q
), M_ALTQ
, M_WAITOK
| M_ZERO
);
394 cl
->cl_actc
= actlist_alloc();
397 qlimit
= 50; /* use default */
398 qlimit(cl
->cl_q
) = qlimit
;
399 qtype(cl
->cl_q
) = Q_DROPTAIL
;
401 cl
->cl_flags
= flags
;
403 if (flags
& (HFCF_RED
|HFCF_RIO
)) {
404 int red_flags
, red_pkttime
;
408 if (rsc
!= NULL
&& rsc
->m2
> m2
)
410 if (fsc
!= NULL
&& fsc
->m2
> m2
)
412 if (usc
!= NULL
&& usc
->m2
> m2
)
416 if (flags
& HFCF_ECN
)
417 red_flags
|= REDF_ECN
;
419 if (flags
& HFCF_CLEARDSCP
)
420 red_flags
|= RIOF_CLEARDSCP
;
423 red_pkttime
= 1000 * 1000 * 1000; /* 1 sec */
425 red_pkttime
= (int64_t)hif
->hif_ifq
->altq_ifp
->if_mtu
426 * 1000 * 1000 * 1000 / (m2
/ 8);
427 if (flags
& HFCF_RED
) {
428 cl
->cl_red
= red_alloc(0, 0,
429 qlimit(cl
->cl_q
) * 10/100,
430 qlimit(cl
->cl_q
) * 30/100,
431 red_flags
, red_pkttime
);
432 if (cl
->cl_red
!= NULL
)
433 qtype(cl
->cl_q
) = Q_RED
;
437 cl
->cl_red
= (red_t
*)rio_alloc(0, NULL
,
438 red_flags
, red_pkttime
);
439 if (cl
->cl_red
!= NULL
)
440 qtype(cl
->cl_q
) = Q_RIO
;
444 #endif /* ALTQ_RED */
446 if (rsc
!= NULL
&& (rsc
->m1
!= 0 || rsc
->m2
!= 0)) {
447 cl
->cl_rsc
= kmalloc(sizeof(*cl
->cl_rsc
), M_ALTQ
, M_WAITOK
);
448 sc2isc(rsc
, cl
->cl_rsc
);
449 rtsc_init(&cl
->cl_deadline
, cl
->cl_rsc
, 0, 0);
450 rtsc_init(&cl
->cl_eligible
, cl
->cl_rsc
, 0, 0);
452 if (fsc
!= NULL
&& (fsc
->m1
!= 0 || fsc
->m2
!= 0)) {
453 cl
->cl_fsc
= kmalloc(sizeof(*cl
->cl_fsc
), M_ALTQ
, M_WAITOK
);
454 if (cl
->cl_fsc
== NULL
)
456 sc2isc(fsc
, cl
->cl_fsc
);
457 rtsc_init(&cl
->cl_virtual
, cl
->cl_fsc
, 0, 0);
459 if (usc
!= NULL
&& (usc
->m1
!= 0 || usc
->m2
!= 0)) {
460 cl
->cl_usc
= kmalloc(sizeof(*cl
->cl_usc
), M_ALTQ
, M_WAITOK
);
461 if (cl
->cl_usc
== NULL
)
463 sc2isc(usc
, cl
->cl_usc
);
464 rtsc_init(&cl
->cl_ulimit
, cl
->cl_usc
, 0, 0);
467 cl
->cl_id
= hif
->hif_classid
++;
470 cl
->cl_parent
= parent
;
476 * find a free slot in the class table. if the slot matching
477 * the lower bits of qid is free, use this slot. otherwise,
478 * use the first free slot.
480 i
= qid
% HFSC_MAX_CLASSES
;
481 if (hif
->hif_class_tbl
[i
] == NULL
)
482 hif
->hif_class_tbl
[i
] = cl
;
484 for (i
= 0; i
< HFSC_MAX_CLASSES
; i
++) {
485 if (hif
->hif_class_tbl
[i
] == NULL
) {
486 hif
->hif_class_tbl
[i
] = cl
;
490 if (i
== HFSC_MAX_CLASSES
) {
496 if (flags
& HFCF_DEFAULTCLASS
)
497 hif
->hif_defaultclass
= cl
;
499 if (parent
== NULL
) {
500 /* this is root class */
501 hif
->hif_rootclass
= cl
;
502 } else if (parent
->cl_children
== NULL
) {
503 /* add this class to the children list of the parent */
504 parent
->cl_children
= cl
;
506 p
= parent
->cl_children
;
507 while (p
->cl_siblings
!= NULL
)
516 if (cl
->cl_actc
!= NULL
)
517 actlist_destroy(cl
->cl_actc
);
518 if (cl
->cl_red
!= NULL
) {
520 if (q_is_rio(cl
->cl_q
))
521 rio_destroy((rio_t
*)cl
->cl_red
);
524 if (q_is_red(cl
->cl_q
))
525 red_destroy(cl
->cl_red
);
528 if (cl
->cl_fsc
!= NULL
)
529 kfree(cl
->cl_fsc
, M_ALTQ
);
530 if (cl
->cl_rsc
!= NULL
)
531 kfree(cl
->cl_rsc
, M_ALTQ
);
532 if (cl
->cl_usc
!= NULL
)
533 kfree(cl
->cl_usc
, M_ALTQ
);
534 if (cl
->cl_q
!= NULL
)
535 kfree(cl
->cl_q
, M_ALTQ
);
541 hfsc_class_destroy(struct hfsc_class
*cl
)
548 if (is_a_parent_class(cl
))
553 if (!qempty(cl
->cl_q
))
556 if (cl
->cl_parent
== NULL
) {
557 /* this is root class */
559 struct hfsc_class
*p
= cl
->cl_parent
->cl_children
;
562 cl
->cl_parent
->cl_children
= cl
->cl_siblings
;
565 if (p
->cl_siblings
== cl
) {
566 p
->cl_siblings
= cl
->cl_siblings
;
569 } while ((p
= p
->cl_siblings
) != NULL
);
574 for (i
= 0; i
< HFSC_MAX_CLASSES
; i
++) {
575 if (cl
->cl_hif
->hif_class_tbl
[i
] == cl
) {
576 cl
->cl_hif
->hif_class_tbl
[i
] = NULL
;
581 cl
->cl_hif
->hif_classes
--;
584 actlist_destroy(cl
->cl_actc
);
586 if (cl
->cl_red
!= NULL
) {
588 if (q_is_rio(cl
->cl_q
))
589 rio_destroy((rio_t
*)cl
->cl_red
);
592 if (q_is_red(cl
->cl_q
))
593 red_destroy(cl
->cl_red
);
597 if (cl
== cl
->cl_hif
->hif_rootclass
)
598 cl
->cl_hif
->hif_rootclass
= NULL
;
599 if (cl
== cl
->cl_hif
->hif_defaultclass
)
600 cl
->cl_hif
->hif_defaultclass
= NULL
;
602 if (cl
->cl_usc
!= NULL
)
603 kfree(cl
->cl_usc
, M_ALTQ
);
604 if (cl
->cl_fsc
!= NULL
)
605 kfree(cl
->cl_fsc
, M_ALTQ
);
606 if (cl
->cl_rsc
!= NULL
)
607 kfree(cl
->cl_rsc
, M_ALTQ
);
608 kfree(cl
->cl_q
, M_ALTQ
);
615 * hfsc_nextclass returns the next class in the tree.
617 * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
620 static struct hfsc_class
*
621 hfsc_nextclass(struct hfsc_class
*cl
)
623 if (cl
->cl_children
!= NULL
) {
624 cl
= cl
->cl_children
;
625 } else if (cl
->cl_siblings
!= NULL
) {
626 cl
= cl
->cl_siblings
;
628 while ((cl
= cl
->cl_parent
) != NULL
) {
629 if (cl
->cl_siblings
!= NULL
) {
630 cl
= cl
->cl_siblings
;
640 * hfsc_enqueue is an enqueue function to be registered to
641 * (*altq_enqueue) in struct ifaltq.
644 hfsc_enqueue(struct ifaltq
*ifq
, struct mbuf
*m
, struct altq_pktattr
*pktattr
)
646 struct hfsc_if
*hif
= (struct hfsc_if
*)ifq
->altq_disc
;
647 struct hfsc_class
*cl
;
650 /* grab class set by classifier */
651 if ((m
->m_flags
& M_PKTHDR
) == 0) {
652 /* should not happen */
653 if_printf(ifq
->altq_ifp
, "altq: packet does not have pkthdr\n");
658 if (m
->m_pkthdr
.fw_flags
& ALTQ_MBUF_TAGGED
)
659 cl
= clh_to_clp(hif
, m
->m_pkthdr
.altq_qid
);
662 if (cl
== NULL
|| is_a_parent_class(cl
)) {
663 cl
= hif
->hif_defaultclass
;
670 cl
->cl_pktattr
= NULL
;
672 if (hfsc_addq(cl
, m
) != 0) {
673 /* drop occurred. mbuf was freed in hfsc_addq. */
674 PKTCNTR_ADD(&cl
->cl_stats
.drop_cnt
, len
);
679 cl
->cl_hif
->hif_packets
++;
681 /* successfully queued. */
682 if (qlen(cl
->cl_q
) == 1)
683 set_active(cl
, m_pktlen(m
));
689 * hfsc_dequeue is a dequeue function to be registered to
690 * (*altq_dequeue) in struct ifaltq.
692 * note: ALTDQ_POLL returns the next packet without removing the packet
693 * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
694 * ALTDQ_REMOVE must return the same packet if called immediately
698 hfsc_dequeue(struct ifaltq
*ifq
, struct mbuf
*mpolled
, int op
)
700 struct hfsc_if
*hif
= (struct hfsc_if
*)ifq
->altq_disc
;
701 struct hfsc_class
*cl
;
707 if (hif
->hif_packets
== 0) {
708 /* no packet in the tree */
713 cur_time
= read_machclk();
715 if (op
== ALTDQ_REMOVE
&& hif
->hif_pollcache
!= NULL
) {
716 cl
= hif
->hif_pollcache
;
717 hif
->hif_pollcache
= NULL
;
718 /* check if the class was scheduled by real-time criteria */
719 if (cl
->cl_rsc
!= NULL
)
720 realtime
= (cl
->cl_e
<= cur_time
);
723 * if there are eligible classes, use real-time criteria.
724 * find the class with the minimum deadline among
725 * the eligible classes.
727 if ((cl
= ellist_get_mindl(hif
->hif_eligible
, cur_time
)) != NULL
) {
734 * use link-sharing criteria
735 * get the class with the minimum vt in the hierarchy
737 cl
= hif
->hif_rootclass
;
738 while (is_a_parent_class(cl
)) {
740 cl
= actlist_firstfit(cl
, cur_time
);
744 kprintf("%d fit but none found\n",fits
);
750 * update parent's cl_cvtmin.
751 * don't update if the new vt is smaller.
753 if (cl
->cl_parent
->cl_cvtmin
< cl
->cl_vt
)
754 cl
->cl_parent
->cl_cvtmin
= cl
->cl_vt
;
761 if (op
== ALTDQ_POLL
) {
762 hif
->hif_pollcache
= cl
;
770 panic("hfsc_dequeue:");
772 cl
->cl_hif
->hif_packets
--;
774 PKTCNTR_ADD(&cl
->cl_stats
.xmit_cnt
, len
);
776 update_vf(cl
, len
, cur_time
);
780 if (!qempty(cl
->cl_q
)) {
781 if (cl
->cl_rsc
!= NULL
) {
783 next_len
= m_pktlen(qhead(cl
->cl_q
));
786 update_ed(cl
, next_len
);
788 update_d(cl
, next_len
);
791 /* the class becomes passive */
796 KKASSERT(mpolled
== NULL
|| m
== mpolled
);
801 hfsc_addq(struct hfsc_class
*cl
, struct mbuf
*m
)
805 if (q_is_rio(cl
->cl_q
))
806 return rio_addq((rio_t
*)cl
->cl_red
, cl
->cl_q
,
810 if (q_is_red(cl
->cl_q
))
811 return red_addq(cl
->cl_red
, cl
->cl_q
, m
, cl
->cl_pktattr
);
813 if (qlen(cl
->cl_q
) >= qlimit(cl
->cl_q
)) {
818 if (cl
->cl_flags
& HFCF_CLEARDSCP
)
819 write_dsfield(m
, cl
->cl_pktattr
, 0);
827 hfsc_getq(struct hfsc_class
*cl
)
830 if (q_is_rio(cl
->cl_q
))
831 return rio_getq((rio_t
*)cl
->cl_red
, cl
->cl_q
);
834 if (q_is_red(cl
->cl_q
))
835 return red_getq(cl
->cl_red
, cl
->cl_q
);
837 return _getq(cl
->cl_q
);
841 hfsc_pollq(struct hfsc_class
*cl
)
843 return qhead(cl
->cl_q
);
847 hfsc_purgeq(struct hfsc_class
*cl
)
851 if (qempty(cl
->cl_q
))
854 while ((m
= _getq(cl
->cl_q
)) != NULL
) {
855 PKTCNTR_ADD(&cl
->cl_stats
.drop_cnt
, m_pktlen(m
));
857 cl
->cl_hif
->hif_packets
--;
858 cl
->cl_hif
->hif_ifq
->ifq_len
--;
860 KKASSERT(qlen(cl
->cl_q
) == 0);
862 update_vf(cl
, 0, 0); /* remove cl from the actlist */
867 set_active(struct hfsc_class
*cl
, int len
)
869 if (cl
->cl_rsc
!= NULL
)
871 if (cl
->cl_fsc
!= NULL
)
874 cl
->cl_stats
.period
++;
878 set_passive(struct hfsc_class
*cl
)
880 if (cl
->cl_rsc
!= NULL
)
884 * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
885 * needs to be called explicitly to remove a class from actlist
890 init_ed(struct hfsc_class
*cl
, int next_len
)
894 cur_time
= read_machclk();
896 /* update the deadline curve */
897 rtsc_min(&cl
->cl_deadline
, cl
->cl_rsc
, cur_time
, cl
->cl_cumul
);
900 * update the eligible curve.
901 * for concave, it is equal to the deadline curve.
902 * for convex, it is a linear curve with slope m2.
904 cl
->cl_eligible
= cl
->cl_deadline
;
905 if (cl
->cl_rsc
->sm1
<= cl
->cl_rsc
->sm2
) {
906 cl
->cl_eligible
.dx
= 0;
907 cl
->cl_eligible
.dy
= 0;
910 /* compute e and d */
911 cl
->cl_e
= rtsc_y2x(&cl
->cl_eligible
, cl
->cl_cumul
);
912 cl
->cl_d
= rtsc_y2x(&cl
->cl_deadline
, cl
->cl_cumul
+ next_len
);
918 update_ed(struct hfsc_class
*cl
, int next_len
)
920 cl
->cl_e
= rtsc_y2x(&cl
->cl_eligible
, cl
->cl_cumul
);
921 cl
->cl_d
= rtsc_y2x(&cl
->cl_deadline
, cl
->cl_cumul
+ next_len
);
927 update_d(struct hfsc_class
*cl
, int next_len
)
929 cl
->cl_d
= rtsc_y2x(&cl
->cl_deadline
, cl
->cl_cumul
+ next_len
);
933 init_vf(struct hfsc_class
*cl
, int len
)
935 struct hfsc_class
*max_cl
, *p
;
936 uint64_t vt
, f
, cur_time
;
941 for ( ; cl
->cl_parent
!= NULL
; cl
= cl
->cl_parent
) {
942 if (go_active
&& cl
->cl_nactive
++ == 0)
948 max_cl
= actlist_last(cl
->cl_parent
->cl_actc
);
949 if (max_cl
!= NULL
) {
951 * set vt to the average of the min and max
952 * classes. if the parent's period didn't
953 * change, don't decrease vt of the class.
956 if (cl
->cl_parent
->cl_cvtmin
!= 0)
957 vt
= (cl
->cl_parent
->cl_cvtmin
+ vt
)/2;
959 if (cl
->cl_parent
->cl_vtperiod
!=
960 cl
->cl_parentperiod
|| vt
> cl
->cl_vt
)
964 * first child for a new parent backlog period.
965 * add parent's cvtmax to vtoff of children
966 * to make a new vt (vtoff + vt) larger than
967 * the vt in the last period for all children.
969 vt
= cl
->cl_parent
->cl_cvtmax
;
970 for (p
= cl
->cl_parent
->cl_children
; p
!= NULL
;
974 cl
->cl_parent
->cl_cvtmax
= 0;
975 cl
->cl_parent
->cl_cvtmin
= 0;
977 cl
->cl_initvt
= cl
->cl_vt
;
979 /* update the virtual curve */
980 vt
= cl
->cl_vt
+ cl
->cl_vtoff
;
981 rtsc_min(&cl
->cl_virtual
, cl
->cl_fsc
, vt
, cl
->cl_total
);
982 if (cl
->cl_virtual
.x
== vt
) {
983 cl
->cl_virtual
.x
-= cl
->cl_vtoff
;
988 cl
->cl_vtperiod
++; /* increment vt period */
989 cl
->cl_parentperiod
= cl
->cl_parent
->cl_vtperiod
;
990 if (cl
->cl_parent
->cl_nactive
== 0)
991 cl
->cl_parentperiod
++;
996 if (cl
->cl_usc
!= NULL
) {
997 /* class has upper limit curve */
999 cur_time
= read_machclk();
1001 /* update the ulimit curve */
1002 rtsc_min(&cl
->cl_ulimit
, cl
->cl_usc
, cur_time
,
1005 cl
->cl_myf
= rtsc_y2x(&cl
->cl_ulimit
,
1011 if (cl
->cl_myf
> cl
->cl_cfmin
)
1015 if (f
!= cl
->cl_f
) {
1017 update_cfmin(cl
->cl_parent
);
1023 update_vf(struct hfsc_class
*cl
, int len
, uint64_t cur_time
)
1025 uint64_t f
, myf_bound
, delta
;
1028 go_passive
= qempty(cl
->cl_q
);
1030 for (; cl
->cl_parent
!= NULL
; cl
= cl
->cl_parent
) {
1031 cl
->cl_total
+= len
;
1033 if (cl
->cl_fsc
== NULL
|| cl
->cl_nactive
== 0)
1036 if (go_passive
&& --cl
->cl_nactive
== 0)
1042 /* no more active child, going passive */
1044 /* update cvtmax of the parent class */
1045 if (cl
->cl_vt
> cl
->cl_parent
->cl_cvtmax
)
1046 cl
->cl_parent
->cl_cvtmax
= cl
->cl_vt
;
1048 /* remove this class from the vt list */
1051 update_cfmin(cl
->cl_parent
);
1059 cl
->cl_vt
= rtsc_y2x(&cl
->cl_virtual
, cl
->cl_total
)
1060 - cl
->cl_vtoff
+ cl
->cl_vtadj
;
1063 * if vt of the class is smaller than cvtmin,
1064 * the class was skipped in the past due to non-fit.
1065 * if so, we need to adjust vtadj.
1067 if (cl
->cl_vt
< cl
->cl_parent
->cl_cvtmin
) {
1068 cl
->cl_vtadj
+= cl
->cl_parent
->cl_cvtmin
- cl
->cl_vt
;
1069 cl
->cl_vt
= cl
->cl_parent
->cl_cvtmin
;
1072 /* update the vt list */
1075 if (cl
->cl_usc
!= NULL
) {
1076 cl
->cl_myf
= cl
->cl_myfadj
1077 + rtsc_y2x(&cl
->cl_ulimit
, cl
->cl_total
);
1080 * if myf lags behind by more than one clock tick
1081 * from the current time, adjust myfadj to prevent
1082 * a rate-limited class from going greedy.
1083 * in a steady state under rate-limiting, myf
1084 * fluctuates within one clock tick.
1086 myf_bound
= cur_time
- machclk_per_tick
;
1087 if (cl
->cl_myf
< myf_bound
) {
1088 delta
= cur_time
- cl
->cl_myf
;
1089 cl
->cl_myfadj
+= delta
;
1090 cl
->cl_myf
+= delta
;
1094 /* cl_f is max(cl_myf, cl_cfmin) */
1095 if (cl
->cl_myf
> cl
->cl_cfmin
)
1099 if (f
!= cl
->cl_f
) {
1101 update_cfmin(cl
->cl_parent
);
1107 update_cfmin(struct hfsc_class
*cl
)
1109 struct hfsc_class
*p
;
1112 if (TAILQ_EMPTY(cl
->cl_actc
)) {
1116 cfmin
= HT_INFINITY
;
1117 TAILQ_FOREACH(p
, cl
->cl_actc
, cl_actlist
) {
1122 if (p
->cl_f
< cfmin
)
1125 cl
->cl_cfmin
= cfmin
;
1129 * TAILQ based ellist and actlist implementation
1130 * (ion wanted to make a calendar queue based implementation)
1133 * eligible list holds backlogged classes being sorted by their eligible times.
1134 * there is one eligible list per interface.
1142 head
= kmalloc(sizeof(ellist_t
*), M_ALTQ
, M_WAITOK
);
1148 ellist_destroy(ellist_t
*head
)
1150 kfree(head
, M_ALTQ
);
1154 ellist_insert(struct hfsc_class
*cl
)
1156 struct hfsc_if
*hif
= cl
->cl_hif
;
1157 struct hfsc_class
*p
;
1159 /* check the last entry first */
1160 if ((p
= TAILQ_LAST(hif
->hif_eligible
, _eligible
)) == NULL
||
1161 p
->cl_e
<= cl
->cl_e
) {
1162 TAILQ_INSERT_TAIL(hif
->hif_eligible
, cl
, cl_ellist
);
1166 TAILQ_FOREACH(p
, hif
->hif_eligible
, cl_ellist
) {
1167 if (cl
->cl_e
< p
->cl_e
) {
1168 TAILQ_INSERT_BEFORE(p
, cl
, cl_ellist
);
1172 KKASSERT(0); /* should not reach here */
1176 ellist_remove(struct hfsc_class
*cl
)
1178 struct hfsc_if
*hif
= cl
->cl_hif
;
1180 TAILQ_REMOVE(hif
->hif_eligible
, cl
, cl_ellist
);
1184 ellist_update(struct hfsc_class
*cl
)
1186 struct hfsc_if
*hif
= cl
->cl_hif
;
1187 struct hfsc_class
*p
, *last
;
1190 * the eligible time of a class increases monotonically.
1191 * if the next entry has a larger eligible time, nothing to do.
1193 p
= TAILQ_NEXT(cl
, cl_ellist
);
1194 if (p
== NULL
|| cl
->cl_e
<= p
->cl_e
)
1197 /* check the last entry */
1198 last
= TAILQ_LAST(hif
->hif_eligible
, _eligible
);
1199 KKASSERT(last
!= NULL
);
1200 if (last
->cl_e
<= cl
->cl_e
) {
1201 TAILQ_REMOVE(hif
->hif_eligible
, cl
, cl_ellist
);
1202 TAILQ_INSERT_TAIL(hif
->hif_eligible
, cl
, cl_ellist
);
1207 * the new position must be between the next entry
1208 * and the last entry
1210 while ((p
= TAILQ_NEXT(p
, cl_ellist
)) != NULL
) {
1211 if (cl
->cl_e
< p
->cl_e
) {
1212 TAILQ_REMOVE(hif
->hif_eligible
, cl
, cl_ellist
);
1213 TAILQ_INSERT_BEFORE(p
, cl
, cl_ellist
);
1217 KKASSERT(0); /* should not reach here */
1220 /* find the class with the minimum deadline among the eligible classes */
1222 ellist_get_mindl(ellist_t
*head
, uint64_t cur_time
)
1224 struct hfsc_class
*p
, *cl
= NULL
;
1226 TAILQ_FOREACH(p
, head
, cl_ellist
) {
1227 if (p
->cl_e
> cur_time
)
1229 if (cl
== NULL
|| p
->cl_d
< cl
->cl_d
)
1236 * active children list holds backlogged child classes being sorted
1237 * by their virtual time.
1238 * each intermediate class has one active children list.
1245 head
= kmalloc(sizeof(*head
), M_ALTQ
, M_WAITOK
);
1251 actlist_destroy(actlist_t
*head
)
1253 kfree(head
, M_ALTQ
);
1256 actlist_insert(struct hfsc_class
*cl
)
1258 struct hfsc_class
*p
;
1260 /* check the last entry first */
1261 if ((p
= TAILQ_LAST(cl
->cl_parent
->cl_actc
, _active
)) == NULL
1262 || p
->cl_vt
<= cl
->cl_vt
) {
1263 TAILQ_INSERT_TAIL(cl
->cl_parent
->cl_actc
, cl
, cl_actlist
);
1267 TAILQ_FOREACH(p
, cl
->cl_parent
->cl_actc
, cl_actlist
) {
1268 if (cl
->cl_vt
< p
->cl_vt
) {
1269 TAILQ_INSERT_BEFORE(p
, cl
, cl_actlist
);
1273 KKASSERT(0); /* should not reach here */
1277 actlist_remove(struct hfsc_class
*cl
)
1279 TAILQ_REMOVE(cl
->cl_parent
->cl_actc
, cl
, cl_actlist
);
1283 actlist_update(struct hfsc_class
*cl
)
1285 struct hfsc_class
*p
, *last
;
1288 * the virtual time of a class increases monotonically during its
1289 * backlogged period.
1290 * if the next entry has a larger virtual time, nothing to do.
1292 p
= TAILQ_NEXT(cl
, cl_actlist
);
1293 if (p
== NULL
|| cl
->cl_vt
< p
->cl_vt
)
1296 /* check the last entry */
1297 last
= TAILQ_LAST(cl
->cl_parent
->cl_actc
, _active
);
1298 KKASSERT(last
!= NULL
);
1299 if (last
->cl_vt
<= cl
->cl_vt
) {
1300 TAILQ_REMOVE(cl
->cl_parent
->cl_actc
, cl
, cl_actlist
);
1301 TAILQ_INSERT_TAIL(cl
->cl_parent
->cl_actc
, cl
, cl_actlist
);
1306 * the new position must be between the next entry
1307 * and the last entry
1309 while ((p
= TAILQ_NEXT(p
, cl_actlist
)) != NULL
) {
1310 if (cl
->cl_vt
< p
->cl_vt
) {
1311 TAILQ_REMOVE(cl
->cl_parent
->cl_actc
, cl
, cl_actlist
);
1312 TAILQ_INSERT_BEFORE(p
, cl
, cl_actlist
);
1316 KKASSERT(0); /* should not reach here */
1319 static struct hfsc_class
*
1320 actlist_firstfit(struct hfsc_class
*cl
, uint64_t cur_time
)
1322 struct hfsc_class
*p
;
1324 TAILQ_FOREACH(p
, cl
->cl_actc
, cl_actlist
) {
1325 if (p
->cl_f
<= cur_time
)
1332 * service curve support functions
1334 * external service curve parameters
1337 * internal service curve parameters
1338 * sm: (bytes/tsc_interval) << SM_SHIFT
1339 * ism: (tsc_count/byte) << ISM_SHIFT
1342 * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
1343 * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU
1344 * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
1345 * digits in decimal using the following table.
1347 * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
1348 * ----------+-------------------------------------------------------
1349 * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6
1350 * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6
1351 * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6
1353 * nsec/byte 80000 8000 800 80 8
1354 * ism(500MHz) 40000 4000 400 40 4
1355 * ism(200MHz) 16000 1600 160 16 1.6
1358 #define ISM_SHIFT 10
1360 #define SM_MASK ((1LL << SM_SHIFT) - 1)
1361 #define ISM_MASK ((1LL << ISM_SHIFT) - 1)
1363 static __inline
uint64_t
1364 seg_x2y(uint64_t x
, uint64_t sm
)
1370 * y = x * sm >> SM_SHIFT
1371 * but divide it for the upper and lower bits to avoid overflow
1373 y
= (x
>> SM_SHIFT
) * sm
+ (((x
& SM_MASK
) * sm
) >> SM_SHIFT
);
1377 static __inline
uint64_t
1378 seg_y2x(uint64_t y
, uint64_t ism
)
1384 else if (ism
== HT_INFINITY
)
1387 x
= (y
>> ISM_SHIFT
) * ism
+ (((y
& ISM_MASK
) * ism
) >> ISM_SHIFT
);
1392 static __inline
uint64_t
1397 sm
= ((uint64_t)m
<< SM_SHIFT
) / 8 / machclk_freq
;
1401 static __inline
uint64_t
1409 ism
= ((uint64_t)machclk_freq
<< ISM_SHIFT
) * 8 / m
;
1413 static __inline
uint64_t
1418 dx
= ((uint64_t)d
* machclk_freq
) / 1000;
1427 m
= (sm
* 8 * machclk_freq
) >> SM_SHIFT
;
1436 d
= dx
* 1000 / machclk_freq
;
1441 sc2isc(struct service_curve
*sc
, struct internal_sc
*isc
)
1443 isc
->sm1
= m2sm(sc
->m1
);
1444 isc
->ism1
= m2ism(sc
->m1
);
1445 isc
->dx
= d2dx(sc
->d
);
1446 isc
->dy
= seg_x2y(isc
->dx
, isc
->sm1
);
1447 isc
->sm2
= m2sm(sc
->m2
);
1448 isc
->ism2
= m2ism(sc
->m2
);
1452 * initialize the runtime service curve with the given internal
1453 * service curve starting at (x, y).
1456 rtsc_init(struct runtime_sc
*rtsc
, struct internal_sc
*isc
, uint64_t x
, uint64_t y
)
1460 rtsc
->sm1
= isc
->sm1
;
1461 rtsc
->ism1
= isc
->ism1
;
1464 rtsc
->sm2
= isc
->sm2
;
1465 rtsc
->ism2
= isc
->ism2
;
1469 * calculate the y-projection of the runtime service curve by the
1470 * given x-projection value
1473 rtsc_y2x(struct runtime_sc
*rtsc
, uint64_t y
)
1479 } else if (y
<= rtsc
->y
+ rtsc
->dy
) {
1480 /* x belongs to the 1st segment */
1482 x
= rtsc
->x
+ rtsc
->dx
;
1484 x
= rtsc
->x
+ seg_y2x(y
- rtsc
->y
, rtsc
->ism1
);
1486 /* x belongs to the 2nd segment */
1487 x
= rtsc
->x
+ rtsc
->dx
1488 + seg_y2x(y
- rtsc
->y
- rtsc
->dy
, rtsc
->ism2
);
1494 rtsc_x2y(struct runtime_sc
*rtsc
, uint64_t x
)
1500 } else if (x
<= rtsc
->x
+ rtsc
->dx
) {
1501 /* y belongs to the 1st segment */
1502 y
= rtsc
->y
+ seg_x2y(x
- rtsc
->x
, rtsc
->sm1
);
1504 /* y belongs to the 2nd segment */
1505 y
= rtsc
->y
+ rtsc
->dy
1506 + seg_x2y(x
- rtsc
->x
- rtsc
->dx
, rtsc
->sm2
);
1511 * update the runtime service curve by taking the minimum of the current
1512 * runtime service curve and the service curve starting at (x, y).
1515 rtsc_min(struct runtime_sc
*rtsc
, struct internal_sc
*isc
, uint64_t x
, uint64_t y
)
1517 uint64_t y1
, y2
, dx
, dy
;
1519 if (isc
->sm1
<= isc
->sm2
) {
1520 /* service curve is convex */
1521 y1
= rtsc_x2y(rtsc
, x
);
1523 /* the current rtsc is smaller */
1531 * service curve is concave
1532 * compute the two y values of the current rtsc
1536 y1
= rtsc_x2y(rtsc
, x
);
1538 /* rtsc is below isc, no change to rtsc */
1542 y2
= rtsc_x2y(rtsc
, x
+ isc
->dx
);
1543 if (y2
>= y
+ isc
->dy
) {
1544 /* rtsc is above isc, replace rtsc by isc */
1553 * the two curves intersect
1554 * compute the offsets (dx, dy) using the reverse
1555 * function of seg_x2y()
1556 * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
1558 dx
= ((y1
- y
) << SM_SHIFT
) / (isc
->sm1
- isc
->sm2
);
1560 * check if (x, y1) belongs to the 1st segment of rtsc.
1561 * if so, add the offset.
1563 if (rtsc
->x
+ rtsc
->dx
> x
)
1564 dx
+= rtsc
->x
+ rtsc
->dx
- x
;
1565 dy
= seg_x2y(dx
, isc
->sm1
);
1574 get_class_stats(struct hfsc_classstats
*sp
, struct hfsc_class
*cl
)
1576 sp
->class_id
= cl
->cl_id
;
1577 sp
->class_handle
= cl
->cl_handle
;
1579 if (cl
->cl_rsc
!= NULL
) {
1580 sp
->rsc
.m1
= sm2m(cl
->cl_rsc
->sm1
);
1581 sp
->rsc
.d
= dx2d(cl
->cl_rsc
->dx
);
1582 sp
->rsc
.m2
= sm2m(cl
->cl_rsc
->sm2
);
1588 if (cl
->cl_fsc
!= NULL
) {
1589 sp
->fsc
.m1
= sm2m(cl
->cl_fsc
->sm1
);
1590 sp
->fsc
.d
= dx2d(cl
->cl_fsc
->dx
);
1591 sp
->fsc
.m2
= sm2m(cl
->cl_fsc
->sm2
);
1597 if (cl
->cl_usc
!= NULL
) {
1598 sp
->usc
.m1
= sm2m(cl
->cl_usc
->sm1
);
1599 sp
->usc
.d
= dx2d(cl
->cl_usc
->dx
);
1600 sp
->usc
.m2
= sm2m(cl
->cl_usc
->sm2
);
1607 sp
->total
= cl
->cl_total
;
1608 sp
->cumul
= cl
->cl_cumul
;
1615 sp
->initvt
= cl
->cl_initvt
;
1616 sp
->vtperiod
= cl
->cl_vtperiod
;
1617 sp
->parentperiod
= cl
->cl_parentperiod
;
1618 sp
->nactive
= cl
->cl_nactive
;
1619 sp
->vtoff
= cl
->cl_vtoff
;
1620 sp
->cvtmax
= cl
->cl_cvtmax
;
1621 sp
->myf
= cl
->cl_myf
;
1622 sp
->cfmin
= cl
->cl_cfmin
;
1623 sp
->cvtmin
= cl
->cl_cvtmin
;
1624 sp
->myfadj
= cl
->cl_myfadj
;
1625 sp
->vtadj
= cl
->cl_vtadj
;
1627 sp
->cur_time
= read_machclk();
1628 sp
->machclk_freq
= machclk_freq
;
1630 sp
->qlength
= qlen(cl
->cl_q
);
1631 sp
->qlimit
= qlimit(cl
->cl_q
);
1632 sp
->xmit_cnt
= cl
->cl_stats
.xmit_cnt
;
1633 sp
->drop_cnt
= cl
->cl_stats
.drop_cnt
;
1634 sp
->period
= cl
->cl_stats
.period
;
1636 sp
->qtype
= qtype(cl
->cl_q
);
1638 if (q_is_red(cl
->cl_q
))
1639 red_getstats(cl
->cl_red
, &sp
->red
[0]);
1642 if (q_is_rio(cl
->cl_q
))
1643 rio_getstats((rio_t
*)cl
->cl_red
, &sp
->red
[0]);
1647 /* convert a class handle to the corresponding class pointer */
1648 static struct hfsc_class
*
1649 clh_to_clp(struct hfsc_if
*hif
, uint32_t chandle
)
1652 struct hfsc_class
*cl
;
1657 * first, try optimistically the slot matching the lower bits of
1658 * the handle. if it fails, do the linear table search.
1660 i
= chandle
% HFSC_MAX_CLASSES
;
1661 if ((cl
= hif
->hif_class_tbl
[i
]) != NULL
&& cl
->cl_handle
== chandle
)
1663 for (i
= 0; i
< HFSC_MAX_CLASSES
; i
++)
1664 if ((cl
= hif
->hif_class_tbl
[i
]) != NULL
&&
1665 cl
->cl_handle
== chandle
)
1670 #endif /* ALTQ_HFSC */