2 * Copyright (c) 2001-2002 Luigi Rizzo
4 * Supported by: the Xorp Project (www.xorp.org)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $
28 * $DragonFly: src/sys/kern/kern_poll.c,v 1.42 2007/11/11 07:38:29 sephe Exp $
31 #include "opt_polling.h"
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/socket.h> /* needed by net/if.h */
36 #include <sys/sysctl.h>
38 #include <sys/thread2.h>
39 #include <sys/msgport2.h>
41 #include <net/if.h> /* for IFF_* flags */
42 #include <net/netmsg2.h>
45 * Polling support for [network] device drivers.
47 * Drivers which support this feature try to register with the
50 * If registration is successful, the driver must disable interrupts,
51 * and further I/O is performed through the handler, which is invoked
52 * (at least once per clock tick) with 3 arguments: the "arg" passed at
53 * register time (a struct ifnet pointer), a command, and a "count" limit.
55 * The command can be one of the following:
56 * POLL_ONLY: quick move of "count" packets from input/output queues.
57 * POLL_AND_CHECK_STATUS: as above, plus check status registers or do
58 * other more expensive operations. This command is issued periodically
59 * but less frequently than POLL_ONLY.
60 * POLL_DEREGISTER: deregister and return to interrupt mode.
61 * POLL_REGISTER: register and disable interrupts
63 * The first two commands are only issued if the interface is marked as
64 * 'IFF_UP, IFF_RUNNING and IFF_POLLING', the last two only if IFF_RUNNING
67 * The count limit specifies how much work the handler can do during the
68 * call -- typically this is the number of packets to be received, or
69 * transmitted, etc. (drivers are free to interpret this number, as long
70 * as the max time spent in the function grows roughly linearly with the
73 * Deregistration can be requested by the driver itself (typically in the
74 * *_stop() routine), or by the polling code, by invoking the handler.
76 * Polling can be enabled or disabled on particular CPU_X with the sysctl
77 * variable kern.polling.X.enable (default is 1, enabled)
79 * A second variable controls the sharing of CPU between polling/kernel
80 * network processing, and other activities (typically userlevel tasks):
81 * kern.polling.X.user_frac (between 0 and 100, default 50) sets the share
82 * of CPU allocated to user tasks. CPU is allocated proportionally to the
83 * shares, by dynamically adjusting the "count" (poll_burst).
85 * Other parameters should be left to their default values.
86 * The following constraints hold
88 * 1 <= poll_burst <= poll_burst_max
89 * 1 <= poll_each_burst <= poll_burst_max
90 * MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
/*
 * Bounds for the per-cpu poll_burst_max tunable:
 * MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX.
 */
#define MIN_POLL_BURST_MAX	10
#define MAX_POLL_BURST_MAX	1000

/*
 * Upper bound and default for the polling frequency.  The maximum may
 * be overridden at build time.
 */
#ifndef DEVICE_POLLING_FREQ_MAX
#define DEVICE_POLLING_FREQ_MAX		30000
#endif
#define DEVICE_POLLING_FREQ_DEFAULT	2000

/* Number of entries in a polling context's handler table. */
#define POLL_LIST_LEN	128

/*
 * One registered interface.
 * NOTE(review): this definition was not visible in the listing; it is
 * restored from the way pr[i].ifp is used -- confirm.
 */
struct pollrec {
	struct ifnet	*ifp;
};

/* Number of slots in poll_context[]. */
#define POLLCTX_MAX	32
109 struct sysctl_ctx_list poll_sysctl_ctx
;
110 struct sysctl_oid
*poll_sysctl_tree
;
112 uint32_t poll_burst
; /* state */
113 uint32_t poll_each_burst
; /* tunable */
114 uint32_t poll_burst_max
; /* tunable */
115 uint32_t user_frac
; /* tunable */
116 int reg_frac_count
; /* state */
117 uint32_t reg_frac
; /* tunable */
118 uint32_t short_ticks
; /* statistics */
119 uint32_t lost_polls
; /* statistics */
120 uint32_t pending_polls
; /* state */
121 int residual_burst
; /* state */
122 uint32_t phase
; /* state */
123 uint32_t suspect
; /* statistics */
124 uint32_t stalled
; /* statistics */
125 struct timeval poll_start_t
; /* state */
126 struct timeval prev_t
; /* state */
128 uint32_t poll_handlers
; /* next free entry in pr[]. */
129 struct pollrec pr
[POLL_LIST_LEN
];
132 struct systimer pollclock
;
133 int polling_enabled
; /* tunable */
134 int pollhz
; /* tunable */
136 struct netmsg poll_netmsg
;
137 struct netmsg poll_more_netmsg
;
140 static struct pollctx
*poll_context
[POLLCTX_MAX
];
142 SYSCTL_NODE(_kern
, OID_AUTO
, polling
, CTLFLAG_RW
, 0,
143 "Device polling parameters");
145 static int poll_defcpu
= -1;
146 SYSCTL_INT(_kern_polling
, OID_AUTO
, defcpu
, CTLFLAG_RD
,
147 &poll_defcpu
, 0, "default CPU to run device polling");
149 static uint32_t poll_cpumask0
= 0xffffffff;
150 TUNABLE_INT("kern.polling.cpumask", (int *)&poll_cpumask0
);
152 static uint32_t poll_cpumask
;
153 SYSCTL_INT(_kern_polling
, OID_AUTO
, cpumask
, CTLFLAG_RD
,
154 &poll_cpumask
, 0, "CPUs that can run device polling");
156 static int polling_enabled
= 1; /* global polling enable */
157 TUNABLE_INT("kern.polling.enable", &polling_enabled
);
159 static int pollhz
= DEVICE_POLLING_FREQ_DEFAULT
;
160 TUNABLE_INT("kern.polling.pollhz", &pollhz
);
162 /* Netisr handlers */
163 static void netisr_poll(struct netmsg
*);
164 static void netisr_pollmore(struct netmsg
*);
165 static void poll_register(struct netmsg
*);
166 static void poll_deregister(struct netmsg
*);
167 static void poll_sysctl_pollhz(struct netmsg
*);
168 static void poll_sysctl_polling(struct netmsg
*);
169 static void poll_sysctl_regfrac(struct netmsg
*);
170 static void poll_sysctl_burstmax(struct netmsg
*);
171 static void poll_sysctl_eachburst(struct netmsg
*);
173 /* Systimer handler */
174 static void pollclock(systimer_t
, struct intrframe
*);
176 /* Sysctl handlers */
177 static int sysctl_pollhz(SYSCTL_HANDLER_ARGS
);
178 static int sysctl_polling(SYSCTL_HANDLER_ARGS
);
179 static int sysctl_regfrac(SYSCTL_HANDLER_ARGS
);
180 static int sysctl_burstmax(SYSCTL_HANDLER_ARGS
);
181 static int sysctl_eachburst(SYSCTL_HANDLER_ARGS
);
182 static void poll_add_sysctl(struct sysctl_ctx_list
*,
183 struct sysctl_oid_list
*, struct pollctx
*);
185 static void schedpoll_oncpu(struct pollctx
*, struct netmsg
*, netisr_fn_t
);
187 void init_device_poll_pcpu(int); /* per-cpu init routine */
190 poll_reset_state(struct pollctx
*pctx
)
193 pctx
->poll_burst
= 5;
194 pctx
->reg_frac_count
= 0;
195 pctx
->pending_polls
= 0;
196 pctx
->residual_burst
= 0;
198 bzero(&pctx
->poll_start_t
, sizeof(pctx
->poll_start_t
));
199 bzero(&pctx
->prev_t
, sizeof(pctx
->prev_t
));
204 * Initialize per-cpu polling(4) context. Called from kern_clock.c:
207 init_device_poll_pcpu(int cpuid
)
209 struct pollctx
*pctx
;
212 if (cpuid
>= POLLCTX_MAX
)
215 if (((1 << cpuid
) & poll_cpumask0
) == 0)
218 poll_cpumask
|= (1 << cpuid
);
220 pctx
= kmalloc(sizeof(*pctx
), M_DEVBUF
, M_WAITOK
| M_ZERO
);
222 pctx
->poll_each_burst
= 5;
223 pctx
->poll_burst_max
= 150; /* good for 100Mbit net and HZ=1000 */
224 pctx
->user_frac
= 50;
226 pctx
->polling_enabled
= polling_enabled
;
227 pctx
->pollhz
= pollhz
;
228 pctx
->poll_cpuid
= cpuid
;
229 netmsg_init(&pctx
->poll_netmsg
, &netisr_adone_rport
, 0, NULL
);
230 netmsg_init(&pctx
->poll_more_netmsg
, &netisr_adone_rport
, 0, NULL
);
231 poll_reset_state(pctx
);
233 KASSERT(cpuid
< POLLCTX_MAX
, ("cpu id must < %d", cpuid
));
234 poll_context
[cpuid
] = pctx
;
236 if (poll_defcpu
< 0) {
240 * Initialize global sysctl nodes, for compat
242 poll_add_sysctl(NULL
, SYSCTL_STATIC_CHILDREN(_kern_polling
),
247 * Initialize per-cpu sysctl nodes
249 ksnprintf(cpuid_str
, sizeof(cpuid_str
), "%d", pctx
->poll_cpuid
);
251 sysctl_ctx_init(&pctx
->poll_sysctl_ctx
);
252 pctx
->poll_sysctl_tree
= SYSCTL_ADD_NODE(&pctx
->poll_sysctl_ctx
,
253 SYSCTL_STATIC_CHILDREN(_kern_polling
),
254 OID_AUTO
, cpuid_str
, CTLFLAG_RD
, 0, "");
255 poll_add_sysctl(&pctx
->poll_sysctl_ctx
,
256 SYSCTL_CHILDREN(pctx
->poll_sysctl_tree
), pctx
);
259 * Initialize systimer
261 systimer_init_periodic_nq(&pctx
->pollclock
, pollclock
, pctx
, 1);
265 schedpoll(struct pollctx
*pctx
)
268 schedpoll_oncpu(pctx
, &pctx
->poll_netmsg
, netisr_poll
);
273 schedpollmore(struct pollctx
*pctx
)
275 schedpoll_oncpu(pctx
, &pctx
->poll_more_netmsg
, netisr_pollmore
);
279 * Set the polling frequency
282 sysctl_pollhz(SYSCTL_HANDLER_ARGS
)
284 struct pollctx
*pctx
= arg1
;
290 error
= sysctl_handle_int(oidp
, &phz
, 0, req
);
291 if (error
|| req
->newptr
== NULL
)
295 else if (phz
> DEVICE_POLLING_FREQ_MAX
)
296 phz
= DEVICE_POLLING_FREQ_MAX
;
298 netmsg_init(&msg
, &curthread
->td_msgport
, 0, poll_sysctl_pollhz
);
299 msg
.nm_lmsg
.u
.ms_result
= phz
;
301 port
= cpu_portfn(pctx
->poll_cpuid
);
302 lwkt_domsg(port
, &msg
.nm_lmsg
, 0);
310 sysctl_polling(SYSCTL_HANDLER_ARGS
)
312 struct pollctx
*pctx
= arg1
;
317 enabled
= pctx
->polling_enabled
;
318 error
= sysctl_handle_int(oidp
, &enabled
, 0, req
);
319 if (error
|| req
->newptr
== NULL
)
322 netmsg_init(&msg
, &curthread
->td_msgport
, 0, poll_sysctl_polling
);
323 msg
.nm_lmsg
.u
.ms_result
= enabled
;
325 port
= cpu_portfn(pctx
->poll_cpuid
);
326 lwkt_domsg(port
, &msg
.nm_lmsg
, 0);
331 sysctl_regfrac(SYSCTL_HANDLER_ARGS
)
333 struct pollctx
*pctx
= arg1
;
339 reg_frac
= pctx
->reg_frac
;
340 error
= sysctl_handle_int(oidp
, ®_frac
, 0, req
);
341 if (error
|| req
->newptr
== NULL
)
344 netmsg_init(&msg
, &curthread
->td_msgport
, 0, poll_sysctl_regfrac
);
345 msg
.nm_lmsg
.u
.ms_result
= reg_frac
;
347 port
= cpu_portfn(pctx
->poll_cpuid
);
348 lwkt_domsg(port
, &msg
.nm_lmsg
, 0);
353 sysctl_burstmax(SYSCTL_HANDLER_ARGS
)
355 struct pollctx
*pctx
= arg1
;
361 burst_max
= pctx
->poll_burst_max
;
362 error
= sysctl_handle_int(oidp
, &burst_max
, 0, req
);
363 if (error
|| req
->newptr
== NULL
)
365 if (burst_max
< MIN_POLL_BURST_MAX
)
366 burst_max
= MIN_POLL_BURST_MAX
;
367 else if (burst_max
> MAX_POLL_BURST_MAX
)
368 burst_max
= MAX_POLL_BURST_MAX
;
370 netmsg_init(&msg
, &curthread
->td_msgport
, 0, poll_sysctl_burstmax
);
371 msg
.nm_lmsg
.u
.ms_result
= burst_max
;
373 port
= cpu_portfn(pctx
->poll_cpuid
);
374 lwkt_domsg(port
, &msg
.nm_lmsg
, 0);
379 sysctl_eachburst(SYSCTL_HANDLER_ARGS
)
381 struct pollctx
*pctx
= arg1
;
387 each_burst
= pctx
->poll_each_burst
;
388 error
= sysctl_handle_int(oidp
, &each_burst
, 0, req
);
389 if (error
|| req
->newptr
== NULL
)
392 netmsg_init(&msg
, &curthread
->td_msgport
, 0, poll_sysctl_eachburst
);
393 msg
.nm_lmsg
.u
.ms_result
= each_burst
;
395 port
= cpu_portfn(pctx
->poll_cpuid
);
396 lwkt_domsg(port
, &msg
.nm_lmsg
, 0);
401 * Hook from polling systimer. Tries to schedule a netisr, but keeps
402 * track of lost ticks due to the previous handler taking too long.
403 * Normally, this should not happen, because polling handler should
404 * run for a short time. However, in some cases (e.g. when there are
405 * changes in link status etc.) the drivers take a very long time
406 * (even in the order of milliseconds) to reset and reconfigure the
407 * device, causing apparent lost polls.
409 * The first part of the code is just for debugging purposes, and tries
410 * to count how often hardclock ticks are shorter than they should,
411 * meaning either stray interrupts or delayed events.
413 * WARNING! called from fastint or IPI, the MP lock might not be held.
416 pollclock(systimer_t info
, struct intrframe
*frame __unused
)
418 struct pollctx
*pctx
= info
->data
;
422 if (pctx
->poll_handlers
== 0)
426 delta
= (t
.tv_usec
- pctx
->prev_t
.tv_usec
) +
427 (t
.tv_sec
- pctx
->prev_t
.tv_sec
)*1000000;
428 if (delta
* pctx
->pollhz
< 500000)
433 if (pctx
->pending_polls
> 100) {
435 * Too much, assume it has stalled (not always true
436 * see comment above).
439 pctx
->pending_polls
= 0;
443 if (pctx
->phase
<= 2) {
444 if (pctx
->phase
!= 0)
450 if (pctx
->pending_polls
++ > 0)
455 * netisr_pollmore is called after other netisr's, possibly scheduling
456 * another NETISR_POLL call, or adapting the burst size for the next cycle.
458 * It is very bad to fetch large bursts of packets from a single card at once,
459 * because the burst could take a long time to be completely processed, or
460 * could saturate the intermediate queue (ipintrq or similar) leading to
461 * losses or unfairness. To reduce the problem, and also to account better for
462 * time spent in network-related processing, we split the burst in smaller
463 * chunks of fixed size, giving control to the other netisr's between chunks.
464 * This helps in improving the fairness, reducing livelock (because we
465 * emulate more closely the "process to completion" that we have with
466 * fastforwarding) and accounting for the work performed in low level
467 * handling and forwarding.
472 netisr_pollmore(struct netmsg
*msg
)
474 struct pollctx
*pctx
;
476 int kern_load
, cpuid
;
477 uint32_t pending_polls
;
479 cpuid
= mycpu
->gd_cpuid
;
480 KKASSERT(cpuid
< POLLCTX_MAX
);
482 pctx
= poll_context
[cpuid
];
483 KKASSERT(pctx
!= NULL
);
484 KKASSERT(pctx
->poll_cpuid
== cpuid
);
485 KKASSERT(pctx
== msg
->nm_lmsg
.u
.ms_resultp
);
487 lwkt_replymsg(&msg
->nm_lmsg
, 0);
489 if (pctx
->poll_handlers
== 0)
492 KASSERT(pctx
->polling_enabled
,
493 ("# of registered poll handlers are not zero, "
494 "but polling is not enabled\n"));
497 if (pctx
->residual_burst
> 0) {
499 /* will run immediately on return, followed by netisrs */
502 /* here we can account time spent in netisr's in this tick */
504 kern_load
= (t
.tv_usec
- pctx
->poll_start_t
.tv_usec
) +
505 (t
.tv_sec
- pctx
->poll_start_t
.tv_sec
)*1000000; /* us */
506 kern_load
= (kern_load
* pctx
->pollhz
) / 10000; /* 0..100 */
507 if (kern_load
> (100 - pctx
->user_frac
)) { /* try decrease ticks */
508 if (pctx
->poll_burst
> 1)
511 if (pctx
->poll_burst
< pctx
->poll_burst_max
)
516 pctx
->pending_polls
--;
517 pending_polls
= pctx
->pending_polls
;
520 if (pending_polls
== 0) { /* we are done */
524 * Last cycle was long and caused us to miss one or more
525 * hardclock ticks. Restart processing again, but slightly
526 * reduce the burst size to prevent that this happens again.
528 pctx
->poll_burst
-= (pctx
->poll_burst
/ 8);
529 if (pctx
->poll_burst
< 1)
530 pctx
->poll_burst
= 1;
537 * netisr_poll is scheduled by schedpoll when appropriate, typically once
538 * per polling systimer tick.
540 * Note that the message is replied immediately in order to allow a new
541 * ISR to be scheduled in the handler.
543 * XXX each registration should indicate whether it needs a critical
544 * section to operate.
548 netisr_poll(struct netmsg
*msg
)
550 struct pollctx
*pctx
;
551 int i
, cycles
, cpuid
;
552 enum poll_cmd arg
= POLL_ONLY
;
554 cpuid
= mycpu
->gd_cpuid
;
555 KKASSERT(cpuid
< POLLCTX_MAX
);
557 pctx
= poll_context
[cpuid
];
558 KKASSERT(pctx
!= NULL
);
559 KKASSERT(pctx
->poll_cpuid
== cpuid
);
560 KKASSERT(pctx
== msg
->nm_lmsg
.u
.ms_resultp
);
563 lwkt_replymsg(&msg
->nm_lmsg
, 0);
566 if (pctx
->poll_handlers
== 0)
569 KASSERT(pctx
->polling_enabled
,
570 ("# of registered poll handlers are not zero, "
571 "but polling is not enabled\n"));
574 if (pctx
->residual_burst
== 0) { /* first call in this tick */
575 microuptime(&pctx
->poll_start_t
);
577 if (pctx
->reg_frac_count
-- == 0) {
578 arg
= POLL_AND_CHECK_STATUS
;
579 pctx
->reg_frac_count
= pctx
->reg_frac
- 1;
582 pctx
->residual_burst
= pctx
->poll_burst
;
584 cycles
= (pctx
->residual_burst
< pctx
->poll_each_burst
) ?
585 pctx
->residual_burst
: pctx
->poll_each_burst
;
586 pctx
->residual_burst
-= cycles
;
588 for (i
= 0 ; i
< pctx
->poll_handlers
; i
++) {
589 struct ifnet
*ifp
= pctx
->pr
[i
].ifp
;
591 if (!lwkt_serialize_try(ifp
->if_serializer
))
594 if ((ifp
->if_flags
& (IFF_UP
|IFF_RUNNING
|IFF_POLLING
))
595 == (IFF_UP
|IFF_RUNNING
|IFF_POLLING
))
596 ifp
->if_poll(ifp
, arg
, cycles
);
598 lwkt_serialize_exit(ifp
->if_serializer
);
606 poll_register(struct netmsg
*msg
)
608 struct ifnet
*ifp
= msg
->nm_lmsg
.u
.ms_resultp
;
609 struct pollctx
*pctx
;
612 cpuid
= mycpu
->gd_cpuid
;
613 KKASSERT(cpuid
< POLLCTX_MAX
);
615 pctx
= poll_context
[cpuid
];
616 KKASSERT(pctx
!= NULL
);
617 KKASSERT(pctx
->poll_cpuid
== cpuid
);
619 if (pctx
->polling_enabled
== 0) {
620 /* Polling disabled, cannot register */
626 * Check if there is room.
628 if (pctx
->poll_handlers
>= POLL_LIST_LEN
) {
630 * List full, cannot register more entries.
631 * This should never happen; if it does, it is probably a
632 * broken driver trying to register multiple times. Checking
633 * this at runtime is expensive, and won't solve the problem
634 * anyways, so just report a few times and then give up.
636 static int verbose
= 10; /* XXX */
638 kprintf("poll handlers list full, "
639 "maybe a broken driver ?\n");
644 pctx
->pr
[pctx
->poll_handlers
].ifp
= ifp
;
645 pctx
->poll_handlers
++;
648 if (pctx
->poll_handlers
== 1) {
649 KKASSERT(pctx
->polling_enabled
);
650 systimer_adjust_periodic(&pctx
->pollclock
,
655 lwkt_replymsg(&msg
->nm_lmsg
, rc
);
659 * Try to register routine for polling. Returns 1 if successful
660 * (and polling should be enabled), 0 otherwise.
662 * Called from mainline code only, not called from an interrupt.
665 ether_poll_register(struct ifnet
*ifp
)
669 KKASSERT(poll_defcpu
< POLLCTX_MAX
);
671 return ether_pollcpu_register(ifp
, poll_defcpu
);
675 ether_pollcpu_register(struct ifnet
*ifp
, int cpuid
)
681 if (ifp
->if_poll
== NULL
) {
682 /* Device does not support polling */
686 if (cpuid
< 0 || cpuid
>= POLLCTX_MAX
)
689 if (((1 << cpuid
) & poll_cpumask
) == 0) {
690 /* Polling is not supported on 'cpuid' */
693 KKASSERT(poll_context
[cpuid
] != NULL
);
696 * Attempt to register. Interlock with IFF_POLLING.
698 crit_enter(); /* XXX MP - not mp safe */
700 lwkt_serialize_enter(ifp
->if_serializer
);
701 if (ifp
->if_flags
& IFF_POLLING
) {
702 /* Already polling */
703 KKASSERT(ifp
->if_poll_cpuid
>= 0);
704 lwkt_serialize_exit(ifp
->if_serializer
);
708 KKASSERT(ifp
->if_poll_cpuid
< 0);
709 ifp
->if_flags
|= IFF_POLLING
;
710 ifp
->if_poll_cpuid
= cpuid
;
711 if (ifp
->if_flags
& IFF_RUNNING
)
712 ifp
->if_poll(ifp
, POLL_REGISTER
, 0);
713 lwkt_serialize_exit(ifp
->if_serializer
);
715 netmsg_init(&msg
, &curthread
->td_msgport
, 0, poll_register
);
716 msg
.nm_lmsg
.u
.ms_resultp
= ifp
;
718 port
= cpu_portfn(cpuid
);
719 lwkt_domsg(port
, &msg
.nm_lmsg
, 0);
721 if (msg
.nm_lmsg
.ms_error
) {
722 lwkt_serialize_enter(ifp
->if_serializer
);
723 ifp
->if_flags
&= ~IFF_POLLING
;
724 ifp
->if_poll_cpuid
= -1;
725 if (ifp
->if_flags
& IFF_RUNNING
)
726 ifp
->if_poll(ifp
, POLL_DEREGISTER
, 0);
727 lwkt_serialize_exit(ifp
->if_serializer
);
738 poll_deregister(struct netmsg
*msg
)
740 struct ifnet
*ifp
= msg
->nm_lmsg
.u
.ms_resultp
;
741 struct pollctx
*pctx
;
744 cpuid
= mycpu
->gd_cpuid
;
745 KKASSERT(cpuid
< POLLCTX_MAX
);
747 pctx
= poll_context
[cpuid
];
748 KKASSERT(pctx
!= NULL
);
749 KKASSERT(pctx
->poll_cpuid
== cpuid
);
751 for (i
= 0 ; i
< pctx
->poll_handlers
; i
++) {
752 if (pctx
->pr
[i
].ifp
== ifp
) /* Found it */
755 if (i
== pctx
->poll_handlers
) {
756 kprintf("ether_poll_deregister: ifp not found!!!\n");
759 pctx
->poll_handlers
--;
760 if (i
< pctx
->poll_handlers
) {
761 /* Last entry replaces this one. */
762 pctx
->pr
[i
].ifp
= pctx
->pr
[pctx
->poll_handlers
].ifp
;
765 if (pctx
->poll_handlers
== 0) {
766 systimer_adjust_periodic(&pctx
->pollclock
, 1);
767 poll_reset_state(pctx
);
771 lwkt_replymsg(&msg
->nm_lmsg
, rc
);
775 * Remove interface from the polling list. Occurs when polling is turned
776 * off. Called from mainline code only, not called from an interrupt.
779 ether_poll_deregister(struct ifnet
*ifp
)
785 KKASSERT(ifp
!= NULL
);
787 if (ifp
->if_poll
== NULL
)
792 lwkt_serialize_enter(ifp
->if_serializer
);
793 if ((ifp
->if_flags
& IFF_POLLING
) == 0) {
794 KKASSERT(ifp
->if_poll_cpuid
< 0);
795 lwkt_serialize_exit(ifp
->if_serializer
);
800 cpuid
= ifp
->if_poll_cpuid
;
801 KKASSERT(cpuid
>= 0);
802 KKASSERT(poll_context
[cpuid
] != NULL
);
804 ifp
->if_flags
&= ~IFF_POLLING
;
805 ifp
->if_poll_cpuid
= -1;
806 lwkt_serialize_exit(ifp
->if_serializer
);
808 netmsg_init(&msg
, &curthread
->td_msgport
, 0, poll_deregister
);
809 msg
.nm_lmsg
.u
.ms_resultp
= ifp
;
811 port
= cpu_portfn(cpuid
);
812 lwkt_domsg(port
, &msg
.nm_lmsg
, 0);
814 if (!msg
.nm_lmsg
.ms_error
) {
815 lwkt_serialize_enter(ifp
->if_serializer
);
816 if (ifp
->if_flags
& IFF_RUNNING
)
817 ifp
->if_poll(ifp
, POLL_DEREGISTER
, 1);
818 lwkt_serialize_exit(ifp
->if_serializer
);
829 poll_add_sysctl(struct sysctl_ctx_list
*ctx
, struct sysctl_oid_list
*parent
,
830 struct pollctx
*pctx
)
832 SYSCTL_ADD_PROC(ctx
, parent
, OID_AUTO
, "enable",
833 CTLTYPE_INT
| CTLFLAG_RW
, pctx
, 0, sysctl_polling
,
834 "I", "Polling enabled");
836 SYSCTL_ADD_PROC(ctx
, parent
, OID_AUTO
, "pollhz",
837 CTLTYPE_INT
| CTLFLAG_RW
, pctx
, 0, sysctl_pollhz
,
838 "I", "Device polling frequency");
840 SYSCTL_ADD_PROC(ctx
, parent
, OID_AUTO
, "reg_frac",
841 CTLTYPE_UINT
| CTLFLAG_RW
, pctx
, 0, sysctl_regfrac
,
842 "IU", "Every this many cycles poll register");
844 SYSCTL_ADD_PROC(ctx
, parent
, OID_AUTO
, "burst_max",
845 CTLTYPE_UINT
| CTLFLAG_RW
, pctx
, 0, sysctl_burstmax
,
846 "IU", "Max Polling burst size");
848 SYSCTL_ADD_PROC(ctx
, parent
, OID_AUTO
, "each_burst",
849 CTLTYPE_UINT
| CTLFLAG_RW
, pctx
, 0, sysctl_eachburst
,
850 "IU", "Max size of each burst");
852 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "phase", CTLFLAG_RD
,
853 &pctx
->phase
, 0, "Polling phase");
855 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "suspect", CTLFLAG_RW
,
856 &pctx
->suspect
, 0, "suspect event");
858 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "stalled", CTLFLAG_RW
,
859 &pctx
->stalled
, 0, "potential stalls");
861 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "burst", CTLFLAG_RD
,
862 &pctx
->poll_burst
, 0, "Current polling burst size");
864 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "user_frac", CTLFLAG_RW
,
866 "Desired user fraction of cpu time");
868 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "short_ticks", CTLFLAG_RW
,
869 &pctx
->short_ticks
, 0,
870 "Hardclock ticks shorter than they should be");
872 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "lost_polls", CTLFLAG_RW
,
873 &pctx
->lost_polls
, 0,
874 "How many times we would have lost a poll tick");
876 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "pending_polls", CTLFLAG_RD
,
877 &pctx
->pending_polls
, 0, "Do we need to poll again");
879 SYSCTL_ADD_INT(ctx
, parent
, OID_AUTO
, "residual_burst", CTLFLAG_RD
,
880 &pctx
->residual_burst
, 0,
881 "# of residual cycles in burst");
883 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "handlers", CTLFLAG_RD
,
884 &pctx
->poll_handlers
, 0,
885 "Number of registered poll handlers");
889 schedpoll_oncpu(struct pollctx
*pctx
, struct netmsg
*msg
, netisr_fn_t handler
)
891 if (msg
->nm_lmsg
.ms_flags
& MSGF_DONE
) {
894 netmsg_init(msg
, &netisr_adone_rport
, 0, handler
);
896 msg
->nm_lmsg
.u
.ms_resultp
= pctx
;
898 port
= cpu_portfn(mycpu
->gd_cpuid
);
899 lwkt_sendmsg(port
, &msg
->nm_lmsg
);
904 poll_sysctl_pollhz(struct netmsg
*msg
)
906 struct pollctx
*pctx
;
909 cpuid
= mycpu
->gd_cpuid
;
910 KKASSERT(cpuid
< POLLCTX_MAX
);
912 pctx
= poll_context
[cpuid
];
913 KKASSERT(pctx
!= NULL
);
914 KKASSERT(pctx
->poll_cpuid
== cpuid
);
917 * If polling is disabled or there is no device registered,
918 * don't adjust polling systimer frequency.
919 * Polling systimer frequency will be adjusted once polling
920 * is enabled and there are registered devices.
922 pctx
->pollhz
= msg
->nm_lmsg
.u
.ms_result
;
923 if (pctx
->polling_enabled
&& pctx
->poll_handlers
)
924 systimer_adjust_periodic(&pctx
->pollclock
, pctx
->pollhz
);
927 * Make sure that reg_frac and reg_frac_count are within valid range.
929 if (pctx
->reg_frac
> pctx
->pollhz
) {
930 pctx
->reg_frac
= pctx
->pollhz
;
931 if (pctx
->reg_frac_count
> pctx
->reg_frac
)
932 pctx
->reg_frac_count
= pctx
->reg_frac
- 1;
935 lwkt_replymsg(&msg
->nm_lmsg
, 0);
939 poll_sysctl_polling(struct netmsg
*msg
)
941 struct pollctx
*pctx
;
944 cpuid
= mycpu
->gd_cpuid
;
945 KKASSERT(cpuid
< POLLCTX_MAX
);
947 pctx
= poll_context
[cpuid
];
948 KKASSERT(pctx
!= NULL
);
949 KKASSERT(pctx
->poll_cpuid
== cpuid
);
952 * If polling is disabled or there is no device registered,
953 * cut the polling systimer frequency to 1hz.
955 pctx
->polling_enabled
= msg
->nm_lmsg
.u
.ms_result
;
956 if (pctx
->polling_enabled
&& pctx
->poll_handlers
) {
957 systimer_adjust_periodic(&pctx
->pollclock
, pctx
->pollhz
);
959 systimer_adjust_periodic(&pctx
->pollclock
, 1);
960 poll_reset_state(pctx
);
963 if (!pctx
->polling_enabled
&& pctx
->poll_handlers
!= 0) {
966 for (i
= 0 ; i
< pctx
->poll_handlers
; i
++) {
967 struct ifnet
*ifp
= pctx
->pr
[i
].ifp
;
969 lwkt_serialize_enter(ifp
->if_serializer
);
971 if ((ifp
->if_flags
& IFF_POLLING
) == 0) {
972 KKASSERT(ifp
->if_poll_cpuid
< 0);
973 lwkt_serialize_exit(ifp
->if_serializer
);
976 ifp
->if_flags
&= ~IFF_POLLING
;
977 ifp
->if_poll_cpuid
= -1;
980 * Only call the interface deregistration
981 * function if the interface is still
984 if (ifp
->if_flags
& IFF_RUNNING
)
985 ifp
->if_poll(ifp
, POLL_DEREGISTER
, 1);
987 lwkt_serialize_exit(ifp
->if_serializer
);
989 pctx
->poll_handlers
= 0;
992 lwkt_replymsg(&msg
->nm_lmsg
, 0);
996 poll_sysctl_regfrac(struct netmsg
*msg
)
998 struct pollctx
*pctx
;
1002 cpuid
= mycpu
->gd_cpuid
;
1003 KKASSERT(cpuid
< POLLCTX_MAX
);
1005 pctx
= poll_context
[cpuid
];
1006 KKASSERT(pctx
!= NULL
);
1007 KKASSERT(pctx
->poll_cpuid
== cpuid
);
1009 reg_frac
= msg
->nm_lmsg
.u
.ms_result
;
1010 if (reg_frac
> pctx
->pollhz
)
1011 reg_frac
= pctx
->pollhz
;
1012 else if (reg_frac
< 1)
1015 pctx
->reg_frac
= reg_frac
;
1016 if (pctx
->reg_frac_count
> pctx
->reg_frac
)
1017 pctx
->reg_frac_count
= pctx
->reg_frac
- 1;
1019 lwkt_replymsg(&msg
->nm_lmsg
, 0);
1023 poll_sysctl_burstmax(struct netmsg
*msg
)
1025 struct pollctx
*pctx
;
1028 cpuid
= mycpu
->gd_cpuid
;
1029 KKASSERT(cpuid
< POLLCTX_MAX
);
1031 pctx
= poll_context
[cpuid
];
1032 KKASSERT(pctx
!= NULL
);
1033 KKASSERT(pctx
->poll_cpuid
== cpuid
);
1035 pctx
->poll_burst_max
= msg
->nm_lmsg
.u
.ms_result
;
1036 if (pctx
->poll_each_burst
> pctx
->poll_burst_max
)
1037 pctx
->poll_each_burst
= pctx
->poll_burst_max
;
1038 if (pctx
->poll_burst
> pctx
->poll_burst_max
)
1039 pctx
->poll_burst
= pctx
->poll_burst_max
;
1040 if (pctx
->residual_burst
> pctx
->poll_burst_max
)
1041 pctx
->residual_burst
= pctx
->poll_burst_max
;
1043 lwkt_replymsg(&msg
->nm_lmsg
, 0);
1047 poll_sysctl_eachburst(struct netmsg
*msg
)
1049 struct pollctx
*pctx
;
1050 uint32_t each_burst
;
1053 cpuid
= mycpu
->gd_cpuid
;
1054 KKASSERT(cpuid
< POLLCTX_MAX
);
1056 pctx
= poll_context
[cpuid
];
1057 KKASSERT(pctx
!= NULL
);
1058 KKASSERT(pctx
->poll_cpuid
== cpuid
);
1060 each_burst
= msg
->nm_lmsg
.u
.ms_result
;
1061 if (each_burst
> pctx
->poll_burst_max
)
1062 each_burst
= pctx
->poll_burst_max
;
1063 else if (each_burst
< 1)
1065 pctx
->poll_each_burst
= each_burst
;
1067 lwkt_replymsg(&msg
->nm_lmsg
, 0);