2 * Copyright (c) 2001-2002 Luigi Rizzo
4 * Supported by: the Xorp Project (www.xorp.org)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $
28 * $DragonFly: src/sys/kern/kern_poll.c,v 1.48 2008/09/24 12:07:19 sephe Exp $
31 #include "opt_polling.h"
33 #include <sys/param.h>
34 #include <sys/kernel.h>
36 #include <sys/socket.h> /* needed by net/if.h */
37 #include <sys/sysctl.h>
39 #include <sys/thread2.h>
40 #include <sys/msgport2.h>
42 #include <net/if.h> /* for IFF_* flags */
43 #include <net/netmsg2.h>
46 * Polling support for [network] device drivers.
48 * Drivers which support this feature try to register with the
51 * If registration is successful, the driver must disable interrupts,
52 * and further I/O is performed through the handler, which is invoked
53 * (at least once per clock tick) with 3 arguments: the "arg" passed at
54 * register time (a struct ifnet pointer), a command, and a "count" limit.
56 * The command can be one of the following:
57 * POLL_ONLY: quick move of "count" packets from input/output queues.
58 * POLL_AND_CHECK_STATUS: as above, plus check status registers or do
59 * other more expensive operations. This command is issued periodically
60 * but less frequently than POLL_ONLY.
61 * POLL_DEREGISTER: deregister and return to interrupt mode.
62 * POLL_REGISTER: register and disable interrupts
64 * The first two commands are only issued if the interface is marked as
65 * 'IFF_UP, IFF_RUNNING and IFF_POLLING', the last two only if IFF_RUNNING
68 * The count limit specifies how much work the handler can do during the
69 * call -- typically this is the number of packets to be received, or
70 * transmitted, etc. (drivers are free to interpret this number, as long
71 * as the max time spent in the function grows roughly linearly with the
74 * Deregistration can be requested by the driver itself (typically in the
75 * *_stop() routine), or by the polling code, by invoking the handler.
77 * Polling can be enabled or disabled on a particular CPU_X with the sysctl
78 * variable kern.polling.X.enable (default is 1, enabled)
80 * A second variable controls the sharing of CPU between polling/kernel
81 * network processing, and other activities (typically userlevel tasks):
82 * kern.polling.X.user_frac (between 0 and 100, default 50) sets the share
83 * of CPU allocated to user tasks. CPU is allocated proportionally to the
84 * shares, by dynamically adjusting the "count" (poll_burst).
86 * Other parameters should be left to their default values.
87 * The following constraints hold
89 * 1 <= poll_burst <= poll_burst_max
90 * 1 <= poll_each_burst <= poll_burst_max
91 * MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
/*
 * Compile-time limits and defaults for the polling code.
 *
 * MIN_POLL_BURST_MAX/MAX_POLL_BURST_MAX bound the burst_max tunable;
 * POLL_BURST_MAX and POLL_EACH_BURST are its and each_burst's defaults.
 */
#define MIN_POLL_BURST_MAX	10
#define MAX_POLL_BURST_MAX	1000
#define POLL_BURST_MAX		150	/* good for 100Mbit net and HZ=1000 */
#define POLL_EACH_BURST		5

/* Upper bound for the polling frequency; may be overridden at build time. */
#ifndef DEVICE_POLLING_FREQ_MAX
#define DEVICE_POLLING_FREQ_MAX		30000
#endif
#define DEVICE_POLLING_FREQ_DEFAULT	2000

/* Capacity of the per-context handler array (pr[]). */
#define POLL_LIST_LEN	128

/* Size of the poll_context[] array, which is indexed by cpu id. */
#define POLLCTX_MAX	32
112 struct sysctl_ctx_list poll_sysctl_ctx
;
113 struct sysctl_oid
*poll_sysctl_tree
;
115 uint32_t poll_burst
; /* state */
116 uint32_t poll_each_burst
; /* tunable */
117 uint32_t poll_burst_max
; /* tunable */
118 uint32_t user_frac
; /* tunable */
119 int reg_frac_count
; /* state */
120 uint32_t reg_frac
; /* tunable */
121 uint32_t short_ticks
; /* statistics */
122 uint32_t lost_polls
; /* statistics */
123 uint32_t pending_polls
; /* state */
124 int residual_burst
; /* state */
125 uint32_t phase
; /* state */
126 uint32_t suspect
; /* statistics */
127 uint32_t stalled
; /* statistics */
128 struct timeval poll_start_t
; /* state */
129 struct timeval prev_t
; /* state */
131 uint32_t poll_handlers
; /* next free entry in pr[]. */
132 struct pollrec pr
[POLL_LIST_LEN
];
135 struct systimer pollclock
;
136 int polling_enabled
; /* tunable */
137 int pollhz
; /* tunable */
139 struct netmsg_base poll_netmsg
;
140 struct netmsg_base poll_more_netmsg
;
143 static struct pollctx
*poll_context
[POLLCTX_MAX
];
145 SYSCTL_NODE(_kern
, OID_AUTO
, polling
, CTLFLAG_RW
, 0,
146 "Device polling parameters");
148 static int poll_defcpu
= -1;
149 SYSCTL_INT(_kern_polling
, OID_AUTO
, defcpu
, CTLFLAG_RD
,
150 &poll_defcpu
, 0, "default CPU to run device polling");
152 static cpumask_t poll_cpumask0
= (cpumask_t
)-1;
153 TUNABLE_ULONG("kern.polling.cpumask", (u_long
*)&poll_cpumask0
);
155 static cpumask_t poll_cpumask
;
156 SYSCTL_LONG(_kern_polling
, OID_AUTO
, cpumask
, CTLFLAG_RD
,
157 &poll_cpumask
, 0, "CPUs that can run device polling");
159 static int polling_enabled
= 1; /* global polling enable */
160 TUNABLE_INT("kern.polling.enable", &polling_enabled
);
162 static int pollhz
= DEVICE_POLLING_FREQ_DEFAULT
;
163 TUNABLE_INT("kern.polling.pollhz", &pollhz
);
165 static int poll_burst_max
= POLL_BURST_MAX
;
166 TUNABLE_INT("kern.polling.burst_max", &poll_burst_max
);
168 static int poll_each_burst
= POLL_EACH_BURST
;
169 TUNABLE_INT("kern.polling.each_burst", &poll_each_burst
);
171 /* Netisr handlers */
172 static void netisr_poll(netmsg_t
);
173 static void netisr_pollmore(netmsg_t
);
174 static void poll_register(netmsg_t
);
175 static void poll_deregister(netmsg_t
);
176 static void poll_sysctl_pollhz(netmsg_t
);
177 static void poll_sysctl_polling(netmsg_t
);
178 static void poll_sysctl_regfrac(netmsg_t
);
179 static void poll_sysctl_burstmax(netmsg_t
);
180 static void poll_sysctl_eachburst(netmsg_t
);
182 /* Systimer handler */
183 static void pollclock(systimer_t
, int, struct intrframe
*);
185 /* Sysctl handlers */
186 static int sysctl_pollhz(SYSCTL_HANDLER_ARGS
);
187 static int sysctl_polling(SYSCTL_HANDLER_ARGS
);
188 static int sysctl_regfrac(SYSCTL_HANDLER_ARGS
);
189 static int sysctl_burstmax(SYSCTL_HANDLER_ARGS
);
190 static int sysctl_eachburst(SYSCTL_HANDLER_ARGS
);
191 static void poll_add_sysctl(struct sysctl_ctx_list
*,
192 struct sysctl_oid_list
*, struct pollctx
*);
194 void init_device_poll_pcpu(int); /* per-cpu init routine */
196 #define POLL_KTR_STRING "ifp=%p"
197 #define POLL_KTR_ARG_SIZE (sizeof(void *))
200 #define KTR_POLLING KTR_ALL
202 KTR_INFO_MASTER(poll
);
203 KTR_INFO(KTR_POLLING
, poll
, beg
, 0, POLL_KTR_STRING
, POLL_KTR_ARG_SIZE
);
204 KTR_INFO(KTR_POLLING
, poll
, end
, 1, POLL_KTR_STRING
, POLL_KTR_ARG_SIZE
);
206 #define logpoll(name, arg) KTR_LOG(poll_ ## name, arg)
209 poll_reset_state(struct pollctx
*pctx
)
212 pctx
->poll_burst
= 5;
213 pctx
->reg_frac_count
= 0;
214 pctx
->pending_polls
= 0;
215 pctx
->residual_burst
= 0;
217 bzero(&pctx
->poll_start_t
, sizeof(pctx
->poll_start_t
));
218 bzero(&pctx
->prev_t
, sizeof(pctx
->prev_t
));
223 * Initialize per-cpu polling(4) context. Called from kern_clock.c:
226 init_device_poll_pcpu(int cpuid
)
228 struct pollctx
*pctx
;
231 if (cpuid
>= POLLCTX_MAX
)
234 if ((CPUMASK(cpuid
) & poll_cpumask0
) == 0)
237 if (poll_burst_max
< MIN_POLL_BURST_MAX
)
238 poll_burst_max
= MIN_POLL_BURST_MAX
;
239 else if (poll_burst_max
> MAX_POLL_BURST_MAX
)
240 poll_burst_max
= MAX_POLL_BURST_MAX
;
242 if (poll_each_burst
> poll_burst_max
)
243 poll_each_burst
= poll_burst_max
;
245 poll_cpumask
|= CPUMASK(cpuid
);
247 pctx
= kmalloc(sizeof(*pctx
), M_DEVBUF
, M_WAITOK
| M_ZERO
);
249 pctx
->poll_each_burst
= poll_each_burst
;
250 pctx
->poll_burst_max
= poll_burst_max
;
251 pctx
->user_frac
= 50;
253 pctx
->polling_enabled
= polling_enabled
;
254 pctx
->pollhz
= pollhz
;
255 pctx
->poll_cpuid
= cpuid
;
256 poll_reset_state(pctx
);
258 netmsg_init(&pctx
->poll_netmsg
, NULL
, &netisr_adone_rport
,
261 pctx
->poll_netmsg
.lmsg
.u
.ms_resultp
= pctx
;
264 netmsg_init(&pctx
->poll_more_netmsg
, NULL
, &netisr_adone_rport
,
267 pctx
->poll_more_netmsg
.lmsg
.u
.ms_resultp
= pctx
;
270 KASSERT(cpuid
< POLLCTX_MAX
, ("cpu id must < %d", cpuid
));
271 poll_context
[cpuid
] = pctx
;
273 if (poll_defcpu
< 0) {
277 * Initialize global sysctl nodes, for compat
279 poll_add_sysctl(NULL
, SYSCTL_STATIC_CHILDREN(_kern_polling
),
284 * Initialize per-cpu sysctl nodes
286 ksnprintf(cpuid_str
, sizeof(cpuid_str
), "%d", pctx
->poll_cpuid
);
288 sysctl_ctx_init(&pctx
->poll_sysctl_ctx
);
289 pctx
->poll_sysctl_tree
= SYSCTL_ADD_NODE(&pctx
->poll_sysctl_ctx
,
290 SYSCTL_STATIC_CHILDREN(_kern_polling
),
291 OID_AUTO
, cpuid_str
, CTLFLAG_RD
, 0, "");
292 poll_add_sysctl(&pctx
->poll_sysctl_ctx
,
293 SYSCTL_CHILDREN(pctx
->poll_sysctl_tree
), pctx
);
296 * Initialize systimer
298 systimer_init_periodic_nq(&pctx
->pollclock
, pollclock
, pctx
, 1);
302 schedpoll_oncpu(netmsg_t msg
)
304 if (msg
->lmsg
.ms_flags
& MSGF_DONE
)
305 lwkt_sendmsg(cpu_portfn(mycpuid
), &msg
->lmsg
);
309 schedpoll(struct pollctx
*pctx
)
312 schedpoll_oncpu((netmsg_t
)&pctx
->poll_netmsg
);
317 schedpollmore(struct pollctx
*pctx
)
319 schedpoll_oncpu((netmsg_t
)&pctx
->poll_more_netmsg
);
323 * Set the polling frequency
326 sysctl_pollhz(SYSCTL_HANDLER_ARGS
)
328 struct pollctx
*pctx
= arg1
;
329 struct netmsg_base msg
;
334 error
= sysctl_handle_int(oidp
, &phz
, 0, req
);
335 if (error
|| req
->newptr
== NULL
)
339 else if (phz
> DEVICE_POLLING_FREQ_MAX
)
340 phz
= DEVICE_POLLING_FREQ_MAX
;
342 netmsg_init(&msg
, NULL
, &curthread
->td_msgport
,
343 0, poll_sysctl_pollhz
);
344 msg
.lmsg
.u
.ms_result
= phz
;
346 port
= cpu_portfn(pctx
->poll_cpuid
);
347 lwkt_domsg(port
, &msg
.lmsg
, 0);
355 sysctl_polling(SYSCTL_HANDLER_ARGS
)
357 struct pollctx
*pctx
= arg1
;
358 struct netmsg_base msg
;
362 enabled
= pctx
->polling_enabled
;
363 error
= sysctl_handle_int(oidp
, &enabled
, 0, req
);
364 if (error
|| req
->newptr
== NULL
)
367 netmsg_init(&msg
, NULL
, &curthread
->td_msgport
,
368 0, poll_sysctl_polling
);
369 msg
.lmsg
.u
.ms_result
= enabled
;
371 port
= cpu_portfn(pctx
->poll_cpuid
);
372 lwkt_domsg(port
, &msg
.lmsg
, 0);
377 sysctl_regfrac(SYSCTL_HANDLER_ARGS
)
379 struct pollctx
*pctx
= arg1
;
380 struct netmsg_base msg
;
385 reg_frac
= pctx
->reg_frac
;
386 error
= sysctl_handle_int(oidp
, ®_frac
, 0, req
);
387 if (error
|| req
->newptr
== NULL
)
390 netmsg_init(&msg
, NULL
, &curthread
->td_msgport
,
391 0, poll_sysctl_regfrac
);
392 msg
.lmsg
.u
.ms_result
= reg_frac
;
394 port
= cpu_portfn(pctx
->poll_cpuid
);
395 lwkt_domsg(port
, &msg
.lmsg
, 0);
400 sysctl_burstmax(SYSCTL_HANDLER_ARGS
)
402 struct pollctx
*pctx
= arg1
;
403 struct netmsg_base msg
;
408 burst_max
= pctx
->poll_burst_max
;
409 error
= sysctl_handle_int(oidp
, &burst_max
, 0, req
);
410 if (error
|| req
->newptr
== NULL
)
412 if (burst_max
< MIN_POLL_BURST_MAX
)
413 burst_max
= MIN_POLL_BURST_MAX
;
414 else if (burst_max
> MAX_POLL_BURST_MAX
)
415 burst_max
= MAX_POLL_BURST_MAX
;
417 netmsg_init(&msg
, NULL
, &curthread
->td_msgport
,
418 0, poll_sysctl_burstmax
);
419 msg
.lmsg
.u
.ms_result
= burst_max
;
421 port
= cpu_portfn(pctx
->poll_cpuid
);
422 lwkt_domsg(port
, &msg
.lmsg
, 0);
427 sysctl_eachburst(SYSCTL_HANDLER_ARGS
)
429 struct pollctx
*pctx
= arg1
;
430 struct netmsg_base msg
;
435 each_burst
= pctx
->poll_each_burst
;
436 error
= sysctl_handle_int(oidp
, &each_burst
, 0, req
);
437 if (error
|| req
->newptr
== NULL
)
440 netmsg_init(&msg
, NULL
, &curthread
->td_msgport
,
441 0, poll_sysctl_eachburst
);
442 msg
.lmsg
.u
.ms_result
= each_burst
;
444 port
= cpu_portfn(pctx
->poll_cpuid
);
445 lwkt_domsg(port
, &msg
.lmsg
, 0);
450 * Hook from polling systimer. Tries to schedule a netisr, but keeps
451 * track of lost ticks due to the previous handler taking too long.
452 * Normally, this should not happen, because polling handler should
453 * run for a short time. However, in some cases (e.g. when there are
454 * changes in link status etc.) the drivers take a very long time
455 * (even in the order of milliseconds) to reset and reconfigure the
456 * device, causing apparent lost polls.
458 * The first part of the code is just for debugging purposes, and tries
459 * to count how often hardclock ticks are shorter than they should,
460 * meaning either stray interrupts or delayed events.
462 * WARNING! called from fastint or IPI, the MP lock might not be held.
465 pollclock(systimer_t info
, int in_ipi __unused
,
466 struct intrframe
*frame __unused
)
468 struct pollctx
*pctx
= info
->data
;
472 if (pctx
->poll_handlers
== 0)
476 delta
= (t
.tv_usec
- pctx
->prev_t
.tv_usec
) +
477 (t
.tv_sec
- pctx
->prev_t
.tv_sec
)*1000000;
478 if (delta
* pctx
->pollhz
< 500000)
483 if (pctx
->pending_polls
> 100) {
485 * Too much, assume it has stalled (not always true
486 * see comment above).
489 pctx
->pending_polls
= 0;
493 if (pctx
->phase
<= 2) {
494 if (pctx
->phase
!= 0)
500 if (pctx
->pending_polls
++ > 0)
505 * netisr_pollmore is called after other netisr's, possibly scheduling
506 * another NETISR_POLL call, or adapting the burst size for the next cycle.
508 * It is very bad to fetch large bursts of packets from a single card at once,
509 * because the burst could take a long time to be completely processed leading
510 * to unfairness. To reduce the problem, and also to account better for time
511 * spent in network-related processing, we split the burst in smaller chunks
512 * of fixed size, giving control to the other netisr's between chunks. This
513 * helps in improving the fairness, reducing livelock (because we emulate more
514 * closely the "process to completion" that we have with fastforwarding) and
515 * accounting for the work performed in low level handling and forwarding.
520 netisr_pollmore(netmsg_t msg
)
522 struct pollctx
*pctx
;
524 int kern_load
, cpuid
;
525 uint32_t pending_polls
;
527 cpuid
= mycpu
->gd_cpuid
;
528 KKASSERT(cpuid
< POLLCTX_MAX
);
530 pctx
= poll_context
[cpuid
];
531 KKASSERT(pctx
!= NULL
);
532 KKASSERT(pctx
->poll_cpuid
== cpuid
);
533 KKASSERT(pctx
== msg
->lmsg
.u
.ms_resultp
);
535 lwkt_replymsg(&msg
->lmsg
, 0);
537 if (pctx
->poll_handlers
== 0)
540 KASSERT(pctx
->polling_enabled
,
541 ("# of registered poll handlers are not zero, "
542 "but polling is not enabled\n"));
545 if (pctx
->residual_burst
> 0) {
547 /* will run immediately on return, followed by netisrs */
550 /* here we can account time spent in netisr's in this tick */
552 kern_load
= (t
.tv_usec
- pctx
->poll_start_t
.tv_usec
) +
553 (t
.tv_sec
- pctx
->poll_start_t
.tv_sec
)*1000000; /* us */
554 kern_load
= (kern_load
* pctx
->pollhz
) / 10000; /* 0..100 */
555 if (kern_load
> (100 - pctx
->user_frac
)) { /* try decrease ticks */
556 if (pctx
->poll_burst
> 1)
559 if (pctx
->poll_burst
< pctx
->poll_burst_max
)
564 pctx
->pending_polls
--;
565 pending_polls
= pctx
->pending_polls
;
568 if (pending_polls
== 0) { /* we are done */
572 * Last cycle was long and caused us to miss one or more
573 * hardclock ticks. Restart processing again, but slightly
574 * reduce the burst size to prevent that this happens again.
576 pctx
->poll_burst
-= (pctx
->poll_burst
/ 8);
577 if (pctx
->poll_burst
< 1)
578 pctx
->poll_burst
= 1;
585 * netisr_poll is scheduled by schedpoll when appropriate, typically once
586 * per polling systimer tick.
588 * Note that the message is replied immediately in order to allow a new
589 * ISR to be scheduled in the handler.
591 * XXX each registration should indicate whether it needs a critical
592 * section to operate.
596 netisr_poll(netmsg_t msg
)
598 struct pollctx
*pctx
;
599 int i
, cycles
, cpuid
;
600 enum poll_cmd arg
= POLL_ONLY
;
602 cpuid
= mycpu
->gd_cpuid
;
603 KKASSERT(cpuid
< POLLCTX_MAX
);
605 pctx
= poll_context
[cpuid
];
606 KKASSERT(pctx
!= NULL
);
607 KKASSERT(pctx
->poll_cpuid
== cpuid
);
608 KKASSERT(pctx
== msg
->lmsg
.u
.ms_resultp
);
611 lwkt_replymsg(&msg
->lmsg
, 0);
614 if (pctx
->poll_handlers
== 0)
617 KASSERT(pctx
->polling_enabled
,
618 ("# of registered poll handlers are not zero, "
619 "but polling is not enabled\n"));
622 if (pctx
->residual_burst
== 0) { /* first call in this tick */
623 microuptime(&pctx
->poll_start_t
);
625 if (pctx
->reg_frac_count
-- == 0) {
626 arg
= POLL_AND_CHECK_STATUS
;
627 pctx
->reg_frac_count
= pctx
->reg_frac
- 1;
630 pctx
->residual_burst
= pctx
->poll_burst
;
632 cycles
= (pctx
->residual_burst
< pctx
->poll_each_burst
) ?
633 pctx
->residual_burst
: pctx
->poll_each_burst
;
634 pctx
->residual_burst
-= cycles
;
636 for (i
= 0 ; i
< pctx
->poll_handlers
; i
++) {
637 struct ifnet
*ifp
= pctx
->pr
[i
].ifp
;
639 if (!ifnet_tryserialize_main(ifp
))
642 if ((ifp
->if_flags
& (IFF_UP
|IFF_RUNNING
|IFF_POLLING
))
643 == (IFF_UP
|IFF_RUNNING
|IFF_POLLING
)) {
646 ifp
->if_poll(ifp
, arg
, cycles
);
651 ifnet_deserialize_main(ifp
);
659 poll_register(netmsg_t msg
)
661 struct ifnet
*ifp
= msg
->lmsg
.u
.ms_resultp
;
662 struct pollctx
*pctx
;
665 cpuid
= mycpu
->gd_cpuid
;
666 KKASSERT(cpuid
< POLLCTX_MAX
);
668 pctx
= poll_context
[cpuid
];
669 KKASSERT(pctx
!= NULL
);
670 KKASSERT(pctx
->poll_cpuid
== cpuid
);
672 if (pctx
->polling_enabled
== 0) {
673 /* Polling disabled, cannot register */
679 * Check if there is room.
681 if (pctx
->poll_handlers
>= POLL_LIST_LEN
) {
683 * List full, cannot register more entries.
684 * This should never happen; if it does, it is probably a
685 * broken driver trying to register multiple times. Checking
686 * this at runtime is expensive, and won't solve the problem
687 * anyways, so just report a few times and then give up.
689 static int verbose
= 10; /* XXX */
691 kprintf("poll handlers list full, "
692 "maybe a broken driver ?\n");
697 pctx
->pr
[pctx
->poll_handlers
].ifp
= ifp
;
698 pctx
->poll_handlers
++;
701 if (pctx
->poll_handlers
== 1) {
702 KKASSERT(pctx
->polling_enabled
);
703 systimer_adjust_periodic(&pctx
->pollclock
,
708 lwkt_replymsg(&msg
->lmsg
, rc
);
712 * Try to register routine for polling. Returns 1 if successful
713 * (and polling should be enabled), 0 otherwise.
715 * Called from mainline code only, not called from an interrupt.
718 ether_poll_register(struct ifnet
*ifp
)
722 KKASSERT(poll_defcpu
< POLLCTX_MAX
);
724 return ether_pollcpu_register(ifp
, poll_defcpu
);
728 ether_pollcpu_register(struct ifnet
*ifp
, int cpuid
)
730 struct netmsg_base msg
;
734 if (ifp
->if_poll
== NULL
) {
735 /* Device does not support polling */
739 if (cpuid
< 0 || cpuid
>= POLLCTX_MAX
)
742 if ((CPUMASK(cpuid
) & poll_cpumask
) == 0) {
743 /* Polling is not supported on 'cpuid' */
746 KKASSERT(poll_context
[cpuid
] != NULL
);
749 * Attempt to register. Interlock with IFF_POLLING.
751 crit_enter(); /* XXX MP - not mp safe */
753 ifnet_serialize_all(ifp
);
754 if (ifp
->if_flags
& IFF_POLLING
) {
755 /* Already polling */
756 KKASSERT(ifp
->if_poll_cpuid
>= 0);
757 ifnet_deserialize_all(ifp
);
761 KKASSERT(ifp
->if_poll_cpuid
< 0);
762 ifp
->if_flags
|= IFF_POLLING
;
763 ifp
->if_poll_cpuid
= cpuid
;
764 if (ifp
->if_flags
& IFF_RUNNING
)
765 ifp
->if_poll(ifp
, POLL_REGISTER
, 0);
766 ifnet_deserialize_all(ifp
);
768 netmsg_init(&msg
, NULL
, &curthread
->td_msgport
,
770 msg
.lmsg
.u
.ms_resultp
= ifp
;
772 port
= cpu_portfn(cpuid
);
773 lwkt_domsg(port
, &msg
.lmsg
, 0);
775 if (msg
.lmsg
.ms_error
) {
776 ifnet_serialize_all(ifp
);
777 ifp
->if_flags
&= ~IFF_POLLING
;
778 ifp
->if_poll_cpuid
= -1;
779 if (ifp
->if_flags
& IFF_RUNNING
)
780 ifp
->if_poll(ifp
, POLL_DEREGISTER
, 0);
781 ifnet_deserialize_all(ifp
);
792 poll_deregister(netmsg_t msg
)
794 struct ifnet
*ifp
= msg
->lmsg
.u
.ms_resultp
;
795 struct pollctx
*pctx
;
798 cpuid
= mycpu
->gd_cpuid
;
799 KKASSERT(cpuid
< POLLCTX_MAX
);
801 pctx
= poll_context
[cpuid
];
802 KKASSERT(pctx
!= NULL
);
803 KKASSERT(pctx
->poll_cpuid
== cpuid
);
805 for (i
= 0 ; i
< pctx
->poll_handlers
; i
++) {
806 if (pctx
->pr
[i
].ifp
== ifp
) /* Found it */
809 if (i
== pctx
->poll_handlers
) {
810 kprintf("ether_poll_deregister: ifp not found!!!\n");
813 pctx
->poll_handlers
--;
814 if (i
< pctx
->poll_handlers
) {
815 /* Last entry replaces this one. */
816 pctx
->pr
[i
].ifp
= pctx
->pr
[pctx
->poll_handlers
].ifp
;
819 if (pctx
->poll_handlers
== 0) {
820 systimer_adjust_periodic(&pctx
->pollclock
, 1);
821 poll_reset_state(pctx
);
825 lwkt_replymsg(&msg
->lmsg
, rc
);
829 * Remove interface from the polling list. Occurs when polling is turned
830 * off. Called from mainline code only, not called from an interrupt.
833 ether_poll_deregister(struct ifnet
*ifp
)
835 struct netmsg_base msg
;
839 KKASSERT(ifp
!= NULL
);
841 if (ifp
->if_poll
== NULL
)
846 ifnet_serialize_all(ifp
);
847 if ((ifp
->if_flags
& IFF_POLLING
) == 0) {
848 KKASSERT(ifp
->if_poll_cpuid
< 0);
849 ifnet_deserialize_all(ifp
);
854 cpuid
= ifp
->if_poll_cpuid
;
855 KKASSERT(cpuid
>= 0);
856 KKASSERT(poll_context
[cpuid
] != NULL
);
858 ifp
->if_flags
&= ~IFF_POLLING
;
859 ifp
->if_poll_cpuid
= -1;
860 ifnet_deserialize_all(ifp
);
862 netmsg_init(&msg
, NULL
, &curthread
->td_msgport
,
864 msg
.lmsg
.u
.ms_resultp
= ifp
;
866 port
= cpu_portfn(cpuid
);
867 lwkt_domsg(port
, &msg
.lmsg
, 0);
869 if (!msg
.lmsg
.ms_error
) {
870 ifnet_serialize_all(ifp
);
871 if (ifp
->if_flags
& IFF_RUNNING
)
872 ifp
->if_poll(ifp
, POLL_DEREGISTER
, 1);
873 ifnet_deserialize_all(ifp
);
884 poll_add_sysctl(struct sysctl_ctx_list
*ctx
, struct sysctl_oid_list
*parent
,
885 struct pollctx
*pctx
)
887 SYSCTL_ADD_PROC(ctx
, parent
, OID_AUTO
, "enable",
888 CTLTYPE_INT
| CTLFLAG_RW
, pctx
, 0, sysctl_polling
,
889 "I", "Polling enabled");
891 SYSCTL_ADD_PROC(ctx
, parent
, OID_AUTO
, "pollhz",
892 CTLTYPE_INT
| CTLFLAG_RW
, pctx
, 0, sysctl_pollhz
,
893 "I", "Device polling frequency");
895 SYSCTL_ADD_PROC(ctx
, parent
, OID_AUTO
, "reg_frac",
896 CTLTYPE_UINT
| CTLFLAG_RW
, pctx
, 0, sysctl_regfrac
,
897 "IU", "Every this many cycles poll register");
899 SYSCTL_ADD_PROC(ctx
, parent
, OID_AUTO
, "burst_max",
900 CTLTYPE_UINT
| CTLFLAG_RW
, pctx
, 0, sysctl_burstmax
,
901 "IU", "Max Polling burst size");
903 SYSCTL_ADD_PROC(ctx
, parent
, OID_AUTO
, "each_burst",
904 CTLTYPE_UINT
| CTLFLAG_RW
, pctx
, 0, sysctl_eachburst
,
905 "IU", "Max size of each burst");
907 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "phase", CTLFLAG_RD
,
908 &pctx
->phase
, 0, "Polling phase");
910 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "suspect", CTLFLAG_RW
,
911 &pctx
->suspect
, 0, "suspect event");
913 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "stalled", CTLFLAG_RW
,
914 &pctx
->stalled
, 0, "potential stalls");
916 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "burst", CTLFLAG_RD
,
917 &pctx
->poll_burst
, 0, "Current polling burst size");
919 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "user_frac", CTLFLAG_RW
,
921 "Desired user fraction of cpu time");
923 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "short_ticks", CTLFLAG_RW
,
924 &pctx
->short_ticks
, 0,
925 "Hardclock ticks shorter than they should be");
927 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "lost_polls", CTLFLAG_RW
,
928 &pctx
->lost_polls
, 0,
929 "How many times we would have lost a poll tick");
931 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "pending_polls", CTLFLAG_RD
,
932 &pctx
->pending_polls
, 0, "Do we need to poll again");
934 SYSCTL_ADD_INT(ctx
, parent
, OID_AUTO
, "residual_burst", CTLFLAG_RD
,
935 &pctx
->residual_burst
, 0,
936 "# of residual cycles in burst");
938 SYSCTL_ADD_UINT(ctx
, parent
, OID_AUTO
, "handlers", CTLFLAG_RD
,
939 &pctx
->poll_handlers
, 0,
940 "Number of registered poll handlers");
944 poll_sysctl_pollhz(netmsg_t msg
)
946 struct pollctx
*pctx
;
949 cpuid
= mycpu
->gd_cpuid
;
950 KKASSERT(cpuid
< POLLCTX_MAX
);
952 pctx
= poll_context
[cpuid
];
953 KKASSERT(pctx
!= NULL
);
954 KKASSERT(pctx
->poll_cpuid
== cpuid
);
957 * If polling is disabled or there is no device registered,
958 * don't adjust polling systimer frequency.
959 * Polling systimer frequency will be adjusted once polling
960 * is enabled and there are registered devices.
962 pctx
->pollhz
= msg
->lmsg
.u
.ms_result
;
963 if (pctx
->polling_enabled
&& pctx
->poll_handlers
)
964 systimer_adjust_periodic(&pctx
->pollclock
, pctx
->pollhz
);
967 * Make sure that reg_frac and reg_frac_count are within valid range.
969 if (pctx
->reg_frac
> pctx
->pollhz
) {
970 pctx
->reg_frac
= pctx
->pollhz
;
971 if (pctx
->reg_frac_count
> pctx
->reg_frac
)
972 pctx
->reg_frac_count
= pctx
->reg_frac
- 1;
975 lwkt_replymsg(&msg
->lmsg
, 0);
979 poll_sysctl_polling(netmsg_t msg
)
981 struct pollctx
*pctx
;
984 cpuid
= mycpu
->gd_cpuid
;
985 KKASSERT(cpuid
< POLLCTX_MAX
);
987 pctx
= poll_context
[cpuid
];
988 KKASSERT(pctx
!= NULL
);
989 KKASSERT(pctx
->poll_cpuid
== cpuid
);
992 * If polling is disabled or there is no device registered,
993 * cut the polling systimer frequency to 1hz.
995 pctx
->polling_enabled
= msg
->lmsg
.u
.ms_result
;
996 if (pctx
->polling_enabled
&& pctx
->poll_handlers
) {
997 systimer_adjust_periodic(&pctx
->pollclock
, pctx
->pollhz
);
999 systimer_adjust_periodic(&pctx
->pollclock
, 1);
1000 poll_reset_state(pctx
);
1003 if (!pctx
->polling_enabled
&& pctx
->poll_handlers
!= 0) {
1006 for (i
= 0 ; i
< pctx
->poll_handlers
; i
++) {
1007 struct ifnet
*ifp
= pctx
->pr
[i
].ifp
;
1009 ifnet_serialize_all(ifp
);
1011 if ((ifp
->if_flags
& IFF_POLLING
) == 0) {
1012 KKASSERT(ifp
->if_poll_cpuid
< 0);
1013 ifnet_deserialize_all(ifp
);
1016 ifp
->if_flags
&= ~IFF_POLLING
;
1017 ifp
->if_poll_cpuid
= -1;
1020 * Only call the interface deregistration
1021 * function if the interface is still
1024 if (ifp
->if_flags
& IFF_RUNNING
)
1025 ifp
->if_poll(ifp
, POLL_DEREGISTER
, 1);
1027 ifnet_deserialize_all(ifp
);
1029 pctx
->poll_handlers
= 0;
1032 lwkt_replymsg(&msg
->lmsg
, 0);
1036 poll_sysctl_regfrac(netmsg_t msg
)
1038 struct pollctx
*pctx
;
1042 cpuid
= mycpu
->gd_cpuid
;
1043 KKASSERT(cpuid
< POLLCTX_MAX
);
1045 pctx
= poll_context
[cpuid
];
1046 KKASSERT(pctx
!= NULL
);
1047 KKASSERT(pctx
->poll_cpuid
== cpuid
);
1049 reg_frac
= msg
->lmsg
.u
.ms_result
;
1050 if (reg_frac
> pctx
->pollhz
)
1051 reg_frac
= pctx
->pollhz
;
1052 else if (reg_frac
< 1)
1055 pctx
->reg_frac
= reg_frac
;
1056 if (pctx
->reg_frac_count
> pctx
->reg_frac
)
1057 pctx
->reg_frac_count
= pctx
->reg_frac
- 1;
1059 lwkt_replymsg(&msg
->lmsg
, 0);
1063 poll_sysctl_burstmax(netmsg_t msg
)
1065 struct pollctx
*pctx
;
1068 cpuid
= mycpu
->gd_cpuid
;
1069 KKASSERT(cpuid
< POLLCTX_MAX
);
1071 pctx
= poll_context
[cpuid
];
1072 KKASSERT(pctx
!= NULL
);
1073 KKASSERT(pctx
->poll_cpuid
== cpuid
);
1075 pctx
->poll_burst_max
= msg
->lmsg
.u
.ms_result
;
1076 if (pctx
->poll_each_burst
> pctx
->poll_burst_max
)
1077 pctx
->poll_each_burst
= pctx
->poll_burst_max
;
1078 if (pctx
->poll_burst
> pctx
->poll_burst_max
)
1079 pctx
->poll_burst
= pctx
->poll_burst_max
;
1080 if (pctx
->residual_burst
> pctx
->poll_burst_max
)
1081 pctx
->residual_burst
= pctx
->poll_burst_max
;
1083 lwkt_replymsg(&msg
->lmsg
, 0);
1087 poll_sysctl_eachburst(netmsg_t msg
)
1089 struct pollctx
*pctx
;
1090 uint32_t each_burst
;
1093 cpuid
= mycpu
->gd_cpuid
;
1094 KKASSERT(cpuid
< POLLCTX_MAX
);
1096 pctx
= poll_context
[cpuid
];
1097 KKASSERT(pctx
!= NULL
);
1098 KKASSERT(pctx
->poll_cpuid
== cpuid
);
1100 each_burst
= msg
->lmsg
.u
.ms_result
;
1101 if (each_burst
> pctx
->poll_burst_max
)
1102 each_burst
= pctx
->poll_burst_max
;
1103 else if (each_burst
< 1)
1105 pctx
->poll_each_burst
= each_burst
;
1107 lwkt_replymsg(&msg
->lmsg
, 0);