Return after calling bridge_ouput_p.
[dragonfly.git] / sys / kern / kern_poll.c
bloba1f21d9ad83a0721432f30f666b612f5b9e00758
1 /*-
2 * Copyright (c) 2001-2002 Luigi Rizzo
4 * Supported by: the Xorp Project (www.xorp.org)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
27 * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $
28 * $DragonFly: src/sys/kern/kern_poll.c,v 1.46 2008/05/01 02:03:28 sephe Exp $
31 #include "opt_polling.h"
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/ktr.h>
36 #include <sys/socket.h> /* needed by net/if.h */
37 #include <sys/sysctl.h>
39 #include <sys/thread2.h>
40 #include <sys/msgport2.h>
42 #include <net/if.h> /* for IFF_* flags */
43 #include <net/netmsg2.h>
46 * Polling support for [network] device drivers.
48 * Drivers which support this feature try to register with the
49 * polling code.
51 * If registration is successful, the driver must disable interrupts,
52 * and further I/O is performed through the handler, which is invoked
53 * (at least once per clock tick) with 3 arguments: the "arg" passed at
54 * register time (a struct ifnet pointer), a command, and a "count" limit.
56 * The command can be one of the following:
57 * POLL_ONLY: quick move of "count" packets from input/output queues.
58 * POLL_AND_CHECK_STATUS: as above, plus check status registers or do
59 * other more expensive operations. This command is issued periodically
60 * but less frequently than POLL_ONLY.
61 * POLL_DEREGISTER: deregister and return to interrupt mode.
62 * POLL_REGISTER: register and disable interrupts
64 * The first two commands are only issued if the interface is marked as
65 * 'IFF_UP, IFF_RUNNING and IFF_POLLING', the last two only if IFF_RUNNING
66 * is set.
68 * The count limit specifies how much work the handler can do during the
69 * call -- typically this is the number of packets to be received, or
70 * transmitted, etc. (drivers are free to interpret this number, as long
71 * as the max time spent in the function grows roughly linearly with the
72 * count).
74 * Deregistration can be requested by the driver itself (typically in the
75 * *_stop() routine), or by the polling code, by invoking the handler.
77 * Polling can be enabled or disabled on a particular CPU_X with the sysctl
78 * variable kern.polling.X.enable (default is 1, enabled)
80 * A second variable controls the sharing of CPU between polling/kernel
81 * network processing, and other activities (typically userlevel tasks):
82 * kern.polling.X.user_frac (between 0 and 100, default 50) sets the share
83 * of CPU allocated to user tasks. CPU is allocated proportionally to the
84 * shares, by dynamically adjusting the "count" (poll_burst).
86 * Other parameters should be left at their default values.
87 * The following constraints hold
89 * 1 <= poll_burst <= poll_burst_max
90 * 1 <= poll_each_burst <= poll_burst_max
91 * MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
/* Lower and upper bounds accepted for the poll_burst_max tunable. */
#define MIN_POLL_BURST_MAX		10
#define MAX_POLL_BURST_MAX		1000

/* Default values of the burst tunables. */
#define POLL_BURST_MAX			150	/* good for 100Mbit net and HZ=1000 */
#define POLL_EACH_BURST			5

/*
 * Polling frequency limits.  The maximum may be supplied by the build;
 * the definition below is only a fallback.
 */
#ifndef DEVICE_POLLING_FREQ_MAX
#define DEVICE_POLLING_FREQ_MAX		30000
#endif
#define DEVICE_POLLING_FREQ_DEFAULT	2000

/* Number of slots in the per-context handler table (pollctx.pr[]). */
#define POLL_LIST_LEN			128
/*
 * One entry of a polling context's handler table: the interface whose
 * if_poll method is invoked on every polling pass.
 */
struct pollrec {
	struct ifnet	*ifp;
};
109 #define POLLCTX_MAX 32
111 struct pollctx {
112 struct sysctl_ctx_list poll_sysctl_ctx;
113 struct sysctl_oid *poll_sysctl_tree;
115 uint32_t poll_burst; /* state */
116 uint32_t poll_each_burst; /* tunable */
117 uint32_t poll_burst_max; /* tunable */
118 uint32_t user_frac; /* tunable */
119 int reg_frac_count; /* state */
120 uint32_t reg_frac; /* tunable */
121 uint32_t short_ticks; /* statistics */
122 uint32_t lost_polls; /* statistics */
123 uint32_t pending_polls; /* state */
124 int residual_burst; /* state */
125 uint32_t phase; /* state */
126 uint32_t suspect; /* statistics */
127 uint32_t stalled; /* statistics */
128 struct timeval poll_start_t; /* state */
129 struct timeval prev_t; /* state */
131 uint32_t poll_handlers; /* next free entry in pr[]. */
132 struct pollrec pr[POLL_LIST_LEN];
134 int poll_cpuid;
135 struct systimer pollclock;
136 int polling_enabled; /* tunable */
137 int pollhz; /* tunable */
139 struct netmsg poll_netmsg;
140 struct netmsg poll_more_netmsg;
143 static struct pollctx *poll_context[POLLCTX_MAX];
145 SYSCTL_NODE(_kern, OID_AUTO, polling, CTLFLAG_RW, 0,
146 "Device polling parameters");
148 static int poll_defcpu = -1;
149 SYSCTL_INT(_kern_polling, OID_AUTO, defcpu, CTLFLAG_RD,
150 &poll_defcpu, 0, "default CPU to run device polling");
152 static uint32_t poll_cpumask0 = 0xffffffff;
153 TUNABLE_INT("kern.polling.cpumask", (int *)&poll_cpumask0);
155 static uint32_t poll_cpumask;
156 SYSCTL_INT(_kern_polling, OID_AUTO, cpumask, CTLFLAG_RD,
157 &poll_cpumask, 0, "CPUs that can run device polling");
159 static int polling_enabled = 1; /* global polling enable */
160 TUNABLE_INT("kern.polling.enable", &polling_enabled);
162 static int pollhz = DEVICE_POLLING_FREQ_DEFAULT;
163 TUNABLE_INT("kern.polling.pollhz", &pollhz);
165 static int poll_burst_max = POLL_BURST_MAX;
166 TUNABLE_INT("kern.polling.burst_max", &poll_burst_max);
168 static int poll_each_burst = POLL_EACH_BURST;
169 TUNABLE_INT("kern.polling.each_burst", &poll_each_burst);
171 /* Netisr handlers */
172 static void netisr_poll(struct netmsg *);
173 static void netisr_pollmore(struct netmsg *);
174 static void poll_register(struct netmsg *);
175 static void poll_deregister(struct netmsg *);
176 static void poll_sysctl_pollhz(struct netmsg *);
177 static void poll_sysctl_polling(struct netmsg *);
178 static void poll_sysctl_regfrac(struct netmsg *);
179 static void poll_sysctl_burstmax(struct netmsg *);
180 static void poll_sysctl_eachburst(struct netmsg *);
182 /* Systimer handler */
183 static void pollclock(systimer_t, struct intrframe *);
185 /* Sysctl handlers */
186 static int sysctl_pollhz(SYSCTL_HANDLER_ARGS);
187 static int sysctl_polling(SYSCTL_HANDLER_ARGS);
188 static int sysctl_regfrac(SYSCTL_HANDLER_ARGS);
189 static int sysctl_burstmax(SYSCTL_HANDLER_ARGS);
190 static int sysctl_eachburst(SYSCTL_HANDLER_ARGS);
191 static void poll_add_sysctl(struct sysctl_ctx_list *,
192 struct sysctl_oid_list *, struct pollctx *);
194 static void schedpoll_oncpu(struct pollctx *, struct netmsg *, netisr_fn_t);
196 void init_device_poll_pcpu(int); /* per-cpu init routine */
198 #define POLL_KTR_STRING "ifp=%p"
199 #define POLL_KTR_ARG_SIZE (sizeof(void *))
201 #ifndef KTR_POLLING
202 #define KTR_POLLING KTR_ALL
203 #endif
204 KTR_INFO_MASTER(poll);
205 KTR_INFO(KTR_POLLING, poll, beg, 0, POLL_KTR_STRING, POLL_KTR_ARG_SIZE);
206 KTR_INFO(KTR_POLLING, poll, end, 1, POLL_KTR_STRING, POLL_KTR_ARG_SIZE);
208 #define logpoll(name, arg) KTR_LOG(poll_ ## name, arg)
210 static __inline void
211 poll_reset_state(struct pollctx *pctx)
213 crit_enter();
214 pctx->poll_burst = 5;
215 pctx->reg_frac_count = 0;
216 pctx->pending_polls = 0;
217 pctx->residual_burst = 0;
218 pctx->phase = 0;
219 bzero(&pctx->poll_start_t, sizeof(pctx->poll_start_t));
220 bzero(&pctx->prev_t, sizeof(pctx->prev_t));
221 crit_exit();
225 * Initialize per-cpu polling(4) context. Called from kern_clock.c:
227 void
228 init_device_poll_pcpu(int cpuid)
230 struct pollctx *pctx;
231 char cpuid_str[3];
233 if (cpuid >= POLLCTX_MAX)
234 return;
236 if (((1 << cpuid) & poll_cpumask0) == 0)
237 return;
239 if (poll_burst_max < MIN_POLL_BURST_MAX)
240 poll_burst_max = MIN_POLL_BURST_MAX;
241 else if (poll_burst_max > MAX_POLL_BURST_MAX)
242 poll_burst_max = MAX_POLL_BURST_MAX;
244 if (poll_each_burst > poll_burst_max)
245 poll_each_burst = poll_burst_max;
247 poll_cpumask |= (1 << cpuid);
249 pctx = kmalloc(sizeof(*pctx), M_DEVBUF, M_WAITOK | M_ZERO);
251 pctx->poll_each_burst = poll_each_burst;
252 pctx->poll_burst_max = poll_burst_max;
253 pctx->user_frac = 50;
254 pctx->reg_frac = 20;
255 pctx->polling_enabled = polling_enabled;
256 pctx->pollhz = pollhz;
257 pctx->poll_cpuid = cpuid;
258 netmsg_init(&pctx->poll_netmsg, &netisr_adone_rport, 0, NULL);
259 netmsg_init(&pctx->poll_more_netmsg, &netisr_adone_rport, 0, NULL);
260 poll_reset_state(pctx);
262 KASSERT(cpuid < POLLCTX_MAX, ("cpu id must < %d", cpuid));
263 poll_context[cpuid] = pctx;
265 if (poll_defcpu < 0) {
266 poll_defcpu = cpuid;
269 * Initialize global sysctl nodes, for compat
271 poll_add_sysctl(NULL, SYSCTL_STATIC_CHILDREN(_kern_polling),
272 pctx);
276 * Initialize per-cpu sysctl nodes
278 ksnprintf(cpuid_str, sizeof(cpuid_str), "%d", pctx->poll_cpuid);
280 sysctl_ctx_init(&pctx->poll_sysctl_ctx);
281 pctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&pctx->poll_sysctl_ctx,
282 SYSCTL_STATIC_CHILDREN(_kern_polling),
283 OID_AUTO, cpuid_str, CTLFLAG_RD, 0, "");
284 poll_add_sysctl(&pctx->poll_sysctl_ctx,
285 SYSCTL_CHILDREN(pctx->poll_sysctl_tree), pctx);
288 * Initialize systimer
290 systimer_init_periodic_nq(&pctx->pollclock, pollclock, pctx, 1);
293 static __inline void
294 schedpoll(struct pollctx *pctx)
296 crit_enter();
297 schedpoll_oncpu(pctx, &pctx->poll_netmsg, netisr_poll);
298 crit_exit();
301 static __inline void
302 schedpollmore(struct pollctx *pctx)
304 schedpoll_oncpu(pctx, &pctx->poll_more_netmsg, netisr_pollmore);
308 * Set the polling frequency
310 static int
311 sysctl_pollhz(SYSCTL_HANDLER_ARGS)
313 struct pollctx *pctx = arg1;
314 struct netmsg msg;
315 lwkt_port_t port;
316 int error, phz;
318 phz = pctx->pollhz;
319 error = sysctl_handle_int(oidp, &phz, 0, req);
320 if (error || req->newptr == NULL)
321 return error;
322 if (phz <= 0)
323 return EINVAL;
324 else if (phz > DEVICE_POLLING_FREQ_MAX)
325 phz = DEVICE_POLLING_FREQ_MAX;
327 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_pollhz);
328 msg.nm_lmsg.u.ms_result = phz;
330 port = cpu_portfn(pctx->poll_cpuid);
331 lwkt_domsg(port, &msg.nm_lmsg, 0);
332 return 0;
336 * Master enable.
338 static int
339 sysctl_polling(SYSCTL_HANDLER_ARGS)
341 struct pollctx *pctx = arg1;
342 struct netmsg msg;
343 lwkt_port_t port;
344 int error, enabled;
346 enabled = pctx->polling_enabled;
347 error = sysctl_handle_int(oidp, &enabled, 0, req);
348 if (error || req->newptr == NULL)
349 return error;
351 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_polling);
352 msg.nm_lmsg.u.ms_result = enabled;
354 port = cpu_portfn(pctx->poll_cpuid);
355 lwkt_domsg(port, &msg.nm_lmsg, 0);
356 return 0;
359 static int
360 sysctl_regfrac(SYSCTL_HANDLER_ARGS)
362 struct pollctx *pctx = arg1;
363 struct netmsg msg;
364 lwkt_port_t port;
365 uint32_t reg_frac;
366 int error;
368 reg_frac = pctx->reg_frac;
369 error = sysctl_handle_int(oidp, &reg_frac, 0, req);
370 if (error || req->newptr == NULL)
371 return error;
373 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_regfrac);
374 msg.nm_lmsg.u.ms_result = reg_frac;
376 port = cpu_portfn(pctx->poll_cpuid);
377 lwkt_domsg(port, &msg.nm_lmsg, 0);
378 return 0;
381 static int
382 sysctl_burstmax(SYSCTL_HANDLER_ARGS)
384 struct pollctx *pctx = arg1;
385 struct netmsg msg;
386 lwkt_port_t port;
387 uint32_t burst_max;
388 int error;
390 burst_max = pctx->poll_burst_max;
391 error = sysctl_handle_int(oidp, &burst_max, 0, req);
392 if (error || req->newptr == NULL)
393 return error;
394 if (burst_max < MIN_POLL_BURST_MAX)
395 burst_max = MIN_POLL_BURST_MAX;
396 else if (burst_max > MAX_POLL_BURST_MAX)
397 burst_max = MAX_POLL_BURST_MAX;
399 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_burstmax);
400 msg.nm_lmsg.u.ms_result = burst_max;
402 port = cpu_portfn(pctx->poll_cpuid);
403 lwkt_domsg(port, &msg.nm_lmsg, 0);
404 return 0;
407 static int
408 sysctl_eachburst(SYSCTL_HANDLER_ARGS)
410 struct pollctx *pctx = arg1;
411 struct netmsg msg;
412 lwkt_port_t port;
413 uint32_t each_burst;
414 int error;
416 each_burst = pctx->poll_each_burst;
417 error = sysctl_handle_int(oidp, &each_burst, 0, req);
418 if (error || req->newptr == NULL)
419 return error;
421 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_eachburst);
422 msg.nm_lmsg.u.ms_result = each_burst;
424 port = cpu_portfn(pctx->poll_cpuid);
425 lwkt_domsg(port, &msg.nm_lmsg, 0);
426 return 0;
430 * Hook from polling systimer. Tries to schedule a netisr, but keeps
431 * track of lost ticks due to the previous handler taking too long.
432 * Normally, this should not happen, because polling handler should
433 * run for a short time. However, in some cases (e.g. when there are
434 * changes in link status etc.) the drivers take a very long time
435 * (even in the order of milliseconds) to reset and reconfigure the
436 * device, causing apparent lost polls.
438 * The first part of the code is just for debugging purposes, and tries
439 * to count how often hardclock ticks are shorter than they should,
440 * meaning either stray interrupts or delayed events.
442 * WARNING! called from fastint or IPI, the MP lock might not be held.
444 static void
445 pollclock(systimer_t info, struct intrframe *frame __unused)
447 struct pollctx *pctx = info->data;
448 struct timeval t;
449 int delta;
451 if (pctx->poll_handlers == 0)
452 return;
454 microuptime(&t);
455 delta = (t.tv_usec - pctx->prev_t.tv_usec) +
456 (t.tv_sec - pctx->prev_t.tv_sec)*1000000;
457 if (delta * pctx->pollhz < 500000)
458 pctx->short_ticks++;
459 else
460 pctx->prev_t = t;
462 if (pctx->pending_polls > 100) {
464 * Too much, assume it has stalled (not always true
465 * see comment above).
467 pctx->stalled++;
468 pctx->pending_polls = 0;
469 pctx->phase = 0;
472 if (pctx->phase <= 2) {
473 if (pctx->phase != 0)
474 pctx->suspect++;
475 pctx->phase = 1;
476 schedpoll(pctx);
477 pctx->phase = 2;
479 if (pctx->pending_polls++ > 0)
480 pctx->lost_polls++;
484 * netisr_pollmore is called after other netisr's, possibly scheduling
485 * another NETISR_POLL call, or adapting the burst size for the next cycle.
487 * It is very bad to fetch large bursts of packets from a single card at once,
488 * because the burst could take a long time to be completely processed leading
489 * to unfairness. To reduce the problem, and also to account better for time
490 * spent in network-related processing, we split the burst in smaller chunks
491 * of fixed size, giving control to the other netisr's between chunks. This
492 * helps in improving the fairness, reducing livelock (because we emulate more
493 * closely the "process to completion" that we have with fastforwarding) and
494 * accounting for the work performed in low level handling and forwarding.
497 /* ARGSUSED */
498 static void
499 netisr_pollmore(struct netmsg *msg)
501 struct pollctx *pctx;
502 struct timeval t;
503 int kern_load, cpuid;
504 uint32_t pending_polls;
506 cpuid = mycpu->gd_cpuid;
507 KKASSERT(cpuid < POLLCTX_MAX);
509 pctx = poll_context[cpuid];
510 KKASSERT(pctx != NULL);
511 KKASSERT(pctx->poll_cpuid == cpuid);
512 KKASSERT(pctx == msg->nm_lmsg.u.ms_resultp);
514 lwkt_replymsg(&msg->nm_lmsg, 0);
516 if (pctx->poll_handlers == 0)
517 return;
519 KASSERT(pctx->polling_enabled,
520 ("# of registered poll handlers are not zero, "
521 "but polling is not enabled\n"));
523 pctx->phase = 5;
524 if (pctx->residual_burst > 0) {
525 schedpoll(pctx);
526 /* will run immediately on return, followed by netisrs */
527 return;
529 /* here we can account time spent in netisr's in this tick */
530 microuptime(&t);
531 kern_load = (t.tv_usec - pctx->poll_start_t.tv_usec) +
532 (t.tv_sec - pctx->poll_start_t.tv_sec)*1000000; /* us */
533 kern_load = (kern_load * pctx->pollhz) / 10000; /* 0..100 */
534 if (kern_load > (100 - pctx->user_frac)) { /* try decrease ticks */
535 if (pctx->poll_burst > 1)
536 pctx->poll_burst--;
537 } else {
538 if (pctx->poll_burst < pctx->poll_burst_max)
539 pctx->poll_burst++;
542 crit_enter();
543 pctx->pending_polls--;
544 pending_polls = pctx->pending_polls;
545 crit_exit();
547 if (pending_polls == 0) { /* we are done */
548 pctx->phase = 0;
549 } else {
551 * Last cycle was long and caused us to miss one or more
552 * hardclock ticks. Restart processing again, but slightly
553 * reduce the burst size to prevent that this happens again.
555 pctx->poll_burst -= (pctx->poll_burst / 8);
556 if (pctx->poll_burst < 1)
557 pctx->poll_burst = 1;
558 schedpoll(pctx);
559 pctx->phase = 6;
564 * netisr_poll is scheduled by schedpoll when appropriate, typically once
565 * per polling systimer tick.
567 * Note that the message is replied immediately in order to allow a new
568 * ISR to be scheduled in the handler.
570 * XXX each registration should indicate whether it needs a critical
571 * section to operate.
573 /* ARGSUSED */
574 static void
575 netisr_poll(struct netmsg *msg)
577 struct pollctx *pctx;
578 int i, cycles, cpuid;
579 enum poll_cmd arg = POLL_ONLY;
581 cpuid = mycpu->gd_cpuid;
582 KKASSERT(cpuid < POLLCTX_MAX);
584 pctx = poll_context[cpuid];
585 KKASSERT(pctx != NULL);
586 KKASSERT(pctx->poll_cpuid == cpuid);
587 KKASSERT(pctx == msg->nm_lmsg.u.ms_resultp);
589 crit_enter();
590 lwkt_replymsg(&msg->nm_lmsg, 0);
591 crit_exit();
593 if (pctx->poll_handlers == 0)
594 return;
596 KASSERT(pctx->polling_enabled,
597 ("# of registered poll handlers are not zero, "
598 "but polling is not enabled\n"));
600 pctx->phase = 3;
601 if (pctx->residual_burst == 0) { /* first call in this tick */
602 microuptime(&pctx->poll_start_t);
604 if (pctx->reg_frac_count-- == 0) {
605 arg = POLL_AND_CHECK_STATUS;
606 pctx->reg_frac_count = pctx->reg_frac - 1;
609 pctx->residual_burst = pctx->poll_burst;
611 cycles = (pctx->residual_burst < pctx->poll_each_burst) ?
612 pctx->residual_burst : pctx->poll_each_burst;
613 pctx->residual_burst -= cycles;
615 for (i = 0 ; i < pctx->poll_handlers ; i++) {
616 struct ifnet *ifp = pctx->pr[i].ifp;
618 if (!lwkt_serialize_try(ifp->if_serializer))
619 continue;
621 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING|IFF_POLLING))
622 == (IFF_UP|IFF_RUNNING|IFF_POLLING)) {
623 logpoll(beg, ifp);
624 crit_enter();
625 ifp->if_poll(ifp, arg, cycles);
626 crit_exit();
627 logpoll(end, ifp);
630 lwkt_serialize_exit(ifp->if_serializer);
633 schedpollmore(pctx);
634 pctx->phase = 4;
637 static void
638 poll_register(struct netmsg *msg)
640 struct ifnet *ifp = msg->nm_lmsg.u.ms_resultp;
641 struct pollctx *pctx;
642 int rc, cpuid;
644 cpuid = mycpu->gd_cpuid;
645 KKASSERT(cpuid < POLLCTX_MAX);
647 pctx = poll_context[cpuid];
648 KKASSERT(pctx != NULL);
649 KKASSERT(pctx->poll_cpuid == cpuid);
651 if (pctx->polling_enabled == 0) {
652 /* Polling disabled, cannot register */
653 rc = EOPNOTSUPP;
654 goto back;
658 * Check if there is room.
660 if (pctx->poll_handlers >= POLL_LIST_LEN) {
662 * List full, cannot register more entries.
663 * This should never happen; if it does, it is probably a
664 * broken driver trying to register multiple times. Checking
665 * this at runtime is expensive, and won't solve the problem
666 * anyways, so just report a few times and then give up.
668 static int verbose = 10; /* XXX */
669 if (verbose >0) {
670 kprintf("poll handlers list full, "
671 "maybe a broken driver ?\n");
672 verbose--;
674 rc = ENOMEM;
675 } else {
676 pctx->pr[pctx->poll_handlers].ifp = ifp;
677 pctx->poll_handlers++;
678 rc = 0;
680 if (pctx->poll_handlers == 1) {
681 KKASSERT(pctx->polling_enabled);
682 systimer_adjust_periodic(&pctx->pollclock,
683 pctx->pollhz);
686 back:
687 lwkt_replymsg(&msg->nm_lmsg, rc);
691 * Try to register routine for polling. Returns 1 if successful
692 * (and polling should be enabled), 0 otherwise.
694 * Called from mainline code only, not called from an interrupt.
697 ether_poll_register(struct ifnet *ifp)
699 if (poll_defcpu < 0)
700 return 0;
701 KKASSERT(poll_defcpu < POLLCTX_MAX);
703 return ether_pollcpu_register(ifp, poll_defcpu);
707 ether_pollcpu_register(struct ifnet *ifp, int cpuid)
709 struct netmsg msg;
710 lwkt_port_t port;
711 int rc;
713 if (ifp->if_poll == NULL) {
714 /* Device does not support polling */
715 return 0;
718 if (cpuid < 0 || cpuid >= POLLCTX_MAX)
719 return 0;
721 if (((1 << cpuid) & poll_cpumask) == 0) {
722 /* Polling is not supported on 'cpuid' */
723 return 0;
725 KKASSERT(poll_context[cpuid] != NULL);
728 * Attempt to register. Interlock with IFF_POLLING.
730 crit_enter(); /* XXX MP - not mp safe */
732 lwkt_serialize_enter(ifp->if_serializer);
733 if (ifp->if_flags & IFF_POLLING) {
734 /* Already polling */
735 KKASSERT(ifp->if_poll_cpuid >= 0);
736 lwkt_serialize_exit(ifp->if_serializer);
737 crit_exit();
738 return 0;
740 KKASSERT(ifp->if_poll_cpuid < 0);
741 ifp->if_flags |= IFF_POLLING;
742 ifp->if_poll_cpuid = cpuid;
743 if (ifp->if_flags & IFF_RUNNING)
744 ifp->if_poll(ifp, POLL_REGISTER, 0);
745 lwkt_serialize_exit(ifp->if_serializer);
747 netmsg_init(&msg, &curthread->td_msgport, 0, poll_register);
748 msg.nm_lmsg.u.ms_resultp = ifp;
750 port = cpu_portfn(cpuid);
751 lwkt_domsg(port, &msg.nm_lmsg, 0);
753 if (msg.nm_lmsg.ms_error) {
754 lwkt_serialize_enter(ifp->if_serializer);
755 ifp->if_flags &= ~IFF_POLLING;
756 ifp->if_poll_cpuid = -1;
757 if (ifp->if_flags & IFF_RUNNING)
758 ifp->if_poll(ifp, POLL_DEREGISTER, 0);
759 lwkt_serialize_exit(ifp->if_serializer);
760 rc = 0;
761 } else {
762 rc = 1;
765 crit_exit();
766 return rc;
769 static void
770 poll_deregister(struct netmsg *msg)
772 struct ifnet *ifp = msg->nm_lmsg.u.ms_resultp;
773 struct pollctx *pctx;
774 int rc, i, cpuid;
776 cpuid = mycpu->gd_cpuid;
777 KKASSERT(cpuid < POLLCTX_MAX);
779 pctx = poll_context[cpuid];
780 KKASSERT(pctx != NULL);
781 KKASSERT(pctx->poll_cpuid == cpuid);
783 for (i = 0 ; i < pctx->poll_handlers ; i++) {
784 if (pctx->pr[i].ifp == ifp) /* Found it */
785 break;
787 if (i == pctx->poll_handlers) {
788 kprintf("ether_poll_deregister: ifp not found!!!\n");
789 rc = ENOENT;
790 } else {
791 pctx->poll_handlers--;
792 if (i < pctx->poll_handlers) {
793 /* Last entry replaces this one. */
794 pctx->pr[i].ifp = pctx->pr[pctx->poll_handlers].ifp;
797 if (pctx->poll_handlers == 0) {
798 systimer_adjust_periodic(&pctx->pollclock, 1);
799 poll_reset_state(pctx);
801 rc = 0;
803 lwkt_replymsg(&msg->nm_lmsg, rc);
807 * Remove interface from the polling list. Occurs when polling is turned
808 * off. Called from mainline code only, not called from an interrupt.
811 ether_poll_deregister(struct ifnet *ifp)
813 struct netmsg msg;
814 lwkt_port_t port;
815 int rc, cpuid;
817 KKASSERT(ifp != NULL);
819 if (ifp->if_poll == NULL)
820 return 0;
822 crit_enter();
824 lwkt_serialize_enter(ifp->if_serializer);
825 if ((ifp->if_flags & IFF_POLLING) == 0) {
826 KKASSERT(ifp->if_poll_cpuid < 0);
827 lwkt_serialize_exit(ifp->if_serializer);
828 crit_exit();
829 return 0;
832 cpuid = ifp->if_poll_cpuid;
833 KKASSERT(cpuid >= 0);
834 KKASSERT(poll_context[cpuid] != NULL);
836 ifp->if_flags &= ~IFF_POLLING;
837 ifp->if_poll_cpuid = -1;
838 lwkt_serialize_exit(ifp->if_serializer);
840 netmsg_init(&msg, &curthread->td_msgport, 0, poll_deregister);
841 msg.nm_lmsg.u.ms_resultp = ifp;
843 port = cpu_portfn(cpuid);
844 lwkt_domsg(port, &msg.nm_lmsg, 0);
846 if (!msg.nm_lmsg.ms_error) {
847 lwkt_serialize_enter(ifp->if_serializer);
848 if (ifp->if_flags & IFF_RUNNING)
849 ifp->if_poll(ifp, POLL_DEREGISTER, 1);
850 lwkt_serialize_exit(ifp->if_serializer);
851 rc = 1;
852 } else {
853 rc = 0;
856 crit_exit();
857 return rc;
860 static void
861 poll_add_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent,
862 struct pollctx *pctx)
864 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "enable",
865 CTLTYPE_INT | CTLFLAG_RW, pctx, 0, sysctl_polling,
866 "I", "Polling enabled");
868 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "pollhz",
869 CTLTYPE_INT | CTLFLAG_RW, pctx, 0, sysctl_pollhz,
870 "I", "Device polling frequency");
872 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "reg_frac",
873 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_regfrac,
874 "IU", "Every this many cycles poll register");
876 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "burst_max",
877 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_burstmax,
878 "IU", "Max Polling burst size");
880 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "each_burst",
881 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_eachburst,
882 "IU", "Max size of each burst");
884 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "phase", CTLFLAG_RD,
885 &pctx->phase, 0, "Polling phase");
887 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "suspect", CTLFLAG_RW,
888 &pctx->suspect, 0, "suspect event");
890 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "stalled", CTLFLAG_RW,
891 &pctx->stalled, 0, "potential stalls");
893 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "burst", CTLFLAG_RD,
894 &pctx->poll_burst, 0, "Current polling burst size");
896 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "user_frac", CTLFLAG_RW,
897 &pctx->user_frac, 0,
898 "Desired user fraction of cpu time");
900 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "short_ticks", CTLFLAG_RW,
901 &pctx->short_ticks, 0,
902 "Hardclock ticks shorter than they should be");
904 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "lost_polls", CTLFLAG_RW,
905 &pctx->lost_polls, 0,
906 "How many times we would have lost a poll tick");
908 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "pending_polls", CTLFLAG_RD,
909 &pctx->pending_polls, 0, "Do we need to poll again");
911 SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "residual_burst", CTLFLAG_RD,
912 &pctx->residual_burst, 0,
913 "# of residual cycles in burst");
915 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "handlers", CTLFLAG_RD,
916 &pctx->poll_handlers, 0,
917 "Number of registered poll handlers");
920 static void
921 schedpoll_oncpu(struct pollctx *pctx, struct netmsg *msg, netisr_fn_t handler)
923 if (msg->nm_lmsg.ms_flags & MSGF_DONE) {
924 lwkt_port_t port;
926 netmsg_init(msg, &netisr_adone_rport, 0, handler);
927 #ifdef INVARIANTS
928 msg->nm_lmsg.u.ms_resultp = pctx;
929 #endif
930 port = cpu_portfn(mycpu->gd_cpuid);
931 lwkt_sendmsg(port, &msg->nm_lmsg);
935 static void
936 poll_sysctl_pollhz(struct netmsg *msg)
938 struct pollctx *pctx;
939 int cpuid;
941 cpuid = mycpu->gd_cpuid;
942 KKASSERT(cpuid < POLLCTX_MAX);
944 pctx = poll_context[cpuid];
945 KKASSERT(pctx != NULL);
946 KKASSERT(pctx->poll_cpuid == cpuid);
949 * If polling is disabled or there is no device registered,
950 * don't adjust polling systimer frequency.
951 * Polling systimer frequency will be adjusted once polling
952 * is enabled and there are registered devices.
954 pctx->pollhz = msg->nm_lmsg.u.ms_result;
955 if (pctx->polling_enabled && pctx->poll_handlers)
956 systimer_adjust_periodic(&pctx->pollclock, pctx->pollhz);
959 * Make sure that reg_frac and reg_frac_count are within valid range.
961 if (pctx->reg_frac > pctx->pollhz) {
962 pctx->reg_frac = pctx->pollhz;
963 if (pctx->reg_frac_count > pctx->reg_frac)
964 pctx->reg_frac_count = pctx->reg_frac - 1;
967 lwkt_replymsg(&msg->nm_lmsg, 0);
970 static void
971 poll_sysctl_polling(struct netmsg *msg)
973 struct pollctx *pctx;
974 int cpuid;
976 cpuid = mycpu->gd_cpuid;
977 KKASSERT(cpuid < POLLCTX_MAX);
979 pctx = poll_context[cpuid];
980 KKASSERT(pctx != NULL);
981 KKASSERT(pctx->poll_cpuid == cpuid);
984 * If polling is disabled or there is no device registered,
985 * cut the polling systimer frequency to 1hz.
987 pctx->polling_enabled = msg->nm_lmsg.u.ms_result;
988 if (pctx->polling_enabled && pctx->poll_handlers) {
989 systimer_adjust_periodic(&pctx->pollclock, pctx->pollhz);
990 } else {
991 systimer_adjust_periodic(&pctx->pollclock, 1);
992 poll_reset_state(pctx);
995 if (!pctx->polling_enabled && pctx->poll_handlers != 0) {
996 int i;
998 for (i = 0 ; i < pctx->poll_handlers ; i++) {
999 struct ifnet *ifp = pctx->pr[i].ifp;
1001 lwkt_serialize_enter(ifp->if_serializer);
1003 if ((ifp->if_flags & IFF_POLLING) == 0) {
1004 KKASSERT(ifp->if_poll_cpuid < 0);
1005 lwkt_serialize_exit(ifp->if_serializer);
1006 continue;
1008 ifp->if_flags &= ~IFF_POLLING;
1009 ifp->if_poll_cpuid = -1;
1012 * Only call the interface deregistration
1013 * function if the interface is still
1014 * running.
1016 if (ifp->if_flags & IFF_RUNNING)
1017 ifp->if_poll(ifp, POLL_DEREGISTER, 1);
1019 lwkt_serialize_exit(ifp->if_serializer);
1021 pctx->poll_handlers = 0;
1024 lwkt_replymsg(&msg->nm_lmsg, 0);
1027 static void
1028 poll_sysctl_regfrac(struct netmsg *msg)
1030 struct pollctx *pctx;
1031 uint32_t reg_frac;
1032 int cpuid;
1034 cpuid = mycpu->gd_cpuid;
1035 KKASSERT(cpuid < POLLCTX_MAX);
1037 pctx = poll_context[cpuid];
1038 KKASSERT(pctx != NULL);
1039 KKASSERT(pctx->poll_cpuid == cpuid);
1041 reg_frac = msg->nm_lmsg.u.ms_result;
1042 if (reg_frac > pctx->pollhz)
1043 reg_frac = pctx->pollhz;
1044 else if (reg_frac < 1)
1045 reg_frac = 1;
1047 pctx->reg_frac = reg_frac;
1048 if (pctx->reg_frac_count > pctx->reg_frac)
1049 pctx->reg_frac_count = pctx->reg_frac - 1;
1051 lwkt_replymsg(&msg->nm_lmsg, 0);
1054 static void
1055 poll_sysctl_burstmax(struct netmsg *msg)
1057 struct pollctx *pctx;
1058 int cpuid;
1060 cpuid = mycpu->gd_cpuid;
1061 KKASSERT(cpuid < POLLCTX_MAX);
1063 pctx = poll_context[cpuid];
1064 KKASSERT(pctx != NULL);
1065 KKASSERT(pctx->poll_cpuid == cpuid);
1067 pctx->poll_burst_max = msg->nm_lmsg.u.ms_result;
1068 if (pctx->poll_each_burst > pctx->poll_burst_max)
1069 pctx->poll_each_burst = pctx->poll_burst_max;
1070 if (pctx->poll_burst > pctx->poll_burst_max)
1071 pctx->poll_burst = pctx->poll_burst_max;
1072 if (pctx->residual_burst > pctx->poll_burst_max)
1073 pctx->residual_burst = pctx->poll_burst_max;
1075 lwkt_replymsg(&msg->nm_lmsg, 0);
1078 static void
1079 poll_sysctl_eachburst(struct netmsg *msg)
1081 struct pollctx *pctx;
1082 uint32_t each_burst;
1083 int cpuid;
1085 cpuid = mycpu->gd_cpuid;
1086 KKASSERT(cpuid < POLLCTX_MAX);
1088 pctx = poll_context[cpuid];
1089 KKASSERT(pctx != NULL);
1090 KKASSERT(pctx->poll_cpuid == cpuid);
1092 each_burst = msg->nm_lmsg.u.ms_result;
1093 if (each_burst > pctx->poll_burst_max)
1094 each_burst = pctx->poll_burst_max;
1095 else if (each_burst < 1)
1096 each_burst = 1;
1097 pctx->poll_each_burst = each_burst;
1099 lwkt_replymsg(&msg->nm_lmsg, 0);