* Remove the remains of the obsolete timeout()/untimeout() interface.
[dragonfly.git] / sys / kern / kern_poll.c
blob1c1aaece4068a54e723ed31b7a7b556aa691ca96
1 /*-
2 * Copyright (c) 2001-2002 Luigi Rizzo
4 * Supported by: the Xorp Project (www.xorp.org)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
27 * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $
28 * $DragonFly: src/sys/kern/kern_poll.c,v 1.42 2007/11/11 07:38:29 sephe Exp $
31 #include "opt_polling.h"
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/socket.h> /* needed by net/if.h */
36 #include <sys/sysctl.h>
38 #include <sys/thread2.h>
39 #include <sys/msgport2.h>
41 #include <net/if.h> /* for IFF_* flags */
42 #include <net/netmsg2.h>
45 * Polling support for [network] device drivers.
47 * Drivers which support this feature try to register with the
48 * polling code.
50 * If registration is successful, the driver must disable interrupts,
51 * and further I/O is performed through the handler, which is invoked
52 * (at least once per clock tick) with 3 arguments: the "arg" passed at
53 * register time (a struct ifnet pointer), a command, and a "count" limit.
55 * The command can be one of the following:
56 * POLL_ONLY: quick move of "count" packets from input/output queues.
57 * POLL_AND_CHECK_STATUS: as above, plus check status registers or do
58 * other more expensive operations. This command is issued periodically
59 * but less frequently than POLL_ONLY.
60 * POLL_DEREGISTER: deregister and return to interrupt mode.
61 * POLL_REGISTER: register and disable interrupts
63 * The first two commands are only issued if the interface is marked as
64 * 'IFF_UP, IFF_RUNNING and IFF_POLLING', the last two only if IFF_RUNNING
65 * is set.
67 * The count limit specifies how much work the handler can do during the
68 * call -- typically this is the number of packets to be received, or
69 * transmitted, etc. (drivers are free to interpret this number, as long
70 * as the max time spent in the function grows roughly linearly with the
71 * count).
73 * Deregistration can be requested by the driver itself (typically in the
74 * *_stop() routine), or by the polling code, by invoking the handler.
76 * Polling can be enabled or disabled on a particular CPU X with the sysctl
77 * variable kern.polling.X.enable (default is 1, enabled)
79 * A second variable controls the sharing of CPU between polling/kernel
80 * network processing, and other activities (typically userlevel tasks):
81 * kern.polling.X.user_frac (between 0 and 100, default 50) sets the share
82 * of CPU allocated to user tasks. CPU is allocated proportionally to the
83 * shares, by dynamically adjusting the "count" (poll_burst).
85 * Other parameters should be left to their default values.
86 * The following constraints hold
88 * 1 <= poll_burst <= poll_burst_max
89 * 1 <= poll_each_burst <= poll_burst_max
90 * MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
/*
 * Limits that sysctl_burstmax() applies to the poll_burst_max tunable.
 */
#define MIN_POLL_BURST_MAX		10
#define MAX_POLL_BURST_MAX		1000

/*
 * Polling frequency limits.  The maximum may be supplied by the build;
 * the default is the initial value of the pollhz tunable.
 */
#ifndef DEVICE_POLLING_FREQ_MAX
#define DEVICE_POLLING_FREQ_MAX		30000
#endif
#define DEVICE_POLLING_FREQ_DEFAULT	2000
/* Capacity of the per-context handler table (pollctx.pr[]). */
#define POLL_LIST_LEN	128

/* One entry of the handler table: the interface to be polled. */
struct pollrec {
	struct ifnet	*ifp;
};
106 #define POLLCTX_MAX 32
108 struct pollctx {
109 struct sysctl_ctx_list poll_sysctl_ctx;
110 struct sysctl_oid *poll_sysctl_tree;
112 uint32_t poll_burst; /* state */
113 uint32_t poll_each_burst; /* tunable */
114 uint32_t poll_burst_max; /* tunable */
115 uint32_t user_frac; /* tunable */
116 int reg_frac_count; /* state */
117 uint32_t reg_frac; /* tunable */
118 uint32_t short_ticks; /* statistics */
119 uint32_t lost_polls; /* statistics */
120 uint32_t pending_polls; /* state */
121 int residual_burst; /* state */
122 uint32_t phase; /* state */
123 uint32_t suspect; /* statistics */
124 uint32_t stalled; /* statistics */
125 struct timeval poll_start_t; /* state */
126 struct timeval prev_t; /* state */
128 uint32_t poll_handlers; /* next free entry in pr[]. */
129 struct pollrec pr[POLL_LIST_LEN];
131 int poll_cpuid;
132 struct systimer pollclock;
133 int polling_enabled; /* tunable */
134 int pollhz; /* tunable */
136 struct netmsg poll_netmsg;
137 struct netmsg poll_more_netmsg;
140 static struct pollctx *poll_context[POLLCTX_MAX];
142 SYSCTL_NODE(_kern, OID_AUTO, polling, CTLFLAG_RW, 0,
143 "Device polling parameters");
145 static int poll_defcpu = -1;
146 SYSCTL_INT(_kern_polling, OID_AUTO, defcpu, CTLFLAG_RD,
147 &poll_defcpu, 0, "default CPU to run device polling");
149 static uint32_t poll_cpumask0 = 0xffffffff;
150 TUNABLE_INT("kern.polling.cpumask", (int *)&poll_cpumask0);
152 static uint32_t poll_cpumask;
153 SYSCTL_INT(_kern_polling, OID_AUTO, cpumask, CTLFLAG_RD,
154 &poll_cpumask, 0, "CPUs that can run device polling");
156 static int polling_enabled = 1; /* global polling enable */
157 TUNABLE_INT("kern.polling.enable", &polling_enabled);
159 static int pollhz = DEVICE_POLLING_FREQ_DEFAULT;
160 TUNABLE_INT("kern.polling.pollhz", &pollhz);
162 /* Netisr handlers */
163 static void netisr_poll(struct netmsg *);
164 static void netisr_pollmore(struct netmsg *);
165 static void poll_register(struct netmsg *);
166 static void poll_deregister(struct netmsg *);
167 static void poll_sysctl_pollhz(struct netmsg *);
168 static void poll_sysctl_polling(struct netmsg *);
169 static void poll_sysctl_regfrac(struct netmsg *);
170 static void poll_sysctl_burstmax(struct netmsg *);
171 static void poll_sysctl_eachburst(struct netmsg *);
173 /* Systimer handler */
174 static void pollclock(systimer_t, struct intrframe *);
176 /* Sysctl handlers */
177 static int sysctl_pollhz(SYSCTL_HANDLER_ARGS);
178 static int sysctl_polling(SYSCTL_HANDLER_ARGS);
179 static int sysctl_regfrac(SYSCTL_HANDLER_ARGS);
180 static int sysctl_burstmax(SYSCTL_HANDLER_ARGS);
181 static int sysctl_eachburst(SYSCTL_HANDLER_ARGS);
182 static void poll_add_sysctl(struct sysctl_ctx_list *,
183 struct sysctl_oid_list *, struct pollctx *);
185 static void schedpoll_oncpu(struct pollctx *, struct netmsg *, netisr_fn_t);
187 void init_device_poll_pcpu(int); /* per-cpu init routine */
189 static __inline void
190 poll_reset_state(struct pollctx *pctx)
192 crit_enter();
193 pctx->poll_burst = 5;
194 pctx->reg_frac_count = 0;
195 pctx->pending_polls = 0;
196 pctx->residual_burst = 0;
197 pctx->phase = 0;
198 bzero(&pctx->poll_start_t, sizeof(pctx->poll_start_t));
199 bzero(&pctx->prev_t, sizeof(pctx->prev_t));
200 crit_exit();
204 * Initialize per-cpu polling(4) context. Called from kern_clock.c:
206 void
207 init_device_poll_pcpu(int cpuid)
209 struct pollctx *pctx;
210 char cpuid_str[3];
212 if (cpuid >= POLLCTX_MAX)
213 return;
215 if (((1 << cpuid) & poll_cpumask0) == 0)
216 return;
218 poll_cpumask |= (1 << cpuid);
220 pctx = kmalloc(sizeof(*pctx), M_DEVBUF, M_WAITOK | M_ZERO);
222 pctx->poll_each_burst = 5;
223 pctx->poll_burst_max = 150; /* good for 100Mbit net and HZ=1000 */
224 pctx->user_frac = 50;
225 pctx->reg_frac = 20;
226 pctx->polling_enabled = polling_enabled;
227 pctx->pollhz = pollhz;
228 pctx->poll_cpuid = cpuid;
229 netmsg_init(&pctx->poll_netmsg, &netisr_adone_rport, 0, NULL);
230 netmsg_init(&pctx->poll_more_netmsg, &netisr_adone_rport, 0, NULL);
231 poll_reset_state(pctx);
233 KASSERT(cpuid < POLLCTX_MAX, ("cpu id must < %d", cpuid));
234 poll_context[cpuid] = pctx;
236 if (poll_defcpu < 0) {
237 poll_defcpu = cpuid;
240 * Initialize global sysctl nodes, for compat
242 poll_add_sysctl(NULL, SYSCTL_STATIC_CHILDREN(_kern_polling),
243 pctx);
247 * Initialize per-cpu sysctl nodes
249 ksnprintf(cpuid_str, sizeof(cpuid_str), "%d", pctx->poll_cpuid);
251 sysctl_ctx_init(&pctx->poll_sysctl_ctx);
252 pctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&pctx->poll_sysctl_ctx,
253 SYSCTL_STATIC_CHILDREN(_kern_polling),
254 OID_AUTO, cpuid_str, CTLFLAG_RD, 0, "");
255 poll_add_sysctl(&pctx->poll_sysctl_ctx,
256 SYSCTL_CHILDREN(pctx->poll_sysctl_tree), pctx);
259 * Initialize systimer
261 systimer_init_periodic_nq(&pctx->pollclock, pollclock, pctx, 1);
264 static __inline void
265 schedpoll(struct pollctx *pctx)
267 crit_enter();
268 schedpoll_oncpu(pctx, &pctx->poll_netmsg, netisr_poll);
269 crit_exit();
272 static __inline void
273 schedpollmore(struct pollctx *pctx)
275 schedpoll_oncpu(pctx, &pctx->poll_more_netmsg, netisr_pollmore);
279 * Set the polling frequency
281 static int
282 sysctl_pollhz(SYSCTL_HANDLER_ARGS)
284 struct pollctx *pctx = arg1;
285 struct netmsg msg;
286 lwkt_port_t port;
287 int error, phz;
289 phz = pctx->pollhz;
290 error = sysctl_handle_int(oidp, &phz, 0, req);
291 if (error || req->newptr == NULL)
292 return error;
293 if (phz <= 0)
294 return EINVAL;
295 else if (phz > DEVICE_POLLING_FREQ_MAX)
296 phz = DEVICE_POLLING_FREQ_MAX;
298 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_pollhz);
299 msg.nm_lmsg.u.ms_result = phz;
301 port = cpu_portfn(pctx->poll_cpuid);
302 lwkt_domsg(port, &msg.nm_lmsg, 0);
303 return 0;
307 * Master enable.
309 static int
310 sysctl_polling(SYSCTL_HANDLER_ARGS)
312 struct pollctx *pctx = arg1;
313 struct netmsg msg;
314 lwkt_port_t port;
315 int error, enabled;
317 enabled = pctx->polling_enabled;
318 error = sysctl_handle_int(oidp, &enabled, 0, req);
319 if (error || req->newptr == NULL)
320 return error;
322 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_polling);
323 msg.nm_lmsg.u.ms_result = enabled;
325 port = cpu_portfn(pctx->poll_cpuid);
326 lwkt_domsg(port, &msg.nm_lmsg, 0);
327 return 0;
330 static int
331 sysctl_regfrac(SYSCTL_HANDLER_ARGS)
333 struct pollctx *pctx = arg1;
334 struct netmsg msg;
335 lwkt_port_t port;
336 uint32_t reg_frac;
337 int error;
339 reg_frac = pctx->reg_frac;
340 error = sysctl_handle_int(oidp, &reg_frac, 0, req);
341 if (error || req->newptr == NULL)
342 return error;
344 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_regfrac);
345 msg.nm_lmsg.u.ms_result = reg_frac;
347 port = cpu_portfn(pctx->poll_cpuid);
348 lwkt_domsg(port, &msg.nm_lmsg, 0);
349 return 0;
352 static int
353 sysctl_burstmax(SYSCTL_HANDLER_ARGS)
355 struct pollctx *pctx = arg1;
356 struct netmsg msg;
357 lwkt_port_t port;
358 uint32_t burst_max;
359 int error;
361 burst_max = pctx->poll_burst_max;
362 error = sysctl_handle_int(oidp, &burst_max, 0, req);
363 if (error || req->newptr == NULL)
364 return error;
365 if (burst_max < MIN_POLL_BURST_MAX)
366 burst_max = MIN_POLL_BURST_MAX;
367 else if (burst_max > MAX_POLL_BURST_MAX)
368 burst_max = MAX_POLL_BURST_MAX;
370 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_burstmax);
371 msg.nm_lmsg.u.ms_result = burst_max;
373 port = cpu_portfn(pctx->poll_cpuid);
374 lwkt_domsg(port, &msg.nm_lmsg, 0);
375 return 0;
378 static int
379 sysctl_eachburst(SYSCTL_HANDLER_ARGS)
381 struct pollctx *pctx = arg1;
382 struct netmsg msg;
383 lwkt_port_t port;
384 uint32_t each_burst;
385 int error;
387 each_burst = pctx->poll_each_burst;
388 error = sysctl_handle_int(oidp, &each_burst, 0, req);
389 if (error || req->newptr == NULL)
390 return error;
392 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_eachburst);
393 msg.nm_lmsg.u.ms_result = each_burst;
395 port = cpu_portfn(pctx->poll_cpuid);
396 lwkt_domsg(port, &msg.nm_lmsg, 0);
397 return 0;
401 * Hook from polling systimer. Tries to schedule a netisr, but keeps
402 * track of lost ticks due to the previous handler taking too long.
403 * Normally, this should not happen, because polling handler should
404 * run for a short time. However, in some cases (e.g. when there are
405 * changes in link status etc.) the drivers take a very long time
406 * (even in the order of milliseconds) to reset and reconfigure the
407 * device, causing apparent lost polls.
409 * The first part of the code is just for debugging purposes, and tries
410 * to count how often hardclock ticks are shorter than they should,
411 * meaning either stray interrupts or delayed events.
413 * WARNING! called from fastint or IPI, the MP lock might not be held.
415 static void
416 pollclock(systimer_t info, struct intrframe *frame __unused)
418 struct pollctx *pctx = info->data;
419 struct timeval t;
420 int delta;
422 if (pctx->poll_handlers == 0)
423 return;
425 microuptime(&t);
426 delta = (t.tv_usec - pctx->prev_t.tv_usec) +
427 (t.tv_sec - pctx->prev_t.tv_sec)*1000000;
428 if (delta * pctx->pollhz < 500000)
429 pctx->short_ticks++;
430 else
431 pctx->prev_t = t;
433 if (pctx->pending_polls > 100) {
435 * Too much, assume it has stalled (not always true
436 * see comment above).
438 pctx->stalled++;
439 pctx->pending_polls = 0;
440 pctx->phase = 0;
443 if (pctx->phase <= 2) {
444 if (pctx->phase != 0)
445 pctx->suspect++;
446 pctx->phase = 1;
447 schedpoll(pctx);
448 pctx->phase = 2;
450 if (pctx->pending_polls++ > 0)
451 pctx->lost_polls++;
455 * netisr_pollmore is called after other netisr's, possibly scheduling
456 * another NETISR_POLL call, or adapting the burst size for the next cycle.
458 * It is very bad to fetch large bursts of packets from a single card at once,
459 * because the burst could take a long time to be completely processed, or
460 * could saturate the intermediate queue (ipintrq or similar) leading to
461 * losses or unfairness. To reduce the problem, and also to account better for
462 * time spent in network-related processing, we split the burst in smaller
463 * chunks of fixed size, giving control to the other netisr's between chunks.
464 * This helps in improving the fairness, reducing livelock (because we
465 * emulate more closely the "process to completion" that we have with
466 * fastforwarding) and accounting for the work performed in low level
467 * handling and forwarding.
470 /* ARGSUSED */
471 static void
472 netisr_pollmore(struct netmsg *msg)
474 struct pollctx *pctx;
475 struct timeval t;
476 int kern_load, cpuid;
477 uint32_t pending_polls;
479 cpuid = mycpu->gd_cpuid;
480 KKASSERT(cpuid < POLLCTX_MAX);
482 pctx = poll_context[cpuid];
483 KKASSERT(pctx != NULL);
484 KKASSERT(pctx->poll_cpuid == cpuid);
485 KKASSERT(pctx == msg->nm_lmsg.u.ms_resultp);
487 lwkt_replymsg(&msg->nm_lmsg, 0);
489 if (pctx->poll_handlers == 0)
490 return;
492 KASSERT(pctx->polling_enabled,
493 ("# of registered poll handlers are not zero, "
494 "but polling is not enabled\n"));
496 pctx->phase = 5;
497 if (pctx->residual_burst > 0) {
498 schedpoll(pctx);
499 /* will run immediately on return, followed by netisrs */
500 return;
502 /* here we can account time spent in netisr's in this tick */
503 microuptime(&t);
504 kern_load = (t.tv_usec - pctx->poll_start_t.tv_usec) +
505 (t.tv_sec - pctx->poll_start_t.tv_sec)*1000000; /* us */
506 kern_load = (kern_load * pctx->pollhz) / 10000; /* 0..100 */
507 if (kern_load > (100 - pctx->user_frac)) { /* try decrease ticks */
508 if (pctx->poll_burst > 1)
509 pctx->poll_burst--;
510 } else {
511 if (pctx->poll_burst < pctx->poll_burst_max)
512 pctx->poll_burst++;
515 crit_enter();
516 pctx->pending_polls--;
517 pending_polls = pctx->pending_polls;
518 crit_exit();
520 if (pending_polls == 0) { /* we are done */
521 pctx->phase = 0;
522 } else {
524 * Last cycle was long and caused us to miss one or more
525 * hardclock ticks. Restart processing again, but slightly
526 * reduce the burst size to prevent that this happens again.
528 pctx->poll_burst -= (pctx->poll_burst / 8);
529 if (pctx->poll_burst < 1)
530 pctx->poll_burst = 1;
531 schedpoll(pctx);
532 pctx->phase = 6;
537 * netisr_poll is scheduled by schedpoll when appropriate, typically once
538 * per polling systimer tick.
540 * Note that the message is replied immediately in order to allow a new
541 * ISR to be scheduled in the handler.
543 * XXX each registration should indicate whether it needs a critical
544 * section to operate.
546 /* ARGSUSED */
547 static void
548 netisr_poll(struct netmsg *msg)
550 struct pollctx *pctx;
551 int i, cycles, cpuid;
552 enum poll_cmd arg = POLL_ONLY;
554 cpuid = mycpu->gd_cpuid;
555 KKASSERT(cpuid < POLLCTX_MAX);
557 pctx = poll_context[cpuid];
558 KKASSERT(pctx != NULL);
559 KKASSERT(pctx->poll_cpuid == cpuid);
560 KKASSERT(pctx == msg->nm_lmsg.u.ms_resultp);
562 crit_enter();
563 lwkt_replymsg(&msg->nm_lmsg, 0);
564 crit_exit();
566 if (pctx->poll_handlers == 0)
567 return;
569 KASSERT(pctx->polling_enabled,
570 ("# of registered poll handlers are not zero, "
571 "but polling is not enabled\n"));
573 pctx->phase = 3;
574 if (pctx->residual_burst == 0) { /* first call in this tick */
575 microuptime(&pctx->poll_start_t);
577 if (pctx->reg_frac_count-- == 0) {
578 arg = POLL_AND_CHECK_STATUS;
579 pctx->reg_frac_count = pctx->reg_frac - 1;
582 pctx->residual_burst = pctx->poll_burst;
584 cycles = (pctx->residual_burst < pctx->poll_each_burst) ?
585 pctx->residual_burst : pctx->poll_each_burst;
586 pctx->residual_burst -= cycles;
588 for (i = 0 ; i < pctx->poll_handlers ; i++) {
589 struct ifnet *ifp = pctx->pr[i].ifp;
591 if (!lwkt_serialize_try(ifp->if_serializer))
592 continue;
594 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING|IFF_POLLING))
595 == (IFF_UP|IFF_RUNNING|IFF_POLLING))
596 ifp->if_poll(ifp, arg, cycles);
598 lwkt_serialize_exit(ifp->if_serializer);
601 schedpollmore(pctx);
602 pctx->phase = 4;
605 static void
606 poll_register(struct netmsg *msg)
608 struct ifnet *ifp = msg->nm_lmsg.u.ms_resultp;
609 struct pollctx *pctx;
610 int rc, cpuid;
612 cpuid = mycpu->gd_cpuid;
613 KKASSERT(cpuid < POLLCTX_MAX);
615 pctx = poll_context[cpuid];
616 KKASSERT(pctx != NULL);
617 KKASSERT(pctx->poll_cpuid == cpuid);
619 if (pctx->polling_enabled == 0) {
620 /* Polling disabled, cannot register */
621 rc = EOPNOTSUPP;
622 goto back;
626 * Check if there is room.
628 if (pctx->poll_handlers >= POLL_LIST_LEN) {
630 * List full, cannot register more entries.
631 * This should never happen; if it does, it is probably a
632 * broken driver trying to register multiple times. Checking
633 * this at runtime is expensive, and won't solve the problem
634 * anyways, so just report a few times and then give up.
636 static int verbose = 10; /* XXX */
637 if (verbose >0) {
638 kprintf("poll handlers list full, "
639 "maybe a broken driver ?\n");
640 verbose--;
642 rc = ENOMEM;
643 } else {
644 pctx->pr[pctx->poll_handlers].ifp = ifp;
645 pctx->poll_handlers++;
646 rc = 0;
648 if (pctx->poll_handlers == 1) {
649 KKASSERT(pctx->polling_enabled);
650 systimer_adjust_periodic(&pctx->pollclock,
651 pctx->pollhz);
654 back:
655 lwkt_replymsg(&msg->nm_lmsg, rc);
659 * Try to register routine for polling. Returns 1 if successful
660 * (and polling should be enabled), 0 otherwise.
662 * Called from mainline code only, not called from an interrupt.
665 ether_poll_register(struct ifnet *ifp)
667 if (poll_defcpu < 0)
668 return 0;
669 KKASSERT(poll_defcpu < POLLCTX_MAX);
671 return ether_pollcpu_register(ifp, poll_defcpu);
675 ether_pollcpu_register(struct ifnet *ifp, int cpuid)
677 struct netmsg msg;
678 lwkt_port_t port;
679 int rc;
681 if (ifp->if_poll == NULL) {
682 /* Device does not support polling */
683 return 0;
686 if (cpuid < 0 || cpuid >= POLLCTX_MAX)
687 return 0;
689 if (((1 << cpuid) & poll_cpumask) == 0) {
690 /* Polling is not supported on 'cpuid' */
691 return 0;
693 KKASSERT(poll_context[cpuid] != NULL);
696 * Attempt to register. Interlock with IFF_POLLING.
698 crit_enter(); /* XXX MP - not mp safe */
700 lwkt_serialize_enter(ifp->if_serializer);
701 if (ifp->if_flags & IFF_POLLING) {
702 /* Already polling */
703 KKASSERT(ifp->if_poll_cpuid >= 0);
704 lwkt_serialize_exit(ifp->if_serializer);
705 crit_exit();
706 return 0;
708 KKASSERT(ifp->if_poll_cpuid < 0);
709 ifp->if_flags |= IFF_POLLING;
710 ifp->if_poll_cpuid = cpuid;
711 if (ifp->if_flags & IFF_RUNNING)
712 ifp->if_poll(ifp, POLL_REGISTER, 0);
713 lwkt_serialize_exit(ifp->if_serializer);
715 netmsg_init(&msg, &curthread->td_msgport, 0, poll_register);
716 msg.nm_lmsg.u.ms_resultp = ifp;
718 port = cpu_portfn(cpuid);
719 lwkt_domsg(port, &msg.nm_lmsg, 0);
721 if (msg.nm_lmsg.ms_error) {
722 lwkt_serialize_enter(ifp->if_serializer);
723 ifp->if_flags &= ~IFF_POLLING;
724 ifp->if_poll_cpuid = -1;
725 if (ifp->if_flags & IFF_RUNNING)
726 ifp->if_poll(ifp, POLL_DEREGISTER, 0);
727 lwkt_serialize_exit(ifp->if_serializer);
728 rc = 0;
729 } else {
730 rc = 1;
733 crit_exit();
734 return rc;
737 static void
738 poll_deregister(struct netmsg *msg)
740 struct ifnet *ifp = msg->nm_lmsg.u.ms_resultp;
741 struct pollctx *pctx;
742 int rc, i, cpuid;
744 cpuid = mycpu->gd_cpuid;
745 KKASSERT(cpuid < POLLCTX_MAX);
747 pctx = poll_context[cpuid];
748 KKASSERT(pctx != NULL);
749 KKASSERT(pctx->poll_cpuid == cpuid);
751 for (i = 0 ; i < pctx->poll_handlers ; i++) {
752 if (pctx->pr[i].ifp == ifp) /* Found it */
753 break;
755 if (i == pctx->poll_handlers) {
756 kprintf("ether_poll_deregister: ifp not found!!!\n");
757 rc = ENOENT;
758 } else {
759 pctx->poll_handlers--;
760 if (i < pctx->poll_handlers) {
761 /* Last entry replaces this one. */
762 pctx->pr[i].ifp = pctx->pr[pctx->poll_handlers].ifp;
765 if (pctx->poll_handlers == 0) {
766 systimer_adjust_periodic(&pctx->pollclock, 1);
767 poll_reset_state(pctx);
769 rc = 0;
771 lwkt_replymsg(&msg->nm_lmsg, rc);
775 * Remove interface from the polling list. Occurs when polling is turned
776 * off. Called from mainline code only, not called from an interrupt.
779 ether_poll_deregister(struct ifnet *ifp)
781 struct netmsg msg;
782 lwkt_port_t port;
783 int rc, cpuid;
785 KKASSERT(ifp != NULL);
787 if (ifp->if_poll == NULL)
788 return 0;
790 crit_enter();
792 lwkt_serialize_enter(ifp->if_serializer);
793 if ((ifp->if_flags & IFF_POLLING) == 0) {
794 KKASSERT(ifp->if_poll_cpuid < 0);
795 lwkt_serialize_exit(ifp->if_serializer);
796 crit_exit();
797 return 0;
800 cpuid = ifp->if_poll_cpuid;
801 KKASSERT(cpuid >= 0);
802 KKASSERT(poll_context[cpuid] != NULL);
804 ifp->if_flags &= ~IFF_POLLING;
805 ifp->if_poll_cpuid = -1;
806 lwkt_serialize_exit(ifp->if_serializer);
808 netmsg_init(&msg, &curthread->td_msgport, 0, poll_deregister);
809 msg.nm_lmsg.u.ms_resultp = ifp;
811 port = cpu_portfn(cpuid);
812 lwkt_domsg(port, &msg.nm_lmsg, 0);
814 if (!msg.nm_lmsg.ms_error) {
815 lwkt_serialize_enter(ifp->if_serializer);
816 if (ifp->if_flags & IFF_RUNNING)
817 ifp->if_poll(ifp, POLL_DEREGISTER, 1);
818 lwkt_serialize_exit(ifp->if_serializer);
819 rc = 1;
820 } else {
821 rc = 0;
824 crit_exit();
825 return rc;
828 static void
829 poll_add_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent,
830 struct pollctx *pctx)
832 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "enable",
833 CTLTYPE_INT | CTLFLAG_RW, pctx, 0, sysctl_polling,
834 "I", "Polling enabled");
836 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "pollhz",
837 CTLTYPE_INT | CTLFLAG_RW, pctx, 0, sysctl_pollhz,
838 "I", "Device polling frequency");
840 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "reg_frac",
841 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_regfrac,
842 "IU", "Every this many cycles poll register");
844 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "burst_max",
845 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_burstmax,
846 "IU", "Max Polling burst size");
848 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "each_burst",
849 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_eachburst,
850 "IU", "Max size of each burst");
852 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "phase", CTLFLAG_RD,
853 &pctx->phase, 0, "Polling phase");
855 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "suspect", CTLFLAG_RW,
856 &pctx->suspect, 0, "suspect event");
858 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "stalled", CTLFLAG_RW,
859 &pctx->stalled, 0, "potential stalls");
861 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "burst", CTLFLAG_RD,
862 &pctx->poll_burst, 0, "Current polling burst size");
864 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "user_frac", CTLFLAG_RW,
865 &pctx->user_frac, 0,
866 "Desired user fraction of cpu time");
868 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "short_ticks", CTLFLAG_RW,
869 &pctx->short_ticks, 0,
870 "Hardclock ticks shorter than they should be");
872 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "lost_polls", CTLFLAG_RW,
873 &pctx->lost_polls, 0,
874 "How many times we would have lost a poll tick");
876 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "pending_polls", CTLFLAG_RD,
877 &pctx->pending_polls, 0, "Do we need to poll again");
879 SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "residual_burst", CTLFLAG_RD,
880 &pctx->residual_burst, 0,
881 "# of residual cycles in burst");
883 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "handlers", CTLFLAG_RD,
884 &pctx->poll_handlers, 0,
885 "Number of registered poll handlers");
888 static void
889 schedpoll_oncpu(struct pollctx *pctx, struct netmsg *msg, netisr_fn_t handler)
891 if (msg->nm_lmsg.ms_flags & MSGF_DONE) {
892 lwkt_port_t port;
894 netmsg_init(msg, &netisr_adone_rport, 0, handler);
895 #ifdef INVARIANTS
896 msg->nm_lmsg.u.ms_resultp = pctx;
897 #endif
898 port = cpu_portfn(mycpu->gd_cpuid);
899 lwkt_sendmsg(port, &msg->nm_lmsg);
903 static void
904 poll_sysctl_pollhz(struct netmsg *msg)
906 struct pollctx *pctx;
907 int cpuid;
909 cpuid = mycpu->gd_cpuid;
910 KKASSERT(cpuid < POLLCTX_MAX);
912 pctx = poll_context[cpuid];
913 KKASSERT(pctx != NULL);
914 KKASSERT(pctx->poll_cpuid == cpuid);
917 * If polling is disabled or there is no device registered,
918 * don't adjust polling systimer frequency.
919 * Polling systimer frequency will be adjusted once polling
920 * is enabled and there are registered devices.
922 pctx->pollhz = msg->nm_lmsg.u.ms_result;
923 if (pctx->polling_enabled && pctx->poll_handlers)
924 systimer_adjust_periodic(&pctx->pollclock, pctx->pollhz);
927 * Make sure that reg_frac and reg_frac_count are within valid range.
929 if (pctx->reg_frac > pctx->pollhz) {
930 pctx->reg_frac = pctx->pollhz;
931 if (pctx->reg_frac_count > pctx->reg_frac)
932 pctx->reg_frac_count = pctx->reg_frac - 1;
935 lwkt_replymsg(&msg->nm_lmsg, 0);
938 static void
939 poll_sysctl_polling(struct netmsg *msg)
941 struct pollctx *pctx;
942 int cpuid;
944 cpuid = mycpu->gd_cpuid;
945 KKASSERT(cpuid < POLLCTX_MAX);
947 pctx = poll_context[cpuid];
948 KKASSERT(pctx != NULL);
949 KKASSERT(pctx->poll_cpuid == cpuid);
952 * If polling is disabled or there is no device registered,
953 * cut the polling systimer frequency to 1hz.
955 pctx->polling_enabled = msg->nm_lmsg.u.ms_result;
956 if (pctx->polling_enabled && pctx->poll_handlers) {
957 systimer_adjust_periodic(&pctx->pollclock, pctx->pollhz);
958 } else {
959 systimer_adjust_periodic(&pctx->pollclock, 1);
960 poll_reset_state(pctx);
963 if (!pctx->polling_enabled && pctx->poll_handlers != 0) {
964 int i;
966 for (i = 0 ; i < pctx->poll_handlers ; i++) {
967 struct ifnet *ifp = pctx->pr[i].ifp;
969 lwkt_serialize_enter(ifp->if_serializer);
971 if ((ifp->if_flags & IFF_POLLING) == 0) {
972 KKASSERT(ifp->if_poll_cpuid < 0);
973 lwkt_serialize_exit(ifp->if_serializer);
974 continue;
976 ifp->if_flags &= ~IFF_POLLING;
977 ifp->if_poll_cpuid = -1;
980 * Only call the interface deregistration
981 * function if the interface is still
982 * running.
984 if (ifp->if_flags & IFF_RUNNING)
985 ifp->if_poll(ifp, POLL_DEREGISTER, 1);
987 lwkt_serialize_exit(ifp->if_serializer);
989 pctx->poll_handlers = 0;
992 lwkt_replymsg(&msg->nm_lmsg, 0);
995 static void
996 poll_sysctl_regfrac(struct netmsg *msg)
998 struct pollctx *pctx;
999 uint32_t reg_frac;
1000 int cpuid;
1002 cpuid = mycpu->gd_cpuid;
1003 KKASSERT(cpuid < POLLCTX_MAX);
1005 pctx = poll_context[cpuid];
1006 KKASSERT(pctx != NULL);
1007 KKASSERT(pctx->poll_cpuid == cpuid);
1009 reg_frac = msg->nm_lmsg.u.ms_result;
1010 if (reg_frac > pctx->pollhz)
1011 reg_frac = pctx->pollhz;
1012 else if (reg_frac < 1)
1013 reg_frac = 1;
1015 pctx->reg_frac = reg_frac;
1016 if (pctx->reg_frac_count > pctx->reg_frac)
1017 pctx->reg_frac_count = pctx->reg_frac - 1;
1019 lwkt_replymsg(&msg->nm_lmsg, 0);
1022 static void
1023 poll_sysctl_burstmax(struct netmsg *msg)
1025 struct pollctx *pctx;
1026 int cpuid;
1028 cpuid = mycpu->gd_cpuid;
1029 KKASSERT(cpuid < POLLCTX_MAX);
1031 pctx = poll_context[cpuid];
1032 KKASSERT(pctx != NULL);
1033 KKASSERT(pctx->poll_cpuid == cpuid);
1035 pctx->poll_burst_max = msg->nm_lmsg.u.ms_result;
1036 if (pctx->poll_each_burst > pctx->poll_burst_max)
1037 pctx->poll_each_burst = pctx->poll_burst_max;
1038 if (pctx->poll_burst > pctx->poll_burst_max)
1039 pctx->poll_burst = pctx->poll_burst_max;
1040 if (pctx->residual_burst > pctx->poll_burst_max)
1041 pctx->residual_burst = pctx->poll_burst_max;
1043 lwkt_replymsg(&msg->nm_lmsg, 0);
1046 static void
1047 poll_sysctl_eachburst(struct netmsg *msg)
1049 struct pollctx *pctx;
1050 uint32_t each_burst;
1051 int cpuid;
1053 cpuid = mycpu->gd_cpuid;
1054 KKASSERT(cpuid < POLLCTX_MAX);
1056 pctx = poll_context[cpuid];
1057 KKASSERT(pctx != NULL);
1058 KKASSERT(pctx->poll_cpuid == cpuid);
1060 each_burst = msg->nm_lmsg.u.ms_result;
1061 if (each_burst > pctx->poll_burst_max)
1062 each_burst = pctx->poll_burst_max;
1063 else if (each_burst < 1)
1064 each_burst = 1;
1065 pctx->poll_each_burst = each_burst;
1067 lwkt_replymsg(&msg->nm_lmsg, 0);