Add tunable for each_burst.
[dragonfly.git] / sys / kern / kern_poll.c
blob5db31ed560fd8546a896d60ecf746479b261d1fc
1 /*-
2 * Copyright (c) 2001-2002 Luigi Rizzo
4 * Supported by: the Xorp Project (www.xorp.org)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
27 * $FreeBSD: src/sys/kern/kern_poll.c,v 1.2.2.4 2002/06/27 23:26:33 luigi Exp $
28 * $DragonFly: src/sys/kern/kern_poll.c,v 1.45 2008/04/30 09:30:59 sephe Exp $
31 #include "opt_polling.h"
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/socket.h> /* needed by net/if.h */
36 #include <sys/sysctl.h>
38 #include <sys/thread2.h>
39 #include <sys/msgport2.h>
41 #include <net/if.h> /* for IFF_* flags */
42 #include <net/netmsg2.h>
45 * Polling support for [network] device drivers.
47 * Drivers which support this feature try to register with the
48 * polling code.
50 * If registration is successful, the driver must disable interrupts,
51 * and further I/O is performed through the handler, which is invoked
52 * (at least once per clock tick) with 3 arguments: the "arg" passed at
53 * register time (a struct ifnet pointer), a command, and a "count" limit.
55 * The command can be one of the following:
56 * POLL_ONLY: quick move of "count" packets from input/output queues.
57 * POLL_AND_CHECK_STATUS: as above, plus check status registers or do
58 * other more expensive operations. This command is issued periodically
59 * but less frequently than POLL_ONLY.
60 * POLL_DEREGISTER: deregister and return to interrupt mode.
61 * POLL_REGISTER: register and disable interrupts
63 * The first two commands are only issued if the interface is marked as
64 * 'IFF_UP, IFF_RUNNING and IFF_POLLING', the last two only if IFF_RUNNING
65 * is set.
67 * The count limit specifies how much work the handler can do during the
68 * call -- typically this is the number of packets to be received, or
69 * transmitted, etc. (drivers are free to interpret this number, as long
70 * as the max time spent in the function grows roughly linearly with the
71 * count).
73 * Deregistration can be requested by the driver itself (typically in the
74 * *_stop() routine), or by the polling code, by invoking the handler.
76 * Polling can be enabled or disabled on particular CPU_X with the sysctl
77 * variable kern.polling.X.enable (default is 1, enabled)
79 * A second variable controls the sharing of CPU between polling/kernel
80 * network processing, and other activities (typically userlevel tasks):
81 * kern.polling.X.user_frac (between 0 and 100, default 50) sets the share
82 * of CPU allocated to user tasks. CPU is allocated proportionally to the
83 * shares, by dynamically adjusting the "count" (poll_burst).
85 * Other parameters should be left to their default values.
86 * The following constraints hold
88 * 1 <= poll_burst <= poll_burst_max
89 * 1 <= poll_each_burst <= poll_burst_max
90 * MIN_POLL_BURST_MAX <= poll_burst_max <= MAX_POLL_BURST_MAX
/* Range to which the poll_burst_max tunable is clamped. */
#define MIN_POLL_BURST_MAX	10
#define MAX_POLL_BURST_MAX	1000

/* Initial values of kern.polling.burst_max and kern.polling.each_burst. */
#define POLL_BURST_MAX		150	/* good for 100Mbit net and HZ=1000 */
#define POLL_EACH_BURST		5

/*
 * Polling frequency limits.  The maximum may be overridden at build
 * time; the default is what kern.polling.pollhz starts out as.
 */
#ifndef DEVICE_POLLING_FREQ_MAX
#define DEVICE_POLLING_FREQ_MAX		30000
#endif
#define DEVICE_POLLING_FREQ_DEFAULT	2000

/* Number of slots in a polling context's handler table. */
#define POLL_LIST_LEN	128
/* One entry of a polling context's handler table. */
struct pollrec {
	struct ifnet	*ifp;		/* interface registered for polling */
};
108 #define POLLCTX_MAX 32
110 struct pollctx {
111 struct sysctl_ctx_list poll_sysctl_ctx;
112 struct sysctl_oid *poll_sysctl_tree;
114 uint32_t poll_burst; /* state */
115 uint32_t poll_each_burst; /* tunable */
116 uint32_t poll_burst_max; /* tunable */
117 uint32_t user_frac; /* tunable */
118 int reg_frac_count; /* state */
119 uint32_t reg_frac; /* tunable */
120 uint32_t short_ticks; /* statistics */
121 uint32_t lost_polls; /* statistics */
122 uint32_t pending_polls; /* state */
123 int residual_burst; /* state */
124 uint32_t phase; /* state */
125 uint32_t suspect; /* statistics */
126 uint32_t stalled; /* statistics */
127 struct timeval poll_start_t; /* state */
128 struct timeval prev_t; /* state */
130 uint32_t poll_handlers; /* next free entry in pr[]. */
131 struct pollrec pr[POLL_LIST_LEN];
133 int poll_cpuid;
134 struct systimer pollclock;
135 int polling_enabled; /* tunable */
136 int pollhz; /* tunable */
138 struct netmsg poll_netmsg;
139 struct netmsg poll_more_netmsg;
142 static struct pollctx *poll_context[POLLCTX_MAX];
144 SYSCTL_NODE(_kern, OID_AUTO, polling, CTLFLAG_RW, 0,
145 "Device polling parameters");
147 static int poll_defcpu = -1;
148 SYSCTL_INT(_kern_polling, OID_AUTO, defcpu, CTLFLAG_RD,
149 &poll_defcpu, 0, "default CPU to run device polling");
151 static uint32_t poll_cpumask0 = 0xffffffff;
152 TUNABLE_INT("kern.polling.cpumask", (int *)&poll_cpumask0);
154 static uint32_t poll_cpumask;
155 SYSCTL_INT(_kern_polling, OID_AUTO, cpumask, CTLFLAG_RD,
156 &poll_cpumask, 0, "CPUs that can run device polling");
158 static int polling_enabled = 1; /* global polling enable */
159 TUNABLE_INT("kern.polling.enable", &polling_enabled);
161 static int pollhz = DEVICE_POLLING_FREQ_DEFAULT;
162 TUNABLE_INT("kern.polling.pollhz", &pollhz);
164 static int poll_burst_max = POLL_BURST_MAX;
165 TUNABLE_INT("kern.polling.burst_max", &poll_burst_max);
167 static int poll_each_burst = POLL_EACH_BURST;
168 TUNABLE_INT("kern.polling.each_burst", &poll_each_burst);
170 /* Netisr handlers */
171 static void netisr_poll(struct netmsg *);
172 static void netisr_pollmore(struct netmsg *);
173 static void poll_register(struct netmsg *);
174 static void poll_deregister(struct netmsg *);
175 static void poll_sysctl_pollhz(struct netmsg *);
176 static void poll_sysctl_polling(struct netmsg *);
177 static void poll_sysctl_regfrac(struct netmsg *);
178 static void poll_sysctl_burstmax(struct netmsg *);
179 static void poll_sysctl_eachburst(struct netmsg *);
181 /* Systimer handler */
182 static void pollclock(systimer_t, struct intrframe *);
184 /* Sysctl handlers */
185 static int sysctl_pollhz(SYSCTL_HANDLER_ARGS);
186 static int sysctl_polling(SYSCTL_HANDLER_ARGS);
187 static int sysctl_regfrac(SYSCTL_HANDLER_ARGS);
188 static int sysctl_burstmax(SYSCTL_HANDLER_ARGS);
189 static int sysctl_eachburst(SYSCTL_HANDLER_ARGS);
190 static void poll_add_sysctl(struct sysctl_ctx_list *,
191 struct sysctl_oid_list *, struct pollctx *);
193 static void schedpoll_oncpu(struct pollctx *, struct netmsg *, netisr_fn_t);
195 void init_device_poll_pcpu(int); /* per-cpu init routine */
197 static __inline void
198 poll_reset_state(struct pollctx *pctx)
200 crit_enter();
201 pctx->poll_burst = 5;
202 pctx->reg_frac_count = 0;
203 pctx->pending_polls = 0;
204 pctx->residual_burst = 0;
205 pctx->phase = 0;
206 bzero(&pctx->poll_start_t, sizeof(pctx->poll_start_t));
207 bzero(&pctx->prev_t, sizeof(pctx->prev_t));
208 crit_exit();
212 * Initialize per-cpu polling(4) context. Called from kern_clock.c:
214 void
215 init_device_poll_pcpu(int cpuid)
217 struct pollctx *pctx;
218 char cpuid_str[3];
220 if (cpuid >= POLLCTX_MAX)
221 return;
223 if (((1 << cpuid) & poll_cpumask0) == 0)
224 return;
226 if (poll_burst_max < MIN_POLL_BURST_MAX)
227 poll_burst_max = MIN_POLL_BURST_MAX;
228 else if (poll_burst_max > MAX_POLL_BURST_MAX)
229 poll_burst_max = MAX_POLL_BURST_MAX;
231 if (poll_each_burst > poll_burst_max)
232 poll_each_burst = poll_burst_max;
234 poll_cpumask |= (1 << cpuid);
236 pctx = kmalloc(sizeof(*pctx), M_DEVBUF, M_WAITOK | M_ZERO);
238 pctx->poll_each_burst = poll_each_burst;
239 pctx->poll_burst_max = poll_burst_max;
240 pctx->user_frac = 50;
241 pctx->reg_frac = 20;
242 pctx->polling_enabled = polling_enabled;
243 pctx->pollhz = pollhz;
244 pctx->poll_cpuid = cpuid;
245 netmsg_init(&pctx->poll_netmsg, &netisr_adone_rport, 0, NULL);
246 netmsg_init(&pctx->poll_more_netmsg, &netisr_adone_rport, 0, NULL);
247 poll_reset_state(pctx);
249 KASSERT(cpuid < POLLCTX_MAX, ("cpu id must < %d", cpuid));
250 poll_context[cpuid] = pctx;
252 if (poll_defcpu < 0) {
253 poll_defcpu = cpuid;
256 * Initialize global sysctl nodes, for compat
258 poll_add_sysctl(NULL, SYSCTL_STATIC_CHILDREN(_kern_polling),
259 pctx);
263 * Initialize per-cpu sysctl nodes
265 ksnprintf(cpuid_str, sizeof(cpuid_str), "%d", pctx->poll_cpuid);
267 sysctl_ctx_init(&pctx->poll_sysctl_ctx);
268 pctx->poll_sysctl_tree = SYSCTL_ADD_NODE(&pctx->poll_sysctl_ctx,
269 SYSCTL_STATIC_CHILDREN(_kern_polling),
270 OID_AUTO, cpuid_str, CTLFLAG_RD, 0, "");
271 poll_add_sysctl(&pctx->poll_sysctl_ctx,
272 SYSCTL_CHILDREN(pctx->poll_sysctl_tree), pctx);
275 * Initialize systimer
277 systimer_init_periodic_nq(&pctx->pollclock, pollclock, pctx, 1);
280 static __inline void
281 schedpoll(struct pollctx *pctx)
283 crit_enter();
284 schedpoll_oncpu(pctx, &pctx->poll_netmsg, netisr_poll);
285 crit_exit();
288 static __inline void
289 schedpollmore(struct pollctx *pctx)
291 schedpoll_oncpu(pctx, &pctx->poll_more_netmsg, netisr_pollmore);
295 * Set the polling frequency
297 static int
298 sysctl_pollhz(SYSCTL_HANDLER_ARGS)
300 struct pollctx *pctx = arg1;
301 struct netmsg msg;
302 lwkt_port_t port;
303 int error, phz;
305 phz = pctx->pollhz;
306 error = sysctl_handle_int(oidp, &phz, 0, req);
307 if (error || req->newptr == NULL)
308 return error;
309 if (phz <= 0)
310 return EINVAL;
311 else if (phz > DEVICE_POLLING_FREQ_MAX)
312 phz = DEVICE_POLLING_FREQ_MAX;
314 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_pollhz);
315 msg.nm_lmsg.u.ms_result = phz;
317 port = cpu_portfn(pctx->poll_cpuid);
318 lwkt_domsg(port, &msg.nm_lmsg, 0);
319 return 0;
323 * Master enable.
325 static int
326 sysctl_polling(SYSCTL_HANDLER_ARGS)
328 struct pollctx *pctx = arg1;
329 struct netmsg msg;
330 lwkt_port_t port;
331 int error, enabled;
333 enabled = pctx->polling_enabled;
334 error = sysctl_handle_int(oidp, &enabled, 0, req);
335 if (error || req->newptr == NULL)
336 return error;
338 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_polling);
339 msg.nm_lmsg.u.ms_result = enabled;
341 port = cpu_portfn(pctx->poll_cpuid);
342 lwkt_domsg(port, &msg.nm_lmsg, 0);
343 return 0;
346 static int
347 sysctl_regfrac(SYSCTL_HANDLER_ARGS)
349 struct pollctx *pctx = arg1;
350 struct netmsg msg;
351 lwkt_port_t port;
352 uint32_t reg_frac;
353 int error;
355 reg_frac = pctx->reg_frac;
356 error = sysctl_handle_int(oidp, &reg_frac, 0, req);
357 if (error || req->newptr == NULL)
358 return error;
360 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_regfrac);
361 msg.nm_lmsg.u.ms_result = reg_frac;
363 port = cpu_portfn(pctx->poll_cpuid);
364 lwkt_domsg(port, &msg.nm_lmsg, 0);
365 return 0;
368 static int
369 sysctl_burstmax(SYSCTL_HANDLER_ARGS)
371 struct pollctx *pctx = arg1;
372 struct netmsg msg;
373 lwkt_port_t port;
374 uint32_t burst_max;
375 int error;
377 burst_max = pctx->poll_burst_max;
378 error = sysctl_handle_int(oidp, &burst_max, 0, req);
379 if (error || req->newptr == NULL)
380 return error;
381 if (burst_max < MIN_POLL_BURST_MAX)
382 burst_max = MIN_POLL_BURST_MAX;
383 else if (burst_max > MAX_POLL_BURST_MAX)
384 burst_max = MAX_POLL_BURST_MAX;
386 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_burstmax);
387 msg.nm_lmsg.u.ms_result = burst_max;
389 port = cpu_portfn(pctx->poll_cpuid);
390 lwkt_domsg(port, &msg.nm_lmsg, 0);
391 return 0;
394 static int
395 sysctl_eachburst(SYSCTL_HANDLER_ARGS)
397 struct pollctx *pctx = arg1;
398 struct netmsg msg;
399 lwkt_port_t port;
400 uint32_t each_burst;
401 int error;
403 each_burst = pctx->poll_each_burst;
404 error = sysctl_handle_int(oidp, &each_burst, 0, req);
405 if (error || req->newptr == NULL)
406 return error;
408 netmsg_init(&msg, &curthread->td_msgport, 0, poll_sysctl_eachburst);
409 msg.nm_lmsg.u.ms_result = each_burst;
411 port = cpu_portfn(pctx->poll_cpuid);
412 lwkt_domsg(port, &msg.nm_lmsg, 0);
413 return 0;
417 * Hook from polling systimer. Tries to schedule a netisr, but keeps
418 * track of lost ticks due to the previous handler taking too long.
419 * Normally, this should not happen, because polling handler should
420 * run for a short time. However, in some cases (e.g. when there are
421 * changes in link status etc.) the drivers take a very long time
422 * (even in the order of milliseconds) to reset and reconfigure the
423 * device, causing apparent lost polls.
425 * The first part of the code is just for debugging purposes, and tries
426 * to count how often hardclock ticks are shorter than they should,
427 * meaning either stray interrupts or delayed events.
429 * WARNING! called from fastint or IPI, the MP lock might not be held.
431 static void
432 pollclock(systimer_t info, struct intrframe *frame __unused)
434 struct pollctx *pctx = info->data;
435 struct timeval t;
436 int delta;
438 if (pctx->poll_handlers == 0)
439 return;
441 microuptime(&t);
442 delta = (t.tv_usec - pctx->prev_t.tv_usec) +
443 (t.tv_sec - pctx->prev_t.tv_sec)*1000000;
444 if (delta * pctx->pollhz < 500000)
445 pctx->short_ticks++;
446 else
447 pctx->prev_t = t;
449 if (pctx->pending_polls > 100) {
451 * Too much, assume it has stalled (not always true
452 * see comment above).
454 pctx->stalled++;
455 pctx->pending_polls = 0;
456 pctx->phase = 0;
459 if (pctx->phase <= 2) {
460 if (pctx->phase != 0)
461 pctx->suspect++;
462 pctx->phase = 1;
463 schedpoll(pctx);
464 pctx->phase = 2;
466 if (pctx->pending_polls++ > 0)
467 pctx->lost_polls++;
471 * netisr_pollmore is called after other netisr's, possibly scheduling
472 * another NETISR_POLL call, or adapting the burst size for the next cycle.
474 * It is very bad to fetch large bursts of packets from a single card at once,
475 * because the burst could take a long time to be completely processed leading
476 * to unfairness. To reduce the problem, and also to account better for time
477 * spent in network-related processing, we split the burst in smaller chunks
478 * of fixed size, giving control to the other netisr's between chunks. This
479 * helps in improving the fairness, reducing livelock (because we emulate more
480 * closely the "process to completion" that we have with fastforwarding) and
481 * accounting for the work performed in low level handling and forwarding.
484 /* ARGSUSED */
485 static void
486 netisr_pollmore(struct netmsg *msg)
488 struct pollctx *pctx;
489 struct timeval t;
490 int kern_load, cpuid;
491 uint32_t pending_polls;
493 cpuid = mycpu->gd_cpuid;
494 KKASSERT(cpuid < POLLCTX_MAX);
496 pctx = poll_context[cpuid];
497 KKASSERT(pctx != NULL);
498 KKASSERT(pctx->poll_cpuid == cpuid);
499 KKASSERT(pctx == msg->nm_lmsg.u.ms_resultp);
501 lwkt_replymsg(&msg->nm_lmsg, 0);
503 if (pctx->poll_handlers == 0)
504 return;
506 KASSERT(pctx->polling_enabled,
507 ("# of registered poll handlers are not zero, "
508 "but polling is not enabled\n"));
510 pctx->phase = 5;
511 if (pctx->residual_burst > 0) {
512 schedpoll(pctx);
513 /* will run immediately on return, followed by netisrs */
514 return;
516 /* here we can account time spent in netisr's in this tick */
517 microuptime(&t);
518 kern_load = (t.tv_usec - pctx->poll_start_t.tv_usec) +
519 (t.tv_sec - pctx->poll_start_t.tv_sec)*1000000; /* us */
520 kern_load = (kern_load * pctx->pollhz) / 10000; /* 0..100 */
521 if (kern_load > (100 - pctx->user_frac)) { /* try decrease ticks */
522 if (pctx->poll_burst > 1)
523 pctx->poll_burst--;
524 } else {
525 if (pctx->poll_burst < pctx->poll_burst_max)
526 pctx->poll_burst++;
529 crit_enter();
530 pctx->pending_polls--;
531 pending_polls = pctx->pending_polls;
532 crit_exit();
534 if (pending_polls == 0) { /* we are done */
535 pctx->phase = 0;
536 } else {
538 * Last cycle was long and caused us to miss one or more
539 * hardclock ticks. Restart processing again, but slightly
540 * reduce the burst size to prevent that this happens again.
542 pctx->poll_burst -= (pctx->poll_burst / 8);
543 if (pctx->poll_burst < 1)
544 pctx->poll_burst = 1;
545 schedpoll(pctx);
546 pctx->phase = 6;
551 * netisr_poll is scheduled by schedpoll when appropriate, typically once
552 * per polling systimer tick.
554 * Note that the message is replied immediately in order to allow a new
555 * ISR to be scheduled in the handler.
557 * XXX each registration should indicate whether it needs a critical
558 * section to operate.
560 /* ARGSUSED */
561 static void
562 netisr_poll(struct netmsg *msg)
564 struct pollctx *pctx;
565 int i, cycles, cpuid;
566 enum poll_cmd arg = POLL_ONLY;
568 cpuid = mycpu->gd_cpuid;
569 KKASSERT(cpuid < POLLCTX_MAX);
571 pctx = poll_context[cpuid];
572 KKASSERT(pctx != NULL);
573 KKASSERT(pctx->poll_cpuid == cpuid);
574 KKASSERT(pctx == msg->nm_lmsg.u.ms_resultp);
576 crit_enter();
577 lwkt_replymsg(&msg->nm_lmsg, 0);
578 crit_exit();
580 if (pctx->poll_handlers == 0)
581 return;
583 KASSERT(pctx->polling_enabled,
584 ("# of registered poll handlers are not zero, "
585 "but polling is not enabled\n"));
587 pctx->phase = 3;
588 if (pctx->residual_burst == 0) { /* first call in this tick */
589 microuptime(&pctx->poll_start_t);
591 if (pctx->reg_frac_count-- == 0) {
592 arg = POLL_AND_CHECK_STATUS;
593 pctx->reg_frac_count = pctx->reg_frac - 1;
596 pctx->residual_burst = pctx->poll_burst;
598 cycles = (pctx->residual_burst < pctx->poll_each_burst) ?
599 pctx->residual_burst : pctx->poll_each_burst;
600 pctx->residual_burst -= cycles;
602 for (i = 0 ; i < pctx->poll_handlers ; i++) {
603 struct ifnet *ifp = pctx->pr[i].ifp;
605 if (!lwkt_serialize_try(ifp->if_serializer))
606 continue;
608 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING|IFF_POLLING))
609 == (IFF_UP|IFF_RUNNING|IFF_POLLING))
610 ifp->if_poll(ifp, arg, cycles);
612 lwkt_serialize_exit(ifp->if_serializer);
615 schedpollmore(pctx);
616 pctx->phase = 4;
619 static void
620 poll_register(struct netmsg *msg)
622 struct ifnet *ifp = msg->nm_lmsg.u.ms_resultp;
623 struct pollctx *pctx;
624 int rc, cpuid;
626 cpuid = mycpu->gd_cpuid;
627 KKASSERT(cpuid < POLLCTX_MAX);
629 pctx = poll_context[cpuid];
630 KKASSERT(pctx != NULL);
631 KKASSERT(pctx->poll_cpuid == cpuid);
633 if (pctx->polling_enabled == 0) {
634 /* Polling disabled, cannot register */
635 rc = EOPNOTSUPP;
636 goto back;
640 * Check if there is room.
642 if (pctx->poll_handlers >= POLL_LIST_LEN) {
644 * List full, cannot register more entries.
645 * This should never happen; if it does, it is probably a
646 * broken driver trying to register multiple times. Checking
647 * this at runtime is expensive, and won't solve the problem
648 * anyways, so just report a few times and then give up.
650 static int verbose = 10; /* XXX */
651 if (verbose >0) {
652 kprintf("poll handlers list full, "
653 "maybe a broken driver ?\n");
654 verbose--;
656 rc = ENOMEM;
657 } else {
658 pctx->pr[pctx->poll_handlers].ifp = ifp;
659 pctx->poll_handlers++;
660 rc = 0;
662 if (pctx->poll_handlers == 1) {
663 KKASSERT(pctx->polling_enabled);
664 systimer_adjust_periodic(&pctx->pollclock,
665 pctx->pollhz);
668 back:
669 lwkt_replymsg(&msg->nm_lmsg, rc);
673 * Try to register routine for polling. Returns 1 if successful
674 * (and polling should be enabled), 0 otherwise.
676 * Called from mainline code only, not called from an interrupt.
679 ether_poll_register(struct ifnet *ifp)
681 if (poll_defcpu < 0)
682 return 0;
683 KKASSERT(poll_defcpu < POLLCTX_MAX);
685 return ether_pollcpu_register(ifp, poll_defcpu);
689 ether_pollcpu_register(struct ifnet *ifp, int cpuid)
691 struct netmsg msg;
692 lwkt_port_t port;
693 int rc;
695 if (ifp->if_poll == NULL) {
696 /* Device does not support polling */
697 return 0;
700 if (cpuid < 0 || cpuid >= POLLCTX_MAX)
701 return 0;
703 if (((1 << cpuid) & poll_cpumask) == 0) {
704 /* Polling is not supported on 'cpuid' */
705 return 0;
707 KKASSERT(poll_context[cpuid] != NULL);
710 * Attempt to register. Interlock with IFF_POLLING.
712 crit_enter(); /* XXX MP - not mp safe */
714 lwkt_serialize_enter(ifp->if_serializer);
715 if (ifp->if_flags & IFF_POLLING) {
716 /* Already polling */
717 KKASSERT(ifp->if_poll_cpuid >= 0);
718 lwkt_serialize_exit(ifp->if_serializer);
719 crit_exit();
720 return 0;
722 KKASSERT(ifp->if_poll_cpuid < 0);
723 ifp->if_flags |= IFF_POLLING;
724 ifp->if_poll_cpuid = cpuid;
725 if (ifp->if_flags & IFF_RUNNING)
726 ifp->if_poll(ifp, POLL_REGISTER, 0);
727 lwkt_serialize_exit(ifp->if_serializer);
729 netmsg_init(&msg, &curthread->td_msgport, 0, poll_register);
730 msg.nm_lmsg.u.ms_resultp = ifp;
732 port = cpu_portfn(cpuid);
733 lwkt_domsg(port, &msg.nm_lmsg, 0);
735 if (msg.nm_lmsg.ms_error) {
736 lwkt_serialize_enter(ifp->if_serializer);
737 ifp->if_flags &= ~IFF_POLLING;
738 ifp->if_poll_cpuid = -1;
739 if (ifp->if_flags & IFF_RUNNING)
740 ifp->if_poll(ifp, POLL_DEREGISTER, 0);
741 lwkt_serialize_exit(ifp->if_serializer);
742 rc = 0;
743 } else {
744 rc = 1;
747 crit_exit();
748 return rc;
751 static void
752 poll_deregister(struct netmsg *msg)
754 struct ifnet *ifp = msg->nm_lmsg.u.ms_resultp;
755 struct pollctx *pctx;
756 int rc, i, cpuid;
758 cpuid = mycpu->gd_cpuid;
759 KKASSERT(cpuid < POLLCTX_MAX);
761 pctx = poll_context[cpuid];
762 KKASSERT(pctx != NULL);
763 KKASSERT(pctx->poll_cpuid == cpuid);
765 for (i = 0 ; i < pctx->poll_handlers ; i++) {
766 if (pctx->pr[i].ifp == ifp) /* Found it */
767 break;
769 if (i == pctx->poll_handlers) {
770 kprintf("ether_poll_deregister: ifp not found!!!\n");
771 rc = ENOENT;
772 } else {
773 pctx->poll_handlers--;
774 if (i < pctx->poll_handlers) {
775 /* Last entry replaces this one. */
776 pctx->pr[i].ifp = pctx->pr[pctx->poll_handlers].ifp;
779 if (pctx->poll_handlers == 0) {
780 systimer_adjust_periodic(&pctx->pollclock, 1);
781 poll_reset_state(pctx);
783 rc = 0;
785 lwkt_replymsg(&msg->nm_lmsg, rc);
789 * Remove interface from the polling list. Occurs when polling is turned
790 * off. Called from mainline code only, not called from an interrupt.
793 ether_poll_deregister(struct ifnet *ifp)
795 struct netmsg msg;
796 lwkt_port_t port;
797 int rc, cpuid;
799 KKASSERT(ifp != NULL);
801 if (ifp->if_poll == NULL)
802 return 0;
804 crit_enter();
806 lwkt_serialize_enter(ifp->if_serializer);
807 if ((ifp->if_flags & IFF_POLLING) == 0) {
808 KKASSERT(ifp->if_poll_cpuid < 0);
809 lwkt_serialize_exit(ifp->if_serializer);
810 crit_exit();
811 return 0;
814 cpuid = ifp->if_poll_cpuid;
815 KKASSERT(cpuid >= 0);
816 KKASSERT(poll_context[cpuid] != NULL);
818 ifp->if_flags &= ~IFF_POLLING;
819 ifp->if_poll_cpuid = -1;
820 lwkt_serialize_exit(ifp->if_serializer);
822 netmsg_init(&msg, &curthread->td_msgport, 0, poll_deregister);
823 msg.nm_lmsg.u.ms_resultp = ifp;
825 port = cpu_portfn(cpuid);
826 lwkt_domsg(port, &msg.nm_lmsg, 0);
828 if (!msg.nm_lmsg.ms_error) {
829 lwkt_serialize_enter(ifp->if_serializer);
830 if (ifp->if_flags & IFF_RUNNING)
831 ifp->if_poll(ifp, POLL_DEREGISTER, 1);
832 lwkt_serialize_exit(ifp->if_serializer);
833 rc = 1;
834 } else {
835 rc = 0;
838 crit_exit();
839 return rc;
842 static void
843 poll_add_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent,
844 struct pollctx *pctx)
846 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "enable",
847 CTLTYPE_INT | CTLFLAG_RW, pctx, 0, sysctl_polling,
848 "I", "Polling enabled");
850 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "pollhz",
851 CTLTYPE_INT | CTLFLAG_RW, pctx, 0, sysctl_pollhz,
852 "I", "Device polling frequency");
854 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "reg_frac",
855 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_regfrac,
856 "IU", "Every this many cycles poll register");
858 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "burst_max",
859 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_burstmax,
860 "IU", "Max Polling burst size");
862 SYSCTL_ADD_PROC(ctx, parent, OID_AUTO, "each_burst",
863 CTLTYPE_UINT | CTLFLAG_RW, pctx, 0, sysctl_eachburst,
864 "IU", "Max size of each burst");
866 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "phase", CTLFLAG_RD,
867 &pctx->phase, 0, "Polling phase");
869 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "suspect", CTLFLAG_RW,
870 &pctx->suspect, 0, "suspect event");
872 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "stalled", CTLFLAG_RW,
873 &pctx->stalled, 0, "potential stalls");
875 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "burst", CTLFLAG_RD,
876 &pctx->poll_burst, 0, "Current polling burst size");
878 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "user_frac", CTLFLAG_RW,
879 &pctx->user_frac, 0,
880 "Desired user fraction of cpu time");
882 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "short_ticks", CTLFLAG_RW,
883 &pctx->short_ticks, 0,
884 "Hardclock ticks shorter than they should be");
886 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "lost_polls", CTLFLAG_RW,
887 &pctx->lost_polls, 0,
888 "How many times we would have lost a poll tick");
890 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "pending_polls", CTLFLAG_RD,
891 &pctx->pending_polls, 0, "Do we need to poll again");
893 SYSCTL_ADD_INT(ctx, parent, OID_AUTO, "residual_burst", CTLFLAG_RD,
894 &pctx->residual_burst, 0,
895 "# of residual cycles in burst");
897 SYSCTL_ADD_UINT(ctx, parent, OID_AUTO, "handlers", CTLFLAG_RD,
898 &pctx->poll_handlers, 0,
899 "Number of registered poll handlers");
902 static void
903 schedpoll_oncpu(struct pollctx *pctx, struct netmsg *msg, netisr_fn_t handler)
905 if (msg->nm_lmsg.ms_flags & MSGF_DONE) {
906 lwkt_port_t port;
908 netmsg_init(msg, &netisr_adone_rport, 0, handler);
909 #ifdef INVARIANTS
910 msg->nm_lmsg.u.ms_resultp = pctx;
911 #endif
912 port = cpu_portfn(mycpu->gd_cpuid);
913 lwkt_sendmsg(port, &msg->nm_lmsg);
917 static void
918 poll_sysctl_pollhz(struct netmsg *msg)
920 struct pollctx *pctx;
921 int cpuid;
923 cpuid = mycpu->gd_cpuid;
924 KKASSERT(cpuid < POLLCTX_MAX);
926 pctx = poll_context[cpuid];
927 KKASSERT(pctx != NULL);
928 KKASSERT(pctx->poll_cpuid == cpuid);
931 * If polling is disabled or there is no device registered,
932 * don't adjust polling systimer frequency.
933 * Polling systimer frequency will be adjusted once polling
934 * is enabled and there are registered devices.
936 pctx->pollhz = msg->nm_lmsg.u.ms_result;
937 if (pctx->polling_enabled && pctx->poll_handlers)
938 systimer_adjust_periodic(&pctx->pollclock, pctx->pollhz);
941 * Make sure that reg_frac and reg_frac_count are within valid range.
943 if (pctx->reg_frac > pctx->pollhz) {
944 pctx->reg_frac = pctx->pollhz;
945 if (pctx->reg_frac_count > pctx->reg_frac)
946 pctx->reg_frac_count = pctx->reg_frac - 1;
949 lwkt_replymsg(&msg->nm_lmsg, 0);
952 static void
953 poll_sysctl_polling(struct netmsg *msg)
955 struct pollctx *pctx;
956 int cpuid;
958 cpuid = mycpu->gd_cpuid;
959 KKASSERT(cpuid < POLLCTX_MAX);
961 pctx = poll_context[cpuid];
962 KKASSERT(pctx != NULL);
963 KKASSERT(pctx->poll_cpuid == cpuid);
966 * If polling is disabled or there is no device registered,
967 * cut the polling systimer frequency to 1hz.
969 pctx->polling_enabled = msg->nm_lmsg.u.ms_result;
970 if (pctx->polling_enabled && pctx->poll_handlers) {
971 systimer_adjust_periodic(&pctx->pollclock, pctx->pollhz);
972 } else {
973 systimer_adjust_periodic(&pctx->pollclock, 1);
974 poll_reset_state(pctx);
977 if (!pctx->polling_enabled && pctx->poll_handlers != 0) {
978 int i;
980 for (i = 0 ; i < pctx->poll_handlers ; i++) {
981 struct ifnet *ifp = pctx->pr[i].ifp;
983 lwkt_serialize_enter(ifp->if_serializer);
985 if ((ifp->if_flags & IFF_POLLING) == 0) {
986 KKASSERT(ifp->if_poll_cpuid < 0);
987 lwkt_serialize_exit(ifp->if_serializer);
988 continue;
990 ifp->if_flags &= ~IFF_POLLING;
991 ifp->if_poll_cpuid = -1;
994 * Only call the interface deregistration
995 * function if the interface is still
996 * running.
998 if (ifp->if_flags & IFF_RUNNING)
999 ifp->if_poll(ifp, POLL_DEREGISTER, 1);
1001 lwkt_serialize_exit(ifp->if_serializer);
1003 pctx->poll_handlers = 0;
1006 lwkt_replymsg(&msg->nm_lmsg, 0);
1009 static void
1010 poll_sysctl_regfrac(struct netmsg *msg)
1012 struct pollctx *pctx;
1013 uint32_t reg_frac;
1014 int cpuid;
1016 cpuid = mycpu->gd_cpuid;
1017 KKASSERT(cpuid < POLLCTX_MAX);
1019 pctx = poll_context[cpuid];
1020 KKASSERT(pctx != NULL);
1021 KKASSERT(pctx->poll_cpuid == cpuid);
1023 reg_frac = msg->nm_lmsg.u.ms_result;
1024 if (reg_frac > pctx->pollhz)
1025 reg_frac = pctx->pollhz;
1026 else if (reg_frac < 1)
1027 reg_frac = 1;
1029 pctx->reg_frac = reg_frac;
1030 if (pctx->reg_frac_count > pctx->reg_frac)
1031 pctx->reg_frac_count = pctx->reg_frac - 1;
1033 lwkt_replymsg(&msg->nm_lmsg, 0);
1036 static void
1037 poll_sysctl_burstmax(struct netmsg *msg)
1039 struct pollctx *pctx;
1040 int cpuid;
1042 cpuid = mycpu->gd_cpuid;
1043 KKASSERT(cpuid < POLLCTX_MAX);
1045 pctx = poll_context[cpuid];
1046 KKASSERT(pctx != NULL);
1047 KKASSERT(pctx->poll_cpuid == cpuid);
1049 pctx->poll_burst_max = msg->nm_lmsg.u.ms_result;
1050 if (pctx->poll_each_burst > pctx->poll_burst_max)
1051 pctx->poll_each_burst = pctx->poll_burst_max;
1052 if (pctx->poll_burst > pctx->poll_burst_max)
1053 pctx->poll_burst = pctx->poll_burst_max;
1054 if (pctx->residual_burst > pctx->poll_burst_max)
1055 pctx->residual_burst = pctx->poll_burst_max;
1057 lwkt_replymsg(&msg->nm_lmsg, 0);
1060 static void
1061 poll_sysctl_eachburst(struct netmsg *msg)
1063 struct pollctx *pctx;
1064 uint32_t each_burst;
1065 int cpuid;
1067 cpuid = mycpu->gd_cpuid;
1068 KKASSERT(cpuid < POLLCTX_MAX);
1070 pctx = poll_context[cpuid];
1071 KKASSERT(pctx != NULL);
1072 KKASSERT(pctx->poll_cpuid == cpuid);
1074 each_burst = msg->nm_lmsg.u.ms_result;
1075 if (each_burst > pctx->poll_burst_max)
1076 each_burst = pctx->poll_burst_max;
1077 else if (each_burst < 1)
1078 each_burst = 1;
1079 pctx->poll_each_burst = each_burst;
1081 lwkt_replymsg(&msg->nm_lmsg, 0);