sys/net/netisr.c

   1 /*
   2  * Copyright (c) 2003, 2004 Matthew Dillon. All rights reserved.
   3  * Copyright (c) 2003, 2004 Jeffrey M. Hsu.  All rights reserved.
   4  * Copyright (c) 2003 Jonathan Lemon.  All rights reserved.
   5  * Copyright (c) 2003, 2004 The DragonFly Project.  All rights reserved.
   6  *
   7  * This code is derived from software contributed to The DragonFly Project
   8  * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon.
   9  *
  10  * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright
  11  * into this one around July 8 2004.
  12  *
  13  * Redistribution and use in source and binary forms, with or without
  14  * modification, are permitted provided that the following conditions
  15  * are met:
  16  * 1. Redistributions of source code must retain the above copyright
  17  *    notice, this list of conditions and the following disclaimer.
  18  * 2. Redistributions in binary form must reproduce the above copyright
  19  *    notice, this list of conditions and the following disclaimer in the
  20  *    documentation and/or other materials provided with the distribution.
  21  * 3. Neither the name of The DragonFly Project nor the names of its
  22  *    contributors may be used to endorse or promote products derived
  23  *    from this software without specific, prior written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  28  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  29  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  30  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  31  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  32  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  33  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  34  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  35  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  */
  38
  39 #include <sys/param.h>
  40 #include <sys/systm.h>
  41 #include <sys/kernel.h>
  42 #include <sys/malloc.h>
  43 #include <sys/msgport.h>
  44 #include <sys/proc.h>
  45 #include <sys/interrupt.h>
  46 #include <sys/socket.h>
  47 #include <sys/sysctl.h>
  48 #include <sys/socketvar.h>
  49 #include <net/if.h>
  50 #include <net/if_var.h>
  51 #include <net/netisr2.h>
  52 #include <machine/cpufunc.h>
  53 #include <machine/smp.h>
  54
  55 #include <sys/thread2.h>
  56 #include <sys/msgport2.h>
  57 #include <net/netmsg2.h>
  58 #include <sys/mplock2.h>
  59
  60 static void netmsg_service_port_init(lwkt_port_t);
  61 static void netmsg_service_loop(void *arg);
  62 static void netisr_hashfn0(struct mbuf **mp, int hoff);
  63 static void netisr_nohashck(struct mbuf *, const struct pktinfo *);
  64
/*
 * One record per message port that has been set up through
 * netmsg_service_port_init().  Records are linked on netreglist so that
 * netmsg_service_sync() can walk every registered port.
 */
struct netmsg_port_registration {
        TAILQ_ENTRY(netmsg_port_registration) npr_entry; /* netreglist linkage */
        lwkt_port_t     npr_port;       /* the registered message port */
};
  69
/*
 * A registered "rollup" callback.  netmsg_service_loop() calls every
 * ru_func on netrulist after it has processed a batch of messages.
 * netisr_register_rollup() keeps the list sorted so that entries with a
 * larger ru_prio come first.
 */
struct netmsg_rollup {
        TAILQ_ENTRY(netmsg_rollup) ru_entry;    /* netrulist linkage */
        netisr_ru_t     ru_func;        /* callback to invoke */
        int             ru_prio;        /* ordering key, larger runs earlier */
};
  75
/*
 * Message sent to one netisr thread by netisr_barrier_set().  The
 * receiving thread (netisr_barrier_dispatch()) clears its cpu bit in
 * *br_cpumask and then waits until NETISR_BR_NOTDONE is cleared in
 * br_done by netisr_barrier_rem().
 */
struct netmsg_barrier {
        struct netmsg_base      base;           /* message header */
        volatile cpumask_t      *br_cpumask;    /* mask of cpus still to check in */
        volatile uint32_t       br_done;        /* NETISR_BR_* state bits */
};
  81
/*
 * Bits kept in netmsg_barrier.br_done:
 *
 * NETISR_BR_NOTDONE  - set by netisr_barrier_set(); the dispatch function
 *                      keeps waiting while this bit is set.
 * NETISR_BR_WAITDONE - set by the dispatch function just before it sleeps,
 *                      so netisr_barrier_rem() knows a wakeup() is needed.
 */
#define NETISR_BR_NOTDONE       0x1
#define NETISR_BR_WAITDONE      0x80000000
  84
/*
 * Barrier handle returned by netisr_barrier_create().  While the barrier
 * is set, br_msgs[] holds the barrier message sent to each cpu other than
 * the one that set it; the slots are NULL otherwise.
 */
struct netisr_barrier {
        struct netmsg_barrier   *br_msgs[MAXCPU];       /* indexed by cpu id */
        int                     br_isset;       /* non-zero between set and rem */
};
  89
/* Per-cpu record of the dispatch function most recently run by the service loop. */
void *netlastfunc[MAXCPU];
/* Registered netisr handlers, indexed by isr number. */
static struct netisr netisrs[NETISR_MAX];
/* All ports registered via netmsg_service_port_init(). */
static TAILQ_HEAD(,netmsg_port_registration) netreglist;
/* Registered rollup callbacks, highest priority first. */
static TAILQ_HEAD(,netmsg_rollup) netrulist;

/* Per-CPU thread to handle any protocol.  */
struct thread *netisr_threads[MAXCPU];

/* Special-purpose ports; they are initialized in netisr_init(). */
lwkt_port netisr_afree_rport;
lwkt_port netisr_afree_free_so_rport;
lwkt_port netisr_adone_rport;
lwkt_port netisr_apanic_rport;
lwkt_port netisr_sync_port;

/*
 * The putport function a service port had before
 * netmsg_service_port_init() replaced it with netmsg_put_port();
 * netmsg_put_port() forwards to it for the non-self-referential case.
 */
static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t);
 105
SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr");

/*
 * Number of messages netmsg_service_loop() processes back to back before
 * it runs the registered rollup functions.
 */
static int netisr_rollup_limit = 32;
SYSCTL_INT(_net_netisr, OID_AUTO, rollup_limit, CTLFLAG_RW,
        &netisr_rollup_limit, 0, "Message to process before rollup");

/*
 * Boot-time tunable; netisr_init() replaces non-positive values with
 * ncpus2 and clamps the result to ncpus and NETISR_CPUMAX.
 */
int netisr_ncpus;
TUNABLE_INT("net.netisr.ncpus", &netisr_ncpus);
SYSCTL_INT(_net_netisr, OID_AUTO, ncpus, CTLFLAG_RD,
        &netisr_ncpus, 0, "# of CPUs to handle network messages");
 116
 117 /*
 118  * netisr_afree_rport replymsg function, only used to handle async
 119  * messages which the sender has abandoned to their fate.
 120  */
 121 static void
 122 netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg)
 123 {
 124         kfree(msg, M_LWKTMSG);
 125 }
 126
 127 static void
 128 netisr_autofree_free_so_reply(lwkt_port_t port, lwkt_msg_t msg)
 129 {
 130         sofree(((netmsg_t)msg)->base.nm_so);
 131         kfree(msg, M_LWKTMSG);
 132 }
 133
 134 /*
 135  * We need a custom putport function to handle the case where the
 136  * message target is the current thread's message port.  This case
 137  * can occur when the TCP or UDP stack does a direct callback to NFS and NFS
 138  * then turns around and executes a network operation synchronously.
 139  *
 140  * To prevent deadlocking, we must execute these self-referential messages
 141  * synchronously, effectively turning the message into a glorified direct
 142  * procedure call back into the protocol stack.  The operation must be
 143  * complete on return or we will deadlock, so panic if it isn't.
 144  *
 145  * However, the target function is under no obligation to immediately
 146  * reply the message.  It may forward it elsewhere.
 147  */
 148 static int
 149 netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg)
 150 {
 151         netmsg_base_t nmsg = (void *)lmsg;
 152
 153         if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) {
 154                 nmsg->nm_dispatch((netmsg_t)nmsg);
 155                 return(EASYNC);
 156         } else {
 157                 return(netmsg_fwd_port_fn(port, lmsg));
 158         }
 159 }
 160
 161 /*
 162  * UNIX DOMAIN sockets still have to run their uipc functions synchronously,
 163  * because they depend on the user proc context for a number of things
 164  * (like creds) which we have not yet incorporated into the message structure.
 165  *
 166  * However, we maintain or message/port abstraction.  Having a special
 167  * synchronous port which runs the commands synchronously gives us the
 168  * ability to serialize operations in one place later on when we start
 169  * removing the BGL.
 170  */
 171 static int
 172 netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg)
 173 {
 174         netmsg_base_t nmsg = (void *)lmsg;
 175
 176         KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0);
 177
 178         lmsg->ms_target_port = port;    /* required for abort */
 179         nmsg->nm_dispatch((netmsg_t)nmsg);
 180         return(EASYNC);
 181 }
 182
 183 static void
 184 netisr_init(void)
 185 {
 186         int i;
 187
 188         if (netisr_ncpus <= 0) {
 189                 /* Default. */
 190                 netisr_ncpus = ncpus2;
 191         } else if (netisr_ncpus > ncpus) {
 192                 netisr_ncpus = ncpus;
 193         }
 194         if (netisr_ncpus > NETISR_CPUMAX)
 195                 netisr_ncpus = NETISR_CPUMAX;
 196
 197         TAILQ_INIT(&netreglist);
 198         TAILQ_INIT(&netrulist);
 199
 200         /*
 201          * Create default per-cpu threads for generic protocol handling.
 202          */
 203         for (i = 0; i < ncpus; ++i) {
 204                 lwkt_create(netmsg_service_loop, NULL, &netisr_threads[i], NULL,
 205                             TDF_NOSTART|TDF_FORCE_SPINPORT|TDF_FIXEDCPU,
 206                             i, "netisr %d", i);
 207                 netmsg_service_port_init(&netisr_threads[i]->td_msgport);
 208                 lwkt_schedule(netisr_threads[i]);
 209         }
 210
 211         /*
 212          * The netisr_afree_rport is a special reply port which automatically
 213          * frees the replied message.  The netisr_adone_rport simply marks
 214          * the message as being done.  The netisr_apanic_rport panics if
 215          * the message is replied to.
 216          */
 217         lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply);
 218         lwkt_initport_replyonly(&netisr_afree_free_so_rport,
 219                                 netisr_autofree_free_so_reply);
 220         lwkt_initport_replyonly_null(&netisr_adone_rport);
 221         lwkt_initport_panic(&netisr_apanic_rport);
 222
 223         /*
 224          * The netisr_syncport is a special port which executes the message
 225          * synchronously and waits for it if EASYNC is returned.
 226          */
 227         lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport);
 228 }
 229 SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL);
 230
 231 /*
 232  * Finish initializing the message port for a netmsg service.  This also
 233  * registers the port for synchronous cleanup operations such as when an
 234  * ifnet is being destroyed.  There is no deregistration API yet.
 235  */
 236 static void
 237 netmsg_service_port_init(lwkt_port_t port)
 238 {
 239         struct netmsg_port_registration *reg;
 240
 241         /*
 242          * Override the putport function.  Our custom function checks for
 243          * self-references and executes such commands synchronously.
 244          */
 245         if (netmsg_fwd_port_fn == NULL)
 246                 netmsg_fwd_port_fn = port->mp_putport;
 247         KKASSERT(netmsg_fwd_port_fn == port->mp_putport);
 248         port->mp_putport = netmsg_put_port;
 249
 250         /*
 251          * Keep track of ports using the netmsg API so we can synchronize
 252          * certain operations (such as freeing an ifnet structure) across all
 253          * consumers.
 254          */
 255         reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO);
 256         reg->npr_port = port;
 257         TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry);
 258 }
 259
 260 /*
 261  * This function synchronizes the caller with all netmsg services.  For
 262  * example, if an interface is being removed we must make sure that all
 263  * packets related to that interface complete processing before the structure
 264  * can actually be freed.  This sort of synchronization is an alternative to
 265  * ref-counting the netif, removing the ref counting overhead in favor of
 266  * placing additional overhead in the netif freeing sequence (where it is
 267  * inconsequential).
 268  */
 269 void
 270 netmsg_service_sync(void)
 271 {
 272         struct netmsg_port_registration *reg;
 273         struct netmsg_base smsg;
 274
 275         netmsg_init(&smsg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler);
 276
 277         TAILQ_FOREACH(reg, &netreglist, npr_entry) {
 278                 lwkt_domsg(reg->npr_port, &smsg.lmsg, 0);
 279         }
 280 }
 281
/*
 * Dispatch function for the message sent by netmsg_service_sync().  It
 * performs no work of its own and simply replies the message with 0.
 */
void
netmsg_sync_handler(netmsg_t msg)
{
        lwkt_replymsg(&msg->lmsg, 0);
}
 291
 292 /*
 293  * Generic netmsg service loop.  Some protocols may roll their own but all
 294  * must do the basic command dispatch function call done here.
 295  */
 296 static void
 297 netmsg_service_loop(void *arg)
 298 {
 299         struct netmsg_rollup *ru;
 300         netmsg_base_t msg;
 301         thread_t td = curthread;
 302         int limit;
 303
 304         td->td_type = TD_TYPE_NETISR;
 305
 306         while ((msg = lwkt_waitport(&td->td_msgport, 0))) {
 307                 /*
 308                  * Run up to 512 pending netmsgs.
 309                  */
 310                 limit = netisr_rollup_limit;
 311                 do {
 312                         KASSERT(msg->nm_dispatch != NULL,
 313                                 ("netmsg_service isr %d badmsg",
 314                                 msg->lmsg.u.ms_result));
 315                         /*
 316                          * Don't match so_port, if the msg explicitly
 317                          * asks us to ignore its so_port.
 318                          */
 319                         if ((msg->lmsg.ms_flags & MSGF_IGNSOPORT) == 0 &&
 320                             msg->nm_so &&
 321                             msg->nm_so->so_port != &td->td_msgport) {
 322                                 /*
 323                                  * Sockets undergoing connect or disconnect
 324                                  * ops can change ports on us.  Chase the
 325                                  * port.
 326                                  */
 327 #ifdef foo
 328                                 /*
 329                                  * This could be quite common for protocols
 330                                  * which support asynchronous pru_connect,
 331                                  * e.g. TCP, so kprintf socket port chasing
 332                                  * could be too verbose for the console.
 333                                  */
 334                                 kprintf("%s: Warning, port changed so=%p\n",
 335                                         __func__, msg->nm_so);
 336 #endif
 337                                 lwkt_forwardmsg(msg->nm_so->so_port,
 338                                                 &msg->lmsg);
 339                         } else {
 340                                 /*
 341                                  * We are on the correct port, dispatch it.
 342                                  */
 343                                 netlastfunc[mycpuid] = msg->nm_dispatch;
 344                                 msg->nm_dispatch((netmsg_t)msg);
 345                         }
 346                         if (--limit == 0)
 347                                 break;
 348                 } while ((msg = lwkt_getport(&td->td_msgport)) != NULL);
 349
 350                 /*
 351                  * Run all registered rollup functions for this cpu
 352                  * (e.g. tcp_willblock()).
 353                  */
 354                 TAILQ_FOREACH(ru, &netrulist, ru_entry)
 355                         ru->ru_func();
 356         }
 357 }
 358
 359 /*
 360  * Forward a packet to a netisr service function.
 361  *
 362  * If the packet has not been assigned to a protocol thread we call
 363  * the port characterization function to assign it.  The caller must
 364  * clear M_HASH (or not have set it in the first place) if the caller
 365  * wishes the packet to be recharacterized.
 366  */
 367 int
 368 netisr_queue(int num, struct mbuf *m)
 369 {
 370         struct netisr *ni;
 371         struct netmsg_packet *pmsg;
 372         lwkt_port_t port;
 373
 374         KASSERT((num > 0 && num <= NELEM(netisrs)),
 375                 ("Bad isr %d", num));
 376
 377         ni = &netisrs[num];
 378         if (ni->ni_handler == NULL) {
 379                 kprintf("%s: Unregistered isr %d\n", __func__, num);
 380                 m_freem(m);
 381                 return (EIO);
 382         }
 383
 384         /*
 385          * Figure out which protocol thread to send to.  This does not
 386          * have to be perfect but performance will be really good if it
 387          * is correct.  Major protocol inputs such as ip_input() will
 388          * re-characterize the packet as necessary.
 389          */
 390         if ((m->m_flags & M_HASH) == 0) {
 391                 ni->ni_hashfn(&m, 0);
 392                 if (m == NULL)
 393                         return (EIO);
 394                 if ((m->m_flags & M_HASH) == 0) {
 395                         kprintf("%s(%d): packet hash failed\n",
 396                                 __func__, num);
 397                         m_freem(m);
 398                         return (EIO);
 399                 }
 400         }
 401
 402         /*
 403          * Get the protocol port based on the packet hash, initialize
 404          * the netmsg, and send it off.
 405          */
 406         port = netisr_hashport(m->m_pkthdr.hash);
 407         pmsg = &m->m_hdr.mh_netmsg;
 408         netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
 409                     0, ni->ni_handler);
 410         pmsg->nm_packet = m;
 411         pmsg->base.lmsg.u.ms_result = num;
 412         lwkt_sendmsg(port, &pmsg->base.lmsg);
 413
 414         return (0);
 415 }
 416
 417 /*
 418  * Run a netisr service function on the packet.
 419  *
 420  * The packet must have been correctly characterized!
 421  */
 422 int
 423 netisr_handle(int num, struct mbuf *m)
 424 {
 425         struct netisr *ni;
 426         struct netmsg_packet *pmsg;
 427         lwkt_port_t port;
 428
 429         /*
 430          * Get the protocol port based on the packet hash
 431          */
 432         KASSERT((m->m_flags & M_HASH), ("packet not characterized"));
 433         port = netisr_hashport(m->m_pkthdr.hash);
 434         KASSERT(&curthread->td_msgport == port, ("wrong msgport"));
 435
 436         KASSERT((num > 0 && num <= NELEM(netisrs)), ("bad isr %d", num));
 437         ni = &netisrs[num];
 438         if (ni->ni_handler == NULL) {
 439                 kprintf("%s: unregistered isr %d\n", __func__, num);
 440                 m_freem(m);
 441                 return EIO;
 442         }
 443
 444         /*
 445          * Initialize the netmsg, and run the handler directly.
 446          */
 447         pmsg = &m->m_hdr.mh_netmsg;
 448         netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
 449                     0, ni->ni_handler);
 450         pmsg->nm_packet = m;
 451         pmsg->base.lmsg.u.ms_result = num;
 452         ni->ni_handler((netmsg_t)&pmsg->base);
 453
 454         return 0;
 455 }
 456
 457 /*
 458  * Pre-characterization of a deeper portion of the packet for the
 459  * requested isr.
 460  *
 461  * The base of the ISR type (e.g. IP) that we want to characterize is
 462  * at (hoff) relative to the beginning of the mbuf.  This allows
 463  * e.g. ether_characterize() to not have to adjust the m_data/m_len.
 464  */
 465 void
 466 netisr_characterize(int num, struct mbuf **mp, int hoff)
 467 {
 468         struct netisr *ni;
 469         struct mbuf *m;
 470
 471         /*
 472          * Validation
 473          */
 474         m = *mp;
 475         KKASSERT(m != NULL);
 476
 477         if (num < 0 || num >= NETISR_MAX) {
 478                 if (num == NETISR_MAX) {
 479                         m_sethash(m, 0);
 480                         return;
 481                 }
 482                 panic("Bad isr %d", num);
 483         }
 484
 485         /*
 486          * Valid netisr?
 487          */
 488         ni = &netisrs[num];
 489         if (ni->ni_handler == NULL) {
 490                 kprintf("%s: Unregistered isr %d\n", __func__, num);
 491                 m_freem(m);
 492                 *mp = NULL;
 493         }
 494
 495         /*
 496          * Characterize the packet
 497          */
 498         if ((m->m_flags & M_HASH) == 0) {
 499                 ni->ni_hashfn(mp, hoff);
 500                 m = *mp;
 501                 if (m && (m->m_flags & M_HASH) == 0) {
 502                         kprintf("%s(%d): packet hash failed\n",
 503                                 __func__, num);
 504                 }
 505         }
 506 }
 507
 508 void
 509 netisr_register(int num, netisr_fn_t handler, netisr_hashfn_t hashfn)
 510 {
 511         struct netisr *ni;
 512
 513         KASSERT((num > 0 && num <= NELEM(netisrs)),
 514                 ("netisr_register: bad isr %d", num));
 515         KKASSERT(handler != NULL);
 516
 517         if (hashfn == NULL)
 518                 hashfn = netisr_hashfn0;
 519
 520         ni = &netisrs[num];
 521
 522         ni->ni_handler = handler;
 523         ni->ni_hashck = netisr_nohashck;
 524         ni->ni_hashfn = hashfn;
 525         netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, 0, NULL);
 526 }
 527
 528 void
 529 netisr_register_hashcheck(int num, netisr_hashck_t hashck)
 530 {
 531         struct netisr *ni;
 532
 533         KASSERT((num > 0 && num <= NELEM(netisrs)),
 534                 ("netisr_register: bad isr %d", num));
 535
 536         ni = &netisrs[num];
 537         ni->ni_hashck = hashck;
 538 }
 539
 540 void
 541 netisr_register_rollup(netisr_ru_t ru_func, int prio)
 542 {
 543         struct netmsg_rollup *new_ru, *ru;
 544
 545         new_ru = kmalloc(sizeof(*new_ru), M_TEMP, M_WAITOK|M_ZERO);
 546         new_ru->ru_func = ru_func;
 547         new_ru->ru_prio = prio;
 548
 549         /*
 550          * Higher priority "rollup" appears first
 551          */
 552         TAILQ_FOREACH(ru, &netrulist, ru_entry) {
 553                 if (ru->ru_prio < new_ru->ru_prio) {
 554                         TAILQ_INSERT_BEFORE(ru, new_ru, ru_entry);
 555                         return;
 556                 }
 557         }
 558         TAILQ_INSERT_TAIL(&netrulist, new_ru, ru_entry);
 559 }
 560
 561 /*
 562  * Return a default protocol control message processing thread port
 563  */
 564 lwkt_port_t
 565 cpu0_ctlport(int cmd __unused, struct sockaddr *sa __unused,
 566     void *extra __unused, int *cpuid)
 567 {
 568         *cpuid = 0;
 569         return netisr_cpuport(*cpuid);
 570 }
 571
 572 /*
 573  * This is a default netisr packet characterization function which
 574  * sets M_HASH.  If a netisr is registered with a NULL hashfn function
 575  * this one is assigned.
 576  *
 577  * This function makes no attempt to validate the packet.
 578  */
 579 static void
 580 netisr_hashfn0(struct mbuf **mp, int hoff __unused)
 581 {
 582
 583         m_sethash(*mp, 0);
 584 }
 585
 586 /*
 587  * schednetisr() is used to call the netisr handler from the appropriate
 588  * netisr thread for polling and other purposes.
 589  *
 590  * This function may be called from a hard interrupt or IPI and must be
 591  * MP SAFE and non-blocking.  We use a fixed per-cpu message instead of
 592  * trying to allocate one.  We must get ourselves onto the target cpu
 593  * to safely check the MSGF_DONE bit on the message but since the message
 594  * will be sent to that cpu anyway this does not add any extra work beyond
 595  * what lwkt_sendmsg() would have already had to do to schedule the target
 596  * thread.
 597  */
 598 static void
 599 schednetisr_remote(void *data)
 600 {
 601         int num = (int)(intptr_t)data;
 602         struct netisr *ni = &netisrs[num];
 603         lwkt_port_t port = &netisr_threads[0]->td_msgport;
 604         netmsg_base_t pmsg;
 605
 606         pmsg = &netisrs[num].ni_netmsg;
 607         if (pmsg->lmsg.ms_flags & MSGF_DONE) {
 608                 netmsg_init(pmsg, NULL, &netisr_adone_rport, 0, ni->ni_handler);
 609                 pmsg->lmsg.u.ms_result = num;
 610                 lwkt_sendmsg(port, &pmsg->lmsg);
 611         }
 612 }
 613
 614 void
 615 schednetisr(int num)
 616 {
 617         KASSERT((num > 0 && num <= NELEM(netisrs)),
 618                 ("schednetisr: bad isr %d", num));
 619         KKASSERT(netisrs[num].ni_handler != NULL);
 620         if (mycpu->gd_cpuid != 0) {
 621                 lwkt_send_ipiq(globaldata_find(0),
 622                                schednetisr_remote, (void *)(intptr_t)num);
 623         } else {
 624                 crit_enter();
 625                 schednetisr_remote((void *)(intptr_t)num);
 626                 crit_exit();
 627         }
 628 }
 629
/*
 * Dispatch function for the barrier message sent by netisr_barrier_set().
 *
 * Clears the current cpu's bit in the shared cpumask, waking the setter
 * once the mask is empty, and then holds this thread here until
 * netisr_barrier_rem() clears NETISR_BR_NOTDONE in br_done.  Finally the
 * message is replied; the setter initialized it with netisr_afree_rport
 * as its reply port.
 */
static void
netisr_barrier_dispatch(netmsg_t nmsg)
{
        struct netmsg_barrier *msg = (struct netmsg_barrier *)nmsg;

        /* Check in; the setter sleeps on the address of the cpumask. */
        ATOMIC_CPUMASK_NANDBIT(*msg->br_cpumask, mycpu->gd_cpuid);
        if (CPUMASK_TESTZERO(*msg->br_cpumask))
                wakeup(msg->br_cpumask);

        for (;;) {
                uint32_t done = msg->br_done;

                cpu_ccfence();
                if ((done & NETISR_BR_NOTDONE) == 0)
                        break;

                /*
                 * Advertise that we are about to sleep by setting
                 * NETISR_BR_WAITDONE.  If br_done changed since it was
                 * sampled the cmpset fails and the state is re-read
                 * instead of sleeping.
                 */
                tsleep_interlock(&msg->br_done, 0);
                if (atomic_cmpset_int(&msg->br_done,
                    done, done | NETISR_BR_WAITDONE))
                        tsleep(&msg->br_done, PINTERLOCKED, "nbrdsp", 0);
        }

        lwkt_replymsg(&nmsg->lmsg, 0);
}
 654
 655 struct netisr_barrier *
 656 netisr_barrier_create(void)
 657 {
 658         struct netisr_barrier *br;
 659
 660         br = kmalloc(sizeof(*br), M_LWKTMSG, M_WAITOK | M_ZERO);
 661         return br;
 662 }
 663
/*
 * Set the barrier: send a barrier message to the netisr port of every
 * other cpu and wait until each of them has started executing it.  The
 * receiving threads stay inside netisr_barrier_dispatch() until
 * netisr_barrier_rem() is called.
 *
 * The barrier must not already be set (asserted); ASSERT_IN_NETISR(0)
 * checks the calling context.
 */
void
netisr_barrier_set(struct netisr_barrier *br)
{
        volatile cpumask_t other_cpumask;
        int i, cur_cpuid;

        ASSERT_IN_NETISR(0);
        KKASSERT(!br->br_isset);

        /* Active cpus other than this one; each clears its bit on arrival. */
        other_cpumask = mycpu->gd_other_cpus;
        CPUMASK_ANDMASK(other_cpumask, smp_active_mask);
        cur_cpuid = mycpuid;

        /* Allocate and prepare every message before sending any of them. */
        for (i = 0; i < ncpus; ++i) {
                struct netmsg_barrier *msg;

                if (i == cur_cpuid)
                        continue;

                msg = kmalloc(sizeof(struct netmsg_barrier),
                              M_LWKTMSG, M_WAITOK);

                /*
                 * Don't use priority message here; mainly to keep
                 * it ordered w/ the previous data packets sent by
                 * the caller.
                 */
                netmsg_init(&msg->base, NULL, &netisr_afree_rport, 0,
                            netisr_barrier_dispatch);
                msg->br_cpumask = &other_cpumask;
                msg->br_done = NETISR_BR_NOTDONE;

                KKASSERT(br->br_msgs[i] == NULL);
                br->br_msgs[i] = msg;
        }

        for (i = 0; i < ncpus; ++i) {
                if (i == cur_cpuid)
                        continue;
                lwkt_sendmsg(netisr_cpuport(i), &br->br_msgs[i]->base.lmsg);
        }

        /*
         * Sleep until every target has cleared its bit; the mask is
         * re-tested after tsleep_interlock() and before sleeping.
         */
        while (CPUMASK_TESTNZERO(other_cpumask)) {
                tsleep_interlock(&other_cpumask, 0);
                if (CPUMASK_TESTNZERO(other_cpumask))
                        tsleep(&other_cpumask, PINTERLOCKED, "nbrset", 0);
        }
        br->br_isset = 1;
}
 713
 714 void
 715 netisr_barrier_rem(struct netisr_barrier *br)
 716 {
 717         int i, cur_cpuid;
 718
 719         ASSERT_IN_NETISR(0);
 720         KKASSERT(br->br_isset);
 721
 722         cur_cpuid = mycpuid;
 723         for (i = 0; i < ncpus; ++i) {
 724                 struct netmsg_barrier *msg = br->br_msgs[i];
 725                 uint32_t done;
 726
 727                 msg = br->br_msgs[i];
 728                 br->br_msgs[i] = NULL;
 729
 730                 if (i == cur_cpuid)
 731                         continue;
 732
 733                 done = atomic_swap_int(&msg->br_done, 0);
 734                 if (done & NETISR_BR_WAITDONE)
 735                         wakeup(&msg->br_done);
 736         }
 737         br->br_isset = 0;
 738 }
 739
 740 static void
 741 netisr_nohashck(struct mbuf *m, const struct pktinfo *pi __unused)
 742 {
 743         m->m_flags &= ~M_HASH;
 744 }
 745
 746 void
 747 netisr_hashcheck(int num, struct mbuf *m, const struct pktinfo *pi)
 748 {
 749         struct netisr *ni;
 750
 751         if (num < 0 || num >= NETISR_MAX)
 752                 panic("Bad isr %d", num);
 753
 754         /*
 755          * Valid netisr?
 756          */
 757         ni = &netisrs[num];
 758         if (ni->ni_handler == NULL)
 759                 panic("Unregistered isr %d", num);
 760
 761         ni->ni_hashck(m, pi);
 762 }