sys/net/netisr.c

   1 /*
   2  * Copyright (c) 2003, 2004 Matthew Dillon. All rights reserved.
   3  * Copyright (c) 2003, 2004 Jeffrey M. Hsu.  All rights reserved.
   4  * Copyright (c) 2003 Jonathan Lemon.  All rights reserved.
   5  * Copyright (c) 2003, 2004 The DragonFly Project.  All rights reserved.
   6  *
   7  * This code is derived from software contributed to The DragonFly Project
   8  * by Jonathan Lemon, Jeffrey M. Hsu, and Matthew Dillon.
   9  *
  10  * Jonathan Lemon gave Jeffrey Hsu permission to combine his copyright
  11  * into this one around July 8 2004.
  12  *
  13  * Redistribution and use in source and binary forms, with or without
  14  * modification, are permitted provided that the following conditions
  15  * are met:
  16  * 1. Redistributions of source code must retain the above copyright
  17  *    notice, this list of conditions and the following disclaimer.
  18  * 2. Redistributions in binary form must reproduce the above copyright
  19  *    notice, this list of conditions and the following disclaimer in the
  20  *    documentation and/or other materials provided with the distribution.
  21  * 3. Neither the name of The DragonFly Project nor the names of its
  22  *    contributors may be used to endorse or promote products derived
  23  *    from this software without specific, prior written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  28  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  29  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  30  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  31  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  32  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  33  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  34  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  35  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  */
  38
  39 #include <sys/param.h>
  40 #include <sys/systm.h>
  41 #include <sys/kernel.h>
  42 #include <sys/malloc.h>
  43 #include <sys/msgport.h>
  44 #include <sys/proc.h>
  45 #include <sys/interrupt.h>
  46 #include <sys/socket.h>
  47 #include <sys/sysctl.h>
  48 #include <sys/socketvar.h>
  49 #include <net/if.h>
  50 #include <net/if_var.h>
  51 #include <net/netisr2.h>
  52 #include <machine/cpufunc.h>
  53 #include <machine/smp.h>
  54
  55 #include <sys/thread2.h>
  56 #include <sys/msgport2.h>
  57 #include <net/netmsg2.h>
  58 #include <sys/mplock2.h>
  59
/*
 * Forward declarations for file-local helpers that are referenced before
 * their definitions appear later in this file.
 */
static void netmsg_service_loop(void *arg);
static void netisr_hashfn0(struct mbuf **mp, int hoff);
static void netisr_nohashck(struct mbuf *, const struct pktinfo *);
  63
/*
 * One entry per message port registered through
 * netmsg_service_port_init().  Entries are linked on netreglist so that
 * netmsg_service_sync() can visit every registered port.
 */
struct netmsg_port_registration {
        TAILQ_ENTRY(netmsg_port_registration) npr_entry; /* netreglist linkage */
        lwkt_port_t     npr_port;       /* the registered message port */
};
  68
/*
 * A registered rollup callback.  Entries are linked on netrulist, which
 * netisr_register_rollup() keeps ordered by descending ru_prio, and are
 * invoked by netmsg_service_loop() after each batch of messages.
 */
struct netmsg_rollup {
        TAILQ_ENTRY(netmsg_rollup) ru_entry;    /* netrulist linkage */
        netisr_ru_t     ru_func;        /* callback, called with no arguments */
        int             ru_prio;        /* larger values are placed earlier */
};
  74
/*
 * Message sent by netisr_barrier_set() to every other cpu's netisr
 * thread and handled by netisr_barrier_dispatch().
 */
struct netmsg_barrier {
        struct netmsg_base      base;           /* message header */
        /*
         * Points at the initiator's cpumask; each handler clears its own
         * cpu bit and wakes the initiator once the mask is empty.
         */
        volatile cpumask_t      *br_cpumask;
        /* NETISR_BR_* flag bits, see below */
        volatile uint32_t       br_done;
};

/*
 * br_done flag bits.
 *
 * NETISR_BR_NOTDONE is set when the barrier message is created and is
 * cleared by netisr_barrier_rem(); the handler does not reply while it
 * is set.  NETISR_BR_WAITDONE is set by the handler just before it
 * sleeps so that netisr_barrier_rem() knows a wakeup() is needed.
 */
#define NETISR_BR_NOTDONE       0x1
#define NETISR_BR_WAITDONE      0x80000000
  83
/*
 * Barrier handle returned by netisr_barrier_create() and operated on by
 * netisr_barrier_set() / netisr_barrier_rem().
 */
struct netisr_barrier {
        /* per-cpu barrier message, non-NULL only while the barrier is set */
        struct netmsg_barrier   *br_msgs[MAXCPU];
        /* non-zero between netisr_barrier_set() and netisr_barrier_rem() */
        int                     br_isset;
};
  88
/* Table of registered netisrs, indexed by isr number. */
static struct netisr netisrs[NETISR_MAX];
/* Ports registered via netmsg_service_port_init(). */
static TAILQ_HEAD(,netmsg_port_registration) netreglist;
/* Rollup callbacks registered via netisr_register_rollup(). */
static TAILQ_HEAD(,netmsg_rollup) netrulist;

/* Per-CPU thread to handle any protocol.  */
struct thread netisr_cpu[MAXCPU];
/* Special-purpose ports, all initialized in netisr_init(). */
lwkt_port netisr_afree_rport;           /* reply frees the message */
lwkt_port netisr_afree_free_so_rport;   /* reply does sofree() + frees message */
lwkt_port netisr_adone_rport;           /* reply-only "null" port */
lwkt_port netisr_apanic_rport;          /* initialized with lwkt_initport_panic() */
lwkt_port netisr_sync_port;             /* put runs the message synchronously */

/*
 * The putport function that was installed on a service port before
 * netmsg_service_port_init() replaced it with netmsg_put_port().
 */
static int (*netmsg_fwd_port_fn)(lwkt_port_t, lwkt_msg_t);

SYSCTL_NODE(_net, OID_AUTO, netisr, CTLFLAG_RW, 0, "netisr");
/*
 * Number of messages netmsg_service_loop() dispatches in one batch
 * before it runs the registered rollup functions.
 */
static int netisr_rollup_limit = 32;
SYSCTL_INT(_net_netisr, OID_AUTO, rollup_limit, CTLFLAG_RW,
        &netisr_rollup_limit, 0, "Message to process before rollup");
 107
 108
 109 /*
 110  * netisr_afree_rport replymsg function, only used to handle async
 111  * messages which the sender has abandoned to their fate.
 112  */
 113 static void
 114 netisr_autofree_reply(lwkt_port_t port, lwkt_msg_t msg)
 115 {
 116         kfree(msg, M_LWKTMSG);
 117 }
 118
 119 static void
 120 netisr_autofree_free_so_reply(lwkt_port_t port, lwkt_msg_t msg)
 121 {
 122         sofree(((netmsg_t)msg)->base.nm_so);
 123         kfree(msg, M_LWKTMSG);
 124 }
 125
 126 /*
 127  * We need a custom putport function to handle the case where the
 128  * message target is the current thread's message port.  This case
 129  * can occur when the TCP or UDP stack does a direct callback to NFS and NFS
 130  * then turns around and executes a network operation synchronously.
 131  *
 132  * To prevent deadlocking, we must execute these self-referential messages
 133  * synchronously, effectively turning the message into a glorified direct
 134  * procedure call back into the protocol stack.  The operation must be
 135  * complete on return or we will deadlock, so panic if it isn't.
 136  *
 137  * However, the target function is under no obligation to immediately
 138  * reply the message.  It may forward it elsewhere.
 139  */
 140 static int
 141 netmsg_put_port(lwkt_port_t port, lwkt_msg_t lmsg)
 142 {
 143         netmsg_base_t nmsg = (void *)lmsg;
 144
 145         if ((lmsg->ms_flags & MSGF_SYNC) && port == &curthread->td_msgport) {
 146                 nmsg->nm_dispatch((netmsg_t)nmsg);
 147                 return(EASYNC);
 148         } else {
 149                 return(netmsg_fwd_port_fn(port, lmsg));
 150         }
 151 }
 152
 153 /*
 154  * UNIX DOMAIN sockets still have to run their uipc functions synchronously,
 155  * because they depend on the user proc context for a number of things
 156  * (like creds) which we have not yet incorporated into the message structure.
 157  *
 158  * However, we maintain or message/port abstraction.  Having a special
 159  * synchronous port which runs the commands synchronously gives us the
 160  * ability to serialize operations in one place later on when we start
 161  * removing the BGL.
 162  */
 163 static int
 164 netmsg_sync_putport(lwkt_port_t port, lwkt_msg_t lmsg)
 165 {
 166         netmsg_base_t nmsg = (void *)lmsg;
 167
 168         KKASSERT((lmsg->ms_flags & MSGF_DONE) == 0);
 169
 170         lmsg->ms_target_port = port;    /* required for abort */
 171         nmsg->nm_dispatch((netmsg_t)nmsg);
 172         return(EASYNC);
 173 }
 174
 175 static void
 176 netisr_init(void)
 177 {
 178         int i;
 179
 180         TAILQ_INIT(&netreglist);
 181         TAILQ_INIT(&netrulist);
 182
 183         /*
 184          * Create default per-cpu threads for generic protocol handling.
 185          */
 186         for (i = 0; i < ncpus; ++i) {
 187                 lwkt_create(netmsg_service_loop, NULL, NULL,
 188                             &netisr_cpu[i],
 189                             TDF_NOSTART|TDF_FORCE_SPINPORT|TDF_FIXEDCPU,
 190                             i, "netisr_cpu %d", i);
 191                 netmsg_service_port_init(&netisr_cpu[i].td_msgport);
 192                 lwkt_schedule(&netisr_cpu[i]);
 193         }
 194
 195         /*
 196          * The netisr_afree_rport is a special reply port which automatically
 197          * frees the replied message.  The netisr_adone_rport simply marks
 198          * the message as being done.  The netisr_apanic_rport panics if
 199          * the message is replied to.
 200          */
 201         lwkt_initport_replyonly(&netisr_afree_rport, netisr_autofree_reply);
 202         lwkt_initport_replyonly(&netisr_afree_free_so_rport,
 203                                 netisr_autofree_free_so_reply);
 204         lwkt_initport_replyonly_null(&netisr_adone_rport);
 205         lwkt_initport_panic(&netisr_apanic_rport);
 206
 207         /*
 208          * The netisr_syncport is a special port which executes the message
 209          * synchronously and waits for it if EASYNC is returned.
 210          */
 211         lwkt_initport_putonly(&netisr_sync_port, netmsg_sync_putport);
 212 }
 213
 214 SYSINIT(netisr, SI_SUB_PRE_DRIVERS, SI_ORDER_FIRST, netisr_init, NULL);
 215
 216 /*
 217  * Finish initializing the message port for a netmsg service.  This also
 218  * registers the port for synchronous cleanup operations such as when an
 219  * ifnet is being destroyed.  There is no deregistration API yet.
 220  */
 221 void
 222 netmsg_service_port_init(lwkt_port_t port)
 223 {
 224         struct netmsg_port_registration *reg;
 225
 226         /*
 227          * Override the putport function.  Our custom function checks for
 228          * self-references and executes such commands synchronously.
 229          */
 230         if (netmsg_fwd_port_fn == NULL)
 231                 netmsg_fwd_port_fn = port->mp_putport;
 232         KKASSERT(netmsg_fwd_port_fn == port->mp_putport);
 233         port->mp_putport = netmsg_put_port;
 234
 235         /*
 236          * Keep track of ports using the netmsg API so we can synchronize
 237          * certain operations (such as freeing an ifnet structure) across all
 238          * consumers.
 239          */
 240         reg = kmalloc(sizeof(*reg), M_TEMP, M_WAITOK|M_ZERO);
 241         reg->npr_port = port;
 242         TAILQ_INSERT_TAIL(&netreglist, reg, npr_entry);
 243 }
 244
 245 /*
 246  * This function synchronizes the caller with all netmsg services.  For
 247  * example, if an interface is being removed we must make sure that all
 248  * packets related to that interface complete processing before the structure
 249  * can actually be freed.  This sort of synchronization is an alternative to
 250  * ref-counting the netif, removing the ref counting overhead in favor of
 251  * placing additional overhead in the netif freeing sequence (where it is
 252  * inconsequential).
 253  */
 254 void
 255 netmsg_service_sync(void)
 256 {
 257         struct netmsg_port_registration *reg;
 258         struct netmsg_base smsg;
 259
 260         netmsg_init(&smsg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler);
 261
 262         TAILQ_FOREACH(reg, &netreglist, npr_entry) {
 263                 lwkt_domsg(reg->npr_port, &smsg.lmsg, 0);
 264         }
 265 }
 266
 267 /*
 268  * The netmsg function simply replies the message.  API semantics require
 269  * EASYNC to be returned if the netmsg function disposes of the message.
 270  */
 271 void
 272 netmsg_sync_handler(netmsg_t msg)
 273 {
 274         lwkt_replymsg(&msg->lmsg, 0);
 275 }
 276
/*
 * Generic netmsg service loop.  Some protocols may roll their own but all
 * must do the basic command dispatch function call done here.
 *
 * The thread blocks until a message arrives, processes a batch of
 * messages, runs every registered rollup function, and repeats.
 */
static void
netmsg_service_loop(void *arg)
{
        struct netmsg_rollup *ru;
        netmsg_base_t msg;
        thread_t td = curthread;
        int limit;

        td->td_type = TD_TYPE_NETISR;

        while ((msg = lwkt_waitport(&td->td_msgport, 0))) {
                /*
                 * Run up to netisr_rollup_limit pending netmsgs before
                 * running the rollup functions.  The batch also ends as
                 * soon as the port has no more messages queued.
                 *
                 * NOTE: a limit <= 0 is never decremented to exactly 0
                 * below, so in that case only an empty port ends the
                 * batch.
                 */
                limit = netisr_rollup_limit;
                do {
                        KASSERT(msg->nm_dispatch != NULL,
                                ("netmsg_service isr %d badmsg",
                                msg->lmsg.u.ms_result));
                        /*
                         * Don't match so_port, if the msg explicitly
                         * asks us to ignore its so_port.
                         */
                        if ((msg->lmsg.ms_flags & MSGF_IGNSOPORT) == 0 &&
                            msg->nm_so &&
                            msg->nm_so->so_port != &td->td_msgport) {
                                /*
                                 * Sockets undergoing connect or disconnect
                                 * ops can change ports on us.  Chase the
                                 * port.
                                 */
#ifdef foo
                                /*
                                 * This could be quite common for protocols
                                 * which support asynchronous pru_connect,
                                 * e.g. TCP, so kprintf socket port chasing
                                 * could be too verbose for the console.
                                 */
                                kprintf("%s: Warning, port changed so=%p\n",
                                        __func__, msg->nm_so);
#endif
                                lwkt_forwardmsg(msg->nm_so->so_port,
                                                &msg->lmsg);
                        } else {
                                /*
                                 * We are on the correct port, dispatch it.
                                 */
                                msg->nm_dispatch((netmsg_t)msg);
                        }
                        if (--limit == 0)
                                break;
                } while ((msg = lwkt_getport(&td->td_msgport)) != NULL);

                /*
                 * Run all registered rollup functions for this cpu
                 * (e.g. tcp_willblock()).
                 */
                TAILQ_FOREACH(ru, &netrulist, ru_entry)
                        ru->ru_func();
        }
}
 342
 343 /*
 344  * Forward a packet to a netisr service function.
 345  *
 346  * If the packet has not been assigned to a protocol thread we call
 347  * the port characterization function to assign it.  The caller must
 348  * clear M_HASH (or not have set it in the first place) if the caller
 349  * wishes the packet to be recharacterized.
 350  */
 351 int
 352 netisr_queue(int num, struct mbuf *m)
 353 {
 354         struct netisr *ni;
 355         struct netmsg_packet *pmsg;
 356         lwkt_port_t port;
 357
 358         KASSERT((num > 0 && num <= NELEM(netisrs)),
 359                 ("Bad isr %d", num));
 360
 361         ni = &netisrs[num];
 362         if (ni->ni_handler == NULL) {
 363                 kprintf("%s: Unregistered isr %d\n", __func__, num);
 364                 m_freem(m);
 365                 return (EIO);
 366         }
 367
 368         /*
 369          * Figure out which protocol thread to send to.  This does not
 370          * have to be perfect but performance will be really good if it
 371          * is correct.  Major protocol inputs such as ip_input() will
 372          * re-characterize the packet as necessary.
 373          */
 374         if ((m->m_flags & M_HASH) == 0) {
 375                 ni->ni_hashfn(&m, 0);
 376                 if (m == NULL)
 377                         return (EIO);
 378                 if ((m->m_flags & M_HASH) == 0) {
 379                         kprintf("%s(%d): packet hash failed\n",
 380                                 __func__, num);
 381                         m_freem(m);
 382                         return (EIO);
 383                 }
 384         }
 385
 386         /*
 387          * Get the protocol port based on the packet hash, initialize
 388          * the netmsg, and send it off.
 389          */
 390         port = netisr_hashport(m->m_pkthdr.hash);
 391         pmsg = &m->m_hdr.mh_netmsg;
 392         netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
 393                     0, ni->ni_handler);
 394         pmsg->nm_packet = m;
 395         pmsg->base.lmsg.u.ms_result = num;
 396         lwkt_sendmsg(port, &pmsg->base.lmsg);
 397
 398         return (0);
 399 }
 400
 401 /*
 402  * Run a netisr service function on the packet.
 403  *
 404  * The packet must have been correctly characterized!
 405  */
 406 int
 407 netisr_handle(int num, struct mbuf *m)
 408 {
 409         struct netisr *ni;
 410         struct netmsg_packet *pmsg;
 411         lwkt_port_t port;
 412
 413         /*
 414          * Get the protocol port based on the packet hash
 415          */
 416         KASSERT((m->m_flags & M_HASH), ("packet not characterized"));
 417         port = netisr_hashport(m->m_pkthdr.hash);
 418         KASSERT(&curthread->td_msgport == port, ("wrong msgport"));
 419
 420         KASSERT((num > 0 && num <= NELEM(netisrs)), ("bad isr %d", num));
 421         ni = &netisrs[num];
 422         if (ni->ni_handler == NULL) {
 423                 kprintf("%s: unregistered isr %d\n", __func__, num);
 424                 m_freem(m);
 425                 return EIO;
 426         }
 427
 428         /*
 429          * Initialize the netmsg, and run the handler directly.
 430          */
 431         pmsg = &m->m_hdr.mh_netmsg;
 432         netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
 433                     0, ni->ni_handler);
 434         pmsg->nm_packet = m;
 435         pmsg->base.lmsg.u.ms_result = num;
 436         ni->ni_handler((netmsg_t)&pmsg->base);
 437
 438         return 0;
 439 }
 440
 441 /*
 442  * Pre-characterization of a deeper portion of the packet for the
 443  * requested isr.
 444  *
 445  * The base of the ISR type (e.g. IP) that we want to characterize is
 446  * at (hoff) relative to the beginning of the mbuf.  This allows
 447  * e.g. ether_characterize() to not have to adjust the m_data/m_len.
 448  */
 449 void
 450 netisr_characterize(int num, struct mbuf **mp, int hoff)
 451 {
 452         struct netisr *ni;
 453         struct mbuf *m;
 454
 455         /*
 456          * Validation
 457          */
 458         m = *mp;
 459         KKASSERT(m != NULL);
 460
 461         if (num < 0 || num >= NETISR_MAX) {
 462                 if (num == NETISR_MAX) {
 463                         m->m_flags |= M_HASH;
 464                         m->m_pkthdr.hash = 0;
 465                         return;
 466                 }
 467                 panic("Bad isr %d", num);
 468         }
 469
 470         /*
 471          * Valid netisr?
 472          */
 473         ni = &netisrs[num];
 474         if (ni->ni_handler == NULL) {
 475                 kprintf("%s: Unregistered isr %d\n", __func__, num);
 476                 m_freem(m);
 477                 *mp = NULL;
 478         }
 479
 480         /*
 481          * Characterize the packet
 482          */
 483         if ((m->m_flags & M_HASH) == 0) {
 484                 ni->ni_hashfn(mp, hoff);
 485                 m = *mp;
 486                 if (m && (m->m_flags & M_HASH) == 0) {
 487                         kprintf("%s(%d): packet hash failed\n",
 488                                 __func__, num);
 489                 }
 490         }
 491 }
 492
 493 void
 494 netisr_register(int num, netisr_fn_t handler, netisr_hashfn_t hashfn)
 495 {
 496         struct netisr *ni;
 497
 498         KASSERT((num > 0 && num <= NELEM(netisrs)),
 499                 ("netisr_register: bad isr %d", num));
 500         KKASSERT(handler != NULL);
 501
 502         if (hashfn == NULL)
 503                 hashfn = netisr_hashfn0;
 504
 505         ni = &netisrs[num];
 506
 507         ni->ni_handler = handler;
 508         ni->ni_hashck = netisr_nohashck;
 509         ni->ni_hashfn = hashfn;
 510         netmsg_init(&ni->ni_netmsg, NULL, &netisr_adone_rport, 0, NULL);
 511 }
 512
 513 void
 514 netisr_register_hashcheck(int num, netisr_hashck_t hashck)
 515 {
 516         struct netisr *ni;
 517
 518         KASSERT((num > 0 && num <= NELEM(netisrs)),
 519                 ("netisr_register: bad isr %d", num));
 520
 521         ni = &netisrs[num];
 522         ni->ni_hashck = hashck;
 523 }
 524
 525 void
 526 netisr_register_rollup(netisr_ru_t ru_func, int prio)
 527 {
 528         struct netmsg_rollup *new_ru, *ru;
 529
 530         new_ru = kmalloc(sizeof(*new_ru), M_TEMP, M_WAITOK|M_ZERO);
 531         new_ru->ru_func = ru_func;
 532         new_ru->ru_prio = prio;
 533
 534         /*
 535          * Higher priority "rollup" appears first
 536          */
 537         TAILQ_FOREACH(ru, &netrulist, ru_entry) {
 538                 if (ru->ru_prio < new_ru->ru_prio) {
 539                         TAILQ_INSERT_BEFORE(ru, new_ru, ru_entry);
 540                         return;
 541                 }
 542         }
 543         TAILQ_INSERT_TAIL(&netrulist, new_ru, ru_entry);
 544 }
 545
 546 /*
 547  * Return a default protocol control message processing thread port
 548  */
 549 lwkt_port_t
 550 cpu0_ctlport(int cmd __unused, struct sockaddr *sa __unused,
 551     void *extra __unused, int *cpuid)
 552 {
 553         *cpuid = 0;
 554         return netisr_cpuport(*cpuid);
 555 }
 556
 557 /*
 558  * This is a default netisr packet characterization function which
 559  * sets M_HASH.  If a netisr is registered with a NULL hashfn function
 560  * this one is assigned.
 561  *
 562  * This function makes no attempt to validate the packet.
 563  */
 564 static void
 565 netisr_hashfn0(struct mbuf **mp, int hoff __unused)
 566 {
 567         struct mbuf *m = *mp;
 568
 569         m->m_flags |= M_HASH;
 570         m->m_pkthdr.hash = 0;
 571 }
 572
 573 /*
 574  * schednetisr() is used to call the netisr handler from the appropriate
 575  * netisr thread for polling and other purposes.
 576  *
 577  * This function may be called from a hard interrupt or IPI and must be
 578  * MP SAFE and non-blocking.  We use a fixed per-cpu message instead of
 579  * trying to allocate one.  We must get ourselves onto the target cpu
 580  * to safely check the MSGF_DONE bit on the message but since the message
 581  * will be sent to that cpu anyway this does not add any extra work beyond
 582  * what lwkt_sendmsg() would have already had to do to schedule the target
 583  * thread.
 584  */
 585 static void
 586 schednetisr_remote(void *data)
 587 {
 588         int num = (int)(intptr_t)data;
 589         struct netisr *ni = &netisrs[num];
 590         lwkt_port_t port = &netisr_cpu[0].td_msgport;
 591         netmsg_base_t pmsg;
 592
 593         pmsg = &netisrs[num].ni_netmsg;
 594         if (pmsg->lmsg.ms_flags & MSGF_DONE) {
 595                 netmsg_init(pmsg, NULL, &netisr_adone_rport, 0, ni->ni_handler);
 596                 pmsg->lmsg.u.ms_result = num;
 597                 lwkt_sendmsg(port, &pmsg->lmsg);
 598         }
 599 }
 600
 601 void
 602 schednetisr(int num)
 603 {
 604         KASSERT((num > 0 && num <= NELEM(netisrs)),
 605                 ("schednetisr: bad isr %d", num));
 606         KKASSERT(netisrs[num].ni_handler != NULL);
 607         if (mycpu->gd_cpuid != 0) {
 608                 lwkt_send_ipiq(globaldata_find(0),
 609                                schednetisr_remote, (void *)(intptr_t)num);
 610         } else {
 611                 crit_enter();
 612                 schednetisr_remote((void *)(intptr_t)num);
 613                 crit_exit();
 614         }
 615 }
 616
/*
 * Dispatch function for the barrier messages sent by
 * netisr_barrier_set(); runs in the receiving netisr thread.
 *
 * It clears this cpu's bit in the initiator's cpumask (waking the
 * initiator if the mask is then seen empty), keeps this thread parked
 * until NETISR_BR_NOTDONE has been cleared from br_done, and finally
 * replies the message.
 */
static void
netisr_barrier_dispatch(netmsg_t nmsg)
{
        struct netmsg_barrier *msg = (struct netmsg_barrier *)nmsg;

        /* Report that this cpu has reached the barrier. */
        ATOMIC_CPUMASK_NANDBIT(*msg->br_cpumask, mycpu->gd_cpuid);
        if (CPUMASK_TESTZERO(*msg->br_cpumask))
                wakeup(msg->br_cpumask);

        for (;;) {
                uint32_t done = msg->br_done;

                cpu_ccfence();
                if ((done & NETISR_BR_NOTDONE) == 0)
                        break;

                /*
                 * Flag that we are about to sleep by setting
                 * NETISR_BR_WAITDONE.  We only sleep if br_done still
                 * held the value sampled above; otherwise loop and
                 * re-examine it.
                 */
                tsleep_interlock(&msg->br_done, 0);
                if (atomic_cmpset_int(&msg->br_done,
                    done, done | NETISR_BR_WAITDONE))
                        tsleep(&msg->br_done, PINTERLOCKED, "nbrdsp", 0);
        }

        lwkt_replymsg(&nmsg->lmsg, 0);
}
 641
 642 struct netisr_barrier *
 643 netisr_barrier_create(void)
 644 {
 645         struct netisr_barrier *br;
 646
 647         br = kmalloc(sizeof(*br), M_LWKTMSG, M_WAITOK | M_ZERO);
 648         return br;
 649 }
 650
/*
 * Set the barrier: send a barrier message to the netisr thread of every
 * other cpu and wait until all of them have started handling it.  Those
 * threads then stay inside netisr_barrier_dispatch() until
 * netisr_barrier_rem() is called.
 *
 * br - a barrier from netisr_barrier_create() that is not currently set.
 */
void
netisr_barrier_set(struct netisr_barrier *br)
{
        /*
         * Mask of cpus we still wait for.  It lives on our stack and the
         * remote handlers clear their bit in it through br_cpumask.
         */
        volatile cpumask_t other_cpumask;
        int i, cur_cpuid;

        ASSERT_IN_NETISR(0);
        KKASSERT(!br->br_isset);

        other_cpumask = mycpu->gd_other_cpus;
        CPUMASK_ANDMASK(other_cpumask, smp_active_mask);
        cur_cpuid = mycpuid;

        /* First pass: allocate and initialize one message per other cpu. */
        for (i = 0; i < ncpus; ++i) {
                struct netmsg_barrier *msg;

                if (i == cur_cpuid)
                        continue;

                msg = kmalloc(sizeof(struct netmsg_barrier),
                              M_LWKTMSG, M_WAITOK);

                /*
                 * Don't use priority message here; mainly to keep
                 * it ordered w/ the previous data packets sent by
                 * the caller.
                 *
                 * The reply port is netisr_afree_rport, so the message
                 * is freed when the handler replies to it.
                 */
                netmsg_init(&msg->base, NULL, &netisr_afree_rport, 0,
                            netisr_barrier_dispatch);
                msg->br_cpumask = &other_cpumask;
                msg->br_done = NETISR_BR_NOTDONE;

                KKASSERT(br->br_msgs[i] == NULL);
                br->br_msgs[i] = msg;
        }

        /* Second pass: send the messages. */
        for (i = 0; i < ncpus; ++i) {
                if (i == cur_cpuid)
                        continue;
                lwkt_sendmsg(netisr_cpuport(i), &br->br_msgs[i]->base.lmsg);
        }

        /*
         * Wait until every handler has cleared its bit.  The mask is
         * re-tested after tsleep_interlock() so a wakeup() issued in
         * between is not missed.
         */
        while (CPUMASK_TESTNZERO(other_cpumask)) {
                tsleep_interlock(&other_cpumask, 0);
                if (CPUMASK_TESTNZERO(other_cpumask))
                        tsleep(&other_cpumask, PINTERLOCKED, "nbrset", 0);
        }
        br->br_isset = 1;
}
 700
 701 void
 702 netisr_barrier_rem(struct netisr_barrier *br)
 703 {
 704         int i, cur_cpuid;
 705
 706         ASSERT_IN_NETISR(0);
 707         KKASSERT(br->br_isset);
 708
 709         cur_cpuid = mycpuid;
 710         for (i = 0; i < ncpus; ++i) {
 711                 struct netmsg_barrier *msg = br->br_msgs[i];
 712                 uint32_t done;
 713
 714                 msg = br->br_msgs[i];
 715                 br->br_msgs[i] = NULL;
 716
 717                 if (i == cur_cpuid)
 718                         continue;
 719
 720                 done = atomic_swap_int(&msg->br_done, 0);
 721                 if (done & NETISR_BR_WAITDONE)
 722                         wakeup(&msg->br_done);
 723         }
 724         br->br_isset = 0;
 725 }
 726
 727 static void
 728 netisr_nohashck(struct mbuf *m, const struct pktinfo *pi __unused)
 729 {
 730         m->m_flags &= ~M_HASH;
 731 }
 732
 733 void
 734 netisr_hashcheck(int num, struct mbuf *m, const struct pktinfo *pi)
 735 {
 736         struct netisr *ni;
 737
 738         if (num < 0 || num >= NETISR_MAX)
 739                 panic("Bad isr %d", num);
 740
 741         /*
 742          * Valid netisr?
 743          */
 744         ni = &netisrs[num];
 745         if (ni->ni_handler == NULL)
 746                 panic("Unregistered isr %d", num);
 747
 748         ni->ni_hashck(m, pi);
 749 }