/*
 * Copyright (c) 2003,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/lwkt_ipiq.c,v 1.10 2005/04/18 01:02:58 dillon Exp $
 */

/*
 * This module implements IPI message queueing and the MI portion of IPI
 * message processing.
 */

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/ipl.h>
#include <machine/smp.h>
#include <machine/atomic.h>

#define THREAD_STACK	(UPAGES * PAGE_SIZE)

#else	/* !_KERNEL: userland libcaps build */

#include <sys/stdint.h>
#include <libcaps/thread.h>
#include <sys/thread.h>
#include <sys/msgport.h>
#include <sys/errno.h>
#include <libcaps/globaldata.h>
#include <machine/cpufunc.h>
#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/lock.h>
#include <machine/cpu.h>
#include <machine/atomic.h>

#endif	/* _KERNEL */

#ifdef SMP

static __int64_t ipiq_count;	/* total calls to lwkt_send_ipiq*() */
static __int64_t ipiq_fifofull;	/* number of fifo full conditions detected */
static __int64_t ipiq_avoided;	/* interlock with target avoids cpu ipi */
static __int64_t ipiq_passive;	/* passive IPI messages */
static __int64_t ipiq_cscount;	/* number of cpu synchronizations */
static int ipiq_optimized = 1;	/* XXX temporary sysctl */

#ifdef _KERNEL
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_count, CTLFLAG_RW, &ipiq_count, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_fifofull, CTLFLAG_RW, &ipiq_fifofull, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_avoided, CTLFLAG_RW, &ipiq_avoided, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_passive, CTLFLAG_RW, &ipiq_passive, 0, "");
SYSCTL_QUAD(_lwkt, OID_AUTO, ipiq_cscount, CTLFLAG_RW, &ipiq_cscount, 0, "");
SYSCTL_INT(_lwkt, OID_AUTO, ipiq_optimized, CTLFLAG_RW, &ipiq_optimized, 0, "");
#endif

static int lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame);
static void lwkt_cpusync_remote1(lwkt_cpusync_t poll);
static void lwkt_cpusync_remote2(lwkt_cpusync_t poll);

/*
 * Send a function execution request to another cpu.  The request is queued
 * on the cpu<->cpu ipiq matrix.  Each cpu owns a unique ipiq FIFO for every
 * possible target cpu.  The FIFO can be written.
 *
 * If the FIFO fills up we have to enable interrupts to avoid an APIC
 * deadlock and process pending IPIQs while waiting for it to empty.
 * Otherwise we may soft-deadlock with another cpu whose FIFO is also full.
 *
 * We can safely bump gd_intr_nesting_level because our crit_exit() at the
 * end will take care of any pending interrupts.
 *
 * The actual hardware IPI is avoided if the target cpu is already processing
 * the queue from a prior IPI.  It is possible to pipeline IPI messages
 * very quickly between cpus due to the FIFO hysteresis.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    if (target == gd) {
	func(arg);
	return(0);
    }
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_mb1();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Signal the target cpu that there is work pending.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	else
	    ++ipiq_avoided;
    }
    crit_exit();
    return(ip->ip_windex);
}
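
/*
 * Example usage (an illustrative sketch only; ipi_bump and my_counter are
 * hypothetical names, not part of this module).  The handler runs on the
 * target cpu in IPI context:
 *
 *	static void
 *	ipi_bump(void *arg)
 *	{
 *	    ++*(long *)arg;
 *	}
 *
 *	lwkt_send_ipiq(globaldata_find(1), ipi_bump, &my_counter);
 */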

/*
 * Similar to lwkt_send_ipiq() but this function does not actually initiate
 * the IPI to the target cpu unless the FIFO has become too full, so it is
 * very fast.
 *
 * This function is used for non-critical IPI messages, such as memory
 * deallocations.  The queue will typically be flushed by the target cpu at
 * the next clock interrupt.
 *
 * Need not be called from a critical section.
 */
int
lwkt_send_ipiq_passive(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(target != gd);
    crit_enter();
    ++gd->gd_intr_nesting_level;
#ifdef INVARIANTS
    if (gd->gd_intr_nesting_level > 20)
	panic("lwkt_send_ipiq: TOO HEAVILY NESTED!");
#endif
    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    ++ipiq_count;
    ++ipiq_passive;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    /*
     * Do not allow the FIFO to become full.  Interrupts must be physically
     * enabled while we liveloop to avoid deadlocking the APIC.
     */
    if (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 2) {
	unsigned int eflags = read_eflags();

	if (atomic_poll_acquire_int(&ip->ip_npoll) || ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	cpu_enable_intr();
	++ipiq_fifofull;
	while (ip->ip_windex - ip->ip_rindex > MAXCPUFIFO / 4) {
	    KKASSERT(ip->ip_windex - ip->ip_rindex != MAXCPUFIFO - 1);
	    lwkt_process_ipiq();
	}
	write_eflags(eflags);
    }

    /*
     * Queue the new message
     */
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_mb1();
    ++ip->ip_windex;
    --gd->gd_intr_nesting_level;

    /*
     * Do not signal the target cpu, it will pick up the IPI when it next
     * polls (typically on the next tick).
     */
    crit_exit();
    return(ip->ip_windex);
}
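
/*
 * Example usage (illustrative sketch; remote_free, ptr_owner_gd and ptr are
 * hypothetical).  Passive sends suit deferrable work such as returning
 * memory to its owning cpu; the target flushes the queue on its next poll:
 *
 *	lwkt_send_ipiq_passive(ptr_owner_gd, remote_free, ptr);
 */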

/*
 * Send an IPI request without blocking, return 0 on success, ENOENT on
 * failure.  The actual queueing of the hardware IPI may still force us
 * to spin and process incoming IPIs but that will eventually go away
 * when we've gotten rid of the other general IPIs.
 */
int
lwkt_send_ipiq_nowait(globaldata_t target, ipifunc_t func, void *arg)
{
    lwkt_ipiq_t ip;
    int windex;
    struct globaldata *gd = mycpu;

    KKASSERT(curthread->td_pri >= TDPRI_CRIT);
    if (target == gd) {
	func(arg);
	return(0);
    }
    ++ipiq_count;
    ip = &gd->gd_ipiq[target->gd_cpuid];

    if (ip->ip_windex - ip->ip_rindex >= MAXCPUFIFO * 2 / 3)
	return(ENOENT);
    windex = ip->ip_windex & MAXCPUFIFO_MASK;
    ip->ip_func[windex] = (ipifunc2_t)func;
    ip->ip_arg[windex] = arg;
    cpu_mb1();
    ++ip->ip_windex;

    /*
     * This isn't a passive IPI, we still have to signal the target cpu.
     */
    if (atomic_poll_acquire_int(&ip->ip_npoll)) {
	cpu_send_ipiq(target->gd_cpuid);
    } else {
	if (ipiq_optimized == 0)
	    cpu_send_ipiq(target->gd_cpuid);
	else
	    ++ipiq_avoided;
    }
    return(0);
}
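
/*
 * Example usage (illustrative sketch; try_remote_wakeup and td are
 * hypothetical).  Because the nowait form fails with ENOENT when the FIFO
 * is nearly full, callers must check the return value and fall back:
 *
 *	if (lwkt_send_ipiq_nowait(target, try_remote_wakeup, td) == ENOENT)
 *	    lwkt_send_ipiq(target, try_remote_wakeup, td);
 */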

/*
 * deprecated, used only by fast int forwarding.
 */
int
lwkt_send_ipiq_bycpu(int dcpu, ipifunc_t func, void *arg)
{
    return(lwkt_send_ipiq(globaldata_find(dcpu), func, arg));
}

/*
 * Send a message to several target cpus.  Typically used for scheduling.
 * The message will not be sent to stopped cpus.
 */
int
lwkt_send_ipiq_mask(u_int32_t mask, ipifunc_t func, void *arg)
{
    int cpuid;
    int count = 0;

    mask &= ~stopped_cpus;
    while (mask) {
	cpuid = bsfl(mask);
	lwkt_send_ipiq(globaldata_find(cpuid), func, arg);
	mask &= ~(1 << cpuid);
	++count;
    }
    return(count);
}
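
/*
 * Example usage (illustrative sketch; need_resched_remote is hypothetical).
 * Broadcast to every other running cpu:
 *
 *	lwkt_send_ipiq_mask(mycpu->gd_other_cpus & smp_active_mask,
 *			    need_resched_remote, NULL);
 */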

/*
 * Wait for the remote cpu to finish processing a function.
 *
 * YYY we have to enable interrupts and process the IPIQ while waiting
 * for it to empty or we may deadlock with another cpu.  Create a CPU_*()
 * function to do this!  YYY we really should 'block' here.
 *
 * MUST be called from a critical section.  This routine may be called
 * from an interrupt (for example, if an interrupt wakes a foreign thread
 * up).
 */
void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    lwkt_ipiq_t ip;
    int maxc = 100000000;

    if (target != mycpu) {
	ip = &mycpu->gd_ipiq[target->gd_cpuid];
	if ((int)(ip->ip_xindex - seq) < 0) {
	    unsigned int eflags = read_eflags();

	    cpu_enable_intr();
	    while ((int)(ip->ip_xindex - seq) < 0) {
		crit_enter();
		lwkt_process_ipiq();
		crit_exit();
		if (--maxc == 0)
		    printf("LWKT_WAIT_IPIQ WARNING! %d wait %d (%d)\n",
			mycpu->gd_cpuid, target->gd_cpuid,
			ip->ip_xindex - seq);
		if (maxc < -1000000)
		    panic("LWKT_WAIT_IPIQ");
	    }
	    write_eflags(eflags);
	}
    }
}
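
/*
 * Example usage (illustrative sketch; some_func and some_arg are
 * hypothetical).  The sequence number returned by lwkt_send_ipiq()
 * identifies the queued message, so a caller can block until the target
 * has actually executed it:
 *
 *	int seq = lwkt_send_ipiq(target, some_func, some_arg);
 *	lwkt_wait_ipiq(target, seq);
 */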

int
lwkt_seq_ipiq(globaldata_t target)
{
    lwkt_ipiq_t ip;

    ip = &mycpu->gd_ipiq[target->gd_cpuid];
    return(ip->ip_windex);
}

/*
 * Called from IPI interrupt (like a fast interrupt), which has placed
 * us in a critical section.  The MP lock may or may not be held.
 * May also be called from doreti or splz, or be reentrantly called
 * indirectly through the ip_func[] we run.
 *
 * There are two versions, one where no interrupt frame is available (when
 * called from the send code and from splz), and one where an interrupt
 * frame is available.
 */
void
lwkt_process_ipiq(void)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    ip = globaldata_find(n)->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], NULL))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq1(&gd->gd_cpusyncq, NULL)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	}
    }
}

#ifdef _KERNEL
void
lwkt_process_ipiq_frame(struct intrframe frame)
{
    globaldata_t gd = mycpu;
    lwkt_ipiq_t ip;
    int n;

again:
    for (n = 0; n < ncpus; ++n) {
	if (n != gd->gd_cpuid) {
	    ip = globaldata_find(n)->gd_ipiq;
	    if (ip != NULL) {
		while (lwkt_process_ipiq1(&ip[gd->gd_cpuid], &frame))
		    ;
	    }
	}
    }
    if (gd->gd_cpusyncq.ip_rindex != gd->gd_cpusyncq.ip_windex) {
	if (lwkt_process_ipiq1(&gd->gd_cpusyncq, &frame)) {
	    if (gd->gd_curthread->td_cscount == 0)
		goto again;
	}
    }
}
#endif

static int
lwkt_process_ipiq1(lwkt_ipiq_t ip, struct intrframe *frame)
{
    int ri;
    int wi = ip->ip_windex;

    /*
     * Note: xindex is only updated after we are sure the function has
     * finished execution.  Beware lwkt_process_ipiq() reentrancy!  The
     * function may send an IPI which may block/drain.
     */
    while ((ri = ip->ip_rindex) != wi) {
	ip->ip_rindex = ri + 1;
	ri &= MAXCPUFIFO_MASK;
	ip->ip_func[ri](ip->ip_arg[ri], frame);
	/* YYY memory barrier */
	ip->ip_xindex = ip->ip_rindex;
    }

    /*
     * Return non-zero if there are more IPI messages pending on this
     * ipiq.  ip_npoll is left set as long as possible to reduce the
     * number of IPIs queued by the originating cpu, but must be cleared
     * *BEFORE* checking windex.
     */
    atomic_poll_release_int(&ip->ip_npoll);
    return(wi != ip->ip_windex);
}

#else	/* !SMP */

/*
 * !SMP dummy routines
 */
int
lwkt_send_ipiq(globaldata_t target, ipifunc_t func, void *arg)
{
    panic("lwkt_send_ipiq: UP box! (%d,%p,%p)", target->gd_cpuid, func, arg);
    return(0); /* NOT REACHED */
}

void
lwkt_wait_ipiq(globaldata_t target, int seq)
{
    panic("lwkt_wait_ipiq: UP box! (%d,%d)", target->gd_cpuid, seq);
}

#endif	/* SMP */

/*
 * CPU Synchronization Support
 *
 * lwkt_cpusync_simple()
 *
 *	The function is executed synchronously before return on remote cpus.
 *	A lwkt_cpusync_t pointer is passed as an argument.  The data can
 *	be accessed via arg->cs_data.
 *
 *	XXX should I just pass the data as an argument to be consistent?
 */
void
lwkt_cpusync_simple(cpumask_t mask, cpusync_func_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = func;
    cmd.cs_fin2_func = NULL;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(&cmd);
    lwkt_cpusync_finish(&cmd);
}
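
/*
 * Example usage (illustrative sketch; sync_invltlb is a hypothetical
 * handler).  Run a function synchronously on a set of cpus, reading any
 * shared data through the lwkt_cpusync_t argument's cs_data field:
 *
 *	static void
 *	sync_invltlb(lwkt_cpusync_t info)
 *	{
 *	    cpu_invltlb();
 *	}
 *
 *	lwkt_cpusync_simple(smp_active_mask, sync_invltlb, NULL);
 */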

/*
 * lwkt_cpusync_fastdata()
 *
 *	The function is executed in tandem with return on remote cpus.
 *	The data is directly passed as an argument.  Do not pass pointers
 *	to temporary storage as the storage might have gone poof by the
 *	time the target cpu executes the function.
 *
 *	At the moment lwkt_cpusync is declared on the stack and we must wait
 *	for all remote cpus to ack in lwkt_cpusync_finish(), but as a future
 *	optimization we should be able to put a counter in the globaldata
 *	structure (if it is not otherwise being used) and just poke it and
 *	return without waiting. XXX
 */
void
lwkt_cpusync_fastdata(cpumask_t mask, cpusync_func2_t func, void *data)
{
    struct lwkt_cpusync cmd;

    cmd.cs_run_func = NULL;
    cmd.cs_fin1_func = NULL;
    cmd.cs_fin2_func = func;
    cmd.cs_data = data;
    lwkt_cpusync_start(mask & mycpu->gd_other_cpus, &cmd);
    if (mask & (1 << mycpu->gd_cpuid))
	func(data);
    lwkt_cpusync_finish(&cmd);
}
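
/*
 * Example usage (illustrative sketch; set_remote_mode is hypothetical).
 * The data word is passed by value to the handler, so no stack storage
 * needs to survive past the return:
 *
 *	lwkt_cpusync_fastdata(mask, set_remote_mode, (void *)(intptr_t)1);
 */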

/*
 * lwkt_cpusync_start()
 *
 *	Start synchronization with a set of target cpus, return once they are
 *	known to be in a synchronization loop.  The target cpus will execute
 *	poll->cs_run_func() IN TANDEM WITH THE RETURN.
 *
 *	XXX future: add lwkt_cpusync_start_quick() and require a call to
 *	lwkt_cpusync_add() or lwkt_cpusync_wait(), allowing the caller to
 *	potentially absorb the IPI latency doing something useful.
 */
void
lwkt_cpusync_start(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = 0;
    poll->cs_mask = mask;
#ifdef SMP
    poll->cs_maxcount = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	++ipiq_cscount;
	++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}
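
/*
 * Example usage (illustrative sketch).  The start/finish pair brackets a
 * critical update; remote cpus spin between the two calls, so the master
 * can modify structures the other cpus would otherwise race on:
 *
 *	struct lwkt_cpusync cmd;
 *
 *	cmd.cs_run_func = NULL;
 *	cmd.cs_fin1_func = NULL;
 *	cmd.cs_fin2_func = NULL;
 *	cmd.cs_data = NULL;
 *	lwkt_cpusync_start(mask, &cmd);
 *	(perform the protected operation here)
 *	lwkt_cpusync_finish(&cmd);
 */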

void
lwkt_cpusync_add(cpumask_t mask, lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;
#ifdef SMP
    int count;
#endif

    mask &= ~poll->cs_mask;
    poll->cs_mask |= mask;
#ifdef SMP
    count = lwkt_send_ipiq_mask(
		mask & gd->gd_other_cpus & smp_active_mask,
		(ipifunc_t)lwkt_cpusync_remote1, poll);
#endif
    if (mask & gd->gd_cpumask) {
	if (poll->cs_run_func)
	    poll->cs_run_func(poll);
    }
#ifdef SMP
    poll->cs_maxcount += count;
    if (poll->cs_maxcount) {
	if (poll->cs_maxcount == count)
	    ++gd->gd_curthread->td_cscount;
	while (poll->cs_count != poll->cs_maxcount) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
    }
#endif
}

/*
 * lwkt_cpusync_finish()
 *
 *	Finish synchronization with a set of target cpus.  The target cpus
 *	will execute cs_fin1_func(poll) prior to this function returning, and
 *	will execute cs_fin2_func(data) IN TANDEM WITH THIS FUNCTION'S RETURN.
 *
 *	If cs_maxcount is non-zero then we are mastering a cpusync with one
 *	or more remote cpus and must account for it in our thread structure.
 */
void
lwkt_cpusync_finish(lwkt_cpusync_t poll)
{
    globaldata_t gd = mycpu;

    poll->cs_count = -1;
    if (poll->cs_mask & gd->gd_cpumask) {
	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func)
	    poll->cs_fin2_func(poll->cs_data);
    }
#ifdef SMP
    if (poll->cs_maxcount) {
	while (poll->cs_count != -(poll->cs_maxcount + 1)) {
	    crit_enter();
	    lwkt_process_ipiq();
	    crit_exit();
	}
	--gd->gd_curthread->td_cscount;
    }
#endif
}

#ifdef SMP

/*
 * helper IPI remote messaging function.
 *
 * Called on remote cpu when a new cpu synchronization request has been
 * sent to us.  Execute the run function and adjust cs_count, then requeue
 * the request so we spin on it.
 */
static void
lwkt_cpusync_remote1(lwkt_cpusync_t poll)
{
    atomic_add_int(&poll->cs_count, 1);
    if (poll->cs_run_func)
	poll->cs_run_func(poll);
    lwkt_cpusync_remote2(poll);
}

/*
 * helper IPI remote messaging function.
 *
 * Poll for the originator telling us to finish.  If it hasn't, requeue
 * our request so we spin on it.  When the originator requests that we
 * finish we execute cs_fin1_func(poll) synchronously and cs_fin2_func(data)
 * in tandem with the release.
 */
static void
lwkt_cpusync_remote2(lwkt_cpusync_t poll)
{
    if (poll->cs_count < 0) {
	cpusync_func2_t savef;
	void *saved;

	if (poll->cs_fin1_func)
	    poll->cs_fin1_func(poll);
	if (poll->cs_fin2_func) {
	    savef = poll->cs_fin2_func;
	    saved = poll->cs_data;
	    atomic_add_int(&poll->cs_count, -1);
	    savef(saved);
	} else {
	    atomic_add_int(&poll->cs_count, -1);
	}
    } else {
	globaldata_t gd = mycpu;
	lwkt_ipiq_t ip;
	int wi;

	ip = &gd->gd_cpusyncq;
	wi = ip->ip_windex & MAXCPUFIFO_MASK;
	ip->ip_func[wi] = (ipifunc2_t)lwkt_cpusync_remote2;
	ip->ip_arg[wi] = poll;