sys/platform/pc64/x86_64/pmap_inval.c

   1 /*
   2  * Copyright (c) 2003-2011 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34
  35 /*
  36  * pmap invalidation support code.  Certain hardware requirements must
  37  * be dealt with when manipulating page table entries and page directory
  38  * entries within a pmap.  In particular, we cannot safely manipulate
  39  * page tables which are in active use by another cpu (even if it is
  40  * running in userland) for two reasons: First, TLB writebacks will
  41  * race against our own modifications and tests.  Second, even if we
  42  * were to use bus-locked instruction we can still screw up the
  43  * target cpu's instruction pipeline due to Intel cpu errata.
  44  */
  45
  46 #include <sys/param.h>
  47 #include <sys/systm.h>
  48 #include <sys/kernel.h>
  49 #include <sys/proc.h>
  50 #include <sys/vmmeter.h>
  51 #include <sys/thread2.h>
  52 #include <sys/sysctl.h>
  53
  54 #include <vm/vm.h>
  55 #include <vm/pmap.h>
  56 #include <vm/vm_object.h>
  57
  58 #include <machine/cputypes.h>
  59 #include <machine/md_var.h>
  60 #include <machine/specialreg.h>
  61 #include <machine/smp.h>
  62 #include <machine/globaldata.h>
  63 #include <machine/pmap.h>
  64 #include <machine/pmap_inval.h>
  65
  66 #if 1   /* DEBUGGING */
  67 #define LOOPMASK        (/* 32 * */ 16 * 128 * 1024 - 1)
  68 #endif
  69
  70 #define MAX_INVAL_PAGES         128
  71
  72 struct pmap_inval_info {
  73         vm_offset_t     va;
  74         pt_entry_t      *ptep;
  75         pt_entry_t      opte;
  76         pt_entry_t      npte;
  77         enum { INVDONE, INVSTORE, INVCMPSET } mode;
  78         int             success;
  79         int             npgs;
  80         cpumask_t       done;
  81         cpumask_t       mask;
  82 #ifdef LOOPMASK
  83         cpumask_t       sigmask;
  84         int             failed;
  85         int             xloops;
  86 #endif
  87 } __cachealign;
  88
  89 typedef struct pmap_inval_info pmap_inval_info_t;
  90
  91 static pmap_inval_info_t        invinfo[MAXCPU];
  92 extern cpumask_t                smp_invmask;
  93 #ifdef LOOPMASK
  94 #ifdef LOOPMASK_IN
  95 extern cpumask_t                smp_in_mask;
  96 #endif
  97 extern cpumask_t                smp_smurf_mask;
  98 #endif
  99 static long pmap_inval_bulk_count;
 100
 101 SYSCTL_LONG(_machdep, OID_AUTO, pmap_inval_bulk_count, CTLFLAG_RW,
 102             &pmap_inval_bulk_count, 0, "");
 103
 104 static void
 105 pmap_inval_init(pmap_t pmap)
 106 {
 107         cpulock_t olock;
 108         cpulock_t nlock;
 109
 110         crit_enter_id("inval");
 111
 112         if (pmap != &kernel_pmap) {
 113                 for (;;) {
 114                         olock = pmap->pm_active_lock;
 115                         cpu_ccfence();
 116                         nlock = olock | CPULOCK_EXCL;
 117                         if (olock != nlock &&
 118                             atomic_cmpset_int(&pmap->pm_active_lock,
 119                                               olock, nlock)) {
 120                                 break;
 121                         }
 122                         lwkt_process_ipiq();
 123                         cpu_pause();
 124                 }
 125                 atomic_add_acq_long(&pmap->pm_invgen, 1);
 126         }
 127 }
 128
 129 static void
 130 pmap_inval_done(pmap_t pmap)
 131 {
 132         if (pmap != &kernel_pmap) {
 133                 atomic_clear_int(&pmap->pm_active_lock, CPULOCK_EXCL);
 134                 atomic_add_acq_long(&pmap->pm_invgen, 1);
 135         }
 136         crit_exit_id("inval");
 137 }
 138
 139 /*
 140  * API function - invalidation the pte at (va) and replace *ptep with
 141  * npte atomically across the pmap's active cpus.
 142  *
 143  * This is a holy mess.
 144  *
 145  * Returns the previous contents of *ptep.
 146  */
 147 static
 148 void
 149 loopdebug(const char *msg, pmap_inval_info_t *info)
 150 {
 151         int p;
 152         int cpu = mycpu->gd_cpuid;
 153
 154         cpu_lfence();
 155         atomic_add_long(&smp_smurf_mask.ary[0], 0);
 156         kprintf("%s %d mode=%d m=%08jx d=%08jx s=%08jx "
 157 #ifdef LOOPMASK_IN
 158                 "in=%08jx "
 159 #endif
 160                 "smurf=%08jx\n",
 161                 msg, cpu, info->mode,
 162                 info->mask.ary[0],
 163                 info->done.ary[0],
 164                 info->sigmask.ary[0],
 165 #ifdef LOOPMASK_IN
 166                 smp_in_mask.ary[0],
 167 #endif
 168                 smp_smurf_mask.ary[0]);
 169         kprintf("mdglob ");
 170         for (p = 0; p < ncpus; ++p)
 171                 kprintf(" %d", CPU_prvspace[p]->mdglobaldata.gd_xinvaltlb);
 172         kprintf("\n");
 173 }
 174
 175 #ifdef CHECKSIG
 176
 177 #define CHECKSIGMASK(info)      _checksigmask(info, __FILE__, __LINE__)
 178
 179 static
 180 void
 181 _checksigmask(pmap_inval_info_t *info, const char *file, int line)
 182 {
 183         cpumask_t tmp;
 184
 185         tmp = info->mask;
 186         CPUMASK_ANDMASK(tmp, info->sigmask);
 187         if (CPUMASK_CMPMASKNEQ(tmp, info->mask)) {
 188                 kprintf("\"%s\" line %d: bad sig/mask %08jx %08jx\n",
 189                         file, line, info->sigmask.ary[0], info->mask.ary[0]);
 190         }
 191 }
 192
 193 #else
 194
 195 #define CHECKSIGMASK(info)
 196
 197 #endif
 198
 199 /*
 200  * Invalidate the specified va across all cpus associated with the pmap.
 201  * If va == (vm_offset_t)-1, we invltlb() instead of invlpg().  The operation
 202  * will be done fully synchronously with storing npte into *ptep and returning
 203  * opte.
 204  *
 205  * If ptep is NULL the operation will execute semi-synchronously.
 206  * ptep must be NULL if npgs > 1
 207  */
 208 pt_entry_t
 209 pmap_inval_smp(pmap_t pmap, vm_offset_t va, int npgs,
 210                pt_entry_t *ptep, pt_entry_t npte)
 211 {
 212         globaldata_t gd = mycpu;
 213         pmap_inval_info_t *info;
 214         pt_entry_t opte = 0;
 215         int cpu = gd->gd_cpuid;
 216         cpumask_t tmpmask;
 217         unsigned long rflags;
 218
 219         /*
 220          * Initialize invalidation for pmap and enter critical section.
 221          */
 222         if (pmap == NULL)
 223                 pmap = &kernel_pmap;
 224         pmap_inval_init(pmap);
 225
 226         /*
 227          * Shortcut single-cpu case if possible.
 228          */
 229         if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
 230                 /*
 231                  * Convert to invltlb if there are too many pages to
 232                  * invlpg on.
 233                  */
 234                 if (npgs > MAX_INVAL_PAGES) {
 235                         npgs = 0;
 236                         va = (vm_offset_t)-1;
 237                 }
 238
 239                 /*
 240                  * Invalidate the specified pages, handle invltlb if requested.
 241                  */
 242                 while (npgs) {
 243                         --npgs;
 244                         if (ptep) {
 245                                 opte = atomic_swap_long(ptep, npte);
 246                                 ++ptep;
 247                         }
 248                         if (va == (vm_offset_t)-1)
 249                                 break;
 250                         cpu_invlpg((void *)va);
 251                         va += PAGE_SIZE;
 252                 }
 253                 if (va == (vm_offset_t)-1)
 254                         cpu_invltlb();
 255                 pmap_inval_done(pmap);
 256
 257                 return opte;
 258         }
 259
 260         /*
 261          * We need a critical section to prevent getting preempted while
 262          * we setup our command.  A preemption might execute its own
 263          * pmap_inval*() command and create confusion below.
 264          */
 265         info = &invinfo[cpu];
 266
 267         /*
 268          * We must wait for other cpus which may still be finishing up a
 269          * prior operation that we requested.
 270          *
 271          * We do not have to disable interrupts here.  An Xinvltlb can occur
 272          * at any time (even within a critical section), but it will not
 273          * act on our command until we set our done bits.
 274          */
 275         while (CPUMASK_TESTNZERO(info->done)) {
 276 #ifdef LOOPMASK
 277                 int loops;
 278
 279                 loops = ++info->xloops;
 280                 if ((loops & LOOPMASK) == 0) {
 281                         info->failed = 1;
 282                         loopdebug("orig_waitA", info);
 283                         /* XXX recover from possible bug */
 284                         CPUMASK_ASSZERO(info->done);
 285                 }
 286 #endif
 287                 cpu_pause();
 288         }
 289         KKASSERT(info->mode == INVDONE);
 290
 291         /*
 292          * Must set our cpu in the invalidation scan mask before
 293          * any possibility of [partial] execution (remember, XINVLTLB
 294          * can interrupt a critical section).
 295          */
 296         ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);
 297
 298         info->va = va;
 299         info->npgs = npgs;
 300         info->ptep = ptep;
 301         info->npte = npte;
 302         info->opte = 0;
 303 #ifdef LOOPMASK
 304         info->failed = 0;
 305 #endif
 306         info->mode = INVSTORE;
 307
 308         tmpmask = pmap->pm_active;      /* volatile (bits may be cleared) */
 309         cpu_ccfence();
 310         CPUMASK_ANDMASK(tmpmask, smp_active_mask);
 311
 312         /*
 313          * If ptep is NULL the operation can be semi-synchronous, which means
 314          * we can improve performance by flagging and removing idle cpus
 315          * (see the idleinvlclr function in mp_machdep.c).
 316          *
 317          * Typically kernel page table operation is semi-synchronous.
 318          */
 319         if (ptep == NULL)
 320                 smp_smurf_idleinvlclr(&tmpmask);
 321         CPUMASK_ORBIT(tmpmask, cpu);
 322         info->mask = tmpmask;
 323
 324         /*
 325          * Command may start executing the moment 'done' is initialized,
 326          * disable current cpu interrupt to prevent 'done' field from
 327          * changing (other cpus can't clear done bits until the originating
 328          * cpu clears its mask bit, but other cpus CAN start clearing their
 329          * mask bits).
 330          */
 331 #ifdef LOOPMASK
 332         info->sigmask = tmpmask;
 333         CHECKSIGMASK(info);
 334 #endif
 335         cpu_sfence();
 336         rflags = read_rflags();
 337         cpu_disable_intr();
 338
 339         ATOMIC_CPUMASK_COPY(info->done, tmpmask);
 340         /* execution can begin here due to races */
 341
 342         /*
 343          * Pass our copy of the done bits (so they don't change out from
 344          * under us) to generate the Xinvltlb interrupt on the targets.
 345          */
 346         smp_invlpg(&tmpmask);
 347         opte = info->opte;
 348         KKASSERT(info->mode == INVDONE);
 349
 350         /*
 351          * Target cpus will be in their loop exiting concurrently with our
 352          * cleanup.  They will not lose the bitmask they obtained before so
 353          * we can safely clear this bit.
 354          */
 355         ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
 356         write_rflags(rflags);
 357         pmap_inval_done(pmap);
 358
 359         return opte;
 360 }
 361
 362 /*
 363  * API function - invalidate the pte at (va) and replace *ptep with npte
 364  * atomically only if *ptep equals opte, across the pmap's active cpus.
 365  *
 366  * Returns 1 on success, 0 on failure (caller typically retries).
 367  */
 368 int
 369 pmap_inval_smp_cmpset(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep,
 370                       pt_entry_t opte, pt_entry_t npte)
 371 {
 372         globaldata_t gd = mycpu;
 373         pmap_inval_info_t *info;
 374         int success;
 375         int cpu = gd->gd_cpuid;
 376         cpumask_t tmpmask;
 377         unsigned long rflags;
 378
 379         /*
 380          * Initialize invalidation for pmap and enter critical section.
 381          */
 382         if (pmap == NULL)
 383                 pmap = &kernel_pmap;
 384         pmap_inval_init(pmap);
 385
 386         /*
 387          * Shortcut single-cpu case if possible.
 388          */
 389         if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
 390                 if (atomic_cmpset_long(ptep, opte, npte)) {
 391                         if (va == (vm_offset_t)-1)
 392                                 cpu_invltlb();
 393                         else
 394                                 cpu_invlpg((void *)va);
 395                         pmap_inval_done(pmap);
 396                         return 1;
 397                 } else {
 398                         pmap_inval_done(pmap);
 399                         return 0;
 400                 }
 401         }
 402
 403         /*
 404          * We need a critical section to prevent getting preempted while
 405          * we setup our command.  A preemption might execute its own
 406          * pmap_inval*() command and create confusion below.
 407          */
 408         info = &invinfo[cpu];
 409
 410         /*
 411          * We must wait for other cpus which may still be finishing
 412          * up a prior operation.
 413          */
 414         while (CPUMASK_TESTNZERO(info->done)) {
 415 #ifdef LOOPMASK
 416                 int loops;
 417
 418                 loops = ++info->xloops;
 419                 if ((loops & LOOPMASK) == 0) {
 420                         info->failed = 1;
 421                         loopdebug("orig_waitB", info);
 422                         /* XXX recover from possible bug */
 423                         CPUMASK_ASSZERO(info->done);
 424                 }
 425 #endif
 426                 cpu_pause();
 427         }
 428         KKASSERT(info->mode == INVDONE);
 429
 430         /*
 431          * Must set our cpu in the invalidation scan mask before
 432          * any possibility of [partial] execution (remember, XINVLTLB
 433          * can interrupt a critical section).
 434          */
 435         ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);
 436
 437         info->va = va;
 438         info->npgs = 1;                 /* unused */
 439         info->ptep = ptep;
 440         info->npte = npte;
 441         info->opte = opte;
 442         info->failed = 0;
 443         info->mode = INVCMPSET;
 444         info->success = 0;
 445
 446         tmpmask = pmap->pm_active;      /* volatile */
 447         cpu_ccfence();
 448         CPUMASK_ANDMASK(tmpmask, smp_active_mask);
 449         CPUMASK_ORBIT(tmpmask, cpu);
 450         info->mask = tmpmask;
 451
 452         /*
 453          * Command may start executing the moment 'done' is initialized,
 454          * disable current cpu interrupt to prevent 'done' field from
 455          * changing (other cpus can't clear done bits until the originating
 456          * cpu clears its mask bit).
 457          */
 458 #ifdef LOOPMASK
 459         info->sigmask = tmpmask;
 460         CHECKSIGMASK(info);
 461 #endif
 462         cpu_sfence();
 463         rflags = read_rflags();
 464         cpu_disable_intr();
 465
 466         ATOMIC_CPUMASK_COPY(info->done, tmpmask);
 467
 468         /*
 469          * Pass our copy of the done bits (so they don't change out from
 470          * under us) to generate the Xinvltlb interrupt on the targets.
 471          */
 472         smp_invlpg(&tmpmask);
 473         success = info->success;
 474         KKASSERT(info->mode == INVDONE);
 475
 476         ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
 477         write_rflags(rflags);
 478         pmap_inval_done(pmap);
 479
 480         return success;
 481 }
 482
 483 void
 484 pmap_inval_bulk_init(pmap_inval_bulk_t *bulk, struct pmap *pmap)
 485 {
 486         bulk->pmap = pmap;
 487         bulk->va_beg = 0;
 488         bulk->va_end = 0;
 489         bulk->count = 0;
 490 }
 491
 492 pt_entry_t
 493 pmap_inval_bulk(pmap_inval_bulk_t *bulk, vm_offset_t va,
 494                 pt_entry_t *ptep, pt_entry_t npte)
 495 {
 496         pt_entry_t pte;
 497
 498         /*
 499          * Degenerate case, localized or we don't care (e.g. because we
 500          * are jacking the entire page table) or the pmap is not in-use
 501          * by anyone.  No invalidations are done on any cpu.
 502          */
 503         if (bulk == NULL) {
 504                 pte = atomic_swap_long(ptep, npte);
 505                 return pte;
 506         }
 507
 508         /*
 509          * If it isn't the kernel pmap we execute the operation synchronously
 510          * on all cpus belonging to the pmap, which avoids concurrency bugs in
 511          * the hw related to changing pte's out from under threads.
 512          *
 513          * Eventually I would like to implement streaming pmap invalidation
 514          * for user pmaps to reduce mmap/munmap overheads for heavily-loaded
 515          * threaded programs.
 516          */
 517         if (bulk->pmap != &kernel_pmap) {
 518                 pte = pmap_inval_smp(bulk->pmap, va, 1, ptep, npte);
 519                 return pte;
 520         }
 521
 522         /*
 523          * This is the kernel_pmap.  All unmap operations presume that there
 524          * are no other cpus accessing the addresses in question.  Implement
 525          * the bulking algorithm.  collect the required information and
 526          * synchronize once at the end.
 527          */
 528         pte = atomic_swap_long(ptep, npte);
 529         if (va == (vm_offset_t)-1) {
 530                 bulk->va_beg = va;
 531         } else if (bulk->va_beg == bulk->va_end) {
 532                 bulk->va_beg = va;
 533                 bulk->va_end = va + PAGE_SIZE;
 534         } else if (va == bulk->va_end) {
 535                 bulk->va_end = va + PAGE_SIZE;
 536         } else {
 537                 bulk->va_beg = (vm_offset_t)-1;
 538                 bulk->va_end = 0;
 539 #if 0
 540                 pmap_inval_bulk_flush(bulk);
 541                 bulk->count = 1;
 542                 if (va == (vm_offset_t)-1) {
 543                         bulk->va_beg = va;
 544                         bulk->va_end = 0;
 545                 } else {
 546                         bulk->va_beg = va;
 547                         bulk->va_end = va + PAGE_SIZE;
 548                 }
 549 #endif
 550         }
 551         ++bulk->count;
 552
 553         return pte;
 554 }
 555
 556 void
 557 pmap_inval_bulk_flush(pmap_inval_bulk_t *bulk)
 558 {
 559         if (bulk == NULL)
 560                 return;
 561         if (bulk->count > 0)
 562                 pmap_inval_bulk_count += (bulk->count - 1);
 563         if (bulk->va_beg != bulk->va_end) {
 564                 if (bulk->va_beg == (vm_offset_t)-1) {
 565                         pmap_inval_smp(bulk->pmap, bulk->va_beg, 1, NULL, 0);
 566                 } else {
 567                         long n;
 568
 569                         n = (bulk->va_end - bulk->va_beg) >> PAGE_SHIFT;
 570                         pmap_inval_smp(bulk->pmap, bulk->va_beg, n, NULL, 0);
 571                 }
 572         }
 573         bulk->va_beg = 0;
 574         bulk->va_end = 0;
 575         bulk->count = 0;
 576 }
 577
 578 /*
 579  * Called with a critical section held and interrupts enabled.
 580  */
 581 int
 582 pmap_inval_intr(cpumask_t *cpumaskp, int toolong)
 583 {
 584         globaldata_t gd = mycpu;
 585         pmap_inval_info_t *info;
 586         int loopme = 0;
 587         int cpu;
 588         cpumask_t cpumask;
 589 #ifdef LOOPMASK
 590         int loops;
 591 #endif
 592
 593         /*
 594          * Check all cpus for invalidations we may need to service.
 595          */
 596         cpu_ccfence();
 597         cpu = gd->gd_cpuid;
 598         cpumask = *cpumaskp;
 599
 600         while (CPUMASK_TESTNZERO(cpumask)) {
 601                 int n = BSFCPUMASK(cpumask);
 602
 603 #ifdef LOOPMASK
 604                 KKASSERT(n >= 0 && n < MAXCPU);
 605 #endif
 606
 607                 CPUMASK_NANDBIT(cpumask, n);
 608                 info = &invinfo[n];
 609
 610                 /*
 611                  * Due to interrupts/races we can catch a new operation
 612                  * in an older interrupt.  A fence is needed once we detect
 613                  * the (not) done bit.
 614                  */
 615                 if (!CPUMASK_TESTBIT(info->done, cpu))
 616                         continue;
 617                 cpu_lfence();
 618 #ifdef LOOPMASK
 619                 if (toolong) {
 620                         kprintf("pminvl %d->%d %08jx %08jx mode=%d\n",
 621                                 cpu, n, info->done.ary[0], info->mask.ary[0],
 622                                 info->mode);
 623                 }
 624 #endif
 625
 626                 /*
 627                  * info->mask and info->done always contain the originating
 628                  * cpu until the originator is done.  Targets may still be
 629                  * present in info->done after the originator is done (they
 630                  * will be finishing up their loops).
 631                  *
 632                  * Clear info->mask bits on other cpus to indicate that they
 633                  * have quiesced (entered the loop).  Once the other mask bits
 634                  * are clear we can execute the operation on the original,
 635                  * then clear the mask and done bits on the originator.  The
 636                  * targets will then finish up their side and clear their
 637                  * done bits.
 638                  *
 639                  * The command is considered 100% done when all done bits have
 640                  * been cleared.
 641                  */
 642                 if (n != cpu) {
 643                         /*
 644                          * Command state machine for 'other' cpus.
 645                          */
 646                         if (CPUMASK_TESTBIT(info->mask, cpu)) {
 647                                 /*
 648                                  * Other cpu indicate to originator that they
 649                                  * are quiesced.
 650                                  */
 651                                 ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
 652                                 loopme = 1;
 653                         } else if (info->ptep &&
 654                                    CPUMASK_TESTBIT(info->mask, n)) {
 655                                 /*
 656                                  * Other cpu must wait for the originator (n)
 657                                  * to complete its command if ptep is not NULL.
 658                                  */
 659                                 loopme = 1;
 660                         } else {
 661                                 /*
 662                                  * Other cpu detects that the originator has
 663                                  * completed its command, or there was no
 664                                  * command.
 665                                  *
 666                                  * Now that the page table entry has changed,
 667                                  * we can follow up with our own invalidation.
 668                                  */
 669                                 vm_offset_t va = info->va;
 670                                 int npgs;
 671
 672                                 if (va == (vm_offset_t)-1 ||
 673                                     info->npgs > MAX_INVAL_PAGES) {
 674                                         cpu_invltlb();
 675                                 } else {
 676                                         for (npgs = info->npgs; npgs; --npgs) {
 677                                                 cpu_invlpg((void *)va);
 678                                                 va += PAGE_SIZE;
 679                                         }
 680                                 }
 681                                 ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
 682                                 /* info invalid now */
 683                                 /* loopme left alone */
 684                         }
 685                 } else if (CPUMASK_TESTBIT(info->mask, cpu)) {
 686                         /*
 687                          * Originator is waiting for other cpus
 688                          */
 689                         if (CPUMASK_CMPMASKNEQ(info->mask, gd->gd_cpumask)) {
 690                                 /*
 691                                  * Originator waits for other cpus to enter
 692                                  * their loop (aka quiesce).
 693                                  */
 694                                 loopme = 1;
 695 #ifdef LOOPMASK
 696                                 loops = ++info->xloops;
 697                                 if ((loops & LOOPMASK) == 0) {
 698                                         info->failed = 1;
 699                                         loopdebug("orig_waitC", info);
 700                                         /* XXX recover from possible bug */
 701                                         mdcpu->gd_xinvaltlb = 0;
 702                                         cpu_disable_intr();
 703                                         smp_invlpg(&smp_active_mask);
 704                                         cpu_enable_intr();
 705                                 }
 706 #endif
 707                         } else {
 708                                 /*
 709                                  * Originator executes operation and clears
 710                                  * mask to allow other cpus to finish.
 711                                  */
 712                                 KKASSERT(info->mode != INVDONE);
 713                                 if (info->mode == INVSTORE) {
 714                                         if (info->ptep)
 715                                                 info->opte = atomic_swap_long(info->ptep, info->npte);
 716                                         CHECKSIGMASK(info);
 717                                         ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
 718                                         CHECKSIGMASK(info);
 719                                 } else {
 720                                         if (atomic_cmpset_long(info->ptep,
 721                                                               info->opte, info->npte)) {
 722                                                 info->success = 1;
 723                                         } else {
 724                                                 info->success = 0;
 725                                         }
 726                                         CHECKSIGMASK(info);
 727                                         ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
 728                                         CHECKSIGMASK(info);
 729                                 }
 730                                 loopme = 1;
 731                         }
 732                 } else {
 733                         /*
 734                          * Originator does not have to wait for the other
 735                          * cpus to finish.  It clears its done bit.  A new
 736                          * command will not be initiated by the originator
 737                          * until the other cpus have cleared their done bits
 738                          * (asynchronously).
 739                          */
 740                         vm_offset_t va = info->va;
 741                         int npgs;
 742
 743                         if (va == (vm_offset_t)-1 ||
 744                             info->npgs > MAX_INVAL_PAGES) {
 745                                 cpu_invltlb();
 746                         } else {
 747                                 for (npgs = info->npgs; npgs; --npgs) {
 748                                         cpu_invlpg((void *)va);
 749                                         va += PAGE_SIZE;
 750                                 }
 751                         }
 752 #ifdef LOOPMASK
 753                         info->xloops = 0;
 754 #endif
 755                         /* leave loopme alone */
 756                         /* other cpus may still be finishing up */
 757                         /* can't race originator since that's us */
 758                         info->mode = INVDONE;
 759                         ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
 760                 }
 761         }
 762         return loopme;
 763 }