sys/vm/vm_pager.c

   1 /*
   2  * (MPSAFE)
   3  *
   4  * Copyright (c) 1991, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * This code is derived from software contributed to Berkeley by
   8  * The Mach Operating System project at Carnegie-Mellon University.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 3. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  *      from: @(#)vm_pager.c    8.6 (Berkeley) 1/12/94
  35  *
  36  *
  37  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  38  * All rights reserved.
  39  *
  40  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  41  *
  42  * Permission to use, copy, modify and distribute this software and
  43  * its documentation is hereby granted, provided that both the copyright
  44  * notice and this permission notice appear in all copies of the
  45  * software, derivative works or modified versions, and any portions
  46  * thereof, and that both notices appear in supporting documentation.
  47  *
  48  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  49  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  50  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  51  *
  52  * Carnegie Mellon requests users of this software to return to
  53  *
  54  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  55  *  School of Computer Science
  56  *  Carnegie Mellon University
  57  *  Pittsburgh PA 15213-3890
  58  *
  59  * any improvements or extensions that they make and grant Carnegie the
  60  * rights to redistribute these changes.
  61  *
  62  * $FreeBSD: src/sys/vm/vm_pager.c,v 1.54.2.2 2001/11/18 07:11:00 dillon Exp $
  63  */
  64
  65 /*
  66  *      Paging space routine stubs.  Emulates a matchmaker-like interface
  67  *      for builtin pagers.
  68  */
  69
  70 #include <sys/param.h>
  71 #include <sys/systm.h>
  72 #include <sys/kernel.h>
  73 #include <sys/vnode.h>
  74 #include <sys/buf.h>
  75 #include <sys/ucred.h>
  76 #include <sys/dsched.h>
  77 #include <sys/proc.h>
  78 #include <sys/sysctl.h>
  79 #include <sys/thread2.h>
  80
  81 #include <vm/vm.h>
  82 #include <vm/vm_param.h>
  83 #include <vm/vm_kern.h>
  84 #include <vm/vm_object.h>
  85 #include <vm/vm_page.h>
  86 #include <vm/vm_pager.h>
  87 #include <vm/vm_extern.h>
  88
  89 #include <sys/buf2.h>
  90 #include <vm/vm_page2.h>
  91
  92 extern struct pagerops defaultpagerops;
  93 extern struct pagerops swappagerops;
  94 extern struct pagerops vnodepagerops;
  95 extern struct pagerops devicepagerops;
  96 extern struct pagerops physpagerops;
  97
  98 int cluster_pbuf_freecnt = -1;  /* unlimited to begin with */
  99
 100 static int dead_pager_getpage (vm_object_t, vm_page_t *, int);
 101 static void dead_pager_putpages (vm_object_t, vm_page_t *, int, int, int *);
 102 static boolean_t dead_pager_haspage (vm_object_t, vm_pindex_t);
 103 static void dead_pager_dealloc (vm_object_t);
 104
 105 /*
 106  * No requirements.
 107  */
 108 static int
 109 dead_pager_getpage(vm_object_t obj, vm_page_t *mpp, int seqaccess)
 110 {
 111         return VM_PAGER_FAIL;
 112 }
 113
 114 /*
 115  * No requirements.
 116  */
 117 static void
 118 dead_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags,
 119                     int *rtvals)
 120 {
 121         int i;
 122
 123         for (i = 0; i < count; i++) {
 124                 rtvals[i] = VM_PAGER_AGAIN;
 125         }
 126 }
 127
 128 /*
 129  * No requirements.
 130  */
 131 static boolean_t
 132 dead_pager_haspage(vm_object_t object, vm_pindex_t pindex)
 133 {
 134         return FALSE;
 135 }
 136
 137 /*
 138  * No requirements.
 139  */
 140 static void
 141 dead_pager_dealloc(vm_object_t object)
 142 {
 143         KKASSERT(object->swblock_count == 0);
 144         return;
 145 }
 146
 147 static struct pagerops deadpagerops = {
 148         dead_pager_dealloc,
 149         dead_pager_getpage,
 150         dead_pager_putpages,
 151         dead_pager_haspage
 152 };
 153
 154 struct pagerops *pagertab[] = {
 155         &defaultpagerops,       /* OBJT_DEFAULT */
 156         &swappagerops,          /* OBJT_SWAP */
 157         &vnodepagerops,         /* OBJT_VNODE */
 158         &devicepagerops,        /* OBJT_DEVICE */
 159         &devicepagerops,        /* OBJT_MGTDEVICE */
 160         &physpagerops,          /* OBJT_PHYS */
 161         &deadpagerops           /* OBJT_DEAD */
 162 };
 163
 164 int npagers = NELEM(pagertab);
 165
 166 /*
 167  * Kernel address space for mapping pages.
 168  * Used by pagers where KVAs are needed for IO.
 169  *
 170  * XXX needs to be large enough to support the number of pending async
 171  * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
 172  * (MAXPHYS == 64k) if you want to get the most efficiency.
 173  */
 174 #define PAGER_MAP_SIZE  (8 * 1024 * 1024)
 175
 176 #define BSWHSIZE        16
 177 #define BSWHMASK        (BSWHSIZE - 1)
 178
 179 TAILQ_HEAD(swqueue, buf);
 180
 181 int pager_map_size = PAGER_MAP_SIZE;
 182 struct vm_map pager_map;
 183
 184 static vm_offset_t swapbkva_mem;        /* swap buffers kva */
 185 static vm_offset_t swapbkva_kva;        /* swap buffers kva */
 186 static struct swqueue bswlist_mem[BSWHSIZE];    /* with preallocated memory */
 187 static struct swqueue bswlist_kva[BSWHSIZE];    /* with kva */
 188 static struct swqueue bswlist_raw[BSWHSIZE];    /* without kva */
 189 static struct spinlock bswspin_mem[BSWHSIZE];
 190 static struct spinlock bswspin_kva[BSWHSIZE];
 191 static struct spinlock bswspin_raw[BSWHSIZE];
 192 static int pbuf_raw_count;
 193 static int pbuf_kva_count;
 194 static int pbuf_mem_count;
 195
 196 SYSCTL_INT(_vfs, OID_AUTO, pbuf_raw_count, CTLFLAG_RD, &pbuf_raw_count, 0,
 197     "Kernel pbuf raw reservations");
 198 SYSCTL_INT(_vfs, OID_AUTO, pbuf_kva_count, CTLFLAG_RD, &pbuf_kva_count, 0,
 199     "Kernel pbuf kva reservations");
 200 SYSCTL_INT(_vfs, OID_AUTO, pbuf_mem_count, CTLFLAG_RD, &pbuf_mem_count, 0,
 201     "Kernel pbuf mem reservations");
 202
 203 /*
 204  * Initialize the swap buffer list.
 205  *
 206  * Called from the low level boot code only.
 207  */
 208 static void
 209 vm_pager_init(void *arg __unused)
 210 {
 211         int i;
 212
 213         for (i = 0; i < BSWHSIZE; ++i) {
 214                 TAILQ_INIT(&bswlist_mem[i]);
 215                 TAILQ_INIT(&bswlist_kva[i]);
 216                 TAILQ_INIT(&bswlist_raw[i]);
 217                 spin_init(&bswspin_mem[i], "bswmem");
 218                 spin_init(&bswspin_kva[i], "bswkva");
 219                 spin_init(&bswspin_raw[i], "bswraw");
 220         }
 221 }
 222 SYSINIT(vm_mem, SI_BOOT1_VM, SI_ORDER_SECOND, vm_pager_init, NULL);
 223
 224 /*
 225  * Called from the low level boot code only.
 226  */
 227 static
 228 void
 229 vm_pager_bufferinit(void *dummy __unused)
 230 {
 231         struct buf *bp;
 232         long i;
 233
 234         /*
 235          * Reserve KVM space for pbuf data.
 236          */
 237         swapbkva_mem = kmem_alloc_pageable(&pager_map, nswbuf_mem * MAXPHYS);
 238         if (!swapbkva_mem)
 239                 panic("Not enough pager_map VM space for physical buffers");
 240         swapbkva_kva = kmem_alloc_pageable(&pager_map, nswbuf_kva * MAXPHYS);
 241         if (!swapbkva_kva)
 242                 panic("Not enough pager_map VM space for physical buffers");
 243
 244         /*
 245          * Initial pbuf setup.
 246          *
 247          * mem - These pbufs have permanently allocated memory
 248          * kva - These pbufs have unallocated kva reservations
 249          * raw - These pbufs have no kva reservations
 250          */
 251
 252         /*
 253          * Buffers with pre-allocated kernel memory can be convenient for
 254          * copyin/copyout because no SMP page invalidation or other pmap
 255          * operations are needed.
 256          */
 257 #if 1
 258         bp = swbuf_mem;
 259         for (i = 0; i < nswbuf_mem; ++i, ++bp) {
 260                 vm_page_t m;
 261                 vm_pindex_t pg;
 262                 int j;
 263
 264                 bp->b_kvabase = (caddr_t)((intptr_t)i * MAXPHYS) + swapbkva_mem;
 265                 bp->b_kvasize = MAXPHYS;
 266                 bp->b_swindex = i & BSWHMASK;
 267                 BUF_LOCKINIT(bp);
 268                 buf_dep_init(bp);
 269                 TAILQ_INSERT_HEAD(&bswlist_mem[i & BSWHMASK], bp, b_freelist);
 270                 atomic_add_int(&pbuf_mem_count, 1);
 271                 bp->b_data = bp->b_kvabase;
 272                 bp->b_bcount = MAXPHYS;
 273                 bp->b_xio.xio_pages = bp->b_xio.xio_internal_pages;
 274
 275                 pg = (vm_offset_t)bp->b_kvabase >> PAGE_SHIFT;
 276                 vm_object_hold(&kernel_object);
 277                 for (j = 0; j < MAXPHYS / PAGE_SIZE; ++j) {
 278                         m = vm_page_alloc(&kernel_object, pg, VM_ALLOC_NORMAL |
 279                                                               VM_ALLOC_SYSTEM);
 280                         KKASSERT(m != NULL);
 281                         bp->b_xio.xio_internal_pages[j] = m;
 282                         vm_page_wire(m);
 283                         vm_page_flag_clear(m, PG_ZERO);
 284                         /* early boot, no other cpus running yet */
 285                         pmap_kenter_noinval(pg * PAGE_SIZE, VM_PAGE_TO_PHYS(m));
 286                         cpu_invlpg((void *)(pg * PAGE_SIZE));
 287                         vm_page_wakeup(m);
 288                         ++pg;
 289                 }
 290                 vm_object_drop(&kernel_object);
 291                 bp->b_xio.xio_npages = j;
 292         }
 293 #endif
 294
 295         /*
 296          * Buffers with pre-assigned KVA bases.  The KVA has no memory pages
 297          * assigned to it.  Saves the caller from having to reserve KVA for
 298          * the page map.
 299          */
 300         bp = swbuf_kva;
 301         for (i = 0; i < nswbuf_kva; ++i, ++bp) {
 302                 bp->b_kvabase = (caddr_t)((intptr_t)i * MAXPHYS) + swapbkva_kva;
 303                 bp->b_kvasize = MAXPHYS;
 304                 bp->b_swindex = i & BSWHMASK;
 305                 BUF_LOCKINIT(bp);
 306                 buf_dep_init(bp);
 307                 TAILQ_INSERT_HEAD(&bswlist_kva[i & BSWHMASK], bp, b_freelist);
 308                 atomic_add_int(&pbuf_kva_count, 1);
 309         }
 310
 311         /*
 312          * RAW buffers with no KVA mappings.
 313          *
 314          * NOTE: We use KM_NOTLBSYNC here to reduce unnecessary IPIs
 315          *       during startup, which can really slow down emulated
 316          *       systems.
 317          */
 318         nswbuf_raw = nbuf * 2;
 319         swbuf_raw = (void *)kmem_alloc3(&kernel_map,
 320                                 round_page(nswbuf_raw * sizeof(struct buf)),
 321                                 KM_NOTLBSYNC);
 322         smp_invltlb();
 323         bp = swbuf_raw;
 324         for (i = 0; i < nswbuf_raw; ++i, ++bp) {
 325                 bp->b_swindex = i & BSWHMASK;
 326                 BUF_LOCKINIT(bp);
 327                 buf_dep_init(bp);
 328                 TAILQ_INSERT_HEAD(&bswlist_raw[i & BSWHMASK], bp, b_freelist);
 329                 atomic_add_int(&pbuf_raw_count, 1);
 330         }
 331
 332         /*
 333          * Allow the clustering code to use half of our pbufs.
 334          */
 335         cluster_pbuf_freecnt = nswbuf_kva / 2;
 336 }
 337
 338 SYSINIT(do_vmpg, SI_BOOT2_MACHDEP, SI_ORDER_FIRST, vm_pager_bufferinit, NULL);
 339
 340 /*
 341  * No requirements.
 342  */
 343 void
 344 vm_pager_deallocate(vm_object_t object)
 345 {
 346         (*pagertab[object->type]->pgo_dealloc) (object);
 347 }
 348
 349 /*
 350  * vm_pager_get_pages() - inline, see vm/vm_pager.h
 351  * vm_pager_put_pages() - inline, see vm/vm_pager.h
 352  * vm_pager_has_page() - inline, see vm/vm_pager.h
 353  * vm_pager_page_inserted() - inline, see vm/vm_pager.h
 354  * vm_pager_page_removed() - inline, see vm/vm_pager.h
 355  */
 356
 357 /*
 358  * Search the specified pager object list for an object with the
 359  * specified handle.  If an object with the specified handle is found,
 360  * increase its reference count and return it.  Otherwise, return NULL.
 361  *
 362  * The pager object list must be locked.
 363  */
 364 vm_object_t
 365 vm_pager_object_lookup(struct pagerlst *pg_list, void *handle)
 366 {
 367         vm_object_t object;
 368
 369         TAILQ_FOREACH(object, pg_list, pager_object_list) {
 370                 if (object->handle == handle) {
 371                         VM_OBJECT_LOCK(object);
 372                         if ((object->flags & OBJ_DEAD) == 0) {
 373                                 vm_object_reference_locked(object);
 374                                 VM_OBJECT_UNLOCK(object);
 375                                 break;
 376                         }
 377                         VM_OBJECT_UNLOCK(object);
 378                 }
 379         }
 380         return (object);
 381 }
 382
 383 /*
 384  * Initialize a physical buffer.
 385  *
 386  * No requirements.
 387  */
 388 static void
 389 initpbuf(struct buf *bp)
 390 {
 391         bp->b_qindex = 0;               /* BQUEUE_NONE */
 392         bp->b_data = bp->b_kvabase;     /* NULL if pbuf sans kva */
 393         bp->b_flags = B_PAGING;
 394         bp->b_cmd = BUF_CMD_DONE;
 395         bp->b_error = 0;
 396         bp->b_bcount = 0;
 397         bp->b_bufsize = MAXPHYS;
 398         initbufbio(bp);
 399         xio_init(&bp->b_xio);
 400         BUF_LOCK(bp, LK_EXCLUSIVE);
 401 }
 402
 403 /*
 404  * Allocate a physical buffer
 405  *
 406  *      There are a limited number of physical buffers.  We need to make
 407  *      sure that no single subsystem is able to hog all of them,
 408  *      so each subsystem implements a counter which is typically initialized
 409  *      to 1/2 nswbuf.  getpbuf() decrements this counter in allocation and
 410  *      increments it on release, and blocks if the counter hits zero.  A
 411  *      subsystem may initialize the counter to -1 to disable the feature,
 412  *      but it must still be sure to match up all uses of getpbuf() with
 413  *      relpbuf() using the same variable.
 414  *
 415  *      NOTE: pfreecnt can be NULL, but this 'feature' will be removed
 416  *      relatively soon when the rest of the subsystems get smart about it. XXX
 417  *
 418  *      Physical buffers can be with or without KVA space reserved.  There
 419  *      are severe limitations on the ones with KVA reserved, and fewer
 420  *      limitations on the ones without.  getpbuf() gets one without,
 421  *      getpbuf_kva() gets one with.
 422  *
 423  * No requirements.
 424  */
 425 struct buf *
 426 getpbuf(int *pfreecnt)  /* raw */
 427 {
 428         struct buf *bp;
 429         int iter;
 430         int loops;
 431
 432         for (;;) {
 433                 while (pfreecnt && *pfreecnt <= 0) {
 434                         tsleep_interlock(pfreecnt, 0);
 435                         if (atomic_fetchadd_int(pfreecnt, 0) <= 0)
 436                                 tsleep(pfreecnt, PINTERLOCKED, "wswbuf0", 0);
 437                 }
 438                 if (pbuf_raw_count <= 0) {
 439                         tsleep_interlock(&pbuf_raw_count, 0);
 440                         if (atomic_fetchadd_int(&pbuf_raw_count, 0) <= 0)
 441                                 tsleep(&pbuf_raw_count, PINTERLOCKED,
 442                                        "wswbuf0", 0);
 443                         continue;
 444                 }
 445                 iter = mycpuid & BSWHMASK;
 446                 for (loops = BSWHSIZE; loops; --loops) {
 447                         if (TAILQ_FIRST(&bswlist_raw[iter]) == NULL) {
 448                                 iter = (iter + 1) & BSWHMASK;
 449                                 continue;
 450                         }
 451                         spin_lock(&bswspin_raw[iter]);
 452                         if ((bp = TAILQ_FIRST(&bswlist_raw[iter])) == NULL) {
 453                                 spin_unlock(&bswspin_raw[iter]);
 454                                 iter = (iter + 1) & BSWHMASK;
 455                                 continue;
 456                         }
 457                         TAILQ_REMOVE(&bswlist_raw[iter], bp, b_freelist);
 458                         atomic_add_int(&pbuf_raw_count, -1);
 459                         if (pfreecnt)
 460                                 atomic_add_int(pfreecnt, -1);
 461                         spin_unlock(&bswspin_raw[iter]);
 462                         initpbuf(bp);
 463
 464                         return bp;
 465                 }
 466         }
 467         /* not reached */
 468 }
 469
 470 struct buf *
 471 getpbuf_kva(int *pfreecnt)
 472 {
 473         struct buf *bp;
 474         int iter;
 475         int loops;
 476
 477         for (;;) {
 478                 while (pfreecnt && *pfreecnt <= 0) {
 479                         tsleep_interlock(pfreecnt, 0);
 480                         if (atomic_fetchadd_int(pfreecnt, 0) <= 0)
 481                                 tsleep(pfreecnt, PINTERLOCKED, "wswbuf0", 0);
 482                 }
 483                 if (pbuf_kva_count <= 0) {
 484                         tsleep_interlock(&pbuf_kva_count, 0);
 485                         if (atomic_fetchadd_int(&pbuf_kva_count, 0) <= 0)
 486                                 tsleep(&pbuf_kva_count, PINTERLOCKED,
 487                                        "wswbuf0", 0);
 488                         continue;
 489                 }
 490                 iter = mycpuid & BSWHMASK;
 491                 for (loops = BSWHSIZE; loops; --loops) {
 492                         if (TAILQ_FIRST(&bswlist_kva[iter]) == NULL) {
 493                                 iter = (iter + 1) & BSWHMASK;
 494                                 continue;
 495                         }
 496                         spin_lock(&bswspin_kva[iter]);
 497                         if ((bp = TAILQ_FIRST(&bswlist_kva[iter])) == NULL) {
 498                                 spin_unlock(&bswspin_kva[iter]);
 499                                 iter = (iter + 1) & BSWHMASK;
 500                                 continue;
 501                         }
 502                         TAILQ_REMOVE(&bswlist_kva[iter], bp, b_freelist);
 503                         atomic_add_int(&pbuf_kva_count, -1);
 504                         if (pfreecnt)
 505                                 atomic_add_int(pfreecnt, -1);
 506                         spin_unlock(&bswspin_kva[iter]);
 507                         initpbuf(bp);
 508
 509                         return bp;
 510                 }
 511         }
 512         /* not reached */
 513 }
 514
 515 /*
 516  * Allocate a pbuf with kernel memory already preallocated.  Caller must
 517  * not change the mapping.
 518  */
 519 struct buf *
 520 getpbuf_mem(int *pfreecnt)
 521 {
 522         struct buf *bp;
 523         int iter;
 524         int loops;
 525
 526         for (;;) {
 527                 while (pfreecnt && *pfreecnt <= 0) {
 528                         tsleep_interlock(pfreecnt, 0);
 529                         if (atomic_fetchadd_int(pfreecnt, 0) <= 0)
 530                                 tsleep(pfreecnt, PINTERLOCKED, "wswbuf0", 0);
 531                 }
 532                 if (pbuf_mem_count <= 0) {
 533                         tsleep_interlock(&pbuf_mem_count, 0);
 534                         if (atomic_fetchadd_int(&pbuf_mem_count, 0) <= 0)
 535                                 tsleep(&pbuf_mem_count, PINTERLOCKED,
 536                                        "wswbuf0", 0);
 537                         continue;
 538                 }
 539                 iter = mycpuid & BSWHMASK;
 540                 for (loops = BSWHSIZE; loops; --loops) {
 541                         if (TAILQ_FIRST(&bswlist_mem[iter]) == NULL) {
 542                                 iter = (iter + 1) & BSWHMASK;
 543                                 continue;
 544                         }
 545                         spin_lock(&bswspin_mem[iter]);
 546                         if ((bp = TAILQ_FIRST(&bswlist_mem[iter])) == NULL) {
 547                                 spin_unlock(&bswspin_mem[iter]);
 548                                 iter = (iter + 1) & BSWHMASK;
 549                                 continue;
 550                         }
 551                         TAILQ_REMOVE(&bswlist_mem[iter], bp, b_freelist);
 552                         atomic_add_int(&pbuf_mem_count, -1);
 553                         if (pfreecnt)
 554                                 atomic_add_int(pfreecnt, -1);
 555                         spin_unlock(&bswspin_mem[iter]);
 556                         initpbuf(bp);
 557
 558                         return bp;
 559                 }
 560         }
 561         /* not reached */
 562 }
 563
 564 /*
 565  * Allocate a physical buffer, if one is available.
 566  *
 567  * Note that there is no NULL hack here - all subsystems using this
 568  * call understand how to use pfreecnt.
 569  *
 570  * No requirements.
 571  */
 572 struct buf *
 573 trypbuf(int *pfreecnt)          /* raw */
 574 {
 575         struct buf *bp;
 576         int iter = mycpuid & BSWHMASK;
 577         int loops;
 578
 579         for (loops = BSWHSIZE; loops; --loops) {
 580                 if (*pfreecnt <= 0 || TAILQ_FIRST(&bswlist_raw[iter]) == NULL) {
 581                         iter = (iter + 1) & BSWHMASK;
 582                         continue;
 583                 }
 584                 spin_lock(&bswspin_raw[iter]);
 585                 if (*pfreecnt <= 0 ||
 586                     (bp = TAILQ_FIRST(&bswlist_raw[iter])) == NULL) {
 587                         spin_unlock(&bswspin_raw[iter]);
 588                         iter = (iter + 1) & BSWHMASK;
 589                         continue;
 590                 }
 591                 TAILQ_REMOVE(&bswlist_raw[iter], bp, b_freelist);
 592                 atomic_add_int(&pbuf_raw_count, -1);
 593                 atomic_add_int(pfreecnt, -1);
 594
 595                 spin_unlock(&bswspin_raw[iter]);
 596
 597                 initpbuf(bp);
 598
 599                 return bp;
 600         }
 601         return NULL;
 602 }
 603
 604 struct buf *
 605 trypbuf_kva(int *pfreecnt)
 606 {
 607         struct buf *bp;
 608         int iter = mycpuid & BSWHMASK;
 609         int loops;
 610
 611         for (loops = BSWHSIZE; loops; --loops) {
 612                 if (*pfreecnt <= 0 || TAILQ_FIRST(&bswlist_kva[iter]) == NULL) {
 613                         iter = (iter + 1) & BSWHMASK;
 614                         continue;
 615                 }
 616                 spin_lock(&bswspin_kva[iter]);
 617                 if (*pfreecnt <= 0 ||
 618                     (bp = TAILQ_FIRST(&bswlist_kva[iter])) == NULL) {
 619                         spin_unlock(&bswspin_kva[iter]);
 620                         iter = (iter + 1) & BSWHMASK;
 621                         continue;
 622                 }
 623                 TAILQ_REMOVE(&bswlist_kva[iter], bp, b_freelist);
 624                 atomic_add_int(&pbuf_kva_count, -1);
 625                 atomic_add_int(pfreecnt, -1);
 626
 627                 spin_unlock(&bswspin_kva[iter]);
 628
 629                 initpbuf(bp);
 630
 631                 return bp;
 632         }
 633         return NULL;
 634 }
 635
 636 /*
 637  * Release a physical buffer
 638  *
 639  *      NOTE: pfreecnt can be NULL, but this 'feature' will be removed
 640  *      relatively soon when the rest of the subsystems get smart about it. XXX
 641  *
 642  * No requirements.
 643  */
 644 void
 645 relpbuf(struct buf *bp, int *pfreecnt)
 646 {
 647         int wake = 0;
 648         int wake_free = 0;
 649         int iter = bp->b_swindex;
 650
 651         KKASSERT(bp->b_flags & B_PAGING);
 652         dsched_buf_exit(bp);
 653
 654         BUF_UNLOCK(bp);
 655
 656         if (bp >= swbuf_mem && bp < &swbuf_mem[nswbuf_mem]) {
 657                 KKASSERT(bp->b_kvabase);
 658                 spin_lock(&bswspin_mem[iter]);
 659                 TAILQ_INSERT_HEAD(&bswlist_mem[iter], bp, b_freelist);
 660                 if (atomic_fetchadd_int(&pbuf_mem_count, 1) == nswbuf_mem / 4)
 661                         wake = 1;
 662                 if (pfreecnt) {
 663                         if (atomic_fetchadd_int(pfreecnt, 1) == 1)
 664                                 wake_free = 1;
 665                 }
 666                 spin_unlock(&bswspin_mem[iter]);
 667                 if (wake)
 668                         wakeup(&pbuf_mem_count);
 669         } else if (swbuf_kva && bp < &swbuf_kva[nswbuf_kva]) {
 670                 KKASSERT(bp->b_kvabase);
 671                 spin_lock(&bswspin_kva[iter]);
 672                 TAILQ_INSERT_HEAD(&bswlist_kva[iter], bp, b_freelist);
 673                 if (atomic_fetchadd_int(&pbuf_kva_count, 1) == nswbuf_kva / 4)
 674                         wake = 1;
 675                 if (pfreecnt) {
 676                         if (atomic_fetchadd_int(pfreecnt, 1) == 1)
 677                                 wake_free = 1;
 678                 }
 679                 spin_unlock(&bswspin_kva[iter]);
 680                 if (wake)
 681                         wakeup(&pbuf_kva_count);
 682         } else {
 683                 KKASSERT(bp->b_kvabase == NULL);
 684                 KKASSERT(bp >= swbuf_raw && bp < &swbuf_raw[nswbuf_raw]);
 685                 spin_lock(&bswspin_raw[iter]);
 686                 TAILQ_INSERT_HEAD(&bswlist_raw[iter], bp, b_freelist);
 687                 if (atomic_fetchadd_int(&pbuf_raw_count, 1) == nswbuf_raw / 4)
 688                         wake = 1;
 689                 if (pfreecnt) {
 690                         if (atomic_fetchadd_int(pfreecnt, 1) == 1)
 691                                 wake_free = 1;
 692                 }
 693                 spin_unlock(&bswspin_raw[iter]);
 694                 if (wake)
 695                         wakeup(&pbuf_raw_count);
 696         }
 697         if (wake_free)
 698                 wakeup(pfreecnt);
 699 }
 700
 701 void
 702 pbuf_adjcount(int *pfreecnt, int n)
 703 {
 704         if (n) {
 705                 atomic_add_int(pfreecnt, n);
 706                 wakeup(pfreecnt);
 707         }
 708 }