sys/kern/uipc_mbuf.c

   1 /*
   2  * Copyright (c) 2004 Jeffrey M. Hsu.  All rights reserved.
   3  * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
   4  *
   5  * This code is derived from software contributed to The DragonFly Project
   6  * by Jeffrey M. Hsu.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  * 3. Neither the name of The DragonFly Project nor the names of its
  17  *    contributors may be used to endorse or promote products derived
  18  *    from this software without specific, prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  24  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  25  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  */
  33
  34 /*
  35  * Copyright (c) 1982, 1986, 1988, 1991, 1993
  36  *      The Regents of the University of California.  All rights reserved.
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:
  48  *      This product includes software developed by the University of
  49  *      California, Berkeley and its contributors.
  50  * 4. Neither the name of the University nor the names of its contributors
  51  *    may be used to endorse or promote products derived from this software
  52  *    without specific prior written permission.
  53  *
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  64  * SUCH DAMAGE.
  65  *
  66  * @(#)uipc_mbuf.c      8.2 (Berkeley) 1/4/94
  67  * $FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.51.2.24 2003/04/15 06:59:29 silby Exp $
  68  * $DragonFly: src/sys/kern/uipc_mbuf.c,v 1.70 2008/11/20 14:21:01 sephe Exp $
  69  */
  70
  71 #include "opt_param.h"
  72 #include "opt_mbuf_stress_test.h"
  73 #include <sys/param.h>
  74 #include <sys/systm.h>
  75 #include <sys/malloc.h>
  76 #include <sys/mbuf.h>
  77 #include <sys/kernel.h>
  78 #include <sys/sysctl.h>
  79 #include <sys/domain.h>
  80 #include <sys/objcache.h>
  81 #include <sys/tree.h>
  82 #include <sys/protosw.h>
  83 #include <sys/uio.h>
  84 #include <sys/thread.h>
  85 #include <sys/globaldata.h>
  86 #include <sys/thread2.h>
  87
  88 #include <machine/atomic.h>
  89 #include <machine/limits.h>
  90
  91 #include <vm/vm.h>
  92 #include <vm/vm_kern.h>
  93 #include <vm/vm_extern.h>
  94
  95 #ifdef INVARIANTS
  96 #include <machine/cpu.h>
  97 #endif
  98
  99 /*
 100  * mbuf cluster meta-data
 101  */
 102 struct mbcluster {
 103         int32_t mcl_refs;
 104         void    *mcl_data;
 105 };
 106
 107 /*
 108  * mbuf tracking for debugging purposes
 109  */
 110 #ifdef MBUF_DEBUG
 111
 112 static MALLOC_DEFINE(M_MTRACK, "mtrack", "mtrack");
 113
 114 struct mbctrack;
 115 RB_HEAD(mbuf_rb_tree, mbtrack);
 116 RB_PROTOTYPE2(mbuf_rb_tree, mbtrack, rb_node, mbtrack_cmp, struct mbuf *);
 117
 118 struct mbtrack {
 119         RB_ENTRY(mbtrack) rb_node;
 120         int trackid;
 121         struct mbuf *m;
 122 };
 123
 124 static int
 125 mbtrack_cmp(struct mbtrack *mb1, struct mbtrack *mb2)
 126 {
 127         if (mb1->m < mb2->m)
 128                 return(-1);
 129         if (mb1->m > mb2->m)
 130                 return(1);
 131         return(0);
 132 }
 133
 134 RB_GENERATE2(mbuf_rb_tree, mbtrack, rb_node, mbtrack_cmp, struct mbuf *, m);
 135
 136 struct mbuf_rb_tree     mbuf_track_root;
 137
 138 static void
 139 mbuftrack(struct mbuf *m)
 140 {
 141         struct mbtrack *mbt;
 142
 143         crit_enter();
 144         mbt = kmalloc(sizeof(*mbt), M_MTRACK, M_INTWAIT|M_ZERO);
 145         mbt->m = m;
 146         if (mbuf_rb_tree_RB_INSERT(&mbuf_track_root, mbt))
 147                 panic("mbuftrack: mbuf %p already being tracked\n", m);
 148         crit_exit();
 149 }
 150
 151 static void
 152 mbufuntrack(struct mbuf *m)
 153 {
 154         struct mbtrack *mbt;
 155
 156         crit_enter();
 157         mbt = mbuf_rb_tree_RB_LOOKUP(&mbuf_track_root, m);
 158         if (mbt == NULL) {
 159                 kprintf("mbufuntrack: mbuf %p was not tracked\n", m);
 160         } else {
 161                 mbuf_rb_tree_RB_REMOVE(&mbuf_track_root, mbt);
 162                 kfree(mbt, M_MTRACK);
 163         }
 164         crit_exit();
 165 }
 166
 167 void
 168 mbuftrackid(struct mbuf *m, int trackid)
 169 {
 170         struct mbtrack *mbt;
 171         struct mbuf *n;
 172
 173         crit_enter();
 174         while (m) {
 175                 n = m->m_nextpkt;
 176                 while (m) {
 177                         mbt = mbuf_rb_tree_RB_LOOKUP(&mbuf_track_root, m);
 178                         if (mbt)
 179                                 mbt->trackid = trackid;
 180                         m = m->m_next;
 181                 }
 182                 m = n;
 183         }
 184         crit_exit();
 185 }
 186
 187 static int
 188 mbuftrack_callback(struct mbtrack *mbt, void *arg)
 189 {
 190         struct sysctl_req *req = arg;
 191         char buf[64];
 192         int error;
 193
 194         ksnprintf(buf, sizeof(buf), "mbuf %p track %d\n", mbt->m, mbt->trackid);
 195
 196         error = SYSCTL_OUT(req, buf, strlen(buf));
 197         if (error)
 198                 return(-error);
 199         return(0);
 200 }
 201
 202 static int
 203 mbuftrack_show(SYSCTL_HANDLER_ARGS)
 204 {
 205         int error;
 206
 207         crit_enter();
 208         error = mbuf_rb_tree_RB_SCAN(&mbuf_track_root, NULL,
 209                                      mbuftrack_callback, req);
 210         crit_exit();
 211         return (-error);
 212 }
 213 SYSCTL_PROC(_kern_ipc, OID_AUTO, showmbufs, CTLFLAG_RD|CTLTYPE_STRING,
 214             0, 0, mbuftrack_show, "A", "Show all in-use mbufs");
 215
 216 #else
 217
 218 #define mbuftrack(m)
 219 #define mbufuntrack(m)
 220
 221 #endif
 222
 223 static void mbinit(void *);
 224 SYSINIT(mbuf, SI_BOOT2_MACHDEP, SI_ORDER_FIRST, mbinit, NULL)
 225
 226 static u_long   mbtypes[SMP_MAXCPU][MT_NTYPES];
 227
 228 static struct mbstat mbstat[SMP_MAXCPU];
 229 int     max_linkhdr;
 230 int     max_protohdr;
 231 int     max_hdr;
 232 int     max_datalen;
 233 int     m_defragpackets;
 234 int     m_defragbytes;
 235 int     m_defraguseless;
 236 int     m_defragfailure;
 237 #ifdef MBUF_STRESS_TEST
 238 int     m_defragrandomfailures;
 239 #endif
 240
 241 struct objcache *mbuf_cache, *mbufphdr_cache;
 242 struct objcache *mclmeta_cache;
 243 struct objcache *mbufcluster_cache, *mbufphdrcluster_cache;
 244
 245 int     nmbclusters;
 246 int     nmbufs;
 247
 248 SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
 249            &max_linkhdr, 0, "");
 250 SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
 251            &max_protohdr, 0, "");
 252 SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
 253 SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
 254            &max_datalen, 0, "");
 255 SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
 256            &mbuf_wait, 0, "");
 257 static int do_mbstat(SYSCTL_HANDLER_ARGS);
 258
 259 SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat, CTLTYPE_STRUCT|CTLFLAG_RD,
 260         0, 0, do_mbstat, "S,mbstat", "");
 261
 262 static int do_mbtypes(SYSCTL_HANDLER_ARGS);
 263
 264 SYSCTL_PROC(_kern_ipc, OID_AUTO, mbtypes, CTLTYPE_ULONG|CTLFLAG_RD,
 265         0, 0, do_mbtypes, "LU", "");
 266
 267 static int
 268 do_mbstat(SYSCTL_HANDLER_ARGS)
 269 {
 270         struct mbstat mbstat_total;
 271         struct mbstat *mbstat_totalp;
 272         int i;
 273
 274         bzero(&mbstat_total, sizeof(mbstat_total));
 275         mbstat_totalp = &mbstat_total;
 276
 277         for (i = 0; i < ncpus; i++)
 278         {
 279                 mbstat_total.m_mbufs += mbstat[i].m_mbufs;
 280                 mbstat_total.m_clusters += mbstat[i].m_clusters;
 281                 mbstat_total.m_spare += mbstat[i].m_spare;
 282                 mbstat_total.m_clfree += mbstat[i].m_clfree;
 283                 mbstat_total.m_drops += mbstat[i].m_drops;
 284                 mbstat_total.m_wait += mbstat[i].m_wait;
 285                 mbstat_total.m_drain += mbstat[i].m_drain;
 286                 mbstat_total.m_mcfail += mbstat[i].m_mcfail;
 287                 mbstat_total.m_mpfail += mbstat[i].m_mpfail;
 288
 289         }
 290         /*
 291          * The following fields are not cumulative fields so just
 292          * get their values once.
 293          */
 294         mbstat_total.m_msize = mbstat[0].m_msize;
 295         mbstat_total.m_mclbytes = mbstat[0].m_mclbytes;
 296         mbstat_total.m_minclsize = mbstat[0].m_minclsize;
 297         mbstat_total.m_mlen = mbstat[0].m_mlen;
 298         mbstat_total.m_mhlen = mbstat[0].m_mhlen;
 299
 300         return(sysctl_handle_opaque(oidp, mbstat_totalp, sizeof(mbstat_total), req));
 301 }
 302
 303 static int
 304 do_mbtypes(SYSCTL_HANDLER_ARGS)
 305 {
 306         u_long totals[MT_NTYPES];
 307         int i, j;
 308
 309         for (i = 0; i < MT_NTYPES; i++)
 310                 totals[i] = 0;
 311
 312         for (i = 0; i < ncpus; i++)
 313         {
 314                 for (j = 0; j < MT_NTYPES; j++)
 315                         totals[j] += mbtypes[i][j];
 316         }
 317
 318         return(sysctl_handle_opaque(oidp, totals, sizeof(totals), req));
 319 }
 320
 321 /*
 322  * These are read-only because we do not currently have any code
 323  * to adjust the objcache limits after the fact.  The variables
 324  * may only be set as boot-time tunables.
 325  */
 326 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
 327            &nmbclusters, 0, "Maximum number of mbuf clusters available");
 328 SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
 329            "Maximum number of mbufs available");
 330
 331 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
 332            &m_defragpackets, 0, "");
 333 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
 334            &m_defragbytes, 0, "");
 335 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
 336            &m_defraguseless, 0, "");
 337 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
 338            &m_defragfailure, 0, "");
 339 #ifdef MBUF_STRESS_TEST
 340 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
 341            &m_defragrandomfailures, 0, "");
 342 #endif
 343
 344 static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
 345 static MALLOC_DEFINE(M_MBUFCL, "mbufcl", "mbufcl");
 346 static MALLOC_DEFINE(M_MCLMETA, "mclmeta", "mclmeta");
 347
 348 static void m_reclaim (void);
 349 static void m_mclref(void *arg);
 350 static void m_mclfree(void *arg);
 351
 352 #ifndef NMBCLUSTERS
 353 #define NMBCLUSTERS     (512 + maxusers * 16)
 354 #endif
 355 #ifndef NMBUFS
 356 #define NMBUFS          (nmbclusters * 2)
 357 #endif
 358
 359 /*
 360  * Perform sanity checks of tunables declared above.
 361  */
 362 static void
 363 tunable_mbinit(void *dummy)
 364 {
 365         /*
 366          * This has to be done before VM init.
 367          */
 368         nmbclusters = NMBCLUSTERS;
 369         TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
 370         nmbufs = NMBUFS;
 371         TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
 372         /* Sanity checks */
 373         if (nmbufs < nmbclusters * 2)
 374                 nmbufs = nmbclusters * 2;
 375 }
 376 SYSINIT(tunable_mbinit, SI_BOOT1_TUNABLES, SI_ORDER_ANY,
 377         tunable_mbinit, NULL);
 378
 379 /* "number of clusters of pages" */
 380 #define NCL_INIT        1
 381
 382 #define NMB_INIT        16
 383
 384 /*
 385  * The mbuf object cache only guarantees that m_next and m_nextpkt are
 386  * NULL and that m_data points to the beginning of the data area.  In
 387  * particular, m_len and m_pkthdr.len are uninitialized.  It is the
 388  * responsibility of the caller to initialize those fields before use.
 389  */
 390
 391 static boolean_t __inline
 392 mbuf_ctor(void *obj, void *private, int ocflags)
 393 {
 394         struct mbuf *m = obj;
 395
 396         m->m_next = NULL;
 397         m->m_nextpkt = NULL;
 398         m->m_data = m->m_dat;
 399         m->m_flags = 0;
 400
 401         return (TRUE);
 402 }
 403
 404 /*
 405  * Initialize the mbuf and the packet header fields.
 406  */
 407 static boolean_t
 408 mbufphdr_ctor(void *obj, void *private, int ocflags)
 409 {
 410         struct mbuf *m = obj;
 411
 412         m->m_next = NULL;
 413         m->m_nextpkt = NULL;
 414         m->m_data = m->m_pktdat;
 415         m->m_flags = M_PKTHDR | M_PHCACHE;
 416
 417         m->m_pkthdr.rcvif = NULL;       /* eliminate XXX JH */
 418         SLIST_INIT(&m->m_pkthdr.tags);
 419         m->m_pkthdr.csum_flags = 0;     /* eliminate XXX JH */
 420         m->m_pkthdr.fw_flags = 0;       /* eliminate XXX JH */
 421
 422         return (TRUE);
 423 }
 424
 425 /*
 426  * A mbcluster object consists of 2K (MCLBYTES) cluster and a refcount.
 427  */
 428 static boolean_t
 429 mclmeta_ctor(void *obj, void *private, int ocflags)
 430 {
 431         struct mbcluster *cl = obj;
 432         void *buf;
 433
 434         if (ocflags & M_NOWAIT)
 435                 buf = kmalloc(MCLBYTES, M_MBUFCL, M_NOWAIT | M_ZERO);
 436         else
 437                 buf = kmalloc(MCLBYTES, M_MBUFCL, M_INTWAIT | M_ZERO);
 438         if (buf == NULL)
 439                 return (FALSE);
 440         cl->mcl_refs = 0;
 441         cl->mcl_data = buf;
 442         return (TRUE);
 443 }
 444
 445 static void
 446 mclmeta_dtor(void *obj, void *private)
 447 {
 448         struct mbcluster *mcl = obj;
 449
 450         KKASSERT(mcl->mcl_refs == 0);
 451         kfree(mcl->mcl_data, M_MBUFCL);
 452 }
 453
 454 static void
 455 linkcluster(struct mbuf *m, struct mbcluster *cl)
 456 {
 457         /*
 458          * Add the cluster to the mbuf.  The caller will detect that the
 459          * mbuf now has an attached cluster.
 460          */
 461         m->m_ext.ext_arg = cl;
 462         m->m_ext.ext_buf = cl->mcl_data;
 463         m->m_ext.ext_ref = m_mclref;
 464         m->m_ext.ext_free = m_mclfree;
 465         m->m_ext.ext_size = MCLBYTES;
 466         atomic_add_int(&cl->mcl_refs, 1);
 467
 468         m->m_data = m->m_ext.ext_buf;
 469         m->m_flags |= M_EXT | M_EXT_CLUSTER;
 470 }
 471
 472 static boolean_t
 473 mbufphdrcluster_ctor(void *obj, void *private, int ocflags)
 474 {
 475         struct mbuf *m = obj;
 476         struct mbcluster *cl;
 477
 478         mbufphdr_ctor(obj, private, ocflags);
 479         cl = objcache_get(mclmeta_cache, ocflags);
 480         if (cl == NULL) {
 481                 ++mbstat[mycpu->gd_cpuid].m_drops;
 482                 return (FALSE);
 483         }
 484         m->m_flags |= M_CLCACHE;
 485         linkcluster(m, cl);
 486         return (TRUE);
 487 }
 488
 489 static boolean_t
 490 mbufcluster_ctor(void *obj, void *private, int ocflags)
 491 {
 492         struct mbuf *m = obj;
 493         struct mbcluster *cl;
 494
 495         mbuf_ctor(obj, private, ocflags);
 496         cl = objcache_get(mclmeta_cache, ocflags);
 497         if (cl == NULL) {
 498                 ++mbstat[mycpu->gd_cpuid].m_drops;
 499                 return (FALSE);
 500         }
 501         m->m_flags |= M_CLCACHE;
 502         linkcluster(m, cl);
 503         return (TRUE);
 504 }
 505
 506 /*
 507  * Used for both the cluster and cluster PHDR caches.
 508  *
 509  * The mbuf may have lost its cluster due to sharing, deal
 510  * with the situation by checking M_EXT.
 511  */
 512 static void
 513 mbufcluster_dtor(void *obj, void *private)
 514 {
 515         struct mbuf *m = obj;
 516         struct mbcluster *mcl;
 517
 518         if (m->m_flags & M_EXT) {
 519                 KKASSERT((m->m_flags & M_EXT_CLUSTER) != 0);
 520                 mcl = m->m_ext.ext_arg;
 521                 KKASSERT(mcl->mcl_refs == 1);
 522                 mcl->mcl_refs = 0;
 523                 objcache_put(mclmeta_cache, mcl);
 524         }
 525 }
 526
 527 struct objcache_malloc_args mbuf_malloc_args = { MSIZE, M_MBUF };
 528 struct objcache_malloc_args mclmeta_malloc_args =
 529         { sizeof(struct mbcluster), M_MCLMETA };
 530
 531 /* ARGSUSED*/
 532 static void
 533 mbinit(void *dummy)
 534 {
 535         int mb_limit, cl_limit;
 536         int limit;
 537         int i;
 538
 539         /*
 540          * Initialize statistics
 541          */
 542         for (i = 0; i < ncpus; i++) {
 543                 atomic_set_long_nonlocked(&mbstat[i].m_msize, MSIZE);
 544                 atomic_set_long_nonlocked(&mbstat[i].m_mclbytes, MCLBYTES);
 545                 atomic_set_long_nonlocked(&mbstat[i].m_minclsize, MINCLSIZE);
 546                 atomic_set_long_nonlocked(&mbstat[i].m_mlen, MLEN);
 547                 atomic_set_long_nonlocked(&mbstat[i].m_mhlen, MHLEN);
 548         }
 549
 550         /*
 551          * Create objtect caches and save cluster limits, which will
 552          * be used to adjust backing kmalloc pools' limit later.
 553          */
 554
 555         mb_limit = cl_limit = 0;
 556
 557         limit = nmbufs;
 558         mbuf_cache = objcache_create("mbuf", &limit, 0,
 559             mbuf_ctor, NULL, NULL,
 560             objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
 561         mb_limit += limit;
 562
 563         limit = nmbufs;
 564         mbufphdr_cache = objcache_create("mbuf pkt hdr", &limit, 64,
 565             mbufphdr_ctor, NULL, NULL,
 566             objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
 567         mb_limit += limit;
 568
 569         cl_limit = nmbclusters;
 570         mclmeta_cache = objcache_create("cluster mbuf", &cl_limit, 0,
 571             mclmeta_ctor, mclmeta_dtor, NULL,
 572             objcache_malloc_alloc, objcache_malloc_free, &mclmeta_malloc_args);
 573
 574         limit = nmbclusters;
 575         mbufcluster_cache = objcache_create("mbuf + cluster", &limit, 0,
 576             mbufcluster_ctor, mbufcluster_dtor, NULL,
 577             objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
 578         mb_limit += limit;
 579
 580         limit = nmbclusters;
 581         mbufphdrcluster_cache = objcache_create("mbuf pkt hdr + cluster",
 582             &limit, 64, mbufphdrcluster_ctor, mbufcluster_dtor, NULL,
 583             objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
 584         mb_limit += limit;
 585
 586         /*
 587          * Adjust backing kmalloc pools' limit
 588          *
 589          * NOTE: We raise the limit by another 1/8 to take the effect
 590          * of loosememuse into account.
 591          */
 592         cl_limit += cl_limit / 8;
 593         kmalloc_raise_limit(mclmeta_malloc_args.mtype,
 594                             mclmeta_malloc_args.objsize * cl_limit);
 595         kmalloc_raise_limit(M_MBUFCL, MCLBYTES * cl_limit);
 596
 597         mb_limit += mb_limit / 8;
 598         kmalloc_raise_limit(mbuf_malloc_args.mtype,
 599                             mbuf_malloc_args.objsize * mb_limit);
 600 }
 601
 602 /*
 603  * Return the number of references to this mbuf's data.  0 is returned
 604  * if the mbuf is not M_EXT, a reference count is returned if it is
 605  * M_EXT | M_EXT_CLUSTER, and 99 is returned if it is a special M_EXT.
 606  */
 607 int
 608 m_sharecount(struct mbuf *m)
 609 {
 610         switch (m->m_flags & (M_EXT | M_EXT_CLUSTER)) {
 611         case 0:
 612                 return (0);
 613         case M_EXT:
 614                 return (99);
 615         case M_EXT | M_EXT_CLUSTER:
 616                 return (((struct mbcluster *)m->m_ext.ext_arg)->mcl_refs);
 617         }
 618         /* NOTREACHED */
 619         return (0);             /* to shut up compiler */
 620 }
 621
 622 /*
 623  * change mbuf to new type
 624  */
 625 void
 626 m_chtype(struct mbuf *m, int type)
 627 {
 628         struct globaldata *gd = mycpu;
 629
 630         atomic_add_long_nonlocked(&mbtypes[gd->gd_cpuid][type], 1);
 631         atomic_subtract_long_nonlocked(&mbtypes[gd->gd_cpuid][m->m_type], 1);
 632         atomic_set_short_nonlocked(&m->m_type, type);
 633 }
 634
 635 static void
 636 m_reclaim(void)
 637 {
 638         struct domain *dp;
 639         struct protosw *pr;
 640
 641         crit_enter();
 642         SLIST_FOREACH(dp, &domains, dom_next) {
 643                 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
 644                         if (pr->pr_drain)
 645                                 (*pr->pr_drain)();
 646                 }
 647         }
 648         crit_exit();
 649         atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_drain, 1);
 650 }
 651
 652 static void __inline
 653 updatestats(struct mbuf *m, int type)
 654 {
 655         struct globaldata *gd = mycpu;
 656         m->m_type = type;
 657
 658         mbuftrack(m);
 659
 660         atomic_add_long_nonlocked(&mbtypes[gd->gd_cpuid][type], 1);
 661         atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mbufs, 1);
 662
 663 }
 664
 665 /*
 666  * Allocate an mbuf.
 667  */
 668 struct mbuf *
 669 m_get(int how, int type)
 670 {
 671         struct mbuf *m;
 672         int ntries = 0;
 673         int ocf = MBTOM(how);
 674
 675 retryonce:
 676
 677         m = objcache_get(mbuf_cache, ocf);
 678
 679         if (m == NULL) {
 680                 if ((how & MB_TRYWAIT) && ntries++ == 0) {
 681                         struct objcache *reclaimlist[] = {
 682                                 mbufphdr_cache,
 683                                 mbufcluster_cache, mbufphdrcluster_cache
 684                         };
 685                         const int nreclaims = __arysize(reclaimlist);
 686
 687                         if (!objcache_reclaimlist(reclaimlist, nreclaims, ocf))
 688                                 m_reclaim();
 689                         goto retryonce;
 690                 }
 691                 ++mbstat[mycpu->gd_cpuid].m_drops;
 692                 return (NULL);
 693         }
 694
 695         updatestats(m, type);
 696         return (m);
 697 }
 698
 699 struct mbuf *
 700 m_gethdr(int how, int type)
 701 {
 702         struct mbuf *m;
 703         int ocf = MBTOM(how);
 704         int ntries = 0;
 705
 706 retryonce:
 707
 708         m = objcache_get(mbufphdr_cache, ocf);
 709
 710         if (m == NULL) {
 711                 if ((how & MB_TRYWAIT) && ntries++ == 0) {
 712                         struct objcache *reclaimlist[] = {
 713                                 mbuf_cache,
 714                                 mbufcluster_cache, mbufphdrcluster_cache
 715                         };
 716                         const int nreclaims = __arysize(reclaimlist);
 717
 718                         if (!objcache_reclaimlist(reclaimlist, nreclaims, ocf))
 719                                 m_reclaim();
 720                         goto retryonce;
 721                 }
 722                 ++mbstat[mycpu->gd_cpuid].m_drops;
 723                 return (NULL);
 724         }
 725
 726         updatestats(m, type);
 727         return (m);
 728 }
 729
 730 /*
 731  * Get a mbuf (not a mbuf cluster!) and zero it.
 732  * Deprecated.
 733  */
 734 struct mbuf *
 735 m_getclr(int how, int type)
 736 {
 737         struct mbuf *m;
 738
 739         m = m_get(how, type);
 740         if (m != NULL)
 741                 bzero(m->m_data, MLEN);
 742         return (m);
 743 }
 744
 745 /*
 746  * Returns an mbuf with an attached cluster.
 747  * Because many network drivers use this kind of buffers a lot, it is
 748  * convenient to keep a small pool of free buffers of this kind.
 749  * Even a small size such as 10 gives about 10% improvement in the
 750  * forwarding rate in a bridge or router.
 751  */
 752 struct mbuf *
 753 m_getcl(int how, short type, int flags)
 754 {
 755         struct mbuf *m;
 756         int ocflags = MBTOM(how);
 757         int ntries = 0;
 758
 759 retryonce:
 760
 761         if (flags & M_PKTHDR)
 762                 m = objcache_get(mbufphdrcluster_cache, ocflags);
 763         else
 764                 m = objcache_get(mbufcluster_cache, ocflags);
 765
 766         if (m == NULL) {
 767                 if ((how & MB_TRYWAIT) && ntries++ == 0) {
 768                         struct objcache *reclaimlist[1];
 769
 770                         if (flags & M_PKTHDR)
 771                                 reclaimlist[0] = mbufcluster_cache;
 772                         else
 773                                 reclaimlist[0] = mbufphdrcluster_cache;
 774                         if (!objcache_reclaimlist(reclaimlist, 1, ocflags))
 775                                 m_reclaim();
 776                         goto retryonce;
 777                 }
 778                 ++mbstat[mycpu->gd_cpuid].m_drops;
 779                 return (NULL);
 780         }
 781
 782         m->m_type = type;
 783
 784         mbuftrack(m);
 785
 786         atomic_add_long_nonlocked(&mbtypes[mycpu->gd_cpuid][type], 1);
 787         atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_clusters, 1);
 788         return (m);
 789 }
 790
 791 /*
 792  * Allocate chain of requested length.
 793  */
 794 struct mbuf *
 795 m_getc(int len, int how, int type)
 796 {
 797         struct mbuf *n, *nfirst = NULL, **ntail = &nfirst;
 798         int nsize;
 799
 800         while (len > 0) {
 801                 n = m_getl(len, how, type, 0, &nsize);
 802                 if (n == NULL)
 803                         goto failed;
 804                 n->m_len = 0;
 805                 *ntail = n;
 806                 ntail = &n->m_next;
 807                 len -= nsize;
 808         }
 809         return (nfirst);
 810
 811 failed:
 812         m_freem(nfirst);
 813         return (NULL);
 814 }
 815
 816 /*
 817  * Allocate len-worth of mbufs and/or mbuf clusters (whatever fits best)
 818  * and return a pointer to the head of the allocated chain. If m0 is
 819  * non-null, then we assume that it is a single mbuf or an mbuf chain to
 820  * which we want len bytes worth of mbufs and/or clusters attached, and so
 821  * if we succeed in allocating it, we will just return a pointer to m0.
 822  *
 823  * If we happen to fail at any point during the allocation, we will free
 824  * up everything we have already allocated and return NULL.
 825  *
 826  * Deprecated.  Use m_getc() and m_cat() instead.
 827  */
 828 struct mbuf *
 829 m_getm(struct mbuf *m0, int len, int type, int how)
 830 {
 831         struct mbuf *nfirst;
 832
 833         nfirst = m_getc(len, how, type);
 834
 835         if (m0 != NULL) {
 836                 m_last(m0)->m_next = nfirst;
 837                 return (m0);
 838         }
 839
 840         return (nfirst);
 841 }
 842
 843 /*
 844  * Adds a cluster to a normal mbuf, M_EXT is set on success.
 845  * Deprecated.  Use m_getcl() instead.
 846  */
 847 void
 848 m_mclget(struct mbuf *m, int how)
 849 {
 850         struct mbcluster *mcl;
 851
 852         KKASSERT((m->m_flags & M_EXT) == 0);
 853         mcl = objcache_get(mclmeta_cache, MBTOM(how));
 854         if (mcl != NULL) {
 855                 linkcluster(m, mcl);
 856                 atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_clusters, 1);
 857         } else {
 858                 ++mbstat[mycpu->gd_cpuid].m_drops;
 859         }
 860 }
 861
 862 /*
 863  * Updates to mbcluster must be MPSAFE.  Only an entity which already has
 864  * a reference to the cluster can ref it, so we are in no danger of
 865  * racing an add with a subtract.  But the operation must still be atomic
 866  * since multiple entities may have a reference on the cluster.
 867  *
 868  * m_mclfree() is almost the same but it must contend with two entities
 869  * freeing the cluster at the same time.  If there is only one reference
 870  * count we are the only entity referencing the cluster and no further
 871  * locking is required.  Otherwise we must protect against a race to 0
 872  * with the serializer.
 873  */
 874 static void
 875 m_mclref(void *arg)
 876 {
 877         struct mbcluster *mcl = arg;
 878
 879         atomic_add_int(&mcl->mcl_refs, 1);
 880 }
 881
 882 /*
 883  * When dereferencing a cluster we have to deal with a N->0 race, where
 884  * N entities free their references simultaniously.  To do this we use
 885  * atomic_fetchadd_int().
 886  */
 887 static void
 888 m_mclfree(void *arg)
 889 {
 890         struct mbcluster *mcl = arg;
 891
 892         if (atomic_fetchadd_int(&mcl->mcl_refs, -1) == 1)
 893                 objcache_put(mclmeta_cache, mcl);
 894 }
 895
 896 /*
 897  * Free a single mbuf and any associated external storage.  The successor,
 898  * if any, is returned.
 899  *
 900  * We do need to check non-first mbuf for m_aux, since some of existing
 901  * code does not call M_PREPEND properly.
 902  * (example: call to bpf_mtap from drivers)
 903  */
 904 struct mbuf *
 905 m_free(struct mbuf *m)
 906 {
 907         struct mbuf *n;
 908         struct globaldata *gd = mycpu;
 909
 910         KASSERT(m->m_type != MT_FREE, ("freeing free mbuf %p", m));
 911         atomic_subtract_long_nonlocked(&mbtypes[gd->gd_cpuid][m->m_type], 1);
 912
 913         n = m->m_next;
 914
 915         /*
 916          * Make sure the mbuf is in constructed state before returning it
 917          * to the objcache.
 918          */
 919         m->m_next = NULL;
 920         mbufuntrack(m);
 921 #ifdef notyet
 922         KKASSERT(m->m_nextpkt == NULL);
 923 #else
 924         if (m->m_nextpkt != NULL) {
 925                 static int afewtimes = 10;
 926
 927                 if (afewtimes-- > 0) {
 928                         kprintf("mfree: m->m_nextpkt != NULL\n");
 929                         print_backtrace();
 930                 }
 931                 m->m_nextpkt = NULL;
 932         }
 933 #endif
 934         if (m->m_flags & M_PKTHDR) {
 935                 m_tag_delete_chain(m);          /* eliminate XXX JH */
 936         }
 937
 938         m->m_flags &= (M_EXT | M_EXT_CLUSTER | M_CLCACHE | M_PHCACHE);
 939
 940         /*
 941          * Clean the M_PKTHDR state so we can return the mbuf to its original
 942          * cache.  This is based on the PHCACHE flag which tells us whether
 943          * the mbuf was originally allocated out of a packet-header cache
 944          * or a non-packet-header cache.
 945          */
 946         if (m->m_flags & M_PHCACHE) {
 947                 m->m_flags |= M_PKTHDR;
 948                 m->m_pkthdr.rcvif = NULL;       /* eliminate XXX JH */
 949                 m->m_pkthdr.csum_flags = 0;     /* eliminate XXX JH */
 950                 m->m_pkthdr.fw_flags = 0;       /* eliminate XXX JH */
 951                 SLIST_INIT(&m->m_pkthdr.tags);
 952         }
 953
 954         /*
 955          * Handle remaining flags combinations.  M_CLCACHE tells us whether
 956          * the mbuf was originally allocated from a cluster cache or not,
 957          * and is totally separate from whether the mbuf is currently
 958          * associated with a cluster.
 959          */
 960         crit_enter();
 961         switch(m->m_flags & (M_CLCACHE | M_EXT | M_EXT_CLUSTER)) {
 962         case M_CLCACHE | M_EXT | M_EXT_CLUSTER:
 963                 /*
 964                  * mbuf+cluster cache case.  The mbuf was allocated from the
 965                  * combined mbuf_cluster cache and can be returned to the
 966                  * cache if the cluster hasn't been shared.
 967                  */
 968                 if (m_sharecount(m) == 1) {
 969                         /*
 970                          * The cluster has not been shared, we can just
 971                          * reset the data pointer and return the mbuf
 972                          * to the cluster cache.  Note that the reference
 973                          * count is left intact (it is still associated with
 974                          * an mbuf).
 975                          */
 976                         m->m_data = m->m_ext.ext_buf;
 977                         if (m->m_flags & M_PHCACHE)
 978                                 objcache_put(mbufphdrcluster_cache, m);
 979                         else
 980                                 objcache_put(mbufcluster_cache, m);
 981                         atomic_subtract_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_clusters, 1);
 982                 } else {
 983                         /*
 984                          * Hell.  Someone else has a ref on this cluster,
 985                          * we have to disconnect it which means we can't
 986                          * put it back into the mbufcluster_cache, we
 987                          * have to destroy the mbuf.
 988                          *
 989                          * Other mbuf references to the cluster will typically
 990                          * be M_EXT | M_EXT_CLUSTER but without M_CLCACHE.
 991                          *
 992                          * XXX we could try to connect another cluster to
 993                          * it.
 994                          */
 995                         m->m_ext.ext_free(m->m_ext.ext_arg);
 996                         m->m_flags &= ~(M_EXT | M_EXT_CLUSTER);
 997                         if (m->m_flags & M_PHCACHE)
 998                                 objcache_dtor(mbufphdrcluster_cache, m);
 999                         else
1000                                 objcache_dtor(mbufcluster_cache, m);
1001                 }
1002                 break;
1003         case M_EXT | M_EXT_CLUSTER:
1004                 /*
1005                  * Normal cluster associated with an mbuf that was allocated
1006                  * from the normal mbuf pool rather then the cluster pool.
1007                  * The cluster has to be independantly disassociated from the
1008                  * mbuf.
1009                  */
1010                 if (m_sharecount(m) == 1)
1011                         atomic_subtract_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_clusters, 1);
1012                 /* fall through */
1013         case M_EXT:
1014                 /*
1015                  * Normal cluster association case, disconnect the cluster from
1016                  * the mbuf.  The cluster may or may not be custom.
1017                  */
1018                 m->m_ext.ext_free(m->m_ext.ext_arg);
1019                 m->m_flags &= ~(M_EXT | M_EXT_CLUSTER);
1020                 /* fall through */
1021         case 0:
1022                 /*
1023                  * return the mbuf to the mbuf cache.
1024                  */
1025                 if (m->m_flags & M_PHCACHE) {
1026                         m->m_data = m->m_pktdat;
1027                         objcache_put(mbufphdr_cache, m);
1028                 } else {
1029                         m->m_data = m->m_dat;
1030                         objcache_put(mbuf_cache, m);
1031                 }
1032                 atomic_subtract_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mbufs, 1);
1033                 break;
1034         default:
1035                 if (!panicstr)
1036                         panic("bad mbuf flags %p %08x\n", m, m->m_flags);
1037                 break;
1038         }
1039         crit_exit();
1040         return (n);
1041 }
1042
1043 void
1044 m_freem(struct mbuf *m)
1045 {
1046         crit_enter();
1047         while (m)
1048                 m = m_free(m);
1049         crit_exit();
1050 }
1051
1052 /*
1053  * mbuf utility routines
1054  */
1055
1056 /*
1057  * Lesser-used path for M_PREPEND: allocate new mbuf to prepend to chain and
1058  * copy junk along.
1059  */
1060 struct mbuf *
1061 m_prepend(struct mbuf *m, int len, int how)
1062 {
1063         struct mbuf *mn;
1064
1065         if (m->m_flags & M_PKTHDR)
1066             mn = m_gethdr(how, m->m_type);
1067         else
1068             mn = m_get(how, m->m_type);
1069         if (mn == NULL) {
1070                 m_freem(m);
1071                 return (NULL);
1072         }
1073         if (m->m_flags & M_PKTHDR)
1074                 M_MOVE_PKTHDR(mn, m);
1075         mn->m_next = m;
1076         m = mn;
1077         if (len < MHLEN)
1078                 MH_ALIGN(m, len);
1079         m->m_len = len;
1080         return (m);
1081 }
1082
1083 /*
1084  * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
1085  * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
1086  * The wait parameter is a choice of MB_WAIT/MB_DONTWAIT from caller.
1087  * Note that the copy is read-only, because clusters are not copied,
1088  * only their reference counts are incremented.
1089  */
1090 struct mbuf *
1091 m_copym(const struct mbuf *m, int off0, int len, int wait)
1092 {
1093         struct mbuf *n, **np;
1094         int off = off0;
1095         struct mbuf *top;
1096         int copyhdr = 0;
1097
1098         KASSERT(off >= 0, ("m_copym, negative off %d", off));
1099         KASSERT(len >= 0, ("m_copym, negative len %d", len));
1100         if (off == 0 && m->m_flags & M_PKTHDR)
1101                 copyhdr = 1;
1102         while (off > 0) {
1103                 KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
1104                 if (off < m->m_len)
1105                         break;
1106                 off -= m->m_len;
1107                 m = m->m_next;
1108         }
1109         np = &top;
1110         top = 0;
1111         while (len > 0) {
1112                 if (m == NULL) {
1113                         KASSERT(len == M_COPYALL,
1114                             ("m_copym, length > size of mbuf chain"));
1115                         break;
1116                 }
1117                 /*
1118                  * Because we are sharing any cluster attachment below,
1119                  * be sure to get an mbuf that does not have a cluster
1120                  * associated with it.
1121                  */
1122                 if (copyhdr)
1123                         n = m_gethdr(wait, m->m_type);
1124                 else
1125                         n = m_get(wait, m->m_type);
1126                 *np = n;
1127                 if (n == NULL)
1128                         goto nospace;
1129                 if (copyhdr) {
1130                         if (!m_dup_pkthdr(n, m, wait))
1131                                 goto nospace;
1132                         if (len == M_COPYALL)
1133                                 n->m_pkthdr.len -= off0;
1134                         else
1135                                 n->m_pkthdr.len = len;
1136                         copyhdr = 0;
1137                 }
1138                 n->m_len = min(len, m->m_len - off);
1139                 if (m->m_flags & M_EXT) {
1140                         KKASSERT((n->m_flags & M_EXT) == 0);
1141                         n->m_data = m->m_data + off;
1142                         m->m_ext.ext_ref(m->m_ext.ext_arg);
1143                         n->m_ext = m->m_ext;
1144                         n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
1145                 } else {
1146                         bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
1147                             (unsigned)n->m_len);
1148                 }
1149                 if (len != M_COPYALL)
1150                         len -= n->m_len;
1151                 off = 0;
1152                 m = m->m_next;
1153                 np = &n->m_next;
1154         }
1155         if (top == NULL)
1156                 atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mcfail, 1);
1157         return (top);
1158 nospace:
1159         m_freem(top);
1160         atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mcfail, 1);
1161         return (NULL);
1162 }
1163
1164 /*
1165  * Copy an entire packet, including header (which must be present).
1166  * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
1167  * Note that the copy is read-only, because clusters are not copied,
1168  * only their reference counts are incremented.
1169  * Preserve alignment of the first mbuf so if the creator has left
1170  * some room at the beginning (e.g. for inserting protocol headers)
1171  * the copies also have the room available.
1172  */
1173 struct mbuf *
1174 m_copypacket(struct mbuf *m, int how)
1175 {
1176         struct mbuf *top, *n, *o;
1177
1178         n = m_gethdr(how, m->m_type);
1179         top = n;
1180         if (!n)
1181                 goto nospace;
1182
1183         if (!m_dup_pkthdr(n, m, how))
1184                 goto nospace;
1185         n->m_len = m->m_len;
1186         if (m->m_flags & M_EXT) {
1187                 KKASSERT((n->m_flags & M_EXT) == 0);
1188                 n->m_data = m->m_data;
1189                 m->m_ext.ext_ref(m->m_ext.ext_arg);
1190                 n->m_ext = m->m_ext;
1191                 n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
1192         } else {
1193                 n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat );
1194                 bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
1195         }
1196
1197         m = m->m_next;
1198         while (m) {
1199                 o = m_get(how, m->m_type);
1200                 if (!o)
1201                         goto nospace;
1202
1203                 n->m_next = o;
1204                 n = n->m_next;
1205
1206                 n->m_len = m->m_len;
1207                 if (m->m_flags & M_EXT) {
1208                         KKASSERT((n->m_flags & M_EXT) == 0);
1209                         n->m_data = m->m_data;
1210                         m->m_ext.ext_ref(m->m_ext.ext_arg);
1211                         n->m_ext = m->m_ext;
1212                         n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
1213                 } else {
1214                         bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
1215                 }
1216
1217                 m = m->m_next;
1218         }
1219         return top;
1220 nospace:
1221         m_freem(top);
1222         atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mcfail, 1);
1223         return (NULL);
1224 }
1225
1226 /*
1227  * Copy data from an mbuf chain starting "off" bytes from the beginning,
1228  * continuing for "len" bytes, into the indicated buffer.
1229  */
1230 void
1231 m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
1232 {
1233         unsigned count;
1234
1235         KASSERT(off >= 0, ("m_copydata, negative off %d", off));
1236         KASSERT(len >= 0, ("m_copydata, negative len %d", len));
1237         while (off > 0) {
1238                 KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
1239                 if (off < m->m_len)
1240                         break;
1241                 off -= m->m_len;
1242                 m = m->m_next;
1243         }
1244         while (len > 0) {
1245                 KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
1246                 count = min(m->m_len - off, len);
1247                 bcopy(mtod(m, caddr_t) + off, cp, count);
1248                 len -= count;
1249                 cp += count;
1250                 off = 0;
1251                 m = m->m_next;
1252         }
1253 }
1254
1255 /*
1256  * Copy a packet header mbuf chain into a completely new chain, including
1257  * copying any mbuf clusters.  Use this instead of m_copypacket() when
1258  * you need a writable copy of an mbuf chain.
1259  */
1260 struct mbuf *
1261 m_dup(struct mbuf *m, int how)
1262 {
1263         struct mbuf **p, *top = NULL;
1264         int remain, moff, nsize;
1265
1266         /* Sanity check */
1267         if (m == NULL)
1268                 return (NULL);
1269         KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __func__));
1270
1271         /* While there's more data, get a new mbuf, tack it on, and fill it */
1272         remain = m->m_pkthdr.len;
1273         moff = 0;
1274         p = &top;
1275         while (remain > 0 || top == NULL) {     /* allow m->m_pkthdr.len == 0 */
1276                 struct mbuf *n;
1277
1278                 /* Get the next new mbuf */
1279                 n = m_getl(remain, how, m->m_type, top == NULL ? M_PKTHDR : 0,
1280                            &nsize);
1281                 if (n == NULL)
1282                         goto nospace;
1283                 if (top == NULL)
1284                         if (!m_dup_pkthdr(n, m, how))
1285                                 goto nospace0;
1286
1287                 /* Link it into the new chain */
1288                 *p = n;
1289                 p = &n->m_next;
1290
1291                 /* Copy data from original mbuf(s) into new mbuf */
1292                 n->m_len = 0;
1293                 while (n->m_len < nsize && m != NULL) {
1294                         int chunk = min(nsize - n->m_len, m->m_len - moff);
1295
1296                         bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
1297                         moff += chunk;
1298                         n->m_len += chunk;
1299                         remain -= chunk;
1300                         if (moff == m->m_len) {
1301                                 m = m->m_next;
1302                                 moff = 0;
1303                         }
1304                 }
1305
1306                 /* Check correct total mbuf length */
1307                 KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
1308                         ("%s: bogus m_pkthdr.len", __func__));
1309         }
1310         return (top);
1311
1312 nospace:
1313         m_freem(top);
1314 nospace0:
1315         atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mcfail, 1);
1316         return (NULL);
1317 }
1318
1319 /*
1320  * Concatenate mbuf chain n to m.
1321  * Both chains must be of the same type (e.g. MT_DATA).
1322  * Any m_pkthdr is not updated.
1323  */
1324 void
1325 m_cat(struct mbuf *m, struct mbuf *n)
1326 {
1327         m = m_last(m);
1328         while (n) {
1329                 if (m->m_flags & M_EXT ||
1330                     m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
1331                         /* just join the two chains */
1332                         m->m_next = n;
1333                         return;
1334                 }
1335                 /* splat the data from one into the other */
1336                 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
1337                     (u_int)n->m_len);
1338                 m->m_len += n->m_len;
1339                 n = m_free(n);
1340         }
1341 }
1342
1343 void
1344 m_adj(struct mbuf *mp, int req_len)
1345 {
1346         int len = req_len;
1347         struct mbuf *m;
1348         int count;
1349
1350         if ((m = mp) == NULL)
1351                 return;
1352         if (len >= 0) {
1353                 /*
1354                  * Trim from head.
1355                  */
1356                 while (m != NULL && len > 0) {
1357                         if (m->m_len <= len) {
1358                                 len -= m->m_len;
1359                                 m->m_len = 0;
1360                                 m = m->m_next;
1361                         } else {
1362                                 m->m_len -= len;
1363                                 m->m_data += len;
1364                                 len = 0;
1365                         }
1366                 }
1367                 m = mp;
1368                 if (mp->m_flags & M_PKTHDR)
1369                         m->m_pkthdr.len -= (req_len - len);
1370         } else {
1371                 /*
1372                  * Trim from tail.  Scan the mbuf chain,
1373                  * calculating its length and finding the last mbuf.
1374                  * If the adjustment only affects this mbuf, then just
1375                  * adjust and return.  Otherwise, rescan and truncate
1376                  * after the remaining size.
1377                  */
1378                 len = -len;
1379                 count = 0;
1380                 for (;;) {
1381                         count += m->m_len;
1382                         if (m->m_next == NULL)
1383                                 break;
1384                         m = m->m_next;
1385                 }
1386                 if (m->m_len >= len) {
1387                         m->m_len -= len;
1388                         if (mp->m_flags & M_PKTHDR)
1389                                 mp->m_pkthdr.len -= len;
1390                         return;
1391                 }
1392                 count -= len;
1393                 if (count < 0)
1394                         count = 0;
1395                 /*
1396                  * Correct length for chain is "count".
1397                  * Find the mbuf with last data, adjust its length,
1398                  * and toss data from remaining mbufs on chain.
1399                  */
1400                 m = mp;
1401                 if (m->m_flags & M_PKTHDR)
1402                         m->m_pkthdr.len = count;
1403                 for (; m; m = m->m_next) {
1404                         if (m->m_len >= count) {
1405                                 m->m_len = count;
1406                                 break;
1407                         }
1408                         count -= m->m_len;
1409                 }
1410                 while (m->m_next)
1411                         (m = m->m_next) ->m_len = 0;
1412         }
1413 }
1414
1415 /*
1416  * Rearrange an mbuf chain so that len bytes are contiguous
1417  * and in the data area of an mbuf (so that mtod will work for a structure
1418  * of size len).  Returns the resulting mbuf chain on success, frees it and
1419  * returns null on failure.  If there is room, it will add up to
1420  * max_protohdr-len extra bytes to the contiguous region in an attempt to
1421  * avoid being called next time.
1422  */
1423 struct mbuf *
1424 m_pullup(struct mbuf *n, int len)
1425 {
1426         struct mbuf *m;
1427         int count;
1428         int space;
1429
1430         /*
1431          * If first mbuf has no cluster, and has room for len bytes
1432          * without shifting current data, pullup into it,
1433          * otherwise allocate a new mbuf to prepend to the chain.
1434          */
1435         if (!(n->m_flags & M_EXT) &&
1436             n->m_data + len < &n->m_dat[MLEN] &&
1437             n->m_next) {
1438                 if (n->m_len >= len)
1439                         return (n);
1440                 m = n;
1441                 n = n->m_next;
1442                 len -= m->m_len;
1443         } else {
1444                 if (len > MHLEN)
1445                         goto bad;
1446                 if (n->m_flags & M_PKTHDR)
1447                         m = m_gethdr(MB_DONTWAIT, n->m_type);
1448                 else
1449                         m = m_get(MB_DONTWAIT, n->m_type);
1450                 if (m == NULL)
1451                         goto bad;
1452                 m->m_len = 0;
1453                 if (n->m_flags & M_PKTHDR)
1454                         M_MOVE_PKTHDR(m, n);
1455         }
1456         space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
1457         do {
1458                 count = min(min(max(len, max_protohdr), space), n->m_len);
1459                 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
1460                   (unsigned)count);
1461                 len -= count;
1462                 m->m_len += count;
1463                 n->m_len -= count;
1464                 space -= count;
1465                 if (n->m_len)
1466                         n->m_data += count;
1467                 else
1468                         n = m_free(n);
1469         } while (len > 0 && n);
1470         if (len > 0) {
1471                 m_free(m);
1472                 goto bad;
1473         }
1474         m->m_next = n;
1475         return (m);
1476 bad:
1477         m_freem(n);
1478         atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mcfail, 1);
1479         return (NULL);
1480 }
1481
1482 /*
1483  * Partition an mbuf chain in two pieces, returning the tail --
1484  * all but the first len0 bytes.  In case of failure, it returns NULL and
1485  * attempts to restore the chain to its original state.
1486  *
1487  * Note that the resulting mbufs might be read-only, because the new
1488  * mbuf can end up sharing an mbuf cluster with the original mbuf if
1489  * the "breaking point" happens to lie within a cluster mbuf. Use the
1490  * M_WRITABLE() macro to check for this case.
1491  */
1492 struct mbuf *
1493 m_split(struct mbuf *m0, int len0, int wait)
1494 {
1495         struct mbuf *m, *n;
1496         unsigned len = len0, remain;
1497
1498         for (m = m0; m && len > m->m_len; m = m->m_next)
1499                 len -= m->m_len;
1500         if (m == NULL)
1501                 return (NULL);
1502         remain = m->m_len - len;
1503         if (m0->m_flags & M_PKTHDR) {
1504                 n = m_gethdr(wait, m0->m_type);
1505                 if (n == NULL)
1506                         return (NULL);
1507                 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
1508                 n->m_pkthdr.len = m0->m_pkthdr.len - len0;
1509                 m0->m_pkthdr.len = len0;
1510                 if (m->m_flags & M_EXT)
1511                         goto extpacket;
1512                 if (remain > MHLEN) {
1513                         /* m can't be the lead packet */
1514                         MH_ALIGN(n, 0);
1515                         n->m_next = m_split(m, len, wait);
1516                         if (n->m_next == NULL) {
1517                                 m_free(n);
1518                                 return (NULL);
1519                         } else {
1520                                 n->m_len = 0;
1521                                 return (n);
1522                         }
1523                 } else
1524                         MH_ALIGN(n, remain);
1525         } else if (remain == 0) {
1526                 n = m->m_next;
1527                 m->m_next = 0;
1528                 return (n);
1529         } else {
1530                 n = m_get(wait, m->m_type);
1531                 if (n == NULL)
1532                         return (NULL);
1533                 M_ALIGN(n, remain);
1534         }
1535 extpacket:
1536         if (m->m_flags & M_EXT) {
1537                 KKASSERT((n->m_flags & M_EXT) == 0);
1538                 n->m_data = m->m_data + len;
1539                 m->m_ext.ext_ref(m->m_ext.ext_arg);
1540                 n->m_ext = m->m_ext;
1541                 n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
1542         } else {
1543                 bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
1544         }
1545         n->m_len = remain;
1546         m->m_len = len;
1547         n->m_next = m->m_next;
1548         m->m_next = 0;
1549         return (n);
1550 }
1551
1552 /*
1553  * Routine to copy from device local memory into mbufs.
1554  * Note: "offset" is ill-defined and always called as 0, so ignore it.
1555  */
1556 struct mbuf *
1557 m_devget(char *buf, int len, int offset, struct ifnet *ifp,
1558     void (*copy)(volatile const void *from, volatile void *to, size_t length))
1559 {
1560         struct mbuf *m, *mfirst = NULL, **mtail;
1561         int nsize, flags;
1562
1563         if (copy == NULL)
1564                 copy = bcopy;
1565         mtail = &mfirst;
1566         flags = M_PKTHDR;
1567
1568         while (len > 0) {
1569                 m = m_getl(len, MB_DONTWAIT, MT_DATA, flags, &nsize);
1570                 if (m == NULL) {
1571                         m_freem(mfirst);
1572                         return (NULL);
1573                 }
1574                 m->m_len = min(len, nsize);
1575
1576                 if (flags & M_PKTHDR) {
1577                         if (len + max_linkhdr <= nsize)
1578                                 m->m_data += max_linkhdr;
1579                         m->m_pkthdr.rcvif = ifp;
1580                         m->m_pkthdr.len = len;
1581                         flags = 0;
1582                 }
1583
1584                 copy(buf, m->m_data, (unsigned)m->m_len);
1585                 buf += m->m_len;
1586                 len -= m->m_len;
1587                 *mtail = m;
1588                 mtail = &m->m_next;
1589         }
1590
1591         return (mfirst);
1592 }
1593
1594 /*
1595  * Routine to pad mbuf to the specified length 'padto'.
1596  */
1597 int
1598 m_devpad(struct mbuf *m, int padto)
1599 {
1600         struct mbuf *last = NULL;
1601         int padlen;
1602
1603         if (padto <= m->m_pkthdr.len)
1604                 return 0;
1605
1606         padlen = padto - m->m_pkthdr.len;
1607
1608         /* if there's only the packet-header and we can pad there, use it. */
1609         if (m->m_pkthdr.len == m->m_len && M_TRAILINGSPACE(m) >= padlen) {
1610                 last = m;
1611         } else {
1612                 /*
1613                  * Walk packet chain to find last mbuf. We will either
1614                  * pad there, or append a new mbuf and pad it
1615                  */
1616                 for (last = m; last->m_next != NULL; last = last->m_next)
1617                         ; /* EMPTY */
1618
1619                 /* `last' now points to last in chain. */
1620                 if (M_TRAILINGSPACE(last) < padlen) {
1621                         struct mbuf *n;
1622
1623                         /* Allocate new empty mbuf, pad it.  Compact later. */
1624                         MGET(n, MB_DONTWAIT, MT_DATA);
1625                         if (n == NULL)
1626                                 return ENOBUFS;
1627                         n->m_len = 0;
1628                         last->m_next = n;
1629                         last = n;
1630                 }
1631         }
1632         KKASSERT(M_TRAILINGSPACE(last) >= padlen);
1633         KKASSERT(M_WRITABLE(last));
1634
1635         /* Now zero the pad area */
1636         bzero(mtod(last, char *) + last->m_len, padlen);
1637         last->m_len += padlen;
1638         m->m_pkthdr.len += padlen;
1639         return 0;
1640 }
1641
1642 /*
1643  * Copy data from a buffer back into the indicated mbuf chain,
1644  * starting "off" bytes from the beginning, extending the mbuf
1645  * chain if necessary.
1646  */
1647 void
1648 m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
1649 {
1650         int mlen;
1651         struct mbuf *m = m0, *n;
1652         int totlen = 0;
1653
1654         if (m0 == NULL)
1655                 return;
1656         while (off > (mlen = m->m_len)) {
1657                 off -= mlen;
1658                 totlen += mlen;
1659                 if (m->m_next == NULL) {
1660                         n = m_getclr(MB_DONTWAIT, m->m_type);
1661                         if (n == NULL)
1662                                 goto out;
1663                         n->m_len = min(MLEN, len + off);
1664                         m->m_next = n;
1665                 }
1666                 m = m->m_next;
1667         }
1668         while (len > 0) {
1669                 mlen = min (m->m_len - off, len);
1670                 bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
1671                 cp += mlen;
1672                 len -= mlen;
1673                 mlen += off;
1674                 off = 0;
1675                 totlen += mlen;
1676                 if (len == 0)
1677                         break;
1678                 if (m->m_next == NULL) {
1679                         n = m_get(MB_DONTWAIT, m->m_type);
1680                         if (n == NULL)
1681                                 break;
1682                         n->m_len = min(MLEN, len);
1683                         m->m_next = n;
1684                 }
1685                 m = m->m_next;
1686         }
1687 out:    if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
1688                 m->m_pkthdr.len = totlen;
1689 }
1690
1691 /*
1692  * Apply function f to the data in an mbuf chain starting "off" bytes from
1693  * the beginning, continuing for "len" bytes.
1694  */
1695 int
1696 m_apply(struct mbuf *m, int off, int len,
1697     int (*f)(void *, void *, u_int), void *arg)
1698 {
1699         u_int count;
1700         int rval;
1701
1702         KASSERT(off >= 0, ("m_apply, negative off %d", off));
1703         KASSERT(len >= 0, ("m_apply, negative len %d", len));
1704         while (off > 0) {
1705                 KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
1706                 if (off < m->m_len)
1707                         break;
1708                 off -= m->m_len;
1709                 m = m->m_next;
1710         }
1711         while (len > 0) {
1712                 KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
1713                 count = min(m->m_len - off, len);
1714                 rval = (*f)(arg, mtod(m, caddr_t) + off, count);
1715                 if (rval)
1716                         return (rval);
1717                 len -= count;
1718                 off = 0;
1719                 m = m->m_next;
1720         }
1721         return (0);
1722 }
1723
1724 /*
1725  * Return a pointer to mbuf/offset of location in mbuf chain.
1726  */
1727 struct mbuf *
1728 m_getptr(struct mbuf *m, int loc, int *off)
1729 {
1730
1731         while (loc >= 0) {
1732                 /* Normal end of search. */
1733                 if (m->m_len > loc) {
1734                         *off = loc;
1735                         return (m);
1736                 } else {
1737                         loc -= m->m_len;
1738                         if (m->m_next == NULL) {
1739                                 if (loc == 0) {
1740                                         /* Point at the end of valid data. */
1741                                         *off = m->m_len;
1742                                         return (m);
1743                                 }
1744                                 return (NULL);
1745                         }
1746                         m = m->m_next;
1747                 }
1748         }
1749         return (NULL);
1750 }
1751
1752 void
1753 m_print(const struct mbuf *m)
1754 {
1755         int len;
1756         const struct mbuf *m2;
1757
1758         len = m->m_pkthdr.len;
1759         m2 = m;
1760         while (len) {
1761                 kprintf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
1762                 len -= m2->m_len;
1763                 m2 = m2->m_next;
1764         }
1765         return;
1766 }
1767
1768 /*
1769  * "Move" mbuf pkthdr from "from" to "to".
1770  * "from" must have M_PKTHDR set, and "to" must be empty.
1771  */
1772 void
1773 m_move_pkthdr(struct mbuf *to, struct mbuf *from)
1774 {
1775         KASSERT((to->m_flags & M_PKTHDR), ("m_move_pkthdr: not packet header"));
1776
1777         to->m_flags |= from->m_flags & M_COPYFLAGS;
1778         to->m_pkthdr = from->m_pkthdr;          /* especially tags */
1779         SLIST_INIT(&from->m_pkthdr.tags);       /* purge tags from src */
1780 }
1781
1782 /*
1783  * Duplicate "from"'s mbuf pkthdr in "to".
1784  * "from" must have M_PKTHDR set, and "to" must be empty.
1785  * In particular, this does a deep copy of the packet tags.
1786  */
1787 int
1788 m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how)
1789 {
1790         KASSERT((to->m_flags & M_PKTHDR), ("m_dup_pkthdr: not packet header"));
1791
1792         to->m_flags = (from->m_flags & M_COPYFLAGS) |
1793                       (to->m_flags & ~M_COPYFLAGS);
1794         to->m_pkthdr = from->m_pkthdr;
1795         SLIST_INIT(&to->m_pkthdr.tags);
1796         return (m_tag_copy_chain(to, from, how));
1797 }
1798
1799 /*
1800  * Defragment a mbuf chain, returning the shortest possible
1801  * chain of mbufs and clusters.  If allocation fails and
1802  * this cannot be completed, NULL will be returned, but
1803  * the passed in chain will be unchanged.  Upon success,
1804  * the original chain will be freed, and the new chain
1805  * will be returned.
1806  *
1807  * If a non-packet header is passed in, the original
1808  * mbuf (chain?) will be returned unharmed.
1809  *
1810  * m_defrag_nofree doesn't free the passed in mbuf.
1811  */
1812 struct mbuf *
1813 m_defrag(struct mbuf *m0, int how)
1814 {
1815         struct mbuf *m_new;
1816
1817         if ((m_new = m_defrag_nofree(m0, how)) == NULL)
1818                 return (NULL);
1819         if (m_new != m0)
1820                 m_freem(m0);
1821         return (m_new);
1822 }
1823
1824 struct mbuf *
1825 m_defrag_nofree(struct mbuf *m0, int how)
1826 {
1827         struct mbuf     *m_new = NULL, *m_final = NULL;
1828         int             progress = 0, length, nsize;
1829
1830         if (!(m0->m_flags & M_PKTHDR))
1831                 return (m0);
1832
1833 #ifdef MBUF_STRESS_TEST
1834         if (m_defragrandomfailures) {
1835                 int temp = karc4random() & 0xff;
1836                 if (temp == 0xba)
1837                         goto nospace;
1838         }
1839 #endif
1840
1841         m_final = m_getl(m0->m_pkthdr.len, how, MT_DATA, M_PKTHDR, &nsize);
1842         if (m_final == NULL)
1843                 goto nospace;
1844         m_final->m_len = 0;     /* in case m0->m_pkthdr.len is zero */
1845
1846         if (m_dup_pkthdr(m_final, m0, how) == 0)
1847                 goto nospace;
1848
1849         m_new = m_final;
1850
1851         while (progress < m0->m_pkthdr.len) {
1852                 length = m0->m_pkthdr.len - progress;
1853                 if (length > MCLBYTES)
1854                         length = MCLBYTES;
1855
1856                 if (m_new == NULL) {
1857                         m_new = m_getl(length, how, MT_DATA, 0, &nsize);
1858                         if (m_new == NULL)
1859                                 goto nospace;
1860                 }
1861
1862                 m_copydata(m0, progress, length, mtod(m_new, caddr_t));
1863                 progress += length;
1864                 m_new->m_len = length;
1865                 if (m_new != m_final)
1866                         m_cat(m_final, m_new);
1867                 m_new = NULL;
1868         }
1869         if (m0->m_next == NULL)
1870                 m_defraguseless++;
1871         m_defragpackets++;
1872         m_defragbytes += m_final->m_pkthdr.len;
1873         return (m_final);
1874 nospace:
1875         m_defragfailure++;
1876         if (m_new)
1877                 m_free(m_new);
1878         m_freem(m_final);
1879         return (NULL);
1880 }
1881
1882 /*
1883  * Move data from uio into mbufs.
1884  */
1885 struct mbuf *
1886 m_uiomove(struct uio *uio)
1887 {
1888         struct mbuf *m;                 /* current working mbuf */
1889         struct mbuf *head = NULL;       /* result mbuf chain */
1890         struct mbuf **mp = &head;
1891         int flags = M_PKTHDR;
1892         int nsize;
1893         int error;
1894         int resid;
1895
1896         do {
1897                 if (uio->uio_resid > INT_MAX)
1898                         resid = INT_MAX;
1899                 else
1900                         resid = (int)uio->uio_resid;
1901                 m = m_getl(resid, MB_WAIT, MT_DATA, flags, &nsize);
1902                 if (flags) {
1903                         m->m_pkthdr.len = 0;
1904                         /* Leave room for protocol headers. */
1905                         if (resid < MHLEN)
1906                                 MH_ALIGN(m, resid);
1907                         flags = 0;
1908                 }
1909                 m->m_len = imin(nsize, resid);
1910                 error = uiomove(mtod(m, caddr_t), m->m_len, uio);
1911                 if (error) {
1912                         m_free(m);
1913                         goto failed;
1914                 }
1915                 *mp = m;
1916                 mp = &m->m_next;
1917                 head->m_pkthdr.len += m->m_len;
1918         } while (uio->uio_resid > 0);
1919
1920         return (head);
1921
1922 failed:
1923         m_freem(head);
1924         return (NULL);
1925 }
1926
1927 struct mbuf *
1928 m_last(struct mbuf *m)
1929 {
1930         while (m->m_next)
1931                 m = m->m_next;
1932         return (m);
1933 }
1934
1935 /*
1936  * Return the number of bytes in an mbuf chain.
1937  * If lastm is not NULL, also return the last mbuf.
1938  */
1939 u_int
1940 m_lengthm(struct mbuf *m, struct mbuf **lastm)
1941 {
1942         u_int len = 0;
1943         struct mbuf *prev = m;
1944
1945         while (m) {
1946                 len += m->m_len;
1947                 prev = m;
1948                 m = m->m_next;
1949         }
1950         if (lastm != NULL)
1951                 *lastm = prev;
1952         return (len);
1953 }
1954
1955 /*
1956  * Like m_lengthm(), except also keep track of mbuf usage.
1957  */
1958 u_int
1959 m_countm(struct mbuf *m, struct mbuf **lastm, u_int *pmbcnt)
1960 {
1961         u_int len = 0, mbcnt = 0;
1962         struct mbuf *prev = m;
1963
1964         while (m) {
1965                 len += m->m_len;
1966                 mbcnt += MSIZE;
1967                 if (m->m_flags & M_EXT)
1968                         mbcnt += m->m_ext.ext_size;
1969                 prev = m;
1970                 m = m->m_next;
1971         }
1972         if (lastm != NULL)
1973                 *lastm = prev;
1974         *pmbcnt = mbcnt;
1975         return (len);
1976 }